drm/amdgpu: add navi12 asic type
drivers/gpu/drm/amd/amdgpu/amdgpu_device.c [linux-2.6-block.git]
d38ceaf9
AD
1/*
2 * Copyright 2008 Advanced Micro Devices, Inc.
3 * Copyright 2008 Red Hat Inc.
4 * Copyright 2009 Jerome Glisse.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 * and/or sell copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
20 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
21 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
22 * OTHER DEALINGS IN THE SOFTWARE.
23 *
24 * Authors: Dave Airlie
25 * Alex Deucher
26 * Jerome Glisse
27 */
b1ddf548 28#include <linux/power_supply.h>
0875dc9e 29#include <linux/kthread.h>
fdf2f6c5 30#include <linux/module.h>
d38ceaf9
AD
31#include <linux/console.h>
32#include <linux/slab.h>
fdf2f6c5 33
4562236b 34#include <drm/drm_atomic_helper.h>
fcd70cd3 35#include <drm/drm_probe_helper.h>
d38ceaf9
AD
36#include <drm/amdgpu_drm.h>
37#include <linux/vgaarb.h>
38#include <linux/vga_switcheroo.h>
39#include <linux/efi.h>
40#include "amdgpu.h"
f4b373f4 41#include "amdgpu_trace.h"
d38ceaf9
AD
42#include "amdgpu_i2c.h"
43#include "atom.h"
44#include "amdgpu_atombios.h"
a5bde2f9 45#include "amdgpu_atomfirmware.h"
d0dd7f0c 46#include "amd_pcie.h"
33f34802
KW
47#ifdef CONFIG_DRM_AMDGPU_SI
48#include "si.h"
49#endif
a2e73f56
AD
50#ifdef CONFIG_DRM_AMDGPU_CIK
51#include "cik.h"
52#endif
aaa36a97 53#include "vi.h"
460826e6 54#include "soc15.h"
0a5b8c7b 55#include "nv.h"
d38ceaf9 56#include "bif/bif_4_1_d.h"
9accf2fd 57#include <linux/pci.h>
bec86378 58#include <linux/firmware.h>
89041940 59#include "amdgpu_vf_error.h"
d38ceaf9 60
ba997709 61#include "amdgpu_amdkfd.h"
d2f52ac8 62#include "amdgpu_pm.h"
d38ceaf9 63
5183411b 64#include "amdgpu_xgmi.h"
c030f2e4 65#include "amdgpu_ras.h"
9c7c85f7 66#include "amdgpu_pmu.h"
5183411b 67
e2a75f88 68MODULE_FIRMWARE("amdgpu/vega10_gpu_info.bin");
3f76dced 69MODULE_FIRMWARE("amdgpu/vega12_gpu_info.bin");
2d2e5e7e 70MODULE_FIRMWARE("amdgpu/raven_gpu_info.bin");
ad5a67a7 71MODULE_FIRMWARE("amdgpu/picasso_gpu_info.bin");
54c4d17e 72MODULE_FIRMWARE("amdgpu/raven2_gpu_info.bin");
65e60f6e 73MODULE_FIRMWARE("amdgpu/arcturus_gpu_info.bin");
23c6268e 74MODULE_FIRMWARE("amdgpu/navi10_gpu_info.bin");
ed42cfe1 75MODULE_FIRMWARE("amdgpu/navi14_gpu_info.bin");
e2a75f88 76
2dc80b00
S
77#define AMDGPU_RESUME_MS 2000
78
d38ceaf9 79static const char *amdgpu_asic_name[] = {
da69c161
KW
80 "TAHITI",
81 "PITCAIRN",
82 "VERDE",
83 "OLAND",
84 "HAINAN",
d38ceaf9
AD
85 "BONAIRE",
86 "KAVERI",
87 "KABINI",
88 "HAWAII",
89 "MULLINS",
90 "TOPAZ",
91 "TONGA",
48299f95 92 "FIJI",
d38ceaf9 93 "CARRIZO",
139f4917 94 "STONEY",
2cc0c0b5
FC
95 "POLARIS10",
96 "POLARIS11",
c4642a47 97 "POLARIS12",
48ff108d 98 "VEGAM",
d4196f01 99 "VEGA10",
8fab806a 100 "VEGA12",
956fcddc 101 "VEGA20",
2ca8a5d2 102 "RAVEN",
d6c3b24e 103 "ARCTURUS",
852a6626 104 "NAVI10",
87dbad02 105 "NAVI14",
9802f5d7 106 "NAVI12",
d38ceaf9
AD
107 "LAST",
108};
109
dcea6e65
KR
110/**
111 * DOC: pcie_replay_count
112 *
113 * The amdgpu driver provides a sysfs API for reporting the total number
114 * of PCIe replays (NAKs).
115 * The file pcie_replay_count is used for this and returns the total
116 * number of replays as a sum of the NAKs generated and NAKs received.
117 */
118
119static ssize_t amdgpu_device_get_pcie_replay_count(struct device *dev,
120 struct device_attribute *attr, char *buf)
121{
122 struct drm_device *ddev = dev_get_drvdata(dev);
123 struct amdgpu_device *adev = ddev->dev_private;
124 uint64_t cnt = amdgpu_asic_get_pcie_replay_count(adev);
125
126 return snprintf(buf, PAGE_SIZE, "%llu\n", cnt);
127}
128
129static DEVICE_ATTR(pcie_replay_count, S_IRUGO,
130 amdgpu_device_get_pcie_replay_count, NULL);
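/*
 * Illustrative sketch, not part of the original file: the attribute defined
 * above is typically registered against the device late in init and read
 * from user space via sysfs. Roughly (exact call site assumed):
 *
 *	r = device_create_file(adev->dev, &dev_attr_pcie_replay_count);
 *	if (r)
 *		DRM_ERROR("Could not create pcie_replay_count sysfs file\n");
 *
 * which exposes /sys/class/drm/cardN/device/pcie_replay_count (path assumed).
 */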
131
5494d864
AD
132static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev);
133
e3ecdffa
AD
134/**
135 * amdgpu_device_is_px - Is the device a dGPU with HG/PX power control
136 *
137 * @dev: drm_device pointer
138 *
139 * Returns true if the device is a dGPU with HG/PX power control,
140 * otherwise return false.
141 */
d38ceaf9
AD
142bool amdgpu_device_is_px(struct drm_device *dev)
143{
144 struct amdgpu_device *adev = dev->dev_private;
145
2f7d10b3 146 if (adev->flags & AMD_IS_PX)
d38ceaf9
AD
147 return true;
148 return false;
149}
150
151/*
152 * MMIO register access helper functions.
153 */
e3ecdffa
AD
154/**
155 * amdgpu_mm_rreg - read a memory mapped IO register
156 *
157 * @adev: amdgpu_device pointer
158 * @reg: dword aligned register offset
159 * @acc_flags: access flags which require special behavior
160 *
161 * Returns the 32 bit value from the offset specified.
162 */
d38ceaf9 163uint32_t amdgpu_mm_rreg(struct amdgpu_device *adev, uint32_t reg,
15d72fd7 164 uint32_t acc_flags)
d38ceaf9 165{
f4b373f4
TSD
166 uint32_t ret;
167
43ca8efa 168 if (!(acc_flags & AMDGPU_REGS_NO_KIQ) && amdgpu_sriov_runtime(adev))
bc992ba5 169 return amdgpu_virt_kiq_rreg(adev, reg);
bc992ba5 170
15d72fd7 171 if ((reg * 4) < adev->rmmio_size && !(acc_flags & AMDGPU_REGS_IDX))
f4b373f4 172 ret = readl(((void __iomem *)adev->rmmio) + (reg * 4));
d38ceaf9
AD
173 else {
174 unsigned long flags;
d38ceaf9
AD
175
176 spin_lock_irqsave(&adev->mmio_idx_lock, flags);
177 writel((reg * 4), ((void __iomem *)adev->rmmio) + (mmMM_INDEX * 4));
178 ret = readl(((void __iomem *)adev->rmmio) + (mmMM_DATA * 4));
179 spin_unlock_irqrestore(&adev->mmio_idx_lock, flags);
d38ceaf9 180 }
f4b373f4
TSD
181 trace_amdgpu_mm_rreg(adev->pdev->device, reg, ret);
182 return ret;
d38ceaf9
AD
183}
184
421a2a30
ML
185/*
186 * MMIO register read with bytes helper functions
187 * @offset: byte offset from MMIO start
188 *
189*/
190
e3ecdffa
AD
191/**
192 * amdgpu_mm_rreg8 - read a memory mapped IO register
193 *
194 * @adev: amdgpu_device pointer
195 * @offset: byte aligned register offset
196 *
197 * Returns the 8 bit value from the offset specified.
198 */
421a2a30
ML
199uint8_t amdgpu_mm_rreg8(struct amdgpu_device *adev, uint32_t offset) {
200 if (offset < adev->rmmio_size)
201 return (readb(adev->rmmio + offset));
202 BUG();
203}
204
205/*
206 * MMIO register write with bytes helper functions
207 * @offset: byte offset from MMIO start
208 * @value: the value to be written to the register
209 *
210*/
e3ecdffa
AD
211/**
212 * amdgpu_mm_wreg8 - write a memory mapped IO register
213 *
214 * @adev: amdgpu_device pointer
215 * @offset: byte aligned register offset
216 * @value: 8 bit value to write
217 *
218 * Writes the value specified to the offset specified.
219 */
421a2a30
ML
220void amdgpu_mm_wreg8(struct amdgpu_device *adev, uint32_t offset, uint8_t value) {
221 if (offset < adev->rmmio_size)
222 writeb(value, adev->rmmio + offset);
223 else
224 BUG();
225}
226
e3ecdffa
AD
227/**
228 * amdgpu_mm_wreg - write to a memory mapped IO register
229 *
230 * @adev: amdgpu_device pointer
231 * @reg: dword aligned register offset
232 * @v: 32 bit value to write to the register
233 * @acc_flags: access flags which require special behavior
234 *
235 * Writes the value specified to the offset specified.
236 */
d38ceaf9 237void amdgpu_mm_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v,
15d72fd7 238 uint32_t acc_flags)
d38ceaf9 239{
f4b373f4 240 trace_amdgpu_mm_wreg(adev->pdev->device, reg, v);
4e99a44e 241
47ed4e1c
KW
242 if (adev->asic_type >= CHIP_VEGA10 && reg == 0) {
243 adev->last_mm_index = v;
244 }
245
43ca8efa 246 if (!(acc_flags & AMDGPU_REGS_NO_KIQ) && amdgpu_sriov_runtime(adev))
bc992ba5 247 return amdgpu_virt_kiq_wreg(adev, reg, v);
bc992ba5 248
15d72fd7 249 if ((reg * 4) < adev->rmmio_size && !(acc_flags & AMDGPU_REGS_IDX))
d38ceaf9
AD
250 writel(v, ((void __iomem *)adev->rmmio) + (reg * 4));
251 else {
252 unsigned long flags;
253
254 spin_lock_irqsave(&adev->mmio_idx_lock, flags);
255 writel((reg * 4), ((void __iomem *)adev->rmmio) + (mmMM_INDEX * 4));
256 writel(v, ((void __iomem *)adev->rmmio) + (mmMM_DATA * 4));
257 spin_unlock_irqrestore(&adev->mmio_idx_lock, flags);
258 }
47ed4e1c
KW
259
260 if (adev->asic_type >= CHIP_VEGA10 && reg == 1 && adev->last_mm_index == 0x5702C) {
261 udelay(500);
262 }
d38ceaf9
AD
263}
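/*
 * Illustrative sketch, not part of the original file: most callers do not
 * use amdgpu_mm_rreg()/amdgpu_mm_wreg() directly but go through the
 * RREG32()/WREG32() style macros from amdgpu.h, e.g. a read-modify-write
 * (reg_offset and SOME_MASK are placeholders, not real registers):
 *
 *	u32 tmp = RREG32(reg_offset);
 *	tmp |= SOME_MASK;
 *	WREG32(reg_offset, tmp);
 */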
264
4fa1c6a6
TZ
265/**
266 * amdgpu_mm_rreg64 - read a 64 bit memory mapped IO register
267 *
268 * @adev: amdgpu_device pointer
269 * @reg: dword aligned register offset
270 *
271 * Returns the 64 bit value from the offset specified.
272 */
273uint64_t amdgpu_mm_rreg64(struct amdgpu_device *adev, uint32_t reg)
274{
275 uint64_t ret;
276
277 if ((reg * 4) < adev->rmmio_size)
278 ret = readq(((void __iomem *)adev->rmmio) + (reg * 4));
279 else
280 BUG();
281
282 return ret;
283}
284
285/**
286 * amdgpu_mm_wreg64 - write to a 64 bit memory mapped IO register
287 *
288 * @adev: amdgpu_device pointer
289 * @reg: dword aligned register offset
290 * @v: 64 bit value to write to the register
291 *
292 * Writes the value specified to the offset specified.
293 */
294void amdgpu_mm_wreg64(struct amdgpu_device *adev, uint32_t reg, uint64_t v)
295{
296 if ((reg * 4) < adev->rmmio_size)
297 writeq(v, ((void __iomem *)adev->rmmio) + (reg * 4));
298 else
299 BUG();
300}
301
e3ecdffa
AD
302/**
303 * amdgpu_io_rreg - read an IO register
304 *
305 * @adev: amdgpu_device pointer
306 * @reg: dword aligned register offset
307 *
308 * Returns the 32 bit value from the offset specified.
309 */
d38ceaf9
AD
310u32 amdgpu_io_rreg(struct amdgpu_device *adev, u32 reg)
311{
312 if ((reg * 4) < adev->rio_mem_size)
313 return ioread32(adev->rio_mem + (reg * 4));
314 else {
315 iowrite32((reg * 4), adev->rio_mem + (mmMM_INDEX * 4));
316 return ioread32(adev->rio_mem + (mmMM_DATA * 4));
317 }
318}
319
e3ecdffa
AD
320/**
321 * amdgpu_io_wreg - write to an IO register
322 *
323 * @adev: amdgpu_device pointer
324 * @reg: dword aligned register offset
325 * @v: 32 bit value to write to the register
326 *
327 * Writes the value specified to the offset specified.
328 */
d38ceaf9
AD
329void amdgpu_io_wreg(struct amdgpu_device *adev, u32 reg, u32 v)
330{
47ed4e1c
KW
331 if (adev->asic_type >= CHIP_VEGA10 && reg == 0) {
332 adev->last_mm_index = v;
333 }
d38ceaf9
AD
334
335 if ((reg * 4) < adev->rio_mem_size)
336 iowrite32(v, adev->rio_mem + (reg * 4));
337 else {
338 iowrite32((reg * 4), adev->rio_mem + (mmMM_INDEX * 4));
339 iowrite32(v, adev->rio_mem + (mmMM_DATA * 4));
340 }
47ed4e1c
KW
341
342 if (adev->asic_type >= CHIP_VEGA10 && reg == 1 && adev->last_mm_index == 0x5702C) {
343 udelay(500);
344 }
d38ceaf9
AD
345}
346
347/**
348 * amdgpu_mm_rdoorbell - read a doorbell dword
349 *
350 * @adev: amdgpu_device pointer
351 * @index: doorbell index
352 *
353 * Returns the value in the doorbell aperture at the
354 * requested doorbell index (CIK).
355 */
356u32 amdgpu_mm_rdoorbell(struct amdgpu_device *adev, u32 index)
357{
358 if (index < adev->doorbell.num_doorbells) {
359 return readl(adev->doorbell.ptr + index);
360 } else {
361 DRM_ERROR("reading beyond doorbell aperture: 0x%08x!\n", index);
362 return 0;
363 }
364}
365
366/**
367 * amdgpu_mm_wdoorbell - write a doorbell dword
368 *
369 * @adev: amdgpu_device pointer
370 * @index: doorbell index
371 * @v: value to write
372 *
373 * Writes @v to the doorbell aperture at the
374 * requested doorbell index (CIK).
375 */
376void amdgpu_mm_wdoorbell(struct amdgpu_device *adev, u32 index, u32 v)
377{
378 if (index < adev->doorbell.num_doorbells) {
379 writel(v, adev->doorbell.ptr + index);
380 } else {
381 DRM_ERROR("writing beyond doorbell aperture: 0x%08x!\n", index);
382 }
383}
384
832be404
KW
385/**
386 * amdgpu_mm_rdoorbell64 - read a doorbell Qword
387 *
388 * @adev: amdgpu_device pointer
389 * @index: doorbell index
390 *
391 * Returns the value in the doorbell aperture at the
392 * requested doorbell index (VEGA10+).
393 */
394u64 amdgpu_mm_rdoorbell64(struct amdgpu_device *adev, u32 index)
395{
396 if (index < adev->doorbell.num_doorbells) {
397 return atomic64_read((atomic64_t *)(adev->doorbell.ptr + index));
398 } else {
399 DRM_ERROR("reading beyond doorbell aperture: 0x%08x!\n", index);
400 return 0;
401 }
402}
403
404/**
405 * amdgpu_mm_wdoorbell64 - write a doorbell Qword
406 *
407 * @adev: amdgpu_device pointer
408 * @index: doorbell index
409 * @v: value to write
410 *
411 * Writes @v to the doorbell aperture at the
412 * requested doorbell index (VEGA10+).
413 */
414void amdgpu_mm_wdoorbell64(struct amdgpu_device *adev, u32 index, u64 v)
415{
416 if (index < adev->doorbell.num_doorbells) {
417 atomic64_set((atomic64_t *)(adev->doorbell.ptr + index), v);
418 } else {
419 DRM_ERROR("writing beyond doorbell aperture: 0x%08x!\n", index);
420 }
421}
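/*
 * Illustrative sketch, not part of the original file: ring code normally
 * uses the WDOORBELL32()/WDOORBELL64() macros (which wrap the helpers
 * above) to kick an engine, e.g. for a ring with a valid doorbell_index:
 *
 *	WDOORBELL64(ring->doorbell_index, ring->wptr);
 */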
422
d38ceaf9
AD
423/**
424 * amdgpu_invalid_rreg - dummy reg read function
425 *
426 * @adev: amdgpu device pointer
427 * @reg: offset of register
428 *
429 * Dummy register read function. Used for register blocks
430 * that certain asics don't have (all asics).
431 * Returns the value in the register.
432 */
433static uint32_t amdgpu_invalid_rreg(struct amdgpu_device *adev, uint32_t reg)
434{
435 DRM_ERROR("Invalid callback to read register 0x%04X\n", reg);
436 BUG();
437 return 0;
438}
439
440/**
441 * amdgpu_invalid_wreg - dummy reg write function
442 *
443 * @adev: amdgpu device pointer
444 * @reg: offset of register
445 * @v: value to write to the register
446 *
447 * Dummy register write function. Used for register blocks
448 * that certain asics don't have (all asics).
449 */
450static void amdgpu_invalid_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v)
451{
452 DRM_ERROR("Invalid callback to write register 0x%04X with 0x%08X\n",
453 reg, v);
454 BUG();
455}
456
4fa1c6a6
TZ
457/**
458 * amdgpu_invalid_rreg64 - dummy 64 bit reg read function
459 *
460 * @adev: amdgpu device pointer
461 * @reg: offset of register
462 *
463 * Dummy register read function. Used for register blocks
464 * that certain asics don't have (all asics).
465 * Returns the value in the register.
466 */
467static uint64_t amdgpu_invalid_rreg64(struct amdgpu_device *adev, uint32_t reg)
468{
469 DRM_ERROR("Invalid callback to read 64 bit register 0x%04X\n", reg);
470 BUG();
471 return 0;
472}
473
474/**
475 * amdgpu_invalid_wreg64 - dummy reg write function
476 *
477 * @adev: amdgpu device pointer
478 * @reg: offset of register
479 * @v: value to write to the register
480 *
481 * Dummy register write function. Used for register blocks
482 * that certain asics don't have (all asics).
483 */
484static void amdgpu_invalid_wreg64(struct amdgpu_device *adev, uint32_t reg, uint64_t v)
485{
486 DRM_ERROR("Invalid callback to write 64 bit register 0x%04X with 0x%08llX\n",
487 reg, v);
488 BUG();
489}
490
d38ceaf9
AD
491/**
492 * amdgpu_block_invalid_rreg - dummy reg read function
493 *
494 * @adev: amdgpu device pointer
495 * @block: offset of instance
496 * @reg: offset of register
497 *
498 * Dummy register read function. Used for register blocks
499 * that certain asics don't have (all asics).
500 * Returns the value in the register.
501 */
502static uint32_t amdgpu_block_invalid_rreg(struct amdgpu_device *adev,
503 uint32_t block, uint32_t reg)
504{
505 DRM_ERROR("Invalid callback to read register 0x%04X in block 0x%04X\n",
506 reg, block);
507 BUG();
508 return 0;
509}
510
511/**
512 * amdgpu_block_invalid_wreg - dummy reg write function
513 *
514 * @adev: amdgpu device pointer
515 * @block: offset of instance
516 * @reg: offset of register
517 * @v: value to write to the register
518 *
519 * Dummy register write function. Used for register blocks
520 * that certain asics don't have (all asics).
521 */
522static void amdgpu_block_invalid_wreg(struct amdgpu_device *adev,
523 uint32_t block,
524 uint32_t reg, uint32_t v)
525{
526 DRM_ERROR("Invalid block callback to write register 0x%04X in block 0x%04X with 0x%08X\n",
527 reg, block, v);
528 BUG();
529}
530
e3ecdffa
AD
531/**
532 * amdgpu_device_vram_scratch_init - allocate the VRAM scratch page
533 *
534 * @adev: amdgpu device pointer
535 *
536 * Allocates a scratch page of VRAM for use by various things in the
537 * driver.
538 */
06ec9070 539static int amdgpu_device_vram_scratch_init(struct amdgpu_device *adev)
d38ceaf9 540{
a4a02777
CK
541 return amdgpu_bo_create_kernel(adev, AMDGPU_GPU_PAGE_SIZE,
542 PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM,
543 &adev->vram_scratch.robj,
544 &adev->vram_scratch.gpu_addr,
545 (void **)&adev->vram_scratch.ptr);
d38ceaf9
AD
546}
547
e3ecdffa
AD
548/**
549 * amdgpu_device_vram_scratch_fini - Free the VRAM scratch page
550 *
551 * @adev: amdgpu device pointer
552 *
553 * Frees the VRAM scratch page.
554 */
06ec9070 555static void amdgpu_device_vram_scratch_fini(struct amdgpu_device *adev)
d38ceaf9 556{
078af1a3 557 amdgpu_bo_free_kernel(&adev->vram_scratch.robj, NULL, NULL);
d38ceaf9
AD
558}
559
560/**
9c3f2b54 561 * amdgpu_device_program_register_sequence - program an array of registers.
d38ceaf9
AD
562 *
563 * @adev: amdgpu_device pointer
564 * @registers: pointer to the register array
565 * @array_size: size of the register array
566 *
567 * Programs an array of registers with AND and OR masks.
568 * This is a helper for setting golden registers.
569 */
9c3f2b54
AD
570void amdgpu_device_program_register_sequence(struct amdgpu_device *adev,
571 const u32 *registers,
572 const u32 array_size)
d38ceaf9
AD
573{
574 u32 tmp, reg, and_mask, or_mask;
575 int i;
576
577 if (array_size % 3)
578 return;
579
580 for (i = 0; i < array_size; i +=3) {
581 reg = registers[i + 0];
582 and_mask = registers[i + 1];
583 or_mask = registers[i + 2];
584
585 if (and_mask == 0xffffffff) {
586 tmp = or_mask;
587 } else {
588 tmp = RREG32(reg);
589 tmp &= ~and_mask;
e0d07657
HZ
590 if (adev->family >= AMDGPU_FAMILY_AI)
591 tmp |= (or_mask & and_mask);
592 else
593 tmp |= or_mask;
d38ceaf9
AD
594 }
595 WREG32(reg, tmp);
596 }
597}
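/*
 * Illustrative sketch, not part of the original file: the register array is
 * a flat list of (offset, and_mask, or_mask) triples. A made-up golden
 * settings table would look like:
 *
 *	static const u32 golden_settings_example[] = {
 *		mmEXAMPLE_REG, 0xffffffff, 0x00000001,
 *	};
 *
 *	amdgpu_device_program_register_sequence(adev, golden_settings_example,
 *						ARRAY_SIZE(golden_settings_example));
 *
 * mmEXAMPLE_REG is a placeholder, not a real register offset.
 */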
598
e3ecdffa
AD
599/**
600 * amdgpu_device_pci_config_reset - reset the GPU
601 *
602 * @adev: amdgpu_device pointer
603 *
604 * Resets the GPU using the pci config reset sequence.
605 * Only applicable to asics prior to vega10.
606 */
8111c387 607void amdgpu_device_pci_config_reset(struct amdgpu_device *adev)
d38ceaf9
AD
608{
609 pci_write_config_dword(adev->pdev, 0x7c, AMDGPU_ASIC_RESET_DATA);
610}
611
612/*
613 * GPU doorbell aperture helpers function.
614 */
615/**
06ec9070 616 * amdgpu_device_doorbell_init - Init doorbell driver information.
d38ceaf9
AD
617 *
618 * @adev: amdgpu_device pointer
619 *
620 * Init doorbell driver information (CIK)
621 * Returns 0 on success, error on failure.
622 */
06ec9070 623static int amdgpu_device_doorbell_init(struct amdgpu_device *adev)
d38ceaf9 624{
6585661d 625
705e519e
CK
626 /* No doorbell on SI hardware generation */
627 if (adev->asic_type < CHIP_BONAIRE) {
628 adev->doorbell.base = 0;
629 adev->doorbell.size = 0;
630 adev->doorbell.num_doorbells = 0;
631 adev->doorbell.ptr = NULL;
632 return 0;
633 }
634
d6895ad3
CK
635 if (pci_resource_flags(adev->pdev, 2) & IORESOURCE_UNSET)
636 return -EINVAL;
637
22357775
AD
638 amdgpu_asic_init_doorbell_index(adev);
639
d38ceaf9
AD
640 /* doorbell bar mapping */
641 adev->doorbell.base = pci_resource_start(adev->pdev, 2);
642 adev->doorbell.size = pci_resource_len(adev->pdev, 2);
643
edf600da 644 adev->doorbell.num_doorbells = min_t(u32, adev->doorbell.size / sizeof(u32),
9564f192 645 adev->doorbell_index.max_assignment+1);
d38ceaf9
AD
646 if (adev->doorbell.num_doorbells == 0)
647 return -EINVAL;
648
ec3db8a6 649 /* For Vega, reserve and map two pages on doorbell BAR since SDMA
88dc26e4
OZ
650 * paging queue doorbell uses the second page. The
651 * AMDGPU_DOORBELL64_MAX_ASSIGNMENT definition assumes all the
652 * doorbells are in the first page. So with the paging queue enabled,
653 * the max num_doorbells must grow by one page (0x400 in dwords).
ec3db8a6
PY
654 */
655 if (adev->asic_type >= CHIP_VEGA10)
88dc26e4 656 adev->doorbell.num_doorbells += 0x400;
ec3db8a6 657
8972e5d2
CK
658 adev->doorbell.ptr = ioremap(adev->doorbell.base,
659 adev->doorbell.num_doorbells *
660 sizeof(u32));
661 if (adev->doorbell.ptr == NULL)
d38ceaf9 662 return -ENOMEM;
d38ceaf9
AD
663
664 return 0;
665}
666
667/**
06ec9070 668 * amdgpu_device_doorbell_fini - Tear down doorbell driver information.
d38ceaf9
AD
669 *
670 * @adev: amdgpu_device pointer
671 *
672 * Tear down doorbell driver information (CIK)
673 */
06ec9070 674static void amdgpu_device_doorbell_fini(struct amdgpu_device *adev)
d38ceaf9
AD
675{
676 iounmap(adev->doorbell.ptr);
677 adev->doorbell.ptr = NULL;
678}
679
22cb0164 680
d38ceaf9
AD
681
682/*
06ec9070 683 * amdgpu_device_wb_*()
455a7bc2 684 * Writeback is the method by which the GPU updates special pages in memory
ea81a173 685 * with the status of certain GPU events (fences, ring pointers, etc.).
d38ceaf9
AD
686 */
687
688/**
06ec9070 689 * amdgpu_device_wb_fini - Disable Writeback and free memory
d38ceaf9
AD
690 *
691 * @adev: amdgpu_device pointer
692 *
693 * Disables Writeback and frees the Writeback memory (all asics).
694 * Used at driver shutdown.
695 */
06ec9070 696static void amdgpu_device_wb_fini(struct amdgpu_device *adev)
d38ceaf9
AD
697{
698 if (adev->wb.wb_obj) {
a76ed485
AD
699 amdgpu_bo_free_kernel(&adev->wb.wb_obj,
700 &adev->wb.gpu_addr,
701 (void **)&adev->wb.wb);
d38ceaf9
AD
702 adev->wb.wb_obj = NULL;
703 }
704}
705
706/**
06ec9070 707 * amdgpu_device_wb_init- Init Writeback driver info and allocate memory
d38ceaf9
AD
708 *
709 * @adev: amdgpu_device pointer
710 *
455a7bc2 711 * Initializes writeback and allocates writeback memory (all asics).
d38ceaf9
AD
712 * Used at driver startup.
713 * Returns 0 on success or a negative error code on failure.
714 */
06ec9070 715static int amdgpu_device_wb_init(struct amdgpu_device *adev)
d38ceaf9
AD
716{
717 int r;
718
719 if (adev->wb.wb_obj == NULL) {
97407b63
AD
720 /* AMDGPU_MAX_WB * sizeof(uint32_t) * 8 = AMDGPU_MAX_WB 256bit slots */
721 r = amdgpu_bo_create_kernel(adev, AMDGPU_MAX_WB * sizeof(uint32_t) * 8,
a76ed485
AD
722 PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT,
723 &adev->wb.wb_obj, &adev->wb.gpu_addr,
724 (void **)&adev->wb.wb);
d38ceaf9
AD
725 if (r) {
726 dev_warn(adev->dev, "(%d) create WB bo failed\n", r);
727 return r;
728 }
d38ceaf9
AD
729
730 adev->wb.num_wb = AMDGPU_MAX_WB;
731 memset(&adev->wb.used, 0, sizeof(adev->wb.used));
732
733 /* clear wb memory */
73469585 734 memset((char *)adev->wb.wb, 0, AMDGPU_MAX_WB * sizeof(uint32_t) * 8);
d38ceaf9
AD
735 }
736
737 return 0;
738}
739
740/**
131b4b36 741 * amdgpu_device_wb_get - Allocate a wb entry
d38ceaf9
AD
742 *
743 * @adev: amdgpu_device pointer
744 * @wb: wb index
745 *
746 * Allocate a wb slot for use by the driver (all asics).
747 * Returns 0 on success or -EINVAL on failure.
748 */
131b4b36 749int amdgpu_device_wb_get(struct amdgpu_device *adev, u32 *wb)
d38ceaf9
AD
750{
751 unsigned long offset = find_first_zero_bit(adev->wb.used, adev->wb.num_wb);
d38ceaf9 752
97407b63 753 if (offset < adev->wb.num_wb) {
7014285a 754 __set_bit(offset, adev->wb.used);
63ae07ca 755 *wb = offset << 3; /* convert to dw offset */
0915fdbc
ML
756 return 0;
757 } else {
758 return -EINVAL;
759 }
760}
761
d38ceaf9 762/**
131b4b36 763 * amdgpu_device_wb_free - Free a wb entry
d38ceaf9
AD
764 *
765 * @adev: amdgpu_device pointer
766 * @wb: wb index
767 *
768 * Free a wb slot allocated for use by the driver (all asics)
769 */
131b4b36 770void amdgpu_device_wb_free(struct amdgpu_device *adev, u32 wb)
d38ceaf9 771{
73469585 772 wb >>= 3;
d38ceaf9 773 if (wb < adev->wb.num_wb)
73469585 774 __clear_bit(wb, adev->wb.used);
d38ceaf9
AD
775}
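/*
 * Illustrative sketch, not part of the original file: a typical writeback
 * user allocates a slot, derives the CPU and GPU views of it and frees it
 * again when done. The returned value is a dword offset into the wb buffer:
 *
 *	u32 wb;
 *
 *	if (!amdgpu_device_wb_get(adev, &wb)) {
 *		volatile u32 *cpu_ptr = &adev->wb.wb[wb];
 *		u64 gpu_addr = adev->wb.gpu_addr + (wb * 4);
 *
 *		... point a fence/ring at gpu_addr, poll *cpu_ptr ...
 *
 *		amdgpu_device_wb_free(adev, wb);
 *	}
 */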
776
d6895ad3
CK
777/**
778 * amdgpu_device_resize_fb_bar - try to resize FB BAR
779 *
780 * @adev: amdgpu_device pointer
781 *
782 * Try to resize FB BAR to make all VRAM CPU accessible. We try very hard not
783 * to fail, but if any of the BARs is not accessible after the size we abort
784 * driver loading by returning -ENODEV.
785 */
786int amdgpu_device_resize_fb_bar(struct amdgpu_device *adev)
787{
770d13b1 788 u64 space_needed = roundup_pow_of_two(adev->gmc.real_vram_size);
d6895ad3 789 u32 rbar_size = order_base_2(((space_needed >> 20) | 1)) - 1;
31b8adab
CK
790 struct pci_bus *root;
791 struct resource *res;
792 unsigned i;
d6895ad3
CK
793 u16 cmd;
794 int r;
795
0c03b912 796 /* Bypass for VF */
797 if (amdgpu_sriov_vf(adev))
798 return 0;
799
31b8adab
CK
800 /* Check if the root BUS has 64bit memory resources */
801 root = adev->pdev->bus;
802 while (root->parent)
803 root = root->parent;
804
805 pci_bus_for_each_resource(root, res, i) {
0ebb7c54 806 if (res && res->flags & (IORESOURCE_MEM | IORESOURCE_MEM_64) &&
31b8adab
CK
807 res->start > 0x100000000ull)
808 break;
809 }
810
811 /* Trying to resize is pointless without a root hub window above 4GB */
812 if (!res)
813 return 0;
814
d6895ad3
CK
815 /* Disable memory decoding while we change the BAR addresses and size */
816 pci_read_config_word(adev->pdev, PCI_COMMAND, &cmd);
817 pci_write_config_word(adev->pdev, PCI_COMMAND,
818 cmd & ~PCI_COMMAND_MEMORY);
819
820 /* Free the VRAM and doorbell BAR, we most likely need to move both. */
06ec9070 821 amdgpu_device_doorbell_fini(adev);
d6895ad3
CK
822 if (adev->asic_type >= CHIP_BONAIRE)
823 pci_release_resource(adev->pdev, 2);
824
825 pci_release_resource(adev->pdev, 0);
826
827 r = pci_resize_resource(adev->pdev, 0, rbar_size);
828 if (r == -ENOSPC)
829 DRM_INFO("Not enough PCI address space for a large BAR.");
830 else if (r && r != -ENOTSUPP)
831 DRM_ERROR("Problem resizing BAR0 (%d).", r);
832
833 pci_assign_unassigned_bus_resources(adev->pdev->bus);
834
835 /* When the doorbell or fb BAR isn't available we have no chance of
836 * using the device.
837 */
06ec9070 838 r = amdgpu_device_doorbell_init(adev);
d6895ad3
CK
839 if (r || (pci_resource_flags(adev->pdev, 0) & IORESOURCE_UNSET))
840 return -ENODEV;
841
842 pci_write_config_word(adev->pdev, PCI_COMMAND, cmd);
843
844 return 0;
845}
a05502e5 846
d38ceaf9
AD
847/*
848 * GPU helpers function.
849 */
850/**
39c640c0 851 * amdgpu_device_need_post - check if the hw need post or not
d38ceaf9
AD
852 *
853 * @adev: amdgpu_device pointer
854 *
c836fec5
JQ
855 * Check if the asic has been initialized (all asics) at driver startup,
856 * or whether post is needed because a hw reset was performed.
857 * Returns true if post is needed, false if not.
d38ceaf9 858 */
39c640c0 859bool amdgpu_device_need_post(struct amdgpu_device *adev)
d38ceaf9
AD
860{
861 uint32_t reg;
862
bec86378
ML
863 if (amdgpu_sriov_vf(adev))
864 return false;
865
866 if (amdgpu_passthrough(adev)) {
1da2c326
ML
867 /* for FIJI: In the whole-GPU pass-through virtualization case, after VM reboot
868 * some old SMC firmware still needs the driver to do vPost, otherwise the GPU hangs.
869 * SMC firmware versions above 22.15 don't have this flaw, so force
870 * vPost for SMC versions below 22.15.
bec86378
ML
871 */
872 if (adev->asic_type == CHIP_FIJI) {
873 int err;
874 uint32_t fw_ver;
875 err = request_firmware(&adev->pm.fw, "amdgpu/fiji_smc.bin", adev->dev);
876 /* force vPost if error occurred */
877 if (err)
878 return true;
879
880 fw_ver = *((uint32_t *)adev->pm.fw->data + 69);
1da2c326
ML
881 if (fw_ver < 0x00160e00)
882 return true;
bec86378 883 }
bec86378 884 }
91fe77eb 885
886 if (adev->has_hw_reset) {
887 adev->has_hw_reset = false;
888 return true;
889 }
890
891 /* bios scratch used on CIK+ */
892 if (adev->asic_type >= CHIP_BONAIRE)
893 return amdgpu_atombios_scratch_need_asic_init(adev);
894
895 /* check MEM_SIZE for older asics */
896 reg = amdgpu_asic_get_config_memsize(adev);
897
898 if ((reg != 0) && (reg != 0xffffffff))
899 return false;
900
901 return true;
bec86378
ML
902}
903
d38ceaf9
AD
904/* if we get transitioned to only one device, take VGA back */
905/**
06ec9070 906 * amdgpu_device_vga_set_decode - enable/disable vga decode
d38ceaf9
AD
907 *
908 * @cookie: amdgpu_device pointer
909 * @state: enable/disable vga decode
910 *
911 * Enable/disable vga decode (all asics).
912 * Returns VGA resource flags.
913 */
06ec9070 914static unsigned int amdgpu_device_vga_set_decode(void *cookie, bool state)
d38ceaf9
AD
915{
916 struct amdgpu_device *adev = cookie;
917 amdgpu_asic_set_vga_state(adev, state);
918 if (state)
919 return VGA_RSRC_LEGACY_IO | VGA_RSRC_LEGACY_MEM |
920 VGA_RSRC_NORMAL_IO | VGA_RSRC_NORMAL_MEM;
921 else
922 return VGA_RSRC_NORMAL_IO | VGA_RSRC_NORMAL_MEM;
923}
924
e3ecdffa
AD
925/**
926 * amdgpu_device_check_block_size - validate the vm block size
927 *
928 * @adev: amdgpu_device pointer
929 *
930 * Validates the vm block size specified via module parameter.
931 * The vm block size defines the number of bits in page table versus page directory;
932 * a page is 4KB so we have 12 bits offset, minimum 9 bits in the
933 * page table and the remaining bits are in the page directory.
934 */
06ec9070 935static void amdgpu_device_check_block_size(struct amdgpu_device *adev)
a1adf8be
CZ
936{
937 /* defines number of bits in page table versus page directory,
938 * a page is 4KB so we have 12 bits offset, minimum 9 bits in the
939 * page table and the remaining bits are in the page directory */
bab4fee7
JZ
940 if (amdgpu_vm_block_size == -1)
941 return;
a1adf8be 942
bab4fee7 943 if (amdgpu_vm_block_size < 9) {
a1adf8be
CZ
944 dev_warn(adev->dev, "VM page table size (%d) too small\n",
945 amdgpu_vm_block_size);
97489129 946 amdgpu_vm_block_size = -1;
a1adf8be 947 }
a1adf8be
CZ
948}
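/*
 * Worked example, not part of the original file: with 4KB pages the low 12
 * bits of an address are the page offset; a block size of 9 then means one
 * page table covers 2^9 entries * 4KB = 2MB of VA space, and the remaining
 * address bits are resolved through the page directory levels.
 */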
949
e3ecdffa
AD
950/**
951 * amdgpu_device_check_vm_size - validate the vm size
952 *
953 * @adev: amdgpu_device pointer
954 *
955 * Validates the vm size in GB specified via module parameter.
956 * The VM size is the size of the GPU virtual memory space in GB.
957 */
06ec9070 958static void amdgpu_device_check_vm_size(struct amdgpu_device *adev)
83ca145d 959{
64dab074
AD
960 /* no need to check the default value */
961 if (amdgpu_vm_size == -1)
962 return;
963
83ca145d
ZJ
964 if (amdgpu_vm_size < 1) {
965 dev_warn(adev->dev, "VM size (%d) too small, min is 1GB\n",
966 amdgpu_vm_size);
f3368128 967 amdgpu_vm_size = -1;
83ca145d 968 }
83ca145d
ZJ
969}
970
7951e376
RZ
971static void amdgpu_device_check_smu_prv_buffer_size(struct amdgpu_device *adev)
972{
973 struct sysinfo si;
974 bool is_os_64 = (sizeof(void *) == 8) ? true : false;
975 uint64_t total_memory;
976 uint64_t dram_size_seven_GB = 0x1B8000000;
977 uint64_t dram_size_three_GB = 0xB8000000;
978
979 if (amdgpu_smu_memory_pool_size == 0)
980 return;
981
982 if (!is_os_64) {
983 DRM_WARN("Not 64-bit OS, feature not supported\n");
984 goto def_value;
985 }
986 si_meminfo(&si);
987 total_memory = (uint64_t)si.totalram * si.mem_unit;
988
989 if ((amdgpu_smu_memory_pool_size == 1) ||
990 (amdgpu_smu_memory_pool_size == 2)) {
991 if (total_memory < dram_size_three_GB)
992 goto def_value1;
993 } else if ((amdgpu_smu_memory_pool_size == 4) ||
994 (amdgpu_smu_memory_pool_size == 8)) {
995 if (total_memory < dram_size_seven_GB)
996 goto def_value1;
997 } else {
998 DRM_WARN("Smu memory pool size not supported\n");
999 goto def_value;
1000 }
1001 adev->pm.smu_prv_buffer_size = amdgpu_smu_memory_pool_size << 28;
1002
1003 return;
1004
1005def_value1:
1006 DRM_WARN("Not enough system memory\n");
1007def_value:
1008 adev->pm.smu_prv_buffer_size = 0;
1009}
1010
d38ceaf9 1011/**
06ec9070 1012 * amdgpu_device_check_arguments - validate module params
d38ceaf9
AD
1013 *
1014 * @adev: amdgpu_device pointer
1015 *
1016 * Validates certain module parameters and updates
1017 * the associated values used by the driver (all asics).
1018 */
912dfc84 1019static int amdgpu_device_check_arguments(struct amdgpu_device *adev)
d38ceaf9 1020{
912dfc84
EQ
1021 int ret = 0;
1022
5b011235
CZ
1023 if (amdgpu_sched_jobs < 4) {
1024 dev_warn(adev->dev, "sched jobs (%d) must be at least 4\n",
1025 amdgpu_sched_jobs);
1026 amdgpu_sched_jobs = 4;
76117507 1027 } else if (!is_power_of_2(amdgpu_sched_jobs)){
5b011235
CZ
1028 dev_warn(adev->dev, "sched jobs (%d) must be a power of 2\n",
1029 amdgpu_sched_jobs);
1030 amdgpu_sched_jobs = roundup_pow_of_two(amdgpu_sched_jobs);
1031 }
d38ceaf9 1032
83e74db6 1033 if (amdgpu_gart_size != -1 && amdgpu_gart_size < 32) {
f9321cc4
CK
1034 /* gart size must be greater or equal to 32M */
1035 dev_warn(adev->dev, "gart size (%d) too small\n",
1036 amdgpu_gart_size);
83e74db6 1037 amdgpu_gart_size = -1;
d38ceaf9
AD
1038 }
1039
36d38372 1040 if (amdgpu_gtt_size != -1 && amdgpu_gtt_size < 32) {
c4e1a13a 1041 /* gtt size must be greater or equal to 32M */
36d38372
CK
1042 dev_warn(adev->dev, "gtt size (%d) too small\n",
1043 amdgpu_gtt_size);
1044 amdgpu_gtt_size = -1;
d38ceaf9
AD
1045 }
1046
d07f14be
RH
1047 /* valid range is between 4 and 9 inclusive */
1048 if (amdgpu_vm_fragment_size != -1 &&
1049 (amdgpu_vm_fragment_size > 9 || amdgpu_vm_fragment_size < 4)) {
1050 dev_warn(adev->dev, "valid range is between 4 and 9\n");
1051 amdgpu_vm_fragment_size = -1;
1052 }
1053
7951e376
RZ
1054 amdgpu_device_check_smu_prv_buffer_size(adev);
1055
06ec9070 1056 amdgpu_device_check_vm_size(adev);
d38ceaf9 1057
06ec9070 1058 amdgpu_device_check_block_size(adev);
6a7f76e7 1059
912dfc84
EQ
1060 ret = amdgpu_device_get_job_timeout_settings(adev);
1061 if (ret) {
1062 dev_err(adev->dev, "invalid lockup_timeout parameter syntax\n");
1063 return ret;
8854695a 1064 }
19aede77
AD
1065
1066 adev->firmware.load_type = amdgpu_ucode_get_load_type(adev, amdgpu_fw_load_type);
912dfc84
EQ
1067
1068 return ret;
d38ceaf9
AD
1069}
1070
1071/**
1072 * amdgpu_switcheroo_set_state - set switcheroo state
1073 *
1074 * @pdev: pci dev pointer
1694467b 1075 * @state: vga_switcheroo state
d38ceaf9
AD
1076 *
1077 * Callback for the switcheroo driver. Suspends or resumes
1078 * the asics before or after it is powered up using ACPI methods.
1079 */
1080static void amdgpu_switcheroo_set_state(struct pci_dev *pdev, enum vga_switcheroo_state state)
1081{
1082 struct drm_device *dev = pci_get_drvdata(pdev);
1083
1084 if (amdgpu_device_is_px(dev) && state == VGA_SWITCHEROO_OFF)
1085 return;
1086
1087 if (state == VGA_SWITCHEROO_ON) {
7ca85295 1088 pr_info("amdgpu: switched on\n");
d38ceaf9
AD
1089 /* don't suspend or resume card normally */
1090 dev->switch_power_state = DRM_SWITCH_POWER_CHANGING;
1091
810ddc3a 1092 amdgpu_device_resume(dev, true, true);
d38ceaf9 1093
d38ceaf9
AD
1094 dev->switch_power_state = DRM_SWITCH_POWER_ON;
1095 drm_kms_helper_poll_enable(dev);
1096 } else {
7ca85295 1097 pr_info("amdgpu: switched off\n");
d38ceaf9
AD
1098 drm_kms_helper_poll_disable(dev);
1099 dev->switch_power_state = DRM_SWITCH_POWER_CHANGING;
810ddc3a 1100 amdgpu_device_suspend(dev, true, true);
d38ceaf9
AD
1101 dev->switch_power_state = DRM_SWITCH_POWER_OFF;
1102 }
1103}
1104
1105/**
1106 * amdgpu_switcheroo_can_switch - see if switcheroo state can change
1107 *
1108 * @pdev: pci dev pointer
1109 *
1110 * Callback for the switcheroo driver. Checks whether the switcheroo
1111 * state can be changed.
1112 * Returns true if the state can be changed, false if not.
1113 */
1114static bool amdgpu_switcheroo_can_switch(struct pci_dev *pdev)
1115{
1116 struct drm_device *dev = pci_get_drvdata(pdev);
1117
1118 /*
1119 * FIXME: open_count is protected by drm_global_mutex but that would lead to
1120 * locking inversion with the driver load path. And the access here is
1121 * completely racy anyway. So don't bother with locking for now.
1122 */
1123 return dev->open_count == 0;
1124}
1125
1126static const struct vga_switcheroo_client_ops amdgpu_switcheroo_ops = {
1127 .set_gpu_state = amdgpu_switcheroo_set_state,
1128 .reprobe = NULL,
1129 .can_switch = amdgpu_switcheroo_can_switch,
1130};
1131
e3ecdffa
AD
1132/**
1133 * amdgpu_device_ip_set_clockgating_state - set the CG state
1134 *
87e3f136 1135 * @dev: amdgpu_device pointer
e3ecdffa
AD
1136 * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
1137 * @state: clockgating state (gate or ungate)
1138 *
1139 * Sets the requested clockgating state for all instances of
1140 * the hardware IP specified.
1141 * Returns the error code from the last instance.
1142 */
43fa561f 1143int amdgpu_device_ip_set_clockgating_state(void *dev,
2990a1fc
AD
1144 enum amd_ip_block_type block_type,
1145 enum amd_clockgating_state state)
d38ceaf9 1146{
43fa561f 1147 struct amdgpu_device *adev = dev;
d38ceaf9
AD
1148 int i, r = 0;
1149
1150 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 1151 if (!adev->ip_blocks[i].status.valid)
9ecbe7f5 1152 continue;
c722865a
RZ
1153 if (adev->ip_blocks[i].version->type != block_type)
1154 continue;
1155 if (!adev->ip_blocks[i].version->funcs->set_clockgating_state)
1156 continue;
1157 r = adev->ip_blocks[i].version->funcs->set_clockgating_state(
1158 (void *)adev, state);
1159 if (r)
1160 DRM_ERROR("set_clockgating_state of IP block <%s> failed %d\n",
1161 adev->ip_blocks[i].version->funcs->name, r);
d38ceaf9
AD
1162 }
1163 return r;
1164}
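/*
 * Illustrative sketch, not part of the original file: a typical caller gates
 * clocks for one IP type across all of its instances, e.g.:
 *
 *	r = amdgpu_device_ip_set_clockgating_state(adev,
 *						   AMD_IP_BLOCK_TYPE_GFX,
 *						   AMD_CG_STATE_GATE);
 *
 * AMD_IP_BLOCK_TYPE_GFX and AMD_CG_STATE_GATE come from amd_shared.h.
 */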
1165
e3ecdffa
AD
1166/**
1167 * amdgpu_device_ip_set_powergating_state - set the PG state
1168 *
87e3f136 1169 * @dev: amdgpu_device pointer
e3ecdffa
AD
1170 * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
1171 * @state: powergating state (gate or ungate)
1172 *
1173 * Sets the requested powergating state for all instances of
1174 * the hardware IP specified.
1175 * Returns the error code from the last instance.
1176 */
43fa561f 1177int amdgpu_device_ip_set_powergating_state(void *dev,
2990a1fc
AD
1178 enum amd_ip_block_type block_type,
1179 enum amd_powergating_state state)
d38ceaf9 1180{
43fa561f 1181 struct amdgpu_device *adev = dev;
d38ceaf9
AD
1182 int i, r = 0;
1183
1184 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 1185 if (!adev->ip_blocks[i].status.valid)
9ecbe7f5 1186 continue;
c722865a
RZ
1187 if (adev->ip_blocks[i].version->type != block_type)
1188 continue;
1189 if (!adev->ip_blocks[i].version->funcs->set_powergating_state)
1190 continue;
1191 r = adev->ip_blocks[i].version->funcs->set_powergating_state(
1192 (void *)adev, state);
1193 if (r)
1194 DRM_ERROR("set_powergating_state of IP block <%s> failed %d\n",
1195 adev->ip_blocks[i].version->funcs->name, r);
d38ceaf9
AD
1196 }
1197 return r;
1198}
1199
e3ecdffa
AD
1200/**
1201 * amdgpu_device_ip_get_clockgating_state - get the CG state
1202 *
1203 * @adev: amdgpu_device pointer
1204 * @flags: clockgating feature flags
1205 *
1206 * Walks the list of IPs on the device and updates the clockgating
1207 * flags for each IP.
1208 * Updates @flags with the feature flags for each hardware IP where
1209 * clockgating is enabled.
1210 */
2990a1fc
AD
1211void amdgpu_device_ip_get_clockgating_state(struct amdgpu_device *adev,
1212 u32 *flags)
6cb2d4e4
HR
1213{
1214 int i;
1215
1216 for (i = 0; i < adev->num_ip_blocks; i++) {
1217 if (!adev->ip_blocks[i].status.valid)
1218 continue;
1219 if (adev->ip_blocks[i].version->funcs->get_clockgating_state)
1220 adev->ip_blocks[i].version->funcs->get_clockgating_state((void *)adev, flags);
1221 }
1222}
1223
e3ecdffa
AD
1224/**
1225 * amdgpu_device_ip_wait_for_idle - wait for idle
1226 *
1227 * @adev: amdgpu_device pointer
1228 * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
1229 *
1230 * Waits for the requested hardware IP to be idle.
1231 * Returns 0 for success or a negative error code on failure.
1232 */
2990a1fc
AD
1233int amdgpu_device_ip_wait_for_idle(struct amdgpu_device *adev,
1234 enum amd_ip_block_type block_type)
5dbbb60b
AD
1235{
1236 int i, r;
1237
1238 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 1239 if (!adev->ip_blocks[i].status.valid)
9ecbe7f5 1240 continue;
a1255107
AD
1241 if (adev->ip_blocks[i].version->type == block_type) {
1242 r = adev->ip_blocks[i].version->funcs->wait_for_idle((void *)adev);
5dbbb60b
AD
1243 if (r)
1244 return r;
1245 break;
1246 }
1247 }
1248 return 0;
1249
1250}
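/*
 * Illustrative sketch, not part of the original file: suspend/reset paths
 * typically wait for an IP to drain before touching its hardware, e.g.:
 *
 *	r = amdgpu_device_ip_wait_for_idle(adev, AMD_IP_BLOCK_TYPE_GMC);
 *	if (r)
 *		return r;
 */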
1251
e3ecdffa
AD
1252/**
1253 * amdgpu_device_ip_is_idle - is the hardware IP idle
1254 *
1255 * @adev: amdgpu_device pointer
1256 * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
1257 *
1258 * Check if the hardware IP is idle or not.
1259 * Returns true if the IP is idle, false if not.
1260 */
2990a1fc
AD
1261bool amdgpu_device_ip_is_idle(struct amdgpu_device *adev,
1262 enum amd_ip_block_type block_type)
5dbbb60b
AD
1263{
1264 int i;
1265
1266 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 1267 if (!adev->ip_blocks[i].status.valid)
9ecbe7f5 1268 continue;
a1255107
AD
1269 if (adev->ip_blocks[i].version->type == block_type)
1270 return adev->ip_blocks[i].version->funcs->is_idle((void *)adev);
5dbbb60b
AD
1271 }
1272 return true;
1273
1274}
1275
e3ecdffa
AD
1276/**
1277 * amdgpu_device_ip_get_ip_block - get a hw IP pointer
1278 *
1279 * @adev: amdgpu_device pointer
87e3f136 1280 * @type: Type of hardware IP (SMU, GFX, UVD, etc.)
e3ecdffa
AD
1281 *
1282 * Returns a pointer to the hardware IP block structure
1283 * if it exists for the asic, otherwise NULL.
1284 */
2990a1fc
AD
1285struct amdgpu_ip_block *
1286amdgpu_device_ip_get_ip_block(struct amdgpu_device *adev,
1287 enum amd_ip_block_type type)
d38ceaf9
AD
1288{
1289 int i;
1290
1291 for (i = 0; i < adev->num_ip_blocks; i++)
a1255107 1292 if (adev->ip_blocks[i].version->type == type)
d38ceaf9
AD
1293 return &adev->ip_blocks[i];
1294
1295 return NULL;
1296}
1297
1298/**
2990a1fc 1299 * amdgpu_device_ip_block_version_cmp
d38ceaf9
AD
1300 *
1301 * @adev: amdgpu_device pointer
5fc3aeeb 1302 * @type: enum amd_ip_block_type
d38ceaf9
AD
1303 * @major: major version
1304 * @minor: minor version
1305 *
1306 * Return 0 if the installed IP version is equal or greater,
1307 * 1 if it is smaller or the ip_block doesn't exist.
1308 */
2990a1fc
AD
1309int amdgpu_device_ip_block_version_cmp(struct amdgpu_device *adev,
1310 enum amd_ip_block_type type,
1311 u32 major, u32 minor)
d38ceaf9 1312{
2990a1fc 1313 struct amdgpu_ip_block *ip_block = amdgpu_device_ip_get_ip_block(adev, type);
d38ceaf9 1314
a1255107
AD
1315 if (ip_block && ((ip_block->version->major > major) ||
1316 ((ip_block->version->major == major) &&
1317 (ip_block->version->minor >= minor))))
d38ceaf9
AD
1318 return 0;
1319
1320 return 1;
1321}
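/*
 * Illustrative sketch, not part of the original file: callers use the
 * comparison to branch on an IP's version, e.g. "is GFX at least 8.1":
 *
 *	if (amdgpu_device_ip_block_version_cmp(adev, AMD_IP_BLOCK_TYPE_GFX,
 *					       8, 1) == 0) {
 *		... GFX 8.1 or newer path ...
 *	}
 */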
1322
a1255107 1323/**
2990a1fc 1324 * amdgpu_device_ip_block_add
a1255107
AD
1325 *
1326 * @adev: amdgpu_device pointer
1327 * @ip_block_version: pointer to the IP to add
1328 *
1329 * Adds the IP block driver information to the collection of IPs
1330 * on the asic.
1331 */
2990a1fc
AD
1332int amdgpu_device_ip_block_add(struct amdgpu_device *adev,
1333 const struct amdgpu_ip_block_version *ip_block_version)
a1255107
AD
1334{
1335 if (!ip_block_version)
1336 return -EINVAL;
1337
e966a725 1338 DRM_INFO("add ip block number %d <%s>\n", adev->num_ip_blocks,
a0bae357
HR
1339 ip_block_version->funcs->name);
1340
a1255107
AD
1341 adev->ip_blocks[adev->num_ip_blocks++].version = ip_block_version;
1342
1343 return 0;
1344}
1345
e3ecdffa
AD
1346/**
1347 * amdgpu_device_enable_virtual_display - enable virtual display feature
1348 *
1349 * @adev: amdgpu_device pointer
1350 *
1351 * Enabled the virtual display feature if the user has enabled it via
1352 * the module parameter virtual_display. This feature provides a virtual
1353 * display hardware on headless boards or in virtualized environments.
1354 * This function parses and validates the configuration string specified by
1355 * the user and configures the virtual display configuration (number of
1356 * virtual connectors, crtcs, etc.) specified.
1357 */
483ef985 1358static void amdgpu_device_enable_virtual_display(struct amdgpu_device *adev)
9accf2fd
ED
1359{
1360 adev->enable_virtual_display = false;
1361
1362 if (amdgpu_virtual_display) {
1363 struct drm_device *ddev = adev->ddev;
1364 const char *pci_address_name = pci_name(ddev->pdev);
0f66356d 1365 char *pciaddstr, *pciaddstr_tmp, *pciaddname_tmp, *pciaddname;
9accf2fd
ED
1366
1367 pciaddstr = kstrdup(amdgpu_virtual_display, GFP_KERNEL);
1368 pciaddstr_tmp = pciaddstr;
0f66356d
ED
1369 while ((pciaddname_tmp = strsep(&pciaddstr_tmp, ";"))) {
1370 pciaddname = strsep(&pciaddname_tmp, ",");
967de2a9
YT
1371 if (!strcmp("all", pciaddname)
1372 || !strcmp(pci_address_name, pciaddname)) {
0f66356d
ED
1373 long num_crtc;
1374 int res = -1;
1375
9accf2fd 1376 adev->enable_virtual_display = true;
0f66356d
ED
1377
1378 if (pciaddname_tmp)
1379 res = kstrtol(pciaddname_tmp, 10,
1380 &num_crtc);
1381
1382 if (!res) {
1383 if (num_crtc < 1)
1384 num_crtc = 1;
1385 if (num_crtc > 6)
1386 num_crtc = 6;
1387 adev->mode_info.num_crtc = num_crtc;
1388 } else {
1389 adev->mode_info.num_crtc = 1;
1390 }
9accf2fd
ED
1391 break;
1392 }
1393 }
1394
0f66356d
ED
1395 DRM_INFO("virtual display string:%s, %s:virtual_display:%d, num_crtc:%d\n",
1396 amdgpu_virtual_display, pci_address_name,
1397 adev->enable_virtual_display, adev->mode_info.num_crtc);
9accf2fd
ED
1398
1399 kfree(pciaddstr);
1400 }
1401}
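/*
 * Illustrative sketch, not part of the original file: the string parsed
 * above is the amdgpu.virtual_display module parameter, a ';'-separated
 * list of "pci_address,num_crtc" entries (addresses below are examples):
 *
 *	modprobe amdgpu virtual_display=0000:01:00.0,2
 *	modprobe amdgpu virtual_display=all
 */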
1402
e3ecdffa
AD
1403/**
1404 * amdgpu_device_parse_gpu_info_fw - parse gpu info firmware
1405 *
1406 * @adev: amdgpu_device pointer
1407 *
1408 * Parses the asic configuration parameters specified in the gpu info
1409 * firmware and makes them available to the driver for use in configuring
1410 * the asic.
1411 * Returns 0 on success, -EINVAL on failure.
1412 */
e2a75f88
AD
1413static int amdgpu_device_parse_gpu_info_fw(struct amdgpu_device *adev)
1414{
e2a75f88
AD
1415 const char *chip_name;
1416 char fw_name[30];
1417 int err;
1418 const struct gpu_info_firmware_header_v1_0 *hdr;
1419
ab4fe3e1
HR
1420 adev->firmware.gpu_info_fw = NULL;
1421
e2a75f88
AD
1422 switch (adev->asic_type) {
1423 case CHIP_TOPAZ:
1424 case CHIP_TONGA:
1425 case CHIP_FIJI:
e2a75f88 1426 case CHIP_POLARIS10:
cc07f18d 1427 case CHIP_POLARIS11:
e2a75f88 1428 case CHIP_POLARIS12:
cc07f18d 1429 case CHIP_VEGAM:
e2a75f88
AD
1430 case CHIP_CARRIZO:
1431 case CHIP_STONEY:
1432#ifdef CONFIG_DRM_AMDGPU_SI
1433 case CHIP_VERDE:
1434 case CHIP_TAHITI:
1435 case CHIP_PITCAIRN:
1436 case CHIP_OLAND:
1437 case CHIP_HAINAN:
1438#endif
1439#ifdef CONFIG_DRM_AMDGPU_CIK
1440 case CHIP_BONAIRE:
1441 case CHIP_HAWAII:
1442 case CHIP_KAVERI:
1443 case CHIP_KABINI:
1444 case CHIP_MULLINS:
1445#endif
27c0bc71 1446 case CHIP_VEGA20:
e2a75f88
AD
1447 default:
1448 return 0;
1449 case CHIP_VEGA10:
1450 chip_name = "vega10";
1451 break;
3f76dced
AD
1452 case CHIP_VEGA12:
1453 chip_name = "vega12";
1454 break;
2d2e5e7e 1455 case CHIP_RAVEN:
54c4d17e
FX
1456 if (adev->rev_id >= 8)
1457 chip_name = "raven2";
741deade
AD
1458 else if (adev->pdev->device == 0x15d8)
1459 chip_name = "picasso";
54c4d17e
FX
1460 else
1461 chip_name = "raven";
2d2e5e7e 1462 break;
65e60f6e
LM
1463 case CHIP_ARCTURUS:
1464 chip_name = "arcturus";
1465 break;
23c6268e
HR
1466 case CHIP_NAVI10:
1467 chip_name = "navi10";
1468 break;
ed42cfe1
XY
1469 case CHIP_NAVI14:
1470 chip_name = "navi14";
1471 break;
e2a75f88
AD
1472 }
1473
1474 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_gpu_info.bin", chip_name);
ab4fe3e1 1475 err = request_firmware(&adev->firmware.gpu_info_fw, fw_name, adev->dev);
e2a75f88
AD
1476 if (err) {
1477 dev_err(adev->dev,
1478 "Failed to load gpu_info firmware \"%s\"\n",
1479 fw_name);
1480 goto out;
1481 }
ab4fe3e1 1482 err = amdgpu_ucode_validate(adev->firmware.gpu_info_fw);
e2a75f88
AD
1483 if (err) {
1484 dev_err(adev->dev,
1485 "Failed to validate gpu_info firmware \"%s\"\n",
1486 fw_name);
1487 goto out;
1488 }
1489
ab4fe3e1 1490 hdr = (const struct gpu_info_firmware_header_v1_0 *)adev->firmware.gpu_info_fw->data;
e2a75f88
AD
1491 amdgpu_ucode_print_gpu_info_hdr(&hdr->header);
1492
1493 switch (hdr->version_major) {
1494 case 1:
1495 {
1496 const struct gpu_info_firmware_v1_0 *gpu_info_fw =
ab4fe3e1 1497 (const struct gpu_info_firmware_v1_0 *)(adev->firmware.gpu_info_fw->data +
e2a75f88
AD
1498 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1499
b5ab16bf
AD
1500 adev->gfx.config.max_shader_engines = le32_to_cpu(gpu_info_fw->gc_num_se);
1501 adev->gfx.config.max_cu_per_sh = le32_to_cpu(gpu_info_fw->gc_num_cu_per_sh);
1502 adev->gfx.config.max_sh_per_se = le32_to_cpu(gpu_info_fw->gc_num_sh_per_se);
1503 adev->gfx.config.max_backends_per_se = le32_to_cpu(gpu_info_fw->gc_num_rb_per_se);
e2a75f88 1504 adev->gfx.config.max_texture_channel_caches =
b5ab16bf
AD
1505 le32_to_cpu(gpu_info_fw->gc_num_tccs);
1506 adev->gfx.config.max_gprs = le32_to_cpu(gpu_info_fw->gc_num_gprs);
1507 adev->gfx.config.max_gs_threads = le32_to_cpu(gpu_info_fw->gc_num_max_gs_thds);
1508 adev->gfx.config.gs_vgt_table_depth = le32_to_cpu(gpu_info_fw->gc_gs_table_depth);
1509 adev->gfx.config.gs_prim_buffer_depth = le32_to_cpu(gpu_info_fw->gc_gsprim_buff_depth);
e2a75f88 1510 adev->gfx.config.double_offchip_lds_buf =
b5ab16bf
AD
1511 le32_to_cpu(gpu_info_fw->gc_double_offchip_lds_buffer);
1512 adev->gfx.cu_info.wave_front_size = le32_to_cpu(gpu_info_fw->gc_wave_size);
51fd0370
HZ
1513 adev->gfx.cu_info.max_waves_per_simd =
1514 le32_to_cpu(gpu_info_fw->gc_max_waves_per_simd);
1515 adev->gfx.cu_info.max_scratch_slots_per_cu =
1516 le32_to_cpu(gpu_info_fw->gc_max_scratch_slots_per_cu);
1517 adev->gfx.cu_info.lds_size = le32_to_cpu(gpu_info_fw->gc_lds_size);
48321c3d 1518 if (hdr->version_minor >= 1) {
35c2e910
HZ
1519 const struct gpu_info_firmware_v1_1 *gpu_info_fw =
1520 (const struct gpu_info_firmware_v1_1 *)(adev->firmware.gpu_info_fw->data +
1521 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1522 adev->gfx.config.num_sc_per_sh =
1523 le32_to_cpu(gpu_info_fw->num_sc_per_sh);
1524 adev->gfx.config.num_packer_per_sc =
1525 le32_to_cpu(gpu_info_fw->num_packer_per_sc);
1526 }
48321c3d
HW
1527#ifdef CONFIG_DRM_AMD_DC_DCN2_0
1528 if (hdr->version_minor == 2) {
1529 const struct gpu_info_firmware_v1_2 *gpu_info_fw =
1530 (const struct gpu_info_firmware_v1_2 *)(adev->firmware.gpu_info_fw->data +
1531 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1532 adev->dm.soc_bounding_box = &gpu_info_fw->soc_bounding_box;
1533 }
1534#endif
e2a75f88
AD
1535 break;
1536 }
1537 default:
1538 dev_err(adev->dev,
1539 "Unsupported gpu_info table %d\n", hdr->header.ucode_version);
1540 err = -EINVAL;
1541 goto out;
1542 }
1543out:
e2a75f88
AD
1544 return err;
1545}
1546
e3ecdffa
AD
1547/**
1548 * amdgpu_device_ip_early_init - run early init for hardware IPs
1549 *
1550 * @adev: amdgpu_device pointer
1551 *
1552 * Early initialization pass for hardware IPs. The hardware IPs that make
1553 * up each asic are discovered and each IP's early_init callback is run. This
1554 * is the first stage in initializing the asic.
1555 * Returns 0 on success, negative error code on failure.
1556 */
06ec9070 1557static int amdgpu_device_ip_early_init(struct amdgpu_device *adev)
d38ceaf9 1558{
aaa36a97 1559 int i, r;
d38ceaf9 1560
483ef985 1561 amdgpu_device_enable_virtual_display(adev);
a6be7570 1562
d38ceaf9 1563 switch (adev->asic_type) {
aaa36a97
AD
1564 case CHIP_TOPAZ:
1565 case CHIP_TONGA:
48299f95 1566 case CHIP_FIJI:
2cc0c0b5 1567 case CHIP_POLARIS10:
32cc7e53 1568 case CHIP_POLARIS11:
c4642a47 1569 case CHIP_POLARIS12:
32cc7e53 1570 case CHIP_VEGAM:
aaa36a97 1571 case CHIP_CARRIZO:
39bb0c92
SL
1572 case CHIP_STONEY:
1573 if (adev->asic_type == CHIP_CARRIZO || adev->asic_type == CHIP_STONEY)
aaa36a97
AD
1574 adev->family = AMDGPU_FAMILY_CZ;
1575 else
1576 adev->family = AMDGPU_FAMILY_VI;
1577
1578 r = vi_set_ip_blocks(adev);
1579 if (r)
1580 return r;
1581 break;
33f34802
KW
1582#ifdef CONFIG_DRM_AMDGPU_SI
1583 case CHIP_VERDE:
1584 case CHIP_TAHITI:
1585 case CHIP_PITCAIRN:
1586 case CHIP_OLAND:
1587 case CHIP_HAINAN:
295d0daf 1588 adev->family = AMDGPU_FAMILY_SI;
33f34802
KW
1589 r = si_set_ip_blocks(adev);
1590 if (r)
1591 return r;
1592 break;
1593#endif
a2e73f56
AD
1594#ifdef CONFIG_DRM_AMDGPU_CIK
1595 case CHIP_BONAIRE:
1596 case CHIP_HAWAII:
1597 case CHIP_KAVERI:
1598 case CHIP_KABINI:
1599 case CHIP_MULLINS:
1600 if ((adev->asic_type == CHIP_BONAIRE) || (adev->asic_type == CHIP_HAWAII))
1601 adev->family = AMDGPU_FAMILY_CI;
1602 else
1603 adev->family = AMDGPU_FAMILY_KV;
1604
1605 r = cik_set_ip_blocks(adev);
1606 if (r)
1607 return r;
1608 break;
1609#endif
e48a3cd9
AD
1610 case CHIP_VEGA10:
1611 case CHIP_VEGA12:
e4bd8170 1612 case CHIP_VEGA20:
e48a3cd9 1613 case CHIP_RAVEN:
61cf44c1 1614 case CHIP_ARCTURUS:
741deade 1615 if (adev->asic_type == CHIP_RAVEN)
2ca8a5d2
CZ
1616 adev->family = AMDGPU_FAMILY_RV;
1617 else
1618 adev->family = AMDGPU_FAMILY_AI;
460826e6
KW
1619
1620 r = soc15_set_ip_blocks(adev);
1621 if (r)
1622 return r;
1623 break;
0a5b8c7b 1624 case CHIP_NAVI10:
7ecb5cd4 1625 case CHIP_NAVI14:
0a5b8c7b
HR
1626 adev->family = AMDGPU_FAMILY_NV;
1627
1628 r = nv_set_ip_blocks(adev);
1629 if (r)
1630 return r;
1631 break;
d38ceaf9
AD
1632 default:
1633 /* FIXME: not supported yet */
1634 return -EINVAL;
1635 }
1636
e2a75f88
AD
1637 r = amdgpu_device_parse_gpu_info_fw(adev);
1638 if (r)
1639 return r;
1640
1884734a 1641 amdgpu_amdkfd_device_probe(adev);
1642
3149d9da
XY
1643 if (amdgpu_sriov_vf(adev)) {
1644 r = amdgpu_virt_request_full_gpu(adev, true);
1645 if (r)
5ffa61c1 1646 return -EAGAIN;
3149d9da
XY
1647 }
1648
3b94fb10 1649 adev->pm.pp_feature = amdgpu_pp_feature_mask;
00544006
HR
1650 if (amdgpu_sriov_vf(adev))
1651 adev->pm.pp_feature &= ~PP_GFXOFF_MASK;
00f54b97 1652
d38ceaf9
AD
1653 for (i = 0; i < adev->num_ip_blocks; i++) {
1654 if ((amdgpu_ip_block_mask & (1 << i)) == 0) {
ed8cf00c
HR
1655 DRM_ERROR("disabled ip block: %d <%s>\n",
1656 i, adev->ip_blocks[i].version->funcs->name);
a1255107 1657 adev->ip_blocks[i].status.valid = false;
d38ceaf9 1658 } else {
a1255107
AD
1659 if (adev->ip_blocks[i].version->funcs->early_init) {
1660 r = adev->ip_blocks[i].version->funcs->early_init((void *)adev);
2c1a2784 1661 if (r == -ENOENT) {
a1255107 1662 adev->ip_blocks[i].status.valid = false;
2c1a2784 1663 } else if (r) {
a1255107
AD
1664 DRM_ERROR("early_init of IP block <%s> failed %d\n",
1665 adev->ip_blocks[i].version->funcs->name, r);
d38ceaf9 1666 return r;
2c1a2784 1667 } else {
a1255107 1668 adev->ip_blocks[i].status.valid = true;
2c1a2784 1669 }
974e6b64 1670 } else {
a1255107 1671 adev->ip_blocks[i].status.valid = true;
d38ceaf9 1672 }
d38ceaf9 1673 }
21a249ca
AD
1674 /* get the vbios after the asic_funcs are set up */
1675 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON) {
1676 /* Read BIOS */
1677 if (!amdgpu_get_bios(adev))
1678 return -EINVAL;
1679
1680 r = amdgpu_atombios_init(adev);
1681 if (r) {
1682 dev_err(adev->dev, "amdgpu_atombios_init failed\n");
1683 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_INIT_FAIL, 0, 0);
1684 return r;
1685 }
1686 }
d38ceaf9
AD
1687 }
1688
395d1fb9
NH
1689 adev->cg_flags &= amdgpu_cg_mask;
1690 adev->pg_flags &= amdgpu_pg_mask;
1691
d38ceaf9
AD
1692 return 0;
1693}
1694
0a4f2520
RZ
1695static int amdgpu_device_ip_hw_init_phase1(struct amdgpu_device *adev)
1696{
1697 int i, r;
1698
1699 for (i = 0; i < adev->num_ip_blocks; i++) {
1700 if (!adev->ip_blocks[i].status.sw)
1701 continue;
1702 if (adev->ip_blocks[i].status.hw)
1703 continue;
1704 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
2d11fd3f 1705 (amdgpu_sriov_vf(adev) && (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP)) ||
0a4f2520
RZ
1706 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH) {
1707 r = adev->ip_blocks[i].version->funcs->hw_init(adev);
1708 if (r) {
1709 DRM_ERROR("hw_init of IP block <%s> failed %d\n",
1710 adev->ip_blocks[i].version->funcs->name, r);
1711 return r;
1712 }
1713 adev->ip_blocks[i].status.hw = true;
1714 }
1715 }
1716
1717 return 0;
1718}
1719
1720static int amdgpu_device_ip_hw_init_phase2(struct amdgpu_device *adev)
1721{
1722 int i, r;
1723
1724 for (i = 0; i < adev->num_ip_blocks; i++) {
1725 if (!adev->ip_blocks[i].status.sw)
1726 continue;
1727 if (adev->ip_blocks[i].status.hw)
1728 continue;
1729 r = adev->ip_blocks[i].version->funcs->hw_init(adev);
1730 if (r) {
1731 DRM_ERROR("hw_init of IP block <%s> failed %d\n",
1732 adev->ip_blocks[i].version->funcs->name, r);
1733 return r;
1734 }
1735 adev->ip_blocks[i].status.hw = true;
1736 }
1737
1738 return 0;
1739}
1740
7a3e0bb2
RZ
1741static int amdgpu_device_fw_loading(struct amdgpu_device *adev)
1742{
1743 int r = 0;
1744 int i;
80f41f84 1745 uint32_t smu_version;
7a3e0bb2
RZ
1746
1747 if (adev->asic_type >= CHIP_VEGA10) {
1748 for (i = 0; i < adev->num_ip_blocks; i++) {
482f0e53
ML
1749 if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_PSP)
1750 continue;
1751
1751
1752 /* no need to do the fw loading again if already done */
1753 if (adev->ip_blocks[i].status.hw == true)
1754 break;
1755
1756 if (adev->in_gpu_reset || adev->in_suspend) {
1757 r = adev->ip_blocks[i].version->funcs->resume(adev);
1758 if (r) {
1759 DRM_ERROR("resume of IP block <%s> failed %d\n",
7a3e0bb2 1760 adev->ip_blocks[i].version->funcs->name, r);
482f0e53
ML
1761 return r;
1762 }
1763 } else {
1764 r = adev->ip_blocks[i].version->funcs->hw_init(adev);
1765 if (r) {
1766 DRM_ERROR("hw_init of IP block <%s> failed %d\n",
1767 adev->ip_blocks[i].version->funcs->name, r);
1768 return r;
7a3e0bb2 1769 }
7a3e0bb2 1770 }
482f0e53
ML
1771
1772 adev->ip_blocks[i].status.hw = true;
1773 break;
7a3e0bb2
RZ
1774 }
1775 }
482f0e53 1776
80f41f84 1777 r = amdgpu_pm_load_smu_firmware(adev, &smu_version);
7a3e0bb2 1778
80f41f84 1779 return r;
7a3e0bb2
RZ
1780}
1781
e3ecdffa
AD
1782/**
1783 * amdgpu_device_ip_init - run init for hardware IPs
1784 *
1785 * @adev: amdgpu_device pointer
1786 *
1787 * Main initialization pass for hardware IPs. The list of all the hardware
1788 * IPs that make up the asic is walked and the sw_init and hw_init callbacks
1789 * are run. sw_init initializes the software state associated with each IP
1790 * and hw_init initializes the hardware associated with each IP.
1791 * Returns 0 on success, negative error code on failure.
1792 */
06ec9070 1793static int amdgpu_device_ip_init(struct amdgpu_device *adev)
d38ceaf9
AD
1794{
1795 int i, r;
1796
c030f2e4 1797 r = amdgpu_ras_init(adev);
1798 if (r)
1799 return r;
1800
d38ceaf9 1801 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 1802 if (!adev->ip_blocks[i].status.valid)
d38ceaf9 1803 continue;
a1255107 1804 r = adev->ip_blocks[i].version->funcs->sw_init((void *)adev);
2c1a2784 1805 if (r) {
a1255107
AD
1806 DRM_ERROR("sw_init of IP block <%s> failed %d\n",
1807 adev->ip_blocks[i].version->funcs->name, r);
72d3f592 1808 goto init_failed;
2c1a2784 1809 }
a1255107 1810 adev->ip_blocks[i].status.sw = true;
bfca0289 1811
d38ceaf9 1812 /* need to do gmc hw init early so we can allocate gpu mem */
a1255107 1813 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) {
06ec9070 1814 r = amdgpu_device_vram_scratch_init(adev);
2c1a2784
AD
1815 if (r) {
1816 DRM_ERROR("amdgpu_vram_scratch_init failed %d\n", r);
72d3f592 1817 goto init_failed;
2c1a2784 1818 }
a1255107 1819 r = adev->ip_blocks[i].version->funcs->hw_init((void *)adev);
2c1a2784
AD
1820 if (r) {
1821 DRM_ERROR("hw_init %d failed %d\n", i, r);
72d3f592 1822 goto init_failed;
2c1a2784 1823 }
06ec9070 1824 r = amdgpu_device_wb_init(adev);
2c1a2784 1825 if (r) {
06ec9070 1826 DRM_ERROR("amdgpu_device_wb_init failed %d\n", r);
72d3f592 1827 goto init_failed;
2c1a2784 1828 }
a1255107 1829 adev->ip_blocks[i].status.hw = true;
2493664f
ML
1830
1831 /* right after GMC hw init, we create CSA */
f92d5c61 1832 if (amdgpu_mcbp || amdgpu_sriov_vf(adev)) {
1e256e27
RZ
1833 r = amdgpu_allocate_static_csa(adev, &adev->virt.csa_obj,
1834 AMDGPU_GEM_DOMAIN_VRAM,
1835 AMDGPU_CSA_SIZE);
2493664f
ML
1836 if (r) {
1837 DRM_ERROR("allocate CSA failed %d\n", r);
72d3f592 1838 goto init_failed;
2493664f
ML
1839 }
1840 }
d38ceaf9
AD
1841 }
1842 }
1843
533aed27
AG
1844 r = amdgpu_ib_pool_init(adev);
1845 if (r) {
1846 dev_err(adev->dev, "IB initialization failed (%d).\n", r);
1847 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_IB_INIT_FAIL, 0, r);
1848 goto init_failed;
1849 }
1850
c8963ea4
RZ
1851 r = amdgpu_ucode_create_bo(adev); /* create ucode bo when sw_init complete*/
1852 if (r)
72d3f592 1853 goto init_failed;
0a4f2520
RZ
1854
1855 r = amdgpu_device_ip_hw_init_phase1(adev);
1856 if (r)
72d3f592 1857 goto init_failed;
0a4f2520 1858
7a3e0bb2
RZ
1859 r = amdgpu_device_fw_loading(adev);
1860 if (r)
72d3f592 1861 goto init_failed;
7a3e0bb2 1862
0a4f2520
RZ
1863 r = amdgpu_device_ip_hw_init_phase2(adev);
1864 if (r)
72d3f592 1865 goto init_failed;
d38ceaf9 1866
3e2e2ab5
HZ
1867 if (adev->gmc.xgmi.num_physical_nodes > 1)
1868 amdgpu_xgmi_add_device(adev);
1884734a 1869 amdgpu_amdkfd_device_init(adev);
c6332b97 1870
72d3f592 1871init_failed:
d3c117e5 1872 if (amdgpu_sriov_vf(adev)) {
72d3f592
ED
1873 if (!r)
1874 amdgpu_virt_init_data_exchange(adev);
c6332b97 1875 amdgpu_virt_release_full_gpu(adev, true);
d3c117e5 1876 }
c6332b97 1877
72d3f592 1878 return r;
d38ceaf9
AD
1879}
1880
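/*
 * Illustrative summary (not from the source): amdgpu_device_ip_init() above
 * effectively runs, in order: RAS init, sw_init for every valid IP block
 * (with GMC hw_init pulled forward so VRAM allocations work, followed by
 * the writeback and CSA setup), IB pool and ucode BO creation, hw_init
 * phase 1, firmware loading, hw_init phase 2, and finally XGMI and KFD
 * registration.
 */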
e3ecdffa
AD
1881/**
1882 * amdgpu_device_fill_reset_magic - writes reset magic to gart pointer
1883 *
1884 * @adev: amdgpu_device pointer
1885 *
1886 * Writes a reset magic value to the gart pointer in VRAM. The driver calls
1887 * this function before a GPU reset. If the value is retained after a
1888 * GPU reset, VRAM has not been lost. Some GPU resets may destroy VRAM contents.
1889 */
06ec9070 1890static void amdgpu_device_fill_reset_magic(struct amdgpu_device *adev)
0c49e0b8
CZ
1891{
1892 memcpy(adev->reset_magic, adev->gart.ptr, AMDGPU_RESET_MAGIC_NUM);
1893}
1894
e3ecdffa
AD
1895/**
1896 * amdgpu_device_check_vram_lost - check if vram is valid
1897 *
1898 * @adev: amdgpu_device pointer
1899 *
1900 * Checks the reset magic value written to the gart pointer in VRAM.
1901 * The driver calls this after a GPU reset to see if the contents of
1902 * VRAM are lost or not.
1903 * Returns true if VRAM is lost, false if not.
1904 */
06ec9070 1905static bool amdgpu_device_check_vram_lost(struct amdgpu_device *adev)
0c49e0b8
CZ
1906{
1907 return !!memcmp(adev->gart.ptr, adev->reset_magic,
1908 AMDGPU_RESET_MAGIC_NUM);
1909}
1910
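/*
 * Illustrative usage (not part of this file): a reset path would typically
 * pair the two helpers above, e.g.
 *
 *	amdgpu_device_fill_reset_magic(adev);      // before the ASIC reset
 *	...perform the ASIC reset...
 *	if (amdgpu_device_check_vram_lost(adev))
 *		...restore buffers from their GTT shadows...
 */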
e3ecdffa 1911/**
1112a46b 1912 * amdgpu_device_set_cg_state - set clockgating for amdgpu device
e3ecdffa
AD
1913 *
1914 * @adev: amdgpu_device pointer
1915 *
e3ecdffa 1916 * The list of all the hardware IPs that make up the asic is walked and the
1112a46b
RZ
1917 * set_clockgating_state callbacks are run.
1918 * During the late init pass this enables clockgating for hardware IPs;
1919 * during fini or suspend it disables clockgating again.
e3ecdffa
AD
1920 * Returns 0 on success, negative error code on failure.
1921 */
fdd34271 1922
1112a46b
RZ
1923static int amdgpu_device_set_cg_state(struct amdgpu_device *adev,
1924 enum amd_clockgating_state state)
d38ceaf9 1925{
1112a46b 1926 int i, j, r;
d38ceaf9 1927
4a2ba394
SL
1928 if (amdgpu_emu_mode == 1)
1929 return 0;
1930
1112a46b
RZ
1931 for (j = 0; j < adev->num_ip_blocks; j++) {
1932 i = state == AMD_CG_STATE_GATE ? j : adev->num_ip_blocks - j - 1;
a2d31dc3 1933 if (!adev->ip_blocks[i].status.late_initialized)
d38ceaf9 1934 continue;
4a446d55 1935 /* skip CG for VCE/UVD, it's handled specially */
a1255107 1936 if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_UVD &&
57716327 1937 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCE &&
34319b32 1938 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCN &&
57716327 1939 adev->ip_blocks[i].version->funcs->set_clockgating_state) {
4a446d55 1940 /* enable clockgating to save power */
a1255107 1941 r = adev->ip_blocks[i].version->funcs->set_clockgating_state((void *)adev,
1112a46b 1942 state);
4a446d55
AD
1943 if (r) {
1944 DRM_ERROR("set_clockgating_state(gate) of IP block <%s> failed %d\n",
a1255107 1945 adev->ip_blocks[i].version->funcs->name, r);
4a446d55
AD
1946 return r;
1947 }
b0b00ff1 1948 }
d38ceaf9 1949 }
06b18f61 1950
c9f96fd5
RZ
1951 return 0;
1952}
1953
1112a46b 1954static int amdgpu_device_set_pg_state(struct amdgpu_device *adev, enum amd_powergating_state state)
c9f96fd5 1955{
1112a46b 1956 int i, j, r;
06b18f61 1957
c9f96fd5
RZ
1958 if (amdgpu_emu_mode == 1)
1959 return 0;
1960
1112a46b
RZ
1961 for (j = 0; j < adev->num_ip_blocks; j++) {
1962 i = state == AMD_PG_STATE_GATE ? j : adev->num_ip_blocks - j - 1;
a2d31dc3 1963 if (!adev->ip_blocks[i].status.late_initialized)
c9f96fd5
RZ
1964 continue;
1965 /* skip PG for VCE/UVD, it's handled specially */
1966 if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_UVD &&
1967 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCE &&
1968 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCN &&
1969 adev->ip_blocks[i].version->funcs->set_powergating_state) {
1970 /* enable powergating to save power */
1971 r = adev->ip_blocks[i].version->funcs->set_powergating_state((void *)adev,
1112a46b 1972 state);
c9f96fd5
RZ
1973 if (r) {
1974 DRM_ERROR("set_powergating_state(gate) of IP block <%s> failed %d\n",
1975 adev->ip_blocks[i].version->funcs->name, r);
1976 return r;
1977 }
1978 }
1979 }
2dc80b00
S
1980 return 0;
1981}
1982
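/*
 * Illustrative note (not from the source): the index expression shared by
 * the two helpers above walks the IP list forward when gating and backward
 * when ungating.  For num_ip_blocks == 4:
 *
 *	gate   (j = 0..3): i = 0, 1, 2, 3
 *	ungate (j = 0..3): i = 3, 2, 1, 0
 *
 * so gating is torn down in the reverse of the order it was applied.
 */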
beff74bc
AD
1983static int amdgpu_device_enable_mgpu_fan_boost(void)
1984{
1985 struct amdgpu_gpu_instance *gpu_ins;
1986 struct amdgpu_device *adev;
1987 int i, ret = 0;
1988
1989 mutex_lock(&mgpu_info.mutex);
1990
1991 /*
1992 * MGPU fan boost feature should be enabled
1993 * only when there are two or more dGPUs in
1994 * the system
1995 */
1996 if (mgpu_info.num_dgpu < 2)
1997 goto out;
1998
1999 for (i = 0; i < mgpu_info.num_dgpu; i++) {
2000 gpu_ins = &(mgpu_info.gpu_ins[i]);
2001 adev = gpu_ins->adev;
2002 if (!(adev->flags & AMD_IS_APU) &&
2003 !gpu_ins->mgpu_fan_enabled &&
2004 adev->powerplay.pp_funcs &&
2005 adev->powerplay.pp_funcs->enable_mgpu_fan_boost) {
2006 ret = amdgpu_dpm_enable_mgpu_fan_boost(adev);
2007 if (ret)
2008 break;
2009
2010 gpu_ins->mgpu_fan_enabled = 1;
2011 }
2012 }
2013
2014out:
2015 mutex_unlock(&mgpu_info.mutex);
2016
2017 return ret;
2018}
2019
e3ecdffa
AD
2020/**
2021 * amdgpu_device_ip_late_init - run late init for hardware IPs
2022 *
2023 * @adev: amdgpu_device pointer
2024 *
2025 * Late initialization pass for hardware IPs. The list of all the hardware
2026 * IPs that make up the asic is walked and the late_init callbacks are run.
2027 * late_init covers any special initialization that an IP requires
2028 * after all of them have been initialized or something that needs to happen
2029 * late in the init process.
2030 * Returns 0 on success, negative error code on failure.
2031 */
06ec9070 2032static int amdgpu_device_ip_late_init(struct amdgpu_device *adev)
2dc80b00
S
2033{
2034 int i = 0, r;
2035
2036 for (i = 0; i < adev->num_ip_blocks; i++) {
73f847db 2037 if (!adev->ip_blocks[i].status.hw)
2dc80b00
S
2038 continue;
2039 if (adev->ip_blocks[i].version->funcs->late_init) {
2040 r = adev->ip_blocks[i].version->funcs->late_init((void *)adev);
2041 if (r) {
2042 DRM_ERROR("late_init of IP block <%s> failed %d\n",
2043 adev->ip_blocks[i].version->funcs->name, r);
2044 return r;
2045 }
2dc80b00 2046 }
73f847db 2047 adev->ip_blocks[i].status.late_initialized = true;
2dc80b00
S
2048 }
2049
1112a46b
RZ
2050 amdgpu_device_set_cg_state(adev, AMD_CG_STATE_GATE);
2051 amdgpu_device_set_pg_state(adev, AMD_PG_STATE_GATE);
916ac57f 2052
06ec9070 2053 amdgpu_device_fill_reset_magic(adev);
d38ceaf9 2054
beff74bc
AD
2055 r = amdgpu_device_enable_mgpu_fan_boost();
2056 if (r)
2057 DRM_ERROR("enable mgpu fan boost failed (%d).\n", r);
2058
2059 /* set to low pstate by default */
2060 amdgpu_xgmi_set_pstate(adev, 0);
2061
d38ceaf9
AD
2062 return 0;
2063}
2064
e3ecdffa
AD
2065/**
2066 * amdgpu_device_ip_fini - run fini for hardware IPs
2067 *
2068 * @adev: amdgpu_device pointer
2069 *
2070 * Main teardown pass for hardware IPs. The list of all the hardware
2071 * IPs that make up the asic is walked and the hw_fini and sw_fini callbacks
2072 * are run. hw_fini tears down the hardware associated with each IP
2073 * and sw_fini tears down any software state associated with each IP.
2074 * Returns 0 on success, negative error code on failure.
2075 */
06ec9070 2076static int amdgpu_device_ip_fini(struct amdgpu_device *adev)
d38ceaf9
AD
2077{
2078 int i, r;
2079
c030f2e4 2080 amdgpu_ras_pre_fini(adev);
2081
a82400b5
AG
2082 if (adev->gmc.xgmi.num_physical_nodes > 1)
2083 amdgpu_xgmi_remove_device(adev);
2084
1884734a 2085 amdgpu_amdkfd_device_fini(adev);
05df1f01
RZ
2086
2087 amdgpu_device_set_pg_state(adev, AMD_PG_STATE_UNGATE);
fdd34271
RZ
2088 amdgpu_device_set_cg_state(adev, AMD_CG_STATE_UNGATE);
2089
3e96dbfd
AD
2090 /* need to disable SMC first */
2091 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 2092 if (!adev->ip_blocks[i].status.hw)
3e96dbfd 2093 continue;
fdd34271 2094 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) {
a1255107 2095 r = adev->ip_blocks[i].version->funcs->hw_fini((void *)adev);
3e96dbfd
AD
2096 /* XXX handle errors */
2097 if (r) {
2098 DRM_DEBUG("hw_fini of IP block <%s> failed %d\n",
a1255107 2099 adev->ip_blocks[i].version->funcs->name, r);
3e96dbfd 2100 }
a1255107 2101 adev->ip_blocks[i].status.hw = false;
3e96dbfd
AD
2102 break;
2103 }
2104 }
2105
d38ceaf9 2106 for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
a1255107 2107 if (!adev->ip_blocks[i].status.hw)
d38ceaf9 2108 continue;
8201a67a 2109
a1255107 2110 r = adev->ip_blocks[i].version->funcs->hw_fini((void *)adev);
d38ceaf9 2111 /* XXX handle errors */
2c1a2784 2112 if (r) {
a1255107
AD
2113 DRM_DEBUG("hw_fini of IP block <%s> failed %d\n",
2114 adev->ip_blocks[i].version->funcs->name, r);
2c1a2784 2115 }
8201a67a 2116
a1255107 2117 adev->ip_blocks[i].status.hw = false;
d38ceaf9
AD
2118 }
2119
9950cda2 2120
d38ceaf9 2121 for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
a1255107 2122 if (!adev->ip_blocks[i].status.sw)
d38ceaf9 2123 continue;
c12aba3a
ML
2124
2125 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) {
c8963ea4 2126 amdgpu_ucode_free_bo(adev);
1e256e27 2127 amdgpu_free_static_csa(&adev->virt.csa_obj);
c12aba3a
ML
2128 amdgpu_device_wb_fini(adev);
2129 amdgpu_device_vram_scratch_fini(adev);
533aed27 2130 amdgpu_ib_pool_fini(adev);
c12aba3a
ML
2131 }
2132
a1255107 2133 r = adev->ip_blocks[i].version->funcs->sw_fini((void *)adev);
d38ceaf9 2134 /* XXX handle errors */
2c1a2784 2135 if (r) {
a1255107
AD
2136 DRM_DEBUG("sw_fini of IP block <%s> failed %d\n",
2137 adev->ip_blocks[i].version->funcs->name, r);
2c1a2784 2138 }
a1255107
AD
2139 adev->ip_blocks[i].status.sw = false;
2140 adev->ip_blocks[i].status.valid = false;
d38ceaf9
AD
2141 }
2142
a6dcfd9c 2143 for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
a1255107 2144 if (!adev->ip_blocks[i].status.late_initialized)
8a2eef1d 2145 continue;
a1255107
AD
2146 if (adev->ip_blocks[i].version->funcs->late_fini)
2147 adev->ip_blocks[i].version->funcs->late_fini((void *)adev);
2148 adev->ip_blocks[i].status.late_initialized = false;
a6dcfd9c
ML
2149 }
2150
c030f2e4 2151 amdgpu_ras_fini(adev);
2152
030308fc 2153 if (amdgpu_sriov_vf(adev))
24136135
ML
2154 if (amdgpu_virt_release_full_gpu(adev, false))
2155 DRM_ERROR("failed to release exclusive mode on fini\n");
2493664f 2156
d38ceaf9
AD
2157 return 0;
2158}
2159
e3ecdffa 2160/**
beff74bc 2161 * amdgpu_device_delayed_init_work_handler - work handler for IB tests
e3ecdffa 2162 *
1112a46b 2163 * @work: work_struct.
e3ecdffa 2164 */
beff74bc 2165static void amdgpu_device_delayed_init_work_handler(struct work_struct *work)
2dc80b00
S
2166{
2167 struct amdgpu_device *adev =
beff74bc 2168 container_of(work, struct amdgpu_device, delayed_init_work.work);
916ac57f
RZ
2169 int r;
2170
2171 r = amdgpu_ib_ring_tests(adev);
2172 if (r)
2173 DRM_ERROR("ib ring test failed (%d).\n", r);
2dc80b00
S
2174}
2175
1e317b99
RZ
2176static void amdgpu_device_delay_enable_gfx_off(struct work_struct *work)
2177{
2178 struct amdgpu_device *adev =
2179 container_of(work, struct amdgpu_device, gfx.gfx_off_delay_work.work);
2180
2181 mutex_lock(&adev->gfx.gfx_off_mutex);
2182 if (!adev->gfx.gfx_off_state && !adev->gfx.gfx_off_req_count) {
2183 if (!amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_GFX, true))
2184 adev->gfx.gfx_off_state = true;
2185 }
2186 mutex_unlock(&adev->gfx.gfx_off_mutex);
2187}
2188
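/*
 * Illustrative sketch (hypothetical, not the driver's actual interface):
 * gfx_off_req_count acts as a "disallow" counter.  Users that need the GFX
 * core awake bump it, and only once it falls back to zero may the delayed
 * work above actually arm GFXOFF, roughly:
 */
#if 0	/* example only, never compiled */
static void example_gfx_off_allow(struct amdgpu_device *adev, bool allow)
{
	mutex_lock(&adev->gfx.gfx_off_mutex);
	if (allow)
		adev->gfx.gfx_off_req_count--;	/* 0 means nobody objects */
	else
		adev->gfx.gfx_off_req_count++;
	mutex_unlock(&adev->gfx.gfx_off_mutex);
}
#endif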
e3ecdffa 2189/**
e7854a03 2190 * amdgpu_device_ip_suspend_phase1 - run suspend for hardware IPs (phase 1)
e3ecdffa
AD
2191 *
2192 * @adev: amdgpu_device pointer
2193 *
2194 * Main suspend function for hardware IPs. The list of all the hardware
2195 * IPs that make up the asic is walked, clockgating is disabled and the
2196 * suspend callbacks are run. suspend puts the hardware and software state
2197 * in each IP into a state suitable for suspend.
2198 * Returns 0 on success, negative error code on failure.
2199 */
e7854a03
AD
2200static int amdgpu_device_ip_suspend_phase1(struct amdgpu_device *adev)
2201{
2202 int i, r;
2203
05df1f01 2204 amdgpu_device_set_pg_state(adev, AMD_PG_STATE_UNGATE);
fdd34271 2205 amdgpu_device_set_cg_state(adev, AMD_CG_STATE_UNGATE);
05df1f01 2206
e7854a03
AD
2207 for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
2208 if (!adev->ip_blocks[i].status.valid)
2209 continue;
2210 /* displays are handled separately */
2211 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_DCE) {
e7854a03
AD
2212 /* XXX handle errors */
2213 r = adev->ip_blocks[i].version->funcs->suspend(adev);
2214 /* XXX handle errors */
2215 if (r) {
2216 DRM_ERROR("suspend of IP block <%s> failed %d\n",
2217 adev->ip_blocks[i].version->funcs->name, r);
482f0e53 2218 return r;
e7854a03 2219 }
482f0e53 2220 adev->ip_blocks[i].status.hw = false;
e7854a03
AD
2221 }
2222 }
2223
e7854a03
AD
2224 return 0;
2225}
2226
2227/**
2228 * amdgpu_device_ip_suspend_phase2 - run suspend for hardware IPs (phase 2)
2229 *
2230 * @adev: amdgpu_device pointer
2231 *
2232 * Main suspend function for hardware IPs. The list of all the hardware
2233 * IPs that make up the asic is walked, clockgating is disabled and the
2234 * suspend callbacks are run. suspend puts the hardware and software state
2235 * in each IP into a state suitable for suspend.
2236 * Returns 0 on success, negative error code on failure.
2237 */
2238static int amdgpu_device_ip_suspend_phase2(struct amdgpu_device *adev)
d38ceaf9
AD
2239{
2240 int i, r;
2241
2242 for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
a1255107 2243 if (!adev->ip_blocks[i].status.valid)
d38ceaf9 2244 continue;
e7854a03
AD
2245 /* displays are handled in phase1 */
2246 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_DCE)
2247 continue;
d38ceaf9 2248 /* XXX handle errors */
a1255107 2249 r = adev->ip_blocks[i].version->funcs->suspend(adev);
d38ceaf9 2250 /* XXX handle errors */
2c1a2784 2251 if (r) {
a1255107
AD
2252 DRM_ERROR("suspend of IP block <%s> failed %d\n",
2253 adev->ip_blocks[i].version->funcs->name, r);
2c1a2784 2254 }
a3a09142
AD
2255 /* handle putting the SMC in the appropriate state */
2256 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) {
2257 if (is_support_sw_smu(adev)) {
2258 /* todo */
2259 } else if (adev->powerplay.pp_funcs &&
482f0e53 2260 adev->powerplay.pp_funcs->set_mp1_state) {
a3a09142
AD
2261 r = adev->powerplay.pp_funcs->set_mp1_state(
2262 adev->powerplay.pp_handle,
2263 adev->mp1_state);
2264 if (r) {
2265 DRM_ERROR("SMC failed to set mp1 state %d, %d\n",
2266 adev->mp1_state, r);
482f0e53 2267 return r;
a3a09142 2268 }
482f0e53 2269 adev->ip_blocks[i].status.hw = false;
a3a09142
AD
2270 }
2271 }
d38ceaf9
AD
2272 }
2273
2274 return 0;
2275}
2276
e7854a03
AD
2277/**
2278 * amdgpu_device_ip_suspend - run suspend for hardware IPs
2279 *
2280 * @adev: amdgpu_device pointer
2281 *
2282 * Main suspend function for hardware IPs. The list of all the hardware
2283 * IPs that make up the asic is walked, clockgating is disabled and the
2284 * suspend callbacks are run. suspend puts the hardware and software state
2285 * in each IP into a state suitable for suspend.
2286 * Returns 0 on success, negative error code on failure.
2287 */
2288int amdgpu_device_ip_suspend(struct amdgpu_device *adev)
2289{
2290 int r;
2291
e7819644
YT
2292 if (amdgpu_sriov_vf(adev))
2293 amdgpu_virt_request_full_gpu(adev, false);
2294
e7854a03
AD
2295 r = amdgpu_device_ip_suspend_phase1(adev);
2296 if (r)
2297 return r;
2298 r = amdgpu_device_ip_suspend_phase2(adev);
2299
e7819644
YT
2300 if (amdgpu_sriov_vf(adev))
2301 amdgpu_virt_release_full_gpu(adev, false);
2302
e7854a03
AD
2303 return r;
2304}
2305
06ec9070 2306static int amdgpu_device_ip_reinit_early_sriov(struct amdgpu_device *adev)
a90ad3c2
ML
2307{
2308 int i, r;
2309
2cb681b6
ML
2310 static enum amd_ip_block_type ip_order[] = {
2311 AMD_IP_BLOCK_TYPE_GMC,
2312 AMD_IP_BLOCK_TYPE_COMMON,
39186aef 2313 AMD_IP_BLOCK_TYPE_PSP,
2cb681b6
ML
2314 AMD_IP_BLOCK_TYPE_IH,
2315 };
a90ad3c2 2316
2cb681b6
ML
2317 for (i = 0; i < ARRAY_SIZE(ip_order); i++) {
2318 int j;
2319 struct amdgpu_ip_block *block;
a90ad3c2 2320
2cb681b6
ML
2321 for (j = 0; j < adev->num_ip_blocks; j++) {
2322 block = &adev->ip_blocks[j];
2323
482f0e53 2324 block->status.hw = false;
2cb681b6
ML
2325 if (block->version->type != ip_order[i] ||
2326 !block->status.valid)
2327 continue;
2328
2329 r = block->version->funcs->hw_init(adev);
0aaeefcc 2330 DRM_INFO("RE-INIT-early: %s %s\n", block->version->funcs->name, r ? "failed" : "succeeded");
c41d1cf6
ML
2331 if (r)
2332 return r;
482f0e53 2333 block->status.hw = true;
a90ad3c2
ML
2334 }
2335 }
2336
2337 return 0;
2338}
2339
06ec9070 2340static int amdgpu_device_ip_reinit_late_sriov(struct amdgpu_device *adev)
a90ad3c2
ML
2341{
2342 int i, r;
2343
2cb681b6
ML
2344 static enum amd_ip_block_type ip_order[] = {
2345 AMD_IP_BLOCK_TYPE_SMC,
2346 AMD_IP_BLOCK_TYPE_DCE,
2347 AMD_IP_BLOCK_TYPE_GFX,
2348 AMD_IP_BLOCK_TYPE_SDMA,
257deb8c
FM
2349 AMD_IP_BLOCK_TYPE_UVD,
2350 AMD_IP_BLOCK_TYPE_VCE
2cb681b6 2351 };
a90ad3c2 2352
2cb681b6
ML
2353 for (i = 0; i < ARRAY_SIZE(ip_order); i++) {
2354 int j;
2355 struct amdgpu_ip_block *block;
a90ad3c2 2356
2cb681b6
ML
2357 for (j = 0; j < adev->num_ip_blocks; j++) {
2358 block = &adev->ip_blocks[j];
2359
2360 if (block->version->type != ip_order[i] ||
482f0e53
ML
2361 !block->status.valid ||
2362 block->status.hw)
2cb681b6
ML
2363 continue;
2364
2365 r = block->version->funcs->hw_init(adev);
0aaeefcc 2366 DRM_INFO("RE-INIT-late: %s %s\n", block->version->funcs->name, r ? "failed" : "succeeded");
c41d1cf6
ML
2367 if (r)
2368 return r;
482f0e53 2369 block->status.hw = true;
a90ad3c2
ML
2370 }
2371 }
2372
2373 return 0;
2374}
2375
e3ecdffa
AD
2376/**
2377 * amdgpu_device_ip_resume_phase1 - run resume for hardware IPs
2378 *
2379 * @adev: amdgpu_device pointer
2380 *
2381 * First resume function for hardware IPs. The list of all the hardware
2382 * IPs that make up the asic is walked and the resume callbacks are run for
2383 * COMMON, GMC, and IH. resume puts the hardware into a functional state
2384 * after a suspend and updates the software state as necessary. This
2385 * function is also used for restoring the GPU after a GPU reset.
2386 * Returns 0 on success, negative error code on failure.
2387 */
06ec9070 2388static int amdgpu_device_ip_resume_phase1(struct amdgpu_device *adev)
d38ceaf9
AD
2389{
2390 int i, r;
2391
a90ad3c2 2392 for (i = 0; i < adev->num_ip_blocks; i++) {
482f0e53 2393 if (!adev->ip_blocks[i].status.valid || adev->ip_blocks[i].status.hw)
a90ad3c2 2394 continue;
a90ad3c2 2395 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
e3ecdffa
AD
2396 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
2397 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH) {
482f0e53 2398
fcf0649f
CZ
2399 r = adev->ip_blocks[i].version->funcs->resume(adev);
2400 if (r) {
2401 DRM_ERROR("resume of IP block <%s> failed %d\n",
2402 adev->ip_blocks[i].version->funcs->name, r);
2403 return r;
2404 }
482f0e53 2405 adev->ip_blocks[i].status.hw = true;
a90ad3c2
ML
2406 }
2407 }
2408
2409 return 0;
2410}
2411
e3ecdffa
AD
2412/**
2413 * amdgpu_device_ip_resume_phase2 - run resume for hardware IPs
2414 *
2415 * @adev: amdgpu_device pointer
2416 *
2417 * Second resume function for hardware IPs. The list of all the hardware
2418 * IPs that make up the asic is walked and the resume callbacks are run for
2419 * all blocks except COMMON, GMC, and IH. resume puts the hardware into a
2420 * functional state after a suspend and updates the software state as
2421 * necessary. This function is also used for restoring the GPU after a GPU
2422 * reset.
2423 * Returns 0 on success, negative error code on failure.
2424 */
06ec9070 2425static int amdgpu_device_ip_resume_phase2(struct amdgpu_device *adev)
d38ceaf9
AD
2426{
2427 int i, r;
2428
2429 for (i = 0; i < adev->num_ip_blocks; i++) {
482f0e53 2430 if (!adev->ip_blocks[i].status.valid || adev->ip_blocks[i].status.hw)
d38ceaf9 2431 continue;
fcf0649f 2432 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
e3ecdffa 2433 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
7a3e0bb2
RZ
2434 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH ||
2435 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP)
fcf0649f 2436 continue;
a1255107 2437 r = adev->ip_blocks[i].version->funcs->resume(adev);
2c1a2784 2438 if (r) {
a1255107
AD
2439 DRM_ERROR("resume of IP block <%s> failed %d\n",
2440 adev->ip_blocks[i].version->funcs->name, r);
d38ceaf9 2441 return r;
2c1a2784 2442 }
482f0e53 2443 adev->ip_blocks[i].status.hw = true;
d38ceaf9
AD
2444 }
2445
2446 return 0;
2447}
2448
e3ecdffa
AD
2449/**
2450 * amdgpu_device_ip_resume - run resume for hardware IPs
2451 *
2452 * @adev: amdgpu_device pointer
2453 *
2454 * Main resume function for hardware IPs. The hardware IPs
2455 * are split into two resume functions because they are
2456 * also used in recovering from a GPU reset and some additional
2457 * steps need to be taken between them. In this case (S3/S4) they are
2458 * run sequentially.
2459 * Returns 0 on success, negative error code on failure.
2460 */
06ec9070 2461static int amdgpu_device_ip_resume(struct amdgpu_device *adev)
fcf0649f
CZ
2462{
2463 int r;
2464
06ec9070 2465 r = amdgpu_device_ip_resume_phase1(adev);
fcf0649f
CZ
2466 if (r)
2467 return r;
7a3e0bb2
RZ
2468
2469 r = amdgpu_device_fw_loading(adev);
2470 if (r)
2471 return r;
2472
06ec9070 2473 r = amdgpu_device_ip_resume_phase2(adev);
fcf0649f
CZ
2474
2475 return r;
2476}
2477
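/*
 * Illustrative note (not from the source): firmware loading sits between
 * the two resume phases because the PSP/SMU need the COMMON, GMC and IH
 * blocks (phase 1) up first, while the engines resumed in phase 2 in turn
 * expect their microcode to already be loaded.
 */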
e3ecdffa
AD
2478/**
2479 * amdgpu_device_detect_sriov_bios - determine if the board supports SR-IOV
2480 *
2481 * @adev: amdgpu_device pointer
2482 *
2483 * Query the VBIOS data tables to determine if the board supports SR-IOV.
2484 */
4e99a44e 2485static void amdgpu_device_detect_sriov_bios(struct amdgpu_device *adev)
048765ad 2486{
6867e1b5
ML
2487 if (amdgpu_sriov_vf(adev)) {
2488 if (adev->is_atom_fw) {
2489 if (amdgpu_atomfirmware_gpu_supports_virtualization(adev))
2490 adev->virt.caps |= AMDGPU_SRIOV_CAPS_SRIOV_VBIOS;
2491 } else {
2492 if (amdgpu_atombios_has_gpu_virtualization_table(adev))
2493 adev->virt.caps |= AMDGPU_SRIOV_CAPS_SRIOV_VBIOS;
2494 }
2495
2496 if (!(adev->virt.caps & AMDGPU_SRIOV_CAPS_SRIOV_VBIOS))
2497 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_NO_VBIOS, 0, 0);
a5bde2f9 2498 }
048765ad
AR
2499}
2500
e3ecdffa
AD
2501/**
2502 * amdgpu_device_asic_has_dc_support - determine if DC supports the asic
2503 *
2504 * @asic_type: AMD asic type
2505 *
2506 * Check if there is DC (new modesetting infrastructure) support for an asic.
2507 * Returns true if DC has support, false if not.
2508 */
4562236b
HW
2509bool amdgpu_device_asic_has_dc_support(enum amd_asic_type asic_type)
2510{
2511 switch (asic_type) {
2512#if defined(CONFIG_DRM_AMD_DC)
2513 case CHIP_BONAIRE:
0d6fbccb 2514 case CHIP_KAVERI:
367e6687
AD
2515 case CHIP_KABINI:
2516 case CHIP_MULLINS:
d9fda248
HW
2517 /*
2518 * We have systems in the wild with these ASICs that require
2519 * LVDS and VGA support which is not supported with DC.
2520 *
2521 * Fallback to the non-DC driver here by default so as not to
2522 * cause regressions.
2523 */
2524 return amdgpu_dc > 0;
2525 case CHIP_HAWAII:
4562236b
HW
2526 case CHIP_CARRIZO:
2527 case CHIP_STONEY:
4562236b 2528 case CHIP_POLARIS10:
675fd32b 2529 case CHIP_POLARIS11:
2c8ad2d5 2530 case CHIP_POLARIS12:
675fd32b 2531 case CHIP_VEGAM:
4562236b
HW
2532 case CHIP_TONGA:
2533 case CHIP_FIJI:
42f8ffa1 2534 case CHIP_VEGA10:
dca7b401 2535 case CHIP_VEGA12:
c6034aa2 2536 case CHIP_VEGA20:
dc37a9a0 2537#if defined(CONFIG_DRM_AMD_DC_DCN1_0)
fd187853 2538 case CHIP_RAVEN:
b4f199c7
HW
2539#endif
2540#if defined(CONFIG_DRM_AMD_DC_DCN2_0)
2541 case CHIP_NAVI10:
8fceceb6 2542 case CHIP_NAVI14:
42f8ffa1 2543#endif
fd187853 2544 return amdgpu_dc != 0;
4562236b
HW
2545#endif
2546 default:
2547 return false;
2548 }
2549}
2550
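/*
 * Illustrative note (not from the source): the amdgpu.dc module parameter
 * defaults to -1 (auto).  "amdgpu_dc != 0" therefore selects DC unless the
 * user explicitly disabled it, while the "amdgpu_dc > 0" test used for the
 * LVDS/VGA-era ASICs above requires an explicit opt in.
 */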
2551/**
2552 * amdgpu_device_has_dc_support - check if dc is supported
2553 *
2554 * @adev: amdgpu_device pointer
2555 *
2556 * Returns true for supported, false for not supported
2557 */
2558bool amdgpu_device_has_dc_support(struct amdgpu_device *adev)
2559{
2555039d
XY
2560 if (amdgpu_sriov_vf(adev))
2561 return false;
2562
4562236b
HW
2563 return amdgpu_device_asic_has_dc_support(adev->asic_type);
2564}
2565
d4535e2c
AG
2566
2567static void amdgpu_device_xgmi_reset_func(struct work_struct *__work)
2568{
2569 struct amdgpu_device *adev =
2570 container_of(__work, struct amdgpu_device, xgmi_reset_work);
2571
2572 adev->asic_reset_res = amdgpu_asic_reset(adev);
2573 if (adev->asic_reset_res)
fed184e9 2574 DRM_WARN("ASIC reset failed with error, %d for drm dev, %s",
d4535e2c
AG
2575 adev->asic_reset_res, adev->ddev->unique);
2576}
2577
2578
d38ceaf9
AD
2579/**
2580 * amdgpu_device_init - initialize the driver
2581 *
2582 * @adev: amdgpu_device pointer
87e3f136 2583 * @ddev: drm dev pointer
d38ceaf9
AD
2584 * @pdev: pci dev pointer
2585 * @flags: driver flags
2586 *
2587 * Initializes the driver info and hw (all asics).
2588 * Returns 0 for success or an error on failure.
2589 * Called at driver startup.
2590 */
2591int amdgpu_device_init(struct amdgpu_device *adev,
2592 struct drm_device *ddev,
2593 struct pci_dev *pdev,
2594 uint32_t flags)
2595{
2596 int r, i;
2597 bool runtime = false;
95844d20 2598 u32 max_MBps;
d38ceaf9
AD
2599
2600 adev->shutdown = false;
2601 adev->dev = &pdev->dev;
2602 adev->ddev = ddev;
2603 adev->pdev = pdev;
2604 adev->flags = flags;
2f7d10b3 2605 adev->asic_type = flags & AMD_ASIC_MASK;
d38ceaf9 2606 adev->usec_timeout = AMDGPU_MAX_USEC_TIMEOUT;
593aa2d2
SL
2607 if (amdgpu_emu_mode == 1)
2608 adev->usec_timeout *= 2;
770d13b1 2609 adev->gmc.gart_size = 512 * 1024 * 1024;
d38ceaf9
AD
2610 adev->accel_working = false;
2611 adev->num_rings = 0;
2612 adev->mman.buffer_funcs = NULL;
2613 adev->mman.buffer_funcs_ring = NULL;
2614 adev->vm_manager.vm_pte_funcs = NULL;
3798e9a6 2615 adev->vm_manager.vm_pte_num_rqs = 0;
132f34e4 2616 adev->gmc.gmc_funcs = NULL;
f54d1867 2617 adev->fence_context = dma_fence_context_alloc(AMDGPU_MAX_RINGS);
b8866c26 2618 bitmap_zero(adev->gfx.pipe_reserve_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
d38ceaf9
AD
2619
2620 adev->smc_rreg = &amdgpu_invalid_rreg;
2621 adev->smc_wreg = &amdgpu_invalid_wreg;
2622 adev->pcie_rreg = &amdgpu_invalid_rreg;
2623 adev->pcie_wreg = &amdgpu_invalid_wreg;
36b9a952
HR
2624 adev->pciep_rreg = &amdgpu_invalid_rreg;
2625 adev->pciep_wreg = &amdgpu_invalid_wreg;
4fa1c6a6
TZ
2626 adev->pcie_rreg64 = &amdgpu_invalid_rreg64;
2627 adev->pcie_wreg64 = &amdgpu_invalid_wreg64;
d38ceaf9
AD
2628 adev->uvd_ctx_rreg = &amdgpu_invalid_rreg;
2629 adev->uvd_ctx_wreg = &amdgpu_invalid_wreg;
2630 adev->didt_rreg = &amdgpu_invalid_rreg;
2631 adev->didt_wreg = &amdgpu_invalid_wreg;
ccdbb20a
RZ
2632 adev->gc_cac_rreg = &amdgpu_invalid_rreg;
2633 adev->gc_cac_wreg = &amdgpu_invalid_wreg;
d38ceaf9
AD
2634 adev->audio_endpt_rreg = &amdgpu_block_invalid_rreg;
2635 adev->audio_endpt_wreg = &amdgpu_block_invalid_wreg;
2636
3e39ab90
AD
2637 DRM_INFO("initializing kernel modesetting (%s 0x%04X:0x%04X 0x%04X:0x%04X 0x%02X).\n",
2638 amdgpu_asic_name[adev->asic_type], pdev->vendor, pdev->device,
2639 pdev->subsystem_vendor, pdev->subsystem_device, pdev->revision);
d38ceaf9
AD
2640
2641 /* mutex initialization is all done here so we
2642 * can call these functions later without locking issues */
d38ceaf9 2643 atomic_set(&adev->irq.ih.lock, 0);
0e5ca0d1 2644 mutex_init(&adev->firmware.mutex);
d38ceaf9
AD
2645 mutex_init(&adev->pm.mutex);
2646 mutex_init(&adev->gfx.gpu_clock_mutex);
2647 mutex_init(&adev->srbm_mutex);
b8866c26 2648 mutex_init(&adev->gfx.pipe_reserve_mutex);
d23ee13f 2649 mutex_init(&adev->gfx.gfx_off_mutex);
d38ceaf9 2650 mutex_init(&adev->grbm_idx_mutex);
d38ceaf9 2651 mutex_init(&adev->mn_lock);
e23b74aa 2652 mutex_init(&adev->virt.vf_errors.lock);
d38ceaf9 2653 hash_init(adev->mn_hash);
13a752e3 2654 mutex_init(&adev->lock_reset);
bb5a2bdf 2655 mutex_init(&adev->virt.dpm_mutex);
32eaeae0 2656 mutex_init(&adev->psp.mutex);
d38ceaf9 2657
912dfc84
EQ
2658 r = amdgpu_device_check_arguments(adev);
2659 if (r)
2660 return r;
d38ceaf9 2661
d38ceaf9
AD
2662 spin_lock_init(&adev->mmio_idx_lock);
2663 spin_lock_init(&adev->smc_idx_lock);
2664 spin_lock_init(&adev->pcie_idx_lock);
2665 spin_lock_init(&adev->uvd_ctx_idx_lock);
2666 spin_lock_init(&adev->didt_idx_lock);
ccdbb20a 2667 spin_lock_init(&adev->gc_cac_idx_lock);
16abb5d2 2668 spin_lock_init(&adev->se_cac_idx_lock);
d38ceaf9 2669 spin_lock_init(&adev->audio_endpt_idx_lock);
95844d20 2670 spin_lock_init(&adev->mm_stats.lock);
d38ceaf9 2671
0c4e7fa5
CZ
2672 INIT_LIST_HEAD(&adev->shadow_list);
2673 mutex_init(&adev->shadow_list_lock);
2674
795f2813
AR
2675 INIT_LIST_HEAD(&adev->ring_lru_list);
2676 spin_lock_init(&adev->ring_lru_list_lock);
2677
beff74bc
AD
2678 INIT_DELAYED_WORK(&adev->delayed_init_work,
2679 amdgpu_device_delayed_init_work_handler);
1e317b99
RZ
2680 INIT_DELAYED_WORK(&adev->gfx.gfx_off_delay_work,
2681 amdgpu_device_delay_enable_gfx_off);
2dc80b00 2682
d4535e2c
AG
2683 INIT_WORK(&adev->xgmi_reset_work, amdgpu_device_xgmi_reset_func);
2684
d23ee13f 2685 adev->gfx.gfx_off_req_count = 1;
b1ddf548
RZ
2686 adev->pm.ac_power = power_supply_is_system_supplied() > 0 ? true : false;
2687
0fa49558
AX
2688 /* Registers mapping */
2689 /* TODO: block userspace mapping of io register */
da69c161
KW
2690 if (adev->asic_type >= CHIP_BONAIRE) {
2691 adev->rmmio_base = pci_resource_start(adev->pdev, 5);
2692 adev->rmmio_size = pci_resource_len(adev->pdev, 5);
2693 } else {
2694 adev->rmmio_base = pci_resource_start(adev->pdev, 2);
2695 adev->rmmio_size = pci_resource_len(adev->pdev, 2);
2696 }
d38ceaf9 2697
d38ceaf9
AD
2698 adev->rmmio = ioremap(adev->rmmio_base, adev->rmmio_size);
2699 if (adev->rmmio == NULL) {
2700 return -ENOMEM;
2701 }
2702 DRM_INFO("register mmio base: 0x%08X\n", (uint32_t)adev->rmmio_base);
2703 DRM_INFO("register mmio size: %u\n", (unsigned)adev->rmmio_size);
2704
d38ceaf9
AD
2705 /* io port mapping */
2706 for (i = 0; i < DEVICE_COUNT_RESOURCE; i++) {
2707 if (pci_resource_flags(adev->pdev, i) & IORESOURCE_IO) {
2708 adev->rio_mem_size = pci_resource_len(adev->pdev, i);
2709 adev->rio_mem = pci_iomap(adev->pdev, i, adev->rio_mem_size);
2710 break;
2711 }
2712 }
2713 if (adev->rio_mem == NULL)
b64a18c5 2714 DRM_INFO("PCI I/O BAR is not found.\n");
d38ceaf9 2715
b2109d8e
JX
2716 /* enable PCIE atomic ops */
2717 r = pci_enable_atomic_ops_to_root(adev->pdev,
2718 PCI_EXP_DEVCAP2_ATOMIC_COMP32 |
2719 PCI_EXP_DEVCAP2_ATOMIC_COMP64);
2720 if (r) {
2721 adev->have_atomics_support = false;
2722 DRM_INFO("PCIE atomic ops is not supported\n");
2723 } else {
2724 adev->have_atomics_support = true;
2725 }
2726
5494d864
AD
2727 amdgpu_device_get_pcie_info(adev);
2728
b239c017
JX
2729 if (amdgpu_mcbp)
2730 DRM_INFO("MCBP is enabled\n");
2731
5f84cc63
JX
2732 if (amdgpu_mes && adev->asic_type >= CHIP_NAVI10)
2733 adev->enable_mes = true;
2734
f54eeab4 2735 if (amdgpu_discovery && adev->asic_type >= CHIP_NAVI10) {
a190d1c7
XY
2736 r = amdgpu_discovery_init(adev);
2737 if (r) {
2738 dev_err(adev->dev, "amdgpu_discovery_init failed\n");
2739 return r;
2740 }
2741 }
2742
d38ceaf9 2743 /* early init functions */
06ec9070 2744 r = amdgpu_device_ip_early_init(adev);
d38ceaf9
AD
2745 if (r)
2746 return r;
2747
6585661d
OZ
2748 /* doorbell bar mapping and doorbell index init*/
2749 amdgpu_device_doorbell_init(adev);
2750
d38ceaf9
AD
2751 /* if we have > 1 VGA cards, then disable the amdgpu VGA resources */
2752 /* this will fail for cards that aren't VGA class devices, just
2753 * ignore it */
06ec9070 2754 vga_client_register(adev->pdev, adev, NULL, amdgpu_device_vga_set_decode);
d38ceaf9 2755
e9bef455 2756 if (amdgpu_device_is_px(ddev))
d38ceaf9 2757 runtime = true;
84c8b22e
LW
2758 if (!pci_is_thunderbolt_attached(adev->pdev))
2759 vga_switcheroo_register_client(adev->pdev,
2760 &amdgpu_switcheroo_ops, runtime);
d38ceaf9
AD
2761 if (runtime)
2762 vga_switcheroo_init_domain_pm_ops(adev->dev, &adev->vga_pm_domain);
2763
9475a943
SL
2764 if (amdgpu_emu_mode == 1) {
2765 /* post the asic on emulation mode */
2766 emu_soc_asic_init(adev);
bfca0289 2767 goto fence_driver_init;
9475a943 2768 }
bfca0289 2769
4e99a44e
ML
2770 /* detect if we are running with an SR-IOV vBIOS */
2771 amdgpu_device_detect_sriov_bios(adev);
048765ad 2772
95e8e59e
AD
2773 /* check if we need to reset the asic
2774 * E.g., driver was not cleanly unloaded previously, etc.
2775 */
f14899fd 2776 if (!amdgpu_sriov_vf(adev) && amdgpu_asic_need_reset_on_init(adev)) {
95e8e59e
AD
2777 r = amdgpu_asic_reset(adev);
2778 if (r) {
2779 dev_err(adev->dev, "asic reset on init failed\n");
2780 goto failed;
2781 }
2782 }
2783
d38ceaf9 2784 /* Post card if necessary */
39c640c0 2785 if (amdgpu_device_need_post(adev)) {
d38ceaf9 2786 if (!adev->bios) {
bec86378 2787 dev_err(adev->dev, "no vBIOS found\n");
83ba126a
AD
2788 r = -EINVAL;
2789 goto failed;
d38ceaf9 2790 }
bec86378 2791 DRM_INFO("GPU posting now...\n");
4e99a44e
ML
2792 r = amdgpu_atom_asic_init(adev->mode_info.atom_context);
2793 if (r) {
2794 dev_err(adev->dev, "gpu post error!\n");
2795 goto failed;
2796 }
d38ceaf9
AD
2797 }
2798
88b64e95
AD
2799 if (adev->is_atom_fw) {
2800 /* Initialize clocks */
2801 r = amdgpu_atomfirmware_get_clock_info(adev);
2802 if (r) {
2803 dev_err(adev->dev, "amdgpu_atomfirmware_get_clock_info failed\n");
e23b74aa 2804 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_GET_CLOCK_FAIL, 0, 0);
88b64e95
AD
2805 goto failed;
2806 }
2807 } else {
a5bde2f9
AD
2808 /* Initialize clocks */
2809 r = amdgpu_atombios_get_clock_info(adev);
2810 if (r) {
2811 dev_err(adev->dev, "amdgpu_atombios_get_clock_info failed\n");
e23b74aa 2812 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_GET_CLOCK_FAIL, 0, 0);
89041940 2813 goto failed;
a5bde2f9
AD
2814 }
2815 /* init i2c buses */
4562236b
HW
2816 if (!amdgpu_device_has_dc_support(adev))
2817 amdgpu_atombios_i2c_init(adev);
2c1a2784 2818 }
d38ceaf9 2819
bfca0289 2820fence_driver_init:
d38ceaf9
AD
2821 /* Fence driver */
2822 r = amdgpu_fence_driver_init(adev);
2c1a2784
AD
2823 if (r) {
2824 dev_err(adev->dev, "amdgpu_fence_driver_init failed\n");
e23b74aa 2825 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_FENCE_INIT_FAIL, 0, 0);
83ba126a 2826 goto failed;
2c1a2784 2827 }
d38ceaf9
AD
2828
2829 /* init the mode config */
2830 drm_mode_config_init(adev->ddev);
2831
06ec9070 2832 r = amdgpu_device_ip_init(adev);
d38ceaf9 2833 if (r) {
8840a387 2834 /* failed in exclusive mode due to timeout */
2835 if (amdgpu_sriov_vf(adev) &&
2836 !amdgpu_sriov_runtime(adev) &&
2837 amdgpu_virt_mmio_blocked(adev) &&
2838 !amdgpu_virt_wait_reset(adev)) {
2839 dev_err(adev->dev, "VF exclusive mode timeout\n");
1daee8b4
PD
2840 /* Don't send request since VF is inactive. */
2841 adev->virt.caps &= ~AMDGPU_SRIOV_CAPS_RUNTIME;
2842 adev->virt.ops = NULL;
8840a387 2843 r = -EAGAIN;
2844 goto failed;
2845 }
06ec9070 2846 dev_err(adev->dev, "amdgpu_device_ip_init failed\n");
e23b74aa 2847 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_AMDGPU_INIT_FAIL, 0, 0);
72d3f592
ED
2848 if (amdgpu_virt_request_full_gpu(adev, false))
2849 amdgpu_virt_release_full_gpu(adev, false);
83ba126a 2850 goto failed;
d38ceaf9
AD
2851 }
2852
2853 adev->accel_working = true;
2854
e59c0205
AX
2855 amdgpu_vm_check_compute_bug(adev);
2856
95844d20
MO
2857 /* Initialize the buffer migration limit. */
2858 if (amdgpu_moverate >= 0)
2859 max_MBps = amdgpu_moverate;
2860 else
2861 max_MBps = 8; /* Allow 8 MB/s. */
2862 /* Get a log2 for easy divisions. */
2863 adev->mm_stats.log2_max_MBps = ilog2(max(1u, max_MBps));
2864
9bc92b9c
ML
2865 amdgpu_fbdev_init(adev);
2866
e9bc1bf7
YT
2867 if (amdgpu_sriov_vf(adev) && amdgim_is_hwperf(adev))
2868 amdgpu_pm_virt_sysfs_init(adev);
2869
d2f52ac8
RZ
2870 r = amdgpu_pm_sysfs_init(adev);
2871 if (r)
2872 DRM_ERROR("registering pm debugfs failed (%d).\n", r);
2873
5bb23532
OM
2874 r = amdgpu_ucode_sysfs_init(adev);
2875 if (r)
2876 DRM_ERROR("Creating firmware sysfs failed (%d).\n", r);
2877
75758255 2878 r = amdgpu_debugfs_gem_init(adev);
3f14e623 2879 if (r)
d38ceaf9 2880 DRM_ERROR("registering gem debugfs failed (%d).\n", r);
d38ceaf9
AD
2881
2882 r = amdgpu_debugfs_regs_init(adev);
3f14e623 2883 if (r)
d38ceaf9 2884 DRM_ERROR("registering register debugfs failed (%d).\n", r);
d38ceaf9 2885
50ab2533 2886 r = amdgpu_debugfs_firmware_init(adev);
3f14e623 2887 if (r)
50ab2533 2888 DRM_ERROR("registering firmware debugfs failed (%d).\n", r);
50ab2533 2889
763efb6c 2890 r = amdgpu_debugfs_init(adev);
db95e218 2891 if (r)
763efb6c 2892 DRM_ERROR("Creating debugfs files failed (%d).\n", r);
db95e218 2893
d38ceaf9
AD
2894 if ((amdgpu_testing & 1)) {
2895 if (adev->accel_working)
2896 amdgpu_test_moves(adev);
2897 else
2898 DRM_INFO("amdgpu: acceleration disabled, skipping move tests\n");
2899 }
d38ceaf9
AD
2900 if (amdgpu_benchmarking) {
2901 if (adev->accel_working)
2902 amdgpu_benchmark(adev, amdgpu_benchmarking);
2903 else
2904 DRM_INFO("amdgpu: acceleration disabled, skipping benchmarks\n");
2905 }
2906
2907 /* enable clockgating, etc. after ib tests, etc. since some blocks require
2908 * explicit gating rather than handling it automatically.
2909 */
06ec9070 2910 r = amdgpu_device_ip_late_init(adev);
2c1a2784 2911 if (r) {
06ec9070 2912 dev_err(adev->dev, "amdgpu_device_ip_late_init failed\n");
e23b74aa 2913 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_AMDGPU_LATE_INIT_FAIL, 0, r);
83ba126a 2914 goto failed;
2c1a2784 2915 }
d38ceaf9 2916
108c6a63 2917 /* must succeed. */
511fdbc3 2918 amdgpu_ras_resume(adev);
108c6a63 2919
beff74bc
AD
2920 queue_delayed_work(system_wq, &adev->delayed_init_work,
2921 msecs_to_jiffies(AMDGPU_RESUME_MS));
2922
dcea6e65
KR
2923 r = device_create_file(adev->dev, &dev_attr_pcie_replay_count);
2924 if (r) {
2925 dev_err(adev->dev, "Could not create pcie_replay_count");
2926 return r;
2927 }
108c6a63 2928
d155bef0
AB
2929 if (IS_ENABLED(CONFIG_PERF_EVENTS))
2930 r = amdgpu_pmu_init(adev);
9c7c85f7
JK
2931 if (r)
2932 dev_err(adev->dev, "amdgpu_pmu_init failed\n");
2933
d38ceaf9 2934 return 0;
83ba126a
AD
2935
2936failed:
89041940 2937 amdgpu_vf_error_trans_all(adev);
83ba126a
AD
2938 if (runtime)
2939 vga_switcheroo_fini_domain_pm_ops(adev->dev);
8840a387 2940
83ba126a 2941 return r;
d38ceaf9
AD
2942}
2943
d38ceaf9
AD
2944/**
2945 * amdgpu_device_fini - tear down the driver
2946 *
2947 * @adev: amdgpu_device pointer
2948 *
2949 * Tear down the driver info (all asics).
2950 * Called at driver shutdown.
2951 */
2952void amdgpu_device_fini(struct amdgpu_device *adev)
2953{
2954 int r;
2955
2956 DRM_INFO("amdgpu: finishing device.\n");
2957 adev->shutdown = true;
e5b03032
ML
2958 /* disable all interrupts */
2959 amdgpu_irq_disable_all(adev);
ff97cba8
ML
2960 if (adev->mode_info.mode_config_initialized){
2961 if (!amdgpu_device_has_dc_support(adev))
c2d88e06 2962 drm_helper_force_disable_all(adev->ddev);
ff97cba8
ML
2963 else
2964 drm_atomic_helper_shutdown(adev->ddev);
2965 }
d38ceaf9 2966 amdgpu_fence_driver_fini(adev);
58e955d9 2967 amdgpu_pm_sysfs_fini(adev);
d38ceaf9 2968 amdgpu_fbdev_fini(adev);
06ec9070 2969 r = amdgpu_device_ip_fini(adev);
ab4fe3e1
HR
2970 if (adev->firmware.gpu_info_fw) {
2971 release_firmware(adev->firmware.gpu_info_fw);
2972 adev->firmware.gpu_info_fw = NULL;
2973 }
d38ceaf9 2974 adev->accel_working = false;
beff74bc 2975 cancel_delayed_work_sync(&adev->delayed_init_work);
d38ceaf9 2976 /* free i2c buses */
4562236b
HW
2977 if (!amdgpu_device_has_dc_support(adev))
2978 amdgpu_i2c_fini(adev);
bfca0289
SL
2979
2980 if (amdgpu_emu_mode != 1)
2981 amdgpu_atombios_fini(adev);
2982
d38ceaf9
AD
2983 kfree(adev->bios);
2984 adev->bios = NULL;
84c8b22e
LW
2985 if (!pci_is_thunderbolt_attached(adev->pdev))
2986 vga_switcheroo_unregister_client(adev->pdev);
83ba126a
AD
2987 if (adev->flags & AMD_IS_PX)
2988 vga_switcheroo_fini_domain_pm_ops(adev->dev);
d38ceaf9
AD
2989 vga_client_register(adev->pdev, NULL, NULL, NULL);
2990 if (adev->rio_mem)
2991 pci_iounmap(adev->pdev, adev->rio_mem);
2992 adev->rio_mem = NULL;
2993 iounmap(adev->rmmio);
2994 adev->rmmio = NULL;
06ec9070 2995 amdgpu_device_doorbell_fini(adev);
e9bc1bf7
YT
2996 if (amdgpu_sriov_vf(adev) && amdgim_is_hwperf(adev))
2997 amdgpu_pm_virt_sysfs_fini(adev);
2998
d38ceaf9 2999 amdgpu_debugfs_regs_cleanup(adev);
dcea6e65 3000 device_remove_file(adev->dev, &dev_attr_pcie_replay_count);
5bb23532 3001 amdgpu_ucode_sysfs_fini(adev);
d155bef0
AB
3002 if (IS_ENABLED(CONFIG_PERF_EVENTS))
3003 amdgpu_pmu_fini(adev);
6698a3d0 3004 amdgpu_debugfs_preempt_cleanup(adev);
f54eeab4 3005 if (amdgpu_discovery && adev->asic_type >= CHIP_NAVI10)
a190d1c7 3006 amdgpu_discovery_fini(adev);
d38ceaf9
AD
3007}
3008
3009
3010/*
3011 * Suspend & resume.
3012 */
3013/**
810ddc3a 3014 * amdgpu_device_suspend - initiate device suspend
d38ceaf9 3015 *
87e3f136
DP
3016 * @dev: drm dev pointer
3017 * @suspend: suspend state
3018 * @fbcon: notify the fbdev of suspend
d38ceaf9
AD
3019 *
3020 * Puts the hw in the suspend state (all asics).
3021 * Returns 0 for success or an error on failure.
3022 * Called at driver suspend.
3023 */
810ddc3a 3024int amdgpu_device_suspend(struct drm_device *dev, bool suspend, bool fbcon)
d38ceaf9
AD
3025{
3026 struct amdgpu_device *adev;
3027 struct drm_crtc *crtc;
3028 struct drm_connector *connector;
5ceb54c6 3029 int r;
d38ceaf9
AD
3030
3031 if (dev == NULL || dev->dev_private == NULL) {
3032 return -ENODEV;
3033 }
3034
3035 adev = dev->dev_private;
3036
3037 if (dev->switch_power_state == DRM_SWITCH_POWER_OFF)
3038 return 0;
3039
44779b43 3040 adev->in_suspend = true;
d38ceaf9
AD
3041 drm_kms_helper_poll_disable(dev);
3042
5f818173
S
3043 if (fbcon)
3044 amdgpu_fbdev_set_suspend(adev, 1);
3045
beff74bc 3046 cancel_delayed_work_sync(&adev->delayed_init_work);
a5459475 3047
4562236b
HW
3048 if (!amdgpu_device_has_dc_support(adev)) {
3049 /* turn off display hw */
3050 drm_modeset_lock_all(dev);
3051 list_for_each_entry(connector, &dev->mode_config.connector_list, head) {
3052 drm_helper_connector_dpms(connector, DRM_MODE_DPMS_OFF);
3053 }
3054 drm_modeset_unlock_all(dev);
fe1053b7
AD
3055 /* unpin the front buffers and cursors */
3056 list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) {
3057 struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
3058 struct drm_framebuffer *fb = crtc->primary->fb;
3059 struct amdgpu_bo *robj;
3060
91334223 3061 if (amdgpu_crtc->cursor_bo && !adev->enable_virtual_display) {
fe1053b7
AD
3062 struct amdgpu_bo *aobj = gem_to_amdgpu_bo(amdgpu_crtc->cursor_bo);
3063 r = amdgpu_bo_reserve(aobj, true);
3064 if (r == 0) {
3065 amdgpu_bo_unpin(aobj);
3066 amdgpu_bo_unreserve(aobj);
3067 }
756e6880 3068 }
756e6880 3069
fe1053b7
AD
3070 if (fb == NULL || fb->obj[0] == NULL) {
3071 continue;
3072 }
3073 robj = gem_to_amdgpu_bo(fb->obj[0]);
3074 /* don't unpin kernel fb objects */
3075 if (!amdgpu_fbdev_robj_is_fb(adev, robj)) {
3076 r = amdgpu_bo_reserve(robj, true);
3077 if (r == 0) {
3078 amdgpu_bo_unpin(robj);
3079 amdgpu_bo_unreserve(robj);
3080 }
d38ceaf9
AD
3081 }
3082 }
3083 }
fe1053b7
AD
3084
3085 amdgpu_amdkfd_suspend(adev);
3086
5e6932fe 3087 amdgpu_ras_suspend(adev);
3088
fe1053b7
AD
3089 r = amdgpu_device_ip_suspend_phase1(adev);
3090
d38ceaf9
AD
3091 /* evict vram memory */
3092 amdgpu_bo_evict_vram(adev);
3093
5ceb54c6 3094 amdgpu_fence_driver_suspend(adev);
d38ceaf9 3095
fe1053b7 3096 r = amdgpu_device_ip_suspend_phase2(adev);
d38ceaf9 3097
a0a71e49
AD
3098 /* evict remaining vram memory
3099 * This second call to evict vram is to evict the gart page table
3100 * using the CPU.
3101 */
d38ceaf9
AD
3102 amdgpu_bo_evict_vram(adev);
3103
3104 pci_save_state(dev->pdev);
3105 if (suspend) {
3106 /* Shut down the device */
3107 pci_disable_device(dev->pdev);
3108 pci_set_power_state(dev->pdev, PCI_D3hot);
74b0b157 3109 } else {
3110 r = amdgpu_asic_reset(adev);
3111 if (r)
3112 DRM_ERROR("amdgpu asic reset failed\n");
d38ceaf9
AD
3113 }
3114
d38ceaf9
AD
3115 return 0;
3116}
3117
3118/**
810ddc3a 3119 * amdgpu_device_resume - initiate device resume
d38ceaf9 3120 *
87e3f136
DP
3121 * @dev: drm dev pointer
3122 * @resume: resume state
3123 * @fbcon: notify the fbdev of resume
d38ceaf9
AD
3124 *
3125 * Bring the hw back to operating state (all asics).
3126 * Returns 0 for success or an error on failure.
3127 * Called at driver resume.
3128 */
810ddc3a 3129int amdgpu_device_resume(struct drm_device *dev, bool resume, bool fbcon)
d38ceaf9
AD
3130{
3131 struct drm_connector *connector;
3132 struct amdgpu_device *adev = dev->dev_private;
756e6880 3133 struct drm_crtc *crtc;
03161a6e 3134 int r = 0;
d38ceaf9
AD
3135
3136 if (dev->switch_power_state == DRM_SWITCH_POWER_OFF)
3137 return 0;
3138
d38ceaf9
AD
3139 if (resume) {
3140 pci_set_power_state(dev->pdev, PCI_D0);
3141 pci_restore_state(dev->pdev);
74b0b157 3142 r = pci_enable_device(dev->pdev);
03161a6e 3143 if (r)
4d3b9ae5 3144 return r;
d38ceaf9
AD
3145 }
3146
3147 /* post card */
39c640c0 3148 if (amdgpu_device_need_post(adev)) {
74b0b157 3149 r = amdgpu_atom_asic_init(adev->mode_info.atom_context);
3150 if (r)
3151 DRM_ERROR("amdgpu asic init failed\n");
3152 }
d38ceaf9 3153
06ec9070 3154 r = amdgpu_device_ip_resume(adev);
e6707218 3155 if (r) {
06ec9070 3156 DRM_ERROR("amdgpu_device_ip_resume failed (%d).\n", r);
4d3b9ae5 3157 return r;
e6707218 3158 }
5ceb54c6
AD
3159 amdgpu_fence_driver_resume(adev);
3160
d38ceaf9 3161
06ec9070 3162 r = amdgpu_device_ip_late_init(adev);
03161a6e 3163 if (r)
4d3b9ae5 3164 return r;
d38ceaf9 3165
beff74bc
AD
3166 queue_delayed_work(system_wq, &adev->delayed_init_work,
3167 msecs_to_jiffies(AMDGPU_RESUME_MS));
3168
fe1053b7
AD
3169 if (!amdgpu_device_has_dc_support(adev)) {
3170 /* pin cursors */
3171 list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) {
3172 struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
3173
91334223 3174 if (amdgpu_crtc->cursor_bo && !adev->enable_virtual_display) {
fe1053b7
AD
3175 struct amdgpu_bo *aobj = gem_to_amdgpu_bo(amdgpu_crtc->cursor_bo);
3176 r = amdgpu_bo_reserve(aobj, true);
3177 if (r == 0) {
3178 r = amdgpu_bo_pin(aobj, AMDGPU_GEM_DOMAIN_VRAM);
3179 if (r != 0)
3180 DRM_ERROR("Failed to pin cursor BO (%d)\n", r);
3181 amdgpu_crtc->cursor_addr = amdgpu_bo_gpu_offset(aobj);
3182 amdgpu_bo_unreserve(aobj);
3183 }
756e6880
AD
3184 }
3185 }
3186 }
ba997709
YZ
3187 r = amdgpu_amdkfd_resume(adev);
3188 if (r)
3189 return r;
756e6880 3190
96a5d8d4 3191 /* Make sure IB tests flushed */
beff74bc 3192 flush_delayed_work(&adev->delayed_init_work);
96a5d8d4 3193
d38ceaf9
AD
3194 /* blat the mode back in */
3195 if (fbcon) {
4562236b
HW
3196 if (!amdgpu_device_has_dc_support(adev)) {
3197 /* pre DCE11 */
3198 drm_helper_resume_force_mode(dev);
3199
3200 /* turn on display hw */
3201 drm_modeset_lock_all(dev);
3202 list_for_each_entry(connector, &dev->mode_config.connector_list, head) {
3203 drm_helper_connector_dpms(connector, DRM_MODE_DPMS_ON);
3204 }
3205 drm_modeset_unlock_all(dev);
d38ceaf9 3206 }
4d3b9ae5 3207 amdgpu_fbdev_set_suspend(adev, 0);
d38ceaf9
AD
3208 }
3209
3210 drm_kms_helper_poll_enable(dev);
23a1a9e5 3211
5e6932fe 3212 amdgpu_ras_resume(adev);
3213
23a1a9e5
L
3214 /*
3215 * Most of the connector probing functions try to acquire runtime pm
3216 * refs to ensure that the GPU is powered on when connector polling is
3217 * performed. Since we're calling this from a runtime PM callback,
3218 * trying to acquire rpm refs will cause us to deadlock.
3219 *
3220 * Since we're guaranteed to be holding the rpm lock, it's safe to
3221 * temporarily disable the rpm helpers so this doesn't deadlock us.
3222 */
3223#ifdef CONFIG_PM
3224 dev->dev->power.disable_depth++;
3225#endif
4562236b
HW
3226 if (!amdgpu_device_has_dc_support(adev))
3227 drm_helper_hpd_irq_event(dev);
3228 else
3229 drm_kms_helper_hotplug_event(dev);
23a1a9e5
L
3230#ifdef CONFIG_PM
3231 dev->dev->power.disable_depth--;
3232#endif
44779b43
RZ
3233 adev->in_suspend = false;
3234
4d3b9ae5 3235 return 0;
d38ceaf9
AD
3236}
3237
e3ecdffa
AD
3238/**
3239 * amdgpu_device_ip_check_soft_reset - did soft reset succeed
3240 *
3241 * @adev: amdgpu_device pointer
3242 *
3243 * The list of all the hardware IPs that make up the asic is walked and
3244 * the check_soft_reset callbacks are run. check_soft_reset determines
3245 * if the asic is still hung or not.
3246 * Returns true if any of the IPs are still in a hung state, false if not.
3247 */
06ec9070 3248static bool amdgpu_device_ip_check_soft_reset(struct amdgpu_device *adev)
63fbf42f
CZ
3249{
3250 int i;
3251 bool asic_hang = false;
3252
f993d628
ML
3253 if (amdgpu_sriov_vf(adev))
3254 return true;
3255
8bc04c29
AD
3256 if (amdgpu_asic_need_full_reset(adev))
3257 return true;
3258
63fbf42f 3259 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 3260 if (!adev->ip_blocks[i].status.valid)
63fbf42f 3261 continue;
a1255107
AD
3262 if (adev->ip_blocks[i].version->funcs->check_soft_reset)
3263 adev->ip_blocks[i].status.hang =
3264 adev->ip_blocks[i].version->funcs->check_soft_reset(adev);
3265 if (adev->ip_blocks[i].status.hang) {
3266 DRM_INFO("IP block:%s is hung!\n", adev->ip_blocks[i].version->funcs->name);
63fbf42f
CZ
3267 asic_hang = true;
3268 }
3269 }
3270 return asic_hang;
3271}
3272
e3ecdffa
AD
3273/**
3274 * amdgpu_device_ip_pre_soft_reset - prepare for soft reset
3275 *
3276 * @adev: amdgpu_device pointer
3277 *
3278 * The list of all the hardware IPs that make up the asic is walked and the
3279 * pre_soft_reset callbacks are run if the block is hung. pre_soft_reset
3280 * handles any IP specific hardware or software state changes that are
3281 * necessary for a soft reset to succeed.
3282 * Returns 0 on success, negative error code on failure.
3283 */
06ec9070 3284static int amdgpu_device_ip_pre_soft_reset(struct amdgpu_device *adev)
d31a501e
CZ
3285{
3286 int i, r = 0;
3287
3288 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 3289 if (!adev->ip_blocks[i].status.valid)
d31a501e 3290 continue;
a1255107
AD
3291 if (adev->ip_blocks[i].status.hang &&
3292 adev->ip_blocks[i].version->funcs->pre_soft_reset) {
3293 r = adev->ip_blocks[i].version->funcs->pre_soft_reset(adev);
d31a501e
CZ
3294 if (r)
3295 return r;
3296 }
3297 }
3298
3299 return 0;
3300}
3301
e3ecdffa
AD
3302/**
3303 * amdgpu_device_ip_need_full_reset - check if a full asic reset is needed
3304 *
3305 * @adev: amdgpu_device pointer
3306 *
3307 * Some hardware IPs cannot be soft reset. If they are hung, a full gpu
3308 * reset is necessary to recover.
3309 * Returns true if a full asic reset is required, false if not.
3310 */
06ec9070 3311static bool amdgpu_device_ip_need_full_reset(struct amdgpu_device *adev)
35d782fe 3312{
da146d3b
AD
3313 int i;
3314
8bc04c29
AD
3315 if (amdgpu_asic_need_full_reset(adev))
3316 return true;
3317
da146d3b 3318 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 3319 if (!adev->ip_blocks[i].status.valid)
da146d3b 3320 continue;
a1255107
AD
3321 if ((adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) ||
3322 (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) ||
3323 (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_ACP) ||
98512bb8
KW
3324 (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_DCE) ||
3325 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP) {
a1255107 3326 if (adev->ip_blocks[i].status.hang) {
da146d3b
AD
3327 DRM_INFO("Some block need full reset!\n");
3328 return true;
3329 }
3330 }
35d782fe
CZ
3331 }
3332 return false;
3333}
3334
e3ecdffa
AD
3335/**
3336 * amdgpu_device_ip_soft_reset - do a soft reset
3337 *
3338 * @adev: amdgpu_device pointer
3339 *
3340 * The list of all the hardware IPs that make up the asic is walked and the
3341 * soft_reset callbacks are run if the block is hung. soft_reset handles any
3342 * IP specific hardware or software state changes that are necessary to soft
3343 * reset the IP.
3344 * Returns 0 on success, negative error code on failure.
3345 */
06ec9070 3346static int amdgpu_device_ip_soft_reset(struct amdgpu_device *adev)
35d782fe
CZ
3347{
3348 int i, r = 0;
3349
3350 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 3351 if (!adev->ip_blocks[i].status.valid)
35d782fe 3352 continue;
a1255107
AD
3353 if (adev->ip_blocks[i].status.hang &&
3354 adev->ip_blocks[i].version->funcs->soft_reset) {
3355 r = adev->ip_blocks[i].version->funcs->soft_reset(adev);
35d782fe
CZ
3356 if (r)
3357 return r;
3358 }
3359 }
3360
3361 return 0;
3362}
3363
/**
 * amdgpu_device_ip_post_soft_reset - clean up from soft reset
 *
 * @adev: amdgpu_device pointer
 *
 * The list of all the hardware IPs that make up the asic is walked and the
 * post_soft_reset callbacks are run if the asic was hung. post_soft_reset
 * handles any IP specific hardware or software state changes that are
 * necessary after the IP has been soft reset.
 * Returns 0 on success, negative error code on failure.
 */
static int amdgpu_device_ip_post_soft_reset(struct amdgpu_device *adev)
{
	int i, r = 0;

	for (i = 0; i < adev->num_ip_blocks; i++) {
		if (!adev->ip_blocks[i].status.valid)
			continue;
		if (adev->ip_blocks[i].status.hang &&
		    adev->ip_blocks[i].version->funcs->post_soft_reset)
			r = adev->ip_blocks[i].version->funcs->post_soft_reset(adev);
		if (r)
			return r;
	}

	return 0;
}

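/*
 * The pre/soft/post soft-reset callbacks above are driven in sequence from
 * amdgpu_device_pre_asic_reset() later in this file, roughly:
 *
 *	amdgpu_device_ip_pre_soft_reset(adev);
 *	r = amdgpu_device_ip_soft_reset(adev);
 *	amdgpu_device_ip_post_soft_reset(adev);
 *	if (r || amdgpu_device_ip_check_soft_reset(adev))
 *		fall back to a full ASIC reset
 *
 * This is only a summary of the flow implemented below, not extra logic.
 */
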
/**
 * amdgpu_device_recover_vram - Recover some VRAM contents
 *
 * @adev: amdgpu_device pointer
 *
 * Restores the contents of VRAM buffers from the shadows in GTT. Used to
 * restore things like GPUVM page tables after a GPU reset where
 * the contents of VRAM might be lost.
 *
 * Returns:
 * 0 on success, negative error code on failure.
 */
static int amdgpu_device_recover_vram(struct amdgpu_device *adev)
{
	struct dma_fence *fence = NULL, *next = NULL;
	struct amdgpu_bo *shadow;
	long r = 1, tmo;

	if (amdgpu_sriov_runtime(adev))
		tmo = msecs_to_jiffies(8000);
	else
		tmo = msecs_to_jiffies(100);

	DRM_INFO("recover vram bo from shadow start\n");
	mutex_lock(&adev->shadow_list_lock);
	list_for_each_entry(shadow, &adev->shadow_list, shadow_list) {

		/* No need to recover an evicted BO */
		if (shadow->tbo.mem.mem_type != TTM_PL_TT ||
		    shadow->tbo.mem.start == AMDGPU_BO_INVALID_OFFSET ||
		    shadow->parent->tbo.mem.mem_type != TTM_PL_VRAM)
			continue;

		r = amdgpu_bo_restore_shadow(shadow, &next);
		if (r)
			break;

		if (fence) {
			tmo = dma_fence_wait_timeout(fence, false, tmo);
			dma_fence_put(fence);
			fence = next;
			if (tmo == 0) {
				r = -ETIMEDOUT;
				break;
			} else if (tmo < 0) {
				r = tmo;
				break;
			}
		} else {
			fence = next;
		}
	}
	mutex_unlock(&adev->shadow_list_lock);

	if (fence)
		tmo = dma_fence_wait_timeout(fence, false, tmo);
	dma_fence_put(fence);

	if (r < 0 || tmo <= 0) {
		DRM_ERROR("recover vram bo from shadow failed, r is %ld, tmo is %ld\n", r, tmo);
		return -EIO;
	}

	DRM_INFO("recover vram bo from shadow done\n");
	return 0;
}

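/*
 * The shadow BOs walked above are GTT-resident copies of VRAM allocations
 * (page tables and similar) kept on adev->shadow_list. For each entry,
 * amdgpu_bo_restore_shadow() schedules a copy from the shadow back into its
 * VRAM parent and hands back the fence of that copy, which is what the
 * timeout bookkeeping in this function waits on.
 */
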
/**
 * amdgpu_device_reset_sriov - reset ASIC for SR-IOV vf
 *
 * @adev: amdgpu device pointer
 * @from_hypervisor: request from hypervisor
 *
 * Do a VF FLR and reinitialize the ASIC.
 * Returns 0 on success, negative error code on failure.
 */
static int amdgpu_device_reset_sriov(struct amdgpu_device *adev,
				     bool from_hypervisor)
{
	int r;

	if (from_hypervisor)
		r = amdgpu_virt_request_full_gpu(adev, true);
	else
		r = amdgpu_virt_reset_gpu(adev);
	if (r)
		return r;

	amdgpu_amdkfd_pre_reset(adev);

	/* Resume IP prior to SMC */
	r = amdgpu_device_ip_reinit_early_sriov(adev);
	if (r)
		goto error;

	/* we need to recover the GART prior to running SMC/CP/SDMA resume */
	amdgpu_gtt_mgr_recover(&adev->mman.bdev.man[TTM_PL_TT]);

	r = amdgpu_device_fw_loading(adev);
	if (r)
		return r;

	/* now we are okay to resume SMC/CP/SDMA */
	r = amdgpu_device_ip_reinit_late_sriov(adev);
	if (r)
		goto error;

	amdgpu_irq_gpu_reset_resume_helper(adev);
	r = amdgpu_ib_ring_tests(adev);
	amdgpu_amdkfd_post_reset(adev);

error:
	amdgpu_virt_init_data_exchange(adev);
	amdgpu_virt_release_full_gpu(adev, true);
	if (!r && adev->virt.gim_feature & AMDGIM_FEATURE_GIM_FLR_VRAMLOST) {
		atomic_inc(&adev->vram_lost_counter);
		r = amdgpu_device_recover_vram(adev);
	}

	return r;
}

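/*
 * For SR-IOV the actual function level reset is expected to happen on the
 * host side once full GPU access is requested (amdgpu_virt_request_full_gpu())
 * or a reset is asked for explicitly (amdgpu_virt_reset_gpu()). The guest
 * side above then only re-initializes its IP blocks in two phases around
 * firmware loading and finally hands exclusive access back to the hypervisor.
 */
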
/**
 * amdgpu_device_should_recover_gpu - check if we should try GPU recovery
 *
 * @adev: amdgpu device pointer
 *
 * Check amdgpu_gpu_recovery and SRIOV status to see if we should try to recover
 * a hung GPU.
 */
bool amdgpu_device_should_recover_gpu(struct amdgpu_device *adev)
{
	if (!amdgpu_device_ip_check_soft_reset(adev)) {
		DRM_INFO("Timeout, but no hardware hang detected.\n");
		return false;
	}

	if (amdgpu_gpu_recovery == 0)
		goto disabled;

	if (amdgpu_sriov_vf(adev))
		return true;

	if (amdgpu_gpu_recovery == -1) {
		switch (adev->asic_type) {
		case CHIP_BONAIRE:
		case CHIP_HAWAII:
		case CHIP_TOPAZ:
		case CHIP_TONGA:
		case CHIP_FIJI:
		case CHIP_POLARIS10:
		case CHIP_POLARIS11:
		case CHIP_POLARIS12:
		case CHIP_VEGAM:
		case CHIP_VEGA20:
		case CHIP_VEGA10:
		case CHIP_VEGA12:
			break;
		default:
			goto disabled;
		}
	}

	return true;

disabled:
	DRM_INFO("GPU recovery disabled.\n");
	return false;
}

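/*
 * This check is meant to gate recovery from the job timeout path: callers
 * (typically the scheduler timeout handler) are expected to test it before
 * invoking amdgpu_device_gpu_recover() below, so that timeouts without a
 * real hang, or ASICs where recovery is not enabled, do not trigger a reset.
 */
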
static int amdgpu_device_pre_asic_reset(struct amdgpu_device *adev,
					struct amdgpu_job *job,
					bool *need_full_reset_arg)
{
	int i, r = 0;
	bool need_full_reset = *need_full_reset_arg;

	/* block all schedulers and reset given job's ring */
	for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
		struct amdgpu_ring *ring = adev->rings[i];

		if (!ring || !ring->sched.thread)
			continue;

		/* after all hw jobs are reset, hw fence is meaningless, so force_completion */
		amdgpu_fence_driver_force_completion(ring);
	}

	if (job)
		drm_sched_increase_karma(&job->base);

	/* Don't suspend on bare metal if we are not going to HW reset the ASIC */
	if (!amdgpu_sriov_vf(adev)) {

		if (!need_full_reset)
			need_full_reset = amdgpu_device_ip_need_full_reset(adev);

		if (!need_full_reset) {
			amdgpu_device_ip_pre_soft_reset(adev);
			r = amdgpu_device_ip_soft_reset(adev);
			amdgpu_device_ip_post_soft_reset(adev);
			if (r || amdgpu_device_ip_check_soft_reset(adev)) {
				DRM_INFO("soft reset failed, will fallback to full reset!\n");
				need_full_reset = true;
			}
		}

		if (need_full_reset)
			r = amdgpu_device_ip_suspend(adev);

		*need_full_reset_arg = need_full_reset;
	}

	return r;
}

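/*
 * Roughly speaking, drm_sched_increase_karma() above bumps the guilty job's
 * karma so that, once it crosses the scheduler's hang limit, the offending
 * context is marked guilty and its jobs are not resubmitted after the reset.
 * The forced fence completion before it ensures no stale hardware fences are
 * left unsignaled once the rings are torn down.
 */
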
static int amdgpu_do_asic_reset(struct amdgpu_hive_info *hive,
				struct list_head *device_list_handle,
				bool *need_full_reset_arg)
{
	struct amdgpu_device *tmp_adev = NULL;
	bool need_full_reset = *need_full_reset_arg, vram_lost = false;
	int r = 0;

	/*
	 * ASIC reset has to be done on all XGMI hive nodes ASAP
	 * to allow proper link negotiation in FW (within 1 sec)
	 */
	if (need_full_reset) {
		list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) {
			/* For XGMI run all resets in parallel to speed up the process */
			if (tmp_adev->gmc.xgmi.num_physical_nodes > 1) {
				if (!queue_work(system_highpri_wq, &tmp_adev->xgmi_reset_work))
					r = -EALREADY;
			} else
				r = amdgpu_asic_reset(tmp_adev);

			if (r) {
				DRM_ERROR("ASIC reset failed with error, %d for drm dev, %s",
					  r, tmp_adev->ddev->unique);
				break;
			}
		}

		/* For XGMI wait for all PSP resets to complete before proceeding */
		if (!r) {
			list_for_each_entry(tmp_adev, device_list_handle,
					    gmc.xgmi.head) {
				if (tmp_adev->gmc.xgmi.num_physical_nodes > 1) {
					flush_work(&tmp_adev->xgmi_reset_work);
					r = tmp_adev->asic_reset_res;
					if (r)
						break;
				}
			}

			list_for_each_entry(tmp_adev, device_list_handle,
					    gmc.xgmi.head) {
				amdgpu_ras_reserve_bad_pages(tmp_adev);
			}
		}
	}

	list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) {
		if (need_full_reset) {
			/* post card */
			if (amdgpu_atom_asic_init(tmp_adev->mode_info.atom_context))
				DRM_WARN("asic atom init failed!");

			if (!r) {
				dev_info(tmp_adev->dev, "GPU reset succeeded, trying to resume\n");
				r = amdgpu_device_ip_resume_phase1(tmp_adev);
				if (r)
					goto out;

				vram_lost = amdgpu_device_check_vram_lost(tmp_adev);
				if (vram_lost) {
					DRM_INFO("VRAM is lost due to GPU reset!\n");
					atomic_inc(&tmp_adev->vram_lost_counter);
				}

				r = amdgpu_gtt_mgr_recover(
					&tmp_adev->mman.bdev.man[TTM_PL_TT]);
				if (r)
					goto out;

				r = amdgpu_device_fw_loading(tmp_adev);
				if (r)
					return r;

				r = amdgpu_device_ip_resume_phase2(tmp_adev);
				if (r)
					goto out;

				if (vram_lost)
					amdgpu_device_fill_reset_magic(tmp_adev);

				/*
				 * Add this ASIC back as tracked since the reset
				 * completed successfully.
				 */
				amdgpu_register_gpu_instance(tmp_adev);

				r = amdgpu_device_ip_late_init(tmp_adev);
				if (r)
					goto out;

				/* must succeed. */
				amdgpu_ras_resume(tmp_adev);

				/* Update PSP FW topology after reset */
				if (hive && tmp_adev->gmc.xgmi.num_physical_nodes > 1)
					r = amdgpu_xgmi_update_topology(hive, tmp_adev);
			}
		}

out:
		if (!r) {
			amdgpu_irq_gpu_reset_resume_helper(tmp_adev);
			r = amdgpu_ib_ring_tests(tmp_adev);
			if (r) {
				dev_err(tmp_adev->dev, "ib ring test failed (%d).\n", r);
				r = amdgpu_device_ip_suspend(tmp_adev);
				need_full_reset = true;
				r = -EAGAIN;
				goto end;
			}
		}

		if (!r)
			r = amdgpu_device_recover_vram(tmp_adev);
		else
			tmp_adev->asic_reset_res = r;
	}

end:
	*need_full_reset_arg = need_full_reset;
	return r;
}

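/*
 * If the post-reset IB ring tests fail, the function above suspends the IP
 * blocks again, forces need_full_reset and returns -EAGAIN;
 * amdgpu_device_gpu_recover() below treats that as a request to retry the
 * whole sequence with a full ASIC reset.
 */
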
static bool amdgpu_device_lock_adev(struct amdgpu_device *adev, bool trylock)
{
	if (trylock) {
		if (!mutex_trylock(&adev->lock_reset))
			return false;
	} else
		mutex_lock(&adev->lock_reset);

	atomic_inc(&adev->gpu_reset_counter);
	adev->in_gpu_reset = 1;
	switch (amdgpu_asic_reset_method(adev)) {
	case AMD_RESET_METHOD_MODE1:
		adev->mp1_state = PP_MP1_STATE_SHUTDOWN;
		break;
	case AMD_RESET_METHOD_MODE2:
		adev->mp1_state = PP_MP1_STATE_RESET;
		break;
	default:
		adev->mp1_state = PP_MP1_STATE_NONE;
		break;
	}
	/* Block kfd: SRIOV would do it separately */
	if (!amdgpu_sriov_vf(adev))
		amdgpu_amdkfd_pre_reset(adev);

	return true;
}

static void amdgpu_device_unlock_adev(struct amdgpu_device *adev)
{
	/* unlock kfd: SRIOV would do it separately */
	if (!amdgpu_sriov_vf(adev))
		amdgpu_amdkfd_post_reset(adev);
	amdgpu_vf_error_trans_all(adev);
	adev->mp1_state = PP_MP1_STATE_NONE;
	adev->in_gpu_reset = 0;
	mutex_unlock(&adev->lock_reset);
}

/**
 * amdgpu_device_gpu_recover - reset the asic and recover scheduler
 *
 * @adev: amdgpu device pointer
 * @job: which job triggered the hang
 *
 * Attempt to reset the GPU if it has hung (all asics).
 * Attempt to do a soft reset or full reset and reinitialize the ASIC.
 * Returns 0 for success or an error on failure.
 */

int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
			      struct amdgpu_job *job)
{
	struct list_head device_list, *device_list_handle = NULL;
	bool need_full_reset, job_signaled;
	struct amdgpu_hive_info *hive = NULL;
	struct amdgpu_device *tmp_adev = NULL;
	int i, r = 0;

	need_full_reset = job_signaled = false;
	INIT_LIST_HEAD(&device_list);

	dev_info(adev->dev, "GPU reset begin!\n");

	cancel_delayed_work_sync(&adev->delayed_init_work);

	hive = amdgpu_get_xgmi_hive(adev, false);

	/*
	 * Here we trylock to avoid a chain of resets executing from either a
	 * trigger by jobs on different adevs in the XGMI hive or by jobs on
	 * different schedulers for the same device while this TO handler is
	 * running. We always reset all schedulers for a device and all devices
	 * for an XGMI hive, so that should take care of them too.
	 */

	if (hive && !mutex_trylock(&hive->reset_lock)) {
		DRM_INFO("Bailing on TDR for s_job:%llx, hive: %llx as another already in progress",
			 job->base.id, hive->hive_id);
		return 0;
	}

	/* Start with adev pre asic reset first for soft reset check. */
	if (!amdgpu_device_lock_adev(adev, !hive)) {
		DRM_INFO("Bailing on TDR for s_job:%llx, as another already in progress",
			 job->base.id);
		return 0;
	}

	/* Build list of devices to reset */
	if (adev->gmc.xgmi.num_physical_nodes > 1) {
		if (!hive) {
			amdgpu_device_unlock_adev(adev);
			return -ENODEV;
		}

		/*
		 * In case we are in XGMI hive mode device reset is done for all the
		 * nodes in the hive to retrain all XGMI links and hence the reset
		 * sequence is executed in loop on all nodes.
		 */
		device_list_handle = &hive->device_list;
	} else {
		list_add_tail(&adev->gmc.xgmi.head, &device_list);
		device_list_handle = &device_list;
	}

	/*
	 * Mark these ASICs as untracked first, and add them back
	 * after the reset has completed.
	 */
	list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head)
		amdgpu_unregister_gpu_instance(tmp_adev);

	/* block all schedulers and reset given job's ring */
	list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) {
		/* disable ras on ALL IPs */
		if (amdgpu_device_ip_need_full_reset(tmp_adev))
			amdgpu_ras_suspend(tmp_adev);

		for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
			struct amdgpu_ring *ring = tmp_adev->rings[i];

			if (!ring || !ring->sched.thread)
				continue;

			drm_sched_stop(&ring->sched, &job->base);
		}
	}

	/*
	 * Must check guilty signal here since after this point all old
	 * HW fences are force signaled.
	 *
	 * job->base holds a reference to parent fence
	 */
	if (job && job->base.s_fence->parent &&
	    dma_fence_is_signaled(job->base.s_fence->parent))
		job_signaled = true;

	if (!amdgpu_device_ip_need_full_reset(adev))
		device_list_handle = &device_list;

	if (job_signaled) {
		dev_info(adev->dev, "Guilty job already signaled, skipping HW reset");
		goto skip_hw_reset;
	}

	/* The guilty job will be freed after this */
	r = amdgpu_device_pre_asic_reset(adev,
					 job,
					 &need_full_reset);
	if (r) {
		/* TODO: Should we stop? */
		DRM_ERROR("GPU pre asic reset failed with err, %d for drm dev, %s ",
			  r, adev->ddev->unique);
		adev->asic_reset_res = r;
	}

retry:	/* Rest of adevs pre asic reset from XGMI hive. */
	list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) {

		if (tmp_adev == adev)
			continue;

		amdgpu_device_lock_adev(tmp_adev, false);
		r = amdgpu_device_pre_asic_reset(tmp_adev,
						 NULL,
						 &need_full_reset);
		/* TODO: Should we stop? */
		if (r) {
			DRM_ERROR("GPU pre asic reset failed with err, %d for drm dev, %s ",
				  r, tmp_adev->ddev->unique);
			tmp_adev->asic_reset_res = r;
		}
	}

	/* Actual ASIC resets if needed. */
	/* TODO: Implement XGMI hive reset logic for SRIOV */
	if (amdgpu_sriov_vf(adev)) {
		r = amdgpu_device_reset_sriov(adev, job ? false : true);
		if (r)
			adev->asic_reset_res = r;
	} else {
		r = amdgpu_do_asic_reset(hive, device_list_handle, &need_full_reset);
		if (r && r == -EAGAIN)
			goto retry;
	}

skip_hw_reset:

	/* Post ASIC reset for all devs. */
	list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) {
		for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
			struct amdgpu_ring *ring = tmp_adev->rings[i];

			if (!ring || !ring->sched.thread)
				continue;

			/* No point in resubmitting jobs if we didn't HW reset */
			if (!tmp_adev->asic_reset_res && !job_signaled)
				drm_sched_resubmit_jobs(&ring->sched);

			drm_sched_start(&ring->sched, !tmp_adev->asic_reset_res);
		}

		if (!amdgpu_device_has_dc_support(tmp_adev) && !job_signaled) {
			drm_helper_resume_force_mode(tmp_adev->ddev);
		}

		tmp_adev->asic_reset_res = 0;

		if (r) {
			/* bad news, how to tell it to userspace ? */
			dev_info(tmp_adev->dev, "GPU reset(%d) failed\n", atomic_read(&adev->gpu_reset_counter));
			amdgpu_vf_error_put(tmp_adev, AMDGIM_ERROR_VF_GPU_RESET_FAIL, 0, r);
		} else {
			dev_info(tmp_adev->dev, "GPU reset(%d) succeeded!\n", atomic_read(&adev->gpu_reset_counter));
		}

		amdgpu_device_unlock_adev(tmp_adev);
	}

	if (hive)
		mutex_unlock(&hive->reset_lock);

	if (r)
		dev_info(adev->dev, "GPU reset end with ret = %d\n", r);
	return r;
}

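/*
 * Reset serialization in the function above: the hive-wide reset_lock is
 * taken first (trylock), then each device's lock_reset via
 * amdgpu_device_lock_adev(). The trylock variants let concurrent timeout
 * handlers on the same hive or device bail out early instead of queueing up
 * redundant resets.
 */
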
/**
 * amdgpu_device_get_pcie_info - fetch PCIe info about the PCIe slot
 *
 * @adev: amdgpu_device pointer
 *
 * Fetches and stores in the driver the PCIe capabilities (gen speed
 * and lanes) of the slot the device is in. Handles APUs and
 * virtualized environments where PCIe config space may not be available.
 */
static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev)
{
	struct pci_dev *pdev;
	enum pci_bus_speed speed_cap, platform_speed_cap;
	enum pcie_link_width platform_link_width;

	if (amdgpu_pcie_gen_cap)
		adev->pm.pcie_gen_mask = amdgpu_pcie_gen_cap;

	if (amdgpu_pcie_lane_cap)
		adev->pm.pcie_mlw_mask = amdgpu_pcie_lane_cap;

	/* covers APUs as well */
	if (pci_is_root_bus(adev->pdev->bus)) {
		if (adev->pm.pcie_gen_mask == 0)
			adev->pm.pcie_gen_mask = AMDGPU_DEFAULT_PCIE_GEN_MASK;
		if (adev->pm.pcie_mlw_mask == 0)
			adev->pm.pcie_mlw_mask = AMDGPU_DEFAULT_PCIE_MLW_MASK;
		return;
	}

	if (adev->pm.pcie_gen_mask && adev->pm.pcie_mlw_mask)
		return;

	pcie_bandwidth_available(adev->pdev, NULL,
				 &platform_speed_cap, &platform_link_width);

	if (adev->pm.pcie_gen_mask == 0) {
		/* asic caps */
		pdev = adev->pdev;
		speed_cap = pcie_get_speed_cap(pdev);
		if (speed_cap == PCI_SPEED_UNKNOWN) {
			adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
						   CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
						   CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3);
		} else {
			if (speed_cap == PCIE_SPEED_16_0GT)
				adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
							   CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
							   CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3 |
							   CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN4);
			else if (speed_cap == PCIE_SPEED_8_0GT)
				adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
							   CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
							   CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3);
			else if (speed_cap == PCIE_SPEED_5_0GT)
				adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
							   CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2);
			else
				adev->pm.pcie_gen_mask |= CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1;
		}
		/* platform caps */
		if (platform_speed_cap == PCI_SPEED_UNKNOWN) {
			adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
						   CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2);
		} else {
			if (platform_speed_cap == PCIE_SPEED_16_0GT)
				adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
							   CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2 |
							   CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3 |
							   CAIL_PCIE_LINK_SPEED_SUPPORT_GEN4);
			else if (platform_speed_cap == PCIE_SPEED_8_0GT)
				adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
							   CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2 |
							   CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3);
			else if (platform_speed_cap == PCIE_SPEED_5_0GT)
				adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
							   CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2);
			else
				adev->pm.pcie_gen_mask |= CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1;

		}
	}
	if (adev->pm.pcie_mlw_mask == 0) {
		if (platform_link_width == PCIE_LNK_WIDTH_UNKNOWN) {
			adev->pm.pcie_mlw_mask |= AMDGPU_DEFAULT_PCIE_MLW_MASK;
		} else {
			switch (platform_link_width) {
			case PCIE_LNK_X32:
				adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X32 |
							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X16 |
							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 |
							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
				break;
			case PCIE_LNK_X16:
				adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X16 |
							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 |
							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
				break;
			case PCIE_LNK_X12:
				adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 |
							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
				break;
			case PCIE_LNK_X8:
				adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
				break;
			case PCIE_LNK_X4:
				adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
				break;
			case PCIE_LNK_X2:
				adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
				break;
			case PCIE_LNK_X1:
				adev->pm.pcie_mlw_mask = CAIL_PCIE_LINK_WIDTH_SUPPORT_X1;
				break;
			default:
				break;
			}
		}
	}
}
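
/*
 * amdgpu_pcie_gen_cap and amdgpu_pcie_lane_cap checked at the top of the
 * function above are driver module parameters (typically set as
 * amdgpu.pcie_gen_cap= / amdgpu.pcie_lane_cap= on the kernel command line);
 * when non-zero they bypass the PCI capability probing entirely. The bit
 * layout of the masks follows the CAIL_* defines used above.
 */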