drm/amd/display: Add ASICREV_IS_NAVI macros
[linux-2.6-block.git] / drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
1/*
2 * Copyright 2008 Advanced Micro Devices, Inc.
3 * Copyright 2008 Red Hat Inc.
4 * Copyright 2009 Jerome Glisse.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 * and/or sell copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
20 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
21 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
22 * OTHER DEALINGS IN THE SOFTWARE.
23 *
24 * Authors: Dave Airlie
25 * Alex Deucher
26 * Jerome Glisse
27 */
b1ddf548 28#include <linux/power_supply.h>
0875dc9e 29#include <linux/kthread.h>
fdf2f6c5 30#include <linux/module.h>
31#include <linux/console.h>
32#include <linux/slab.h>
fdf2f6c5 33
4562236b 34#include <drm/drm_atomic_helper.h>
fcd70cd3 35#include <drm/drm_probe_helper.h>
36#include <drm/amdgpu_drm.h>
37#include <linux/vgaarb.h>
38#include <linux/vga_switcheroo.h>
39#include <linux/efi.h>
40#include "amdgpu.h"
f4b373f4 41#include "amdgpu_trace.h"
42#include "amdgpu_i2c.h"
43#include "atom.h"
44#include "amdgpu_atombios.h"
a5bde2f9 45#include "amdgpu_atomfirmware.h"
d0dd7f0c 46#include "amd_pcie.h"
47#ifdef CONFIG_DRM_AMDGPU_SI
48#include "si.h"
49#endif
50#ifdef CONFIG_DRM_AMDGPU_CIK
51#include "cik.h"
52#endif
aaa36a97 53#include "vi.h"
460826e6 54#include "soc15.h"
0a5b8c7b 55#include "nv.h"
d38ceaf9 56#include "bif/bif_4_1_d.h"
9accf2fd 57#include <linux/pci.h>
bec86378 58#include <linux/firmware.h>
89041940 59#include "amdgpu_vf_error.h"
d38ceaf9 60
ba997709 61#include "amdgpu_amdkfd.h"
d2f52ac8 62#include "amdgpu_pm.h"
d38ceaf9 63
5183411b 64#include "amdgpu_xgmi.h"
c030f2e4 65#include "amdgpu_ras.h"
9c7c85f7 66#include "amdgpu_pmu.h"
5183411b 67
e2a75f88 68MODULE_FIRMWARE("amdgpu/vega10_gpu_info.bin");
3f76dced 69MODULE_FIRMWARE("amdgpu/vega12_gpu_info.bin");
2d2e5e7e 70MODULE_FIRMWARE("amdgpu/raven_gpu_info.bin");
ad5a67a7 71MODULE_FIRMWARE("amdgpu/picasso_gpu_info.bin");
54c4d17e 72MODULE_FIRMWARE("amdgpu/raven2_gpu_info.bin");
65e60f6e 73MODULE_FIRMWARE("amdgpu/arcturus_gpu_info.bin");
23c6268e 74MODULE_FIRMWARE("amdgpu/navi10_gpu_info.bin");
ed42cfe1 75MODULE_FIRMWARE("amdgpu/navi14_gpu_info.bin");
42b325e5 76MODULE_FIRMWARE("amdgpu/navi12_gpu_info.bin");
e2a75f88 77
78#define AMDGPU_RESUME_MS 2000
79
d38ceaf9 80static const char *amdgpu_asic_name[] = {
81 "TAHITI",
82 "PITCAIRN",
83 "VERDE",
84 "OLAND",
85 "HAINAN",
86 "BONAIRE",
87 "KAVERI",
88 "KABINI",
89 "HAWAII",
90 "MULLINS",
91 "TOPAZ",
92 "TONGA",
48299f95 93 "FIJI",
d38ceaf9 94 "CARRIZO",
139f4917 95 "STONEY",
96 "POLARIS10",
97 "POLARIS11",
c4642a47 98 "POLARIS12",
48ff108d 99 "VEGAM",
d4196f01 100 "VEGA10",
8fab806a 101 "VEGA12",
956fcddc 102 "VEGA20",
2ca8a5d2 103 "RAVEN",
d6c3b24e 104 "ARCTURUS",
852a6626 105 "NAVI10",
87dbad02 106 "NAVI14",
9802f5d7 107 "NAVI12",
108 "LAST",
109};
110
111/**
112 * DOC: pcie_replay_count
113 *
114 * The amdgpu driver provides a sysfs API for reporting the total number
115 * of PCIe replays (NAKs)
116 * The file pcie_replay_count is used for this and returns the total
117 * number of replays as a sum of the NAKs generated and NAKs received
118 */
119
120static ssize_t amdgpu_device_get_pcie_replay_count(struct device *dev,
121 struct device_attribute *attr, char *buf)
122{
123 struct drm_device *ddev = dev_get_drvdata(dev);
124 struct amdgpu_device *adev = ddev->dev_private;
125 uint64_t cnt = amdgpu_asic_get_pcie_replay_count(adev);
126
127 return snprintf(buf, PAGE_SIZE, "%llu\n", cnt);
128}
129
130static DEVICE_ATTR(pcie_replay_count, S_IRUGO,
131 amdgpu_device_get_pcie_replay_count, NULL);
132
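/*
 * Illustrative note (not part of the original file): the attribute above is
 * exposed as a read-only sysfs file under the DRM/PCI device node. The exact
 * path below is an example and may differ per system:
 *
 *   $ cat /sys/class/drm/card0/device/pcie_replay_count
 *   0
 */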
133static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev);
134
135/**
136 * amdgpu_device_is_px - Is the device a dGPU with HG/PX power control
137 *
138 * @dev: drm_device pointer
139 *
140 * Returns true if the device is a dGPU with HG/PX power control,
141 * otherwise returns false.
142 */
143bool amdgpu_device_is_px(struct drm_device *dev)
144{
145 struct amdgpu_device *adev = dev->dev_private;
146
2f7d10b3 147 if (adev->flags & AMD_IS_PX)
148 return true;
149 return false;
150}
151
152/*
153 * MMIO register access helper functions.
154 */
155/**
156 * amdgpu_mm_rreg - read a memory mapped IO register
157 *
158 * @adev: amdgpu_device pointer
159 * @reg: dword aligned register offset
160 * @acc_flags: access flags which require special behavior
161 *
162 * Returns the 32 bit value from the offset specified.
163 */
d38ceaf9 164uint32_t amdgpu_mm_rreg(struct amdgpu_device *adev, uint32_t reg,
15d72fd7 165 uint32_t acc_flags)
d38ceaf9 166{
167 uint32_t ret;
168
43ca8efa 169 if (!(acc_flags & AMDGPU_REGS_NO_KIQ) && amdgpu_sriov_runtime(adev))
bc992ba5 170 return amdgpu_virt_kiq_rreg(adev, reg);
bc992ba5 171
15d72fd7 172 if ((reg * 4) < adev->rmmio_size && !(acc_flags & AMDGPU_REGS_IDX))
f4b373f4 173 ret = readl(((void __iomem *)adev->rmmio) + (reg * 4));
174 else {
175 unsigned long flags;
176
177 spin_lock_irqsave(&adev->mmio_idx_lock, flags);
178 writel((reg * 4), ((void __iomem *)adev->rmmio) + (mmMM_INDEX * 4));
179 ret = readl(((void __iomem *)adev->rmmio) + (mmMM_DATA * 4));
180 spin_unlock_irqrestore(&adev->mmio_idx_lock, flags);
d38ceaf9 181 }
182 trace_amdgpu_mm_rreg(adev->pdev->device, reg, ret);
183 return ret;
184}
185
186/*
187 * MMIO register read-by-byte helper function
188 * @offset: byte offset from MMIO start
189 *
190 */
191
192/**
193 * amdgpu_mm_rreg8 - read a memory mapped IO register
194 *
195 * @adev: amdgpu_device pointer
196 * @offset: byte aligned register offset
197 *
198 * Returns the 8 bit value from the offset specified.
199 */
200uint8_t amdgpu_mm_rreg8(struct amdgpu_device *adev, uint32_t offset) {
201 if (offset < adev->rmmio_size)
202 return (readb(adev->rmmio + offset));
203 BUG();
204}
205
206/*
207 * MMIO register write-by-byte helper function
208 * @offset: byte offset from MMIO start
209 * @value: the value to be written to the register
210 *
211 */
212/**
213 * amdgpu_mm_wreg8 - write a memory mapped IO register
214 *
215 * @adev: amdgpu_device pointer
216 * @offset: byte aligned register offset
217 * @value: 8 bit value to write
218 *
219 * Writes the value specified to the offset specified.
220 */
221void amdgpu_mm_wreg8(struct amdgpu_device *adev, uint32_t offset, uint8_t value) {
222 if (offset < adev->rmmio_size)
223 writeb(value, adev->rmmio + offset);
224 else
225 BUG();
226}
227
228/**
229 * amdgpu_mm_wreg - write to a memory mapped IO register
230 *
231 * @adev: amdgpu_device pointer
232 * @reg: dword aligned register offset
233 * @v: 32 bit value to write to the register
234 * @acc_flags: access flags which require special behavior
235 *
236 * Writes the value specified to the offset specified.
237 */
d38ceaf9 238void amdgpu_mm_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v,
15d72fd7 239 uint32_t acc_flags)
d38ceaf9 240{
f4b373f4 241 trace_amdgpu_mm_wreg(adev->pdev->device, reg, v);
4e99a44e 242
243 if (adev->asic_type >= CHIP_VEGA10 && reg == 0) {
244 adev->last_mm_index = v;
245 }
246
43ca8efa 247 if (!(acc_flags & AMDGPU_REGS_NO_KIQ) && amdgpu_sriov_runtime(adev))
bc992ba5 248 return amdgpu_virt_kiq_wreg(adev, reg, v);
bc992ba5 249
15d72fd7 250 if ((reg * 4) < adev->rmmio_size && !(acc_flags & AMDGPU_REGS_IDX))
251 writel(v, ((void __iomem *)adev->rmmio) + (reg * 4));
252 else {
253 unsigned long flags;
254
255 spin_lock_irqsave(&adev->mmio_idx_lock, flags);
256 writel((reg * 4), ((void __iomem *)adev->rmmio) + (mmMM_INDEX * 4));
257 writel(v, ((void __iomem *)adev->rmmio) + (mmMM_DATA * 4));
258 spin_unlock_irqrestore(&adev->mmio_idx_lock, flags);
259 }
260
261 if (adev->asic_type >= CHIP_VEGA10 && reg == 1 && adev->last_mm_index == 0x5702C) {
262 udelay(500);
263 }
264}
265
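/*
 * Illustrative sketch (kept out of the build, not part of the original file):
 * a typical read-modify-write through the helpers above. EXAMPLE_RMW_REG and
 * the field masks are hypothetical placeholders; most callers go through the
 * RREG32()/WREG32() macros, which wrap amdgpu_mm_rreg()/amdgpu_mm_wreg() with
 * acc_flags == 0.
 */
#if 0
static void example_mmio_rmw(struct amdgpu_device *adev)
{
	uint32_t val;

	/* dword-aligned offset, no special access flags */
	val = amdgpu_mm_rreg(adev, EXAMPLE_RMW_REG, 0);
	val &= ~0x000000ffu;	/* clear the hypothetical field */
	val |= 0x00000042u;	/* program the new value */
	amdgpu_mm_wreg(adev, EXAMPLE_RMW_REG, val, 0);
}
#endif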
266/**
267 * amdgpu_mm_rreg64 - read a 64 bit memory mapped IO register
268 *
269 * @adev: amdgpu_device pointer
270 * @reg: dword aligned register offset
271 *
272 * Returns the 64 bit value from the offset specified.
273 */
274uint64_t amdgpu_mm_rreg64(struct amdgpu_device *adev, uint32_t reg)
275{
276 uint64_t ret;
277
278 if ((reg * 4) < adev->rmmio_size)
279 ret = readq(((void __iomem *)adev->rmmio) + (reg * 4));
280 else
281 BUG();
282
283 return ret;
284}
285
286/**
287 * amdgpu_mm_wreg64 - write to a 64 bit memory mapped IO register
288 *
289 * @adev: amdgpu_device pointer
290 * @reg: dword aligned register offset
291 * @v: 64 bit value to write to the register
292 *
293 * Writes the value specified to the offset specified.
294 */
295void amdgpu_mm_wreg64(struct amdgpu_device *adev, uint32_t reg, uint64_t v)
296{
297 if ((reg * 4) < adev->rmmio_size)
298 writeq(v, ((void __iomem *)adev->rmmio) + (reg * 4));
299 else
300 BUG();
301}
302
303/**
304 * amdgpu_io_rreg - read an IO register
305 *
306 * @adev: amdgpu_device pointer
307 * @reg: dword aligned register offset
308 *
309 * Returns the 32 bit value from the offset specified.
310 */
311u32 amdgpu_io_rreg(struct amdgpu_device *adev, u32 reg)
312{
313 if ((reg * 4) < adev->rio_mem_size)
314 return ioread32(adev->rio_mem + (reg * 4));
315 else {
316 iowrite32((reg * 4), adev->rio_mem + (mmMM_INDEX * 4));
317 return ioread32(adev->rio_mem + (mmMM_DATA * 4));
318 }
319}
320
321/**
322 * amdgpu_io_wreg - write to an IO register
323 *
324 * @adev: amdgpu_device pointer
325 * @reg: dword aligned register offset
326 * @v: 32 bit value to write to the register
327 *
328 * Writes the value specified to the offset specified.
329 */
330void amdgpu_io_wreg(struct amdgpu_device *adev, u32 reg, u32 v)
331{
332 if (adev->asic_type >= CHIP_VEGA10 && reg == 0) {
333 adev->last_mm_index = v;
334 }
335
336 if ((reg * 4) < adev->rio_mem_size)
337 iowrite32(v, adev->rio_mem + (reg * 4));
338 else {
339 iowrite32((reg * 4), adev->rio_mem + (mmMM_INDEX * 4));
340 iowrite32(v, adev->rio_mem + (mmMM_DATA * 4));
341 }
342
343 if (adev->asic_type >= CHIP_VEGA10 && reg == 1 && adev->last_mm_index == 0x5702C) {
344 udelay(500);
345 }
346}
347
348/**
349 * amdgpu_mm_rdoorbell - read a doorbell dword
350 *
351 * @adev: amdgpu_device pointer
352 * @index: doorbell index
353 *
354 * Returns the value in the doorbell aperture at the
355 * requested doorbell index (CIK).
356 */
357u32 amdgpu_mm_rdoorbell(struct amdgpu_device *adev, u32 index)
358{
359 if (index < adev->doorbell.num_doorbells) {
360 return readl(adev->doorbell.ptr + index);
361 } else {
362 DRM_ERROR("reading beyond doorbell aperture: 0x%08x!\n", index);
363 return 0;
364 }
365}
366
367/**
368 * amdgpu_mm_wdoorbell - write a doorbell dword
369 *
370 * @adev: amdgpu_device pointer
371 * @index: doorbell index
372 * @v: value to write
373 *
374 * Writes @v to the doorbell aperture at the
375 * requested doorbell index (CIK).
376 */
377void amdgpu_mm_wdoorbell(struct amdgpu_device *adev, u32 index, u32 v)
378{
379 if (index < adev->doorbell.num_doorbells) {
380 writel(v, adev->doorbell.ptr + index);
381 } else {
382 DRM_ERROR("writing beyond doorbell aperture: 0x%08x!\n", index);
383 }
384}
385
386/**
387 * amdgpu_mm_rdoorbell64 - read a doorbell Qword
388 *
389 * @adev: amdgpu_device pointer
390 * @index: doorbell index
391 *
392 * Returns the value in the doorbell aperture at the
393 * requested doorbell index (VEGA10+).
394 */
395u64 amdgpu_mm_rdoorbell64(struct amdgpu_device *adev, u32 index)
396{
397 if (index < adev->doorbell.num_doorbells) {
398 return atomic64_read((atomic64_t *)(adev->doorbell.ptr + index));
399 } else {
400 DRM_ERROR("reading beyond doorbell aperture: 0x%08x!\n", index);
401 return 0;
402 }
403}
404
405/**
406 * amdgpu_mm_wdoorbell64 - write a doorbell Qword
407 *
408 * @adev: amdgpu_device pointer
409 * @index: doorbell index
410 * @v: value to write
411 *
412 * Writes @v to the doorbell aperture at the
413 * requested doorbell index (VEGA10+).
414 */
415void amdgpu_mm_wdoorbell64(struct amdgpu_device *adev, u32 index, u64 v)
416{
417 if (index < adev->doorbell.num_doorbells) {
418 atomic64_set((atomic64_t *)(adev->doorbell.ptr + index), v);
419 } else {
420 DRM_ERROR("writing beyond doorbell aperture: 0x%08x!\n", index);
421 }
422}
423
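/*
 * Illustrative sketch (kept out of the build, not part of the original file):
 * ringing a doorbell through the helpers above. The index and write pointer
 * are hypothetical; real users derive the index from adev->doorbell_index and
 * the ring they own.
 */
#if 0
static void example_ring_doorbell(struct amdgpu_device *adev, u32 index, u64 wptr)
{
	/* pre-VEGA10 style: 32-bit doorbell write */
	amdgpu_mm_wdoorbell(adev, index, lower_32_bits(wptr));

	/* VEGA10+ style: single 64-bit doorbell write */
	amdgpu_mm_wdoorbell64(adev, index, wptr);
}
#endif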
424/**
425 * amdgpu_invalid_rreg - dummy reg read function
426 *
427 * @adev: amdgpu device pointer
428 * @reg: offset of register
429 *
430 * Dummy register read function. Used for register blocks
431 * that certain asics don't have (all asics).
432 * Returns the value in the register.
433 */
434static uint32_t amdgpu_invalid_rreg(struct amdgpu_device *adev, uint32_t reg)
435{
436 DRM_ERROR("Invalid callback to read register 0x%04X\n", reg);
437 BUG();
438 return 0;
439}
440
441/**
442 * amdgpu_invalid_wreg - dummy reg write function
443 *
444 * @adev: amdgpu device pointer
445 * @reg: offset of register
446 * @v: value to write to the register
447 *
448 * Dummy register write function. Used for register blocks
449 * that certain asics don't have (all asics).
450 */
451static void amdgpu_invalid_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v)
452{
453 DRM_ERROR("Invalid callback to write register 0x%04X with 0x%08X\n",
454 reg, v);
455 BUG();
456}
457
458/**
459 * amdgpu_invalid_rreg64 - dummy 64 bit reg read function
460 *
461 * @adev: amdgpu device pointer
462 * @reg: offset of register
463 *
464 * Dummy register read function. Used for register blocks
465 * that certain asics don't have (all asics).
466 * Returns the value in the register.
467 */
468static uint64_t amdgpu_invalid_rreg64(struct amdgpu_device *adev, uint32_t reg)
469{
470 DRM_ERROR("Invalid callback to read 64 bit register 0x%04X\n", reg);
471 BUG();
472 return 0;
473}
474
475/**
476 * amdgpu_invalid_wreg64 - dummy reg write function
477 *
478 * @adev: amdgpu device pointer
479 * @reg: offset of register
480 * @v: value to write to the register
481 *
482 * Dummy register write function. Used for register blocks
483 * that certain asics don't have (all asics).
484 */
485static void amdgpu_invalid_wreg64(struct amdgpu_device *adev, uint32_t reg, uint64_t v)
486{
487 DRM_ERROR("Invalid callback to write 64 bit register 0x%04X with 0x%08llX\n",
488 reg, v);
489 BUG();
490}
491
492/**
493 * amdgpu_block_invalid_rreg - dummy reg read function
494 *
495 * @adev: amdgpu device pointer
496 * @block: offset of instance
497 * @reg: offset of register
498 *
499 * Dummy register read function. Used for register blocks
500 * that certain asics don't have (all asics).
501 * Returns the value in the register.
502 */
503static uint32_t amdgpu_block_invalid_rreg(struct amdgpu_device *adev,
504 uint32_t block, uint32_t reg)
505{
506 DRM_ERROR("Invalid callback to read register 0x%04X in block 0x%04X\n",
507 reg, block);
508 BUG();
509 return 0;
510}
511
512/**
513 * amdgpu_block_invalid_wreg - dummy reg write function
514 *
515 * @adev: amdgpu device pointer
516 * @block: offset of instance
517 * @reg: offset of register
518 * @v: value to write to the register
519 *
520 * Dummy register write function. Used for register blocks
521 * that certain asics don't have (all asics).
522 */
523static void amdgpu_block_invalid_wreg(struct amdgpu_device *adev,
524 uint32_t block,
525 uint32_t reg, uint32_t v)
526{
527 DRM_ERROR("Invalid block callback to write register 0x%04X in block 0x%04X with 0x%08X\n",
528 reg, block, v);
529 BUG();
530}
531
532/**
533 * amdgpu_device_vram_scratch_init - allocate the VRAM scratch page
534 *
535 * @adev: amdgpu device pointer
536 *
537 * Allocates a scratch page of VRAM for use by various things in the
538 * driver.
539 */
06ec9070 540static int amdgpu_device_vram_scratch_init(struct amdgpu_device *adev)
d38ceaf9 541{
542 return amdgpu_bo_create_kernel(adev, AMDGPU_GPU_PAGE_SIZE,
543 PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM,
544 &adev->vram_scratch.robj,
545 &adev->vram_scratch.gpu_addr,
546 (void **)&adev->vram_scratch.ptr);
547}
548
549/**
550 * amdgpu_device_vram_scratch_fini - Free the VRAM scratch page
551 *
552 * @adev: amdgpu device pointer
553 *
554 * Frees the VRAM scratch page.
555 */
06ec9070 556static void amdgpu_device_vram_scratch_fini(struct amdgpu_device *adev)
d38ceaf9 557{
078af1a3 558 amdgpu_bo_free_kernel(&adev->vram_scratch.robj, NULL, NULL);
559}
560
561/**
9c3f2b54 562 * amdgpu_device_program_register_sequence - program an array of registers.
563 *
564 * @adev: amdgpu_device pointer
565 * @registers: pointer to the register array
566 * @array_size: size of the register array
567 *
568 * Programs an array of registers with AND and OR masks.
569 * This is a helper for setting golden registers.
570 */
571void amdgpu_device_program_register_sequence(struct amdgpu_device *adev,
572 const u32 *registers,
573 const u32 array_size)
574{
575 u32 tmp, reg, and_mask, or_mask;
576 int i;
577
578 if (array_size % 3)
579 return;
580
581 for (i = 0; i < array_size; i +=3) {
582 reg = registers[i + 0];
583 and_mask = registers[i + 1];
584 or_mask = registers[i + 2];
585
586 if (and_mask == 0xffffffff) {
587 tmp = or_mask;
588 } else {
589 tmp = RREG32(reg);
590 tmp &= ~and_mask;
591 if (adev->family >= AMDGPU_FAMILY_AI)
592 tmp |= (or_mask & and_mask);
593 else
594 tmp |= or_mask;
595 }
596 WREG32(reg, tmp);
597 }
598}
599
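/*
 * Illustrative sketch (kept out of the build, not part of the original file):
 * the array consumed by amdgpu_device_program_register_sequence() is a flat
 * list of {offset, and_mask, or_mask} triplets. The offsets and masks below
 * are hypothetical placeholders, not real golden settings.
 */
#if 0
static const u32 example_golden_settings[] = {
	/* offset,    and_mask,   or_mask */
	0x00002040, 0xffffffff, 0x00000100,	/* and_mask of all 1s: or_mask is written as-is */
	0x00002044, 0x0000ff00, 0x00003200,	/* otherwise: read-modify-write using the masks */
};

static void example_program_golden(struct amdgpu_device *adev)
{
	amdgpu_device_program_register_sequence(adev,
						example_golden_settings,
						ARRAY_SIZE(example_golden_settings));
}
#endif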
600/**
601 * amdgpu_device_pci_config_reset - reset the GPU
602 *
603 * @adev: amdgpu_device pointer
604 *
605 * Resets the GPU using the pci config reset sequence.
606 * Only applicable to asics prior to vega10.
607 */
8111c387 608void amdgpu_device_pci_config_reset(struct amdgpu_device *adev)
609{
610 pci_write_config_dword(adev->pdev, 0x7c, AMDGPU_ASIC_RESET_DATA);
611}
612
613/*
614 * GPU doorbell aperture helpers function.
615 */
616/**
06ec9070 617 * amdgpu_device_doorbell_init - Init doorbell driver information.
618 *
619 * @adev: amdgpu_device pointer
620 *
621 * Init doorbell driver information (CIK)
622 * Returns 0 on success, error on failure.
623 */
06ec9070 624static int amdgpu_device_doorbell_init(struct amdgpu_device *adev)
d38ceaf9 625{
6585661d 626
627 /* No doorbell on SI hardware generation */
628 if (adev->asic_type < CHIP_BONAIRE) {
629 adev->doorbell.base = 0;
630 adev->doorbell.size = 0;
631 adev->doorbell.num_doorbells = 0;
632 adev->doorbell.ptr = NULL;
633 return 0;
634 }
635
636 if (pci_resource_flags(adev->pdev, 2) & IORESOURCE_UNSET)
637 return -EINVAL;
638
639 amdgpu_asic_init_doorbell_index(adev);
640
641 /* doorbell bar mapping */
642 adev->doorbell.base = pci_resource_start(adev->pdev, 2);
643 adev->doorbell.size = pci_resource_len(adev->pdev, 2);
644
edf600da 645 adev->doorbell.num_doorbells = min_t(u32, adev->doorbell.size / sizeof(u32),
9564f192 646 adev->doorbell_index.max_assignment+1);
647 if (adev->doorbell.num_doorbells == 0)
648 return -EINVAL;
649
ec3db8a6 650 /* For Vega, reserve and map two pages on doorbell BAR since SDMA
651 * paging queue doorbells use the second page. The
652 * AMDGPU_DOORBELL64_MAX_ASSIGNMENT definition assumes all the
653 * doorbells are in the first page. So with the paging queue enabled,
654 * the max num_doorbells should be increased by 1 page (0x400 in dwords)
655 */
656 if (adev->asic_type >= CHIP_VEGA10)
88dc26e4 657 adev->doorbell.num_doorbells += 0x400;
ec3db8a6 658
659 adev->doorbell.ptr = ioremap(adev->doorbell.base,
660 adev->doorbell.num_doorbells *
661 sizeof(u32));
662 if (adev->doorbell.ptr == NULL)
d38ceaf9 663 return -ENOMEM;
664
665 return 0;
666}
667
668/**
06ec9070 669 * amdgpu_device_doorbell_fini - Tear down doorbell driver information.
670 *
671 * @adev: amdgpu_device pointer
672 *
673 * Tear down doorbell driver information (CIK)
674 */
06ec9070 675static void amdgpu_device_doorbell_fini(struct amdgpu_device *adev)
676{
677 iounmap(adev->doorbell.ptr);
678 adev->doorbell.ptr = NULL;
679}
680
22cb0164 681
682
683/*
06ec9070 684 * amdgpu_device_wb_*()
455a7bc2 685 * Writeback is the method by which the GPU updates special pages in memory
ea81a173 686 * with the status of certain GPU events (fences, ring pointers, etc.).
687 */
688
689/**
06ec9070 690 * amdgpu_device_wb_fini - Disable Writeback and free memory
691 *
692 * @adev: amdgpu_device pointer
693 *
694 * Disables Writeback and frees the Writeback memory (all asics).
695 * Used at driver shutdown.
696 */
06ec9070 697static void amdgpu_device_wb_fini(struct amdgpu_device *adev)
698{
699 if (adev->wb.wb_obj) {
700 amdgpu_bo_free_kernel(&adev->wb.wb_obj,
701 &adev->wb.gpu_addr,
702 (void **)&adev->wb.wb);
703 adev->wb.wb_obj = NULL;
704 }
705}
706
707/**
06ec9070 708 * amdgpu_device_wb_init- Init Writeback driver info and allocate memory
709 *
710 * @adev: amdgpu_device pointer
711 *
455a7bc2 712 * Initializes writeback and allocates writeback memory (all asics).
713 * Used at driver startup.
714 * Returns 0 on success or a negative error code on failure.
715 */
06ec9070 716static int amdgpu_device_wb_init(struct amdgpu_device *adev)
717{
718 int r;
719
720 if (adev->wb.wb_obj == NULL) {
721 /* AMDGPU_MAX_WB * sizeof(uint32_t) * 8 = AMDGPU_MAX_WB 256bit slots */
722 r = amdgpu_bo_create_kernel(adev, AMDGPU_MAX_WB * sizeof(uint32_t) * 8,
723 PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT,
724 &adev->wb.wb_obj, &adev->wb.gpu_addr,
725 (void **)&adev->wb.wb);
726 if (r) {
727 dev_warn(adev->dev, "(%d) create WB bo failed\n", r);
728 return r;
729 }
730
731 adev->wb.num_wb = AMDGPU_MAX_WB;
732 memset(&adev->wb.used, 0, sizeof(adev->wb.used));
733
734 /* clear wb memory */
73469585 735 memset((char *)adev->wb.wb, 0, AMDGPU_MAX_WB * sizeof(uint32_t) * 8);
736 }
737
738 return 0;
739}
740
741/**
131b4b36 742 * amdgpu_device_wb_get - Allocate a wb entry
743 *
744 * @adev: amdgpu_device pointer
745 * @wb: wb index
746 *
747 * Allocate a wb slot for use by the driver (all asics).
748 * Returns 0 on success or -EINVAL on failure.
749 */
131b4b36 750int amdgpu_device_wb_get(struct amdgpu_device *adev, u32 *wb)
751{
752 unsigned long offset = find_first_zero_bit(adev->wb.used, adev->wb.num_wb);
d38ceaf9 753
97407b63 754 if (offset < adev->wb.num_wb) {
7014285a 755 __set_bit(offset, adev->wb.used);
63ae07ca 756 *wb = offset << 3; /* convert to dw offset */
757 return 0;
758 } else {
759 return -EINVAL;
760 }
761}
762
d38ceaf9 763/**
131b4b36 764 * amdgpu_device_wb_free - Free a wb entry
765 *
766 * @adev: amdgpu_device pointer
767 * @wb: wb index
768 *
769 * Free a wb slot allocated for use by the driver (all asics)
770 */
131b4b36 771void amdgpu_device_wb_free(struct amdgpu_device *adev, u32 wb)
d38ceaf9 772{
73469585 773 wb >>= 3;
d38ceaf9 774 if (wb < adev->wb.num_wb)
73469585 775 __clear_bit(wb, adev->wb.used);
776}
777
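/*
 * Illustrative sketch (kept out of the build, not part of the original file):
 * allocating a writeback slot, reading what the GPU wrote into it, and
 * releasing it again. Offsets returned by amdgpu_device_wb_get() are in
 * dwords, so the CPU view is adev->wb.wb[wb] and the GPU address is
 * adev->wb.gpu_addr + wb * 4.
 */
#if 0
static int example_use_wb_slot(struct amdgpu_device *adev)
{
	u32 wb, value;
	int r;

	r = amdgpu_device_wb_get(adev, &wb);
	if (r)
		return r;

	value = adev->wb.wb[wb];	/* CPU read of the GPU-updated dword */
	dev_info(adev->dev, "wb[%u] = 0x%08x\n", wb, value);

	amdgpu_device_wb_free(adev, wb);
	return 0;
}
#endif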
778/**
779 * amdgpu_device_resize_fb_bar - try to resize FB BAR
780 *
781 * @adev: amdgpu_device pointer
782 *
783 * Try to resize FB BAR to make all VRAM CPU accessible. We try very hard not
784 * to fail, but if any of the BARs is not accessible after the size we abort
785 * driver loading by returning -ENODEV.
786 */
787int amdgpu_device_resize_fb_bar(struct amdgpu_device *adev)
788{
770d13b1 789 u64 space_needed = roundup_pow_of_two(adev->gmc.real_vram_size);
d6895ad3 790 u32 rbar_size = order_base_2(((space_needed >> 20) | 1)) - 1;
31b8adab
CK
791 struct pci_bus *root;
792 struct resource *res;
793 unsigned i;
d6895ad3
CK
794 u16 cmd;
795 int r;
796
0c03b912 797 /* Bypass for VF */
798 if (amdgpu_sriov_vf(adev))
799 return 0;
800
31b8adab
CK
801 /* Check if the root BUS has 64bit memory resources */
802 root = adev->pdev->bus;
803 while (root->parent)
804 root = root->parent;
805
806 pci_bus_for_each_resource(root, res, i) {
0ebb7c54 807 if (res && res->flags & (IORESOURCE_MEM | IORESOURCE_MEM_64) &&
31b8adab
CK
808 res->start > 0x100000000ull)
809 break;
810 }
811
812 /* Trying to resize is pointless without a root hub window above 4GB */
813 if (!res)
814 return 0;
815
d6895ad3
CK
816 /* Disable memory decoding while we change the BAR addresses and size */
817 pci_read_config_word(adev->pdev, PCI_COMMAND, &cmd);
818 pci_write_config_word(adev->pdev, PCI_COMMAND,
819 cmd & ~PCI_COMMAND_MEMORY);
820
821 /* Free the VRAM and doorbell BAR, we most likely need to move both. */
06ec9070 822 amdgpu_device_doorbell_fini(adev);
d6895ad3
CK
823 if (adev->asic_type >= CHIP_BONAIRE)
824 pci_release_resource(adev->pdev, 2);
825
826 pci_release_resource(adev->pdev, 0);
827
828 r = pci_resize_resource(adev->pdev, 0, rbar_size);
829 if (r == -ENOSPC)
830 DRM_INFO("Not enough PCI address space for a large BAR.");
831 else if (r && r != -ENOTSUPP)
832 DRM_ERROR("Problem resizing BAR0 (%d).", r);
833
834 pci_assign_unassigned_bus_resources(adev->pdev->bus);
835
836 /* When the doorbell or fb BAR isn't available we have no chance of
837 * using the device.
838 */
06ec9070 839 r = amdgpu_device_doorbell_init(adev);
d6895ad3
CK
840 if (r || (pci_resource_flags(adev->pdev, 0) & IORESOURCE_UNSET))
841 return -ENODEV;
842
843 pci_write_config_word(adev->pdev, PCI_COMMAND, cmd);
844
845 return 0;
846}
a05502e5 847
d38ceaf9
AD
848/*
849 * GPU helpers function.
850 */
851/**
39c640c0 852 * amdgpu_device_need_post - check if the hw need post or not
853 *
854 * @adev: amdgpu_device pointer
855 *
856 * Check if the asic has been initialized (all asics) at driver startup,
857 * or if a post is needed because a hw reset was performed.
858 * Returns true if post is needed, false if not.
d38ceaf9 859 */
39c640c0 860bool amdgpu_device_need_post(struct amdgpu_device *adev)
861{
862 uint32_t reg;
863
864 if (amdgpu_sriov_vf(adev))
865 return false;
866
867 if (amdgpu_passthrough(adev)) {
868 /* for FIJI: In the whole-GPU pass-through virtualization case, after a VM reboot
869 * some old SMC firmware still needs the driver to do a vPost, otherwise the GPU hangs.
870 * SMC firmware versions above 22.15 don't have this flaw, so we force
871 * vPost to be executed for SMC versions below 22.15
872 */
873 if (adev->asic_type == CHIP_FIJI) {
874 int err;
875 uint32_t fw_ver;
876 err = request_firmware(&adev->pm.fw, "amdgpu/fiji_smc.bin", adev->dev);
877 /* force vPost if error occurred */
878 if (err)
879 return true;
880
881 fw_ver = *((uint32_t *)adev->pm.fw->data + 69);
882 if (fw_ver < 0x00160e00)
883 return true;
bec86378 884 }
bec86378 885 }
91fe77eb 886
887 if (adev->has_hw_reset) {
888 adev->has_hw_reset = false;
889 return true;
890 }
891
892 /* bios scratch used on CIK+ */
893 if (adev->asic_type >= CHIP_BONAIRE)
894 return amdgpu_atombios_scratch_need_asic_init(adev);
895
896 /* check MEM_SIZE for older asics */
897 reg = amdgpu_asic_get_config_memsize(adev);
898
899 if ((reg != 0) && (reg != 0xffffffff))
900 return false;
901
902 return true;
903}
904
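/*
 * Illustrative sketch (kept out of the build, not part of the original file):
 * how an init/resume path typically consumes amdgpu_device_need_post().
 * amdgpu_atom_asic_init() and adev->mode_info.atom_context are existing driver
 * symbols, but this simplified call site is hypothetical.
 */
#if 0
static int example_post_if_needed(struct amdgpu_device *adev)
{
	if (!amdgpu_device_need_post(adev))
		return 0;

	DRM_INFO("GPU not posted, posting now\n");
	return amdgpu_atom_asic_init(adev->mode_info.atom_context);
}
#endif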
905/* if we get transitioned to only one device, take VGA back */
906/**
06ec9070 907 * amdgpu_device_vga_set_decode - enable/disable vga decode
d38ceaf9
AD
908 *
909 * @cookie: amdgpu_device pointer
910 * @state: enable/disable vga decode
911 *
912 * Enable/disable vga decode (all asics).
913 * Returns VGA resource flags.
914 */
06ec9070 915static unsigned int amdgpu_device_vga_set_decode(void *cookie, bool state)
d38ceaf9
AD
916{
917 struct amdgpu_device *adev = cookie;
918 amdgpu_asic_set_vga_state(adev, state);
919 if (state)
920 return VGA_RSRC_LEGACY_IO | VGA_RSRC_LEGACY_MEM |
921 VGA_RSRC_NORMAL_IO | VGA_RSRC_NORMAL_MEM;
922 else
923 return VGA_RSRC_NORMAL_IO | VGA_RSRC_NORMAL_MEM;
924}
925
926/**
927 * amdgpu_device_check_block_size - validate the vm block size
928 *
929 * @adev: amdgpu_device pointer
930 *
931 * Validates the vm block size specified via module parameter.
932 * The vm block size defines the number of bits in page table versus page directory,
933 * a page is 4KB so we have 12 bits offset, minimum 9 bits in the
934 * page table and the remaining bits are in the page directory.
935 */
06ec9070 936static void amdgpu_device_check_block_size(struct amdgpu_device *adev)
a1adf8be
CZ
937{
938 /* defines number of bits in page table versus page directory,
939 * a page is 4KB so we have 12 bits offset, minimum 9 bits in the
940 * page table and the remaining bits are in the page directory */
bab4fee7
JZ
941 if (amdgpu_vm_block_size == -1)
942 return;
a1adf8be 943
bab4fee7 944 if (amdgpu_vm_block_size < 9) {
a1adf8be
CZ
945 dev_warn(adev->dev, "VM page table size (%d) too small\n",
946 amdgpu_vm_block_size);
97489129 947 amdgpu_vm_block_size = -1;
a1adf8be 948 }
a1adf8be
CZ
949}
950
e3ecdffa
AD
951/**
952 * amdgpu_device_check_vm_size - validate the vm size
953 *
954 * @adev: amdgpu_device pointer
955 *
956 * Validates the vm size in GB specified via module parameter.
957 * The VM size is the size of the GPU virtual memory space in GB.
958 */
06ec9070 959static void amdgpu_device_check_vm_size(struct amdgpu_device *adev)
83ca145d 960{
64dab074
AD
961 /* no need to check the default value */
962 if (amdgpu_vm_size == -1)
963 return;
964
83ca145d
ZJ
965 if (amdgpu_vm_size < 1) {
966 dev_warn(adev->dev, "VM size (%d) too small, min is 1GB\n",
967 amdgpu_vm_size);
f3368128 968 amdgpu_vm_size = -1;
83ca145d 969 }
83ca145d
ZJ
970}
971
972static void amdgpu_device_check_smu_prv_buffer_size(struct amdgpu_device *adev)
973{
974 struct sysinfo si;
975 bool is_os_64 = (sizeof(void *) == 8) ? true : false;
976 uint64_t total_memory;
977 uint64_t dram_size_seven_GB = 0x1B8000000;
978 uint64_t dram_size_three_GB = 0xB8000000;
979
980 if (amdgpu_smu_memory_pool_size == 0)
981 return;
982
983 if (!is_os_64) {
984 DRM_WARN("Not 64-bit OS, feature not supported\n");
985 goto def_value;
986 }
987 si_meminfo(&si);
988 total_memory = (uint64_t)si.totalram * si.mem_unit;
989
990 if ((amdgpu_smu_memory_pool_size == 1) ||
991 (amdgpu_smu_memory_pool_size == 2)) {
992 if (total_memory < dram_size_three_GB)
993 goto def_value1;
994 } else if ((amdgpu_smu_memory_pool_size == 4) ||
995 (amdgpu_smu_memory_pool_size == 8)) {
996 if (total_memory < dram_size_seven_GB)
997 goto def_value1;
998 } else {
999 DRM_WARN("Smu memory pool size not supported\n");
1000 goto def_value;
1001 }
1002 adev->pm.smu_prv_buffer_size = amdgpu_smu_memory_pool_size << 28;
1003
1004 return;
1005
1006def_value1:
1007 DRM_WARN("Not enough system memory\n");
1008def_value:
1009 adev->pm.smu_prv_buffer_size = 0;
1010}
1011
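/*
 * Illustrative note (not part of the original file): amdgpu_smu_memory_pool_size
 * is expressed in units of 256MB (the value is shifted left by 28 bits), so
 * e.g. amdgpu.smu_memory_pool_size=2 reserves a 512MB pool. Per the checks
 * above, sizes 1/2 require roughly 3GB of system RAM and sizes 4/8 roughly 7GB.
 */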
d38ceaf9 1012/**
06ec9070 1013 * amdgpu_device_check_arguments - validate module params
1014 *
1015 * @adev: amdgpu_device pointer
1016 *
1017 * Validates certain module parameters and updates
1018 * the associated values used by the driver (all asics).
1019 */
912dfc84 1020static int amdgpu_device_check_arguments(struct amdgpu_device *adev)
d38ceaf9 1021{
912dfc84
EQ
1022 int ret = 0;
1023
5b011235
CZ
1024 if (amdgpu_sched_jobs < 4) {
1025 dev_warn(adev->dev, "sched jobs (%d) must be at least 4\n",
1026 amdgpu_sched_jobs);
1027 amdgpu_sched_jobs = 4;
76117507 1028 } else if (!is_power_of_2(amdgpu_sched_jobs)){
5b011235
CZ
1029 dev_warn(adev->dev, "sched jobs (%d) must be a power of 2\n",
1030 amdgpu_sched_jobs);
1031 amdgpu_sched_jobs = roundup_pow_of_two(amdgpu_sched_jobs);
1032 }
d38ceaf9 1033
83e74db6 1034 if (amdgpu_gart_size != -1 && amdgpu_gart_size < 32) {
f9321cc4
CK
1035 /* gart size must be greater or equal to 32M */
1036 dev_warn(adev->dev, "gart size (%d) too small\n",
1037 amdgpu_gart_size);
83e74db6 1038 amdgpu_gart_size = -1;
d38ceaf9
AD
1039 }
1040
36d38372 1041 if (amdgpu_gtt_size != -1 && amdgpu_gtt_size < 32) {
c4e1a13a 1042 /* gtt size must be greater or equal to 32M */
36d38372
CK
1043 dev_warn(adev->dev, "gtt size (%d) too small\n",
1044 amdgpu_gtt_size);
1045 amdgpu_gtt_size = -1;
d38ceaf9
AD
1046 }
1047
d07f14be
RH
1048 /* valid range is between 4 and 9 inclusive */
1049 if (amdgpu_vm_fragment_size != -1 &&
1050 (amdgpu_vm_fragment_size > 9 || amdgpu_vm_fragment_size < 4)) {
1051 dev_warn(adev->dev, "valid range is between 4 and 9\n");
1052 amdgpu_vm_fragment_size = -1;
1053 }
1054
7951e376
RZ
1055 amdgpu_device_check_smu_prv_buffer_size(adev);
1056
06ec9070 1057 amdgpu_device_check_vm_size(adev);
d38ceaf9 1058
06ec9070 1059 amdgpu_device_check_block_size(adev);
6a7f76e7 1060
912dfc84
EQ
1061 ret = amdgpu_device_get_job_timeout_settings(adev);
1062 if (ret) {
1063 dev_err(adev->dev, "invalid lockup_timeout parameter syntax\n");
1064 return ret;
8854695a 1065 }
19aede77
AD
1066
1067 adev->firmware.load_type = amdgpu_ucode_get_load_type(adev, amdgpu_fw_load_type);
912dfc84
EQ
1068
1069 return ret;
1070}
1071
1072/**
1073 * amdgpu_switcheroo_set_state - set switcheroo state
1074 *
1075 * @pdev: pci dev pointer
1694467b 1076 * @state: vga_switcheroo state
1077 *
1078 * Callback for the switcheroo driver. Suspends or resumes
1079 * the asics before or after it is powered up using ACPI methods.
1080 */
1081static void amdgpu_switcheroo_set_state(struct pci_dev *pdev, enum vga_switcheroo_state state)
1082{
1083 struct drm_device *dev = pci_get_drvdata(pdev);
1084
1085 if (amdgpu_device_is_px(dev) && state == VGA_SWITCHEROO_OFF)
1086 return;
1087
1088 if (state == VGA_SWITCHEROO_ON) {
7ca85295 1089 pr_info("amdgpu: switched on\n");
d38ceaf9
AD
1090 /* don't suspend or resume card normally */
1091 dev->switch_power_state = DRM_SWITCH_POWER_CHANGING;
1092
810ddc3a 1093 amdgpu_device_resume(dev, true, true);
d38ceaf9 1094
d38ceaf9
AD
1095 dev->switch_power_state = DRM_SWITCH_POWER_ON;
1096 drm_kms_helper_poll_enable(dev);
1097 } else {
7ca85295 1098 pr_info("amdgpu: switched off\n");
d38ceaf9
AD
1099 drm_kms_helper_poll_disable(dev);
1100 dev->switch_power_state = DRM_SWITCH_POWER_CHANGING;
810ddc3a 1101 amdgpu_device_suspend(dev, true, true);
d38ceaf9
AD
1102 dev->switch_power_state = DRM_SWITCH_POWER_OFF;
1103 }
1104}
1105
1106/**
1107 * amdgpu_switcheroo_can_switch - see if switcheroo state can change
1108 *
1109 * @pdev: pci dev pointer
1110 *
1111 * Callback for the switcheroo driver. Checks if the switcheroo
1112 * state can be changed.
1113 * Returns true if the state can be changed, false if not.
1114 */
1115static bool amdgpu_switcheroo_can_switch(struct pci_dev *pdev)
1116{
1117 struct drm_device *dev = pci_get_drvdata(pdev);
1118
1119 /*
1120 * FIXME: open_count is protected by drm_global_mutex but that would lead to
1121 * locking inversion with the driver load path. And the access here is
1122 * completely racy anyway. So don't bother with locking for now.
1123 */
1124 return dev->open_count == 0;
1125}
1126
1127static const struct vga_switcheroo_client_ops amdgpu_switcheroo_ops = {
1128 .set_gpu_state = amdgpu_switcheroo_set_state,
1129 .reprobe = NULL,
1130 .can_switch = amdgpu_switcheroo_can_switch,
1131};
1132
1133/**
1134 * amdgpu_device_ip_set_clockgating_state - set the CG state
1135 *
87e3f136 1136 * @dev: amdgpu_device pointer
e3ecdffa
AD
1137 * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
1138 * @state: clockgating state (gate or ungate)
1139 *
1140 * Sets the requested clockgating state for all instances of
1141 * the hardware IP specified.
1142 * Returns the error code from the last instance.
1143 */
43fa561f 1144int amdgpu_device_ip_set_clockgating_state(void *dev,
2990a1fc
AD
1145 enum amd_ip_block_type block_type,
1146 enum amd_clockgating_state state)
d38ceaf9 1147{
43fa561f 1148 struct amdgpu_device *adev = dev;
d38ceaf9
AD
1149 int i, r = 0;
1150
1151 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 1152 if (!adev->ip_blocks[i].status.valid)
9ecbe7f5 1153 continue;
c722865a
RZ
1154 if (adev->ip_blocks[i].version->type != block_type)
1155 continue;
1156 if (!adev->ip_blocks[i].version->funcs->set_clockgating_state)
1157 continue;
1158 r = adev->ip_blocks[i].version->funcs->set_clockgating_state(
1159 (void *)adev, state);
1160 if (r)
1161 DRM_ERROR("set_clockgating_state of IP block <%s> failed %d\n",
1162 adev->ip_blocks[i].version->funcs->name, r);
1163 }
1164 return r;
1165}
1166
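/*
 * Illustrative sketch (kept out of the build, not part of the original file):
 * gating the clocks of one IP type through the helper above.
 * AMD_IP_BLOCK_TYPE_GFX and AMD_CG_STATE_GATE are existing amd_shared.h enums;
 * the error handling policy shown here is hypothetical.
 */
#if 0
static void example_gate_gfx_clocks(struct amdgpu_device *adev)
{
	int r;

	r = amdgpu_device_ip_set_clockgating_state(adev,
						   AMD_IP_BLOCK_TYPE_GFX,
						   AMD_CG_STATE_GATE);
	if (r)
		DRM_ERROR("failed to gate GFX clocks (%d)\n", r);
}
#endif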
1167/**
1168 * amdgpu_device_ip_set_powergating_state - set the PG state
1169 *
87e3f136 1170 * @dev: amdgpu_device pointer
e3ecdffa
AD
1171 * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
1172 * @state: powergating state (gate or ungate)
1173 *
1174 * Sets the requested powergating state for all instances of
1175 * the hardware IP specified.
1176 * Returns the error code from the last instance.
1177 */
43fa561f 1178int amdgpu_device_ip_set_powergating_state(void *dev,
2990a1fc
AD
1179 enum amd_ip_block_type block_type,
1180 enum amd_powergating_state state)
d38ceaf9 1181{
43fa561f 1182 struct amdgpu_device *adev = dev;
d38ceaf9
AD
1183 int i, r = 0;
1184
1185 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 1186 if (!adev->ip_blocks[i].status.valid)
9ecbe7f5 1187 continue;
c722865a
RZ
1188 if (adev->ip_blocks[i].version->type != block_type)
1189 continue;
1190 if (!adev->ip_blocks[i].version->funcs->set_powergating_state)
1191 continue;
1192 r = adev->ip_blocks[i].version->funcs->set_powergating_state(
1193 (void *)adev, state);
1194 if (r)
1195 DRM_ERROR("set_powergating_state of IP block <%s> failed %d\n",
1196 adev->ip_blocks[i].version->funcs->name, r);
d38ceaf9
AD
1197 }
1198 return r;
1199}
1200
e3ecdffa
AD
1201/**
1202 * amdgpu_device_ip_get_clockgating_state - get the CG state
1203 *
1204 * @adev: amdgpu_device pointer
1205 * @flags: clockgating feature flags
1206 *
1207 * Walks the list of IPs on the device and updates the clockgating
1208 * flags for each IP.
1209 * Updates @flags with the feature flags for each hardware IP where
1210 * clockgating is enabled.
1211 */
2990a1fc
AD
1212void amdgpu_device_ip_get_clockgating_state(struct amdgpu_device *adev,
1213 u32 *flags)
6cb2d4e4
HR
1214{
1215 int i;
1216
1217 for (i = 0; i < adev->num_ip_blocks; i++) {
1218 if (!adev->ip_blocks[i].status.valid)
1219 continue;
1220 if (adev->ip_blocks[i].version->funcs->get_clockgating_state)
1221 adev->ip_blocks[i].version->funcs->get_clockgating_state((void *)adev, flags);
1222 }
1223}
1224
e3ecdffa
AD
1225/**
1226 * amdgpu_device_ip_wait_for_idle - wait for idle
1227 *
1228 * @adev: amdgpu_device pointer
1229 * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
1230 *
1231 * Waits for the requested hardware IP to be idle.
1232 * Returns 0 for success or a negative error code on failure.
1233 */
1234int amdgpu_device_ip_wait_for_idle(struct amdgpu_device *adev,
1235 enum amd_ip_block_type block_type)
5dbbb60b
AD
1236{
1237 int i, r;
1238
1239 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 1240 if (!adev->ip_blocks[i].status.valid)
9ecbe7f5 1241 continue;
a1255107
AD
1242 if (adev->ip_blocks[i].version->type == block_type) {
1243 r = adev->ip_blocks[i].version->funcs->wait_for_idle((void *)adev);
5dbbb60b
AD
1244 if (r)
1245 return r;
1246 break;
1247 }
1248 }
1249 return 0;
1250
1251}
1252
e3ecdffa
AD
1253/**
1254 * amdgpu_device_ip_is_idle - is the hardware IP idle
1255 *
1256 * @adev: amdgpu_device pointer
1257 * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
1258 *
1259 * Check if the hardware IP is idle or not.
1260 * Returns true if the IP is idle, false if not.
1261 */
1262bool amdgpu_device_ip_is_idle(struct amdgpu_device *adev,
1263 enum amd_ip_block_type block_type)
1264{
1265 int i;
1266
1267 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 1268 if (!adev->ip_blocks[i].status.valid)
9ecbe7f5 1269 continue;
a1255107
AD
1270 if (adev->ip_blocks[i].version->type == block_type)
1271 return adev->ip_blocks[i].version->funcs->is_idle((void *)adev);
5dbbb60b
AD
1272 }
1273 return true;
1274
1275}
1276
e3ecdffa
AD
1277/**
1278 * amdgpu_device_ip_get_ip_block - get a hw IP pointer
1279 *
1280 * @adev: amdgpu_device pointer
87e3f136 1281 * @type: Type of hardware IP (SMU, GFX, UVD, etc.)
e3ecdffa
AD
1282 *
1283 * Returns a pointer to the hardware IP block structure
1284 * if it exists for the asic, otherwise NULL.
1285 */
2990a1fc
AD
1286struct amdgpu_ip_block *
1287amdgpu_device_ip_get_ip_block(struct amdgpu_device *adev,
1288 enum amd_ip_block_type type)
d38ceaf9
AD
1289{
1290 int i;
1291
1292 for (i = 0; i < adev->num_ip_blocks; i++)
a1255107 1293 if (adev->ip_blocks[i].version->type == type)
d38ceaf9
AD
1294 return &adev->ip_blocks[i];
1295
1296 return NULL;
1297}
1298
1299/**
2990a1fc 1300 * amdgpu_device_ip_block_version_cmp
d38ceaf9
AD
1301 *
1302 * @adev: amdgpu_device pointer
5fc3aeeb 1303 * @type: enum amd_ip_block_type
d38ceaf9
AD
1304 * @major: major version
1305 * @minor: minor version
1306 *
1307 * return 0 if equal or greater
1308 * return 1 if smaller or the ip_block doesn't exist
1309 */
2990a1fc
AD
1310int amdgpu_device_ip_block_version_cmp(struct amdgpu_device *adev,
1311 enum amd_ip_block_type type,
1312 u32 major, u32 minor)
d38ceaf9 1313{
2990a1fc 1314 struct amdgpu_ip_block *ip_block = amdgpu_device_ip_get_ip_block(adev, type);
d38ceaf9 1315
a1255107
AD
1316 if (ip_block && ((ip_block->version->major > major) ||
1317 ((ip_block->version->major == major) &&
1318 (ip_block->version->minor >= minor))))
1319 return 0;
1320
1321 return 1;
1322}
1323
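/*
 * Illustrative sketch (kept out of the build, not part of the original file):
 * querying an IP block and comparing its version with the two helpers above.
 * The GFX 9.0 version numbers are hypothetical placeholders.
 */
#if 0
static bool example_gfx_is_at_least_9_0(struct amdgpu_device *adev)
{
	struct amdgpu_ip_block *ip_block;

	ip_block = amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_GFX);
	if (!ip_block)
		return false;

	/* cmp returns 0 when the discovered version is >= major.minor */
	return amdgpu_device_ip_block_version_cmp(adev, AMD_IP_BLOCK_TYPE_GFX, 9, 0) == 0;
}
#endif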
a1255107 1324/**
2990a1fc 1325 * amdgpu_device_ip_block_add
1326 *
1327 * @adev: amdgpu_device pointer
1328 * @ip_block_version: pointer to the IP to add
1329 *
1330 * Adds the IP block driver information to the collection of IPs
1331 * on the asic.
1332 */
2990a1fc
AD
1333int amdgpu_device_ip_block_add(struct amdgpu_device *adev,
1334 const struct amdgpu_ip_block_version *ip_block_version)
a1255107
AD
1335{
1336 if (!ip_block_version)
1337 return -EINVAL;
1338
e966a725 1339 DRM_INFO("add ip block number %d <%s>\n", adev->num_ip_blocks,
a0bae357
HR
1340 ip_block_version->funcs->name);
1341
a1255107
AD
1342 adev->ip_blocks[adev->num_ip_blocks++].version = ip_block_version;
1343
1344 return 0;
1345}
1346
1347/**
1348 * amdgpu_device_enable_virtual_display - enable virtual display feature
1349 *
1350 * @adev: amdgpu_device pointer
1351 *
1352 * Enables the virtual display feature if the user has enabled it via
1353 * the module parameter virtual_display. This feature provides a virtual
1354 * display hardware on headless boards or in virtualized environments.
1355 * This function parses and validates the configuration string specified by
1356 * the user and configures the virtual display configuration (number of
1357 * virtual connectors, crtcs, etc.) specified.
1358 */
483ef985 1359static void amdgpu_device_enable_virtual_display(struct amdgpu_device *adev)
1360{
1361 adev->enable_virtual_display = false;
1362
1363 if (amdgpu_virtual_display) {
1364 struct drm_device *ddev = adev->ddev;
1365 const char *pci_address_name = pci_name(ddev->pdev);
0f66356d 1366 char *pciaddstr, *pciaddstr_tmp, *pciaddname_tmp, *pciaddname;
9accf2fd
ED
1367
1368 pciaddstr = kstrdup(amdgpu_virtual_display, GFP_KERNEL);
1369 pciaddstr_tmp = pciaddstr;
0f66356d
ED
1370 while ((pciaddname_tmp = strsep(&pciaddstr_tmp, ";"))) {
1371 pciaddname = strsep(&pciaddname_tmp, ",");
967de2a9
YT
1372 if (!strcmp("all", pciaddname)
1373 || !strcmp(pci_address_name, pciaddname)) {
0f66356d
ED
1374 long num_crtc;
1375 int res = -1;
1376
9accf2fd 1377 adev->enable_virtual_display = true;
0f66356d
ED
1378
1379 if (pciaddname_tmp)
1380 res = kstrtol(pciaddname_tmp, 10,
1381 &num_crtc);
1382
1383 if (!res) {
1384 if (num_crtc < 1)
1385 num_crtc = 1;
1386 if (num_crtc > 6)
1387 num_crtc = 6;
1388 adev->mode_info.num_crtc = num_crtc;
1389 } else {
1390 adev->mode_info.num_crtc = 1;
1391 }
9accf2fd
ED
1392 break;
1393 }
1394 }
1395
0f66356d
ED
1396 DRM_INFO("virtual display string:%s, %s:virtual_display:%d, num_crtc:%d\n",
1397 amdgpu_virtual_display, pci_address_name,
1398 adev->enable_virtual_display, adev->mode_info.num_crtc);
1399
1400 kfree(pciaddstr);
1401 }
1402}
1403
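/*
 * Illustrative note (not part of the original file): the amdgpu.virtual_display
 * module parameter parsed above is a semicolon-separated list of
 * "<pci address>,<number of crtcs>" entries, or "all" to match every device.
 * The addresses below are hypothetical examples:
 *
 *   modprobe amdgpu virtual_display=0000:03:00.0,2
 *   modprobe amdgpu virtual_display=all,1
 */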
1404/**
1405 * amdgpu_device_parse_gpu_info_fw - parse gpu info firmware
1406 *
1407 * @adev: amdgpu_device pointer
1408 *
1409 * Parses the asic configuration parameters specified in the gpu info
1410 * firmware and makes them available to the driver for use in configuring
1411 * the asic.
1412 * Returns 0 on success, -EINVAL on failure.
1413 */
1414static int amdgpu_device_parse_gpu_info_fw(struct amdgpu_device *adev)
1415{
e2a75f88
AD
1416 const char *chip_name;
1417 char fw_name[30];
1418 int err;
1419 const struct gpu_info_firmware_header_v1_0 *hdr;
1420
ab4fe3e1
HR
1421 adev->firmware.gpu_info_fw = NULL;
1422
e2a75f88
AD
1423 switch (adev->asic_type) {
1424 case CHIP_TOPAZ:
1425 case CHIP_TONGA:
1426 case CHIP_FIJI:
e2a75f88 1427 case CHIP_POLARIS10:
cc07f18d 1428 case CHIP_POLARIS11:
e2a75f88 1429 case CHIP_POLARIS12:
cc07f18d 1430 case CHIP_VEGAM:
e2a75f88
AD
1431 case CHIP_CARRIZO:
1432 case CHIP_STONEY:
1433#ifdef CONFIG_DRM_AMDGPU_SI
1434 case CHIP_VERDE:
1435 case CHIP_TAHITI:
1436 case CHIP_PITCAIRN:
1437 case CHIP_OLAND:
1438 case CHIP_HAINAN:
1439#endif
1440#ifdef CONFIG_DRM_AMDGPU_CIK
1441 case CHIP_BONAIRE:
1442 case CHIP_HAWAII:
1443 case CHIP_KAVERI:
1444 case CHIP_KABINI:
1445 case CHIP_MULLINS:
1446#endif
27c0bc71 1447 case CHIP_VEGA20:
e2a75f88
AD
1448 default:
1449 return 0;
1450 case CHIP_VEGA10:
1451 chip_name = "vega10";
1452 break;
3f76dced
AD
1453 case CHIP_VEGA12:
1454 chip_name = "vega12";
1455 break;
2d2e5e7e 1456 case CHIP_RAVEN:
54c4d17e
FX
1457 if (adev->rev_id >= 8)
1458 chip_name = "raven2";
741deade
AD
1459 else if (adev->pdev->device == 0x15d8)
1460 chip_name = "picasso";
54c4d17e
FX
1461 else
1462 chip_name = "raven";
2d2e5e7e 1463 break;
65e60f6e
LM
1464 case CHIP_ARCTURUS:
1465 chip_name = "arcturus";
1466 break;
23c6268e
HR
1467 case CHIP_NAVI10:
1468 chip_name = "navi10";
1469 break;
ed42cfe1
XY
1470 case CHIP_NAVI14:
1471 chip_name = "navi14";
1472 break;
42b325e5
XY
1473 case CHIP_NAVI12:
1474 chip_name = "navi12";
1475 break;
e2a75f88
AD
1476 }
1477
1478 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_gpu_info.bin", chip_name);
ab4fe3e1 1479 err = request_firmware(&adev->firmware.gpu_info_fw, fw_name, adev->dev);
e2a75f88
AD
1480 if (err) {
1481 dev_err(adev->dev,
1482 "Failed to load gpu_info firmware \"%s\"\n",
1483 fw_name);
1484 goto out;
1485 }
ab4fe3e1 1486 err = amdgpu_ucode_validate(adev->firmware.gpu_info_fw);
e2a75f88
AD
1487 if (err) {
1488 dev_err(adev->dev,
1489 "Failed to validate gpu_info firmware \"%s\"\n",
1490 fw_name);
1491 goto out;
1492 }
1493
ab4fe3e1 1494 hdr = (const struct gpu_info_firmware_header_v1_0 *)adev->firmware.gpu_info_fw->data;
e2a75f88
AD
1495 amdgpu_ucode_print_gpu_info_hdr(&hdr->header);
1496
1497 switch (hdr->version_major) {
1498 case 1:
1499 {
1500 const struct gpu_info_firmware_v1_0 *gpu_info_fw =
ab4fe3e1 1501 (const struct gpu_info_firmware_v1_0 *)(adev->firmware.gpu_info_fw->data +
e2a75f88
AD
1502 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1503
b5ab16bf
AD
1504 adev->gfx.config.max_shader_engines = le32_to_cpu(gpu_info_fw->gc_num_se);
1505 adev->gfx.config.max_cu_per_sh = le32_to_cpu(gpu_info_fw->gc_num_cu_per_sh);
1506 adev->gfx.config.max_sh_per_se = le32_to_cpu(gpu_info_fw->gc_num_sh_per_se);
1507 adev->gfx.config.max_backends_per_se = le32_to_cpu(gpu_info_fw->gc_num_rb_per_se);
e2a75f88 1508 adev->gfx.config.max_texture_channel_caches =
b5ab16bf
AD
1509 le32_to_cpu(gpu_info_fw->gc_num_tccs);
1510 adev->gfx.config.max_gprs = le32_to_cpu(gpu_info_fw->gc_num_gprs);
1511 adev->gfx.config.max_gs_threads = le32_to_cpu(gpu_info_fw->gc_num_max_gs_thds);
1512 adev->gfx.config.gs_vgt_table_depth = le32_to_cpu(gpu_info_fw->gc_gs_table_depth);
1513 adev->gfx.config.gs_prim_buffer_depth = le32_to_cpu(gpu_info_fw->gc_gsprim_buff_depth);
e2a75f88 1514 adev->gfx.config.double_offchip_lds_buf =
b5ab16bf
AD
1515 le32_to_cpu(gpu_info_fw->gc_double_offchip_lds_buffer);
1516 adev->gfx.cu_info.wave_front_size = le32_to_cpu(gpu_info_fw->gc_wave_size);
51fd0370
HZ
1517 adev->gfx.cu_info.max_waves_per_simd =
1518 le32_to_cpu(gpu_info_fw->gc_max_waves_per_simd);
1519 adev->gfx.cu_info.max_scratch_slots_per_cu =
1520 le32_to_cpu(gpu_info_fw->gc_max_scratch_slots_per_cu);
1521 adev->gfx.cu_info.lds_size = le32_to_cpu(gpu_info_fw->gc_lds_size);
48321c3d 1522 if (hdr->version_minor >= 1) {
35c2e910
HZ
1523 const struct gpu_info_firmware_v1_1 *gpu_info_fw =
1524 (const struct gpu_info_firmware_v1_1 *)(adev->firmware.gpu_info_fw->data +
1525 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1526 adev->gfx.config.num_sc_per_sh =
1527 le32_to_cpu(gpu_info_fw->num_sc_per_sh);
1528 adev->gfx.config.num_packer_per_sc =
1529 le32_to_cpu(gpu_info_fw->num_packer_per_sc);
1530 }
48321c3d
HW
1531#ifdef CONFIG_DRM_AMD_DC_DCN2_0
1532 if (hdr->version_minor == 2) {
1533 const struct gpu_info_firmware_v1_2 *gpu_info_fw =
1534 (const struct gpu_info_firmware_v1_2 *)(adev->firmware.gpu_info_fw->data +
1535 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1536 adev->dm.soc_bounding_box = &gpu_info_fw->soc_bounding_box;
1537 }
1538#endif
e2a75f88
AD
1539 break;
1540 }
1541 default:
1542 dev_err(adev->dev,
1543 "Unsupported gpu_info table %d\n", hdr->header.ucode_version);
1544 err = -EINVAL;
1545 goto out;
1546 }
1547out:
e2a75f88
AD
1548 return err;
1549}
1550
1551/**
1552 * amdgpu_device_ip_early_init - run early init for hardware IPs
1553 *
1554 * @adev: amdgpu_device pointer
1555 *
1556 * Early initialization pass for hardware IPs. The hardware IPs that make
1557 * up each asic are discovered and each IP's early_init callback is run. This
1558 * is the first stage in initializing the asic.
1559 * Returns 0 on success, negative error code on failure.
1560 */
06ec9070 1561static int amdgpu_device_ip_early_init(struct amdgpu_device *adev)
d38ceaf9 1562{
aaa36a97 1563 int i, r;
d38ceaf9 1564
483ef985 1565 amdgpu_device_enable_virtual_display(adev);
a6be7570 1566
d38ceaf9 1567 switch (adev->asic_type) {
aaa36a97
AD
1568 case CHIP_TOPAZ:
1569 case CHIP_TONGA:
48299f95 1570 case CHIP_FIJI:
2cc0c0b5 1571 case CHIP_POLARIS10:
32cc7e53 1572 case CHIP_POLARIS11:
c4642a47 1573 case CHIP_POLARIS12:
32cc7e53 1574 case CHIP_VEGAM:
aaa36a97 1575 case CHIP_CARRIZO:
39bb0c92
SL
1576 case CHIP_STONEY:
1577 if (adev->asic_type == CHIP_CARRIZO || adev->asic_type == CHIP_STONEY)
aaa36a97
AD
1578 adev->family = AMDGPU_FAMILY_CZ;
1579 else
1580 adev->family = AMDGPU_FAMILY_VI;
1581
1582 r = vi_set_ip_blocks(adev);
1583 if (r)
1584 return r;
1585 break;
33f34802
KW
1586#ifdef CONFIG_DRM_AMDGPU_SI
1587 case CHIP_VERDE:
1588 case CHIP_TAHITI:
1589 case CHIP_PITCAIRN:
1590 case CHIP_OLAND:
1591 case CHIP_HAINAN:
295d0daf 1592 adev->family = AMDGPU_FAMILY_SI;
33f34802
KW
1593 r = si_set_ip_blocks(adev);
1594 if (r)
1595 return r;
1596 break;
1597#endif
a2e73f56
AD
1598#ifdef CONFIG_DRM_AMDGPU_CIK
1599 case CHIP_BONAIRE:
1600 case CHIP_HAWAII:
1601 case CHIP_KAVERI:
1602 case CHIP_KABINI:
1603 case CHIP_MULLINS:
1604 if ((adev->asic_type == CHIP_BONAIRE) || (adev->asic_type == CHIP_HAWAII))
1605 adev->family = AMDGPU_FAMILY_CI;
1606 else
1607 adev->family = AMDGPU_FAMILY_KV;
1608
1609 r = cik_set_ip_blocks(adev);
1610 if (r)
1611 return r;
1612 break;
1613#endif
e48a3cd9
AD
1614 case CHIP_VEGA10:
1615 case CHIP_VEGA12:
e4bd8170 1616 case CHIP_VEGA20:
e48a3cd9 1617 case CHIP_RAVEN:
61cf44c1 1618 case CHIP_ARCTURUS:
741deade 1619 if (adev->asic_type == CHIP_RAVEN)
2ca8a5d2
CZ
1620 adev->family = AMDGPU_FAMILY_RV;
1621 else
1622 adev->family = AMDGPU_FAMILY_AI;
460826e6
KW
1623
1624 r = soc15_set_ip_blocks(adev);
1625 if (r)
1626 return r;
1627 break;
0a5b8c7b 1628 case CHIP_NAVI10:
7ecb5cd4 1629 case CHIP_NAVI14:
4808cf9c 1630 case CHIP_NAVI12:
0a5b8c7b
HR
1631 adev->family = AMDGPU_FAMILY_NV;
1632
1633 r = nv_set_ip_blocks(adev);
1634 if (r)
1635 return r;
1636 break;
d38ceaf9
AD
1637 default:
1638 /* FIXME: not supported yet */
1639 return -EINVAL;
1640 }
1641
e2a75f88
AD
1642 r = amdgpu_device_parse_gpu_info_fw(adev);
1643 if (r)
1644 return r;
1645
1884734a 1646 amdgpu_amdkfd_device_probe(adev);
1647
3149d9da
XY
1648 if (amdgpu_sriov_vf(adev)) {
1649 r = amdgpu_virt_request_full_gpu(adev, true);
1650 if (r)
5ffa61c1 1651 return -EAGAIN;
3149d9da
XY
1652 }
1653
3b94fb10 1654 adev->pm.pp_feature = amdgpu_pp_feature_mask;
00544006
HR
1655 if (amdgpu_sriov_vf(adev))
1656 adev->pm.pp_feature &= ~PP_GFXOFF_MASK;
00f54b97 1657
d38ceaf9
AD
1658 for (i = 0; i < adev->num_ip_blocks; i++) {
1659 if ((amdgpu_ip_block_mask & (1 << i)) == 0) {
ed8cf00c
HR
1660 DRM_ERROR("disabled ip block: %d <%s>\n",
1661 i, adev->ip_blocks[i].version->funcs->name);
a1255107 1662 adev->ip_blocks[i].status.valid = false;
d38ceaf9 1663 } else {
a1255107
AD
1664 if (adev->ip_blocks[i].version->funcs->early_init) {
1665 r = adev->ip_blocks[i].version->funcs->early_init((void *)adev);
2c1a2784 1666 if (r == -ENOENT) {
a1255107 1667 adev->ip_blocks[i].status.valid = false;
2c1a2784 1668 } else if (r) {
a1255107
AD
1669 DRM_ERROR("early_init of IP block <%s> failed %d\n",
1670 adev->ip_blocks[i].version->funcs->name, r);
d38ceaf9 1671 return r;
2c1a2784 1672 } else {
a1255107 1673 adev->ip_blocks[i].status.valid = true;
2c1a2784 1674 }
974e6b64 1675 } else {
a1255107 1676 adev->ip_blocks[i].status.valid = true;
d38ceaf9 1677 }
d38ceaf9 1678 }
21a249ca
AD
1679 /* get the vbios after the asic_funcs are set up */
1680 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON) {
1681 /* Read BIOS */
1682 if (!amdgpu_get_bios(adev))
1683 return -EINVAL;
1684
1685 r = amdgpu_atombios_init(adev);
1686 if (r) {
1687 dev_err(adev->dev, "amdgpu_atombios_init failed\n");
1688 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_INIT_FAIL, 0, 0);
1689 return r;
1690 }
1691 }
d38ceaf9
AD
1692 }
1693
395d1fb9
NH
1694 adev->cg_flags &= amdgpu_cg_mask;
1695 adev->pg_flags &= amdgpu_pg_mask;
1696
d38ceaf9
AD
1697 return 0;
1698}
1699
0a4f2520
RZ
1700static int amdgpu_device_ip_hw_init_phase1(struct amdgpu_device *adev)
1701{
1702 int i, r;
1703
1704 for (i = 0; i < adev->num_ip_blocks; i++) {
1705 if (!adev->ip_blocks[i].status.sw)
1706 continue;
1707 if (adev->ip_blocks[i].status.hw)
1708 continue;
1709 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
2d11fd3f 1710 (amdgpu_sriov_vf(adev) && (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP)) ||
0a4f2520
RZ
1711 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH) {
1712 r = adev->ip_blocks[i].version->funcs->hw_init(adev);
1713 if (r) {
1714 DRM_ERROR("hw_init of IP block <%s> failed %d\n",
1715 adev->ip_blocks[i].version->funcs->name, r);
1716 return r;
1717 }
1718 adev->ip_blocks[i].status.hw = true;
1719 }
1720 }
1721
1722 return 0;
1723}
1724
1725static int amdgpu_device_ip_hw_init_phase2(struct amdgpu_device *adev)
1726{
1727 int i, r;
1728
1729 for (i = 0; i < adev->num_ip_blocks; i++) {
1730 if (!adev->ip_blocks[i].status.sw)
1731 continue;
1732 if (adev->ip_blocks[i].status.hw)
1733 continue;
1734 r = adev->ip_blocks[i].version->funcs->hw_init(adev);
1735 if (r) {
1736 DRM_ERROR("hw_init of IP block <%s> failed %d\n",
1737 adev->ip_blocks[i].version->funcs->name, r);
1738 return r;
1739 }
1740 adev->ip_blocks[i].status.hw = true;
1741 }
1742
1743 return 0;
1744}
1745
7a3e0bb2
RZ
1746static int amdgpu_device_fw_loading(struct amdgpu_device *adev)
1747{
1748 int r = 0;
1749 int i;
80f41f84 1750 uint32_t smu_version;
7a3e0bb2
RZ
1751
1752 if (adev->asic_type >= CHIP_VEGA10) {
1753 for (i = 0; i < adev->num_ip_blocks; i++) {
482f0e53
ML
1754 if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_PSP)
1755 continue;
1756
1757 /* no need to do the fw loading again if already done */
1758 if (adev->ip_blocks[i].status.hw == true)
1759 break;
1760
1761 if (adev->in_gpu_reset || adev->in_suspend) {
1762 r = adev->ip_blocks[i].version->funcs->resume(adev);
1763 if (r) {
1764 DRM_ERROR("resume of IP block <%s> failed %d\n",
7a3e0bb2 1765 adev->ip_blocks[i].version->funcs->name, r);
482f0e53
ML
1766 return r;
1767 }
1768 } else {
1769 r = adev->ip_blocks[i].version->funcs->hw_init(adev);
1770 if (r) {
1771 DRM_ERROR("hw_init of IP block <%s> failed %d\n",
1772 adev->ip_blocks[i].version->funcs->name, r);
1773 return r;
7a3e0bb2 1774 }
7a3e0bb2 1775 }
482f0e53
ML
1776
1777 adev->ip_blocks[i].status.hw = true;
1778 break;
7a3e0bb2
RZ
1779 }
1780 }
482f0e53 1781
80f41f84 1782 r = amdgpu_pm_load_smu_firmware(adev, &smu_version);
7a3e0bb2 1783
80f41f84 1784 return r;
7a3e0bb2
RZ
1785}
1786
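/*
 * Illustrative sketch, not part of the original file: the three-step
 * hardware bring-up order the init path uses. Phase 1 brings up the
 * COMMON and IH blocks (plus PSP under SR-IOV), firmware is then loaded
 * through PSP, and phase 2 brings up the remaining blocks. The wrapper
 * name below is an assumption used only for illustration.
 */
static int amdgpu_device_hw_bringup_sketch(struct amdgpu_device *adev)
{
        int r;

        r = amdgpu_device_ip_hw_init_phase1(adev);      /* COMMON, IH (and PSP for SR-IOV) */
        if (r)
                return r;

        r = amdgpu_device_fw_loading(adev);             /* microcode via PSP, SMU fw version */
        if (r)
                return r;

        return amdgpu_device_ip_hw_init_phase2(adev);   /* all remaining IP blocks */
}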
e3ecdffa
AD
1787/**
1788 * amdgpu_device_ip_init - run init for hardware IPs
1789 *
1790 * @adev: amdgpu_device pointer
1791 *
1792 * Main initialization pass for hardware IPs. The list of all the hardware
1793 * IPs that make up the asic is walked and the sw_init and hw_init callbacks
1794 * are run. sw_init initializes the software state associated with each IP
1795 * and hw_init initializes the hardware associated with each IP.
1796 * Returns 0 on success, negative error code on failure.
1797 */
06ec9070 1798static int amdgpu_device_ip_init(struct amdgpu_device *adev)
d38ceaf9
AD
1799{
1800 int i, r;
1801
c030f2e4 1802 r = amdgpu_ras_init(adev);
1803 if (r)
1804 return r;
1805
d38ceaf9 1806 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 1807 if (!adev->ip_blocks[i].status.valid)
d38ceaf9 1808 continue;
a1255107 1809 r = adev->ip_blocks[i].version->funcs->sw_init((void *)adev);
2c1a2784 1810 if (r) {
a1255107
AD
1811 DRM_ERROR("sw_init of IP block <%s> failed %d\n",
1812 adev->ip_blocks[i].version->funcs->name, r);
72d3f592 1813 goto init_failed;
2c1a2784 1814 }
a1255107 1815 adev->ip_blocks[i].status.sw = true;
bfca0289 1816
d38ceaf9 1817 /* need to do gmc hw init early so we can allocate gpu mem */
a1255107 1818 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) {
06ec9070 1819 r = amdgpu_device_vram_scratch_init(adev);
2c1a2784
AD
1820 if (r) {
1821 DRM_ERROR("amdgpu_vram_scratch_init failed %d\n", r);
72d3f592 1822 goto init_failed;
2c1a2784 1823 }
a1255107 1824 r = adev->ip_blocks[i].version->funcs->hw_init((void *)adev);
2c1a2784
AD
1825 if (r) {
1826 DRM_ERROR("hw_init %d failed %d\n", i, r);
72d3f592 1827 goto init_failed;
2c1a2784 1828 }
06ec9070 1829 r = amdgpu_device_wb_init(adev);
2c1a2784 1830 if (r) {
06ec9070 1831 DRM_ERROR("amdgpu_device_wb_init failed %d\n", r);
72d3f592 1832 goto init_failed;
2c1a2784 1833 }
a1255107 1834 adev->ip_blocks[i].status.hw = true;
2493664f
ML
1835
1836 /* right after GMC hw init, we create CSA */
f92d5c61 1837 if (amdgpu_mcbp || amdgpu_sriov_vf(adev)) {
1e256e27
RZ
1838 r = amdgpu_allocate_static_csa(adev, &adev->virt.csa_obj,
1839 AMDGPU_GEM_DOMAIN_VRAM,
1840 AMDGPU_CSA_SIZE);
2493664f
ML
1841 if (r) {
1842 DRM_ERROR("allocate CSA failed %d\n", r);
72d3f592 1843 goto init_failed;
2493664f
ML
1844 }
1845 }
d38ceaf9
AD
1846 }
1847 }
1848
533aed27
AG
1849 r = amdgpu_ib_pool_init(adev);
1850 if (r) {
1851 dev_err(adev->dev, "IB initialization failed (%d).\n", r);
1852 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_IB_INIT_FAIL, 0, r);
1853 goto init_failed;
1854 }
1855
c8963ea4
RZ
1856 r = amdgpu_ucode_create_bo(adev); /* create ucode bo when sw_init complete*/
1857 if (r)
72d3f592 1858 goto init_failed;
0a4f2520
RZ
1859
1860 r = amdgpu_device_ip_hw_init_phase1(adev);
1861 if (r)
72d3f592 1862 goto init_failed;
0a4f2520 1863
7a3e0bb2
RZ
1864 r = amdgpu_device_fw_loading(adev);
1865 if (r)
72d3f592 1866 goto init_failed;
7a3e0bb2 1867
0a4f2520
RZ
1868 r = amdgpu_device_ip_hw_init_phase2(adev);
1869 if (r)
72d3f592 1870 goto init_failed;
d38ceaf9 1871
3e2e2ab5
HZ
1872 if (adev->gmc.xgmi.num_physical_nodes > 1)
1873 amdgpu_xgmi_add_device(adev);
1884734a 1874 amdgpu_amdkfd_device_init(adev);
c6332b97 1875
72d3f592 1876init_failed:
d3c117e5 1877 if (amdgpu_sriov_vf(adev)) {
72d3f592
ED
1878 if (!r)
1879 amdgpu_virt_init_data_exchange(adev);
c6332b97 1880 amdgpu_virt_release_full_gpu(adev, true);
d3c117e5 1881 }
c6332b97 1882
72d3f592 1883 return r;
d38ceaf9
AD
1884}
1885
e3ecdffa
AD
1886/**
1887 * amdgpu_device_fill_reset_magic - writes reset magic to gart pointer
1888 *
1889 * @adev: amdgpu_device pointer
1890 *
1891 * Writes a reset magic value to the gart pointer in VRAM. The driver calls
1892 * this function before a GPU reset. If the value is retained after a
1893 * GPU reset, VRAM has not been lost. Some GPU resets may destroy VRAM contents.
1894 */
06ec9070 1895static void amdgpu_device_fill_reset_magic(struct amdgpu_device *adev)
0c49e0b8
CZ
1896{
1897 memcpy(adev->reset_magic, adev->gart.ptr, AMDGPU_RESET_MAGIC_NUM);
1898}
1899
e3ecdffa
AD
1900/**
1901 * amdgpu_device_check_vram_lost - check if vram is valid
1902 *
1903 * @adev: amdgpu_device pointer
1904 *
1905 * Checks the reset magic value written to the gart pointer in VRAM.
1906 * The driver calls this after a GPU reset to see if the contents of
1907 * VRAM have been lost or not.
1908 * Returns true if vram is lost, false if not.
1909 */
06ec9070 1910static bool amdgpu_device_check_vram_lost(struct amdgpu_device *adev)
0c49e0b8
CZ
1911{
1912 return !!memcmp(adev->gart.ptr, adev->reset_magic,
1913 AMDGPU_RESET_MAGIC_NUM);
1914}
1915
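/*
 * Illustrative sketch, not part of the original file: how the two helpers
 * above pair up around an ASIC reset. The magic value is recorded before
 * the reset and compared against the GART pointer afterwards; a mismatch
 * means VRAM contents were lost and need to be restored. The wrapper name
 * is an assumption used only for illustration.
 */
static int amdgpu_device_reset_with_vram_check_sketch(struct amdgpu_device *adev)
{
        bool vram_lost;
        int r;

        amdgpu_device_fill_reset_magic(adev);   /* record the magic before the reset */
        r = amdgpu_asic_reset(adev);
        if (r)
                return r;

        vram_lost = amdgpu_device_check_vram_lost(adev);
        if (vram_lost)
                DRM_INFO("VRAM is lost due to GPU reset!\n");
        return 0;
}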
e3ecdffa 1916/**
1112a46b 1917 * amdgpu_device_set_cg_state - set clockgating for amdgpu device
e3ecdffa
AD
1918 *
1919 * @adev: amdgpu_device pointer
1920 *
e3ecdffa 1921 * The list of all the hardware IPs that make up the asic is walked and the
1112a46b
RZ
1922 * set_clockgating_state callbacks are run.
1923 * During late init, this pass enables clockgating for hardware IPs;
1924 * during fini or suspend, it disables clockgating for hardware IPs.
e3ecdffa
AD
1925 * Returns 0 on success, negative error code on failure.
1926 */
fdd34271 1927
1112a46b
RZ
1928static int amdgpu_device_set_cg_state(struct amdgpu_device *adev,
1929 enum amd_clockgating_state state)
d38ceaf9 1930{
1112a46b 1931 int i, j, r;
d38ceaf9 1932
4a2ba394
SL
1933 if (amdgpu_emu_mode == 1)
1934 return 0;
1935
1112a46b
RZ
1936 for (j = 0; j < adev->num_ip_blocks; j++) {
1937 i = state == AMD_CG_STATE_GATE ? j : adev->num_ip_blocks - j - 1;
a2d31dc3 1938 if (!adev->ip_blocks[i].status.late_initialized)
d38ceaf9 1939 continue;
4a446d55 1940 /* skip CG for VCE/UVD, it's handled specially */
a1255107 1941 if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_UVD &&
57716327 1942 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCE &&
34319b32 1943 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCN &&
57716327 1944 adev->ip_blocks[i].version->funcs->set_clockgating_state) {
4a446d55 1945 /* enable clockgating to save power */
a1255107 1946 r = adev->ip_blocks[i].version->funcs->set_clockgating_state((void *)adev,
1112a46b 1947 state);
4a446d55
AD
1948 if (r) {
1949 DRM_ERROR("set_clockgating_state(gate) of IP block <%s> failed %d\n",
a1255107 1950 adev->ip_blocks[i].version->funcs->name, r);
4a446d55
AD
1951 return r;
1952 }
b0b00ff1 1953 }
d38ceaf9 1954 }
06b18f61 1955
c9f96fd5
RZ
1956 return 0;
1957}
1958
1112a46b 1959static int amdgpu_device_set_pg_state(struct amdgpu_device *adev, enum amd_powergating_state state)
c9f96fd5 1960{
1112a46b 1961 int i, j, r;
06b18f61 1962
c9f96fd5
RZ
1963 if (amdgpu_emu_mode == 1)
1964 return 0;
1965
1112a46b
RZ
1966 for (j = 0; j < adev->num_ip_blocks; j++) {
1967 i = state == AMD_PG_STATE_GATE ? j : adev->num_ip_blocks - j - 1;
a2d31dc3 1968 if (!adev->ip_blocks[i].status.late_initialized)
c9f96fd5
RZ
1969 continue;
1970 /* skip PG for VCE/UVD, it's handled specially */
1971 if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_UVD &&
1972 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCE &&
1973 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCN &&
1974 adev->ip_blocks[i].version->funcs->set_powergating_state) {
1975 /* enable powergating to save power */
1976 r = adev->ip_blocks[i].version->funcs->set_powergating_state((void *)adev,
1112a46b 1977 state);
c9f96fd5
RZ
1978 if (r) {
1979 DRM_ERROR("set_powergating_state(gate) of IP block <%s> failed %d\n",
1980 adev->ip_blocks[i].version->funcs->name, r);
1981 return r;
1982 }
1983 }
1984 }
2dc80b00
S
1985 return 0;
1986}
1987
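/*
 * Illustrative sketch, not part of the original file: the walk order used
 * by amdgpu_device_set_cg_state()/amdgpu_device_set_pg_state() above.
 * Gating walks the IP list front to back, ungating walks it back to front,
 * so blocks are ungated in the reverse order they were gated. The helper
 * name is an assumption used only for illustration.
 */
static inline int amdgpu_device_gate_walk_index_sketch(int j, int num_ip_blocks, bool gate)
{
        /* e.g. with 3 blocks: gating visits 0, 1, 2; ungating visits 2, 1, 0 */
        return gate ? j : num_ip_blocks - j - 1;
}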
beff74bc
AD
1988static int amdgpu_device_enable_mgpu_fan_boost(void)
1989{
1990 struct amdgpu_gpu_instance *gpu_ins;
1991 struct amdgpu_device *adev;
1992 int i, ret = 0;
1993
1994 mutex_lock(&mgpu_info.mutex);
1995
1996 /*
1997 * MGPU fan boost feature should be enabled
1998 * only when there are two or more dGPUs in
1999 * the system
2000 */
2001 if (mgpu_info.num_dgpu < 2)
2002 goto out;
2003
2004 for (i = 0; i < mgpu_info.num_dgpu; i++) {
2005 gpu_ins = &(mgpu_info.gpu_ins[i]);
2006 adev = gpu_ins->adev;
2007 if (!(adev->flags & AMD_IS_APU) &&
2008 !gpu_ins->mgpu_fan_enabled &&
2009 adev->powerplay.pp_funcs &&
2010 adev->powerplay.pp_funcs->enable_mgpu_fan_boost) {
2011 ret = amdgpu_dpm_enable_mgpu_fan_boost(adev);
2012 if (ret)
2013 break;
2014
2015 gpu_ins->mgpu_fan_enabled = 1;
2016 }
2017 }
2018
2019out:
2020 mutex_unlock(&mgpu_info.mutex);
2021
2022 return ret;
2023}
2024
e3ecdffa
AD
2025/**
2026 * amdgpu_device_ip_late_init - run late init for hardware IPs
2027 *
2028 * @adev: amdgpu_device pointer
2029 *
2030 * Late initialization pass for hardware IPs. The list of all the hardware
2031 * IPs that make up the asic is walked and the late_init callbacks are run.
2032 * late_init covers any special initialization that an IP requires
2033 * after all of the IPs have been initialized or something that needs to happen
2034 * late in the init process.
2035 * Returns 0 on success, negative error code on failure.
2036 */
06ec9070 2037static int amdgpu_device_ip_late_init(struct amdgpu_device *adev)
2dc80b00
S
2038{
2039 int i = 0, r;
2040
2041 for (i = 0; i < adev->num_ip_blocks; i++) {
73f847db 2042 if (!adev->ip_blocks[i].status.hw)
2dc80b00
S
2043 continue;
2044 if (adev->ip_blocks[i].version->funcs->late_init) {
2045 r = adev->ip_blocks[i].version->funcs->late_init((void *)adev);
2046 if (r) {
2047 DRM_ERROR("late_init of IP block <%s> failed %d\n",
2048 adev->ip_blocks[i].version->funcs->name, r);
2049 return r;
2050 }
2dc80b00 2051 }
73f847db 2052 adev->ip_blocks[i].status.late_initialized = true;
2dc80b00
S
2053 }
2054
1112a46b
RZ
2055 amdgpu_device_set_cg_state(adev, AMD_CG_STATE_GATE);
2056 amdgpu_device_set_pg_state(adev, AMD_PG_STATE_GATE);
916ac57f 2057
06ec9070 2058 amdgpu_device_fill_reset_magic(adev);
d38ceaf9 2059
beff74bc
AD
2060 r = amdgpu_device_enable_mgpu_fan_boost();
2061 if (r)
2062 DRM_ERROR("enable mgpu fan boost failed (%d).\n", r);
2063
2064 /* set to low pstate by default */
2065 amdgpu_xgmi_set_pstate(adev, 0);
2066
d38ceaf9
AD
2067 return 0;
2068}
2069
e3ecdffa
AD
2070/**
2071 * amdgpu_device_ip_fini - run fini for hardware IPs
2072 *
2073 * @adev: amdgpu_device pointer
2074 *
2075 * Main teardown pass for hardware IPs. The list of all the hardware
2076 * IPs that make up the asic is walked and the hw_fini and sw_fini callbacks
2077 * are run. hw_fini tears down the hardware associated with each IP
2078 * and sw_fini tears down any software state associated with each IP.
2079 * Returns 0 on success, negative error code on failure.
2080 */
06ec9070 2081static int amdgpu_device_ip_fini(struct amdgpu_device *adev)
d38ceaf9
AD
2082{
2083 int i, r;
2084
c030f2e4 2085 amdgpu_ras_pre_fini(adev);
2086
a82400b5
AG
2087 if (adev->gmc.xgmi.num_physical_nodes > 1)
2088 amdgpu_xgmi_remove_device(adev);
2089
1884734a 2090 amdgpu_amdkfd_device_fini(adev);
05df1f01
RZ
2091
2092 amdgpu_device_set_pg_state(adev, AMD_PG_STATE_UNGATE);
fdd34271
RZ
2093 amdgpu_device_set_cg_state(adev, AMD_CG_STATE_UNGATE);
2094
3e96dbfd
AD
2095 /* need to disable SMC first */
2096 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 2097 if (!adev->ip_blocks[i].status.hw)
3e96dbfd 2098 continue;
fdd34271 2099 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) {
a1255107 2100 r = adev->ip_blocks[i].version->funcs->hw_fini((void *)adev);
3e96dbfd
AD
2101 /* XXX handle errors */
2102 if (r) {
2103 DRM_DEBUG("hw_fini of IP block <%s> failed %d\n",
a1255107 2104 adev->ip_blocks[i].version->funcs->name, r);
3e96dbfd 2105 }
a1255107 2106 adev->ip_blocks[i].status.hw = false;
3e96dbfd
AD
2107 break;
2108 }
2109 }
2110
d38ceaf9 2111 for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
a1255107 2112 if (!adev->ip_blocks[i].status.hw)
d38ceaf9 2113 continue;
8201a67a 2114
a1255107 2115 r = adev->ip_blocks[i].version->funcs->hw_fini((void *)adev);
d38ceaf9 2116 /* XXX handle errors */
2c1a2784 2117 if (r) {
a1255107
AD
2118 DRM_DEBUG("hw_fini of IP block <%s> failed %d\n",
2119 adev->ip_blocks[i].version->funcs->name, r);
2c1a2784 2120 }
8201a67a 2121
a1255107 2122 adev->ip_blocks[i].status.hw = false;
d38ceaf9
AD
2123 }
2124
9950cda2 2125
d38ceaf9 2126 for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
a1255107 2127 if (!adev->ip_blocks[i].status.sw)
d38ceaf9 2128 continue;
c12aba3a
ML
2129
2130 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) {
c8963ea4 2131 amdgpu_ucode_free_bo(adev);
1e256e27 2132 amdgpu_free_static_csa(&adev->virt.csa_obj);
c12aba3a
ML
2133 amdgpu_device_wb_fini(adev);
2134 amdgpu_device_vram_scratch_fini(adev);
533aed27 2135 amdgpu_ib_pool_fini(adev);
c12aba3a
ML
2136 }
2137
a1255107 2138 r = adev->ip_blocks[i].version->funcs->sw_fini((void *)adev);
d38ceaf9 2139 /* XXX handle errors */
2c1a2784 2140 if (r) {
a1255107
AD
2141 DRM_DEBUG("sw_fini of IP block <%s> failed %d\n",
2142 adev->ip_blocks[i].version->funcs->name, r);
2c1a2784 2143 }
a1255107
AD
2144 adev->ip_blocks[i].status.sw = false;
2145 adev->ip_blocks[i].status.valid = false;
d38ceaf9
AD
2146 }
2147
a6dcfd9c 2148 for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
a1255107 2149 if (!adev->ip_blocks[i].status.late_initialized)
8a2eef1d 2150 continue;
a1255107
AD
2151 if (adev->ip_blocks[i].version->funcs->late_fini)
2152 adev->ip_blocks[i].version->funcs->late_fini((void *)adev);
2153 adev->ip_blocks[i].status.late_initialized = false;
a6dcfd9c
ML
2154 }
2155
c030f2e4 2156 amdgpu_ras_fini(adev);
2157
030308fc 2158 if (amdgpu_sriov_vf(adev))
24136135
ML
2159 if (amdgpu_virt_release_full_gpu(adev, false))
2160 DRM_ERROR("failed to release exclusive mode on fini\n");
2493664f 2161
d38ceaf9
AD
2162 return 0;
2163}
2164
e3ecdffa 2165/**
beff74bc 2166 * amdgpu_device_delayed_init_work_handler - work handler for IB tests
e3ecdffa 2167 *
1112a46b 2168 * @work: work_struct.
e3ecdffa 2169 */
beff74bc 2170static void amdgpu_device_delayed_init_work_handler(struct work_struct *work)
2dc80b00
S
2171{
2172 struct amdgpu_device *adev =
beff74bc 2173 container_of(work, struct amdgpu_device, delayed_init_work.work);
916ac57f
RZ
2174 int r;
2175
2176 r = amdgpu_ib_ring_tests(adev);
2177 if (r)
2178 DRM_ERROR("ib ring test failed (%d).\n", r);
2dc80b00
S
2179}
2180
1e317b99
RZ
2181static void amdgpu_device_delay_enable_gfx_off(struct work_struct *work)
2182{
2183 struct amdgpu_device *adev =
2184 container_of(work, struct amdgpu_device, gfx.gfx_off_delay_work.work);
2185
2186 mutex_lock(&adev->gfx.gfx_off_mutex);
2187 if (!adev->gfx.gfx_off_state && !adev->gfx.gfx_off_req_count) {
2188 if (!amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_GFX, true))
2189 adev->gfx.gfx_off_state = true;
2190 }
2191 mutex_unlock(&adev->gfx.gfx_off_mutex);
2192}
2193
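/*
 * Illustrative sketch, not part of the original file, assuming the
 * amdgpu_gfx_off_ctrl() helper from amdgpu_gfx.c: callers that need the GFX
 * block powered on bump gfx_off_req_count via amdgpu_gfx_off_ctrl(adev, false);
 * dropping the last request schedules gfx_off_delay_work, and the handler
 * above re-enables GFXOFF once no requests remain.
 */
static void amdgpu_device_gfx_off_bracket_sketch(struct amdgpu_device *adev)
{
        amdgpu_gfx_off_ctrl(adev, false);       /* keep GFX powered while we work */
        /* ... access GFX registers or submit work that needs GFX awake ... */
        amdgpu_gfx_off_ctrl(adev, true);        /* allow GFXOFF again (re-armed via delayed work) */
}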
e3ecdffa 2194/**
e7854a03 2195 * amdgpu_device_ip_suspend_phase1 - run suspend for hardware IPs (phase 1)
e3ecdffa
AD
2196 *
2197 * @adev: amdgpu_device pointer
2198 *
2199 * Main suspend function for hardware IPs. The list of all the hardware
2200 * IPs that make up the asic is walked, clockgating is disabled and the
2201 * suspend callbacks are run. suspend puts the hardware and software state
2202 * in each IP into a state suitable for suspend.
2203 * Returns 0 on success, negative error code on failure.
2204 */
e7854a03
AD
2205static int amdgpu_device_ip_suspend_phase1(struct amdgpu_device *adev)
2206{
2207 int i, r;
2208
05df1f01 2209 amdgpu_device_set_pg_state(adev, AMD_PG_STATE_UNGATE);
fdd34271 2210 amdgpu_device_set_cg_state(adev, AMD_CG_STATE_UNGATE);
05df1f01 2211
e7854a03
AD
2212 for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
2213 if (!adev->ip_blocks[i].status.valid)
2214 continue;
2215 /* displays are handled separately */
2216 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_DCE) {
e7854a03
AD
2217 /* XXX handle errors */
2218 r = adev->ip_blocks[i].version->funcs->suspend(adev);
2219 /* XXX handle errors */
2220 if (r) {
2221 DRM_ERROR("suspend of IP block <%s> failed %d\n",
2222 adev->ip_blocks[i].version->funcs->name, r);
482f0e53 2223 return r;
e7854a03 2224 }
482f0e53 2225 adev->ip_blocks[i].status.hw = false;
e7854a03
AD
2226 }
2227 }
2228
e7854a03
AD
2229 return 0;
2230}
2231
2232/**
2233 * amdgpu_device_ip_suspend_phase2 - run suspend for hardware IPs (phase 2)
2234 *
2235 * @adev: amdgpu_device pointer
2236 *
2237 * Main suspend function for hardware IPs. The list of all the hardware
2238 * IPs that make up the asic is walked, clockgating is disabled and the
2239 * suspend callbacks are run. suspend puts the hardware and software state
2240 * in each IP into a state suitable for suspend.
2241 * Returns 0 on success, negative error code on failure.
2242 */
2243static int amdgpu_device_ip_suspend_phase2(struct amdgpu_device *adev)
d38ceaf9
AD
2244{
2245 int i, r;
2246
2247 for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
a1255107 2248 if (!adev->ip_blocks[i].status.valid)
d38ceaf9 2249 continue;
e7854a03
AD
2250 /* displays are handled in phase1 */
2251 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_DCE)
2252 continue;
d38ceaf9 2253 /* XXX handle errors */
a1255107 2254 r = adev->ip_blocks[i].version->funcs->suspend(adev);
d38ceaf9 2255 /* XXX handle errors */
2c1a2784 2256 if (r) {
a1255107
AD
2257 DRM_ERROR("suspend of IP block <%s> failed %d\n",
2258 adev->ip_blocks[i].version->funcs->name, r);
2c1a2784 2259 }
a3a09142
AD
2260 /* handle putting the SMC in the appropriate state */
2261 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) {
2262 if (is_support_sw_smu(adev)) {
2263 /* todo */
2264 } else if (adev->powerplay.pp_funcs &&
482f0e53 2265 adev->powerplay.pp_funcs->set_mp1_state) {
a3a09142
AD
2266 r = adev->powerplay.pp_funcs->set_mp1_state(
2267 adev->powerplay.pp_handle,
2268 adev->mp1_state);
2269 if (r) {
2270 DRM_ERROR("SMC failed to set mp1 state %d, %d\n",
2271 adev->mp1_state, r);
482f0e53 2272 return r;
a3a09142 2273 }
482f0e53 2274 adev->ip_blocks[i].status.hw = false;
a3a09142
AD
2275 }
2276 }
d38ceaf9
AD
2277 }
2278
2279 return 0;
2280}
2281
e7854a03
AD
2282/**
2283 * amdgpu_device_ip_suspend - run suspend for hardware IPs
2284 *
2285 * @adev: amdgpu_device pointer
2286 *
2287 * Main suspend function for hardware IPs. The list of all the hardware
2288 * IPs that make up the asic is walked, clockgating is disabled and the
2289 * suspend callbacks are run. suspend puts the hardware and software state
2290 * in each IP into a state suitable for suspend.
2291 * Returns 0 on success, negative error code on failure.
2292 */
2293int amdgpu_device_ip_suspend(struct amdgpu_device *adev)
2294{
2295 int r;
2296
e7819644
YT
2297 if (amdgpu_sriov_vf(adev))
2298 amdgpu_virt_request_full_gpu(adev, false);
2299
e7854a03
AD
2300 r = amdgpu_device_ip_suspend_phase1(adev);
2301 if (r)
2302 return r;
2303 r = amdgpu_device_ip_suspend_phase2(adev);
2304
e7819644
YT
2305 if (amdgpu_sriov_vf(adev))
2306 amdgpu_virt_release_full_gpu(adev, false);
2307
e7854a03
AD
2308 return r;
2309}
2310
06ec9070 2311static int amdgpu_device_ip_reinit_early_sriov(struct amdgpu_device *adev)
a90ad3c2
ML
2312{
2313 int i, r;
2314
2cb681b6
ML
2315 static enum amd_ip_block_type ip_order[] = {
2316 AMD_IP_BLOCK_TYPE_GMC,
2317 AMD_IP_BLOCK_TYPE_COMMON,
39186aef 2318 AMD_IP_BLOCK_TYPE_PSP,
2cb681b6
ML
2319 AMD_IP_BLOCK_TYPE_IH,
2320 };
a90ad3c2 2321
2cb681b6
ML
2322 for (i = 0; i < ARRAY_SIZE(ip_order); i++) {
2323 int j;
2324 struct amdgpu_ip_block *block;
a90ad3c2 2325
2cb681b6
ML
2326 for (j = 0; j < adev->num_ip_blocks; j++) {
2327 block = &adev->ip_blocks[j];
2328
482f0e53 2329 block->status.hw = false;
2cb681b6
ML
2330 if (block->version->type != ip_order[i] ||
2331 !block->status.valid)
2332 continue;
2333
2334 r = block->version->funcs->hw_init(adev);
0aaeefcc 2335 DRM_INFO("RE-INIT-early: %s %s\n", block->version->funcs->name, r?"failed":"succeeded");
c41d1cf6
ML
2336 if (r)
2337 return r;
482f0e53 2338 block->status.hw = true;
a90ad3c2
ML
2339 }
2340 }
2341
2342 return 0;
2343}
2344
06ec9070 2345static int amdgpu_device_ip_reinit_late_sriov(struct amdgpu_device *adev)
a90ad3c2
ML
2346{
2347 int i, r;
2348
2cb681b6
ML
2349 static enum amd_ip_block_type ip_order[] = {
2350 AMD_IP_BLOCK_TYPE_SMC,
2351 AMD_IP_BLOCK_TYPE_DCE,
2352 AMD_IP_BLOCK_TYPE_GFX,
2353 AMD_IP_BLOCK_TYPE_SDMA,
257deb8c
FM
2354 AMD_IP_BLOCK_TYPE_UVD,
2355 AMD_IP_BLOCK_TYPE_VCE
2cb681b6 2356 };
a90ad3c2 2357
2cb681b6
ML
2358 for (i = 0; i < ARRAY_SIZE(ip_order); i++) {
2359 int j;
2360 struct amdgpu_ip_block *block;
a90ad3c2 2361
2cb681b6
ML
2362 for (j = 0; j < adev->num_ip_blocks; j++) {
2363 block = &adev->ip_blocks[j];
2364
2365 if (block->version->type != ip_order[i] ||
482f0e53
ML
2366 !block->status.valid ||
2367 block->status.hw)
2cb681b6
ML
2368 continue;
2369
2370 r = block->version->funcs->hw_init(adev);
0aaeefcc 2371 DRM_INFO("RE-INIT-late: %s %s\n", block->version->funcs->name, r?"failed":"succeeded");
c41d1cf6
ML
2372 if (r)
2373 return r;
482f0e53 2374 block->status.hw = true;
a90ad3c2
ML
2375 }
2376 }
2377
2378 return 0;
2379}
2380
e3ecdffa
AD
2381/**
2382 * amdgpu_device_ip_resume_phase1 - run resume for hardware IPs
2383 *
2384 * @adev: amdgpu_device pointer
2385 *
2386 * First resume function for hardware IPs. The list of all the hardware
2387 * IPs that make up the asic is walked and the resume callbacks are run for
2388 * COMMON, GMC, and IH. resume puts the hardware into a functional state
2389 * after a suspend and updates the software state as necessary. This
2390 * function is also used for restoring the GPU after a GPU reset.
2391 * Returns 0 on success, negative error code on failure.
2392 */
06ec9070 2393static int amdgpu_device_ip_resume_phase1(struct amdgpu_device *adev)
d38ceaf9
AD
2394{
2395 int i, r;
2396
a90ad3c2 2397 for (i = 0; i < adev->num_ip_blocks; i++) {
482f0e53 2398 if (!adev->ip_blocks[i].status.valid || adev->ip_blocks[i].status.hw)
a90ad3c2 2399 continue;
a90ad3c2 2400 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
e3ecdffa
AD
2401 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
2402 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH) {
482f0e53 2403
fcf0649f
CZ
2404 r = adev->ip_blocks[i].version->funcs->resume(adev);
2405 if (r) {
2406 DRM_ERROR("resume of IP block <%s> failed %d\n",
2407 adev->ip_blocks[i].version->funcs->name, r);
2408 return r;
2409 }
482f0e53 2410 adev->ip_blocks[i].status.hw = true;
a90ad3c2
ML
2411 }
2412 }
2413
2414 return 0;
2415}
2416
e3ecdffa
AD
2417/**
2418 * amdgpu_device_ip_resume_phase2 - run resume for hardware IPs
2419 *
2420 * @adev: amdgpu_device pointer
2421 *
2422 * Second resume function for hardware IPs. The list of all the hardware
2423 * IPs that make up the asic is walked and the resume callbacks are run for
2424 * all blocks except COMMON, GMC, and IH. resume puts the hardware into a
2425 * functional state after a suspend and updates the software state as
2426 * necessary. This function is also used for restoring the GPU after a GPU
2427 * reset.
2428 * Returns 0 on success, negative error code on failure.
2429 */
06ec9070 2430static int amdgpu_device_ip_resume_phase2(struct amdgpu_device *adev)
d38ceaf9
AD
2431{
2432 int i, r;
2433
2434 for (i = 0; i < adev->num_ip_blocks; i++) {
482f0e53 2435 if (!adev->ip_blocks[i].status.valid || adev->ip_blocks[i].status.hw)
d38ceaf9 2436 continue;
fcf0649f 2437 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
e3ecdffa 2438 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
7a3e0bb2
RZ
2439 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH ||
2440 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP)
fcf0649f 2441 continue;
a1255107 2442 r = adev->ip_blocks[i].version->funcs->resume(adev);
2c1a2784 2443 if (r) {
a1255107
AD
2444 DRM_ERROR("resume of IP block <%s> failed %d\n",
2445 adev->ip_blocks[i].version->funcs->name, r);
d38ceaf9 2446 return r;
2c1a2784 2447 }
482f0e53 2448 adev->ip_blocks[i].status.hw = true;
d38ceaf9
AD
2449 }
2450
2451 return 0;
2452}
2453
e3ecdffa
AD
2454/**
2455 * amdgpu_device_ip_resume - run resume for hardware IPs
2456 *
2457 * @adev: amdgpu_device pointer
2458 *
2459 * Main resume function for hardware IPs. The hardware IPs
2460 * are split into two resume functions because they are
2461 * also used in recovering from a GPU reset and some additional
2462 * steps need to be taken between them. In this case (S3/S4) they are
2463 * run sequentially.
2464 * Returns 0 on success, negative error code on failure.
2465 */
06ec9070 2466static int amdgpu_device_ip_resume(struct amdgpu_device *adev)
fcf0649f
CZ
2467{
2468 int r;
2469
06ec9070 2470 r = amdgpu_device_ip_resume_phase1(adev);
fcf0649f
CZ
2471 if (r)
2472 return r;
7a3e0bb2
RZ
2473
2474 r = amdgpu_device_fw_loading(adev);
2475 if (r)
2476 return r;
2477
06ec9070 2478 r = amdgpu_device_ip_resume_phase2(adev);
fcf0649f
CZ
2479
2480 return r;
2481}
2482
e3ecdffa
AD
2483/**
2484 * amdgpu_device_detect_sriov_bios - determine if the board supports SR-IOV
2485 *
2486 * @adev: amdgpu_device pointer
2487 *
2488 * Query the VBIOS data tables to determine if the board supports SR-IOV.
2489 */
4e99a44e 2490static void amdgpu_device_detect_sriov_bios(struct amdgpu_device *adev)
048765ad 2491{
6867e1b5
ML
2492 if (amdgpu_sriov_vf(adev)) {
2493 if (adev->is_atom_fw) {
2494 if (amdgpu_atomfirmware_gpu_supports_virtualization(adev))
2495 adev->virt.caps |= AMDGPU_SRIOV_CAPS_SRIOV_VBIOS;
2496 } else {
2497 if (amdgpu_atombios_has_gpu_virtualization_table(adev))
2498 adev->virt.caps |= AMDGPU_SRIOV_CAPS_SRIOV_VBIOS;
2499 }
2500
2501 if (!(adev->virt.caps & AMDGPU_SRIOV_CAPS_SRIOV_VBIOS))
2502 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_NO_VBIOS, 0, 0);
a5bde2f9 2503 }
048765ad
AR
2504}
2505
e3ecdffa
AD
2506/**
2507 * amdgpu_device_asic_has_dc_support - determine if DC supports the asic
2508 *
2509 * @asic_type: AMD asic type
2510 *
2511 * Check if there is DC (new modesetting infrastructure) support for an asic.
2512 * Returns true if DC has support, false if not.
2513 */
4562236b
HW
2514bool amdgpu_device_asic_has_dc_support(enum amd_asic_type asic_type)
2515{
2516 switch (asic_type) {
2517#if defined(CONFIG_DRM_AMD_DC)
2518 case CHIP_BONAIRE:
0d6fbccb 2519 case CHIP_KAVERI:
367e6687
AD
2520 case CHIP_KABINI:
2521 case CHIP_MULLINS:
d9fda248
HW
2522 /*
2523 * We have systems in the wild with these ASICs that require
2524 * LVDS and VGA support which is not supported with DC.
2525 *
2526 * Fallback to the non-DC driver here by default so as not to
2527 * cause regressions.
2528 */
2529 return amdgpu_dc > 0;
2530 case CHIP_HAWAII:
4562236b
HW
2531 case CHIP_CARRIZO:
2532 case CHIP_STONEY:
4562236b 2533 case CHIP_POLARIS10:
675fd32b 2534 case CHIP_POLARIS11:
2c8ad2d5 2535 case CHIP_POLARIS12:
675fd32b 2536 case CHIP_VEGAM:
4562236b
HW
2537 case CHIP_TONGA:
2538 case CHIP_FIJI:
42f8ffa1 2539 case CHIP_VEGA10:
dca7b401 2540 case CHIP_VEGA12:
c6034aa2 2541 case CHIP_VEGA20:
dc37a9a0 2542#if defined(CONFIG_DRM_AMD_DC_DCN1_0)
fd187853 2543 case CHIP_RAVEN:
b4f199c7
HW
2544#endif
2545#if defined(CONFIG_DRM_AMD_DC_DCN2_0)
2546 case CHIP_NAVI10:
8fceceb6 2547 case CHIP_NAVI14:
42f8ffa1 2548#endif
fd187853 2549 return amdgpu_dc != 0;
4562236b
HW
2550#endif
2551 default:
2552 return false;
2553 }
2554}
2555
2556/**
2557 * amdgpu_device_has_dc_support - check if dc is supported
2558 *
2559 * @adev: amdgpu_device pointer
2560 *
2561 * Returns true for supported, false for not supported
2562 */
2563bool amdgpu_device_has_dc_support(struct amdgpu_device *adev)
2564{
2555039d
XY
2565 if (amdgpu_sriov_vf(adev))
2566 return false;
2567
4562236b
HW
2568 return amdgpu_device_asic_has_dc_support(adev->asic_type);
2569}
2570
d4535e2c
AG
2571
2572static void amdgpu_device_xgmi_reset_func(struct work_struct *__work)
2573{
2574 struct amdgpu_device *adev =
2575 container_of(__work, struct amdgpu_device, xgmi_reset_work);
2576
2577 adev->asic_reset_res = amdgpu_asic_reset(adev);
2578 if (adev->asic_reset_res)
fed184e9 2579 DRM_WARN("ASIC reset failed with error, %d for drm dev, %s",
d4535e2c
AG
2580 adev->asic_reset_res, adev->ddev->unique);
2581}
2582
2583
d38ceaf9
AD
2584/**
2585 * amdgpu_device_init - initialize the driver
2586 *
2587 * @adev: amdgpu_device pointer
87e3f136 2588 * @ddev: drm dev pointer
d38ceaf9
AD
2589 * @pdev: pci dev pointer
2590 * @flags: driver flags
2591 *
2592 * Initializes the driver info and hw (all asics).
2593 * Returns 0 for success or an error on failure.
2594 * Called at driver startup.
2595 */
2596int amdgpu_device_init(struct amdgpu_device *adev,
2597 struct drm_device *ddev,
2598 struct pci_dev *pdev,
2599 uint32_t flags)
2600{
2601 int r, i;
2602 bool runtime = false;
95844d20 2603 u32 max_MBps;
d38ceaf9
AD
2604
2605 adev->shutdown = false;
2606 adev->dev = &pdev->dev;
2607 adev->ddev = ddev;
2608 adev->pdev = pdev;
2609 adev->flags = flags;
2f7d10b3 2610 adev->asic_type = flags & AMD_ASIC_MASK;
d38ceaf9 2611 adev->usec_timeout = AMDGPU_MAX_USEC_TIMEOUT;
593aa2d2
SL
2612 if (amdgpu_emu_mode == 1)
2613 adev->usec_timeout *= 2;
770d13b1 2614 adev->gmc.gart_size = 512 * 1024 * 1024;
d38ceaf9
AD
2615 adev->accel_working = false;
2616 adev->num_rings = 0;
2617 adev->mman.buffer_funcs = NULL;
2618 adev->mman.buffer_funcs_ring = NULL;
2619 adev->vm_manager.vm_pte_funcs = NULL;
3798e9a6 2620 adev->vm_manager.vm_pte_num_rqs = 0;
132f34e4 2621 adev->gmc.gmc_funcs = NULL;
f54d1867 2622 adev->fence_context = dma_fence_context_alloc(AMDGPU_MAX_RINGS);
b8866c26 2623 bitmap_zero(adev->gfx.pipe_reserve_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
d38ceaf9
AD
2624
2625 adev->smc_rreg = &amdgpu_invalid_rreg;
2626 adev->smc_wreg = &amdgpu_invalid_wreg;
2627 adev->pcie_rreg = &amdgpu_invalid_rreg;
2628 adev->pcie_wreg = &amdgpu_invalid_wreg;
36b9a952
HR
2629 adev->pciep_rreg = &amdgpu_invalid_rreg;
2630 adev->pciep_wreg = &amdgpu_invalid_wreg;
4fa1c6a6
TZ
2631 adev->pcie_rreg64 = &amdgpu_invalid_rreg64;
2632 adev->pcie_wreg64 = &amdgpu_invalid_wreg64;
d38ceaf9
AD
2633 adev->uvd_ctx_rreg = &amdgpu_invalid_rreg;
2634 adev->uvd_ctx_wreg = &amdgpu_invalid_wreg;
2635 adev->didt_rreg = &amdgpu_invalid_rreg;
2636 adev->didt_wreg = &amdgpu_invalid_wreg;
ccdbb20a
RZ
2637 adev->gc_cac_rreg = &amdgpu_invalid_rreg;
2638 adev->gc_cac_wreg = &amdgpu_invalid_wreg;
d38ceaf9
AD
2639 adev->audio_endpt_rreg = &amdgpu_block_invalid_rreg;
2640 adev->audio_endpt_wreg = &amdgpu_block_invalid_wreg;
2641
3e39ab90
AD
2642 DRM_INFO("initializing kernel modesetting (%s 0x%04X:0x%04X 0x%04X:0x%04X 0x%02X).\n",
2643 amdgpu_asic_name[adev->asic_type], pdev->vendor, pdev->device,
2644 pdev->subsystem_vendor, pdev->subsystem_device, pdev->revision);
d38ceaf9
AD
2645
2646 /* mutex initializations are all done here so we
2647 * can call these functions again without locking issues */
d38ceaf9 2648 atomic_set(&adev->irq.ih.lock, 0);
0e5ca0d1 2649 mutex_init(&adev->firmware.mutex);
d38ceaf9
AD
2650 mutex_init(&adev->pm.mutex);
2651 mutex_init(&adev->gfx.gpu_clock_mutex);
2652 mutex_init(&adev->srbm_mutex);
b8866c26 2653 mutex_init(&adev->gfx.pipe_reserve_mutex);
d23ee13f 2654 mutex_init(&adev->gfx.gfx_off_mutex);
d38ceaf9 2655 mutex_init(&adev->grbm_idx_mutex);
d38ceaf9 2656 mutex_init(&adev->mn_lock);
e23b74aa 2657 mutex_init(&adev->virt.vf_errors.lock);
d38ceaf9 2658 hash_init(adev->mn_hash);
13a752e3 2659 mutex_init(&adev->lock_reset);
bb5a2bdf 2660 mutex_init(&adev->virt.dpm_mutex);
32eaeae0 2661 mutex_init(&adev->psp.mutex);
d38ceaf9 2662
912dfc84
EQ
2663 r = amdgpu_device_check_arguments(adev);
2664 if (r)
2665 return r;
d38ceaf9 2666
d38ceaf9
AD
2667 spin_lock_init(&adev->mmio_idx_lock);
2668 spin_lock_init(&adev->smc_idx_lock);
2669 spin_lock_init(&adev->pcie_idx_lock);
2670 spin_lock_init(&adev->uvd_ctx_idx_lock);
2671 spin_lock_init(&adev->didt_idx_lock);
ccdbb20a 2672 spin_lock_init(&adev->gc_cac_idx_lock);
16abb5d2 2673 spin_lock_init(&adev->se_cac_idx_lock);
d38ceaf9 2674 spin_lock_init(&adev->audio_endpt_idx_lock);
95844d20 2675 spin_lock_init(&adev->mm_stats.lock);
d38ceaf9 2676
0c4e7fa5
CZ
2677 INIT_LIST_HEAD(&adev->shadow_list);
2678 mutex_init(&adev->shadow_list_lock);
2679
795f2813
AR
2680 INIT_LIST_HEAD(&adev->ring_lru_list);
2681 spin_lock_init(&adev->ring_lru_list_lock);
2682
beff74bc
AD
2683 INIT_DELAYED_WORK(&adev->delayed_init_work,
2684 amdgpu_device_delayed_init_work_handler);
1e317b99
RZ
2685 INIT_DELAYED_WORK(&adev->gfx.gfx_off_delay_work,
2686 amdgpu_device_delay_enable_gfx_off);
2dc80b00 2687
d4535e2c
AG
2688 INIT_WORK(&adev->xgmi_reset_work, amdgpu_device_xgmi_reset_func);
2689
d23ee13f 2690 adev->gfx.gfx_off_req_count = 1;
b1ddf548
RZ
2691 adev->pm.ac_power = power_supply_is_system_supplied() > 0 ? true : false;
2692
0fa49558
AX
2693 /* Registers mapping */
2694 /* TODO: block userspace mapping of io register */
da69c161
KW
2695 if (adev->asic_type >= CHIP_BONAIRE) {
2696 adev->rmmio_base = pci_resource_start(adev->pdev, 5);
2697 adev->rmmio_size = pci_resource_len(adev->pdev, 5);
2698 } else {
2699 adev->rmmio_base = pci_resource_start(adev->pdev, 2);
2700 adev->rmmio_size = pci_resource_len(adev->pdev, 2);
2701 }
d38ceaf9 2702
d38ceaf9
AD
2703 adev->rmmio = ioremap(adev->rmmio_base, adev->rmmio_size);
2704 if (adev->rmmio == NULL) {
2705 return -ENOMEM;
2706 }
2707 DRM_INFO("register mmio base: 0x%08X\n", (uint32_t)adev->rmmio_base);
2708 DRM_INFO("register mmio size: %u\n", (unsigned)adev->rmmio_size);
2709
d38ceaf9
AD
2710 /* io port mapping */
2711 for (i = 0; i < DEVICE_COUNT_RESOURCE; i++) {
2712 if (pci_resource_flags(adev->pdev, i) & IORESOURCE_IO) {
2713 adev->rio_mem_size = pci_resource_len(adev->pdev, i);
2714 adev->rio_mem = pci_iomap(adev->pdev, i, adev->rio_mem_size);
2715 break;
2716 }
2717 }
2718 if (adev->rio_mem == NULL)
b64a18c5 2719 DRM_INFO("PCI I/O BAR is not found.\n");
d38ceaf9 2720
b2109d8e
JX
2721 /* enable PCIE atomic ops */
2722 r = pci_enable_atomic_ops_to_root(adev->pdev,
2723 PCI_EXP_DEVCAP2_ATOMIC_COMP32 |
2724 PCI_EXP_DEVCAP2_ATOMIC_COMP64);
2725 if (r) {
2726 adev->have_atomics_support = false;
2727 DRM_INFO("PCIE atomic ops is not supported\n");
2728 } else {
2729 adev->have_atomics_support = true;
2730 }
2731
5494d864
AD
2732 amdgpu_device_get_pcie_info(adev);
2733
b239c017
JX
2734 if (amdgpu_mcbp)
2735 DRM_INFO("MCBP is enabled\n");
2736
5f84cc63
JX
2737 if (amdgpu_mes && adev->asic_type >= CHIP_NAVI10)
2738 adev->enable_mes = true;
2739
f54eeab4 2740 if (amdgpu_discovery && adev->asic_type >= CHIP_NAVI10) {
a190d1c7
XY
2741 r = amdgpu_discovery_init(adev);
2742 if (r) {
2743 dev_err(adev->dev, "amdgpu_discovery_init failed\n");
2744 return r;
2745 }
2746 }
2747
d38ceaf9 2748 /* early init functions */
06ec9070 2749 r = amdgpu_device_ip_early_init(adev);
d38ceaf9
AD
2750 if (r)
2751 return r;
2752
6585661d
OZ
2753 /* doorbell bar mapping and doorbell index init*/
2754 amdgpu_device_doorbell_init(adev);
2755
d38ceaf9
AD
2756 /* if we have > 1 VGA cards, then disable the amdgpu VGA resources */
2757 /* this will fail for cards that aren't VGA class devices, just
2758 * ignore it */
06ec9070 2759 vga_client_register(adev->pdev, adev, NULL, amdgpu_device_vga_set_decode);
d38ceaf9 2760
e9bef455 2761 if (amdgpu_device_is_px(ddev))
d38ceaf9 2762 runtime = true;
84c8b22e
LW
2763 if (!pci_is_thunderbolt_attached(adev->pdev))
2764 vga_switcheroo_register_client(adev->pdev,
2765 &amdgpu_switcheroo_ops, runtime);
d38ceaf9
AD
2766 if (runtime)
2767 vga_switcheroo_init_domain_pm_ops(adev->dev, &adev->vga_pm_domain);
2768
9475a943
SL
2769 if (amdgpu_emu_mode == 1) {
2770 /* post the asic on emulation mode */
2771 emu_soc_asic_init(adev);
bfca0289 2772 goto fence_driver_init;
9475a943 2773 }
bfca0289 2774
4e99a44e
ML
2775 /* detect if we are with an SRIOV vbios */
2776 amdgpu_device_detect_sriov_bios(adev);
048765ad 2777
95e8e59e
AD
2778 /* check if we need to reset the asic
2779 * E.g., driver was not cleanly unloaded previously, etc.
2780 */
f14899fd 2781 if (!amdgpu_sriov_vf(adev) && amdgpu_asic_need_reset_on_init(adev)) {
95e8e59e
AD
2782 r = amdgpu_asic_reset(adev);
2783 if (r) {
2784 dev_err(adev->dev, "asic reset on init failed\n");
2785 goto failed;
2786 }
2787 }
2788
d38ceaf9 2789 /* Post card if necessary */
39c640c0 2790 if (amdgpu_device_need_post(adev)) {
d38ceaf9 2791 if (!adev->bios) {
bec86378 2792 dev_err(adev->dev, "no vBIOS found\n");
83ba126a
AD
2793 r = -EINVAL;
2794 goto failed;
d38ceaf9 2795 }
bec86378 2796 DRM_INFO("GPU posting now...\n");
4e99a44e
ML
2797 r = amdgpu_atom_asic_init(adev->mode_info.atom_context);
2798 if (r) {
2799 dev_err(adev->dev, "gpu post error!\n");
2800 goto failed;
2801 }
d38ceaf9
AD
2802 }
2803
88b64e95
AD
2804 if (adev->is_atom_fw) {
2805 /* Initialize clocks */
2806 r = amdgpu_atomfirmware_get_clock_info(adev);
2807 if (r) {
2808 dev_err(adev->dev, "amdgpu_atomfirmware_get_clock_info failed\n");
e23b74aa 2809 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_GET_CLOCK_FAIL, 0, 0);
88b64e95
AD
2810 goto failed;
2811 }
2812 } else {
a5bde2f9
AD
2813 /* Initialize clocks */
2814 r = amdgpu_atombios_get_clock_info(adev);
2815 if (r) {
2816 dev_err(adev->dev, "amdgpu_atombios_get_clock_info failed\n");
e23b74aa 2817 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_GET_CLOCK_FAIL, 0, 0);
89041940 2818 goto failed;
a5bde2f9
AD
2819 }
2820 /* init i2c buses */
4562236b
HW
2821 if (!amdgpu_device_has_dc_support(adev))
2822 amdgpu_atombios_i2c_init(adev);
2c1a2784 2823 }
d38ceaf9 2824
bfca0289 2825fence_driver_init:
d38ceaf9
AD
2826 /* Fence driver */
2827 r = amdgpu_fence_driver_init(adev);
2c1a2784
AD
2828 if (r) {
2829 dev_err(adev->dev, "amdgpu_fence_driver_init failed\n");
e23b74aa 2830 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_FENCE_INIT_FAIL, 0, 0);
83ba126a 2831 goto failed;
2c1a2784 2832 }
d38ceaf9
AD
2833
2834 /* init the mode config */
2835 drm_mode_config_init(adev->ddev);
2836
06ec9070 2837 r = amdgpu_device_ip_init(adev);
d38ceaf9 2838 if (r) {
8840a387 2839 /* failed in exclusive mode due to timeout */
2840 if (amdgpu_sriov_vf(adev) &&
2841 !amdgpu_sriov_runtime(adev) &&
2842 amdgpu_virt_mmio_blocked(adev) &&
2843 !amdgpu_virt_wait_reset(adev)) {
2844 dev_err(adev->dev, "VF exclusive mode timeout\n");
1daee8b4
PD
2845 /* Don't send request since VF is inactive. */
2846 adev->virt.caps &= ~AMDGPU_SRIOV_CAPS_RUNTIME;
2847 adev->virt.ops = NULL;
8840a387 2848 r = -EAGAIN;
2849 goto failed;
2850 }
06ec9070 2851 dev_err(adev->dev, "amdgpu_device_ip_init failed\n");
e23b74aa 2852 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_AMDGPU_INIT_FAIL, 0, 0);
72d3f592
ED
2853 if (amdgpu_virt_request_full_gpu(adev, false))
2854 amdgpu_virt_release_full_gpu(adev, false);
83ba126a 2855 goto failed;
d38ceaf9
AD
2856 }
2857
2858 adev->accel_working = true;
2859
e59c0205
AX
2860 amdgpu_vm_check_compute_bug(adev);
2861
95844d20
MO
2862 /* Initialize the buffer migration limit. */
2863 if (amdgpu_moverate >= 0)
2864 max_MBps = amdgpu_moverate;
2865 else
2866 max_MBps = 8; /* Allow 8 MB/s. */
2867 /* Get a log2 for easy divisions. */
2868 adev->mm_stats.log2_max_MBps = ilog2(max(1u, max_MBps));
2869
9bc92b9c
ML
2870 amdgpu_fbdev_init(adev);
2871
e9bc1bf7
YT
2872 if (amdgpu_sriov_vf(adev) && amdgim_is_hwperf(adev))
2873 amdgpu_pm_virt_sysfs_init(adev);
2874
d2f52ac8
RZ
2875 r = amdgpu_pm_sysfs_init(adev);
2876 if (r)
2877 DRM_ERROR("registering pm debugfs failed (%d).\n", r);
2878
5bb23532
OM
2879 r = amdgpu_ucode_sysfs_init(adev);
2880 if (r)
2881 DRM_ERROR("Creating firmware sysfs failed (%d).\n", r);
2882
75758255 2883 r = amdgpu_debugfs_gem_init(adev);
3f14e623 2884 if (r)
d38ceaf9 2885 DRM_ERROR("registering gem debugfs failed (%d).\n", r);
d38ceaf9
AD
2886
2887 r = amdgpu_debugfs_regs_init(adev);
3f14e623 2888 if (r)
d38ceaf9 2889 DRM_ERROR("registering register debugfs failed (%d).\n", r);
d38ceaf9 2890
50ab2533 2891 r = amdgpu_debugfs_firmware_init(adev);
3f14e623 2892 if (r)
50ab2533 2893 DRM_ERROR("registering firmware debugfs failed (%d).\n", r);
50ab2533 2894
763efb6c 2895 r = amdgpu_debugfs_init(adev);
db95e218 2896 if (r)
763efb6c 2897 DRM_ERROR("Creating debugfs files failed (%d).\n", r);
db95e218 2898
d38ceaf9
AD
2899 if ((amdgpu_testing & 1)) {
2900 if (adev->accel_working)
2901 amdgpu_test_moves(adev);
2902 else
2903 DRM_INFO("amdgpu: acceleration disabled, skipping move tests\n");
2904 }
d38ceaf9
AD
2905 if (amdgpu_benchmarking) {
2906 if (adev->accel_working)
2907 amdgpu_benchmark(adev, amdgpu_benchmarking);
2908 else
2909 DRM_INFO("amdgpu: acceleration disabled, skipping benchmarks\n");
2910 }
2911
2912 /* enable clockgating, etc. after ib tests, etc. since some blocks require
2913 * explicit gating rather than handling it automatically.
2914 */
06ec9070 2915 r = amdgpu_device_ip_late_init(adev);
2c1a2784 2916 if (r) {
06ec9070 2917 dev_err(adev->dev, "amdgpu_device_ip_late_init failed\n");
e23b74aa 2918 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_AMDGPU_LATE_INIT_FAIL, 0, r);
83ba126a 2919 goto failed;
2c1a2784 2920 }
d38ceaf9 2921
108c6a63 2922 /* must succeed. */
511fdbc3 2923 amdgpu_ras_resume(adev);
108c6a63 2924
beff74bc
AD
2925 queue_delayed_work(system_wq, &adev->delayed_init_work,
2926 msecs_to_jiffies(AMDGPU_RESUME_MS));
2927
dcea6e65
KR
2928 r = device_create_file(adev->dev, &dev_attr_pcie_replay_count);
2929 if (r) {
2930 dev_err(adev->dev, "Could not create pcie_replay_count");
2931 return r;
2932 }
108c6a63 2933
d155bef0
AB
2934 if (IS_ENABLED(CONFIG_PERF_EVENTS))
2935 r = amdgpu_pmu_init(adev);
9c7c85f7
JK
2936 if (r)
2937 dev_err(adev->dev, "amdgpu_pmu_init failed\n");
2938
d38ceaf9 2939 return 0;
83ba126a
AD
2940
2941failed:
89041940 2942 amdgpu_vf_error_trans_all(adev);
83ba126a
AD
2943 if (runtime)
2944 vga_switcheroo_fini_domain_pm_ops(adev->dev);
8840a387 2945
83ba126a 2946 return r;
d38ceaf9
AD
2947}
2948
d38ceaf9
AD
2949/**
2950 * amdgpu_device_fini - tear down the driver
2951 *
2952 * @adev: amdgpu_device pointer
2953 *
2954 * Tear down the driver info (all asics).
2955 * Called at driver shutdown.
2956 */
2957void amdgpu_device_fini(struct amdgpu_device *adev)
2958{
2959 int r;
2960
2961 DRM_INFO("amdgpu: finishing device.\n");
2962 adev->shutdown = true;
e5b03032
ML
2963 /* disable all interrupts */
2964 amdgpu_irq_disable_all(adev);
ff97cba8
ML
2965 if (adev->mode_info.mode_config_initialized){
2966 if (!amdgpu_device_has_dc_support(adev))
c2d88e06 2967 drm_helper_force_disable_all(adev->ddev);
ff97cba8
ML
2968 else
2969 drm_atomic_helper_shutdown(adev->ddev);
2970 }
d38ceaf9 2971 amdgpu_fence_driver_fini(adev);
58e955d9 2972 amdgpu_pm_sysfs_fini(adev);
d38ceaf9 2973 amdgpu_fbdev_fini(adev);
06ec9070 2974 r = amdgpu_device_ip_fini(adev);
ab4fe3e1
HR
2975 if (adev->firmware.gpu_info_fw) {
2976 release_firmware(adev->firmware.gpu_info_fw);
2977 adev->firmware.gpu_info_fw = NULL;
2978 }
d38ceaf9 2979 adev->accel_working = false;
beff74bc 2980 cancel_delayed_work_sync(&adev->delayed_init_work);
d38ceaf9 2981 /* free i2c buses */
4562236b
HW
2982 if (!amdgpu_device_has_dc_support(adev))
2983 amdgpu_i2c_fini(adev);
bfca0289
SL
2984
2985 if (amdgpu_emu_mode != 1)
2986 amdgpu_atombios_fini(adev);
2987
d38ceaf9
AD
2988 kfree(adev->bios);
2989 adev->bios = NULL;
84c8b22e
LW
2990 if (!pci_is_thunderbolt_attached(adev->pdev))
2991 vga_switcheroo_unregister_client(adev->pdev);
83ba126a
AD
2992 if (adev->flags & AMD_IS_PX)
2993 vga_switcheroo_fini_domain_pm_ops(adev->dev);
d38ceaf9
AD
2994 vga_client_register(adev->pdev, NULL, NULL, NULL);
2995 if (adev->rio_mem)
2996 pci_iounmap(adev->pdev, adev->rio_mem);
2997 adev->rio_mem = NULL;
2998 iounmap(adev->rmmio);
2999 adev->rmmio = NULL;
06ec9070 3000 amdgpu_device_doorbell_fini(adev);
e9bc1bf7
YT
3001 if (amdgpu_sriov_vf(adev) && amdgim_is_hwperf(adev))
3002 amdgpu_pm_virt_sysfs_fini(adev);
3003
d38ceaf9 3004 amdgpu_debugfs_regs_cleanup(adev);
dcea6e65 3005 device_remove_file(adev->dev, &dev_attr_pcie_replay_count);
5bb23532 3006 amdgpu_ucode_sysfs_fini(adev);
d155bef0
AB
3007 if (IS_ENABLED(CONFIG_PERF_EVENTS))
3008 amdgpu_pmu_fini(adev);
6698a3d0 3009 amdgpu_debugfs_preempt_cleanup(adev);
f54eeab4 3010 if (amdgpu_discovery && adev->asic_type >= CHIP_NAVI10)
a190d1c7 3011 amdgpu_discovery_fini(adev);
d38ceaf9
AD
3012}
3013
3014
3015/*
3016 * Suspend & resume.
3017 */
3018/**
810ddc3a 3019 * amdgpu_device_suspend - initiate device suspend
d38ceaf9 3020 *
87e3f136
DP
3021 * @dev: drm dev pointer
3022 * @suspend: suspend state
3023 * @fbcon : notify the fbdev of suspend
d38ceaf9
AD
3024 *
3025 * Puts the hw in the suspend state (all asics).
3026 * Returns 0 for success or an error on failure.
3027 * Called at driver suspend.
3028 */
810ddc3a 3029int amdgpu_device_suspend(struct drm_device *dev, bool suspend, bool fbcon)
d38ceaf9
AD
3030{
3031 struct amdgpu_device *adev;
3032 struct drm_crtc *crtc;
3033 struct drm_connector *connector;
5ceb54c6 3034 int r;
d38ceaf9
AD
3035
3036 if (dev == NULL || dev->dev_private == NULL) {
3037 return -ENODEV;
3038 }
3039
3040 adev = dev->dev_private;
3041
3042 if (dev->switch_power_state == DRM_SWITCH_POWER_OFF)
3043 return 0;
3044
44779b43 3045 adev->in_suspend = true;
d38ceaf9
AD
3046 drm_kms_helper_poll_disable(dev);
3047
5f818173
S
3048 if (fbcon)
3049 amdgpu_fbdev_set_suspend(adev, 1);
3050
beff74bc 3051 cancel_delayed_work_sync(&adev->delayed_init_work);
a5459475 3052
4562236b
HW
3053 if (!amdgpu_device_has_dc_support(adev)) {
3054 /* turn off display hw */
3055 drm_modeset_lock_all(dev);
3056 list_for_each_entry(connector, &dev->mode_config.connector_list, head) {
3057 drm_helper_connector_dpms(connector, DRM_MODE_DPMS_OFF);
3058 }
3059 drm_modeset_unlock_all(dev);
fe1053b7
AD
3060 /* unpin the front buffers and cursors */
3061 list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) {
3062 struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
3063 struct drm_framebuffer *fb = crtc->primary->fb;
3064 struct amdgpu_bo *robj;
3065
91334223 3066 if (amdgpu_crtc->cursor_bo && !adev->enable_virtual_display) {
fe1053b7
AD
3067 struct amdgpu_bo *aobj = gem_to_amdgpu_bo(amdgpu_crtc->cursor_bo);
3068 r = amdgpu_bo_reserve(aobj, true);
3069 if (r == 0) {
3070 amdgpu_bo_unpin(aobj);
3071 amdgpu_bo_unreserve(aobj);
3072 }
756e6880 3073 }
756e6880 3074
fe1053b7
AD
3075 if (fb == NULL || fb->obj[0] == NULL) {
3076 continue;
3077 }
3078 robj = gem_to_amdgpu_bo(fb->obj[0]);
3079 /* don't unpin kernel fb objects */
3080 if (!amdgpu_fbdev_robj_is_fb(adev, robj)) {
3081 r = amdgpu_bo_reserve(robj, true);
3082 if (r == 0) {
3083 amdgpu_bo_unpin(robj);
3084 amdgpu_bo_unreserve(robj);
3085 }
d38ceaf9
AD
3086 }
3087 }
3088 }
fe1053b7
AD
3089
3090 amdgpu_amdkfd_suspend(adev);
3091
5e6932fe 3092 amdgpu_ras_suspend(adev);
3093
fe1053b7
AD
3094 r = amdgpu_device_ip_suspend_phase1(adev);
3095
d38ceaf9
AD
3096 /* evict vram memory */
3097 amdgpu_bo_evict_vram(adev);
3098
5ceb54c6 3099 amdgpu_fence_driver_suspend(adev);
d38ceaf9 3100
fe1053b7 3101 r = amdgpu_device_ip_suspend_phase2(adev);
d38ceaf9 3102
a0a71e49
AD
3103 /* evict remaining vram memory
3104 * This second call to evict vram is to evict the gart page table
3105 * using the CPU.
3106 */
d38ceaf9
AD
3107 amdgpu_bo_evict_vram(adev);
3108
3109 pci_save_state(dev->pdev);
3110 if (suspend) {
3111 /* Shut down the device */
3112 pci_disable_device(dev->pdev);
3113 pci_set_power_state(dev->pdev, PCI_D3hot);
74b0b157 3114 } else {
3115 r = amdgpu_asic_reset(adev);
3116 if (r)
3117 DRM_ERROR("amdgpu asic reset failed\n");
d38ceaf9
AD
3118 }
3119
d38ceaf9
AD
3120 return 0;
3121}
3122
3123/**
810ddc3a 3124 * amdgpu_device_resume - initiate device resume
d38ceaf9 3125 *
87e3f136
DP
3126 * @dev: drm dev pointer
3127 * @resume: resume state
3128 * @fbcon : notify the fbdev of resume
d38ceaf9
AD
3129 *
3130 * Bring the hw back to operating state (all asics).
3131 * Returns 0 for success or an error on failure.
3132 * Called at driver resume.
3133 */
810ddc3a 3134int amdgpu_device_resume(struct drm_device *dev, bool resume, bool fbcon)
d38ceaf9
AD
3135{
3136 struct drm_connector *connector;
3137 struct amdgpu_device *adev = dev->dev_private;
756e6880 3138 struct drm_crtc *crtc;
03161a6e 3139 int r = 0;
d38ceaf9
AD
3140
3141 if (dev->switch_power_state == DRM_SWITCH_POWER_OFF)
3142 return 0;
3143
d38ceaf9
AD
3144 if (resume) {
3145 pci_set_power_state(dev->pdev, PCI_D0);
3146 pci_restore_state(dev->pdev);
74b0b157 3147 r = pci_enable_device(dev->pdev);
03161a6e 3148 if (r)
4d3b9ae5 3149 return r;
d38ceaf9
AD
3150 }
3151
3152 /* post card */
39c640c0 3153 if (amdgpu_device_need_post(adev)) {
74b0b157 3154 r = amdgpu_atom_asic_init(adev->mode_info.atom_context);
3155 if (r)
3156 DRM_ERROR("amdgpu asic init failed\n");
3157 }
d38ceaf9 3158
06ec9070 3159 r = amdgpu_device_ip_resume(adev);
e6707218 3160 if (r) {
06ec9070 3161 DRM_ERROR("amdgpu_device_ip_resume failed (%d).\n", r);
4d3b9ae5 3162 return r;
e6707218 3163 }
5ceb54c6
AD
3164 amdgpu_fence_driver_resume(adev);
3165
d38ceaf9 3166
06ec9070 3167 r = amdgpu_device_ip_late_init(adev);
03161a6e 3168 if (r)
4d3b9ae5 3169 return r;
d38ceaf9 3170
beff74bc
AD
3171 queue_delayed_work(system_wq, &adev->delayed_init_work,
3172 msecs_to_jiffies(AMDGPU_RESUME_MS));
3173
fe1053b7
AD
3174 if (!amdgpu_device_has_dc_support(adev)) {
3175 /* pin cursors */
3176 list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) {
3177 struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
3178
91334223 3179 if (amdgpu_crtc->cursor_bo && !adev->enable_virtual_display) {
fe1053b7
AD
3180 struct amdgpu_bo *aobj = gem_to_amdgpu_bo(amdgpu_crtc->cursor_bo);
3181 r = amdgpu_bo_reserve(aobj, true);
3182 if (r == 0) {
3183 r = amdgpu_bo_pin(aobj, AMDGPU_GEM_DOMAIN_VRAM);
3184 if (r != 0)
3185 DRM_ERROR("Failed to pin cursor BO (%d)\n", r);
3186 amdgpu_crtc->cursor_addr = amdgpu_bo_gpu_offset(aobj);
3187 amdgpu_bo_unreserve(aobj);
3188 }
756e6880
AD
3189 }
3190 }
3191 }
ba997709
YZ
3192 r = amdgpu_amdkfd_resume(adev);
3193 if (r)
3194 return r;
756e6880 3195
96a5d8d4 3196 /* Make sure IB tests flushed */
beff74bc 3197 flush_delayed_work(&adev->delayed_init_work);
96a5d8d4 3198
d38ceaf9
AD
3199 /* blat the mode back in */
3200 if (fbcon) {
4562236b
HW
3201 if (!amdgpu_device_has_dc_support(adev)) {
3202 /* pre DCE11 */
3203 drm_helper_resume_force_mode(dev);
3204
3205 /* turn on display hw */
3206 drm_modeset_lock_all(dev);
3207 list_for_each_entry(connector, &dev->mode_config.connector_list, head) {
3208 drm_helper_connector_dpms(connector, DRM_MODE_DPMS_ON);
3209 }
3210 drm_modeset_unlock_all(dev);
d38ceaf9 3211 }
4d3b9ae5 3212 amdgpu_fbdev_set_suspend(adev, 0);
d38ceaf9
AD
3213 }
3214
3215 drm_kms_helper_poll_enable(dev);
23a1a9e5 3216
5e6932fe 3217 amdgpu_ras_resume(adev);
3218
23a1a9e5
L
3219 /*
3220 * Most of the connector probing functions try to acquire runtime pm
3221 * refs to ensure that the GPU is powered on when connector polling is
3222 * performed. Since we're calling this from a runtime PM callback,
3223 * trying to acquire rpm refs will cause us to deadlock.
3224 *
3225 * Since we're guaranteed to be holding the rpm lock, it's safe to
3226 * temporarily disable the rpm helpers so this doesn't deadlock us.
3227 */
3228#ifdef CONFIG_PM
3229 dev->dev->power.disable_depth++;
3230#endif
4562236b
HW
3231 if (!amdgpu_device_has_dc_support(adev))
3232 drm_helper_hpd_irq_event(dev);
3233 else
3234 drm_kms_helper_hotplug_event(dev);
23a1a9e5
L
3235#ifdef CONFIG_PM
3236 dev->dev->power.disable_depth--;
3237#endif
44779b43
RZ
3238 adev->in_suspend = false;
3239
4d3b9ae5 3240 return 0;
d38ceaf9
AD
3241}
3242
e3ecdffa
AD
3243/**
3244 * amdgpu_device_ip_check_soft_reset - did soft reset succeed
3245 *
3246 * @adev: amdgpu_device pointer
3247 *
3248 * The list of all the hardware IPs that make up the asic is walked and
3249 * the check_soft_reset callbacks are run. check_soft_reset determines
3250 * if the asic is still hung or not.
3251 * Returns true if any of the IPs are still in a hung state, false if not.
3252 */
06ec9070 3253static bool amdgpu_device_ip_check_soft_reset(struct amdgpu_device *adev)
63fbf42f
CZ
3254{
3255 int i;
3256 bool asic_hang = false;
3257
f993d628
ML
3258 if (amdgpu_sriov_vf(adev))
3259 return true;
3260
8bc04c29
AD
3261 if (amdgpu_asic_need_full_reset(adev))
3262 return true;
3263
63fbf42f 3264 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 3265 if (!adev->ip_blocks[i].status.valid)
63fbf42f 3266 continue;
a1255107
AD
3267 if (adev->ip_blocks[i].version->funcs->check_soft_reset)
3268 adev->ip_blocks[i].status.hang =
3269 adev->ip_blocks[i].version->funcs->check_soft_reset(adev);
3270 if (adev->ip_blocks[i].status.hang) {
3271 DRM_INFO("IP block:%s is hung!\n", adev->ip_blocks[i].version->funcs->name);
63fbf42f
CZ
3272 asic_hang = true;
3273 }
3274 }
3275 return asic_hang;
3276}
3277
e3ecdffa
AD
3278/**
3279 * amdgpu_device_ip_pre_soft_reset - prepare for soft reset
3280 *
3281 * @adev: amdgpu_device pointer
3282 *
3283 * The list of all the hardware IPs that make up the asic is walked and the
3284 * pre_soft_reset callbacks are run if the block is hung. pre_soft_reset
3285 * handles any IP specific hardware or software state changes that are
3286 * necessary for a soft reset to succeed.
3287 * Returns 0 on success, negative error code on failure.
3288 */
06ec9070 3289static int amdgpu_device_ip_pre_soft_reset(struct amdgpu_device *adev)
d31a501e
CZ
3290{
3291 int i, r = 0;
3292
3293 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 3294 if (!adev->ip_blocks[i].status.valid)
d31a501e 3295 continue;
a1255107
AD
3296 if (adev->ip_blocks[i].status.hang &&
3297 adev->ip_blocks[i].version->funcs->pre_soft_reset) {
3298 r = adev->ip_blocks[i].version->funcs->pre_soft_reset(adev);
d31a501e
CZ
3299 if (r)
3300 return r;
3301 }
3302 }
3303
3304 return 0;
3305}
3306
e3ecdffa
AD
3307/**
3308 * amdgpu_device_ip_need_full_reset - check if a full asic reset is needed
3309 *
3310 * @adev: amdgpu_device pointer
3311 *
3312 * Some hardware IPs cannot be soft reset. If they are hung, a full gpu
3313 * reset is necessary to recover.
3314 * Returns true if a full asic reset is required, false if not.
3315 */
06ec9070 3316static bool amdgpu_device_ip_need_full_reset(struct amdgpu_device *adev)
35d782fe 3317{
da146d3b
AD
3318 int i;
3319
8bc04c29
AD
3320 if (amdgpu_asic_need_full_reset(adev))
3321 return true;
3322
da146d3b 3323 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 3324 if (!adev->ip_blocks[i].status.valid)
da146d3b 3325 continue;
a1255107
AD
3326 if ((adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) ||
3327 (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) ||
3328 (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_ACP) ||
98512bb8
KW
3329 (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_DCE) ||
3330 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP) {
a1255107 3331 if (adev->ip_blocks[i].status.hang) {
da146d3b
AD
3332 DRM_INFO("Some block need full reset!\n");
3333 return true;
3334 }
3335 }
35d782fe
CZ
3336 }
3337 return false;
3338}
3339
e3ecdffa
AD
3340/**
3341 * amdgpu_device_ip_soft_reset - do a soft reset
3342 *
3343 * @adev: amdgpu_device pointer
3344 *
3345 * The list of all the hardware IPs that make up the asic is walked and the
3346 * soft_reset callbacks are run if the block is hung. soft_reset handles any
3347 * IP specific hardware or software state changes that are necessary to soft
3348 * reset the IP.
3349 * Returns 0 on success, negative error code on failure.
3350 */
06ec9070 3351static int amdgpu_device_ip_soft_reset(struct amdgpu_device *adev)
35d782fe
CZ
3352{
3353 int i, r = 0;
3354
3355 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 3356 if (!adev->ip_blocks[i].status.valid)
35d782fe 3357 continue;
a1255107
AD
3358 if (adev->ip_blocks[i].status.hang &&
3359 adev->ip_blocks[i].version->funcs->soft_reset) {
3360 r = adev->ip_blocks[i].version->funcs->soft_reset(adev);
35d782fe
CZ
3361 if (r)
3362 return r;
3363 }
3364 }
3365
3366 return 0;
3367}
3368
e3ecdffa
AD
3369/**
3370 * amdgpu_device_ip_post_soft_reset - clean up from soft reset
3371 *
3372 * @adev: amdgpu_device pointer
3373 *
3374 * The list of all the hardware IPs that make up the asic is walked and the
3375 * post_soft_reset callbacks are run if the asic was hung. post_soft_reset
3376 * handles any IP specific hardware or software state changes that are
3377 * necessary after the IP has been soft reset.
3378 * Returns 0 on success, negative error code on failure.
3379 */
06ec9070 3380static int amdgpu_device_ip_post_soft_reset(struct amdgpu_device *adev)
35d782fe
CZ
3381{
3382 int i, r = 0;
3383
3384 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 3385 if (!adev->ip_blocks[i].status.valid)
35d782fe 3386 continue;
a1255107
AD
3387 if (adev->ip_blocks[i].status.hang &&
3388 adev->ip_blocks[i].version->funcs->post_soft_reset)
3389 r = adev->ip_blocks[i].version->funcs->post_soft_reset(adev);
35d782fe
CZ
3390 if (r)
3391 return r;
3392 }
3393
3394 return 0;
3395}
3396
e3ecdffa 3397/**
c33adbc7 3398 * amdgpu_device_recover_vram - Recover some VRAM contents
e3ecdffa
AD
3399 *
3400 * @adev: amdgpu_device pointer
3401 *
3402 * Restores the contents of VRAM buffers from the shadows in GTT. Used to
3403 * restore things like GPUVM page tables after a GPU reset where
3404 * the contents of VRAM might be lost.
403009bf
CK
3405 *
3406 * Returns:
3407 * 0 on success, negative error code on failure.
e3ecdffa 3408 */
c33adbc7 3409static int amdgpu_device_recover_vram(struct amdgpu_device *adev)
c41d1cf6 3410{
c41d1cf6 3411 struct dma_fence *fence = NULL, *next = NULL;
403009bf
CK
3412 struct amdgpu_bo *shadow;
3413 long r = 1, tmo;
c41d1cf6
ML
3414
3415 if (amdgpu_sriov_runtime(adev))
b045d3af 3416 tmo = msecs_to_jiffies(8000);
c41d1cf6
ML
3417 else
3418 tmo = msecs_to_jiffies(100);
3419
3420 DRM_INFO("recover vram bo from shadow start\n");
3421 mutex_lock(&adev->shadow_list_lock);
403009bf
CK
3422 list_for_each_entry(shadow, &adev->shadow_list, shadow_list) {
3423
3424 /* No need to recover an evicted BO */
3425 if (shadow->tbo.mem.mem_type != TTM_PL_TT ||
b575f10d 3426 shadow->tbo.mem.start == AMDGPU_BO_INVALID_OFFSET ||
403009bf
CK
3427 shadow->parent->tbo.mem.mem_type != TTM_PL_VRAM)
3428 continue;
3429
3430 r = amdgpu_bo_restore_shadow(shadow, &next);
3431 if (r)
3432 break;
3433
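/*
 * Pipeline the restores: the copy for this shadow was just started
 * above ('next' is its fence); wait on the previous restore's fence
 * so copies overlap with the waits.
 */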
c41d1cf6 3434 if (fence) {
1712fb1a 3435 tmo = dma_fence_wait_timeout(fence, false, tmo);
403009bf
CK
3436 dma_fence_put(fence);
3437 fence = next;
1712fb1a 3438 if (tmo == 0) {
3439 r = -ETIMEDOUT;
c41d1cf6 3440 break;
1712fb1a 3441 } else if (tmo < 0) {
3442 r = tmo;
3443 break;
3444 }
403009bf
CK
3445 } else {
3446 fence = next;
c41d1cf6 3447 }
c41d1cf6
ML
3448 }
3449 mutex_unlock(&adev->shadow_list_lock);
3450
403009bf
CK
3451 if (fence)
3452 tmo = dma_fence_wait_timeout(fence, false, tmo);
c41d1cf6
ML
3453 dma_fence_put(fence);
3454
1712fb1a 3455 if (r < 0 || tmo <= 0) {
3456 DRM_ERROR("recover vram bo from shadow failed, r is %ld, tmo is %ld\n", r, tmo);
403009bf
CK
3457 return -EIO;
3458 }
c41d1cf6 3459
403009bf
CK
3460 DRM_INFO("recover vram bo from shadow done\n");
3461 return 0;
c41d1cf6
ML
3462}
3463
a90ad3c2 3464
e3ecdffa 3465/**
06ec9070 3466 * amdgpu_device_reset_sriov - reset ASIC for SR-IOV vf
5740682e
ML
3467 *
3468 * @adev: amdgpu device pointer
87e3f136 3469 * @from_hypervisor: request from hypervisor
5740682e
ML
3470 *
3471 * Do a VF FLR and reinitialize the ASIC.
3f48c681 3472 * Returns 0 on success, negative error code on failure.
e3ecdffa
AD
3473 */
3474static int amdgpu_device_reset_sriov(struct amdgpu_device *adev,
3475 bool from_hypervisor)
5740682e
ML
3476{
3477 int r;
3478
3479 if (from_hypervisor)
3480 r = amdgpu_virt_request_full_gpu(adev, true);
3481 else
3482 r = amdgpu_virt_reset_gpu(adev);
3483 if (r)
3484 return r;
a90ad3c2 3485
f81e8d53
WL
3486 amdgpu_amdkfd_pre_reset(adev);
3487
a90ad3c2 3488 /* Resume IP prior to SMC */
06ec9070 3489 r = amdgpu_device_ip_reinit_early_sriov(adev);
5740682e
ML
3490 if (r)
3491 goto error;
a90ad3c2
ML
3492
3493 /* we need to recover the GART prior to resuming SMC/CP/SDMA */
c1c7ce8f 3494 amdgpu_gtt_mgr_recover(&adev->mman.bdev.man[TTM_PL_TT]);
a90ad3c2 3495
7a3e0bb2
RZ
3496 r = amdgpu_device_fw_loading(adev);
3497 if (r)
3498 return r;
3499
a90ad3c2 3500 /* now we are okay to resume SMC/CP/SDMA */
06ec9070 3501 r = amdgpu_device_ip_reinit_late_sriov(adev);
5740682e
ML
3502 if (r)
3503 goto error;
a90ad3c2
ML
3504
3505 amdgpu_irq_gpu_reset_resume_helper(adev);
5740682e 3506 r = amdgpu_ib_ring_tests(adev);
f81e8d53 3507 amdgpu_amdkfd_post_reset(adev);
a90ad3c2 3508
abc34253 3509error:
d3c117e5 3510 amdgpu_virt_init_data_exchange(adev);
abc34253 3511 amdgpu_virt_release_full_gpu(adev, true);
c41d1cf6
ML
3512 if (!r && adev->virt.gim_feature & AMDGIM_FEATURE_GIM_FLR_VRAMLOST) {
3513 atomic_inc(&adev->vram_lost_counter);
c33adbc7 3514 r = amdgpu_device_recover_vram(adev);
a90ad3c2
ML
3515 }
3516
3517 return r;
3518}
3519
12938fad
CK
3520/**
3521 * amdgpu_device_should_recover_gpu - check if we should try GPU recovery
3522 *
3523 * @adev: amdgpu device pointer
3524 *
3525 * Check amdgpu_gpu_recovery and SRIOV status to see if we should try to recover
3526 * a hung GPU.
3527 */
3528bool amdgpu_device_should_recover_gpu(struct amdgpu_device *adev)
3529{
3530 if (!amdgpu_device_ip_check_soft_reset(adev)) {
3531 DRM_INFO("Timeout, but no hardware hang detected.\n");
3532 return false;
3533 }
3534
3ba7b418
AG
3535 if (amdgpu_gpu_recovery == 0)
3536 goto disabled;
3537
3538 if (amdgpu_sriov_vf(adev))
3539 return true;
3540
3541 if (amdgpu_gpu_recovery == -1) {
3542 switch (adev->asic_type) {
fc42d47c
AG
3543 case CHIP_BONAIRE:
3544 case CHIP_HAWAII:
3ba7b418
AG
3545 case CHIP_TOPAZ:
3546 case CHIP_TONGA:
3547 case CHIP_FIJI:
3548 case CHIP_POLARIS10:
3549 case CHIP_POLARIS11:
3550 case CHIP_POLARIS12:
3551 case CHIP_VEGAM:
3552 case CHIP_VEGA20:
3553 case CHIP_VEGA10:
3554 case CHIP_VEGA12:
3555 break;
3556 default:
3557 goto disabled;
3558 }
12938fad
CK
3559 }
3560
3561 return true;
3ba7b418
AG
3562
3563disabled:
3564 DRM_INFO("GPU recovery disabled.\n");
3565 return false;
12938fad
CK
3566}
3567
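/*
 * Illustrative sketch, not part of this driver: the expected caller is the
 * job timeout handler, which gates a reset on the check above.  The function
 * name and the log text below are assumptions for the example only.
 */
static void example_job_timedout(struct amdgpu_ring *ring,
				 struct amdgpu_job *job)
{
	if (amdgpu_device_should_recover_gpu(ring->adev))
		amdgpu_device_gpu_recover(ring->adev, job);
	else
		DRM_INFO("GPU recovery disabled, leaving the job hung.\n");
}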
5c6dd71e 3568
26bc5340
AG
3569static int amdgpu_device_pre_asic_reset(struct amdgpu_device *adev,
3570 struct amdgpu_job *job,
3571 bool *need_full_reset_arg)
3572{
3573 int i, r = 0;
3574 bool need_full_reset = *need_full_reset_arg;
71182665 3575
71182665 3576 /* block all schedulers and reset given job's ring */
0875dc9e
CZ
3577 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
3578 struct amdgpu_ring *ring = adev->rings[i];
3579
51687759 3580 if (!ring || !ring->sched.thread)
0875dc9e 3581 continue;
5740682e 3582
2f9d4084
ML
3583 /* after all hw jobs are reset, hw fence is meaningless, so force_completion */
3584 amdgpu_fence_driver_force_completion(ring);
0875dc9e 3585 }
d38ceaf9 3586
222b5f04
AG
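/*
 * Bump the guilty job's karma so the scheduler can mark its context
 * guilty once the hang limit is exceeded and skip resubmitting it.
 */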
3587 if (job)
3588 drm_sched_increase_karma(&job->base);
3589
1d721ed6 3590 /* Don't suspend on bare metal if we are not going to HW reset the ASIC */
26bc5340
AG
3591 if (!amdgpu_sriov_vf(adev)) {
3592
3593 if (!need_full_reset)
3594 need_full_reset = amdgpu_device_ip_need_full_reset(adev);
3595
3596 if (!need_full_reset) {
3597 amdgpu_device_ip_pre_soft_reset(adev);
3598 r = amdgpu_device_ip_soft_reset(adev);
3599 amdgpu_device_ip_post_soft_reset(adev);
3600 if (r || amdgpu_device_ip_check_soft_reset(adev)) {
3601 DRM_INFO("soft reset failed, will fallback to full reset!\n");
3602 need_full_reset = true;
3603 }
3604 }
3605
3606 if (need_full_reset)
3607 r = amdgpu_device_ip_suspend(adev);
3608
3609 *need_full_reset_arg = need_full_reset;
3610 }
3611
3612 return r;
3613}
3614
3615static int amdgpu_do_asic_reset(struct amdgpu_hive_info *hive,
3616 struct list_head *device_list_handle,
3617 bool *need_full_reset_arg)
3618{
3619 struct amdgpu_device *tmp_adev = NULL;
3620 bool need_full_reset = *need_full_reset_arg, vram_lost = false;
3621 int r = 0;
3622
3623 /*
3624 * ASIC reset has to be done on all XGMI hive nodes ASAP
3625 * to allow proper link negotiation in FW (within 1 sec)
3626 */
3627 if (need_full_reset) {
3628 list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) {
d4535e2c
AG
3629 /* For XGMI run all resets in parallel to speed up the process */
3630 if (tmp_adev->gmc.xgmi.num_physical_nodes > 1) {
3631 if (!queue_work(system_highpri_wq, &tmp_adev->xgmi_reset_work))
3632 r = -EALREADY;
3633 } else
3634 r = amdgpu_asic_reset(tmp_adev);
3635
3636 if (r) {
fed184e9 3637 DRM_ERROR("ASIC reset failed with error, %d for drm dev, %s",
26bc5340 3638 r, tmp_adev->ddev->unique);
d4535e2c
AG
3639 break;
3640 }
3641 }
3642
3643 /* For XGMI wait for all PSP resets to complete before proceeding */
3644 if (!r) {
3645 list_for_each_entry(tmp_adev, device_list_handle,
3646 gmc.xgmi.head) {
3647 if (tmp_adev->gmc.xgmi.num_physical_nodes > 1) {
3648 flush_work(&tmp_adev->xgmi_reset_work);
3649 r = tmp_adev->asic_reset_res;
3650 if (r)
3651 break;
3652 }
3653 }
2be4c4a9 3654
3655 list_for_each_entry(tmp_adev, device_list_handle,
3656 gmc.xgmi.head) {
3657 amdgpu_ras_reserve_bad_pages(tmp_adev);
3658 }
26bc5340
AG
3659 }
3660 }
3661
3662
3663 list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) {
3664 if (need_full_reset) {
3665 /* post card */
3666 if (amdgpu_atom_asic_init(tmp_adev->mode_info.atom_context))
3667 DRM_WARN("asic atom init failed!");
3668
3669 if (!r) {
3670 dev_info(tmp_adev->dev, "GPU reset succeeded, trying to resume\n");
3671 r = amdgpu_device_ip_resume_phase1(tmp_adev);
3672 if (r)
3673 goto out;
3674
3675 vram_lost = amdgpu_device_check_vram_lost(tmp_adev);
3676 if (vram_lost) {
77e7f829 3677 DRM_INFO("VRAM is lost due to GPU reset!\n");
26bc5340
AG
3678 atomic_inc(&tmp_adev->vram_lost_counter);
3679 }
3680
3681 r = amdgpu_gtt_mgr_recover(
3682 &tmp_adev->mman.bdev.man[TTM_PL_TT]);
3683 if (r)
3684 goto out;
3685
3686 r = amdgpu_device_fw_loading(tmp_adev);
3687 if (r)
3688 return r;
3689
3690 r = amdgpu_device_ip_resume_phase2(tmp_adev);
3691 if (r)
3692 goto out;
3693
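/*
 * Re-record the reference bytes used by amdgpu_device_check_vram_lost()
 * so that a subsequent reset can detect VRAM loss again.
 */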
3694 if (vram_lost)
3695 amdgpu_device_fill_reset_magic(tmp_adev);
3696
fdafb359
EQ
3697 /*
3698 * Add this ASIC back as tracked since the reset has
3699 * already completed successfully.
3700 */
3701 amdgpu_register_gpu_instance(tmp_adev);
3702
7c04ca50 3703 r = amdgpu_device_ip_late_init(tmp_adev);
3704 if (r)
3705 goto out;
3706
e79a04d5 3707 /* must succeed. */
511fdbc3 3708 amdgpu_ras_resume(tmp_adev);
e79a04d5 3709
26bc5340
AG
3710 /* Update PSP FW topology after reset */
3711 if (hive && tmp_adev->gmc.xgmi.num_physical_nodes > 1)
3712 r = amdgpu_xgmi_update_topology(hive, tmp_adev);
3713 }
3714 }
3715
3716
3717out:
3718 if (!r) {
3719 amdgpu_irq_gpu_reset_resume_helper(tmp_adev);
3720 r = amdgpu_ib_ring_tests(tmp_adev);
3721 if (r) {
3722 dev_err(tmp_adev->dev, "ib ring test failed (%d).\n", r);
3723 r = amdgpu_device_ip_suspend(tmp_adev);
3724 need_full_reset = true;
3725 r = -EAGAIN;
3726 goto end;
3727 }
3728 }
3729
3730 if (!r)
3731 r = amdgpu_device_recover_vram(tmp_adev);
3732 else
3733 tmp_adev->asic_reset_res = r;
3734 }
3735
3736end:
3737 *need_full_reset_arg = need_full_reset;
3738 return r;
3739}
3740
1d721ed6 3741static bool amdgpu_device_lock_adev(struct amdgpu_device *adev, bool trylock)
26bc5340 3742{
1d721ed6
AG
3743 if (trylock) {
3744 if (!mutex_trylock(&adev->lock_reset))
3745 return false;
3746 } else
3747 mutex_lock(&adev->lock_reset);
5740682e 3748
26bc5340
AG
3749 atomic_inc(&adev->gpu_reset_counter);
3750 adev->in_gpu_reset = 1;
a3a09142
AD
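/*
 * Tell the SMU/powerplay layer what state the MP1 firmware will be in
 * during this reset so it can quiesce accordingly: a full shutdown for a
 * mode1 (whole-ASIC) reset, a lighter reset state for mode2.  How the
 * firmware reacts is SMU-implementation specific.
 */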
3751 switch (amdgpu_asic_reset_method(adev)) {
3752 case AMD_RESET_METHOD_MODE1:
3753 adev->mp1_state = PP_MP1_STATE_SHUTDOWN;
3754 break;
3755 case AMD_RESET_METHOD_MODE2:
3756 adev->mp1_state = PP_MP1_STATE_RESET;
3757 break;
3758 default:
3759 adev->mp1_state = PP_MP1_STATE_NONE;
3760 break;
3761 }
7b184b00 3762 /* Block kfd: SRIOV would do it separately */
3763 if (!amdgpu_sriov_vf(adev))
3764 amdgpu_amdkfd_pre_reset(adev);
1d721ed6
AG
3765
3766 return true;
26bc5340 3767}
d38ceaf9 3768
26bc5340
AG
3769static void amdgpu_device_unlock_adev(struct amdgpu_device *adev)
3770{
7b184b00 3771 /* unlock kfd: SRIOV would do it separately */
3772 if (!amdgpu_sriov_vf(adev))
3773 amdgpu_amdkfd_post_reset(adev);
89041940 3774 amdgpu_vf_error_trans_all(adev);
a3a09142 3775 adev->mp1_state = PP_MP1_STATE_NONE;
13a752e3
ML
3776 adev->in_gpu_reset = 0;
3777 mutex_unlock(&adev->lock_reset);
26bc5340
AG
3778}
3779
3780
3781/**
3782 * amdgpu_device_gpu_recover - reset the asic and recover scheduler
3783 *
3784 * @adev: amdgpu device pointer
3785 * @job: which job triggered the hang
3786 *
3787 * Attempt to reset the GPU if it has hung (all asics).
3788 * Attempt to do a soft reset or a full reset and reinitialize the ASIC.
3789 * Returns 0 for success or an error on failure.
3790 */
3791
3792int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
3793 struct amdgpu_job *job)
3794{
1d721ed6
AG
3795 struct list_head device_list, *device_list_handle = NULL;
3796 bool need_full_reset, job_signaled;
26bc5340 3797 struct amdgpu_hive_info *hive = NULL;
26bc5340 3798 struct amdgpu_device *tmp_adev = NULL;
1d721ed6 3799 int i, r = 0;
26bc5340 3800
1d721ed6 3801 need_full_reset = job_signaled = false;
26bc5340
AG
3802 INIT_LIST_HEAD(&device_list);
3803
3804 dev_info(adev->dev, "GPU reset begin!\n");
3805
beff74bc 3806 cancel_delayed_work_sync(&adev->delayed_init_work);
c53e4db7 3807
1d721ed6
AG
3808 hive = amdgpu_get_xgmi_hive(adev, false);
3809
26bc5340 3810 /*
1d721ed6
AG
3811 * Here we trylock to avoid a chain of resets executing from
3812 * either a trigger by jobs on different adevs in the XGMI hive or jobs on
3813 * different schedulers for the same device while this TO handler is running.
3814 * We always reset all schedulers for a device and all devices for an XGMI
3815 * hive, so that should take care of them too.
26bc5340 3816 */
1d721ed6
AG
3817
3818 if (hive && !mutex_trylock(&hive->reset_lock)) {
3819 DRM_INFO("Bailing on TDR for s_job:%llx, hive: %llx as another already in progress",
3820 job->base.id, hive->hive_id);
26bc5340 3821 return 0;
1d721ed6 3822 }
26bc5340
AG
3823
3824 /* Start with adev pre asic reset first for soft reset check.*/
1d721ed6
AG
3825 if (!amdgpu_device_lock_adev(adev, !hive)) {
3826 DRM_INFO("Bailing on TDR for s_job:%llx, as another already in progress",
3827 job->base.id);
3828 return 0;
26bc5340
AG
3829 }
3830
3831 /* Build list of devices to reset */
1d721ed6 3832 if (adev->gmc.xgmi.num_physical_nodes > 1) {
26bc5340
AG
3833 if (!hive) {
3834 amdgpu_device_unlock_adev(adev);
3835 return -ENODEV;
3836 }
3837
3838 /*
3839 * In case we are in XGMI hive mode, device reset is done for all the
3840 * nodes in the hive to retrain all XGMI links, and hence the reset
3841 * sequence is executed in a loop on all nodes.
3842 */
3843 device_list_handle = &hive->device_list;
3844 } else {
3845 list_add_tail(&adev->gmc.xgmi.head, &device_list);
3846 device_list_handle = &device_list;
3847 }
3848
fdafb359
EQ
3849 /*
3850 * Mark these ASICs to be reset as untracked first,
3851 * and add them back after the reset has completed.
3852 */
3853 list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head)
3854 amdgpu_unregister_gpu_instance(tmp_adev);
3855
1d721ed6
AG
3856 /* block all schedulers and reset given job's ring */
3857 list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) {
f1c1314b 3858 /* disable ras on ALL IPs */
3859 if (amdgpu_device_ip_need_full_reset(tmp_adev))
3860 amdgpu_ras_suspend(tmp_adev);
3861
1d721ed6
AG
3862 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
3863 struct amdgpu_ring *ring = tmp_adev->rings[i];
3864
3865 if (!ring || !ring->sched.thread)
3866 continue;
3867
3868 drm_sched_stop(&ring->sched, &job->base);
3869 }
3870 }
3871
3872
3873 /*
3874 * Must check guilty signal here since after this point all old
3875 * HW fences are force signaled.
3876 *
3877 * job->base holds a reference to parent fence
3878 */
3879 if (job && job->base.s_fence->parent &&
3880 dma_fence_is_signaled(job->base.s_fence->parent))
3881 job_signaled = true;
3882
3883 if (!amdgpu_device_ip_need_full_reset(adev))
3884 device_list_handle = &device_list;
3885
3886 if (job_signaled) {
3887 dev_info(adev->dev, "Guilty job already signaled, skipping HW reset");
3888 goto skip_hw_reset;
3889 }
3890
3891
3892 /* Guilty job will be freed after this */
3893 r = amdgpu_device_pre_asic_reset(adev,
3894 job,
3895 &need_full_reset);
3896 if (r) {
3897 /* TODO: Should we stop? */
3898 DRM_ERROR("GPU pre asic reset failed with err, %d for drm dev, %s ",
3899 r, adev->ddev->unique);
3900 adev->asic_reset_res = r;
3901 }
3902
26bc5340
AG
3903retry: /* Rest of adevs pre asic reset from XGMI hive. */
3904 list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) {
3905
3906 if (tmp_adev == adev)
3907 continue;
3908
1d721ed6 3909 amdgpu_device_lock_adev(tmp_adev, false);
26bc5340
AG
3910 r = amdgpu_device_pre_asic_reset(tmp_adev,
3911 NULL,
3912 &need_full_reset);
3913 /* TODO: Should we stop? */
3914 if (r) {
3915 DRM_ERROR("GPU pre asic reset failed with err, %d for drm dev, %s ",
3916 r, tmp_adev->ddev->unique);
3917 tmp_adev->asic_reset_res = r;
3918 }
3919 }
3920
3921 /* Actual ASIC resets if needed.*/
3922 /* TODO Implement XGMI hive reset logic for SRIOV */
3923 if (amdgpu_sriov_vf(adev)) {
3924 r = amdgpu_device_reset_sriov(adev, job ? false : true);
3925 if (r)
3926 adev->asic_reset_res = r;
3927 } else {
3928 r = amdgpu_do_asic_reset(hive, device_list_handle, &need_full_reset);
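/*
 * -EAGAIN means the post-reset IB ring test failed inside
 * amdgpu_do_asic_reset(); need_full_reset is now true, so retry the
 * whole sequence as a full ASIC reset.
 */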
3929 if (r && r == -EAGAIN)
3930 goto retry;
3931 }
3932
1d721ed6
AG
3933skip_hw_reset:
3934
26bc5340
AG
3935 /* Post ASIC reset for all devs. */
3936 list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) {
1d721ed6
AG
3937 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
3938 struct amdgpu_ring *ring = tmp_adev->rings[i];
3939
3940 if (!ring || !ring->sched.thread)
3941 continue;
3942
3943 /* No point in resubmitting jobs if we didn't HW reset */
3944 if (!tmp_adev->asic_reset_res && !job_signaled)
3945 drm_sched_resubmit_jobs(&ring->sched);
3946
3947 drm_sched_start(&ring->sched, !tmp_adev->asic_reset_res);
3948 }
3949
3950 if (!amdgpu_device_has_dc_support(tmp_adev) && !job_signaled) {
3951 drm_helper_resume_force_mode(tmp_adev->ddev);
3952 }
3953
3954 tmp_adev->asic_reset_res = 0;
26bc5340
AG
3955
3956 if (r) {
3957 /* bad news, how do we tell this to userspace? */
3958 dev_info(tmp_adev->dev, "GPU reset(%d) failed\n", atomic_read(&adev->gpu_reset_counter));
3959 amdgpu_vf_error_put(tmp_adev, AMDGIM_ERROR_VF_GPU_RESET_FAIL, 0, r);
3960 } else {
3961 dev_info(tmp_adev->dev, "GPU reset(%d) succeeded!\n", atomic_read(&adev->gpu_reset_counter));
3962 }
3963
3964 amdgpu_device_unlock_adev(tmp_adev);
3965 }
3966
1d721ed6 3967 if (hive)
22d6575b 3968 mutex_unlock(&hive->reset_lock);
26bc5340
AG
3969
3970 if (r)
3971 dev_info(adev->dev, "GPU reset end with ret = %d\n", r);
d38ceaf9
AD
3972 return r;
3973}
3974
e3ecdffa
AD
3975/**
3976 * amdgpu_device_get_pcie_info - fetch pcie info about the PCIE slot
3977 *
3978 * @adev: amdgpu_device pointer
3979 *
3980 * Fetches and stores in the driver the PCIE capabilities (gen speed
3981 * and lanes) of the slot the device is in. Handles APUs and
3982 * virtualized environments where PCIE config space may not be available.
3983 */
5494d864 3984static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev)
d0dd7f0c 3985{
5d9a6330 3986 struct pci_dev *pdev;
c5313457
HK
3987 enum pci_bus_speed speed_cap, platform_speed_cap;
3988 enum pcie_link_width platform_link_width;
d0dd7f0c 3989
cd474ba0
AD
3990 if (amdgpu_pcie_gen_cap)
3991 adev->pm.pcie_gen_mask = amdgpu_pcie_gen_cap;
d0dd7f0c 3992
cd474ba0
AD
3993 if (amdgpu_pcie_lane_cap)
3994 adev->pm.pcie_mlw_mask = amdgpu_pcie_lane_cap;
d0dd7f0c 3995
cd474ba0
AD
3996 /* covers APUs as well */
3997 if (pci_is_root_bus(adev->pdev->bus)) {
3998 if (adev->pm.pcie_gen_mask == 0)
3999 adev->pm.pcie_gen_mask = AMDGPU_DEFAULT_PCIE_GEN_MASK;
4000 if (adev->pm.pcie_mlw_mask == 0)
4001 adev->pm.pcie_mlw_mask = AMDGPU_DEFAULT_PCIE_MLW_MASK;
d0dd7f0c 4002 return;
cd474ba0 4003 }
d0dd7f0c 4004
c5313457
HK
4005 if (adev->pm.pcie_gen_mask && adev->pm.pcie_mlw_mask)
4006 return;
4007
dbaa922b
AD
4008 pcie_bandwidth_available(adev->pdev, NULL,
4009 &platform_speed_cap, &platform_link_width);
c5313457 4010
cd474ba0 4011 if (adev->pm.pcie_gen_mask == 0) {
5d9a6330
AD
4012 /* asic caps */
4013 pdev = adev->pdev;
4014 speed_cap = pcie_get_speed_cap(pdev);
4015 if (speed_cap == PCI_SPEED_UNKNOWN) {
4016 adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
cd474ba0
AD
4017 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
4018 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3);
cd474ba0 4019 } else {
5d9a6330
AD
4020 if (speed_cap == PCIE_SPEED_16_0GT)
4021 adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
4022 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
4023 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3 |
4024 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN4);
4025 else if (speed_cap == PCIE_SPEED_8_0GT)
4026 adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
4027 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
4028 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3);
4029 else if (speed_cap == PCIE_SPEED_5_0GT)
4030 adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
4031 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2);
4032 else
4033 adev->pm.pcie_gen_mask |= CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1;
4034 }
4035 /* platform caps */
c5313457 4036 if (platform_speed_cap == PCI_SPEED_UNKNOWN) {
5d9a6330
AD
4037 adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
4038 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2);
4039 } else {
c5313457 4040 if (platform_speed_cap == PCIE_SPEED_16_0GT)
5d9a6330
AD
4041 adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
4042 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2 |
4043 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3 |
4044 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN4);
c5313457 4045 else if (platform_speed_cap == PCIE_SPEED_8_0GT)
5d9a6330
AD
4046 adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
4047 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2 |
4048 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3);
c5313457 4049 else if (platform_speed_cap == PCIE_SPEED_5_0GT)
5d9a6330
AD
4050 adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
4051 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2);
4052 else
4053 adev->pm.pcie_gen_mask |= CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1;
4054
cd474ba0
AD
4055 }
4056 }
4057 if (adev->pm.pcie_mlw_mask == 0) {
c5313457 4058 if (platform_link_width == PCIE_LNK_WIDTH_UNKNOWN) {
5d9a6330
AD
4059 adev->pm.pcie_mlw_mask |= AMDGPU_DEFAULT_PCIE_MLW_MASK;
4060 } else {
c5313457 4061 switch (platform_link_width) {
5d9a6330 4062 case PCIE_LNK_X32:
cd474ba0
AD
4063 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X32 |
4064 CAIL_PCIE_LINK_WIDTH_SUPPORT_X16 |
4065 CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 |
4066 CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
4067 CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
4068 CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
4069 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
4070 break;
5d9a6330 4071 case PCIE_LNK_X16:
cd474ba0
AD
4072 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X16 |
4073 CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 |
4074 CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
4075 CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
4076 CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
4077 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
4078 break;
5d9a6330 4079 case PCIE_LNK_X12:
cd474ba0
AD
4080 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 |
4081 CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
4082 CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
4083 CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
4084 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
4085 break;
5d9a6330 4086 case PCIE_LNK_X8:
cd474ba0
AD
4087 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
4088 CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
4089 CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
4090 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
4091 break;
5d9a6330 4092 case PCIE_LNK_X4:
cd474ba0
AD
4093 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
4094 CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
4095 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
4096 break;
5d9a6330 4097 case PCIE_LNK_X2:
cd474ba0
AD
4098 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
4099 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
4100 break;
5d9a6330 4101 case PCIE_LNK_X1:
cd474ba0
AD
4102 adev->pm.pcie_mlw_mask = CAIL_PCIE_LINK_WIDTH_SUPPORT_X1;
4103 break;
4104 default:
4105 break;
4106 }
d0dd7f0c
AD
4107 }
4108 }
4109}
d38ceaf9 4110
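/*
 * Illustrative sketch, not part of this driver: consumers of pcie_gen_mask
 * normally just test the platform CAIL_* bits filled in above and pick the
 * highest supported gen.  The helper name is an assumption for the example;
 * the CAIL_* masks are the ones used in amdgpu_device_get_pcie_info().
 */
static unsigned int example_max_platform_pcie_gen(struct amdgpu_device *adev)
{
	if (adev->pm.pcie_gen_mask & CAIL_PCIE_LINK_SPEED_SUPPORT_GEN4)
		return 4;
	if (adev->pm.pcie_gen_mask & CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3)
		return 3;
	if (adev->pm.pcie_gen_mask & CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2)
		return 2;
	return 1;
}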