drm/amdgpu/powerplay: add set_mp1_state for vega12
[linux-2.6-block.git] / drivers / gpu / drm / amd / amdgpu / amdgpu_device.c
d38ceaf9
AD
1/*
2 * Copyright 2008 Advanced Micro Devices, Inc.
3 * Copyright 2008 Red Hat Inc.
4 * Copyright 2009 Jerome Glisse.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 * and/or sell copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
20 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
21 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
22 * OTHER DEALINGS IN THE SOFTWARE.
23 *
24 * Authors: Dave Airlie
25 * Alex Deucher
26 * Jerome Glisse
27 */
b1ddf548 28#include <linux/power_supply.h>
0875dc9e 29#include <linux/kthread.h>
fdf2f6c5 30#include <linux/module.h>
d38ceaf9
AD
31#include <linux/console.h>
32#include <linux/slab.h>
fdf2f6c5 33
4562236b 34#include <drm/drm_atomic_helper.h>
fcd70cd3 35#include <drm/drm_probe_helper.h>
d38ceaf9
AD
36#include <drm/amdgpu_drm.h>
37#include <linux/vgaarb.h>
38#include <linux/vga_switcheroo.h>
39#include <linux/efi.h>
40#include "amdgpu.h"
f4b373f4 41#include "amdgpu_trace.h"
d38ceaf9
AD
42#include "amdgpu_i2c.h"
43#include "atom.h"
44#include "amdgpu_atombios.h"
a5bde2f9 45#include "amdgpu_atomfirmware.h"
d0dd7f0c 46#include "amd_pcie.h"
33f34802
KW
47#ifdef CONFIG_DRM_AMDGPU_SI
48#include "si.h"
49#endif
a2e73f56
AD
50#ifdef CONFIG_DRM_AMDGPU_CIK
51#include "cik.h"
52#endif
aaa36a97 53#include "vi.h"
460826e6 54#include "soc15.h"
0a5b8c7b 55#include "nv.h"
d38ceaf9 56#include "bif/bif_4_1_d.h"
9accf2fd 57#include <linux/pci.h>
bec86378 58#include <linux/firmware.h>
89041940 59#include "amdgpu_vf_error.h"
d38ceaf9 60
ba997709 61#include "amdgpu_amdkfd.h"
d2f52ac8 62#include "amdgpu_pm.h"
d38ceaf9 63
5183411b 64#include "amdgpu_xgmi.h"
c030f2e4 65#include "amdgpu_ras.h"
9c7c85f7 66#include "amdgpu_pmu.h"
5183411b 67
e2a75f88 68MODULE_FIRMWARE("amdgpu/vega10_gpu_info.bin");
3f76dced 69MODULE_FIRMWARE("amdgpu/vega12_gpu_info.bin");
2d2e5e7e 70MODULE_FIRMWARE("amdgpu/raven_gpu_info.bin");
ad5a67a7 71MODULE_FIRMWARE("amdgpu/picasso_gpu_info.bin");
54c4d17e 72MODULE_FIRMWARE("amdgpu/raven2_gpu_info.bin");
65e60f6e 73MODULE_FIRMWARE("amdgpu/arcturus_gpu_info.bin");
23c6268e 74MODULE_FIRMWARE("amdgpu/navi10_gpu_info.bin");
ed42cfe1 75MODULE_FIRMWARE("amdgpu/navi14_gpu_info.bin");
e2a75f88 76
2dc80b00
S
77#define AMDGPU_RESUME_MS 2000
78
d38ceaf9 79static const char *amdgpu_asic_name[] = {
da69c161
KW
80 "TAHITI",
81 "PITCAIRN",
82 "VERDE",
83 "OLAND",
84 "HAINAN",
d38ceaf9
AD
85 "BONAIRE",
86 "KAVERI",
87 "KABINI",
88 "HAWAII",
89 "MULLINS",
90 "TOPAZ",
91 "TONGA",
48299f95 92 "FIJI",
d38ceaf9 93 "CARRIZO",
139f4917 94 "STONEY",
2cc0c0b5
FC
95 "POLARIS10",
96 "POLARIS11",
c4642a47 97 "POLARIS12",
48ff108d 98 "VEGAM",
d4196f01 99 "VEGA10",
8fab806a 100 "VEGA12",
956fcddc 101 "VEGA20",
2ca8a5d2 102 "RAVEN",
d6c3b24e 103 "ARCTURUS",
852a6626 104 "NAVI10",
87dbad02 105 "NAVI14",
d38ceaf9
AD
106 "LAST",
107};
108
dcea6e65
KR
109/**
110 * DOC: pcie_replay_count
111 *
112 * The amdgpu driver provides a sysfs API for reporting the total number
113 * of PCIe replays (NAKs).
114 * The file pcie_replay_count is used for this and returns the total
115 * number of replays as a sum of the NAKs generated and NAKs received.
116 */
117
118static ssize_t amdgpu_device_get_pcie_replay_count(struct device *dev,
119 struct device_attribute *attr, char *buf)
120{
121 struct drm_device *ddev = dev_get_drvdata(dev);
122 struct amdgpu_device *adev = ddev->dev_private;
123 uint64_t cnt = amdgpu_asic_get_pcie_replay_count(adev);
124
125 return snprintf(buf, PAGE_SIZE, "%llu\n", cnt);
126}
127
128static DEVICE_ATTR(pcie_replay_count, S_IRUGO,
129 amdgpu_device_get_pcie_replay_count, NULL);
130
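As an illustration (the exact path can vary per system), the value is typically read from the PCI device's sysfs directory, e.g. /sys/class/drm/card0/device/pcie_replay_count.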
5494d864
AD
131static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev);
132
e3ecdffa
AD
133/**
134 * amdgpu_device_is_px - Is the device a dGPU with HG/PX power control
135 *
136 * @dev: drm_device pointer
137 *
138 * Returns true if the device is a dGPU with HG/PX power control,
139 * otherwise returns false.
140 */
d38ceaf9
AD
141bool amdgpu_device_is_px(struct drm_device *dev)
142{
143 struct amdgpu_device *adev = dev->dev_private;
144
2f7d10b3 145 if (adev->flags & AMD_IS_PX)
d38ceaf9
AD
146 return true;
147 return false;
148}
149
150/*
151 * MMIO register access helper functions.
152 */
e3ecdffa
AD
153/**
154 * amdgpu_mm_rreg - read a memory mapped IO register
155 *
156 * @adev: amdgpu_device pointer
157 * @reg: dword aligned register offset
158 * @acc_flags: access flags which require special behavior
159 *
160 * Returns the 32 bit value from the offset specified.
161 */
d38ceaf9 162uint32_t amdgpu_mm_rreg(struct amdgpu_device *adev, uint32_t reg,
15d72fd7 163 uint32_t acc_flags)
d38ceaf9 164{
f4b373f4
TSD
165 uint32_t ret;
166
43ca8efa 167 if (!(acc_flags & AMDGPU_REGS_NO_KIQ) && amdgpu_sriov_runtime(adev))
bc992ba5 168 return amdgpu_virt_kiq_rreg(adev, reg);
bc992ba5 169
15d72fd7 170 if ((reg * 4) < adev->rmmio_size && !(acc_flags & AMDGPU_REGS_IDX))
f4b373f4 171 ret = readl(((void __iomem *)adev->rmmio) + (reg * 4));
d38ceaf9
AD
172 else {
173 unsigned long flags;
d38ceaf9
AD
174
175 spin_lock_irqsave(&adev->mmio_idx_lock, flags);
176 writel((reg * 4), ((void __iomem *)adev->rmmio) + (mmMM_INDEX * 4));
177 ret = readl(((void __iomem *)adev->rmmio) + (mmMM_DATA * 4));
178 spin_unlock_irqrestore(&adev->mmio_idx_lock, flags);
d38ceaf9 179 }
f4b373f4
TSD
180 trace_amdgpu_mm_rreg(adev->pdev->device, reg, ret);
181 return ret;
d38ceaf9
AD
182}
183
421a2a30
ML
184/*
185 * MMIO register read helper function (byte access)
186 * @offset: byte offset from MMIO start
187 *
188 */
189
e3ecdffa
AD
190/**
191 * amdgpu_mm_rreg8 - read a memory mapped IO register
192 *
193 * @adev: amdgpu_device pointer
194 * @offset: byte aligned register offset
195 *
196 * Returns the 8 bit value from the offset specified.
197 */
421a2a30
ML
198uint8_t amdgpu_mm_rreg8(struct amdgpu_device *adev, uint32_t offset) {
199 if (offset < adev->rmmio_size)
200 return (readb(adev->rmmio + offset));
201 BUG();
202}
203
204/*
205 * MMIO register write helper function (byte access)
206 * @offset: byte offset from MMIO start
207 * @value: the value to be written to the register
208 *
209 */
e3ecdffa
AD
210/**
211 * amdgpu_mm_wreg8 - write to a memory mapped IO register
212 *
213 * @adev: amdgpu_device pointer
214 * @offset: byte aligned register offset
215 * @value: 8 bit value to write
216 *
217 * Writes the value specified to the offset specified.
218 */
421a2a30
ML
219void amdgpu_mm_wreg8(struct amdgpu_device *adev, uint32_t offset, uint8_t value) {
220 if (offset < adev->rmmio_size)
221 writeb(value, adev->rmmio + offset);
222 else
223 BUG();
224}
225
e3ecdffa
AD
226/**
227 * amdgpu_mm_wreg - write to a memory mapped IO register
228 *
229 * @adev: amdgpu_device pointer
230 * @reg: dword aligned register offset
231 * @v: 32 bit value to write to the register
232 * @acc_flags: access flags which require special behavior
233 *
234 * Writes the value specified to the offset specified.
235 */
d38ceaf9 236void amdgpu_mm_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v,
15d72fd7 237 uint32_t acc_flags)
d38ceaf9 238{
f4b373f4 239 trace_amdgpu_mm_wreg(adev->pdev->device, reg, v);
4e99a44e 240
47ed4e1c
KW
241 if (adev->asic_type >= CHIP_VEGA10 && reg == 0) {
242 adev->last_mm_index = v;
243 }
244
43ca8efa 245 if (!(acc_flags & AMDGPU_REGS_NO_KIQ) && amdgpu_sriov_runtime(adev))
bc992ba5 246 return amdgpu_virt_kiq_wreg(adev, reg, v);
bc992ba5 247
15d72fd7 248 if ((reg * 4) < adev->rmmio_size && !(acc_flags & AMDGPU_REGS_IDX))
d38ceaf9
AD
249 writel(v, ((void __iomem *)adev->rmmio) + (reg * 4));
250 else {
251 unsigned long flags;
252
253 spin_lock_irqsave(&adev->mmio_idx_lock, flags);
254 writel((reg * 4), ((void __iomem *)adev->rmmio) + (mmMM_INDEX * 4));
255 writel(v, ((void __iomem *)adev->rmmio) + (mmMM_DATA * 4));
256 spin_unlock_irqrestore(&adev->mmio_idx_lock, flags);
257 }
47ed4e1c
KW
258
259 if (adev->asic_type >= CHIP_VEGA10 && reg == 1 && adev->last_mm_index == 0x5702C) {
260 udelay(500);
261 }
d38ceaf9
AD
262}
263
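As a rough usage sketch of these helpers (the register offset, mask and wrapper function below are invented for illustration and are not part of this file), a read-modify-write through the MMIO aperture looks like this:

	/* Illustrative only: hypothetical dword-aligned register and field mask. */
	#define EXAMPLE_REG   0x1234
	#define EXAMPLE_MASK  0x0000000f

	static void example_update_field(struct amdgpu_device *adev, uint32_t val)
	{
		uint32_t tmp;

		tmp = amdgpu_mm_rreg(adev, EXAMPLE_REG, 0);  /* plain access, no special flags */
		tmp &= ~EXAMPLE_MASK;                        /* clear the field */
		tmp |= (val & EXAMPLE_MASK);                 /* set the new value */
		amdgpu_mm_wreg(adev, EXAMPLE_REG, tmp, 0);   /* write it back */
	}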
e3ecdffa
AD
264/**
265 * amdgpu_io_rreg - read an IO register
266 *
267 * @adev: amdgpu_device pointer
268 * @reg: dword aligned register offset
269 *
270 * Returns the 32 bit value from the offset specified.
271 */
d38ceaf9
AD
272u32 amdgpu_io_rreg(struct amdgpu_device *adev, u32 reg)
273{
274 if ((reg * 4) < adev->rio_mem_size)
275 return ioread32(adev->rio_mem + (reg * 4));
276 else {
277 iowrite32((reg * 4), adev->rio_mem + (mmMM_INDEX * 4));
278 return ioread32(adev->rio_mem + (mmMM_DATA * 4));
279 }
280}
281
e3ecdffa
AD
282/**
283 * amdgpu_io_wreg - write to an IO register
284 *
285 * @adev: amdgpu_device pointer
286 * @reg: dword aligned register offset
287 * @v: 32 bit value to write to the register
288 *
289 * Writes the value specified to the offset specified.
290 */
d38ceaf9
AD
291void amdgpu_io_wreg(struct amdgpu_device *adev, u32 reg, u32 v)
292{
47ed4e1c
KW
293 if (adev->asic_type >= CHIP_VEGA10 && reg == 0) {
294 adev->last_mm_index = v;
295 }
d38ceaf9
AD
296
297 if ((reg * 4) < adev->rio_mem_size)
298 iowrite32(v, adev->rio_mem + (reg * 4));
299 else {
300 iowrite32((reg * 4), adev->rio_mem + (mmMM_INDEX * 4));
301 iowrite32(v, adev->rio_mem + (mmMM_DATA * 4));
302 }
47ed4e1c
KW
303
304 if (adev->asic_type >= CHIP_VEGA10 && reg == 1 && adev->last_mm_index == 0x5702C) {
305 udelay(500);
306 }
d38ceaf9
AD
307}
308
309/**
310 * amdgpu_mm_rdoorbell - read a doorbell dword
311 *
312 * @adev: amdgpu_device pointer
313 * @index: doorbell index
314 *
315 * Returns the value in the doorbell aperture at the
316 * requested doorbell index (CIK).
317 */
318u32 amdgpu_mm_rdoorbell(struct amdgpu_device *adev, u32 index)
319{
320 if (index < adev->doorbell.num_doorbells) {
321 return readl(adev->doorbell.ptr + index);
322 } else {
323 DRM_ERROR("reading beyond doorbell aperture: 0x%08x!\n", index);
324 return 0;
325 }
326}
327
328/**
329 * amdgpu_mm_wdoorbell - write a doorbell dword
330 *
331 * @adev: amdgpu_device pointer
332 * @index: doorbell index
333 * @v: value to write
334 *
335 * Writes @v to the doorbell aperture at the
336 * requested doorbell index (CIK).
337 */
338void amdgpu_mm_wdoorbell(struct amdgpu_device *adev, u32 index, u32 v)
339{
340 if (index < adev->doorbell.num_doorbells) {
341 writel(v, adev->doorbell.ptr + index);
342 } else {
343 DRM_ERROR("writing beyond doorbell aperture: 0x%08x!\n", index);
344 }
345}
346
832be404
KW
347/**
348 * amdgpu_mm_rdoorbell64 - read a doorbell Qword
349 *
350 * @adev: amdgpu_device pointer
351 * @index: doorbell index
352 *
353 * Returns the value in the doorbell aperture at the
354 * requested doorbell index (VEGA10+).
355 */
356u64 amdgpu_mm_rdoorbell64(struct amdgpu_device *adev, u32 index)
357{
358 if (index < adev->doorbell.num_doorbells) {
359 return atomic64_read((atomic64_t *)(adev->doorbell.ptr + index));
360 } else {
361 DRM_ERROR("reading beyond doorbell aperture: 0x%08x!\n", index);
362 return 0;
363 }
364}
365
366/**
367 * amdgpu_mm_wdoorbell64 - write a doorbell Qword
368 *
369 * @adev: amdgpu_device pointer
370 * @index: doorbell index
371 * @v: value to write
372 *
373 * Writes @v to the doorbell aperture at the
374 * requested doorbell index (VEGA10+).
375 */
376void amdgpu_mm_wdoorbell64(struct amdgpu_device *adev, u32 index, u64 v)
377{
378 if (index < adev->doorbell.num_doorbells) {
379 atomic64_set((atomic64_t *)(adev->doorbell.ptr + index), v);
380 } else {
381 DRM_ERROR("writing beyond doorbell aperture: 0x%08x!\n", index);
382 }
383}
384
d38ceaf9
AD
385/**
386 * amdgpu_invalid_rreg - dummy reg read function
387 *
388 * @adev: amdgpu device pointer
389 * @reg: offset of register
390 *
391 * Dummy register read function. Used for register blocks
392 * that certain asics don't have (all asics).
393 * Returns the value in the register.
394 */
395static uint32_t amdgpu_invalid_rreg(struct amdgpu_device *adev, uint32_t reg)
396{
397 DRM_ERROR("Invalid callback to read register 0x%04X\n", reg);
398 BUG();
399 return 0;
400}
401
402/**
403 * amdgpu_invalid_wreg - dummy reg write function
404 *
405 * @adev: amdgpu device pointer
406 * @reg: offset of register
407 * @v: value to write to the register
408 *
409 * Dummy register write function. Used for register blocks
410 * that certain asics don't have (all asics).
411 */
412static void amdgpu_invalid_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v)
413{
414 DRM_ERROR("Invalid callback to write register 0x%04X with 0x%08X\n",
415 reg, v);
416 BUG();
417}
418
419/**
420 * amdgpu_block_invalid_rreg - dummy reg read function
421 *
422 * @adev: amdgpu device pointer
423 * @block: offset of instance
424 * @reg: offset of register
425 *
426 * Dummy register read function. Used for register blocks
427 * that certain asics don't have (all asics).
428 * Returns the value in the register.
429 */
430static uint32_t amdgpu_block_invalid_rreg(struct amdgpu_device *adev,
431 uint32_t block, uint32_t reg)
432{
433 DRM_ERROR("Invalid callback to read register 0x%04X in block 0x%04X\n",
434 reg, block);
435 BUG();
436 return 0;
437}
438
439/**
440 * amdgpu_block_invalid_wreg - dummy reg write function
441 *
442 * @adev: amdgpu device pointer
443 * @block: offset of instance
444 * @reg: offset of register
445 * @v: value to write to the register
446 *
447 * Dummy register write function. Used for register blocks
448 * that certain asics don't have (all asics).
449 */
450static void amdgpu_block_invalid_wreg(struct amdgpu_device *adev,
451 uint32_t block,
452 uint32_t reg, uint32_t v)
453{
454 DRM_ERROR("Invalid block callback to write register 0x%04X in block 0x%04X with 0x%08X\n",
455 reg, block, v);
456 BUG();
457}
458
e3ecdffa
AD
459/**
460 * amdgpu_device_vram_scratch_init - allocate the VRAM scratch page
461 *
462 * @adev: amdgpu device pointer
463 *
464 * Allocates a scratch page of VRAM for use by various things in the
465 * driver.
466 */
06ec9070 467static int amdgpu_device_vram_scratch_init(struct amdgpu_device *adev)
d38ceaf9 468{
a4a02777
CK
469 return amdgpu_bo_create_kernel(adev, AMDGPU_GPU_PAGE_SIZE,
470 PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM,
471 &adev->vram_scratch.robj,
472 &adev->vram_scratch.gpu_addr,
473 (void **)&adev->vram_scratch.ptr);
d38ceaf9
AD
474}
475
e3ecdffa
AD
476/**
477 * amdgpu_device_vram_scratch_fini - Free the VRAM scratch page
478 *
479 * @adev: amdgpu device pointer
480 *
481 * Frees the VRAM scratch page.
482 */
06ec9070 483static void amdgpu_device_vram_scratch_fini(struct amdgpu_device *adev)
d38ceaf9 484{
078af1a3 485 amdgpu_bo_free_kernel(&adev->vram_scratch.robj, NULL, NULL);
d38ceaf9
AD
486}
487
488/**
9c3f2b54 489 * amdgpu_device_program_register_sequence - program an array of registers.
d38ceaf9
AD
490 *
491 * @adev: amdgpu_device pointer
492 * @registers: pointer to the register array
493 * @array_size: size of the register array
494 *
495 * Programs an array of registers with AND and OR masks.
496 * This is a helper for setting golden registers.
497 */
9c3f2b54
AD
498void amdgpu_device_program_register_sequence(struct amdgpu_device *adev,
499 const u32 *registers,
500 const u32 array_size)
d38ceaf9
AD
501{
502 u32 tmp, reg, and_mask, or_mask;
503 int i;
504
505 if (array_size % 3)
506 return;
507
508 for (i = 0; i < array_size; i +=3) {
509 reg = registers[i + 0];
510 and_mask = registers[i + 1];
511 or_mask = registers[i + 2];
512
513 if (and_mask == 0xffffffff) {
514 tmp = or_mask;
515 } else {
516 tmp = RREG32(reg);
517 tmp &= ~and_mask;
e0d07657
HZ
518 if (adev->family >= AMDGPU_FAMILY_AI)
519 tmp |= (or_mask & and_mask);
520 else
521 tmp |= or_mask;
d38ceaf9
AD
522 }
523 WREG32(reg, tmp);
524 }
525}
526
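As a hedged illustration of the expected input (the offsets and masks below are invented, not real golden settings), the register array is a flat list of {offset, and_mask, or_mask} triplets:

	/* Hypothetical golden settings: offset, and_mask, or_mask. */
	static const u32 example_golden_settings[] = {
		0x263e, 0xffffffff, 0x02420204, /* and_mask of 0xffffffff writes or_mask verbatim */
		0x2460, 0x0000000f, 0x00000004, /* otherwise: clear and_mask bits, then OR in or_mask */
	};

	amdgpu_device_program_register_sequence(adev, example_golden_settings,
						ARRAY_SIZE(example_golden_settings));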
e3ecdffa
AD
527/**
528 * amdgpu_device_pci_config_reset - reset the GPU
529 *
530 * @adev: amdgpu_device pointer
531 *
532 * Resets the GPU using the pci config reset sequence.
533 * Only applicable to asics prior to vega10.
534 */
8111c387 535void amdgpu_device_pci_config_reset(struct amdgpu_device *adev)
d38ceaf9
AD
536{
537 pci_write_config_dword(adev->pdev, 0x7c, AMDGPU_ASIC_RESET_DATA);
538}
539
540/*
541 * GPU doorbell aperture helpers function.
542 */
543/**
06ec9070 544 * amdgpu_device_doorbell_init - Init doorbell driver information.
d38ceaf9
AD
545 *
546 * @adev: amdgpu_device pointer
547 *
548 * Init doorbell driver information (CIK)
549 * Returns 0 on success, error on failure.
550 */
06ec9070 551static int amdgpu_device_doorbell_init(struct amdgpu_device *adev)
d38ceaf9 552{
6585661d 553
705e519e
CK
554 /* No doorbell on SI hardware generation */
555 if (adev->asic_type < CHIP_BONAIRE) {
556 adev->doorbell.base = 0;
557 adev->doorbell.size = 0;
558 adev->doorbell.num_doorbells = 0;
559 adev->doorbell.ptr = NULL;
560 return 0;
561 }
562
d6895ad3
CK
563 if (pci_resource_flags(adev->pdev, 2) & IORESOURCE_UNSET)
564 return -EINVAL;
565
22357775
AD
566 amdgpu_asic_init_doorbell_index(adev);
567
d38ceaf9
AD
568 /* doorbell bar mapping */
569 adev->doorbell.base = pci_resource_start(adev->pdev, 2);
570 adev->doorbell.size = pci_resource_len(adev->pdev, 2);
571
edf600da 572 adev->doorbell.num_doorbells = min_t(u32, adev->doorbell.size / sizeof(u32),
9564f192 573 adev->doorbell_index.max_assignment+1);
d38ceaf9
AD
574 if (adev->doorbell.num_doorbells == 0)
575 return -EINVAL;
576
ec3db8a6 577 /* For Vega, reserve and map two pages on doorbell BAR since SDMA
88dc26e4
OZ
578 * paging queue doorbell uses the second page. The
579 * AMDGPU_DOORBELL64_MAX_ASSIGNMENT definition assumes all the
580 * doorbells are in the first page. So with the paging queue enabled,
581 * the max num_doorbells should be extended by one page (0x400 in dwords)
ec3db8a6
PY
582 */
583 if (adev->asic_type >= CHIP_VEGA10)
88dc26e4 584 adev->doorbell.num_doorbells += 0x400;
ec3db8a6 585
8972e5d2
CK
586 adev->doorbell.ptr = ioremap(adev->doorbell.base,
587 adev->doorbell.num_doorbells *
588 sizeof(u32));
589 if (adev->doorbell.ptr == NULL)
d38ceaf9 590 return -ENOMEM;
d38ceaf9
AD
591
592 return 0;
593}
594
595/**
06ec9070 596 * amdgpu_device_doorbell_fini - Tear down doorbell driver information.
d38ceaf9
AD
597 *
598 * @adev: amdgpu_device pointer
599 *
600 * Tear down doorbell driver information (CIK)
601 */
06ec9070 602static void amdgpu_device_doorbell_fini(struct amdgpu_device *adev)
d38ceaf9
AD
603{
604 iounmap(adev->doorbell.ptr);
605 adev->doorbell.ptr = NULL;
606}
607
22cb0164 608
d38ceaf9
AD
609
610/*
06ec9070 611 * amdgpu_device_wb_*()
455a7bc2 612 * Writeback is the method by which the GPU updates special pages in memory
ea81a173 613 * with the status of certain GPU events (fences, ring pointers,etc.).
d38ceaf9
AD
614 */
615
616/**
06ec9070 617 * amdgpu_device_wb_fini - Disable Writeback and free memory
d38ceaf9
AD
618 *
619 * @adev: amdgpu_device pointer
620 *
621 * Disables Writeback and frees the Writeback memory (all asics).
622 * Used at driver shutdown.
623 */
06ec9070 624static void amdgpu_device_wb_fini(struct amdgpu_device *adev)
d38ceaf9
AD
625{
626 if (adev->wb.wb_obj) {
a76ed485
AD
627 amdgpu_bo_free_kernel(&adev->wb.wb_obj,
628 &adev->wb.gpu_addr,
629 (void **)&adev->wb.wb);
d38ceaf9
AD
630 adev->wb.wb_obj = NULL;
631 }
632}
633
634/**
06ec9070 635 * amdgpu_device_wb_init - Init Writeback driver info and allocate memory
d38ceaf9
AD
636 *
637 * @adev: amdgpu_device pointer
638 *
455a7bc2 639 * Initializes writeback and allocates writeback memory (all asics).
d38ceaf9
AD
640 * Used at driver startup.
641 * Returns 0 on success or a negative error code on failure.
642 */
06ec9070 643static int amdgpu_device_wb_init(struct amdgpu_device *adev)
d38ceaf9
AD
644{
645 int r;
646
647 if (adev->wb.wb_obj == NULL) {
97407b63
AD
648 /* AMDGPU_MAX_WB * sizeof(uint32_t) * 8 = AMDGPU_MAX_WB 256bit slots */
649 r = amdgpu_bo_create_kernel(adev, AMDGPU_MAX_WB * sizeof(uint32_t) * 8,
a76ed485
AD
650 PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT,
651 &adev->wb.wb_obj, &adev->wb.gpu_addr,
652 (void **)&adev->wb.wb);
d38ceaf9
AD
653 if (r) {
654 dev_warn(adev->dev, "(%d) create WB bo failed\n", r);
655 return r;
656 }
d38ceaf9
AD
657
658 adev->wb.num_wb = AMDGPU_MAX_WB;
659 memset(&adev->wb.used, 0, sizeof(adev->wb.used));
660
661 /* clear wb memory */
73469585 662 memset((char *)adev->wb.wb, 0, AMDGPU_MAX_WB * sizeof(uint32_t) * 8);
d38ceaf9
AD
663 }
664
665 return 0;
666}
667
668/**
131b4b36 669 * amdgpu_device_wb_get - Allocate a wb entry
d38ceaf9
AD
670 *
671 * @adev: amdgpu_device pointer
672 * @wb: wb index
673 *
674 * Allocate a wb slot for use by the driver (all asics).
675 * Returns 0 on success or -EINVAL on failure.
676 */
131b4b36 677int amdgpu_device_wb_get(struct amdgpu_device *adev, u32 *wb)
d38ceaf9
AD
678{
679 unsigned long offset = find_first_zero_bit(adev->wb.used, adev->wb.num_wb);
d38ceaf9 680
97407b63 681 if (offset < adev->wb.num_wb) {
7014285a 682 __set_bit(offset, adev->wb.used);
63ae07ca 683 *wb = offset << 3; /* convert to dw offset */
0915fdbc
ML
684 return 0;
685 } else {
686 return -EINVAL;
687 }
688}
689
d38ceaf9 690/**
131b4b36 691 * amdgpu_device_wb_free - Free a wb entry
d38ceaf9
AD
692 *
693 * @adev: amdgpu_device pointer
694 * @wb: wb index
695 *
696 * Free a wb slot allocated for use by the driver (all asics)
697 */
131b4b36 698void amdgpu_device_wb_free(struct amdgpu_device *adev, u32 wb)
d38ceaf9 699{
73469585 700 wb >>= 3;
d38ceaf9 701 if (wb < adev->wb.num_wb)
73469585 702 __clear_bit(wb, adev->wb.used);
d38ceaf9
AD
703}
704
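A minimal sketch of how a writeback slot is consumed (variable names here are illustrative): the value returned by amdgpu_device_wb_get() is a dword offset that yields both a CPU pointer and a GPU address for the slot:

	u32 wb_idx;

	if (amdgpu_device_wb_get(adev, &wb_idx) == 0) {
		u64 wb_gpu_addr = adev->wb.gpu_addr + (wb_idx * 4);   /* engines write status here */
		volatile uint32_t *wb_cpu_ptr = &adev->wb.wb[wb_idx]; /* driver polls it here */

		/* ... program wb_gpu_addr into the engine, poll *wb_cpu_ptr ... */

		amdgpu_device_wb_free(adev, wb_idx);
	}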
d6895ad3
CK
705/**
706 * amdgpu_device_resize_fb_bar - try to resize FB BAR
707 *
708 * @adev: amdgpu_device pointer
709 *
710 * Try to resize FB BAR to make all VRAM CPU accessible. We try very hard not
711 * to fail, but if any of the BARs is not accessible after the resize we abort
712 * driver loading by returning -ENODEV.
713 */
714int amdgpu_device_resize_fb_bar(struct amdgpu_device *adev)
715{
770d13b1 716 u64 space_needed = roundup_pow_of_two(adev->gmc.real_vram_size);
d6895ad3 717 u32 rbar_size = order_base_2(((space_needed >> 20) | 1)) - 1;
31b8adab
CK
718 struct pci_bus *root;
719 struct resource *res;
720 unsigned i;
d6895ad3
CK
721 u16 cmd;
722 int r;
723
0c03b912 724 /* Bypass for VF */
725 if (amdgpu_sriov_vf(adev))
726 return 0;
727
31b8adab
CK
728 /* Check if the root BUS has 64bit memory resources */
729 root = adev->pdev->bus;
730 while (root->parent)
731 root = root->parent;
732
733 pci_bus_for_each_resource(root, res, i) {
0ebb7c54 734 if (res && res->flags & (IORESOURCE_MEM | IORESOURCE_MEM_64) &&
31b8adab
CK
735 res->start > 0x100000000ull)
736 break;
737 }
738
739 /* Trying to resize is pointless without a root hub window above 4GB */
740 if (!res)
741 return 0;
742
d6895ad3
CK
743 /* Disable memory decoding while we change the BAR addresses and size */
744 pci_read_config_word(adev->pdev, PCI_COMMAND, &cmd);
745 pci_write_config_word(adev->pdev, PCI_COMMAND,
746 cmd & ~PCI_COMMAND_MEMORY);
747
748 /* Free the VRAM and doorbell BAR, we most likely need to move both. */
06ec9070 749 amdgpu_device_doorbell_fini(adev);
d6895ad3
CK
750 if (adev->asic_type >= CHIP_BONAIRE)
751 pci_release_resource(adev->pdev, 2);
752
753 pci_release_resource(adev->pdev, 0);
754
755 r = pci_resize_resource(adev->pdev, 0, rbar_size);
756 if (r == -ENOSPC)
757 DRM_INFO("Not enough PCI address space for a large BAR.");
758 else if (r && r != -ENOTSUPP)
759 DRM_ERROR("Problem resizing BAR0 (%d).", r);
760
761 pci_assign_unassigned_bus_resources(adev->pdev->bus);
762
763 /* When the doorbell or fb BAR isn't available we have no chance of
764 * using the device.
765 */
06ec9070 766 r = amdgpu_device_doorbell_init(adev);
d6895ad3
CK
767 if (r || (pci_resource_flags(adev->pdev, 0) & IORESOURCE_UNSET))
768 return -ENODEV;
769
770 pci_write_config_word(adev->pdev, PCI_COMMAND, cmd);
771
772 return 0;
773}
a05502e5 774
d38ceaf9
AD
775/*
776 * GPU helpers function.
777 */
778/**
39c640c0 779 * amdgpu_device_need_post - check if the hw needs post or not
d38ceaf9
AD
780 *
781 * @adev: amdgpu_device pointer
782 *
c836fec5
JQ
783 * Check if the asic has been initialized (all asics) at driver startup,
784 * or if post is needed because a hw reset was performed.
785 * Returns true if post is needed, false if not.
d38ceaf9 786 */
39c640c0 787bool amdgpu_device_need_post(struct amdgpu_device *adev)
d38ceaf9
AD
788{
789 uint32_t reg;
790
bec86378
ML
791 if (amdgpu_sriov_vf(adev))
792 return false;
793
794 if (amdgpu_passthrough(adev)) {
1da2c326
ML
795 /* for FIJI: in the whole GPU pass-through virtualization case, after a VM reboot
796 * some old SMC firmware still needs the driver to do vPost, otherwise the GPU hangs;
797 * SMC firmware versions above 22.15 don't have this flaw, so we force
798 * vPost to be executed for SMC versions below 22.15
bec86378
ML
799 */
800 if (adev->asic_type == CHIP_FIJI) {
801 int err;
802 uint32_t fw_ver;
803 err = request_firmware(&adev->pm.fw, "amdgpu/fiji_smc.bin", adev->dev);
804 /* force vPost if an error occurred */
805 if (err)
806 return true;
807
808 fw_ver = *((uint32_t *)adev->pm.fw->data + 69);
1da2c326
ML
809 if (fw_ver < 0x00160e00)
810 return true;
bec86378 811 }
bec86378 812 }
91fe77eb 813
814 if (adev->has_hw_reset) {
815 adev->has_hw_reset = false;
816 return true;
817 }
818
819 /* bios scratch used on CIK+ */
820 if (adev->asic_type >= CHIP_BONAIRE)
821 return amdgpu_atombios_scratch_need_asic_init(adev);
822
823 /* check MEM_SIZE for older asics */
824 reg = amdgpu_asic_get_config_memsize(adev);
825
826 if ((reg != 0) && (reg != 0xffffffff))
827 return false;
828
829 return true;
bec86378
ML
830}
831
d38ceaf9
AD
832/* if we get transitioned to only one device, take VGA back */
833/**
06ec9070 834 * amdgpu_device_vga_set_decode - enable/disable vga decode
d38ceaf9
AD
835 *
836 * @cookie: amdgpu_device pointer
837 * @state: enable/disable vga decode
838 *
839 * Enable/disable vga decode (all asics).
840 * Returns VGA resource flags.
841 */
06ec9070 842static unsigned int amdgpu_device_vga_set_decode(void *cookie, bool state)
d38ceaf9
AD
843{
844 struct amdgpu_device *adev = cookie;
845 amdgpu_asic_set_vga_state(adev, state);
846 if (state)
847 return VGA_RSRC_LEGACY_IO | VGA_RSRC_LEGACY_MEM |
848 VGA_RSRC_NORMAL_IO | VGA_RSRC_NORMAL_MEM;
849 else
850 return VGA_RSRC_NORMAL_IO | VGA_RSRC_NORMAL_MEM;
851}
852
e3ecdffa
AD
853/**
854 * amdgpu_device_check_block_size - validate the vm block size
855 *
856 * @adev: amdgpu_device pointer
857 *
858 * Validates the vm block size specified via module parameter.
859 * The vm block size defines number of bits in page table versus page directory,
860 * a page is 4KB so we have 12 bits offset, minimum 9 bits in the
861 * page table and the remaining bits are in the page directory.
862 */
06ec9070 863static void amdgpu_device_check_block_size(struct amdgpu_device *adev)
a1adf8be
CZ
864{
865 /* defines number of bits in page table versus page directory,
866 * a page is 4KB so we have 12 bits offset, minimum 9 bits in the
867 * page table and the remaining bits are in the page directory */
bab4fee7
JZ
868 if (amdgpu_vm_block_size == -1)
869 return;
a1adf8be 870
bab4fee7 871 if (amdgpu_vm_block_size < 9) {
a1adf8be
CZ
872 dev_warn(adev->dev, "VM page table size (%d) too small\n",
873 amdgpu_vm_block_size);
97489129 874 amdgpu_vm_block_size = -1;
a1adf8be 875 }
a1adf8be
CZ
876}
877
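For instance, at the minimum block size of 9 bits one page table maps 2^9 entries x 4 KB = 2 MB of GPU virtual address space; larger values move more of the translation into the page table and leave fewer bits for the page directory.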
e3ecdffa
AD
878/**
879 * amdgpu_device_check_vm_size - validate the vm size
880 *
881 * @adev: amdgpu_device pointer
882 *
883 * Validates the vm size in GB specified via module parameter.
884 * The VM size is the size of the GPU virtual memory space in GB.
885 */
06ec9070 886static void amdgpu_device_check_vm_size(struct amdgpu_device *adev)
83ca145d 887{
64dab074
AD
888 /* no need to check the default value */
889 if (amdgpu_vm_size == -1)
890 return;
891
83ca145d
ZJ
892 if (amdgpu_vm_size < 1) {
893 dev_warn(adev->dev, "VM size (%d) too small, min is 1GB\n",
894 amdgpu_vm_size);
f3368128 895 amdgpu_vm_size = -1;
83ca145d 896 }
83ca145d
ZJ
897}
898
7951e376
RZ
899static void amdgpu_device_check_smu_prv_buffer_size(struct amdgpu_device *adev)
900{
901 struct sysinfo si;
902 bool is_os_64 = (sizeof(void *) == 8) ? true : false;
903 uint64_t total_memory;
904 uint64_t dram_size_seven_GB = 0x1B8000000;
905 uint64_t dram_size_three_GB = 0xB8000000;
906
907 if (amdgpu_smu_memory_pool_size == 0)
908 return;
909
910 if (!is_os_64) {
911 DRM_WARN("Not 64-bit OS, feature not supported\n");
912 goto def_value;
913 }
914 si_meminfo(&si);
915 total_memory = (uint64_t)si.totalram * si.mem_unit;
916
917 if ((amdgpu_smu_memory_pool_size == 1) ||
918 (amdgpu_smu_memory_pool_size == 2)) {
919 if (total_memory < dram_size_three_GB)
920 goto def_value1;
921 } else if ((amdgpu_smu_memory_pool_size == 4) ||
922 (amdgpu_smu_memory_pool_size == 8)) {
923 if (total_memory < dram_size_seven_GB)
924 goto def_value1;
925 } else {
926 DRM_WARN("Smu memory pool size not supported\n");
927 goto def_value;
928 }
929 adev->pm.smu_prv_buffer_size = amdgpu_smu_memory_pool_size << 28;
930
931 return;
932
933def_value1:
934 DRM_WARN("No enough system memory\n");
935def_value:
936 adev->pm.smu_prv_buffer_size = 0;
937}
938
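Put differently, the accepted amdgpu_smu_memory_pool_size values translate to a pool of value << 28 bytes: 1 -> 256 MB, 2 -> 512 MB, 4 -> 1 GB and 8 -> 2 GB, where the two smaller pools require roughly 3 GB of system memory and the two larger ones roughly 7 GB.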
d38ceaf9 939/**
06ec9070 940 * amdgpu_device_check_arguments - validate module params
d38ceaf9
AD
941 *
942 * @adev: amdgpu_device pointer
943 *
944 * Validates certain module parameters and updates
945 * the associated values used by the driver (all asics).
946 */
912dfc84 947static int amdgpu_device_check_arguments(struct amdgpu_device *adev)
d38ceaf9 948{
912dfc84
EQ
949 int ret = 0;
950
5b011235
CZ
951 if (amdgpu_sched_jobs < 4) {
952 dev_warn(adev->dev, "sched jobs (%d) must be at least 4\n",
953 amdgpu_sched_jobs);
954 amdgpu_sched_jobs = 4;
76117507 955 } else if (!is_power_of_2(amdgpu_sched_jobs)){
5b011235
CZ
956 dev_warn(adev->dev, "sched jobs (%d) must be a power of 2\n",
957 amdgpu_sched_jobs);
958 amdgpu_sched_jobs = roundup_pow_of_two(amdgpu_sched_jobs);
959 }
d38ceaf9 960
83e74db6 961 if (amdgpu_gart_size != -1 && amdgpu_gart_size < 32) {
f9321cc4
CK
962 /* gart size must be greater or equal to 32M */
963 dev_warn(adev->dev, "gart size (%d) too small\n",
964 amdgpu_gart_size);
83e74db6 965 amdgpu_gart_size = -1;
d38ceaf9
AD
966 }
967
36d38372 968 if (amdgpu_gtt_size != -1 && amdgpu_gtt_size < 32) {
c4e1a13a 969 /* gtt size must be greater or equal to 32M */
36d38372
CK
970 dev_warn(adev->dev, "gtt size (%d) too small\n",
971 amdgpu_gtt_size);
972 amdgpu_gtt_size = -1;
d38ceaf9
AD
973 }
974
d07f14be
RH
975 /* valid range is between 4 and 9 inclusive */
976 if (amdgpu_vm_fragment_size != -1 &&
977 (amdgpu_vm_fragment_size > 9 || amdgpu_vm_fragment_size < 4)) {
978 dev_warn(adev->dev, "valid range is between 4 and 9\n");
979 amdgpu_vm_fragment_size = -1;
980 }
981
7951e376
RZ
982 amdgpu_device_check_smu_prv_buffer_size(adev);
983
06ec9070 984 amdgpu_device_check_vm_size(adev);
d38ceaf9 985
06ec9070 986 amdgpu_device_check_block_size(adev);
6a7f76e7 987
912dfc84
EQ
988 ret = amdgpu_device_get_job_timeout_settings(adev);
989 if (ret) {
990 dev_err(adev->dev, "invalid lockup_timeout parameter syntax\n");
991 return ret;
8854695a 992 }
19aede77
AD
993
994 adev->firmware.load_type = amdgpu_ucode_get_load_type(adev, amdgpu_fw_load_type);
912dfc84
EQ
995
996 return ret;
d38ceaf9
AD
997}
998
999/**
1000 * amdgpu_switcheroo_set_state - set switcheroo state
1001 *
1002 * @pdev: pci dev pointer
1694467b 1003 * @state: vga_switcheroo state
d38ceaf9
AD
1004 *
1005 * Callback for the switcheroo driver. Suspends or resumes
1006 * the asics before or after it is powered up using ACPI methods.
1007 */
1008static void amdgpu_switcheroo_set_state(struct pci_dev *pdev, enum vga_switcheroo_state state)
1009{
1010 struct drm_device *dev = pci_get_drvdata(pdev);
1011
1012 if (amdgpu_device_is_px(dev) && state == VGA_SWITCHEROO_OFF)
1013 return;
1014
1015 if (state == VGA_SWITCHEROO_ON) {
7ca85295 1016 pr_info("amdgpu: switched on\n");
d38ceaf9
AD
1017 /* don't suspend or resume card normally */
1018 dev->switch_power_state = DRM_SWITCH_POWER_CHANGING;
1019
810ddc3a 1020 amdgpu_device_resume(dev, true, true);
d38ceaf9 1021
d38ceaf9
AD
1022 dev->switch_power_state = DRM_SWITCH_POWER_ON;
1023 drm_kms_helper_poll_enable(dev);
1024 } else {
7ca85295 1025 pr_info("amdgpu: switched off\n");
d38ceaf9
AD
1026 drm_kms_helper_poll_disable(dev);
1027 dev->switch_power_state = DRM_SWITCH_POWER_CHANGING;
810ddc3a 1028 amdgpu_device_suspend(dev, true, true);
d38ceaf9
AD
1029 dev->switch_power_state = DRM_SWITCH_POWER_OFF;
1030 }
1031}
1032
1033/**
1034 * amdgpu_switcheroo_can_switch - see if switcheroo state can change
1035 *
1036 * @pdev: pci dev pointer
1037 *
1038 * Callback for the switcheroo driver. Check if the switcheroo
1039 * state can be changed.
1040 * Returns true if the state can be changed, false if not.
1041 */
1042static bool amdgpu_switcheroo_can_switch(struct pci_dev *pdev)
1043{
1044 struct drm_device *dev = pci_get_drvdata(pdev);
1045
1046 /*
1047 * FIXME: open_count is protected by drm_global_mutex but that would lead to
1048 * locking inversion with the driver load path. And the access here is
1049 * completely racy anyway. So don't bother with locking for now.
1050 */
1051 return dev->open_count == 0;
1052}
1053
1054static const struct vga_switcheroo_client_ops amdgpu_switcheroo_ops = {
1055 .set_gpu_state = amdgpu_switcheroo_set_state,
1056 .reprobe = NULL,
1057 .can_switch = amdgpu_switcheroo_can_switch,
1058};
1059
e3ecdffa
AD
1060/**
1061 * amdgpu_device_ip_set_clockgating_state - set the CG state
1062 *
87e3f136 1063 * @dev: amdgpu_device pointer
e3ecdffa
AD
1064 * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
1065 * @state: clockgating state (gate or ungate)
1066 *
1067 * Sets the requested clockgating state for all instances of
1068 * the hardware IP specified.
1069 * Returns the error code from the last instance.
1070 */
43fa561f 1071int amdgpu_device_ip_set_clockgating_state(void *dev,
2990a1fc
AD
1072 enum amd_ip_block_type block_type,
1073 enum amd_clockgating_state state)
d38ceaf9 1074{
43fa561f 1075 struct amdgpu_device *adev = dev;
d38ceaf9
AD
1076 int i, r = 0;
1077
1078 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 1079 if (!adev->ip_blocks[i].status.valid)
9ecbe7f5 1080 continue;
c722865a
RZ
1081 if (adev->ip_blocks[i].version->type != block_type)
1082 continue;
1083 if (!adev->ip_blocks[i].version->funcs->set_clockgating_state)
1084 continue;
1085 r = adev->ip_blocks[i].version->funcs->set_clockgating_state(
1086 (void *)adev, state);
1087 if (r)
1088 DRM_ERROR("set_clockgating_state of IP block <%s> failed %d\n",
1089 adev->ip_blocks[i].version->funcs->name, r);
d38ceaf9
AD
1090 }
1091 return r;
1092}
1093
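A hedged usage sketch (the IP block type and gating state below are chosen only for illustration):

	int r;

	/* Illustrative call: request clock gating on all GFX IP instances. */
	r = amdgpu_device_ip_set_clockgating_state(adev, AMD_IP_BLOCK_TYPE_GFX,
						   AMD_CG_STATE_GATE);
	if (r)
		DRM_ERROR("failed to gate GFX clocks (%d)\n", r);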
e3ecdffa
AD
1094/**
1095 * amdgpu_device_ip_set_powergating_state - set the PG state
1096 *
87e3f136 1097 * @dev: amdgpu_device pointer
e3ecdffa
AD
1098 * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
1099 * @state: powergating state (gate or ungate)
1100 *
1101 * Sets the requested powergating state for all instances of
1102 * the hardware IP specified.
1103 * Returns the error code from the last instance.
1104 */
43fa561f 1105int amdgpu_device_ip_set_powergating_state(void *dev,
2990a1fc
AD
1106 enum amd_ip_block_type block_type,
1107 enum amd_powergating_state state)
d38ceaf9 1108{
43fa561f 1109 struct amdgpu_device *adev = dev;
d38ceaf9
AD
1110 int i, r = 0;
1111
1112 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 1113 if (!adev->ip_blocks[i].status.valid)
9ecbe7f5 1114 continue;
c722865a
RZ
1115 if (adev->ip_blocks[i].version->type != block_type)
1116 continue;
1117 if (!adev->ip_blocks[i].version->funcs->set_powergating_state)
1118 continue;
1119 r = adev->ip_blocks[i].version->funcs->set_powergating_state(
1120 (void *)adev, state);
1121 if (r)
1122 DRM_ERROR("set_powergating_state of IP block <%s> failed %d\n",
1123 adev->ip_blocks[i].version->funcs->name, r);
d38ceaf9
AD
1124 }
1125 return r;
1126}
1127
e3ecdffa
AD
1128/**
1129 * amdgpu_device_ip_get_clockgating_state - get the CG state
1130 *
1131 * @adev: amdgpu_device pointer
1132 * @flags: clockgating feature flags
1133 *
1134 * Walks the list of IPs on the device and updates the clockgating
1135 * flags for each IP.
1136 * Updates @flags with the feature flags for each hardware IP where
1137 * clockgating is enabled.
1138 */
2990a1fc
AD
1139void amdgpu_device_ip_get_clockgating_state(struct amdgpu_device *adev,
1140 u32 *flags)
6cb2d4e4
HR
1141{
1142 int i;
1143
1144 for (i = 0; i < adev->num_ip_blocks; i++) {
1145 if (!adev->ip_blocks[i].status.valid)
1146 continue;
1147 if (adev->ip_blocks[i].version->funcs->get_clockgating_state)
1148 adev->ip_blocks[i].version->funcs->get_clockgating_state((void *)adev, flags);
1149 }
1150}
1151
e3ecdffa
AD
1152/**
1153 * amdgpu_device_ip_wait_for_idle - wait for idle
1154 *
1155 * @adev: amdgpu_device pointer
1156 * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
1157 *
1158 * Waits for the requested hardware IP to be idle.
1159 * Returns 0 for success or a negative error code on failure.
1160 */
2990a1fc
AD
1161int amdgpu_device_ip_wait_for_idle(struct amdgpu_device *adev,
1162 enum amd_ip_block_type block_type)
5dbbb60b
AD
1163{
1164 int i, r;
1165
1166 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 1167 if (!adev->ip_blocks[i].status.valid)
9ecbe7f5 1168 continue;
a1255107
AD
1169 if (adev->ip_blocks[i].version->type == block_type) {
1170 r = adev->ip_blocks[i].version->funcs->wait_for_idle((void *)adev);
5dbbb60b
AD
1171 if (r)
1172 return r;
1173 break;
1174 }
1175 }
1176 return 0;
1177
1178}
1179
e3ecdffa
AD
1180/**
1181 * amdgpu_device_ip_is_idle - is the hardware IP idle
1182 *
1183 * @adev: amdgpu_device pointer
1184 * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
1185 *
1186 * Check if the hardware IP is idle or not.
1187 * Returns true if the IP is idle, false if not.
1188 */
2990a1fc
AD
1189bool amdgpu_device_ip_is_idle(struct amdgpu_device *adev,
1190 enum amd_ip_block_type block_type)
5dbbb60b
AD
1191{
1192 int i;
1193
1194 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 1195 if (!adev->ip_blocks[i].status.valid)
9ecbe7f5 1196 continue;
a1255107
AD
1197 if (adev->ip_blocks[i].version->type == block_type)
1198 return adev->ip_blocks[i].version->funcs->is_idle((void *)adev);
5dbbb60b
AD
1199 }
1200 return true;
1201
1202}
1203
e3ecdffa
AD
1204/**
1205 * amdgpu_device_ip_get_ip_block - get a hw IP pointer
1206 *
1207 * @adev: amdgpu_device pointer
87e3f136 1208 * @type: Type of hardware IP (SMU, GFX, UVD, etc.)
e3ecdffa
AD
1209 *
1210 * Returns a pointer to the hardware IP block structure
1211 * if it exists for the asic, otherwise NULL.
1212 */
2990a1fc
AD
1213struct amdgpu_ip_block *
1214amdgpu_device_ip_get_ip_block(struct amdgpu_device *adev,
1215 enum amd_ip_block_type type)
d38ceaf9
AD
1216{
1217 int i;
1218
1219 for (i = 0; i < adev->num_ip_blocks; i++)
a1255107 1220 if (adev->ip_blocks[i].version->type == type)
d38ceaf9
AD
1221 return &adev->ip_blocks[i];
1222
1223 return NULL;
1224}
1225
1226/**
2990a1fc 1227 * amdgpu_device_ip_block_version_cmp
d38ceaf9
AD
1228 *
1229 * @adev: amdgpu_device pointer
5fc3aeeb 1230 * @type: enum amd_ip_block_type
d38ceaf9
AD
1231 * @major: major version
1232 * @minor: minor version
1233 *
1234 * return 0 if equal or greater
1235 * return 1 if smaller or the ip_block doesn't exist
1236 */
2990a1fc
AD
1237int amdgpu_device_ip_block_version_cmp(struct amdgpu_device *adev,
1238 enum amd_ip_block_type type,
1239 u32 major, u32 minor)
d38ceaf9 1240{
2990a1fc 1241 struct amdgpu_ip_block *ip_block = amdgpu_device_ip_get_ip_block(adev, type);
d38ceaf9 1242
a1255107
AD
1243 if (ip_block && ((ip_block->version->major > major) ||
1244 ((ip_block->version->major == major) &&
1245 (ip_block->version->minor >= minor))))
d38ceaf9
AD
1246 return 0;
1247
1248 return 1;
1249}
1250
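For illustration (block type and version picked arbitrarily), a caller gates a feature on a minimum IP version like this:

	/* 0 means "equal or greater", so this is true for SMC 7.1 and newer. */
	if (amdgpu_device_ip_block_version_cmp(adev, AMD_IP_BLOCK_TYPE_SMC, 7, 1) == 0) {
		/* safe to use a feature that needs SMC 7.1+ */
	}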
a1255107 1251/**
2990a1fc 1252 * amdgpu_device_ip_block_add
a1255107
AD
1253 *
1254 * @adev: amdgpu_device pointer
1255 * @ip_block_version: pointer to the IP to add
1256 *
1257 * Adds the IP block driver information to the collection of IPs
1258 * on the asic.
1259 */
2990a1fc
AD
1260int amdgpu_device_ip_block_add(struct amdgpu_device *adev,
1261 const struct amdgpu_ip_block_version *ip_block_version)
a1255107
AD
1262{
1263 if (!ip_block_version)
1264 return -EINVAL;
1265
e966a725 1266 DRM_INFO("add ip block number %d <%s>\n", adev->num_ip_blocks,
a0bae357
HR
1267 ip_block_version->funcs->name);
1268
a1255107
AD
1269 adev->ip_blocks[adev->num_ip_blocks++].version = ip_block_version;
1270
1271 return 0;
1272}
1273
e3ecdffa
AD
1274/**
1275 * amdgpu_device_enable_virtual_display - enable virtual display feature
1276 *
1277 * @adev: amdgpu_device pointer
1278 *
1279 * Enables the virtual display feature if the user has enabled it via
1280 * the module parameter virtual_display. This feature provides a virtual
1281 * display hardware on headless boards or in virtualized environments.
1282 * This function parses and validates the configuration string specified by
1283 * the user and configures the virtual display configuration (number of
1284 * virtual connectors, crtcs, etc.) specified.
1285 */
483ef985 1286static void amdgpu_device_enable_virtual_display(struct amdgpu_device *adev)
9accf2fd
ED
1287{
1288 adev->enable_virtual_display = false;
1289
1290 if (amdgpu_virtual_display) {
1291 struct drm_device *ddev = adev->ddev;
1292 const char *pci_address_name = pci_name(ddev->pdev);
0f66356d 1293 char *pciaddstr, *pciaddstr_tmp, *pciaddname_tmp, *pciaddname;
9accf2fd
ED
1294
1295 pciaddstr = kstrdup(amdgpu_virtual_display, GFP_KERNEL);
1296 pciaddstr_tmp = pciaddstr;
0f66356d
ED
1297 while ((pciaddname_tmp = strsep(&pciaddstr_tmp, ";"))) {
1298 pciaddname = strsep(&pciaddname_tmp, ",");
967de2a9
YT
1299 if (!strcmp("all", pciaddname)
1300 || !strcmp(pci_address_name, pciaddname)) {
0f66356d
ED
1301 long num_crtc;
1302 int res = -1;
1303
9accf2fd 1304 adev->enable_virtual_display = true;
0f66356d
ED
1305
1306 if (pciaddname_tmp)
1307 res = kstrtol(pciaddname_tmp, 10,
1308 &num_crtc);
1309
1310 if (!res) {
1311 if (num_crtc < 1)
1312 num_crtc = 1;
1313 if (num_crtc > 6)
1314 num_crtc = 6;
1315 adev->mode_info.num_crtc = num_crtc;
1316 } else {
1317 adev->mode_info.num_crtc = 1;
1318 }
9accf2fd
ED
1319 break;
1320 }
1321 }
1322
0f66356d
ED
1323 DRM_INFO("virtual display string:%s, %s:virtual_display:%d, num_crtc:%d\n",
1324 amdgpu_virtual_display, pci_address_name,
1325 adev->enable_virtual_display, adev->mode_info.num_crtc);
9accf2fd
ED
1326
1327 kfree(pciaddstr);
1328 }
1329}
1330
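For example (hypothetical PCI address), loading the driver with amdgpu.virtual_display=0000:03:00.0,2 enables the feature on that device with two virtual CRTCs, virtual_display=all enables it on every amdgpu device, and multiple entries can be chained with semicolons.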
e3ecdffa
AD
1331/**
1332 * amdgpu_device_parse_gpu_info_fw - parse gpu info firmware
1333 *
1334 * @adev: amdgpu_device pointer
1335 *
1336 * Parses the asic configuration parameters specified in the gpu info
1337 * firmware and makes them available to the driver for use in configuring
1338 * the asic.
1339 * Returns 0 on success, -EINVAL on failure.
1340 */
e2a75f88
AD
1341static int amdgpu_device_parse_gpu_info_fw(struct amdgpu_device *adev)
1342{
e2a75f88
AD
1343 const char *chip_name;
1344 char fw_name[30];
1345 int err;
1346 const struct gpu_info_firmware_header_v1_0 *hdr;
1347
ab4fe3e1
HR
1348 adev->firmware.gpu_info_fw = NULL;
1349
e2a75f88
AD
1350 switch (adev->asic_type) {
1351 case CHIP_TOPAZ:
1352 case CHIP_TONGA:
1353 case CHIP_FIJI:
e2a75f88 1354 case CHIP_POLARIS10:
cc07f18d 1355 case CHIP_POLARIS11:
e2a75f88 1356 case CHIP_POLARIS12:
cc07f18d 1357 case CHIP_VEGAM:
e2a75f88
AD
1358 case CHIP_CARRIZO:
1359 case CHIP_STONEY:
1360#ifdef CONFIG_DRM_AMDGPU_SI
1361 case CHIP_VERDE:
1362 case CHIP_TAHITI:
1363 case CHIP_PITCAIRN:
1364 case CHIP_OLAND:
1365 case CHIP_HAINAN:
1366#endif
1367#ifdef CONFIG_DRM_AMDGPU_CIK
1368 case CHIP_BONAIRE:
1369 case CHIP_HAWAII:
1370 case CHIP_KAVERI:
1371 case CHIP_KABINI:
1372 case CHIP_MULLINS:
1373#endif
27c0bc71 1374 case CHIP_VEGA20:
e2a75f88
AD
1375 default:
1376 return 0;
1377 case CHIP_VEGA10:
1378 chip_name = "vega10";
1379 break;
3f76dced
AD
1380 case CHIP_VEGA12:
1381 chip_name = "vega12";
1382 break;
2d2e5e7e 1383 case CHIP_RAVEN:
54c4d17e
FX
1384 if (adev->rev_id >= 8)
1385 chip_name = "raven2";
741deade
AD
1386 else if (adev->pdev->device == 0x15d8)
1387 chip_name = "picasso";
54c4d17e
FX
1388 else
1389 chip_name = "raven";
2d2e5e7e 1390 break;
65e60f6e
LM
1391 case CHIP_ARCTURUS:
1392 chip_name = "arcturus";
1393 break;
23c6268e
HR
1394 case CHIP_NAVI10:
1395 chip_name = "navi10";
1396 break;
ed42cfe1
XY
1397 case CHIP_NAVI14:
1398 chip_name = "navi14";
1399 break;
e2a75f88
AD
1400 }
1401
1402 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_gpu_info.bin", chip_name);
ab4fe3e1 1403 err = request_firmware(&adev->firmware.gpu_info_fw, fw_name, adev->dev);
e2a75f88
AD
1404 if (err) {
1405 dev_err(adev->dev,
1406 "Failed to load gpu_info firmware \"%s\"\n",
1407 fw_name);
1408 goto out;
1409 }
ab4fe3e1 1410 err = amdgpu_ucode_validate(adev->firmware.gpu_info_fw);
e2a75f88
AD
1411 if (err) {
1412 dev_err(adev->dev,
1413 "Failed to validate gpu_info firmware \"%s\"\n",
1414 fw_name);
1415 goto out;
1416 }
1417
ab4fe3e1 1418 hdr = (const struct gpu_info_firmware_header_v1_0 *)adev->firmware.gpu_info_fw->data;
e2a75f88
AD
1419 amdgpu_ucode_print_gpu_info_hdr(&hdr->header);
1420
1421 switch (hdr->version_major) {
1422 case 1:
1423 {
1424 const struct gpu_info_firmware_v1_0 *gpu_info_fw =
ab4fe3e1 1425 (const struct gpu_info_firmware_v1_0 *)(adev->firmware.gpu_info_fw->data +
e2a75f88
AD
1426 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1427
b5ab16bf
AD
1428 adev->gfx.config.max_shader_engines = le32_to_cpu(gpu_info_fw->gc_num_se);
1429 adev->gfx.config.max_cu_per_sh = le32_to_cpu(gpu_info_fw->gc_num_cu_per_sh);
1430 adev->gfx.config.max_sh_per_se = le32_to_cpu(gpu_info_fw->gc_num_sh_per_se);
1431 adev->gfx.config.max_backends_per_se = le32_to_cpu(gpu_info_fw->gc_num_rb_per_se);
e2a75f88 1432 adev->gfx.config.max_texture_channel_caches =
b5ab16bf
AD
1433 le32_to_cpu(gpu_info_fw->gc_num_tccs);
1434 adev->gfx.config.max_gprs = le32_to_cpu(gpu_info_fw->gc_num_gprs);
1435 adev->gfx.config.max_gs_threads = le32_to_cpu(gpu_info_fw->gc_num_max_gs_thds);
1436 adev->gfx.config.gs_vgt_table_depth = le32_to_cpu(gpu_info_fw->gc_gs_table_depth);
1437 adev->gfx.config.gs_prim_buffer_depth = le32_to_cpu(gpu_info_fw->gc_gsprim_buff_depth);
e2a75f88 1438 adev->gfx.config.double_offchip_lds_buf =
b5ab16bf
AD
1439 le32_to_cpu(gpu_info_fw->gc_double_offchip_lds_buffer);
1440 adev->gfx.cu_info.wave_front_size = le32_to_cpu(gpu_info_fw->gc_wave_size);
51fd0370
HZ
1441 adev->gfx.cu_info.max_waves_per_simd =
1442 le32_to_cpu(gpu_info_fw->gc_max_waves_per_simd);
1443 adev->gfx.cu_info.max_scratch_slots_per_cu =
1444 le32_to_cpu(gpu_info_fw->gc_max_scratch_slots_per_cu);
1445 adev->gfx.cu_info.lds_size = le32_to_cpu(gpu_info_fw->gc_lds_size);
48321c3d 1446 if (hdr->version_minor >= 1) {
35c2e910
HZ
1447 const struct gpu_info_firmware_v1_1 *gpu_info_fw =
1448 (const struct gpu_info_firmware_v1_1 *)(adev->firmware.gpu_info_fw->data +
1449 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1450 adev->gfx.config.num_sc_per_sh =
1451 le32_to_cpu(gpu_info_fw->num_sc_per_sh);
1452 adev->gfx.config.num_packer_per_sc =
1453 le32_to_cpu(gpu_info_fw->num_packer_per_sc);
1454 }
48321c3d
HW
1455#ifdef CONFIG_DRM_AMD_DC_DCN2_0
1456 if (hdr->version_minor == 2) {
1457 const struct gpu_info_firmware_v1_2 *gpu_info_fw =
1458 (const struct gpu_info_firmware_v1_2 *)(adev->firmware.gpu_info_fw->data +
1459 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1460 adev->dm.soc_bounding_box = &gpu_info_fw->soc_bounding_box;
1461 }
1462#endif
e2a75f88
AD
1463 break;
1464 }
1465 default:
1466 dev_err(adev->dev,
1467 "Unsupported gpu_info table %d\n", hdr->header.ucode_version);
1468 err = -EINVAL;
1469 goto out;
1470 }
1471out:
e2a75f88
AD
1472 return err;
1473}
1474
e3ecdffa
AD
1475/**
1476 * amdgpu_device_ip_early_init - run early init for hardware IPs
1477 *
1478 * @adev: amdgpu_device pointer
1479 *
1480 * Early initialization pass for hardware IPs. The hardware IPs that make
1481 * up each asic are discovered and each IP's early_init callback is run. This
1482 * is the first stage in initializing the asic.
1483 * Returns 0 on success, negative error code on failure.
1484 */
06ec9070 1485static int amdgpu_device_ip_early_init(struct amdgpu_device *adev)
d38ceaf9 1486{
aaa36a97 1487 int i, r;
d38ceaf9 1488
483ef985 1489 amdgpu_device_enable_virtual_display(adev);
a6be7570 1490
d38ceaf9 1491 switch (adev->asic_type) {
aaa36a97
AD
1492 case CHIP_TOPAZ:
1493 case CHIP_TONGA:
48299f95 1494 case CHIP_FIJI:
2cc0c0b5 1495 case CHIP_POLARIS10:
32cc7e53 1496 case CHIP_POLARIS11:
c4642a47 1497 case CHIP_POLARIS12:
32cc7e53 1498 case CHIP_VEGAM:
aaa36a97 1499 case CHIP_CARRIZO:
39bb0c92
SL
1500 case CHIP_STONEY:
1501 if (adev->asic_type == CHIP_CARRIZO || adev->asic_type == CHIP_STONEY)
aaa36a97
AD
1502 adev->family = AMDGPU_FAMILY_CZ;
1503 else
1504 adev->family = AMDGPU_FAMILY_VI;
1505
1506 r = vi_set_ip_blocks(adev);
1507 if (r)
1508 return r;
1509 break;
33f34802
KW
1510#ifdef CONFIG_DRM_AMDGPU_SI
1511 case CHIP_VERDE:
1512 case CHIP_TAHITI:
1513 case CHIP_PITCAIRN:
1514 case CHIP_OLAND:
1515 case CHIP_HAINAN:
295d0daf 1516 adev->family = AMDGPU_FAMILY_SI;
33f34802
KW
1517 r = si_set_ip_blocks(adev);
1518 if (r)
1519 return r;
1520 break;
1521#endif
a2e73f56
AD
1522#ifdef CONFIG_DRM_AMDGPU_CIK
1523 case CHIP_BONAIRE:
1524 case CHIP_HAWAII:
1525 case CHIP_KAVERI:
1526 case CHIP_KABINI:
1527 case CHIP_MULLINS:
1528 if ((adev->asic_type == CHIP_BONAIRE) || (adev->asic_type == CHIP_HAWAII))
1529 adev->family = AMDGPU_FAMILY_CI;
1530 else
1531 adev->family = AMDGPU_FAMILY_KV;
1532
1533 r = cik_set_ip_blocks(adev);
1534 if (r)
1535 return r;
1536 break;
1537#endif
e48a3cd9
AD
1538 case CHIP_VEGA10:
1539 case CHIP_VEGA12:
e4bd8170 1540 case CHIP_VEGA20:
e48a3cd9 1541 case CHIP_RAVEN:
61cf44c1 1542 case CHIP_ARCTURUS:
741deade 1543 if (adev->asic_type == CHIP_RAVEN)
2ca8a5d2
CZ
1544 adev->family = AMDGPU_FAMILY_RV;
1545 else
1546 adev->family = AMDGPU_FAMILY_AI;
460826e6
KW
1547
1548 r = soc15_set_ip_blocks(adev);
1549 if (r)
1550 return r;
1551 break;
0a5b8c7b 1552 case CHIP_NAVI10:
7ecb5cd4 1553 case CHIP_NAVI14:
0a5b8c7b
HR
1554 adev->family = AMDGPU_FAMILY_NV;
1555
1556 r = nv_set_ip_blocks(adev);
1557 if (r)
1558 return r;
1559 break;
d38ceaf9
AD
1560 default:
1561 /* FIXME: not supported yet */
1562 return -EINVAL;
1563 }
1564
e2a75f88
AD
1565 r = amdgpu_device_parse_gpu_info_fw(adev);
1566 if (r)
1567 return r;
1568
1884734a 1569 amdgpu_amdkfd_device_probe(adev);
1570
3149d9da
XY
1571 if (amdgpu_sriov_vf(adev)) {
1572 r = amdgpu_virt_request_full_gpu(adev, true);
1573 if (r)
5ffa61c1 1574 return -EAGAIN;
78d48112
TH
1575
1576 /* query the reg access mode at the very beginning */
1577 amdgpu_virt_init_reg_access_mode(adev);
3149d9da
XY
1578 }
1579
3b94fb10 1580 adev->pm.pp_feature = amdgpu_pp_feature_mask;
00544006
HR
1581 if (amdgpu_sriov_vf(adev))
1582 adev->pm.pp_feature &= ~PP_GFXOFF_MASK;
00f54b97 1583
d38ceaf9
AD
1584 for (i = 0; i < adev->num_ip_blocks; i++) {
1585 if ((amdgpu_ip_block_mask & (1 << i)) == 0) {
ed8cf00c
HR
1586 DRM_ERROR("disabled ip block: %d <%s>\n",
1587 i, adev->ip_blocks[i].version->funcs->name);
a1255107 1588 adev->ip_blocks[i].status.valid = false;
d38ceaf9 1589 } else {
a1255107
AD
1590 if (adev->ip_blocks[i].version->funcs->early_init) {
1591 r = adev->ip_blocks[i].version->funcs->early_init((void *)adev);
2c1a2784 1592 if (r == -ENOENT) {
a1255107 1593 adev->ip_blocks[i].status.valid = false;
2c1a2784 1594 } else if (r) {
a1255107
AD
1595 DRM_ERROR("early_init of IP block <%s> failed %d\n",
1596 adev->ip_blocks[i].version->funcs->name, r);
d38ceaf9 1597 return r;
2c1a2784 1598 } else {
a1255107 1599 adev->ip_blocks[i].status.valid = true;
2c1a2784 1600 }
974e6b64 1601 } else {
a1255107 1602 adev->ip_blocks[i].status.valid = true;
d38ceaf9 1603 }
d38ceaf9 1604 }
21a249ca
AD
1605 /* get the vbios after the asic_funcs are set up */
1606 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON) {
1607 /* Read BIOS */
1608 if (!amdgpu_get_bios(adev))
1609 return -EINVAL;
1610
1611 r = amdgpu_atombios_init(adev);
1612 if (r) {
1613 dev_err(adev->dev, "amdgpu_atombios_init failed\n");
1614 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_INIT_FAIL, 0, 0);
1615 return r;
1616 }
1617 }
d38ceaf9
AD
1618 }
1619
395d1fb9
NH
1620 adev->cg_flags &= amdgpu_cg_mask;
1621 adev->pg_flags &= amdgpu_pg_mask;
1622
d38ceaf9
AD
1623 return 0;
1624}
1625
0a4f2520
RZ
1626static int amdgpu_device_ip_hw_init_phase1(struct amdgpu_device *adev)
1627{
1628 int i, r;
1629
1630 for (i = 0; i < adev->num_ip_blocks; i++) {
1631 if (!adev->ip_blocks[i].status.sw)
1632 continue;
1633 if (adev->ip_blocks[i].status.hw)
1634 continue;
1635 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
2d11fd3f 1636 (amdgpu_sriov_vf(adev) && (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP)) ||
0a4f2520
RZ
1637 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH) {
1638 r = adev->ip_blocks[i].version->funcs->hw_init(adev);
1639 if (r) {
1640 DRM_ERROR("hw_init of IP block <%s> failed %d\n",
1641 adev->ip_blocks[i].version->funcs->name, r);
1642 return r;
1643 }
1644 adev->ip_blocks[i].status.hw = true;
1645 }
1646 }
1647
1648 return 0;
1649}
1650
1651static int amdgpu_device_ip_hw_init_phase2(struct amdgpu_device *adev)
1652{
1653 int i, r;
1654
1655 for (i = 0; i < adev->num_ip_blocks; i++) {
1656 if (!adev->ip_blocks[i].status.sw)
1657 continue;
1658 if (adev->ip_blocks[i].status.hw)
1659 continue;
1660 r = adev->ip_blocks[i].version->funcs->hw_init(adev);
1661 if (r) {
1662 DRM_ERROR("hw_init of IP block <%s> failed %d\n",
1663 adev->ip_blocks[i].version->funcs->name, r);
1664 return r;
1665 }
1666 adev->ip_blocks[i].status.hw = true;
1667 }
1668
1669 return 0;
1670}
1671
7a3e0bb2
RZ
1672static int amdgpu_device_fw_loading(struct amdgpu_device *adev)
1673{
1674 int r = 0;
1675 int i;
80f41f84 1676 uint32_t smu_version;
7a3e0bb2
RZ
1677
1678 if (adev->asic_type >= CHIP_VEGA10) {
1679 for (i = 0; i < adev->num_ip_blocks; i++) {
1680 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP) {
1681 if (adev->in_gpu_reset || adev->in_suspend) {
1682 if (amdgpu_sriov_vf(adev) && adev->in_gpu_reset)
1683 break; /* sriov gpu reset, psp need to do hw_init before IH because of hw limit */
1684 r = adev->ip_blocks[i].version->funcs->resume(adev);
1685 if (r) {
1686 DRM_ERROR("resume of IP block <%s> failed %d\n",
1687 adev->ip_blocks[i].version->funcs->name, r);
1688 return r;
1689 }
1690 } else {
1691 r = adev->ip_blocks[i].version->funcs->hw_init(adev);
1692 if (r) {
1693 DRM_ERROR("hw_init of IP block <%s> failed %d\n",
1694 adev->ip_blocks[i].version->funcs->name, r);
1695 return r;
1696 }
1697 }
1698 adev->ip_blocks[i].status.hw = true;
1699 }
1700 }
1701 }
80f41f84 1702 r = amdgpu_pm_load_smu_firmware(adev, &smu_version);
7a3e0bb2 1703
80f41f84 1704 return r;
7a3e0bb2
RZ
1705}
1706
e3ecdffa
AD
1707/**
1708 * amdgpu_device_ip_init - run init for hardware IPs
1709 *
1710 * @adev: amdgpu_device pointer
1711 *
1712 * Main initialization pass for hardware IPs. The list of all the hardware
1713 * IPs that make up the asic is walked and the sw_init and hw_init callbacks
1714 * are run. sw_init initializes the software state associated with each IP
1715 * and hw_init initializes the hardware associated with each IP.
1716 * Returns 0 on success, negative error code on failure.
1717 */
06ec9070 1718static int amdgpu_device_ip_init(struct amdgpu_device *adev)
d38ceaf9
AD
1719{
1720 int i, r;
1721
c030f2e4 1722 r = amdgpu_ras_init(adev);
1723 if (r)
1724 return r;
1725
d38ceaf9 1726 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 1727 if (!adev->ip_blocks[i].status.valid)
d38ceaf9 1728 continue;
a1255107 1729 r = adev->ip_blocks[i].version->funcs->sw_init((void *)adev);
2c1a2784 1730 if (r) {
a1255107
AD
1731 DRM_ERROR("sw_init of IP block <%s> failed %d\n",
1732 adev->ip_blocks[i].version->funcs->name, r);
72d3f592 1733 goto init_failed;
2c1a2784 1734 }
a1255107 1735 adev->ip_blocks[i].status.sw = true;
bfca0289 1736
d38ceaf9 1737 /* need to do gmc hw init early so we can allocate gpu mem */
a1255107 1738 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) {
06ec9070 1739 r = amdgpu_device_vram_scratch_init(adev);
2c1a2784
AD
1740 if (r) {
1741 DRM_ERROR("amdgpu_vram_scratch_init failed %d\n", r);
72d3f592 1742 goto init_failed;
2c1a2784 1743 }
a1255107 1744 r = adev->ip_blocks[i].version->funcs->hw_init((void *)adev);
2c1a2784
AD
1745 if (r) {
1746 DRM_ERROR("hw_init %d failed %d\n", i, r);
72d3f592 1747 goto init_failed;
2c1a2784 1748 }
06ec9070 1749 r = amdgpu_device_wb_init(adev);
2c1a2784 1750 if (r) {
06ec9070 1751 DRM_ERROR("amdgpu_device_wb_init failed %d\n", r);
72d3f592 1752 goto init_failed;
2c1a2784 1753 }
a1255107 1754 adev->ip_blocks[i].status.hw = true;
2493664f
ML
1755
1756 /* right after GMC hw init, we create CSA */
f92d5c61 1757 if (amdgpu_mcbp || amdgpu_sriov_vf(adev)) {
1e256e27
RZ
1758 r = amdgpu_allocate_static_csa(adev, &adev->virt.csa_obj,
1759 AMDGPU_GEM_DOMAIN_VRAM,
1760 AMDGPU_CSA_SIZE);
2493664f
ML
1761 if (r) {
1762 DRM_ERROR("allocate CSA failed %d\n", r);
72d3f592 1763 goto init_failed;
2493664f
ML
1764 }
1765 }
d38ceaf9
AD
1766 }
1767 }
1768
533aed27
AG
1769 r = amdgpu_ib_pool_init(adev);
1770 if (r) {
1771 dev_err(adev->dev, "IB initialization failed (%d).\n", r);
1772 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_IB_INIT_FAIL, 0, r);
1773 goto init_failed;
1774 }
1775
c8963ea4
RZ
1776 r = amdgpu_ucode_create_bo(adev); /* create ucode bo when sw_init complete*/
1777 if (r)
72d3f592 1778 goto init_failed;
0a4f2520
RZ
1779
1780 r = amdgpu_device_ip_hw_init_phase1(adev);
1781 if (r)
72d3f592 1782 goto init_failed;
0a4f2520 1783
7a3e0bb2
RZ
1784 r = amdgpu_device_fw_loading(adev);
1785 if (r)
72d3f592 1786 goto init_failed;
7a3e0bb2 1787
0a4f2520
RZ
1788 r = amdgpu_device_ip_hw_init_phase2(adev);
1789 if (r)
72d3f592 1790 goto init_failed;
d38ceaf9 1791
3e2e2ab5
HZ
1792 if (adev->gmc.xgmi.num_physical_nodes > 1)
1793 amdgpu_xgmi_add_device(adev);
1884734a 1794 amdgpu_amdkfd_device_init(adev);
c6332b97 1795
72d3f592 1796init_failed:
d3c117e5 1797 if (amdgpu_sriov_vf(adev)) {
72d3f592
ED
1798 if (!r)
1799 amdgpu_virt_init_data_exchange(adev);
c6332b97 1800 amdgpu_virt_release_full_gpu(adev, true);
d3c117e5 1801 }
c6332b97 1802
72d3f592 1803 return r;
d38ceaf9
AD
1804}
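/*
 * Illustrative sketch, assuming the usual amd_ip_funcs/amdgpu_ip_block_version
 * layout from amd_shared.h ("foo" is a made-up IP block): each block supplies
 * the callbacks that the init/fini loops above invoke by IP type:
 *
 *	static const struct amd_ip_funcs foo_ip_funcs = {
 *		.name    = "foo",
 *		.sw_init = foo_sw_init,
 *		.hw_init = foo_hw_init,
 *		.hw_fini = foo_hw_fini,
 *		.sw_fini = foo_sw_fini,
 *	};
 *
 *	const struct amdgpu_ip_block_version foo_ip_block = {
 *		.type  = AMD_IP_BLOCK_TYPE_COMMON,
 *		.major = 1, .minor = 0, .rev = 0,
 *		.funcs = &foo_ip_funcs,
 *	};
 */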
1805
e3ecdffa
AD
1806/**
1807 * amdgpu_device_fill_reset_magic - writes reset magic to gart pointer
1808 *
1809 * @adev: amdgpu_device pointer
1810 *
1811 * Writes a reset magic value to the gart pointer in VRAM. The driver calls
1812 * this function before a GPU reset. If the value is retained after a
 1813 * GPU reset, VRAM has not been lost. Some GPU resets may destroy VRAM contents.
1814 */
06ec9070 1815static void amdgpu_device_fill_reset_magic(struct amdgpu_device *adev)
0c49e0b8
CZ
1816{
1817 memcpy(adev->reset_magic, adev->gart.ptr, AMDGPU_RESET_MAGIC_NUM);
1818}
1819
e3ecdffa
AD
1820/**
1821 * amdgpu_device_check_vram_lost - check if vram is valid
1822 *
1823 * @adev: amdgpu_device pointer
1824 *
1825 * Checks the reset magic value written to the gart pointer in VRAM.
1826 * The driver calls this after a GPU reset to see if the contents of
 1827 * VRAM are lost or not.
1828 * returns true if vram is lost, false if not.
1829 */
06ec9070 1830static bool amdgpu_device_check_vram_lost(struct amdgpu_device *adev)
0c49e0b8
CZ
1831{
1832 return !!memcmp(adev->gart.ptr, adev->reset_magic,
1833 AMDGPU_RESET_MAGIC_NUM);
1834}
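/*
 * Illustrative sketch of how the two helpers above pair up (ordering modelled
 * on the driver's reset path, shown here only as a sketch):
 *
 *	amdgpu_device_fill_reset_magic(adev);		// while VRAM is known good
 *	... ASIC reset ...
 *	if (amdgpu_device_check_vram_lost(adev))
 *		atomic_inc(&adev->vram_lost_counter);	// buffers must be restored
 */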
1835
e3ecdffa 1836/**
1112a46b 1837 * amdgpu_device_set_cg_state - set clockgating for amdgpu device
e3ecdffa
AD
1838 *
1839 * @adev: amdgpu_device pointer
1840 *
e3ecdffa 1841 * The list of all the hardware IPs that make up the asic is walked and the
1112a46b
RZ
 1842 * set_clockgating_state callbacks are run with the requested state.
 1843 * The late initialization pass enables clockgating for hardware IPs;
 1844 * the fini and suspend paths disable it again.
e3ecdffa
AD
1845 * Returns 0 on success, negative error code on failure.
1846 */
fdd34271 1847
1112a46b
RZ
1848static int amdgpu_device_set_cg_state(struct amdgpu_device *adev,
1849 enum amd_clockgating_state state)
d38ceaf9 1850{
1112a46b 1851 int i, j, r;
d38ceaf9 1852
4a2ba394
SL
1853 if (amdgpu_emu_mode == 1)
1854 return 0;
1855
1112a46b
RZ
1856 for (j = 0; j < adev->num_ip_blocks; j++) {
1857 i = state == AMD_CG_STATE_GATE ? j : adev->num_ip_blocks - j - 1;
a2d31dc3 1858 if (!adev->ip_blocks[i].status.late_initialized)
d38ceaf9 1859 continue;
4a446d55 1860 /* skip CG for VCE/UVD, it's handled specially */
a1255107 1861 if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_UVD &&
57716327 1862 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCE &&
34319b32 1863 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCN &&
57716327 1864 adev->ip_blocks[i].version->funcs->set_clockgating_state) {
4a446d55 1865 /* enable clockgating to save power */
a1255107 1866 r = adev->ip_blocks[i].version->funcs->set_clockgating_state((void *)adev,
1112a46b 1867 state);
4a446d55
AD
1868 if (r) {
1869 DRM_ERROR("set_clockgating_state(gate) of IP block <%s> failed %d\n",
a1255107 1870 adev->ip_blocks[i].version->funcs->name, r);
4a446d55
AD
1871 return r;
1872 }
b0b00ff1 1873 }
d38ceaf9 1874 }
06b18f61 1875
c9f96fd5
RZ
1876 return 0;
1877}
1878
1112a46b 1879static int amdgpu_device_set_pg_state(struct amdgpu_device *adev, enum amd_powergating_state state)
c9f96fd5 1880{
1112a46b 1881 int i, j, r;
06b18f61 1882
c9f96fd5
RZ
1883 if (amdgpu_emu_mode == 1)
1884 return 0;
1885
1112a46b
RZ
1886 for (j = 0; j < adev->num_ip_blocks; j++) {
1887 i = state == AMD_PG_STATE_GATE ? j : adev->num_ip_blocks - j - 1;
a2d31dc3 1888 if (!adev->ip_blocks[i].status.late_initialized)
c9f96fd5
RZ
1889 continue;
 1890 /* skip PG for VCE/UVD, it's handled specially */
1891 if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_UVD &&
1892 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCE &&
1893 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCN &&
1894 adev->ip_blocks[i].version->funcs->set_powergating_state) {
1895 /* enable powergating to save power */
1896 r = adev->ip_blocks[i].version->funcs->set_powergating_state((void *)adev,
1112a46b 1897 state);
c9f96fd5
RZ
1898 if (r) {
1899 DRM_ERROR("set_powergating_state(gate) of IP block <%s> failed %d\n",
1900 adev->ip_blocks[i].version->funcs->name, r);
1901 return r;
1902 }
1903 }
1904 }
2dc80b00
S
1905 return 0;
1906}
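/*
 * Illustrative sketch, mirroring the calls made elsewhere in this file: the
 * two helpers above are used as a pair, gating on the way up and ungating in
 * the reverse order on the way down:
 *
 *	// late init
 *	amdgpu_device_set_cg_state(adev, AMD_CG_STATE_GATE);
 *	amdgpu_device_set_pg_state(adev, AMD_PG_STATE_GATE);
 *
 *	// fini / suspend
 *	amdgpu_device_set_pg_state(adev, AMD_PG_STATE_UNGATE);
 *	amdgpu_device_set_cg_state(adev, AMD_CG_STATE_UNGATE);
 */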
1907
beff74bc
AD
1908static int amdgpu_device_enable_mgpu_fan_boost(void)
1909{
1910 struct amdgpu_gpu_instance *gpu_ins;
1911 struct amdgpu_device *adev;
1912 int i, ret = 0;
1913
1914 mutex_lock(&mgpu_info.mutex);
1915
1916 /*
1917 * MGPU fan boost feature should be enabled
1918 * only when there are two or more dGPUs in
1919 * the system
1920 */
1921 if (mgpu_info.num_dgpu < 2)
1922 goto out;
1923
1924 for (i = 0; i < mgpu_info.num_dgpu; i++) {
1925 gpu_ins = &(mgpu_info.gpu_ins[i]);
1926 adev = gpu_ins->adev;
1927 if (!(adev->flags & AMD_IS_APU) &&
1928 !gpu_ins->mgpu_fan_enabled &&
1929 adev->powerplay.pp_funcs &&
1930 adev->powerplay.pp_funcs->enable_mgpu_fan_boost) {
1931 ret = amdgpu_dpm_enable_mgpu_fan_boost(adev);
1932 if (ret)
1933 break;
1934
1935 gpu_ins->mgpu_fan_enabled = 1;
1936 }
1937 }
1938
1939out:
1940 mutex_unlock(&mgpu_info.mutex);
1941
1942 return ret;
1943}
1944
e3ecdffa
AD
1945/**
1946 * amdgpu_device_ip_late_init - run late init for hardware IPs
1947 *
1948 * @adev: amdgpu_device pointer
1949 *
1950 * Late initialization pass for hardware IPs. The list of all the hardware
1951 * IPs that make up the asic is walked and the late_init callbacks are run.
1952 * late_init covers any special initialization that an IP requires
 1953 * after all of them have been initialized or something that needs to happen
1954 * late in the init process.
1955 * Returns 0 on success, negative error code on failure.
1956 */
06ec9070 1957static int amdgpu_device_ip_late_init(struct amdgpu_device *adev)
2dc80b00
S
1958{
1959 int i = 0, r;
1960
1961 for (i = 0; i < adev->num_ip_blocks; i++) {
73f847db 1962 if (!adev->ip_blocks[i].status.hw)
2dc80b00
S
1963 continue;
1964 if (adev->ip_blocks[i].version->funcs->late_init) {
1965 r = adev->ip_blocks[i].version->funcs->late_init((void *)adev);
1966 if (r) {
1967 DRM_ERROR("late_init of IP block <%s> failed %d\n",
1968 adev->ip_blocks[i].version->funcs->name, r);
1969 return r;
1970 }
2dc80b00 1971 }
73f847db 1972 adev->ip_blocks[i].status.late_initialized = true;
2dc80b00
S
1973 }
1974
1112a46b
RZ
1975 amdgpu_device_set_cg_state(adev, AMD_CG_STATE_GATE);
1976 amdgpu_device_set_pg_state(adev, AMD_PG_STATE_GATE);
916ac57f 1977
06ec9070 1978 amdgpu_device_fill_reset_magic(adev);
d38ceaf9 1979
beff74bc
AD
1980 r = amdgpu_device_enable_mgpu_fan_boost();
1981 if (r)
1982 DRM_ERROR("enable mgpu fan boost failed (%d).\n", r);
1983
1984 /* set to low pstate by default */
1985 amdgpu_xgmi_set_pstate(adev, 0);
1986
d38ceaf9
AD
1987 return 0;
1988}
1989
e3ecdffa
AD
1990/**
1991 * amdgpu_device_ip_fini - run fini for hardware IPs
1992 *
1993 * @adev: amdgpu_device pointer
1994 *
1995 * Main teardown pass for hardware IPs. The list of all the hardware
1996 * IPs that make up the asic is walked and the hw_fini and sw_fini callbacks
1997 * are run. hw_fini tears down the hardware associated with each IP
1998 * and sw_fini tears down any software state associated with each IP.
1999 * Returns 0 on success, negative error code on failure.
2000 */
06ec9070 2001static int amdgpu_device_ip_fini(struct amdgpu_device *adev)
d38ceaf9
AD
2002{
2003 int i, r;
2004
c030f2e4 2005 amdgpu_ras_pre_fini(adev);
2006
a82400b5
AG
2007 if (adev->gmc.xgmi.num_physical_nodes > 1)
2008 amdgpu_xgmi_remove_device(adev);
2009
1884734a 2010 amdgpu_amdkfd_device_fini(adev);
05df1f01
RZ
2011
2012 amdgpu_device_set_pg_state(adev, AMD_PG_STATE_UNGATE);
fdd34271
RZ
2013 amdgpu_device_set_cg_state(adev, AMD_CG_STATE_UNGATE);
2014
3e96dbfd
AD
2015 /* need to disable SMC first */
2016 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 2017 if (!adev->ip_blocks[i].status.hw)
3e96dbfd 2018 continue;
fdd34271 2019 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) {
a1255107 2020 r = adev->ip_blocks[i].version->funcs->hw_fini((void *)adev);
3e96dbfd
AD
2021 /* XXX handle errors */
2022 if (r) {
2023 DRM_DEBUG("hw_fini of IP block <%s> failed %d\n",
a1255107 2024 adev->ip_blocks[i].version->funcs->name, r);
3e96dbfd 2025 }
a1255107 2026 adev->ip_blocks[i].status.hw = false;
3e96dbfd
AD
2027 break;
2028 }
2029 }
2030
d38ceaf9 2031 for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
a1255107 2032 if (!adev->ip_blocks[i].status.hw)
d38ceaf9 2033 continue;
8201a67a 2034
a1255107 2035 r = adev->ip_blocks[i].version->funcs->hw_fini((void *)adev);
d38ceaf9 2036 /* XXX handle errors */
2c1a2784 2037 if (r) {
a1255107
AD
2038 DRM_DEBUG("hw_fini of IP block <%s> failed %d\n",
2039 adev->ip_blocks[i].version->funcs->name, r);
2c1a2784 2040 }
8201a67a 2041
a1255107 2042 adev->ip_blocks[i].status.hw = false;
d38ceaf9
AD
2043 }
2044
9950cda2 2045
d38ceaf9 2046 for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
a1255107 2047 if (!adev->ip_blocks[i].status.sw)
d38ceaf9 2048 continue;
c12aba3a
ML
2049
2050 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) {
c8963ea4 2051 amdgpu_ucode_free_bo(adev);
1e256e27 2052 amdgpu_free_static_csa(&adev->virt.csa_obj);
c12aba3a
ML
2053 amdgpu_device_wb_fini(adev);
2054 amdgpu_device_vram_scratch_fini(adev);
533aed27 2055 amdgpu_ib_pool_fini(adev);
c12aba3a
ML
2056 }
2057
a1255107 2058 r = adev->ip_blocks[i].version->funcs->sw_fini((void *)adev);
d38ceaf9 2059 /* XXX handle errors */
2c1a2784 2060 if (r) {
a1255107
AD
2061 DRM_DEBUG("sw_fini of IP block <%s> failed %d\n",
2062 adev->ip_blocks[i].version->funcs->name, r);
2c1a2784 2063 }
a1255107
AD
2064 adev->ip_blocks[i].status.sw = false;
2065 adev->ip_blocks[i].status.valid = false;
d38ceaf9
AD
2066 }
2067
a6dcfd9c 2068 for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
a1255107 2069 if (!adev->ip_blocks[i].status.late_initialized)
8a2eef1d 2070 continue;
a1255107
AD
2071 if (adev->ip_blocks[i].version->funcs->late_fini)
2072 adev->ip_blocks[i].version->funcs->late_fini((void *)adev);
2073 adev->ip_blocks[i].status.late_initialized = false;
a6dcfd9c
ML
2074 }
2075
c030f2e4 2076 amdgpu_ras_fini(adev);
2077
030308fc 2078 if (amdgpu_sriov_vf(adev))
24136135
ML
2079 if (amdgpu_virt_release_full_gpu(adev, false))
2080 DRM_ERROR("failed to release exclusive mode on fini\n");
2493664f 2081
d38ceaf9
AD
2082 return 0;
2083}
2084
e3ecdffa 2085/**
beff74bc 2086 * amdgpu_device_delayed_init_work_handler - work handler for IB tests
e3ecdffa 2087 *
1112a46b 2088 * @work: work_struct.
e3ecdffa 2089 */
beff74bc 2090static void amdgpu_device_delayed_init_work_handler(struct work_struct *work)
2dc80b00
S
2091{
2092 struct amdgpu_device *adev =
beff74bc 2093 container_of(work, struct amdgpu_device, delayed_init_work.work);
916ac57f
RZ
2094 int r;
2095
2096 r = amdgpu_ib_ring_tests(adev);
2097 if (r)
2098 DRM_ERROR("ib ring test failed (%d).\n", r);
2dc80b00
S
2099}
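/*
 * Illustrative sketch: the handler above is wired up in amdgpu_device_init()
 * and kicked after init/resume so the IB ring tests run off the probe path:
 *
 *	INIT_DELAYED_WORK(&adev->delayed_init_work,
 *			  amdgpu_device_delayed_init_work_handler);
 *	...
 *	queue_delayed_work(system_wq, &adev->delayed_init_work,
 *			   msecs_to_jiffies(AMDGPU_RESUME_MS));
 */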
2100
1e317b99
RZ
2101static void amdgpu_device_delay_enable_gfx_off(struct work_struct *work)
2102{
2103 struct amdgpu_device *adev =
2104 container_of(work, struct amdgpu_device, gfx.gfx_off_delay_work.work);
2105
2106 mutex_lock(&adev->gfx.gfx_off_mutex);
2107 if (!adev->gfx.gfx_off_state && !adev->gfx.gfx_off_req_count) {
2108 if (!amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_GFX, true))
2109 adev->gfx.gfx_off_state = true;
2110 }
2111 mutex_unlock(&adev->gfx.gfx_off_mutex);
2112}
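/*
 * Illustrative sketch, assuming the usual amdgpu_gfx_off_ctrl() helper from
 * amdgpu_gfx.c: GFXOFF is reference counted, and the delayed work above only
 * re-enables it once the request count drops back to zero:
 *
 *	amdgpu_gfx_off_ctrl(adev, false);	// bump gfx_off_req_count, force GFX on
 *	... touch GFX registers ...
 *	amdgpu_gfx_off_ctrl(adev, true);	// drop the count, re-arm this delayed work
 */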
2113
e3ecdffa 2114/**
e7854a03 2115 * amdgpu_device_ip_suspend_phase1 - run suspend for hardware IPs (phase 1)
e3ecdffa
AD
2116 *
2117 * @adev: amdgpu_device pointer
2118 *
2119 * Main suspend function for hardware IPs. The list of all the hardware
2120 * IPs that make up the asic is walked, clockgating is disabled and the
2121 * suspend callbacks are run. suspend puts the hardware and software state
2122 * in each IP into a state suitable for suspend.
2123 * Returns 0 on success, negative error code on failure.
2124 */
e7854a03
AD
2125static int amdgpu_device_ip_suspend_phase1(struct amdgpu_device *adev)
2126{
2127 int i, r;
2128
05df1f01 2129 amdgpu_device_set_pg_state(adev, AMD_PG_STATE_UNGATE);
fdd34271 2130 amdgpu_device_set_cg_state(adev, AMD_CG_STATE_UNGATE);
05df1f01 2131
e7854a03
AD
2132 for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
2133 if (!adev->ip_blocks[i].status.valid)
2134 continue;
2135 /* displays are handled separately */
2136 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_DCE) {
e7854a03
AD
2137 /* XXX handle errors */
2138 r = adev->ip_blocks[i].version->funcs->suspend(adev);
2139 /* XXX handle errors */
2140 if (r) {
2141 DRM_ERROR("suspend of IP block <%s> failed %d\n",
2142 adev->ip_blocks[i].version->funcs->name, r);
2143 }
2144 }
2145 }
2146
e7854a03
AD
2147 return 0;
2148}
2149
2150/**
2151 * amdgpu_device_ip_suspend_phase2 - run suspend for hardware IPs (phase 2)
2152 *
2153 * @adev: amdgpu_device pointer
2154 *
2155 * Main suspend function for hardware IPs. The list of all the hardware
2156 * IPs that make up the asic is walked, clockgating is disabled and the
2157 * suspend callbacks are run. suspend puts the hardware and software state
2158 * in each IP into a state suitable for suspend.
2159 * Returns 0 on success, negative error code on failure.
2160 */
2161static int amdgpu_device_ip_suspend_phase2(struct amdgpu_device *adev)
d38ceaf9
AD
2162{
2163 int i, r;
2164
2165 for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
a1255107 2166 if (!adev->ip_blocks[i].status.valid)
d38ceaf9 2167 continue;
e7854a03
AD
2168 /* displays are handled in phase1 */
2169 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_DCE)
2170 continue;
d38ceaf9 2171 /* XXX handle errors */
a1255107 2172 r = adev->ip_blocks[i].version->funcs->suspend(adev);
d38ceaf9 2173 /* XXX handle errors */
2c1a2784 2174 if (r) {
a1255107
AD
2175 DRM_ERROR("suspend of IP block <%s> failed %d\n",
2176 adev->ip_blocks[i].version->funcs->name, r);
2c1a2784 2177 }
d38ceaf9
AD
2178 }
2179
2180 return 0;
2181}
2182
e7854a03
AD
2183/**
2184 * amdgpu_device_ip_suspend - run suspend for hardware IPs
2185 *
2186 * @adev: amdgpu_device pointer
2187 *
2188 * Main suspend function for hardware IPs. The list of all the hardware
2189 * IPs that make up the asic is walked, clockgating is disabled and the
2190 * suspend callbacks are run. suspend puts the hardware and software state
2191 * in each IP into a state suitable for suspend.
2192 * Returns 0 on success, negative error code on failure.
2193 */
2194int amdgpu_device_ip_suspend(struct amdgpu_device *adev)
2195{
2196 int r;
2197
e7819644
YT
2198 if (amdgpu_sriov_vf(adev))
2199 amdgpu_virt_request_full_gpu(adev, false);
2200
e7854a03
AD
2201 r = amdgpu_device_ip_suspend_phase1(adev);
2202 if (r)
2203 return r;
2204 r = amdgpu_device_ip_suspend_phase2(adev);
2205
e7819644
YT
2206 if (amdgpu_sriov_vf(adev))
2207 amdgpu_virt_release_full_gpu(adev, false);
2208
e7854a03
AD
2209 return r;
2210}
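/*
 * Illustrative sketch of why suspend is split into two phases: the system
 * suspend path below interleaves VRAM eviction between them, roughly:
 *
 *	amdgpu_device_ip_suspend_phase1(adev);	// displays only
 *	amdgpu_bo_evict_vram(adev);		// bulk of VRAM to GTT
 *	amdgpu_fence_driver_suspend(adev);
 *	amdgpu_device_ip_suspend_phase2(adev);	// everything else
 *	amdgpu_bo_evict_vram(adev);		// catch the GART table etc.
 */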
2211
06ec9070 2212static int amdgpu_device_ip_reinit_early_sriov(struct amdgpu_device *adev)
a90ad3c2
ML
2213{
2214 int i, r;
2215
2cb681b6
ML
2216 static enum amd_ip_block_type ip_order[] = {
2217 AMD_IP_BLOCK_TYPE_GMC,
2218 AMD_IP_BLOCK_TYPE_COMMON,
39186aef 2219 AMD_IP_BLOCK_TYPE_PSP,
2cb681b6
ML
2220 AMD_IP_BLOCK_TYPE_IH,
2221 };
a90ad3c2 2222
2cb681b6
ML
2223 for (i = 0; i < ARRAY_SIZE(ip_order); i++) {
2224 int j;
2225 struct amdgpu_ip_block *block;
a90ad3c2 2226
2cb681b6
ML
2227 for (j = 0; j < adev->num_ip_blocks; j++) {
2228 block = &adev->ip_blocks[j];
2229
2230 if (block->version->type != ip_order[i] ||
2231 !block->status.valid)
2232 continue;
2233
2234 r = block->version->funcs->hw_init(adev);
0aaeefcc 2235 DRM_INFO("RE-INIT-early: %s %s\n", block->version->funcs->name, r?"failed":"succeeded");
c41d1cf6
ML
2236 if (r)
2237 return r;
a90ad3c2
ML
2238 }
2239 }
2240
2241 return 0;
2242}
2243
06ec9070 2244static int amdgpu_device_ip_reinit_late_sriov(struct amdgpu_device *adev)
a90ad3c2
ML
2245{
2246 int i, r;
2247
2cb681b6
ML
2248 static enum amd_ip_block_type ip_order[] = {
2249 AMD_IP_BLOCK_TYPE_SMC,
2250 AMD_IP_BLOCK_TYPE_DCE,
2251 AMD_IP_BLOCK_TYPE_GFX,
2252 AMD_IP_BLOCK_TYPE_SDMA,
257deb8c
FM
2253 AMD_IP_BLOCK_TYPE_UVD,
2254 AMD_IP_BLOCK_TYPE_VCE
2cb681b6 2255 };
a90ad3c2 2256
2cb681b6
ML
2257 for (i = 0; i < ARRAY_SIZE(ip_order); i++) {
2258 int j;
2259 struct amdgpu_ip_block *block;
a90ad3c2 2260
2cb681b6
ML
2261 for (j = 0; j < adev->num_ip_blocks; j++) {
2262 block = &adev->ip_blocks[j];
2263
2264 if (block->version->type != ip_order[i] ||
2265 !block->status.valid)
2266 continue;
2267
2268 r = block->version->funcs->hw_init(adev);
0aaeefcc 2269 DRM_INFO("RE-INIT-late: %s %s\n", block->version->funcs->name, r?"failed":"succeeded");
c41d1cf6
ML
2270 if (r)
2271 return r;
a90ad3c2
ML
2272 }
2273 }
2274
2275 return 0;
2276}
2277
e3ecdffa
AD
2278/**
2279 * amdgpu_device_ip_resume_phase1 - run resume for hardware IPs
2280 *
2281 * @adev: amdgpu_device pointer
2282 *
2283 * First resume function for hardware IPs. The list of all the hardware
2284 * IPs that make up the asic is walked and the resume callbacks are run for
2285 * COMMON, GMC, and IH. resume puts the hardware into a functional state
2286 * after a suspend and updates the software state as necessary. This
2287 * function is also used for restoring the GPU after a GPU reset.
2288 * Returns 0 on success, negative error code on failure.
2289 */
06ec9070 2290static int amdgpu_device_ip_resume_phase1(struct amdgpu_device *adev)
d38ceaf9
AD
2291{
2292 int i, r;
2293
a90ad3c2
ML
2294 for (i = 0; i < adev->num_ip_blocks; i++) {
2295 if (!adev->ip_blocks[i].status.valid)
2296 continue;
a90ad3c2 2297 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
e3ecdffa
AD
2298 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
2299 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH) {
fcf0649f
CZ
2300 r = adev->ip_blocks[i].version->funcs->resume(adev);
2301 if (r) {
2302 DRM_ERROR("resume of IP block <%s> failed %d\n",
2303 adev->ip_blocks[i].version->funcs->name, r);
2304 return r;
2305 }
a90ad3c2
ML
2306 }
2307 }
2308
2309 return 0;
2310}
2311
e3ecdffa
AD
2312/**
2313 * amdgpu_device_ip_resume_phase2 - run resume for hardware IPs
2314 *
2315 * @adev: amdgpu_device pointer
2316 *
 2317 * Second resume function for hardware IPs. The list of all the hardware
2318 * IPs that make up the asic is walked and the resume callbacks are run for
2319 * all blocks except COMMON, GMC, and IH. resume puts the hardware into a
2320 * functional state after a suspend and updates the software state as
2321 * necessary. This function is also used for restoring the GPU after a GPU
2322 * reset.
2323 * Returns 0 on success, negative error code on failure.
2324 */
06ec9070 2325static int amdgpu_device_ip_resume_phase2(struct amdgpu_device *adev)
d38ceaf9
AD
2326{
2327 int i, r;
2328
2329 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 2330 if (!adev->ip_blocks[i].status.valid)
d38ceaf9 2331 continue;
fcf0649f 2332 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
e3ecdffa 2333 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
7a3e0bb2
RZ
2334 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH ||
2335 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP)
fcf0649f 2336 continue;
a1255107 2337 r = adev->ip_blocks[i].version->funcs->resume(adev);
2c1a2784 2338 if (r) {
a1255107
AD
2339 DRM_ERROR("resume of IP block <%s> failed %d\n",
2340 adev->ip_blocks[i].version->funcs->name, r);
d38ceaf9 2341 return r;
2c1a2784 2342 }
d38ceaf9
AD
2343 }
2344
2345 return 0;
2346}
2347
e3ecdffa
AD
2348/**
2349 * amdgpu_device_ip_resume - run resume for hardware IPs
2350 *
2351 * @adev: amdgpu_device pointer
2352 *
2353 * Main resume function for hardware IPs. The hardware IPs
 2354 * are split into two resume functions because they are
 2355 * also used in recovering from a GPU reset and some additional
 2356 * steps need to be taken between them. In this case (S3/S4) they are
2357 * run sequentially.
2358 * Returns 0 on success, negative error code on failure.
2359 */
06ec9070 2360static int amdgpu_device_ip_resume(struct amdgpu_device *adev)
fcf0649f
CZ
2361{
2362 int r;
2363
06ec9070 2364 r = amdgpu_device_ip_resume_phase1(adev);
fcf0649f
CZ
2365 if (r)
2366 return r;
7a3e0bb2
RZ
2367
2368 r = amdgpu_device_fw_loading(adev);
2369 if (r)
2370 return r;
2371
06ec9070 2372 r = amdgpu_device_ip_resume_phase2(adev);
fcf0649f
CZ
2373
2374 return r;
2375}
2376
e3ecdffa
AD
2377/**
2378 * amdgpu_device_detect_sriov_bios - determine if the board supports SR-IOV
2379 *
2380 * @adev: amdgpu_device pointer
2381 *
2382 * Query the VBIOS data tables to determine if the board supports SR-IOV.
2383 */
4e99a44e 2384static void amdgpu_device_detect_sriov_bios(struct amdgpu_device *adev)
048765ad 2385{
6867e1b5
ML
2386 if (amdgpu_sriov_vf(adev)) {
2387 if (adev->is_atom_fw) {
2388 if (amdgpu_atomfirmware_gpu_supports_virtualization(adev))
2389 adev->virt.caps |= AMDGPU_SRIOV_CAPS_SRIOV_VBIOS;
2390 } else {
2391 if (amdgpu_atombios_has_gpu_virtualization_table(adev))
2392 adev->virt.caps |= AMDGPU_SRIOV_CAPS_SRIOV_VBIOS;
2393 }
2394
2395 if (!(adev->virt.caps & AMDGPU_SRIOV_CAPS_SRIOV_VBIOS))
2396 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_NO_VBIOS, 0, 0);
a5bde2f9 2397 }
048765ad
AR
2398}
2399
e3ecdffa
AD
2400/**
2401 * amdgpu_device_asic_has_dc_support - determine if DC supports the asic
2402 *
2403 * @asic_type: AMD asic type
2404 *
 2405 * Check if there is DC (new modesetting infrastructure) support for an asic.
2406 * returns true if DC has support, false if not.
2407 */
4562236b
HW
2408bool amdgpu_device_asic_has_dc_support(enum amd_asic_type asic_type)
2409{
2410 switch (asic_type) {
2411#if defined(CONFIG_DRM_AMD_DC)
2412 case CHIP_BONAIRE:
0d6fbccb 2413 case CHIP_KAVERI:
367e6687
AD
2414 case CHIP_KABINI:
2415 case CHIP_MULLINS:
d9fda248
HW
2416 /*
2417 * We have systems in the wild with these ASICs that require
2418 * LVDS and VGA support which is not supported with DC.
2419 *
2420 * Fallback to the non-DC driver here by default so as not to
2421 * cause regressions.
2422 */
2423 return amdgpu_dc > 0;
2424 case CHIP_HAWAII:
4562236b
HW
2425 case CHIP_CARRIZO:
2426 case CHIP_STONEY:
4562236b 2427 case CHIP_POLARIS10:
675fd32b 2428 case CHIP_POLARIS11:
2c8ad2d5 2429 case CHIP_POLARIS12:
675fd32b 2430 case CHIP_VEGAM:
4562236b
HW
2431 case CHIP_TONGA:
2432 case CHIP_FIJI:
42f8ffa1 2433 case CHIP_VEGA10:
dca7b401 2434 case CHIP_VEGA12:
c6034aa2 2435 case CHIP_VEGA20:
dc37a9a0 2436#if defined(CONFIG_DRM_AMD_DC_DCN1_0)
fd187853 2437 case CHIP_RAVEN:
b4f199c7
HW
2438#endif
2439#if defined(CONFIG_DRM_AMD_DC_DCN2_0)
2440 case CHIP_NAVI10:
8fceceb6 2441 case CHIP_NAVI14:
42f8ffa1 2442#endif
fd187853 2443 return amdgpu_dc != 0;
4562236b
HW
2444#endif
2445 default:
2446 return false;
2447 }
2448}
2449
2450/**
2451 * amdgpu_device_has_dc_support - check if dc is supported
2452 *
 2453 * @adev: amdgpu_device pointer
2454 *
2455 * Returns true for supported, false for not supported
2456 */
2457bool amdgpu_device_has_dc_support(struct amdgpu_device *adev)
2458{
2555039d
XY
2459 if (amdgpu_sriov_vf(adev))
2460 return false;
2461
4562236b
HW
2462 return amdgpu_device_asic_has_dc_support(adev->asic_type);
2463}
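/*
 * Illustrative sketch, the pattern used throughout this file: callers choose
 * between the legacy and DC display paths at runtime, e.g.
 *
 *	if (amdgpu_device_has_dc_support(adev))
 *		drm_atomic_helper_shutdown(adev->ddev);
 *	else
 *		drm_helper_force_disable_all(adev->ddev);
 */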
2464
d4535e2c
AG
2465
2466static void amdgpu_device_xgmi_reset_func(struct work_struct *__work)
2467{
2468 struct amdgpu_device *adev =
2469 container_of(__work, struct amdgpu_device, xgmi_reset_work);
2470
2471 adev->asic_reset_res = amdgpu_asic_reset(adev);
2472 if (adev->asic_reset_res)
fed184e9 2473 DRM_WARN("ASIC reset failed with error, %d for drm dev, %s",
d4535e2c
AG
2474 adev->asic_reset_res, adev->ddev->unique);
2475}
2476
2477
d38ceaf9
AD
2478/**
2479 * amdgpu_device_init - initialize the driver
2480 *
2481 * @adev: amdgpu_device pointer
87e3f136 2482 * @ddev: drm dev pointer
d38ceaf9
AD
2483 * @pdev: pci dev pointer
2484 * @flags: driver flags
2485 *
2486 * Initializes the driver info and hw (all asics).
2487 * Returns 0 for success or an error on failure.
2488 * Called at driver startup.
2489 */
2490int amdgpu_device_init(struct amdgpu_device *adev,
2491 struct drm_device *ddev,
2492 struct pci_dev *pdev,
2493 uint32_t flags)
2494{
2495 int r, i;
2496 bool runtime = false;
95844d20 2497 u32 max_MBps;
d38ceaf9
AD
2498
2499 adev->shutdown = false;
2500 adev->dev = &pdev->dev;
2501 adev->ddev = ddev;
2502 adev->pdev = pdev;
2503 adev->flags = flags;
2f7d10b3 2504 adev->asic_type = flags & AMD_ASIC_MASK;
d38ceaf9 2505 adev->usec_timeout = AMDGPU_MAX_USEC_TIMEOUT;
593aa2d2
SL
2506 if (amdgpu_emu_mode == 1)
2507 adev->usec_timeout *= 2;
770d13b1 2508 adev->gmc.gart_size = 512 * 1024 * 1024;
d38ceaf9
AD
2509 adev->accel_working = false;
2510 adev->num_rings = 0;
2511 adev->mman.buffer_funcs = NULL;
2512 adev->mman.buffer_funcs_ring = NULL;
2513 adev->vm_manager.vm_pte_funcs = NULL;
3798e9a6 2514 adev->vm_manager.vm_pte_num_rqs = 0;
132f34e4 2515 adev->gmc.gmc_funcs = NULL;
f54d1867 2516 adev->fence_context = dma_fence_context_alloc(AMDGPU_MAX_RINGS);
b8866c26 2517 bitmap_zero(adev->gfx.pipe_reserve_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
d38ceaf9
AD
2518
2519 adev->smc_rreg = &amdgpu_invalid_rreg;
2520 adev->smc_wreg = &amdgpu_invalid_wreg;
2521 adev->pcie_rreg = &amdgpu_invalid_rreg;
2522 adev->pcie_wreg = &amdgpu_invalid_wreg;
36b9a952
HR
2523 adev->pciep_rreg = &amdgpu_invalid_rreg;
2524 adev->pciep_wreg = &amdgpu_invalid_wreg;
d38ceaf9
AD
2525 adev->uvd_ctx_rreg = &amdgpu_invalid_rreg;
2526 adev->uvd_ctx_wreg = &amdgpu_invalid_wreg;
2527 adev->didt_rreg = &amdgpu_invalid_rreg;
2528 adev->didt_wreg = &amdgpu_invalid_wreg;
ccdbb20a
RZ
2529 adev->gc_cac_rreg = &amdgpu_invalid_rreg;
2530 adev->gc_cac_wreg = &amdgpu_invalid_wreg;
d38ceaf9
AD
2531 adev->audio_endpt_rreg = &amdgpu_block_invalid_rreg;
2532 adev->audio_endpt_wreg = &amdgpu_block_invalid_wreg;
2533
3e39ab90
AD
2534 DRM_INFO("initializing kernel modesetting (%s 0x%04X:0x%04X 0x%04X:0x%04X 0x%02X).\n",
2535 amdgpu_asic_name[adev->asic_type], pdev->vendor, pdev->device,
2536 pdev->subsystem_vendor, pdev->subsystem_device, pdev->revision);
d38ceaf9
AD
2537
2538 /* mutex initialization are all done here so we
2539 * can recall function without having locking issues */
d38ceaf9 2540 atomic_set(&adev->irq.ih.lock, 0);
0e5ca0d1 2541 mutex_init(&adev->firmware.mutex);
d38ceaf9
AD
2542 mutex_init(&adev->pm.mutex);
2543 mutex_init(&adev->gfx.gpu_clock_mutex);
2544 mutex_init(&adev->srbm_mutex);
b8866c26 2545 mutex_init(&adev->gfx.pipe_reserve_mutex);
d23ee13f 2546 mutex_init(&adev->gfx.gfx_off_mutex);
d38ceaf9 2547 mutex_init(&adev->grbm_idx_mutex);
d38ceaf9 2548 mutex_init(&adev->mn_lock);
e23b74aa 2549 mutex_init(&adev->virt.vf_errors.lock);
d38ceaf9 2550 hash_init(adev->mn_hash);
13a752e3 2551 mutex_init(&adev->lock_reset);
bb5a2bdf 2552 mutex_init(&adev->virt.dpm_mutex);
32eaeae0 2553 mutex_init(&adev->psp.mutex);
d38ceaf9 2554
912dfc84
EQ
2555 r = amdgpu_device_check_arguments(adev);
2556 if (r)
2557 return r;
d38ceaf9 2558
d38ceaf9
AD
2559 spin_lock_init(&adev->mmio_idx_lock);
2560 spin_lock_init(&adev->smc_idx_lock);
2561 spin_lock_init(&adev->pcie_idx_lock);
2562 spin_lock_init(&adev->uvd_ctx_idx_lock);
2563 spin_lock_init(&adev->didt_idx_lock);
ccdbb20a 2564 spin_lock_init(&adev->gc_cac_idx_lock);
16abb5d2 2565 spin_lock_init(&adev->se_cac_idx_lock);
d38ceaf9 2566 spin_lock_init(&adev->audio_endpt_idx_lock);
95844d20 2567 spin_lock_init(&adev->mm_stats.lock);
d38ceaf9 2568
0c4e7fa5
CZ
2569 INIT_LIST_HEAD(&adev->shadow_list);
2570 mutex_init(&adev->shadow_list_lock);
2571
795f2813
AR
2572 INIT_LIST_HEAD(&adev->ring_lru_list);
2573 spin_lock_init(&adev->ring_lru_list_lock);
2574
beff74bc
AD
2575 INIT_DELAYED_WORK(&adev->delayed_init_work,
2576 amdgpu_device_delayed_init_work_handler);
1e317b99
RZ
2577 INIT_DELAYED_WORK(&adev->gfx.gfx_off_delay_work,
2578 amdgpu_device_delay_enable_gfx_off);
2dc80b00 2579
d4535e2c
AG
2580 INIT_WORK(&adev->xgmi_reset_work, amdgpu_device_xgmi_reset_func);
2581
d23ee13f 2582 adev->gfx.gfx_off_req_count = 1;
b1ddf548
RZ
2583 adev->pm.ac_power = power_supply_is_system_supplied() > 0 ? true : false;
2584
0fa49558
AX
2585 /* Registers mapping */
2586 /* TODO: block userspace mapping of io register */
da69c161
KW
2587 if (adev->asic_type >= CHIP_BONAIRE) {
2588 adev->rmmio_base = pci_resource_start(adev->pdev, 5);
2589 adev->rmmio_size = pci_resource_len(adev->pdev, 5);
2590 } else {
2591 adev->rmmio_base = pci_resource_start(adev->pdev, 2);
2592 adev->rmmio_size = pci_resource_len(adev->pdev, 2);
2593 }
d38ceaf9 2594
d38ceaf9
AD
2595 adev->rmmio = ioremap(adev->rmmio_base, adev->rmmio_size);
2596 if (adev->rmmio == NULL) {
2597 return -ENOMEM;
2598 }
2599 DRM_INFO("register mmio base: 0x%08X\n", (uint32_t)adev->rmmio_base);
2600 DRM_INFO("register mmio size: %u\n", (unsigned)adev->rmmio_size);
2601
d38ceaf9
AD
2602 /* io port mapping */
2603 for (i = 0; i < DEVICE_COUNT_RESOURCE; i++) {
2604 if (pci_resource_flags(adev->pdev, i) & IORESOURCE_IO) {
2605 adev->rio_mem_size = pci_resource_len(adev->pdev, i);
2606 adev->rio_mem = pci_iomap(adev->pdev, i, adev->rio_mem_size);
2607 break;
2608 }
2609 }
2610 if (adev->rio_mem == NULL)
b64a18c5 2611 DRM_INFO("PCI I/O BAR is not found.\n");
d38ceaf9 2612
b2109d8e
JX
2613 /* enable PCIE atomic ops */
2614 r = pci_enable_atomic_ops_to_root(adev->pdev,
2615 PCI_EXP_DEVCAP2_ATOMIC_COMP32 |
2616 PCI_EXP_DEVCAP2_ATOMIC_COMP64);
2617 if (r) {
2618 adev->have_atomics_support = false;
2619 DRM_INFO("PCIE atomic ops is not supported\n");
2620 } else {
2621 adev->have_atomics_support = true;
2622 }
2623
5494d864
AD
2624 amdgpu_device_get_pcie_info(adev);
2625
b239c017
JX
2626 if (amdgpu_mcbp)
2627 DRM_INFO("MCBP is enabled\n");
2628
5f84cc63
JX
2629 if (amdgpu_mes && adev->asic_type >= CHIP_NAVI10)
2630 adev->enable_mes = true;
2631
f54eeab4 2632 if (amdgpu_discovery && adev->asic_type >= CHIP_NAVI10) {
a190d1c7
XY
2633 r = amdgpu_discovery_init(adev);
2634 if (r) {
2635 dev_err(adev->dev, "amdgpu_discovery_init failed\n");
2636 return r;
2637 }
2638 }
2639
d38ceaf9 2640 /* early init functions */
06ec9070 2641 r = amdgpu_device_ip_early_init(adev);
d38ceaf9
AD
2642 if (r)
2643 return r;
2644
6585661d
OZ
2645 /* doorbell bar mapping and doorbell index init*/
2646 amdgpu_device_doorbell_init(adev);
2647
d38ceaf9
AD
2648 /* if we have > 1 VGA cards, then disable the amdgpu VGA resources */
2649 /* this will fail for cards that aren't VGA class devices, just
2650 * ignore it */
06ec9070 2651 vga_client_register(adev->pdev, adev, NULL, amdgpu_device_vga_set_decode);
d38ceaf9 2652
e9bef455 2653 if (amdgpu_device_is_px(ddev))
d38ceaf9 2654 runtime = true;
84c8b22e
LW
2655 if (!pci_is_thunderbolt_attached(adev->pdev))
2656 vga_switcheroo_register_client(adev->pdev,
2657 &amdgpu_switcheroo_ops, runtime);
d38ceaf9
AD
2658 if (runtime)
2659 vga_switcheroo_init_domain_pm_ops(adev->dev, &adev->vga_pm_domain);
2660
9475a943
SL
2661 if (amdgpu_emu_mode == 1) {
2662 /* post the asic on emulation mode */
2663 emu_soc_asic_init(adev);
bfca0289 2664 goto fence_driver_init;
9475a943 2665 }
bfca0289 2666
4e99a44e
ML
2667 /* detect if we are with an SRIOV vbios */
2668 amdgpu_device_detect_sriov_bios(adev);
048765ad 2669
95e8e59e
AD
2670 /* check if we need to reset the asic
2671 * E.g., driver was not cleanly unloaded previously, etc.
2672 */
f14899fd 2673 if (!amdgpu_sriov_vf(adev) && amdgpu_asic_need_reset_on_init(adev)) {
95e8e59e
AD
2674 r = amdgpu_asic_reset(adev);
2675 if (r) {
2676 dev_err(adev->dev, "asic reset on init failed\n");
2677 goto failed;
2678 }
2679 }
2680
d38ceaf9 2681 /* Post card if necessary */
39c640c0 2682 if (amdgpu_device_need_post(adev)) {
d38ceaf9 2683 if (!adev->bios) {
bec86378 2684 dev_err(adev->dev, "no vBIOS found\n");
83ba126a
AD
2685 r = -EINVAL;
2686 goto failed;
d38ceaf9 2687 }
bec86378 2688 DRM_INFO("GPU posting now...\n");
4e99a44e
ML
2689 r = amdgpu_atom_asic_init(adev->mode_info.atom_context);
2690 if (r) {
2691 dev_err(adev->dev, "gpu post error!\n");
2692 goto failed;
2693 }
d38ceaf9
AD
2694 }
2695
88b64e95
AD
2696 if (adev->is_atom_fw) {
2697 /* Initialize clocks */
2698 r = amdgpu_atomfirmware_get_clock_info(adev);
2699 if (r) {
2700 dev_err(adev->dev, "amdgpu_atomfirmware_get_clock_info failed\n");
e23b74aa 2701 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_GET_CLOCK_FAIL, 0, 0);
88b64e95
AD
2702 goto failed;
2703 }
2704 } else {
a5bde2f9
AD
2705 /* Initialize clocks */
2706 r = amdgpu_atombios_get_clock_info(adev);
2707 if (r) {
2708 dev_err(adev->dev, "amdgpu_atombios_get_clock_info failed\n");
e23b74aa 2709 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_GET_CLOCK_FAIL, 0, 0);
89041940 2710 goto failed;
a5bde2f9
AD
2711 }
2712 /* init i2c buses */
4562236b
HW
2713 if (!amdgpu_device_has_dc_support(adev))
2714 amdgpu_atombios_i2c_init(adev);
2c1a2784 2715 }
d38ceaf9 2716
bfca0289 2717fence_driver_init:
d38ceaf9
AD
2718 /* Fence driver */
2719 r = amdgpu_fence_driver_init(adev);
2c1a2784
AD
2720 if (r) {
2721 dev_err(adev->dev, "amdgpu_fence_driver_init failed\n");
e23b74aa 2722 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_FENCE_INIT_FAIL, 0, 0);
83ba126a 2723 goto failed;
2c1a2784 2724 }
d38ceaf9
AD
2725
2726 /* init the mode config */
2727 drm_mode_config_init(adev->ddev);
2728
06ec9070 2729 r = amdgpu_device_ip_init(adev);
d38ceaf9 2730 if (r) {
8840a387 2731 /* failed in exclusive mode due to timeout */
2732 if (amdgpu_sriov_vf(adev) &&
2733 !amdgpu_sriov_runtime(adev) &&
2734 amdgpu_virt_mmio_blocked(adev) &&
2735 !amdgpu_virt_wait_reset(adev)) {
2736 dev_err(adev->dev, "VF exclusive mode timeout\n");
1daee8b4
PD
2737 /* Don't send request since VF is inactive. */
2738 adev->virt.caps &= ~AMDGPU_SRIOV_CAPS_RUNTIME;
2739 adev->virt.ops = NULL;
8840a387 2740 r = -EAGAIN;
2741 goto failed;
2742 }
06ec9070 2743 dev_err(adev->dev, "amdgpu_device_ip_init failed\n");
e23b74aa 2744 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_AMDGPU_INIT_FAIL, 0, 0);
72d3f592
ED
2745 if (amdgpu_virt_request_full_gpu(adev, false))
2746 amdgpu_virt_release_full_gpu(adev, false);
83ba126a 2747 goto failed;
d38ceaf9
AD
2748 }
2749
2750 adev->accel_working = true;
2751
e59c0205
AX
2752 amdgpu_vm_check_compute_bug(adev);
2753
95844d20
MO
2754 /* Initialize the buffer migration limit. */
2755 if (amdgpu_moverate >= 0)
2756 max_MBps = amdgpu_moverate;
2757 else
2758 max_MBps = 8; /* Allow 8 MB/s. */
2759 /* Get a log2 for easy divisions. */
2760 adev->mm_stats.log2_max_MBps = ilog2(max(1u, max_MBps));
2761
9bc92b9c
ML
2762 amdgpu_fbdev_init(adev);
2763
e9bc1bf7
YT
2764 if (amdgpu_sriov_vf(adev) && amdgim_is_hwperf(adev))
2765 amdgpu_pm_virt_sysfs_init(adev);
2766
d2f52ac8
RZ
2767 r = amdgpu_pm_sysfs_init(adev);
2768 if (r)
2769 DRM_ERROR("registering pm debugfs failed (%d).\n", r);
2770
5bb23532
OM
2771 r = amdgpu_ucode_sysfs_init(adev);
2772 if (r)
2773 DRM_ERROR("Creating firmware sysfs failed (%d).\n", r);
2774
75758255 2775 r = amdgpu_debugfs_gem_init(adev);
3f14e623 2776 if (r)
d38ceaf9 2777 DRM_ERROR("registering gem debugfs failed (%d).\n", r);
d38ceaf9
AD
2778
2779 r = amdgpu_debugfs_regs_init(adev);
3f14e623 2780 if (r)
d38ceaf9 2781 DRM_ERROR("registering register debugfs failed (%d).\n", r);
d38ceaf9 2782
50ab2533 2783 r = amdgpu_debugfs_firmware_init(adev);
3f14e623 2784 if (r)
50ab2533 2785 DRM_ERROR("registering firmware debugfs failed (%d).\n", r);
50ab2533 2786
763efb6c 2787 r = amdgpu_debugfs_init(adev);
db95e218 2788 if (r)
763efb6c 2789 DRM_ERROR("Creating debugfs files failed (%d).\n", r);
db95e218 2790
d38ceaf9
AD
2791 if ((amdgpu_testing & 1)) {
2792 if (adev->accel_working)
2793 amdgpu_test_moves(adev);
2794 else
2795 DRM_INFO("amdgpu: acceleration disabled, skipping move tests\n");
2796 }
d38ceaf9
AD
2797 if (amdgpu_benchmarking) {
2798 if (adev->accel_working)
2799 amdgpu_benchmark(adev, amdgpu_benchmarking);
2800 else
2801 DRM_INFO("amdgpu: acceleration disabled, skipping benchmarks\n");
2802 }
2803
2804 /* enable clockgating, etc. after ib tests, etc. since some blocks require
2805 * explicit gating rather than handling it automatically.
2806 */
06ec9070 2807 r = amdgpu_device_ip_late_init(adev);
2c1a2784 2808 if (r) {
06ec9070 2809 dev_err(adev->dev, "amdgpu_device_ip_late_init failed\n");
e23b74aa 2810 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_AMDGPU_LATE_INIT_FAIL, 0, r);
83ba126a 2811 goto failed;
2c1a2784 2812 }
d38ceaf9 2813
108c6a63 2814 /* must succeed. */
511fdbc3 2815 amdgpu_ras_resume(adev);
108c6a63 2816
beff74bc
AD
2817 queue_delayed_work(system_wq, &adev->delayed_init_work,
2818 msecs_to_jiffies(AMDGPU_RESUME_MS));
2819
dcea6e65
KR
2820 r = device_create_file(adev->dev, &dev_attr_pcie_replay_count);
2821 if (r) {
2822 dev_err(adev->dev, "Could not create pcie_replay_count");
2823 return r;
2824 }
108c6a63 2825
d155bef0
AB
2826 if (IS_ENABLED(CONFIG_PERF_EVENTS))
2827 r = amdgpu_pmu_init(adev);
9c7c85f7
JK
2828 if (r)
2829 dev_err(adev->dev, "amdgpu_pmu_init failed\n");
2830
d38ceaf9 2831 return 0;
83ba126a
AD
2832
2833failed:
89041940 2834 amdgpu_vf_error_trans_all(adev);
83ba126a
AD
2835 if (runtime)
2836 vga_switcheroo_fini_domain_pm_ops(adev->dev);
8840a387 2837
83ba126a 2838 return r;
d38ceaf9
AD
2839}
2840
d38ceaf9
AD
2841/**
2842 * amdgpu_device_fini - tear down the driver
2843 *
2844 * @adev: amdgpu_device pointer
2845 *
2846 * Tear down the driver info (all asics).
2847 * Called at driver shutdown.
2848 */
2849void amdgpu_device_fini(struct amdgpu_device *adev)
2850{
2851 int r;
2852
2853 DRM_INFO("amdgpu: finishing device.\n");
2854 adev->shutdown = true;
e5b03032
ML
2855 /* disable all interrupts */
2856 amdgpu_irq_disable_all(adev);
ff97cba8
ML
2857 if (adev->mode_info.mode_config_initialized){
2858 if (!amdgpu_device_has_dc_support(adev))
c2d88e06 2859 drm_helper_force_disable_all(adev->ddev);
ff97cba8
ML
2860 else
2861 drm_atomic_helper_shutdown(adev->ddev);
2862 }
d38ceaf9 2863 amdgpu_fence_driver_fini(adev);
58e955d9 2864 amdgpu_pm_sysfs_fini(adev);
d38ceaf9 2865 amdgpu_fbdev_fini(adev);
06ec9070 2866 r = amdgpu_device_ip_fini(adev);
ab4fe3e1
HR
2867 if (adev->firmware.gpu_info_fw) {
2868 release_firmware(adev->firmware.gpu_info_fw);
2869 adev->firmware.gpu_info_fw = NULL;
2870 }
d38ceaf9 2871 adev->accel_working = false;
beff74bc 2872 cancel_delayed_work_sync(&adev->delayed_init_work);
d38ceaf9 2873 /* free i2c buses */
4562236b
HW
2874 if (!amdgpu_device_has_dc_support(adev))
2875 amdgpu_i2c_fini(adev);
bfca0289
SL
2876
2877 if (amdgpu_emu_mode != 1)
2878 amdgpu_atombios_fini(adev);
2879
d38ceaf9
AD
2880 kfree(adev->bios);
2881 adev->bios = NULL;
84c8b22e
LW
2882 if (!pci_is_thunderbolt_attached(adev->pdev))
2883 vga_switcheroo_unregister_client(adev->pdev);
83ba126a
AD
2884 if (adev->flags & AMD_IS_PX)
2885 vga_switcheroo_fini_domain_pm_ops(adev->dev);
d38ceaf9
AD
2886 vga_client_register(adev->pdev, NULL, NULL, NULL);
2887 if (adev->rio_mem)
2888 pci_iounmap(adev->pdev, adev->rio_mem);
2889 adev->rio_mem = NULL;
2890 iounmap(adev->rmmio);
2891 adev->rmmio = NULL;
06ec9070 2892 amdgpu_device_doorbell_fini(adev);
e9bc1bf7
YT
2893 if (amdgpu_sriov_vf(adev) && amdgim_is_hwperf(adev))
2894 amdgpu_pm_virt_sysfs_fini(adev);
2895
d38ceaf9 2896 amdgpu_debugfs_regs_cleanup(adev);
dcea6e65 2897 device_remove_file(adev->dev, &dev_attr_pcie_replay_count);
5bb23532 2898 amdgpu_ucode_sysfs_fini(adev);
d155bef0
AB
2899 if (IS_ENABLED(CONFIG_PERF_EVENTS))
2900 amdgpu_pmu_fini(adev);
6698a3d0 2901 amdgpu_debugfs_preempt_cleanup(adev);
f54eeab4 2902 if (amdgpu_discovery && adev->asic_type >= CHIP_NAVI10)
a190d1c7 2903 amdgpu_discovery_fini(adev);
d38ceaf9
AD
2904}
2905
2906
2907/*
2908 * Suspend & resume.
2909 */
2910/**
810ddc3a 2911 * amdgpu_device_suspend - initiate device suspend
d38ceaf9 2912 *
87e3f136
DP
2913 * @dev: drm dev pointer
2914 * @suspend: suspend state
2915 * @fbcon : notify the fbdev of suspend
d38ceaf9
AD
2916 *
2917 * Puts the hw in the suspend state (all asics).
2918 * Returns 0 for success or an error on failure.
2919 * Called at driver suspend.
2920 */
810ddc3a 2921int amdgpu_device_suspend(struct drm_device *dev, bool suspend, bool fbcon)
d38ceaf9
AD
2922{
2923 struct amdgpu_device *adev;
2924 struct drm_crtc *crtc;
2925 struct drm_connector *connector;
5ceb54c6 2926 int r;
d38ceaf9
AD
2927
2928 if (dev == NULL || dev->dev_private == NULL) {
2929 return -ENODEV;
2930 }
2931
2932 adev = dev->dev_private;
2933
2934 if (dev->switch_power_state == DRM_SWITCH_POWER_OFF)
2935 return 0;
2936
44779b43 2937 adev->in_suspend = true;
d38ceaf9
AD
2938 drm_kms_helper_poll_disable(dev);
2939
5f818173
S
2940 if (fbcon)
2941 amdgpu_fbdev_set_suspend(adev, 1);
2942
beff74bc 2943 cancel_delayed_work_sync(&adev->delayed_init_work);
a5459475 2944
4562236b
HW
2945 if (!amdgpu_device_has_dc_support(adev)) {
2946 /* turn off display hw */
2947 drm_modeset_lock_all(dev);
2948 list_for_each_entry(connector, &dev->mode_config.connector_list, head) {
2949 drm_helper_connector_dpms(connector, DRM_MODE_DPMS_OFF);
2950 }
2951 drm_modeset_unlock_all(dev);
fe1053b7
AD
2952 /* unpin the front buffers and cursors */
2953 list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) {
2954 struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
2955 struct drm_framebuffer *fb = crtc->primary->fb;
2956 struct amdgpu_bo *robj;
2957
91334223 2958 if (amdgpu_crtc->cursor_bo && !adev->enable_virtual_display) {
fe1053b7
AD
2959 struct amdgpu_bo *aobj = gem_to_amdgpu_bo(amdgpu_crtc->cursor_bo);
2960 r = amdgpu_bo_reserve(aobj, true);
2961 if (r == 0) {
2962 amdgpu_bo_unpin(aobj);
2963 amdgpu_bo_unreserve(aobj);
2964 }
756e6880 2965 }
756e6880 2966
fe1053b7
AD
2967 if (fb == NULL || fb->obj[0] == NULL) {
2968 continue;
2969 }
2970 robj = gem_to_amdgpu_bo(fb->obj[0]);
2971 /* don't unpin kernel fb objects */
2972 if (!amdgpu_fbdev_robj_is_fb(adev, robj)) {
2973 r = amdgpu_bo_reserve(robj, true);
2974 if (r == 0) {
2975 amdgpu_bo_unpin(robj);
2976 amdgpu_bo_unreserve(robj);
2977 }
d38ceaf9
AD
2978 }
2979 }
2980 }
fe1053b7
AD
2981
2982 amdgpu_amdkfd_suspend(adev);
2983
5e6932fe 2984 amdgpu_ras_suspend(adev);
2985
fe1053b7
AD
2986 r = amdgpu_device_ip_suspend_phase1(adev);
2987
d38ceaf9
AD
2988 /* evict vram memory */
2989 amdgpu_bo_evict_vram(adev);
2990
5ceb54c6 2991 amdgpu_fence_driver_suspend(adev);
d38ceaf9 2992
fe1053b7 2993 r = amdgpu_device_ip_suspend_phase2(adev);
d38ceaf9 2994
a0a71e49
AD
2995 /* evict remaining vram memory
2996 * This second call to evict vram is to evict the gart page table
2997 * using the CPU.
2998 */
d38ceaf9
AD
2999 amdgpu_bo_evict_vram(adev);
3000
3001 pci_save_state(dev->pdev);
3002 if (suspend) {
3003 /* Shut down the device */
3004 pci_disable_device(dev->pdev);
3005 pci_set_power_state(dev->pdev, PCI_D3hot);
74b0b157 3006 } else {
3007 r = amdgpu_asic_reset(adev);
3008 if (r)
3009 DRM_ERROR("amdgpu asic reset failed\n");
d38ceaf9
AD
3010 }
3011
d38ceaf9
AD
3012 return 0;
3013}
3014
3015/**
810ddc3a 3016 * amdgpu_device_resume - initiate device resume
d38ceaf9 3017 *
87e3f136
DP
3018 * @dev: drm dev pointer
3019 * @resume: resume state
3020 * @fbcon : notify the fbdev of resume
d38ceaf9
AD
3021 *
3022 * Bring the hw back to operating state (all asics).
3023 * Returns 0 for success or an error on failure.
3024 * Called at driver resume.
3025 */
810ddc3a 3026int amdgpu_device_resume(struct drm_device *dev, bool resume, bool fbcon)
d38ceaf9
AD
3027{
3028 struct drm_connector *connector;
3029 struct amdgpu_device *adev = dev->dev_private;
756e6880 3030 struct drm_crtc *crtc;
03161a6e 3031 int r = 0;
d38ceaf9
AD
3032
3033 if (dev->switch_power_state == DRM_SWITCH_POWER_OFF)
3034 return 0;
3035
d38ceaf9
AD
3036 if (resume) {
3037 pci_set_power_state(dev->pdev, PCI_D0);
3038 pci_restore_state(dev->pdev);
74b0b157 3039 r = pci_enable_device(dev->pdev);
03161a6e 3040 if (r)
4d3b9ae5 3041 return r;
d38ceaf9
AD
3042 }
3043
3044 /* post card */
39c640c0 3045 if (amdgpu_device_need_post(adev)) {
74b0b157 3046 r = amdgpu_atom_asic_init(adev->mode_info.atom_context);
3047 if (r)
3048 DRM_ERROR("amdgpu asic init failed\n");
3049 }
d38ceaf9 3050
06ec9070 3051 r = amdgpu_device_ip_resume(adev);
e6707218 3052 if (r) {
06ec9070 3053 DRM_ERROR("amdgpu_device_ip_resume failed (%d).\n", r);
4d3b9ae5 3054 return r;
e6707218 3055 }
5ceb54c6
AD
3056 amdgpu_fence_driver_resume(adev);
3057
d38ceaf9 3058
06ec9070 3059 r = amdgpu_device_ip_late_init(adev);
03161a6e 3060 if (r)
4d3b9ae5 3061 return r;
d38ceaf9 3062
beff74bc
AD
3063 queue_delayed_work(system_wq, &adev->delayed_init_work,
3064 msecs_to_jiffies(AMDGPU_RESUME_MS));
3065
fe1053b7
AD
3066 if (!amdgpu_device_has_dc_support(adev)) {
3067 /* pin cursors */
3068 list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) {
3069 struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
3070
91334223 3071 if (amdgpu_crtc->cursor_bo && !adev->enable_virtual_display) {
fe1053b7
AD
3072 struct amdgpu_bo *aobj = gem_to_amdgpu_bo(amdgpu_crtc->cursor_bo);
3073 r = amdgpu_bo_reserve(aobj, true);
3074 if (r == 0) {
3075 r = amdgpu_bo_pin(aobj, AMDGPU_GEM_DOMAIN_VRAM);
3076 if (r != 0)
3077 DRM_ERROR("Failed to pin cursor BO (%d)\n", r);
3078 amdgpu_crtc->cursor_addr = amdgpu_bo_gpu_offset(aobj);
3079 amdgpu_bo_unreserve(aobj);
3080 }
756e6880
AD
3081 }
3082 }
3083 }
ba997709
YZ
3084 r = amdgpu_amdkfd_resume(adev);
3085 if (r)
3086 return r;
756e6880 3087
96a5d8d4 3088 /* Make sure IB tests flushed */
beff74bc 3089 flush_delayed_work(&adev->delayed_init_work);
96a5d8d4 3090
d38ceaf9
AD
3091 /* blat the mode back in */
3092 if (fbcon) {
4562236b
HW
3093 if (!amdgpu_device_has_dc_support(adev)) {
3094 /* pre DCE11 */
3095 drm_helper_resume_force_mode(dev);
3096
3097 /* turn on display hw */
3098 drm_modeset_lock_all(dev);
3099 list_for_each_entry(connector, &dev->mode_config.connector_list, head) {
3100 drm_helper_connector_dpms(connector, DRM_MODE_DPMS_ON);
3101 }
3102 drm_modeset_unlock_all(dev);
d38ceaf9 3103 }
4d3b9ae5 3104 amdgpu_fbdev_set_suspend(adev, 0);
d38ceaf9
AD
3105 }
3106
3107 drm_kms_helper_poll_enable(dev);
23a1a9e5 3108
5e6932fe 3109 amdgpu_ras_resume(adev);
3110
23a1a9e5
L
3111 /*
3112 * Most of the connector probing functions try to acquire runtime pm
3113 * refs to ensure that the GPU is powered on when connector polling is
3114 * performed. Since we're calling this from a runtime PM callback,
3115 * trying to acquire rpm refs will cause us to deadlock.
3116 *
3117 * Since we're guaranteed to be holding the rpm lock, it's safe to
3118 * temporarily disable the rpm helpers so this doesn't deadlock us.
3119 */
3120#ifdef CONFIG_PM
3121 dev->dev->power.disable_depth++;
3122#endif
4562236b
HW
3123 if (!amdgpu_device_has_dc_support(adev))
3124 drm_helper_hpd_irq_event(dev);
3125 else
3126 drm_kms_helper_hotplug_event(dev);
23a1a9e5
L
3127#ifdef CONFIG_PM
3128 dev->dev->power.disable_depth--;
3129#endif
44779b43
RZ
3130 adev->in_suspend = false;
3131
4d3b9ae5 3132 return 0;
d38ceaf9
AD
3133}
3134
e3ecdffa
AD
3135/**
3136 * amdgpu_device_ip_check_soft_reset - did soft reset succeed
3137 *
3138 * @adev: amdgpu_device pointer
3139 *
3140 * The list of all the hardware IPs that make up the asic is walked and
3141 * the check_soft_reset callbacks are run. check_soft_reset determines
3142 * if the asic is still hung or not.
3143 * Returns true if any of the IPs are still in a hung state, false if not.
3144 */
06ec9070 3145static bool amdgpu_device_ip_check_soft_reset(struct amdgpu_device *adev)
63fbf42f
CZ
3146{
3147 int i;
3148 bool asic_hang = false;
3149
f993d628
ML
3150 if (amdgpu_sriov_vf(adev))
3151 return true;
3152
8bc04c29
AD
3153 if (amdgpu_asic_need_full_reset(adev))
3154 return true;
3155
63fbf42f 3156 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 3157 if (!adev->ip_blocks[i].status.valid)
63fbf42f 3158 continue;
a1255107
AD
3159 if (adev->ip_blocks[i].version->funcs->check_soft_reset)
3160 adev->ip_blocks[i].status.hang =
3161 adev->ip_blocks[i].version->funcs->check_soft_reset(adev);
3162 if (adev->ip_blocks[i].status.hang) {
3163 DRM_INFO("IP block:%s is hung!\n", adev->ip_blocks[i].version->funcs->name);
63fbf42f
CZ
3164 asic_hang = true;
3165 }
3166 }
3167 return asic_hang;
3168}
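/*
 * Illustrative sketch of how the soft-reset helpers below chain together in a
 * recovery attempt (ordering only; do_full_asic_reset() is a made-up stand-in
 * for the full reset path):
 *
 *	if (!amdgpu_device_ip_check_soft_reset(adev))
 *		return 0;				// nothing is hung
 *	if (amdgpu_device_ip_need_full_reset(adev))
 *		return do_full_asic_reset(adev);
 *	amdgpu_device_ip_pre_soft_reset(adev);
 *	amdgpu_device_ip_soft_reset(adev);
 *	amdgpu_device_ip_post_soft_reset(adev);
 */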
3169
e3ecdffa
AD
3170/**
3171 * amdgpu_device_ip_pre_soft_reset - prepare for soft reset
3172 *
3173 * @adev: amdgpu_device pointer
3174 *
3175 * The list of all the hardware IPs that make up the asic is walked and the
3176 * pre_soft_reset callbacks are run if the block is hung. pre_soft_reset
3177 * handles any IP specific hardware or software state changes that are
3178 * necessary for a soft reset to succeed.
3179 * Returns 0 on success, negative error code on failure.
3180 */
06ec9070 3181static int amdgpu_device_ip_pre_soft_reset(struct amdgpu_device *adev)
d31a501e
CZ
3182{
3183 int i, r = 0;
3184
3185 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 3186 if (!adev->ip_blocks[i].status.valid)
d31a501e 3187 continue;
a1255107
AD
3188 if (adev->ip_blocks[i].status.hang &&
3189 adev->ip_blocks[i].version->funcs->pre_soft_reset) {
3190 r = adev->ip_blocks[i].version->funcs->pre_soft_reset(adev);
d31a501e
CZ
3191 if (r)
3192 return r;
3193 }
3194 }
3195
3196 return 0;
3197}
3198
e3ecdffa
AD
3199/**
3200 * amdgpu_device_ip_need_full_reset - check if a full asic reset is needed
3201 *
3202 * @adev: amdgpu_device pointer
3203 *
3204 * Some hardware IPs cannot be soft reset. If they are hung, a full gpu
3205 * reset is necessary to recover.
3206 * Returns true if a full asic reset is required, false if not.
3207 */
06ec9070 3208static bool amdgpu_device_ip_need_full_reset(struct amdgpu_device *adev)
35d782fe 3209{
da146d3b
AD
3210 int i;
3211
8bc04c29
AD
3212 if (amdgpu_asic_need_full_reset(adev))
3213 return true;
3214
da146d3b 3215 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 3216 if (!adev->ip_blocks[i].status.valid)
da146d3b 3217 continue;
a1255107
AD
3218 if ((adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) ||
3219 (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) ||
3220 (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_ACP) ||
98512bb8
KW
3221 (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_DCE) ||
3222 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP) {
a1255107 3223 if (adev->ip_blocks[i].status.hang) {
da146d3b
AD
3224 DRM_INFO("Some block need full reset!\n");
3225 return true;
3226 }
3227 }
35d782fe
CZ
3228 }
3229 return false;
3230}
3231
e3ecdffa
AD
3232/**
3233 * amdgpu_device_ip_soft_reset - do a soft reset
3234 *
3235 * @adev: amdgpu_device pointer
3236 *
3237 * The list of all the hardware IPs that make up the asic is walked and the
3238 * soft_reset callbacks are run if the block is hung. soft_reset handles any
3239 * IP specific hardware or software state changes that are necessary to soft
3240 * reset the IP.
3241 * Returns 0 on success, negative error code on failure.
3242 */
06ec9070 3243static int amdgpu_device_ip_soft_reset(struct amdgpu_device *adev)
35d782fe
CZ
3244{
3245 int i, r = 0;
3246
3247 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 3248 if (!adev->ip_blocks[i].status.valid)
35d782fe 3249 continue;
a1255107
AD
3250 if (adev->ip_blocks[i].status.hang &&
3251 adev->ip_blocks[i].version->funcs->soft_reset) {
3252 r = adev->ip_blocks[i].version->funcs->soft_reset(adev);
35d782fe
CZ
3253 if (r)
3254 return r;
3255 }
3256 }
3257
3258 return 0;
3259}
3260
e3ecdffa
AD
3261/**
3262 * amdgpu_device_ip_post_soft_reset - clean up from soft reset
3263 *
3264 * @adev: amdgpu_device pointer
3265 *
3266 * The list of all the hardware IPs that make up the asic is walked and the
3267 * post_soft_reset callbacks are run if the asic was hung. post_soft_reset
3268 * handles any IP specific hardware or software state changes that are
3269 * necessary after the IP has been soft reset.
3270 * Returns 0 on success, negative error code on failure.
3271 */
06ec9070 3272static int amdgpu_device_ip_post_soft_reset(struct amdgpu_device *adev)
35d782fe
CZ
3273{
3274 int i, r = 0;
3275
3276 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 3277 if (!adev->ip_blocks[i].status.valid)
35d782fe 3278 continue;
a1255107
AD
3279 if (adev->ip_blocks[i].status.hang &&
3280 adev->ip_blocks[i].version->funcs->post_soft_reset)
3281 r = adev->ip_blocks[i].version->funcs->post_soft_reset(adev);
35d782fe
CZ
3282 if (r)
3283 return r;
3284 }
3285
3286 return 0;
3287}
3288
e3ecdffa 3289/**
c33adbc7 3290 * amdgpu_device_recover_vram - Recover some VRAM contents
e3ecdffa
AD
3291 *
3292 * @adev: amdgpu_device pointer
3293 *
3294 * Restores the contents of VRAM buffers from the shadows in GTT. Used to
3295 * restore things like GPUVM page tables after a GPU reset where
3296 * the contents of VRAM might be lost.
403009bf
CK
3297 *
3298 * Returns:
3299 * 0 on success, negative error code on failure.
e3ecdffa 3300 */
c33adbc7 3301static int amdgpu_device_recover_vram(struct amdgpu_device *adev)
c41d1cf6 3302{
c41d1cf6 3303 struct dma_fence *fence = NULL, *next = NULL;
403009bf
CK
3304 struct amdgpu_bo *shadow;
3305 long r = 1, tmo;
c41d1cf6
ML
3306
3307 if (amdgpu_sriov_runtime(adev))
b045d3af 3308 tmo = msecs_to_jiffies(8000);
c41d1cf6
ML
3309 else
3310 tmo = msecs_to_jiffies(100);
3311
3312 DRM_INFO("recover vram bo from shadow start\n");
3313 mutex_lock(&adev->shadow_list_lock);
403009bf
CK
3314 list_for_each_entry(shadow, &adev->shadow_list, shadow_list) {
3315
3316 /* No need to recover an evicted BO */
3317 if (shadow->tbo.mem.mem_type != TTM_PL_TT ||
b575f10d 3318 shadow->tbo.mem.start == AMDGPU_BO_INVALID_OFFSET ||
403009bf
CK
3319 shadow->parent->tbo.mem.mem_type != TTM_PL_VRAM)
3320 continue;
3321
3322 r = amdgpu_bo_restore_shadow(shadow, &next);
3323 if (r)
3324 break;
3325
c41d1cf6 3326 if (fence) {
1712fb1a 3327 tmo = dma_fence_wait_timeout(fence, false, tmo);
403009bf
CK
3328 dma_fence_put(fence);
3329 fence = next;
1712fb1a 3330 if (tmo == 0) {
3331 r = -ETIMEDOUT;
c41d1cf6 3332 break;
1712fb1a 3333 } else if (tmo < 0) {
3334 r = tmo;
3335 break;
3336 }
403009bf
CK
3337 } else {
3338 fence = next;
c41d1cf6 3339 }
c41d1cf6
ML
3340 }
3341 mutex_unlock(&adev->shadow_list_lock);
3342
403009bf
CK
3343 if (fence)
3344 tmo = dma_fence_wait_timeout(fence, false, tmo);
c41d1cf6
ML
3345 dma_fence_put(fence);
3346
1712fb1a 3347 if (r < 0 || tmo <= 0) {
3348 DRM_ERROR("recover vram bo from shadow failed, r is %ld, tmo is %ld\n", r, tmo);
403009bf
CK
3349 return -EIO;
3350 }
c41d1cf6 3351
403009bf
CK
3352 DRM_INFO("recover vram bo from shadow done\n");
3353 return 0;
c41d1cf6
ML
3354}
3355
a90ad3c2 3356
e3ecdffa 3357/**
06ec9070 3358 * amdgpu_device_reset_sriov - reset ASIC for SR-IOV vf
5740682e
ML
3359 *
3360 * @adev: amdgpu device pointer
87e3f136 3361 * @from_hypervisor: request from hypervisor
5740682e
ML
3362 *
 3363 * Do VF FLR and reinitialize the ASIC.
3f48c681 3364 * Returns 0 on success, negative error code on failure.
e3ecdffa
AD
3365 */
3366static int amdgpu_device_reset_sriov(struct amdgpu_device *adev,
3367 bool from_hypervisor)
5740682e
ML
3368{
3369 int r;
3370
3371 if (from_hypervisor)
3372 r = amdgpu_virt_request_full_gpu(adev, true);
3373 else
3374 r = amdgpu_virt_reset_gpu(adev);
3375 if (r)
3376 return r;
a90ad3c2 3377
f81e8d53
WL
3378 amdgpu_amdkfd_pre_reset(adev);
3379
a90ad3c2 3380 /* Resume IP prior to SMC */
06ec9070 3381 r = amdgpu_device_ip_reinit_early_sriov(adev);
5740682e
ML
3382 if (r)
3383 goto error;
a90ad3c2
ML
3384
3385 /* we need to recover the GART prior to resuming SMC/CP/SDMA */
c1c7ce8f 3386 amdgpu_gtt_mgr_recover(&adev->mman.bdev.man[TTM_PL_TT]);
a90ad3c2 3387
7a3e0bb2
RZ
3388 r = amdgpu_device_fw_loading(adev);
3389 if (r)
3390 return r;
3391
a90ad3c2 3392 /* now we are okay to resume SMC/CP/SDMA */
06ec9070 3393 r = amdgpu_device_ip_reinit_late_sriov(adev);
5740682e
ML
3394 if (r)
3395 goto error;
a90ad3c2
ML
3396
3397 amdgpu_irq_gpu_reset_resume_helper(adev);
5740682e 3398 r = amdgpu_ib_ring_tests(adev);
f81e8d53 3399 amdgpu_amdkfd_post_reset(adev);
a90ad3c2 3400
abc34253 3401error:
d3c117e5 3402 amdgpu_virt_init_data_exchange(adev);
abc34253 3403 amdgpu_virt_release_full_gpu(adev, true);
c41d1cf6
ML
3404 if (!r && adev->virt.gim_feature & AMDGIM_FEATURE_GIM_FLR_VRAMLOST) {
3405 atomic_inc(&adev->vram_lost_counter);
c33adbc7 3406 r = amdgpu_device_recover_vram(adev);
a90ad3c2
ML
3407 }
3408
3409 return r;
3410}
3411
12938fad
CK
3412/**
3413 * amdgpu_device_should_recover_gpu - check if we should try GPU recovery
3414 *
3415 * @adev: amdgpu device pointer
3416 *
3417 * Check amdgpu_gpu_recovery and SRIOV status to see if we should try to recover
3418 * a hung GPU.
3419 */
3420bool amdgpu_device_should_recover_gpu(struct amdgpu_device *adev)
3421{
3422 if (!amdgpu_device_ip_check_soft_reset(adev)) {
3423 DRM_INFO("Timeout, but no hardware hang detected.\n");
3424 return false;
3425 }
3426
3ba7b418
AG
3427 if (amdgpu_gpu_recovery == 0)
3428 goto disabled;
3429
3430 if (amdgpu_sriov_vf(adev))
3431 return true;
3432
3433 if (amdgpu_gpu_recovery == -1) {
3434 switch (adev->asic_type) {
fc42d47c
AG
3435 case CHIP_BONAIRE:
3436 case CHIP_HAWAII:
3ba7b418
AG
3437 case CHIP_TOPAZ:
3438 case CHIP_TONGA:
3439 case CHIP_FIJI:
3440 case CHIP_POLARIS10:
3441 case CHIP_POLARIS11:
3442 case CHIP_POLARIS12:
3443 case CHIP_VEGAM:
3444 case CHIP_VEGA20:
3445 case CHIP_VEGA10:
3446 case CHIP_VEGA12:
3447 break;
3448 default:
3449 goto disabled;
3450 }
12938fad
CK
3451 }
3452
3453 return true;
3ba7b418
AG
3454
3455disabled:
3456 DRM_INFO("GPU recovery disabled.\n");
3457 return false;
12938fad
CK
3458}
3459
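/*
 * Illustrative sketch only, not part of this driver: the intended use of
 * the helper above from a job-timeout path.  Bail out quietly when no
 * real hang was detected or recovery is disabled, and only then enter
 * the full reset machinery.  The handler name is hypothetical.
 */
#if 0
static void example_job_timedout(struct amdgpu_device *adev,
				 struct amdgpu_job *job)
{
	if (!amdgpu_device_should_recover_gpu(adev))
		return;	/* soft timeout or recovery disabled */

	amdgpu_device_gpu_recover(adev, job);
}
#endif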
5c6dd71e 3460
26bc5340
AG
3461static int amdgpu_device_pre_asic_reset(struct amdgpu_device *adev,
3462 struct amdgpu_job *job,
3463 bool *need_full_reset_arg)
3464{
3465 int i, r = 0;
3466 bool need_full_reset = *need_full_reset_arg;
71182665 3467
71182665 3468 /* block all schedulers and reset given job's ring */
0875dc9e
CZ
3469 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
3470 struct amdgpu_ring *ring = adev->rings[i];
3471
51687759 3472 if (!ring || !ring->sched.thread)
0875dc9e 3473 continue;
5740682e 3474
2f9d4084
ML
3475 /* after all hw jobs are reset, hw fence is meaningless, so force_completion */
3476 amdgpu_fence_driver_force_completion(ring);
0875dc9e 3477 }
d38ceaf9 3478
222b5f04
AG
3479 if (job)
3480 drm_sched_increase_karma(&job->base);
3481
1d721ed6 3482 /* Don't suspend on bare metal if we are not going to HW reset the ASIC */
26bc5340
AG
3483 if (!amdgpu_sriov_vf(adev)) {
3484
3485 if (!need_full_reset)
3486 need_full_reset = amdgpu_device_ip_need_full_reset(adev);
3487
3488 if (!need_full_reset) {
3489 amdgpu_device_ip_pre_soft_reset(adev);
3490 r = amdgpu_device_ip_soft_reset(adev);
3491 amdgpu_device_ip_post_soft_reset(adev);
3492 if (r || amdgpu_device_ip_check_soft_reset(adev)) {
3493 DRM_INFO("soft reset failed, will fallback to full reset!\n");
3494 need_full_reset = true;
3495 }
3496 }
3497
3498 if (need_full_reset)
3499 r = amdgpu_device_ip_suspend(adev);
3500
3501 *need_full_reset_arg = need_full_reset;
3502 }
3503
3504 return r;
3505}
3506
3507static int amdgpu_do_asic_reset(struct amdgpu_hive_info *hive,
3508 struct list_head *device_list_handle,
3509 bool *need_full_reset_arg)
3510{
3511 struct amdgpu_device *tmp_adev = NULL;
3512 bool need_full_reset = *need_full_reset_arg, vram_lost = false;
3513 int r = 0;
3514
3515 /*
3516 * ASIC reset has to be done on all XGMI hive nodes ASAP
3517 * to allow proper link negotiation in FW (within 1 sec)
3518 */
3519 if (need_full_reset) {
3520 list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) {
d4535e2c
AG
3521 /* For XGMI run all resets in parallel to speed up the process */
3522 if (tmp_adev->gmc.xgmi.num_physical_nodes > 1) {
3523 if (!queue_work(system_highpri_wq, &tmp_adev->xgmi_reset_work))
3524 r = -EALREADY;
3525 } else
3526 r = amdgpu_asic_reset(tmp_adev);
3527
3528 if (r) {
fed184e9 3529 DRM_ERROR("ASIC reset failed with error, %d for drm dev, %s",
26bc5340 3530 r, tmp_adev->ddev->unique);
d4535e2c
AG
3531 break;
3532 }
3533 }
3534
3535 /* For XGMI wait for all PSP resets to complete before proceed */
3536 if (!r) {
3537 list_for_each_entry(tmp_adev, device_list_handle,
3538 gmc.xgmi.head) {
3539 if (tmp_adev->gmc.xgmi.num_physical_nodes > 1) {
3540 flush_work(&tmp_adev->xgmi_reset_work);
3541 r = tmp_adev->asic_reset_res;
3542 if (r)
3543 break;
3544 }
3545 }
2be4c4a9 3546
3547 list_for_each_entry(tmp_adev, device_list_handle,
3548 gmc.xgmi.head) {
3549 amdgpu_ras_reserve_bad_pages(tmp_adev);
3550 }
26bc5340
AG
3551 }
3552 }
3553
3554
3555 list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) {
3556 if (need_full_reset) {
3557 /* post card */
3558 if (amdgpu_atom_asic_init(tmp_adev->mode_info.atom_context))
3559 DRM_WARN("asic atom init failed!");
3560
3561 if (!r) {
3562 dev_info(tmp_adev->dev, "GPU reset succeeded, trying to resume\n");
3563 r = amdgpu_device_ip_resume_phase1(tmp_adev);
3564 if (r)
3565 goto out;
3566
3567 vram_lost = amdgpu_device_check_vram_lost(tmp_adev);
3568 if (vram_lost) {
77e7f829 3569 DRM_INFO("VRAM is lost due to GPU reset!\n");
26bc5340
AG
3570 atomic_inc(&tmp_adev->vram_lost_counter);
3571 }
3572
3573 r = amdgpu_gtt_mgr_recover(
3574 &tmp_adev->mman.bdev.man[TTM_PL_TT]);
3575 if (r)
3576 goto out;
3577
3578 r = amdgpu_device_fw_loading(tmp_adev);
3579 if (r)
3580 return r;
3581
3582 r = amdgpu_device_ip_resume_phase2(tmp_adev);
3583 if (r)
3584 goto out;
3585
3586 if (vram_lost)
3587 amdgpu_device_fill_reset_magic(tmp_adev);
3588
fdafb359
EQ
3589 /*
3590 * Add this ASIC back as tracked now that the reset has
3591 * completed successfully.
3592 */
3593 amdgpu_register_gpu_instance(tmp_adev);
3594
7c04ca50 3595 r = amdgpu_device_ip_late_init(tmp_adev);
3596 if (r)
3597 goto out;
3598
e79a04d5 3599 /* must succeed. */
511fdbc3 3600 amdgpu_ras_resume(tmp_adev);
e79a04d5 3601
26bc5340
AG
3602 /* Update PSP FW topology after reset */
3603 if (hive && tmp_adev->gmc.xgmi.num_physical_nodes > 1)
3604 r = amdgpu_xgmi_update_topology(hive, tmp_adev);
3605 }
3606 }
3607
3608
3609out:
3610 if (!r) {
3611 amdgpu_irq_gpu_reset_resume_helper(tmp_adev);
3612 r = amdgpu_ib_ring_tests(tmp_adev);
3613 if (r) {
3614 dev_err(tmp_adev->dev, "ib ring test failed (%d).\n", r);
3615 r = amdgpu_device_ip_suspend(tmp_adev);
3616 need_full_reset = true;
3617 r = -EAGAIN;
3618 goto end;
3619 }
3620 }
3621
3622 if (!r)
3623 r = amdgpu_device_recover_vram(tmp_adev);
3624 else
3625 tmp_adev->asic_reset_res = r;
3626 }
3627
3628end:
3629 *need_full_reset_arg = need_full_reset;
3630 return r;
3631}
3632
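/*
 * Illustrative sketch only, not part of this driver: the XGMI branch above
 * fans the per-node ASIC resets out to the high-priority system workqueue
 * and then fans back in with flush_work() so that every node resets inside
 * the firmware's link-negotiation window.  The node structure and list are
 * hypothetical.
 */
#if 0
	struct example_node *node;

	list_for_each_entry(node, &hive_list, head)
		if (!queue_work(system_highpri_wq, &node->reset_work))
			node->reset_res = -EALREADY;	/* already queued */

	list_for_each_entry(node, &hive_list, head) {
		flush_work(&node->reset_work);
		if (node->reset_res)
			return node->reset_res;
	}
#endif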
1d721ed6 3633static bool amdgpu_device_lock_adev(struct amdgpu_device *adev, bool trylock)
26bc5340 3634{
1d721ed6
AG
3635 if (trylock) {
3636 if (!mutex_trylock(&adev->lock_reset))
3637 return false;
3638 } else
3639 mutex_lock(&adev->lock_reset);
5740682e 3640
26bc5340
AG
3641 atomic_inc(&adev->gpu_reset_counter);
3642 adev->in_gpu_reset = 1;
7b184b00 3643 /* Block kfd: SRIOV would do it separately */
3644 if (!amdgpu_sriov_vf(adev))
3645 amdgpu_amdkfd_pre_reset(adev);
1d721ed6
AG
3646
3647 return true;
26bc5340 3648}
d38ceaf9 3649
26bc5340
AG
3650static void amdgpu_device_unlock_adev(struct amdgpu_device *adev)
3651{
7b184b00 3652 /* unlock kfd: SRIOV would do it separately */
3653 if (!amdgpu_sriov_vf(adev))
3654 amdgpu_amdkfd_post_reset(adev);
89041940 3655 amdgpu_vf_error_trans_all(adev);
13a752e3
ML
3656 adev->in_gpu_reset = 0;
3657 mutex_unlock(&adev->lock_reset);
26bc5340
AG
3658}
3659
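/*
 * Illustrative sketch only, not part of this driver: the two helpers above
 * bracket every per-device stretch of the recovery path.  The device that
 * fields the timeout passes trylock == true so that a second, concurrent
 * TDR backs off instead of piling up behind the reset in progress.
 */
#if 0
	if (!amdgpu_device_lock_adev(adev, true))
		return 0;	/* another reset is already in flight */

	/* ... suspend IPs, reset the ASIC, resume IPs ... */

	amdgpu_device_unlock_adev(adev);
#endif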
3660
3661/**
3662 * amdgpu_device_gpu_recover - reset the asic and recover scheduler
3663 *
3664 * @adev: amdgpu device pointer
3665 * @job: which job triggered the hang
3666 *
3667 * Attempt to reset the GPU if it has hung (all asics).
3668 * Attempt to do a soft reset or a full reset and reinitialize the ASIC.
3669 * Returns 0 for success or an error on failure.
3670 */
3671
3672int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
3673 struct amdgpu_job *job)
3674{
1d721ed6
AG
3675 struct list_head device_list, *device_list_handle = NULL;
3676 bool need_full_reset, job_signaled;
26bc5340 3677 struct amdgpu_hive_info *hive = NULL;
26bc5340 3678 struct amdgpu_device *tmp_adev = NULL;
1d721ed6 3679 int i, r = 0;
26bc5340 3680
1d721ed6 3681 need_full_reset = job_signaled = false;
26bc5340
AG
3682 INIT_LIST_HEAD(&device_list);
3683
3684 dev_info(adev->dev, "GPU reset begin!\n");
3685
beff74bc 3686 cancel_delayed_work_sync(&adev->delayed_init_work);
c53e4db7 3687
1d721ed6
AG
3688 hive = amdgpu_get_xgmi_hive(adev, false);
3689
26bc5340 3690 /*
1d721ed6
AG
3691 * Here we trylock to avoid a chain of resets executing from
3692 * either a trigger by jobs on different adevs in the XGMI hive or by jobs on
3693 * different schedulers for the same device while this TO handler is running.
3694 * We always reset all schedulers for a device and all devices for the XGMI
3695 * hive, so that should take care of them too.
26bc5340 3696 */
1d721ed6
AG
3697
3698 if (hive && !mutex_trylock(&hive->reset_lock)) {
3699 DRM_INFO("Bailing on TDR for s_job:%llx, hive: %llx as another already in progress",
3700 job->base.id, hive->hive_id);
26bc5340 3701 return 0;
1d721ed6 3702 }
26bc5340
AG
3703
3704 /* Start with adev pre asic reset first for soft reset check. */
1d721ed6
AG
3705 if (!amdgpu_device_lock_adev(adev, !hive)) {
3706 DRM_INFO("Bailing on TDR for s_job:%llx, as another already in progress",
3707 job->base.id);
3708 return 0;
26bc5340
AG
3709 }
3710
3711 /* Build list of devices to reset */
1d721ed6 3712 if (adev->gmc.xgmi.num_physical_nodes > 1) {
26bc5340
AG
3713 if (!hive) {
3714 amdgpu_device_unlock_adev(adev);
3715 return -ENODEV;
3716 }
3717
3718 /*
3719 * In case we are in XGMI hive mode, device reset is done for all the
3720 * nodes in the hive to retrain all XGMI links, and hence the reset
3721 * sequence is executed in a loop on all nodes.
3722 */
3723 device_list_handle = &hive->device_list;
3724 } else {
3725 list_add_tail(&adev->gmc.xgmi.head, &device_list);
3726 device_list_handle = &device_list;
3727 }
3728
fdafb359
EQ
3729 /*
3730 * Mark the ASICs to be reset as untracked first,
3731 * and add them back after the reset completes.
3732 */
3733 list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head)
3734 amdgpu_unregister_gpu_instance(tmp_adev);
3735
1d721ed6
AG
3736 /* block all schedulers and reset given job's ring */
3737 list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) {
f1c1314b 3738 /* disable ras on ALL IPs */
3739 if (amdgpu_device_ip_need_full_reset(tmp_adev))
3740 amdgpu_ras_suspend(tmp_adev);
3741
1d721ed6
AG
3742 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
3743 struct amdgpu_ring *ring = tmp_adev->rings[i];
3744
3745 if (!ring || !ring->sched.thread)
3746 continue;
3747
3748 drm_sched_stop(&ring->sched, &job->base);
3749 }
3750 }
3751
3752
3753 /*
3754 * Must check guilty signal here since after this point all old
3755 * HW fences are force signaled.
3756 *
3757 * job->base holds a reference to parent fence
3758 */
3759 if (job && job->base.s_fence->parent &&
3760 dma_fence_is_signaled(job->base.s_fence->parent))
3761 job_signaled = true;
3762
3763 if (!amdgpu_device_ip_need_full_reset(adev))
3764 device_list_handle = &device_list;
3765
3766 if (job_signaled) {
3767 dev_info(adev->dev, "Guilty job already signaled, skipping HW reset");
3768 goto skip_hw_reset;
3769 }
3770
3771
3772 /* Guilty job will be freed after this */
3773 r = amdgpu_device_pre_asic_reset(adev,
3774 job,
3775 &need_full_reset);
3776 if (r) {
3777 /* TODO: Should we stop? */
3778 DRM_ERROR("GPU pre asic reset failed with err, %d for drm dev, %s ",
3779 r, adev->ddev->unique);
3780 adev->asic_reset_res = r;
3781 }
3782
26bc5340
AG
3783retry: /* Rest of adevs pre asic reset from XGMI hive. */
3784 list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) {
3785
3786 if (tmp_adev == adev)
3787 continue;
3788
1d721ed6 3789 amdgpu_device_lock_adev(tmp_adev, false);
26bc5340
AG
3790 r = amdgpu_device_pre_asic_reset(tmp_adev,
3791 NULL,
3792 &need_full_reset);
3793 /* TODO: Should we stop? */
3794 if (r) {
3795 DRM_ERROR("GPU pre asic reset failed with err, %d for drm dev, %s ",
3796 r, tmp_adev->ddev->unique);
3797 tmp_adev->asic_reset_res = r;
3798 }
3799 }
3800
3801 /* Actual ASIC resets if needed.*/
3802 /* TODO Implement XGMI hive reset logic for SRIOV */
3803 if (amdgpu_sriov_vf(adev)) {
3804 r = amdgpu_device_reset_sriov(adev, job ? false : true);
3805 if (r)
3806 adev->asic_reset_res = r;
3807 } else {
3808 r = amdgpu_do_asic_reset(hive, device_list_handle, &need_full_reset);
3809 if (r && r == -EAGAIN)
3810 goto retry;
3811 }
3812
1d721ed6
AG
3813skip_hw_reset:
3814
26bc5340
AG
3815 /* Post ASIC reset for all devs. */
3816 list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) {
1d721ed6
AG
3817 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
3818 struct amdgpu_ring *ring = tmp_adev->rings[i];
3819
3820 if (!ring || !ring->sched.thread)
3821 continue;
3822
3823 /* No point in resubmitting jobs if we didn't HW reset */
3824 if (!tmp_adev->asic_reset_res && !job_signaled)
3825 drm_sched_resubmit_jobs(&ring->sched);
3826
3827 drm_sched_start(&ring->sched, !tmp_adev->asic_reset_res);
3828 }
3829
3830 if (!amdgpu_device_has_dc_support(tmp_adev) && !job_signaled) {
3831 drm_helper_resume_force_mode(tmp_adev->ddev);
3832 }
3833
3834 tmp_adev->asic_reset_res = 0;
26bc5340
AG
3835
3836 if (r) {
3837 /* bad news, how to tell it to userspace ? */
3838 dev_info(tmp_adev->dev, "GPU reset(%d) failed\n", atomic_read(&adev->gpu_reset_counter));
3839 amdgpu_vf_error_put(tmp_adev, AMDGIM_ERROR_VF_GPU_RESET_FAIL, 0, r);
3840 } else {
3841 dev_info(tmp_adev->dev, "GPU reset(%d) succeeded!\n", atomic_read(&adev->gpu_reset_counter));
3842 }
3843
3844 amdgpu_device_unlock_adev(tmp_adev);
3845 }
3846
1d721ed6 3847 if (hive)
22d6575b 3848 mutex_unlock(&hive->reset_lock);
26bc5340
AG
3849
3850 if (r)
3851 dev_info(adev->dev, "GPU reset end with ret = %d\n", r);
d38ceaf9
AD
3852 return r;
3853}
3854
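/*
 * Illustrative sketch only, not part of this driver: recovery can also be
 * invoked without a guilty job (job == NULL) by paths that detect a problem
 * outside the scheduler; the karma bookkeeping and the guilty-job signal
 * check above are then simply skipped.  The caller below is hypothetical.
 */
#if 0
static void example_fatal_error_handler(struct amdgpu_device *adev)
{
	if (amdgpu_device_should_recover_gpu(adev))
		amdgpu_device_gpu_recover(adev, NULL);
}
#endif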
e3ecdffa
AD
3855/**
3856 * amdgpu_device_get_pcie_info - fetch pcie info about the PCIE slot
3857 *
3858 * @adev: amdgpu_device pointer
3859 *
3860 * Fetches and stores in the driver the PCIE capabilities (gen speed
3861 * and lanes) of the slot the device is in. Handles APUs and
3862 * virtualized environments where PCIE config space may not be available.
3863 */
5494d864 3864static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev)
d0dd7f0c 3865{
5d9a6330 3866 struct pci_dev *pdev;
c5313457
HK
3867 enum pci_bus_speed speed_cap, platform_speed_cap;
3868 enum pcie_link_width platform_link_width;
d0dd7f0c 3869
cd474ba0
AD
3870 if (amdgpu_pcie_gen_cap)
3871 adev->pm.pcie_gen_mask = amdgpu_pcie_gen_cap;
d0dd7f0c 3872
cd474ba0
AD
3873 if (amdgpu_pcie_lane_cap)
3874 adev->pm.pcie_mlw_mask = amdgpu_pcie_lane_cap;
d0dd7f0c 3875
cd474ba0
AD
3876 /* covers APUs as well */
3877 if (pci_is_root_bus(adev->pdev->bus)) {
3878 if (adev->pm.pcie_gen_mask == 0)
3879 adev->pm.pcie_gen_mask = AMDGPU_DEFAULT_PCIE_GEN_MASK;
3880 if (adev->pm.pcie_mlw_mask == 0)
3881 adev->pm.pcie_mlw_mask = AMDGPU_DEFAULT_PCIE_MLW_MASK;
d0dd7f0c 3882 return;
cd474ba0 3883 }
d0dd7f0c 3884
c5313457
HK
3885 if (adev->pm.pcie_gen_mask && adev->pm.pcie_mlw_mask)
3886 return;
3887
dbaa922b
AD
3888 pcie_bandwidth_available(adev->pdev, NULL,
3889 &platform_speed_cap, &platform_link_width);
c5313457 3890
cd474ba0 3891 if (adev->pm.pcie_gen_mask == 0) {
5d9a6330
AD
3892 /* asic caps */
3893 pdev = adev->pdev;
3894 speed_cap = pcie_get_speed_cap(pdev);
3895 if (speed_cap == PCI_SPEED_UNKNOWN) {
3896 adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
cd474ba0
AD
3897 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
3898 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3);
cd474ba0 3899 } else {
5d9a6330
AD
3900 if (speed_cap == PCIE_SPEED_16_0GT)
3901 adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
3902 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
3903 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3 |
3904 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN4);
3905 else if (speed_cap == PCIE_SPEED_8_0GT)
3906 adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
3907 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
3908 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3);
3909 else if (speed_cap == PCIE_SPEED_5_0GT)
3910 adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
3911 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2);
3912 else
3913 adev->pm.pcie_gen_mask |= CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1;
3914 }
3915 /* platform caps */
c5313457 3916 if (platform_speed_cap == PCI_SPEED_UNKNOWN) {
5d9a6330
AD
3917 adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
3918 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2);
3919 } else {
c5313457 3920 if (platform_speed_cap == PCIE_SPEED_16_0GT)
5d9a6330
AD
3921 adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
3922 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2 |
3923 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3 |
3924 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN4);
c5313457 3925 else if (platform_speed_cap == PCIE_SPEED_8_0GT)
5d9a6330
AD
3926 adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
3927 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2 |
3928 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3);
c5313457 3929 else if (platform_speed_cap == PCIE_SPEED_5_0GT)
5d9a6330
AD
3930 adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
3931 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2);
3932 else
3933 adev->pm.pcie_gen_mask |= CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1;
3934
cd474ba0
AD
3935 }
3936 }
3937 if (adev->pm.pcie_mlw_mask == 0) {
c5313457 3938 if (platform_link_width == PCIE_LNK_WIDTH_UNKNOWN) {
5d9a6330
AD
3939 adev->pm.pcie_mlw_mask |= AMDGPU_DEFAULT_PCIE_MLW_MASK;
3940 } else {
c5313457 3941 switch (platform_link_width) {
5d9a6330 3942 case PCIE_LNK_X32:
cd474ba0
AD
3943 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X32 |
3944 CAIL_PCIE_LINK_WIDTH_SUPPORT_X16 |
3945 CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 |
3946 CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
3947 CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
3948 CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
3949 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
3950 break;
5d9a6330 3951 case PCIE_LNK_X16:
cd474ba0
AD
3952 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X16 |
3953 CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 |
3954 CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
3955 CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
3956 CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
3957 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
3958 break;
5d9a6330 3959 case PCIE_LNK_X12:
cd474ba0
AD
3960 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 |
3961 CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
3962 CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
3963 CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
3964 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
3965 break;
5d9a6330 3966 case PCIE_LNK_X8:
cd474ba0
AD
3967 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
3968 CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
3969 CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
3970 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
3971 break;
5d9a6330 3972 case PCIE_LNK_X4:
cd474ba0
AD
3973 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
3974 CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
3975 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
3976 break;
5d9a6330 3977 case PCIE_LNK_X2:
cd474ba0
AD
3978 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
3979 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
3980 break;
5d9a6330 3981 case PCIE_LNK_X1:
cd474ba0
AD
3982 adev->pm.pcie_mlw_mask = CAIL_PCIE_LINK_WIDTH_SUPPORT_X1;
3983 break;
3984 default:
3985 break;
3986 }
d0dd7f0c
AD
3987 }
3988 }
3989}
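/*
 * Illustrative sketch only, not part of this driver: pcie_gen_mask carries
 * both the ASIC bits (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_*) and the platform
 * bits (CAIL_PCIE_LINK_SPEED_SUPPORT_*), so consumers check both halves
 * before picking a link speed.  A reduced example:
 */
#if 0
	/* the link may run at gen3 only if both the ASIC and the slot allow it */
	bool gen3_ok =
		(adev->pm.pcie_gen_mask & CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3) &&
		(adev->pm.pcie_gen_mask & CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3);
#endif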
d38ceaf9 3990