drm/amdgpu: Handle xgmi device removal.
drivers/gpu/drm/amd/amdgpu/amdgpu_device.c

/*
 * Copyright 2008 Advanced Micro Devices, Inc.
 * Copyright 2008 Red Hat Inc.
 * Copyright 2009 Jerome Glisse.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 * Authors: Dave Airlie
 *          Alex Deucher
 *          Jerome Glisse
 */
#include <linux/power_supply.h>
#include <linux/kthread.h>
#include <linux/console.h>
#include <linux/slab.h>
#include <drm/drmP.h>
#include <drm/drm_crtc_helper.h>
#include <drm/drm_atomic_helper.h>
#include <drm/amdgpu_drm.h>
#include <linux/vgaarb.h>
#include <linux/vga_switcheroo.h>
#include <linux/efi.h>
#include "amdgpu.h"
#include "amdgpu_trace.h"
#include "amdgpu_i2c.h"
#include "atom.h"
#include "amdgpu_atombios.h"
#include "amdgpu_atomfirmware.h"
#include "amd_pcie.h"
#ifdef CONFIG_DRM_AMDGPU_SI
#include "si.h"
#endif
#ifdef CONFIG_DRM_AMDGPU_CIK
#include "cik.h"
#endif
#include "vi.h"
#include "soc15.h"
#include "bif/bif_4_1_d.h"
#include <linux/pci.h>
#include <linux/firmware.h>
#include "amdgpu_vf_error.h"

#include "amdgpu_amdkfd.h"
#include "amdgpu_pm.h"

#include "amdgpu_xgmi.h"

MODULE_FIRMWARE("amdgpu/vega10_gpu_info.bin");
MODULE_FIRMWARE("amdgpu/vega12_gpu_info.bin");
MODULE_FIRMWARE("amdgpu/raven_gpu_info.bin");
MODULE_FIRMWARE("amdgpu/picasso_gpu_info.bin");
MODULE_FIRMWARE("amdgpu/raven2_gpu_info.bin");

#define AMDGPU_RESUME_MS	2000

static const char *amdgpu_asic_name[] = {
	"TAHITI",
	"PITCAIRN",
	"VERDE",
	"OLAND",
	"HAINAN",
	"BONAIRE",
	"KAVERI",
	"KABINI",
	"HAWAII",
	"MULLINS",
	"TOPAZ",
	"TONGA",
	"FIJI",
	"CARRIZO",
	"STONEY",
	"POLARIS10",
	"POLARIS11",
	"POLARIS12",
	"VEGAM",
	"VEGA10",
	"VEGA12",
	"VEGA20",
	"RAVEN",
	"LAST",
};

static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev);

/**
 * amdgpu_device_is_px - Is the device a dGPU with HG/PX power control
 *
 * @dev: drm_device pointer
 *
 * Returns true if the device is a dGPU with HG/PX power control,
 * otherwise return false.
 */
bool amdgpu_device_is_px(struct drm_device *dev)
{
	struct amdgpu_device *adev = dev->dev_private;

	if (adev->flags & AMD_IS_PX)
		return true;
	return false;
}

/*
 * MMIO register access helper functions.
 */
/**
 * amdgpu_mm_rreg - read a memory mapped IO register
 *
 * @adev: amdgpu_device pointer
 * @reg: dword aligned register offset
 * @acc_flags: access flags which require special behavior
 *
 * Returns the 32 bit value from the offset specified.
 */
uint32_t amdgpu_mm_rreg(struct amdgpu_device *adev, uint32_t reg,
			uint32_t acc_flags)
{
	uint32_t ret;

	if (!(acc_flags & AMDGPU_REGS_NO_KIQ) && amdgpu_sriov_runtime(adev))
		return amdgpu_virt_kiq_rreg(adev, reg);

	if ((reg * 4) < adev->rmmio_size && !(acc_flags & AMDGPU_REGS_IDX))
		ret = readl(((void __iomem *)adev->rmmio) + (reg * 4));
	else {
		unsigned long flags;

		spin_lock_irqsave(&adev->mmio_idx_lock, flags);
		writel((reg * 4), ((void __iomem *)adev->rmmio) + (mmMM_INDEX * 4));
		ret = readl(((void __iomem *)adev->rmmio) + (mmMM_DATA * 4));
		spin_unlock_irqrestore(&adev->mmio_idx_lock, flags);
	}
	trace_amdgpu_mm_rreg(adev->pdev->device, reg, ret);
	return ret;
}
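
/*
 * Illustrative sketch (not part of the driver): a caller that wants to read a
 * register while bypassing the KIQ path under SR-IOV could do something like
 * the following; the register offset 0x12 is a made-up example value.
 *
 *	uint32_t val;
 *
 *	val = amdgpu_mm_rreg(adev, 0x12, AMDGPU_REGS_NO_KIQ);
 *
 * Offsets beyond the mapped MMIO range are read indirectly through the
 * mmMM_INDEX/mmMM_DATA pair under adev->mmio_idx_lock, as shown above.
 */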

/*
 * MMIO register read with bytes helper functions
 * @offset: bytes offset from MMIO start
 */

/**
 * amdgpu_mm_rreg8 - read a memory mapped IO register
 *
 * @adev: amdgpu_device pointer
 * @offset: byte aligned register offset
 *
 * Returns the 8 bit value from the offset specified.
 */
uint8_t amdgpu_mm_rreg8(struct amdgpu_device *adev, uint32_t offset)
{
	if (offset < adev->rmmio_size)
		return (readb(adev->rmmio + offset));
	BUG();
}

/*
 * MMIO register write with bytes helper functions
 * @offset: bytes offset from MMIO start
 * @value: the value to be written to the register
 */

/**
 * amdgpu_mm_wreg8 - write to a memory mapped IO register
 *
 * @adev: amdgpu_device pointer
 * @offset: byte aligned register offset
 * @value: 8 bit value to write
 *
 * Writes the value specified to the offset specified.
 */
void amdgpu_mm_wreg8(struct amdgpu_device *adev, uint32_t offset, uint8_t value)
{
	if (offset < adev->rmmio_size)
		writeb(value, adev->rmmio + offset);
	else
		BUG();
}

/**
 * amdgpu_mm_wreg - write to a memory mapped IO register
 *
 * @adev: amdgpu_device pointer
 * @reg: dword aligned register offset
 * @v: 32 bit value to write to the register
 * @acc_flags: access flags which require special behavior
 *
 * Writes the value specified to the offset specified.
 */
void amdgpu_mm_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v,
		    uint32_t acc_flags)
{
	trace_amdgpu_mm_wreg(adev->pdev->device, reg, v);

	if (adev->asic_type >= CHIP_VEGA10 && reg == 0) {
		adev->last_mm_index = v;
	}

	if (!(acc_flags & AMDGPU_REGS_NO_KIQ) && amdgpu_sriov_runtime(adev))
		return amdgpu_virt_kiq_wreg(adev, reg, v);

	if ((reg * 4) < adev->rmmio_size && !(acc_flags & AMDGPU_REGS_IDX))
		writel(v, ((void __iomem *)adev->rmmio) + (reg * 4));
	else {
		unsigned long flags;

		spin_lock_irqsave(&adev->mmio_idx_lock, flags);
		writel((reg * 4), ((void __iomem *)adev->rmmio) + (mmMM_INDEX * 4));
		writel(v, ((void __iomem *)adev->rmmio) + (mmMM_DATA * 4));
		spin_unlock_irqrestore(&adev->mmio_idx_lock, flags);
	}

	if (adev->asic_type >= CHIP_VEGA10 && reg == 1 && adev->last_mm_index == 0x5702C) {
		udelay(500);
	}
}
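
/*
 * Illustrative sketch (not part of the driver): a typical read-modify-write
 * sequence built on the two helpers above; the offset and mask values are
 * made up for the example. Elsewhere in the driver these helpers are wrapped
 * by the RREG32()/WREG32() macros used later in this file.
 *
 *	uint32_t tmp;
 *
 *	tmp = amdgpu_mm_rreg(adev, 0x20, 0);
 *	tmp &= ~0x3;
 *	tmp |= 0x1;
 *	amdgpu_mm_wreg(adev, 0x20, tmp, 0);
 */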
231
e3ecdffa
AD
232/**
233 * amdgpu_io_rreg - read an IO register
234 *
235 * @adev: amdgpu_device pointer
236 * @reg: dword aligned register offset
237 *
238 * Returns the 32 bit value from the offset specified.
239 */
d38ceaf9
AD
240u32 amdgpu_io_rreg(struct amdgpu_device *adev, u32 reg)
241{
242 if ((reg * 4) < adev->rio_mem_size)
243 return ioread32(adev->rio_mem + (reg * 4));
244 else {
245 iowrite32((reg * 4), adev->rio_mem + (mmMM_INDEX * 4));
246 return ioread32(adev->rio_mem + (mmMM_DATA * 4));
247 }
248}
249
e3ecdffa
AD
250/**
251 * amdgpu_io_wreg - write to an IO register
252 *
253 * @adev: amdgpu_device pointer
254 * @reg: dword aligned register offset
255 * @v: 32 bit value to write to the register
256 *
257 * Writes the value specified to the offset specified.
258 */
d38ceaf9
AD
259void amdgpu_io_wreg(struct amdgpu_device *adev, u32 reg, u32 v)
260{
47ed4e1c
KW
261 if (adev->asic_type >= CHIP_VEGA10 && reg == 0) {
262 adev->last_mm_index = v;
263 }
d38ceaf9
AD
264
265 if ((reg * 4) < adev->rio_mem_size)
266 iowrite32(v, adev->rio_mem + (reg * 4));
267 else {
268 iowrite32((reg * 4), adev->rio_mem + (mmMM_INDEX * 4));
269 iowrite32(v, adev->rio_mem + (mmMM_DATA * 4));
270 }
47ed4e1c
KW
271
272 if (adev->asic_type >= CHIP_VEGA10 && reg == 1 && adev->last_mm_index == 0x5702C) {
273 udelay(500);
274 }
d38ceaf9
AD
275}
276
277/**
278 * amdgpu_mm_rdoorbell - read a doorbell dword
279 *
280 * @adev: amdgpu_device pointer
281 * @index: doorbell index
282 *
283 * Returns the value in the doorbell aperture at the
284 * requested doorbell index (CIK).
285 */
286u32 amdgpu_mm_rdoorbell(struct amdgpu_device *adev, u32 index)
287{
288 if (index < adev->doorbell.num_doorbells) {
289 return readl(adev->doorbell.ptr + index);
290 } else {
291 DRM_ERROR("reading beyond doorbell aperture: 0x%08x!\n", index);
292 return 0;
293 }
294}
295
296/**
297 * amdgpu_mm_wdoorbell - write a doorbell dword
298 *
299 * @adev: amdgpu_device pointer
300 * @index: doorbell index
301 * @v: value to write
302 *
303 * Writes @v to the doorbell aperture at the
304 * requested doorbell index (CIK).
305 */
306void amdgpu_mm_wdoorbell(struct amdgpu_device *adev, u32 index, u32 v)
307{
308 if (index < adev->doorbell.num_doorbells) {
309 writel(v, adev->doorbell.ptr + index);
310 } else {
311 DRM_ERROR("writing beyond doorbell aperture: 0x%08x!\n", index);
312 }
313}
314
832be404
KW
315/**
316 * amdgpu_mm_rdoorbell64 - read a doorbell Qword
317 *
318 * @adev: amdgpu_device pointer
319 * @index: doorbell index
320 *
321 * Returns the value in the doorbell aperture at the
322 * requested doorbell index (VEGA10+).
323 */
324u64 amdgpu_mm_rdoorbell64(struct amdgpu_device *adev, u32 index)
325{
326 if (index < adev->doorbell.num_doorbells) {
327 return atomic64_read((atomic64_t *)(adev->doorbell.ptr + index));
328 } else {
329 DRM_ERROR("reading beyond doorbell aperture: 0x%08x!\n", index);
330 return 0;
331 }
332}
333
334/**
335 * amdgpu_mm_wdoorbell64 - write a doorbell Qword
336 *
337 * @adev: amdgpu_device pointer
338 * @index: doorbell index
339 * @v: value to write
340 *
341 * Writes @v to the doorbell aperture at the
342 * requested doorbell index (VEGA10+).
343 */
344void amdgpu_mm_wdoorbell64(struct amdgpu_device *adev, u32 index, u64 v)
345{
346 if (index < adev->doorbell.num_doorbells) {
347 atomic64_set((atomic64_t *)(adev->doorbell.ptr + index), v);
348 } else {
349 DRM_ERROR("writing beyond doorbell aperture: 0x%08x!\n", index);
350 }
351}
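
/*
 * Illustrative sketch (not part of the driver): ring code typically kicks the
 * GPU by writing the ring's write pointer to its doorbell slot. The ring
 * fields below are assumptions borrowed from the ring code, not defined here.
 *
 *	if (ring->use_doorbell)
 *		amdgpu_mm_wdoorbell64(adev, ring->doorbell_index, ring->wptr);
 */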
352
d38ceaf9
AD
353/**
354 * amdgpu_invalid_rreg - dummy reg read function
355 *
356 * @adev: amdgpu device pointer
357 * @reg: offset of register
358 *
359 * Dummy register read function. Used for register blocks
360 * that certain asics don't have (all asics).
361 * Returns the value in the register.
362 */
363static uint32_t amdgpu_invalid_rreg(struct amdgpu_device *adev, uint32_t reg)
364{
365 DRM_ERROR("Invalid callback to read register 0x%04X\n", reg);
366 BUG();
367 return 0;
368}
369
/**
 * amdgpu_invalid_wreg - dummy reg write function
 *
 * @adev: amdgpu device pointer
 * @reg: offset of register
 * @v: value to write to the register
 *
 * Dummy register write function. Used for register blocks
 * that certain asics don't have (all asics).
 */
380static void amdgpu_invalid_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v)
381{
382 DRM_ERROR("Invalid callback to write register 0x%04X with 0x%08X\n",
383 reg, v);
384 BUG();
385}
386
387/**
388 * amdgpu_block_invalid_rreg - dummy reg read function
389 *
390 * @adev: amdgpu device pointer
391 * @block: offset of instance
392 * @reg: offset of register
393 *
394 * Dummy register read function. Used for register blocks
395 * that certain asics don't have (all asics).
396 * Returns the value in the register.
397 */
398static uint32_t amdgpu_block_invalid_rreg(struct amdgpu_device *adev,
399 uint32_t block, uint32_t reg)
400{
401 DRM_ERROR("Invalid callback to read register 0x%04X in block 0x%04X\n",
402 reg, block);
403 BUG();
404 return 0;
405}
406
/**
 * amdgpu_block_invalid_wreg - dummy reg write function
 *
 * @adev: amdgpu device pointer
 * @block: offset of instance
 * @reg: offset of register
 * @v: value to write to the register
 *
 * Dummy register write function. Used for register blocks
 * that certain asics don't have (all asics).
 */
418static void amdgpu_block_invalid_wreg(struct amdgpu_device *adev,
419 uint32_t block,
420 uint32_t reg, uint32_t v)
421{
422 DRM_ERROR("Invalid block callback to write register 0x%04X in block 0x%04X with 0x%08X\n",
423 reg, block, v);
424 BUG();
425}
426
e3ecdffa
AD
427/**
428 * amdgpu_device_vram_scratch_init - allocate the VRAM scratch page
429 *
430 * @adev: amdgpu device pointer
431 *
432 * Allocates a scratch page of VRAM for use by various things in the
433 * driver.
434 */
06ec9070 435static int amdgpu_device_vram_scratch_init(struct amdgpu_device *adev)
d38ceaf9 436{
a4a02777
CK
437 return amdgpu_bo_create_kernel(adev, AMDGPU_GPU_PAGE_SIZE,
438 PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM,
439 &adev->vram_scratch.robj,
440 &adev->vram_scratch.gpu_addr,
441 (void **)&adev->vram_scratch.ptr);
d38ceaf9
AD
442}
443
e3ecdffa
AD
444/**
445 * amdgpu_device_vram_scratch_fini - Free the VRAM scratch page
446 *
447 * @adev: amdgpu device pointer
448 *
449 * Frees the VRAM scratch page.
450 */
06ec9070 451static void amdgpu_device_vram_scratch_fini(struct amdgpu_device *adev)
d38ceaf9 452{
078af1a3 453 amdgpu_bo_free_kernel(&adev->vram_scratch.robj, NULL, NULL);
d38ceaf9
AD
454}
455
/**
 * amdgpu_device_program_register_sequence - program an array of registers.
 *
 * @adev: amdgpu_device pointer
 * @registers: pointer to the register array
 * @array_size: size of the register array
 *
 * Programs an array of registers with AND and OR masks.
 * This is a helper for setting golden registers.
 */
void amdgpu_device_program_register_sequence(struct amdgpu_device *adev,
					     const u32 *registers,
					     const u32 array_size)
{
	u32 tmp, reg, and_mask, or_mask;
	int i;

	if (array_size % 3)
		return;

	for (i = 0; i < array_size; i += 3) {
		reg = registers[i + 0];
		and_mask = registers[i + 1];
		or_mask = registers[i + 2];

		if (and_mask == 0xffffffff) {
			tmp = or_mask;
		} else {
			tmp = RREG32(reg);
			tmp &= ~and_mask;
			tmp |= or_mask;
		}
		WREG32(reg, tmp);
	}
}
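
/*
 * Illustrative sketch (not part of the driver): golden register tables are
 * flat arrays of {offset, and_mask, or_mask} triplets. The offsets and masks
 * below are made up; the real tables live in the per-ASIC code.
 *
 *	static const u32 example_golden_settings[] = {
 *		0x1234, 0xffffff00, 0x00000012,
 *		0x5678, 0xffffffff, 0x0000abcd,
 *	};
 *
 *	amdgpu_device_program_register_sequence(adev, example_golden_settings,
 *						ARRAY_SIZE(example_golden_settings));
 *
 * An and_mask of 0xffffffff writes or_mask to the register directly; any
 * other value clears the and_mask bits and ORs in or_mask.
 */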
491
e3ecdffa
AD
492/**
493 * amdgpu_device_pci_config_reset - reset the GPU
494 *
495 * @adev: amdgpu_device pointer
496 *
497 * Resets the GPU using the pci config reset sequence.
498 * Only applicable to asics prior to vega10.
499 */
8111c387 500void amdgpu_device_pci_config_reset(struct amdgpu_device *adev)
d38ceaf9
AD
501{
502 pci_write_config_dword(adev->pdev, 0x7c, AMDGPU_ASIC_RESET_DATA);
503}
504
505/*
506 * GPU doorbell aperture helpers function.
507 */
508/**
06ec9070 509 * amdgpu_device_doorbell_init - Init doorbell driver information.
d38ceaf9
AD
510 *
511 * @adev: amdgpu_device pointer
512 *
513 * Init doorbell driver information (CIK)
514 * Returns 0 on success, error on failure.
515 */
06ec9070 516static int amdgpu_device_doorbell_init(struct amdgpu_device *adev)
d38ceaf9 517{
6585661d
OZ
518 amdgpu_asic_init_doorbell_index(adev);
519
705e519e
CK
520 /* No doorbell on SI hardware generation */
521 if (adev->asic_type < CHIP_BONAIRE) {
522 adev->doorbell.base = 0;
523 adev->doorbell.size = 0;
524 adev->doorbell.num_doorbells = 0;
525 adev->doorbell.ptr = NULL;
526 return 0;
527 }
528
d6895ad3
CK
529 if (pci_resource_flags(adev->pdev, 2) & IORESOURCE_UNSET)
530 return -EINVAL;
531
d38ceaf9
AD
532 /* doorbell bar mapping */
533 adev->doorbell.base = pci_resource_start(adev->pdev, 2);
534 adev->doorbell.size = pci_resource_len(adev->pdev, 2);
535
edf600da 536 adev->doorbell.num_doorbells = min_t(u32, adev->doorbell.size / sizeof(u32),
9564f192 537 adev->doorbell_index.max_assignment+1);
d38ceaf9
AD
538 if (adev->doorbell.num_doorbells == 0)
539 return -EINVAL;
540
	/* For Vega, reserve and map two pages on doorbell BAR since SDMA
	 * paging queue doorbell uses the second page. The
	 * AMDGPU_DOORBELL64_MAX_ASSIGNMENT definition assumes all the
	 * doorbells are in the first page. So with paging queue enabled,
	 * the max num_doorbells should be incremented by one page (0x400 in dwords).
	 */
	if (adev->asic_type >= CHIP_VEGA10)
		adev->doorbell.num_doorbells += 0x400;
ec3db8a6 549
8972e5d2
CK
550 adev->doorbell.ptr = ioremap(adev->doorbell.base,
551 adev->doorbell.num_doorbells *
552 sizeof(u32));
553 if (adev->doorbell.ptr == NULL)
d38ceaf9 554 return -ENOMEM;
d38ceaf9
AD
555
556 return 0;
557}
558
559/**
06ec9070 560 * amdgpu_device_doorbell_fini - Tear down doorbell driver information.
d38ceaf9
AD
561 *
562 * @adev: amdgpu_device pointer
563 *
564 * Tear down doorbell driver information (CIK)
565 */
06ec9070 566static void amdgpu_device_doorbell_fini(struct amdgpu_device *adev)
d38ceaf9
AD
567{
568 iounmap(adev->doorbell.ptr);
569 adev->doorbell.ptr = NULL;
570}
571
22cb0164 572
d38ceaf9
AD
573
/*
 * amdgpu_device_wb_*()
 * Writeback is the method by which the GPU updates special pages in memory
 * with the status of certain GPU events (fences, ring pointers, etc.).
 */
579
580/**
06ec9070 581 * amdgpu_device_wb_fini - Disable Writeback and free memory
d38ceaf9
AD
582 *
583 * @adev: amdgpu_device pointer
584 *
585 * Disables Writeback and frees the Writeback memory (all asics).
586 * Used at driver shutdown.
587 */
06ec9070 588static void amdgpu_device_wb_fini(struct amdgpu_device *adev)
d38ceaf9
AD
589{
590 if (adev->wb.wb_obj) {
a76ed485
AD
591 amdgpu_bo_free_kernel(&adev->wb.wb_obj,
592 &adev->wb.gpu_addr,
593 (void **)&adev->wb.wb);
d38ceaf9
AD
594 adev->wb.wb_obj = NULL;
595 }
596}
597
/**
 * amdgpu_device_wb_init - Init Writeback driver info and allocate memory
 *
 * @adev: amdgpu_device pointer
 *
 * Initializes writeback and allocates writeback memory (all asics).
 * Used at driver startup.
 * Returns 0 on success or a negative error code on failure.
 */
06ec9070 607static int amdgpu_device_wb_init(struct amdgpu_device *adev)
d38ceaf9
AD
608{
609 int r;
610
611 if (adev->wb.wb_obj == NULL) {
97407b63
AD
612 /* AMDGPU_MAX_WB * sizeof(uint32_t) * 8 = AMDGPU_MAX_WB 256bit slots */
613 r = amdgpu_bo_create_kernel(adev, AMDGPU_MAX_WB * sizeof(uint32_t) * 8,
a76ed485
AD
614 PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT,
615 &adev->wb.wb_obj, &adev->wb.gpu_addr,
616 (void **)&adev->wb.wb);
d38ceaf9
AD
617 if (r) {
618 dev_warn(adev->dev, "(%d) create WB bo failed\n", r);
619 return r;
620 }
d38ceaf9
AD
621
622 adev->wb.num_wb = AMDGPU_MAX_WB;
623 memset(&adev->wb.used, 0, sizeof(adev->wb.used));
624
625 /* clear wb memory */
73469585 626 memset((char *)adev->wb.wb, 0, AMDGPU_MAX_WB * sizeof(uint32_t) * 8);
d38ceaf9
AD
627 }
628
629 return 0;
630}
631
632/**
131b4b36 633 * amdgpu_device_wb_get - Allocate a wb entry
d38ceaf9
AD
634 *
635 * @adev: amdgpu_device pointer
636 * @wb: wb index
637 *
638 * Allocate a wb slot for use by the driver (all asics).
639 * Returns 0 on success or -EINVAL on failure.
640 */
131b4b36 641int amdgpu_device_wb_get(struct amdgpu_device *adev, u32 *wb)
d38ceaf9
AD
642{
643 unsigned long offset = find_first_zero_bit(adev->wb.used, adev->wb.num_wb);
d38ceaf9 644
97407b63 645 if (offset < adev->wb.num_wb) {
7014285a 646 __set_bit(offset, adev->wb.used);
63ae07ca 647 *wb = offset << 3; /* convert to dw offset */
0915fdbc
ML
648 return 0;
649 } else {
650 return -EINVAL;
651 }
652}
653
d38ceaf9 654/**
131b4b36 655 * amdgpu_device_wb_free - Free a wb entry
d38ceaf9
AD
656 *
657 * @adev: amdgpu_device pointer
658 * @wb: wb index
659 *
660 * Free a wb slot allocated for use by the driver (all asics)
661 */
131b4b36 662void amdgpu_device_wb_free(struct amdgpu_device *adev, u32 wb)
d38ceaf9 663{
73469585 664 wb >>= 3;
d38ceaf9 665 if (wb < adev->wb.num_wb)
73469585 666 __clear_bit(wb, adev->wb.used);
d38ceaf9
AD
667}
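
/*
 * Illustrative sketch (not part of the driver): typical lifetime of a
 * writeback slot. A caller gets a dword offset, points the hardware (e.g. a
 * fence or rptr/wptr shadow) at adev->wb.gpu_addr + offset * 4, reads the CPU
 * copy from adev->wb.wb[offset] and releases the slot when done.
 *
 *	u32 wb_offset, value;
 *	int r;
 *
 *	r = amdgpu_device_wb_get(adev, &wb_offset);
 *	if (r)
 *		return r;
 *	... program the GPU with adev->wb.gpu_addr + (wb_offset * 4) ...
 *	value = adev->wb.wb[wb_offset];
 *	amdgpu_device_wb_free(adev, wb_offset);
 */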
668
d6895ad3
CK
/**
 * amdgpu_device_resize_fb_bar - try to resize FB BAR
 *
 * @adev: amdgpu_device pointer
 *
 * Try to resize FB BAR to make all VRAM CPU accessible. We try very hard not
 * to fail, but if any of the BARs is not accessible after the resize we abort
 * driver loading by returning -ENODEV.
 */
678int amdgpu_device_resize_fb_bar(struct amdgpu_device *adev)
679{
770d13b1 680 u64 space_needed = roundup_pow_of_two(adev->gmc.real_vram_size);
d6895ad3 681 u32 rbar_size = order_base_2(((space_needed >> 20) | 1)) - 1;
31b8adab
CK
682 struct pci_bus *root;
683 struct resource *res;
684 unsigned i;
d6895ad3
CK
685 u16 cmd;
686 int r;
687
0c03b912 688 /* Bypass for VF */
689 if (amdgpu_sriov_vf(adev))
690 return 0;
691
31b8adab
CK
692 /* Check if the root BUS has 64bit memory resources */
693 root = adev->pdev->bus;
694 while (root->parent)
695 root = root->parent;
696
697 pci_bus_for_each_resource(root, res, i) {
0ebb7c54 698 if (res && res->flags & (IORESOURCE_MEM | IORESOURCE_MEM_64) &&
31b8adab
CK
699 res->start > 0x100000000ull)
700 break;
701 }
702
703 /* Trying to resize is pointless without a root hub window above 4GB */
704 if (!res)
705 return 0;
706
d6895ad3
CK
707 /* Disable memory decoding while we change the BAR addresses and size */
708 pci_read_config_word(adev->pdev, PCI_COMMAND, &cmd);
709 pci_write_config_word(adev->pdev, PCI_COMMAND,
710 cmd & ~PCI_COMMAND_MEMORY);
711
712 /* Free the VRAM and doorbell BAR, we most likely need to move both. */
06ec9070 713 amdgpu_device_doorbell_fini(adev);
d6895ad3
CK
714 if (adev->asic_type >= CHIP_BONAIRE)
715 pci_release_resource(adev->pdev, 2);
716
717 pci_release_resource(adev->pdev, 0);
718
719 r = pci_resize_resource(adev->pdev, 0, rbar_size);
720 if (r == -ENOSPC)
721 DRM_INFO("Not enough PCI address space for a large BAR.");
722 else if (r && r != -ENOTSUPP)
723 DRM_ERROR("Problem resizing BAR0 (%d).", r);
724
725 pci_assign_unassigned_bus_resources(adev->pdev->bus);
726
727 /* When the doorbell or fb BAR isn't available we have no chance of
728 * using the device.
729 */
06ec9070 730 r = amdgpu_device_doorbell_init(adev);
d6895ad3
CK
731 if (r || (pci_resource_flags(adev->pdev, 0) & IORESOURCE_UNSET))
732 return -ENODEV;
733
734 pci_write_config_word(adev->pdev, PCI_COMMAND, cmd);
735
736 return 0;
737}
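
/*
 * Worked example (illustrative only): on a board with 8 GB of VRAM,
 * space_needed = roundup_pow_of_two(8 GB) = 0x200000000, so
 * (space_needed >> 20) | 1 = 8193 and order_base_2(8193) = 14, giving
 * rbar_size = 13. In the PCI resizable BAR encoding that requests
 * 2^(13 + 20) bytes = 8 GB for BAR0.
 */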
a05502e5 738
d38ceaf9
AD
739/*
740 * GPU helpers function.
741 */
/**
 * amdgpu_device_need_post - check if the hw need post or not
 *
 * @adev: amdgpu_device pointer
 *
 * Check if the asic has been initialized (all asics) at driver startup,
 * or whether a post is needed because a hw reset was performed.
 * Returns true if post is needed or false if not.
 */
39c640c0 751bool amdgpu_device_need_post(struct amdgpu_device *adev)
d38ceaf9
AD
752{
753 uint32_t reg;
754
bec86378
ML
755 if (amdgpu_sriov_vf(adev))
756 return false;
757
	if (amdgpu_passthrough(adev)) {
		/* For FIJI: in the whole-GPU pass-through virtualization case, after a
		 * VM reboot some old SMC firmware still needs the driver to do vPost,
		 * otherwise the GPU hangs. SMC firmware versions above 22.15 don't have
		 * this flaw, so we force vPost only for SMC versions below 22.15.
		 */
		if (adev->asic_type == CHIP_FIJI) {
			int err;
			uint32_t fw_ver;
			err = request_firmware(&adev->pm.fw, "amdgpu/fiji_smc.bin", adev->dev);
			/* force vPost if an error occurred */
			if (err)
				return true;

			fw_ver = *((uint32_t *)adev->pm.fw->data + 69);
			if (fw_ver < 0x00160e00)
				return true;
		}
	}
91fe77eb 777
778 if (adev->has_hw_reset) {
779 adev->has_hw_reset = false;
780 return true;
781 }
782
783 /* bios scratch used on CIK+ */
784 if (adev->asic_type >= CHIP_BONAIRE)
785 return amdgpu_atombios_scratch_need_asic_init(adev);
786
787 /* check MEM_SIZE for older asics */
788 reg = amdgpu_asic_get_config_memsize(adev);
789
790 if ((reg != 0) && (reg != 0xffffffff))
791 return false;
792
793 return true;
bec86378
ML
794}
795
d38ceaf9
AD
796/* if we get transitioned to only one device, take VGA back */
797/**
06ec9070 798 * amdgpu_device_vga_set_decode - enable/disable vga decode
d38ceaf9
AD
799 *
800 * @cookie: amdgpu_device pointer
801 * @state: enable/disable vga decode
802 *
803 * Enable/disable vga decode (all asics).
804 * Returns VGA resource flags.
805 */
06ec9070 806static unsigned int amdgpu_device_vga_set_decode(void *cookie, bool state)
d38ceaf9
AD
807{
808 struct amdgpu_device *adev = cookie;
809 amdgpu_asic_set_vga_state(adev, state);
810 if (state)
811 return VGA_RSRC_LEGACY_IO | VGA_RSRC_LEGACY_MEM |
812 VGA_RSRC_NORMAL_IO | VGA_RSRC_NORMAL_MEM;
813 else
814 return VGA_RSRC_NORMAL_IO | VGA_RSRC_NORMAL_MEM;
815}
816
e3ecdffa
AD
817/**
818 * amdgpu_device_check_block_size - validate the vm block size
819 *
820 * @adev: amdgpu_device pointer
821 *
822 * Validates the vm block size specified via module parameter.
823 * The vm block size defines number of bits in page table versus page directory,
824 * a page is 4KB so we have 12 bits offset, minimum 9 bits in the
825 * page table and the remaining bits are in the page directory.
826 */
06ec9070 827static void amdgpu_device_check_block_size(struct amdgpu_device *adev)
a1adf8be
CZ
828{
829 /* defines number of bits in page table versus page directory,
830 * a page is 4KB so we have 12 bits offset, minimum 9 bits in the
831 * page table and the remaining bits are in the page directory */
bab4fee7
JZ
832 if (amdgpu_vm_block_size == -1)
833 return;
a1adf8be 834
bab4fee7 835 if (amdgpu_vm_block_size < 9) {
a1adf8be
CZ
836 dev_warn(adev->dev, "VM page table size (%d) too small\n",
837 amdgpu_vm_block_size);
97489129 838 amdgpu_vm_block_size = -1;
a1adf8be 839 }
a1adf8be
CZ
840}
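
/*
 * Worked example (illustrative only): with 4 KB pages there is a 12-bit
 * in-page offset, so a vm_block_size of 9 means one last-level page table
 * covers 9 + 12 = 21 bits, i.e. 2 MB of address space, and the remaining VM
 * address bits are resolved through the page directory.
 */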
841
e3ecdffa
AD
842/**
843 * amdgpu_device_check_vm_size - validate the vm size
844 *
845 * @adev: amdgpu_device pointer
846 *
847 * Validates the vm size in GB specified via module parameter.
848 * The VM size is the size of the GPU virtual memory space in GB.
849 */
06ec9070 850static void amdgpu_device_check_vm_size(struct amdgpu_device *adev)
83ca145d 851{
64dab074
AD
852 /* no need to check the default value */
853 if (amdgpu_vm_size == -1)
854 return;
855
83ca145d
ZJ
856 if (amdgpu_vm_size < 1) {
857 dev_warn(adev->dev, "VM size (%d) too small, min is 1GB\n",
858 amdgpu_vm_size);
f3368128 859 amdgpu_vm_size = -1;
83ca145d 860 }
83ca145d
ZJ
861}
862
7951e376
RZ
863static void amdgpu_device_check_smu_prv_buffer_size(struct amdgpu_device *adev)
864{
865 struct sysinfo si;
866 bool is_os_64 = (sizeof(void *) == 8) ? true : false;
867 uint64_t total_memory;
868 uint64_t dram_size_seven_GB = 0x1B8000000;
869 uint64_t dram_size_three_GB = 0xB8000000;
870
871 if (amdgpu_smu_memory_pool_size == 0)
872 return;
873
874 if (!is_os_64) {
875 DRM_WARN("Not 64-bit OS, feature not supported\n");
876 goto def_value;
877 }
878 si_meminfo(&si);
879 total_memory = (uint64_t)si.totalram * si.mem_unit;
880
881 if ((amdgpu_smu_memory_pool_size == 1) ||
882 (amdgpu_smu_memory_pool_size == 2)) {
883 if (total_memory < dram_size_three_GB)
884 goto def_value1;
885 } else if ((amdgpu_smu_memory_pool_size == 4) ||
886 (amdgpu_smu_memory_pool_size == 8)) {
887 if (total_memory < dram_size_seven_GB)
888 goto def_value1;
889 } else {
890 DRM_WARN("Smu memory pool size not supported\n");
891 goto def_value;
892 }
893 adev->pm.smu_prv_buffer_size = amdgpu_smu_memory_pool_size << 28;
894
895 return;
896
def_value1:
	DRM_WARN("Not enough system memory\n");
def_value:
	adev->pm.smu_prv_buffer_size = 0;
901}
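
/*
 * Worked example (illustrative only): amdgpu_smu_memory_pool_size is given in
 * units of 256 MB, so the shift by 28 above turns the accepted parameter
 * values 1, 2, 4 and 8 into pools of 256 MB, 512 MB, 1 GB and 2 GB
 * respectively (e.g. 4 << 28 = 0x40000000 bytes = 1 GB), provided the system
 * has enough DRAM (roughly 3 GB for the small pools, 7 GB for the large ones).
 */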
902
d38ceaf9 903/**
06ec9070 904 * amdgpu_device_check_arguments - validate module params
d38ceaf9
AD
905 *
906 * @adev: amdgpu_device pointer
907 *
908 * Validates certain module parameters and updates
909 * the associated values used by the driver (all asics).
910 */
06ec9070 911static void amdgpu_device_check_arguments(struct amdgpu_device *adev)
d38ceaf9 912{
5b011235
CZ
913 if (amdgpu_sched_jobs < 4) {
914 dev_warn(adev->dev, "sched jobs (%d) must be at least 4\n",
915 amdgpu_sched_jobs);
916 amdgpu_sched_jobs = 4;
76117507 917 } else if (!is_power_of_2(amdgpu_sched_jobs)){
5b011235
CZ
918 dev_warn(adev->dev, "sched jobs (%d) must be a power of 2\n",
919 amdgpu_sched_jobs);
920 amdgpu_sched_jobs = roundup_pow_of_two(amdgpu_sched_jobs);
921 }
d38ceaf9 922
83e74db6 923 if (amdgpu_gart_size != -1 && amdgpu_gart_size < 32) {
f9321cc4
CK
924 /* gart size must be greater or equal to 32M */
925 dev_warn(adev->dev, "gart size (%d) too small\n",
926 amdgpu_gart_size);
83e74db6 927 amdgpu_gart_size = -1;
d38ceaf9
AD
928 }
929
36d38372 930 if (amdgpu_gtt_size != -1 && amdgpu_gtt_size < 32) {
c4e1a13a 931 /* gtt size must be greater or equal to 32M */
36d38372
CK
932 dev_warn(adev->dev, "gtt size (%d) too small\n",
933 amdgpu_gtt_size);
934 amdgpu_gtt_size = -1;
d38ceaf9
AD
935 }
936
d07f14be
RH
937 /* valid range is between 4 and 9 inclusive */
938 if (amdgpu_vm_fragment_size != -1 &&
939 (amdgpu_vm_fragment_size > 9 || amdgpu_vm_fragment_size < 4)) {
940 dev_warn(adev->dev, "valid range is between 4 and 9\n");
941 amdgpu_vm_fragment_size = -1;
942 }
943
7951e376
RZ
944 amdgpu_device_check_smu_prv_buffer_size(adev);
945
06ec9070 946 amdgpu_device_check_vm_size(adev);
d38ceaf9 947
06ec9070 948 amdgpu_device_check_block_size(adev);
6a7f76e7 949
526bae37 950 if (amdgpu_vram_page_split != -1 && (amdgpu_vram_page_split < 16 ||
76117507 951 !is_power_of_2(amdgpu_vram_page_split))) {
6a7f76e7
CK
952 dev_warn(adev->dev, "invalid VRAM page split (%d)\n",
953 amdgpu_vram_page_split);
954 amdgpu_vram_page_split = 1024;
955 }

	if (amdgpu_lockup_timeout == 0) {
		dev_warn(adev->dev, "lockup_timeout must be > 0, adjusting to 10000\n");
		amdgpu_lockup_timeout = 10000;
	}
19aede77
AD
961
962 adev->firmware.load_type = amdgpu_ucode_get_load_type(adev, amdgpu_fw_load_type);
d38ceaf9
AD
963}
964
/**
 * amdgpu_switcheroo_set_state - set switcheroo state
 *
 * @pdev: pci dev pointer
 * @state: vga_switcheroo state
 *
 * Callback for the switcheroo driver. Suspends or resumes
 * the asics before or after they are powered up using ACPI methods.
 */
974static void amdgpu_switcheroo_set_state(struct pci_dev *pdev, enum vga_switcheroo_state state)
975{
976 struct drm_device *dev = pci_get_drvdata(pdev);
977
978 if (amdgpu_device_is_px(dev) && state == VGA_SWITCHEROO_OFF)
979 return;
980
981 if (state == VGA_SWITCHEROO_ON) {
7ca85295 982 pr_info("amdgpu: switched on\n");
d38ceaf9
AD
983 /* don't suspend or resume card normally */
984 dev->switch_power_state = DRM_SWITCH_POWER_CHANGING;
985
810ddc3a 986 amdgpu_device_resume(dev, true, true);
d38ceaf9 987
d38ceaf9
AD
988 dev->switch_power_state = DRM_SWITCH_POWER_ON;
989 drm_kms_helper_poll_enable(dev);
990 } else {
7ca85295 991 pr_info("amdgpu: switched off\n");
d38ceaf9
AD
992 drm_kms_helper_poll_disable(dev);
993 dev->switch_power_state = DRM_SWITCH_POWER_CHANGING;
810ddc3a 994 amdgpu_device_suspend(dev, true, true);
d38ceaf9
AD
995 dev->switch_power_state = DRM_SWITCH_POWER_OFF;
996 }
997}
998
/**
 * amdgpu_switcheroo_can_switch - see if switcheroo state can change
 *
 * @pdev: pci dev pointer
 *
 * Callback for the switcheroo driver. Checks if the switcheroo
 * state can be changed.
 * Returns true if the state can be changed, false if not.
 */
1008static bool amdgpu_switcheroo_can_switch(struct pci_dev *pdev)
1009{
1010 struct drm_device *dev = pci_get_drvdata(pdev);
1011
1012 /*
1013 * FIXME: open_count is protected by drm_global_mutex but that would lead to
1014 * locking inversion with the driver load path. And the access here is
1015 * completely racy anyway. So don't bother with locking for now.
1016 */
1017 return dev->open_count == 0;
1018}
1019
1020static const struct vga_switcheroo_client_ops amdgpu_switcheroo_ops = {
1021 .set_gpu_state = amdgpu_switcheroo_set_state,
1022 .reprobe = NULL,
1023 .can_switch = amdgpu_switcheroo_can_switch,
1024};
1025
e3ecdffa
AD
1026/**
1027 * amdgpu_device_ip_set_clockgating_state - set the CG state
1028 *
87e3f136 1029 * @dev: amdgpu_device pointer
e3ecdffa
AD
1030 * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
1031 * @state: clockgating state (gate or ungate)
1032 *
1033 * Sets the requested clockgating state for all instances of
1034 * the hardware IP specified.
1035 * Returns the error code from the last instance.
1036 */
43fa561f 1037int amdgpu_device_ip_set_clockgating_state(void *dev,
2990a1fc
AD
1038 enum amd_ip_block_type block_type,
1039 enum amd_clockgating_state state)
d38ceaf9 1040{
43fa561f 1041 struct amdgpu_device *adev = dev;
d38ceaf9
AD
1042 int i, r = 0;
1043
1044 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 1045 if (!adev->ip_blocks[i].status.valid)
9ecbe7f5 1046 continue;
c722865a
RZ
1047 if (adev->ip_blocks[i].version->type != block_type)
1048 continue;
1049 if (!adev->ip_blocks[i].version->funcs->set_clockgating_state)
1050 continue;
1051 r = adev->ip_blocks[i].version->funcs->set_clockgating_state(
1052 (void *)adev, state);
1053 if (r)
1054 DRM_ERROR("set_clockgating_state of IP block <%s> failed %d\n",
1055 adev->ip_blocks[i].version->funcs->name, r);
d38ceaf9
AD
1056 }
1057 return r;
1058}
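
/*
 * Illustrative sketch (not part of the driver): code that wants to gate the
 * clocks of every GFX instance could call:
 *
 *	amdgpu_device_ip_set_clockgating_state(adev, AMD_IP_BLOCK_TYPE_GFX,
 *					       AMD_CG_STATE_GATE);
 *
 * The request is applied to all IP blocks of the given type and the error
 * code of the last instance is returned.
 */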
1059
e3ecdffa
AD
1060/**
1061 * amdgpu_device_ip_set_powergating_state - set the PG state
1062 *
87e3f136 1063 * @dev: amdgpu_device pointer
e3ecdffa
AD
1064 * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
1065 * @state: powergating state (gate or ungate)
1066 *
1067 * Sets the requested powergating state for all instances of
1068 * the hardware IP specified.
1069 * Returns the error code from the last instance.
1070 */
43fa561f 1071int amdgpu_device_ip_set_powergating_state(void *dev,
2990a1fc
AD
1072 enum amd_ip_block_type block_type,
1073 enum amd_powergating_state state)
d38ceaf9 1074{
43fa561f 1075 struct amdgpu_device *adev = dev;
d38ceaf9
AD
1076 int i, r = 0;
1077
1078 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 1079 if (!adev->ip_blocks[i].status.valid)
9ecbe7f5 1080 continue;
c722865a
RZ
1081 if (adev->ip_blocks[i].version->type != block_type)
1082 continue;
1083 if (!adev->ip_blocks[i].version->funcs->set_powergating_state)
1084 continue;
1085 r = adev->ip_blocks[i].version->funcs->set_powergating_state(
1086 (void *)adev, state);
1087 if (r)
1088 DRM_ERROR("set_powergating_state of IP block <%s> failed %d\n",
1089 adev->ip_blocks[i].version->funcs->name, r);
d38ceaf9
AD
1090 }
1091 return r;
1092}
1093
e3ecdffa
AD
1094/**
1095 * amdgpu_device_ip_get_clockgating_state - get the CG state
1096 *
1097 * @adev: amdgpu_device pointer
1098 * @flags: clockgating feature flags
1099 *
1100 * Walks the list of IPs on the device and updates the clockgating
1101 * flags for each IP.
1102 * Updates @flags with the feature flags for each hardware IP where
1103 * clockgating is enabled.
1104 */
2990a1fc
AD
1105void amdgpu_device_ip_get_clockgating_state(struct amdgpu_device *adev,
1106 u32 *flags)
6cb2d4e4
HR
1107{
1108 int i;
1109
1110 for (i = 0; i < adev->num_ip_blocks; i++) {
1111 if (!adev->ip_blocks[i].status.valid)
1112 continue;
1113 if (adev->ip_blocks[i].version->funcs->get_clockgating_state)
1114 adev->ip_blocks[i].version->funcs->get_clockgating_state((void *)adev, flags);
1115 }
1116}
1117
e3ecdffa
AD
1118/**
1119 * amdgpu_device_ip_wait_for_idle - wait for idle
1120 *
1121 * @adev: amdgpu_device pointer
1122 * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
1123 *
1124 * Waits for the request hardware IP to be idle.
1125 * Returns 0 for success or a negative error code on failure.
1126 */
2990a1fc
AD
1127int amdgpu_device_ip_wait_for_idle(struct amdgpu_device *adev,
1128 enum amd_ip_block_type block_type)
5dbbb60b
AD
1129{
1130 int i, r;
1131
1132 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 1133 if (!adev->ip_blocks[i].status.valid)
9ecbe7f5 1134 continue;
a1255107
AD
1135 if (adev->ip_blocks[i].version->type == block_type) {
1136 r = adev->ip_blocks[i].version->funcs->wait_for_idle((void *)adev);
5dbbb60b
AD
1137 if (r)
1138 return r;
1139 break;
1140 }
1141 }
1142 return 0;
1143
1144}
1145
e3ecdffa
AD
1146/**
1147 * amdgpu_device_ip_is_idle - is the hardware IP idle
1148 *
1149 * @adev: amdgpu_device pointer
1150 * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
1151 *
1152 * Check if the hardware IP is idle or not.
1153 * Returns true if it the IP is idle, false if not.
1154 */
2990a1fc
AD
1155bool amdgpu_device_ip_is_idle(struct amdgpu_device *adev,
1156 enum amd_ip_block_type block_type)
5dbbb60b
AD
1157{
1158 int i;
1159
1160 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 1161 if (!adev->ip_blocks[i].status.valid)
9ecbe7f5 1162 continue;
a1255107
AD
1163 if (adev->ip_blocks[i].version->type == block_type)
1164 return adev->ip_blocks[i].version->funcs->is_idle((void *)adev);
5dbbb60b
AD
1165 }
1166 return true;
1167
1168}
1169
e3ecdffa
AD
1170/**
1171 * amdgpu_device_ip_get_ip_block - get a hw IP pointer
1172 *
1173 * @adev: amdgpu_device pointer
87e3f136 1174 * @type: Type of hardware IP (SMU, GFX, UVD, etc.)
e3ecdffa
AD
1175 *
1176 * Returns a pointer to the hardware IP block structure
1177 * if it exists for the asic, otherwise NULL.
1178 */
2990a1fc
AD
1179struct amdgpu_ip_block *
1180amdgpu_device_ip_get_ip_block(struct amdgpu_device *adev,
1181 enum amd_ip_block_type type)
d38ceaf9
AD
1182{
1183 int i;
1184
1185 for (i = 0; i < adev->num_ip_blocks; i++)
a1255107 1186 if (adev->ip_blocks[i].version->type == type)
d38ceaf9
AD
1187 return &adev->ip_blocks[i];
1188
1189 return NULL;
1190}
1191
1192/**
2990a1fc 1193 * amdgpu_device_ip_block_version_cmp
d38ceaf9
AD
1194 *
1195 * @adev: amdgpu_device pointer
5fc3aeeb 1196 * @type: enum amd_ip_block_type
d38ceaf9
AD
1197 * @major: major version
1198 * @minor: minor version
1199 *
1200 * return 0 if equal or greater
1201 * return 1 if smaller or the ip_block doesn't exist
1202 */
2990a1fc
AD
1203int amdgpu_device_ip_block_version_cmp(struct amdgpu_device *adev,
1204 enum amd_ip_block_type type,
1205 u32 major, u32 minor)
d38ceaf9 1206{
2990a1fc 1207 struct amdgpu_ip_block *ip_block = amdgpu_device_ip_get_ip_block(adev, type);
d38ceaf9 1208
a1255107
AD
1209 if (ip_block && ((ip_block->version->major > major) ||
1210 ((ip_block->version->major == major) &&
1211 (ip_block->version->minor >= minor))))
d38ceaf9
AD
1212 return 0;
1213
1214 return 1;
1215}
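
/*
 * Illustrative sketch (not part of the driver): checking whether the asic
 * provides at least GFX IP v8.1 before relying on an 8.1-only feature could
 * look like:
 *
 *	if (amdgpu_device_ip_block_version_cmp(adev, AMD_IP_BLOCK_TYPE_GFX,
 *					       8, 1) == 0) {
 *		... the GFX block is version 8.1 or newer ...
 *	}
 */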
1216
a1255107 1217/**
2990a1fc 1218 * amdgpu_device_ip_block_add
a1255107
AD
1219 *
1220 * @adev: amdgpu_device pointer
1221 * @ip_block_version: pointer to the IP to add
1222 *
1223 * Adds the IP block driver information to the collection of IPs
1224 * on the asic.
1225 */
2990a1fc
AD
1226int amdgpu_device_ip_block_add(struct amdgpu_device *adev,
1227 const struct amdgpu_ip_block_version *ip_block_version)
a1255107
AD
1228{
1229 if (!ip_block_version)
1230 return -EINVAL;
1231
e966a725 1232 DRM_INFO("add ip block number %d <%s>\n", adev->num_ip_blocks,
a0bae357
HR
1233 ip_block_version->funcs->name);
1234
a1255107
AD
1235 adev->ip_blocks[adev->num_ip_blocks++].version = ip_block_version;
1236
1237 return 0;
1238}
1239
e3ecdffa
AD
/**
 * amdgpu_device_enable_virtual_display - enable virtual display feature
 *
 * @adev: amdgpu_device pointer
 *
 * Enables the virtual display feature if the user has enabled it via
 * the module parameter virtual_display. This feature provides virtual
 * display hardware on headless boards or in virtualized environments.
 * This function parses and validates the configuration string specified by
 * the user and configures the virtual display configuration (number of
 * virtual connectors, crtcs, etc.) specified.
 */
483ef985 1252static void amdgpu_device_enable_virtual_display(struct amdgpu_device *adev)
9accf2fd
ED
1253{
1254 adev->enable_virtual_display = false;
1255
1256 if (amdgpu_virtual_display) {
1257 struct drm_device *ddev = adev->ddev;
1258 const char *pci_address_name = pci_name(ddev->pdev);
0f66356d 1259 char *pciaddstr, *pciaddstr_tmp, *pciaddname_tmp, *pciaddname;
9accf2fd
ED
1260
1261 pciaddstr = kstrdup(amdgpu_virtual_display, GFP_KERNEL);
1262 pciaddstr_tmp = pciaddstr;
0f66356d
ED
1263 while ((pciaddname_tmp = strsep(&pciaddstr_tmp, ";"))) {
1264 pciaddname = strsep(&pciaddname_tmp, ",");
967de2a9
YT
1265 if (!strcmp("all", pciaddname)
1266 || !strcmp(pci_address_name, pciaddname)) {
0f66356d
ED
1267 long num_crtc;
1268 int res = -1;
1269
9accf2fd 1270 adev->enable_virtual_display = true;
0f66356d
ED
1271
1272 if (pciaddname_tmp)
1273 res = kstrtol(pciaddname_tmp, 10,
1274 &num_crtc);
1275
1276 if (!res) {
1277 if (num_crtc < 1)
1278 num_crtc = 1;
1279 if (num_crtc > 6)
1280 num_crtc = 6;
1281 adev->mode_info.num_crtc = num_crtc;
1282 } else {
1283 adev->mode_info.num_crtc = 1;
1284 }
9accf2fd
ED
1285 break;
1286 }
1287 }
1288
0f66356d
ED
1289 DRM_INFO("virtual display string:%s, %s:virtual_display:%d, num_crtc:%d\n",
1290 amdgpu_virtual_display, pci_address_name,
1291 adev->enable_virtual_display, adev->mode_info.num_crtc);
9accf2fd
ED
1292
1293 kfree(pciaddstr);
1294 }
1295}
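
/*
 * Illustrative example of the string parsed above: the module parameter is a
 * semicolon-separated list of "<PCI address>[,crtcs]" entries, e.g.
 * amdgpu.virtual_display=0000:01:00.0,2 enables two virtual crtcs on that
 * device, and "all" applies to every amdgpu device. The PCI address shown is
 * only an example.
 */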
1296
e3ecdffa
AD
/**
 * amdgpu_device_parse_gpu_info_fw - parse gpu info firmware
 *
 * @adev: amdgpu_device pointer
 *
 * Parses the asic configuration parameters specified in the gpu info
 * firmware and makes them available to the driver for use in configuring
 * the asic.
 * Returns 0 on success, -EINVAL on failure.
 */
e2a75f88
AD
1307static int amdgpu_device_parse_gpu_info_fw(struct amdgpu_device *adev)
1308{
e2a75f88
AD
1309 const char *chip_name;
1310 char fw_name[30];
1311 int err;
1312 const struct gpu_info_firmware_header_v1_0 *hdr;
1313
ab4fe3e1
HR
1314 adev->firmware.gpu_info_fw = NULL;
1315
e2a75f88
AD
1316 switch (adev->asic_type) {
1317 case CHIP_TOPAZ:
1318 case CHIP_TONGA:
1319 case CHIP_FIJI:
e2a75f88 1320 case CHIP_POLARIS10:
cc07f18d 1321 case CHIP_POLARIS11:
e2a75f88 1322 case CHIP_POLARIS12:
cc07f18d 1323 case CHIP_VEGAM:
e2a75f88
AD
1324 case CHIP_CARRIZO:
1325 case CHIP_STONEY:
1326#ifdef CONFIG_DRM_AMDGPU_SI
1327 case CHIP_VERDE:
1328 case CHIP_TAHITI:
1329 case CHIP_PITCAIRN:
1330 case CHIP_OLAND:
1331 case CHIP_HAINAN:
1332#endif
1333#ifdef CONFIG_DRM_AMDGPU_CIK
1334 case CHIP_BONAIRE:
1335 case CHIP_HAWAII:
1336 case CHIP_KAVERI:
1337 case CHIP_KABINI:
1338 case CHIP_MULLINS:
1339#endif
27c0bc71 1340 case CHIP_VEGA20:
e2a75f88
AD
1341 default:
1342 return 0;
1343 case CHIP_VEGA10:
1344 chip_name = "vega10";
1345 break;
3f76dced
AD
1346 case CHIP_VEGA12:
1347 chip_name = "vega12";
1348 break;
2d2e5e7e 1349 case CHIP_RAVEN:
54c4d17e
FX
1350 if (adev->rev_id >= 8)
1351 chip_name = "raven2";
741deade
AD
1352 else if (adev->pdev->device == 0x15d8)
1353 chip_name = "picasso";
54c4d17e
FX
1354 else
1355 chip_name = "raven";
2d2e5e7e 1356 break;
e2a75f88
AD
1357 }
1358
1359 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_gpu_info.bin", chip_name);
ab4fe3e1 1360 err = request_firmware(&adev->firmware.gpu_info_fw, fw_name, adev->dev);
e2a75f88
AD
1361 if (err) {
1362 dev_err(adev->dev,
1363 "Failed to load gpu_info firmware \"%s\"\n",
1364 fw_name);
1365 goto out;
1366 }
ab4fe3e1 1367 err = amdgpu_ucode_validate(adev->firmware.gpu_info_fw);
e2a75f88
AD
1368 if (err) {
1369 dev_err(adev->dev,
1370 "Failed to validate gpu_info firmware \"%s\"\n",
1371 fw_name);
1372 goto out;
1373 }
1374
ab4fe3e1 1375 hdr = (const struct gpu_info_firmware_header_v1_0 *)adev->firmware.gpu_info_fw->data;
e2a75f88
AD
1376 amdgpu_ucode_print_gpu_info_hdr(&hdr->header);
1377
1378 switch (hdr->version_major) {
1379 case 1:
1380 {
1381 const struct gpu_info_firmware_v1_0 *gpu_info_fw =
ab4fe3e1 1382 (const struct gpu_info_firmware_v1_0 *)(adev->firmware.gpu_info_fw->data +
e2a75f88
AD
1383 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1384
b5ab16bf
AD
1385 adev->gfx.config.max_shader_engines = le32_to_cpu(gpu_info_fw->gc_num_se);
1386 adev->gfx.config.max_cu_per_sh = le32_to_cpu(gpu_info_fw->gc_num_cu_per_sh);
1387 adev->gfx.config.max_sh_per_se = le32_to_cpu(gpu_info_fw->gc_num_sh_per_se);
1388 adev->gfx.config.max_backends_per_se = le32_to_cpu(gpu_info_fw->gc_num_rb_per_se);
e2a75f88 1389 adev->gfx.config.max_texture_channel_caches =
b5ab16bf
AD
1390 le32_to_cpu(gpu_info_fw->gc_num_tccs);
1391 adev->gfx.config.max_gprs = le32_to_cpu(gpu_info_fw->gc_num_gprs);
1392 adev->gfx.config.max_gs_threads = le32_to_cpu(gpu_info_fw->gc_num_max_gs_thds);
1393 adev->gfx.config.gs_vgt_table_depth = le32_to_cpu(gpu_info_fw->gc_gs_table_depth);
1394 adev->gfx.config.gs_prim_buffer_depth = le32_to_cpu(gpu_info_fw->gc_gsprim_buff_depth);
e2a75f88 1395 adev->gfx.config.double_offchip_lds_buf =
b5ab16bf
AD
1396 le32_to_cpu(gpu_info_fw->gc_double_offchip_lds_buffer);
1397 adev->gfx.cu_info.wave_front_size = le32_to_cpu(gpu_info_fw->gc_wave_size);
51fd0370
HZ
1398 adev->gfx.cu_info.max_waves_per_simd =
1399 le32_to_cpu(gpu_info_fw->gc_max_waves_per_simd);
1400 adev->gfx.cu_info.max_scratch_slots_per_cu =
1401 le32_to_cpu(gpu_info_fw->gc_max_scratch_slots_per_cu);
1402 adev->gfx.cu_info.lds_size = le32_to_cpu(gpu_info_fw->gc_lds_size);
e2a75f88
AD
1403 break;
1404 }
1405 default:
1406 dev_err(adev->dev,
1407 "Unsupported gpu_info table %d\n", hdr->header.ucode_version);
1408 err = -EINVAL;
1409 goto out;
1410 }
1411out:
e2a75f88
AD
1412 return err;
1413}
1414
e3ecdffa
AD
/**
 * amdgpu_device_ip_early_init - run early init for hardware IPs
 *
 * @adev: amdgpu_device pointer
 *
 * Early initialization pass for hardware IPs. The hardware IPs that make
 * up each asic are discovered and each IP's early_init callback is run. This
 * is the first stage in initializing the asic.
 * Returns 0 on success, negative error code on failure.
 */
06ec9070 1425static int amdgpu_device_ip_early_init(struct amdgpu_device *adev)
d38ceaf9 1426{
aaa36a97 1427 int i, r;
d38ceaf9 1428
483ef985 1429 amdgpu_device_enable_virtual_display(adev);
a6be7570 1430
d38ceaf9 1431 switch (adev->asic_type) {
aaa36a97
AD
1432 case CHIP_TOPAZ:
1433 case CHIP_TONGA:
48299f95 1434 case CHIP_FIJI:
2cc0c0b5 1435 case CHIP_POLARIS10:
32cc7e53 1436 case CHIP_POLARIS11:
c4642a47 1437 case CHIP_POLARIS12:
32cc7e53 1438 case CHIP_VEGAM:
aaa36a97 1439 case CHIP_CARRIZO:
39bb0c92
SL
1440 case CHIP_STONEY:
1441 if (adev->asic_type == CHIP_CARRIZO || adev->asic_type == CHIP_STONEY)
aaa36a97
AD
1442 adev->family = AMDGPU_FAMILY_CZ;
1443 else
1444 adev->family = AMDGPU_FAMILY_VI;
1445
1446 r = vi_set_ip_blocks(adev);
1447 if (r)
1448 return r;
1449 break;
33f34802
KW
1450#ifdef CONFIG_DRM_AMDGPU_SI
1451 case CHIP_VERDE:
1452 case CHIP_TAHITI:
1453 case CHIP_PITCAIRN:
1454 case CHIP_OLAND:
1455 case CHIP_HAINAN:
295d0daf 1456 adev->family = AMDGPU_FAMILY_SI;
33f34802
KW
1457 r = si_set_ip_blocks(adev);
1458 if (r)
1459 return r;
1460 break;
1461#endif
a2e73f56
AD
1462#ifdef CONFIG_DRM_AMDGPU_CIK
1463 case CHIP_BONAIRE:
1464 case CHIP_HAWAII:
1465 case CHIP_KAVERI:
1466 case CHIP_KABINI:
1467 case CHIP_MULLINS:
1468 if ((adev->asic_type == CHIP_BONAIRE) || (adev->asic_type == CHIP_HAWAII))
1469 adev->family = AMDGPU_FAMILY_CI;
1470 else
1471 adev->family = AMDGPU_FAMILY_KV;
1472
1473 r = cik_set_ip_blocks(adev);
1474 if (r)
1475 return r;
1476 break;
1477#endif
e48a3cd9
AD
1478 case CHIP_VEGA10:
1479 case CHIP_VEGA12:
e4bd8170 1480 case CHIP_VEGA20:
e48a3cd9 1481 case CHIP_RAVEN:
741deade 1482 if (adev->asic_type == CHIP_RAVEN)
2ca8a5d2
CZ
1483 adev->family = AMDGPU_FAMILY_RV;
1484 else
1485 adev->family = AMDGPU_FAMILY_AI;
460826e6
KW
1486
1487 r = soc15_set_ip_blocks(adev);
1488 if (r)
1489 return r;
1490 break;
d38ceaf9
AD
1491 default:
1492 /* FIXME: not supported yet */
1493 return -EINVAL;
1494 }
1495
e2a75f88
AD
1496 r = amdgpu_device_parse_gpu_info_fw(adev);
1497 if (r)
1498 return r;
1499
1884734a 1500 amdgpu_amdkfd_device_probe(adev);
1501
3149d9da
XY
1502 if (amdgpu_sriov_vf(adev)) {
1503 r = amdgpu_virt_request_full_gpu(adev, true);
1504 if (r)
5ffa61c1 1505 return -EAGAIN;
3149d9da
XY
1506 }
1507
00f54b97
HR
1508 adev->powerplay.pp_feature = amdgpu_pp_feature_mask;
1509
d38ceaf9
AD
1510 for (i = 0; i < adev->num_ip_blocks; i++) {
1511 if ((amdgpu_ip_block_mask & (1 << i)) == 0) {
ed8cf00c
HR
1512 DRM_ERROR("disabled ip block: %d <%s>\n",
1513 i, adev->ip_blocks[i].version->funcs->name);
a1255107 1514 adev->ip_blocks[i].status.valid = false;
d38ceaf9 1515 } else {
a1255107
AD
1516 if (adev->ip_blocks[i].version->funcs->early_init) {
1517 r = adev->ip_blocks[i].version->funcs->early_init((void *)adev);
2c1a2784 1518 if (r == -ENOENT) {
a1255107 1519 adev->ip_blocks[i].status.valid = false;
2c1a2784 1520 } else if (r) {
a1255107
AD
1521 DRM_ERROR("early_init of IP block <%s> failed %d\n",
1522 adev->ip_blocks[i].version->funcs->name, r);
d38ceaf9 1523 return r;
2c1a2784 1524 } else {
a1255107 1525 adev->ip_blocks[i].status.valid = true;
2c1a2784 1526 }
974e6b64 1527 } else {
a1255107 1528 adev->ip_blocks[i].status.valid = true;
d38ceaf9 1529 }
d38ceaf9
AD
1530 }
1531 }
1532
395d1fb9
NH
1533 adev->cg_flags &= amdgpu_cg_mask;
1534 adev->pg_flags &= amdgpu_pg_mask;
1535
d38ceaf9
AD
1536 return 0;
1537}
1538
0a4f2520
RZ
1539static int amdgpu_device_ip_hw_init_phase1(struct amdgpu_device *adev)
1540{
1541 int i, r;
1542
1543 for (i = 0; i < adev->num_ip_blocks; i++) {
1544 if (!adev->ip_blocks[i].status.sw)
1545 continue;
1546 if (adev->ip_blocks[i].status.hw)
1547 continue;
1548 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
1549 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH) {
1550 r = adev->ip_blocks[i].version->funcs->hw_init(adev);
1551 if (r) {
1552 DRM_ERROR("hw_init of IP block <%s> failed %d\n",
1553 adev->ip_blocks[i].version->funcs->name, r);
1554 return r;
1555 }
1556 adev->ip_blocks[i].status.hw = true;
1557 }
1558 }
1559
1560 return 0;
1561}
1562
1563static int amdgpu_device_ip_hw_init_phase2(struct amdgpu_device *adev)
1564{
1565 int i, r;
1566
1567 for (i = 0; i < adev->num_ip_blocks; i++) {
1568 if (!adev->ip_blocks[i].status.sw)
1569 continue;
1570 if (adev->ip_blocks[i].status.hw)
1571 continue;
1572 r = adev->ip_blocks[i].version->funcs->hw_init(adev);
1573 if (r) {
1574 DRM_ERROR("hw_init of IP block <%s> failed %d\n",
1575 adev->ip_blocks[i].version->funcs->name, r);
1576 return r;
1577 }
1578 adev->ip_blocks[i].status.hw = true;
1579 }
1580
1581 return 0;
1582}
1583
7a3e0bb2
RZ
1584static int amdgpu_device_fw_loading(struct amdgpu_device *adev)
1585{
1586 int r = 0;
1587 int i;
1588
1589 if (adev->asic_type >= CHIP_VEGA10) {
1590 for (i = 0; i < adev->num_ip_blocks; i++) {
1591 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP) {
1592 if (adev->in_gpu_reset || adev->in_suspend) {
1593 if (amdgpu_sriov_vf(adev) && adev->in_gpu_reset)
1594 break; /* sriov gpu reset, psp need to do hw_init before IH because of hw limit */
1595 r = adev->ip_blocks[i].version->funcs->resume(adev);
1596 if (r) {
1597 DRM_ERROR("resume of IP block <%s> failed %d\n",
1598 adev->ip_blocks[i].version->funcs->name, r);
1599 return r;
1600 }
1601 } else {
1602 r = adev->ip_blocks[i].version->funcs->hw_init(adev);
1603 if (r) {
1604 DRM_ERROR("hw_init of IP block <%s> failed %d\n",
1605 adev->ip_blocks[i].version->funcs->name, r);
1606 return r;
1607 }
1608 }
1609 adev->ip_blocks[i].status.hw = true;
1610 }
1611 }
1612 }
1613
91eec27e 1614 if (adev->powerplay.pp_funcs && adev->powerplay.pp_funcs->load_firmware) {
7a3e0bb2
RZ
1615 r = adev->powerplay.pp_funcs->load_firmware(adev->powerplay.pp_handle);
1616 if (r) {
1617 pr_err("firmware loading failed\n");
1618 return r;
1619 }
1620 }
1621
1622 return 0;
1623}
1624
e3ecdffa
AD
1625/**
1626 * amdgpu_device_ip_init - run init for hardware IPs
1627 *
1628 * @adev: amdgpu_device pointer
1629 *
1630 * Main initialization pass for hardware IPs. The list of all the hardware
1631 * IPs that make up the asic is walked and the sw_init and hw_init callbacks
1632 * are run. sw_init initializes the software state associated with each IP
1633 * and hw_init initializes the hardware associated with each IP.
1634 * Returns 0 on success, negative error code on failure.
1635 */
06ec9070 1636static int amdgpu_device_ip_init(struct amdgpu_device *adev)
d38ceaf9
AD
1637{
1638 int i, r;
1639
1640 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 1641 if (!adev->ip_blocks[i].status.valid)
d38ceaf9 1642 continue;
a1255107 1643 r = adev->ip_blocks[i].version->funcs->sw_init((void *)adev);
2c1a2784 1644 if (r) {
a1255107
AD
1645 DRM_ERROR("sw_init of IP block <%s> failed %d\n",
1646 adev->ip_blocks[i].version->funcs->name, r);
d38ceaf9 1647 return r;
2c1a2784 1648 }
a1255107 1649 adev->ip_blocks[i].status.sw = true;
bfca0289 1650
d38ceaf9 1651 /* need to do gmc hw init early so we can allocate gpu mem */
a1255107 1652 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) {
06ec9070 1653 r = amdgpu_device_vram_scratch_init(adev);
2c1a2784
AD
1654 if (r) {
1655 DRM_ERROR("amdgpu_vram_scratch_init failed %d\n", r);
d38ceaf9 1656 return r;
2c1a2784 1657 }
a1255107 1658 r = adev->ip_blocks[i].version->funcs->hw_init((void *)adev);
2c1a2784
AD
1659 if (r) {
1660 DRM_ERROR("hw_init %d failed %d\n", i, r);
d38ceaf9 1661 return r;
2c1a2784 1662 }
06ec9070 1663 r = amdgpu_device_wb_init(adev);
2c1a2784 1664 if (r) {
06ec9070 1665 DRM_ERROR("amdgpu_device_wb_init failed %d\n", r);
d38ceaf9 1666 return r;
2c1a2784 1667 }
a1255107 1668 adev->ip_blocks[i].status.hw = true;
2493664f
ML
1669
1670 /* right after GMC hw init, we create CSA */
1671 if (amdgpu_sriov_vf(adev)) {
1e256e27
RZ
1672 r = amdgpu_allocate_static_csa(adev, &adev->virt.csa_obj,
1673 AMDGPU_GEM_DOMAIN_VRAM,
1674 AMDGPU_CSA_SIZE);
2493664f
ML
1675 if (r) {
1676 DRM_ERROR("allocate CSA failed %d\n", r);
1677 return r;
1678 }
1679 }
d38ceaf9
AD
1680 }
1681 }
1682
c8963ea4
RZ
1683 r = amdgpu_ucode_create_bo(adev); /* create ucode bo when sw_init complete*/
1684 if (r)
1685 return r;
0a4f2520
RZ
1686
1687 r = amdgpu_device_ip_hw_init_phase1(adev);
1688 if (r)
1689 return r;
1690
7a3e0bb2
RZ
1691 r = amdgpu_device_fw_loading(adev);
1692 if (r)
1693 return r;
1694
0a4f2520
RZ
1695 r = amdgpu_device_ip_hw_init_phase2(adev);
1696 if (r)
1697 return r;
d38ceaf9 1698
3e2e2ab5
HZ
1699 if (adev->gmc.xgmi.num_physical_nodes > 1)
1700 amdgpu_xgmi_add_device(adev);
1884734a 1701 amdgpu_amdkfd_device_init(adev);
c6332b97 1702
1703 if (amdgpu_sriov_vf(adev))
1704 amdgpu_virt_release_full_gpu(adev, true);
1705
d38ceaf9
AD
1706 return 0;
1707}
1708
e3ecdffa
AD
/**
 * amdgpu_device_fill_reset_magic - writes reset magic to gart pointer
 *
 * @adev: amdgpu_device pointer
 *
 * Writes a reset magic value to the gart pointer in VRAM. The driver calls
 * this function before a GPU reset. If the value is retained after a
 * GPU reset, VRAM has not been lost. Some GPU resets may destroy VRAM contents.
 */
06ec9070 1718static void amdgpu_device_fill_reset_magic(struct amdgpu_device *adev)
0c49e0b8
CZ
1719{
1720 memcpy(adev->reset_magic, adev->gart.ptr, AMDGPU_RESET_MAGIC_NUM);
1721}
1722
e3ecdffa
AD
/**
 * amdgpu_device_check_vram_lost - check if vram is valid
 *
 * @adev: amdgpu_device pointer
 *
 * Checks the reset magic value written to the gart pointer in VRAM.
 * The driver calls this after a GPU reset to see if the contents of
 * VRAM are lost or not.
 * Returns true if vram is lost, false if not.
 */
06ec9070 1733static bool amdgpu_device_check_vram_lost(struct amdgpu_device *adev)
0c49e0b8
CZ
1734{
1735 return !!memcmp(adev->gart.ptr, adev->reset_magic,
1736 AMDGPU_RESET_MAGIC_NUM);
1737}
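
/*
 * Illustrative sketch (not part of the driver): the reset-magic pair above is
 * used around a GPU reset roughly as follows; the real callers live in the
 * suspend/resume and GPU recovery paths.
 *
 *	amdgpu_device_fill_reset_magic(adev);
 *	... perform the ASIC reset ...
 *	if (amdgpu_device_check_vram_lost(adev))
 *		... VRAM contents were lost and buffers must be restored ...
 */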
1738
e3ecdffa 1739/**
1112a46b 1740 * amdgpu_device_set_cg_state - set clockgating for amdgpu device
e3ecdffa
AD
1741 *
1742 * @adev: amdgpu_device pointer
1743 *
e3ecdffa 1744 * The list of all the hardware IPs that make up the asic is walked and the
1112a46b
RZ
1745 * set_clockgating_state callbacks are run.
1746 * The late init pass enables clockgating for the hardware IPs;
1747 * the fini or suspend pass disables it again.
e3ecdffa
AD
1748 * Returns 0 on success, negative error code on failure.
1749 */
fdd34271 1750
1112a46b
RZ
1751static int amdgpu_device_set_cg_state(struct amdgpu_device *adev,
1752 enum amd_clockgating_state state)
d38ceaf9 1753{
1112a46b 1754 int i, j, r;
d38ceaf9 1755
4a2ba394
SL
1756 if (amdgpu_emu_mode == 1)
1757 return 0;
1758
1112a46b
RZ
1759 for (j = 0; j < adev->num_ip_blocks; j++) {
1760 i = state == AMD_CG_STATE_GATE ? j : adev->num_ip_blocks - j - 1;
a2d31dc3 1761 if (!adev->ip_blocks[i].status.late_initialized)
d38ceaf9 1762 continue;
4a446d55 1763 /* skip CG for VCE/UVD, it's handled specially */
a1255107 1764 if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_UVD &&
57716327 1765 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCE &&
34319b32 1766 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCN &&
57716327 1767 adev->ip_blocks[i].version->funcs->set_clockgating_state) {
4a446d55 1768 /* enable clockgating to save power */
a1255107 1769 r = adev->ip_blocks[i].version->funcs->set_clockgating_state((void *)adev,
1112a46b 1770 state);
4a446d55
AD
1771 if (r) {
1772 DRM_ERROR("set_clockgating_state(gate) of IP block <%s> failed %d\n",
a1255107 1773 adev->ip_blocks[i].version->funcs->name, r);
4a446d55
AD
1774 return r;
1775 }
b0b00ff1 1776 }
d38ceaf9 1777 }
06b18f61 1778
c9f96fd5
RZ
1779 return 0;
1780}
1781
1112a46b 1782static int amdgpu_device_set_pg_state(struct amdgpu_device *adev, enum amd_powergating_state state)
c9f96fd5 1783{
1112a46b 1784 int i, j, r;
06b18f61 1785
c9f96fd5
RZ
1786 if (amdgpu_emu_mode == 1)
1787 return 0;
1788
1112a46b
RZ
1789 for (j = 0; j < adev->num_ip_blocks; j++) {
1790 i = state == AMD_PG_STATE_GATE ? j : adev->num_ip_blocks - j - 1;
a2d31dc3 1791 if (!adev->ip_blocks[i].status.late_initialized)
c9f96fd5
RZ
1792 continue;
1793 /* skip PG for VCE/UVD, it's handled specially */
1794 if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_UVD &&
1795 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCE &&
1796 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCN &&
1797 adev->ip_blocks[i].version->funcs->set_powergating_state) {
1798 /* enable powergating to save power */
1799 r = adev->ip_blocks[i].version->funcs->set_powergating_state((void *)adev,
1112a46b 1800 state);
c9f96fd5
RZ
1801 if (r) {
1802 DRM_ERROR("set_powergating_state(gate) of IP block <%s> failed %d\n",
1803 adev->ip_blocks[i].version->funcs->name, r);
1804 return r;
1805 }
1806 }
1807 }
2dc80b00
S
1808 return 0;
1809}
1810
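/*
 * Illustrative sketch, not part of the original file: the CG/PG helpers above
 * are used as a pair elsewhere in this file -- gated on the late init path and
 * ungated again, in the opposite order, on the fini/suspend path:
 *
 *	// late init
 *	amdgpu_device_set_cg_state(adev, AMD_CG_STATE_GATE);
 *	amdgpu_device_set_pg_state(adev, AMD_PG_STATE_GATE);
 *
 *	// fini / suspend phase 1
 *	amdgpu_device_set_pg_state(adev, AMD_PG_STATE_UNGATE);
 *	amdgpu_device_set_cg_state(adev, AMD_CG_STATE_UNGATE);
 */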
e3ecdffa
AD
1811/**
1812 * amdgpu_device_ip_late_init - run late init for hardware IPs
1813 *
1814 * @adev: amdgpu_device pointer
1815 *
1816 * Late initialization pass for hardware IPs. The list of all the hardware
1817 * IPs that make up the asic is walked and the late_init callbacks are run.
1818 * late_init covers any special initialization that an IP requires
1819 * after all of the have been initialized or something that needs to happen
1820 * late in the init process.
1821 * Returns 0 on success, negative error code on failure.
1822 */
06ec9070 1823static int amdgpu_device_ip_late_init(struct amdgpu_device *adev)
2dc80b00
S
1824{
1825 int i = 0, r;
1826
1827 for (i = 0; i < adev->num_ip_blocks; i++) {
73f847db 1828 if (!adev->ip_blocks[i].status.hw)
2dc80b00
S
1829 continue;
1830 if (adev->ip_blocks[i].version->funcs->late_init) {
1831 r = adev->ip_blocks[i].version->funcs->late_init((void *)adev);
1832 if (r) {
1833 DRM_ERROR("late_init of IP block <%s> failed %d\n",
1834 adev->ip_blocks[i].version->funcs->name, r);
1835 return r;
1836 }
2dc80b00 1837 }
73f847db 1838 adev->ip_blocks[i].status.late_initialized = true;
2dc80b00
S
1839 }
1840
1112a46b
RZ
1841 amdgpu_device_set_cg_state(adev, AMD_CG_STATE_GATE);
1842 amdgpu_device_set_pg_state(adev, AMD_PG_STATE_GATE);
916ac57f 1843
2c773de2
S
1844 queue_delayed_work(system_wq, &adev->late_init_work,
1845 msecs_to_jiffies(AMDGPU_RESUME_MS));
d38ceaf9 1846
06ec9070 1847 amdgpu_device_fill_reset_magic(adev);
d38ceaf9
AD
1848
1849 return 0;
1850}
1851
e3ecdffa
AD
1852/**
1853 * amdgpu_device_ip_fini - run fini for hardware IPs
1854 *
1855 * @adev: amdgpu_device pointer
1856 *
1857 * Main teardown pass for hardware IPs. The list of all the hardware
1858 * IPs that make up the asic is walked and the hw_fini and sw_fini callbacks
1859 * are run. hw_fini tears down the hardware associated with each IP
1860 * and sw_fini tears down any software state associated with each IP.
1861 * Returns 0 on success, negative error code on failure.
1862 */
06ec9070 1863static int amdgpu_device_ip_fini(struct amdgpu_device *adev)
d38ceaf9
AD
1864{
1865 int i, r;
1866
a82400b5
AG
1867 if (adev->gmc.xgmi.num_physical_nodes > 1)
1868 amdgpu_xgmi_remove_device(adev);
1869
1884734a 1870 amdgpu_amdkfd_device_fini(adev);
05df1f01
RZ
1871
1872 amdgpu_device_set_pg_state(adev, AMD_PG_STATE_UNGATE);
fdd34271
RZ
1873 amdgpu_device_set_cg_state(adev, AMD_CG_STATE_UNGATE);
1874
3e96dbfd
AD
1875 /* need to disable SMC first */
1876 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 1877 if (!adev->ip_blocks[i].status.hw)
3e96dbfd 1878 continue;
fdd34271 1879 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) {
a1255107 1880 r = adev->ip_blocks[i].version->funcs->hw_fini((void *)adev);
3e96dbfd
AD
1881 /* XXX handle errors */
1882 if (r) {
1883 DRM_DEBUG("hw_fini of IP block <%s> failed %d\n",
a1255107 1884 adev->ip_blocks[i].version->funcs->name, r);
3e96dbfd 1885 }
a1255107 1886 adev->ip_blocks[i].status.hw = false;
3e96dbfd
AD
1887 break;
1888 }
1889 }
1890
d38ceaf9 1891 for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
a1255107 1892 if (!adev->ip_blocks[i].status.hw)
d38ceaf9 1893 continue;
8201a67a 1894
a1255107 1895 r = adev->ip_blocks[i].version->funcs->hw_fini((void *)adev);
d38ceaf9 1896 /* XXX handle errors */
2c1a2784 1897 if (r) {
a1255107
AD
1898 DRM_DEBUG("hw_fini of IP block <%s> failed %d\n",
1899 adev->ip_blocks[i].version->funcs->name, r);
2c1a2784 1900 }
8201a67a 1901
a1255107 1902 adev->ip_blocks[i].status.hw = false;
d38ceaf9
AD
1903 }
1904
9950cda2 1905
d38ceaf9 1906 for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
a1255107 1907 if (!adev->ip_blocks[i].status.sw)
d38ceaf9 1908 continue;
c12aba3a
ML
1909
1910 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) {
c8963ea4 1911 amdgpu_ucode_free_bo(adev);
1e256e27 1912 amdgpu_free_static_csa(&adev->virt.csa_obj);
c12aba3a
ML
1913 amdgpu_device_wb_fini(adev);
1914 amdgpu_device_vram_scratch_fini(adev);
1915 }
1916
a1255107 1917 r = adev->ip_blocks[i].version->funcs->sw_fini((void *)adev);
d38ceaf9 1918 /* XXX handle errors */
2c1a2784 1919 if (r) {
a1255107
AD
1920 DRM_DEBUG("sw_fini of IP block <%s> failed %d\n",
1921 adev->ip_blocks[i].version->funcs->name, r);
2c1a2784 1922 }
a1255107
AD
1923 adev->ip_blocks[i].status.sw = false;
1924 adev->ip_blocks[i].status.valid = false;
d38ceaf9
AD
1925 }
1926
a6dcfd9c 1927 for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
a1255107 1928 if (!adev->ip_blocks[i].status.late_initialized)
8a2eef1d 1929 continue;
a1255107
AD
1930 if (adev->ip_blocks[i].version->funcs->late_fini)
1931 adev->ip_blocks[i].version->funcs->late_fini((void *)adev);
1932 adev->ip_blocks[i].status.late_initialized = false;
a6dcfd9c
ML
1933 }
1934
030308fc 1935 if (amdgpu_sriov_vf(adev))
24136135
ML
1936 if (amdgpu_virt_release_full_gpu(adev, false))
1937 DRM_ERROR("failed to release exclusive mode on fini\n");
2493664f 1938
d38ceaf9
AD
1939 return 0;
1940}
1941
b55c9e7a
EQ
1942static int amdgpu_device_enable_mgpu_fan_boost(void)
1943{
1944 struct amdgpu_gpu_instance *gpu_ins;
1945 struct amdgpu_device *adev;
1946 int i, ret = 0;
1947
1948 mutex_lock(&mgpu_info.mutex);
1949
1950 /*
1951 * MGPU fan boost feature should be enabled
1952 * only when there are two or more dGPUs in
1953 * the system
1954 */
1955 if (mgpu_info.num_dgpu < 2)
1956 goto out;
1957
1958 for (i = 0; i < mgpu_info.num_dgpu; i++) {
1959 gpu_ins = &(mgpu_info.gpu_ins[i]);
1960 adev = gpu_ins->adev;
1961 if (!(adev->flags & AMD_IS_APU) &&
1962 !gpu_ins->mgpu_fan_enabled &&
1963 adev->powerplay.pp_funcs &&
1964 adev->powerplay.pp_funcs->enable_mgpu_fan_boost) {
1965 ret = amdgpu_dpm_enable_mgpu_fan_boost(adev);
1966 if (ret)
1967 break;
1968
1969 gpu_ins->mgpu_fan_enabled = 1;
1970 }
1971 }
1972
1973out:
1974 mutex_unlock(&mgpu_info.mutex);
1975
1976 return ret;
1977}
1978
e3ecdffa 1979/**
1112a46b 1980 * amdgpu_device_ip_late_init_func_handler - work handler for ib test
e3ecdffa 1981 *
1112a46b 1982 * @work: work_struct.
e3ecdffa 1983 */
06ec9070 1984static void amdgpu_device_ip_late_init_func_handler(struct work_struct *work)
2dc80b00
S
1985{
1986 struct amdgpu_device *adev =
1987 container_of(work, struct amdgpu_device, late_init_work.work);
916ac57f
RZ
1988 int r;
1989
1990 r = amdgpu_ib_ring_tests(adev);
1991 if (r)
1992 DRM_ERROR("ib ring test failed (%d).\n", r);
b55c9e7a
EQ
1993
1994 r = amdgpu_device_enable_mgpu_fan_boost();
1995 if (r)
1996 DRM_ERROR("enable mgpu fan boost failed (%d).\n", r);
2dc80b00
S
1997}
1998
1e317b99
RZ
1999static void amdgpu_device_delay_enable_gfx_off(struct work_struct *work)
2000{
2001 struct amdgpu_device *adev =
2002 container_of(work, struct amdgpu_device, gfx.gfx_off_delay_work.work);
2003
2004 mutex_lock(&adev->gfx.gfx_off_mutex);
2005 if (!adev->gfx.gfx_off_state && !adev->gfx.gfx_off_req_count) {
2006 if (!amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_GFX, true))
2007 adev->gfx.gfx_off_state = true;
2008 }
2009 mutex_unlock(&adev->gfx.gfx_off_mutex);
2010}
2011
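/*
 * Illustrative sketch, not part of the original file: the delayed work above
 * only enables GFXOFF once every requester has dropped its reference. The
 * counterpart that takes and drops those references lives outside this file
 * (amdgpu_gfx_off_ctrl() in amdgpu_gfx.c at the time of writing); roughly:
 *
 *	amdgpu_gfx_off_ctrl(adev, false);	// ++gfx_off_req_count, GFXOFF blocked
 *	...					// work that must not run under GFXOFF
 *	amdgpu_gfx_off_ctrl(adev, true);	// --gfx_off_req_count; when it reaches 0
 *						// the delayed work above gets scheduled
 */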
e3ecdffa 2012/**
e7854a03 2013 * amdgpu_device_ip_suspend_phase1 - run suspend for hardware IPs (phase 1)
e3ecdffa
AD
2014 *
2015 * @adev: amdgpu_device pointer
2016 *
2017 * First phase of the main suspend sequence. Clockgating and powergating are
2018 * disabled, then the list of hardware IPs that make up the asic is walked and
2019 * the suspend callbacks are run for the display (DCE) IPs only; the remaining
2020 * IPs are suspended in phase 2. suspend puts each IP into a state suitable for suspend.
2021 * Returns 0 on success, negative error code on failure.
2022 */
e7854a03
AD
2023static int amdgpu_device_ip_suspend_phase1(struct amdgpu_device *adev)
2024{
2025 int i, r;
2026
05df1f01 2027 amdgpu_device_set_pg_state(adev, AMD_PG_STATE_UNGATE);
fdd34271 2028 amdgpu_device_set_cg_state(adev, AMD_CG_STATE_UNGATE);
05df1f01 2029
e7854a03
AD
2030 for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
2031 if (!adev->ip_blocks[i].status.valid)
2032 continue;
2033 /* displays are handled separately */
2034 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_DCE) {
e7854a03
AD
2035 /* XXX handle errors */
2036 r = adev->ip_blocks[i].version->funcs->suspend(adev);
2037 /* XXX handle errors */
2038 if (r) {
2039 DRM_ERROR("suspend of IP block <%s> failed %d\n",
2040 adev->ip_blocks[i].version->funcs->name, r);
2041 }
2042 }
2043 }
2044
e7854a03
AD
2045 return 0;
2046}
2047
2048/**
2049 * amdgpu_device_ip_suspend_phase2 - run suspend for hardware IPs (phase 2)
2050 *
2051 * @adev: amdgpu_device pointer
2052 *
2053 * Second phase of the main suspend sequence. The list of all the hardware
2054 * IPs that make up the asic is walked and the suspend callbacks are run for
2055 * every IP except the display (DCE) IPs, which were handled in phase 1.
2056 * suspend puts the hardware and software state in each IP into a state suitable for suspend.
2057 * Returns 0 on success, negative error code on failure.
2058 */
2059static int amdgpu_device_ip_suspend_phase2(struct amdgpu_device *adev)
d38ceaf9
AD
2060{
2061 int i, r;
2062
2063 for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
a1255107 2064 if (!adev->ip_blocks[i].status.valid)
d38ceaf9 2065 continue;
e7854a03
AD
2066 /* displays are handled in phase1 */
2067 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_DCE)
2068 continue;
d38ceaf9 2069 /* XXX handle errors */
a1255107 2070 r = adev->ip_blocks[i].version->funcs->suspend(adev);
d38ceaf9 2071 /* XXX handle errors */
2c1a2784 2072 if (r) {
a1255107
AD
2073 DRM_ERROR("suspend of IP block <%s> failed %d\n",
2074 adev->ip_blocks[i].version->funcs->name, r);
2c1a2784 2075 }
d38ceaf9
AD
2076 }
2077
2078 return 0;
2079}
2080
e7854a03
AD
2081/**
2082 * amdgpu_device_ip_suspend - run suspend for hardware IPs
2083 *
2084 * @adev: amdgpu_device pointer
2085 *
2086 * Main suspend function for hardware IPs. The list of all the hardware
2087 * IPs that make up the asic is walked, clockgating is disabled and the
2088 * suspend callbacks are run. suspend puts the hardware and software state
2089 * in each IP into a state suitable for suspend.
2090 * Returns 0 on success, negative error code on failure.
2091 */
2092int amdgpu_device_ip_suspend(struct amdgpu_device *adev)
2093{
2094 int r;
2095
e7819644
YT
2096 if (amdgpu_sriov_vf(adev))
2097 amdgpu_virt_request_full_gpu(adev, false);
2098
e7854a03
AD
2099 r = amdgpu_device_ip_suspend_phase1(adev);
2100 if (r)
2101 return r;
2102 r = amdgpu_device_ip_suspend_phase2(adev);
2103
e7819644
YT
2104 if (amdgpu_sriov_vf(adev))
2105 amdgpu_virt_release_full_gpu(adev, false);
2106
e7854a03
AD
2107 return r;
2108}
2109
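/*
 * Illustrative sketch, not part of the original file: the reason for the two
 * suspend phases is visible in amdgpu_device_suspend() later in this file --
 * VRAM eviction and fence teardown have to happen between them:
 *
 *	amdgpu_device_ip_suspend_phase1(adev);	// display (DCE) goes down first
 *	amdgpu_bo_evict_vram(adev);		// evict BOs while the rest is still up
 *	amdgpu_fence_driver_suspend(adev);
 *	amdgpu_device_ip_suspend_phase2(adev);	// everything else
 *	amdgpu_bo_evict_vram(adev);		// second pass for the gart page table
 */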
06ec9070 2110static int amdgpu_device_ip_reinit_early_sriov(struct amdgpu_device *adev)
a90ad3c2
ML
2111{
2112 int i, r;
2113
2cb681b6
ML
2114 static enum amd_ip_block_type ip_order[] = {
2115 AMD_IP_BLOCK_TYPE_GMC,
2116 AMD_IP_BLOCK_TYPE_COMMON,
39186aef 2117 AMD_IP_BLOCK_TYPE_PSP,
2cb681b6
ML
2118 AMD_IP_BLOCK_TYPE_IH,
2119 };
a90ad3c2 2120
2cb681b6
ML
2121 for (i = 0; i < ARRAY_SIZE(ip_order); i++) {
2122 int j;
2123 struct amdgpu_ip_block *block;
a90ad3c2 2124
2cb681b6
ML
2125 for (j = 0; j < adev->num_ip_blocks; j++) {
2126 block = &adev->ip_blocks[j];
2127
2128 if (block->version->type != ip_order[i] ||
2129 !block->status.valid)
2130 continue;
2131
2132 r = block->version->funcs->hw_init(adev);
3f48c681 2133 DRM_INFO("RE-INIT: %s %s\n", block->version->funcs->name, r?"failed":"succeeded");
c41d1cf6
ML
2134 if (r)
2135 return r;
a90ad3c2
ML
2136 }
2137 }
2138
2139 return 0;
2140}
2141
06ec9070 2142static int amdgpu_device_ip_reinit_late_sriov(struct amdgpu_device *adev)
a90ad3c2
ML
2143{
2144 int i, r;
2145
2cb681b6
ML
2146 static enum amd_ip_block_type ip_order[] = {
2147 AMD_IP_BLOCK_TYPE_SMC,
2148 AMD_IP_BLOCK_TYPE_DCE,
2149 AMD_IP_BLOCK_TYPE_GFX,
2150 AMD_IP_BLOCK_TYPE_SDMA,
257deb8c
FM
2151 AMD_IP_BLOCK_TYPE_UVD,
2152 AMD_IP_BLOCK_TYPE_VCE
2cb681b6 2153 };
a90ad3c2 2154
2cb681b6
ML
2155 for (i = 0; i < ARRAY_SIZE(ip_order); i++) {
2156 int j;
2157 struct amdgpu_ip_block *block;
a90ad3c2 2158
2cb681b6
ML
2159 for (j = 0; j < adev->num_ip_blocks; j++) {
2160 block = &adev->ip_blocks[j];
2161
2162 if (block->version->type != ip_order[i] ||
2163 !block->status.valid)
2164 continue;
2165
2166 r = block->version->funcs->hw_init(adev);
3f48c681 2167 DRM_INFO("RE-INIT: %s %s\n", block->version->funcs->name, r?"failed":"succeeded");
c41d1cf6
ML
2168 if (r)
2169 return r;
a90ad3c2
ML
2170 }
2171 }
2172
2173 return 0;
2174}
2175
e3ecdffa
AD
2176/**
2177 * amdgpu_device_ip_resume_phase1 - run resume for hardware IPs
2178 *
2179 * @adev: amdgpu_device pointer
2180 *
2181 * First resume function for hardware IPs. The list of all the hardware
2182 * IPs that make up the asic is walked and the resume callbacks are run for
2183 * COMMON, GMC, and IH. resume puts the hardware into a functional state
2184 * after a suspend and updates the software state as necessary. This
2185 * function is also used for restoring the GPU after a GPU reset.
2186 * Returns 0 on success, negative error code on failure.
2187 */
06ec9070 2188static int amdgpu_device_ip_resume_phase1(struct amdgpu_device *adev)
d38ceaf9
AD
2189{
2190 int i, r;
2191
a90ad3c2
ML
2192 for (i = 0; i < adev->num_ip_blocks; i++) {
2193 if (!adev->ip_blocks[i].status.valid)
2194 continue;
a90ad3c2 2195 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
e3ecdffa
AD
2196 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
2197 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH) {
fcf0649f
CZ
2198 r = adev->ip_blocks[i].version->funcs->resume(adev);
2199 if (r) {
2200 DRM_ERROR("resume of IP block <%s> failed %d\n",
2201 adev->ip_blocks[i].version->funcs->name, r);
2202 return r;
2203 }
a90ad3c2
ML
2204 }
2205 }
2206
2207 return 0;
2208}
2209
e3ecdffa
AD
2210/**
2211 * amdgpu_device_ip_resume_phase2 - run resume for hardware IPs
2212 *
2213 * @adev: amdgpu_device pointer
2214 *
2215 * Second resume function for hardware IPs. The list of all the hardware
2216 * IPs that make up the asic is walked and the resume callbacks are run for
2217 * all blocks except COMMON, GMC, and IH. resume puts the hardware into a
2218 * functional state after a suspend and updates the software state as
2219 * necessary. This function is also used for restoring the GPU after a GPU
2220 * reset.
2221 * Returns 0 on success, negative error code on failure.
2222 */
06ec9070 2223static int amdgpu_device_ip_resume_phase2(struct amdgpu_device *adev)
d38ceaf9
AD
2224{
2225 int i, r;
2226
2227 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 2228 if (!adev->ip_blocks[i].status.valid)
d38ceaf9 2229 continue;
fcf0649f 2230 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
e3ecdffa 2231 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
7a3e0bb2
RZ
2232 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH ||
2233 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP)
fcf0649f 2234 continue;
a1255107 2235 r = adev->ip_blocks[i].version->funcs->resume(adev);
2c1a2784 2236 if (r) {
a1255107
AD
2237 DRM_ERROR("resume of IP block <%s> failed %d\n",
2238 adev->ip_blocks[i].version->funcs->name, r);
d38ceaf9 2239 return r;
2c1a2784 2240 }
d38ceaf9
AD
2241 }
2242
2243 return 0;
2244}
2245
e3ecdffa
AD
2246/**
2247 * amdgpu_device_ip_resume - run resume for hardware IPs
2248 *
2249 * @adev: amdgpu_device pointer
2250 *
2251 * Main resume function for hardware IPs. The hardware IPs
2252 * are split into two resume functions because they are
2253 * also used in recovering from a GPU reset and some additional
2254 * steps need to be taken between them. In this case (S3/S4) they are
2255 * run sequentially.
2256 * Returns 0 on success, negative error code on failure.
2257 */
06ec9070 2258static int amdgpu_device_ip_resume(struct amdgpu_device *adev)
fcf0649f
CZ
2259{
2260 int r;
2261
06ec9070 2262 r = amdgpu_device_ip_resume_phase1(adev);
fcf0649f
CZ
2263 if (r)
2264 return r;
7a3e0bb2
RZ
2265
2266 r = amdgpu_device_fw_loading(adev);
2267 if (r)
2268 return r;
2269
06ec9070 2270 r = amdgpu_device_ip_resume_phase2(adev);
fcf0649f
CZ
2271
2272 return r;
2273}
2274
e3ecdffa
AD
2275/**
2276 * amdgpu_device_detect_sriov_bios - determine if the board supports SR-IOV
2277 *
2278 * @adev: amdgpu_device pointer
2279 *
2280 * Query the VBIOS data tables to determine if the board supports SR-IOV.
2281 */
4e99a44e 2282static void amdgpu_device_detect_sriov_bios(struct amdgpu_device *adev)
048765ad 2283{
6867e1b5
ML
2284 if (amdgpu_sriov_vf(adev)) {
2285 if (adev->is_atom_fw) {
2286 if (amdgpu_atomfirmware_gpu_supports_virtualization(adev))
2287 adev->virt.caps |= AMDGPU_SRIOV_CAPS_SRIOV_VBIOS;
2288 } else {
2289 if (amdgpu_atombios_has_gpu_virtualization_table(adev))
2290 adev->virt.caps |= AMDGPU_SRIOV_CAPS_SRIOV_VBIOS;
2291 }
2292
2293 if (!(adev->virt.caps & AMDGPU_SRIOV_CAPS_SRIOV_VBIOS))
2294 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_NO_VBIOS, 0, 0);
a5bde2f9 2295 }
048765ad
AR
2296}
2297
e3ecdffa
AD
2298/**
2299 * amdgpu_device_asic_has_dc_support - determine if DC supports the asic
2300 *
2301 * @asic_type: AMD asic type
2302 *
2303 * Check if there is DC (new modesetting infrastructure) support for an asic.
2304 * Returns true if DC has support, false if not.
2305 */
4562236b
HW
2306bool amdgpu_device_asic_has_dc_support(enum amd_asic_type asic_type)
2307{
2308 switch (asic_type) {
2309#if defined(CONFIG_DRM_AMD_DC)
2310 case CHIP_BONAIRE:
0d6fbccb 2311 case CHIP_KAVERI:
367e6687
AD
2312 case CHIP_KABINI:
2313 case CHIP_MULLINS:
d9fda248
HW
2314 /*
2315 * We have systems in the wild with these ASICs that require
2316 * LVDS and VGA support which is not supported with DC.
2317 *
2318 * Fallback to the non-DC driver here by default so as not to
2319 * cause regressions.
2320 */
2321 return amdgpu_dc > 0;
2322 case CHIP_HAWAII:
4562236b
HW
2323 case CHIP_CARRIZO:
2324 case CHIP_STONEY:
4562236b 2325 case CHIP_POLARIS10:
675fd32b 2326 case CHIP_POLARIS11:
2c8ad2d5 2327 case CHIP_POLARIS12:
675fd32b 2328 case CHIP_VEGAM:
4562236b
HW
2329 case CHIP_TONGA:
2330 case CHIP_FIJI:
42f8ffa1 2331 case CHIP_VEGA10:
dca7b401 2332 case CHIP_VEGA12:
c6034aa2 2333 case CHIP_VEGA20:
dc37a9a0 2334#if defined(CONFIG_DRM_AMD_DC_DCN1_0)
fd187853 2335 case CHIP_RAVEN:
42f8ffa1 2336#endif
fd187853 2337 return amdgpu_dc != 0;
4562236b
HW
2338#endif
2339 default:
2340 return false;
2341 }
2342}
2343
2344/**
2345 * amdgpu_device_has_dc_support - check if dc is supported
2346 *
2347 * @adev: amdgpu_device pointer
2348 *
2349 * Returns true for supported, false for not supported
2350 */
2351bool amdgpu_device_has_dc_support(struct amdgpu_device *adev)
2352{
2555039d
XY
2353 if (amdgpu_sriov_vf(adev))
2354 return false;
2355
4562236b
HW
2356 return amdgpu_device_asic_has_dc_support(adev->asic_type);
2357}
2358
d38ceaf9
AD
2359/**
2360 * amdgpu_device_init - initialize the driver
2361 *
2362 * @adev: amdgpu_device pointer
87e3f136 2363 * @ddev: drm dev pointer
d38ceaf9
AD
2364 * @pdev: pci dev pointer
2365 * @flags: driver flags
2366 *
2367 * Initializes the driver info and hw (all asics).
2368 * Returns 0 for success or an error on failure.
2369 * Called at driver startup.
2370 */
2371int amdgpu_device_init(struct amdgpu_device *adev,
2372 struct drm_device *ddev,
2373 struct pci_dev *pdev,
2374 uint32_t flags)
2375{
2376 int r, i;
2377 bool runtime = false;
95844d20 2378 u32 max_MBps;
d38ceaf9
AD
2379
2380 adev->shutdown = false;
2381 adev->dev = &pdev->dev;
2382 adev->ddev = ddev;
2383 adev->pdev = pdev;
2384 adev->flags = flags;
2f7d10b3 2385 adev->asic_type = flags & AMD_ASIC_MASK;
d38ceaf9 2386 adev->usec_timeout = AMDGPU_MAX_USEC_TIMEOUT;
593aa2d2
SL
2387 if (amdgpu_emu_mode == 1)
2388 adev->usec_timeout *= 2;
770d13b1 2389 adev->gmc.gart_size = 512 * 1024 * 1024;
d38ceaf9
AD
2390 adev->accel_working = false;
2391 adev->num_rings = 0;
2392 adev->mman.buffer_funcs = NULL;
2393 adev->mman.buffer_funcs_ring = NULL;
2394 adev->vm_manager.vm_pte_funcs = NULL;
3798e9a6 2395 adev->vm_manager.vm_pte_num_rqs = 0;
132f34e4 2396 adev->gmc.gmc_funcs = NULL;
f54d1867 2397 adev->fence_context = dma_fence_context_alloc(AMDGPU_MAX_RINGS);
b8866c26 2398 bitmap_zero(adev->gfx.pipe_reserve_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
d38ceaf9
AD
2399
2400 adev->smc_rreg = &amdgpu_invalid_rreg;
2401 adev->smc_wreg = &amdgpu_invalid_wreg;
2402 adev->pcie_rreg = &amdgpu_invalid_rreg;
2403 adev->pcie_wreg = &amdgpu_invalid_wreg;
36b9a952
HR
2404 adev->pciep_rreg = &amdgpu_invalid_rreg;
2405 adev->pciep_wreg = &amdgpu_invalid_wreg;
d38ceaf9
AD
2406 adev->uvd_ctx_rreg = &amdgpu_invalid_rreg;
2407 adev->uvd_ctx_wreg = &amdgpu_invalid_wreg;
2408 adev->didt_rreg = &amdgpu_invalid_rreg;
2409 adev->didt_wreg = &amdgpu_invalid_wreg;
ccdbb20a
RZ
2410 adev->gc_cac_rreg = &amdgpu_invalid_rreg;
2411 adev->gc_cac_wreg = &amdgpu_invalid_wreg;
d38ceaf9
AD
2412 adev->audio_endpt_rreg = &amdgpu_block_invalid_rreg;
2413 adev->audio_endpt_wreg = &amdgpu_block_invalid_wreg;
2414
3e39ab90
AD
2415 DRM_INFO("initializing kernel modesetting (%s 0x%04X:0x%04X 0x%04X:0x%04X 0x%02X).\n",
2416 amdgpu_asic_name[adev->asic_type], pdev->vendor, pdev->device,
2417 pdev->subsystem_vendor, pdev->subsystem_device, pdev->revision);
d38ceaf9
AD
2418
2419 /* mutex initialization is all done here so we
2420 * can call these functions later without locking issues */
d38ceaf9 2421 atomic_set(&adev->irq.ih.lock, 0);
0e5ca0d1 2422 mutex_init(&adev->firmware.mutex);
d38ceaf9
AD
2423 mutex_init(&adev->pm.mutex);
2424 mutex_init(&adev->gfx.gpu_clock_mutex);
2425 mutex_init(&adev->srbm_mutex);
b8866c26 2426 mutex_init(&adev->gfx.pipe_reserve_mutex);
d23ee13f 2427 mutex_init(&adev->gfx.gfx_off_mutex);
d38ceaf9 2428 mutex_init(&adev->grbm_idx_mutex);
d38ceaf9 2429 mutex_init(&adev->mn_lock);
e23b74aa 2430 mutex_init(&adev->virt.vf_errors.lock);
d38ceaf9 2431 hash_init(adev->mn_hash);
13a752e3 2432 mutex_init(&adev->lock_reset);
d38ceaf9 2433
06ec9070 2434 amdgpu_device_check_arguments(adev);
d38ceaf9 2435
d38ceaf9
AD
2436 spin_lock_init(&adev->mmio_idx_lock);
2437 spin_lock_init(&adev->smc_idx_lock);
2438 spin_lock_init(&adev->pcie_idx_lock);
2439 spin_lock_init(&adev->uvd_ctx_idx_lock);
2440 spin_lock_init(&adev->didt_idx_lock);
ccdbb20a 2441 spin_lock_init(&adev->gc_cac_idx_lock);
16abb5d2 2442 spin_lock_init(&adev->se_cac_idx_lock);
d38ceaf9 2443 spin_lock_init(&adev->audio_endpt_idx_lock);
95844d20 2444 spin_lock_init(&adev->mm_stats.lock);
d38ceaf9 2445
0c4e7fa5
CZ
2446 INIT_LIST_HEAD(&adev->shadow_list);
2447 mutex_init(&adev->shadow_list_lock);
2448
795f2813
AR
2449 INIT_LIST_HEAD(&adev->ring_lru_list);
2450 spin_lock_init(&adev->ring_lru_list_lock);
2451
06ec9070
AD
2452 INIT_DELAYED_WORK(&adev->late_init_work,
2453 amdgpu_device_ip_late_init_func_handler);
1e317b99
RZ
2454 INIT_DELAYED_WORK(&adev->gfx.gfx_off_delay_work,
2455 amdgpu_device_delay_enable_gfx_off);
2dc80b00 2456
d23ee13f 2457 adev->gfx.gfx_off_req_count = 1;
b1ddf548
RZ
2458 adev->pm.ac_power = power_supply_is_system_supplied() > 0 ? true : false;
2459
0fa49558
AX
2460 /* Registers mapping */
2461 /* TODO: block userspace mapping of io register */
da69c161
KW
2462 if (adev->asic_type >= CHIP_BONAIRE) {
2463 adev->rmmio_base = pci_resource_start(adev->pdev, 5);
2464 adev->rmmio_size = pci_resource_len(adev->pdev, 5);
2465 } else {
2466 adev->rmmio_base = pci_resource_start(adev->pdev, 2);
2467 adev->rmmio_size = pci_resource_len(adev->pdev, 2);
2468 }
d38ceaf9 2469
d38ceaf9
AD
2470 adev->rmmio = ioremap(adev->rmmio_base, adev->rmmio_size);
2471 if (adev->rmmio == NULL) {
2472 return -ENOMEM;
2473 }
2474 DRM_INFO("register mmio base: 0x%08X\n", (uint32_t)adev->rmmio_base);
2475 DRM_INFO("register mmio size: %u\n", (unsigned)adev->rmmio_size);
2476
d38ceaf9
AD
2477 /* io port mapping */
2478 for (i = 0; i < DEVICE_COUNT_RESOURCE; i++) {
2479 if (pci_resource_flags(adev->pdev, i) & IORESOURCE_IO) {
2480 adev->rio_mem_size = pci_resource_len(adev->pdev, i);
2481 adev->rio_mem = pci_iomap(adev->pdev, i, adev->rio_mem_size);
2482 break;
2483 }
2484 }
2485 if (adev->rio_mem == NULL)
b64a18c5 2486 DRM_INFO("PCI I/O BAR is not found.\n");
d38ceaf9 2487
5494d864
AD
2488 amdgpu_device_get_pcie_info(adev);
2489
d38ceaf9 2490 /* early init functions */
06ec9070 2491 r = amdgpu_device_ip_early_init(adev);
d38ceaf9
AD
2492 if (r)
2493 return r;
2494
6585661d
OZ
2495 /* doorbell bar mapping and doorbell index init */
2496 amdgpu_device_doorbell_init(adev);
2497
d38ceaf9
AD
2498 /* if we have > 1 VGA cards, then disable the amdgpu VGA resources */
2499 /* this will fail for cards that aren't VGA class devices, just
2500 * ignore it */
06ec9070 2501 vga_client_register(adev->pdev, adev, NULL, amdgpu_device_vga_set_decode);
d38ceaf9 2502
e9bef455 2503 if (amdgpu_device_is_px(ddev))
d38ceaf9 2504 runtime = true;
84c8b22e
LW
2505 if (!pci_is_thunderbolt_attached(adev->pdev))
2506 vga_switcheroo_register_client(adev->pdev,
2507 &amdgpu_switcheroo_ops, runtime);
d38ceaf9
AD
2508 if (runtime)
2509 vga_switcheroo_init_domain_pm_ops(adev->dev, &adev->vga_pm_domain);
2510
9475a943
SL
2511 if (amdgpu_emu_mode == 1) {
2512 /* post the asic in emulation mode */
2513 emu_soc_asic_init(adev);
bfca0289 2514 goto fence_driver_init;
9475a943 2515 }
bfca0289 2516
d38ceaf9 2517 /* Read BIOS */
83ba126a
AD
2518 if (!amdgpu_get_bios(adev)) {
2519 r = -EINVAL;
2520 goto failed;
2521 }
f7e9e9fe 2522
d38ceaf9 2523 r = amdgpu_atombios_init(adev);
2c1a2784
AD
2524 if (r) {
2525 dev_err(adev->dev, "amdgpu_atombios_init failed\n");
e23b74aa 2526 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_INIT_FAIL, 0, 0);
83ba126a 2527 goto failed;
2c1a2784 2528 }
d38ceaf9 2529
4e99a44e
ML
2530 /* detect if we are with an SRIOV vbios */
2531 amdgpu_device_detect_sriov_bios(adev);
048765ad 2532
d38ceaf9 2533 /* Post card if necessary */
39c640c0 2534 if (amdgpu_device_need_post(adev)) {
d38ceaf9 2535 if (!adev->bios) {
bec86378 2536 dev_err(adev->dev, "no vBIOS found\n");
83ba126a
AD
2537 r = -EINVAL;
2538 goto failed;
d38ceaf9 2539 }
bec86378 2540 DRM_INFO("GPU posting now...\n");
4e99a44e
ML
2541 r = amdgpu_atom_asic_init(adev->mode_info.atom_context);
2542 if (r) {
2543 dev_err(adev->dev, "gpu post error!\n");
2544 goto failed;
2545 }
d38ceaf9
AD
2546 }
2547
88b64e95
AD
2548 if (adev->is_atom_fw) {
2549 /* Initialize clocks */
2550 r = amdgpu_atomfirmware_get_clock_info(adev);
2551 if (r) {
2552 dev_err(adev->dev, "amdgpu_atomfirmware_get_clock_info failed\n");
e23b74aa 2553 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_GET_CLOCK_FAIL, 0, 0);
88b64e95
AD
2554 goto failed;
2555 }
2556 } else {
a5bde2f9
AD
2557 /* Initialize clocks */
2558 r = amdgpu_atombios_get_clock_info(adev);
2559 if (r) {
2560 dev_err(adev->dev, "amdgpu_atombios_get_clock_info failed\n");
e23b74aa 2561 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_GET_CLOCK_FAIL, 0, 0);
89041940 2562 goto failed;
a5bde2f9
AD
2563 }
2564 /* init i2c buses */
4562236b
HW
2565 if (!amdgpu_device_has_dc_support(adev))
2566 amdgpu_atombios_i2c_init(adev);
2c1a2784 2567 }
d38ceaf9 2568
bfca0289 2569fence_driver_init:
d38ceaf9
AD
2570 /* Fence driver */
2571 r = amdgpu_fence_driver_init(adev);
2c1a2784
AD
2572 if (r) {
2573 dev_err(adev->dev, "amdgpu_fence_driver_init failed\n");
e23b74aa 2574 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_FENCE_INIT_FAIL, 0, 0);
83ba126a 2575 goto failed;
2c1a2784 2576 }
d38ceaf9
AD
2577
2578 /* init the mode config */
2579 drm_mode_config_init(adev->ddev);
2580
06ec9070 2581 r = amdgpu_device_ip_init(adev);
d38ceaf9 2582 if (r) {
8840a387 2583 /* failed in exclusive mode due to timeout */
2584 if (amdgpu_sriov_vf(adev) &&
2585 !amdgpu_sriov_runtime(adev) &&
2586 amdgpu_virt_mmio_blocked(adev) &&
2587 !amdgpu_virt_wait_reset(adev)) {
2588 dev_err(adev->dev, "VF exclusive mode timeout\n");
1daee8b4
PD
2589 /* Don't send request since VF is inactive. */
2590 adev->virt.caps &= ~AMDGPU_SRIOV_CAPS_RUNTIME;
2591 adev->virt.ops = NULL;
8840a387 2592 r = -EAGAIN;
2593 goto failed;
2594 }
06ec9070 2595 dev_err(adev->dev, "amdgpu_device_ip_init failed\n");
e23b74aa 2596 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_AMDGPU_INIT_FAIL, 0, 0);
83ba126a 2597 goto failed;
d38ceaf9
AD
2598 }
2599
2600 adev->accel_working = true;
2601
e59c0205
AX
2602 amdgpu_vm_check_compute_bug(adev);
2603
95844d20
MO
2604 /* Initialize the buffer migration limit. */
2605 if (amdgpu_moverate >= 0)
2606 max_MBps = amdgpu_moverate;
2607 else
2608 max_MBps = 8; /* Allow 8 MB/s. */
2609 /* Get a log2 for easy divisions. */
2610 adev->mm_stats.log2_max_MBps = ilog2(max(1u, max_MBps));
2611
d38ceaf9
AD
2612 r = amdgpu_ib_pool_init(adev);
2613 if (r) {
2614 dev_err(adev->dev, "IB initialization failed (%d).\n", r);
e23b74aa 2615 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_IB_INIT_FAIL, 0, r);
83ba126a 2616 goto failed;
d38ceaf9
AD
2617 }
2618
2dc8f81e
HC
2619 if (amdgpu_sriov_vf(adev))
2620 amdgpu_virt_init_data_exchange(adev);
2621
9bc92b9c
ML
2622 amdgpu_fbdev_init(adev);
2623
d2f52ac8
RZ
2624 r = amdgpu_pm_sysfs_init(adev);
2625 if (r)
2626 DRM_ERROR("registering pm debugfs failed (%d).\n", r);
2627
75758255 2628 r = amdgpu_debugfs_gem_init(adev);
3f14e623 2629 if (r)
d38ceaf9 2630 DRM_ERROR("registering gem debugfs failed (%d).\n", r);
d38ceaf9
AD
2631
2632 r = amdgpu_debugfs_regs_init(adev);
3f14e623 2633 if (r)
d38ceaf9 2634 DRM_ERROR("registering register debugfs failed (%d).\n", r);
d38ceaf9 2635
50ab2533 2636 r = amdgpu_debugfs_firmware_init(adev);
3f14e623 2637 if (r)
50ab2533 2638 DRM_ERROR("registering firmware debugfs failed (%d).\n", r);
50ab2533 2639
763efb6c 2640 r = amdgpu_debugfs_init(adev);
db95e218 2641 if (r)
763efb6c 2642 DRM_ERROR("Creating debugfs files failed (%d).\n", r);
db95e218 2643
d38ceaf9
AD
2644 if ((amdgpu_testing & 1)) {
2645 if (adev->accel_working)
2646 amdgpu_test_moves(adev);
2647 else
2648 DRM_INFO("amdgpu: acceleration disabled, skipping move tests\n");
2649 }
d38ceaf9
AD
2650 if (amdgpu_benchmarking) {
2651 if (adev->accel_working)
2652 amdgpu_benchmark(adev, amdgpu_benchmarking);
2653 else
2654 DRM_INFO("amdgpu: acceleration disabled, skipping benchmarks\n");
2655 }
2656
2657 /* enable clockgating, etc. after ib tests, etc. since some blocks require
2658 * explicit gating rather than handling it automatically.
2659 */
06ec9070 2660 r = amdgpu_device_ip_late_init(adev);
2c1a2784 2661 if (r) {
06ec9070 2662 dev_err(adev->dev, "amdgpu_device_ip_late_init failed\n");
e23b74aa 2663 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_AMDGPU_LATE_INIT_FAIL, 0, r);
83ba126a 2664 goto failed;
2c1a2784 2665 }
d38ceaf9
AD
2666
2667 return 0;
83ba126a
AD
2668
2669failed:
89041940 2670 amdgpu_vf_error_trans_all(adev);
83ba126a
AD
2671 if (runtime)
2672 vga_switcheroo_fini_domain_pm_ops(adev->dev);
8840a387 2673
83ba126a 2674 return r;
d38ceaf9
AD
2675}
2676
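/*
 * Illustrative sketch, not part of the original file: amdgpu_device_init() is
 * called once per device from the KMS load callback (amdgpu_driver_load_kms()
 * in amdgpu_kms.c at the time of writing), roughly as:
 *
 *	adev = kzalloc(sizeof(struct amdgpu_device), GFP_KERNEL);
 *	if (!adev)
 *		return -ENOMEM;
 *	dev->dev_private = (void *)adev;
 *	r = amdgpu_device_init(adev, dev, dev->pdev, flags);
 */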
d38ceaf9
AD
2677/**
2678 * amdgpu_device_fini - tear down the driver
2679 *
2680 * @adev: amdgpu_device pointer
2681 *
2682 * Tear down the driver info (all asics).
2683 * Called at driver shutdown.
2684 */
2685void amdgpu_device_fini(struct amdgpu_device *adev)
2686{
2687 int r;
2688
2689 DRM_INFO("amdgpu: finishing device.\n");
2690 adev->shutdown = true;
e5b03032
ML
2691 /* disable all interrupts */
2692 amdgpu_irq_disable_all(adev);
ff97cba8
ML
2693 if (adev->mode_info.mode_config_initialized){
2694 if (!amdgpu_device_has_dc_support(adev))
2695 drm_crtc_force_disable_all(adev->ddev);
2696 else
2697 drm_atomic_helper_shutdown(adev->ddev);
2698 }
d38ceaf9
AD
2699 amdgpu_ib_pool_fini(adev);
2700 amdgpu_fence_driver_fini(adev);
58e955d9 2701 amdgpu_pm_sysfs_fini(adev);
d38ceaf9 2702 amdgpu_fbdev_fini(adev);
06ec9070 2703 r = amdgpu_device_ip_fini(adev);
ab4fe3e1
HR
2704 if (adev->firmware.gpu_info_fw) {
2705 release_firmware(adev->firmware.gpu_info_fw);
2706 adev->firmware.gpu_info_fw = NULL;
2707 }
d38ceaf9 2708 adev->accel_working = false;
2dc80b00 2709 cancel_delayed_work_sync(&adev->late_init_work);
d38ceaf9 2710 /* free i2c buses */
4562236b
HW
2711 if (!amdgpu_device_has_dc_support(adev))
2712 amdgpu_i2c_fini(adev);
bfca0289
SL
2713
2714 if (amdgpu_emu_mode != 1)
2715 amdgpu_atombios_fini(adev);
2716
d38ceaf9
AD
2717 kfree(adev->bios);
2718 adev->bios = NULL;
84c8b22e
LW
2719 if (!pci_is_thunderbolt_attached(adev->pdev))
2720 vga_switcheroo_unregister_client(adev->pdev);
83ba126a
AD
2721 if (adev->flags & AMD_IS_PX)
2722 vga_switcheroo_fini_domain_pm_ops(adev->dev);
d38ceaf9
AD
2723 vga_client_register(adev->pdev, NULL, NULL, NULL);
2724 if (adev->rio_mem)
2725 pci_iounmap(adev->pdev, adev->rio_mem);
2726 adev->rio_mem = NULL;
2727 iounmap(adev->rmmio);
2728 adev->rmmio = NULL;
06ec9070 2729 amdgpu_device_doorbell_fini(adev);
d38ceaf9 2730 amdgpu_debugfs_regs_cleanup(adev);
d38ceaf9
AD
2731}
2732
2733
2734/*
2735 * Suspend & resume.
2736 */
2737/**
810ddc3a 2738 * amdgpu_device_suspend - initiate device suspend
d38ceaf9 2739 *
87e3f136
DP
2740 * @dev: drm dev pointer
2741 * @suspend: suspend state
2742 * @fbcon : notify the fbdev of suspend
d38ceaf9
AD
2743 *
2744 * Puts the hw in the suspend state (all asics).
2745 * Returns 0 for success or an error on failure.
2746 * Called at driver suspend.
2747 */
810ddc3a 2748int amdgpu_device_suspend(struct drm_device *dev, bool suspend, bool fbcon)
d38ceaf9
AD
2749{
2750 struct amdgpu_device *adev;
2751 struct drm_crtc *crtc;
2752 struct drm_connector *connector;
5ceb54c6 2753 int r;
d38ceaf9
AD
2754
2755 if (dev == NULL || dev->dev_private == NULL) {
2756 return -ENODEV;
2757 }
2758
2759 adev = dev->dev_private;
2760
2761 if (dev->switch_power_state == DRM_SWITCH_POWER_OFF)
2762 return 0;
2763
44779b43 2764 adev->in_suspend = true;
d38ceaf9
AD
2765 drm_kms_helper_poll_disable(dev);
2766
5f818173
S
2767 if (fbcon)
2768 amdgpu_fbdev_set_suspend(adev, 1);
2769
a5459475
RZ
2770 cancel_delayed_work_sync(&adev->late_init_work);
2771
4562236b
HW
2772 if (!amdgpu_device_has_dc_support(adev)) {
2773 /* turn off display hw */
2774 drm_modeset_lock_all(dev);
2775 list_for_each_entry(connector, &dev->mode_config.connector_list, head) {
2776 drm_helper_connector_dpms(connector, DRM_MODE_DPMS_OFF);
2777 }
2778 drm_modeset_unlock_all(dev);
fe1053b7
AD
2779 /* unpin the front buffers and cursors */
2780 list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) {
2781 struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
2782 struct drm_framebuffer *fb = crtc->primary->fb;
2783 struct amdgpu_bo *robj;
2784
2785 if (amdgpu_crtc->cursor_bo) {
2786 struct amdgpu_bo *aobj = gem_to_amdgpu_bo(amdgpu_crtc->cursor_bo);
2787 r = amdgpu_bo_reserve(aobj, true);
2788 if (r == 0) {
2789 amdgpu_bo_unpin(aobj);
2790 amdgpu_bo_unreserve(aobj);
2791 }
756e6880 2792 }
756e6880 2793
fe1053b7
AD
2794 if (fb == NULL || fb->obj[0] == NULL) {
2795 continue;
2796 }
2797 robj = gem_to_amdgpu_bo(fb->obj[0]);
2798 /* don't unpin kernel fb objects */
2799 if (!amdgpu_fbdev_robj_is_fb(adev, robj)) {
2800 r = amdgpu_bo_reserve(robj, true);
2801 if (r == 0) {
2802 amdgpu_bo_unpin(robj);
2803 amdgpu_bo_unreserve(robj);
2804 }
d38ceaf9
AD
2805 }
2806 }
2807 }
fe1053b7
AD
2808
2809 amdgpu_amdkfd_suspend(adev);
2810
2811 r = amdgpu_device_ip_suspend_phase1(adev);
2812
d38ceaf9
AD
2813 /* evict vram memory */
2814 amdgpu_bo_evict_vram(adev);
2815
5ceb54c6 2816 amdgpu_fence_driver_suspend(adev);
d38ceaf9 2817
fe1053b7 2818 r = amdgpu_device_ip_suspend_phase2(adev);
d38ceaf9 2819
a0a71e49
AD
2820 /* evict remaining vram memory
2821 * This second call to evict vram is to evict the gart page table
2822 * using the CPU.
2823 */
d38ceaf9
AD
2824 amdgpu_bo_evict_vram(adev);
2825
2826 pci_save_state(dev->pdev);
2827 if (suspend) {
2828 /* Shut down the device */
2829 pci_disable_device(dev->pdev);
2830 pci_set_power_state(dev->pdev, PCI_D3hot);
74b0b157 2831 } else {
2832 r = amdgpu_asic_reset(adev);
2833 if (r)
2834 DRM_ERROR("amdgpu asic reset failed\n");
d38ceaf9
AD
2835 }
2836
d38ceaf9
AD
2837 return 0;
2838}
2839
2840/**
810ddc3a 2841 * amdgpu_device_resume - initiate device resume
d38ceaf9 2842 *
87e3f136
DP
2843 * @dev: drm dev pointer
2844 * @resume: resume state
2845 * @fbcon : notify the fbdev of resume
d38ceaf9
AD
2846 *
2847 * Bring the hw back to operating state (all asics).
2848 * Returns 0 for success or an error on failure.
2849 * Called at driver resume.
2850 */
810ddc3a 2851int amdgpu_device_resume(struct drm_device *dev, bool resume, bool fbcon)
d38ceaf9
AD
2852{
2853 struct drm_connector *connector;
2854 struct amdgpu_device *adev = dev->dev_private;
756e6880 2855 struct drm_crtc *crtc;
03161a6e 2856 int r = 0;
d38ceaf9
AD
2857
2858 if (dev->switch_power_state == DRM_SWITCH_POWER_OFF)
2859 return 0;
2860
d38ceaf9
AD
2861 if (resume) {
2862 pci_set_power_state(dev->pdev, PCI_D0);
2863 pci_restore_state(dev->pdev);
74b0b157 2864 r = pci_enable_device(dev->pdev);
03161a6e 2865 if (r)
4d3b9ae5 2866 return r;
d38ceaf9
AD
2867 }
2868
2869 /* post card */
39c640c0 2870 if (amdgpu_device_need_post(adev)) {
74b0b157 2871 r = amdgpu_atom_asic_init(adev->mode_info.atom_context);
2872 if (r)
2873 DRM_ERROR("amdgpu asic init failed\n");
2874 }
d38ceaf9 2875
06ec9070 2876 r = amdgpu_device_ip_resume(adev);
e6707218 2877 if (r) {
06ec9070 2878 DRM_ERROR("amdgpu_device_ip_resume failed (%d).\n", r);
4d3b9ae5 2879 return r;
e6707218 2880 }
5ceb54c6
AD
2881 amdgpu_fence_driver_resume(adev);
2882
d38ceaf9 2883
06ec9070 2884 r = amdgpu_device_ip_late_init(adev);
03161a6e 2885 if (r)
4d3b9ae5 2886 return r;
d38ceaf9 2887
fe1053b7
AD
2888 if (!amdgpu_device_has_dc_support(adev)) {
2889 /* pin cursors */
2890 list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) {
2891 struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
2892
2893 if (amdgpu_crtc->cursor_bo) {
2894 struct amdgpu_bo *aobj = gem_to_amdgpu_bo(amdgpu_crtc->cursor_bo);
2895 r = amdgpu_bo_reserve(aobj, true);
2896 if (r == 0) {
2897 r = amdgpu_bo_pin(aobj, AMDGPU_GEM_DOMAIN_VRAM);
2898 if (r != 0)
2899 DRM_ERROR("Failed to pin cursor BO (%d)\n", r);
2900 amdgpu_crtc->cursor_addr = amdgpu_bo_gpu_offset(aobj);
2901 amdgpu_bo_unreserve(aobj);
2902 }
756e6880
AD
2903 }
2904 }
2905 }
ba997709
YZ
2906 r = amdgpu_amdkfd_resume(adev);
2907 if (r)
2908 return r;
756e6880 2909
96a5d8d4
LL
2910 /* Make sure IB tests flushed */
2911 flush_delayed_work(&adev->late_init_work);
2912
d38ceaf9
AD
2913 /* blat the mode back in */
2914 if (fbcon) {
4562236b
HW
2915 if (!amdgpu_device_has_dc_support(adev)) {
2916 /* pre DCE11 */
2917 drm_helper_resume_force_mode(dev);
2918
2919 /* turn on display hw */
2920 drm_modeset_lock_all(dev);
2921 list_for_each_entry(connector, &dev->mode_config.connector_list, head) {
2922 drm_helper_connector_dpms(connector, DRM_MODE_DPMS_ON);
2923 }
2924 drm_modeset_unlock_all(dev);
d38ceaf9 2925 }
4d3b9ae5 2926 amdgpu_fbdev_set_suspend(adev, 0);
d38ceaf9
AD
2927 }
2928
2929 drm_kms_helper_poll_enable(dev);
23a1a9e5
L
2930
2931 /*
2932 * Most of the connector probing functions try to acquire runtime pm
2933 * refs to ensure that the GPU is powered on when connector polling is
2934 * performed. Since we're calling this from a runtime PM callback,
2935 * trying to acquire rpm refs will cause us to deadlock.
2936 *
2937 * Since we're guaranteed to be holding the rpm lock, it's safe to
2938 * temporarily disable the rpm helpers so this doesn't deadlock us.
2939 */
2940#ifdef CONFIG_PM
2941 dev->dev->power.disable_depth++;
2942#endif
4562236b
HW
2943 if (!amdgpu_device_has_dc_support(adev))
2944 drm_helper_hpd_irq_event(dev);
2945 else
2946 drm_kms_helper_hotplug_event(dev);
23a1a9e5
L
2947#ifdef CONFIG_PM
2948 dev->dev->power.disable_depth--;
2949#endif
44779b43
RZ
2950 adev->in_suspend = false;
2951
4d3b9ae5 2952 return 0;
d38ceaf9
AD
2953}
2954
e3ecdffa
AD
2955/**
2956 * amdgpu_device_ip_check_soft_reset - did soft reset succeed
2957 *
2958 * @adev: amdgpu_device pointer
2959 *
2960 * The list of all the hardware IPs that make up the asic is walked and
2961 * the check_soft_reset callbacks are run. check_soft_reset determines
2962 * if the asic is still hung or not.
2963 * Returns true if any of the IPs are still in a hung state, false if not.
2964 */
06ec9070 2965static bool amdgpu_device_ip_check_soft_reset(struct amdgpu_device *adev)
63fbf42f
CZ
2966{
2967 int i;
2968 bool asic_hang = false;
2969
f993d628
ML
2970 if (amdgpu_sriov_vf(adev))
2971 return true;
2972
8bc04c29
AD
2973 if (amdgpu_asic_need_full_reset(adev))
2974 return true;
2975
63fbf42f 2976 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 2977 if (!adev->ip_blocks[i].status.valid)
63fbf42f 2978 continue;
a1255107
AD
2979 if (adev->ip_blocks[i].version->funcs->check_soft_reset)
2980 adev->ip_blocks[i].status.hang =
2981 adev->ip_blocks[i].version->funcs->check_soft_reset(adev);
2982 if (adev->ip_blocks[i].status.hang) {
2983 DRM_INFO("IP block:%s is hung!\n", adev->ip_blocks[i].version->funcs->name);
63fbf42f
CZ
2984 asic_hang = true;
2985 }
2986 }
2987 return asic_hang;
2988}
2989
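/*
 * Illustrative sketch, not part of the original file: how this check and the
 * soft-reset helpers that follow are strung together in
 * amdgpu_device_pre_asic_reset() further down in this file:
 *
 *	if (!amdgpu_device_ip_need_full_reset(adev)) {
 *		amdgpu_device_ip_pre_soft_reset(adev);
 *		r = amdgpu_device_ip_soft_reset(adev);
 *		amdgpu_device_ip_post_soft_reset(adev);
 *		if (r || amdgpu_device_ip_check_soft_reset(adev))
 *			need_full_reset = true;	// fall back to a full ASIC reset
 *	}
 */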
e3ecdffa
AD
2990/**
2991 * amdgpu_device_ip_pre_soft_reset - prepare for soft reset
2992 *
2993 * @adev: amdgpu_device pointer
2994 *
2995 * The list of all the hardware IPs that make up the asic is walked and the
2996 * pre_soft_reset callbacks are run if the block is hung. pre_soft_reset
2997 * handles any IP specific hardware or software state changes that are
2998 * necessary for a soft reset to succeed.
2999 * Returns 0 on success, negative error code on failure.
3000 */
06ec9070 3001static int amdgpu_device_ip_pre_soft_reset(struct amdgpu_device *adev)
d31a501e
CZ
3002{
3003 int i, r = 0;
3004
3005 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 3006 if (!adev->ip_blocks[i].status.valid)
d31a501e 3007 continue;
a1255107
AD
3008 if (adev->ip_blocks[i].status.hang &&
3009 adev->ip_blocks[i].version->funcs->pre_soft_reset) {
3010 r = adev->ip_blocks[i].version->funcs->pre_soft_reset(adev);
d31a501e
CZ
3011 if (r)
3012 return r;
3013 }
3014 }
3015
3016 return 0;
3017}
3018
e3ecdffa
AD
3019/**
3020 * amdgpu_device_ip_need_full_reset - check if a full asic reset is needed
3021 *
3022 * @adev: amdgpu_device pointer
3023 *
3024 * Some hardware IPs cannot be soft reset. If they are hung, a full gpu
3025 * reset is necessary to recover.
3026 * Returns true if a full asic reset is required, false if not.
3027 */
06ec9070 3028static bool amdgpu_device_ip_need_full_reset(struct amdgpu_device *adev)
35d782fe 3029{
da146d3b
AD
3030 int i;
3031
8bc04c29
AD
3032 if (amdgpu_asic_need_full_reset(adev))
3033 return true;
3034
da146d3b 3035 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 3036 if (!adev->ip_blocks[i].status.valid)
da146d3b 3037 continue;
a1255107
AD
3038 if ((adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) ||
3039 (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) ||
3040 (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_ACP) ||
98512bb8
KW
3041 (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_DCE) ||
3042 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP) {
a1255107 3043 if (adev->ip_blocks[i].status.hang) {
da146d3b
AD
3044 DRM_INFO("Some block need full reset!\n");
3045 return true;
3046 }
3047 }
35d782fe
CZ
3048 }
3049 return false;
3050}
3051
e3ecdffa
AD
3052/**
3053 * amdgpu_device_ip_soft_reset - do a soft reset
3054 *
3055 * @adev: amdgpu_device pointer
3056 *
3057 * The list of all the hardware IPs that make up the asic is walked and the
3058 * soft_reset callbacks are run if the block is hung. soft_reset handles any
3059 * IP specific hardware or software state changes that are necessary to soft
3060 * reset the IP.
3061 * Returns 0 on success, negative error code on failure.
3062 */
06ec9070 3063static int amdgpu_device_ip_soft_reset(struct amdgpu_device *adev)
35d782fe
CZ
3064{
3065 int i, r = 0;
3066
3067 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 3068 if (!adev->ip_blocks[i].status.valid)
35d782fe 3069 continue;
a1255107
AD
3070 if (adev->ip_blocks[i].status.hang &&
3071 adev->ip_blocks[i].version->funcs->soft_reset) {
3072 r = adev->ip_blocks[i].version->funcs->soft_reset(adev);
35d782fe
CZ
3073 if (r)
3074 return r;
3075 }
3076 }
3077
3078 return 0;
3079}
3080
e3ecdffa
AD
3081/**
3082 * amdgpu_device_ip_post_soft_reset - clean up from soft reset
3083 *
3084 * @adev: amdgpu_device pointer
3085 *
3086 * The list of all the hardware IPs that make up the asic is walked and the
3087 * post_soft_reset callbacks are run if the asic was hung. post_soft_reset
3088 * handles any IP specific hardware or software state changes that are
3089 * necessary after the IP has been soft reset.
3090 * Returns 0 on success, negative error code on failure.
3091 */
06ec9070 3092static int amdgpu_device_ip_post_soft_reset(struct amdgpu_device *adev)
35d782fe
CZ
3093{
3094 int i, r = 0;
3095
3096 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 3097 if (!adev->ip_blocks[i].status.valid)
35d782fe 3098 continue;
a1255107
AD
3099 if (adev->ip_blocks[i].status.hang &&
3100 adev->ip_blocks[i].version->funcs->post_soft_reset)
3101 r = adev->ip_blocks[i].version->funcs->post_soft_reset(adev);
35d782fe
CZ
3102 if (r)
3103 return r;
3104 }
3105
3106 return 0;
3107}
3108
e3ecdffa 3109/**
c33adbc7 3110 * amdgpu_device_recover_vram - Recover some VRAM contents
e3ecdffa
AD
3111 *
3112 * @adev: amdgpu_device pointer
3113 *
3114 * Restores the contents of VRAM buffers from the shadows in GTT. Used to
3115 * restore things like GPUVM page tables after a GPU reset where
3116 * the contents of VRAM might be lost.
403009bf
CK
3117 *
3118 * Returns:
3119 * 0 on success, negative error code on failure.
e3ecdffa 3120 */
c33adbc7 3121static int amdgpu_device_recover_vram(struct amdgpu_device *adev)
c41d1cf6 3122{
c41d1cf6 3123 struct dma_fence *fence = NULL, *next = NULL;
403009bf
CK
3124 struct amdgpu_bo *shadow;
3125 long r = 1, tmo;
c41d1cf6
ML
3126
3127 if (amdgpu_sriov_runtime(adev))
b045d3af 3128 tmo = msecs_to_jiffies(8000);
c41d1cf6
ML
3129 else
3130 tmo = msecs_to_jiffies(100);
3131
3132 DRM_INFO("recover vram bo from shadow start\n");
3133 mutex_lock(&adev->shadow_list_lock);
403009bf
CK
3134 list_for_each_entry(shadow, &adev->shadow_list, shadow_list) {
3135
3136 /* No need to recover an evicted BO */
3137 if (shadow->tbo.mem.mem_type != TTM_PL_TT ||
3138 shadow->parent->tbo.mem.mem_type != TTM_PL_VRAM)
3139 continue;
3140
3141 r = amdgpu_bo_restore_shadow(shadow, &next);
3142 if (r)
3143 break;
3144
c41d1cf6
ML
3145 if (fence) {
3146 r = dma_fence_wait_timeout(fence, false, tmo);
403009bf
CK
3147 dma_fence_put(fence);
3148 fence = next;
3149 if (r <= 0)
c41d1cf6 3150 break;
403009bf
CK
3151 } else {
3152 fence = next;
c41d1cf6 3153 }
c41d1cf6
ML
3154 }
3155 mutex_unlock(&adev->shadow_list_lock);
3156
403009bf
CK
3157 if (fence)
3158 tmo = dma_fence_wait_timeout(fence, false, tmo);
c41d1cf6
ML
3159 dma_fence_put(fence);
3160
403009bf 3161 if (r <= 0 || tmo <= 0) {
c41d1cf6 3162 DRM_ERROR("recover vram bo from shadow failed\n");
403009bf
CK
3163 return -EIO;
3164 }
c41d1cf6 3165
403009bf
CK
3166 DRM_INFO("recover vram bo from shadow done\n");
3167 return 0;
c41d1cf6
ML
3168}
3169
a90ad3c2 3170
e3ecdffa 3171/**
06ec9070 3172 * amdgpu_device_reset_sriov - reset ASIC for SR-IOV vf
5740682e
ML
3173 *
3174 * @adev: amdgpu device pointer
87e3f136 3175 * @from_hypervisor: request from hypervisor
5740682e
ML
3176 *
3177 * Do VF FLR and reinitialize the asic
3f48c681 3178 * Returns 0 if it succeeded, otherwise failed
e3ecdffa
AD
3179 */
3180static int amdgpu_device_reset_sriov(struct amdgpu_device *adev,
3181 bool from_hypervisor)
5740682e
ML
3182{
3183 int r;
3184
3185 if (from_hypervisor)
3186 r = amdgpu_virt_request_full_gpu(adev, true);
3187 else
3188 r = amdgpu_virt_reset_gpu(adev);
3189 if (r)
3190 return r;
a90ad3c2
ML
3191
3192 /* Resume IP prior to SMC */
06ec9070 3193 r = amdgpu_device_ip_reinit_early_sriov(adev);
5740682e
ML
3194 if (r)
3195 goto error;
a90ad3c2
ML
3196
3197 /* we need to recover the gart prior to running SMC/CP/SDMA resume */
c1c7ce8f 3198 amdgpu_gtt_mgr_recover(&adev->mman.bdev.man[TTM_PL_TT]);
a90ad3c2 3199
7a3e0bb2
RZ
3200 r = amdgpu_device_fw_loading(adev);
3201 if (r)
3202 return r;
3203
a90ad3c2 3204 /* now we are okay to resume SMC/CP/SDMA */
06ec9070 3205 r = amdgpu_device_ip_reinit_late_sriov(adev);
5740682e
ML
3206 if (r)
3207 goto error;
a90ad3c2
ML
3208
3209 amdgpu_irq_gpu_reset_resume_helper(adev);
5740682e 3210 r = amdgpu_ib_ring_tests(adev);
a90ad3c2 3211
abc34253
ED
3212error:
3213 amdgpu_virt_release_full_gpu(adev, true);
c41d1cf6
ML
3214 if (!r && adev->virt.gim_feature & AMDGIM_FEATURE_GIM_FLR_VRAMLOST) {
3215 atomic_inc(&adev->vram_lost_counter);
c33adbc7 3216 r = amdgpu_device_recover_vram(adev);
a90ad3c2
ML
3217 }
3218
3219 return r;
3220}
3221
12938fad
CK
3222/**
3223 * amdgpu_device_should_recover_gpu - check if we should try GPU recovery
3224 *
3225 * @adev: amdgpu device pointer
3226 *
3227 * Check amdgpu_gpu_recovery and SRIOV status to see if we should try to recover
3228 * a hung GPU.
3229 */
3230bool amdgpu_device_should_recover_gpu(struct amdgpu_device *adev)
3231{
3232 if (!amdgpu_device_ip_check_soft_reset(adev)) {
3233 DRM_INFO("Timeout, but no hardware hang detected.\n");
3234 return false;
3235 }
3236
3ba7b418
AG
3237 if (amdgpu_gpu_recovery == 0)
3238 goto disabled;
3239
3240 if (amdgpu_sriov_vf(adev))
3241 return true;
3242
3243 if (amdgpu_gpu_recovery == -1) {
3244 switch (adev->asic_type) {
3245 case CHIP_TOPAZ:
3246 case CHIP_TONGA:
3247 case CHIP_FIJI:
3248 case CHIP_POLARIS10:
3249 case CHIP_POLARIS11:
3250 case CHIP_POLARIS12:
3251 case CHIP_VEGAM:
3252 case CHIP_VEGA20:
3253 case CHIP_VEGA10:
3254 case CHIP_VEGA12:
3255 break;
3256 default:
3257 goto disabled;
3258 }
12938fad
CK
3259 }
3260
3261 return true;
3ba7b418
AG
3262
3263disabled:
3264 DRM_INFO("GPU recovery disabled.\n");
3265 return false;
12938fad
CK
3266}
3267
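/*
 * Illustrative sketch, not part of the original file: the typical caller is
 * the job timeout handler (amdgpu_job.c at the time of writing), which gates
 * the recovery path below on this check:
 *
 *	if (amdgpu_device_should_recover_gpu(ring->adev))
 *		amdgpu_device_gpu_recover(ring->adev, job);
 */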
5c6dd71e 3268
26bc5340
AG
3269static int amdgpu_device_pre_asic_reset(struct amdgpu_device *adev,
3270 struct amdgpu_job *job,
3271 bool *need_full_reset_arg)
3272{
3273 int i, r = 0;
3274 bool need_full_reset = *need_full_reset_arg;
71182665 3275
71182665 3276 /* block all schedulers and reset given job's ring */
0875dc9e
CZ
3277 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
3278 struct amdgpu_ring *ring = adev->rings[i];
3279
51687759 3280 if (!ring || !ring->sched.thread)
0875dc9e 3281 continue;
5740682e 3282
71182665
ML
3283 kthread_park(ring->sched.thread);
3284
734afd4b 3285 if (job && job->base.sched != &ring->sched)
5740682e
ML
3286 continue;
3287
67ccea60 3288 drm_sched_hw_job_reset(&ring->sched, job ? &job->base : NULL);
5740682e 3289
2f9d4084
ML
3290 /* after all hw jobs are reset, hw fence is meaningless, so force_completion */
3291 amdgpu_fence_driver_force_completion(ring);
0875dc9e 3292 }
d38ceaf9 3293
26bc5340
AG
3294
3295
3296 if (!amdgpu_sriov_vf(adev)) {
3297
3298 if (!need_full_reset)
3299 need_full_reset = amdgpu_device_ip_need_full_reset(adev);
3300
3301 if (!need_full_reset) {
3302 amdgpu_device_ip_pre_soft_reset(adev);
3303 r = amdgpu_device_ip_soft_reset(adev);
3304 amdgpu_device_ip_post_soft_reset(adev);
3305 if (r || amdgpu_device_ip_check_soft_reset(adev)) {
3306 DRM_INFO("soft reset failed, will fallback to full reset!\n");
3307 need_full_reset = true;
3308 }
3309 }
3310
3311 if (need_full_reset)
3312 r = amdgpu_device_ip_suspend(adev);
3313
3314 *need_full_reset_arg = need_full_reset;
3315 }
3316
3317 return r;
3318}
3319
3320static int amdgpu_do_asic_reset(struct amdgpu_hive_info *hive,
3321 struct list_head *device_list_handle,
3322 bool *need_full_reset_arg)
3323{
3324 struct amdgpu_device *tmp_adev = NULL;
3325 bool need_full_reset = *need_full_reset_arg, vram_lost = false;
3326 int r = 0;
3327
3328 /*
3329 * ASIC reset has to be done on all XGMI hive nodes ASAP
3330 * to allow proper link negotiation in FW (within 1 sec)
3331 */
3332 if (need_full_reset) {
3333 list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) {
3334 r = amdgpu_asic_reset(tmp_adev);
3335 if (r)
3336 DRM_WARN("ASIC reset failed with err r, %d for drm dev, %s",
3337 r, tmp_adev->ddev->unique);
3338 }
3339 }
3340
3341
3342 list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) {
3343 if (need_full_reset) {
3344 /* post card */
3345 if (amdgpu_atom_asic_init(tmp_adev->mode_info.atom_context))
3346 DRM_WARN("asic atom init failed!");
3347
3348 if (!r) {
3349 dev_info(tmp_adev->dev, "GPU reset succeeded, trying to resume\n");
3350 r = amdgpu_device_ip_resume_phase1(tmp_adev);
3351 if (r)
3352 goto out;
3353
3354 vram_lost = amdgpu_device_check_vram_lost(tmp_adev);
3355 if (vram_lost) {
3356 DRM_ERROR("VRAM is lost!\n");
3357 atomic_inc(&tmp_adev->vram_lost_counter);
3358 }
3359
3360 r = amdgpu_gtt_mgr_recover(
3361 &tmp_adev->mman.bdev.man[TTM_PL_TT]);
3362 if (r)
3363 goto out;
3364
3365 r = amdgpu_device_fw_loading(tmp_adev);
3366 if (r)
3367 return r;
3368
3369 r = amdgpu_device_ip_resume_phase2(tmp_adev);
3370 if (r)
3371 goto out;
3372
3373 if (vram_lost)
3374 amdgpu_device_fill_reset_magic(tmp_adev);
3375
3376 /* Update PSP FW topology after reset */
3377 if (hive && tmp_adev->gmc.xgmi.num_physical_nodes > 1)
3378 r = amdgpu_xgmi_update_topology(hive, tmp_adev);
3379 }
3380 }
3381
3382
3383out:
3384 if (!r) {
3385 amdgpu_irq_gpu_reset_resume_helper(tmp_adev);
3386 r = amdgpu_ib_ring_tests(tmp_adev);
3387 if (r) {
3388 dev_err(tmp_adev->dev, "ib ring test failed (%d).\n", r);
3389 r = amdgpu_device_ip_suspend(tmp_adev);
3390 need_full_reset = true;
3391 r = -EAGAIN;
3392 goto end;
3393 }
3394 }
3395
3396 if (!r)
3397 r = amdgpu_device_recover_vram(tmp_adev);
3398 else
3399 tmp_adev->asic_reset_res = r;
3400 }
3401
3402end:
3403 *need_full_reset_arg = need_full_reset;
3404 return r;
3405}
3406
3407static void amdgpu_device_post_asic_reset(struct amdgpu_device *adev,
3408 struct amdgpu_job *job)
3409{
3410 int i;
5740682e 3411
71182665
ML
3412 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
3413 struct amdgpu_ring *ring = adev->rings[i];
53cdccd5 3414
71182665
ML
3415 if (!ring || !ring->sched.thread)
3416 continue;
5740682e 3417
71182665
ML
3418 /* only need to recover the scheduler of the given job's ring,
3419 * or of all rings (in the case @job is NULL),
3420 * after the reset above has completed
3421 */
26bc5340 3422 if ((!job || job->base.sched == &ring->sched) && !adev->asic_reset_res)
1b1f42d8 3423 drm_sched_job_recovery(&ring->sched);
5740682e 3424
71182665 3425 kthread_unpark(ring->sched.thread);
d38ceaf9
AD
3426 }
3427
bf830604 3428 if (!amdgpu_device_has_dc_support(adev)) {
4562236b 3429 drm_helper_resume_force_mode(adev->ddev);
5740682e 3430 }
d38ceaf9 3431
26bc5340
AG
3432 adev->asic_reset_res = 0;
3433}
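/*
 * Usage sketch (illustrative only): the kthread_unpark() above pairs with a
 * kthread_park() issued on the same ring->sched.thread in the pre-reset path
 * of this kernel version, so the per-ring bracket around a reset is roughly
 *
 *	kthread_park(ring->sched.thread);
 *	... perform the ASIC reset ...
 *	drm_sched_job_recovery(&ring->sched);
 *	kthread_unpark(ring->sched.thread);
 *
 * Only the calls shown are taken from this file; the ordering comment is an
 * editor's summary, not driver documentation.
 */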
5740682e 3434
26bc5340
AG
3435static void amdgpu_device_lock_adev(struct amdgpu_device *adev)
3436{
3437 mutex_lock(&adev->lock_reset);
3438 atomic_inc(&adev->gpu_reset_counter);
3439 adev->in_gpu_reset = 1;
3440 /* Block kfd */
3441 amdgpu_amdkfd_pre_reset(adev);
3442}
d38ceaf9 3443
26bc5340
AG
3444static void amdgpu_device_unlock_adev(struct amdgpu_device *adev)
3445{
5c6dd71e
SL
3446 /* unlock kfd */
3447 amdgpu_amdkfd_post_reset(adev);
89041940 3448 amdgpu_vf_error_trans_all(adev);
13a752e3
ML
3449 adev->in_gpu_reset = 0;
3450 mutex_unlock(&adev->lock_reset);
26bc5340
AG
3451}
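/*
 * Usage sketch (illustrative only): the two helpers above are meant to
 * bracket all per-device reset work, exactly as amdgpu_device_gpu_recover()
 * below does for each hive member:
 *
 *	amdgpu_device_lock_adev(tmp_adev);
 *	r = amdgpu_device_pre_asic_reset(tmp_adev, NULL, &need_full_reset);
 *	...
 *	amdgpu_device_unlock_adev(tmp_adev);
 */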
3452
3453
3454/**
3455 * amdgpu_device_gpu_recover - reset the asic and recover scheduler
3456 *
3457 * @adev: amdgpu device pointer
3458 * @job: which job triggered the hang
3459 *
3460 * Attempt to reset the GPU if it has hung (all asics).
3461 * Attempts a soft reset or a full reset and reinitializes the ASIC.
3462 * Returns 0 for success or an error on failure.
3463 */
3464
3465int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
3466 struct amdgpu_job *job)
3467{
3468 int r;
3469 struct amdgpu_hive_info *hive = NULL;
3470 bool need_full_reset = false;
3471 struct amdgpu_device *tmp_adev = NULL;
3472 struct list_head device_list, *device_list_handle = NULL;
3473
3474 INIT_LIST_HEAD(&device_list);
3475
3476 dev_info(adev->dev, "GPU reset begin!\n");
3477
3478 /*
3479 * In case of an XGMI hive, disallow concurrent resets from being triggered
3480 * by different nodes. There is also no point, since the node already
3481 * executing the reset will reset all the other nodes in the hive.
3482 */
3483 hive = amdgpu_get_xgmi_hive(adev);
3484 if (hive && adev->gmc.xgmi.num_physical_nodes > 1 &&
3485 !mutex_trylock(&hive->hive_lock))
3486 return 0;
3487
3488 /* Start with adev pre asic reset first for the soft reset check. */
3489 amdgpu_device_lock_adev(adev);
3490 r = amdgpu_device_pre_asic_reset(adev,
3491 job,
3492 &need_full_reset);
3493 if (r) {
3494 /* TODO: Should we stop? */
3495 DRM_ERROR("GPU pre asic reset failed with error, %d for drm dev, %s",
3496 r, adev->ddev->unique);
3497 adev->asic_reset_res = r;
3498 }
3499
3500 /* Build list of devices to reset */
3501 if (need_full_reset && adev->gmc.xgmi.num_physical_nodes > 1) {
3502 if (!hive) {
3503 amdgpu_device_unlock_adev(adev);
3504 return -ENODEV;
3505 }
3506
3507 /*
3508 * In case we are in XGMI hive mode, device reset is done for all the
3509 * nodes in the hive to retrain all XGMI links, and hence the reset
3510 * sequence is executed in a loop on all nodes.
3511 */
3512 device_list_handle = &hive->device_list;
3513 } else {
3514 list_add_tail(&adev->gmc.xgmi.head, &device_list);
3515 device_list_handle = &device_list;
3516 }
3517
3518retry: /* Rest of adevs pre asic reset from XGMI hive. */
3519 list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) {
3520
3521 if (tmp_adev == adev)
3522 continue;
3523
3524 dev_info(tmp_adev->dev, "GPU reset begin for drm dev %s!\n", adev->ddev->unique);
3525
3526 amdgpu_device_lock_adev(tmp_adev);
3527 r = amdgpu_device_pre_asic_reset(tmp_adev,
3528 NULL,
3529 &need_full_reset);
3530 /* TODO: Should we stop? */
3531 if (r) {
3532 DRM_ERROR("GPU pre asic reset failed with error, %d for drm dev, %s",
3533 r, tmp_adev->ddev->unique);
3534 tmp_adev->asic_reset_res = r;
3535 }
3536 }
3537
3538 /* Actual ASIC resets if needed. */
3539 /* TODO Implement XGMI hive reset logic for SRIOV */
3540 if (amdgpu_sriov_vf(adev)) {
3541 r = amdgpu_device_reset_sriov(adev, job ? false : true);
3542 if (r)
3543 adev->asic_reset_res = r;
3544 } else {
3545 r = amdgpu_do_asic_reset(hive, device_list_handle, &need_full_reset);
3546 if (r && r == -EAGAIN)
3547 goto retry;
3548 }
3549
3550 /* Post ASIC reset for all devs. */
3551 list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) {
3552 amdgpu_device_post_asic_reset(tmp_adev, tmp_adev == adev ? job : NULL);
3553
3554 if (r) {
3555 /* bad news, how to tell it to userspace? */
3556 dev_info(tmp_adev->dev, "GPU reset(%d) failed\n", atomic_read(&adev->gpu_reset_counter));
3557 amdgpu_vf_error_put(tmp_adev, AMDGIM_ERROR_VF_GPU_RESET_FAIL, 0, r);
3558 } else {
3559 dev_info(tmp_adev->dev, "GPU reset(%d) succeeded!\n", atomic_read(&adev->gpu_reset_counter));
3560 }
3561
3562 amdgpu_device_unlock_adev(tmp_adev);
3563 }
3564
3565 if (hive && adev->gmc.xgmi.num_physical_nodes > 1)
3566 mutex_unlock(&hive->hive_lock);
3567
3568 if (r)
3569 dev_info(adev->dev, "GPU reset end with ret = %d\n", r);
d38ceaf9
AD
3570 return r;
3571}
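/*
 * Caller sketch (illustrative only, simplified): the typical entry point is
 * the drm scheduler job-timeout handler in amdgpu_job.c, which hands the hung
 * job to the recovery path above, roughly
 *
 *	static void amdgpu_job_timedout(struct drm_sched_job *s_job)
 *	{
 *		struct amdgpu_ring *ring = to_amdgpu_ring(s_job->sched);
 *		struct amdgpu_job *job = to_amdgpu_job(s_job);
 *
 *		amdgpu_device_gpu_recover(ring->adev, job);
 *	}
 *
 * The real handler does more work; only the amdgpu_device_gpu_recover() call
 * is taken from this file, the rest is an approximation.
 */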
3572
e3ecdffa
AD
3573/**
3574 * amdgpu_device_get_pcie_info - fetch pcie info about the PCIE slot
3575 *
3576 * @adev: amdgpu_device pointer
3577 *
3578 * Fetches and stores in the driver the PCIE capabilities (gen speed
3579 * and lanes) of the slot the device is in. Handles APUs and
3580 * virtualized environments where PCIE config space may not be available.
3581 */
5494d864 3582static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev)
d0dd7f0c 3583{
5d9a6330
AD
3584 struct pci_dev *pdev;
3585 enum pci_bus_speed speed_cap;
3586 enum pcie_link_width link_width;
d0dd7f0c 3587
cd474ba0
AD
3588 if (amdgpu_pcie_gen_cap)
3589 adev->pm.pcie_gen_mask = amdgpu_pcie_gen_cap;
d0dd7f0c 3590
cd474ba0
AD
3591 if (amdgpu_pcie_lane_cap)
3592 adev->pm.pcie_mlw_mask = amdgpu_pcie_lane_cap;
d0dd7f0c 3593
cd474ba0
AD
3594 /* covers APUs as well */
3595 if (pci_is_root_bus(adev->pdev->bus)) {
3596 if (adev->pm.pcie_gen_mask == 0)
3597 adev->pm.pcie_gen_mask = AMDGPU_DEFAULT_PCIE_GEN_MASK;
3598 if (adev->pm.pcie_mlw_mask == 0)
3599 adev->pm.pcie_mlw_mask = AMDGPU_DEFAULT_PCIE_MLW_MASK;
d0dd7f0c 3600 return;
cd474ba0 3601 }
d0dd7f0c 3602
cd474ba0 3603 if (adev->pm.pcie_gen_mask == 0) {
5d9a6330
AD
3604 /* asic caps */
3605 pdev = adev->pdev;
3606 speed_cap = pcie_get_speed_cap(pdev);
3607 if (speed_cap == PCI_SPEED_UNKNOWN) {
3608 adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
cd474ba0
AD
3609 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
3610 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3);
cd474ba0 3611 } else {
5d9a6330
AD
3612 if (speed_cap == PCIE_SPEED_16_0GT)
3613 adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
3614 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
3615 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3 |
3616 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN4);
3617 else if (speed_cap == PCIE_SPEED_8_0GT)
3618 adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
3619 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
3620 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3);
3621 else if (speed_cap == PCIE_SPEED_5_0GT)
3622 adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
3623 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2);
3624 else
3625 adev->pm.pcie_gen_mask |= CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1;
3626 }
3627 /* platform caps */
3628 pdev = adev->ddev->pdev->bus->self;
3629 speed_cap = pcie_get_speed_cap(pdev);
3630 if (speed_cap == PCI_SPEED_UNKNOWN) {
3631 adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
3632 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2);
3633 } else {
3634 if (speed_cap == PCIE_SPEED_16_0GT)
3635 adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
3636 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2 |
3637 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3 |
3638 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN4);
3639 else if (speed_cap == PCIE_SPEED_8_0GT)
3640 adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
3641 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2 |
3642 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3);
3643 else if (speed_cap == PCIE_SPEED_5_0GT)
3644 adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
3645 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2);
3646 else
3647 adev->pm.pcie_gen_mask |= CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1;
3648
cd474ba0
AD
3649 }
3650 }
3651 if (adev->pm.pcie_mlw_mask == 0) {
5d9a6330
AD
3652 pdev = adev->ddev->pdev->bus->self;
3653 link_width = pcie_get_width_cap(pdev);
3654 if (link_width == PCIE_LNK_WIDTH_UNKNOWN) {
3655 adev->pm.pcie_mlw_mask |= AMDGPU_DEFAULT_PCIE_MLW_MASK;
3656 } else {
3657 switch (link_width) {
3658 case PCIE_LNK_X32:
cd474ba0
AD
3659 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X32 |
3660 CAIL_PCIE_LINK_WIDTH_SUPPORT_X16 |
3661 CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 |
3662 CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
3663 CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
3664 CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
3665 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
3666 break;
5d9a6330 3667 case PCIE_LNK_X16:
cd474ba0
AD
3668 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X16 |
3669 CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 |
3670 CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
3671 CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
3672 CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
3673 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
3674 break;
5d9a6330 3675 case PCIE_LNK_X12:
cd474ba0
AD
3676 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 |
3677 CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
3678 CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
3679 CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
3680 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
3681 break;
5d9a6330 3682 case PCIE_LNK_X8:
cd474ba0
AD
3683 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
3684 CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
3685 CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
3686 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
3687 break;
5d9a6330 3688 case PCIE_LNK_X4:
cd474ba0
AD
3689 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
3690 CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
3691 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
3692 break;
5d9a6330 3693 case PCIE_LNK_X2:
cd474ba0
AD
3694 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
3695 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
3696 break;
5d9a6330 3697 case PCIE_LNK_X1:
cd474ba0
AD
3698 adev->pm.pcie_mlw_mask = CAIL_PCIE_LINK_WIDTH_SUPPORT_X1;
3699 break;
3700 default:
3701 break;
3702 }
d0dd7f0c
AD
3703 }
3704 }
3705}
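/*
 * Consumer sketch (illustrative only): other parts of the driver test the
 * masks filled in above instead of re-querying PCI config space, e.g.
 *
 *	if (adev->pm.pcie_gen_mask & CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3)
 *		... allow a gen3 link-speed request ...
 *
 *	if (adev->pm.pcie_mlw_mask & CAIL_PCIE_LINK_WIDTH_SUPPORT_X16)
 *		... allow an x16 link-width request ...
 *
 * The mask names are the ones used above; the surrounding conditions are
 * placeholders, not code from this driver.
 */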
d38ceaf9 3706