/*
 * Copyright 2008 Advanced Micro Devices, Inc.
 * Copyright 2008 Red Hat Inc.
 * Copyright 2009 Jerome Glisse.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 * Authors: Dave Airlie
 *          Alex Deucher
 *          Jerome Glisse
 */
#include <linux/power_supply.h>
#include <linux/kthread.h>
#include <linux/console.h>
#include <linux/slab.h>
#include <drm/drmP.h>
#include <drm/drm_crtc_helper.h>
#include <drm/drm_atomic_helper.h>
#include <drm/amdgpu_drm.h>
#include <linux/vgaarb.h>
#include <linux/vga_switcheroo.h>
#include <linux/efi.h>
#include "amdgpu.h"
#include "amdgpu_trace.h"
#include "amdgpu_i2c.h"
#include "atom.h"
#include "amdgpu_atombios.h"
#include "amdgpu_atomfirmware.h"
#include "amd_pcie.h"
#ifdef CONFIG_DRM_AMDGPU_SI
#include "si.h"
#endif
#ifdef CONFIG_DRM_AMDGPU_CIK
#include "cik.h"
#endif
#include "vi.h"
#include "soc15.h"
#include "bif/bif_4_1_d.h"
#include <linux/pci.h>
#include <linux/firmware.h>
#include "amdgpu_vf_error.h"

#include "amdgpu_amdkfd.h"
#include "amdgpu_pm.h"

#include "amdgpu_xgmi.h"

MODULE_FIRMWARE("amdgpu/vega10_gpu_info.bin");
MODULE_FIRMWARE("amdgpu/vega12_gpu_info.bin");
MODULE_FIRMWARE("amdgpu/raven_gpu_info.bin");
MODULE_FIRMWARE("amdgpu/picasso_gpu_info.bin");
MODULE_FIRMWARE("amdgpu/raven2_gpu_info.bin");

#define AMDGPU_RESUME_MS		2000

static const char *amdgpu_asic_name[] = {
	"TAHITI",
	"PITCAIRN",
	"VERDE",
	"OLAND",
	"HAINAN",
	"BONAIRE",
	"KAVERI",
	"KABINI",
	"HAWAII",
	"MULLINS",
	"TOPAZ",
	"TONGA",
	"FIJI",
	"CARRIZO",
	"STONEY",
	"POLARIS10",
	"POLARIS11",
	"POLARIS12",
	"VEGAM",
	"VEGA10",
	"VEGA12",
	"VEGA20",
	"RAVEN",
	"LAST",
};

static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev);

/**
 * amdgpu_device_is_px - Is the device a dGPU with HG/PX power control
 *
 * @dev: drm_device pointer
 *
 * Returns true if the device is a dGPU with HG/PX power control,
 * otherwise returns false.
 */
bool amdgpu_device_is_px(struct drm_device *dev)
{
	struct amdgpu_device *adev = dev->dev_private;

	if (adev->flags & AMD_IS_PX)
		return true;
	return false;
}

/*
 * MMIO register access helper functions.
 */
/**
 * amdgpu_mm_rreg - read a memory mapped IO register
 *
 * @adev: amdgpu_device pointer
 * @reg: dword aligned register offset
 * @acc_flags: access flags which require special behavior
 *
 * Returns the 32 bit value from the offset specified.
 */
uint32_t amdgpu_mm_rreg(struct amdgpu_device *adev, uint32_t reg,
			uint32_t acc_flags)
{
	uint32_t ret;

	if (!(acc_flags & AMDGPU_REGS_NO_KIQ) && amdgpu_sriov_runtime(adev))
		return amdgpu_virt_kiq_rreg(adev, reg);

	if ((reg * 4) < adev->rmmio_size && !(acc_flags & AMDGPU_REGS_IDX))
		ret = readl(((void __iomem *)adev->rmmio) + (reg * 4));
	else {
		unsigned long flags;

		spin_lock_irqsave(&adev->mmio_idx_lock, flags);
		writel((reg * 4), ((void __iomem *)adev->rmmio) + (mmMM_INDEX * 4));
		ret = readl(((void __iomem *)adev->rmmio) + (mmMM_DATA * 4));
		spin_unlock_irqrestore(&adev->mmio_idx_lock, flags);
	}
	trace_amdgpu_mm_rreg(adev->pdev->device, reg, ret);
	return ret;
}

/*
 * MMIO register read with bytes helper functions
 * @offset: bytes offset from MMIO start
 */

/**
 * amdgpu_mm_rreg8 - read a memory mapped IO register
 *
 * @adev: amdgpu_device pointer
 * @offset: byte aligned register offset
 *
 * Returns the 8 bit value from the offset specified.
 */
uint8_t amdgpu_mm_rreg8(struct amdgpu_device *adev, uint32_t offset) {
	if (offset < adev->rmmio_size)
		return (readb(adev->rmmio + offset));
	BUG();
}

/*
 * MMIO register write with bytes helper functions
 * @offset: bytes offset from MMIO start
 * @value: the value to be written to the register
 */
/**
 * amdgpu_mm_wreg8 - write to a memory mapped IO register
 *
 * @adev: amdgpu_device pointer
 * @offset: byte aligned register offset
 * @value: 8 bit value to write
 *
 * Writes the value specified to the offset specified.
 */
void amdgpu_mm_wreg8(struct amdgpu_device *adev, uint32_t offset, uint8_t value) {
	if (offset < adev->rmmio_size)
		writeb(value, adev->rmmio + offset);
	else
		BUG();
}

/**
 * amdgpu_mm_wreg - write to a memory mapped IO register
 *
 * @adev: amdgpu_device pointer
 * @reg: dword aligned register offset
 * @v: 32 bit value to write to the register
 * @acc_flags: access flags which require special behavior
 *
 * Writes the value specified to the offset specified.
 */
void amdgpu_mm_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v,
		    uint32_t acc_flags)
{
	trace_amdgpu_mm_wreg(adev->pdev->device, reg, v);

	if (adev->asic_type >= CHIP_VEGA10 && reg == 0) {
		adev->last_mm_index = v;
	}

	if (!(acc_flags & AMDGPU_REGS_NO_KIQ) && amdgpu_sriov_runtime(adev))
		return amdgpu_virt_kiq_wreg(adev, reg, v);

	if ((reg * 4) < adev->rmmio_size && !(acc_flags & AMDGPU_REGS_IDX))
		writel(v, ((void __iomem *)adev->rmmio) + (reg * 4));
	else {
		unsigned long flags;

		spin_lock_irqsave(&adev->mmio_idx_lock, flags);
		writel((reg * 4), ((void __iomem *)adev->rmmio) + (mmMM_INDEX * 4));
		writel(v, ((void __iomem *)adev->rmmio) + (mmMM_DATA * 4));
		spin_unlock_irqrestore(&adev->mmio_idx_lock, flags);
	}

	if (adev->asic_type >= CHIP_VEGA10 && reg == 1 && adev->last_mm_index == 0x5702C) {
		udelay(500);
	}
}

e3ecdffa
AD
232/**
233 * amdgpu_io_rreg - read an IO register
234 *
235 * @adev: amdgpu_device pointer
236 * @reg: dword aligned register offset
237 *
238 * Returns the 32 bit value from the offset specified.
239 */
d38ceaf9
AD
240u32 amdgpu_io_rreg(struct amdgpu_device *adev, u32 reg)
241{
242 if ((reg * 4) < adev->rio_mem_size)
243 return ioread32(adev->rio_mem + (reg * 4));
244 else {
245 iowrite32((reg * 4), adev->rio_mem + (mmMM_INDEX * 4));
246 return ioread32(adev->rio_mem + (mmMM_DATA * 4));
247 }
248}
249
e3ecdffa
AD
250/**
251 * amdgpu_io_wreg - write to an IO register
252 *
253 * @adev: amdgpu_device pointer
254 * @reg: dword aligned register offset
255 * @v: 32 bit value to write to the register
256 *
257 * Writes the value specified to the offset specified.
258 */
d38ceaf9
AD
259void amdgpu_io_wreg(struct amdgpu_device *adev, u32 reg, u32 v)
260{
47ed4e1c
KW
261 if (adev->asic_type >= CHIP_VEGA10 && reg == 0) {
262 adev->last_mm_index = v;
263 }
d38ceaf9
AD
264
265 if ((reg * 4) < adev->rio_mem_size)
266 iowrite32(v, adev->rio_mem + (reg * 4));
267 else {
268 iowrite32((reg * 4), adev->rio_mem + (mmMM_INDEX * 4));
269 iowrite32(v, adev->rio_mem + (mmMM_DATA * 4));
270 }
47ed4e1c
KW
271
272 if (adev->asic_type >= CHIP_VEGA10 && reg == 1 && adev->last_mm_index == 0x5702C) {
273 udelay(500);
274 }
d38ceaf9
AD
275}
276
277/**
278 * amdgpu_mm_rdoorbell - read a doorbell dword
279 *
280 * @adev: amdgpu_device pointer
281 * @index: doorbell index
282 *
283 * Returns the value in the doorbell aperture at the
284 * requested doorbell index (CIK).
285 */
286u32 amdgpu_mm_rdoorbell(struct amdgpu_device *adev, u32 index)
287{
288 if (index < adev->doorbell.num_doorbells) {
289 return readl(adev->doorbell.ptr + index);
290 } else {
291 DRM_ERROR("reading beyond doorbell aperture: 0x%08x!\n", index);
292 return 0;
293 }
294}
295
296/**
297 * amdgpu_mm_wdoorbell - write a doorbell dword
298 *
299 * @adev: amdgpu_device pointer
300 * @index: doorbell index
301 * @v: value to write
302 *
303 * Writes @v to the doorbell aperture at the
304 * requested doorbell index (CIK).
305 */
306void amdgpu_mm_wdoorbell(struct amdgpu_device *adev, u32 index, u32 v)
307{
308 if (index < adev->doorbell.num_doorbells) {
309 writel(v, adev->doorbell.ptr + index);
310 } else {
311 DRM_ERROR("writing beyond doorbell aperture: 0x%08x!\n", index);
312 }
313}
314
832be404
KW
315/**
316 * amdgpu_mm_rdoorbell64 - read a doorbell Qword
317 *
318 * @adev: amdgpu_device pointer
319 * @index: doorbell index
320 *
321 * Returns the value in the doorbell aperture at the
322 * requested doorbell index (VEGA10+).
323 */
324u64 amdgpu_mm_rdoorbell64(struct amdgpu_device *adev, u32 index)
325{
326 if (index < adev->doorbell.num_doorbells) {
327 return atomic64_read((atomic64_t *)(adev->doorbell.ptr + index));
328 } else {
329 DRM_ERROR("reading beyond doorbell aperture: 0x%08x!\n", index);
330 return 0;
331 }
332}
333
334/**
335 * amdgpu_mm_wdoorbell64 - write a doorbell Qword
336 *
337 * @adev: amdgpu_device pointer
338 * @index: doorbell index
339 * @v: value to write
340 *
341 * Writes @v to the doorbell aperture at the
342 * requested doorbell index (VEGA10+).
343 */
344void amdgpu_mm_wdoorbell64(struct amdgpu_device *adev, u32 index, u64 v)
345{
346 if (index < adev->doorbell.num_doorbells) {
347 atomic64_set((atomic64_t *)(adev->doorbell.ptr + index), v);
348 } else {
349 DRM_ERROR("writing beyond doorbell aperture: 0x%08x!\n", index);
350 }
351}
352
d38ceaf9
AD
353/**
354 * amdgpu_invalid_rreg - dummy reg read function
355 *
356 * @adev: amdgpu device pointer
357 * @reg: offset of register
358 *
359 * Dummy register read function. Used for register blocks
360 * that certain asics don't have (all asics).
361 * Returns the value in the register.
362 */
363static uint32_t amdgpu_invalid_rreg(struct amdgpu_device *adev, uint32_t reg)
364{
365 DRM_ERROR("Invalid callback to read register 0x%04X\n", reg);
366 BUG();
367 return 0;
368}
369
370/**
371 * amdgpu_invalid_wreg - dummy reg write function
372 *
373 * @adev: amdgpu device pointer
374 * @reg: offset of register
375 * @v: value to write to the register
376 *
 * Dummy register write function. Used for register blocks
378 * that certain asics don't have (all asics).
379 */
380static void amdgpu_invalid_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v)
381{
382 DRM_ERROR("Invalid callback to write register 0x%04X with 0x%08X\n",
383 reg, v);
384 BUG();
385}
386
387/**
388 * amdgpu_block_invalid_rreg - dummy reg read function
389 *
390 * @adev: amdgpu device pointer
391 * @block: offset of instance
392 * @reg: offset of register
393 *
394 * Dummy register read function. Used for register blocks
395 * that certain asics don't have (all asics).
396 * Returns the value in the register.
397 */
398static uint32_t amdgpu_block_invalid_rreg(struct amdgpu_device *adev,
399 uint32_t block, uint32_t reg)
400{
401 DRM_ERROR("Invalid callback to read register 0x%04X in block 0x%04X\n",
402 reg, block);
403 BUG();
404 return 0;
405}
406
407/**
408 * amdgpu_block_invalid_wreg - dummy reg write function
409 *
410 * @adev: amdgpu device pointer
411 * @block: offset of instance
412 * @reg: offset of register
413 * @v: value to write to the register
414 *
 * Dummy register write function. Used for register blocks
416 * that certain asics don't have (all asics).
417 */
418static void amdgpu_block_invalid_wreg(struct amdgpu_device *adev,
419 uint32_t block,
420 uint32_t reg, uint32_t v)
421{
422 DRM_ERROR("Invalid block callback to write register 0x%04X in block 0x%04X with 0x%08X\n",
423 reg, block, v);
424 BUG();
425}
426
e3ecdffa
AD
427/**
428 * amdgpu_device_vram_scratch_init - allocate the VRAM scratch page
429 *
430 * @adev: amdgpu device pointer
431 *
432 * Allocates a scratch page of VRAM for use by various things in the
433 * driver.
434 */
06ec9070 435static int amdgpu_device_vram_scratch_init(struct amdgpu_device *adev)
d38ceaf9 436{
a4a02777
CK
437 return amdgpu_bo_create_kernel(adev, AMDGPU_GPU_PAGE_SIZE,
438 PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM,
439 &adev->vram_scratch.robj,
440 &adev->vram_scratch.gpu_addr,
441 (void **)&adev->vram_scratch.ptr);
d38ceaf9
AD
442}
443
e3ecdffa
AD
444/**
445 * amdgpu_device_vram_scratch_fini - Free the VRAM scratch page
446 *
447 * @adev: amdgpu device pointer
448 *
449 * Frees the VRAM scratch page.
450 */
06ec9070 451static void amdgpu_device_vram_scratch_fini(struct amdgpu_device *adev)
d38ceaf9 452{
078af1a3 453 amdgpu_bo_free_kernel(&adev->vram_scratch.robj, NULL, NULL);
d38ceaf9
AD
454}
455
456/**
9c3f2b54 457 * amdgpu_device_program_register_sequence - program an array of registers.
d38ceaf9
AD
458 *
459 * @adev: amdgpu_device pointer
460 * @registers: pointer to the register array
461 * @array_size: size of the register array
462 *
 * Programs an array of registers with AND and OR masks.
464 * This is a helper for setting golden registers.
465 */
9c3f2b54
AD
466void amdgpu_device_program_register_sequence(struct amdgpu_device *adev,
467 const u32 *registers,
468 const u32 array_size)
d38ceaf9
AD
469{
470 u32 tmp, reg, and_mask, or_mask;
471 int i;
472
473 if (array_size % 3)
474 return;
475
476 for (i = 0; i < array_size; i +=3) {
477 reg = registers[i + 0];
478 and_mask = registers[i + 1];
479 or_mask = registers[i + 2];
480
481 if (and_mask == 0xffffffff) {
482 tmp = or_mask;
483 } else {
484 tmp = RREG32(reg);
485 tmp &= ~and_mask;
486 tmp |= or_mask;
487 }
488 WREG32(reg, tmp);
489 }
490}
491
e3ecdffa
AD
492/**
493 * amdgpu_device_pci_config_reset - reset the GPU
494 *
495 * @adev: amdgpu_device pointer
496 *
497 * Resets the GPU using the pci config reset sequence.
498 * Only applicable to asics prior to vega10.
499 */
8111c387 500void amdgpu_device_pci_config_reset(struct amdgpu_device *adev)
d38ceaf9
AD
501{
502 pci_write_config_dword(adev->pdev, 0x7c, AMDGPU_ASIC_RESET_DATA);
503}
504
505/*
506 * GPU doorbell aperture helpers function.
507 */
508/**
06ec9070 509 * amdgpu_device_doorbell_init - Init doorbell driver information.
d38ceaf9
AD
510 *
511 * @adev: amdgpu_device pointer
512 *
513 * Init doorbell driver information (CIK)
514 * Returns 0 on success, error on failure.
515 */
06ec9070 516static int amdgpu_device_doorbell_init(struct amdgpu_device *adev)
d38ceaf9 517{
6585661d 518
705e519e
CK
519 /* No doorbell on SI hardware generation */
520 if (adev->asic_type < CHIP_BONAIRE) {
521 adev->doorbell.base = 0;
522 adev->doorbell.size = 0;
523 adev->doorbell.num_doorbells = 0;
524 adev->doorbell.ptr = NULL;
525 return 0;
526 }
527
d6895ad3
CK
528 if (pci_resource_flags(adev->pdev, 2) & IORESOURCE_UNSET)
529 return -EINVAL;
530
22357775
AD
531 amdgpu_asic_init_doorbell_index(adev);
532
d38ceaf9
AD
533 /* doorbell bar mapping */
534 adev->doorbell.base = pci_resource_start(adev->pdev, 2);
535 adev->doorbell.size = pci_resource_len(adev->pdev, 2);
536
edf600da 537 adev->doorbell.num_doorbells = min_t(u32, adev->doorbell.size / sizeof(u32),
9564f192 538 adev->doorbell_index.max_assignment+1);
d38ceaf9
AD
539 if (adev->doorbell.num_doorbells == 0)
540 return -EINVAL;
541
	/* For Vega, reserve and map two pages on the doorbell BAR since the SDMA
	 * paging queue doorbells use the second page. The
	 * AMDGPU_DOORBELL64_MAX_ASSIGNMENT definition assumes all the
	 * doorbells are in the first page. So with the paging queue enabled,
	 * the max num_doorbells should be incremented by one page (0x400 in dwords).
	 */
548 if (adev->asic_type >= CHIP_VEGA10)
88dc26e4 549 adev->doorbell.num_doorbells += 0x400;
ec3db8a6 550
8972e5d2
CK
551 adev->doorbell.ptr = ioremap(adev->doorbell.base,
552 adev->doorbell.num_doorbells *
553 sizeof(u32));
554 if (adev->doorbell.ptr == NULL)
d38ceaf9 555 return -ENOMEM;
d38ceaf9
AD
556
557 return 0;
558}
559
560/**
06ec9070 561 * amdgpu_device_doorbell_fini - Tear down doorbell driver information.
d38ceaf9
AD
562 *
563 * @adev: amdgpu_device pointer
564 *
565 * Tear down doorbell driver information (CIK)
566 */
06ec9070 567static void amdgpu_device_doorbell_fini(struct amdgpu_device *adev)
d38ceaf9
AD
568{
569 iounmap(adev->doorbell.ptr);
570 adev->doorbell.ptr = NULL;
571}
572
22cb0164 573
d38ceaf9
AD
574
575/*
06ec9070 576 * amdgpu_device_wb_*()
455a7bc2 577 * Writeback is the method by which the GPU updates special pages in memory
 * with the status of certain GPU events (fences, ring pointers, etc.).
d38ceaf9
AD
579 */
580
581/**
06ec9070 582 * amdgpu_device_wb_fini - Disable Writeback and free memory
d38ceaf9
AD
583 *
584 * @adev: amdgpu_device pointer
585 *
586 * Disables Writeback and frees the Writeback memory (all asics).
587 * Used at driver shutdown.
588 */
06ec9070 589static void amdgpu_device_wb_fini(struct amdgpu_device *adev)
d38ceaf9
AD
590{
591 if (adev->wb.wb_obj) {
a76ed485
AD
592 amdgpu_bo_free_kernel(&adev->wb.wb_obj,
593 &adev->wb.gpu_addr,
594 (void **)&adev->wb.wb);
d38ceaf9
AD
595 adev->wb.wb_obj = NULL;
596 }
597}
598
599/**
06ec9070 600 * amdgpu_device_wb_init- Init Writeback driver info and allocate memory
d38ceaf9
AD
601 *
602 * @adev: amdgpu_device pointer
603 *
455a7bc2 604 * Initializes writeback and allocates writeback memory (all asics).
d38ceaf9
AD
605 * Used at driver startup.
606 * Returns 0 on success or an -error on failure.
607 */
06ec9070 608static int amdgpu_device_wb_init(struct amdgpu_device *adev)
d38ceaf9
AD
609{
610 int r;
611
612 if (adev->wb.wb_obj == NULL) {
97407b63
AD
613 /* AMDGPU_MAX_WB * sizeof(uint32_t) * 8 = AMDGPU_MAX_WB 256bit slots */
614 r = amdgpu_bo_create_kernel(adev, AMDGPU_MAX_WB * sizeof(uint32_t) * 8,
a76ed485
AD
615 PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT,
616 &adev->wb.wb_obj, &adev->wb.gpu_addr,
617 (void **)&adev->wb.wb);
d38ceaf9
AD
618 if (r) {
619 dev_warn(adev->dev, "(%d) create WB bo failed\n", r);
620 return r;
621 }
d38ceaf9
AD
622
623 adev->wb.num_wb = AMDGPU_MAX_WB;
624 memset(&adev->wb.used, 0, sizeof(adev->wb.used));
625
626 /* clear wb memory */
73469585 627 memset((char *)adev->wb.wb, 0, AMDGPU_MAX_WB * sizeof(uint32_t) * 8);
d38ceaf9
AD
628 }
629
630 return 0;
631}
632
633/**
131b4b36 634 * amdgpu_device_wb_get - Allocate a wb entry
d38ceaf9
AD
635 *
636 * @adev: amdgpu_device pointer
637 * @wb: wb index
638 *
639 * Allocate a wb slot for use by the driver (all asics).
640 * Returns 0 on success or -EINVAL on failure.
641 */
131b4b36 642int amdgpu_device_wb_get(struct amdgpu_device *adev, u32 *wb)
d38ceaf9
AD
643{
644 unsigned long offset = find_first_zero_bit(adev->wb.used, adev->wb.num_wb);
d38ceaf9 645
97407b63 646 if (offset < adev->wb.num_wb) {
7014285a 647 __set_bit(offset, adev->wb.used);
63ae07ca 648 *wb = offset << 3; /* convert to dw offset */
0915fdbc
ML
649 return 0;
650 } else {
651 return -EINVAL;
652 }
653}
654
d38ceaf9 655/**
131b4b36 656 * amdgpu_device_wb_free - Free a wb entry
d38ceaf9
AD
657 *
658 * @adev: amdgpu_device pointer
659 * @wb: wb index
660 *
661 * Free a wb slot allocated for use by the driver (all asics)
662 */
131b4b36 663void amdgpu_device_wb_free(struct amdgpu_device *adev, u32 wb)
d38ceaf9 664{
73469585 665 wb >>= 3;
d38ceaf9 666 if (wb < adev->wb.num_wb)
73469585 667 __clear_bit(wb, adev->wb.used);
d38ceaf9
AD
668}
669
d6895ad3
CK
670/**
671 * amdgpu_device_resize_fb_bar - try to resize FB BAR
672 *
673 * @adev: amdgpu_device pointer
674 *
675 * Try to resize FB BAR to make all VRAM CPU accessible. We try very hard not
 * to fail, but if any of the BARs is not accessible after the resize we abort
677 * driver loading by returning -ENODEV.
678 */
679int amdgpu_device_resize_fb_bar(struct amdgpu_device *adev)
680{
770d13b1 681 u64 space_needed = roundup_pow_of_two(adev->gmc.real_vram_size);
d6895ad3 682 u32 rbar_size = order_base_2(((space_needed >> 20) | 1)) - 1;
31b8adab
CK
683 struct pci_bus *root;
684 struct resource *res;
685 unsigned i;
d6895ad3
CK
686 u16 cmd;
687 int r;
688
0c03b912 689 /* Bypass for VF */
690 if (amdgpu_sriov_vf(adev))
691 return 0;
692
31b8adab
CK
693 /* Check if the root BUS has 64bit memory resources */
694 root = adev->pdev->bus;
695 while (root->parent)
696 root = root->parent;
697
698 pci_bus_for_each_resource(root, res, i) {
0ebb7c54 699 if (res && res->flags & (IORESOURCE_MEM | IORESOURCE_MEM_64) &&
31b8adab
CK
700 res->start > 0x100000000ull)
701 break;
702 }
703
704 /* Trying to resize is pointless without a root hub window above 4GB */
705 if (!res)
706 return 0;
707
d6895ad3
CK
708 /* Disable memory decoding while we change the BAR addresses and size */
709 pci_read_config_word(adev->pdev, PCI_COMMAND, &cmd);
710 pci_write_config_word(adev->pdev, PCI_COMMAND,
711 cmd & ~PCI_COMMAND_MEMORY);
712
713 /* Free the VRAM and doorbell BAR, we most likely need to move both. */
06ec9070 714 amdgpu_device_doorbell_fini(adev);
d6895ad3
CK
715 if (adev->asic_type >= CHIP_BONAIRE)
716 pci_release_resource(adev->pdev, 2);
717
718 pci_release_resource(adev->pdev, 0);
719
720 r = pci_resize_resource(adev->pdev, 0, rbar_size);
721 if (r == -ENOSPC)
722 DRM_INFO("Not enough PCI address space for a large BAR.");
723 else if (r && r != -ENOTSUPP)
724 DRM_ERROR("Problem resizing BAR0 (%d).", r);
725
726 pci_assign_unassigned_bus_resources(adev->pdev->bus);
727
728 /* When the doorbell or fb BAR isn't available we have no chance of
729 * using the device.
730 */
06ec9070 731 r = amdgpu_device_doorbell_init(adev);
d6895ad3
CK
732 if (r || (pci_resource_flags(adev->pdev, 0) & IORESOURCE_UNSET))
733 return -ENODEV;
734
735 pci_write_config_word(adev->pdev, PCI_COMMAND, cmd);
736
737 return 0;
738}
a05502e5 739
d38ceaf9
AD
740/*
741 * GPU helpers function.
742 */
743/**
39c640c0 744 * amdgpu_device_need_post - check if the hw need post or not
d38ceaf9
AD
745 *
746 * @adev: amdgpu_device pointer
747 *
c836fec5
JQ
748 * Check if the asic has been initialized (all asics) at driver startup
749 * or post is needed if hw reset is performed.
 * Returns true if post is needed, false if not.
d38ceaf9 751 */
39c640c0 752bool amdgpu_device_need_post(struct amdgpu_device *adev)
d38ceaf9
AD
753{
754 uint32_t reg;
755
bec86378
ML
756 if (amdgpu_sriov_vf(adev))
757 return false;
758
759 if (amdgpu_passthrough(adev)) {
1da2c326
ML
		/* for FIJI: In the whole-GPU pass-through virtualization case, after a VM
		 * reboot some old SMC firmware still needs the driver to do a vPost,
		 * otherwise the GPU hangs. SMC firmware versions above 22.15 don't have
		 * this flaw, so we force vPost for SMC versions below 22.15.
		 */
765 if (adev->asic_type == CHIP_FIJI) {
766 int err;
767 uint32_t fw_ver;
768 err = request_firmware(&adev->pm.fw, "amdgpu/fiji_smc.bin", adev->dev);
			/* force vPost if error occurred */
770 if (err)
771 return true;
772
773 fw_ver = *((uint32_t *)adev->pm.fw->data + 69);
1da2c326
ML
774 if (fw_ver < 0x00160e00)
775 return true;
bec86378 776 }
bec86378 777 }
91fe77eb 778
779 if (adev->has_hw_reset) {
780 adev->has_hw_reset = false;
781 return true;
782 }
783
784 /* bios scratch used on CIK+ */
785 if (adev->asic_type >= CHIP_BONAIRE)
786 return amdgpu_atombios_scratch_need_asic_init(adev);
787
788 /* check MEM_SIZE for older asics */
789 reg = amdgpu_asic_get_config_memsize(adev);
790
791 if ((reg != 0) && (reg != 0xffffffff))
792 return false;
793
794 return true;
bec86378
ML
795}
796
d38ceaf9
AD
797/* if we get transitioned to only one device, take VGA back */
798/**
06ec9070 799 * amdgpu_device_vga_set_decode - enable/disable vga decode
d38ceaf9
AD
800 *
801 * @cookie: amdgpu_device pointer
802 * @state: enable/disable vga decode
803 *
804 * Enable/disable vga decode (all asics).
805 * Returns VGA resource flags.
806 */
06ec9070 807static unsigned int amdgpu_device_vga_set_decode(void *cookie, bool state)
d38ceaf9
AD
808{
809 struct amdgpu_device *adev = cookie;
810 amdgpu_asic_set_vga_state(adev, state);
811 if (state)
812 return VGA_RSRC_LEGACY_IO | VGA_RSRC_LEGACY_MEM |
813 VGA_RSRC_NORMAL_IO | VGA_RSRC_NORMAL_MEM;
814 else
815 return VGA_RSRC_NORMAL_IO | VGA_RSRC_NORMAL_MEM;
816}
817
e3ecdffa
AD
818/**
819 * amdgpu_device_check_block_size - validate the vm block size
820 *
821 * @adev: amdgpu_device pointer
822 *
823 * Validates the vm block size specified via module parameter.
824 * The vm block size defines number of bits in page table versus page directory,
825 * a page is 4KB so we have 12 bits offset, minimum 9 bits in the
826 * page table and the remaining bits are in the page directory.
827 */
06ec9070 828static void amdgpu_device_check_block_size(struct amdgpu_device *adev)
a1adf8be
CZ
829{
830 /* defines number of bits in page table versus page directory,
831 * a page is 4KB so we have 12 bits offset, minimum 9 bits in the
832 * page table and the remaining bits are in the page directory */
bab4fee7
JZ
833 if (amdgpu_vm_block_size == -1)
834 return;
a1adf8be 835
bab4fee7 836 if (amdgpu_vm_block_size < 9) {
a1adf8be
CZ
837 dev_warn(adev->dev, "VM page table size (%d) too small\n",
838 amdgpu_vm_block_size);
97489129 839 amdgpu_vm_block_size = -1;
a1adf8be 840 }
a1adf8be
CZ
841}
842
e3ecdffa
AD
843/**
844 * amdgpu_device_check_vm_size - validate the vm size
845 *
846 * @adev: amdgpu_device pointer
847 *
848 * Validates the vm size in GB specified via module parameter.
849 * The VM size is the size of the GPU virtual memory space in GB.
850 */
06ec9070 851static void amdgpu_device_check_vm_size(struct amdgpu_device *adev)
83ca145d 852{
64dab074
AD
853 /* no need to check the default value */
854 if (amdgpu_vm_size == -1)
855 return;
856
83ca145d
ZJ
857 if (amdgpu_vm_size < 1) {
858 dev_warn(adev->dev, "VM size (%d) too small, min is 1GB\n",
859 amdgpu_vm_size);
f3368128 860 amdgpu_vm_size = -1;
83ca145d 861 }
83ca145d
ZJ
862}
863
7951e376
RZ
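/**
 * amdgpu_device_check_smu_prv_buffer_size - validate the SMU memory pool size
 *
 * @adev: amdgpu_device pointer
 *
 * Validates the SMU private buffer size requested via the
 * amdgpu_smu_memory_pool_size module parameter and sets
 * adev->pm.smu_prv_buffer_size accordingly. The size is forced to 0 when the
 * request cannot be honored (32-bit kernel, unsupported pool size, or too
 * little system memory for the requested pool).
 */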
864static void amdgpu_device_check_smu_prv_buffer_size(struct amdgpu_device *adev)
865{
866 struct sysinfo si;
867 bool is_os_64 = (sizeof(void *) == 8) ? true : false;
868 uint64_t total_memory;
869 uint64_t dram_size_seven_GB = 0x1B8000000;
870 uint64_t dram_size_three_GB = 0xB8000000;
871
872 if (amdgpu_smu_memory_pool_size == 0)
873 return;
874
875 if (!is_os_64) {
876 DRM_WARN("Not 64-bit OS, feature not supported\n");
877 goto def_value;
878 }
879 si_meminfo(&si);
880 total_memory = (uint64_t)si.totalram * si.mem_unit;
881
882 if ((amdgpu_smu_memory_pool_size == 1) ||
883 (amdgpu_smu_memory_pool_size == 2)) {
884 if (total_memory < dram_size_three_GB)
885 goto def_value1;
886 } else if ((amdgpu_smu_memory_pool_size == 4) ||
887 (amdgpu_smu_memory_pool_size == 8)) {
888 if (total_memory < dram_size_seven_GB)
889 goto def_value1;
890 } else {
891 DRM_WARN("Smu memory pool size not supported\n");
892 goto def_value;
893 }
894 adev->pm.smu_prv_buffer_size = amdgpu_smu_memory_pool_size << 28;
895
896 return;
897
898def_value1:
	DRM_WARN("Not enough system memory\n");
900def_value:
901 adev->pm.smu_prv_buffer_size = 0;
902}
903
d38ceaf9 904/**
06ec9070 905 * amdgpu_device_check_arguments - validate module params
d38ceaf9
AD
906 *
907 * @adev: amdgpu_device pointer
908 *
909 * Validates certain module parameters and updates
910 * the associated values used by the driver (all asics).
911 */
06ec9070 912static void amdgpu_device_check_arguments(struct amdgpu_device *adev)
d38ceaf9 913{
5b011235
CZ
914 if (amdgpu_sched_jobs < 4) {
915 dev_warn(adev->dev, "sched jobs (%d) must be at least 4\n",
916 amdgpu_sched_jobs);
917 amdgpu_sched_jobs = 4;
76117507 918 } else if (!is_power_of_2(amdgpu_sched_jobs)){
5b011235
CZ
919 dev_warn(adev->dev, "sched jobs (%d) must be a power of 2\n",
920 amdgpu_sched_jobs);
921 amdgpu_sched_jobs = roundup_pow_of_two(amdgpu_sched_jobs);
922 }
d38ceaf9 923
83e74db6 924 if (amdgpu_gart_size != -1 && amdgpu_gart_size < 32) {
f9321cc4
CK
925 /* gart size must be greater or equal to 32M */
926 dev_warn(adev->dev, "gart size (%d) too small\n",
927 amdgpu_gart_size);
83e74db6 928 amdgpu_gart_size = -1;
d38ceaf9
AD
929 }
930
36d38372 931 if (amdgpu_gtt_size != -1 && amdgpu_gtt_size < 32) {
c4e1a13a 932 /* gtt size must be greater or equal to 32M */
36d38372
CK
933 dev_warn(adev->dev, "gtt size (%d) too small\n",
934 amdgpu_gtt_size);
935 amdgpu_gtt_size = -1;
d38ceaf9
AD
936 }
937
d07f14be
RH
938 /* valid range is between 4 and 9 inclusive */
939 if (amdgpu_vm_fragment_size != -1 &&
940 (amdgpu_vm_fragment_size > 9 || amdgpu_vm_fragment_size < 4)) {
941 dev_warn(adev->dev, "valid range is between 4 and 9\n");
942 amdgpu_vm_fragment_size = -1;
943 }
944
7951e376
RZ
945 amdgpu_device_check_smu_prv_buffer_size(adev);
946
06ec9070 947 amdgpu_device_check_vm_size(adev);
d38ceaf9 948
06ec9070 949 amdgpu_device_check_block_size(adev);
6a7f76e7 950
526bae37 951 if (amdgpu_vram_page_split != -1 && (amdgpu_vram_page_split < 16 ||
76117507 952 !is_power_of_2(amdgpu_vram_page_split))) {
6a7f76e7
CK
953 dev_warn(adev->dev, "invalid VRAM page split (%d)\n",
954 amdgpu_vram_page_split);
955 amdgpu_vram_page_split = 1024;
956 }
8854695a
AG
957
958 if (amdgpu_lockup_timeout == 0) {
		dev_warn(adev->dev, "lockup_timeout must be > 0, adjusting to 10000\n");
960 amdgpu_lockup_timeout = 10000;
961 }
19aede77
AD
962
963 adev->firmware.load_type = amdgpu_ucode_get_load_type(adev, amdgpu_fw_load_type);
d38ceaf9
AD
964}
965
966/**
967 * amdgpu_switcheroo_set_state - set switcheroo state
968 *
969 * @pdev: pci dev pointer
1694467b 970 * @state: vga_switcheroo state
d38ceaf9
AD
971 *
 * Callback for the switcheroo driver. Suspends or resumes
 * the asics before or after it is powered up using ACPI methods.
974 */
975static void amdgpu_switcheroo_set_state(struct pci_dev *pdev, enum vga_switcheroo_state state)
976{
977 struct drm_device *dev = pci_get_drvdata(pdev);
978
979 if (amdgpu_device_is_px(dev) && state == VGA_SWITCHEROO_OFF)
980 return;
981
982 if (state == VGA_SWITCHEROO_ON) {
7ca85295 983 pr_info("amdgpu: switched on\n");
d38ceaf9
AD
984 /* don't suspend or resume card normally */
985 dev->switch_power_state = DRM_SWITCH_POWER_CHANGING;
986
810ddc3a 987 amdgpu_device_resume(dev, true, true);
d38ceaf9 988
d38ceaf9
AD
989 dev->switch_power_state = DRM_SWITCH_POWER_ON;
990 drm_kms_helper_poll_enable(dev);
991 } else {
7ca85295 992 pr_info("amdgpu: switched off\n");
d38ceaf9
AD
993 drm_kms_helper_poll_disable(dev);
994 dev->switch_power_state = DRM_SWITCH_POWER_CHANGING;
810ddc3a 995 amdgpu_device_suspend(dev, true, true);
d38ceaf9
AD
996 dev->switch_power_state = DRM_SWITCH_POWER_OFF;
997 }
998}
999
1000/**
1001 * amdgpu_switcheroo_can_switch - see if switcheroo state can change
1002 *
1003 * @pdev: pci dev pointer
1004 *
 * Callback for the switcheroo driver. Check if the switcheroo
 * state can be changed.
1007 * Returns true if the state can be changed, false if not.
1008 */
1009static bool amdgpu_switcheroo_can_switch(struct pci_dev *pdev)
1010{
1011 struct drm_device *dev = pci_get_drvdata(pdev);
1012
1013 /*
1014 * FIXME: open_count is protected by drm_global_mutex but that would lead to
1015 * locking inversion with the driver load path. And the access here is
1016 * completely racy anyway. So don't bother with locking for now.
1017 */
1018 return dev->open_count == 0;
1019}
1020
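/* vga_switcheroo client callbacks used on PX (hybrid graphics) systems */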
1021static const struct vga_switcheroo_client_ops amdgpu_switcheroo_ops = {
1022 .set_gpu_state = amdgpu_switcheroo_set_state,
1023 .reprobe = NULL,
1024 .can_switch = amdgpu_switcheroo_can_switch,
1025};
1026
e3ecdffa
AD
1027/**
1028 * amdgpu_device_ip_set_clockgating_state - set the CG state
1029 *
87e3f136 1030 * @dev: amdgpu_device pointer
e3ecdffa
AD
1031 * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
1032 * @state: clockgating state (gate or ungate)
1033 *
1034 * Sets the requested clockgating state for all instances of
1035 * the hardware IP specified.
1036 * Returns the error code from the last instance.
1037 */
43fa561f 1038int amdgpu_device_ip_set_clockgating_state(void *dev,
2990a1fc
AD
1039 enum amd_ip_block_type block_type,
1040 enum amd_clockgating_state state)
d38ceaf9 1041{
43fa561f 1042 struct amdgpu_device *adev = dev;
d38ceaf9
AD
1043 int i, r = 0;
1044
1045 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 1046 if (!adev->ip_blocks[i].status.valid)
9ecbe7f5 1047 continue;
c722865a
RZ
1048 if (adev->ip_blocks[i].version->type != block_type)
1049 continue;
1050 if (!adev->ip_blocks[i].version->funcs->set_clockgating_state)
1051 continue;
1052 r = adev->ip_blocks[i].version->funcs->set_clockgating_state(
1053 (void *)adev, state);
1054 if (r)
1055 DRM_ERROR("set_clockgating_state of IP block <%s> failed %d\n",
1056 adev->ip_blocks[i].version->funcs->name, r);
d38ceaf9
AD
1057 }
1058 return r;
1059}
1060
e3ecdffa
AD
1061/**
1062 * amdgpu_device_ip_set_powergating_state - set the PG state
1063 *
87e3f136 1064 * @dev: amdgpu_device pointer
e3ecdffa
AD
1065 * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
1066 * @state: powergating state (gate or ungate)
1067 *
1068 * Sets the requested powergating state for all instances of
1069 * the hardware IP specified.
1070 * Returns the error code from the last instance.
1071 */
43fa561f 1072int amdgpu_device_ip_set_powergating_state(void *dev,
2990a1fc
AD
1073 enum amd_ip_block_type block_type,
1074 enum amd_powergating_state state)
d38ceaf9 1075{
43fa561f 1076 struct amdgpu_device *adev = dev;
d38ceaf9
AD
1077 int i, r = 0;
1078
1079 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 1080 if (!adev->ip_blocks[i].status.valid)
9ecbe7f5 1081 continue;
c722865a
RZ
1082 if (adev->ip_blocks[i].version->type != block_type)
1083 continue;
1084 if (!adev->ip_blocks[i].version->funcs->set_powergating_state)
1085 continue;
1086 r = adev->ip_blocks[i].version->funcs->set_powergating_state(
1087 (void *)adev, state);
1088 if (r)
1089 DRM_ERROR("set_powergating_state of IP block <%s> failed %d\n",
1090 adev->ip_blocks[i].version->funcs->name, r);
d38ceaf9
AD
1091 }
1092 return r;
1093}
1094
e3ecdffa
AD
1095/**
1096 * amdgpu_device_ip_get_clockgating_state - get the CG state
1097 *
1098 * @adev: amdgpu_device pointer
1099 * @flags: clockgating feature flags
1100 *
1101 * Walks the list of IPs on the device and updates the clockgating
1102 * flags for each IP.
1103 * Updates @flags with the feature flags for each hardware IP where
1104 * clockgating is enabled.
1105 */
2990a1fc
AD
1106void amdgpu_device_ip_get_clockgating_state(struct amdgpu_device *adev,
1107 u32 *flags)
6cb2d4e4
HR
1108{
1109 int i;
1110
1111 for (i = 0; i < adev->num_ip_blocks; i++) {
1112 if (!adev->ip_blocks[i].status.valid)
1113 continue;
1114 if (adev->ip_blocks[i].version->funcs->get_clockgating_state)
1115 adev->ip_blocks[i].version->funcs->get_clockgating_state((void *)adev, flags);
1116 }
1117}
1118
e3ecdffa
AD
1119/**
1120 * amdgpu_device_ip_wait_for_idle - wait for idle
1121 *
1122 * @adev: amdgpu_device pointer
1123 * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
1124 *
 * Waits for the requested hardware IP to be idle.
1126 * Returns 0 for success or a negative error code on failure.
1127 */
2990a1fc
AD
1128int amdgpu_device_ip_wait_for_idle(struct amdgpu_device *adev,
1129 enum amd_ip_block_type block_type)
5dbbb60b
AD
1130{
1131 int i, r;
1132
1133 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 1134 if (!adev->ip_blocks[i].status.valid)
9ecbe7f5 1135 continue;
a1255107
AD
1136 if (adev->ip_blocks[i].version->type == block_type) {
1137 r = adev->ip_blocks[i].version->funcs->wait_for_idle((void *)adev);
5dbbb60b
AD
1138 if (r)
1139 return r;
1140 break;
1141 }
1142 }
1143 return 0;
1144
1145}
1146
e3ecdffa
AD
1147/**
1148 * amdgpu_device_ip_is_idle - is the hardware IP idle
1149 *
1150 * @adev: amdgpu_device pointer
1151 * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
1152 *
1153 * Check if the hardware IP is idle or not.
 * Returns true if the IP is idle, false if not.
1155 */
2990a1fc
AD
1156bool amdgpu_device_ip_is_idle(struct amdgpu_device *adev,
1157 enum amd_ip_block_type block_type)
5dbbb60b
AD
1158{
1159 int i;
1160
1161 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 1162 if (!adev->ip_blocks[i].status.valid)
9ecbe7f5 1163 continue;
a1255107
AD
1164 if (adev->ip_blocks[i].version->type == block_type)
1165 return adev->ip_blocks[i].version->funcs->is_idle((void *)adev);
5dbbb60b
AD
1166 }
1167 return true;
1168
1169}
1170
e3ecdffa
AD
1171/**
1172 * amdgpu_device_ip_get_ip_block - get a hw IP pointer
1173 *
1174 * @adev: amdgpu_device pointer
87e3f136 1175 * @type: Type of hardware IP (SMU, GFX, UVD, etc.)
e3ecdffa
AD
1176 *
1177 * Returns a pointer to the hardware IP block structure
1178 * if it exists for the asic, otherwise NULL.
1179 */
2990a1fc
AD
1180struct amdgpu_ip_block *
1181amdgpu_device_ip_get_ip_block(struct amdgpu_device *adev,
1182 enum amd_ip_block_type type)
d38ceaf9
AD
1183{
1184 int i;
1185
1186 for (i = 0; i < adev->num_ip_blocks; i++)
a1255107 1187 if (adev->ip_blocks[i].version->type == type)
d38ceaf9
AD
1188 return &adev->ip_blocks[i];
1189
1190 return NULL;
1191}
1192
1193/**
2990a1fc 1194 * amdgpu_device_ip_block_version_cmp
d38ceaf9
AD
1195 *
1196 * @adev: amdgpu_device pointer
5fc3aeeb 1197 * @type: enum amd_ip_block_type
d38ceaf9
AD
1198 * @major: major version
1199 * @minor: minor version
1200 *
1201 * return 0 if equal or greater
1202 * return 1 if smaller or the ip_block doesn't exist
1203 */
2990a1fc
AD
1204int amdgpu_device_ip_block_version_cmp(struct amdgpu_device *adev,
1205 enum amd_ip_block_type type,
1206 u32 major, u32 minor)
d38ceaf9 1207{
2990a1fc 1208 struct amdgpu_ip_block *ip_block = amdgpu_device_ip_get_ip_block(adev, type);
d38ceaf9 1209
a1255107
AD
1210 if (ip_block && ((ip_block->version->major > major) ||
1211 ((ip_block->version->major == major) &&
1212 (ip_block->version->minor >= minor))))
d38ceaf9
AD
1213 return 0;
1214
1215 return 1;
1216}
1217
a1255107 1218/**
2990a1fc 1219 * amdgpu_device_ip_block_add
a1255107
AD
1220 *
1221 * @adev: amdgpu_device pointer
1222 * @ip_block_version: pointer to the IP to add
1223 *
1224 * Adds the IP block driver information to the collection of IPs
1225 * on the asic.
1226 */
2990a1fc
AD
1227int amdgpu_device_ip_block_add(struct amdgpu_device *adev,
1228 const struct amdgpu_ip_block_version *ip_block_version)
a1255107
AD
1229{
1230 if (!ip_block_version)
1231 return -EINVAL;
1232
e966a725 1233 DRM_INFO("add ip block number %d <%s>\n", adev->num_ip_blocks,
a0bae357
HR
1234 ip_block_version->funcs->name);
1235
a1255107
AD
1236 adev->ip_blocks[adev->num_ip_blocks++].version = ip_block_version;
1237
1238 return 0;
1239}
1240
e3ecdffa
AD
1241/**
1242 * amdgpu_device_enable_virtual_display - enable virtual display feature
1243 *
1244 * @adev: amdgpu_device pointer
1245 *
 * Enables the virtual display feature if the user has enabled it via
1247 * the module parameter virtual_display. This feature provides a virtual
1248 * display hardware on headless boards or in virtualized environments.
1249 * This function parses and validates the configuration string specified by
 * the user and configures the virtual display configuration (number of
1251 * virtual connectors, crtcs, etc.) specified.
1252 */
483ef985 1253static void amdgpu_device_enable_virtual_display(struct amdgpu_device *adev)
9accf2fd
ED
1254{
1255 adev->enable_virtual_display = false;
1256
1257 if (amdgpu_virtual_display) {
1258 struct drm_device *ddev = adev->ddev;
1259 const char *pci_address_name = pci_name(ddev->pdev);
0f66356d 1260 char *pciaddstr, *pciaddstr_tmp, *pciaddname_tmp, *pciaddname;
9accf2fd
ED
1261
1262 pciaddstr = kstrdup(amdgpu_virtual_display, GFP_KERNEL);
1263 pciaddstr_tmp = pciaddstr;
0f66356d
ED
1264 while ((pciaddname_tmp = strsep(&pciaddstr_tmp, ";"))) {
1265 pciaddname = strsep(&pciaddname_tmp, ",");
967de2a9
YT
1266 if (!strcmp("all", pciaddname)
1267 || !strcmp(pci_address_name, pciaddname)) {
0f66356d
ED
1268 long num_crtc;
1269 int res = -1;
1270
9accf2fd 1271 adev->enable_virtual_display = true;
0f66356d
ED
1272
1273 if (pciaddname_tmp)
1274 res = kstrtol(pciaddname_tmp, 10,
1275 &num_crtc);
1276
1277 if (!res) {
1278 if (num_crtc < 1)
1279 num_crtc = 1;
1280 if (num_crtc > 6)
1281 num_crtc = 6;
1282 adev->mode_info.num_crtc = num_crtc;
1283 } else {
1284 adev->mode_info.num_crtc = 1;
1285 }
9accf2fd
ED
1286 break;
1287 }
1288 }
1289
0f66356d
ED
1290 DRM_INFO("virtual display string:%s, %s:virtual_display:%d, num_crtc:%d\n",
1291 amdgpu_virtual_display, pci_address_name,
1292 adev->enable_virtual_display, adev->mode_info.num_crtc);
9accf2fd
ED
1293
1294 kfree(pciaddstr);
1295 }
1296}
1297
e3ecdffa
AD
1298/**
1299 * amdgpu_device_parse_gpu_info_fw - parse gpu info firmware
1300 *
1301 * @adev: amdgpu_device pointer
1302 *
1303 * Parses the asic configuration parameters specified in the gpu info
 * firmware and makes them available to the driver for use in configuring
1305 * the asic.
1306 * Returns 0 on success, -EINVAL on failure.
1307 */
e2a75f88
AD
1308static int amdgpu_device_parse_gpu_info_fw(struct amdgpu_device *adev)
1309{
e2a75f88
AD
1310 const char *chip_name;
1311 char fw_name[30];
1312 int err;
1313 const struct gpu_info_firmware_header_v1_0 *hdr;
1314
ab4fe3e1
HR
1315 adev->firmware.gpu_info_fw = NULL;
1316
e2a75f88
AD
1317 switch (adev->asic_type) {
1318 case CHIP_TOPAZ:
1319 case CHIP_TONGA:
1320 case CHIP_FIJI:
e2a75f88 1321 case CHIP_POLARIS10:
cc07f18d 1322 case CHIP_POLARIS11:
e2a75f88 1323 case CHIP_POLARIS12:
cc07f18d 1324 case CHIP_VEGAM:
e2a75f88
AD
1325 case CHIP_CARRIZO:
1326 case CHIP_STONEY:
1327#ifdef CONFIG_DRM_AMDGPU_SI
1328 case CHIP_VERDE:
1329 case CHIP_TAHITI:
1330 case CHIP_PITCAIRN:
1331 case CHIP_OLAND:
1332 case CHIP_HAINAN:
1333#endif
1334#ifdef CONFIG_DRM_AMDGPU_CIK
1335 case CHIP_BONAIRE:
1336 case CHIP_HAWAII:
1337 case CHIP_KAVERI:
1338 case CHIP_KABINI:
1339 case CHIP_MULLINS:
1340#endif
27c0bc71 1341 case CHIP_VEGA20:
e2a75f88
AD
1342 default:
1343 return 0;
1344 case CHIP_VEGA10:
1345 chip_name = "vega10";
1346 break;
3f76dced
AD
1347 case CHIP_VEGA12:
1348 chip_name = "vega12";
1349 break;
2d2e5e7e 1350 case CHIP_RAVEN:
54c4d17e
FX
1351 if (adev->rev_id >= 8)
1352 chip_name = "raven2";
741deade
AD
1353 else if (adev->pdev->device == 0x15d8)
1354 chip_name = "picasso";
54c4d17e
FX
1355 else
1356 chip_name = "raven";
2d2e5e7e 1357 break;
e2a75f88
AD
1358 }
1359
1360 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_gpu_info.bin", chip_name);
ab4fe3e1 1361 err = request_firmware(&adev->firmware.gpu_info_fw, fw_name, adev->dev);
e2a75f88
AD
1362 if (err) {
1363 dev_err(adev->dev,
1364 "Failed to load gpu_info firmware \"%s\"\n",
1365 fw_name);
1366 goto out;
1367 }
ab4fe3e1 1368 err = amdgpu_ucode_validate(adev->firmware.gpu_info_fw);
e2a75f88
AD
1369 if (err) {
1370 dev_err(adev->dev,
1371 "Failed to validate gpu_info firmware \"%s\"\n",
1372 fw_name);
1373 goto out;
1374 }
1375
ab4fe3e1 1376 hdr = (const struct gpu_info_firmware_header_v1_0 *)adev->firmware.gpu_info_fw->data;
e2a75f88
AD
1377 amdgpu_ucode_print_gpu_info_hdr(&hdr->header);
1378
1379 switch (hdr->version_major) {
1380 case 1:
1381 {
1382 const struct gpu_info_firmware_v1_0 *gpu_info_fw =
ab4fe3e1 1383 (const struct gpu_info_firmware_v1_0 *)(adev->firmware.gpu_info_fw->data +
e2a75f88
AD
1384 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1385
b5ab16bf
AD
1386 adev->gfx.config.max_shader_engines = le32_to_cpu(gpu_info_fw->gc_num_se);
1387 adev->gfx.config.max_cu_per_sh = le32_to_cpu(gpu_info_fw->gc_num_cu_per_sh);
1388 adev->gfx.config.max_sh_per_se = le32_to_cpu(gpu_info_fw->gc_num_sh_per_se);
1389 adev->gfx.config.max_backends_per_se = le32_to_cpu(gpu_info_fw->gc_num_rb_per_se);
e2a75f88 1390 adev->gfx.config.max_texture_channel_caches =
b5ab16bf
AD
1391 le32_to_cpu(gpu_info_fw->gc_num_tccs);
1392 adev->gfx.config.max_gprs = le32_to_cpu(gpu_info_fw->gc_num_gprs);
1393 adev->gfx.config.max_gs_threads = le32_to_cpu(gpu_info_fw->gc_num_max_gs_thds);
1394 adev->gfx.config.gs_vgt_table_depth = le32_to_cpu(gpu_info_fw->gc_gs_table_depth);
1395 adev->gfx.config.gs_prim_buffer_depth = le32_to_cpu(gpu_info_fw->gc_gsprim_buff_depth);
e2a75f88 1396 adev->gfx.config.double_offchip_lds_buf =
b5ab16bf
AD
1397 le32_to_cpu(gpu_info_fw->gc_double_offchip_lds_buffer);
1398 adev->gfx.cu_info.wave_front_size = le32_to_cpu(gpu_info_fw->gc_wave_size);
51fd0370
HZ
1399 adev->gfx.cu_info.max_waves_per_simd =
1400 le32_to_cpu(gpu_info_fw->gc_max_waves_per_simd);
1401 adev->gfx.cu_info.max_scratch_slots_per_cu =
1402 le32_to_cpu(gpu_info_fw->gc_max_scratch_slots_per_cu);
1403 adev->gfx.cu_info.lds_size = le32_to_cpu(gpu_info_fw->gc_lds_size);
e2a75f88
AD
1404 break;
1405 }
1406 default:
1407 dev_err(adev->dev,
1408 "Unsupported gpu_info table %d\n", hdr->header.ucode_version);
1409 err = -EINVAL;
1410 goto out;
1411 }
1412out:
e2a75f88
AD
1413 return err;
1414}
1415
e3ecdffa
AD
1416/**
1417 * amdgpu_device_ip_early_init - run early init for hardware IPs
1418 *
1419 * @adev: amdgpu_device pointer
1420 *
1421 * Early initialization pass for hardware IPs. The hardware IPs that make
 * up each asic are discovered and each IP's early_init callback is run. This
1423 * is the first stage in initializing the asic.
1424 * Returns 0 on success, negative error code on failure.
1425 */
06ec9070 1426static int amdgpu_device_ip_early_init(struct amdgpu_device *adev)
d38ceaf9 1427{
aaa36a97 1428 int i, r;
d38ceaf9 1429
483ef985 1430 amdgpu_device_enable_virtual_display(adev);
a6be7570 1431
d38ceaf9 1432 switch (adev->asic_type) {
aaa36a97
AD
1433 case CHIP_TOPAZ:
1434 case CHIP_TONGA:
48299f95 1435 case CHIP_FIJI:
2cc0c0b5 1436 case CHIP_POLARIS10:
32cc7e53 1437 case CHIP_POLARIS11:
c4642a47 1438 case CHIP_POLARIS12:
32cc7e53 1439 case CHIP_VEGAM:
aaa36a97 1440 case CHIP_CARRIZO:
39bb0c92
SL
1441 case CHIP_STONEY:
1442 if (adev->asic_type == CHIP_CARRIZO || adev->asic_type == CHIP_STONEY)
aaa36a97
AD
1443 adev->family = AMDGPU_FAMILY_CZ;
1444 else
1445 adev->family = AMDGPU_FAMILY_VI;
1446
1447 r = vi_set_ip_blocks(adev);
1448 if (r)
1449 return r;
1450 break;
33f34802
KW
1451#ifdef CONFIG_DRM_AMDGPU_SI
1452 case CHIP_VERDE:
1453 case CHIP_TAHITI:
1454 case CHIP_PITCAIRN:
1455 case CHIP_OLAND:
1456 case CHIP_HAINAN:
295d0daf 1457 adev->family = AMDGPU_FAMILY_SI;
33f34802
KW
1458 r = si_set_ip_blocks(adev);
1459 if (r)
1460 return r;
1461 break;
1462#endif
a2e73f56
AD
1463#ifdef CONFIG_DRM_AMDGPU_CIK
1464 case CHIP_BONAIRE:
1465 case CHIP_HAWAII:
1466 case CHIP_KAVERI:
1467 case CHIP_KABINI:
1468 case CHIP_MULLINS:
1469 if ((adev->asic_type == CHIP_BONAIRE) || (adev->asic_type == CHIP_HAWAII))
1470 adev->family = AMDGPU_FAMILY_CI;
1471 else
1472 adev->family = AMDGPU_FAMILY_KV;
1473
1474 r = cik_set_ip_blocks(adev);
1475 if (r)
1476 return r;
1477 break;
1478#endif
e48a3cd9
AD
1479 case CHIP_VEGA10:
1480 case CHIP_VEGA12:
e4bd8170 1481 case CHIP_VEGA20:
e48a3cd9 1482 case CHIP_RAVEN:
741deade 1483 if (adev->asic_type == CHIP_RAVEN)
2ca8a5d2
CZ
1484 adev->family = AMDGPU_FAMILY_RV;
1485 else
1486 adev->family = AMDGPU_FAMILY_AI;
460826e6
KW
1487
1488 r = soc15_set_ip_blocks(adev);
1489 if (r)
1490 return r;
1491 break;
d38ceaf9
AD
1492 default:
1493 /* FIXME: not supported yet */
1494 return -EINVAL;
1495 }
1496
e2a75f88
AD
1497 r = amdgpu_device_parse_gpu_info_fw(adev);
1498 if (r)
1499 return r;
1500
1884734a 1501 amdgpu_amdkfd_device_probe(adev);
1502
3149d9da
XY
1503 if (amdgpu_sriov_vf(adev)) {
1504 r = amdgpu_virt_request_full_gpu(adev, true);
1505 if (r)
5ffa61c1 1506 return -EAGAIN;
3149d9da
XY
1507 }
1508
00f54b97
HR
1509 adev->powerplay.pp_feature = amdgpu_pp_feature_mask;
1510
d38ceaf9
AD
1511 for (i = 0; i < adev->num_ip_blocks; i++) {
1512 if ((amdgpu_ip_block_mask & (1 << i)) == 0) {
ed8cf00c
HR
1513 DRM_ERROR("disabled ip block: %d <%s>\n",
1514 i, adev->ip_blocks[i].version->funcs->name);
a1255107 1515 adev->ip_blocks[i].status.valid = false;
d38ceaf9 1516 } else {
a1255107
AD
1517 if (adev->ip_blocks[i].version->funcs->early_init) {
1518 r = adev->ip_blocks[i].version->funcs->early_init((void *)adev);
2c1a2784 1519 if (r == -ENOENT) {
a1255107 1520 adev->ip_blocks[i].status.valid = false;
2c1a2784 1521 } else if (r) {
a1255107
AD
1522 DRM_ERROR("early_init of IP block <%s> failed %d\n",
1523 adev->ip_blocks[i].version->funcs->name, r);
d38ceaf9 1524 return r;
2c1a2784 1525 } else {
a1255107 1526 adev->ip_blocks[i].status.valid = true;
2c1a2784 1527 }
974e6b64 1528 } else {
a1255107 1529 adev->ip_blocks[i].status.valid = true;
d38ceaf9 1530 }
d38ceaf9
AD
1531 }
1532 }
1533
395d1fb9
NH
1534 adev->cg_flags &= amdgpu_cg_mask;
1535 adev->pg_flags &= amdgpu_pg_mask;
1536
d38ceaf9
AD
1537 return 0;
1538}
1539
0a4f2520
RZ
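/**
 * amdgpu_device_ip_hw_init_phase1 - run hw_init for COMMON and IH IP blocks
 *
 * @adev: amdgpu_device pointer
 *
 * First of the two hardware init phases. Walks the IP blocks and runs the
 * hw_init callback only for the COMMON and IH blocks, which need to be up
 * before firmware loading and before the remaining blocks are initialized.
 * Returns 0 on success, negative error code on failure.
 */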
1540static int amdgpu_device_ip_hw_init_phase1(struct amdgpu_device *adev)
1541{
1542 int i, r;
1543
1544 for (i = 0; i < adev->num_ip_blocks; i++) {
1545 if (!adev->ip_blocks[i].status.sw)
1546 continue;
1547 if (adev->ip_blocks[i].status.hw)
1548 continue;
1549 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
1550 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH) {
1551 r = adev->ip_blocks[i].version->funcs->hw_init(adev);
1552 if (r) {
1553 DRM_ERROR("hw_init of IP block <%s> failed %d\n",
1554 adev->ip_blocks[i].version->funcs->name, r);
1555 return r;
1556 }
1557 adev->ip_blocks[i].status.hw = true;
1558 }
1559 }
1560
1561 return 0;
1562}
1563
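/**
 * amdgpu_device_ip_hw_init_phase2 - run hw_init for the remaining IP blocks
 *
 * @adev: amdgpu_device pointer
 *
 * Second hardware init phase. Walks the IP blocks and runs the hw_init
 * callback for every block that has completed sw_init but not yet hw_init,
 * i.e. everything not already handled in phase 1.
 * Returns 0 on success, negative error code on failure.
 */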
1564static int amdgpu_device_ip_hw_init_phase2(struct amdgpu_device *adev)
1565{
1566 int i, r;
1567
1568 for (i = 0; i < adev->num_ip_blocks; i++) {
1569 if (!adev->ip_blocks[i].status.sw)
1570 continue;
1571 if (adev->ip_blocks[i].status.hw)
1572 continue;
1573 r = adev->ip_blocks[i].version->funcs->hw_init(adev);
1574 if (r) {
1575 DRM_ERROR("hw_init of IP block <%s> failed %d\n",
1576 adev->ip_blocks[i].version->funcs->name, r);
1577 return r;
1578 }
1579 adev->ip_blocks[i].status.hw = true;
1580 }
1581
1582 return 0;
1583}
1584
7a3e0bb2
RZ
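/**
 * amdgpu_device_fw_loading - kick off firmware loading
 *
 * @adev: amdgpu_device pointer
 *
 * On VEGA10 and later, brings up (or resumes) the PSP IP block so it can load
 * the microcode for the other blocks, then asks powerplay to load its
 * firmware if a load_firmware callback is provided.
 * Returns 0 on success, negative error code on failure.
 */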
1585static int amdgpu_device_fw_loading(struct amdgpu_device *adev)
1586{
1587 int r = 0;
1588 int i;
1589
1590 if (adev->asic_type >= CHIP_VEGA10) {
1591 for (i = 0; i < adev->num_ip_blocks; i++) {
1592 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP) {
1593 if (adev->in_gpu_reset || adev->in_suspend) {
1594 if (amdgpu_sriov_vf(adev) && adev->in_gpu_reset)
1595 break; /* sriov gpu reset, psp need to do hw_init before IH because of hw limit */
1596 r = adev->ip_blocks[i].version->funcs->resume(adev);
1597 if (r) {
1598 DRM_ERROR("resume of IP block <%s> failed %d\n",
1599 adev->ip_blocks[i].version->funcs->name, r);
1600 return r;
1601 }
1602 } else {
1603 r = adev->ip_blocks[i].version->funcs->hw_init(adev);
1604 if (r) {
1605 DRM_ERROR("hw_init of IP block <%s> failed %d\n",
1606 adev->ip_blocks[i].version->funcs->name, r);
1607 return r;
1608 }
1609 }
1610 adev->ip_blocks[i].status.hw = true;
1611 }
1612 }
1613 }
1614
91eec27e 1615 if (adev->powerplay.pp_funcs && adev->powerplay.pp_funcs->load_firmware) {
7a3e0bb2
RZ
1616 r = adev->powerplay.pp_funcs->load_firmware(adev->powerplay.pp_handle);
1617 if (r) {
1618 pr_err("firmware loading failed\n");
1619 return r;
1620 }
1621 }
1622
1623 return 0;
1624}
1625
e3ecdffa
AD
1626/**
1627 * amdgpu_device_ip_init - run init for hardware IPs
1628 *
1629 * @adev: amdgpu_device pointer
1630 *
1631 * Main initialization pass for hardware IPs. The list of all the hardware
1632 * IPs that make up the asic is walked and the sw_init and hw_init callbacks
1633 * are run. sw_init initializes the software state associated with each IP
1634 * and hw_init initializes the hardware associated with each IP.
1635 * Returns 0 on success, negative error code on failure.
1636 */
06ec9070 1637static int amdgpu_device_ip_init(struct amdgpu_device *adev)
d38ceaf9
AD
1638{
1639 int i, r;
1640
1641 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 1642 if (!adev->ip_blocks[i].status.valid)
d38ceaf9 1643 continue;
a1255107 1644 r = adev->ip_blocks[i].version->funcs->sw_init((void *)adev);
2c1a2784 1645 if (r) {
a1255107
AD
1646 DRM_ERROR("sw_init of IP block <%s> failed %d\n",
1647 adev->ip_blocks[i].version->funcs->name, r);
d38ceaf9 1648 return r;
2c1a2784 1649 }
a1255107 1650 adev->ip_blocks[i].status.sw = true;
bfca0289 1651
d38ceaf9 1652 /* need to do gmc hw init early so we can allocate gpu mem */
a1255107 1653 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) {
06ec9070 1654 r = amdgpu_device_vram_scratch_init(adev);
2c1a2784
AD
1655 if (r) {
1656 DRM_ERROR("amdgpu_vram_scratch_init failed %d\n", r);
d38ceaf9 1657 return r;
2c1a2784 1658 }
a1255107 1659 r = adev->ip_blocks[i].version->funcs->hw_init((void *)adev);
2c1a2784
AD
1660 if (r) {
1661 DRM_ERROR("hw_init %d failed %d\n", i, r);
d38ceaf9 1662 return r;
2c1a2784 1663 }
06ec9070 1664 r = amdgpu_device_wb_init(adev);
2c1a2784 1665 if (r) {
06ec9070 1666 DRM_ERROR("amdgpu_device_wb_init failed %d\n", r);
d38ceaf9 1667 return r;
2c1a2784 1668 }
a1255107 1669 adev->ip_blocks[i].status.hw = true;
2493664f
ML
1670
1671 /* right after GMC hw init, we create CSA */
1672 if (amdgpu_sriov_vf(adev)) {
1e256e27
RZ
1673 r = amdgpu_allocate_static_csa(adev, &adev->virt.csa_obj,
1674 AMDGPU_GEM_DOMAIN_VRAM,
1675 AMDGPU_CSA_SIZE);
2493664f
ML
1676 if (r) {
1677 DRM_ERROR("allocate CSA failed %d\n", r);
1678 return r;
1679 }
1680 }
d38ceaf9
AD
1681 }
1682 }
1683
c8963ea4
RZ
1684 r = amdgpu_ucode_create_bo(adev); /* create ucode bo when sw_init complete*/
1685 if (r)
1686 return r;
0a4f2520
RZ
1687
1688 r = amdgpu_device_ip_hw_init_phase1(adev);
1689 if (r)
1690 return r;
1691
7a3e0bb2
RZ
1692 r = amdgpu_device_fw_loading(adev);
1693 if (r)
1694 return r;
1695
0a4f2520
RZ
1696 r = amdgpu_device_ip_hw_init_phase2(adev);
1697 if (r)
1698 return r;
d38ceaf9 1699
3e2e2ab5
HZ
1700 if (adev->gmc.xgmi.num_physical_nodes > 1)
1701 amdgpu_xgmi_add_device(adev);
1884734a 1702 amdgpu_amdkfd_device_init(adev);
c6332b97 1703
d3c117e5
ED
1704 if (amdgpu_sriov_vf(adev)) {
1705 amdgpu_virt_init_data_exchange(adev);
c6332b97 1706 amdgpu_virt_release_full_gpu(adev, true);
d3c117e5 1707 }
c6332b97 1708
d38ceaf9
AD
1709 return 0;
1710}
1711
e3ecdffa
AD
1712/**
1713 * amdgpu_device_fill_reset_magic - writes reset magic to gart pointer
1714 *
1715 * @adev: amdgpu_device pointer
1716 *
1717 * Writes a reset magic value to the gart pointer in VRAM. The driver calls
1718 * this function before a GPU reset. If the value is retained after a
 * GPU reset, VRAM has not been lost. Some GPU resets may destroy VRAM contents.
1720 */
06ec9070 1721static void amdgpu_device_fill_reset_magic(struct amdgpu_device *adev)
0c49e0b8
CZ
1722{
1723 memcpy(adev->reset_magic, adev->gart.ptr, AMDGPU_RESET_MAGIC_NUM);
1724}
1725
e3ecdffa
AD
1726/**
1727 * amdgpu_device_check_vram_lost - check if vram is valid
1728 *
1729 * @adev: amdgpu_device pointer
1730 *
1731 * Checks the reset magic value written to the gart pointer in VRAM.
1732 * The driver calls this after a GPU reset to see if the contents of
 * VRAM is lost or not.
1734 * returns true if vram is lost, false if not.
1735 */
06ec9070 1736static bool amdgpu_device_check_vram_lost(struct amdgpu_device *adev)
0c49e0b8
CZ
1737{
1738 return !!memcmp(adev->gart.ptr, adev->reset_magic,
1739 AMDGPU_RESET_MAGIC_NUM);
1740}
1741
e3ecdffa 1742/**
1112a46b 1743 * amdgpu_device_set_cg_state - set clockgating for amdgpu device
e3ecdffa
AD
1744 *
1745 * @adev: amdgpu_device pointer
1746 *
e3ecdffa 1747 * The list of all the hardware IPs that make up the asic is walked and the
1112a46b
RZ
1748 * set_clockgating_state callbacks are run.
1749 * The late initialization pass enables clockgating for the hardware IPs;
1750 * the fini or suspend pass disables clockgating for the hardware IPs.
e3ecdffa
AD
1751 * Returns 0 on success, negative error code on failure.
1752 */
fdd34271 1753
1112a46b
RZ
1754static int amdgpu_device_set_cg_state(struct amdgpu_device *adev,
1755 enum amd_clockgating_state state)
d38ceaf9 1756{
1112a46b 1757 int i, j, r;
d38ceaf9 1758
4a2ba394
SL
1759 if (amdgpu_emu_mode == 1)
1760 return 0;
1761
1112a46b
RZ
1762 for (j = 0; j < adev->num_ip_blocks; j++) {
1763 i = state == AMD_CG_STATE_GATE ? j : adev->num_ip_blocks - j - 1;
a2d31dc3 1764 if (!adev->ip_blocks[i].status.late_initialized)
d38ceaf9 1765 continue;
4a446d55 1766 /* skip CG for VCE/UVD, it's handled specially */
a1255107 1767 if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_UVD &&
57716327 1768 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCE &&
34319b32 1769 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCN &&
57716327 1770 adev->ip_blocks[i].version->funcs->set_clockgating_state) {
4a446d55 1771 /* enable clockgating to save power */
a1255107 1772 r = adev->ip_blocks[i].version->funcs->set_clockgating_state((void *)adev,
1112a46b 1773 state);
4a446d55
AD
1774 if (r) {
1775 DRM_ERROR("set_clockgating_state(gate) of IP block <%s> failed %d\n",
a1255107 1776 adev->ip_blocks[i].version->funcs->name, r);
4a446d55
AD
1777 return r;
1778 }
b0b00ff1 1779 }
d38ceaf9 1780 }
06b18f61 1781
c9f96fd5
RZ
1782 return 0;
1783}
1784
1112a46b 1785static int amdgpu_device_set_pg_state(struct amdgpu_device *adev, enum amd_powergating_state state)
c9f96fd5 1786{
1112a46b 1787 int i, j, r;
06b18f61 1788
c9f96fd5
RZ
1789 if (amdgpu_emu_mode == 1)
1790 return 0;
1791
1112a46b
RZ
1792 for (j = 0; j < adev->num_ip_blocks; j++) {
1793 i = state == AMD_PG_STATE_GATE ? j : adev->num_ip_blocks - j - 1;
a2d31dc3 1794 if (!adev->ip_blocks[i].status.late_initialized)
c9f96fd5
RZ
1795 continue;
1796 /* skip CG for VCE/UVD, it's handled specially */
1797 if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_UVD &&
1798 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCE &&
1799 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCN &&
1800 adev->ip_blocks[i].version->funcs->set_powergating_state) {
1801 /* enable powergating to save power */
1802 r = adev->ip_blocks[i].version->funcs->set_powergating_state((void *)adev,
1112a46b 1803 state);
c9f96fd5
RZ
1804 if (r) {
1805 DRM_ERROR("set_powergating_state(gate) of IP block <%s> failed %d\n",
1806 adev->ip_blocks[i].version->funcs->name, r);
1807 return r;
1808 }
1809 }
1810 }
2dc80b00
S
1811 return 0;
1812}
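/*
 * Ordering note for the two state helpers above: when gating (late init),
 * clockgating is applied before powergating; when ungating (fini/suspend),
 * powergating is dropped before clockgating, i.e. the reverse order.
 */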
1813
e3ecdffa
AD
1814/**
1815 * amdgpu_device_ip_late_init - run late init for hardware IPs
1816 *
1817 * @adev: amdgpu_device pointer
1818 *
1819 * Late initialization pass for hardware IPs. The list of all the hardware
1820 * IPs that make up the asic is walked and the late_init callbacks are run.
1821 * late_init covers any special initialization that an IP requires
1822 * after all of them have been initialized or something that needs to happen
1823 * late in the init process.
1824 * Returns 0 on success, negative error code on failure.
1825 */
06ec9070 1826static int amdgpu_device_ip_late_init(struct amdgpu_device *adev)
2dc80b00
S
1827{
1828 int i = 0, r;
1829
1830 for (i = 0; i < adev->num_ip_blocks; i++) {
73f847db 1831 if (!adev->ip_blocks[i].status.hw)
2dc80b00
S
1832 continue;
1833 if (adev->ip_blocks[i].version->funcs->late_init) {
1834 r = adev->ip_blocks[i].version->funcs->late_init((void *)adev);
1835 if (r) {
1836 DRM_ERROR("late_init of IP block <%s> failed %d\n",
1837 adev->ip_blocks[i].version->funcs->name, r);
1838 return r;
1839 }
2dc80b00 1840 }
73f847db 1841 adev->ip_blocks[i].status.late_initialized = true;
2dc80b00
S
1842 }
1843
1112a46b
RZ
1844 amdgpu_device_set_cg_state(adev, AMD_CG_STATE_GATE);
1845 amdgpu_device_set_pg_state(adev, AMD_PG_STATE_GATE);
916ac57f 1846
2c773de2
S
1847 queue_delayed_work(system_wq, &adev->late_init_work,
1848 msecs_to_jiffies(AMDGPU_RESUME_MS));
d38ceaf9 1849
06ec9070 1850 amdgpu_device_fill_reset_magic(adev);
d38ceaf9
AD
1851
1852 return 0;
1853}
1854
e3ecdffa
AD
1855/**
1856 * amdgpu_device_ip_fini - run fini for hardware IPs
1857 *
1858 * @adev: amdgpu_device pointer
1859 *
1860 * Main teardown pass for hardware IPs. The list of all the hardware
1861 * IPs that make up the asic is walked and the hw_fini and sw_fini callbacks
1862 * are run. hw_fini tears down the hardware associated with each IP
1863 * and sw_fini tears down any software state associated with each IP.
1864 * Returns 0 on success, negative error code on failure.
1865 */
06ec9070 1866static int amdgpu_device_ip_fini(struct amdgpu_device *adev)
d38ceaf9
AD
1867{
1868 int i, r;
1869
a82400b5
AG
1870 if (adev->gmc.xgmi.num_physical_nodes > 1)
1871 amdgpu_xgmi_remove_device(adev);
1872
1884734a 1873 amdgpu_amdkfd_device_fini(adev);
05df1f01
RZ
1874
1875 amdgpu_device_set_pg_state(adev, AMD_PG_STATE_UNGATE);
fdd34271
RZ
1876 amdgpu_device_set_cg_state(adev, AMD_CG_STATE_UNGATE);
1877
3e96dbfd
AD
1878 /* need to disable SMC first */
1879 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 1880 if (!adev->ip_blocks[i].status.hw)
3e96dbfd 1881 continue;
fdd34271 1882 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) {
a1255107 1883 r = adev->ip_blocks[i].version->funcs->hw_fini((void *)adev);
3e96dbfd
AD
1884 /* XXX handle errors */
1885 if (r) {
1886 DRM_DEBUG("hw_fini of IP block <%s> failed %d\n",
a1255107 1887 adev->ip_blocks[i].version->funcs->name, r);
3e96dbfd 1888 }
a1255107 1889 adev->ip_blocks[i].status.hw = false;
3e96dbfd
AD
1890 break;
1891 }
1892 }
1893
d38ceaf9 1894 for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
a1255107 1895 if (!adev->ip_blocks[i].status.hw)
d38ceaf9 1896 continue;
8201a67a 1897
a1255107 1898 r = adev->ip_blocks[i].version->funcs->hw_fini((void *)adev);
d38ceaf9 1899 /* XXX handle errors */
2c1a2784 1900 if (r) {
a1255107
AD
1901 DRM_DEBUG("hw_fini of IP block <%s> failed %d\n",
1902 adev->ip_blocks[i].version->funcs->name, r);
2c1a2784 1903 }
8201a67a 1904
a1255107 1905 adev->ip_blocks[i].status.hw = false;
d38ceaf9
AD
1906 }
1907
9950cda2 1908
d38ceaf9 1909 for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
a1255107 1910 if (!adev->ip_blocks[i].status.sw)
d38ceaf9 1911 continue;
c12aba3a
ML
1912
1913 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) {
c8963ea4 1914 amdgpu_ucode_free_bo(adev);
1e256e27 1915 amdgpu_free_static_csa(&adev->virt.csa_obj);
c12aba3a
ML
1916 amdgpu_device_wb_fini(adev);
1917 amdgpu_device_vram_scratch_fini(adev);
1918 }
1919
a1255107 1920 r = adev->ip_blocks[i].version->funcs->sw_fini((void *)adev);
d38ceaf9 1921 /* XXX handle errors */
2c1a2784 1922 if (r) {
a1255107
AD
1923 DRM_DEBUG("sw_fini of IP block <%s> failed %d\n",
1924 adev->ip_blocks[i].version->funcs->name, r);
2c1a2784 1925 }
a1255107
AD
1926 adev->ip_blocks[i].status.sw = false;
1927 adev->ip_blocks[i].status.valid = false;
d38ceaf9
AD
1928 }
1929
a6dcfd9c 1930 for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
a1255107 1931 if (!adev->ip_blocks[i].status.late_initialized)
8a2eef1d 1932 continue;
a1255107
AD
1933 if (adev->ip_blocks[i].version->funcs->late_fini)
1934 adev->ip_blocks[i].version->funcs->late_fini((void *)adev);
1935 adev->ip_blocks[i].status.late_initialized = false;
a6dcfd9c
ML
1936 }
1937
030308fc 1938 if (amdgpu_sriov_vf(adev))
24136135
ML
1939 if (amdgpu_virt_release_full_gpu(adev, false))
1940 DRM_ERROR("failed to release exclusive mode on fini\n");
2493664f 1941
d38ceaf9
AD
1942 return 0;
1943}
1944
b55c9e7a
EQ
1945static int amdgpu_device_enable_mgpu_fan_boost(void)
1946{
1947 struct amdgpu_gpu_instance *gpu_ins;
1948 struct amdgpu_device *adev;
1949 int i, ret = 0;
1950
1951 mutex_lock(&mgpu_info.mutex);
1952
1953 /*
1954 * MGPU fan boost feature should be enabled
1955 * only when there are two or more dGPUs in
1956 * the system
1957 */
1958 if (mgpu_info.num_dgpu < 2)
1959 goto out;
1960
1961 for (i = 0; i < mgpu_info.num_dgpu; i++) {
1962 gpu_ins = &(mgpu_info.gpu_ins[i]);
1963 adev = gpu_ins->adev;
1964 if (!(adev->flags & AMD_IS_APU) &&
1965 !gpu_ins->mgpu_fan_enabled &&
1966 adev->powerplay.pp_funcs &&
1967 adev->powerplay.pp_funcs->enable_mgpu_fan_boost) {
1968 ret = amdgpu_dpm_enable_mgpu_fan_boost(adev);
1969 if (ret)
1970 break;
1971
1972 gpu_ins->mgpu_fan_enabled = 1;
1973 }
1974 }
1975
1976out:
1977 mutex_unlock(&mgpu_info.mutex);
1978
1979 return ret;
1980}
1981
e3ecdffa 1982/**
1112a46b 1983 * amdgpu_device_ip_late_init_func_handler - work handler for IB tests and other late init work
e3ecdffa 1984 *
1112a46b 1985 * @work: work_struct.
e3ecdffa 1986 */
06ec9070 1987static void amdgpu_device_ip_late_init_func_handler(struct work_struct *work)
2dc80b00
S
1988{
1989 struct amdgpu_device *adev =
1990 container_of(work, struct amdgpu_device, late_init_work.work);
916ac57f
RZ
1991 int r;
1992
1993 r = amdgpu_ib_ring_tests(adev);
1994 if (r)
1995 DRM_ERROR("ib ring test failed (%d).\n", r);
b55c9e7a
EQ
1996
1997 r = amdgpu_device_enable_mgpu_fan_boost();
1998 if (r)
1999 DRM_ERROR("enable mgpu fan boost failed (%d).\n", r);
2dc80b00
S
2000}
2001
1e317b99
RZ
2002static void amdgpu_device_delay_enable_gfx_off(struct work_struct *work)
2003{
2004 struct amdgpu_device *adev =
2005 container_of(work, struct amdgpu_device, gfx.gfx_off_delay_work.work);
2006
2007 mutex_lock(&adev->gfx.gfx_off_mutex);
2008 if (!adev->gfx.gfx_off_state && !adev->gfx.gfx_off_req_count) {
2009 if (!amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_GFX, true))
2010 adev->gfx.gfx_off_state = true;
2011 }
2012 mutex_unlock(&adev->gfx.gfx_off_mutex);
2013}
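/*
 * Minimal sketch of how the gfx_off_req_count consumed by the delayed work
 * above is typically driven: blocks of work that need the GFX core bump the
 * count (and leave GFXOFF if already entered), and the last release re-arms
 * the delayed enable. The function name and the 100 ms delay are illustrative
 * assumptions.
 */
static void example_gfx_off_ctrl(struct amdgpu_device *adev, bool allow)
{
	mutex_lock(&adev->gfx.gfx_off_mutex);

	if (!allow)
		adev->gfx.gfx_off_req_count++;
	else if (adev->gfx.gfx_off_req_count > 0)
		adev->gfx.gfx_off_req_count--;

	if (allow && !adev->gfx.gfx_off_state && !adev->gfx.gfx_off_req_count) {
		/* last blocker gone: let the delayed work enable GFXOFF again */
		schedule_delayed_work(&adev->gfx.gfx_off_delay_work,
				      msecs_to_jiffies(100));
	} else if (!allow && adev->gfx.gfx_off_state) {
		/* a new user needs GFX: leave GFXOFF right away */
		if (!amdgpu_dpm_set_powergating_by_smu(adev,
						       AMD_IP_BLOCK_TYPE_GFX, false))
			adev->gfx.gfx_off_state = false;
	}

	mutex_unlock(&adev->gfx.gfx_off_mutex);
}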
2014
e3ecdffa 2015/**
e7854a03 2016 * amdgpu_device_ip_suspend_phase1 - run suspend for hardware IPs (phase 1)
e3ecdffa
AD
2017 *
2018 * @adev: amdgpu_device pointer
2019 *
2020 * Main suspend function for hardware IPs. The list of all the hardware
2021 * IPs that make up the asic is walked, clockgating is disabled and the
2022 * suspend callbacks are run. suspend puts the hardware and software state
2023 * in each IP into a state suitable for suspend.
2024 * Returns 0 on success, negative error code on failure.
2025 */
e7854a03
AD
2026static int amdgpu_device_ip_suspend_phase1(struct amdgpu_device *adev)
2027{
2028 int i, r;
2029
05df1f01 2030 amdgpu_device_set_pg_state(adev, AMD_PG_STATE_UNGATE);
fdd34271 2031 amdgpu_device_set_cg_state(adev, AMD_CG_STATE_UNGATE);
05df1f01 2032
e7854a03
AD
2033 for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
2034 if (!adev->ip_blocks[i].status.valid)
2035 continue;
2036 /* displays are handled separately */
2037 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_DCE) {
e7854a03
AD
2038 /* XXX handle errors */
2039 r = adev->ip_blocks[i].version->funcs->suspend(adev);
2040 /* XXX handle errors */
2041 if (r) {
2042 DRM_ERROR("suspend of IP block <%s> failed %d\n",
2043 adev->ip_blocks[i].version->funcs->name, r);
2044 }
2045 }
2046 }
2047
e7854a03
AD
2048 return 0;
2049}
2050
2051/**
2052 * amdgpu_device_ip_suspend_phase2 - run suspend for hardware IPs (phase 2)
2053 *
2054 * @adev: amdgpu_device pointer
2055 *
2056 * Main suspend function for hardware IPs. The list of all the hardware
2057 * IPs that make up the asic is walked, clockgating is disabled and the
2058 * suspend callbacks are run. suspend puts the hardware and software state
2059 * in each IP into a state suitable for suspend.
2060 * Returns 0 on success, negative error code on failure.
2061 */
2062static int amdgpu_device_ip_suspend_phase2(struct amdgpu_device *adev)
d38ceaf9
AD
2063{
2064 int i, r;
2065
2066 for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
a1255107 2067 if (!adev->ip_blocks[i].status.valid)
d38ceaf9 2068 continue;
e7854a03
AD
2069 /* displays are handled in phase1 */
2070 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_DCE)
2071 continue;
d38ceaf9 2072 /* XXX handle errors */
a1255107 2073 r = adev->ip_blocks[i].version->funcs->suspend(adev);
d38ceaf9 2074 /* XXX handle errors */
2c1a2784 2075 if (r) {
a1255107
AD
2076 DRM_ERROR("suspend of IP block <%s> failed %d\n",
2077 adev->ip_blocks[i].version->funcs->name, r);
2c1a2784 2078 }
d38ceaf9
AD
2079 }
2080
2081 return 0;
2082}
2083
e7854a03
AD
2084/**
2085 * amdgpu_device_ip_suspend - run suspend for hardware IPs
2086 *
2087 * @adev: amdgpu_device pointer
2088 *
2089 * Main suspend function for hardware IPs. The list of all the hardware
2090 * IPs that make up the asic is walked, clockgating is disabled and the
2091 * suspend callbacks are run. suspend puts the hardware and software state
2092 * in each IP into a state suitable for suspend.
2093 * Returns 0 on success, negative error code on failure.
2094 */
2095int amdgpu_device_ip_suspend(struct amdgpu_device *adev)
2096{
2097 int r;
2098
e7819644
YT
2099 if (amdgpu_sriov_vf(adev))
2100 amdgpu_virt_request_full_gpu(adev, false);
2101
e7854a03
AD
2102 r = amdgpu_device_ip_suspend_phase1(adev);
2103 if (r)
2104 return r;
2105 r = amdgpu_device_ip_suspend_phase2(adev);
2106
e7819644
YT
2107 if (amdgpu_sriov_vf(adev))
2108 amdgpu_virt_release_full_gpu(adev, false);
2109
e7854a03
AD
2110 return r;
2111}
2112
06ec9070 2113static int amdgpu_device_ip_reinit_early_sriov(struct amdgpu_device *adev)
a90ad3c2
ML
2114{
2115 int i, r;
2116
2cb681b6
ML
2117 static enum amd_ip_block_type ip_order[] = {
2118 AMD_IP_BLOCK_TYPE_GMC,
2119 AMD_IP_BLOCK_TYPE_COMMON,
39186aef 2120 AMD_IP_BLOCK_TYPE_PSP,
2cb681b6
ML
2121 AMD_IP_BLOCK_TYPE_IH,
2122 };
a90ad3c2 2123
2cb681b6
ML
2124 for (i = 0; i < ARRAY_SIZE(ip_order); i++) {
2125 int j;
2126 struct amdgpu_ip_block *block;
a90ad3c2 2127
2cb681b6
ML
2128 for (j = 0; j < adev->num_ip_blocks; j++) {
2129 block = &adev->ip_blocks[j];
2130
2131 if (block->version->type != ip_order[i] ||
2132 !block->status.valid)
2133 continue;
2134
2135 r = block->version->funcs->hw_init(adev);
3f48c681 2136 DRM_INFO("RE-INIT: %s %s\n", block->version->funcs->name, r?"failed":"succeeded");
c41d1cf6
ML
2137 if (r)
2138 return r;
a90ad3c2
ML
2139 }
2140 }
2141
2142 return 0;
2143}
2144
06ec9070 2145static int amdgpu_device_ip_reinit_late_sriov(struct amdgpu_device *adev)
a90ad3c2
ML
2146{
2147 int i, r;
2148
2cb681b6
ML
2149 static enum amd_ip_block_type ip_order[] = {
2150 AMD_IP_BLOCK_TYPE_SMC,
2151 AMD_IP_BLOCK_TYPE_DCE,
2152 AMD_IP_BLOCK_TYPE_GFX,
2153 AMD_IP_BLOCK_TYPE_SDMA,
257deb8c
FM
2154 AMD_IP_BLOCK_TYPE_UVD,
2155 AMD_IP_BLOCK_TYPE_VCE
2cb681b6 2156 };
a90ad3c2 2157
2cb681b6
ML
2158 for (i = 0; i < ARRAY_SIZE(ip_order); i++) {
2159 int j;
2160 struct amdgpu_ip_block *block;
a90ad3c2 2161
2cb681b6
ML
2162 for (j = 0; j < adev->num_ip_blocks; j++) {
2163 block = &adev->ip_blocks[j];
2164
2165 if (block->version->type != ip_order[i] ||
2166 !block->status.valid)
2167 continue;
2168
2169 r = block->version->funcs->hw_init(adev);
3f48c681 2170 DRM_INFO("RE-INIT: %s %s\n", block->version->funcs->name, r?"failed":"succeeded");
c41d1cf6
ML
2171 if (r)
2172 return r;
a90ad3c2
ML
2173 }
2174 }
2175
2176 return 0;
2177}
2178
e3ecdffa
AD
2179/**
2180 * amdgpu_device_ip_resume_phase1 - run resume for hardware IPs
2181 *
2182 * @adev: amdgpu_device pointer
2183 *
2184 * First resume function for hardware IPs. The list of all the hardware
2185 * IPs that make up the asic is walked and the resume callbacks are run for
2186 * COMMON, GMC, and IH. resume puts the hardware into a functional state
2187 * after a suspend and updates the software state as necessary. This
2188 * function is also used for restoring the GPU after a GPU reset.
2189 * Returns 0 on success, negative error code on failure.
2190 */
06ec9070 2191static int amdgpu_device_ip_resume_phase1(struct amdgpu_device *adev)
d38ceaf9
AD
2192{
2193 int i, r;
2194
a90ad3c2
ML
2195 for (i = 0; i < adev->num_ip_blocks; i++) {
2196 if (!adev->ip_blocks[i].status.valid)
2197 continue;
a90ad3c2 2198 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
e3ecdffa
AD
2199 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
2200 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH) {
fcf0649f
CZ
2201 r = adev->ip_blocks[i].version->funcs->resume(adev);
2202 if (r) {
2203 DRM_ERROR("resume of IP block <%s> failed %d\n",
2204 adev->ip_blocks[i].version->funcs->name, r);
2205 return r;
2206 }
a90ad3c2
ML
2207 }
2208 }
2209
2210 return 0;
2211}
2212
e3ecdffa
AD
2213/**
2214 * amdgpu_device_ip_resume_phase2 - run resume for hardware IPs
2215 *
2216 * @adev: amdgpu_device pointer
2217 *
2218 * Second resume function for hardware IPs. The list of all the hardware
2219 * IPs that make up the asic is walked and the resume callbacks are run for
2220 * all blocks except COMMON, GMC, and IH. resume puts the hardware into a
2221 * functional state after a suspend and updates the software state as
2222 * necessary. This function is also used for restoring the GPU after a GPU
2223 * reset.
2224 * Returns 0 on success, negative error code on failure.
2225 */
06ec9070 2226static int amdgpu_device_ip_resume_phase2(struct amdgpu_device *adev)
d38ceaf9
AD
2227{
2228 int i, r;
2229
2230 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 2231 if (!adev->ip_blocks[i].status.valid)
d38ceaf9 2232 continue;
fcf0649f 2233 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
e3ecdffa 2234 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
7a3e0bb2
RZ
2235 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH ||
2236 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP)
fcf0649f 2237 continue;
a1255107 2238 r = adev->ip_blocks[i].version->funcs->resume(adev);
2c1a2784 2239 if (r) {
a1255107
AD
2240 DRM_ERROR("resume of IP block <%s> failed %d\n",
2241 adev->ip_blocks[i].version->funcs->name, r);
d38ceaf9 2242 return r;
2c1a2784 2243 }
d38ceaf9
AD
2244 }
2245
2246 return 0;
2247}
2248
e3ecdffa
AD
2249/**
2250 * amdgpu_device_ip_resume - run resume for hardware IPs
2251 *
2252 * @adev: amdgpu_device pointer
2253 *
2254 * Main resume function for hardware IPs. The hardware IPs
2255 * are split into two resume functions because they are
2256 * also used in recovering from a GPU reset and some additional
2257 * steps need to be taken between them. In this case (S3/S4) they are
2258 * run sequentially.
2259 * Returns 0 on success, negative error code on failure.
2260 */
06ec9070 2261static int amdgpu_device_ip_resume(struct amdgpu_device *adev)
fcf0649f
CZ
2262{
2263 int r;
2264
06ec9070 2265 r = amdgpu_device_ip_resume_phase1(adev);
fcf0649f
CZ
2266 if (r)
2267 return r;
7a3e0bb2
RZ
2268
2269 r = amdgpu_device_fw_loading(adev);
2270 if (r)
2271 return r;
2272
06ec9070 2273 r = amdgpu_device_ip_resume_phase2(adev);
fcf0649f
CZ
2274
2275 return r;
2276}
2277
e3ecdffa
AD
2278/**
2279 * amdgpu_device_detect_sriov_bios - determine if the board supports SR-IOV
2280 *
2281 * @adev: amdgpu_device pointer
2282 *
2283 * Query the VBIOS data tables to determine if the board supports SR-IOV.
2284 */
4e99a44e 2285static void amdgpu_device_detect_sriov_bios(struct amdgpu_device *adev)
048765ad 2286{
6867e1b5
ML
2287 if (amdgpu_sriov_vf(adev)) {
2288 if (adev->is_atom_fw) {
2289 if (amdgpu_atomfirmware_gpu_supports_virtualization(adev))
2290 adev->virt.caps |= AMDGPU_SRIOV_CAPS_SRIOV_VBIOS;
2291 } else {
2292 if (amdgpu_atombios_has_gpu_virtualization_table(adev))
2293 adev->virt.caps |= AMDGPU_SRIOV_CAPS_SRIOV_VBIOS;
2294 }
2295
2296 if (!(adev->virt.caps & AMDGPU_SRIOV_CAPS_SRIOV_VBIOS))
2297 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_NO_VBIOS, 0, 0);
a5bde2f9 2298 }
048765ad
AR
2299}
2300
e3ecdffa
AD
2301/**
2302 * amdgpu_device_asic_has_dc_support - determine if DC supports the asic
2303 *
2304 * @asic_type: AMD asic type
2305 *
2306 * Check if there is DC (new modesetting infrastructure) support for an asic.
2307 * returns true if DC has support, false if not.
2308 */
4562236b
HW
2309bool amdgpu_device_asic_has_dc_support(enum amd_asic_type asic_type)
2310{
2311 switch (asic_type) {
2312#if defined(CONFIG_DRM_AMD_DC)
2313 case CHIP_BONAIRE:
0d6fbccb 2314 case CHIP_KAVERI:
367e6687
AD
2315 case CHIP_KABINI:
2316 case CHIP_MULLINS:
d9fda248
HW
2317 /*
2318 * We have systems in the wild with these ASICs that require
2319 * LVDS and VGA support which is not supported with DC.
2320 *
2321 * Fallback to the non-DC driver here by default so as not to
2322 * cause regressions.
2323 */
2324 return amdgpu_dc > 0;
2325 case CHIP_HAWAII:
4562236b
HW
2326 case CHIP_CARRIZO:
2327 case CHIP_STONEY:
4562236b 2328 case CHIP_POLARIS10:
675fd32b 2329 case CHIP_POLARIS11:
2c8ad2d5 2330 case CHIP_POLARIS12:
675fd32b 2331 case CHIP_VEGAM:
4562236b
HW
2332 case CHIP_TONGA:
2333 case CHIP_FIJI:
42f8ffa1 2334 case CHIP_VEGA10:
dca7b401 2335 case CHIP_VEGA12:
c6034aa2 2336 case CHIP_VEGA20:
dc37a9a0 2337#if defined(CONFIG_DRM_AMD_DC_DCN1_0)
fd187853 2338 case CHIP_RAVEN:
42f8ffa1 2339#endif
fd187853 2340 return amdgpu_dc != 0;
4562236b
HW
2341#endif
2342 default:
2343 return false;
2344 }
2345}
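/*
 * In short: on the older DCE ASICs that may still need LVDS/VGA, the DC path
 * is only taken when amdgpu_dc is explicitly set > 0; on the newer ASICs DC
 * is used unless amdgpu_dc is explicitly set to 0.
 */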
2346
2347/**
2348 * amdgpu_device_has_dc_support - check if dc is supported
2349 *
2350 * @adev: amdgpu_device pointer
2351 *
2352 * Returns true for supported, false for not supported
2353 */
2354bool amdgpu_device_has_dc_support(struct amdgpu_device *adev)
2355{
2555039d
XY
2356 if (amdgpu_sriov_vf(adev))
2357 return false;
2358
4562236b
HW
2359 return amdgpu_device_asic_has_dc_support(adev->asic_type);
2360}
2361
d4535e2c
AG
2362
2363static void amdgpu_device_xgmi_reset_func(struct work_struct *__work)
2364{
2365 struct amdgpu_device *adev =
2366 container_of(__work, struct amdgpu_device, xgmi_reset_work);
2367
2368 adev->asic_reset_res = amdgpu_asic_reset(adev);
2369 if (adev->asic_reset_res)
2370 DRM_WARN("ASIC reset failed with err r, %d for drm dev, %s",
2371 adev->asic_reset_res, adev->ddev->unique);
2372}
2373
2374
d38ceaf9
AD
2375/**
2376 * amdgpu_device_init - initialize the driver
2377 *
2378 * @adev: amdgpu_device pointer
87e3f136 2379 * @ddev: drm dev pointer
d38ceaf9
AD
2380 * @pdev: pci dev pointer
2381 * @flags: driver flags
2382 *
2383 * Initializes the driver info and hw (all asics).
2384 * Returns 0 for success or an error on failure.
2385 * Called at driver startup.
2386 */
2387int amdgpu_device_init(struct amdgpu_device *adev,
2388 struct drm_device *ddev,
2389 struct pci_dev *pdev,
2390 uint32_t flags)
2391{
2392 int r, i;
2393 bool runtime = false;
95844d20 2394 u32 max_MBps;
d38ceaf9
AD
2395
2396 adev->shutdown = false;
2397 adev->dev = &pdev->dev;
2398 adev->ddev = ddev;
2399 adev->pdev = pdev;
2400 adev->flags = flags;
2f7d10b3 2401 adev->asic_type = flags & AMD_ASIC_MASK;
d38ceaf9 2402 adev->usec_timeout = AMDGPU_MAX_USEC_TIMEOUT;
593aa2d2
SL
2403 if (amdgpu_emu_mode == 1)
2404 adev->usec_timeout *= 2;
770d13b1 2405 adev->gmc.gart_size = 512 * 1024 * 1024;
d38ceaf9
AD
2406 adev->accel_working = false;
2407 adev->num_rings = 0;
2408 adev->mman.buffer_funcs = NULL;
2409 adev->mman.buffer_funcs_ring = NULL;
2410 adev->vm_manager.vm_pte_funcs = NULL;
3798e9a6 2411 adev->vm_manager.vm_pte_num_rqs = 0;
132f34e4 2412 adev->gmc.gmc_funcs = NULL;
f54d1867 2413 adev->fence_context = dma_fence_context_alloc(AMDGPU_MAX_RINGS);
b8866c26 2414 bitmap_zero(adev->gfx.pipe_reserve_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
d38ceaf9
AD
2415
2416 adev->smc_rreg = &amdgpu_invalid_rreg;
2417 adev->smc_wreg = &amdgpu_invalid_wreg;
2418 adev->pcie_rreg = &amdgpu_invalid_rreg;
2419 adev->pcie_wreg = &amdgpu_invalid_wreg;
36b9a952
HR
2420 adev->pciep_rreg = &amdgpu_invalid_rreg;
2421 adev->pciep_wreg = &amdgpu_invalid_wreg;
d38ceaf9
AD
2422 adev->uvd_ctx_rreg = &amdgpu_invalid_rreg;
2423 adev->uvd_ctx_wreg = &amdgpu_invalid_wreg;
2424 adev->didt_rreg = &amdgpu_invalid_rreg;
2425 adev->didt_wreg = &amdgpu_invalid_wreg;
ccdbb20a
RZ
2426 adev->gc_cac_rreg = &amdgpu_invalid_rreg;
2427 adev->gc_cac_wreg = &amdgpu_invalid_wreg;
d38ceaf9
AD
2428 adev->audio_endpt_rreg = &amdgpu_block_invalid_rreg;
2429 adev->audio_endpt_wreg = &amdgpu_block_invalid_wreg;
2430
3e39ab90
AD
2431 DRM_INFO("initializing kernel modesetting (%s 0x%04X:0x%04X 0x%04X:0x%04X 0x%02X).\n",
2432 amdgpu_asic_name[adev->asic_type], pdev->vendor, pdev->device,
2433 pdev->subsystem_vendor, pdev->subsystem_device, pdev->revision);
d38ceaf9
AD
2434
2435 /* mutex initializations are all done here so we
2436 * can call these functions again later without locking issues */
d38ceaf9 2437 atomic_set(&adev->irq.ih.lock, 0);
0e5ca0d1 2438 mutex_init(&adev->firmware.mutex);
d38ceaf9
AD
2439 mutex_init(&adev->pm.mutex);
2440 mutex_init(&adev->gfx.gpu_clock_mutex);
2441 mutex_init(&adev->srbm_mutex);
b8866c26 2442 mutex_init(&adev->gfx.pipe_reserve_mutex);
d23ee13f 2443 mutex_init(&adev->gfx.gfx_off_mutex);
d38ceaf9 2444 mutex_init(&adev->grbm_idx_mutex);
d38ceaf9 2445 mutex_init(&adev->mn_lock);
e23b74aa 2446 mutex_init(&adev->virt.vf_errors.lock);
d38ceaf9 2447 hash_init(adev->mn_hash);
13a752e3 2448 mutex_init(&adev->lock_reset);
d38ceaf9 2449
06ec9070 2450 amdgpu_device_check_arguments(adev);
d38ceaf9 2451
d38ceaf9
AD
2452 spin_lock_init(&adev->mmio_idx_lock);
2453 spin_lock_init(&adev->smc_idx_lock);
2454 spin_lock_init(&adev->pcie_idx_lock);
2455 spin_lock_init(&adev->uvd_ctx_idx_lock);
2456 spin_lock_init(&adev->didt_idx_lock);
ccdbb20a 2457 spin_lock_init(&adev->gc_cac_idx_lock);
16abb5d2 2458 spin_lock_init(&adev->se_cac_idx_lock);
d38ceaf9 2459 spin_lock_init(&adev->audio_endpt_idx_lock);
95844d20 2460 spin_lock_init(&adev->mm_stats.lock);
d38ceaf9 2461
0c4e7fa5
CZ
2462 INIT_LIST_HEAD(&adev->shadow_list);
2463 mutex_init(&adev->shadow_list_lock);
2464
795f2813
AR
2465 INIT_LIST_HEAD(&adev->ring_lru_list);
2466 spin_lock_init(&adev->ring_lru_list_lock);
2467
06ec9070
AD
2468 INIT_DELAYED_WORK(&adev->late_init_work,
2469 amdgpu_device_ip_late_init_func_handler);
1e317b99
RZ
2470 INIT_DELAYED_WORK(&adev->gfx.gfx_off_delay_work,
2471 amdgpu_device_delay_enable_gfx_off);
2dc80b00 2472
d4535e2c
AG
2473 INIT_WORK(&adev->xgmi_reset_work, amdgpu_device_xgmi_reset_func);
2474
d23ee13f 2475 adev->gfx.gfx_off_req_count = 1;
b1ddf548
RZ
2476 adev->pm.ac_power = power_supply_is_system_supplied() > 0 ? true : false;
2477
0fa49558
AX
2478 /* Registers mapping */
2479 /* TODO: block userspace mapping of io register */
da69c161
KW
2480 if (adev->asic_type >= CHIP_BONAIRE) {
2481 adev->rmmio_base = pci_resource_start(adev->pdev, 5);
2482 adev->rmmio_size = pci_resource_len(adev->pdev, 5);
2483 } else {
2484 adev->rmmio_base = pci_resource_start(adev->pdev, 2);
2485 adev->rmmio_size = pci_resource_len(adev->pdev, 2);
2486 }
d38ceaf9 2487
d38ceaf9
AD
2488 adev->rmmio = ioremap(adev->rmmio_base, adev->rmmio_size);
2489 if (adev->rmmio == NULL) {
2490 return -ENOMEM;
2491 }
2492 DRM_INFO("register mmio base: 0x%08X\n", (uint32_t)adev->rmmio_base);
2493 DRM_INFO("register mmio size: %u\n", (unsigned)adev->rmmio_size);
2494
d38ceaf9
AD
2495 /* io port mapping */
2496 for (i = 0; i < DEVICE_COUNT_RESOURCE; i++) {
2497 if (pci_resource_flags(adev->pdev, i) & IORESOURCE_IO) {
2498 adev->rio_mem_size = pci_resource_len(adev->pdev, i);
2499 adev->rio_mem = pci_iomap(adev->pdev, i, adev->rio_mem_size);
2500 break;
2501 }
2502 }
2503 if (adev->rio_mem == NULL)
b64a18c5 2504 DRM_INFO("PCI I/O BAR is not found.\n");
d38ceaf9 2505
5494d864
AD
2506 amdgpu_device_get_pcie_info(adev);
2507
d38ceaf9 2508 /* early init functions */
06ec9070 2509 r = amdgpu_device_ip_early_init(adev);
d38ceaf9
AD
2510 if (r)
2511 return r;
2512
6585661d
OZ
2513 /* doorbell bar mapping and doorbell index init*/
2514 amdgpu_device_doorbell_init(adev);
2515
d38ceaf9
AD
2516 /* if we have > 1 VGA cards, then disable the amdgpu VGA resources */
2517 /* this will fail for cards that aren't VGA class devices, just
2518 * ignore it */
06ec9070 2519 vga_client_register(adev->pdev, adev, NULL, amdgpu_device_vga_set_decode);
d38ceaf9 2520
e9bef455 2521 if (amdgpu_device_is_px(ddev))
d38ceaf9 2522 runtime = true;
84c8b22e
LW
2523 if (!pci_is_thunderbolt_attached(adev->pdev))
2524 vga_switcheroo_register_client(adev->pdev,
2525 &amdgpu_switcheroo_ops, runtime);
d38ceaf9
AD
2526 if (runtime)
2527 vga_switcheroo_init_domain_pm_ops(adev->dev, &adev->vga_pm_domain);
2528
9475a943
SL
2529 if (amdgpu_emu_mode == 1) {
2530 /* post the asic on emulation mode */
2531 emu_soc_asic_init(adev);
bfca0289 2532 goto fence_driver_init;
9475a943 2533 }
bfca0289 2534
d38ceaf9 2535 /* Read BIOS */
83ba126a
AD
2536 if (!amdgpu_get_bios(adev)) {
2537 r = -EINVAL;
2538 goto failed;
2539 }
f7e9e9fe 2540
d38ceaf9 2541 r = amdgpu_atombios_init(adev);
2c1a2784
AD
2542 if (r) {
2543 dev_err(adev->dev, "amdgpu_atombios_init failed\n");
e23b74aa 2544 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_INIT_FAIL, 0, 0);
83ba126a 2545 goto failed;
2c1a2784 2546 }
d38ceaf9 2547
4e99a44e
ML
2548 /* detect if we are with an SRIOV vbios */
2549 amdgpu_device_detect_sriov_bios(adev);
048765ad 2550
d38ceaf9 2551 /* Post card if necessary */
39c640c0 2552 if (amdgpu_device_need_post(adev)) {
d38ceaf9 2553 if (!adev->bios) {
bec86378 2554 dev_err(adev->dev, "no vBIOS found\n");
83ba126a
AD
2555 r = -EINVAL;
2556 goto failed;
d38ceaf9 2557 }
bec86378 2558 DRM_INFO("GPU posting now...\n");
4e99a44e
ML
2559 r = amdgpu_atom_asic_init(adev->mode_info.atom_context);
2560 if (r) {
2561 dev_err(adev->dev, "gpu post error!\n");
2562 goto failed;
2563 }
d38ceaf9
AD
2564 }
2565
88b64e95
AD
2566 if (adev->is_atom_fw) {
2567 /* Initialize clocks */
2568 r = amdgpu_atomfirmware_get_clock_info(adev);
2569 if (r) {
2570 dev_err(adev->dev, "amdgpu_atomfirmware_get_clock_info failed\n");
e23b74aa 2571 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_GET_CLOCK_FAIL, 0, 0);
88b64e95
AD
2572 goto failed;
2573 }
2574 } else {
a5bde2f9
AD
2575 /* Initialize clocks */
2576 r = amdgpu_atombios_get_clock_info(adev);
2577 if (r) {
2578 dev_err(adev->dev, "amdgpu_atombios_get_clock_info failed\n");
e23b74aa 2579 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_GET_CLOCK_FAIL, 0, 0);
89041940 2580 goto failed;
a5bde2f9
AD
2581 }
2582 /* init i2c buses */
4562236b
HW
2583 if (!amdgpu_device_has_dc_support(adev))
2584 amdgpu_atombios_i2c_init(adev);
2c1a2784 2585 }
d38ceaf9 2586
bfca0289 2587fence_driver_init:
d38ceaf9
AD
2588 /* Fence driver */
2589 r = amdgpu_fence_driver_init(adev);
2c1a2784
AD
2590 if (r) {
2591 dev_err(adev->dev, "amdgpu_fence_driver_init failed\n");
e23b74aa 2592 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_FENCE_INIT_FAIL, 0, 0);
83ba126a 2593 goto failed;
2c1a2784 2594 }
d38ceaf9
AD
2595
2596 /* init the mode config */
2597 drm_mode_config_init(adev->ddev);
2598
06ec9070 2599 r = amdgpu_device_ip_init(adev);
d38ceaf9 2600 if (r) {
8840a387 2601 /* failed in exclusive mode due to timeout */
2602 if (amdgpu_sriov_vf(adev) &&
2603 !amdgpu_sriov_runtime(adev) &&
2604 amdgpu_virt_mmio_blocked(adev) &&
2605 !amdgpu_virt_wait_reset(adev)) {
2606 dev_err(adev->dev, "VF exclusive mode timeout\n");
1daee8b4
PD
2607 /* Don't send request since VF is inactive. */
2608 adev->virt.caps &= ~AMDGPU_SRIOV_CAPS_RUNTIME;
2609 adev->virt.ops = NULL;
8840a387 2610 r = -EAGAIN;
2611 goto failed;
2612 }
06ec9070 2613 dev_err(adev->dev, "amdgpu_device_ip_init failed\n");
e23b74aa 2614 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_AMDGPU_INIT_FAIL, 0, 0);
83ba126a 2615 goto failed;
d38ceaf9
AD
2616 }
2617
2618 adev->accel_working = true;
2619
e59c0205
AX
2620 amdgpu_vm_check_compute_bug(adev);
2621
95844d20
MO
2622 /* Initialize the buffer migration limit. */
2623 if (amdgpu_moverate >= 0)
2624 max_MBps = amdgpu_moverate;
2625 else
2626 max_MBps = 8; /* Allow 8 MB/s. */
2627 /* Get a log2 for easy divisions. */
2628 adev->mm_stats.log2_max_MBps = ilog2(max(1u, max_MBps));
2629
d38ceaf9
AD
2630 r = amdgpu_ib_pool_init(adev);
2631 if (r) {
2632 dev_err(adev->dev, "IB initialization failed (%d).\n", r);
e23b74aa 2633 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_IB_INIT_FAIL, 0, r);
83ba126a 2634 goto failed;
d38ceaf9
AD
2635 }
2636
9bc92b9c
ML
2637 amdgpu_fbdev_init(adev);
2638
d2f52ac8
RZ
2639 r = amdgpu_pm_sysfs_init(adev);
2640 if (r)
2641 DRM_ERROR("registering pm debugfs failed (%d).\n", r);
2642
75758255 2643 r = amdgpu_debugfs_gem_init(adev);
3f14e623 2644 if (r)
d38ceaf9 2645 DRM_ERROR("registering gem debugfs failed (%d).\n", r);
d38ceaf9
AD
2646
2647 r = amdgpu_debugfs_regs_init(adev);
3f14e623 2648 if (r)
d38ceaf9 2649 DRM_ERROR("registering register debugfs failed (%d).\n", r);
d38ceaf9 2650
50ab2533 2651 r = amdgpu_debugfs_firmware_init(adev);
3f14e623 2652 if (r)
50ab2533 2653 DRM_ERROR("registering firmware debugfs failed (%d).\n", r);
50ab2533 2654
763efb6c 2655 r = amdgpu_debugfs_init(adev);
db95e218 2656 if (r)
763efb6c 2657 DRM_ERROR("Creating debugfs files failed (%d).\n", r);
db95e218 2658
d38ceaf9
AD
2659 if ((amdgpu_testing & 1)) {
2660 if (adev->accel_working)
2661 amdgpu_test_moves(adev);
2662 else
2663 DRM_INFO("amdgpu: acceleration disabled, skipping move tests\n");
2664 }
d38ceaf9
AD
2665 if (amdgpu_benchmarking) {
2666 if (adev->accel_working)
2667 amdgpu_benchmark(adev, amdgpu_benchmarking);
2668 else
2669 DRM_INFO("amdgpu: acceleration disabled, skipping benchmarks\n");
2670 }
2671
2672 /* enable clockgating, etc. after ib tests, etc. since some blocks require
2673 * explicit gating rather than handling it automatically.
2674 */
06ec9070 2675 r = amdgpu_device_ip_late_init(adev);
2c1a2784 2676 if (r) {
06ec9070 2677 dev_err(adev->dev, "amdgpu_device_ip_late_init failed\n");
e23b74aa 2678 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_AMDGPU_LATE_INIT_FAIL, 0, r);
83ba126a 2679 goto failed;
2c1a2784 2680 }
d38ceaf9
AD
2681
2682 return 0;
83ba126a
AD
2683
2684failed:
89041940 2685 amdgpu_vf_error_trans_all(adev);
83ba126a
AD
2686 if (runtime)
2687 vga_switcheroo_fini_domain_pm_ops(adev->dev);
8840a387 2688
83ba126a 2689 return r;
d38ceaf9
AD
2690}
2691
d38ceaf9
AD
2692/**
2693 * amdgpu_device_fini - tear down the driver
2694 *
2695 * @adev: amdgpu_device pointer
2696 *
2697 * Tear down the driver info (all asics).
2698 * Called at driver shutdown.
2699 */
2700void amdgpu_device_fini(struct amdgpu_device *adev)
2701{
2702 int r;
2703
2704 DRM_INFO("amdgpu: finishing device.\n");
2705 adev->shutdown = true;
e5b03032
ML
2706 /* disable all interrupts */
2707 amdgpu_irq_disable_all(adev);
ff97cba8
ML
2708 if (adev->mode_info.mode_config_initialized){
2709 if (!amdgpu_device_has_dc_support(adev))
2710 drm_crtc_force_disable_all(adev->ddev);
2711 else
2712 drm_atomic_helper_shutdown(adev->ddev);
2713 }
d38ceaf9
AD
2714 amdgpu_ib_pool_fini(adev);
2715 amdgpu_fence_driver_fini(adev);
58e955d9 2716 amdgpu_pm_sysfs_fini(adev);
d38ceaf9 2717 amdgpu_fbdev_fini(adev);
06ec9070 2718 r = amdgpu_device_ip_fini(adev);
ab4fe3e1
HR
2719 if (adev->firmware.gpu_info_fw) {
2720 release_firmware(adev->firmware.gpu_info_fw);
2721 adev->firmware.gpu_info_fw = NULL;
2722 }
d38ceaf9 2723 adev->accel_working = false;
2dc80b00 2724 cancel_delayed_work_sync(&adev->late_init_work);
d38ceaf9 2725 /* free i2c buses */
4562236b
HW
2726 if (!amdgpu_device_has_dc_support(adev))
2727 amdgpu_i2c_fini(adev);
bfca0289
SL
2728
2729 if (amdgpu_emu_mode != 1)
2730 amdgpu_atombios_fini(adev);
2731
d38ceaf9
AD
2732 kfree(adev->bios);
2733 adev->bios = NULL;
84c8b22e
LW
2734 if (!pci_is_thunderbolt_attached(adev->pdev))
2735 vga_switcheroo_unregister_client(adev->pdev);
83ba126a
AD
2736 if (adev->flags & AMD_IS_PX)
2737 vga_switcheroo_fini_domain_pm_ops(adev->dev);
d38ceaf9
AD
2738 vga_client_register(adev->pdev, NULL, NULL, NULL);
2739 if (adev->rio_mem)
2740 pci_iounmap(adev->pdev, adev->rio_mem);
2741 adev->rio_mem = NULL;
2742 iounmap(adev->rmmio);
2743 adev->rmmio = NULL;
06ec9070 2744 amdgpu_device_doorbell_fini(adev);
d38ceaf9 2745 amdgpu_debugfs_regs_cleanup(adev);
d38ceaf9
AD
2746}
2747
2748
2749/*
2750 * Suspend & resume.
2751 */
2752/**
810ddc3a 2753 * amdgpu_device_suspend - initiate device suspend
d38ceaf9 2754 *
87e3f136
DP
2755 * @dev: drm dev pointer
2756 * @suspend: suspend state
2757 * @fbcon : notify the fbdev of suspend
d38ceaf9
AD
2758 *
2759 * Puts the hw in the suspend state (all asics).
2760 * Returns 0 for success or an error on failure.
2761 * Called at driver suspend.
2762 */
810ddc3a 2763int amdgpu_device_suspend(struct drm_device *dev, bool suspend, bool fbcon)
d38ceaf9
AD
2764{
2765 struct amdgpu_device *adev;
2766 struct drm_crtc *crtc;
2767 struct drm_connector *connector;
5ceb54c6 2768 int r;
d38ceaf9
AD
2769
2770 if (dev == NULL || dev->dev_private == NULL) {
2771 return -ENODEV;
2772 }
2773
2774 adev = dev->dev_private;
2775
2776 if (dev->switch_power_state == DRM_SWITCH_POWER_OFF)
2777 return 0;
2778
44779b43 2779 adev->in_suspend = true;
d38ceaf9
AD
2780 drm_kms_helper_poll_disable(dev);
2781
5f818173
S
2782 if (fbcon)
2783 amdgpu_fbdev_set_suspend(adev, 1);
2784
a5459475
RZ
2785 cancel_delayed_work_sync(&adev->late_init_work);
2786
4562236b
HW
2787 if (!amdgpu_device_has_dc_support(adev)) {
2788 /* turn off display hw */
2789 drm_modeset_lock_all(dev);
2790 list_for_each_entry(connector, &dev->mode_config.connector_list, head) {
2791 drm_helper_connector_dpms(connector, DRM_MODE_DPMS_OFF);
2792 }
2793 drm_modeset_unlock_all(dev);
fe1053b7
AD
2794 /* unpin the front buffers and cursors */
2795 list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) {
2796 struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
2797 struct drm_framebuffer *fb = crtc->primary->fb;
2798 struct amdgpu_bo *robj;
2799
91334223 2800 if (amdgpu_crtc->cursor_bo && !adev->enable_virtual_display) {
fe1053b7
AD
2801 struct amdgpu_bo *aobj = gem_to_amdgpu_bo(amdgpu_crtc->cursor_bo);
2802 r = amdgpu_bo_reserve(aobj, true);
2803 if (r == 0) {
2804 amdgpu_bo_unpin(aobj);
2805 amdgpu_bo_unreserve(aobj);
2806 }
756e6880 2807 }
756e6880 2808
fe1053b7
AD
2809 if (fb == NULL || fb->obj[0] == NULL) {
2810 continue;
2811 }
2812 robj = gem_to_amdgpu_bo(fb->obj[0]);
2813 /* don't unpin kernel fb objects */
2814 if (!amdgpu_fbdev_robj_is_fb(adev, robj)) {
2815 r = amdgpu_bo_reserve(robj, true);
2816 if (r == 0) {
2817 amdgpu_bo_unpin(robj);
2818 amdgpu_bo_unreserve(robj);
2819 }
d38ceaf9
AD
2820 }
2821 }
2822 }
fe1053b7
AD
2823
2824 amdgpu_amdkfd_suspend(adev);
2825
2826 r = amdgpu_device_ip_suspend_phase1(adev);
2827
d38ceaf9
AD
2828 /* evict vram memory */
2829 amdgpu_bo_evict_vram(adev);
2830
5ceb54c6 2831 amdgpu_fence_driver_suspend(adev);
d38ceaf9 2832
fe1053b7 2833 r = amdgpu_device_ip_suspend_phase2(adev);
d38ceaf9 2834
a0a71e49
AD
2835 /* evict remaining vram memory
2836 * This second call to evict vram is to evict the gart page table
2837 * using the CPU.
2838 */
d38ceaf9
AD
2839 amdgpu_bo_evict_vram(adev);
2840
2841 pci_save_state(dev->pdev);
2842 if (suspend) {
2843 /* Shut down the device */
2844 pci_disable_device(dev->pdev);
2845 pci_set_power_state(dev->pdev, PCI_D3hot);
74b0b157 2846 } else {
2847 r = amdgpu_asic_reset(adev);
2848 if (r)
2849 DRM_ERROR("amdgpu asic reset failed\n");
d38ceaf9
AD
2850 }
2851
d38ceaf9
AD
2852 return 0;
2853}
2854
2855/**
810ddc3a 2856 * amdgpu_device_resume - initiate device resume
d38ceaf9 2857 *
87e3f136
DP
2858 * @dev: drm dev pointer
2859 * @resume: resume state
2860 * @fbcon : notify the fbdev of resume
d38ceaf9
AD
2861 *
2862 * Bring the hw back to operating state (all asics).
2863 * Returns 0 for success or an error on failure.
2864 * Called at driver resume.
2865 */
810ddc3a 2866int amdgpu_device_resume(struct drm_device *dev, bool resume, bool fbcon)
d38ceaf9
AD
2867{
2868 struct drm_connector *connector;
2869 struct amdgpu_device *adev = dev->dev_private;
756e6880 2870 struct drm_crtc *crtc;
03161a6e 2871 int r = 0;
d38ceaf9
AD
2872
2873 if (dev->switch_power_state == DRM_SWITCH_POWER_OFF)
2874 return 0;
2875
d38ceaf9
AD
2876 if (resume) {
2877 pci_set_power_state(dev->pdev, PCI_D0);
2878 pci_restore_state(dev->pdev);
74b0b157 2879 r = pci_enable_device(dev->pdev);
03161a6e 2880 if (r)
4d3b9ae5 2881 return r;
d38ceaf9
AD
2882 }
2883
2884 /* post card */
39c640c0 2885 if (amdgpu_device_need_post(adev)) {
74b0b157 2886 r = amdgpu_atom_asic_init(adev->mode_info.atom_context);
2887 if (r)
2888 DRM_ERROR("amdgpu asic init failed\n");
2889 }
d38ceaf9 2890
06ec9070 2891 r = amdgpu_device_ip_resume(adev);
e6707218 2892 if (r) {
06ec9070 2893 DRM_ERROR("amdgpu_device_ip_resume failed (%d).\n", r);
4d3b9ae5 2894 return r;
e6707218 2895 }
5ceb54c6
AD
2896 amdgpu_fence_driver_resume(adev);
2897
d38ceaf9 2898
06ec9070 2899 r = amdgpu_device_ip_late_init(adev);
03161a6e 2900 if (r)
4d3b9ae5 2901 return r;
d38ceaf9 2902
fe1053b7
AD
2903 if (!amdgpu_device_has_dc_support(adev)) {
2904 /* pin cursors */
2905 list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) {
2906 struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
2907
91334223 2908 if (amdgpu_crtc->cursor_bo && !adev->enable_virtual_display) {
fe1053b7
AD
2909 struct amdgpu_bo *aobj = gem_to_amdgpu_bo(amdgpu_crtc->cursor_bo);
2910 r = amdgpu_bo_reserve(aobj, true);
2911 if (r == 0) {
2912 r = amdgpu_bo_pin(aobj, AMDGPU_GEM_DOMAIN_VRAM);
2913 if (r != 0)
2914 DRM_ERROR("Failed to pin cursor BO (%d)\n", r);
2915 amdgpu_crtc->cursor_addr = amdgpu_bo_gpu_offset(aobj);
2916 amdgpu_bo_unreserve(aobj);
2917 }
756e6880
AD
2918 }
2919 }
2920 }
ba997709
YZ
2921 r = amdgpu_amdkfd_resume(adev);
2922 if (r)
2923 return r;
756e6880 2924
96a5d8d4
LL
2925 /* Make sure IB tests are flushed */
2926 flush_delayed_work(&adev->late_init_work);
2927
d38ceaf9
AD
2928 /* blat the mode back in */
2929 if (fbcon) {
4562236b
HW
2930 if (!amdgpu_device_has_dc_support(adev)) {
2931 /* pre DCE11 */
2932 drm_helper_resume_force_mode(dev);
2933
2934 /* turn on display hw */
2935 drm_modeset_lock_all(dev);
2936 list_for_each_entry(connector, &dev->mode_config.connector_list, head) {
2937 drm_helper_connector_dpms(connector, DRM_MODE_DPMS_ON);
2938 }
2939 drm_modeset_unlock_all(dev);
d38ceaf9 2940 }
4d3b9ae5 2941 amdgpu_fbdev_set_suspend(adev, 0);
d38ceaf9
AD
2942 }
2943
2944 drm_kms_helper_poll_enable(dev);
23a1a9e5
L
2945
2946 /*
2947 * Most of the connector probing functions try to acquire runtime pm
2948 * refs to ensure that the GPU is powered on when connector polling is
2949 * performed. Since we're calling this from a runtime PM callback,
2950 * trying to acquire rpm refs will cause us to deadlock.
2951 *
2952 * Since we're guaranteed to be holding the rpm lock, it's safe to
2953 * temporarily disable the rpm helpers so this doesn't deadlock us.
2954 */
2955#ifdef CONFIG_PM
2956 dev->dev->power.disable_depth++;
2957#endif
4562236b
HW
2958 if (!amdgpu_device_has_dc_support(adev))
2959 drm_helper_hpd_irq_event(dev);
2960 else
2961 drm_kms_helper_hotplug_event(dev);
23a1a9e5
L
2962#ifdef CONFIG_PM
2963 dev->dev->power.disable_depth--;
2964#endif
44779b43
RZ
2965 adev->in_suspend = false;
2966
4d3b9ae5 2967 return 0;
d38ceaf9
AD
2968}
2969
e3ecdffa
AD
2970/**
2971 * amdgpu_device_ip_check_soft_reset - did soft reset succeed
2972 *
2973 * @adev: amdgpu_device pointer
2974 *
2975 * The list of all the hardware IPs that make up the asic is walked and
2976 * the check_soft_reset callbacks are run. check_soft_reset determines
2977 * if the asic is still hung or not.
2978 * Returns true if any of the IPs are still in a hung state, false if not.
2979 */
06ec9070 2980static bool amdgpu_device_ip_check_soft_reset(struct amdgpu_device *adev)
63fbf42f
CZ
2981{
2982 int i;
2983 bool asic_hang = false;
2984
f993d628
ML
2985 if (amdgpu_sriov_vf(adev))
2986 return true;
2987
8bc04c29
AD
2988 if (amdgpu_asic_need_full_reset(adev))
2989 return true;
2990
63fbf42f 2991 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 2992 if (!adev->ip_blocks[i].status.valid)
63fbf42f 2993 continue;
a1255107
AD
2994 if (adev->ip_blocks[i].version->funcs->check_soft_reset)
2995 adev->ip_blocks[i].status.hang =
2996 adev->ip_blocks[i].version->funcs->check_soft_reset(adev);
2997 if (adev->ip_blocks[i].status.hang) {
2998 DRM_INFO("IP block:%s is hung!\n", adev->ip_blocks[i].version->funcs->name);
63fbf42f
CZ
2999 asic_hang = true;
3000 }
3001 }
3002 return asic_hang;
3003}
3004
e3ecdffa
AD
3005/**
3006 * amdgpu_device_ip_pre_soft_reset - prepare for soft reset
3007 *
3008 * @adev: amdgpu_device pointer
3009 *
3010 * The list of all the hardware IPs that make up the asic is walked and the
3011 * pre_soft_reset callbacks are run if the block is hung. pre_soft_reset
3012 * handles any IP specific hardware or software state changes that are
3013 * necessary for a soft reset to succeed.
3014 * Returns 0 on success, negative error code on failure.
3015 */
06ec9070 3016static int amdgpu_device_ip_pre_soft_reset(struct amdgpu_device *adev)
d31a501e
CZ
3017{
3018 int i, r = 0;
3019
3020 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 3021 if (!adev->ip_blocks[i].status.valid)
d31a501e 3022 continue;
a1255107
AD
3023 if (adev->ip_blocks[i].status.hang &&
3024 adev->ip_blocks[i].version->funcs->pre_soft_reset) {
3025 r = adev->ip_blocks[i].version->funcs->pre_soft_reset(adev);
d31a501e
CZ
3026 if (r)
3027 return r;
3028 }
3029 }
3030
3031 return 0;
3032}
3033
e3ecdffa
AD
3034/**
3035 * amdgpu_device_ip_need_full_reset - check if a full asic reset is needed
3036 *
3037 * @adev: amdgpu_device pointer
3038 *
3039 * Some hardware IPs cannot be soft reset. If they are hung, a full gpu
3040 * reset is necessary to recover.
3041 * Returns true if a full asic reset is required, false if not.
3042 */
06ec9070 3043static bool amdgpu_device_ip_need_full_reset(struct amdgpu_device *adev)
35d782fe 3044{
da146d3b
AD
3045 int i;
3046
8bc04c29
AD
3047 if (amdgpu_asic_need_full_reset(adev))
3048 return true;
3049
da146d3b 3050 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 3051 if (!adev->ip_blocks[i].status.valid)
da146d3b 3052 continue;
a1255107
AD
3053 if ((adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) ||
3054 (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) ||
3055 (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_ACP) ||
98512bb8
KW
3056 (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_DCE) ||
3057 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP) {
a1255107 3058 if (adev->ip_blocks[i].status.hang) {
da146d3b
AD
3059 DRM_INFO("Some block need full reset!\n");
3060 return true;
3061 }
3062 }
35d782fe
CZ
3063 }
3064 return false;
3065}
3066
e3ecdffa
AD
3067/**
3068 * amdgpu_device_ip_soft_reset - do a soft reset
3069 *
3070 * @adev: amdgpu_device pointer
3071 *
3072 * The list of all the hardware IPs that make up the asic is walked and the
3073 * soft_reset callbacks are run if the block is hung. soft_reset handles any
3074 * IP specific hardware or software state changes that are necessary to soft
3075 * reset the IP.
3076 * Returns 0 on success, negative error code on failure.
3077 */
06ec9070 3078static int amdgpu_device_ip_soft_reset(struct amdgpu_device *adev)
35d782fe
CZ
3079{
3080 int i, r = 0;
3081
3082 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 3083 if (!adev->ip_blocks[i].status.valid)
35d782fe 3084 continue;
a1255107
AD
3085 if (adev->ip_blocks[i].status.hang &&
3086 adev->ip_blocks[i].version->funcs->soft_reset) {
3087 r = adev->ip_blocks[i].version->funcs->soft_reset(adev);
35d782fe
CZ
3088 if (r)
3089 return r;
3090 }
3091 }
3092
3093 return 0;
3094}
3095
e3ecdffa
AD
3096/**
3097 * amdgpu_device_ip_post_soft_reset - clean up from soft reset
3098 *
3099 * @adev: amdgpu_device pointer
3100 *
3101 * The list of all the hardware IPs that make up the asic is walked and the
3102 * post_soft_reset callbacks are run if the asic was hung. post_soft_reset
3103 * handles any IP specific hardware or software state changes that are
3104 * necessary after the IP has been soft reset.
3105 * Returns 0 on success, negative error code on failure.
3106 */
06ec9070 3107static int amdgpu_device_ip_post_soft_reset(struct amdgpu_device *adev)
35d782fe
CZ
3108{
3109 int i, r = 0;
3110
3111 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 3112 if (!adev->ip_blocks[i].status.valid)
35d782fe 3113 continue;
a1255107
AD
3114 if (adev->ip_blocks[i].status.hang &&
3115 adev->ip_blocks[i].version->funcs->post_soft_reset)
3116 r = adev->ip_blocks[i].version->funcs->post_soft_reset(adev);
35d782fe
CZ
3117 if (r)
3118 return r;
3119 }
3120
3121 return 0;
3122}
3123
e3ecdffa 3124/**
c33adbc7 3125 * amdgpu_device_recover_vram - Recover some VRAM contents
e3ecdffa
AD
3126 *
3127 * @adev: amdgpu_device pointer
3128 *
3129 * Restores the contents of VRAM buffers from the shadows in GTT. Used to
3130 * restore things like GPUVM page tables after a GPU reset where
3131 * the contents of VRAM might be lost.
403009bf
CK
3132 *
3133 * Returns:
3134 * 0 on success, negative error code on failure.
e3ecdffa 3135 */
c33adbc7 3136static int amdgpu_device_recover_vram(struct amdgpu_device *adev)
c41d1cf6 3137{
c41d1cf6 3138 struct dma_fence *fence = NULL, *next = NULL;
403009bf
CK
3139 struct amdgpu_bo *shadow;
3140 long r = 1, tmo;
c41d1cf6
ML
3141
3142 if (amdgpu_sriov_runtime(adev))
b045d3af 3143 tmo = msecs_to_jiffies(8000);
c41d1cf6
ML
3144 else
3145 tmo = msecs_to_jiffies(100);
3146
3147 DRM_INFO("recover vram bo from shadow start\n");
3148 mutex_lock(&adev->shadow_list_lock);
403009bf
CK
3149 list_for_each_entry(shadow, &adev->shadow_list, shadow_list) {
3150
3151 /* No need to recover an evicted BO */
3152 if (shadow->tbo.mem.mem_type != TTM_PL_TT ||
3153 shadow->parent->tbo.mem.mem_type != TTM_PL_VRAM)
3154 continue;
3155
3156 r = amdgpu_bo_restore_shadow(shadow, &next);
3157 if (r)
3158 break;
3159
c41d1cf6
ML
3160 if (fence) {
3161 r = dma_fence_wait_timeout(fence, false, tmo);
403009bf
CK
3162 dma_fence_put(fence);
3163 fence = next;
3164 if (r <= 0)
c41d1cf6 3165 break;
403009bf
CK
3166 } else {
3167 fence = next;
c41d1cf6 3168 }
c41d1cf6
ML
3169 }
3170 mutex_unlock(&adev->shadow_list_lock);
3171
403009bf
CK
3172 if (fence)
3173 tmo = dma_fence_wait_timeout(fence, false, tmo);
c41d1cf6
ML
3174 dma_fence_put(fence);
3175
403009bf 3176 if (r <= 0 || tmo <= 0) {
c41d1cf6 3177 DRM_ERROR("recover vram bo from shadow failed\n");
403009bf
CK
3178 return -EIO;
3179 }
c41d1cf6 3180
403009bf
CK
3181 DRM_INFO("recover vram bo from shadow done\n");
3182 return 0;
c41d1cf6
ML
3183}
3184
a90ad3c2 3185
e3ecdffa 3186/**
06ec9070 3187 * amdgpu_device_reset_sriov - reset ASIC for SR-IOV vf
5740682e
ML
3188 *
3189 * @adev: amdgpu device pointer
87e3f136 3190 * @from_hypervisor: request from hypervisor
5740682e
ML
3191 *
3192 * Do a VF FLR and reinitialize the ASIC.
3f48c681 3193 * Returns 0 on success, negative error code on failure.
e3ecdffa
AD
3194 */
3195static int amdgpu_device_reset_sriov(struct amdgpu_device *adev,
3196 bool from_hypervisor)
5740682e
ML
3197{
3198 int r;
3199
3200 if (from_hypervisor)
3201 r = amdgpu_virt_request_full_gpu(adev, true);
3202 else
3203 r = amdgpu_virt_reset_gpu(adev);
3204 if (r)
3205 return r;
a90ad3c2
ML
3206
3207 /* Resume IP prior to SMC */
06ec9070 3208 r = amdgpu_device_ip_reinit_early_sriov(adev);
5740682e
ML
3209 if (r)
3210 goto error;
a90ad3c2
ML
3211
3212 /* we need recover gart prior to run SMC/CP/SDMA resume */
c1c7ce8f 3213 amdgpu_gtt_mgr_recover(&adev->mman.bdev.man[TTM_PL_TT]);
a90ad3c2 3214
7a3e0bb2
RZ
3215 r = amdgpu_device_fw_loading(adev);
3216 if (r)
3217 return r;
3218
a90ad3c2 3219 /* now we are okay to resume SMC/CP/SDMA */
06ec9070 3220 r = amdgpu_device_ip_reinit_late_sriov(adev);
5740682e
ML
3221 if (r)
3222 goto error;
a90ad3c2
ML
3223
3224 amdgpu_irq_gpu_reset_resume_helper(adev);
5740682e 3225 r = amdgpu_ib_ring_tests(adev);
a90ad3c2 3226
abc34253 3227error:
d3c117e5 3228 amdgpu_virt_init_data_exchange(adev);
abc34253 3229 amdgpu_virt_release_full_gpu(adev, true);
c41d1cf6
ML
3230 if (!r && adev->virt.gim_feature & AMDGIM_FEATURE_GIM_FLR_VRAMLOST) {
3231 atomic_inc(&adev->vram_lost_counter);
c33adbc7 3232 r = amdgpu_device_recover_vram(adev);
a90ad3c2
ML
3233 }
3234
3235 return r;
3236}
3237
12938fad
CK
3238/**
3239 * amdgpu_device_should_recover_gpu - check if we should try GPU recovery
3240 *
3241 * @adev: amdgpu device pointer
3242 *
3243 * Check amdgpu_gpu_recovery and SRIOV status to see if we should try to recover
3244 * a hung GPU.
3245 */
3246bool amdgpu_device_should_recover_gpu(struct amdgpu_device *adev)
3247{
3248 if (!amdgpu_device_ip_check_soft_reset(adev)) {
3249 DRM_INFO("Timeout, but no hardware hang detected.\n");
3250 return false;
3251 }
3252
3ba7b418
AG
3253 if (amdgpu_gpu_recovery == 0)
3254 goto disabled;
3255
3256 if (amdgpu_sriov_vf(adev))
3257 return true;
3258
3259 if (amdgpu_gpu_recovery == -1) {
3260 switch (adev->asic_type) {
fc42d47c
AG
3261 case CHIP_BONAIRE:
3262 case CHIP_HAWAII:
3ba7b418
AG
3263 case CHIP_TOPAZ:
3264 case CHIP_TONGA:
3265 case CHIP_FIJI:
3266 case CHIP_POLARIS10:
3267 case CHIP_POLARIS11:
3268 case CHIP_POLARIS12:
3269 case CHIP_VEGAM:
3270 case CHIP_VEGA20:
3271 case CHIP_VEGA10:
3272 case CHIP_VEGA12:
3273 break;
3274 default:
3275 goto disabled;
3276 }
12938fad
CK
3277 }
3278
3279 return true;
3ba7b418
AG
3280
3281disabled:
3282 DRM_INFO("GPU recovery disabled.\n");
3283 return false;
12938fad
CK
3284}
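/*
 * The amdgpu_gpu_recovery module parameter handled above, in short:
 *   0  - recovery disabled
 *   1  - recovery enabled for all ASICs
 *  -1  - auto: enabled for SR-IOV VFs and the ASICs listed in the switch,
 *        disabled otherwise
 * and in all cases recovery is skipped when no hardware hang was detected.
 */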
3285
5c6dd71e 3286
26bc5340
AG
3287static int amdgpu_device_pre_asic_reset(struct amdgpu_device *adev,
3288 struct amdgpu_job *job,
3289 bool *need_full_reset_arg)
3290{
3291 int i, r = 0;
3292 bool need_full_reset = *need_full_reset_arg;
71182665 3293
71182665 3294 /* block all schedulers and reset given job's ring */
0875dc9e
CZ
3295 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
3296 struct amdgpu_ring *ring = adev->rings[i];
3297
51687759 3298 if (!ring || !ring->sched.thread)
0875dc9e 3299 continue;
5740682e 3300
71182665
ML
3301 kthread_park(ring->sched.thread);
3302
734afd4b 3303 if (job && job->base.sched != &ring->sched)
5740682e
ML
3304 continue;
3305
67ccea60 3306 drm_sched_hw_job_reset(&ring->sched, job ? &job->base : NULL);
5740682e 3307
2f9d4084
ML
3308 /* after all hw jobs are reset, hw fence is meaningless, so force_completion */
3309 amdgpu_fence_driver_force_completion(ring);
0875dc9e 3310 }
d38ceaf9 3311
26bc5340
AG
3312
3313
3314 if (!amdgpu_sriov_vf(adev)) {
3315
3316 if (!need_full_reset)
3317 need_full_reset = amdgpu_device_ip_need_full_reset(adev);
3318
3319 if (!need_full_reset) {
3320 amdgpu_device_ip_pre_soft_reset(adev);
3321 r = amdgpu_device_ip_soft_reset(adev);
3322 amdgpu_device_ip_post_soft_reset(adev);
3323 if (r || amdgpu_device_ip_check_soft_reset(adev)) {
3324 DRM_INFO("soft reset failed, will fallback to full reset!\n");
3325 need_full_reset = true;
3326 }
3327 }
3328
3329 if (need_full_reset)
3330 r = amdgpu_device_ip_suspend(adev);
3331
3332 *need_full_reset_arg = need_full_reset;
3333 }
3334
3335 return r;
3336}
3337
3338static int amdgpu_do_asic_reset(struct amdgpu_hive_info *hive,
3339 struct list_head *device_list_handle,
3340 bool *need_full_reset_arg)
3341{
3342 struct amdgpu_device *tmp_adev = NULL;
3343 bool need_full_reset = *need_full_reset_arg, vram_lost = false;
3344 int r = 0;
3345
3346 /*
3347 * ASIC reset has to be done on all XGMI hive nodes ASAP
3348 * to allow proper links negotiation in FW (within 1 sec)
3349 */
3350 if (need_full_reset) {
3351 list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) {
d4535e2c
AG
3352 /* For XGMI run all resets in parallel to speed up the process */
3353 if (tmp_adev->gmc.xgmi.num_physical_nodes > 1) {
3354 if (!queue_work(system_highpri_wq, &tmp_adev->xgmi_reset_work))
3355 r = -EALREADY;
3356 } else
3357 r = amdgpu_asic_reset(tmp_adev);
3358
3359 if (r) {
3360 DRM_ERROR("ASIC reset failed with err r, %d for drm dev, %s",
26bc5340 3361 r, tmp_adev->ddev->unique);
d4535e2c
AG
3362 break;
3363 }
3364 }
3365
3366 /* For XGMI wait for all PSP resets to complete before proceed */
3367 if (!r) {
3368 list_for_each_entry(tmp_adev, device_list_handle,
3369 gmc.xgmi.head) {
3370 if (tmp_adev->gmc.xgmi.num_physical_nodes > 1) {
3371 flush_work(&tmp_adev->xgmi_reset_work);
3372 r = tmp_adev->asic_reset_res;
3373 if (r)
3374 break;
3375 }
3376 }
26bc5340
AG
3377 }
3378 }
3379
3380
3381 list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) {
3382 if (need_full_reset) {
3383 /* post card */
3384 if (amdgpu_atom_asic_init(tmp_adev->mode_info.atom_context))
3385 DRM_WARN("asic atom init failed!");
3386
3387 if (!r) {
3388 dev_info(tmp_adev->dev, "GPU reset succeeded, trying to resume\n");
3389 r = amdgpu_device_ip_resume_phase1(tmp_adev);
3390 if (r)
3391 goto out;
3392
3393 vram_lost = amdgpu_device_check_vram_lost(tmp_adev);
3394 if (vram_lost) {
3395 DRM_ERROR("VRAM is lost!\n");
3396 atomic_inc(&tmp_adev->vram_lost_counter);
3397 }
3398
3399 r = amdgpu_gtt_mgr_recover(
3400 &tmp_adev->mman.bdev.man[TTM_PL_TT]);
3401 if (r)
3402 goto out;
3403
3404 r = amdgpu_device_fw_loading(tmp_adev);
3405 if (r)
3406 return r;
3407
3408 r = amdgpu_device_ip_resume_phase2(tmp_adev);
3409 if (r)
3410 goto out;
3411
3412 if (vram_lost)
3413 amdgpu_device_fill_reset_magic(tmp_adev);
3414
3415 /* Update PSP FW topology after reset */
3416 if (hive && tmp_adev->gmc.xgmi.num_physical_nodes > 1)
3417 r = amdgpu_xgmi_update_topology(hive, tmp_adev);
3418 }
3419 }
3420
3421
3422out:
3423 if (!r) {
3424 amdgpu_irq_gpu_reset_resume_helper(tmp_adev);
3425 r = amdgpu_ib_ring_tests(tmp_adev);
3426 if (r) {
3427 dev_err(tmp_adev->dev, "ib ring test failed (%d).\n", r);
3428 r = amdgpu_device_ip_suspend(tmp_adev);
3429 need_full_reset = true;
3430 r = -EAGAIN;
3431 goto end;
3432 }
3433 }
3434
3435 if (!r)
3436 r = amdgpu_device_recover_vram(tmp_adev);
3437 else
3438 tmp_adev->asic_reset_res = r;
3439 }
3440
3441end:
3442 *need_full_reset_arg = need_full_reset;
3443 return r;
3444}
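
/*
 * Illustrative sketch (not part of the driver; the handler name below is
 * hypothetical): the parallel XGMI path above queues one xgmi_reset_work
 * item per hive node and then flushes them all before collecting
 * asic_reset_res.  The work handler is assumed to look roughly like this.
 */
#if 0
static void amdgpu_xgmi_reset_work_sketch(struct work_struct *work)
{
	struct amdgpu_device *adev =
		container_of(work, struct amdgpu_device, xgmi_reset_work);

	/* Each hive node resets itself; the caller later reads asic_reset_res. */
	adev->asic_reset_res = amdgpu_asic_reset(adev);
}
#endif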
3445
3446static void amdgpu_device_post_asic_reset(struct amdgpu_device *adev,
3447 struct amdgpu_job *job)
3448{
3449 int i;
3450
3451 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
3452 struct amdgpu_ring *ring = adev->rings[i];
3453
3454 if (!ring || !ring->sched.thread)
3455 continue;
3456
3457		/* only need to recover the scheduler of the given job's ring,
3458		 * or of all rings (in case @job is NULL),
3459		 * after the above amdgpu_reset is accomplished
3460		 */
3461		if ((!job || job->base.sched == &ring->sched) && !adev->asic_reset_res)
3462			drm_sched_job_recovery(&ring->sched);
3463
3464		kthread_unpark(ring->sched.thread);
3465 }
3466
3467	if (!amdgpu_device_has_dc_support(adev)) {
3468		drm_helper_resume_force_mode(adev->ddev);
3469	}
3470
3471 adev->asic_reset_res = 0;
3472}
3473
3474static void amdgpu_device_lock_adev(struct amdgpu_device *adev)
3475{
3476 mutex_lock(&adev->lock_reset);
3477 atomic_inc(&adev->gpu_reset_counter);
3478 adev->in_gpu_reset = 1;
3479	/* Block kfd: SRIOV would do it separately */
3480 if (!amdgpu_sriov_vf(adev))
3481 amdgpu_amdkfd_pre_reset(adev);
3482}
3483
3484static void amdgpu_device_unlock_adev(struct amdgpu_device *adev)
3485{
3486	/* Unlock kfd: SRIOV would do it separately */
3487 if (!amdgpu_sriov_vf(adev))
3488 amdgpu_amdkfd_post_reset(adev);
3489	amdgpu_vf_error_trans_all(adev);
3490 adev->in_gpu_reset = 0;
3491 mutex_unlock(&adev->lock_reset);
3492}
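
/*
 * Illustrative sketch (not part of the driver): every per-device reset
 * sequence in amdgpu_device_gpu_recover() below is bracketed by the two
 * helpers above, roughly:
 */
#if 0
	amdgpu_device_lock_adev(adev);   /* take lock_reset, bump counter, pause KFD */
	/* ... pre asic reset, ASIC reset, post asic reset ... */
	amdgpu_device_unlock_adev(adev); /* resume KFD, flush VF errors, drop lock */
#endif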
3493
3494
3495/**
3496 * amdgpu_device_gpu_recover - reset the asic and recover scheduler
3497 *
3498 * @adev: amdgpu_device pointer
3499 * @job: which job triggered the hang
3500 *
3501 * Attempt to reset the GPU if it has hung (all asics).
3502 * Attempt a soft reset or full reset and reinitialize the ASIC.
3503 * Returns 0 for success or an error on failure.
3504 */
3505
3506int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
3507 struct amdgpu_job *job)
3508{
3509 int r;
3510 struct amdgpu_hive_info *hive = NULL;
3511 bool need_full_reset = false;
3512 struct amdgpu_device *tmp_adev = NULL;
3513 struct list_head device_list, *device_list_handle = NULL;
3514
3515 INIT_LIST_HEAD(&device_list);
3516
3517 dev_info(adev->dev, "GPU reset begin!\n");
3518
3519 /*
3520	 * In case of an XGMI hive, disallow concurrent resets triggered
3521	 * by different nodes. There is also no point, since the node already
3522	 * executing the reset will reset all the other nodes in the hive.
3523 */
3524 hive = amdgpu_get_xgmi_hive(adev);
3525 if (hive && adev->gmc.xgmi.num_physical_nodes > 1 &&
3526 !mutex_trylock(&hive->hive_lock))
3527 return 0;
3528
3529	/* Start with adev pre asic reset first for the soft reset check. */
3530 amdgpu_device_lock_adev(adev);
3531 r = amdgpu_device_pre_asic_reset(adev,
3532 job,
3533 &need_full_reset);
3534 if (r) {
3535		/* TODO: Should we stop? */
3536		DRM_ERROR("GPU pre asic reset failed with error, %d for drm dev, %s ",
3537 r, adev->ddev->unique);
3538 adev->asic_reset_res = r;
3539 }
3540
3541 /* Build list of devices to reset */
3542 if (need_full_reset && adev->gmc.xgmi.num_physical_nodes > 1) {
3543 if (!hive) {
3544 amdgpu_device_unlock_adev(adev);
3545 return -ENODEV;
3546 }
3547
3548 /*
3549		 * In XGMI hive mode, the device reset is done for all the nodes
3550		 * in the hive to retrain all XGMI links, and hence the reset
3551		 * sequence is executed in a loop on all nodes.
3552 */
3553 device_list_handle = &hive->device_list;
3554 } else {
3555 list_add_tail(&adev->gmc.xgmi.head, &device_list);
3556 device_list_handle = &device_list;
3557 }
3558
3559retry:	/* Pre asic reset for the rest of the adevs in the XGMI hive. */
3560 list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) {
3561
3562 if (tmp_adev == adev)
3563 continue;
3564
3565 amdgpu_device_lock_adev(tmp_adev);
3566 r = amdgpu_device_pre_asic_reset(tmp_adev,
3567 NULL,
3568 &need_full_reset);
3569		/* TODO: Should we stop? */
3570		if (r) {
3571			DRM_ERROR("GPU pre asic reset failed with error, %d for drm dev, %s ",
3572 r, tmp_adev->ddev->unique);
3573 tmp_adev->asic_reset_res = r;
3574 }
3575 }
3576
3577	/* Actual ASIC resets if needed. */
3578 /* TODO Implement XGMI hive reset logic for SRIOV */
3579 if (amdgpu_sriov_vf(adev)) {
3580 r = amdgpu_device_reset_sriov(adev, job ? false : true);
3581 if (r)
3582 adev->asic_reset_res = r;
3583 } else {
3584 r = amdgpu_do_asic_reset(hive, device_list_handle, &need_full_reset);
3585 if (r && r == -EAGAIN)
3586 goto retry;
3587 }
3588
3589	/* Post ASIC reset for all devs. */
3590 list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) {
3591 amdgpu_device_post_asic_reset(tmp_adev, tmp_adev == adev ? job : NULL);
3592
3593 if (r) {
3594			/* bad news, how to tell it to userspace? */
3595 dev_info(tmp_adev->dev, "GPU reset(%d) failed\n", atomic_read(&adev->gpu_reset_counter));
3596 amdgpu_vf_error_put(tmp_adev, AMDGIM_ERROR_VF_GPU_RESET_FAIL, 0, r);
3597 } else {
3598 dev_info(tmp_adev->dev, "GPU reset(%d) succeeded!\n", atomic_read(&adev->gpu_reset_counter));
3599 }
3600
3601 amdgpu_device_unlock_adev(tmp_adev);
3602 }
3603
3604 if (hive && adev->gmc.xgmi.num_physical_nodes > 1)
3605 mutex_unlock(&hive->hive_lock);
3606
3607 if (r)
3608 dev_info(adev->dev, "GPU reset end with ret = %d\n", r);
3609 return r;
3610}
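
/*
 * Illustrative sketch (not part of the driver): a typical caller of
 * amdgpu_device_gpu_recover() is the scheduler's job timeout path, which
 * hands the hung job to the recovery entry point roughly as below.  The
 * function name and the to_amdgpu_ring() conversion are assumptions for
 * illustration only.
 */
#if 0
static void example_job_timedout_sketch(struct drm_sched_job *s_job)
{
	struct amdgpu_job *job = container_of(s_job, struct amdgpu_job, base);
	struct amdgpu_ring *ring = to_amdgpu_ring(s_job->sched);

	/* Full recovery: soft reset first, full reset as a fallback. */
	amdgpu_device_gpu_recover(ring->adev, job);
}
#endif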
3611
3612/**
3613 * amdgpu_device_get_pcie_info - fetch pcie info about the PCIE slot
3614 *
3615 * @adev: amdgpu_device pointer
3616 *
3617 * Fetches and stores in the driver the PCIE capabilities (gen speed
3618 * and lanes) of the slot the device is in. Handles APUs and
3619 * virtualized environments where PCIE config space may not be available.
3620 */
3621static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev)
3622{
3623 struct pci_dev *pdev;
3624 enum pci_bus_speed speed_cap;
3625 enum pcie_link_width link_width;
3626
3627 if (amdgpu_pcie_gen_cap)
3628 adev->pm.pcie_gen_mask = amdgpu_pcie_gen_cap;
3629
3630 if (amdgpu_pcie_lane_cap)
3631 adev->pm.pcie_mlw_mask = amdgpu_pcie_lane_cap;
3632
3633 /* covers APUs as well */
3634 if (pci_is_root_bus(adev->pdev->bus)) {
3635 if (adev->pm.pcie_gen_mask == 0)
3636 adev->pm.pcie_gen_mask = AMDGPU_DEFAULT_PCIE_GEN_MASK;
3637 if (adev->pm.pcie_mlw_mask == 0)
3638 adev->pm.pcie_mlw_mask = AMDGPU_DEFAULT_PCIE_MLW_MASK;
3639		return;
3640	}
3641
3642	if (adev->pm.pcie_gen_mask == 0) {
3643 /* asic caps */
3644 pdev = adev->pdev;
3645 speed_cap = pcie_get_speed_cap(pdev);
3646 if (speed_cap == PCI_SPEED_UNKNOWN) {
3647 adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
3648 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
3649 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3);
3650		} else {
3651 if (speed_cap == PCIE_SPEED_16_0GT)
3652 adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
3653 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
3654 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3 |
3655 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN4);
3656 else if (speed_cap == PCIE_SPEED_8_0GT)
3657 adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
3658 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
3659 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3);
3660 else if (speed_cap == PCIE_SPEED_5_0GT)
3661 adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
3662 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2);
3663 else
3664 adev->pm.pcie_gen_mask |= CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1;
3665 }
3666 /* platform caps */
3667 pdev = adev->ddev->pdev->bus->self;
3668 speed_cap = pcie_get_speed_cap(pdev);
3669 if (speed_cap == PCI_SPEED_UNKNOWN) {
3670 adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
3671 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2);
3672 } else {
3673 if (speed_cap == PCIE_SPEED_16_0GT)
3674 adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
3675 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2 |
3676 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3 |
3677 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN4);
3678 else if (speed_cap == PCIE_SPEED_8_0GT)
3679 adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
3680 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2 |
3681 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3);
3682 else if (speed_cap == PCIE_SPEED_5_0GT)
3683 adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
3684 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2);
3685 else
3686 adev->pm.pcie_gen_mask |= CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1;
3687
3688 }
3689 }
3690 if (adev->pm.pcie_mlw_mask == 0) {
3691 pdev = adev->ddev->pdev->bus->self;
3692 link_width = pcie_get_width_cap(pdev);
3693 if (link_width == PCIE_LNK_WIDTH_UNKNOWN) {
3694 adev->pm.pcie_mlw_mask |= AMDGPU_DEFAULT_PCIE_MLW_MASK;
3695 } else {
3696 switch (link_width) {
3697 case PCIE_LNK_X32:
3698 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X32 |
3699 CAIL_PCIE_LINK_WIDTH_SUPPORT_X16 |
3700 CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 |
3701 CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
3702 CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
3703 CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
3704 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
3705 break;
3706			case PCIE_LNK_X16:
3707 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X16 |
3708 CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 |
3709 CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
3710 CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
3711 CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
3712 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
3713 break;
3714			case PCIE_LNK_X12:
3715 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 |
3716 CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
3717 CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
3718 CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
3719 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
3720 break;
3721			case PCIE_LNK_X8:
3722 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
3723 CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
3724 CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
3725 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
3726 break;
3727			case PCIE_LNK_X4:
3728 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
3729 CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
3730 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
3731 break;
3732			case PCIE_LNK_X2:
3733 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
3734 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
3735 break;
3736			case PCIE_LNK_X1:
3737 adev->pm.pcie_mlw_mask = CAIL_PCIE_LINK_WIDTH_SUPPORT_X1;
3738 break;
3739 default:
3740 break;
3741 }
3742 }
3743 }
3744}
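
/*
 * Illustrative sketch (not part of the driver; the function name is
 * hypothetical): code that consumes the masks filled in above would
 * typically test both the ASIC and the platform bits, e.g. a gen3 check
 * like this one.
 */
#if 0
static bool example_supports_pcie_gen3_sketch(struct amdgpu_device *adev)
{
	return (adev->pm.pcie_gen_mask & CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3) &&
	       (adev->pm.pcie_gen_mask & CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3);
}
#endif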
3745