drm/amdgpu: Expose hive adev list and xgmi_mutex
[linux-2.6-block.git] drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
/*
 * Copyright 2008 Advanced Micro Devices, Inc.
 * Copyright 2008 Red Hat Inc.
 * Copyright 2009 Jerome Glisse.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 * Authors: Dave Airlie
 *          Alex Deucher
 *          Jerome Glisse
 */
#include <linux/power_supply.h>
#include <linux/kthread.h>
#include <linux/console.h>
#include <linux/slab.h>
#include <drm/drmP.h>
#include <drm/drm_crtc_helper.h>
#include <drm/drm_atomic_helper.h>
#include <drm/amdgpu_drm.h>
#include <linux/vgaarb.h>
#include <linux/vga_switcheroo.h>
#include <linux/efi.h>
#include "amdgpu.h"
#include "amdgpu_trace.h"
#include "amdgpu_i2c.h"
#include "atom.h"
#include "amdgpu_atombios.h"
#include "amdgpu_atomfirmware.h"
#include "amd_pcie.h"
#ifdef CONFIG_DRM_AMDGPU_SI
#include "si.h"
#endif
#ifdef CONFIG_DRM_AMDGPU_CIK
#include "cik.h"
#endif
#include "vi.h"
#include "soc15.h"
#include "bif/bif_4_1_d.h"
#include <linux/pci.h>
#include <linux/firmware.h>
#include "amdgpu_vf_error.h"

#include "amdgpu_amdkfd.h"
#include "amdgpu_pm.h"

#include "amdgpu_xgmi.h"

MODULE_FIRMWARE("amdgpu/vega10_gpu_info.bin");
MODULE_FIRMWARE("amdgpu/vega12_gpu_info.bin");
MODULE_FIRMWARE("amdgpu/raven_gpu_info.bin");
MODULE_FIRMWARE("amdgpu/picasso_gpu_info.bin");
MODULE_FIRMWARE("amdgpu/raven2_gpu_info.bin");

#define AMDGPU_RESUME_MS		2000

static const char *amdgpu_asic_name[] = {
	"TAHITI",
	"PITCAIRN",
	"VERDE",
	"OLAND",
	"HAINAN",
	"BONAIRE",
	"KAVERI",
	"KABINI",
	"HAWAII",
	"MULLINS",
	"TOPAZ",
	"TONGA",
	"FIJI",
	"CARRIZO",
	"STONEY",
	"POLARIS10",
	"POLARIS11",
	"POLARIS12",
	"VEGAM",
	"VEGA10",
	"VEGA12",
	"VEGA20",
	"RAVEN",
	"LAST",
};

static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev);

/**
 * amdgpu_device_is_px - Is the device a dGPU with HG/PX power control
 *
 * @dev: drm_device pointer
 *
 * Returns true if the device is a dGPU with HG/PX power control,
 * otherwise return false.
 */
bool amdgpu_device_is_px(struct drm_device *dev)
{
	struct amdgpu_device *adev = dev->dev_private;

	if (adev->flags & AMD_IS_PX)
		return true;
	return false;
}

/*
 * MMIO register access helper functions.
 */
/**
 * amdgpu_mm_rreg - read a memory mapped IO register
 *
 * @adev: amdgpu_device pointer
 * @reg: dword aligned register offset
 * @acc_flags: access flags which require special behavior
 *
 * Returns the 32 bit value from the offset specified.
 */
uint32_t amdgpu_mm_rreg(struct amdgpu_device *adev, uint32_t reg,
			uint32_t acc_flags)
{
	uint32_t ret;

	if (!(acc_flags & AMDGPU_REGS_NO_KIQ) && amdgpu_sriov_runtime(adev))
		return amdgpu_virt_kiq_rreg(adev, reg);

	if ((reg * 4) < adev->rmmio_size && !(acc_flags & AMDGPU_REGS_IDX))
		ret = readl(((void __iomem *)adev->rmmio) + (reg * 4));
	else {
		unsigned long flags;

		spin_lock_irqsave(&adev->mmio_idx_lock, flags);
		writel((reg * 4), ((void __iomem *)adev->rmmio) + (mmMM_INDEX * 4));
		ret = readl(((void __iomem *)adev->rmmio) + (mmMM_DATA * 4));
		spin_unlock_irqrestore(&adev->mmio_idx_lock, flags);
	}
	trace_amdgpu_mm_rreg(adev->pdev->device, reg, ret);
	return ret;
}

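/*
 * Usage sketch (illustrative, not part of the driver): most callers go
 * through the RREG32()/WREG32() wrappers from amdgpu.h instead of calling
 * amdgpu_mm_rreg() directly. Registers beyond the mapped BAR fall back to
 * the MM_INDEX/MM_DATA window above.
 *
 *	u32 val = RREG32(reg);					// normal path
 *	val = amdgpu_mm_rreg(adev, reg, AMDGPU_REGS_NO_KIQ);	// skip KIQ under SR-IOV
 */
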
/**
 * amdgpu_mm_rreg8 - read a memory mapped IO register with byte offset
 *
 * @adev: amdgpu_device pointer
 * @offset: byte aligned register offset from MMIO start
 *
 * Returns the 8 bit value from the offset specified.
 */
uint8_t amdgpu_mm_rreg8(struct amdgpu_device *adev, uint32_t offset)
{
	if (offset < adev->rmmio_size)
		return (readb(adev->rmmio + offset));
	BUG();
}

/**
 * amdgpu_mm_wreg8 - write to a memory mapped IO register with byte offset
 *
 * @adev: amdgpu_device pointer
 * @offset: byte aligned register offset from MMIO start
 * @value: 8 bit value to write
 *
 * Writes the value specified to the offset specified.
 */
void amdgpu_mm_wreg8(struct amdgpu_device *adev, uint32_t offset, uint8_t value)
{
	if (offset < adev->rmmio_size)
		writeb(value, adev->rmmio + offset);
	else
		BUG();
}

/**
 * amdgpu_mm_wreg - write to a memory mapped IO register
 *
 * @adev: amdgpu_device pointer
 * @reg: dword aligned register offset
 * @v: 32 bit value to write to the register
 * @acc_flags: access flags which require special behavior
 *
 * Writes the value specified to the offset specified.
 */
void amdgpu_mm_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v,
		    uint32_t acc_flags)
{
	trace_amdgpu_mm_wreg(adev->pdev->device, reg, v);

	if (adev->asic_type >= CHIP_VEGA10 && reg == 0) {
		adev->last_mm_index = v;
	}

	if (!(acc_flags & AMDGPU_REGS_NO_KIQ) && amdgpu_sriov_runtime(adev))
		return amdgpu_virt_kiq_wreg(adev, reg, v);

	if ((reg * 4) < adev->rmmio_size && !(acc_flags & AMDGPU_REGS_IDX))
		writel(v, ((void __iomem *)adev->rmmio) + (reg * 4));
	else {
		unsigned long flags;

		spin_lock_irqsave(&adev->mmio_idx_lock, flags);
		writel((reg * 4), ((void __iomem *)adev->rmmio) + (mmMM_INDEX * 4));
		writel(v, ((void __iomem *)adev->rmmio) + (mmMM_DATA * 4));
		spin_unlock_irqrestore(&adev->mmio_idx_lock, flags);
	}

	if (adev->asic_type >= CHIP_VEGA10 && reg == 1 && adev->last_mm_index == 0x5702C) {
		udelay(500);
	}
}

/**
 * amdgpu_io_rreg - read an IO register
 *
 * @adev: amdgpu_device pointer
 * @reg: dword aligned register offset
 *
 * Returns the 32 bit value from the offset specified.
 */
u32 amdgpu_io_rreg(struct amdgpu_device *adev, u32 reg)
{
	if ((reg * 4) < adev->rio_mem_size)
		return ioread32(adev->rio_mem + (reg * 4));
	else {
		iowrite32((reg * 4), adev->rio_mem + (mmMM_INDEX * 4));
		return ioread32(adev->rio_mem + (mmMM_DATA * 4));
	}
}

/**
 * amdgpu_io_wreg - write to an IO register
 *
 * @adev: amdgpu_device pointer
 * @reg: dword aligned register offset
 * @v: 32 bit value to write to the register
 *
 * Writes the value specified to the offset specified.
 */
void amdgpu_io_wreg(struct amdgpu_device *adev, u32 reg, u32 v)
{
	if (adev->asic_type >= CHIP_VEGA10 && reg == 0) {
		adev->last_mm_index = v;
	}

	if ((reg * 4) < adev->rio_mem_size)
		iowrite32(v, adev->rio_mem + (reg * 4));
	else {
		iowrite32((reg * 4), adev->rio_mem + (mmMM_INDEX * 4));
		iowrite32(v, adev->rio_mem + (mmMM_DATA * 4));
	}

	if (adev->asic_type >= CHIP_VEGA10 && reg == 1 && adev->last_mm_index == 0x5702C) {
		udelay(500);
	}
}

/**
 * amdgpu_mm_rdoorbell - read a doorbell dword
 *
 * @adev: amdgpu_device pointer
 * @index: doorbell index
 *
 * Returns the value in the doorbell aperture at the
 * requested doorbell index (CIK).
 */
u32 amdgpu_mm_rdoorbell(struct amdgpu_device *adev, u32 index)
{
	if (index < adev->doorbell.num_doorbells) {
		return readl(adev->doorbell.ptr + index);
	} else {
		DRM_ERROR("reading beyond doorbell aperture: 0x%08x!\n", index);
		return 0;
	}
}

/**
 * amdgpu_mm_wdoorbell - write a doorbell dword
 *
 * @adev: amdgpu_device pointer
 * @index: doorbell index
 * @v: value to write
 *
 * Writes @v to the doorbell aperture at the
 * requested doorbell index (CIK).
 */
void amdgpu_mm_wdoorbell(struct amdgpu_device *adev, u32 index, u32 v)
{
	if (index < adev->doorbell.num_doorbells) {
		writel(v, adev->doorbell.ptr + index);
	} else {
		DRM_ERROR("writing beyond doorbell aperture: 0x%08x!\n", index);
	}
}

/**
 * amdgpu_mm_rdoorbell64 - read a doorbell Qword
 *
 * @adev: amdgpu_device pointer
 * @index: doorbell index
 *
 * Returns the value in the doorbell aperture at the
 * requested doorbell index (VEGA10+).
 */
u64 amdgpu_mm_rdoorbell64(struct amdgpu_device *adev, u32 index)
{
	if (index < adev->doorbell.num_doorbells) {
		return atomic64_read((atomic64_t *)(adev->doorbell.ptr + index));
	} else {
		DRM_ERROR("reading beyond doorbell aperture: 0x%08x!\n", index);
		return 0;
	}
}

/**
 * amdgpu_mm_wdoorbell64 - write a doorbell Qword
 *
 * @adev: amdgpu_device pointer
 * @index: doorbell index
 * @v: value to write
 *
 * Writes @v to the doorbell aperture at the
 * requested doorbell index (VEGA10+).
 */
void amdgpu_mm_wdoorbell64(struct amdgpu_device *adev, u32 index, u64 v)
{
	if (index < adev->doorbell.num_doorbells) {
		atomic64_set((atomic64_t *)(adev->doorbell.ptr + index), v);
	} else {
		DRM_ERROR("writing beyond doorbell aperture: 0x%08x!\n", index);
	}
}

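/*
 * Usage sketch (illustrative): ring code usually goes through the
 * WDOORBELL32()/WDOORBELL64() wrappers from amdgpu.h, which resolve to the
 * helpers above, e.g. to kick a queue after bumping its write pointer:
 *
 *	WDOORBELL64(ring->doorbell_index, ring->wptr);
 */
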
/**
 * amdgpu_invalid_rreg - dummy reg read function
 *
 * @adev: amdgpu device pointer
 * @reg: offset of register
 *
 * Dummy register read function. Used for register blocks
 * that certain asics don't have (all asics).
 * Returns the value in the register.
 */
static uint32_t amdgpu_invalid_rreg(struct amdgpu_device *adev, uint32_t reg)
{
	DRM_ERROR("Invalid callback to read register 0x%04X\n", reg);
	BUG();
	return 0;
}

/**
 * amdgpu_invalid_wreg - dummy reg write function
 *
 * @adev: amdgpu device pointer
 * @reg: offset of register
 * @v: value to write to the register
 *
 * Dummy register write function. Used for register blocks
 * that certain asics don't have (all asics).
 */
static void amdgpu_invalid_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v)
{
	DRM_ERROR("Invalid callback to write register 0x%04X with 0x%08X\n",
		  reg, v);
	BUG();
}

/**
 * amdgpu_block_invalid_rreg - dummy reg read function
 *
 * @adev: amdgpu device pointer
 * @block: offset of instance
 * @reg: offset of register
 *
 * Dummy register read function. Used for register blocks
 * that certain asics don't have (all asics).
 * Returns the value in the register.
 */
static uint32_t amdgpu_block_invalid_rreg(struct amdgpu_device *adev,
					  uint32_t block, uint32_t reg)
{
	DRM_ERROR("Invalid callback to read register 0x%04X in block 0x%04X\n",
		  reg, block);
	BUG();
	return 0;
}

/**
 * amdgpu_block_invalid_wreg - dummy reg write function
 *
 * @adev: amdgpu device pointer
 * @block: offset of instance
 * @reg: offset of register
 * @v: value to write to the register
 *
 * Dummy register write function. Used for register blocks
 * that certain asics don't have (all asics).
 */
static void amdgpu_block_invalid_wreg(struct amdgpu_device *adev,
				      uint32_t block,
				      uint32_t reg, uint32_t v)
{
	DRM_ERROR("Invalid block callback to write register 0x%04X in block 0x%04X with 0x%08X\n",
		  reg, block, v);
	BUG();
}

/**
 * amdgpu_device_vram_scratch_init - allocate the VRAM scratch page
 *
 * @adev: amdgpu device pointer
 *
 * Allocates a scratch page of VRAM for use by various things in the
 * driver.
 */
static int amdgpu_device_vram_scratch_init(struct amdgpu_device *adev)
{
	return amdgpu_bo_create_kernel(adev, AMDGPU_GPU_PAGE_SIZE,
				       PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM,
				       &adev->vram_scratch.robj,
				       &adev->vram_scratch.gpu_addr,
				       (void **)&adev->vram_scratch.ptr);
}

/**
 * amdgpu_device_vram_scratch_fini - Free the VRAM scratch page
 *
 * @adev: amdgpu device pointer
 *
 * Frees the VRAM scratch page.
 */
static void amdgpu_device_vram_scratch_fini(struct amdgpu_device *adev)
{
	amdgpu_bo_free_kernel(&adev->vram_scratch.robj, NULL, NULL);
}

/**
 * amdgpu_device_program_register_sequence - program an array of registers.
 *
 * @adev: amdgpu_device pointer
 * @registers: pointer to the register array
 * @array_size: size of the register array
 *
 * Programs an array of registers with AND and OR masks.
 * This is a helper for setting golden registers.
 */
void amdgpu_device_program_register_sequence(struct amdgpu_device *adev,
					     const u32 *registers,
					     const u32 array_size)
{
	u32 tmp, reg, and_mask, or_mask;
	int i;

	if (array_size % 3)
		return;

	for (i = 0; i < array_size; i += 3) {
		reg = registers[i + 0];
		and_mask = registers[i + 1];
		or_mask = registers[i + 2];

		if (and_mask == 0xffffffff) {
			tmp = or_mask;
		} else {
			tmp = RREG32(reg);
			tmp &= ~and_mask;
			tmp |= or_mask;
		}
		WREG32(reg, tmp);
	}
}

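/*
 * Usage sketch (illustrative; register names are made up): golden register
 * tables are flat arrays of {offset, and_mask, or_mask} triples, so
 * array_size must be a multiple of 3:
 *
 *	static const u32 golden_settings_example[] = {
 *		mmEXAMPLE_REG_A, 0xffffffff, 0x00000100,	// full overwrite
 *		mmEXAMPLE_REG_B, 0x0000000f, 0x00000002,	// read-modify-write
 *	};
 *
 *	amdgpu_device_program_register_sequence(adev, golden_settings_example,
 *						ARRAY_SIZE(golden_settings_example));
 */
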
/**
 * amdgpu_device_pci_config_reset - reset the GPU
 *
 * @adev: amdgpu_device pointer
 *
 * Resets the GPU using the pci config reset sequence.
 * Only applicable to asics prior to vega10.
 */
void amdgpu_device_pci_config_reset(struct amdgpu_device *adev)
{
	pci_write_config_dword(adev->pdev, 0x7c, AMDGPU_ASIC_RESET_DATA);
}

/*
 * GPU doorbell aperture helpers function.
 */
/**
 * amdgpu_device_doorbell_init - Init doorbell driver information.
 *
 * @adev: amdgpu_device pointer
 *
 * Init doorbell driver information (CIK)
 * Returns 0 on success, error on failure.
 */
static int amdgpu_device_doorbell_init(struct amdgpu_device *adev)
{
	amdgpu_asic_init_doorbell_index(adev);

	/* No doorbell on SI hardware generation */
	if (adev->asic_type < CHIP_BONAIRE) {
		adev->doorbell.base = 0;
		adev->doorbell.size = 0;
		adev->doorbell.num_doorbells = 0;
		adev->doorbell.ptr = NULL;
		return 0;
	}

	if (pci_resource_flags(adev->pdev, 2) & IORESOURCE_UNSET)
		return -EINVAL;

	/* doorbell bar mapping */
	adev->doorbell.base = pci_resource_start(adev->pdev, 2);
	adev->doorbell.size = pci_resource_len(adev->pdev, 2);

	adev->doorbell.num_doorbells = min_t(u32, adev->doorbell.size / sizeof(u32),
					     adev->doorbell_index.max_assignment + 1);
	if (adev->doorbell.num_doorbells == 0)
		return -EINVAL;

	/* For Vega, reserve and map two pages on doorbell BAR since SDMA
	 * paging queue doorbell use the second page
	 */
	if (adev->asic_type >= CHIP_VEGA10)
		adev->doorbell.num_doorbells *= 2;

	adev->doorbell.ptr = ioremap(adev->doorbell.base,
				     adev->doorbell.num_doorbells *
				     sizeof(u32));
	if (adev->doorbell.ptr == NULL)
		return -ENOMEM;

	return 0;
}

/**
 * amdgpu_device_doorbell_fini - Tear down doorbell driver information.
 *
 * @adev: amdgpu_device pointer
 *
 * Tear down doorbell driver information (CIK)
 */
static void amdgpu_device_doorbell_fini(struct amdgpu_device *adev)
{
	iounmap(adev->doorbell.ptr);
	adev->doorbell.ptr = NULL;
}

/*
 * amdgpu_device_wb_*()
 * Writeback is the method by which the GPU updates special pages in memory
 * with the status of certain GPU events (fences, ring pointers, etc.).
 */

/**
 * amdgpu_device_wb_fini - Disable Writeback and free memory
 *
 * @adev: amdgpu_device pointer
 *
 * Disables Writeback and frees the Writeback memory (all asics).
 * Used at driver shutdown.
 */
static void amdgpu_device_wb_fini(struct amdgpu_device *adev)
{
	if (adev->wb.wb_obj) {
		amdgpu_bo_free_kernel(&adev->wb.wb_obj,
				      &adev->wb.gpu_addr,
				      (void **)&adev->wb.wb);
		adev->wb.wb_obj = NULL;
	}
}

/**
 * amdgpu_device_wb_init - Init Writeback driver info and allocate memory
 *
 * @adev: amdgpu_device pointer
 *
 * Initializes writeback and allocates writeback memory (all asics).
 * Used at driver startup.
 * Returns 0 on success or an -error on failure.
 */
static int amdgpu_device_wb_init(struct amdgpu_device *adev)
{
	int r;

	if (adev->wb.wb_obj == NULL) {
		/* AMDGPU_MAX_WB * sizeof(uint32_t) * 8 = AMDGPU_MAX_WB 256bit slots */
		r = amdgpu_bo_create_kernel(adev, AMDGPU_MAX_WB * sizeof(uint32_t) * 8,
					    PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT,
					    &adev->wb.wb_obj, &adev->wb.gpu_addr,
					    (void **)&adev->wb.wb);
		if (r) {
			dev_warn(adev->dev, "(%d) create WB bo failed\n", r);
			return r;
		}

		adev->wb.num_wb = AMDGPU_MAX_WB;
		memset(&adev->wb.used, 0, sizeof(adev->wb.used));

		/* clear wb memory */
		memset((char *)adev->wb.wb, 0, AMDGPU_MAX_WB * sizeof(uint32_t) * 8);
	}

	return 0;
}

/**
 * amdgpu_device_wb_get - Allocate a wb entry
 *
 * @adev: amdgpu_device pointer
 * @wb: wb index
 *
 * Allocate a wb slot for use by the driver (all asics).
 * Returns 0 on success or -EINVAL on failure.
 */
int amdgpu_device_wb_get(struct amdgpu_device *adev, u32 *wb)
{
	unsigned long offset = find_first_zero_bit(adev->wb.used, adev->wb.num_wb);

	if (offset < adev->wb.num_wb) {
		__set_bit(offset, adev->wb.used);
		*wb = offset << 3; /* convert to dw offset */
		return 0;
	} else {
		return -EINVAL;
	}
}

/**
 * amdgpu_device_wb_free - Free a wb entry
 *
 * @adev: amdgpu_device pointer
 * @wb: wb index
 *
 * Free a wb slot allocated for use by the driver (all asics)
 */
void amdgpu_device_wb_free(struct amdgpu_device *adev, u32 wb)
{
	wb >>= 3;
	if (wb < adev->wb.num_wb)
		__clear_bit(wb, adev->wb.used);
}

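/*
 * Usage sketch (illustrative): a ring that needs a writeback slot for a
 * fence or read pointer allocates a dword offset, derives the CPU and GPU
 * views from it, and frees it on teardown:
 *
 *	u32 wb;
 *
 *	if (amdgpu_device_wb_get(adev, &wb))
 *		return -EINVAL;
 *	// CPU view: adev->wb.wb[wb]; GPU view: adev->wb.gpu_addr + wb * 4
 *	amdgpu_device_wb_free(adev, wb);
 */
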
/**
 * amdgpu_device_resize_fb_bar - try to resize FB BAR
 *
 * @adev: amdgpu_device pointer
 *
 * Try to resize FB BAR to make all VRAM CPU accessible. We try very hard not
 * to fail, but if any of the BARs is not accessible after the resize we abort
 * driver loading by returning -ENODEV.
 */
int amdgpu_device_resize_fb_bar(struct amdgpu_device *adev)
{
	u64 space_needed = roundup_pow_of_two(adev->gmc.real_vram_size);
	u32 rbar_size = order_base_2(((space_needed >> 20) | 1)) - 1;
	struct pci_bus *root;
	struct resource *res;
	unsigned i;
	u16 cmd;
	int r;

	/* Bypass for VF */
	if (amdgpu_sriov_vf(adev))
		return 0;

	/* Check if the root BUS has 64bit memory resources */
	root = adev->pdev->bus;
	while (root->parent)
		root = root->parent;

	pci_bus_for_each_resource(root, res, i) {
		if (res && res->flags & (IORESOURCE_MEM | IORESOURCE_MEM_64) &&
		    res->start > 0x100000000ull)
			break;
	}

	/* Trying to resize is pointless without a root hub window above 4GB */
	if (!res)
		return 0;

	/* Disable memory decoding while we change the BAR addresses and size */
	pci_read_config_word(adev->pdev, PCI_COMMAND, &cmd);
	pci_write_config_word(adev->pdev, PCI_COMMAND,
			      cmd & ~PCI_COMMAND_MEMORY);

	/* Free the VRAM and doorbell BAR, we most likely need to move both. */
	amdgpu_device_doorbell_fini(adev);
	if (adev->asic_type >= CHIP_BONAIRE)
		pci_release_resource(adev->pdev, 2);

	pci_release_resource(adev->pdev, 0);

	r = pci_resize_resource(adev->pdev, 0, rbar_size);
	if (r == -ENOSPC)
		DRM_INFO("Not enough PCI address space for a large BAR.");
	else if (r && r != -ENOTSUPP)
		DRM_ERROR("Problem resizing BAR0 (%d).", r);

	pci_assign_unassigned_bus_resources(adev->pdev->bus);

	/* When the doorbell or fb BAR isn't available we have no chance of
	 * using the device.
	 */
	r = amdgpu_device_doorbell_init(adev);
	if (r || (pci_resource_flags(adev->pdev, 0) & IORESOURCE_UNSET))
		return -ENODEV;

	pci_write_config_word(adev->pdev, PCI_COMMAND, cmd);

	return 0;
}

/*
 * GPU helpers function.
 */
/**
 * amdgpu_device_need_post - check if the hw need post or not
 *
 * @adev: amdgpu_device pointer
 *
 * Check if the asic has been initialized (all asics) at driver startup
 * or post is needed if hw reset is performed.
 * Returns true if post is needed, false if not.
 */
bool amdgpu_device_need_post(struct amdgpu_device *adev)
{
	uint32_t reg;

	if (amdgpu_sriov_vf(adev))
		return false;

	if (amdgpu_passthrough(adev)) {
		/* for FIJI: In whole GPU pass-through virtualization case, after VM reboot
		 * some old smc fw still need driver do vPost otherwise gpu hang, while
		 * those smc fw version above 22.15 doesn't have this flaw, so we force
		 * vpost executed for smc version below 22.15
		 */
		if (adev->asic_type == CHIP_FIJI) {
			int err;
			uint32_t fw_ver;

			err = request_firmware(&adev->pm.fw, "amdgpu/fiji_smc.bin", adev->dev);
			/* force vPost if error occurred */
			if (err)
				return true;

			fw_ver = *((uint32_t *)adev->pm.fw->data + 69);
			if (fw_ver < 0x00160e00)
				return true;
		}
	}

	if (adev->has_hw_reset) {
		adev->has_hw_reset = false;
		return true;
	}

	/* bios scratch used on CIK+ */
	if (adev->asic_type >= CHIP_BONAIRE)
		return amdgpu_atombios_scratch_need_asic_init(adev);

	/* check MEM_SIZE for older asics */
	reg = amdgpu_asic_get_config_memsize(adev);

	if ((reg != 0) && (reg != 0xffffffff))
		return false;

	return true;
}

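/*
 * Usage sketch (illustrative): during device init the driver only posts the
 * card through the ATOM BIOS when this check says it is required, roughly:
 *
 *	if (amdgpu_device_need_post(adev))
 *		amdgpu_atom_asic_init(adev->mode_info.atom_context);
 */
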
/* if we get transitioned to only one device, take VGA back */
/**
 * amdgpu_device_vga_set_decode - enable/disable vga decode
 *
 * @cookie: amdgpu_device pointer
 * @state: enable/disable vga decode
 *
 * Enable/disable vga decode (all asics).
 * Returns VGA resource flags.
 */
static unsigned int amdgpu_device_vga_set_decode(void *cookie, bool state)
{
	struct amdgpu_device *adev = cookie;

	amdgpu_asic_set_vga_state(adev, state);
	if (state)
		return VGA_RSRC_LEGACY_IO | VGA_RSRC_LEGACY_MEM |
		       VGA_RSRC_NORMAL_IO | VGA_RSRC_NORMAL_MEM;
	else
		return VGA_RSRC_NORMAL_IO | VGA_RSRC_NORMAL_MEM;
}

/**
 * amdgpu_device_check_block_size - validate the vm block size
 *
 * @adev: amdgpu_device pointer
 *
 * Validates the vm block size specified via module parameter.
 * The vm block size defines the number of bits in page table versus page
 * directory; a page is 4KB so we have 12 bits of offset, a minimum of 9 bits
 * in the page table, and the remaining bits are in the page directory.
 */
static void amdgpu_device_check_block_size(struct amdgpu_device *adev)
{
	/* defines number of bits in page table versus page directory,
	 * a page is 4KB so we have 12 bits offset, minimum 9 bits in the
	 * page table and the remaining bits are in the page directory */
	if (amdgpu_vm_block_size == -1)
		return;

	if (amdgpu_vm_block_size < 9) {
		dev_warn(adev->dev, "VM page table size (%d) too small\n",
			 amdgpu_vm_block_size);
		amdgpu_vm_block_size = -1;
	}
}

/**
 * amdgpu_device_check_vm_size - validate the vm size
 *
 * @adev: amdgpu_device pointer
 *
 * Validates the vm size in GB specified via module parameter.
 * The VM size is the size of the GPU virtual memory space in GB.
 */
static void amdgpu_device_check_vm_size(struct amdgpu_device *adev)
{
	/* no need to check the default value */
	if (amdgpu_vm_size == -1)
		return;

	if (amdgpu_vm_size < 1) {
		dev_warn(adev->dev, "VM size (%d) too small, min is 1GB\n",
			 amdgpu_vm_size);
		amdgpu_vm_size = -1;
	}
}

static void amdgpu_device_check_smu_prv_buffer_size(struct amdgpu_device *adev)
{
	struct sysinfo si;
	bool is_os_64 = (sizeof(void *) == 8);
	uint64_t total_memory;
	uint64_t dram_size_seven_GB = 0x1B8000000;
	uint64_t dram_size_three_GB = 0xB8000000;

	if (amdgpu_smu_memory_pool_size == 0)
		return;

	if (!is_os_64) {
		DRM_WARN("Not 64-bit OS, feature not supported\n");
		goto def_value;
	}
	si_meminfo(&si);
	total_memory = (uint64_t)si.totalram * si.mem_unit;

	if ((amdgpu_smu_memory_pool_size == 1) ||
	    (amdgpu_smu_memory_pool_size == 2)) {
		if (total_memory < dram_size_three_GB)
			goto def_value1;
	} else if ((amdgpu_smu_memory_pool_size == 4) ||
		   (amdgpu_smu_memory_pool_size == 8)) {
		if (total_memory < dram_size_seven_GB)
			goto def_value1;
	} else {
		DRM_WARN("Smu memory pool size not supported\n");
		goto def_value;
	}
	adev->pm.smu_prv_buffer_size = amdgpu_smu_memory_pool_size << 28;

	return;

def_value1:
	DRM_WARN("Not enough system memory\n");
def_value:
	adev->pm.smu_prv_buffer_size = 0;
}

/**
 * amdgpu_device_check_arguments - validate module params
 *
 * @adev: amdgpu_device pointer
 *
 * Validates certain module parameters and updates
 * the associated values used by the driver (all asics).
 */
static void amdgpu_device_check_arguments(struct amdgpu_device *adev)
{
	if (amdgpu_sched_jobs < 4) {
		dev_warn(adev->dev, "sched jobs (%d) must be at least 4\n",
			 amdgpu_sched_jobs);
		amdgpu_sched_jobs = 4;
	} else if (!is_power_of_2(amdgpu_sched_jobs)) {
		dev_warn(adev->dev, "sched jobs (%d) must be a power of 2\n",
			 amdgpu_sched_jobs);
		amdgpu_sched_jobs = roundup_pow_of_two(amdgpu_sched_jobs);
	}

	if (amdgpu_gart_size != -1 && amdgpu_gart_size < 32) {
		/* gart size must be greater or equal to 32M */
		dev_warn(adev->dev, "gart size (%d) too small\n",
			 amdgpu_gart_size);
		amdgpu_gart_size = -1;
	}

	if (amdgpu_gtt_size != -1 && amdgpu_gtt_size < 32) {
		/* gtt size must be greater or equal to 32M */
		dev_warn(adev->dev, "gtt size (%d) too small\n",
			 amdgpu_gtt_size);
		amdgpu_gtt_size = -1;
	}

	/* valid range is between 4 and 9 inclusive */
	if (amdgpu_vm_fragment_size != -1 &&
	    (amdgpu_vm_fragment_size > 9 || amdgpu_vm_fragment_size < 4)) {
		dev_warn(adev->dev, "valid range is between 4 and 9\n");
		amdgpu_vm_fragment_size = -1;
	}

	amdgpu_device_check_smu_prv_buffer_size(adev);

	amdgpu_device_check_vm_size(adev);

	amdgpu_device_check_block_size(adev);

	if (amdgpu_vram_page_split != -1 && (amdgpu_vram_page_split < 16 ||
	    !is_power_of_2(amdgpu_vram_page_split))) {
		dev_warn(adev->dev, "invalid VRAM page split (%d)\n",
			 amdgpu_vram_page_split);
		amdgpu_vram_page_split = 1024;
	}

	if (amdgpu_lockup_timeout == 0) {
		dev_warn(adev->dev, "lockup_timeout must be > 0, adjusting to 10000\n");
		amdgpu_lockup_timeout = 10000;
	}

	adev->firmware.load_type = amdgpu_ucode_get_load_type(adev, amdgpu_fw_load_type);
}

/**
 * amdgpu_switcheroo_set_state - set switcheroo state
 *
 * @pdev: pci dev pointer
 * @state: vga_switcheroo state
 *
 * Callback for the switcheroo driver. Suspends or resumes the
 * asics before or after it is powered up using ACPI methods.
 */
static void amdgpu_switcheroo_set_state(struct pci_dev *pdev, enum vga_switcheroo_state state)
{
	struct drm_device *dev = pci_get_drvdata(pdev);

	if (amdgpu_device_is_px(dev) && state == VGA_SWITCHEROO_OFF)
		return;

	if (state == VGA_SWITCHEROO_ON) {
		pr_info("amdgpu: switched on\n");
		/* don't suspend or resume card normally */
		dev->switch_power_state = DRM_SWITCH_POWER_CHANGING;

		amdgpu_device_resume(dev, true, true);

		dev->switch_power_state = DRM_SWITCH_POWER_ON;
		drm_kms_helper_poll_enable(dev);
	} else {
		pr_info("amdgpu: switched off\n");
		drm_kms_helper_poll_disable(dev);
		dev->switch_power_state = DRM_SWITCH_POWER_CHANGING;
		amdgpu_device_suspend(dev, true, true);
		dev->switch_power_state = DRM_SWITCH_POWER_OFF;
	}
}

/**
 * amdgpu_switcheroo_can_switch - see if switcheroo state can change
 *
 * @pdev: pci dev pointer
 *
 * Callback for the switcheroo driver. Check if the switcheroo
 * state can be changed.
 * Returns true if the state can be changed, false if not.
 */
static bool amdgpu_switcheroo_can_switch(struct pci_dev *pdev)
{
	struct drm_device *dev = pci_get_drvdata(pdev);

	/*
	 * FIXME: open_count is protected by drm_global_mutex but that would lead to
	 * locking inversion with the driver load path. And the access here is
	 * completely racy anyway. So don't bother with locking for now.
	 */
	return dev->open_count == 0;
}

static const struct vga_switcheroo_client_ops amdgpu_switcheroo_ops = {
	.set_gpu_state = amdgpu_switcheroo_set_state,
	.reprobe = NULL,
	.can_switch = amdgpu_switcheroo_can_switch,
};

/**
 * amdgpu_device_ip_set_clockgating_state - set the CG state
 *
 * @dev: amdgpu_device pointer
 * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
 * @state: clockgating state (gate or ungate)
 *
 * Sets the requested clockgating state for all instances of
 * the hardware IP specified.
 * Returns the error code from the last instance.
 */
int amdgpu_device_ip_set_clockgating_state(void *dev,
					   enum amd_ip_block_type block_type,
					   enum amd_clockgating_state state)
{
	struct amdgpu_device *adev = dev;
	int i, r = 0;

	for (i = 0; i < adev->num_ip_blocks; i++) {
		if (!adev->ip_blocks[i].status.valid)
			continue;
		if (adev->ip_blocks[i].version->type != block_type)
			continue;
		if (!adev->ip_blocks[i].version->funcs->set_clockgating_state)
			continue;
		r = adev->ip_blocks[i].version->funcs->set_clockgating_state(
			(void *)adev, state);
		if (r)
			DRM_ERROR("set_clockgating_state of IP block <%s> failed %d\n",
				  adev->ip_blocks[i].version->funcs->name, r);
	}
	return r;
}

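/*
 * Usage sketch (illustrative): an IP driver can gate or ungate another
 * block's clocks through this helper, e.g. before touching UVD:
 *
 *	amdgpu_device_ip_set_clockgating_state(adev, AMD_IP_BLOCK_TYPE_UVD,
 *					       AMD_CG_STATE_UNGATE);
 */
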
/**
 * amdgpu_device_ip_set_powergating_state - set the PG state
 *
 * @dev: amdgpu_device pointer
 * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
 * @state: powergating state (gate or ungate)
 *
 * Sets the requested powergating state for all instances of
 * the hardware IP specified.
 * Returns the error code from the last instance.
 */
int amdgpu_device_ip_set_powergating_state(void *dev,
					   enum amd_ip_block_type block_type,
					   enum amd_powergating_state state)
{
	struct amdgpu_device *adev = dev;
	int i, r = 0;

	for (i = 0; i < adev->num_ip_blocks; i++) {
		if (!adev->ip_blocks[i].status.valid)
			continue;
		if (adev->ip_blocks[i].version->type != block_type)
			continue;
		if (!adev->ip_blocks[i].version->funcs->set_powergating_state)
			continue;
		r = adev->ip_blocks[i].version->funcs->set_powergating_state(
			(void *)adev, state);
		if (r)
			DRM_ERROR("set_powergating_state of IP block <%s> failed %d\n",
				  adev->ip_blocks[i].version->funcs->name, r);
	}
	return r;
}

/**
 * amdgpu_device_ip_get_clockgating_state - get the CG state
 *
 * @adev: amdgpu_device pointer
 * @flags: clockgating feature flags
 *
 * Walks the list of IPs on the device and updates the clockgating
 * flags for each IP.
 * Updates @flags with the feature flags for each hardware IP where
 * clockgating is enabled.
 */
void amdgpu_device_ip_get_clockgating_state(struct amdgpu_device *adev,
					    u32 *flags)
{
	int i;

	for (i = 0; i < adev->num_ip_blocks; i++) {
		if (!adev->ip_blocks[i].status.valid)
			continue;
		if (adev->ip_blocks[i].version->funcs->get_clockgating_state)
			adev->ip_blocks[i].version->funcs->get_clockgating_state((void *)adev, flags);
	}
}

/**
 * amdgpu_device_ip_wait_for_idle - wait for idle
 *
 * @adev: amdgpu_device pointer
 * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
 *
 * Waits for the requested hardware IP to be idle.
 * Returns 0 for success or a negative error code on failure.
 */
int amdgpu_device_ip_wait_for_idle(struct amdgpu_device *adev,
				   enum amd_ip_block_type block_type)
{
	int i, r;

	for (i = 0; i < adev->num_ip_blocks; i++) {
		if (!adev->ip_blocks[i].status.valid)
			continue;
		if (adev->ip_blocks[i].version->type == block_type) {
			r = adev->ip_blocks[i].version->funcs->wait_for_idle((void *)adev);
			if (r)
				return r;
			break;
		}
	}
	return 0;
}

/**
 * amdgpu_device_ip_is_idle - is the hardware IP idle
 *
 * @adev: amdgpu_device pointer
 * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
 *
 * Check if the hardware IP is idle or not.
 * Returns true if the IP is idle, false if not.
 */
bool amdgpu_device_ip_is_idle(struct amdgpu_device *adev,
			      enum amd_ip_block_type block_type)
{
	int i;

	for (i = 0; i < adev->num_ip_blocks; i++) {
		if (!adev->ip_blocks[i].status.valid)
			continue;
		if (adev->ip_blocks[i].version->type == block_type)
			return adev->ip_blocks[i].version->funcs->is_idle((void *)adev);
	}
	return true;
}

/**
 * amdgpu_device_ip_get_ip_block - get a hw IP pointer
 *
 * @adev: amdgpu_device pointer
 * @type: Type of hardware IP (SMU, GFX, UVD, etc.)
 *
 * Returns a pointer to the hardware IP block structure
 * if it exists for the asic, otherwise NULL.
 */
struct amdgpu_ip_block *
amdgpu_device_ip_get_ip_block(struct amdgpu_device *adev,
			      enum amd_ip_block_type type)
{
	int i;

	for (i = 0; i < adev->num_ip_blocks; i++)
		if (adev->ip_blocks[i].version->type == type)
			return &adev->ip_blocks[i];

	return NULL;
}

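/*
 * Usage sketch (illustrative): callers use the returned block to reach its
 * version or status, e.g.:
 *
 *	struct amdgpu_ip_block *ip_block =
 *		amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_GFX);
 *
 *	if (ip_block && ip_block->version->major >= 9)
 *		// GFX9 or newer
 */
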
/**
 * amdgpu_device_ip_block_version_cmp
 *
 * @adev: amdgpu_device pointer
 * @type: enum amd_ip_block_type
 * @major: major version
 * @minor: minor version
 *
 * Returns 0 if the IP block's version is equal or greater,
 * 1 if it is smaller or the ip_block doesn't exist.
 */
int amdgpu_device_ip_block_version_cmp(struct amdgpu_device *adev,
				       enum amd_ip_block_type type,
				       u32 major, u32 minor)
{
	struct amdgpu_ip_block *ip_block = amdgpu_device_ip_get_ip_block(adev, type);

	if (ip_block && ((ip_block->version->major > major) ||
			((ip_block->version->major == major) &&
			(ip_block->version->minor >= minor))))
		return 0;

	return 1;
}

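/*
 * Usage sketch (illustrative; the version numbers are made up): gate a
 * feature on a minimum IP version, e.g. "SMU 7.1 or newer":
 *
 *	if (!amdgpu_device_ip_block_version_cmp(adev, AMD_IP_BLOCK_TYPE_SMC,
 *						7, 1))
 *		// SMU block is at least 7.1
 */
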
/**
 * amdgpu_device_ip_block_add
 *
 * @adev: amdgpu_device pointer
 * @ip_block_version: pointer to the IP to add
 *
 * Adds the IP block driver information to the collection of IPs
 * on the asic.
 */
int amdgpu_device_ip_block_add(struct amdgpu_device *adev,
			       const struct amdgpu_ip_block_version *ip_block_version)
{
	if (!ip_block_version)
		return -EINVAL;

	DRM_INFO("add ip block number %d <%s>\n", adev->num_ip_blocks,
		 ip_block_version->funcs->name);

	adev->ip_blocks[adev->num_ip_blocks++].version = ip_block_version;

	return 0;
}

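/*
 * Usage sketch (illustrative): the per-ASIC <asic>_set_ip_blocks() functions
 * register their blocks in initialization order, along the lines of:
 *
 *	amdgpu_device_ip_block_add(adev, &vi_common_ip_block);
 *	amdgpu_device_ip_block_add(adev, &gmc_v8_0_ip_block);
 *	amdgpu_device_ip_block_add(adev, &tonga_ih_ip_block);
 */
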
/**
 * amdgpu_device_enable_virtual_display - enable virtual display feature
 *
 * @adev: amdgpu_device pointer
 *
 * Enables the virtual display feature if the user has enabled it via
 * the module parameter virtual_display. This feature provides a virtual
 * display hardware on headless boards or in virtualized environments.
 * This function parses and validates the configuration string specified by
 * the user and configures the virtual display configuration (number of
 * virtual connectors, crtcs, etc.) specified.
 */
static void amdgpu_device_enable_virtual_display(struct amdgpu_device *adev)
{
	adev->enable_virtual_display = false;

	if (amdgpu_virtual_display) {
		struct drm_device *ddev = adev->ddev;
		const char *pci_address_name = pci_name(ddev->pdev);
		char *pciaddstr, *pciaddstr_tmp, *pciaddname_tmp, *pciaddname;

		pciaddstr = kstrdup(amdgpu_virtual_display, GFP_KERNEL);
		pciaddstr_tmp = pciaddstr;
		while ((pciaddname_tmp = strsep(&pciaddstr_tmp, ";"))) {
			pciaddname = strsep(&pciaddname_tmp, ",");
			if (!strcmp("all", pciaddname)
			    || !strcmp(pci_address_name, pciaddname)) {
				long num_crtc;
				int res = -1;

				adev->enable_virtual_display = true;

				if (pciaddname_tmp)
					res = kstrtol(pciaddname_tmp, 10,
						      &num_crtc);

				if (!res) {
					if (num_crtc < 1)
						num_crtc = 1;
					if (num_crtc > 6)
						num_crtc = 6;
					adev->mode_info.num_crtc = num_crtc;
				} else {
					adev->mode_info.num_crtc = 1;
				}
				break;
			}
		}

		DRM_INFO("virtual display string:%s, %s:virtual_display:%d, num_crtc:%d\n",
			 amdgpu_virtual_display, pci_address_name,
			 adev->enable_virtual_display, adev->mode_info.num_crtc);

		kfree(pciaddstr);
	}
}

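/*
 * Usage sketch (illustrative): per the parsing above, the virtual_display
 * module parameter is a semicolon-separated list of
 * "<pci address>,<crtc count>" entries, e.g.:
 *
 *	modprobe amdgpu virtual_display=0000:03:00.0,2
 */
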
/**
 * amdgpu_device_parse_gpu_info_fw - parse gpu info firmware
 *
 * @adev: amdgpu_device pointer
 *
 * Parses the asic configuration parameters specified in the gpu info
 * firmware and makes them available to the driver for use in configuring
 * the asic.
 * Returns 0 on success, -EINVAL on failure.
 */
static int amdgpu_device_parse_gpu_info_fw(struct amdgpu_device *adev)
{
	const char *chip_name;
	char fw_name[30];
	int err;
	const struct gpu_info_firmware_header_v1_0 *hdr;

	adev->firmware.gpu_info_fw = NULL;

	switch (adev->asic_type) {
	case CHIP_TOPAZ:
	case CHIP_TONGA:
	case CHIP_FIJI:
	case CHIP_POLARIS10:
	case CHIP_POLARIS11:
	case CHIP_POLARIS12:
	case CHIP_VEGAM:
	case CHIP_CARRIZO:
	case CHIP_STONEY:
#ifdef CONFIG_DRM_AMDGPU_SI
	case CHIP_VERDE:
	case CHIP_TAHITI:
	case CHIP_PITCAIRN:
	case CHIP_OLAND:
	case CHIP_HAINAN:
#endif
#ifdef CONFIG_DRM_AMDGPU_CIK
	case CHIP_BONAIRE:
	case CHIP_HAWAII:
	case CHIP_KAVERI:
	case CHIP_KABINI:
	case CHIP_MULLINS:
#endif
	case CHIP_VEGA20:
	default:
		return 0;
	case CHIP_VEGA10:
		chip_name = "vega10";
		break;
	case CHIP_VEGA12:
		chip_name = "vega12";
		break;
	case CHIP_RAVEN:
		if (adev->rev_id >= 8)
			chip_name = "raven2";
		else if (adev->pdev->device == 0x15d8)
			chip_name = "picasso";
		else
			chip_name = "raven";
		break;
	}

	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_gpu_info.bin", chip_name);
	err = request_firmware(&adev->firmware.gpu_info_fw, fw_name, adev->dev);
	if (err) {
		dev_err(adev->dev,
			"Failed to load gpu_info firmware \"%s\"\n",
			fw_name);
		goto out;
	}
	err = amdgpu_ucode_validate(adev->firmware.gpu_info_fw);
	if (err) {
		dev_err(adev->dev,
			"Failed to validate gpu_info firmware \"%s\"\n",
			fw_name);
		goto out;
	}

	hdr = (const struct gpu_info_firmware_header_v1_0 *)adev->firmware.gpu_info_fw->data;
	amdgpu_ucode_print_gpu_info_hdr(&hdr->header);

	switch (hdr->version_major) {
	case 1:
	{
		const struct gpu_info_firmware_v1_0 *gpu_info_fw =
			(const struct gpu_info_firmware_v1_0 *)(adev->firmware.gpu_info_fw->data +
								le32_to_cpu(hdr->header.ucode_array_offset_bytes));

		adev->gfx.config.max_shader_engines = le32_to_cpu(gpu_info_fw->gc_num_se);
		adev->gfx.config.max_cu_per_sh = le32_to_cpu(gpu_info_fw->gc_num_cu_per_sh);
		adev->gfx.config.max_sh_per_se = le32_to_cpu(gpu_info_fw->gc_num_sh_per_se);
		adev->gfx.config.max_backends_per_se = le32_to_cpu(gpu_info_fw->gc_num_rb_per_se);
		adev->gfx.config.max_texture_channel_caches =
			le32_to_cpu(gpu_info_fw->gc_num_tccs);
		adev->gfx.config.max_gprs = le32_to_cpu(gpu_info_fw->gc_num_gprs);
		adev->gfx.config.max_gs_threads = le32_to_cpu(gpu_info_fw->gc_num_max_gs_thds);
		adev->gfx.config.gs_vgt_table_depth = le32_to_cpu(gpu_info_fw->gc_gs_table_depth);
		adev->gfx.config.gs_prim_buffer_depth = le32_to_cpu(gpu_info_fw->gc_gsprim_buff_depth);
		adev->gfx.config.double_offchip_lds_buf =
			le32_to_cpu(gpu_info_fw->gc_double_offchip_lds_buffer);
		adev->gfx.cu_info.wave_front_size = le32_to_cpu(gpu_info_fw->gc_wave_size);
		adev->gfx.cu_info.max_waves_per_simd =
			le32_to_cpu(gpu_info_fw->gc_max_waves_per_simd);
		adev->gfx.cu_info.max_scratch_slots_per_cu =
			le32_to_cpu(gpu_info_fw->gc_max_scratch_slots_per_cu);
		adev->gfx.cu_info.lds_size = le32_to_cpu(gpu_info_fw->gc_lds_size);
		break;
	}
	default:
		dev_err(adev->dev,
			"Unsupported gpu_info table %d\n", hdr->header.ucode_version);
		err = -EINVAL;
		goto out;
	}
out:
	return err;
}

/**
 * amdgpu_device_ip_early_init - run early init for hardware IPs
 *
 * @adev: amdgpu_device pointer
 *
 * Early initialization pass for hardware IPs. The hardware IPs that make
 * up each asic are discovered and each IP's early_init callback is run. This
 * is the first stage in initializing the asic.
 * Returns 0 on success, negative error code on failure.
 */
static int amdgpu_device_ip_early_init(struct amdgpu_device *adev)
{
	int i, r;

	amdgpu_device_enable_virtual_display(adev);

	switch (adev->asic_type) {
	case CHIP_TOPAZ:
	case CHIP_TONGA:
	case CHIP_FIJI:
	case CHIP_POLARIS10:
	case CHIP_POLARIS11:
	case CHIP_POLARIS12:
	case CHIP_VEGAM:
	case CHIP_CARRIZO:
	case CHIP_STONEY:
		if (adev->asic_type == CHIP_CARRIZO || adev->asic_type == CHIP_STONEY)
			adev->family = AMDGPU_FAMILY_CZ;
		else
			adev->family = AMDGPU_FAMILY_VI;

		r = vi_set_ip_blocks(adev);
		if (r)
			return r;
		break;
#ifdef CONFIG_DRM_AMDGPU_SI
	case CHIP_VERDE:
	case CHIP_TAHITI:
	case CHIP_PITCAIRN:
	case CHIP_OLAND:
	case CHIP_HAINAN:
		adev->family = AMDGPU_FAMILY_SI;
		r = si_set_ip_blocks(adev);
		if (r)
			return r;
		break;
#endif
#ifdef CONFIG_DRM_AMDGPU_CIK
	case CHIP_BONAIRE:
	case CHIP_HAWAII:
	case CHIP_KAVERI:
	case CHIP_KABINI:
	case CHIP_MULLINS:
		if ((adev->asic_type == CHIP_BONAIRE) || (adev->asic_type == CHIP_HAWAII))
			adev->family = AMDGPU_FAMILY_CI;
		else
			adev->family = AMDGPU_FAMILY_KV;

		r = cik_set_ip_blocks(adev);
		if (r)
			return r;
		break;
#endif
	case CHIP_VEGA10:
	case CHIP_VEGA12:
	case CHIP_VEGA20:
	case CHIP_RAVEN:
		if (adev->asic_type == CHIP_RAVEN)
			adev->family = AMDGPU_FAMILY_RV;
		else
			adev->family = AMDGPU_FAMILY_AI;

		r = soc15_set_ip_blocks(adev);
		if (r)
			return r;
		break;
	default:
		/* FIXME: not supported yet */
		return -EINVAL;
	}

	r = amdgpu_device_parse_gpu_info_fw(adev);
	if (r)
		return r;

	amdgpu_amdkfd_device_probe(adev);

	if (amdgpu_sriov_vf(adev)) {
		r = amdgpu_virt_request_full_gpu(adev, true);
		if (r)
			return -EAGAIN;
	}

	adev->powerplay.pp_feature = amdgpu_pp_feature_mask;

	for (i = 0; i < adev->num_ip_blocks; i++) {
		if ((amdgpu_ip_block_mask & (1 << i)) == 0) {
			DRM_ERROR("disabled ip block: %d <%s>\n",
				  i, adev->ip_blocks[i].version->funcs->name);
			adev->ip_blocks[i].status.valid = false;
		} else {
			if (adev->ip_blocks[i].version->funcs->early_init) {
				r = adev->ip_blocks[i].version->funcs->early_init((void *)adev);
				if (r == -ENOENT) {
					adev->ip_blocks[i].status.valid = false;
				} else if (r) {
					DRM_ERROR("early_init of IP block <%s> failed %d\n",
						  adev->ip_blocks[i].version->funcs->name, r);
					return r;
				} else {
					adev->ip_blocks[i].status.valid = true;
				}
			} else {
				adev->ip_blocks[i].status.valid = true;
			}
		}
	}

	adev->cg_flags &= amdgpu_cg_mask;
	adev->pg_flags &= amdgpu_pg_mask;

	return 0;
}

static int amdgpu_device_ip_hw_init_phase1(struct amdgpu_device *adev)
{
	int i, r;

	for (i = 0; i < adev->num_ip_blocks; i++) {
		if (!adev->ip_blocks[i].status.sw)
			continue;
		if (adev->ip_blocks[i].status.hw)
			continue;
		if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
		    adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH) {
			r = adev->ip_blocks[i].version->funcs->hw_init(adev);
			if (r) {
				DRM_ERROR("hw_init of IP block <%s> failed %d\n",
					  adev->ip_blocks[i].version->funcs->name, r);
				return r;
			}
			adev->ip_blocks[i].status.hw = true;
		}
	}

	return 0;
}

static int amdgpu_device_ip_hw_init_phase2(struct amdgpu_device *adev)
{
	int i, r;

	for (i = 0; i < adev->num_ip_blocks; i++) {
		if (!adev->ip_blocks[i].status.sw)
			continue;
		if (adev->ip_blocks[i].status.hw)
			continue;
		r = adev->ip_blocks[i].version->funcs->hw_init(adev);
		if (r) {
			DRM_ERROR("hw_init of IP block <%s> failed %d\n",
				  adev->ip_blocks[i].version->funcs->name, r);
			return r;
		}
		adev->ip_blocks[i].status.hw = true;
	}

	return 0;
}

static int amdgpu_device_fw_loading(struct amdgpu_device *adev)
{
	int r = 0;
	int i;

	if (adev->asic_type >= CHIP_VEGA10) {
		for (i = 0; i < adev->num_ip_blocks; i++) {
			if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP) {
				if (adev->in_gpu_reset || adev->in_suspend) {
					if (amdgpu_sriov_vf(adev) && adev->in_gpu_reset)
						break; /* sriov gpu reset, psp need to do hw_init before IH because of hw limit */
					r = adev->ip_blocks[i].version->funcs->resume(adev);
					if (r) {
						DRM_ERROR("resume of IP block <%s> failed %d\n",
							  adev->ip_blocks[i].version->funcs->name, r);
						return r;
					}
				} else {
					r = adev->ip_blocks[i].version->funcs->hw_init(adev);
					if (r) {
						DRM_ERROR("hw_init of IP block <%s> failed %d\n",
							  adev->ip_blocks[i].version->funcs->name, r);
						return r;
					}
				}
				adev->ip_blocks[i].status.hw = true;
			}
		}
	}

	if (adev->powerplay.pp_funcs && adev->powerplay.pp_funcs->load_firmware) {
		r = adev->powerplay.pp_funcs->load_firmware(adev->powerplay.pp_handle);
		if (r) {
			pr_err("firmware loading failed\n");
			return r;
		}
	}

	return 0;
}

/**
 * amdgpu_device_ip_init - run init for hardware IPs
 *
 * @adev: amdgpu_device pointer
 *
 * Main initialization pass for hardware IPs. The list of all the hardware
 * IPs that make up the asic is walked and the sw_init and hw_init callbacks
 * are run. sw_init initializes the software state associated with each IP
 * and hw_init initializes the hardware associated with each IP.
 * Returns 0 on success, negative error code on failure.
 */
static int amdgpu_device_ip_init(struct amdgpu_device *adev)
{
	int i, r;

	for (i = 0; i < adev->num_ip_blocks; i++) {
		if (!adev->ip_blocks[i].status.valid)
			continue;
		r = adev->ip_blocks[i].version->funcs->sw_init((void *)adev);
		if (r) {
			DRM_ERROR("sw_init of IP block <%s> failed %d\n",
				  adev->ip_blocks[i].version->funcs->name, r);
			return r;
		}
		adev->ip_blocks[i].status.sw = true;

		/* need to do gmc hw init early so we can allocate gpu mem */
		if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) {
			r = amdgpu_device_vram_scratch_init(adev);
			if (r) {
				DRM_ERROR("amdgpu_vram_scratch_init failed %d\n", r);
				return r;
			}
			r = adev->ip_blocks[i].version->funcs->hw_init((void *)adev);
			if (r) {
				DRM_ERROR("hw_init %d failed %d\n", i, r);
				return r;
			}
			r = amdgpu_device_wb_init(adev);
			if (r) {
				DRM_ERROR("amdgpu_device_wb_init failed %d\n", r);
				return r;
			}
			adev->ip_blocks[i].status.hw = true;

			/* right after GMC hw init, we create CSA */
			if (amdgpu_sriov_vf(adev)) {
				r = amdgpu_allocate_static_csa(adev, &adev->virt.csa_obj,
							       AMDGPU_GEM_DOMAIN_VRAM,
							       AMDGPU_CSA_SIZE);
				if (r) {
					DRM_ERROR("allocate CSA failed %d\n", r);
					return r;
				}
			}
		}
	}

	r = amdgpu_ucode_create_bo(adev); /* create ucode bo when sw_init complete */
	if (r)
		return r;

	r = amdgpu_device_ip_hw_init_phase1(adev);
	if (r)
		return r;

	r = amdgpu_device_fw_loading(adev);
	if (r)
		return r;

	r = amdgpu_device_ip_hw_init_phase2(adev);
	if (r)
		return r;

	if (adev->gmc.xgmi.num_physical_nodes > 1)
		amdgpu_xgmi_add_device(adev);
	amdgpu_amdkfd_device_init(adev);

	if (amdgpu_sriov_vf(adev))
		amdgpu_virt_release_full_gpu(adev, true);

	return 0;
}

/**
 * amdgpu_device_fill_reset_magic - writes reset magic to gart pointer
 *
 * @adev: amdgpu_device pointer
 *
 * Writes a reset magic value to the gart pointer in VRAM. The driver calls
 * this function before a GPU reset. If the value is retained after a
 * GPU reset, VRAM has not been lost. Some GPU resets may destroy VRAM contents.
 */
static void amdgpu_device_fill_reset_magic(struct amdgpu_device *adev)
{
	memcpy(adev->reset_magic, adev->gart.ptr, AMDGPU_RESET_MAGIC_NUM);
}

/**
 * amdgpu_device_check_vram_lost - check if vram is valid
 *
 * @adev: amdgpu_device pointer
 *
 * Checks the reset magic value written to the gart pointer in VRAM.
 * The driver calls this after a GPU reset to see if the contents of
 * VRAM are lost or not.
 * Returns true if vram is lost, false if not.
 */
static bool amdgpu_device_check_vram_lost(struct amdgpu_device *adev)
{
	return !!memcmp(adev->gart.ptr, adev->reset_magic,
			AMDGPU_RESET_MAGIC_NUM);
}

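/*
 * Usage sketch (illustrative): the reset path brackets the ASIC reset with
 * these two helpers to detect VRAM loss:
 *
 *	amdgpu_device_fill_reset_magic(adev);			// before reset
 *	...
 *	vram_lost = amdgpu_device_check_vram_lost(adev);	// after reset
 */
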
e3ecdffa 1736/**
1112a46b 1737 * amdgpu_device_set_cg_state - set clockgating for amdgpu device
e3ecdffa
AD
1738 *
1739 * @adev: amdgpu_device pointer
1740 *
e3ecdffa 1741 * The list of all the hardware IPs that make up the asic is walked and the
1112a46b
RZ
1742 * set_clockgating_state callbacks are run.
1743 * Late initialization pass enabling clockgating for hardware IPs.
1744 * Fini or suspend, pass disabling clockgating for hardware IPs.
e3ecdffa
AD
1745 * Returns 0 on success, negative error code on failure.
1746 */
fdd34271 1747
1112a46b
RZ
1748static int amdgpu_device_set_cg_state(struct amdgpu_device *adev,
1749 enum amd_clockgating_state state)
d38ceaf9 1750{
1112a46b 1751 int i, j, r;
d38ceaf9 1752
4a2ba394
SL
1753 if (amdgpu_emu_mode == 1)
1754 return 0;
1755
1112a46b
RZ
1756 for (j = 0; j < adev->num_ip_blocks; j++) {
1757 i = state == AMD_CG_STATE_GATE ? j : adev->num_ip_blocks - j - 1;
a2d31dc3 1758 if (!adev->ip_blocks[i].status.late_initialized)
d38ceaf9 1759 continue;
4a446d55 1760 /* skip CG for VCE/UVD, it's handled specially */
a1255107 1761 if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_UVD &&
57716327 1762 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCE &&
34319b32 1763 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCN &&
57716327 1764 adev->ip_blocks[i].version->funcs->set_clockgating_state) {
4a446d55 1765 /* enable clockgating to save power */
a1255107 1766 r = adev->ip_blocks[i].version->funcs->set_clockgating_state((void *)adev,
1112a46b 1767 state);
4a446d55
AD
1768 if (r) {
1769 DRM_ERROR("set_clockgating_state(gate) of IP block <%s> failed %d\n",
a1255107 1770 adev->ip_blocks[i].version->funcs->name, r);
4a446d55
AD
1771 return r;
1772 }
b0b00ff1 1773 }
d38ceaf9 1774 }
06b18f61 1775
c9f96fd5
RZ
1776 return 0;
1777}
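/*
 * Reading aid for the loop above: the index computation
 *
 *	i = state == AMD_CG_STATE_GATE ? j : adev->num_ip_blocks - j - 1;
 *
 * walks the IP list front to back when gating and back to front when
 * ungating, so blocks are ungated in the reverse of the order in which
 * they were gated.
 */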
1778
1112a46b 1779static int amdgpu_device_set_pg_state(struct amdgpu_device *adev, enum amd_powergating_state state)
c9f96fd5 1780{
1112a46b 1781 int i, j, r;
06b18f61 1782
c9f96fd5
RZ
1783 if (amdgpu_emu_mode == 1)
1784 return 0;
1785
1112a46b
RZ
1786 for (j = 0; j < adev->num_ip_blocks; j++) {
1787 i = state == AMD_PG_STATE_GATE ? j : adev->num_ip_blocks - j - 1;
a2d31dc3 1788 if (!adev->ip_blocks[i].status.late_initialized)
c9f96fd5
RZ
1789 continue;
1790 /* skip PG for VCE/UVD, it's handled specially */
1791 if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_UVD &&
1792 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCE &&
1793 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCN &&
1794 adev->ip_blocks[i].version->funcs->set_powergating_state) {
1795 /* enable powergating to save power */
1796 r = adev->ip_blocks[i].version->funcs->set_powergating_state((void *)adev,
1112a46b 1797 state);
c9f96fd5
RZ
1798 if (r) {
1799 DRM_ERROR("set_powergating_state(gate) of IP block <%s> failed %d\n",
1800 adev->ip_blocks[i].version->funcs->name, r);
1801 return r;
1802 }
1803 }
1804 }
2dc80b00
S
1805 return 0;
1806}
1807
e3ecdffa
AD
1808/**
1809 * amdgpu_device_ip_late_init - run late init for hardware IPs
1810 *
1811 * @adev: amdgpu_device pointer
1812 *
1813 * Late initialization pass for hardware IPs. The list of all the hardware
1814 * IPs that make up the asic is walked and the late_init callbacks are run.
1815 * late_init covers any special initialization that an IP requires
1816 * after all of the IP blocks have been initialized or something that needs to happen
1817 * late in the init process.
1818 * Returns 0 on success, negative error code on failure.
1819 */
06ec9070 1820static int amdgpu_device_ip_late_init(struct amdgpu_device *adev)
2dc80b00
S
1821{
1822 int i = 0, r;
1823
1824 for (i = 0; i < adev->num_ip_blocks; i++) {
73f847db 1825 if (!adev->ip_blocks[i].status.hw)
2dc80b00
S
1826 continue;
1827 if (adev->ip_blocks[i].version->funcs->late_init) {
1828 r = adev->ip_blocks[i].version->funcs->late_init((void *)adev);
1829 if (r) {
1830 DRM_ERROR("late_init of IP block <%s> failed %d\n",
1831 adev->ip_blocks[i].version->funcs->name, r);
1832 return r;
1833 }
2dc80b00 1834 }
73f847db 1835 adev->ip_blocks[i].status.late_initialized = true;
2dc80b00
S
1836 }
1837
1112a46b
RZ
1838 amdgpu_device_set_cg_state(adev, AMD_CG_STATE_GATE);
1839 amdgpu_device_set_pg_state(adev, AMD_PG_STATE_GATE);
916ac57f 1840
2c773de2
S
1841 queue_delayed_work(system_wq, &adev->late_init_work,
1842 msecs_to_jiffies(AMDGPU_RESUME_MS));
d38ceaf9 1843
06ec9070 1844 amdgpu_device_fill_reset_magic(adev);
d38ceaf9
AD
1845
1846 return 0;
1847}
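/*
 * Note on the delayed work queued above (a reading aid): the actual IB ring
 * tests and mGPU fan boost setup run AMDGPU_RESUME_MS later, in
 * amdgpu_device_ip_late_init_func_handler() below. Callers that must not
 * race with the tests flush the same work item, as amdgpu_device_resume()
 * does:
 *
 *	flush_delayed_work(&adev->late_init_work);
 */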
1848
e3ecdffa
AD
1849/**
1850 * amdgpu_device_ip_fini - run fini for hardware IPs
1851 *
1852 * @adev: amdgpu_device pointer
1853 *
1854 * Main teardown pass for hardware IPs. The list of all the hardware
1855 * IPs that make up the asic is walked and the hw_fini and sw_fini callbacks
1856 * are run. hw_fini tears down the hardware associated with each IP
1857 * and sw_fini tears down any software state associated with each IP.
1858 * Returns 0 on success, negative error code on failure.
1859 */
06ec9070 1860static int amdgpu_device_ip_fini(struct amdgpu_device *adev)
d38ceaf9
AD
1861{
1862 int i, r;
1863
1884734a 1864 amdgpu_amdkfd_device_fini(adev);
05df1f01
RZ
1865
1866 amdgpu_device_set_pg_state(adev, AMD_PG_STATE_UNGATE);
fdd34271
RZ
1867 amdgpu_device_set_cg_state(adev, AMD_CG_STATE_UNGATE);
1868
3e96dbfd
AD
1869 /* need to disable SMC first */
1870 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 1871 if (!adev->ip_blocks[i].status.hw)
3e96dbfd 1872 continue;
fdd34271 1873 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) {
a1255107 1874 r = adev->ip_blocks[i].version->funcs->hw_fini((void *)adev);
3e96dbfd
AD
1875 /* XXX handle errors */
1876 if (r) {
1877 DRM_DEBUG("hw_fini of IP block <%s> failed %d\n",
a1255107 1878 adev->ip_blocks[i].version->funcs->name, r);
3e96dbfd 1879 }
a1255107 1880 adev->ip_blocks[i].status.hw = false;
3e96dbfd
AD
1881 break;
1882 }
1883 }
1884
d38ceaf9 1885 for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
a1255107 1886 if (!adev->ip_blocks[i].status.hw)
d38ceaf9 1887 continue;
8201a67a 1888
a1255107 1889 r = adev->ip_blocks[i].version->funcs->hw_fini((void *)adev);
d38ceaf9 1890 /* XXX handle errors */
2c1a2784 1891 if (r) {
a1255107
AD
1892 DRM_DEBUG("hw_fini of IP block <%s> failed %d\n",
1893 adev->ip_blocks[i].version->funcs->name, r);
2c1a2784 1894 }
8201a67a 1895
a1255107 1896 adev->ip_blocks[i].status.hw = false;
d38ceaf9
AD
1897 }
1898
9950cda2 1899
d38ceaf9 1900 for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
a1255107 1901 if (!adev->ip_blocks[i].status.sw)
d38ceaf9 1902 continue;
c12aba3a
ML
1903
1904 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) {
c8963ea4 1905 amdgpu_ucode_free_bo(adev);
1e256e27 1906 amdgpu_free_static_csa(&adev->virt.csa_obj);
c12aba3a
ML
1907 amdgpu_device_wb_fini(adev);
1908 amdgpu_device_vram_scratch_fini(adev);
1909 }
1910
a1255107 1911 r = adev->ip_blocks[i].version->funcs->sw_fini((void *)adev);
d38ceaf9 1912 /* XXX handle errors */
2c1a2784 1913 if (r) {
a1255107
AD
1914 DRM_DEBUG("sw_fini of IP block <%s> failed %d\n",
1915 adev->ip_blocks[i].version->funcs->name, r);
2c1a2784 1916 }
a1255107
AD
1917 adev->ip_blocks[i].status.sw = false;
1918 adev->ip_blocks[i].status.valid = false;
d38ceaf9
AD
1919 }
1920
a6dcfd9c 1921 for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
a1255107 1922 if (!adev->ip_blocks[i].status.late_initialized)
8a2eef1d 1923 continue;
a1255107
AD
1924 if (adev->ip_blocks[i].version->funcs->late_fini)
1925 adev->ip_blocks[i].version->funcs->late_fini((void *)adev);
1926 adev->ip_blocks[i].status.late_initialized = false;
a6dcfd9c
ML
1927 }
1928
030308fc 1929 if (amdgpu_sriov_vf(adev))
24136135
ML
1930 if (amdgpu_virt_release_full_gpu(adev, false))
1931 DRM_ERROR("failed to release exclusive mode on fini\n");
2493664f 1932
d38ceaf9
AD
1933 return 0;
1934}
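/*
 * Teardown order summary for the function above (a reading aid): the SMC is
 * brought down first so power management stops before the other blocks,
 * then each pass runs in reverse init order:
 *
 *	SMC hw_fini -> hw_fini (all, reverse) -> sw_fini (reverse)
 *		-> late_fini (reverse)
 *
 * GMC sw_fini doubles as the point where the ucode BO, static CSA,
 * writeback and VRAM scratch allocations are released.
 */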
1935
b55c9e7a
EQ
1936static int amdgpu_device_enable_mgpu_fan_boost(void)
1937{
1938 struct amdgpu_gpu_instance *gpu_ins;
1939 struct amdgpu_device *adev;
1940 int i, ret = 0;
1941
1942 mutex_lock(&mgpu_info.mutex);
1943
1944 /*
1945 * MGPU fan boost feature should be enabled
1946 * only when there are two or more dGPUs in
1947 * the system
1948 */
1949 if (mgpu_info.num_dgpu < 2)
1950 goto out;
1951
1952 for (i = 0; i < mgpu_info.num_dgpu; i++) {
1953 gpu_ins = &(mgpu_info.gpu_ins[i]);
1954 adev = gpu_ins->adev;
1955 if (!(adev->flags & AMD_IS_APU) &&
1956 !gpu_ins->mgpu_fan_enabled &&
1957 adev->powerplay.pp_funcs &&
1958 adev->powerplay.pp_funcs->enable_mgpu_fan_boost) {
1959 ret = amdgpu_dpm_enable_mgpu_fan_boost(adev);
1960 if (ret)
1961 break;
1962
1963 gpu_ins->mgpu_fan_enabled = 1;
1964 }
1965 }
1966
1967out:
1968 mutex_unlock(&mgpu_info.mutex);
1969
1970 return ret;
1971}
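/*
 * Usage sketch: this helper is not called from device init directly; it
 * runs from the late-init work handler below, once every dGPU has had a
 * chance to register itself in mgpu_info:
 *
 *	r = amdgpu_device_enable_mgpu_fan_boost();
 *	if (r)
 *		DRM_ERROR("enable mgpu fan boost failed (%d).\n", r);
 */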
1972
e3ecdffa 1973/**
1112a46b 1974 * amdgpu_device_ip_late_init_func_handler - work handler for IB tests
e3ecdffa 1975 *
1112a46b 1976 * @work: work_struct.
e3ecdffa 1977 */
06ec9070 1978static void amdgpu_device_ip_late_init_func_handler(struct work_struct *work)
2dc80b00
S
1979{
1980 struct amdgpu_device *adev =
1981 container_of(work, struct amdgpu_device, late_init_work.work);
916ac57f
RZ
1982 int r;
1983
1984 r = amdgpu_ib_ring_tests(adev);
1985 if (r)
1986 DRM_ERROR("ib ring test failed (%d).\n", r);
b55c9e7a
EQ
1987
1988 r = amdgpu_device_enable_mgpu_fan_boost();
1989 if (r)
1990 DRM_ERROR("enable mgpu fan boost failed (%d).\n", r);
2dc80b00
S
1991}
1992
1e317b99
RZ
1993static void amdgpu_device_delay_enable_gfx_off(struct work_struct *work)
1994{
1995 struct amdgpu_device *adev =
1996 container_of(work, struct amdgpu_device, gfx.gfx_off_delay_work.work);
1997
1998 mutex_lock(&adev->gfx.gfx_off_mutex);
1999 if (!adev->gfx.gfx_off_state && !adev->gfx.gfx_off_req_count) {
2000 if (!amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_GFX, true))
2001 adev->gfx.gfx_off_state = true;
2002 }
2003 mutex_unlock(&adev->gfx.gfx_off_mutex);
2004}
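/*
 * Sketch of the request side (an assumption for illustration; the real
 * counterpart is amdgpu_gfx_off_ctrl() in amdgpu_gfx.c, not shown in this
 * file): GFXOFF is reference counted, and the delayed worker above only
 * gates GFX if no new request arrived during the delay:
 *
 *	mutex_lock(&adev->gfx.gfx_off_mutex);
 *	if (enable)
 *		adev->gfx.gfx_off_req_count--;
 *	else
 *		adev->gfx.gfx_off_req_count++;
 *	if (enable && !adev->gfx.gfx_off_state &&
 *	    !adev->gfx.gfx_off_req_count)
 *		schedule_delayed_work(&adev->gfx.gfx_off_delay_work, delay);
 *	mutex_unlock(&adev->gfx.gfx_off_mutex);
 *
 * The delay value is kept in the gfx code; treat the exact number as an
 * implementation detail.
 */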
2005
e3ecdffa 2006/**
e7854a03 2007 * amdgpu_device_ip_suspend_phase1 - run suspend for hardware IPs (phase 1)
e3ecdffa
AD
2008 *
2009 * @adev: amdgpu_device pointer
2010 *
2011 * Main suspend function for hardware IPs. The list of all the hardware
2012 * IPs that make up the asic is walked, clockgating is disabled and the
2013 * suspend callbacks are run. suspend puts the hardware and software state
2014 * in each IP into a state suitable for suspend.
2015 * Returns 0 on success, negative error code on failure.
2016 */
e7854a03
AD
2017static int amdgpu_device_ip_suspend_phase1(struct amdgpu_device *adev)
2018{
2019 int i, r;
2020
05df1f01 2021 amdgpu_device_set_pg_state(adev, AMD_PG_STATE_UNGATE);
fdd34271 2022 amdgpu_device_set_cg_state(adev, AMD_CG_STATE_UNGATE);
05df1f01 2023
e7854a03
AD
2024 for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
2025 if (!adev->ip_blocks[i].status.valid)
2026 continue;
2027 /* displays are handled separately */
2028 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_DCE) {
e7854a03
AD
2029 /* XXX handle errors */
2030 r = adev->ip_blocks[i].version->funcs->suspend(adev);
2031 /* XXX handle errors */
2032 if (r) {
2033 DRM_ERROR("suspend of IP block <%s> failed %d\n",
2034 adev->ip_blocks[i].version->funcs->name, r);
2035 }
2036 }
2037 }
2038
e7854a03
AD
2039 return 0;
2040}
2041
2042/**
2043 * amdgpu_device_ip_suspend_phase2 - run suspend for hardware IPs (phase 2)
2044 *
2045 * @adev: amdgpu_device pointer
2046 *
2047 * Main suspend function for hardware IPs. The list of all the hardware
2048 * IPs that make up the asic is walked, clockgating is disabled and the
2049 * suspend callbacks are run. suspend puts the hardware and software state
2050 * in each IP into a state suitable for suspend.
2051 * Returns 0 on success, negative error code on failure.
2052 */
2053static int amdgpu_device_ip_suspend_phase2(struct amdgpu_device *adev)
d38ceaf9
AD
2054{
2055 int i, r;
2056
2057 for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
a1255107 2058 if (!adev->ip_blocks[i].status.valid)
d38ceaf9 2059 continue;
e7854a03
AD
2060 /* displays are handled in phase1 */
2061 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_DCE)
2062 continue;
d38ceaf9 2063 /* XXX handle errors */
a1255107 2064 r = adev->ip_blocks[i].version->funcs->suspend(adev);
d38ceaf9 2065 /* XXX handle errors */
2c1a2784 2066 if (r) {
a1255107
AD
2067 DRM_ERROR("suspend of IP block <%s> failed %d\n",
2068 adev->ip_blocks[i].version->funcs->name, r);
2c1a2784 2069 }
d38ceaf9
AD
2070 }
2071
2072 return 0;
2073}
2074
e7854a03
AD
2075/**
2076 * amdgpu_device_ip_suspend - run suspend for hardware IPs
2077 *
2078 * @adev: amdgpu_device pointer
2079 *
2080 * Main suspend function for hardware IPs. The list of all the hardware
2081 * IPs that make up the asic is walked, clockgating is disabled and the
2082 * suspend callbacks are run. suspend puts the hardware and software state
2083 * in each IP into a state suitable for suspend.
2084 * Returns 0 on success, negative error code on failure.
2085 */
2086int amdgpu_device_ip_suspend(struct amdgpu_device *adev)
2087{
2088 int r;
2089
e7819644
YT
2090 if (amdgpu_sriov_vf(adev))
2091 amdgpu_virt_request_full_gpu(adev, false);
2092
e7854a03
AD
2093 r = amdgpu_device_ip_suspend_phase1(adev);
2094 if (r)
2095 return r;
2096 r = amdgpu_device_ip_suspend_phase2(adev);
2097
e7819644
YT
2098 if (amdgpu_sriov_vf(adev))
2099 amdgpu_virt_release_full_gpu(adev, false);
2100
e7854a03
AD
2101 return r;
2102}
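/*
 * Usage note (sketch): the reset path below uses this combined helper,
 * while amdgpu_device_suspend() calls the two phases separately so it can
 * evict VRAM and suspend the fence driver in between:
 *
 *	r = amdgpu_device_ip_suspend_phase1(adev);   (displays)
 *	amdgpu_bo_evict_vram(adev);
 *	amdgpu_fence_driver_suspend(adev);
 *	r = amdgpu_device_ip_suspend_phase2(adev);   (everything else)
 */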
2103
06ec9070 2104static int amdgpu_device_ip_reinit_early_sriov(struct amdgpu_device *adev)
a90ad3c2
ML
2105{
2106 int i, r;
2107
2cb681b6
ML
2108 static enum amd_ip_block_type ip_order[] = {
2109 AMD_IP_BLOCK_TYPE_GMC,
2110 AMD_IP_BLOCK_TYPE_COMMON,
39186aef 2111 AMD_IP_BLOCK_TYPE_PSP,
2cb681b6
ML
2112 AMD_IP_BLOCK_TYPE_IH,
2113 };
a90ad3c2 2114
2cb681b6
ML
2115 for (i = 0; i < ARRAY_SIZE(ip_order); i++) {
2116 int j;
2117 struct amdgpu_ip_block *block;
a90ad3c2 2118
2cb681b6
ML
2119 for (j = 0; j < adev->num_ip_blocks; j++) {
2120 block = &adev->ip_blocks[j];
2121
2122 if (block->version->type != ip_order[i] ||
2123 !block->status.valid)
2124 continue;
2125
2126 r = block->version->funcs->hw_init(adev);
3f48c681 2127 DRM_INFO("RE-INIT: %s %s\n", block->version->funcs->name, r?"failed":"succeeded");
c41d1cf6
ML
2128 if (r)
2129 return r;
a90ad3c2
ML
2130 }
2131 }
2132
2133 return 0;
2134}
2135
06ec9070 2136static int amdgpu_device_ip_reinit_late_sriov(struct amdgpu_device *adev)
a90ad3c2
ML
2137{
2138 int i, r;
2139
2cb681b6
ML
2140 static enum amd_ip_block_type ip_order[] = {
2141 AMD_IP_BLOCK_TYPE_SMC,
2142 AMD_IP_BLOCK_TYPE_DCE,
2143 AMD_IP_BLOCK_TYPE_GFX,
2144 AMD_IP_BLOCK_TYPE_SDMA,
257deb8c
FM
2145 AMD_IP_BLOCK_TYPE_UVD,
2146 AMD_IP_BLOCK_TYPE_VCE
2cb681b6 2147 };
a90ad3c2 2148
2cb681b6
ML
2149 for (i = 0; i < ARRAY_SIZE(ip_order); i++) {
2150 int j;
2151 struct amdgpu_ip_block *block;
a90ad3c2 2152
2cb681b6
ML
2153 for (j = 0; j < adev->num_ip_blocks; j++) {
2154 block = &adev->ip_blocks[j];
2155
2156 if (block->version->type != ip_order[i] ||
2157 !block->status.valid)
2158 continue;
2159
2160 r = block->version->funcs->hw_init(adev);
3f48c681 2161 DRM_INFO("RE-INIT: %s %s\n", block->version->funcs->name, r?"failed":"succeeded");
c41d1cf6
ML
2162 if (r)
2163 return r;
a90ad3c2
ML
2164 }
2165 }
2166
2167 return 0;
2168}
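/*
 * Reading aid for the two SR-IOV reinit helpers above: after a VF FLR the
 * normal IP-list order is not used; instead hw_init is replayed in a fixed
 * order. The early phase (GMC, COMMON, PSP, IH) restores memory access and
 * interrupts before firmware loading; the late phase then restarts SMC,
 * DCE, GFX, SDMA, UVD and VCE. A block is skipped unless its type matches
 * the table and its status.valid is set.
 */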
2169
e3ecdffa
AD
2170/**
2171 * amdgpu_device_ip_resume_phase1 - run resume for hardware IPs
2172 *
2173 * @adev: amdgpu_device pointer
2174 *
2175 * First resume function for hardware IPs. The list of all the hardware
2176 * IPs that make up the asic is walked and the resume callbacks are run for
2177 * COMMON, GMC, and IH. resume puts the hardware into a functional state
2178 * after a suspend and updates the software state as necessary. This
2179 * function is also used for restoring the GPU after a GPU reset.
2180 * Returns 0 on success, negative error code on failure.
2181 */
06ec9070 2182static int amdgpu_device_ip_resume_phase1(struct amdgpu_device *adev)
d38ceaf9
AD
2183{
2184 int i, r;
2185
a90ad3c2
ML
2186 for (i = 0; i < adev->num_ip_blocks; i++) {
2187 if (!adev->ip_blocks[i].status.valid)
2188 continue;
a90ad3c2 2189 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
e3ecdffa
AD
2190 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
2191 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH) {
fcf0649f
CZ
2192 r = adev->ip_blocks[i].version->funcs->resume(adev);
2193 if (r) {
2194 DRM_ERROR("resume of IP block <%s> failed %d\n",
2195 adev->ip_blocks[i].version->funcs->name, r);
2196 return r;
2197 }
a90ad3c2
ML
2198 }
2199 }
2200
2201 return 0;
2202}
2203
e3ecdffa
AD
2204/**
2205 * amdgpu_device_ip_resume_phase2 - run resume for hardware IPs
2206 *
2207 * @adev: amdgpu_device pointer
2208 *
2209 * Second resume function for hardware IPs. The list of all the hardware
2210 * IPs that make up the asic is walked and the resume callbacks are run for
2211 * all blocks except COMMON, GMC, and IH. resume puts the hardware into a
2212 * functional state after a suspend and updates the software state as
2213 * necessary. This function is also used for restoring the GPU after a GPU
2214 * reset.
2215 * Returns 0 on success, negative error code on failure.
2216 */
06ec9070 2217static int amdgpu_device_ip_resume_phase2(struct amdgpu_device *adev)
d38ceaf9
AD
2218{
2219 int i, r;
2220
2221 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 2222 if (!adev->ip_blocks[i].status.valid)
d38ceaf9 2223 continue;
fcf0649f 2224 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
e3ecdffa 2225 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
7a3e0bb2
RZ
2226 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH ||
2227 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP)
fcf0649f 2228 continue;
a1255107 2229 r = adev->ip_blocks[i].version->funcs->resume(adev);
2c1a2784 2230 if (r) {
a1255107
AD
2231 DRM_ERROR("resume of IP block <%s> failed %d\n",
2232 adev->ip_blocks[i].version->funcs->name, r);
d38ceaf9 2233 return r;
2c1a2784 2234 }
d38ceaf9
AD
2235 }
2236
2237 return 0;
2238}
2239
e3ecdffa
AD
2240/**
2241 * amdgpu_device_ip_resume - run resume for hardware IPs
2242 *
2243 * @adev: amdgpu_device pointer
2244 *
2245 * Main resume function for hardware IPs. The hardware IPs
2246 * are split into two resume functions because they are
2247 * also used in recovering from a GPU reset and some additional
2248 * steps need to be taken between them. In this case (S3/S4) they are
2249 * run sequentially.
2250 * Returns 0 on success, negative error code on failure.
2251 */
06ec9070 2252static int amdgpu_device_ip_resume(struct amdgpu_device *adev)
fcf0649f
CZ
2253{
2254 int r;
2255
06ec9070 2256 r = amdgpu_device_ip_resume_phase1(adev);
fcf0649f
CZ
2257 if (r)
2258 return r;
7a3e0bb2
RZ
2259
2260 r = amdgpu_device_fw_loading(adev);
2261 if (r)
2262 return r;
2263
06ec9070 2264 r = amdgpu_device_ip_resume_phase2(adev);
fcf0649f
CZ
2265
2266 return r;
2267}
2268
e3ecdffa
AD
2269/**
2270 * amdgpu_device_detect_sriov_bios - determine if the board supports SR-IOV
2271 *
2272 * @adev: amdgpu_device pointer
2273 *
2274 * Query the VBIOS data tables to determine if the board supports SR-IOV.
2275 */
4e99a44e 2276static void amdgpu_device_detect_sriov_bios(struct amdgpu_device *adev)
048765ad 2277{
6867e1b5
ML
2278 if (amdgpu_sriov_vf(adev)) {
2279 if (adev->is_atom_fw) {
2280 if (amdgpu_atomfirmware_gpu_supports_virtualization(adev))
2281 adev->virt.caps |= AMDGPU_SRIOV_CAPS_SRIOV_VBIOS;
2282 } else {
2283 if (amdgpu_atombios_has_gpu_virtualization_table(adev))
2284 adev->virt.caps |= AMDGPU_SRIOV_CAPS_SRIOV_VBIOS;
2285 }
2286
2287 if (!(adev->virt.caps & AMDGPU_SRIOV_CAPS_SRIOV_VBIOS))
2288 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_NO_VBIOS, 0, 0);
a5bde2f9 2289 }
048765ad
AR
2290}
2291
e3ecdffa
AD
2292/**
2293 * amdgpu_device_asic_has_dc_support - determine if DC supports the asic
2294 *
2295 * @asic_type: AMD asic type
2296 *
2297 * Check if there is DC (new modesetting infrastructure) support for an asic.
2298 * Returns true if DC has support, false if not.
2299 */
4562236b
HW
2300bool amdgpu_device_asic_has_dc_support(enum amd_asic_type asic_type)
2301{
2302 switch (asic_type) {
2303#if defined(CONFIG_DRM_AMD_DC)
2304 case CHIP_BONAIRE:
0d6fbccb 2305 case CHIP_KAVERI:
367e6687
AD
2306 case CHIP_KABINI:
2307 case CHIP_MULLINS:
d9fda248
HW
2308 /*
2309 * We have systems in the wild with these ASICs that require
2310 * LVDS and VGA support which is not supported with DC.
2311 *
2312 * Fallback to the non-DC driver here by default so as not to
2313 * cause regressions.
2314 */
2315 return amdgpu_dc > 0;
2316 case CHIP_HAWAII:
4562236b
HW
2317 case CHIP_CARRIZO:
2318 case CHIP_STONEY:
4562236b 2319 case CHIP_POLARIS10:
675fd32b 2320 case CHIP_POLARIS11:
2c8ad2d5 2321 case CHIP_POLARIS12:
675fd32b 2322 case CHIP_VEGAM:
4562236b
HW
2323 case CHIP_TONGA:
2324 case CHIP_FIJI:
42f8ffa1 2325 case CHIP_VEGA10:
dca7b401 2326 case CHIP_VEGA12:
c6034aa2 2327 case CHIP_VEGA20:
dc37a9a0 2328#if defined(CONFIG_DRM_AMD_DC_DCN1_0)
fd187853 2329 case CHIP_RAVEN:
42f8ffa1 2330#endif
fd187853 2331 return amdgpu_dc != 0;
4562236b
HW
2332#endif
2333 default:
2334 return false;
2335 }
2336}
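/*
 * Note on the two return expressions above: amdgpu_dc is the "dc" module
 * parameter (-1 = auto, 0 = disable, 1 = enable). "amdgpu_dc > 0" selects
 * DC only when it was explicitly requested, which keeps the LVDS/VGA
 * boards listed above on the non-DC path by default, while
 * "amdgpu_dc != 0" enables DC unless it was explicitly disabled.
 */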
2337
2338/**
2339 * amdgpu_device_has_dc_support - check if dc is supported
2340 *
2341 * @adev: amdgpu_device pointer
2342 *
2343 * Returns true for supported, false for not supported
2344 */
2345bool amdgpu_device_has_dc_support(struct amdgpu_device *adev)
2346{
2555039d
XY
2347 if (amdgpu_sriov_vf(adev))
2348 return false;
2349
4562236b
HW
2350 return amdgpu_device_asic_has_dc_support(adev->asic_type);
2351}
2352
d38ceaf9
AD
2353/**
2354 * amdgpu_device_init - initialize the driver
2355 *
2356 * @adev: amdgpu_device pointer
87e3f136 2357 * @ddev: drm dev pointer
d38ceaf9
AD
2358 * @pdev: pci dev pointer
2359 * @flags: driver flags
2360 *
2361 * Initializes the driver info and hw (all asics).
2362 * Returns 0 for success or an error on failure.
2363 * Called at driver startup.
2364 */
2365int amdgpu_device_init(struct amdgpu_device *adev,
2366 struct drm_device *ddev,
2367 struct pci_dev *pdev,
2368 uint32_t flags)
2369{
2370 int r, i;
2371 bool runtime = false;
95844d20 2372 u32 max_MBps;
d38ceaf9
AD
2373
2374 adev->shutdown = false;
2375 adev->dev = &pdev->dev;
2376 adev->ddev = ddev;
2377 adev->pdev = pdev;
2378 adev->flags = flags;
2f7d10b3 2379 adev->asic_type = flags & AMD_ASIC_MASK;
d38ceaf9 2380 adev->usec_timeout = AMDGPU_MAX_USEC_TIMEOUT;
593aa2d2
SL
2381 if (amdgpu_emu_mode == 1)
2382 adev->usec_timeout *= 2;
770d13b1 2383 adev->gmc.gart_size = 512 * 1024 * 1024;
d38ceaf9
AD
2384 adev->accel_working = false;
2385 adev->num_rings = 0;
2386 adev->mman.buffer_funcs = NULL;
2387 adev->mman.buffer_funcs_ring = NULL;
2388 adev->vm_manager.vm_pte_funcs = NULL;
3798e9a6 2389 adev->vm_manager.vm_pte_num_rqs = 0;
132f34e4 2390 adev->gmc.gmc_funcs = NULL;
f54d1867 2391 adev->fence_context = dma_fence_context_alloc(AMDGPU_MAX_RINGS);
b8866c26 2392 bitmap_zero(adev->gfx.pipe_reserve_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
d38ceaf9
AD
2393
2394 adev->smc_rreg = &amdgpu_invalid_rreg;
2395 adev->smc_wreg = &amdgpu_invalid_wreg;
2396 adev->pcie_rreg = &amdgpu_invalid_rreg;
2397 adev->pcie_wreg = &amdgpu_invalid_wreg;
36b9a952
HR
2398 adev->pciep_rreg = &amdgpu_invalid_rreg;
2399 adev->pciep_wreg = &amdgpu_invalid_wreg;
d38ceaf9
AD
2400 adev->uvd_ctx_rreg = &amdgpu_invalid_rreg;
2401 adev->uvd_ctx_wreg = &amdgpu_invalid_wreg;
2402 adev->didt_rreg = &amdgpu_invalid_rreg;
2403 adev->didt_wreg = &amdgpu_invalid_wreg;
ccdbb20a
RZ
2404 adev->gc_cac_rreg = &amdgpu_invalid_rreg;
2405 adev->gc_cac_wreg = &amdgpu_invalid_wreg;
d38ceaf9
AD
2406 adev->audio_endpt_rreg = &amdgpu_block_invalid_rreg;
2407 adev->audio_endpt_wreg = &amdgpu_block_invalid_wreg;
2408
3e39ab90
AD
2409 DRM_INFO("initializing kernel modesetting (%s 0x%04X:0x%04X 0x%04X:0x%04X 0x%02X).\n",
2410 amdgpu_asic_name[adev->asic_type], pdev->vendor, pdev->device,
2411 pdev->subsystem_vendor, pdev->subsystem_device, pdev->revision);
d38ceaf9
AD
2412
2413 /* mutex initializations are all done here so we
2414 * can recall functions without locking issues */
d38ceaf9 2415 atomic_set(&adev->irq.ih.lock, 0);
0e5ca0d1 2416 mutex_init(&adev->firmware.mutex);
d38ceaf9
AD
2417 mutex_init(&adev->pm.mutex);
2418 mutex_init(&adev->gfx.gpu_clock_mutex);
2419 mutex_init(&adev->srbm_mutex);
b8866c26 2420 mutex_init(&adev->gfx.pipe_reserve_mutex);
d23ee13f 2421 mutex_init(&adev->gfx.gfx_off_mutex);
d38ceaf9 2422 mutex_init(&adev->grbm_idx_mutex);
d38ceaf9 2423 mutex_init(&adev->mn_lock);
e23b74aa 2424 mutex_init(&adev->virt.vf_errors.lock);
d38ceaf9 2425 hash_init(adev->mn_hash);
13a752e3 2426 mutex_init(&adev->lock_reset);
d38ceaf9 2427
06ec9070 2428 amdgpu_device_check_arguments(adev);
d38ceaf9 2429
d38ceaf9
AD
2430 spin_lock_init(&adev->mmio_idx_lock);
2431 spin_lock_init(&adev->smc_idx_lock);
2432 spin_lock_init(&adev->pcie_idx_lock);
2433 spin_lock_init(&adev->uvd_ctx_idx_lock);
2434 spin_lock_init(&adev->didt_idx_lock);
ccdbb20a 2435 spin_lock_init(&adev->gc_cac_idx_lock);
16abb5d2 2436 spin_lock_init(&adev->se_cac_idx_lock);
d38ceaf9 2437 spin_lock_init(&adev->audio_endpt_idx_lock);
95844d20 2438 spin_lock_init(&adev->mm_stats.lock);
d38ceaf9 2439
0c4e7fa5
CZ
2440 INIT_LIST_HEAD(&adev->shadow_list);
2441 mutex_init(&adev->shadow_list_lock);
2442
795f2813
AR
2443 INIT_LIST_HEAD(&adev->ring_lru_list);
2444 spin_lock_init(&adev->ring_lru_list_lock);
2445
06ec9070
AD
2446 INIT_DELAYED_WORK(&adev->late_init_work,
2447 amdgpu_device_ip_late_init_func_handler);
1e317b99
RZ
2448 INIT_DELAYED_WORK(&adev->gfx.gfx_off_delay_work,
2449 amdgpu_device_delay_enable_gfx_off);
2dc80b00 2450
d23ee13f 2451 adev->gfx.gfx_off_req_count = 1;
b1ddf548
RZ
2452 adev->pm.ac_power = power_supply_is_system_supplied() > 0 ? true : false;
2453
0fa49558
AX
2454 /* Registers mapping */
2455 /* TODO: block userspace mapping of io register */
da69c161
KW
2456 if (adev->asic_type >= CHIP_BONAIRE) {
2457 adev->rmmio_base = pci_resource_start(adev->pdev, 5);
2458 adev->rmmio_size = pci_resource_len(adev->pdev, 5);
2459 } else {
2460 adev->rmmio_base = pci_resource_start(adev->pdev, 2);
2461 adev->rmmio_size = pci_resource_len(adev->pdev, 2);
2462 }
d38ceaf9 2463
d38ceaf9
AD
2464 adev->rmmio = ioremap(adev->rmmio_base, adev->rmmio_size);
2465 if (adev->rmmio == NULL) {
2466 return -ENOMEM;
2467 }
2468 DRM_INFO("register mmio base: 0x%08X\n", (uint32_t)adev->rmmio_base);
2469 DRM_INFO("register mmio size: %u\n", (unsigned)adev->rmmio_size);
2470
d38ceaf9
AD
2471 /* io port mapping */
2472 for (i = 0; i < DEVICE_COUNT_RESOURCE; i++) {
2473 if (pci_resource_flags(adev->pdev, i) & IORESOURCE_IO) {
2474 adev->rio_mem_size = pci_resource_len(adev->pdev, i);
2475 adev->rio_mem = pci_iomap(adev->pdev, i, adev->rio_mem_size);
2476 break;
2477 }
2478 }
2479 if (adev->rio_mem == NULL)
b64a18c5 2480 DRM_INFO("PCI I/O BAR is not found.\n");
d38ceaf9 2481
5494d864
AD
2482 amdgpu_device_get_pcie_info(adev);
2483
d38ceaf9 2484 /* early init functions */
06ec9070 2485 r = amdgpu_device_ip_early_init(adev);
d38ceaf9
AD
2486 if (r)
2487 return r;
2488
6585661d
OZ
2489 /* doorbell bar mapping and doorbell index init*/
2490 amdgpu_device_doorbell_init(adev);
2491
d38ceaf9
AD
2492 /* if we have > 1 VGA cards, then disable the amdgpu VGA resources */
2493 /* this will fail for cards that aren't VGA class devices, just
2494 * ignore it */
06ec9070 2495 vga_client_register(adev->pdev, adev, NULL, amdgpu_device_vga_set_decode);
d38ceaf9 2496
e9bef455 2497 if (amdgpu_device_is_px(ddev))
d38ceaf9 2498 runtime = true;
84c8b22e
LW
2499 if (!pci_is_thunderbolt_attached(adev->pdev))
2500 vga_switcheroo_register_client(adev->pdev,
2501 &amdgpu_switcheroo_ops, runtime);
d38ceaf9
AD
2502 if (runtime)
2503 vga_switcheroo_init_domain_pm_ops(adev->dev, &adev->vga_pm_domain);
2504
9475a943
SL
2505 if (amdgpu_emu_mode == 1) {
2506 /* post the asic on emulation mode */
2507 emu_soc_asic_init(adev);
bfca0289 2508 goto fence_driver_init;
9475a943 2509 }
bfca0289 2510
d38ceaf9 2511 /* Read BIOS */
83ba126a
AD
2512 if (!amdgpu_get_bios(adev)) {
2513 r = -EINVAL;
2514 goto failed;
2515 }
f7e9e9fe 2516
d38ceaf9 2517 r = amdgpu_atombios_init(adev);
2c1a2784
AD
2518 if (r) {
2519 dev_err(adev->dev, "amdgpu_atombios_init failed\n");
e23b74aa 2520 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_INIT_FAIL, 0, 0);
83ba126a 2521 goto failed;
2c1a2784 2522 }
d38ceaf9 2523
4e99a44e
ML
2524 /* detect if we are with an SRIOV vbios */
2525 amdgpu_device_detect_sriov_bios(adev);
048765ad 2526
d38ceaf9 2527 /* Post card if necessary */
39c640c0 2528 if (amdgpu_device_need_post(adev)) {
d38ceaf9 2529 if (!adev->bios) {
bec86378 2530 dev_err(adev->dev, "no vBIOS found\n");
83ba126a
AD
2531 r = -EINVAL;
2532 goto failed;
d38ceaf9 2533 }
bec86378 2534 DRM_INFO("GPU posting now...\n");
4e99a44e
ML
2535 r = amdgpu_atom_asic_init(adev->mode_info.atom_context);
2536 if (r) {
2537 dev_err(adev->dev, "gpu post error!\n");
2538 goto failed;
2539 }
d38ceaf9
AD
2540 }
2541
88b64e95
AD
2542 if (adev->is_atom_fw) {
2543 /* Initialize clocks */
2544 r = amdgpu_atomfirmware_get_clock_info(adev);
2545 if (r) {
2546 dev_err(adev->dev, "amdgpu_atomfirmware_get_clock_info failed\n");
e23b74aa 2547 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_GET_CLOCK_FAIL, 0, 0);
88b64e95
AD
2548 goto failed;
2549 }
2550 } else {
a5bde2f9
AD
2551 /* Initialize clocks */
2552 r = amdgpu_atombios_get_clock_info(adev);
2553 if (r) {
2554 dev_err(adev->dev, "amdgpu_atombios_get_clock_info failed\n");
e23b74aa 2555 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_GET_CLOCK_FAIL, 0, 0);
89041940 2556 goto failed;
a5bde2f9
AD
2557 }
2558 /* init i2c buses */
4562236b
HW
2559 if (!amdgpu_device_has_dc_support(adev))
2560 amdgpu_atombios_i2c_init(adev);
2c1a2784 2561 }
d38ceaf9 2562
bfca0289 2563fence_driver_init:
d38ceaf9
AD
2564 /* Fence driver */
2565 r = amdgpu_fence_driver_init(adev);
2c1a2784
AD
2566 if (r) {
2567 dev_err(adev->dev, "amdgpu_fence_driver_init failed\n");
e23b74aa 2568 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_FENCE_INIT_FAIL, 0, 0);
83ba126a 2569 goto failed;
2c1a2784 2570 }
d38ceaf9
AD
2571
2572 /* init the mode config */
2573 drm_mode_config_init(adev->ddev);
2574
06ec9070 2575 r = amdgpu_device_ip_init(adev);
d38ceaf9 2576 if (r) {
8840a387 2577 /* failed in exclusive mode due to timeout */
2578 if (amdgpu_sriov_vf(adev) &&
2579 !amdgpu_sriov_runtime(adev) &&
2580 amdgpu_virt_mmio_blocked(adev) &&
2581 !amdgpu_virt_wait_reset(adev)) {
2582 dev_err(adev->dev, "VF exclusive mode timeout\n");
1daee8b4
PD
2583 /* Don't send request since VF is inactive. */
2584 adev->virt.caps &= ~AMDGPU_SRIOV_CAPS_RUNTIME;
2585 adev->virt.ops = NULL;
8840a387 2586 r = -EAGAIN;
2587 goto failed;
2588 }
06ec9070 2589 dev_err(adev->dev, "amdgpu_device_ip_init failed\n");
e23b74aa 2590 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_AMDGPU_INIT_FAIL, 0, 0);
83ba126a 2591 goto failed;
d38ceaf9
AD
2592 }
2593
2594 adev->accel_working = true;
2595
e59c0205
AX
2596 amdgpu_vm_check_compute_bug(adev);
2597
95844d20
MO
2598 /* Initialize the buffer migration limit. */
2599 if (amdgpu_moverate >= 0)
2600 max_MBps = amdgpu_moverate;
2601 else
2602 max_MBps = 8; /* Allow 8 MB/s. */
2603 /* Get a log2 for easy divisions. */
2604 adev->mm_stats.log2_max_MBps = ilog2(max(1u, max_MBps));
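	/*
	 * Worked example of the log2 trick (a sketch; the consumers are the
	 * byte/time conversion helpers in amdgpu_cs.c, named from memory as
	 * bytes_to_us()/us_to_bytes()): dividing by the MB/s rate becomes a
	 * shift. With amdgpu_moverate unset, max_MBps = 8 and
	 * log2_max_MBps = 3, so a 64 MB move is charged
	 *
	 *	64 >> 3 = 8 units of budget
	 *
	 * instead of paying for a 64-bit division in the CS hot path.
	 */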
2605
d38ceaf9
AD
2606 r = amdgpu_ib_pool_init(adev);
2607 if (r) {
2608 dev_err(adev->dev, "IB initialization failed (%d).\n", r);
e23b74aa 2609 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_IB_INIT_FAIL, 0, r);
83ba126a 2610 goto failed;
d38ceaf9
AD
2611 }
2612
2dc8f81e
HC
2613 if (amdgpu_sriov_vf(adev))
2614 amdgpu_virt_init_data_exchange(adev);
2615
9bc92b9c
ML
2616 amdgpu_fbdev_init(adev);
2617
d2f52ac8
RZ
2618 r = amdgpu_pm_sysfs_init(adev);
2619 if (r)
2620 DRM_ERROR("registering pm debugfs failed (%d).\n", r);
2621
75758255 2622 r = amdgpu_debugfs_gem_init(adev);
3f14e623 2623 if (r)
d38ceaf9 2624 DRM_ERROR("registering gem debugfs failed (%d).\n", r);
d38ceaf9
AD
2625
2626 r = amdgpu_debugfs_regs_init(adev);
3f14e623 2627 if (r)
d38ceaf9 2628 DRM_ERROR("registering register debugfs failed (%d).\n", r);
d38ceaf9 2629
50ab2533 2630 r = amdgpu_debugfs_firmware_init(adev);
3f14e623 2631 if (r)
50ab2533 2632 DRM_ERROR("registering firmware debugfs failed (%d).\n", r);
50ab2533 2633
763efb6c 2634 r = amdgpu_debugfs_init(adev);
db95e218 2635 if (r)
763efb6c 2636 DRM_ERROR("Creating debugfs files failed (%d).\n", r);
db95e218 2637
d38ceaf9
AD
2638 if ((amdgpu_testing & 1)) {
2639 if (adev->accel_working)
2640 amdgpu_test_moves(adev);
2641 else
2642 DRM_INFO("amdgpu: acceleration disabled, skipping move tests\n");
2643 }
d38ceaf9
AD
2644 if (amdgpu_benchmarking) {
2645 if (adev->accel_working)
2646 amdgpu_benchmark(adev, amdgpu_benchmarking);
2647 else
2648 DRM_INFO("amdgpu: acceleration disabled, skipping benchmarks\n");
2649 }
2650
2651 /* enable clockgating, etc. after ib tests, etc. since some blocks require
2652 * explicit gating rather than handling it automatically.
2653 */
06ec9070 2654 r = amdgpu_device_ip_late_init(adev);
2c1a2784 2655 if (r) {
06ec9070 2656 dev_err(adev->dev, "amdgpu_device_ip_late_init failed\n");
e23b74aa 2657 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_AMDGPU_LATE_INIT_FAIL, 0, r);
83ba126a 2658 goto failed;
2c1a2784 2659 }
d38ceaf9
AD
2660
2661 return 0;
83ba126a
AD
2662
2663failed:
89041940 2664 amdgpu_vf_error_trans_all(adev);
83ba126a
AD
2665 if (runtime)
2666 vga_switcheroo_fini_domain_pm_ops(adev->dev);
8840a387 2667
83ba126a 2668 return r;
d38ceaf9
AD
2669}
2670
d38ceaf9
AD
2671/**
2672 * amdgpu_device_fini - tear down the driver
2673 *
2674 * @adev: amdgpu_device pointer
2675 *
2676 * Tear down the driver info (all asics).
2677 * Called at driver shutdown.
2678 */
2679void amdgpu_device_fini(struct amdgpu_device *adev)
2680{
2681 int r;
2682
2683 DRM_INFO("amdgpu: finishing device.\n");
2684 adev->shutdown = true;
e5b03032
ML
2685 /* disable all interrupts */
2686 amdgpu_irq_disable_all(adev);
ff97cba8
ML
2687 if (adev->mode_info.mode_config_initialized){
2688 if (!amdgpu_device_has_dc_support(adev))
2689 drm_crtc_force_disable_all(adev->ddev);
2690 else
2691 drm_atomic_helper_shutdown(adev->ddev);
2692 }
d38ceaf9
AD
2693 amdgpu_ib_pool_fini(adev);
2694 amdgpu_fence_driver_fini(adev);
58e955d9 2695 amdgpu_pm_sysfs_fini(adev);
d38ceaf9 2696 amdgpu_fbdev_fini(adev);
06ec9070 2697 r = amdgpu_device_ip_fini(adev);
ab4fe3e1
HR
2698 if (adev->firmware.gpu_info_fw) {
2699 release_firmware(adev->firmware.gpu_info_fw);
2700 adev->firmware.gpu_info_fw = NULL;
2701 }
d38ceaf9 2702 adev->accel_working = false;
2dc80b00 2703 cancel_delayed_work_sync(&adev->late_init_work);
d38ceaf9 2704 /* free i2c buses */
4562236b
HW
2705 if (!amdgpu_device_has_dc_support(adev))
2706 amdgpu_i2c_fini(adev);
bfca0289
SL
2707
2708 if (amdgpu_emu_mode != 1)
2709 amdgpu_atombios_fini(adev);
2710
d38ceaf9
AD
2711 kfree(adev->bios);
2712 adev->bios = NULL;
84c8b22e
LW
2713 if (!pci_is_thunderbolt_attached(adev->pdev))
2714 vga_switcheroo_unregister_client(adev->pdev);
83ba126a
AD
2715 if (adev->flags & AMD_IS_PX)
2716 vga_switcheroo_fini_domain_pm_ops(adev->dev);
d38ceaf9
AD
2717 vga_client_register(adev->pdev, NULL, NULL, NULL);
2718 if (adev->rio_mem)
2719 pci_iounmap(adev->pdev, adev->rio_mem);
2720 adev->rio_mem = NULL;
2721 iounmap(adev->rmmio);
2722 adev->rmmio = NULL;
06ec9070 2723 amdgpu_device_doorbell_fini(adev);
d38ceaf9 2724 amdgpu_debugfs_regs_cleanup(adev);
d38ceaf9
AD
2725}
2726
2727
2728/*
2729 * Suspend & resume.
2730 */
2731/**
810ddc3a 2732 * amdgpu_device_suspend - initiate device suspend
d38ceaf9 2733 *
87e3f136
DP
2734 * @dev: drm dev pointer
2735 * @suspend: suspend state
2736 * @fbcon : notify the fbdev of suspend
d38ceaf9
AD
2737 *
2738 * Puts the hw in the suspend state (all asics).
2739 * Returns 0 for success or an error on failure.
2740 * Called at driver suspend.
2741 */
810ddc3a 2742int amdgpu_device_suspend(struct drm_device *dev, bool suspend, bool fbcon)
d38ceaf9
AD
2743{
2744 struct amdgpu_device *adev;
2745 struct drm_crtc *crtc;
2746 struct drm_connector *connector;
5ceb54c6 2747 int r;
d38ceaf9
AD
2748
2749 if (dev == NULL || dev->dev_private == NULL) {
2750 return -ENODEV;
2751 }
2752
2753 adev = dev->dev_private;
2754
2755 if (dev->switch_power_state == DRM_SWITCH_POWER_OFF)
2756 return 0;
2757
44779b43 2758 adev->in_suspend = true;
d38ceaf9
AD
2759 drm_kms_helper_poll_disable(dev);
2760
5f818173
S
2761 if (fbcon)
2762 amdgpu_fbdev_set_suspend(adev, 1);
2763
a5459475
RZ
2764 cancel_delayed_work_sync(&adev->late_init_work);
2765
4562236b
HW
2766 if (!amdgpu_device_has_dc_support(adev)) {
2767 /* turn off display hw */
2768 drm_modeset_lock_all(dev);
2769 list_for_each_entry(connector, &dev->mode_config.connector_list, head) {
2770 drm_helper_connector_dpms(connector, DRM_MODE_DPMS_OFF);
2771 }
2772 drm_modeset_unlock_all(dev);
fe1053b7
AD
2773 /* unpin the front buffers and cursors */
2774 list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) {
2775 struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
2776 struct drm_framebuffer *fb = crtc->primary->fb;
2777 struct amdgpu_bo *robj;
2778
2779 if (amdgpu_crtc->cursor_bo) {
2780 struct amdgpu_bo *aobj = gem_to_amdgpu_bo(amdgpu_crtc->cursor_bo);
2781 r = amdgpu_bo_reserve(aobj, true);
2782 if (r == 0) {
2783 amdgpu_bo_unpin(aobj);
2784 amdgpu_bo_unreserve(aobj);
2785 }
756e6880 2786 }
756e6880 2787
fe1053b7
AD
2788 if (fb == NULL || fb->obj[0] == NULL) {
2789 continue;
2790 }
2791 robj = gem_to_amdgpu_bo(fb->obj[0]);
2792 /* don't unpin kernel fb objects */
2793 if (!amdgpu_fbdev_robj_is_fb(adev, robj)) {
2794 r = amdgpu_bo_reserve(robj, true);
2795 if (r == 0) {
2796 amdgpu_bo_unpin(robj);
2797 amdgpu_bo_unreserve(robj);
2798 }
d38ceaf9
AD
2799 }
2800 }
2801 }
fe1053b7
AD
2802
2803 amdgpu_amdkfd_suspend(adev);
2804
2805 r = amdgpu_device_ip_suspend_phase1(adev);
2806
d38ceaf9
AD
2807 /* evict vram memory */
2808 amdgpu_bo_evict_vram(adev);
2809
5ceb54c6 2810 amdgpu_fence_driver_suspend(adev);
d38ceaf9 2811
fe1053b7 2812 r = amdgpu_device_ip_suspend_phase2(adev);
d38ceaf9 2813
a0a71e49
AD
2814 /* evict remaining vram memory
2815 * This second call to evict vram is to evict the gart page table
2816 * using the CPU.
2817 */
d38ceaf9
AD
2818 amdgpu_bo_evict_vram(adev);
2819
2820 pci_save_state(dev->pdev);
2821 if (suspend) {
2822 /* Shut down the device */
2823 pci_disable_device(dev->pdev);
2824 pci_set_power_state(dev->pdev, PCI_D3hot);
74b0b157 2825 } else {
2826 r = amdgpu_asic_reset(adev);
2827 if (r)
2828 DRM_ERROR("amdgpu asic reset failed\n");
d38ceaf9
AD
2829 }
2830
d38ceaf9
AD
2831 return 0;
2832}
2833
2834/**
810ddc3a 2835 * amdgpu_device_resume - initiate device resume
d38ceaf9 2836 *
87e3f136
DP
2837 * @dev: drm dev pointer
2838 * @resume: resume state
2839 * @fbcon : notify the fbdev of resume
d38ceaf9
AD
2840 *
2841 * Bring the hw back to operating state (all asics).
2842 * Returns 0 for success or an error on failure.
2843 * Called at driver resume.
2844 */
810ddc3a 2845int amdgpu_device_resume(struct drm_device *dev, bool resume, bool fbcon)
d38ceaf9
AD
2846{
2847 struct drm_connector *connector;
2848 struct amdgpu_device *adev = dev->dev_private;
756e6880 2849 struct drm_crtc *crtc;
03161a6e 2850 int r = 0;
d38ceaf9
AD
2851
2852 if (dev->switch_power_state == DRM_SWITCH_POWER_OFF)
2853 return 0;
2854
d38ceaf9
AD
2855 if (resume) {
2856 pci_set_power_state(dev->pdev, PCI_D0);
2857 pci_restore_state(dev->pdev);
74b0b157 2858 r = pci_enable_device(dev->pdev);
03161a6e 2859 if (r)
4d3b9ae5 2860 return r;
d38ceaf9
AD
2861 }
2862
2863 /* post card */
39c640c0 2864 if (amdgpu_device_need_post(adev)) {
74b0b157 2865 r = amdgpu_atom_asic_init(adev->mode_info.atom_context);
2866 if (r)
2867 DRM_ERROR("amdgpu asic init failed\n");
2868 }
d38ceaf9 2869
06ec9070 2870 r = amdgpu_device_ip_resume(adev);
e6707218 2871 if (r) {
06ec9070 2872 DRM_ERROR("amdgpu_device_ip_resume failed (%d).\n", r);
4d3b9ae5 2873 return r;
e6707218 2874 }
5ceb54c6
AD
2875 amdgpu_fence_driver_resume(adev);
2876
d38ceaf9 2877
06ec9070 2878 r = amdgpu_device_ip_late_init(adev);
03161a6e 2879 if (r)
4d3b9ae5 2880 return r;
d38ceaf9 2881
fe1053b7
AD
2882 if (!amdgpu_device_has_dc_support(adev)) {
2883 /* pin cursors */
2884 list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) {
2885 struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
2886
2887 if (amdgpu_crtc->cursor_bo) {
2888 struct amdgpu_bo *aobj = gem_to_amdgpu_bo(amdgpu_crtc->cursor_bo);
2889 r = amdgpu_bo_reserve(aobj, true);
2890 if (r == 0) {
2891 r = amdgpu_bo_pin(aobj, AMDGPU_GEM_DOMAIN_VRAM);
2892 if (r != 0)
2893 DRM_ERROR("Failed to pin cursor BO (%d)\n", r);
2894 amdgpu_crtc->cursor_addr = amdgpu_bo_gpu_offset(aobj);
2895 amdgpu_bo_unreserve(aobj);
2896 }
756e6880
AD
2897 }
2898 }
2899 }
ba997709
YZ
2900 r = amdgpu_amdkfd_resume(adev);
2901 if (r)
2902 return r;
756e6880 2903
96a5d8d4
LL
2904 /* Make sure IB tests flushed */
2905 flush_delayed_work(&adev->late_init_work);
2906
d38ceaf9
AD
2907 /* blat the mode back in */
2908 if (fbcon) {
4562236b
HW
2909 if (!amdgpu_device_has_dc_support(adev)) {
2910 /* pre DCE11 */
2911 drm_helper_resume_force_mode(dev);
2912
2913 /* turn on display hw */
2914 drm_modeset_lock_all(dev);
2915 list_for_each_entry(connector, &dev->mode_config.connector_list, head) {
2916 drm_helper_connector_dpms(connector, DRM_MODE_DPMS_ON);
2917 }
2918 drm_modeset_unlock_all(dev);
d38ceaf9 2919 }
4d3b9ae5 2920 amdgpu_fbdev_set_suspend(adev, 0);
d38ceaf9
AD
2921 }
2922
2923 drm_kms_helper_poll_enable(dev);
23a1a9e5
L
2924
2925 /*
2926 * Most of the connector probing functions try to acquire runtime pm
2927 * refs to ensure that the GPU is powered on when connector polling is
2928 * performed. Since we're calling this from a runtime PM callback,
2929 * trying to acquire rpm refs will cause us to deadlock.
2930 *
2931 * Since we're guaranteed to be holding the rpm lock, it's safe to
2932 * temporarily disable the rpm helpers so this doesn't deadlock us.
2933 */
2934#ifdef CONFIG_PM
2935 dev->dev->power.disable_depth++;
2936#endif
4562236b
HW
2937 if (!amdgpu_device_has_dc_support(adev))
2938 drm_helper_hpd_irq_event(dev);
2939 else
2940 drm_kms_helper_hotplug_event(dev);
23a1a9e5
L
2941#ifdef CONFIG_PM
2942 dev->dev->power.disable_depth--;
2943#endif
44779b43
RZ
2944 adev->in_suspend = false;
2945
4d3b9ae5 2946 return 0;
d38ceaf9
AD
2947}
2948
e3ecdffa
AD
2949/**
2950 * amdgpu_device_ip_check_soft_reset - did soft reset succeed
2951 *
2952 * @adev: amdgpu_device pointer
2953 *
2954 * The list of all the hardware IPs that make up the asic is walked and
2955 * the check_soft_reset callbacks are run. check_soft_reset determines
2956 * if the asic is still hung or not.
2957 * Returns true if any of the IPs are still in a hung state, false if not.
2958 */
06ec9070 2959static bool amdgpu_device_ip_check_soft_reset(struct amdgpu_device *adev)
63fbf42f
CZ
2960{
2961 int i;
2962 bool asic_hang = false;
2963
f993d628
ML
2964 if (amdgpu_sriov_vf(adev))
2965 return true;
2966
8bc04c29
AD
2967 if (amdgpu_asic_need_full_reset(adev))
2968 return true;
2969
63fbf42f 2970 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 2971 if (!adev->ip_blocks[i].status.valid)
63fbf42f 2972 continue;
a1255107
AD
2973 if (adev->ip_blocks[i].version->funcs->check_soft_reset)
2974 adev->ip_blocks[i].status.hang =
2975 adev->ip_blocks[i].version->funcs->check_soft_reset(adev);
2976 if (adev->ip_blocks[i].status.hang) {
2977 DRM_INFO("IP block:%s is hung!\n", adev->ip_blocks[i].version->funcs->name);
63fbf42f
CZ
2978 asic_hang = true;
2979 }
2980 }
2981 return asic_hang;
2982}
2983
e3ecdffa
AD
2984/**
2985 * amdgpu_device_ip_pre_soft_reset - prepare for soft reset
2986 *
2987 * @adev: amdgpu_device pointer
2988 *
2989 * The list of all the hardware IPs that make up the asic is walked and the
2990 * pre_soft_reset callbacks are run if the block is hung. pre_soft_reset
2991 * handles any IP specific hardware or software state changes that are
2992 * necessary for a soft reset to succeed.
2993 * Returns 0 on success, negative error code on failure.
2994 */
06ec9070 2995static int amdgpu_device_ip_pre_soft_reset(struct amdgpu_device *adev)
d31a501e
CZ
2996{
2997 int i, r = 0;
2998
2999 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 3000 if (!adev->ip_blocks[i].status.valid)
d31a501e 3001 continue;
a1255107
AD
3002 if (adev->ip_blocks[i].status.hang &&
3003 adev->ip_blocks[i].version->funcs->pre_soft_reset) {
3004 r = adev->ip_blocks[i].version->funcs->pre_soft_reset(adev);
d31a501e
CZ
3005 if (r)
3006 return r;
3007 }
3008 }
3009
3010 return 0;
3011}
3012
e3ecdffa
AD
3013/**
3014 * amdgpu_device_ip_need_full_reset - check if a full asic reset is needed
3015 *
3016 * @adev: amdgpu_device pointer
3017 *
3018 * Some hardware IPs cannot be soft reset. If they are hung, a full gpu
3019 * reset is necessary to recover.
3020 * Returns true if a full asic reset is required, false if not.
3021 */
06ec9070 3022static bool amdgpu_device_ip_need_full_reset(struct amdgpu_device *adev)
35d782fe 3023{
da146d3b
AD
3024 int i;
3025
8bc04c29
AD
3026 if (amdgpu_asic_need_full_reset(adev))
3027 return true;
3028
da146d3b 3029 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 3030 if (!adev->ip_blocks[i].status.valid)
da146d3b 3031 continue;
a1255107
AD
3032 if ((adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) ||
3033 (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) ||
3034 (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_ACP) ||
98512bb8
KW
3035 (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_DCE) ||
3036 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP) {
a1255107 3037 if (adev->ip_blocks[i].status.hang) {
da146d3b
AD
3038 DRM_INFO("Some block need full reset!\n");
3039 return true;
3040 }
3041 }
35d782fe
CZ
3042 }
3043 return false;
3044}
3045
e3ecdffa
AD
3046/**
3047 * amdgpu_device_ip_soft_reset - do a soft reset
3048 *
3049 * @adev: amdgpu_device pointer
3050 *
3051 * The list of all the hardware IPs that make up the asic is walked and the
3052 * soft_reset callbacks are run if the block is hung. soft_reset handles any
3053 * IP specific hardware or software state changes that are necessary to soft
3054 * reset the IP.
3055 * Returns 0 on success, negative error code on failure.
3056 */
06ec9070 3057static int amdgpu_device_ip_soft_reset(struct amdgpu_device *adev)
35d782fe
CZ
3058{
3059 int i, r = 0;
3060
3061 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 3062 if (!adev->ip_blocks[i].status.valid)
35d782fe 3063 continue;
a1255107
AD
3064 if (adev->ip_blocks[i].status.hang &&
3065 adev->ip_blocks[i].version->funcs->soft_reset) {
3066 r = adev->ip_blocks[i].version->funcs->soft_reset(adev);
35d782fe
CZ
3067 if (r)
3068 return r;
3069 }
3070 }
3071
3072 return 0;
3073}
3074
e3ecdffa
AD
3075/**
3076 * amdgpu_device_ip_post_soft_reset - clean up from soft reset
3077 *
3078 * @adev: amdgpu_device pointer
3079 *
3080 * The list of all the hardware IPs that make up the asic is walked and the
3081 * post_soft_reset callbacks are run if the asic was hung. post_soft_reset
3082 * handles any IP specific hardware or software state changes that are
3083 * necessary after the IP has been soft reset.
3084 * Returns 0 on success, negative error code on failure.
3085 */
06ec9070 3086static int amdgpu_device_ip_post_soft_reset(struct amdgpu_device *adev)
35d782fe
CZ
3087{
3088 int i, r = 0;
3089
3090 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 3091 if (!adev->ip_blocks[i].status.valid)
35d782fe 3092 continue;
a1255107
AD
3093 if (adev->ip_blocks[i].status.hang &&
3094 adev->ip_blocks[i].version->funcs->post_soft_reset)
3095 r = adev->ip_blocks[i].version->funcs->post_soft_reset(adev);
35d782fe
CZ
3096 if (r)
3097 return r;
3098 }
3099
3100 return 0;
3101}
3102
e3ecdffa 3103/**
c33adbc7 3104 * amdgpu_device_recover_vram - Recover some VRAM contents
e3ecdffa
AD
3105 *
3106 * @adev: amdgpu_device pointer
3107 *
3108 * Restores the contents of VRAM buffers from the shadows in GTT. Used to
3109 * restore things like GPUVM page tables after a GPU reset where
3110 * the contents of VRAM might be lost.
403009bf
CK
3111 *
3112 * Returns:
3113 * 0 on success, negative error code on failure.
e3ecdffa 3114 */
c33adbc7 3115static int amdgpu_device_recover_vram(struct amdgpu_device *adev)
c41d1cf6 3116{
c41d1cf6 3117 struct dma_fence *fence = NULL, *next = NULL;
403009bf
CK
3118 struct amdgpu_bo *shadow;
3119 long r = 1, tmo;
c41d1cf6
ML
3120
3121 if (amdgpu_sriov_runtime(adev))
b045d3af 3122 tmo = msecs_to_jiffies(8000);
c41d1cf6
ML
3123 else
3124 tmo = msecs_to_jiffies(100);
3125
3126 DRM_INFO("recover vram bo from shadow start\n");
3127 mutex_lock(&adev->shadow_list_lock);
403009bf
CK
3128 list_for_each_entry(shadow, &adev->shadow_list, shadow_list) {
3129
3130 /* No need to recover an evicted BO */
3131 if (shadow->tbo.mem.mem_type != TTM_PL_TT ||
3132 shadow->parent->tbo.mem.mem_type != TTM_PL_VRAM)
3133 continue;
3134
3135 r = amdgpu_bo_restore_shadow(shadow, &next);
3136 if (r)
3137 break;
3138
c41d1cf6
ML
3139 if (fence) {
3140 r = dma_fence_wait_timeout(fence, false, tmo);
403009bf
CK
3141 dma_fence_put(fence);
3142 fence = next;
3143 if (r <= 0)
c41d1cf6 3144 break;
403009bf
CK
3145 } else {
3146 fence = next;
c41d1cf6 3147 }
c41d1cf6
ML
3148 }
3149 mutex_unlock(&adev->shadow_list_lock);
3150
403009bf
CK
3151 if (fence)
3152 tmo = dma_fence_wait_timeout(fence, false, tmo);
c41d1cf6
ML
3153 dma_fence_put(fence);
3154
403009bf 3155 if (r <= 0 || tmo <= 0) {
c41d1cf6 3156 DRM_ERROR("recover vram bo from shadow failed\n");
403009bf
CK
3157 return -EIO;
3158 }
c41d1cf6 3159
403009bf
CK
3160 DRM_INFO("recover vram bo from shadow done\n");
3161 return 0;
c41d1cf6
ML
3162}
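/*
 * Reading aid for the loop above: shadow restores are pipelined. The copy
 * for entry N+1 is queued before the fence of entry N is waited on, so the
 * waits overlap with the DMA and only the last fence is waited on after
 * the loop:
 *
 *	r = amdgpu_bo_restore_shadow(shadow, &next);
 *	if (fence)
 *		r = dma_fence_wait_timeout(fence, false, tmo);
 *	fence = next;
 */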
3163
e3ecdffa 3164/**
06ec9070 3165 * amdgpu_device_reset - reset ASIC/GPU for bare-metal or passthrough
a90ad3c2
ML
3166 *
3167 * @adev: amdgpu device pointer
a90ad3c2 3168 *
5740682e 3169 * Attempt a soft reset or, if necessary, a full reset, and reinitialize the ASIC.
3f48c681 3170 * Returns 0 on success, negative error code on failure.
e3ecdffa 3171 */
c41d1cf6 3172static int amdgpu_device_reset(struct amdgpu_device *adev)
a90ad3c2 3173{
5740682e
ML
3174 bool need_full_reset, vram_lost = 0;
3175 int r;
a90ad3c2 3176
06ec9070 3177 need_full_reset = amdgpu_device_ip_need_full_reset(adev);
a90ad3c2 3178
5740682e 3179 if (!need_full_reset) {
06ec9070
AD
3180 amdgpu_device_ip_pre_soft_reset(adev);
3181 r = amdgpu_device_ip_soft_reset(adev);
3182 amdgpu_device_ip_post_soft_reset(adev);
3183 if (r || amdgpu_device_ip_check_soft_reset(adev)) {
5740682e
ML
3184 DRM_INFO("soft reset failed, will fallback to full reset!\n");
3185 need_full_reset = true;
3186 }
5740682e 3187 }
a90ad3c2 3188
5740682e 3189 if (need_full_reset) {
cdd61df6 3190 r = amdgpu_device_ip_suspend(adev);
a90ad3c2 3191
5740682e 3192retry:
5740682e 3193 r = amdgpu_asic_reset(adev);
5740682e
ML
3194 /* post card */
3195 amdgpu_atom_asic_init(adev->mode_info.atom_context);
65781c78 3196
5740682e
ML
3197 if (!r) {
3198 dev_info(adev->dev, "GPU reset succeeded, trying to resume\n");
06ec9070 3199 r = amdgpu_device_ip_resume_phase1(adev);
5740682e
ML
3200 if (r)
3201 goto out;
65781c78 3202
06ec9070 3203 vram_lost = amdgpu_device_check_vram_lost(adev);
5740682e
ML
3204 if (vram_lost) {
3205 DRM_ERROR("VRAM is lost!\n");
3206 atomic_inc(&adev->vram_lost_counter);
3207 }
3208
c1c7ce8f
CK
3209 r = amdgpu_gtt_mgr_recover(
3210 &adev->mman.bdev.man[TTM_PL_TT]);
5740682e
ML
3211 if (r)
3212 goto out;
3213
7a3e0bb2
RZ
3214 r = amdgpu_device_fw_loading(adev);
3215 if (r)
3216 return r;
3217
06ec9070 3218 r = amdgpu_device_ip_resume_phase2(adev);
5740682e
ML
3219 if (r)
3220 goto out;
3221
3222 if (vram_lost)
06ec9070 3223 amdgpu_device_fill_reset_magic(adev);
65781c78 3224 }
5740682e 3225 }
65781c78 3226
5740682e
ML
3227out:
3228 if (!r) {
3229 amdgpu_irq_gpu_reset_resume_helper(adev);
3230 r = amdgpu_ib_ring_tests(adev);
3231 if (r) {
3232 dev_err(adev->dev, "ib ring test failed (%d).\n", r);
cdd61df6 3233 r = amdgpu_device_ip_suspend(adev);
5740682e
ML
3234 need_full_reset = true;
3235 goto retry;
3236 }
3237 }
65781c78 3238
c33adbc7
CK
3239 if (!r)
3240 r = amdgpu_device_recover_vram(adev);
a90ad3c2 3241
5740682e
ML
3242 return r;
3243}
a90ad3c2 3244
e3ecdffa 3245/**
06ec9070 3246 * amdgpu_device_reset_sriov - reset ASIC for SR-IOV vf
5740682e
ML
3247 *
3248 * @adev: amdgpu device pointer
87e3f136 3249 * @from_hypervisor: request from hypervisor
5740682e
ML
3250 *
3251 * Do a VF FLR and reinitialize the ASIC.
3f48c681 3252 * Returns 0 on success, negative error code on failure.
e3ecdffa
AD
3253 */
3254static int amdgpu_device_reset_sriov(struct amdgpu_device *adev,
3255 bool from_hypervisor)
5740682e
ML
3256{
3257 int r;
3258
3259 if (from_hypervisor)
3260 r = amdgpu_virt_request_full_gpu(adev, true);
3261 else
3262 r = amdgpu_virt_reset_gpu(adev);
3263 if (r)
3264 return r;
a90ad3c2
ML
3265
3266 /* Resume IP prior to SMC */
06ec9070 3267 r = amdgpu_device_ip_reinit_early_sriov(adev);
5740682e
ML
3268 if (r)
3269 goto error;
a90ad3c2
ML
3270
3271 /* we need to recover the GART prior to running SMC/CP/SDMA resume */
c1c7ce8f 3272 amdgpu_gtt_mgr_recover(&adev->mman.bdev.man[TTM_PL_TT]);
a90ad3c2 3273
7a3e0bb2
RZ
3274 r = amdgpu_device_fw_loading(adev);
3275 if (r)
3276 return r;
3277
a90ad3c2 3278 /* now we are okay to resume SMC/CP/SDMA */
06ec9070 3279 r = amdgpu_device_ip_reinit_late_sriov(adev);
5740682e
ML
3280 if (r)
3281 goto error;
a90ad3c2
ML
3282
3283 amdgpu_irq_gpu_reset_resume_helper(adev);
5740682e 3284 r = amdgpu_ib_ring_tests(adev);
a90ad3c2 3285
abc34253
ED
3286error:
3287 amdgpu_virt_release_full_gpu(adev, true);
c41d1cf6
ML
3288 if (!r && adev->virt.gim_feature & AMDGIM_FEATURE_GIM_FLR_VRAMLOST) {
3289 atomic_inc(&adev->vram_lost_counter);
c33adbc7 3290 r = amdgpu_device_recover_vram(adev);
a90ad3c2
ML
3291 }
3292
3293 return r;
3294}
3295
12938fad
CK
3296/**
3297 * amdgpu_device_should_recover_gpu - check if we should try GPU recovery
3298 *
3299 * @adev: amdgpu device pointer
3300 *
3301 * Check amdgpu_gpu_recovery and SRIOV status to see if we should try to recover
3302 * a hung GPU.
3303 */
3304bool amdgpu_device_should_recover_gpu(struct amdgpu_device *adev)
3305{
3306 if (!amdgpu_device_ip_check_soft_reset(adev)) {
3307 DRM_INFO("Timeout, but no hardware hang detected.\n");
3308 return false;
3309 }
3310
3ba7b418
AG
3311 if (amdgpu_gpu_recovery == 0)
3312 goto disabled;
3313
3314 if (amdgpu_sriov_vf(adev))
3315 return true;
3316
3317 if (amdgpu_gpu_recovery == -1) {
3318 switch (adev->asic_type) {
3319 case CHIP_TOPAZ:
3320 case CHIP_TONGA:
3321 case CHIP_FIJI:
3322 case CHIP_POLARIS10:
3323 case CHIP_POLARIS11:
3324 case CHIP_POLARIS12:
3325 case CHIP_VEGAM:
3326 case CHIP_VEGA20:
3327 case CHIP_VEGA10:
3328 case CHIP_VEGA12:
3329 break;
3330 default:
3331 goto disabled;
3332 }
12938fad
CK
3333 }
3334
3335 return true;
3ba7b418
AG
3336
3337disabled:
3338 DRM_INFO("GPU recovery disabled.\n");
3339 return false;
12938fad
CK
3340}
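/*
 * Note: amdgpu_gpu_recovery is the "gpu_recovery" module parameter
 * (-1 = auto, 0 = disable, 1 = enable). In auto mode, recovery is
 * attempted only for SR-IOV VFs and for the ASICs whitelisted in the
 * switch above; everything else falls through to "disabled".
 */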
3341
d38ceaf9 3342/**
5f152b5e 3343 * amdgpu_device_gpu_recover - reset the asic and recover scheduler
d38ceaf9
AD
3344 *
3345 * @adev: amdgpu device pointer
5740682e 3346 * @job: which job triggered the hang
d38ceaf9 3347 *
5740682e 3348 * Attempt to reset the GPU if it has hung (all asics).
d38ceaf9
AD
3349 * Returns 0 for success or an error on failure.
3350 */
5f152b5e 3351int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
12938fad 3352 struct amdgpu_job *job)
d38ceaf9 3353{
5740682e 3354 int i, r, resched;
fb140b29 3355
5740682e
ML
3356 dev_info(adev->dev, "GPU reset begin!\n");
3357
13a752e3 3358 mutex_lock(&adev->lock_reset);
d94aed5a 3359 atomic_inc(&adev->gpu_reset_counter);
13a752e3 3360 adev->in_gpu_reset = 1;
d38ceaf9 3361
5c6dd71e
SL
3362 /* Block kfd */
3363 amdgpu_amdkfd_pre_reset(adev);
3364
a3c47d6b
CZ
3365 /* block TTM */
3366 resched = ttm_bo_lock_delayed_workqueue(&adev->mman.bdev);
71182665 3367
71182665 3368 /* block all schedulers and reset given job's ring */
0875dc9e
CZ
3369 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
3370 struct amdgpu_ring *ring = adev->rings[i];
3371
51687759 3372 if (!ring || !ring->sched.thread)
0875dc9e 3373 continue;
5740682e 3374
71182665
ML
3375 kthread_park(ring->sched.thread);
3376
734afd4b 3377 if (job && job->base.sched != &ring->sched)
5740682e
ML
3378 continue;
3379
67ccea60 3380 drm_sched_hw_job_reset(&ring->sched, job ? &job->base : NULL);
5740682e 3381
2f9d4084
ML
3382 /* after all hw jobs are reset, hw fence is meaningless, so force_completion */
3383 amdgpu_fence_driver_force_completion(ring);
0875dc9e 3384 }
d38ceaf9 3385
5740682e 3386 if (amdgpu_sriov_vf(adev))
c41d1cf6 3387 r = amdgpu_device_reset_sriov(adev, job ? false : true);
5740682e 3388 else
c41d1cf6 3389 r = amdgpu_device_reset(adev);
5740682e 3390
71182665
ML
3391 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
3392 struct amdgpu_ring *ring = adev->rings[i];
53cdccd5 3393
71182665
ML
3394 if (!ring || !ring->sched.thread)
3395 continue;
5740682e 3396
71182665
ML
3397 /* only need to recover the scheduler of the given job's ring
3398 * (or all rings, in the case @job is NULL)
3399 * after the reset above has completed
3400 */
3320b8d2 3401 if ((!job || job->base.sched == &ring->sched) && !r)
1b1f42d8 3402 drm_sched_job_recovery(&ring->sched);
5740682e 3403
71182665 3404 kthread_unpark(ring->sched.thread);
d38ceaf9
AD
3405 }
3406
bf830604 3407 if (!amdgpu_device_has_dc_support(adev)) {
4562236b 3408 drm_helper_resume_force_mode(adev->ddev);
5740682e 3409 }
d38ceaf9
AD
3410
3411 ttm_bo_unlock_delayed_workqueue(&adev->mman.bdev, resched);
5740682e 3412
89041940 3413 if (r) {
d38ceaf9 3414 /* bad news, how to tell it to userspace ? */
5740682e
ML
3415 dev_info(adev->dev, "GPU reset(%d) failed\n", atomic_read(&adev->gpu_reset_counter));
3416 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_GPU_RESET_FAIL, 0, r);
3417 } else {
3f48c681 3418 dev_info(adev->dev, "GPU reset(%d) succeeded!\n",atomic_read(&adev->gpu_reset_counter));
89041940 3419 }
d38ceaf9 3420
5c6dd71e
SL
3421 /*unlock kfd */
3422 amdgpu_amdkfd_post_reset(adev);
89041940 3423 amdgpu_vf_error_trans_all(adev);
13a752e3
ML
3424 adev->in_gpu_reset = 0;
3425 mutex_unlock(&adev->lock_reset);
d38ceaf9
AD
3426 return r;
3427}
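/*
 * Illustrative sketch, not part of this file: while a recovery is in
 * flight, other paths can check the state published above before touching
 * the hardware, roughly:
 *
 *	if (adev->in_gpu_reset)
 *		return -EBUSY;	// defer register access until reset ends
 *
 * The lock_reset mutex taken above serializes concurrent recovery attempts.
 */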

/**
 * amdgpu_device_get_pcie_info - fetch PCIe info about the PCIe slot
 *
 * @adev: amdgpu_device pointer
 *
 * Fetches and stores in the driver the PCIe capabilities (gen speed
 * and lanes) of the slot the device is in. Handles APUs and
 * virtualized environments where PCIe config space may not be available.
 */
static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev)
{
	struct pci_dev *pdev;
	enum pci_bus_speed speed_cap;
	enum pcie_link_width link_width;

	if (amdgpu_pcie_gen_cap)
		adev->pm.pcie_gen_mask = amdgpu_pcie_gen_cap;

	if (amdgpu_pcie_lane_cap)
		adev->pm.pcie_mlw_mask = amdgpu_pcie_lane_cap;

	/* covers APUs as well */
	if (pci_is_root_bus(adev->pdev->bus)) {
		if (adev->pm.pcie_gen_mask == 0)
			adev->pm.pcie_gen_mask = AMDGPU_DEFAULT_PCIE_GEN_MASK;
		if (adev->pm.pcie_mlw_mask == 0)
			adev->pm.pcie_mlw_mask = AMDGPU_DEFAULT_PCIE_MLW_MASK;
		return;
	}

	if (adev->pm.pcie_gen_mask == 0) {
		/* asic caps */
		pdev = adev->pdev;
		speed_cap = pcie_get_speed_cap(pdev);
		if (speed_cap == PCI_SPEED_UNKNOWN) {
			adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
						   CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
						   CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3);
		} else {
			if (speed_cap == PCIE_SPEED_16_0GT)
				adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
							   CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
							   CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3 |
							   CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN4);
			else if (speed_cap == PCIE_SPEED_8_0GT)
				adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
							   CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
							   CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3);
			else if (speed_cap == PCIE_SPEED_5_0GT)
				adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
							   CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2);
			else
				adev->pm.pcie_gen_mask |= CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1;
		}
		/* platform caps */
		pdev = adev->ddev->pdev->bus->self;
		speed_cap = pcie_get_speed_cap(pdev);
		if (speed_cap == PCI_SPEED_UNKNOWN) {
			adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
						   CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2);
		} else {
			if (speed_cap == PCIE_SPEED_16_0GT)
				adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
							   CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2 |
							   CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3 |
							   CAIL_PCIE_LINK_SPEED_SUPPORT_GEN4);
			else if (speed_cap == PCIE_SPEED_8_0GT)
				adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
							   CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2 |
							   CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3);
			else if (speed_cap == PCIE_SPEED_5_0GT)
				adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
							   CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2);
			else
				adev->pm.pcie_gen_mask |= CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1;
		}
	}
	if (adev->pm.pcie_mlw_mask == 0) {
		pdev = adev->ddev->pdev->bus->self;
		link_width = pcie_get_width_cap(pdev);
		if (link_width == PCIE_LNK_WIDTH_UNKNOWN) {
			adev->pm.pcie_mlw_mask |= AMDGPU_DEFAULT_PCIE_MLW_MASK;
		} else {
			switch (link_width) {
			case PCIE_LNK_X32:
				adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X32 |
							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X16 |
							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 |
							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
				break;
			case PCIE_LNK_X16:
				adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X16 |
							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 |
							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
				break;
			case PCIE_LNK_X12:
				adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 |
							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
				break;
			case PCIE_LNK_X8:
				adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
				break;
			case PCIE_LNK_X4:
				adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
				break;
			case PCIE_LNK_X2:
				adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
				break;
			case PCIE_LNK_X1:
				adev->pm.pcie_mlw_mask = CAIL_PCIE_LINK_WIDTH_SUPPORT_X1;
				break;
			default:
				break;
			}
		}
	}
}
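/*
 * Illustrative sketch, not part of this file: consumers of the masks
 * computed above test individual CAIL bits, e.g. (hypothetical helper):
 *
 *	static bool pcie_gen3_supported(struct amdgpu_device *adev)
 *	{
 *		return !!(adev->pm.pcie_gen_mask &
 *			  CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3);
 *	}
 *
 * The power-management code uses masks like these to clamp link-speed
 * requests to what both the ASIC and the platform support.
 */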