drm/amdgpu/soc15: add need_reset_on_init asic callback for SOC15 (v2)
drivers/gpu/drm/amd/amdgpu/amdgpu_device.c

/*
 * Copyright 2008 Advanced Micro Devices, Inc.
 * Copyright 2008 Red Hat Inc.
 * Copyright 2009 Jerome Glisse.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 * Authors: Dave Airlie
 *          Alex Deucher
 *          Jerome Glisse
 */
#include <linux/power_supply.h>
#include <linux/kthread.h>
#include <linux/console.h>
#include <linux/slab.h>
#include <drm/drmP.h>
#include <drm/drm_crtc_helper.h>
#include <drm/drm_atomic_helper.h>
#include <drm/amdgpu_drm.h>
#include <linux/vgaarb.h>
#include <linux/vga_switcheroo.h>
#include <linux/efi.h>
#include "amdgpu.h"
#include "amdgpu_trace.h"
#include "amdgpu_i2c.h"
#include "atom.h"
#include "amdgpu_atombios.h"
#include "amdgpu_atomfirmware.h"
#include "amd_pcie.h"
#ifdef CONFIG_DRM_AMDGPU_SI
#include "si.h"
#endif
#ifdef CONFIG_DRM_AMDGPU_CIK
#include "cik.h"
#endif
#include "vi.h"
#include "soc15.h"
#include "bif/bif_4_1_d.h"
#include <linux/pci.h>
#include <linux/firmware.h>
#include "amdgpu_vf_error.h"

#include "amdgpu_amdkfd.h"
#include "amdgpu_pm.h"

#include "amdgpu_xgmi.h"

MODULE_FIRMWARE("amdgpu/vega10_gpu_info.bin");
MODULE_FIRMWARE("amdgpu/vega12_gpu_info.bin");
MODULE_FIRMWARE("amdgpu/raven_gpu_info.bin");
MODULE_FIRMWARE("amdgpu/picasso_gpu_info.bin");
MODULE_FIRMWARE("amdgpu/raven2_gpu_info.bin");

#define AMDGPU_RESUME_MS		2000

static const char *amdgpu_asic_name[] = {
	"TAHITI",
	"PITCAIRN",
	"VERDE",
	"OLAND",
	"HAINAN",
	"BONAIRE",
	"KAVERI",
	"KABINI",
	"HAWAII",
	"MULLINS",
	"TOPAZ",
	"TONGA",
	"FIJI",
	"CARRIZO",
	"STONEY",
	"POLARIS10",
	"POLARIS11",
	"POLARIS12",
	"VEGAM",
	"VEGA10",
	"VEGA12",
	"VEGA20",
	"RAVEN",
	"LAST",
};

static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev);

/**
 * amdgpu_device_is_px - Is the device a dGPU with HG/PX power control
 *
 * @dev: drm_device pointer
 *
 * Returns true if the device is a dGPU with HG/PX power control,
 * otherwise return false.
 */
bool amdgpu_device_is_px(struct drm_device *dev)
{
	struct amdgpu_device *adev = dev->dev_private;

	if (adev->flags & AMD_IS_PX)
		return true;
	return false;
}

/*
 * MMIO register access helper functions.
 */
/**
 * amdgpu_mm_rreg - read a memory mapped IO register
 *
 * @adev: amdgpu_device pointer
 * @reg: dword aligned register offset
 * @acc_flags: access flags which require special behavior
 *
 * Returns the 32 bit value from the offset specified.
 */
uint32_t amdgpu_mm_rreg(struct amdgpu_device *adev, uint32_t reg,
			uint32_t acc_flags)
{
	uint32_t ret;

	if (!(acc_flags & AMDGPU_REGS_NO_KIQ) && amdgpu_sriov_runtime(adev))
		return amdgpu_virt_kiq_rreg(adev, reg);

	if ((reg * 4) < adev->rmmio_size && !(acc_flags & AMDGPU_REGS_IDX))
		ret = readl(((void __iomem *)adev->rmmio) + (reg * 4));
	else {
		unsigned long flags;

		spin_lock_irqsave(&adev->mmio_idx_lock, flags);
		writel((reg * 4), ((void __iomem *)adev->rmmio) + (mmMM_INDEX * 4));
		ret = readl(((void __iomem *)adev->rmmio) + (mmMM_DATA * 4));
		spin_unlock_irqrestore(&adev->mmio_idx_lock, flags);
	}
	trace_amdgpu_mm_rreg(adev->pdev->device, reg, ret);
	return ret;
}

/*
 * MMIO register read with bytes helper functions
 * @offset: byte offset from MMIO start
 *
 */

/**
 * amdgpu_mm_rreg8 - read a memory mapped IO register
 *
 * @adev: amdgpu_device pointer
 * @offset: byte aligned register offset
 *
 * Returns the 8 bit value from the offset specified.
 */
uint8_t amdgpu_mm_rreg8(struct amdgpu_device *adev, uint32_t offset)
{
	if (offset < adev->rmmio_size)
		return (readb(adev->rmmio + offset));
	BUG();
}

/*
 * MMIO register write with bytes helper functions
 * @offset: byte offset from MMIO start
 * @value: the value to be written to the register
 *
 */
/**
 * amdgpu_mm_wreg8 - write to a memory mapped IO register
 *
 * @adev: amdgpu_device pointer
 * @offset: byte aligned register offset
 * @value: 8 bit value to write
 *
 * Writes the value specified to the offset specified.
 */
void amdgpu_mm_wreg8(struct amdgpu_device *adev, uint32_t offset, uint8_t value)
{
	if (offset < adev->rmmio_size)
		writeb(value, adev->rmmio + offset);
	else
		BUG();
}

/**
 * amdgpu_mm_wreg - write to a memory mapped IO register
 *
 * @adev: amdgpu_device pointer
 * @reg: dword aligned register offset
 * @v: 32 bit value to write to the register
 * @acc_flags: access flags which require special behavior
 *
 * Writes the value specified to the offset specified.
 */
void amdgpu_mm_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v,
		    uint32_t acc_flags)
{
	trace_amdgpu_mm_wreg(adev->pdev->device, reg, v);

	if (adev->asic_type >= CHIP_VEGA10 && reg == 0) {
		adev->last_mm_index = v;
	}

	if (!(acc_flags & AMDGPU_REGS_NO_KIQ) && amdgpu_sriov_runtime(adev))
		return amdgpu_virt_kiq_wreg(adev, reg, v);

	if ((reg * 4) < adev->rmmio_size && !(acc_flags & AMDGPU_REGS_IDX))
		writel(v, ((void __iomem *)adev->rmmio) + (reg * 4));
	else {
		unsigned long flags;

		spin_lock_irqsave(&adev->mmio_idx_lock, flags);
		writel((reg * 4), ((void __iomem *)adev->rmmio) + (mmMM_INDEX * 4));
		writel(v, ((void __iomem *)adev->rmmio) + (mmMM_DATA * 4));
		spin_unlock_irqrestore(&adev->mmio_idx_lock, flags);
	}

	if (adev->asic_type >= CHIP_VEGA10 && reg == 1 && adev->last_mm_index == 0x5702C) {
		udelay(500);
	}
}

/**
 * amdgpu_io_rreg - read an IO register
 *
 * @adev: amdgpu_device pointer
 * @reg: dword aligned register offset
 *
 * Returns the 32 bit value from the offset specified.
 */
u32 amdgpu_io_rreg(struct amdgpu_device *adev, u32 reg)
{
	if ((reg * 4) < adev->rio_mem_size)
		return ioread32(adev->rio_mem + (reg * 4));
	else {
		iowrite32((reg * 4), adev->rio_mem + (mmMM_INDEX * 4));
		return ioread32(adev->rio_mem + (mmMM_DATA * 4));
	}
}

/**
 * amdgpu_io_wreg - write to an IO register
 *
 * @adev: amdgpu_device pointer
 * @reg: dword aligned register offset
 * @v: 32 bit value to write to the register
 *
 * Writes the value specified to the offset specified.
 */
void amdgpu_io_wreg(struct amdgpu_device *adev, u32 reg, u32 v)
{
	if (adev->asic_type >= CHIP_VEGA10 && reg == 0) {
		adev->last_mm_index = v;
	}

	if ((reg * 4) < adev->rio_mem_size)
		iowrite32(v, adev->rio_mem + (reg * 4));
	else {
		iowrite32((reg * 4), adev->rio_mem + (mmMM_INDEX * 4));
		iowrite32(v, adev->rio_mem + (mmMM_DATA * 4));
	}

	if (adev->asic_type >= CHIP_VEGA10 && reg == 1 && adev->last_mm_index == 0x5702C) {
		udelay(500);
	}
}

/**
 * amdgpu_mm_rdoorbell - read a doorbell dword
 *
 * @adev: amdgpu_device pointer
 * @index: doorbell index
 *
 * Returns the value in the doorbell aperture at the
 * requested doorbell index (CIK).
 */
u32 amdgpu_mm_rdoorbell(struct amdgpu_device *adev, u32 index)
{
	if (index < adev->doorbell.num_doorbells) {
		return readl(adev->doorbell.ptr + index);
	} else {
		DRM_ERROR("reading beyond doorbell aperture: 0x%08x!\n", index);
		return 0;
	}
}

/**
 * amdgpu_mm_wdoorbell - write a doorbell dword
 *
 * @adev: amdgpu_device pointer
 * @index: doorbell index
 * @v: value to write
 *
 * Writes @v to the doorbell aperture at the
 * requested doorbell index (CIK).
 */
void amdgpu_mm_wdoorbell(struct amdgpu_device *adev, u32 index, u32 v)
{
	if (index < adev->doorbell.num_doorbells) {
		writel(v, adev->doorbell.ptr + index);
	} else {
		DRM_ERROR("writing beyond doorbell aperture: 0x%08x!\n", index);
	}
}

/**
 * amdgpu_mm_rdoorbell64 - read a doorbell Qword
 *
 * @adev: amdgpu_device pointer
 * @index: doorbell index
 *
 * Returns the value in the doorbell aperture at the
 * requested doorbell index (VEGA10+).
 */
u64 amdgpu_mm_rdoorbell64(struct amdgpu_device *adev, u32 index)
{
	if (index < adev->doorbell.num_doorbells) {
		return atomic64_read((atomic64_t *)(adev->doorbell.ptr + index));
	} else {
		DRM_ERROR("reading beyond doorbell aperture: 0x%08x!\n", index);
		return 0;
	}
}

/**
 * amdgpu_mm_wdoorbell64 - write a doorbell Qword
 *
 * @adev: amdgpu_device pointer
 * @index: doorbell index
 * @v: value to write
 *
 * Writes @v to the doorbell aperture at the
 * requested doorbell index (VEGA10+).
 */
void amdgpu_mm_wdoorbell64(struct amdgpu_device *adev, u32 index, u64 v)
{
	if (index < adev->doorbell.num_doorbells) {
		atomic64_set((atomic64_t *)(adev->doorbell.ptr + index), v);
	} else {
		DRM_ERROR("writing beyond doorbell aperture: 0x%08x!\n", index);
	}
}

/**
 * amdgpu_invalid_rreg - dummy reg read function
 *
 * @adev: amdgpu device pointer
 * @reg: offset of register
 *
 * Dummy register read function.  Used for register blocks
 * that certain asics don't have (all asics).
 * Returns the value in the register.
 */
static uint32_t amdgpu_invalid_rreg(struct amdgpu_device *adev, uint32_t reg)
{
	DRM_ERROR("Invalid callback to read register 0x%04X\n", reg);
	BUG();
	return 0;
}

/**
 * amdgpu_invalid_wreg - dummy reg write function
 *
 * @adev: amdgpu device pointer
 * @reg: offset of register
 * @v: value to write to the register
 *
 * Dummy register write function.  Used for register blocks
 * that certain asics don't have (all asics).
 */
static void amdgpu_invalid_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v)
{
	DRM_ERROR("Invalid callback to write register 0x%04X with 0x%08X\n",
		  reg, v);
	BUG();
}

/**
 * amdgpu_block_invalid_rreg - dummy reg read function
 *
 * @adev: amdgpu device pointer
 * @block: offset of instance
 * @reg: offset of register
 *
 * Dummy register read function.  Used for register blocks
 * that certain asics don't have (all asics).
 * Returns the value in the register.
 */
static uint32_t amdgpu_block_invalid_rreg(struct amdgpu_device *adev,
					  uint32_t block, uint32_t reg)
{
	DRM_ERROR("Invalid callback to read register 0x%04X in block 0x%04X\n",
		  reg, block);
	BUG();
	return 0;
}

/**
 * amdgpu_block_invalid_wreg - dummy reg write function
 *
 * @adev: amdgpu device pointer
 * @block: offset of instance
 * @reg: offset of register
 * @v: value to write to the register
 *
 * Dummy register write function.  Used for register blocks
 * that certain asics don't have (all asics).
 */
static void amdgpu_block_invalid_wreg(struct amdgpu_device *adev,
				      uint32_t block,
				      uint32_t reg, uint32_t v)
{
	DRM_ERROR("Invalid block callback to write register 0x%04X in block 0x%04X with 0x%08X\n",
		  reg, block, v);
	BUG();
}

/**
 * amdgpu_device_vram_scratch_init - allocate the VRAM scratch page
 *
 * @adev: amdgpu device pointer
 *
 * Allocates a scratch page of VRAM for use by various things in the
 * driver.
 */
static int amdgpu_device_vram_scratch_init(struct amdgpu_device *adev)
{
	return amdgpu_bo_create_kernel(adev, AMDGPU_GPU_PAGE_SIZE,
				       PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM,
				       &adev->vram_scratch.robj,
				       &adev->vram_scratch.gpu_addr,
				       (void **)&adev->vram_scratch.ptr);
}

/**
 * amdgpu_device_vram_scratch_fini - Free the VRAM scratch page
 *
 * @adev: amdgpu device pointer
 *
 * Frees the VRAM scratch page.
 */
static void amdgpu_device_vram_scratch_fini(struct amdgpu_device *adev)
{
	amdgpu_bo_free_kernel(&adev->vram_scratch.robj, NULL, NULL);
}

/**
 * amdgpu_device_program_register_sequence - program an array of registers.
 *
 * @adev: amdgpu_device pointer
 * @registers: pointer to the register array
 * @array_size: size of the register array
 *
 * Programs an array of registers with AND and OR masks.
 * This is a helper for setting golden registers.
 */
void amdgpu_device_program_register_sequence(struct amdgpu_device *adev,
					     const u32 *registers,
					     const u32 array_size)
{
	u32 tmp, reg, and_mask, or_mask;
	int i;

	if (array_size % 3)
		return;

	for (i = 0; i < array_size; i += 3) {
		reg = registers[i + 0];
		and_mask = registers[i + 1];
		or_mask = registers[i + 2];

		if (and_mask == 0xffffffff) {
			tmp = or_mask;
		} else {
			tmp = RREG32(reg);
			tmp &= ~and_mask;
			tmp |= or_mask;
		}
		WREG32(reg, tmp);
	}
}

/**
 * amdgpu_device_pci_config_reset - reset the GPU
 *
 * @adev: amdgpu_device pointer
 *
 * Resets the GPU using the pci config reset sequence.
 * Only applicable to asics prior to vega10.
 */
void amdgpu_device_pci_config_reset(struct amdgpu_device *adev)
{
	pci_write_config_dword(adev->pdev, 0x7c, AMDGPU_ASIC_RESET_DATA);
}

/*
 * GPU doorbell aperture helper functions.
 */
/**
 * amdgpu_device_doorbell_init - Init doorbell driver information.
 *
 * @adev: amdgpu_device pointer
 *
 * Init doorbell driver information (CIK)
 * Returns 0 on success, error on failure.
 */
static int amdgpu_device_doorbell_init(struct amdgpu_device *adev)
{

	/* No doorbell on SI hardware generation */
	if (adev->asic_type < CHIP_BONAIRE) {
		adev->doorbell.base = 0;
		adev->doorbell.size = 0;
		adev->doorbell.num_doorbells = 0;
		adev->doorbell.ptr = NULL;
		return 0;
	}

	if (pci_resource_flags(adev->pdev, 2) & IORESOURCE_UNSET)
		return -EINVAL;

	amdgpu_asic_init_doorbell_index(adev);

	/* doorbell bar mapping */
	adev->doorbell.base = pci_resource_start(adev->pdev, 2);
	adev->doorbell.size = pci_resource_len(adev->pdev, 2);

	adev->doorbell.num_doorbells = min_t(u32, adev->doorbell.size / sizeof(u32),
					     adev->doorbell_index.max_assignment + 1);
	if (adev->doorbell.num_doorbells == 0)
		return -EINVAL;

	/* For Vega, reserve and map two pages on doorbell BAR since SDMA
	 * paging queue doorbell use the second page. The
	 * AMDGPU_DOORBELL64_MAX_ASSIGNMENT definition assumes all the
	 * doorbells are in the first page. So with paging queue enabled,
	 * the max num_doorbells should + 1 page (0x400 in dword)
	 */
	if (adev->asic_type >= CHIP_VEGA10)
		adev->doorbell.num_doorbells += 0x400;

	adev->doorbell.ptr = ioremap(adev->doorbell.base,
				     adev->doorbell.num_doorbells *
				     sizeof(u32));
	if (adev->doorbell.ptr == NULL)
		return -ENOMEM;

	return 0;
}

/**
 * amdgpu_device_doorbell_fini - Tear down doorbell driver information.
 *
 * @adev: amdgpu_device pointer
 *
 * Tear down doorbell driver information (CIK)
 */
static void amdgpu_device_doorbell_fini(struct amdgpu_device *adev)
{
	iounmap(adev->doorbell.ptr);
	adev->doorbell.ptr = NULL;
}


/*
 * amdgpu_device_wb_*()
 * Writeback is the method by which the GPU updates special pages in memory
 * with the status of certain GPU events (fences, ring pointers, etc.).
 */

/**
 * amdgpu_device_wb_fini - Disable Writeback and free memory
 *
 * @adev: amdgpu_device pointer
 *
 * Disables Writeback and frees the Writeback memory (all asics).
 * Used at driver shutdown.
 */
static void amdgpu_device_wb_fini(struct amdgpu_device *adev)
{
	if (adev->wb.wb_obj) {
		amdgpu_bo_free_kernel(&adev->wb.wb_obj,
				      &adev->wb.gpu_addr,
				      (void **)&adev->wb.wb);
		adev->wb.wb_obj = NULL;
	}
}

/**
 * amdgpu_device_wb_init - Init Writeback driver info and allocate memory
 *
 * @adev: amdgpu_device pointer
 *
 * Initializes writeback and allocates writeback memory (all asics).
 * Used at driver startup.
 * Returns 0 on success or a negative error code on failure.
 */
static int amdgpu_device_wb_init(struct amdgpu_device *adev)
{
	int r;

	if (adev->wb.wb_obj == NULL) {
		/* AMDGPU_MAX_WB * sizeof(uint32_t) * 8 = AMDGPU_MAX_WB 256bit slots */
		r = amdgpu_bo_create_kernel(adev, AMDGPU_MAX_WB * sizeof(uint32_t) * 8,
					    PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT,
					    &adev->wb.wb_obj, &adev->wb.gpu_addr,
					    (void **)&adev->wb.wb);
		if (r) {
			dev_warn(adev->dev, "(%d) create WB bo failed\n", r);
			return r;
		}

		adev->wb.num_wb = AMDGPU_MAX_WB;
		memset(&adev->wb.used, 0, sizeof(adev->wb.used));

		/* clear wb memory */
		memset((char *)adev->wb.wb, 0, AMDGPU_MAX_WB * sizeof(uint32_t) * 8);
	}

	return 0;
}

/**
 * amdgpu_device_wb_get - Allocate a wb entry
 *
 * @adev: amdgpu_device pointer
 * @wb: wb index
 *
 * Allocate a wb slot for use by the driver (all asics).
 * Returns 0 on success or -EINVAL on failure.
 */
int amdgpu_device_wb_get(struct amdgpu_device *adev, u32 *wb)
{
	unsigned long offset = find_first_zero_bit(adev->wb.used, adev->wb.num_wb);

	if (offset < adev->wb.num_wb) {
		__set_bit(offset, adev->wb.used);
		*wb = offset << 3; /* convert to dw offset */
		return 0;
	} else {
		return -EINVAL;
	}
}

/**
 * amdgpu_device_wb_free - Free a wb entry
 *
 * @adev: amdgpu_device pointer
 * @wb: wb index
 *
 * Free a wb slot allocated for use by the driver (all asics)
 */
void amdgpu_device_wb_free(struct amdgpu_device *adev, u32 wb)
{
	wb >>= 3;
	if (wb < adev->wb.num_wb)
		__clear_bit(wb, adev->wb.used);
}

/**
 * amdgpu_device_resize_fb_bar - try to resize FB BAR
 *
 * @adev: amdgpu_device pointer
 *
 * Try to resize FB BAR to make all VRAM CPU accessible. We try very hard not
 * to fail, but if any of the BARs is not accessible after the size we abort
 * driver loading by returning -ENODEV.
 */
int amdgpu_device_resize_fb_bar(struct amdgpu_device *adev)
{
	u64 space_needed = roundup_pow_of_two(adev->gmc.real_vram_size);
	u32 rbar_size = order_base_2(((space_needed >> 20) | 1)) - 1;
	struct pci_bus *root;
	struct resource *res;
	unsigned i;
	u16 cmd;
	int r;

	/* Bypass for VF */
	if (amdgpu_sriov_vf(adev))
		return 0;

	/* Check if the root BUS has 64bit memory resources */
	root = adev->pdev->bus;
	while (root->parent)
		root = root->parent;

	pci_bus_for_each_resource(root, res, i) {
		if (res && res->flags & (IORESOURCE_MEM | IORESOURCE_MEM_64) &&
		    res->start > 0x100000000ull)
			break;
	}

	/* Trying to resize is pointless without a root hub window above 4GB */
	if (!res)
		return 0;

	/* Disable memory decoding while we change the BAR addresses and size */
	pci_read_config_word(adev->pdev, PCI_COMMAND, &cmd);
	pci_write_config_word(adev->pdev, PCI_COMMAND,
			      cmd & ~PCI_COMMAND_MEMORY);

	/* Free the VRAM and doorbell BAR, we most likely need to move both. */
	amdgpu_device_doorbell_fini(adev);
	if (adev->asic_type >= CHIP_BONAIRE)
		pci_release_resource(adev->pdev, 2);

	pci_release_resource(adev->pdev, 0);

	r = pci_resize_resource(adev->pdev, 0, rbar_size);
	if (r == -ENOSPC)
		DRM_INFO("Not enough PCI address space for a large BAR.");
	else if (r && r != -ENOTSUPP)
		DRM_ERROR("Problem resizing BAR0 (%d).", r);

	pci_assign_unassigned_bus_resources(adev->pdev->bus);

	/* When the doorbell or fb BAR isn't available we have no chance of
	 * using the device.
	 */
	r = amdgpu_device_doorbell_init(adev);
	if (r || (pci_resource_flags(adev->pdev, 0) & IORESOURCE_UNSET))
		return -ENODEV;

	pci_write_config_word(adev->pdev, PCI_COMMAND, cmd);

	return 0;
}

/*
 * GPU helper functions.
 */
/**
 * amdgpu_device_need_post - check if the hw need post or not
 *
 * @adev: amdgpu_device pointer
 *
 * Check if the asic has been initialized (all asics) at driver startup
 * or post is needed if hw reset is performed.
 * Returns true if need or false if not.
 */
bool amdgpu_device_need_post(struct amdgpu_device *adev)
{
	uint32_t reg;

	if (amdgpu_sriov_vf(adev))
		return false;

	if (amdgpu_passthrough(adev)) {
		/* for FIJI: In whole GPU pass-through virtualization case, after VM reboot
		 * some old smc fw still need driver do vPost otherwise gpu hang, while
		 * those smc fw version above 22.15 doesn't have this flaw, so we force
		 * vpost executed for smc version below 22.15
		 */
		if (adev->asic_type == CHIP_FIJI) {
			int err;
			uint32_t fw_ver;
			err = request_firmware(&adev->pm.fw, "amdgpu/fiji_smc.bin", adev->dev);
			/* force vPost if error occurred */
			if (err)
				return true;

			fw_ver = *((uint32_t *)adev->pm.fw->data + 69);
			if (fw_ver < 0x00160e00)
				return true;
		}
	}

	if (adev->has_hw_reset) {
		adev->has_hw_reset = false;
		return true;
	}

	/* bios scratch used on CIK+ */
	if (adev->asic_type >= CHIP_BONAIRE)
		return amdgpu_atombios_scratch_need_asic_init(adev);

	/* check MEM_SIZE for older asics */
	reg = amdgpu_asic_get_config_memsize(adev);

	if ((reg != 0) && (reg != 0xffffffff))
		return false;

	return true;
}

/* if we get transitioned to only one device, take VGA back */
/**
 * amdgpu_device_vga_set_decode - enable/disable vga decode
 *
 * @cookie: amdgpu_device pointer
 * @state: enable/disable vga decode
 *
 * Enable/disable vga decode (all asics).
 * Returns VGA resource flags.
 */
static unsigned int amdgpu_device_vga_set_decode(void *cookie, bool state)
{
	struct amdgpu_device *adev = cookie;
	amdgpu_asic_set_vga_state(adev, state);
	if (state)
		return VGA_RSRC_LEGACY_IO | VGA_RSRC_LEGACY_MEM |
		       VGA_RSRC_NORMAL_IO | VGA_RSRC_NORMAL_MEM;
	else
		return VGA_RSRC_NORMAL_IO | VGA_RSRC_NORMAL_MEM;
}

/**
 * amdgpu_device_check_block_size - validate the vm block size
 *
 * @adev: amdgpu_device pointer
 *
 * Validates the vm block size specified via module parameter.
 * The vm block size defines number of bits in page table versus page directory,
 * a page is 4KB so we have 12 bits offset, minimum 9 bits in the
 * page table and the remaining bits are in the page directory.
 */
static void amdgpu_device_check_block_size(struct amdgpu_device *adev)
{
	/* defines number of bits in page table versus page directory,
	 * a page is 4KB so we have 12 bits offset, minimum 9 bits in the
	 * page table and the remaining bits are in the page directory */
	if (amdgpu_vm_block_size == -1)
		return;

	if (amdgpu_vm_block_size < 9) {
		dev_warn(adev->dev, "VM page table size (%d) too small\n",
			 amdgpu_vm_block_size);
		amdgpu_vm_block_size = -1;
	}
}

/**
 * amdgpu_device_check_vm_size - validate the vm size
 *
 * @adev: amdgpu_device pointer
 *
 * Validates the vm size in GB specified via module parameter.
 * The VM size is the size of the GPU virtual memory space in GB.
 */
static void amdgpu_device_check_vm_size(struct amdgpu_device *adev)
{
	/* no need to check the default value */
	if (amdgpu_vm_size == -1)
		return;

	if (amdgpu_vm_size < 1) {
		dev_warn(adev->dev, "VM size (%d) too small, min is 1GB\n",
			 amdgpu_vm_size);
		amdgpu_vm_size = -1;
	}
}

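/**
 * amdgpu_device_check_smu_prv_buffer_size - validate the SMU memory pool size
 *
 * @adev: amdgpu_device pointer
 *
 * Validates the amdgpu_smu_memory_pool_size module parameter against the
 * amount of system memory and computes the SMU private buffer size in bytes,
 * falling back to 0 (disabled) when the request cannot be satisfied.
 */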
static void amdgpu_device_check_smu_prv_buffer_size(struct amdgpu_device *adev)
{
	struct sysinfo si;
	bool is_os_64 = (sizeof(void *) == 8) ? true : false;
	uint64_t total_memory;
	uint64_t dram_size_seven_GB = 0x1B8000000;
	uint64_t dram_size_three_GB = 0xB8000000;

	if (amdgpu_smu_memory_pool_size == 0)
		return;

	if (!is_os_64) {
		DRM_WARN("Not 64-bit OS, feature not supported\n");
		goto def_value;
	}
	si_meminfo(&si);
	total_memory = (uint64_t)si.totalram * si.mem_unit;

	if ((amdgpu_smu_memory_pool_size == 1) ||
	    (amdgpu_smu_memory_pool_size == 2)) {
		if (total_memory < dram_size_three_GB)
			goto def_value1;
	} else if ((amdgpu_smu_memory_pool_size == 4) ||
		   (amdgpu_smu_memory_pool_size == 8)) {
		if (total_memory < dram_size_seven_GB)
			goto def_value1;
	} else {
		DRM_WARN("Smu memory pool size not supported\n");
		goto def_value;
	}
	adev->pm.smu_prv_buffer_size = amdgpu_smu_memory_pool_size << 28;

	return;

def_value1:
	DRM_WARN("Not enough system memory\n");
def_value:
	adev->pm.smu_prv_buffer_size = 0;
}

/**
 * amdgpu_device_check_arguments - validate module params
 *
 * @adev: amdgpu_device pointer
 *
 * Validates certain module parameters and updates
 * the associated values used by the driver (all asics).
 */
static void amdgpu_device_check_arguments(struct amdgpu_device *adev)
{
	if (amdgpu_sched_jobs < 4) {
		dev_warn(adev->dev, "sched jobs (%d) must be at least 4\n",
			 amdgpu_sched_jobs);
		amdgpu_sched_jobs = 4;
	} else if (!is_power_of_2(amdgpu_sched_jobs)) {
		dev_warn(adev->dev, "sched jobs (%d) must be a power of 2\n",
			 amdgpu_sched_jobs);
		amdgpu_sched_jobs = roundup_pow_of_two(amdgpu_sched_jobs);
	}

	if (amdgpu_gart_size != -1 && amdgpu_gart_size < 32) {
		/* gart size must be greater or equal to 32M */
		dev_warn(adev->dev, "gart size (%d) too small\n",
			 amdgpu_gart_size);
		amdgpu_gart_size = -1;
	}

	if (amdgpu_gtt_size != -1 && amdgpu_gtt_size < 32) {
		/* gtt size must be greater or equal to 32M */
		dev_warn(adev->dev, "gtt size (%d) too small\n",
			 amdgpu_gtt_size);
		amdgpu_gtt_size = -1;
	}

	/* valid range is between 4 and 9 inclusive */
	if (amdgpu_vm_fragment_size != -1 &&
	    (amdgpu_vm_fragment_size > 9 || amdgpu_vm_fragment_size < 4)) {
		dev_warn(adev->dev, "valid range is between 4 and 9\n");
		amdgpu_vm_fragment_size = -1;
	}

	amdgpu_device_check_smu_prv_buffer_size(adev);

	amdgpu_device_check_vm_size(adev);

	amdgpu_device_check_block_size(adev);

	if (amdgpu_vram_page_split != -1 && (amdgpu_vram_page_split < 16 ||
	    !is_power_of_2(amdgpu_vram_page_split))) {
		dev_warn(adev->dev, "invalid VRAM page split (%d)\n",
			 amdgpu_vram_page_split);
		amdgpu_vram_page_split = 1024;
	}

	if (amdgpu_lockup_timeout == 0) {
		dev_warn(adev->dev, "lockup_timeout must be > 0, adjusting to 10000\n");
		amdgpu_lockup_timeout = 10000;
	}

	adev->firmware.load_type = amdgpu_ucode_get_load_type(adev, amdgpu_fw_load_type);
}

/**
 * amdgpu_switcheroo_set_state - set switcheroo state
 *
 * @pdev: pci dev pointer
 * @state: vga_switcheroo state
 *
 * Callback for the switcheroo driver. Suspends or resumes the
 * asics before or after it is powered up using ACPI methods.
 */
static void amdgpu_switcheroo_set_state(struct pci_dev *pdev, enum vga_switcheroo_state state)
{
	struct drm_device *dev = pci_get_drvdata(pdev);

	if (amdgpu_device_is_px(dev) && state == VGA_SWITCHEROO_OFF)
		return;

	if (state == VGA_SWITCHEROO_ON) {
		pr_info("amdgpu: switched on\n");
		/* don't suspend or resume card normally */
		dev->switch_power_state = DRM_SWITCH_POWER_CHANGING;

		amdgpu_device_resume(dev, true, true);

		dev->switch_power_state = DRM_SWITCH_POWER_ON;
		drm_kms_helper_poll_enable(dev);
	} else {
		pr_info("amdgpu: switched off\n");
		drm_kms_helper_poll_disable(dev);
		dev->switch_power_state = DRM_SWITCH_POWER_CHANGING;
		amdgpu_device_suspend(dev, true, true);
		dev->switch_power_state = DRM_SWITCH_POWER_OFF;
	}
}

/**
 * amdgpu_switcheroo_can_switch - see if switcheroo state can change
 *
 * @pdev: pci dev pointer
 *
 * Callback for the switcheroo driver. Check if the switcheroo
 * state can be changed.
 * Returns true if the state can be changed, false if not.
 */
static bool amdgpu_switcheroo_can_switch(struct pci_dev *pdev)
{
	struct drm_device *dev = pci_get_drvdata(pdev);

	/*
	 * FIXME: open_count is protected by drm_global_mutex but that would lead to
	 * locking inversion with the driver load path. And the access here is
	 * completely racy anyway. So don't bother with locking for now.
	 */
	return dev->open_count == 0;
}

static const struct vga_switcheroo_client_ops amdgpu_switcheroo_ops = {
	.set_gpu_state = amdgpu_switcheroo_set_state,
	.reprobe = NULL,
	.can_switch = amdgpu_switcheroo_can_switch,
};

/**
 * amdgpu_device_ip_set_clockgating_state - set the CG state
 *
 * @dev: amdgpu_device pointer
 * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
 * @state: clockgating state (gate or ungate)
 *
 * Sets the requested clockgating state for all instances of
 * the hardware IP specified.
 * Returns the error code from the last instance.
 */
int amdgpu_device_ip_set_clockgating_state(void *dev,
					   enum amd_ip_block_type block_type,
					   enum amd_clockgating_state state)
{
	struct amdgpu_device *adev = dev;
	int i, r = 0;

	for (i = 0; i < adev->num_ip_blocks; i++) {
		if (!adev->ip_blocks[i].status.valid)
			continue;
		if (adev->ip_blocks[i].version->type != block_type)
			continue;
		if (!adev->ip_blocks[i].version->funcs->set_clockgating_state)
			continue;
		r = adev->ip_blocks[i].version->funcs->set_clockgating_state(
			(void *)adev, state);
		if (r)
			DRM_ERROR("set_clockgating_state of IP block <%s> failed %d\n",
				  adev->ip_blocks[i].version->funcs->name, r);
	}
	return r;
}

/**
 * amdgpu_device_ip_set_powergating_state - set the PG state
 *
 * @dev: amdgpu_device pointer
 * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
 * @state: powergating state (gate or ungate)
 *
 * Sets the requested powergating state for all instances of
 * the hardware IP specified.
 * Returns the error code from the last instance.
 */
int amdgpu_device_ip_set_powergating_state(void *dev,
					   enum amd_ip_block_type block_type,
					   enum amd_powergating_state state)
{
	struct amdgpu_device *adev = dev;
	int i, r = 0;

	for (i = 0; i < adev->num_ip_blocks; i++) {
		if (!adev->ip_blocks[i].status.valid)
			continue;
		if (adev->ip_blocks[i].version->type != block_type)
			continue;
		if (!adev->ip_blocks[i].version->funcs->set_powergating_state)
			continue;
		r = adev->ip_blocks[i].version->funcs->set_powergating_state(
			(void *)adev, state);
		if (r)
			DRM_ERROR("set_powergating_state of IP block <%s> failed %d\n",
				  adev->ip_blocks[i].version->funcs->name, r);
	}
	return r;
}

/**
 * amdgpu_device_ip_get_clockgating_state - get the CG state
 *
 * @adev: amdgpu_device pointer
 * @flags: clockgating feature flags
 *
 * Walks the list of IPs on the device and updates the clockgating
 * flags for each IP.
 * Updates @flags with the feature flags for each hardware IP where
 * clockgating is enabled.
 */
void amdgpu_device_ip_get_clockgating_state(struct amdgpu_device *adev,
					    u32 *flags)
{
	int i;

	for (i = 0; i < adev->num_ip_blocks; i++) {
		if (!adev->ip_blocks[i].status.valid)
			continue;
		if (adev->ip_blocks[i].version->funcs->get_clockgating_state)
			adev->ip_blocks[i].version->funcs->get_clockgating_state((void *)adev, flags);
	}
}

/**
 * amdgpu_device_ip_wait_for_idle - wait for idle
 *
 * @adev: amdgpu_device pointer
 * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
 *
 * Waits for the requested hardware IP to be idle.
 * Returns 0 for success or a negative error code on failure.
 */
int amdgpu_device_ip_wait_for_idle(struct amdgpu_device *adev,
				   enum amd_ip_block_type block_type)
{
	int i, r;

	for (i = 0; i < adev->num_ip_blocks; i++) {
		if (!adev->ip_blocks[i].status.valid)
			continue;
		if (adev->ip_blocks[i].version->type == block_type) {
			r = adev->ip_blocks[i].version->funcs->wait_for_idle((void *)adev);
			if (r)
				return r;
			break;
		}
	}
	return 0;

}

/**
 * amdgpu_device_ip_is_idle - is the hardware IP idle
 *
 * @adev: amdgpu_device pointer
 * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
 *
 * Check if the hardware IP is idle or not.
 * Returns true if the IP is idle, false if not.
 */
bool amdgpu_device_ip_is_idle(struct amdgpu_device *adev,
			      enum amd_ip_block_type block_type)
{
	int i;

	for (i = 0; i < adev->num_ip_blocks; i++) {
		if (!adev->ip_blocks[i].status.valid)
			continue;
		if (adev->ip_blocks[i].version->type == block_type)
			return adev->ip_blocks[i].version->funcs->is_idle((void *)adev);
	}
	return true;

}

/**
 * amdgpu_device_ip_get_ip_block - get a hw IP pointer
 *
 * @adev: amdgpu_device pointer
 * @type: Type of hardware IP (SMU, GFX, UVD, etc.)
 *
 * Returns a pointer to the hardware IP block structure
 * if it exists for the asic, otherwise NULL.
 */
struct amdgpu_ip_block *
amdgpu_device_ip_get_ip_block(struct amdgpu_device *adev,
			      enum amd_ip_block_type type)
{
	int i;

	for (i = 0; i < adev->num_ip_blocks; i++)
		if (adev->ip_blocks[i].version->type == type)
			return &adev->ip_blocks[i];

	return NULL;
}

/**
 * amdgpu_device_ip_block_version_cmp
 *
 * @adev: amdgpu_device pointer
 * @type: enum amd_ip_block_type
 * @major: major version
 * @minor: minor version
 *
 * return 0 if equal or greater
 * return 1 if smaller or the ip_block doesn't exist
 */
int amdgpu_device_ip_block_version_cmp(struct amdgpu_device *adev,
				       enum amd_ip_block_type type,
				       u32 major, u32 minor)
{
	struct amdgpu_ip_block *ip_block = amdgpu_device_ip_get_ip_block(adev, type);

	if (ip_block && ((ip_block->version->major > major) ||
			((ip_block->version->major == major) &&
			(ip_block->version->minor >= minor))))
		return 0;

	return 1;
}

/**
 * amdgpu_device_ip_block_add
 *
 * @adev: amdgpu_device pointer
 * @ip_block_version: pointer to the IP to add
 *
 * Adds the IP block driver information to the collection of IPs
 * on the asic.
 */
int amdgpu_device_ip_block_add(struct amdgpu_device *adev,
			       const struct amdgpu_ip_block_version *ip_block_version)
{
	if (!ip_block_version)
		return -EINVAL;

	DRM_INFO("add ip block number %d <%s>\n", adev->num_ip_blocks,
		  ip_block_version->funcs->name);

	adev->ip_blocks[adev->num_ip_blocks++].version = ip_block_version;

	return 0;
}

/**
 * amdgpu_device_enable_virtual_display - enable virtual display feature
 *
 * @adev: amdgpu_device pointer
 *
 * Enables the virtual display feature if the user has enabled it via
 * the module parameter virtual_display. This feature provides a virtual
 * display hardware on headless boards or in virtualized environments.
 * This function parses and validates the configuration string specified by
 * the user and configures the virtual display configuration (number of
 * virtual connectors, crtcs, etc.) specified.
 */
static void amdgpu_device_enable_virtual_display(struct amdgpu_device *adev)
{
	adev->enable_virtual_display = false;

	if (amdgpu_virtual_display) {
		struct drm_device *ddev = adev->ddev;
		const char *pci_address_name = pci_name(ddev->pdev);
		char *pciaddstr, *pciaddstr_tmp, *pciaddname_tmp, *pciaddname;

		pciaddstr = kstrdup(amdgpu_virtual_display, GFP_KERNEL);
		pciaddstr_tmp = pciaddstr;
		while ((pciaddname_tmp = strsep(&pciaddstr_tmp, ";"))) {
			pciaddname = strsep(&pciaddname_tmp, ",");
			if (!strcmp("all", pciaddname)
			    || !strcmp(pci_address_name, pciaddname)) {
				long num_crtc;
				int res = -1;

				adev->enable_virtual_display = true;

				if (pciaddname_tmp)
					res = kstrtol(pciaddname_tmp, 10,
						      &num_crtc);

				if (!res) {
					if (num_crtc < 1)
						num_crtc = 1;
					if (num_crtc > 6)
						num_crtc = 6;
					adev->mode_info.num_crtc = num_crtc;
				} else {
					adev->mode_info.num_crtc = 1;
				}
				break;
			}
		}

		DRM_INFO("virtual display string:%s, %s:virtual_display:%d, num_crtc:%d\n",
			 amdgpu_virtual_display, pci_address_name,
			 adev->enable_virtual_display, adev->mode_info.num_crtc);

		kfree(pciaddstr);
	}
}

/**
 * amdgpu_device_parse_gpu_info_fw - parse gpu info firmware
 *
 * @adev: amdgpu_device pointer
 *
 * Parses the asic configuration parameters specified in the gpu info
 * firmware and makes them available to the driver for use in configuring
 * the asic.
 * Returns 0 on success, -EINVAL on failure.
 */
static int amdgpu_device_parse_gpu_info_fw(struct amdgpu_device *adev)
{
	const char *chip_name;
	char fw_name[30];
	int err;
	const struct gpu_info_firmware_header_v1_0 *hdr;

	adev->firmware.gpu_info_fw = NULL;

	switch (adev->asic_type) {
	case CHIP_TOPAZ:
	case CHIP_TONGA:
	case CHIP_FIJI:
	case CHIP_POLARIS10:
	case CHIP_POLARIS11:
	case CHIP_POLARIS12:
	case CHIP_VEGAM:
	case CHIP_CARRIZO:
	case CHIP_STONEY:
#ifdef CONFIG_DRM_AMDGPU_SI
	case CHIP_VERDE:
	case CHIP_TAHITI:
	case CHIP_PITCAIRN:
	case CHIP_OLAND:
	case CHIP_HAINAN:
#endif
#ifdef CONFIG_DRM_AMDGPU_CIK
	case CHIP_BONAIRE:
	case CHIP_HAWAII:
	case CHIP_KAVERI:
	case CHIP_KABINI:
	case CHIP_MULLINS:
#endif
	case CHIP_VEGA20:
	default:
		return 0;
	case CHIP_VEGA10:
		chip_name = "vega10";
		break;
	case CHIP_VEGA12:
		chip_name = "vega12";
		break;
	case CHIP_RAVEN:
		if (adev->rev_id >= 8)
			chip_name = "raven2";
		else if (adev->pdev->device == 0x15d8)
			chip_name = "picasso";
		else
			chip_name = "raven";
		break;
	}

	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_gpu_info.bin", chip_name);
	err = request_firmware(&adev->firmware.gpu_info_fw, fw_name, adev->dev);
	if (err) {
		dev_err(adev->dev,
			"Failed to load gpu_info firmware \"%s\"\n",
			fw_name);
		goto out;
	}
	err = amdgpu_ucode_validate(adev->firmware.gpu_info_fw);
	if (err) {
		dev_err(adev->dev,
			"Failed to validate gpu_info firmware \"%s\"\n",
			fw_name);
		goto out;
	}

	hdr = (const struct gpu_info_firmware_header_v1_0 *)adev->firmware.gpu_info_fw->data;
	amdgpu_ucode_print_gpu_info_hdr(&hdr->header);

	switch (hdr->version_major) {
	case 1:
	{
		const struct gpu_info_firmware_v1_0 *gpu_info_fw =
			(const struct gpu_info_firmware_v1_0 *)(adev->firmware.gpu_info_fw->data +
								le32_to_cpu(hdr->header.ucode_array_offset_bytes));

		adev->gfx.config.max_shader_engines = le32_to_cpu(gpu_info_fw->gc_num_se);
		adev->gfx.config.max_cu_per_sh = le32_to_cpu(gpu_info_fw->gc_num_cu_per_sh);
		adev->gfx.config.max_sh_per_se = le32_to_cpu(gpu_info_fw->gc_num_sh_per_se);
		adev->gfx.config.max_backends_per_se = le32_to_cpu(gpu_info_fw->gc_num_rb_per_se);
		adev->gfx.config.max_texture_channel_caches =
			le32_to_cpu(gpu_info_fw->gc_num_tccs);
		adev->gfx.config.max_gprs = le32_to_cpu(gpu_info_fw->gc_num_gprs);
		adev->gfx.config.max_gs_threads = le32_to_cpu(gpu_info_fw->gc_num_max_gs_thds);
		adev->gfx.config.gs_vgt_table_depth = le32_to_cpu(gpu_info_fw->gc_gs_table_depth);
		adev->gfx.config.gs_prim_buffer_depth = le32_to_cpu(gpu_info_fw->gc_gsprim_buff_depth);
		adev->gfx.config.double_offchip_lds_buf =
			le32_to_cpu(gpu_info_fw->gc_double_offchip_lds_buffer);
		adev->gfx.cu_info.wave_front_size = le32_to_cpu(gpu_info_fw->gc_wave_size);
		adev->gfx.cu_info.max_waves_per_simd =
			le32_to_cpu(gpu_info_fw->gc_max_waves_per_simd);
		adev->gfx.cu_info.max_scratch_slots_per_cu =
			le32_to_cpu(gpu_info_fw->gc_max_scratch_slots_per_cu);
		adev->gfx.cu_info.lds_size = le32_to_cpu(gpu_info_fw->gc_lds_size);
		break;
	}
	default:
		dev_err(adev->dev,
			"Unsupported gpu_info table %d\n", hdr->header.ucode_version);
		err = -EINVAL;
		goto out;
	}
out:
	return err;
}

/**
 * amdgpu_device_ip_early_init - run early init for hardware IPs
 *
 * @adev: amdgpu_device pointer
 *
 * Early initialization pass for hardware IPs. The hardware IPs that make
 * up each asic are discovered and each IP's early_init callback is run. This
 * is the first stage in initializing the asic.
 * Returns 0 on success, negative error code on failure.
 */
static int amdgpu_device_ip_early_init(struct amdgpu_device *adev)
{
	int i, r;

	amdgpu_device_enable_virtual_display(adev);

	switch (adev->asic_type) {
	case CHIP_TOPAZ:
	case CHIP_TONGA:
	case CHIP_FIJI:
	case CHIP_POLARIS10:
	case CHIP_POLARIS11:
	case CHIP_POLARIS12:
	case CHIP_VEGAM:
	case CHIP_CARRIZO:
	case CHIP_STONEY:
		if (adev->asic_type == CHIP_CARRIZO || adev->asic_type == CHIP_STONEY)
			adev->family = AMDGPU_FAMILY_CZ;
		else
			adev->family = AMDGPU_FAMILY_VI;

		r = vi_set_ip_blocks(adev);
		if (r)
			return r;
		break;
#ifdef CONFIG_DRM_AMDGPU_SI
	case CHIP_VERDE:
	case CHIP_TAHITI:
	case CHIP_PITCAIRN:
	case CHIP_OLAND:
	case CHIP_HAINAN:
		adev->family = AMDGPU_FAMILY_SI;
		r = si_set_ip_blocks(adev);
		if (r)
			return r;
		break;
#endif
#ifdef CONFIG_DRM_AMDGPU_CIK
	case CHIP_BONAIRE:
	case CHIP_HAWAII:
	case CHIP_KAVERI:
	case CHIP_KABINI:
	case CHIP_MULLINS:
		if ((adev->asic_type == CHIP_BONAIRE) || (adev->asic_type == CHIP_HAWAII))
			adev->family = AMDGPU_FAMILY_CI;
		else
			adev->family = AMDGPU_FAMILY_KV;

		r = cik_set_ip_blocks(adev);
		if (r)
			return r;
		break;
#endif
	case CHIP_VEGA10:
	case CHIP_VEGA12:
	case CHIP_VEGA20:
	case CHIP_RAVEN:
		if (adev->asic_type == CHIP_RAVEN)
			adev->family = AMDGPU_FAMILY_RV;
		else
			adev->family = AMDGPU_FAMILY_AI;

		r = soc15_set_ip_blocks(adev);
		if (r)
			return r;
		break;
	default:
		/* FIXME: not supported yet */
		return -EINVAL;
	}

	r = amdgpu_device_parse_gpu_info_fw(adev);
	if (r)
		return r;

	amdgpu_amdkfd_device_probe(adev);

	if (amdgpu_sriov_vf(adev)) {
		r = amdgpu_virt_request_full_gpu(adev, true);
		if (r)
			return -EAGAIN;
	}

	adev->powerplay.pp_feature = amdgpu_pp_feature_mask;

	for (i = 0; i < adev->num_ip_blocks; i++) {
		if ((amdgpu_ip_block_mask & (1 << i)) == 0) {
			DRM_ERROR("disabled ip block: %d <%s>\n",
				  i, adev->ip_blocks[i].version->funcs->name);
			adev->ip_blocks[i].status.valid = false;
		} else {
			if (adev->ip_blocks[i].version->funcs->early_init) {
				r = adev->ip_blocks[i].version->funcs->early_init((void *)adev);
				if (r == -ENOENT) {
					adev->ip_blocks[i].status.valid = false;
				} else if (r) {
					DRM_ERROR("early_init of IP block <%s> failed %d\n",
						  adev->ip_blocks[i].version->funcs->name, r);
					return r;
				} else {
					adev->ip_blocks[i].status.valid = true;
				}
			} else {
				adev->ip_blocks[i].status.valid = true;
			}
		}
	}

	adev->cg_flags &= amdgpu_cg_mask;
	adev->pg_flags &= amdgpu_pg_mask;

	return 0;
}

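/**
 * amdgpu_device_ip_hw_init_phase1 - run hw_init for COMMON and IH blocks
 *
 * @adev: amdgpu_device pointer
 *
 * First phase of hardware init. Walks the IP blocks whose software state is
 * initialized but whose hardware is not, and calls hw_init only for the
 * COMMON and IH blocks so that interrupt handling is available early.
 * Returns 0 on success, negative error code on failure.
 */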
static int amdgpu_device_ip_hw_init_phase1(struct amdgpu_device *adev)
{
	int i, r;

	for (i = 0; i < adev->num_ip_blocks; i++) {
		if (!adev->ip_blocks[i].status.sw)
			continue;
		if (adev->ip_blocks[i].status.hw)
			continue;
		if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
		    adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH) {
			r = adev->ip_blocks[i].version->funcs->hw_init(adev);
			if (r) {
				DRM_ERROR("hw_init of IP block <%s> failed %d\n",
					  adev->ip_blocks[i].version->funcs->name, r);
				return r;
			}
			adev->ip_blocks[i].status.hw = true;
		}
	}

	return 0;
}

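/**
 * amdgpu_device_ip_hw_init_phase2 - run hw_init for the remaining IP blocks
 *
 * @adev: amdgpu_device pointer
 *
 * Second phase of hardware init. Calls hw_init for every IP block whose
 * software state is initialized but whose hardware has not been brought up
 * yet (i.e. everything not handled in phase 1).
 * Returns 0 on success, negative error code on failure.
 */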
static int amdgpu_device_ip_hw_init_phase2(struct amdgpu_device *adev)
{
	int i, r;

	for (i = 0; i < adev->num_ip_blocks; i++) {
		if (!adev->ip_blocks[i].status.sw)
			continue;
		if (adev->ip_blocks[i].status.hw)
			continue;
		r = adev->ip_blocks[i].version->funcs->hw_init(adev);
		if (r) {
			DRM_ERROR("hw_init of IP block <%s> failed %d\n",
				  adev->ip_blocks[i].version->funcs->name, r);
			return r;
		}
		adev->ip_blocks[i].status.hw = true;
	}

	return 0;
}

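/**
 * amdgpu_device_fw_loading - kick off firmware loading via PSP/SMU
 *
 * @adev: amdgpu_device pointer
 *
 * On SOC15 and later parts (vega10+) this brings up (or resumes) the PSP
 * block so it can load the other IP firmwares, then asks the powerplay
 * code to load the SMU firmware when a load_firmware callback is provided.
 * Returns 0 on success, negative error code on failure.
 */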
static int amdgpu_device_fw_loading(struct amdgpu_device *adev)
{
	int r = 0;
	int i;

	if (adev->asic_type >= CHIP_VEGA10) {
		for (i = 0; i < adev->num_ip_blocks; i++) {
			if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP) {
				if (adev->in_gpu_reset || adev->in_suspend) {
					if (amdgpu_sriov_vf(adev) && adev->in_gpu_reset)
						break; /* sriov gpu reset, psp need to do hw_init before IH because of hw limit */
					r = adev->ip_blocks[i].version->funcs->resume(adev);
					if (r) {
						DRM_ERROR("resume of IP block <%s> failed %d\n",
							  adev->ip_blocks[i].version->funcs->name, r);
						return r;
					}
				} else {
					r = adev->ip_blocks[i].version->funcs->hw_init(adev);
					if (r) {
						DRM_ERROR("hw_init of IP block <%s> failed %d\n",
							  adev->ip_blocks[i].version->funcs->name, r);
						return r;
					}
				}
				adev->ip_blocks[i].status.hw = true;
			}
		}
	}

	if (adev->powerplay.pp_funcs && adev->powerplay.pp_funcs->load_firmware) {
		r = adev->powerplay.pp_funcs->load_firmware(adev->powerplay.pp_handle);
		if (r) {
			pr_err("firmware loading failed\n");
			return r;
		}
	}

	return 0;
}

/**
 * amdgpu_device_ip_init - run init for hardware IPs
 *
 * @adev: amdgpu_device pointer
 *
 * Main initialization pass for hardware IPs. The list of all the hardware
 * IPs that make up the asic is walked and the sw_init and hw_init callbacks
 * are run. sw_init initializes the software state associated with each IP
 * and hw_init initializes the hardware associated with each IP.
 * Returns 0 on success, negative error code on failure.
 */
static int amdgpu_device_ip_init(struct amdgpu_device *adev)
{
	int i, r;

	for (i = 0; i < adev->num_ip_blocks; i++) {
		if (!adev->ip_blocks[i].status.valid)
			continue;
		r = adev->ip_blocks[i].version->funcs->sw_init((void *)adev);
		if (r) {
			DRM_ERROR("sw_init of IP block <%s> failed %d\n",
				  adev->ip_blocks[i].version->funcs->name, r);
			goto init_failed;
		}
		adev->ip_blocks[i].status.sw = true;

		/* need to do gmc hw init early so we can allocate gpu mem */
		if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) {
			r = amdgpu_device_vram_scratch_init(adev);
			if (r) {
				DRM_ERROR("amdgpu_vram_scratch_init failed %d\n", r);
				goto init_failed;
			}
			r = adev->ip_blocks[i].version->funcs->hw_init((void *)adev);
			if (r) {
				DRM_ERROR("hw_init %d failed %d\n", i, r);
				goto init_failed;
			}
			r = amdgpu_device_wb_init(adev);
			if (r) {
				DRM_ERROR("amdgpu_device_wb_init failed %d\n", r);
				goto init_failed;
			}
			adev->ip_blocks[i].status.hw = true;

			/* right after GMC hw init, we create CSA */
			if (amdgpu_sriov_vf(adev)) {
				r = amdgpu_allocate_static_csa(adev, &adev->virt.csa_obj,
							       AMDGPU_GEM_DOMAIN_VRAM,
							       AMDGPU_CSA_SIZE);
				if (r) {
					DRM_ERROR("allocate CSA failed %d\n", r);
					goto init_failed;
				}
			}
		}
	}

	r = amdgpu_ucode_create_bo(adev); /* create ucode bo when sw_init complete*/
	if (r)
		goto init_failed;

	r = amdgpu_device_ip_hw_init_phase1(adev);
	if (r)
		goto init_failed;

	r = amdgpu_device_fw_loading(adev);
	if (r)
		goto init_failed;

	r = amdgpu_device_ip_hw_init_phase2(adev);
	if (r)
		goto init_failed;

	if (adev->gmc.xgmi.num_physical_nodes > 1)
		amdgpu_xgmi_add_device(adev);
	amdgpu_amdkfd_device_init(adev);

init_failed:
	if (amdgpu_sriov_vf(adev)) {
		if (!r)
			amdgpu_virt_init_data_exchange(adev);
		amdgpu_virt_release_full_gpu(adev, true);
	}

	return r;
}

/**
 * amdgpu_device_fill_reset_magic - writes reset magic to gart pointer
 *
 * @adev: amdgpu_device pointer
 *
 * Writes a reset magic value to the gart pointer in VRAM. The driver calls
 * this function before a GPU reset. If the value is retained after a
 * GPU reset, VRAM has not been lost. Some GPU resets may destroy VRAM contents.
 */
static void amdgpu_device_fill_reset_magic(struct amdgpu_device *adev)
{
	memcpy(adev->reset_magic, adev->gart.ptr, AMDGPU_RESET_MAGIC_NUM);
}

/**
 * amdgpu_device_check_vram_lost - check if vram is valid
 *
 * @adev: amdgpu_device pointer
 *
 * Checks the reset magic value written to the gart pointer in VRAM.
 * The driver calls this after a GPU reset to see if the contents of
 * VRAM is lost or not.
 * returns true if vram is lost, false if not.
 */
static bool amdgpu_device_check_vram_lost(struct amdgpu_device *adev)
{
	return !!memcmp(adev->gart.ptr, adev->reset_magic,
			AMDGPU_RESET_MAGIC_NUM);
}

e3ecdffa 1744/**
1112a46b 1745 * amdgpu_device_set_cg_state - set clockgating for amdgpu device
e3ecdffa
AD
1746 *
1747 * @adev: amdgpu_device pointer
1748 *
e3ecdffa 1749 * The list of all the hardware IPs that make up the asic is walked and the
1112a46b
RZ
1750 * set_clockgating_state callbacks are run.
 1751  * Late init passes the gate state to enable clockgating for hardware IPs;
 1752  * fini or suspend passes the ungate state to disable clockgating.
e3ecdffa
AD
1753 * Returns 0 on success, negative error code on failure.
1754 */
fdd34271 1755
1112a46b
RZ
1756static int amdgpu_device_set_cg_state(struct amdgpu_device *adev,
1757 enum amd_clockgating_state state)
d38ceaf9 1758{
1112a46b 1759 int i, j, r;
d38ceaf9 1760
4a2ba394
SL
1761 if (amdgpu_emu_mode == 1)
1762 return 0;
1763
1112a46b
RZ
1764 for (j = 0; j < adev->num_ip_blocks; j++) {
1765 i = state == AMD_CG_STATE_GATE ? j : adev->num_ip_blocks - j - 1;
a2d31dc3 1766 if (!adev->ip_blocks[i].status.late_initialized)
d38ceaf9 1767 continue;
4a446d55 1768 /* skip CG for VCE/UVD, it's handled specially */
a1255107 1769 if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_UVD &&
57716327 1770 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCE &&
34319b32 1771 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCN &&
57716327 1772 adev->ip_blocks[i].version->funcs->set_clockgating_state) {
4a446d55 1773 /* enable clockgating to save power */
a1255107 1774 r = adev->ip_blocks[i].version->funcs->set_clockgating_state((void *)adev,
1112a46b 1775 state);
4a446d55
AD
1776 if (r) {
1777 DRM_ERROR("set_clockgating_state(gate) of IP block <%s> failed %d\n",
a1255107 1778 adev->ip_blocks[i].version->funcs->name, r);
4a446d55
AD
1779 return r;
1780 }
b0b00ff1 1781 }
d38ceaf9 1782 }
06b18f61 1783
c9f96fd5
RZ
1784 return 0;
1785}
1786
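/**
 * amdgpu_device_set_pg_state - set powergating for amdgpu device
 *
 * @adev: amdgpu_device pointer
 * @state: powergating state (gate or ungate)
 *
 * The list of all the hardware IPs that make up the asic is walked and the
 * set_powergating_state callbacks are run. UVD, VCE and VCN are skipped
 * since their powergating is handled specially.
 * Returns 0 on success, negative error code on failure.
 */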
1112a46b 1787static int amdgpu_device_set_pg_state(struct amdgpu_device *adev, enum amd_powergating_state state)
c9f96fd5 1788{
1112a46b 1789 int i, j, r;
06b18f61 1790
c9f96fd5
RZ
1791 if (amdgpu_emu_mode == 1)
1792 return 0;
1793
1112a46b
RZ
1794 for (j = 0; j < adev->num_ip_blocks; j++) {
1795 i = state == AMD_PG_STATE_GATE ? j : adev->num_ip_blocks - j - 1;
a2d31dc3 1796 if (!adev->ip_blocks[i].status.late_initialized)
c9f96fd5
RZ
1797 continue;
 1798 		/* skip PG for VCE/UVD, it's handled specially */
1799 if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_UVD &&
1800 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCE &&
1801 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCN &&
1802 adev->ip_blocks[i].version->funcs->set_powergating_state) {
1803 /* enable powergating to save power */
1804 r = adev->ip_blocks[i].version->funcs->set_powergating_state((void *)adev,
1112a46b 1805 state);
c9f96fd5
RZ
1806 if (r) {
1807 DRM_ERROR("set_powergating_state(gate) of IP block <%s> failed %d\n",
1808 adev->ip_blocks[i].version->funcs->name, r);
1809 return r;
1810 }
1811 }
1812 }
2dc80b00
S
1813 return 0;
1814}
1815
e3ecdffa
AD
1816/**
1817 * amdgpu_device_ip_late_init - run late init for hardware IPs
1818 *
1819 * @adev: amdgpu_device pointer
1820 *
1821 * Late initialization pass for hardware IPs. The list of all the hardware
1822 * IPs that make up the asic is walked and the late_init callbacks are run.
1823 * late_init covers any special initialization that an IP requires
 1824  * after all of the IPs have been initialized or something that needs to happen
1825 * late in the init process.
1826 * Returns 0 on success, negative error code on failure.
1827 */
06ec9070 1828static int amdgpu_device_ip_late_init(struct amdgpu_device *adev)
2dc80b00
S
1829{
1830 int i = 0, r;
1831
1832 for (i = 0; i < adev->num_ip_blocks; i++) {
73f847db 1833 if (!adev->ip_blocks[i].status.hw)
2dc80b00
S
1834 continue;
1835 if (adev->ip_blocks[i].version->funcs->late_init) {
1836 r = adev->ip_blocks[i].version->funcs->late_init((void *)adev);
1837 if (r) {
1838 DRM_ERROR("late_init of IP block <%s> failed %d\n",
1839 adev->ip_blocks[i].version->funcs->name, r);
1840 return r;
1841 }
2dc80b00 1842 }
73f847db 1843 adev->ip_blocks[i].status.late_initialized = true;
2dc80b00
S
1844 }
1845
1112a46b
RZ
1846 amdgpu_device_set_cg_state(adev, AMD_CG_STATE_GATE);
1847 amdgpu_device_set_pg_state(adev, AMD_PG_STATE_GATE);
916ac57f 1848
2c773de2
S
1849 queue_delayed_work(system_wq, &adev->late_init_work,
1850 msecs_to_jiffies(AMDGPU_RESUME_MS));
d38ceaf9 1851
06ec9070 1852 amdgpu_device_fill_reset_magic(adev);
d38ceaf9
AD
1853
1854 return 0;
1855}
1856
e3ecdffa
AD
1857/**
1858 * amdgpu_device_ip_fini - run fini for hardware IPs
1859 *
1860 * @adev: amdgpu_device pointer
1861 *
1862 * Main teardown pass for hardware IPs. The list of all the hardware
1863 * IPs that make up the asic is walked and the hw_fini and sw_fini callbacks
1864 * are run. hw_fini tears down the hardware associated with each IP
1865 * and sw_fini tears down any software state associated with each IP.
1866 * Returns 0 on success, negative error code on failure.
1867 */
06ec9070 1868static int amdgpu_device_ip_fini(struct amdgpu_device *adev)
d38ceaf9
AD
1869{
1870 int i, r;
1871
a82400b5
AG
1872 if (adev->gmc.xgmi.num_physical_nodes > 1)
1873 amdgpu_xgmi_remove_device(adev);
1874
1884734a 1875 amdgpu_amdkfd_device_fini(adev);
05df1f01
RZ
1876
1877 amdgpu_device_set_pg_state(adev, AMD_PG_STATE_UNGATE);
fdd34271
RZ
1878 amdgpu_device_set_cg_state(adev, AMD_CG_STATE_UNGATE);
1879
3e96dbfd
AD
1880 /* need to disable SMC first */
1881 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 1882 if (!adev->ip_blocks[i].status.hw)
3e96dbfd 1883 continue;
fdd34271 1884 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) {
a1255107 1885 r = adev->ip_blocks[i].version->funcs->hw_fini((void *)adev);
3e96dbfd
AD
1886 /* XXX handle errors */
1887 if (r) {
1888 DRM_DEBUG("hw_fini of IP block <%s> failed %d\n",
a1255107 1889 adev->ip_blocks[i].version->funcs->name, r);
3e96dbfd 1890 }
a1255107 1891 adev->ip_blocks[i].status.hw = false;
3e96dbfd
AD
1892 break;
1893 }
1894 }
1895
d38ceaf9 1896 for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
a1255107 1897 if (!adev->ip_blocks[i].status.hw)
d38ceaf9 1898 continue;
8201a67a 1899
a1255107 1900 r = adev->ip_blocks[i].version->funcs->hw_fini((void *)adev);
d38ceaf9 1901 /* XXX handle errors */
2c1a2784 1902 if (r) {
a1255107
AD
1903 DRM_DEBUG("hw_fini of IP block <%s> failed %d\n",
1904 adev->ip_blocks[i].version->funcs->name, r);
2c1a2784 1905 }
8201a67a 1906
a1255107 1907 adev->ip_blocks[i].status.hw = false;
d38ceaf9
AD
1908 }
1909
9950cda2 1910
d38ceaf9 1911 for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
a1255107 1912 if (!adev->ip_blocks[i].status.sw)
d38ceaf9 1913 continue;
c12aba3a
ML
1914
1915 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) {
c8963ea4 1916 amdgpu_ucode_free_bo(adev);
1e256e27 1917 amdgpu_free_static_csa(&adev->virt.csa_obj);
c12aba3a
ML
1918 amdgpu_device_wb_fini(adev);
1919 amdgpu_device_vram_scratch_fini(adev);
1920 }
1921
a1255107 1922 r = adev->ip_blocks[i].version->funcs->sw_fini((void *)adev);
d38ceaf9 1923 /* XXX handle errors */
2c1a2784 1924 if (r) {
a1255107
AD
1925 DRM_DEBUG("sw_fini of IP block <%s> failed %d\n",
1926 adev->ip_blocks[i].version->funcs->name, r);
2c1a2784 1927 }
a1255107
AD
1928 adev->ip_blocks[i].status.sw = false;
1929 adev->ip_blocks[i].status.valid = false;
d38ceaf9
AD
1930 }
1931
a6dcfd9c 1932 for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
a1255107 1933 if (!adev->ip_blocks[i].status.late_initialized)
8a2eef1d 1934 continue;
a1255107
AD
1935 if (adev->ip_blocks[i].version->funcs->late_fini)
1936 adev->ip_blocks[i].version->funcs->late_fini((void *)adev);
1937 adev->ip_blocks[i].status.late_initialized = false;
a6dcfd9c
ML
1938 }
1939
030308fc 1940 if (amdgpu_sriov_vf(adev))
24136135
ML
1941 if (amdgpu_virt_release_full_gpu(adev, false))
1942 DRM_ERROR("failed to release exclusive mode on fini\n");
2493664f 1943
d38ceaf9
AD
1944 return 0;
1945}
1946
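/**
 * amdgpu_device_enable_mgpu_fan_boost - enable fan boost on multi-GPU boards
 *
 * Walks the list of registered dGPUs and asks powerplay to enable the
 * MGPU fan boost feature on each of them. The feature is only enabled
 * when two or more dGPUs are present in the system.
 * Returns 0 on success, negative error code on failure.
 */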
b55c9e7a
EQ
1947static int amdgpu_device_enable_mgpu_fan_boost(void)
1948{
1949 struct amdgpu_gpu_instance *gpu_ins;
1950 struct amdgpu_device *adev;
1951 int i, ret = 0;
1952
1953 mutex_lock(&mgpu_info.mutex);
1954
1955 /*
1956 * MGPU fan boost feature should be enabled
1957 * only when there are two or more dGPUs in
1958 * the system
1959 */
1960 if (mgpu_info.num_dgpu < 2)
1961 goto out;
1962
1963 for (i = 0; i < mgpu_info.num_dgpu; i++) {
1964 gpu_ins = &(mgpu_info.gpu_ins[i]);
1965 adev = gpu_ins->adev;
1966 if (!(adev->flags & AMD_IS_APU) &&
1967 !gpu_ins->mgpu_fan_enabled &&
1968 adev->powerplay.pp_funcs &&
1969 adev->powerplay.pp_funcs->enable_mgpu_fan_boost) {
1970 ret = amdgpu_dpm_enable_mgpu_fan_boost(adev);
1971 if (ret)
1972 break;
1973
1974 gpu_ins->mgpu_fan_enabled = 1;
1975 }
1976 }
1977
1978out:
1979 mutex_unlock(&mgpu_info.mutex);
1980
1981 return ret;
1982}
1983
e3ecdffa 1984/**
1112a46b 1985 * amdgpu_device_ip_late_init_func_handler - work handler for IB tests and mgpu fan boost
e3ecdffa 1986 *
1112a46b 1987 * @work: work_struct.
e3ecdffa 1988 */
06ec9070 1989static void amdgpu_device_ip_late_init_func_handler(struct work_struct *work)
2dc80b00
S
1990{
1991 struct amdgpu_device *adev =
1992 container_of(work, struct amdgpu_device, late_init_work.work);
916ac57f
RZ
1993 int r;
1994
1995 r = amdgpu_ib_ring_tests(adev);
1996 if (r)
1997 DRM_ERROR("ib ring test failed (%d).\n", r);
b55c9e7a
EQ
1998
1999 r = amdgpu_device_enable_mgpu_fan_boost();
2000 if (r)
2001 DRM_ERROR("enable mgpu fan boost failed (%d).\n", r);
2dc80b00
S
2002}
2003
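/**
 * amdgpu_device_delay_enable_gfx_off - delayed work handler for enabling GFXOFF
 *
 * @work: work_struct.
 *
 * Asks the SMU to power off the GFX block once no GFXOFF requests are
 * outstanding and GFXOFF has not already been enabled.
 */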
1e317b99
RZ
2004static void amdgpu_device_delay_enable_gfx_off(struct work_struct *work)
2005{
2006 struct amdgpu_device *adev =
2007 container_of(work, struct amdgpu_device, gfx.gfx_off_delay_work.work);
2008
2009 mutex_lock(&adev->gfx.gfx_off_mutex);
2010 if (!adev->gfx.gfx_off_state && !adev->gfx.gfx_off_req_count) {
2011 if (!amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_GFX, true))
2012 adev->gfx.gfx_off_state = true;
2013 }
2014 mutex_unlock(&adev->gfx.gfx_off_mutex);
2015}
2016
e3ecdffa 2017/**
e7854a03 2018 * amdgpu_device_ip_suspend_phase1 - run suspend for hardware IPs (phase 1)
e3ecdffa
AD
2019 *
2020 * @adev: amdgpu_device pointer
2021 *
 2022  * Main suspend function for hardware IPs (phase 1). The list of all the
 2023  * hardware IPs that make up the asic is walked, clockgating and powergating are
 2024  * disabled and the suspend callbacks are run for the display (DCE) blocks,
 2025  * putting them into a state suitable for suspend. All other blocks are handled in phase 2.
2026 * Returns 0 on success, negative error code on failure.
2027 */
e7854a03
AD
2028static int amdgpu_device_ip_suspend_phase1(struct amdgpu_device *adev)
2029{
2030 int i, r;
2031
05df1f01 2032 amdgpu_device_set_pg_state(adev, AMD_PG_STATE_UNGATE);
fdd34271 2033 amdgpu_device_set_cg_state(adev, AMD_CG_STATE_UNGATE);
05df1f01 2034
e7854a03
AD
2035 for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
2036 if (!adev->ip_blocks[i].status.valid)
2037 continue;
2038 /* displays are handled separately */
2039 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_DCE) {
e7854a03
AD
2040 /* XXX handle errors */
2041 r = adev->ip_blocks[i].version->funcs->suspend(adev);
2042 /* XXX handle errors */
2043 if (r) {
2044 DRM_ERROR("suspend of IP block <%s> failed %d\n",
2045 adev->ip_blocks[i].version->funcs->name, r);
2046 }
2047 }
2048 }
2049
e7854a03
AD
2050 return 0;
2051}
2052
2053/**
2054 * amdgpu_device_ip_suspend_phase2 - run suspend for hardware IPs (phase 2)
2055 *
2056 * @adev: amdgpu_device pointer
2057 *
 2058  * Main suspend function for hardware IPs (phase 2). The list of all the
 2059  * hardware IPs that make up the asic is walked and the suspend callbacks are
 2060  * run for every block except the display (DCE) blocks, which were handled in
 2061  * phase 1. suspend puts each IP into a state suitable for suspend.
2062 * Returns 0 on success, negative error code on failure.
2063 */
2064static int amdgpu_device_ip_suspend_phase2(struct amdgpu_device *adev)
d38ceaf9
AD
2065{
2066 int i, r;
2067
2068 for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
a1255107 2069 if (!adev->ip_blocks[i].status.valid)
d38ceaf9 2070 continue;
e7854a03
AD
2071 /* displays are handled in phase1 */
2072 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_DCE)
2073 continue;
d38ceaf9 2074 /* XXX handle errors */
a1255107 2075 r = adev->ip_blocks[i].version->funcs->suspend(adev);
d38ceaf9 2076 /* XXX handle errors */
2c1a2784 2077 if (r) {
a1255107
AD
2078 DRM_ERROR("suspend of IP block <%s> failed %d\n",
2079 adev->ip_blocks[i].version->funcs->name, r);
2c1a2784 2080 }
d38ceaf9
AD
2081 }
2082
2083 return 0;
2084}
2085
e7854a03
AD
2086/**
2087 * amdgpu_device_ip_suspend - run suspend for hardware IPs
2088 *
2089 * @adev: amdgpu_device pointer
2090 *
2091 * Main suspend function for hardware IPs. The list of all the hardware
2092 * IPs that make up the asic is walked, clockgating is disabled and the
2093 * suspend callbacks are run. suspend puts the hardware and software state
2094 * in each IP into a state suitable for suspend.
2095 * Returns 0 on success, negative error code on failure.
2096 */
2097int amdgpu_device_ip_suspend(struct amdgpu_device *adev)
2098{
2099 int r;
2100
e7819644
YT
2101 if (amdgpu_sriov_vf(adev))
2102 amdgpu_virt_request_full_gpu(adev, false);
2103
e7854a03
AD
2104 r = amdgpu_device_ip_suspend_phase1(adev);
2105 if (r)
2106 return r;
2107 r = amdgpu_device_ip_suspend_phase2(adev);
2108
e7819644
YT
2109 if (amdgpu_sriov_vf(adev))
2110 amdgpu_virt_release_full_gpu(adev, false);
2111
e7854a03
AD
2112 return r;
2113}
2114
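/**
 * amdgpu_device_ip_reinit_early_sriov - reinit early hardware IPs for SR-IOV
 *
 * @adev: amdgpu_device pointer
 *
 * Re-runs the hw_init callbacks for the GMC, COMMON, PSP and IH blocks
 * (in that order) as the first stage of recovering an SR-IOV VF after a
 * function level reset.
 * Returns 0 on success, negative error code on failure.
 */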
06ec9070 2115static int amdgpu_device_ip_reinit_early_sriov(struct amdgpu_device *adev)
a90ad3c2
ML
2116{
2117 int i, r;
2118
2cb681b6
ML
2119 static enum amd_ip_block_type ip_order[] = {
2120 AMD_IP_BLOCK_TYPE_GMC,
2121 AMD_IP_BLOCK_TYPE_COMMON,
39186aef 2122 AMD_IP_BLOCK_TYPE_PSP,
2cb681b6
ML
2123 AMD_IP_BLOCK_TYPE_IH,
2124 };
a90ad3c2 2125
2cb681b6
ML
2126 for (i = 0; i < ARRAY_SIZE(ip_order); i++) {
2127 int j;
2128 struct amdgpu_ip_block *block;
a90ad3c2 2129
2cb681b6
ML
2130 for (j = 0; j < adev->num_ip_blocks; j++) {
2131 block = &adev->ip_blocks[j];
2132
2133 if (block->version->type != ip_order[i] ||
2134 !block->status.valid)
2135 continue;
2136
2137 r = block->version->funcs->hw_init(adev);
0aaeefcc 2138 DRM_INFO("RE-INIT-early: %s %s\n", block->version->funcs->name, r?"failed":"succeeded");
c41d1cf6
ML
2139 if (r)
2140 return r;
a90ad3c2
ML
2141 }
2142 }
2143
2144 return 0;
2145}
2146
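/**
 * amdgpu_device_ip_reinit_late_sriov - reinit late hardware IPs for SR-IOV
 *
 * @adev: amdgpu_device pointer
 *
 * Re-runs the hw_init callbacks for the SMC, DCE, GFX, SDMA, UVD and VCE
 * blocks (in that order) as the second stage of recovering an SR-IOV VF
 * after a function level reset.
 * Returns 0 on success, negative error code on failure.
 */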
06ec9070 2147static int amdgpu_device_ip_reinit_late_sriov(struct amdgpu_device *adev)
a90ad3c2
ML
2148{
2149 int i, r;
2150
2cb681b6
ML
2151 static enum amd_ip_block_type ip_order[] = {
2152 AMD_IP_BLOCK_TYPE_SMC,
2153 AMD_IP_BLOCK_TYPE_DCE,
2154 AMD_IP_BLOCK_TYPE_GFX,
2155 AMD_IP_BLOCK_TYPE_SDMA,
257deb8c
FM
2156 AMD_IP_BLOCK_TYPE_UVD,
2157 AMD_IP_BLOCK_TYPE_VCE
2cb681b6 2158 };
a90ad3c2 2159
2cb681b6
ML
2160 for (i = 0; i < ARRAY_SIZE(ip_order); i++) {
2161 int j;
2162 struct amdgpu_ip_block *block;
a90ad3c2 2163
2cb681b6
ML
2164 for (j = 0; j < adev->num_ip_blocks; j++) {
2165 block = &adev->ip_blocks[j];
2166
2167 if (block->version->type != ip_order[i] ||
2168 !block->status.valid)
2169 continue;
2170
2171 r = block->version->funcs->hw_init(adev);
0aaeefcc 2172 DRM_INFO("RE-INIT-late: %s %s\n", block->version->funcs->name, r?"failed":"succeeded");
c41d1cf6
ML
2173 if (r)
2174 return r;
a90ad3c2
ML
2175 }
2176 }
2177
2178 return 0;
2179}
2180
e3ecdffa
AD
2181/**
2182 * amdgpu_device_ip_resume_phase1 - run resume for hardware IPs
2183 *
2184 * @adev: amdgpu_device pointer
2185 *
2186 * First resume function for hardware IPs. The list of all the hardware
2187 * IPs that make up the asic is walked and the resume callbacks are run for
2188 * COMMON, GMC, and IH. resume puts the hardware into a functional state
2189 * after a suspend and updates the software state as necessary. This
2190 * function is also used for restoring the GPU after a GPU reset.
2191 * Returns 0 on success, negative error code on failure.
2192 */
06ec9070 2193static int amdgpu_device_ip_resume_phase1(struct amdgpu_device *adev)
d38ceaf9
AD
2194{
2195 int i, r;
2196
a90ad3c2
ML
2197 for (i = 0; i < adev->num_ip_blocks; i++) {
2198 if (!adev->ip_blocks[i].status.valid)
2199 continue;
a90ad3c2 2200 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
e3ecdffa
AD
2201 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
2202 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH) {
fcf0649f
CZ
2203 r = adev->ip_blocks[i].version->funcs->resume(adev);
2204 if (r) {
2205 DRM_ERROR("resume of IP block <%s> failed %d\n",
2206 adev->ip_blocks[i].version->funcs->name, r);
2207 return r;
2208 }
a90ad3c2
ML
2209 }
2210 }
2211
2212 return 0;
2213}
2214
e3ecdffa
AD
2215/**
2216 * amdgpu_device_ip_resume_phase2 - run resume for hardware IPs
2217 *
2218 * @adev: amdgpu_device pointer
2219 *
 2220  * Second resume function for hardware IPs. The list of all the hardware
 2221  * IPs that make up the asic is walked and the resume callbacks are run for
 2222  * all blocks except COMMON, GMC, IH, and PSP. resume puts the hardware into a
2223 * functional state after a suspend and updates the software state as
2224 * necessary. This function is also used for restoring the GPU after a GPU
2225 * reset.
2226 * Returns 0 on success, negative error code on failure.
2227 */
06ec9070 2228static int amdgpu_device_ip_resume_phase2(struct amdgpu_device *adev)
d38ceaf9
AD
2229{
2230 int i, r;
2231
2232 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 2233 if (!adev->ip_blocks[i].status.valid)
d38ceaf9 2234 continue;
fcf0649f 2235 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
e3ecdffa 2236 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
7a3e0bb2
RZ
2237 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH ||
2238 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP)
fcf0649f 2239 continue;
a1255107 2240 r = adev->ip_blocks[i].version->funcs->resume(adev);
2c1a2784 2241 if (r) {
a1255107
AD
2242 DRM_ERROR("resume of IP block <%s> failed %d\n",
2243 adev->ip_blocks[i].version->funcs->name, r);
d38ceaf9 2244 return r;
2c1a2784 2245 }
d38ceaf9
AD
2246 }
2247
2248 return 0;
2249}
2250
e3ecdffa
AD
2251/**
2252 * amdgpu_device_ip_resume - run resume for hardware IPs
2253 *
2254 * @adev: amdgpu_device pointer
2255 *
2256 * Main resume function for hardware IPs. The hardware IPs
 2257  * are split into two resume functions because they are
 2258  * also used in recovering from a GPU reset and some additional
 2259  * steps need to be taken between them. In this case (S3/S4) they are
2260 * run sequentially.
2261 * Returns 0 on success, negative error code on failure.
2262 */
06ec9070 2263static int amdgpu_device_ip_resume(struct amdgpu_device *adev)
fcf0649f
CZ
2264{
2265 int r;
2266
06ec9070 2267 r = amdgpu_device_ip_resume_phase1(adev);
fcf0649f
CZ
2268 if (r)
2269 return r;
7a3e0bb2
RZ
2270
2271 r = amdgpu_device_fw_loading(adev);
2272 if (r)
2273 return r;
2274
06ec9070 2275 r = amdgpu_device_ip_resume_phase2(adev);
fcf0649f
CZ
2276
2277 return r;
2278}
2279
e3ecdffa
AD
2280/**
2281 * amdgpu_device_detect_sriov_bios - determine if the board supports SR-IOV
2282 *
2283 * @adev: amdgpu_device pointer
2284 *
2285 * Query the VBIOS data tables to determine if the board supports SR-IOV.
2286 */
4e99a44e 2287static void amdgpu_device_detect_sriov_bios(struct amdgpu_device *adev)
048765ad 2288{
6867e1b5
ML
2289 if (amdgpu_sriov_vf(adev)) {
2290 if (adev->is_atom_fw) {
2291 if (amdgpu_atomfirmware_gpu_supports_virtualization(adev))
2292 adev->virt.caps |= AMDGPU_SRIOV_CAPS_SRIOV_VBIOS;
2293 } else {
2294 if (amdgpu_atombios_has_gpu_virtualization_table(adev))
2295 adev->virt.caps |= AMDGPU_SRIOV_CAPS_SRIOV_VBIOS;
2296 }
2297
2298 if (!(adev->virt.caps & AMDGPU_SRIOV_CAPS_SRIOV_VBIOS))
2299 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_NO_VBIOS, 0, 0);
a5bde2f9 2300 }
048765ad
AR
2301}
2302
e3ecdffa
AD
2303/**
2304 * amdgpu_device_asic_has_dc_support - determine if DC supports the asic
2305 *
2306 * @asic_type: AMD asic type
2307 *
 2308  * Check if there is DC (new modesetting infrastructure) support for an asic.
 2309  * Returns true if DC has support, false if not.
2310 */
4562236b
HW
2311bool amdgpu_device_asic_has_dc_support(enum amd_asic_type asic_type)
2312{
2313 switch (asic_type) {
2314#if defined(CONFIG_DRM_AMD_DC)
2315 case CHIP_BONAIRE:
0d6fbccb 2316 case CHIP_KAVERI:
367e6687
AD
2317 case CHIP_KABINI:
2318 case CHIP_MULLINS:
d9fda248
HW
2319 /*
2320 * We have systems in the wild with these ASICs that require
2321 * LVDS and VGA support which is not supported with DC.
2322 *
2323 * Fallback to the non-DC driver here by default so as not to
2324 * cause regressions.
2325 */
2326 return amdgpu_dc > 0;
2327 case CHIP_HAWAII:
4562236b
HW
2328 case CHIP_CARRIZO:
2329 case CHIP_STONEY:
4562236b 2330 case CHIP_POLARIS10:
675fd32b 2331 case CHIP_POLARIS11:
2c8ad2d5 2332 case CHIP_POLARIS12:
675fd32b 2333 case CHIP_VEGAM:
4562236b
HW
2334 case CHIP_TONGA:
2335 case CHIP_FIJI:
42f8ffa1 2336 case CHIP_VEGA10:
dca7b401 2337 case CHIP_VEGA12:
c6034aa2 2338 case CHIP_VEGA20:
dc37a9a0 2339#if defined(CONFIG_DRM_AMD_DC_DCN1_0)
fd187853 2340 case CHIP_RAVEN:
42f8ffa1 2341#endif
fd187853 2342 return amdgpu_dc != 0;
4562236b
HW
2343#endif
2344 default:
2345 return false;
2346 }
2347}
2348
2349/**
2350 * amdgpu_device_has_dc_support - check if dc is supported
2351 *
 2352  * @adev: amdgpu_device pointer
2353 *
2354 * Returns true for supported, false for not supported
2355 */
2356bool amdgpu_device_has_dc_support(struct amdgpu_device *adev)
2357{
2555039d
XY
2358 if (amdgpu_sriov_vf(adev))
2359 return false;
2360
4562236b
HW
2361 return amdgpu_device_asic_has_dc_support(adev->asic_type);
2362}
2363
d4535e2c
AG
2364
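/**
 * amdgpu_device_xgmi_reset_func - work handler for a per-device asic reset
 *
 * @__work: work_struct.
 *
 * Resets the asic and stores the result in adev->asic_reset_res. Used to
 * run the resets of all nodes in an XGMI hive in parallel.
 */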
2365static void amdgpu_device_xgmi_reset_func(struct work_struct *__work)
2366{
2367 struct amdgpu_device *adev =
2368 container_of(__work, struct amdgpu_device, xgmi_reset_work);
2369
2370 adev->asic_reset_res = amdgpu_asic_reset(adev);
2371 if (adev->asic_reset_res)
2372 DRM_WARN("ASIC reset failed with err r, %d for drm dev, %s",
2373 adev->asic_reset_res, adev->ddev->unique);
2374}
2375
2376
d38ceaf9
AD
2377/**
2378 * amdgpu_device_init - initialize the driver
2379 *
2380 * @adev: amdgpu_device pointer
87e3f136 2381 * @ddev: drm dev pointer
d38ceaf9
AD
2382 * @pdev: pci dev pointer
2383 * @flags: driver flags
2384 *
2385 * Initializes the driver info and hw (all asics).
2386 * Returns 0 for success or an error on failure.
2387 * Called at driver startup.
2388 */
2389int amdgpu_device_init(struct amdgpu_device *adev,
2390 struct drm_device *ddev,
2391 struct pci_dev *pdev,
2392 uint32_t flags)
2393{
2394 int r, i;
2395 bool runtime = false;
95844d20 2396 u32 max_MBps;
d38ceaf9
AD
2397
2398 adev->shutdown = false;
2399 adev->dev = &pdev->dev;
2400 adev->ddev = ddev;
2401 adev->pdev = pdev;
2402 adev->flags = flags;
2f7d10b3 2403 adev->asic_type = flags & AMD_ASIC_MASK;
d38ceaf9 2404 adev->usec_timeout = AMDGPU_MAX_USEC_TIMEOUT;
593aa2d2
SL
2405 if (amdgpu_emu_mode == 1)
2406 adev->usec_timeout *= 2;
770d13b1 2407 adev->gmc.gart_size = 512 * 1024 * 1024;
d38ceaf9
AD
2408 adev->accel_working = false;
2409 adev->num_rings = 0;
2410 adev->mman.buffer_funcs = NULL;
2411 adev->mman.buffer_funcs_ring = NULL;
2412 adev->vm_manager.vm_pte_funcs = NULL;
3798e9a6 2413 adev->vm_manager.vm_pte_num_rqs = 0;
132f34e4 2414 adev->gmc.gmc_funcs = NULL;
f54d1867 2415 adev->fence_context = dma_fence_context_alloc(AMDGPU_MAX_RINGS);
b8866c26 2416 bitmap_zero(adev->gfx.pipe_reserve_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
d38ceaf9
AD
2417
2418 adev->smc_rreg = &amdgpu_invalid_rreg;
2419 adev->smc_wreg = &amdgpu_invalid_wreg;
2420 adev->pcie_rreg = &amdgpu_invalid_rreg;
2421 adev->pcie_wreg = &amdgpu_invalid_wreg;
36b9a952
HR
2422 adev->pciep_rreg = &amdgpu_invalid_rreg;
2423 adev->pciep_wreg = &amdgpu_invalid_wreg;
d38ceaf9
AD
2424 adev->uvd_ctx_rreg = &amdgpu_invalid_rreg;
2425 adev->uvd_ctx_wreg = &amdgpu_invalid_wreg;
2426 adev->didt_rreg = &amdgpu_invalid_rreg;
2427 adev->didt_wreg = &amdgpu_invalid_wreg;
ccdbb20a
RZ
2428 adev->gc_cac_rreg = &amdgpu_invalid_rreg;
2429 adev->gc_cac_wreg = &amdgpu_invalid_wreg;
d38ceaf9
AD
2430 adev->audio_endpt_rreg = &amdgpu_block_invalid_rreg;
2431 adev->audio_endpt_wreg = &amdgpu_block_invalid_wreg;
2432
3e39ab90
AD
2433 DRM_INFO("initializing kernel modesetting (%s 0x%04X:0x%04X 0x%04X:0x%04X 0x%02X).\n",
2434 amdgpu_asic_name[adev->asic_type], pdev->vendor, pdev->device,
2435 pdev->subsystem_vendor, pdev->subsystem_device, pdev->revision);
d38ceaf9
AD
2436
 2437 	/* mutex initializations are all done here so we
 2438 	 * can call these functions without having locking issues */
d38ceaf9 2439 atomic_set(&adev->irq.ih.lock, 0);
0e5ca0d1 2440 mutex_init(&adev->firmware.mutex);
d38ceaf9
AD
2441 mutex_init(&adev->pm.mutex);
2442 mutex_init(&adev->gfx.gpu_clock_mutex);
2443 mutex_init(&adev->srbm_mutex);
b8866c26 2444 mutex_init(&adev->gfx.pipe_reserve_mutex);
d23ee13f 2445 mutex_init(&adev->gfx.gfx_off_mutex);
d38ceaf9 2446 mutex_init(&adev->grbm_idx_mutex);
d38ceaf9 2447 mutex_init(&adev->mn_lock);
e23b74aa 2448 mutex_init(&adev->virt.vf_errors.lock);
d38ceaf9 2449 hash_init(adev->mn_hash);
13a752e3 2450 mutex_init(&adev->lock_reset);
d38ceaf9 2451
06ec9070 2452 amdgpu_device_check_arguments(adev);
d38ceaf9 2453
d38ceaf9
AD
2454 spin_lock_init(&adev->mmio_idx_lock);
2455 spin_lock_init(&adev->smc_idx_lock);
2456 spin_lock_init(&adev->pcie_idx_lock);
2457 spin_lock_init(&adev->uvd_ctx_idx_lock);
2458 spin_lock_init(&adev->didt_idx_lock);
ccdbb20a 2459 spin_lock_init(&adev->gc_cac_idx_lock);
16abb5d2 2460 spin_lock_init(&adev->se_cac_idx_lock);
d38ceaf9 2461 spin_lock_init(&adev->audio_endpt_idx_lock);
95844d20 2462 spin_lock_init(&adev->mm_stats.lock);
d38ceaf9 2463
0c4e7fa5
CZ
2464 INIT_LIST_HEAD(&adev->shadow_list);
2465 mutex_init(&adev->shadow_list_lock);
2466
795f2813
AR
2467 INIT_LIST_HEAD(&adev->ring_lru_list);
2468 spin_lock_init(&adev->ring_lru_list_lock);
2469
06ec9070
AD
2470 INIT_DELAYED_WORK(&adev->late_init_work,
2471 amdgpu_device_ip_late_init_func_handler);
1e317b99
RZ
2472 INIT_DELAYED_WORK(&adev->gfx.gfx_off_delay_work,
2473 amdgpu_device_delay_enable_gfx_off);
2dc80b00 2474
d4535e2c
AG
2475 INIT_WORK(&adev->xgmi_reset_work, amdgpu_device_xgmi_reset_func);
2476
d23ee13f 2477 adev->gfx.gfx_off_req_count = 1;
b1ddf548
RZ
2478 adev->pm.ac_power = power_supply_is_system_supplied() > 0 ? true : false;
2479
0fa49558
AX
2480 /* Registers mapping */
2481 /* TODO: block userspace mapping of io register */
da69c161
KW
2482 if (adev->asic_type >= CHIP_BONAIRE) {
2483 adev->rmmio_base = pci_resource_start(adev->pdev, 5);
2484 adev->rmmio_size = pci_resource_len(adev->pdev, 5);
2485 } else {
2486 adev->rmmio_base = pci_resource_start(adev->pdev, 2);
2487 adev->rmmio_size = pci_resource_len(adev->pdev, 2);
2488 }
d38ceaf9 2489
d38ceaf9
AD
2490 adev->rmmio = ioremap(adev->rmmio_base, adev->rmmio_size);
2491 if (adev->rmmio == NULL) {
2492 return -ENOMEM;
2493 }
2494 DRM_INFO("register mmio base: 0x%08X\n", (uint32_t)adev->rmmio_base);
2495 DRM_INFO("register mmio size: %u\n", (unsigned)adev->rmmio_size);
2496
d38ceaf9
AD
2497 /* io port mapping */
2498 for (i = 0; i < DEVICE_COUNT_RESOURCE; i++) {
2499 if (pci_resource_flags(adev->pdev, i) & IORESOURCE_IO) {
2500 adev->rio_mem_size = pci_resource_len(adev->pdev, i);
2501 adev->rio_mem = pci_iomap(adev->pdev, i, adev->rio_mem_size);
2502 break;
2503 }
2504 }
2505 if (adev->rio_mem == NULL)
b64a18c5 2506 DRM_INFO("PCI I/O BAR is not found.\n");
d38ceaf9 2507
5494d864
AD
2508 amdgpu_device_get_pcie_info(adev);
2509
d38ceaf9 2510 /* early init functions */
06ec9070 2511 r = amdgpu_device_ip_early_init(adev);
d38ceaf9
AD
2512 if (r)
2513 return r;
2514
6585661d
OZ
2515 /* doorbell bar mapping and doorbell index init*/
2516 amdgpu_device_doorbell_init(adev);
2517
d38ceaf9
AD
2518 /* if we have > 1 VGA cards, then disable the amdgpu VGA resources */
2519 /* this will fail for cards that aren't VGA class devices, just
2520 * ignore it */
06ec9070 2521 vga_client_register(adev->pdev, adev, NULL, amdgpu_device_vga_set_decode);
d38ceaf9 2522
e9bef455 2523 if (amdgpu_device_is_px(ddev))
d38ceaf9 2524 runtime = true;
84c8b22e
LW
2525 if (!pci_is_thunderbolt_attached(adev->pdev))
2526 vga_switcheroo_register_client(adev->pdev,
2527 &amdgpu_switcheroo_ops, runtime);
d38ceaf9
AD
2528 if (runtime)
2529 vga_switcheroo_init_domain_pm_ops(adev->dev, &adev->vga_pm_domain);
2530
9475a943
SL
2531 if (amdgpu_emu_mode == 1) {
2532 /* post the asic on emulation mode */
2533 emu_soc_asic_init(adev);
bfca0289 2534 goto fence_driver_init;
9475a943 2535 }
bfca0289 2536
d38ceaf9 2537 /* Read BIOS */
83ba126a
AD
2538 if (!amdgpu_get_bios(adev)) {
2539 r = -EINVAL;
2540 goto failed;
2541 }
f7e9e9fe 2542
d38ceaf9 2543 r = amdgpu_atombios_init(adev);
2c1a2784
AD
2544 if (r) {
2545 dev_err(adev->dev, "amdgpu_atombios_init failed\n");
e23b74aa 2546 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_INIT_FAIL, 0, 0);
83ba126a 2547 goto failed;
2c1a2784 2548 }
d38ceaf9 2549
4e99a44e
ML
2550 /* detect if we are with an SRIOV vbios */
2551 amdgpu_device_detect_sriov_bios(adev);
048765ad 2552
d38ceaf9 2553 /* Post card if necessary */
39c640c0 2554 if (amdgpu_device_need_post(adev)) {
d38ceaf9 2555 if (!adev->bios) {
bec86378 2556 dev_err(adev->dev, "no vBIOS found\n");
83ba126a
AD
2557 r = -EINVAL;
2558 goto failed;
d38ceaf9 2559 }
bec86378 2560 DRM_INFO("GPU posting now...\n");
4e99a44e
ML
2561 r = amdgpu_atom_asic_init(adev->mode_info.atom_context);
2562 if (r) {
2563 dev_err(adev->dev, "gpu post error!\n");
2564 goto failed;
2565 }
d38ceaf9
AD
2566 }
2567
88b64e95
AD
2568 if (adev->is_atom_fw) {
2569 /* Initialize clocks */
2570 r = amdgpu_atomfirmware_get_clock_info(adev);
2571 if (r) {
2572 dev_err(adev->dev, "amdgpu_atomfirmware_get_clock_info failed\n");
e23b74aa 2573 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_GET_CLOCK_FAIL, 0, 0);
88b64e95
AD
2574 goto failed;
2575 }
2576 } else {
a5bde2f9
AD
2577 /* Initialize clocks */
2578 r = amdgpu_atombios_get_clock_info(adev);
2579 if (r) {
2580 dev_err(adev->dev, "amdgpu_atombios_get_clock_info failed\n");
e23b74aa 2581 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_GET_CLOCK_FAIL, 0, 0);
89041940 2582 goto failed;
a5bde2f9
AD
2583 }
2584 /* init i2c buses */
4562236b
HW
2585 if (!amdgpu_device_has_dc_support(adev))
2586 amdgpu_atombios_i2c_init(adev);
2c1a2784 2587 }
d38ceaf9 2588
bfca0289 2589fence_driver_init:
d38ceaf9
AD
2590 /* Fence driver */
2591 r = amdgpu_fence_driver_init(adev);
2c1a2784
AD
2592 if (r) {
2593 dev_err(adev->dev, "amdgpu_fence_driver_init failed\n");
e23b74aa 2594 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_FENCE_INIT_FAIL, 0, 0);
83ba126a 2595 goto failed;
2c1a2784 2596 }
d38ceaf9
AD
2597
2598 /* init the mode config */
2599 drm_mode_config_init(adev->ddev);
2600
06ec9070 2601 r = amdgpu_device_ip_init(adev);
d38ceaf9 2602 if (r) {
8840a387 2603 /* failed in exclusive mode due to timeout */
2604 if (amdgpu_sriov_vf(adev) &&
2605 !amdgpu_sriov_runtime(adev) &&
2606 amdgpu_virt_mmio_blocked(adev) &&
2607 !amdgpu_virt_wait_reset(adev)) {
2608 dev_err(adev->dev, "VF exclusive mode timeout\n");
1daee8b4
PD
2609 /* Don't send request since VF is inactive. */
2610 adev->virt.caps &= ~AMDGPU_SRIOV_CAPS_RUNTIME;
2611 adev->virt.ops = NULL;
8840a387 2612 r = -EAGAIN;
2613 goto failed;
2614 }
06ec9070 2615 dev_err(adev->dev, "amdgpu_device_ip_init failed\n");
e23b74aa 2616 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_AMDGPU_INIT_FAIL, 0, 0);
72d3f592
ED
2617 if (amdgpu_virt_request_full_gpu(adev, false))
2618 amdgpu_virt_release_full_gpu(adev, false);
83ba126a 2619 goto failed;
d38ceaf9
AD
2620 }
2621
2622 adev->accel_working = true;
2623
e59c0205
AX
2624 amdgpu_vm_check_compute_bug(adev);
2625
95844d20
MO
2626 /* Initialize the buffer migration limit. */
2627 if (amdgpu_moverate >= 0)
2628 max_MBps = amdgpu_moverate;
2629 else
2630 max_MBps = 8; /* Allow 8 MB/s. */
2631 /* Get a log2 for easy divisions. */
2632 adev->mm_stats.log2_max_MBps = ilog2(max(1u, max_MBps));
2633
d38ceaf9
AD
2634 r = amdgpu_ib_pool_init(adev);
2635 if (r) {
2636 dev_err(adev->dev, "IB initialization failed (%d).\n", r);
e23b74aa 2637 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_IB_INIT_FAIL, 0, r);
83ba126a 2638 goto failed;
d38ceaf9
AD
2639 }
2640
9bc92b9c
ML
2641 amdgpu_fbdev_init(adev);
2642
d2f52ac8
RZ
2643 r = amdgpu_pm_sysfs_init(adev);
2644 if (r)
2645 DRM_ERROR("registering pm debugfs failed (%d).\n", r);
2646
75758255 2647 r = amdgpu_debugfs_gem_init(adev);
3f14e623 2648 if (r)
d38ceaf9 2649 DRM_ERROR("registering gem debugfs failed (%d).\n", r);
d38ceaf9
AD
2650
2651 r = amdgpu_debugfs_regs_init(adev);
3f14e623 2652 if (r)
d38ceaf9 2653 DRM_ERROR("registering register debugfs failed (%d).\n", r);
d38ceaf9 2654
50ab2533 2655 r = amdgpu_debugfs_firmware_init(adev);
3f14e623 2656 if (r)
50ab2533 2657 DRM_ERROR("registering firmware debugfs failed (%d).\n", r);
50ab2533 2658
763efb6c 2659 r = amdgpu_debugfs_init(adev);
db95e218 2660 if (r)
763efb6c 2661 DRM_ERROR("Creating debugfs files failed (%d).\n", r);
db95e218 2662
d38ceaf9
AD
2663 if ((amdgpu_testing & 1)) {
2664 if (adev->accel_working)
2665 amdgpu_test_moves(adev);
2666 else
2667 DRM_INFO("amdgpu: acceleration disabled, skipping move tests\n");
2668 }
d38ceaf9
AD
2669 if (amdgpu_benchmarking) {
2670 if (adev->accel_working)
2671 amdgpu_benchmark(adev, amdgpu_benchmarking);
2672 else
2673 DRM_INFO("amdgpu: acceleration disabled, skipping benchmarks\n");
2674 }
2675
2676 /* enable clockgating, etc. after ib tests, etc. since some blocks require
2677 * explicit gating rather than handling it automatically.
2678 */
06ec9070 2679 r = amdgpu_device_ip_late_init(adev);
2c1a2784 2680 if (r) {
06ec9070 2681 dev_err(adev->dev, "amdgpu_device_ip_late_init failed\n");
e23b74aa 2682 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_AMDGPU_LATE_INIT_FAIL, 0, r);
83ba126a 2683 goto failed;
2c1a2784 2684 }
d38ceaf9
AD
2685
2686 return 0;
83ba126a
AD
2687
2688failed:
89041940 2689 amdgpu_vf_error_trans_all(adev);
83ba126a
AD
2690 if (runtime)
2691 vga_switcheroo_fini_domain_pm_ops(adev->dev);
8840a387 2692
83ba126a 2693 return r;
d38ceaf9
AD
2694}
2695
d38ceaf9
AD
2696/**
2697 * amdgpu_device_fini - tear down the driver
2698 *
2699 * @adev: amdgpu_device pointer
2700 *
2701 * Tear down the driver info (all asics).
2702 * Called at driver shutdown.
2703 */
2704void amdgpu_device_fini(struct amdgpu_device *adev)
2705{
2706 int r;
2707
2708 DRM_INFO("amdgpu: finishing device.\n");
2709 adev->shutdown = true;
e5b03032
ML
2710 /* disable all interrupts */
2711 amdgpu_irq_disable_all(adev);
ff97cba8
ML
2712 if (adev->mode_info.mode_config_initialized){
2713 if (!amdgpu_device_has_dc_support(adev))
2714 drm_crtc_force_disable_all(adev->ddev);
2715 else
2716 drm_atomic_helper_shutdown(adev->ddev);
2717 }
d38ceaf9
AD
2718 amdgpu_ib_pool_fini(adev);
2719 amdgpu_fence_driver_fini(adev);
58e955d9 2720 amdgpu_pm_sysfs_fini(adev);
d38ceaf9 2721 amdgpu_fbdev_fini(adev);
06ec9070 2722 r = amdgpu_device_ip_fini(adev);
ab4fe3e1
HR
2723 if (adev->firmware.gpu_info_fw) {
2724 release_firmware(adev->firmware.gpu_info_fw);
2725 adev->firmware.gpu_info_fw = NULL;
2726 }
d38ceaf9 2727 adev->accel_working = false;
2dc80b00 2728 cancel_delayed_work_sync(&adev->late_init_work);
d38ceaf9 2729 /* free i2c buses */
4562236b
HW
2730 if (!amdgpu_device_has_dc_support(adev))
2731 amdgpu_i2c_fini(adev);
bfca0289
SL
2732
2733 if (amdgpu_emu_mode != 1)
2734 amdgpu_atombios_fini(adev);
2735
d38ceaf9
AD
2736 kfree(adev->bios);
2737 adev->bios = NULL;
84c8b22e
LW
2738 if (!pci_is_thunderbolt_attached(adev->pdev))
2739 vga_switcheroo_unregister_client(adev->pdev);
83ba126a
AD
2740 if (adev->flags & AMD_IS_PX)
2741 vga_switcheroo_fini_domain_pm_ops(adev->dev);
d38ceaf9
AD
2742 vga_client_register(adev->pdev, NULL, NULL, NULL);
2743 if (adev->rio_mem)
2744 pci_iounmap(adev->pdev, adev->rio_mem);
2745 adev->rio_mem = NULL;
2746 iounmap(adev->rmmio);
2747 adev->rmmio = NULL;
06ec9070 2748 amdgpu_device_doorbell_fini(adev);
d38ceaf9 2749 amdgpu_debugfs_regs_cleanup(adev);
d38ceaf9
AD
2750}
2751
2752
2753/*
2754 * Suspend & resume.
2755 */
2756/**
810ddc3a 2757 * amdgpu_device_suspend - initiate device suspend
d38ceaf9 2758 *
87e3f136
DP
2759 * @dev: drm dev pointer
2760 * @suspend: suspend state
2761 * @fbcon : notify the fbdev of suspend
d38ceaf9
AD
2762 *
2763 * Puts the hw in the suspend state (all asics).
2764 * Returns 0 for success or an error on failure.
2765 * Called at driver suspend.
2766 */
810ddc3a 2767int amdgpu_device_suspend(struct drm_device *dev, bool suspend, bool fbcon)
d38ceaf9
AD
2768{
2769 struct amdgpu_device *adev;
2770 struct drm_crtc *crtc;
2771 struct drm_connector *connector;
5ceb54c6 2772 int r;
d38ceaf9
AD
2773
2774 if (dev == NULL || dev->dev_private == NULL) {
2775 return -ENODEV;
2776 }
2777
2778 adev = dev->dev_private;
2779
2780 if (dev->switch_power_state == DRM_SWITCH_POWER_OFF)
2781 return 0;
2782
44779b43 2783 adev->in_suspend = true;
d38ceaf9
AD
2784 drm_kms_helper_poll_disable(dev);
2785
5f818173
S
2786 if (fbcon)
2787 amdgpu_fbdev_set_suspend(adev, 1);
2788
a5459475
RZ
2789 cancel_delayed_work_sync(&adev->late_init_work);
2790
4562236b
HW
2791 if (!amdgpu_device_has_dc_support(adev)) {
2792 /* turn off display hw */
2793 drm_modeset_lock_all(dev);
2794 list_for_each_entry(connector, &dev->mode_config.connector_list, head) {
2795 drm_helper_connector_dpms(connector, DRM_MODE_DPMS_OFF);
2796 }
2797 drm_modeset_unlock_all(dev);
fe1053b7
AD
2798 /* unpin the front buffers and cursors */
2799 list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) {
2800 struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
2801 struct drm_framebuffer *fb = crtc->primary->fb;
2802 struct amdgpu_bo *robj;
2803
91334223 2804 if (amdgpu_crtc->cursor_bo && !adev->enable_virtual_display) {
fe1053b7
AD
2805 struct amdgpu_bo *aobj = gem_to_amdgpu_bo(amdgpu_crtc->cursor_bo);
2806 r = amdgpu_bo_reserve(aobj, true);
2807 if (r == 0) {
2808 amdgpu_bo_unpin(aobj);
2809 amdgpu_bo_unreserve(aobj);
2810 }
756e6880 2811 }
756e6880 2812
fe1053b7
AD
2813 if (fb == NULL || fb->obj[0] == NULL) {
2814 continue;
2815 }
2816 robj = gem_to_amdgpu_bo(fb->obj[0]);
2817 /* don't unpin kernel fb objects */
2818 if (!amdgpu_fbdev_robj_is_fb(adev, robj)) {
2819 r = amdgpu_bo_reserve(robj, true);
2820 if (r == 0) {
2821 amdgpu_bo_unpin(robj);
2822 amdgpu_bo_unreserve(robj);
2823 }
d38ceaf9
AD
2824 }
2825 }
2826 }
fe1053b7
AD
2827
2828 amdgpu_amdkfd_suspend(adev);
2829
2830 r = amdgpu_device_ip_suspend_phase1(adev);
2831
d38ceaf9
AD
2832 /* evict vram memory */
2833 amdgpu_bo_evict_vram(adev);
2834
5ceb54c6 2835 amdgpu_fence_driver_suspend(adev);
d38ceaf9 2836
fe1053b7 2837 r = amdgpu_device_ip_suspend_phase2(adev);
d38ceaf9 2838
a0a71e49
AD
2839 /* evict remaining vram memory
2840 * This second call to evict vram is to evict the gart page table
2841 * using the CPU.
2842 */
d38ceaf9
AD
2843 amdgpu_bo_evict_vram(adev);
2844
2845 pci_save_state(dev->pdev);
2846 if (suspend) {
2847 /* Shut down the device */
2848 pci_disable_device(dev->pdev);
2849 pci_set_power_state(dev->pdev, PCI_D3hot);
74b0b157 2850 } else {
2851 r = amdgpu_asic_reset(adev);
2852 if (r)
2853 DRM_ERROR("amdgpu asic reset failed\n");
d38ceaf9
AD
2854 }
2855
d38ceaf9
AD
2856 return 0;
2857}
2858
2859/**
810ddc3a 2860 * amdgpu_device_resume - initiate device resume
d38ceaf9 2861 *
87e3f136
DP
2862 * @dev: drm dev pointer
2863 * @resume: resume state
2864 * @fbcon : notify the fbdev of resume
d38ceaf9
AD
2865 *
2866 * Bring the hw back to operating state (all asics).
2867 * Returns 0 for success or an error on failure.
2868 * Called at driver resume.
2869 */
810ddc3a 2870int amdgpu_device_resume(struct drm_device *dev, bool resume, bool fbcon)
d38ceaf9
AD
2871{
2872 struct drm_connector *connector;
2873 struct amdgpu_device *adev = dev->dev_private;
756e6880 2874 struct drm_crtc *crtc;
03161a6e 2875 int r = 0;
d38ceaf9
AD
2876
2877 if (dev->switch_power_state == DRM_SWITCH_POWER_OFF)
2878 return 0;
2879
d38ceaf9
AD
2880 if (resume) {
2881 pci_set_power_state(dev->pdev, PCI_D0);
2882 pci_restore_state(dev->pdev);
74b0b157 2883 r = pci_enable_device(dev->pdev);
03161a6e 2884 if (r)
4d3b9ae5 2885 return r;
d38ceaf9
AD
2886 }
2887
2888 /* post card */
39c640c0 2889 if (amdgpu_device_need_post(adev)) {
74b0b157 2890 r = amdgpu_atom_asic_init(adev->mode_info.atom_context);
2891 if (r)
2892 DRM_ERROR("amdgpu asic init failed\n");
2893 }
d38ceaf9 2894
06ec9070 2895 r = amdgpu_device_ip_resume(adev);
e6707218 2896 if (r) {
06ec9070 2897 DRM_ERROR("amdgpu_device_ip_resume failed (%d).\n", r);
4d3b9ae5 2898 return r;
e6707218 2899 }
5ceb54c6
AD
2900 amdgpu_fence_driver_resume(adev);
2901
d38ceaf9 2902
06ec9070 2903 r = amdgpu_device_ip_late_init(adev);
03161a6e 2904 if (r)
4d3b9ae5 2905 return r;
d38ceaf9 2906
fe1053b7
AD
2907 if (!amdgpu_device_has_dc_support(adev)) {
2908 /* pin cursors */
2909 list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) {
2910 struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
2911
91334223 2912 if (amdgpu_crtc->cursor_bo && !adev->enable_virtual_display) {
fe1053b7
AD
2913 struct amdgpu_bo *aobj = gem_to_amdgpu_bo(amdgpu_crtc->cursor_bo);
2914 r = amdgpu_bo_reserve(aobj, true);
2915 if (r == 0) {
2916 r = amdgpu_bo_pin(aobj, AMDGPU_GEM_DOMAIN_VRAM);
2917 if (r != 0)
2918 DRM_ERROR("Failed to pin cursor BO (%d)\n", r);
2919 amdgpu_crtc->cursor_addr = amdgpu_bo_gpu_offset(aobj);
2920 amdgpu_bo_unreserve(aobj);
2921 }
756e6880
AD
2922 }
2923 }
2924 }
ba997709
YZ
2925 r = amdgpu_amdkfd_resume(adev);
2926 if (r)
2927 return r;
756e6880 2928
96a5d8d4
LL
2929 /* Make sure IB tests flushed */
2930 flush_delayed_work(&adev->late_init_work);
2931
d38ceaf9
AD
2932 /* blat the mode back in */
2933 if (fbcon) {
4562236b
HW
2934 if (!amdgpu_device_has_dc_support(adev)) {
2935 /* pre DCE11 */
2936 drm_helper_resume_force_mode(dev);
2937
2938 /* turn on display hw */
2939 drm_modeset_lock_all(dev);
2940 list_for_each_entry(connector, &dev->mode_config.connector_list, head) {
2941 drm_helper_connector_dpms(connector, DRM_MODE_DPMS_ON);
2942 }
2943 drm_modeset_unlock_all(dev);
d38ceaf9 2944 }
4d3b9ae5 2945 amdgpu_fbdev_set_suspend(adev, 0);
d38ceaf9
AD
2946 }
2947
2948 drm_kms_helper_poll_enable(dev);
23a1a9e5
L
2949
2950 /*
2951 * Most of the connector probing functions try to acquire runtime pm
2952 * refs to ensure that the GPU is powered on when connector polling is
2953 * performed. Since we're calling this from a runtime PM callback,
2954 * trying to acquire rpm refs will cause us to deadlock.
2955 *
2956 * Since we're guaranteed to be holding the rpm lock, it's safe to
2957 * temporarily disable the rpm helpers so this doesn't deadlock us.
2958 */
2959#ifdef CONFIG_PM
2960 dev->dev->power.disable_depth++;
2961#endif
4562236b
HW
2962 if (!amdgpu_device_has_dc_support(adev))
2963 drm_helper_hpd_irq_event(dev);
2964 else
2965 drm_kms_helper_hotplug_event(dev);
23a1a9e5
L
2966#ifdef CONFIG_PM
2967 dev->dev->power.disable_depth--;
2968#endif
44779b43
RZ
2969 adev->in_suspend = false;
2970
4d3b9ae5 2971 return 0;
d38ceaf9
AD
2972}
2973
e3ecdffa
AD
2974/**
2975 * amdgpu_device_ip_check_soft_reset - did soft reset succeed
2976 *
2977 * @adev: amdgpu_device pointer
2978 *
2979 * The list of all the hardware IPs that make up the asic is walked and
2980 * the check_soft_reset callbacks are run. check_soft_reset determines
2981 * if the asic is still hung or not.
2982 * Returns true if any of the IPs are still in a hung state, false if not.
2983 */
06ec9070 2984static bool amdgpu_device_ip_check_soft_reset(struct amdgpu_device *adev)
63fbf42f
CZ
2985{
2986 int i;
2987 bool asic_hang = false;
2988
f993d628
ML
2989 if (amdgpu_sriov_vf(adev))
2990 return true;
2991
8bc04c29
AD
2992 if (amdgpu_asic_need_full_reset(adev))
2993 return true;
2994
63fbf42f 2995 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 2996 if (!adev->ip_blocks[i].status.valid)
63fbf42f 2997 continue;
a1255107
AD
2998 if (adev->ip_blocks[i].version->funcs->check_soft_reset)
2999 adev->ip_blocks[i].status.hang =
3000 adev->ip_blocks[i].version->funcs->check_soft_reset(adev);
3001 if (adev->ip_blocks[i].status.hang) {
3002 DRM_INFO("IP block:%s is hung!\n", adev->ip_blocks[i].version->funcs->name);
63fbf42f
CZ
3003 asic_hang = true;
3004 }
3005 }
3006 return asic_hang;
3007}
3008
e3ecdffa
AD
3009/**
3010 * amdgpu_device_ip_pre_soft_reset - prepare for soft reset
3011 *
3012 * @adev: amdgpu_device pointer
3013 *
3014 * The list of all the hardware IPs that make up the asic is walked and the
3015 * pre_soft_reset callbacks are run if the block is hung. pre_soft_reset
3016 * handles any IP specific hardware or software state changes that are
3017 * necessary for a soft reset to succeed.
3018 * Returns 0 on success, negative error code on failure.
3019 */
06ec9070 3020static int amdgpu_device_ip_pre_soft_reset(struct amdgpu_device *adev)
d31a501e
CZ
3021{
3022 int i, r = 0;
3023
3024 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 3025 if (!adev->ip_blocks[i].status.valid)
d31a501e 3026 continue;
a1255107
AD
3027 if (adev->ip_blocks[i].status.hang &&
3028 adev->ip_blocks[i].version->funcs->pre_soft_reset) {
3029 r = adev->ip_blocks[i].version->funcs->pre_soft_reset(adev);
d31a501e
CZ
3030 if (r)
3031 return r;
3032 }
3033 }
3034
3035 return 0;
3036}
3037
e3ecdffa
AD
3038/**
3039 * amdgpu_device_ip_need_full_reset - check if a full asic reset is needed
3040 *
3041 * @adev: amdgpu_device pointer
3042 *
3043 * Some hardware IPs cannot be soft reset. If they are hung, a full gpu
3044 * reset is necessary to recover.
3045 * Returns true if a full asic reset is required, false if not.
3046 */
06ec9070 3047static bool amdgpu_device_ip_need_full_reset(struct amdgpu_device *adev)
35d782fe 3048{
da146d3b
AD
3049 int i;
3050
8bc04c29
AD
3051 if (amdgpu_asic_need_full_reset(adev))
3052 return true;
3053
da146d3b 3054 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 3055 if (!adev->ip_blocks[i].status.valid)
da146d3b 3056 continue;
a1255107
AD
3057 if ((adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) ||
3058 (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) ||
3059 (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_ACP) ||
98512bb8
KW
3060 (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_DCE) ||
3061 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP) {
a1255107 3062 if (adev->ip_blocks[i].status.hang) {
da146d3b
AD
3063 DRM_INFO("Some block need full reset!\n");
3064 return true;
3065 }
3066 }
35d782fe
CZ
3067 }
3068 return false;
3069}
3070
e3ecdffa
AD
3071/**
3072 * amdgpu_device_ip_soft_reset - do a soft reset
3073 *
3074 * @adev: amdgpu_device pointer
3075 *
3076 * The list of all the hardware IPs that make up the asic is walked and the
3077 * soft_reset callbacks are run if the block is hung. soft_reset handles any
3078 * IP specific hardware or software state changes that are necessary to soft
3079 * reset the IP.
3080 * Returns 0 on success, negative error code on failure.
3081 */
06ec9070 3082static int amdgpu_device_ip_soft_reset(struct amdgpu_device *adev)
35d782fe
CZ
3083{
3084 int i, r = 0;
3085
3086 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 3087 if (!adev->ip_blocks[i].status.valid)
35d782fe 3088 continue;
a1255107
AD
3089 if (adev->ip_blocks[i].status.hang &&
3090 adev->ip_blocks[i].version->funcs->soft_reset) {
3091 r = adev->ip_blocks[i].version->funcs->soft_reset(adev);
35d782fe
CZ
3092 if (r)
3093 return r;
3094 }
3095 }
3096
3097 return 0;
3098}
3099
e3ecdffa
AD
3100/**
3101 * amdgpu_device_ip_post_soft_reset - clean up from soft reset
3102 *
3103 * @adev: amdgpu_device pointer
3104 *
3105 * The list of all the hardware IPs that make up the asic is walked and the
3106 * post_soft_reset callbacks are run if the asic was hung. post_soft_reset
3107 * handles any IP specific hardware or software state changes that are
3108 * necessary after the IP has been soft reset.
3109 * Returns 0 on success, negative error code on failure.
3110 */
06ec9070 3111static int amdgpu_device_ip_post_soft_reset(struct amdgpu_device *adev)
35d782fe
CZ
3112{
3113 int i, r = 0;
3114
3115 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 3116 if (!adev->ip_blocks[i].status.valid)
35d782fe 3117 continue;
a1255107
AD
3118 if (adev->ip_blocks[i].status.hang &&
3119 adev->ip_blocks[i].version->funcs->post_soft_reset)
3120 r = adev->ip_blocks[i].version->funcs->post_soft_reset(adev);
35d782fe
CZ
3121 if (r)
3122 return r;
3123 }
3124
3125 return 0;
3126}
3127
e3ecdffa 3128/**
c33adbc7 3129 * amdgpu_device_recover_vram - Recover some VRAM contents
e3ecdffa
AD
3130 *
3131 * @adev: amdgpu_device pointer
3132 *
3133 * Restores the contents of VRAM buffers from the shadows in GTT. Used to
3134 * restore things like GPUVM page tables after a GPU reset where
3135 * the contents of VRAM might be lost.
403009bf
CK
3136 *
3137 * Returns:
3138 * 0 on success, negative error code on failure.
e3ecdffa 3139 */
c33adbc7 3140static int amdgpu_device_recover_vram(struct amdgpu_device *adev)
c41d1cf6 3141{
c41d1cf6 3142 struct dma_fence *fence = NULL, *next = NULL;
403009bf
CK
3143 struct amdgpu_bo *shadow;
3144 long r = 1, tmo;
c41d1cf6
ML
3145
3146 if (amdgpu_sriov_runtime(adev))
b045d3af 3147 tmo = msecs_to_jiffies(8000);
c41d1cf6
ML
3148 else
3149 tmo = msecs_to_jiffies(100);
3150
3151 DRM_INFO("recover vram bo from shadow start\n");
3152 mutex_lock(&adev->shadow_list_lock);
403009bf
CK
3153 list_for_each_entry(shadow, &adev->shadow_list, shadow_list) {
3154
3155 /* No need to recover an evicted BO */
3156 if (shadow->tbo.mem.mem_type != TTM_PL_TT ||
3157 shadow->parent->tbo.mem.mem_type != TTM_PL_VRAM)
3158 continue;
3159
3160 r = amdgpu_bo_restore_shadow(shadow, &next);
3161 if (r)
3162 break;
3163
c41d1cf6
ML
3164 if (fence) {
3165 r = dma_fence_wait_timeout(fence, false, tmo);
403009bf
CK
3166 dma_fence_put(fence);
3167 fence = next;
3168 if (r <= 0)
c41d1cf6 3169 break;
403009bf
CK
3170 } else {
3171 fence = next;
c41d1cf6 3172 }
c41d1cf6
ML
3173 }
3174 mutex_unlock(&adev->shadow_list_lock);
3175
403009bf
CK
3176 if (fence)
3177 tmo = dma_fence_wait_timeout(fence, false, tmo);
c41d1cf6
ML
3178 dma_fence_put(fence);
3179
403009bf 3180 if (r <= 0 || tmo <= 0) {
c41d1cf6 3181 DRM_ERROR("recover vram bo from shadow failed\n");
403009bf
CK
3182 return -EIO;
3183 }
c41d1cf6 3184
403009bf
CK
3185 DRM_INFO("recover vram bo from shadow done\n");
3186 return 0;
c41d1cf6
ML
3187}
3188
a90ad3c2 3189
e3ecdffa 3190/**
06ec9070 3191 * amdgpu_device_reset_sriov - reset ASIC for SR-IOV vf
5740682e
ML
3192 *
3193 * @adev: amdgpu device pointer
87e3f136 3194 * @from_hypervisor: request from hypervisor
5740682e
ML
3195 *
 3196  * Do a VF FLR and reinitialize the asic.
3f48c681 3197  * Returns 0 on success, negative error code on failure.
e3ecdffa
AD
3198 */
3199static int amdgpu_device_reset_sriov(struct amdgpu_device *adev,
3200 bool from_hypervisor)
5740682e
ML
3201{
3202 int r;
3203
3204 if (from_hypervisor)
3205 r = amdgpu_virt_request_full_gpu(adev, true);
3206 else
3207 r = amdgpu_virt_reset_gpu(adev);
3208 if (r)
3209 return r;
a90ad3c2
ML
3210
3211 /* Resume IP prior to SMC */
06ec9070 3212 r = amdgpu_device_ip_reinit_early_sriov(adev);
5740682e
ML
3213 if (r)
3214 goto error;
a90ad3c2
ML
3215
3216 /* we need recover gart prior to run SMC/CP/SDMA resume */
c1c7ce8f 3217 amdgpu_gtt_mgr_recover(&adev->mman.bdev.man[TTM_PL_TT]);
a90ad3c2 3218
7a3e0bb2
RZ
3219 r = amdgpu_device_fw_loading(adev);
3220 if (r)
3221 return r;
3222
a90ad3c2 3223 /* now we are okay to resume SMC/CP/SDMA */
06ec9070 3224 r = amdgpu_device_ip_reinit_late_sriov(adev);
5740682e
ML
3225 if (r)
3226 goto error;
a90ad3c2
ML
3227
3228 amdgpu_irq_gpu_reset_resume_helper(adev);
5740682e 3229 r = amdgpu_ib_ring_tests(adev);
a90ad3c2 3230
abc34253 3231error:
d3c117e5 3232 amdgpu_virt_init_data_exchange(adev);
abc34253 3233 amdgpu_virt_release_full_gpu(adev, true);
c41d1cf6
ML
3234 if (!r && adev->virt.gim_feature & AMDGIM_FEATURE_GIM_FLR_VRAMLOST) {
3235 atomic_inc(&adev->vram_lost_counter);
c33adbc7 3236 r = amdgpu_device_recover_vram(adev);
a90ad3c2
ML
3237 }
3238
3239 return r;
3240}
3241
12938fad
CK
3242/**
3243 * amdgpu_device_should_recover_gpu - check if we should try GPU recovery
3244 *
3245 * @adev: amdgpu device pointer
3246 *
3247 * Check amdgpu_gpu_recovery and SRIOV status to see if we should try to recover
3248 * a hung GPU.
3249 */
3250bool amdgpu_device_should_recover_gpu(struct amdgpu_device *adev)
3251{
3252 if (!amdgpu_device_ip_check_soft_reset(adev)) {
3253 DRM_INFO("Timeout, but no hardware hang detected.\n");
3254 return false;
3255 }
3256
3ba7b418
AG
3257 if (amdgpu_gpu_recovery == 0)
3258 goto disabled;
3259
3260 if (amdgpu_sriov_vf(adev))
3261 return true;
3262
3263 if (amdgpu_gpu_recovery == -1) {
3264 switch (adev->asic_type) {
fc42d47c
AG
3265 case CHIP_BONAIRE:
3266 case CHIP_HAWAII:
3ba7b418
AG
3267 case CHIP_TOPAZ:
3268 case CHIP_TONGA:
3269 case CHIP_FIJI:
3270 case CHIP_POLARIS10:
3271 case CHIP_POLARIS11:
3272 case CHIP_POLARIS12:
3273 case CHIP_VEGAM:
3274 case CHIP_VEGA20:
3275 case CHIP_VEGA10:
3276 case CHIP_VEGA12:
3277 break;
3278 default:
3279 goto disabled;
3280 }
12938fad
CK
3281 }
3282
3283 return true;
3ba7b418
AG
3284
3285disabled:
3286 DRM_INFO("GPU recovery disabled.\n");
3287 return false;
12938fad
CK
3288}
3289
5c6dd71e 3290
26bc5340
AG
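/**
 * amdgpu_device_pre_asic_reset - prepare a device for asic reset
 *
 * @adev: amdgpu_device pointer
 * @job: the job that triggered the reset, or NULL
 * @need_full_reset_arg: in/out flag, whether a full asic reset is required
 *
 * Parks all ring schedulers, resets the hw jobs of the given job's ring
 * (or of all rings when @job is NULL) and forces completion of their hw
 * fences. On bare metal it then tries a soft reset and, if that fails or
 * a full reset is already required, suspends the IPs in preparation for
 * a full asic reset.
 * Returns 0 on success, negative error code on failure.
 */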
3291static int amdgpu_device_pre_asic_reset(struct amdgpu_device *adev,
3292 struct amdgpu_job *job,
3293 bool *need_full_reset_arg)
3294{
3295 int i, r = 0;
3296 bool need_full_reset = *need_full_reset_arg;
71182665 3297
71182665 3298 /* block all schedulers and reset given job's ring */
0875dc9e
CZ
3299 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
3300 struct amdgpu_ring *ring = adev->rings[i];
3301
51687759 3302 if (!ring || !ring->sched.thread)
0875dc9e 3303 continue;
5740682e 3304
71182665
ML
3305 kthread_park(ring->sched.thread);
3306
734afd4b 3307 if (job && job->base.sched != &ring->sched)
5740682e
ML
3308 continue;
3309
67ccea60 3310 drm_sched_hw_job_reset(&ring->sched, job ? &job->base : NULL);
5740682e 3311
2f9d4084
ML
3312 /* after all hw jobs are reset, hw fence is meaningless, so force_completion */
3313 amdgpu_fence_driver_force_completion(ring);
0875dc9e 3314 }
d38ceaf9 3315
26bc5340
AG
3316
3317
3318 if (!amdgpu_sriov_vf(adev)) {
3319
3320 if (!need_full_reset)
3321 need_full_reset = amdgpu_device_ip_need_full_reset(adev);
3322
3323 if (!need_full_reset) {
3324 amdgpu_device_ip_pre_soft_reset(adev);
3325 r = amdgpu_device_ip_soft_reset(adev);
3326 amdgpu_device_ip_post_soft_reset(adev);
3327 if (r || amdgpu_device_ip_check_soft_reset(adev)) {
3328 DRM_INFO("soft reset failed, will fallback to full reset!\n");
3329 need_full_reset = true;
3330 }
3331 }
3332
3333 if (need_full_reset)
3334 r = amdgpu_device_ip_suspend(adev);
3335
3336 *need_full_reset_arg = need_full_reset;
3337 }
3338
3339 return r;
3340}
3341
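/**
 * amdgpu_do_asic_reset - reset and reinitialize all devices in the list
 *
 * @hive: XGMI hive the devices belong to, or NULL
 * @device_list_handle: list of devices to reset
 * @need_full_reset_arg: in/out flag, whether a full asic reset is required
 *
 * If a full reset is needed, resets every device in the list (in parallel
 * for XGMI hives), re-posts the asics, resumes the IPs, recovers the GTT
 * and VRAM contents and re-runs the IB ring tests.
 * Returns 0 on success, negative error code on failure.
 */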
3342static int amdgpu_do_asic_reset(struct amdgpu_hive_info *hive,
3343 struct list_head *device_list_handle,
3344 bool *need_full_reset_arg)
3345{
3346 struct amdgpu_device *tmp_adev = NULL;
3347 bool need_full_reset = *need_full_reset_arg, vram_lost = false;
3348 int r = 0;
3349
3350 /*
 3352 	 * ASIC reset has to be done on all XGMI hive nodes ASAP
 3353 	 * to allow proper link negotiation in FW (within 1 sec)
3353 */
3354 if (need_full_reset) {
3355 list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) {
d4535e2c
AG
3356 /* For XGMI run all resets in parallel to speed up the process */
3357 if (tmp_adev->gmc.xgmi.num_physical_nodes > 1) {
3358 if (!queue_work(system_highpri_wq, &tmp_adev->xgmi_reset_work))
3359 r = -EALREADY;
3360 } else
3361 r = amdgpu_asic_reset(tmp_adev);
3362
3363 if (r) {
3364 DRM_ERROR("ASIC reset failed with err r, %d for drm dev, %s",
26bc5340 3365 r, tmp_adev->ddev->unique);
d4535e2c
AG
3366 break;
3367 }
3368 }
3369
 3370 		/* For XGMI wait for all PSP resets to complete before proceeding */
3371 if (!r) {
3372 list_for_each_entry(tmp_adev, device_list_handle,
3373 gmc.xgmi.head) {
3374 if (tmp_adev->gmc.xgmi.num_physical_nodes > 1) {
3375 flush_work(&tmp_adev->xgmi_reset_work);
3376 r = tmp_adev->asic_reset_res;
3377 if (r)
3378 break;
3379 }
3380 }
26bc5340
AG
3381 }
3382 }
3383
3384
3385 list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) {
3386 if (need_full_reset) {
3387 /* post card */
3388 if (amdgpu_atom_asic_init(tmp_adev->mode_info.atom_context))
3389 DRM_WARN("asic atom init failed!");
3390
3391 if (!r) {
3392 dev_info(tmp_adev->dev, "GPU reset succeeded, trying to resume\n");
3393 r = amdgpu_device_ip_resume_phase1(tmp_adev);
3394 if (r)
3395 goto out;
3396
3397 vram_lost = amdgpu_device_check_vram_lost(tmp_adev);
3398 if (vram_lost) {
3399 DRM_ERROR("VRAM is lost!\n");
3400 atomic_inc(&tmp_adev->vram_lost_counter);
3401 }
3402
3403 r = amdgpu_gtt_mgr_recover(
3404 &tmp_adev->mman.bdev.man[TTM_PL_TT]);
3405 if (r)
3406 goto out;
3407
3408 r = amdgpu_device_fw_loading(tmp_adev);
3409 if (r)
3410 return r;
3411
3412 r = amdgpu_device_ip_resume_phase2(tmp_adev);
3413 if (r)
3414 goto out;
3415
3416 if (vram_lost)
3417 amdgpu_device_fill_reset_magic(tmp_adev);
3418
3419 /* Update PSP FW topology after reset */
3420 if (hive && tmp_adev->gmc.xgmi.num_physical_nodes > 1)
3421 r = amdgpu_xgmi_update_topology(hive, tmp_adev);
3422 }
3423 }
3424
3425
3426out:
3427 if (!r) {
3428 amdgpu_irq_gpu_reset_resume_helper(tmp_adev);
3429 r = amdgpu_ib_ring_tests(tmp_adev);
3430 if (r) {
3431 dev_err(tmp_adev->dev, "ib ring test failed (%d).\n", r);
3432 r = amdgpu_device_ip_suspend(tmp_adev);
3433 need_full_reset = true;
3434 r = -EAGAIN;
3435 goto end;
3436 }
3437 }
3438
3439 if (!r)
3440 r = amdgpu_device_recover_vram(tmp_adev);
3441 else
3442 tmp_adev->asic_reset_res = r;
3443 }
3444
3445end:
3446 *need_full_reset_arg = need_full_reset;
3447 return r;
3448}
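/*
 * Note on the return contract: when the post-reset IB ring tests fail,
 * amdgpu_do_asic_reset() suspends the IPs again, forces *need_full_reset_arg
 * to true and returns -EAGAIN; amdgpu_device_gpu_recover() below catches the
 * -EAGAIN and jumps back to its "retry" label to redo the sequence as a full
 * reset.
 */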
3449
3450static void amdgpu_device_post_asic_reset(struct amdgpu_device *adev,
3451 struct amdgpu_job *job)
3452{
3453 int i;
3454
3455 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
3456 struct amdgpu_ring *ring = adev->rings[i];
3457
3458 if (!ring || !ring->sched.thread)
3459 continue;
3460
3461		/* only need to recover the scheduler of the given job's ring,
3462		 * or of all rings (in the case @job is NULL),
3463		 * after the reset above has completed
3464		 */
3465		if ((!job || job->base.sched == &ring->sched) && !adev->asic_reset_res)
3466			drm_sched_job_recovery(&ring->sched);
3467
3468		kthread_unpark(ring->sched.thread);
3469 }
3470
3471	if (!amdgpu_device_has_dc_support(adev)) {
3472		drm_helper_resume_force_mode(adev->ddev);
3473	}
3474
3475 adev->asic_reset_res = 0;
3476}
3477
3478static void amdgpu_device_lock_adev(struct amdgpu_device *adev)
3479{
3480 mutex_lock(&adev->lock_reset);
3481 atomic_inc(&adev->gpu_reset_counter);
3482 adev->in_gpu_reset = 1;
3483	/* Block kfd: SRIOV would do it separately */
3484 if (!amdgpu_sriov_vf(adev))
3485 amdgpu_amdkfd_pre_reset(adev);
3486}
3487
3488static void amdgpu_device_unlock_adev(struct amdgpu_device *adev)
3489{
3490	/* unlock kfd: SRIOV would do it separately */
3491 if (!amdgpu_sriov_vf(adev))
3492 amdgpu_amdkfd_post_reset(adev);
3493	amdgpu_vf_error_trans_all(adev);
3494 adev->in_gpu_reset = 0;
3495 mutex_unlock(&adev->lock_reset);
3496}
3497
3498
3499/**
3500 * amdgpu_device_gpu_recover - reset the asic and recover scheduler
3501 *
3502 * @adev: amdgpu device pointer
3503 * @job: which job trigger hang
3504 *
3505 * Attempt to reset the GPU if it has hung (all asics).
3506 * Attempt to do soft-reset or full-reset and reinitialize the ASIC.
3507 * Returns 0 for success or an error on failure.
3508 */
3509
3510int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
3511 struct amdgpu_job *job)
3512{
3513 int r;
3514 struct amdgpu_hive_info *hive = NULL;
3515 bool need_full_reset = false;
3516 struct amdgpu_device *tmp_adev = NULL;
3517 struct list_head device_list, *device_list_handle = NULL;
3518
3519 INIT_LIST_HEAD(&device_list);
3520
3521 dev_info(adev->dev, "GPU reset begin!\n");
3522
3523 /*
3524	 * In the case of an XGMI hive, disallow concurrent resets triggered
3525	 * by different nodes; there is also no point, since the node already
3526	 * executing the reset will reset all the other nodes in the hive.
3527 */
3528	hive = amdgpu_get_xgmi_hive(adev, 0);
3529	if (hive && adev->gmc.xgmi.num_physical_nodes > 1 &&
3530	    !mutex_trylock(&hive->reset_lock))
3531 return 0;
3532
3533 /* Start with adev pre asic reset first for soft reset check.*/
3534 amdgpu_device_lock_adev(adev);
3535 r = amdgpu_device_pre_asic_reset(adev,
3536 job,
3537 &need_full_reset);
3538 if (r) {
3539 /*TODO Should we stop ?*/
3540 DRM_ERROR("GPU pre asic reset failed with err, %d for drm dev, %s ",
3541 r, adev->ddev->unique);
3542 adev->asic_reset_res = r;
3543 }
3544
3545 /* Build list of devices to reset */
3546 if (need_full_reset && adev->gmc.xgmi.num_physical_nodes > 1) {
3547 if (!hive) {
3548 amdgpu_device_unlock_adev(adev);
3549 return -ENODEV;
3550 }
3551
3552 /*
3553 * In case we are in XGMI hive mode device reset is done for all the
3554 * nodes in the hive to retrain all XGMI links and hence the reset
3555 * sequence is executed in loop on all nodes.
3556 */
3557 device_list_handle = &hive->device_list;
3558 } else {
3559 list_add_tail(&adev->gmc.xgmi.head, &device_list);
3560 device_list_handle = &device_list;
3561 }
3562
3563retry: /* Rest of adevs pre asic reset from XGMI hive. */
3564 list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) {
3565
3566 if (tmp_adev == adev)
3567 continue;
3568
3569 amdgpu_device_lock_adev(tmp_adev);
3570 r = amdgpu_device_pre_asic_reset(tmp_adev,
3571 NULL,
3572 &need_full_reset);
3573 /*TODO Should we stop ?*/
3574 if (r) {
3575 DRM_ERROR("GPU pre asic reset failed with err, %d for drm dev, %s ",
3576 r, tmp_adev->ddev->unique);
3577 tmp_adev->asic_reset_res = r;
3578 }
3579 }
3580
3581	/* Actual ASIC resets if needed. */
3582 /* TODO Implement XGMI hive reset logic for SRIOV */
3583 if (amdgpu_sriov_vf(adev)) {
3584 r = amdgpu_device_reset_sriov(adev, job ? false : true);
3585 if (r)
3586 adev->asic_reset_res = r;
3587 } else {
3588 r = amdgpu_do_asic_reset(hive, device_list_handle, &need_full_reset);
3589 if (r && r == -EAGAIN)
3590 goto retry;
3591 }
3592
3593	/* Post ASIC reset for all devs. */
3594 list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) {
3595 amdgpu_device_post_asic_reset(tmp_adev, tmp_adev == adev ? job : NULL);
3596
3597 if (r) {
3598 /* bad news, how to tell it to userspace ? */
3599 dev_info(tmp_adev->dev, "GPU reset(%d) failed\n", atomic_read(&adev->gpu_reset_counter));
3600 amdgpu_vf_error_put(tmp_adev, AMDGIM_ERROR_VF_GPU_RESET_FAIL, 0, r);
3601 } else {
3602 dev_info(tmp_adev->dev, "GPU reset(%d) succeeded!\n", atomic_read(&adev->gpu_reset_counter));
3603 }
3604
3605 amdgpu_device_unlock_adev(tmp_adev);
3606 }
3607
3608 if (hive && adev->gmc.xgmi.num_physical_nodes > 1)
3609		mutex_unlock(&hive->reset_lock);
3610
3611 if (r)
3612 dev_info(adev->dev, "GPU reset end with ret = %d\n", r);
3613 return r;
3614}
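/*
 * Illustrative usage sketch (editorial note, not part of the driver): GPU
 * recovery is normally entered from a DRM GPU scheduler job-timeout callback.
 * Assuming a handler shaped roughly like the one in amdgpu_job.c, the call
 * would look along these lines (example_job_timedout is a hypothetical name):
 *
 *	static void example_job_timedout(struct drm_sched_job *s_job)
 *	{
 *		struct amdgpu_ring *ring = to_amdgpu_ring(s_job->sched);
 *		struct amdgpu_job *job = to_amdgpu_job(s_job);
 *
 *		if (amdgpu_device_should_recover_gpu(ring->adev))
 *			amdgpu_device_gpu_recover(ring->adev, job);
 *	}
 */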
3615
3616/**
3617 * amdgpu_device_get_pcie_info - fetch pcie info about the PCIE slot
3618 *
3619 * @adev: amdgpu_device pointer
3620 *
3621 * Fetches and stores in the driver the PCIE capabilities (gen speed
3622 * and lanes) of the slot the device is in. Handles APUs and
3623 * virtualized environments where PCIE config space may not be available.
3624 */
3625static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev)
3626{
3627 struct pci_dev *pdev;
3628 enum pci_bus_speed speed_cap;
3629 enum pcie_link_width link_width;
3630
3631 if (amdgpu_pcie_gen_cap)
3632 adev->pm.pcie_gen_mask = amdgpu_pcie_gen_cap;
3633
3634 if (amdgpu_pcie_lane_cap)
3635 adev->pm.pcie_mlw_mask = amdgpu_pcie_lane_cap;
3636
3637 /* covers APUs as well */
3638 if (pci_is_root_bus(adev->pdev->bus)) {
3639 if (adev->pm.pcie_gen_mask == 0)
3640 adev->pm.pcie_gen_mask = AMDGPU_DEFAULT_PCIE_GEN_MASK;
3641 if (adev->pm.pcie_mlw_mask == 0)
3642 adev->pm.pcie_mlw_mask = AMDGPU_DEFAULT_PCIE_MLW_MASK;
3643		return;
3644	}
3645
3646	if (adev->pm.pcie_gen_mask == 0) {
3647 /* asic caps */
3648 pdev = adev->pdev;
3649 speed_cap = pcie_get_speed_cap(pdev);
3650 if (speed_cap == PCI_SPEED_UNKNOWN) {
3651 adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
3652 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
3653 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3);
3654		} else {
3655 if (speed_cap == PCIE_SPEED_16_0GT)
3656 adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
3657 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
3658 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3 |
3659 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN4);
3660 else if (speed_cap == PCIE_SPEED_8_0GT)
3661 adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
3662 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
3663 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3);
3664 else if (speed_cap == PCIE_SPEED_5_0GT)
3665 adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
3666 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2);
3667 else
3668 adev->pm.pcie_gen_mask |= CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1;
3669 }
3670 /* platform caps */
3671 pdev = adev->ddev->pdev->bus->self;
3672 speed_cap = pcie_get_speed_cap(pdev);
3673 if (speed_cap == PCI_SPEED_UNKNOWN) {
3674 adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
3675 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2);
3676 } else {
3677 if (speed_cap == PCIE_SPEED_16_0GT)
3678 adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
3679 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2 |
3680 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3 |
3681 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN4);
3682 else if (speed_cap == PCIE_SPEED_8_0GT)
3683 adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
3684 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2 |
3685 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3);
3686 else if (speed_cap == PCIE_SPEED_5_0GT)
3687 adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
3688 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2);
3689 else
3690 adev->pm.pcie_gen_mask |= CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1;
3691
3692 }
3693 }
3694 if (adev->pm.pcie_mlw_mask == 0) {
3695 pdev = adev->ddev->pdev->bus->self;
3696 link_width = pcie_get_width_cap(pdev);
3697 if (link_width == PCIE_LNK_WIDTH_UNKNOWN) {
3698 adev->pm.pcie_mlw_mask |= AMDGPU_DEFAULT_PCIE_MLW_MASK;
3699 } else {
3700 switch (link_width) {
3701 case PCIE_LNK_X32:
3702 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X32 |
3703 CAIL_PCIE_LINK_WIDTH_SUPPORT_X16 |
3704 CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 |
3705 CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
3706 CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
3707 CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
3708 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
3709 break;
3710			case PCIE_LNK_X16:
3711 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X16 |
3712 CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 |
3713 CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
3714 CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
3715 CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
3716 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
3717 break;
3718			case PCIE_LNK_X12:
3719 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 |
3720 CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
3721 CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
3722 CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
3723 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
3724 break;
3725			case PCIE_LNK_X8:
3726 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
3727 CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
3728 CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
3729 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
3730 break;
3731			case PCIE_LNK_X4:
3732 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
3733 CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
3734 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
3735 break;
3736			case PCIE_LNK_X2:
3737 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
3738 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
3739 break;
3740			case PCIE_LNK_X1:
3741 adev->pm.pcie_mlw_mask = CAIL_PCIE_LINK_WIDTH_SUPPORT_X1;
3742 break;
3743 default:
3744 break;
3745 }
3746 }
3747 }
3748}
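/*
 * Illustrative sketch (editorial note, not part of the driver): code elsewhere
 * can test the masks filled in above when choosing a link configuration, e.g.:
 *
 *	bool gen3_ok = adev->pm.pcie_gen_mask & CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3;
 *	bool x16_ok  = adev->pm.pcie_mlw_mask & CAIL_PCIE_LINK_WIDTH_SUPPORT_X16;
 *
 * The masks live under adev->pm because the power-management/DPM code is the
 * main consumer of these capabilities.
 */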
3749