drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
1/*
2 * Copyright 2008 Advanced Micro Devices, Inc.
3 * Copyright 2008 Red Hat Inc.
4 * Copyright 2009 Jerome Glisse.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 * and/or sell copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
20 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
21 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
22 * OTHER DEALINGS IN THE SOFTWARE.
23 *
24 * Authors: Dave Airlie
25 * Alex Deucher
26 * Jerome Glisse
27 */
b1ddf548 28#include <linux/power_supply.h>
0875dc9e 29#include <linux/kthread.h>
30#include <linux/console.h>
31#include <linux/slab.h>
32#include <drm/drmP.h>
33#include <drm/drm_crtc_helper.h>
4562236b 34#include <drm/drm_atomic_helper.h>
35#include <drm/amdgpu_drm.h>
36#include <linux/vgaarb.h>
37#include <linux/vga_switcheroo.h>
38#include <linux/efi.h>
39#include "amdgpu.h"
f4b373f4 40#include "amdgpu_trace.h"
41#include "amdgpu_i2c.h"
42#include "atom.h"
43#include "amdgpu_atombios.h"
a5bde2f9 44#include "amdgpu_atomfirmware.h"
d0dd7f0c 45#include "amd_pcie.h"
46#ifdef CONFIG_DRM_AMDGPU_SI
47#include "si.h"
48#endif
49#ifdef CONFIG_DRM_AMDGPU_CIK
50#include "cik.h"
51#endif
aaa36a97 52#include "vi.h"
460826e6 53#include "soc15.h"
d38ceaf9 54#include "bif/bif_4_1_d.h"
9accf2fd 55#include <linux/pci.h>
bec86378 56#include <linux/firmware.h>
89041940 57#include "amdgpu_vf_error.h"
d38ceaf9 58
ba997709 59#include "amdgpu_amdkfd.h"
d2f52ac8 60#include "amdgpu_pm.h"
d38ceaf9 61
62#include "amdgpu_xgmi.h"
63
e2a75f88 64MODULE_FIRMWARE("amdgpu/vega10_gpu_info.bin");
3f76dced 65MODULE_FIRMWARE("amdgpu/vega12_gpu_info.bin");
2d2e5e7e 66MODULE_FIRMWARE("amdgpu/raven_gpu_info.bin");
ad5a67a7 67MODULE_FIRMWARE("amdgpu/picasso_gpu_info.bin");
54c4d17e 68MODULE_FIRMWARE("amdgpu/raven2_gpu_info.bin");
e2a75f88 69
70#define AMDGPU_RESUME_MS 2000
71
d38ceaf9 72static const char *amdgpu_asic_name[] = {
73 "TAHITI",
74 "PITCAIRN",
75 "VERDE",
76 "OLAND",
77 "HAINAN",
78 "BONAIRE",
79 "KAVERI",
80 "KABINI",
81 "HAWAII",
82 "MULLINS",
83 "TOPAZ",
84 "TONGA",
48299f95 85 "FIJI",
d38ceaf9 86 "CARRIZO",
139f4917 87 "STONEY",
88 "POLARIS10",
89 "POLARIS11",
c4642a47 90 "POLARIS12",
48ff108d 91 "VEGAM",
d4196f01 92 "VEGA10",
8fab806a 93 "VEGA12",
956fcddc 94 "VEGA20",
2ca8a5d2 95 "RAVEN",
96 "LAST",
97};
98
99static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev);
100
101/**
102 * amdgpu_device_is_px - Is the device a dGPU with HG/PX power control
103 *
104 * @dev: drm_device pointer
105 *
106 * Returns true if the device is a dGPU with HG/PX power control,
107 * otherwise return false.
108 */
109bool amdgpu_device_is_px(struct drm_device *dev)
110{
111 struct amdgpu_device *adev = dev->dev_private;
112
2f7d10b3 113 if (adev->flags & AMD_IS_PX)
114 return true;
115 return false;
116}
117
118/*
119 * MMIO register access helper functions.
120 */
121/**
122 * amdgpu_mm_rreg - read a memory mapped IO register
123 *
124 * @adev: amdgpu_device pointer
125 * @reg: dword aligned register offset
126 * @acc_flags: access flags which require special behavior
127 *
128 * Returns the 32 bit value from the offset specified.
129 */
d38ceaf9 130uint32_t amdgpu_mm_rreg(struct amdgpu_device *adev, uint32_t reg,
15d72fd7 131 uint32_t acc_flags)
d38ceaf9 132{
133 uint32_t ret;
134
43ca8efa 135 if (!(acc_flags & AMDGPU_REGS_NO_KIQ) && amdgpu_sriov_runtime(adev))
bc992ba5 136 return amdgpu_virt_kiq_rreg(adev, reg);
bc992ba5 137
15d72fd7 138 if ((reg * 4) < adev->rmmio_size && !(acc_flags & AMDGPU_REGS_IDX))
f4b373f4 139 ret = readl(((void __iomem *)adev->rmmio) + (reg * 4));
140 else {
141 unsigned long flags;
142
143 spin_lock_irqsave(&adev->mmio_idx_lock, flags);
144 writel((reg * 4), ((void __iomem *)adev->rmmio) + (mmMM_INDEX * 4));
145 ret = readl(((void __iomem *)adev->rmmio) + (mmMM_DATA * 4));
146 spin_unlock_irqrestore(&adev->mmio_idx_lock, flags);
d38ceaf9 147 }
148 trace_amdgpu_mm_rreg(adev->pdev->device, reg, ret);
149 return ret;
150}
151
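/*
 * Illustrative sketch (not part of the original file): callers normally go
 * through the RREG32()/WREG32() style macros from amdgpu.h rather than
 * calling amdgpu_mm_rreg()/amdgpu_mm_wreg() directly. A read-modify-write of
 * a hypothetical register mmFOO_CNTL would look like:
 *
 *	u32 tmp = RREG32(mmFOO_CNTL);		// lands in amdgpu_mm_rreg()
 *	tmp |= FOO_CNTL__ENABLE_MASK;		// hypothetical bit mask
 *	WREG32(mmFOO_CNTL, tmp);		// lands in amdgpu_mm_wreg()
 *
 * Offsets beyond the mapped MMIO window fall back to the indirect
 * mmMM_INDEX/mmMM_DATA pair under mmio_idx_lock, as implemented above.
 */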
152/*
153 * MMIO register read with bytes helper function
154 * @offset: byte offset from MMIO start
155 *
156 */
157
158/**
159 * amdgpu_mm_rreg8 - read a memory mapped IO register
160 *
161 * @adev: amdgpu_device pointer
162 * @offset: byte aligned register offset
163 *
164 * Returns the 8 bit value from the offset specified.
165 */
166uint8_t amdgpu_mm_rreg8(struct amdgpu_device *adev, uint32_t offset) {
167 if (offset < adev->rmmio_size)
168 return (readb(adev->rmmio + offset));
169 BUG();
170}
171
172/*
173 * MMIO register write with bytes helper function
174 * @offset: byte offset from MMIO start
175 * @value: the value to be written to the register
176 *
177 */
178/**
179 * amdgpu_mm_wreg8 - write a memory mapped IO register
180 *
181 * @adev: amdgpu_device pointer
182 * @offset: byte aligned register offset
183 * @value: 8 bit value to write
184 *
185 * Writes the value specified to the offset specified.
186 */
187void amdgpu_mm_wreg8(struct amdgpu_device *adev, uint32_t offset, uint8_t value) {
188 if (offset < adev->rmmio_size)
189 writeb(value, adev->rmmio + offset);
190 else
191 BUG();
192}
193
194/**
195 * amdgpu_mm_wreg - write to a memory mapped IO register
196 *
197 * @adev: amdgpu_device pointer
198 * @reg: dword aligned register offset
199 * @v: 32 bit value to write to the register
200 * @acc_flags: access flags which require special behavior
201 *
202 * Writes the value specified to the offset specified.
203 */
d38ceaf9 204void amdgpu_mm_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v,
15d72fd7 205 uint32_t acc_flags)
d38ceaf9 206{
f4b373f4 207 trace_amdgpu_mm_wreg(adev->pdev->device, reg, v);
4e99a44e 208
209 if (adev->asic_type >= CHIP_VEGA10 && reg == 0) {
210 adev->last_mm_index = v;
211 }
212
43ca8efa 213 if (!(acc_flags & AMDGPU_REGS_NO_KIQ) && amdgpu_sriov_runtime(adev))
bc992ba5 214 return amdgpu_virt_kiq_wreg(adev, reg, v);
bc992ba5 215
15d72fd7 216 if ((reg * 4) < adev->rmmio_size && !(acc_flags & AMDGPU_REGS_IDX))
217 writel(v, ((void __iomem *)adev->rmmio) + (reg * 4));
218 else {
219 unsigned long flags;
220
221 spin_lock_irqsave(&adev->mmio_idx_lock, flags);
222 writel((reg * 4), ((void __iomem *)adev->rmmio) + (mmMM_INDEX * 4));
223 writel(v, ((void __iomem *)adev->rmmio) + (mmMM_DATA * 4));
224 spin_unlock_irqrestore(&adev->mmio_idx_lock, flags);
225 }
226
227 if (adev->asic_type >= CHIP_VEGA10 && reg == 1 && adev->last_mm_index == 0x5702C) {
228 udelay(500);
229 }
230}
231
232/**
233 * amdgpu_io_rreg - read an IO register
234 *
235 * @adev: amdgpu_device pointer
236 * @reg: dword aligned register offset
237 *
238 * Returns the 32 bit value from the offset specified.
239 */
240u32 amdgpu_io_rreg(struct amdgpu_device *adev, u32 reg)
241{
242 if ((reg * 4) < adev->rio_mem_size)
243 return ioread32(adev->rio_mem + (reg * 4));
244 else {
245 iowrite32((reg * 4), adev->rio_mem + (mmMM_INDEX * 4));
246 return ioread32(adev->rio_mem + (mmMM_DATA * 4));
247 }
248}
249
250/**
251 * amdgpu_io_wreg - write to an IO register
252 *
253 * @adev: amdgpu_device pointer
254 * @reg: dword aligned register offset
255 * @v: 32 bit value to write to the register
256 *
257 * Writes the value specified to the offset specified.
258 */
259void amdgpu_io_wreg(struct amdgpu_device *adev, u32 reg, u32 v)
260{
261 if (adev->asic_type >= CHIP_VEGA10 && reg == 0) {
262 adev->last_mm_index = v;
263 }
264
265 if ((reg * 4) < adev->rio_mem_size)
266 iowrite32(v, adev->rio_mem + (reg * 4));
267 else {
268 iowrite32((reg * 4), adev->rio_mem + (mmMM_INDEX * 4));
269 iowrite32(v, adev->rio_mem + (mmMM_DATA * 4));
270 }
271
272 if (adev->asic_type >= CHIP_VEGA10 && reg == 1 && adev->last_mm_index == 0x5702C) {
273 udelay(500);
274 }
275}
276
277/**
278 * amdgpu_mm_rdoorbell - read a doorbell dword
279 *
280 * @adev: amdgpu_device pointer
281 * @index: doorbell index
282 *
283 * Returns the value in the doorbell aperture at the
284 * requested doorbell index (CIK).
285 */
286u32 amdgpu_mm_rdoorbell(struct amdgpu_device *adev, u32 index)
287{
288 if (index < adev->doorbell.num_doorbells) {
289 return readl(adev->doorbell.ptr + index);
290 } else {
291 DRM_ERROR("reading beyond doorbell aperture: 0x%08x!\n", index);
292 return 0;
293 }
294}
295
296/**
297 * amdgpu_mm_wdoorbell - write a doorbell dword
298 *
299 * @adev: amdgpu_device pointer
300 * @index: doorbell index
301 * @v: value to write
302 *
303 * Writes @v to the doorbell aperture at the
304 * requested doorbell index (CIK).
305 */
306void amdgpu_mm_wdoorbell(struct amdgpu_device *adev, u32 index, u32 v)
307{
308 if (index < adev->doorbell.num_doorbells) {
309 writel(v, adev->doorbell.ptr + index);
310 } else {
311 DRM_ERROR("writing beyond doorbell aperture: 0x%08x!\n", index);
312 }
313}
314
315/**
316 * amdgpu_mm_rdoorbell64 - read a doorbell Qword
317 *
318 * @adev: amdgpu_device pointer
319 * @index: doorbell index
320 *
321 * Returns the value in the doorbell aperture at the
322 * requested doorbell index (VEGA10+).
323 */
324u64 amdgpu_mm_rdoorbell64(struct amdgpu_device *adev, u32 index)
325{
326 if (index < adev->doorbell.num_doorbells) {
327 return atomic64_read((atomic64_t *)(adev->doorbell.ptr + index));
328 } else {
329 DRM_ERROR("reading beyond doorbell aperture: 0x%08x!\n", index);
330 return 0;
331 }
332}
333
334/**
335 * amdgpu_mm_wdoorbell64 - write a doorbell Qword
336 *
337 * @adev: amdgpu_device pointer
338 * @index: doorbell index
339 * @v: value to write
340 *
341 * Writes @v to the doorbell aperture at the
342 * requested doorbell index (VEGA10+).
343 */
344void amdgpu_mm_wdoorbell64(struct amdgpu_device *adev, u32 index, u64 v)
345{
346 if (index < adev->doorbell.num_doorbells) {
347 atomic64_set((atomic64_t *)(adev->doorbell.ptr + index), v);
348 } else {
349 DRM_ERROR("writing beyond doorbell aperture: 0x%08x!\n", index);
350 }
351}
352
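/*
 * Illustrative sketch (not part of the original file): ring code reaches
 * these helpers through the RDOORBELL32()/WDOORBELL32()/WDOORBELL64()
 * macros, e.g. a write-pointer update that kicks a hardware queue:
 *
 *	if (ring->use_doorbell)
 *		WDOORBELL64(ring->doorbell_index, ring->wptr);	// ends up in amdgpu_mm_wdoorbell64()
 */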
353/**
354 * amdgpu_invalid_rreg - dummy reg read function
355 *
356 * @adev: amdgpu device pointer
357 * @reg: offset of register
358 *
359 * Dummy register read function. Used for register blocks
360 * that certain asics don't have (all asics).
361 * Returns the value in the register.
362 */
363static uint32_t amdgpu_invalid_rreg(struct amdgpu_device *adev, uint32_t reg)
364{
365 DRM_ERROR("Invalid callback to read register 0x%04X\n", reg);
366 BUG();
367 return 0;
368}
369
370/**
371 * amdgpu_invalid_wreg - dummy reg write function
372 *
373 * @adev: amdgpu device pointer
374 * @reg: offset of register
375 * @v: value to write to the register
376 *
377 * Dummy register write function. Used for register blocks
378 * that certain asics don't have (all asics).
379 */
380static void amdgpu_invalid_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v)
381{
382 DRM_ERROR("Invalid callback to write register 0x%04X with 0x%08X\n",
383 reg, v);
384 BUG();
385}
386
387/**
388 * amdgpu_block_invalid_rreg - dummy reg read function
389 *
390 * @adev: amdgpu device pointer
391 * @block: offset of instance
392 * @reg: offset of register
393 *
394 * Dummy register read function. Used for register blocks
395 * that certain asics don't have (all asics).
396 * Returns the value in the register.
397 */
398static uint32_t amdgpu_block_invalid_rreg(struct amdgpu_device *adev,
399 uint32_t block, uint32_t reg)
400{
401 DRM_ERROR("Invalid callback to read register 0x%04X in block 0x%04X\n",
402 reg, block);
403 BUG();
404 return 0;
405}
406
407/**
408 * amdgpu_block_invalid_wreg - dummy reg write function
409 *
410 * @adev: amdgpu device pointer
411 * @block: offset of instance
412 * @reg: offset of register
413 * @v: value to write to the register
414 *
415 * Dummy register write function. Used for register blocks
416 * that certain asics don't have (all asics).
417 */
418static void amdgpu_block_invalid_wreg(struct amdgpu_device *adev,
419 uint32_t block,
420 uint32_t reg, uint32_t v)
421{
422 DRM_ERROR("Invalid block callback to write register 0x%04X in block 0x%04X with 0x%08X\n",
423 reg, block, v);
424 BUG();
425}
426
427/**
428 * amdgpu_device_vram_scratch_init - allocate the VRAM scratch page
429 *
430 * @adev: amdgpu device pointer
431 *
432 * Allocates a scratch page of VRAM for use by various things in the
433 * driver.
434 */
06ec9070 435static int amdgpu_device_vram_scratch_init(struct amdgpu_device *adev)
d38ceaf9 436{
437 return amdgpu_bo_create_kernel(adev, AMDGPU_GPU_PAGE_SIZE,
438 PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM,
439 &adev->vram_scratch.robj,
440 &adev->vram_scratch.gpu_addr,
441 (void **)&adev->vram_scratch.ptr);
442}
443
444/**
445 * amdgpu_device_vram_scratch_fini - Free the VRAM scratch page
446 *
447 * @adev: amdgpu device pointer
448 *
449 * Frees the VRAM scratch page.
450 */
06ec9070 451static void amdgpu_device_vram_scratch_fini(struct amdgpu_device *adev)
d38ceaf9 452{
078af1a3 453 amdgpu_bo_free_kernel(&adev->vram_scratch.robj, NULL, NULL);
454}
455
456/**
9c3f2b54 457 * amdgpu_device_program_register_sequence - program an array of registers.
458 *
459 * @adev: amdgpu_device pointer
460 * @registers: pointer to the register array
461 * @array_size: size of the register array
462 *
463 * Programs an array of registers with AND and OR masks.
464 * This is a helper for setting golden registers.
465 */
466void amdgpu_device_program_register_sequence(struct amdgpu_device *adev,
467 const u32 *registers,
468 const u32 array_size)
469{
470 u32 tmp, reg, and_mask, or_mask;
471 int i;
472
473 if (array_size % 3)
474 return;
475
476 for (i = 0; i < array_size; i +=3) {
477 reg = registers[i + 0];
478 and_mask = registers[i + 1];
479 or_mask = registers[i + 2];
480
481 if (and_mask == 0xffffffff) {
482 tmp = or_mask;
483 } else {
484 tmp = RREG32(reg);
485 tmp &= ~and_mask;
486 tmp |= or_mask;
487 }
488 WREG32(reg, tmp);
489 }
490}
491
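/*
 * Illustrative sketch (hypothetical register names, not part of the original
 * file): golden-register tables passed to
 * amdgpu_device_program_register_sequence() are flat arrays of
 * {offset, and_mask, or_mask} triples:
 *
 *	static const u32 golden_settings_example[] = {
 *		mmFOO_CNTL,   0x0000ffff, 0x00001234,	// clear the low 16 bits, then OR in 0x1234
 *		mmBAR_CONFIG, 0xffffffff, 0x00000001,	// and_mask of ~0 writes or_mask directly
 *	};
 *
 *	amdgpu_device_program_register_sequence(adev, golden_settings_example,
 *						ARRAY_SIZE(golden_settings_example));
 */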
492/**
493 * amdgpu_device_pci_config_reset - reset the GPU
494 *
495 * @adev: amdgpu_device pointer
496 *
497 * Resets the GPU using the pci config reset sequence.
498 * Only applicable to asics prior to vega10.
499 */
8111c387 500void amdgpu_device_pci_config_reset(struct amdgpu_device *adev)
501{
502 pci_write_config_dword(adev->pdev, 0x7c, AMDGPU_ASIC_RESET_DATA);
503}
504
505/*
506 * GPU doorbell aperture helper functions.
507 */
508/**
06ec9070 509 * amdgpu_device_doorbell_init - Init doorbell driver information.
510 *
511 * @adev: amdgpu_device pointer
512 *
513 * Init doorbell driver information (CIK)
514 * Returns 0 on success, error on failure.
515 */
06ec9070 516static int amdgpu_device_doorbell_init(struct amdgpu_device *adev)
d38ceaf9 517{
6585661d 518
519 /* No doorbell on SI hardware generation */
520 if (adev->asic_type < CHIP_BONAIRE) {
521 adev->doorbell.base = 0;
522 adev->doorbell.size = 0;
523 adev->doorbell.num_doorbells = 0;
524 adev->doorbell.ptr = NULL;
525 return 0;
526 }
527
528 if (pci_resource_flags(adev->pdev, 2) & IORESOURCE_UNSET)
529 return -EINVAL;
530
531 amdgpu_asic_init_doorbell_index(adev);
532
533 /* doorbell bar mapping */
534 adev->doorbell.base = pci_resource_start(adev->pdev, 2);
535 adev->doorbell.size = pci_resource_len(adev->pdev, 2);
536
edf600da 537 adev->doorbell.num_doorbells = min_t(u32, adev->doorbell.size / sizeof(u32),
9564f192 538 adev->doorbell_index.max_assignment+1);
539 if (adev->doorbell.num_doorbells == 0)
540 return -EINVAL;
541
ec3db8a6 542 /* For Vega, reserve and map two pages on doorbell BAR since SDMA
543 * paging queue doorbells use the second page. The
544 * AMDGPU_DOORBELL64_MAX_ASSIGNMENT definition assumes all the
545 * doorbells are in the first page. So with the paging queue enabled,
546 * the max num_doorbells should be extended by 1 page (0x400 in dwords)
547 */
548 if (adev->asic_type >= CHIP_VEGA10)
88dc26e4 549 adev->doorbell.num_doorbells += 0x400;
ec3db8a6 550
551 adev->doorbell.ptr = ioremap(adev->doorbell.base,
552 adev->doorbell.num_doorbells *
553 sizeof(u32));
554 if (adev->doorbell.ptr == NULL)
d38ceaf9 555 return -ENOMEM;
556
557 return 0;
558}
559
560/**
06ec9070 561 * amdgpu_device_doorbell_fini - Tear down doorbell driver information.
562 *
563 * @adev: amdgpu_device pointer
564 *
565 * Tear down doorbell driver information (CIK)
566 */
06ec9070 567static void amdgpu_device_doorbell_fini(struct amdgpu_device *adev)
568{
569 iounmap(adev->doorbell.ptr);
570 adev->doorbell.ptr = NULL;
571}
572
22cb0164 573
574
575/*
06ec9070 576 * amdgpu_device_wb_*()
455a7bc2 577 * Writeback is the method by which the GPU updates special pages in memory
ea81a173 578 * with the status of certain GPU events (fences, ring pointers,etc.).
579 */
580
581/**
06ec9070 582 * amdgpu_device_wb_fini - Disable Writeback and free memory
583 *
584 * @adev: amdgpu_device pointer
585 *
586 * Disables Writeback and frees the Writeback memory (all asics).
587 * Used at driver shutdown.
588 */
06ec9070 589static void amdgpu_device_wb_fini(struct amdgpu_device *adev)
590{
591 if (adev->wb.wb_obj) {
592 amdgpu_bo_free_kernel(&adev->wb.wb_obj,
593 &adev->wb.gpu_addr,
594 (void **)&adev->wb.wb);
595 adev->wb.wb_obj = NULL;
596 }
597}
598
599/**
06ec9070 600 * amdgpu_device_wb_init - Init Writeback driver info and allocate memory
601 *
602 * @adev: amdgpu_device pointer
603 *
455a7bc2 604 * Initializes writeback and allocates writeback memory (all asics).
605 * Used at driver startup.
606 * Returns 0 on success or a negative error code on failure.
607 */
06ec9070 608static int amdgpu_device_wb_init(struct amdgpu_device *adev)
609{
610 int r;
611
612 if (adev->wb.wb_obj == NULL) {
613 /* AMDGPU_MAX_WB * sizeof(uint32_t) * 8 = AMDGPU_MAX_WB 256bit slots */
614 r = amdgpu_bo_create_kernel(adev, AMDGPU_MAX_WB * sizeof(uint32_t) * 8,
615 PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT,
616 &adev->wb.wb_obj, &adev->wb.gpu_addr,
617 (void **)&adev->wb.wb);
618 if (r) {
619 dev_warn(adev->dev, "(%d) create WB bo failed\n", r);
620 return r;
621 }
622
623 adev->wb.num_wb = AMDGPU_MAX_WB;
624 memset(&adev->wb.used, 0, sizeof(adev->wb.used));
625
626 /* clear wb memory */
73469585 627 memset((char *)adev->wb.wb, 0, AMDGPU_MAX_WB * sizeof(uint32_t) * 8);
628 }
629
630 return 0;
631}
632
633/**
131b4b36 634 * amdgpu_device_wb_get - Allocate a wb entry
635 *
636 * @adev: amdgpu_device pointer
637 * @wb: wb index
638 *
639 * Allocate a wb slot for use by the driver (all asics).
640 * Returns 0 on success or -EINVAL on failure.
641 */
131b4b36 642int amdgpu_device_wb_get(struct amdgpu_device *adev, u32 *wb)
643{
644 unsigned long offset = find_first_zero_bit(adev->wb.used, adev->wb.num_wb);
d38ceaf9 645
97407b63 646 if (offset < adev->wb.num_wb) {
7014285a 647 __set_bit(offset, adev->wb.used);
63ae07ca 648 *wb = offset << 3; /* convert to dw offset */
649 return 0;
650 } else {
651 return -EINVAL;
652 }
653}
654
d38ceaf9 655/**
131b4b36 656 * amdgpu_device_wb_free - Free a wb entry
657 *
658 * @adev: amdgpu_device pointer
659 * @wb: wb index
660 *
661 * Free a wb slot allocated for use by the driver (all asics)
662 */
131b4b36 663void amdgpu_device_wb_free(struct amdgpu_device *adev, u32 wb)
d38ceaf9 664{
73469585 665 wb >>= 3;
d38ceaf9 666 if (wb < adev->wb.num_wb)
73469585 667 __clear_bit(wb, adev->wb.used);
668}
669
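/*
 * Illustrative note (not part of the original file): each writeback slot is
 * 256 bits (8 dwords) wide, so amdgpu_device_wb_get() shifts the bitmap
 * index left by 3 to hand callers a dword offset into adev->wb.wb, and
 * amdgpu_device_wb_free() shifts right by 3 to get back to the bitmap index.
 * A typical caller (sketch, error handling omitted):
 *
 *	u32 wb;
 *	if (!amdgpu_device_wb_get(adev, &wb)) {
 *		u64 gpu_addr = adev->wb.gpu_addr + wb * 4;	// GPU address of the slot
 *		u32 val = adev->wb.wb[wb];			// CPU-side readback
 *		amdgpu_device_wb_free(adev, wb);
 *	}
 */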
670/**
671 * amdgpu_device_resize_fb_bar - try to resize FB BAR
672 *
673 * @adev: amdgpu_device pointer
674 *
675 * Try to resize FB BAR to make all VRAM CPU accessible. We try very hard not
676 * to fail, but if any of the BARs is not accessible after the resize we abort
677 * driver loading by returning -ENODEV.
678 */
679int amdgpu_device_resize_fb_bar(struct amdgpu_device *adev)
680{
770d13b1 681 u64 space_needed = roundup_pow_of_two(adev->gmc.real_vram_size);
d6895ad3 682 u32 rbar_size = order_base_2(((space_needed >> 20) | 1)) - 1;
683 struct pci_bus *root;
684 struct resource *res;
685 unsigned i;
686 u16 cmd;
687 int r;
688
0c03b912 689 /* Bypass for VF */
690 if (amdgpu_sriov_vf(adev))
691 return 0;
692
693 /* Check if the root BUS has 64bit memory resources */
694 root = adev->pdev->bus;
695 while (root->parent)
696 root = root->parent;
697
698 pci_bus_for_each_resource(root, res, i) {
0ebb7c54 699 if (res && res->flags & (IORESOURCE_MEM | IORESOURCE_MEM_64) &&
700 res->start > 0x100000000ull)
701 break;
702 }
703
704 /* Trying to resize is pointless without a root hub window above 4GB */
705 if (!res)
706 return 0;
707
708 /* Disable memory decoding while we change the BAR addresses and size */
709 pci_read_config_word(adev->pdev, PCI_COMMAND, &cmd);
710 pci_write_config_word(adev->pdev, PCI_COMMAND,
711 cmd & ~PCI_COMMAND_MEMORY);
712
713 /* Free the VRAM and doorbell BAR, we most likely need to move both. */
06ec9070 714 amdgpu_device_doorbell_fini(adev);
715 if (adev->asic_type >= CHIP_BONAIRE)
716 pci_release_resource(adev->pdev, 2);
717
718 pci_release_resource(adev->pdev, 0);
719
720 r = pci_resize_resource(adev->pdev, 0, rbar_size);
721 if (r == -ENOSPC)
722 DRM_INFO("Not enough PCI address space for a large BAR.");
723 else if (r && r != -ENOTSUPP)
724 DRM_ERROR("Problem resizing BAR0 (%d).", r);
725
726 pci_assign_unassigned_bus_resources(adev->pdev->bus);
727
728 /* When the doorbell or fb BAR isn't available we have no chance of
729 * using the device.
730 */
06ec9070 731 r = amdgpu_device_doorbell_init(adev);
732 if (r || (pci_resource_flags(adev->pdev, 0) & IORESOURCE_UNSET))
733 return -ENODEV;
734
735 pci_write_config_word(adev->pdev, PCI_COMMAND, cmd);
736
737 return 0;
738}
a05502e5 739
740/*
741 * GPU helper functions.
742 */
743/**
39c640c0 744 * amdgpu_device_need_post - check if the hw need post or not
745 *
746 * @adev: amdgpu_device pointer
747 *
748 * Check if the asic has been initialized (all asics) at driver startup
749 * or post is needed if hw reset is performed.
750 * Returns true if post is needed, false if not.
d38ceaf9 751 */
39c640c0 752bool amdgpu_device_need_post(struct amdgpu_device *adev)
753{
754 uint32_t reg;
755
756 if (amdgpu_sriov_vf(adev))
757 return false;
758
759 if (amdgpu_passthrough(adev)) {
760 /* for FIJI: In whole GPU pass-through virtualization case, after VM reboot
761 * some old smc fw still needs the driver to do a vPost, otherwise the gpu hangs,
762 * while smc fw versions above 22.15 don't have this flaw, so we force
763 * vPost to be executed for smc versions below 22.15
764 */
765 if (adev->asic_type == CHIP_FIJI) {
766 int err;
767 uint32_t fw_ver;
768 err = request_firmware(&adev->pm.fw, "amdgpu/fiji_smc.bin", adev->dev);
769 /* force vPost if error occurred */
770 if (err)
771 return true;
772
773 fw_ver = *((uint32_t *)adev->pm.fw->data + 69);
774 if (fw_ver < 0x00160e00)
775 return true;
bec86378 776 }
bec86378 777 }
91fe77eb 778
779 if (adev->has_hw_reset) {
780 adev->has_hw_reset = false;
781 return true;
782 }
783
784 /* bios scratch used on CIK+ */
785 if (adev->asic_type >= CHIP_BONAIRE)
786 return amdgpu_atombios_scratch_need_asic_init(adev);
787
788 /* check MEM_SIZE for older asics */
789 reg = amdgpu_asic_get_config_memsize(adev);
790
791 if ((reg != 0) && (reg != 0xffffffff))
792 return false;
793
794 return true;
795}
796
797/* if we get transitioned to only one device, take VGA back */
798/**
06ec9070 799 * amdgpu_device_vga_set_decode - enable/disable vga decode
800 *
801 * @cookie: amdgpu_device pointer
802 * @state: enable/disable vga decode
803 *
804 * Enable/disable vga decode (all asics).
805 * Returns VGA resource flags.
806 */
06ec9070 807static unsigned int amdgpu_device_vga_set_decode(void *cookie, bool state)
808{
809 struct amdgpu_device *adev = cookie;
810 amdgpu_asic_set_vga_state(adev, state);
811 if (state)
812 return VGA_RSRC_LEGACY_IO | VGA_RSRC_LEGACY_MEM |
813 VGA_RSRC_NORMAL_IO | VGA_RSRC_NORMAL_MEM;
814 else
815 return VGA_RSRC_NORMAL_IO | VGA_RSRC_NORMAL_MEM;
816}
817
818/**
819 * amdgpu_device_check_block_size - validate the vm block size
820 *
821 * @adev: amdgpu_device pointer
822 *
823 * Validates the vm block size specified via module parameter.
824 * The vm block size defines number of bits in page table versus page directory,
825 * a page is 4KB so we have 12 bits offset, minimum 9 bits in the
826 * page table and the remaining bits are in the page directory.
827 */
06ec9070 828static void amdgpu_device_check_block_size(struct amdgpu_device *adev)
829{
830 /* defines number of bits in page table versus page directory,
831 * a page is 4KB so we have 12 bits offset, minimum 9 bits in the
832 * page table and the remaining bits are in the page directory */
833 if (amdgpu_vm_block_size == -1)
834 return;
a1adf8be 835
bab4fee7 836 if (amdgpu_vm_block_size < 9) {
837 dev_warn(adev->dev, "VM page table size (%d) too small\n",
838 amdgpu_vm_block_size);
97489129 839 amdgpu_vm_block_size = -1;
a1adf8be 840 }
841}
842
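/*
 * Illustrative arithmetic (not part of the original file): with 4KB pages
 * there is a 12-bit in-page offset, so amdgpu_vm_block_size=9 makes one
 * page-table block cover 2^(12+9) bytes = 2 MiB of GPU virtual address
 * space; larger values move more translation bits into the page table and
 * leave fewer for the page directory.
 */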
843/**
844 * amdgpu_device_check_vm_size - validate the vm size
845 *
846 * @adev: amdgpu_device pointer
847 *
848 * Validates the vm size in GB specified via module parameter.
849 * The VM size is the size of the GPU virtual memory space in GB.
850 */
06ec9070 851static void amdgpu_device_check_vm_size(struct amdgpu_device *adev)
83ca145d 852{
853 /* no need to check the default value */
854 if (amdgpu_vm_size == -1)
855 return;
856
857 if (amdgpu_vm_size < 1) {
858 dev_warn(adev->dev, "VM size (%d) too small, min is 1GB\n",
859 amdgpu_vm_size);
f3368128 860 amdgpu_vm_size = -1;
83ca145d 861 }
862}
863
864static void amdgpu_device_check_smu_prv_buffer_size(struct amdgpu_device *adev)
865{
866 struct sysinfo si;
867 bool is_os_64 = (sizeof(void *) == 8) ? true : false;
868 uint64_t total_memory;
869 uint64_t dram_size_seven_GB = 0x1B8000000;
870 uint64_t dram_size_three_GB = 0xB8000000;
871
872 if (amdgpu_smu_memory_pool_size == 0)
873 return;
874
875 if (!is_os_64) {
876 DRM_WARN("Not 64-bit OS, feature not supported\n");
877 goto def_value;
878 }
879 si_meminfo(&si);
880 total_memory = (uint64_t)si.totalram * si.mem_unit;
881
882 if ((amdgpu_smu_memory_pool_size == 1) ||
883 (amdgpu_smu_memory_pool_size == 2)) {
884 if (total_memory < dram_size_three_GB)
885 goto def_value1;
886 } else if ((amdgpu_smu_memory_pool_size == 4) ||
887 (amdgpu_smu_memory_pool_size == 8)) {
888 if (total_memory < dram_size_seven_GB)
889 goto def_value1;
890 } else {
891 DRM_WARN("Smu memory pool size not supported\n");
892 goto def_value;
893 }
894 adev->pm.smu_prv_buffer_size = amdgpu_smu_memory_pool_size << 28;
895
896 return;
897
898def_value1:
899 DRM_WARN("No enough system memory\n");
900def_value:
901 adev->pm.smu_prv_buffer_size = 0;
902}
903
d38ceaf9 904/**
06ec9070 905 * amdgpu_device_check_arguments - validate module params
906 *
907 * @adev: amdgpu_device pointer
908 *
909 * Validates certain module parameters and updates
910 * the associated values used by the driver (all asics).
911 */
06ec9070 912static void amdgpu_device_check_arguments(struct amdgpu_device *adev)
d38ceaf9 913{
914 if (amdgpu_sched_jobs < 4) {
915 dev_warn(adev->dev, "sched jobs (%d) must be at least 4\n",
916 amdgpu_sched_jobs);
917 amdgpu_sched_jobs = 4;
76117507 918 } else if (!is_power_of_2(amdgpu_sched_jobs)){
919 dev_warn(adev->dev, "sched jobs (%d) must be a power of 2\n",
920 amdgpu_sched_jobs);
921 amdgpu_sched_jobs = roundup_pow_of_two(amdgpu_sched_jobs);
922 }
d38ceaf9 923
83e74db6 924 if (amdgpu_gart_size != -1 && amdgpu_gart_size < 32) {
925 /* gart size must be greater or equal to 32M */
926 dev_warn(adev->dev, "gart size (%d) too small\n",
927 amdgpu_gart_size);
83e74db6 928 amdgpu_gart_size = -1;
929 }
930
36d38372 931 if (amdgpu_gtt_size != -1 && amdgpu_gtt_size < 32) {
c4e1a13a 932 /* gtt size must be greater or equal to 32M */
933 dev_warn(adev->dev, "gtt size (%d) too small\n",
934 amdgpu_gtt_size);
935 amdgpu_gtt_size = -1;
936 }
937
938 /* valid range is between 4 and 9 inclusive */
939 if (amdgpu_vm_fragment_size != -1 &&
940 (amdgpu_vm_fragment_size > 9 || amdgpu_vm_fragment_size < 4)) {
941 dev_warn(adev->dev, "valid range is between 4 and 9\n");
942 amdgpu_vm_fragment_size = -1;
943 }
944
945 amdgpu_device_check_smu_prv_buffer_size(adev);
946
06ec9070 947 amdgpu_device_check_vm_size(adev);
d38ceaf9 948
06ec9070 949 amdgpu_device_check_block_size(adev);
6a7f76e7 950
526bae37 951 if (amdgpu_vram_page_split != -1 && (amdgpu_vram_page_split < 16 ||
76117507 952 !is_power_of_2(amdgpu_vram_page_split))) {
953 dev_warn(adev->dev, "invalid VRAM page split (%d)\n",
954 amdgpu_vram_page_split);
955 amdgpu_vram_page_split = 1024;
956 }
957
958 if (amdgpu_lockup_timeout == 0) {
959 dev_warn(adev->dev, "lockup_timeout msut be > 0, adjusting to 10000\n");
960 amdgpu_lockup_timeout = 10000;
961 }
962
963 adev->firmware.load_type = amdgpu_ucode_get_load_type(adev, amdgpu_fw_load_type);
964}
965
966/**
967 * amdgpu_switcheroo_set_state - set switcheroo state
968 *
969 * @pdev: pci dev pointer
1694467b 970 * @state: vga_switcheroo state
971 *
972 * Callback for the switcheroo driver. Suspends or resumes
973 * the asic before or after it is powered up using ACPI methods.
974 */
975static void amdgpu_switcheroo_set_state(struct pci_dev *pdev, enum vga_switcheroo_state state)
976{
977 struct drm_device *dev = pci_get_drvdata(pdev);
978
979 if (amdgpu_device_is_px(dev) && state == VGA_SWITCHEROO_OFF)
980 return;
981
982 if (state == VGA_SWITCHEROO_ON) {
7ca85295 983 pr_info("amdgpu: switched on\n");
984 /* don't suspend or resume card normally */
985 dev->switch_power_state = DRM_SWITCH_POWER_CHANGING;
986
810ddc3a 987 amdgpu_device_resume(dev, true, true);
d38ceaf9 988
989 dev->switch_power_state = DRM_SWITCH_POWER_ON;
990 drm_kms_helper_poll_enable(dev);
991 } else {
7ca85295 992 pr_info("amdgpu: switched off\n");
993 drm_kms_helper_poll_disable(dev);
994 dev->switch_power_state = DRM_SWITCH_POWER_CHANGING;
810ddc3a 995 amdgpu_device_suspend(dev, true, true);
996 dev->switch_power_state = DRM_SWITCH_POWER_OFF;
997 }
998}
999
1000/**
1001 * amdgpu_switcheroo_can_switch - see if switcheroo state can change
1002 *
1003 * @pdev: pci dev pointer
1004 *
1005 * Callback for the switcheroo driver. Check if the switcheroo
1006 * state can be changed.
1007 * Returns true if the state can be changed, false if not.
1008 */
1009static bool amdgpu_switcheroo_can_switch(struct pci_dev *pdev)
1010{
1011 struct drm_device *dev = pci_get_drvdata(pdev);
1012
1013 /*
1014 * FIXME: open_count is protected by drm_global_mutex but that would lead to
1015 * locking inversion with the driver load path. And the access here is
1016 * completely racy anyway. So don't bother with locking for now.
1017 */
1018 return dev->open_count == 0;
1019}
1020
1021static const struct vga_switcheroo_client_ops amdgpu_switcheroo_ops = {
1022 .set_gpu_state = amdgpu_switcheroo_set_state,
1023 .reprobe = NULL,
1024 .can_switch = amdgpu_switcheroo_can_switch,
1025};
1026
1027/**
1028 * amdgpu_device_ip_set_clockgating_state - set the CG state
1029 *
87e3f136 1030 * @dev: amdgpu_device pointer
1031 * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
1032 * @state: clockgating state (gate or ungate)
1033 *
1034 * Sets the requested clockgating state for all instances of
1035 * the hardware IP specified.
1036 * Returns the error code from the last instance.
1037 */
43fa561f 1038int amdgpu_device_ip_set_clockgating_state(void *dev,
1039 enum amd_ip_block_type block_type,
1040 enum amd_clockgating_state state)
d38ceaf9 1041{
43fa561f 1042 struct amdgpu_device *adev = dev;
1043 int i, r = 0;
1044
1045 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 1046 if (!adev->ip_blocks[i].status.valid)
9ecbe7f5 1047 continue;
1048 if (adev->ip_blocks[i].version->type != block_type)
1049 continue;
1050 if (!adev->ip_blocks[i].version->funcs->set_clockgating_state)
1051 continue;
1052 r = adev->ip_blocks[i].version->funcs->set_clockgating_state(
1053 (void *)adev, state);
1054 if (r)
1055 DRM_ERROR("set_clockgating_state of IP block <%s> failed %d\n",
1056 adev->ip_blocks[i].version->funcs->name, r);
1057 }
1058 return r;
1059}
1060
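/*
 * Illustrative sketch (hypothetical caller, not part of the original file):
 * gating and later ungating the GFX block would look like
 *
 *	amdgpu_device_ip_set_clockgating_state(adev, AMD_IP_BLOCK_TYPE_GFX,
 *					       AMD_CG_STATE_GATE);
 *	...
 *	amdgpu_device_ip_set_clockgating_state(adev, AMD_IP_BLOCK_TYPE_GFX,
 *					       AMD_CG_STATE_UNGATE);
 */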
1061/**
1062 * amdgpu_device_ip_set_powergating_state - set the PG state
1063 *
87e3f136 1064 * @dev: amdgpu_device pointer
e3ecdffa
AD
1065 * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
1066 * @state: powergating state (gate or ungate)
1067 *
1068 * Sets the requested powergating state for all instances of
1069 * the hardware IP specified.
1070 * Returns the error code from the last instance.
1071 */
43fa561f 1072int amdgpu_device_ip_set_powergating_state(void *dev,
2990a1fc
AD
1073 enum amd_ip_block_type block_type,
1074 enum amd_powergating_state state)
d38ceaf9 1075{
43fa561f 1076 struct amdgpu_device *adev = dev;
d38ceaf9
AD
1077 int i, r = 0;
1078
1079 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 1080 if (!adev->ip_blocks[i].status.valid)
9ecbe7f5 1081 continue;
c722865a
RZ
1082 if (adev->ip_blocks[i].version->type != block_type)
1083 continue;
1084 if (!adev->ip_blocks[i].version->funcs->set_powergating_state)
1085 continue;
1086 r = adev->ip_blocks[i].version->funcs->set_powergating_state(
1087 (void *)adev, state);
1088 if (r)
1089 DRM_ERROR("set_powergating_state of IP block <%s> failed %d\n",
1090 adev->ip_blocks[i].version->funcs->name, r);
d38ceaf9
AD
1091 }
1092 return r;
1093}
1094
e3ecdffa
AD
1095/**
1096 * amdgpu_device_ip_get_clockgating_state - get the CG state
1097 *
1098 * @adev: amdgpu_device pointer
1099 * @flags: clockgating feature flags
1100 *
1101 * Walks the list of IPs on the device and updates the clockgating
1102 * flags for each IP.
1103 * Updates @flags with the feature flags for each hardware IP where
1104 * clockgating is enabled.
1105 */
2990a1fc
AD
1106void amdgpu_device_ip_get_clockgating_state(struct amdgpu_device *adev,
1107 u32 *flags)
6cb2d4e4
HR
1108{
1109 int i;
1110
1111 for (i = 0; i < adev->num_ip_blocks; i++) {
1112 if (!adev->ip_blocks[i].status.valid)
1113 continue;
1114 if (adev->ip_blocks[i].version->funcs->get_clockgating_state)
1115 adev->ip_blocks[i].version->funcs->get_clockgating_state((void *)adev, flags);
1116 }
1117}
1118
1119/**
1120 * amdgpu_device_ip_wait_for_idle - wait for idle
1121 *
1122 * @adev: amdgpu_device pointer
1123 * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
1124 *
1125 * Waits for the requested hardware IP to be idle.
1126 * Returns 0 for success or a negative error code on failure.
1127 */
2990a1fc
AD
1128int amdgpu_device_ip_wait_for_idle(struct amdgpu_device *adev,
1129 enum amd_ip_block_type block_type)
5dbbb60b
AD
1130{
1131 int i, r;
1132
1133 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 1134 if (!adev->ip_blocks[i].status.valid)
9ecbe7f5 1135 continue;
a1255107
AD
1136 if (adev->ip_blocks[i].version->type == block_type) {
1137 r = adev->ip_blocks[i].version->funcs->wait_for_idle((void *)adev);
5dbbb60b
AD
1138 if (r)
1139 return r;
1140 break;
1141 }
1142 }
1143 return 0;
1144
1145}
1146
1147/**
1148 * amdgpu_device_ip_is_idle - is the hardware IP idle
1149 *
1150 * @adev: amdgpu_device pointer
1151 * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
1152 *
1153 * Check if the hardware IP is idle or not.
1154 * Returns true if the IP is idle, false if not.
1155 */
2990a1fc
AD
1156bool amdgpu_device_ip_is_idle(struct amdgpu_device *adev,
1157 enum amd_ip_block_type block_type)
5dbbb60b
AD
1158{
1159 int i;
1160
1161 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 1162 if (!adev->ip_blocks[i].status.valid)
9ecbe7f5 1163 continue;
a1255107
AD
1164 if (adev->ip_blocks[i].version->type == block_type)
1165 return adev->ip_blocks[i].version->funcs->is_idle((void *)adev);
5dbbb60b
AD
1166 }
1167 return true;
1168
1169}
1170
e3ecdffa
AD
1171/**
1172 * amdgpu_device_ip_get_ip_block - get a hw IP pointer
1173 *
1174 * @adev: amdgpu_device pointer
87e3f136 1175 * @type: Type of hardware IP (SMU, GFX, UVD, etc.)
e3ecdffa
AD
1176 *
1177 * Returns a pointer to the hardware IP block structure
1178 * if it exists for the asic, otherwise NULL.
1179 */
2990a1fc
AD
1180struct amdgpu_ip_block *
1181amdgpu_device_ip_get_ip_block(struct amdgpu_device *adev,
1182 enum amd_ip_block_type type)
d38ceaf9
AD
1183{
1184 int i;
1185
1186 for (i = 0; i < adev->num_ip_blocks; i++)
a1255107 1187 if (adev->ip_blocks[i].version->type == type)
d38ceaf9
AD
1188 return &adev->ip_blocks[i];
1189
1190 return NULL;
1191}
1192
1193/**
2990a1fc 1194 * amdgpu_device_ip_block_version_cmp
d38ceaf9
AD
1195 *
1196 * @adev: amdgpu_device pointer
5fc3aeeb 1197 * @type: enum amd_ip_block_type
d38ceaf9
AD
1198 * @major: major version
1199 * @minor: minor version
1200 *
1201 * return 0 if equal or greater
1202 * return 1 if smaller or the ip_block doesn't exist
1203 */
2990a1fc
AD
1204int amdgpu_device_ip_block_version_cmp(struct amdgpu_device *adev,
1205 enum amd_ip_block_type type,
1206 u32 major, u32 minor)
d38ceaf9 1207{
2990a1fc 1208 struct amdgpu_ip_block *ip_block = amdgpu_device_ip_get_ip_block(adev, type);
d38ceaf9 1209
a1255107
AD
1210 if (ip_block && ((ip_block->version->major > major) ||
1211 ((ip_block->version->major == major) &&
1212 (ip_block->version->minor >= minor))))
d38ceaf9
AD
1213 return 0;
1214
1215 return 1;
1216}
1217
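/*
 * Illustrative sketch (hypothetical check, not part of the original file):
 *
 *	if (amdgpu_device_ip_block_version_cmp(adev, AMD_IP_BLOCK_TYPE_GFX, 8, 0) == 0) {
 *		// the GFX IP block is version 8.0 or newer
 *	}
 */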
a1255107 1218/**
2990a1fc 1219 * amdgpu_device_ip_block_add
a1255107
AD
1220 *
1221 * @adev: amdgpu_device pointer
1222 * @ip_block_version: pointer to the IP to add
1223 *
1224 * Adds the IP block driver information to the collection of IPs
1225 * on the asic.
1226 */
2990a1fc
AD
1227int amdgpu_device_ip_block_add(struct amdgpu_device *adev,
1228 const struct amdgpu_ip_block_version *ip_block_version)
a1255107
AD
1229{
1230 if (!ip_block_version)
1231 return -EINVAL;
1232
e966a725 1233 DRM_INFO("add ip block number %d <%s>\n", adev->num_ip_blocks,
a0bae357
HR
1234 ip_block_version->funcs->name);
1235
a1255107
AD
1236 adev->ip_blocks[adev->num_ip_blocks++].version = ip_block_version;
1237
1238 return 0;
1239}
1240
1241/**
1242 * amdgpu_device_enable_virtual_display - enable virtual display feature
1243 *
1244 * @adev: amdgpu_device pointer
1245 *
1246 * Enables the virtual display feature if the user has enabled it via
1247 * the module parameter virtual_display. This feature provides a virtual
1248 * display hardware on headless boards or in virtualized environments.
1249 * This function parses and validates the configuration string specified by
1250 * the user and configures the virtual display configuration (number of
1251 * virtual connectors, crtcs, etc.) specified.
1252 */
483ef985 1253static void amdgpu_device_enable_virtual_display(struct amdgpu_device *adev)
9accf2fd
ED
1254{
1255 adev->enable_virtual_display = false;
1256
1257 if (amdgpu_virtual_display) {
1258 struct drm_device *ddev = adev->ddev;
1259 const char *pci_address_name = pci_name(ddev->pdev);
0f66356d 1260 char *pciaddstr, *pciaddstr_tmp, *pciaddname_tmp, *pciaddname;
9accf2fd
ED
1261
1262 pciaddstr = kstrdup(amdgpu_virtual_display, GFP_KERNEL);
1263 pciaddstr_tmp = pciaddstr;
0f66356d
ED
1264 while ((pciaddname_tmp = strsep(&pciaddstr_tmp, ";"))) {
1265 pciaddname = strsep(&pciaddname_tmp, ",");
967de2a9
YT
1266 if (!strcmp("all", pciaddname)
1267 || !strcmp(pci_address_name, pciaddname)) {
0f66356d
ED
1268 long num_crtc;
1269 int res = -1;
1270
9accf2fd 1271 adev->enable_virtual_display = true;
0f66356d
ED
1272
1273 if (pciaddname_tmp)
1274 res = kstrtol(pciaddname_tmp, 10,
1275 &num_crtc);
1276
1277 if (!res) {
1278 if (num_crtc < 1)
1279 num_crtc = 1;
1280 if (num_crtc > 6)
1281 num_crtc = 6;
1282 adev->mode_info.num_crtc = num_crtc;
1283 } else {
1284 adev->mode_info.num_crtc = 1;
1285 }
9accf2fd
ED
1286 break;
1287 }
1288 }
1289
0f66356d
ED
1290 DRM_INFO("virtual display string:%s, %s:virtual_display:%d, num_crtc:%d\n",
1291 amdgpu_virtual_display, pci_address_name,
1292 adev->enable_virtual_display, adev->mode_info.num_crtc);
9accf2fd
ED
1293
1294 kfree(pciaddstr);
1295 }
1296}
1297
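/*
 * Illustrative usage (assumed from the parser above, not part of the
 * original file): the module parameter is a semicolon-separated list of
 * "<pci address>[,crtcs]" entries, e.g.
 *
 *	modprobe amdgpu virtual_display=0000:01:00.0,2
 *
 * enables two virtual CRTCs on the device at 0000:01:00.0, while
 * virtual_display=all enables it on every amdgpu device.
 */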
1298/**
1299 * amdgpu_device_parse_gpu_info_fw - parse gpu info firmware
1300 *
1301 * @adev: amdgpu_device pointer
1302 *
1303 * Parses the asic configuration parameters specified in the gpu info
1304 * firmware and makes them available to the driver for use in configuring
1305 * the asic.
1306 * Returns 0 on success, -EINVAL on failure.
1307 */
e2a75f88
AD
1308static int amdgpu_device_parse_gpu_info_fw(struct amdgpu_device *adev)
1309{
e2a75f88
AD
1310 const char *chip_name;
1311 char fw_name[30];
1312 int err;
1313 const struct gpu_info_firmware_header_v1_0 *hdr;
1314
ab4fe3e1
HR
1315 adev->firmware.gpu_info_fw = NULL;
1316
e2a75f88
AD
1317 switch (adev->asic_type) {
1318 case CHIP_TOPAZ:
1319 case CHIP_TONGA:
1320 case CHIP_FIJI:
e2a75f88 1321 case CHIP_POLARIS10:
cc07f18d 1322 case CHIP_POLARIS11:
e2a75f88 1323 case CHIP_POLARIS12:
cc07f18d 1324 case CHIP_VEGAM:
e2a75f88
AD
1325 case CHIP_CARRIZO:
1326 case CHIP_STONEY:
1327#ifdef CONFIG_DRM_AMDGPU_SI
1328 case CHIP_VERDE:
1329 case CHIP_TAHITI:
1330 case CHIP_PITCAIRN:
1331 case CHIP_OLAND:
1332 case CHIP_HAINAN:
1333#endif
1334#ifdef CONFIG_DRM_AMDGPU_CIK
1335 case CHIP_BONAIRE:
1336 case CHIP_HAWAII:
1337 case CHIP_KAVERI:
1338 case CHIP_KABINI:
1339 case CHIP_MULLINS:
1340#endif
27c0bc71 1341 case CHIP_VEGA20:
e2a75f88
AD
1342 default:
1343 return 0;
1344 case CHIP_VEGA10:
1345 chip_name = "vega10";
1346 break;
3f76dced
AD
1347 case CHIP_VEGA12:
1348 chip_name = "vega12";
1349 break;
2d2e5e7e 1350 case CHIP_RAVEN:
54c4d17e
FX
1351 if (adev->rev_id >= 8)
1352 chip_name = "raven2";
741deade
AD
1353 else if (adev->pdev->device == 0x15d8)
1354 chip_name = "picasso";
54c4d17e
FX
1355 else
1356 chip_name = "raven";
2d2e5e7e 1357 break;
e2a75f88
AD
1358 }
1359
1360 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_gpu_info.bin", chip_name);
ab4fe3e1 1361 err = request_firmware(&adev->firmware.gpu_info_fw, fw_name, adev->dev);
e2a75f88
AD
1362 if (err) {
1363 dev_err(adev->dev,
1364 "Failed to load gpu_info firmware \"%s\"\n",
1365 fw_name);
1366 goto out;
1367 }
ab4fe3e1 1368 err = amdgpu_ucode_validate(adev->firmware.gpu_info_fw);
e2a75f88
AD
1369 if (err) {
1370 dev_err(adev->dev,
1371 "Failed to validate gpu_info firmware \"%s\"\n",
1372 fw_name);
1373 goto out;
1374 }
1375
ab4fe3e1 1376 hdr = (const struct gpu_info_firmware_header_v1_0 *)adev->firmware.gpu_info_fw->data;
e2a75f88
AD
1377 amdgpu_ucode_print_gpu_info_hdr(&hdr->header);
1378
1379 switch (hdr->version_major) {
1380 case 1:
1381 {
1382 const struct gpu_info_firmware_v1_0 *gpu_info_fw =
ab4fe3e1 1383 (const struct gpu_info_firmware_v1_0 *)(adev->firmware.gpu_info_fw->data +
e2a75f88
AD
1384 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1385
b5ab16bf
AD
1386 adev->gfx.config.max_shader_engines = le32_to_cpu(gpu_info_fw->gc_num_se);
1387 adev->gfx.config.max_cu_per_sh = le32_to_cpu(gpu_info_fw->gc_num_cu_per_sh);
1388 adev->gfx.config.max_sh_per_se = le32_to_cpu(gpu_info_fw->gc_num_sh_per_se);
1389 adev->gfx.config.max_backends_per_se = le32_to_cpu(gpu_info_fw->gc_num_rb_per_se);
e2a75f88 1390 adev->gfx.config.max_texture_channel_caches =
b5ab16bf
AD
1391 le32_to_cpu(gpu_info_fw->gc_num_tccs);
1392 adev->gfx.config.max_gprs = le32_to_cpu(gpu_info_fw->gc_num_gprs);
1393 adev->gfx.config.max_gs_threads = le32_to_cpu(gpu_info_fw->gc_num_max_gs_thds);
1394 adev->gfx.config.gs_vgt_table_depth = le32_to_cpu(gpu_info_fw->gc_gs_table_depth);
1395 adev->gfx.config.gs_prim_buffer_depth = le32_to_cpu(gpu_info_fw->gc_gsprim_buff_depth);
e2a75f88 1396 adev->gfx.config.double_offchip_lds_buf =
b5ab16bf
AD
1397 le32_to_cpu(gpu_info_fw->gc_double_offchip_lds_buffer);
1398 adev->gfx.cu_info.wave_front_size = le32_to_cpu(gpu_info_fw->gc_wave_size);
51fd0370
HZ
1399 adev->gfx.cu_info.max_waves_per_simd =
1400 le32_to_cpu(gpu_info_fw->gc_max_waves_per_simd);
1401 adev->gfx.cu_info.max_scratch_slots_per_cu =
1402 le32_to_cpu(gpu_info_fw->gc_max_scratch_slots_per_cu);
1403 adev->gfx.cu_info.lds_size = le32_to_cpu(gpu_info_fw->gc_lds_size);
e2a75f88
AD
1404 break;
1405 }
1406 default:
1407 dev_err(adev->dev,
1408 "Unsupported gpu_info table %d\n", hdr->header.ucode_version);
1409 err = -EINVAL;
1410 goto out;
1411 }
1412out:
e2a75f88
AD
1413 return err;
1414}
1415
1416/**
1417 * amdgpu_device_ip_early_init - run early init for hardware IPs
1418 *
1419 * @adev: amdgpu_device pointer
1420 *
1421 * Early initialization pass for hardware IPs. The hardware IPs that make
1422 * up each asic are discovered and each IP's early_init callback is run. This
1423 * is the first stage in initializing the asic.
1424 * Returns 0 on success, negative error code on failure.
1425 */
06ec9070 1426static int amdgpu_device_ip_early_init(struct amdgpu_device *adev)
d38ceaf9 1427{
aaa36a97 1428 int i, r;
d38ceaf9 1429
483ef985 1430 amdgpu_device_enable_virtual_display(adev);
a6be7570 1431
d38ceaf9 1432 switch (adev->asic_type) {
aaa36a97
AD
1433 case CHIP_TOPAZ:
1434 case CHIP_TONGA:
48299f95 1435 case CHIP_FIJI:
2cc0c0b5 1436 case CHIP_POLARIS10:
32cc7e53 1437 case CHIP_POLARIS11:
c4642a47 1438 case CHIP_POLARIS12:
32cc7e53 1439 case CHIP_VEGAM:
aaa36a97 1440 case CHIP_CARRIZO:
39bb0c92
SL
1441 case CHIP_STONEY:
1442 if (adev->asic_type == CHIP_CARRIZO || adev->asic_type == CHIP_STONEY)
aaa36a97
AD
1443 adev->family = AMDGPU_FAMILY_CZ;
1444 else
1445 adev->family = AMDGPU_FAMILY_VI;
1446
1447 r = vi_set_ip_blocks(adev);
1448 if (r)
1449 return r;
1450 break;
33f34802
KW
1451#ifdef CONFIG_DRM_AMDGPU_SI
1452 case CHIP_VERDE:
1453 case CHIP_TAHITI:
1454 case CHIP_PITCAIRN:
1455 case CHIP_OLAND:
1456 case CHIP_HAINAN:
295d0daf 1457 adev->family = AMDGPU_FAMILY_SI;
33f34802
KW
1458 r = si_set_ip_blocks(adev);
1459 if (r)
1460 return r;
1461 break;
1462#endif
a2e73f56
AD
1463#ifdef CONFIG_DRM_AMDGPU_CIK
1464 case CHIP_BONAIRE:
1465 case CHIP_HAWAII:
1466 case CHIP_KAVERI:
1467 case CHIP_KABINI:
1468 case CHIP_MULLINS:
1469 if ((adev->asic_type == CHIP_BONAIRE) || (adev->asic_type == CHIP_HAWAII))
1470 adev->family = AMDGPU_FAMILY_CI;
1471 else
1472 adev->family = AMDGPU_FAMILY_KV;
1473
1474 r = cik_set_ip_blocks(adev);
1475 if (r)
1476 return r;
1477 break;
1478#endif
e48a3cd9
AD
1479 case CHIP_VEGA10:
1480 case CHIP_VEGA12:
e4bd8170 1481 case CHIP_VEGA20:
e48a3cd9 1482 case CHIP_RAVEN:
741deade 1483 if (adev->asic_type == CHIP_RAVEN)
2ca8a5d2
CZ
1484 adev->family = AMDGPU_FAMILY_RV;
1485 else
1486 adev->family = AMDGPU_FAMILY_AI;
460826e6
KW
1487
1488 r = soc15_set_ip_blocks(adev);
1489 if (r)
1490 return r;
1491 break;
d38ceaf9
AD
1492 default:
1493 /* FIXME: not supported yet */
1494 return -EINVAL;
1495 }
1496
e2a75f88
AD
1497 r = amdgpu_device_parse_gpu_info_fw(adev);
1498 if (r)
1499 return r;
1500
1884734a 1501 amdgpu_amdkfd_device_probe(adev);
1502
3149d9da
XY
1503 if (amdgpu_sriov_vf(adev)) {
1504 r = amdgpu_virt_request_full_gpu(adev, true);
1505 if (r)
5ffa61c1 1506 return -EAGAIN;
3149d9da
XY
1507 }
1508
00f54b97
HR
1509 adev->powerplay.pp_feature = amdgpu_pp_feature_mask;
1510
d38ceaf9
AD
1511 for (i = 0; i < adev->num_ip_blocks; i++) {
1512 if ((amdgpu_ip_block_mask & (1 << i)) == 0) {
ed8cf00c
HR
1513 DRM_ERROR("disabled ip block: %d <%s>\n",
1514 i, adev->ip_blocks[i].version->funcs->name);
a1255107 1515 adev->ip_blocks[i].status.valid = false;
d38ceaf9 1516 } else {
a1255107
AD
1517 if (adev->ip_blocks[i].version->funcs->early_init) {
1518 r = adev->ip_blocks[i].version->funcs->early_init((void *)adev);
2c1a2784 1519 if (r == -ENOENT) {
a1255107 1520 adev->ip_blocks[i].status.valid = false;
2c1a2784 1521 } else if (r) {
a1255107
AD
1522 DRM_ERROR("early_init of IP block <%s> failed %d\n",
1523 adev->ip_blocks[i].version->funcs->name, r);
d38ceaf9 1524 return r;
2c1a2784 1525 } else {
a1255107 1526 adev->ip_blocks[i].status.valid = true;
2c1a2784 1527 }
974e6b64 1528 } else {
a1255107 1529 adev->ip_blocks[i].status.valid = true;
d38ceaf9 1530 }
d38ceaf9
AD
1531 }
1532 }
1533
395d1fb9
NH
1534 adev->cg_flags &= amdgpu_cg_mask;
1535 adev->pg_flags &= amdgpu_pg_mask;
1536
d38ceaf9
AD
1537 return 0;
1538}
1539
0a4f2520
RZ
1540static int amdgpu_device_ip_hw_init_phase1(struct amdgpu_device *adev)
1541{
1542 int i, r;
1543
1544 for (i = 0; i < adev->num_ip_blocks; i++) {
1545 if (!adev->ip_blocks[i].status.sw)
1546 continue;
1547 if (adev->ip_blocks[i].status.hw)
1548 continue;
1549 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
1550 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH) {
1551 r = adev->ip_blocks[i].version->funcs->hw_init(adev);
1552 if (r) {
1553 DRM_ERROR("hw_init of IP block <%s> failed %d\n",
1554 adev->ip_blocks[i].version->funcs->name, r);
1555 return r;
1556 }
1557 adev->ip_blocks[i].status.hw = true;
1558 }
1559 }
1560
1561 return 0;
1562}
1563
1564static int amdgpu_device_ip_hw_init_phase2(struct amdgpu_device *adev)
1565{
1566 int i, r;
1567
1568 for (i = 0; i < adev->num_ip_blocks; i++) {
1569 if (!adev->ip_blocks[i].status.sw)
1570 continue;
1571 if (adev->ip_blocks[i].status.hw)
1572 continue;
1573 r = adev->ip_blocks[i].version->funcs->hw_init(adev);
1574 if (r) {
1575 DRM_ERROR("hw_init of IP block <%s> failed %d\n",
1576 adev->ip_blocks[i].version->funcs->name, r);
1577 return r;
1578 }
1579 adev->ip_blocks[i].status.hw = true;
1580 }
1581
1582 return 0;
1583}
1584
7a3e0bb2
RZ
1585static int amdgpu_device_fw_loading(struct amdgpu_device *adev)
1586{
1587 int r = 0;
1588 int i;
1589
1590 if (adev->asic_type >= CHIP_VEGA10) {
1591 for (i = 0; i < adev->num_ip_blocks; i++) {
1592 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP) {
1593 if (adev->in_gpu_reset || adev->in_suspend) {
1594 if (amdgpu_sriov_vf(adev) && adev->in_gpu_reset)
1595 break; /* sriov gpu reset, psp need to do hw_init before IH because of hw limit */
1596 r = adev->ip_blocks[i].version->funcs->resume(adev);
1597 if (r) {
1598 DRM_ERROR("resume of IP block <%s> failed %d\n",
1599 adev->ip_blocks[i].version->funcs->name, r);
1600 return r;
1601 }
1602 } else {
1603 r = adev->ip_blocks[i].version->funcs->hw_init(adev);
1604 if (r) {
1605 DRM_ERROR("hw_init of IP block <%s> failed %d\n",
1606 adev->ip_blocks[i].version->funcs->name, r);
1607 return r;
1608 }
1609 }
1610 adev->ip_blocks[i].status.hw = true;
1611 }
1612 }
1613 }
1614
91eec27e 1615 if (adev->powerplay.pp_funcs && adev->powerplay.pp_funcs->load_firmware) {
7a3e0bb2
RZ
1616 r = adev->powerplay.pp_funcs->load_firmware(adev->powerplay.pp_handle);
1617 if (r) {
1618 pr_err("firmware loading failed\n");
1619 return r;
1620 }
1621 }
1622
1623 return 0;
1624}
1625
e3ecdffa
AD
1626/**
1627 * amdgpu_device_ip_init - run init for hardware IPs
1628 *
1629 * @adev: amdgpu_device pointer
1630 *
1631 * Main initialization pass for hardware IPs. The list of all the hardware
1632 * IPs that make up the asic is walked and the sw_init and hw_init callbacks
1633 * are run. sw_init initializes the software state associated with each IP
1634 * and hw_init initializes the hardware associated with each IP.
1635 * Returns 0 on success, negative error code on failure.
1636 */
06ec9070 1637static int amdgpu_device_ip_init(struct amdgpu_device *adev)
d38ceaf9
AD
1638{
1639 int i, r;
1640
1641 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 1642 if (!adev->ip_blocks[i].status.valid)
d38ceaf9 1643 continue;
a1255107 1644 r = adev->ip_blocks[i].version->funcs->sw_init((void *)adev);
2c1a2784 1645 if (r) {
a1255107
AD
1646 DRM_ERROR("sw_init of IP block <%s> failed %d\n",
1647 adev->ip_blocks[i].version->funcs->name, r);
d38ceaf9 1648 return r;
2c1a2784 1649 }
a1255107 1650 adev->ip_blocks[i].status.sw = true;
bfca0289 1651
d38ceaf9 1652 /* need to do gmc hw init early so we can allocate gpu mem */
a1255107 1653 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) {
06ec9070 1654 r = amdgpu_device_vram_scratch_init(adev);
2c1a2784
AD
1655 if (r) {
1656 DRM_ERROR("amdgpu_vram_scratch_init failed %d\n", r);
d38ceaf9 1657 return r;
2c1a2784 1658 }
a1255107 1659 r = adev->ip_blocks[i].version->funcs->hw_init((void *)adev);
2c1a2784
AD
1660 if (r) {
1661 DRM_ERROR("hw_init %d failed %d\n", i, r);
d38ceaf9 1662 return r;
2c1a2784 1663 }
06ec9070 1664 r = amdgpu_device_wb_init(adev);
2c1a2784 1665 if (r) {
06ec9070 1666 DRM_ERROR("amdgpu_device_wb_init failed %d\n", r);
d38ceaf9 1667 return r;
2c1a2784 1668 }
a1255107 1669 adev->ip_blocks[i].status.hw = true;
2493664f
ML
1670
1671 /* right after GMC hw init, we create CSA */
1672 if (amdgpu_sriov_vf(adev)) {
1e256e27
RZ
1673 r = amdgpu_allocate_static_csa(adev, &adev->virt.csa_obj,
1674 AMDGPU_GEM_DOMAIN_VRAM,
1675 AMDGPU_CSA_SIZE);
2493664f
ML
1676 if (r) {
1677 DRM_ERROR("allocate CSA failed %d\n", r);
1678 return r;
1679 }
1680 }
d38ceaf9
AD
1681 }
1682 }
1683
c8963ea4
RZ
1684 r = amdgpu_ucode_create_bo(adev); /* create ucode bo when sw_init complete*/
1685 if (r)
1686 return r;
0a4f2520
RZ
1687
1688 r = amdgpu_device_ip_hw_init_phase1(adev);
1689 if (r)
1690 return r;
1691
7a3e0bb2
RZ
1692 r = amdgpu_device_fw_loading(adev);
1693 if (r)
1694 return r;
1695
0a4f2520
RZ
1696 r = amdgpu_device_ip_hw_init_phase2(adev);
1697 if (r)
1698 return r;
d38ceaf9 1699
3e2e2ab5
HZ
1700 if (adev->gmc.xgmi.num_physical_nodes > 1)
1701 amdgpu_xgmi_add_device(adev);
1884734a 1702 amdgpu_amdkfd_device_init(adev);
c6332b97 1703
1704 if (amdgpu_sriov_vf(adev))
1705 amdgpu_virt_release_full_gpu(adev, true);
1706
d38ceaf9
AD
1707 return 0;
1708}
1709
e3ecdffa
AD
1710/**
1711 * amdgpu_device_fill_reset_magic - writes reset magic to gart pointer
1712 *
1713 * @adev: amdgpu_device pointer
1714 *
1715 * Writes a reset magic value to the gart pointer in VRAM. The driver calls
1716 * this function before a GPU reset. If the value is retained after a
1717 * GPU reset, VRAM has not been lost. Some GPU resets may destroy VRAM contents.
1718 */
06ec9070 1719static void amdgpu_device_fill_reset_magic(struct amdgpu_device *adev)
0c49e0b8
CZ
1720{
1721 memcpy(adev->reset_magic, adev->gart.ptr, AMDGPU_RESET_MAGIC_NUM);
1722}
1723
e3ecdffa
AD
1724/**
1725 * amdgpu_device_check_vram_lost - check if vram is valid
1726 *
1727 * @adev: amdgpu_device pointer
1728 *
1729 * Checks the reset magic value written to the gart pointer in VRAM.
1730 * The driver calls this after a GPU reset to see if the contents of
1731 * VRAM is lost or not.
1732 * returns true if vram is lost, false if not.
1733 */
06ec9070 1734static bool amdgpu_device_check_vram_lost(struct amdgpu_device *adev)
0c49e0b8
CZ
1735{
1736 return !!memcmp(adev->gart.ptr, adev->reset_magic,
1737 AMDGPU_RESET_MAGIC_NUM);
1738}
1739
e3ecdffa 1740/**
1112a46b 1741 * amdgpu_device_set_cg_state - set clockgating for amdgpu device
e3ecdffa
AD
1742 *
1743 * @adev: amdgpu_device pointer
1744 *
e3ecdffa 1745 * The list of all the hardware IPs that make up the asic is walked and the
1112a46b
RZ
1746 * set_clockgating_state callbacks are run.
1747 * On the late init path this pass enables clockgating for the hardware IPs;
1748 * on the fini or suspend path it disables clockgating.
e3ecdffa
AD
1749 * Returns 0 on success, negative error code on failure.
1750 */
fdd34271 1751
1112a46b
RZ
1752static int amdgpu_device_set_cg_state(struct amdgpu_device *adev,
1753 enum amd_clockgating_state state)
d38ceaf9 1754{
1112a46b 1755 int i, j, r;
d38ceaf9 1756
4a2ba394
SL
1757 if (amdgpu_emu_mode == 1)
1758 return 0;
1759
1112a46b
RZ
1760 for (j = 0; j < adev->num_ip_blocks; j++) {
1761 i = state == AMD_CG_STATE_GATE ? j : adev->num_ip_blocks - j - 1;
a2d31dc3 1762 if (!adev->ip_blocks[i].status.late_initialized)
d38ceaf9 1763 continue;
4a446d55 1764 /* skip CG for VCE/UVD, it's handled specially */
a1255107 1765 if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_UVD &&
57716327 1766 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCE &&
34319b32 1767 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCN &&
57716327 1768 adev->ip_blocks[i].version->funcs->set_clockgating_state) {
4a446d55 1769 /* enable clockgating to save power */
a1255107 1770 r = adev->ip_blocks[i].version->funcs->set_clockgating_state((void *)adev,
1112a46b 1771 state);
4a446d55
AD
1772 if (r) {
1773 DRM_ERROR("set_clockgating_state(gate) of IP block <%s> failed %d\n",
a1255107 1774 adev->ip_blocks[i].version->funcs->name, r);
4a446d55
AD
1775 return r;
1776 }
b0b00ff1 1777 }
d38ceaf9 1778 }
06b18f61 1779
c9f96fd5
RZ
1780 return 0;
1781}
1782
1112a46b 1783static int amdgpu_device_set_pg_state(struct amdgpu_device *adev, enum amd_powergating_state state)
c9f96fd5 1784{
1112a46b 1785 int i, j, r;
06b18f61 1786
c9f96fd5
RZ
1787 if (amdgpu_emu_mode == 1)
1788 return 0;
1789
1112a46b
RZ
1790 for (j = 0; j < adev->num_ip_blocks; j++) {
1791 i = state == AMD_PG_STATE_GATE ? j : adev->num_ip_blocks - j - 1;
a2d31dc3 1792 if (!adev->ip_blocks[i].status.late_initialized)
c9f96fd5
RZ
1793 continue;
1794 /* skip PG for VCE/UVD, it's handled specially */
1795 if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_UVD &&
1796 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCE &&
1797 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCN &&
1798 adev->ip_blocks[i].version->funcs->set_powergating_state) {
1799 /* enable powergating to save power */
1800 r = adev->ip_blocks[i].version->funcs->set_powergating_state((void *)adev,
1112a46b 1801 state);
c9f96fd5
RZ
1802 if (r) {
1803 DRM_ERROR("set_powergating_state(gate) of IP block <%s> failed %d\n",
1804 adev->ip_blocks[i].version->funcs->name, r);
1805 return r;
1806 }
1807 }
1808 }
2dc80b00
S
1809 return 0;
1810}
1811
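/*
 * Illustrative sketch (not part of this file): the index mapping the two
 * helpers above rely on. Gating walks the IP block list front to back,
 * ungating walks it back to front, so blocks are ungated in the reverse
 * order in which they were gated.
 */
static int example_gating_walk_index(int num_ip_blocks, bool gate, int j)
{
	/* j is the loop counter; the return value is the IP block index used */
	return gate ? j : num_ip_blocks - j - 1;
}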
e3ecdffa
AD
1812/**
1813 * amdgpu_device_ip_late_init - run late init for hardware IPs
1814 *
1815 * @adev: amdgpu_device pointer
1816 *
1817 * Late initialization pass for hardware IPs. The list of all the hardware
1818 * IPs that make up the asic is walked and the late_init callbacks are run.
1819 * late_init covers any special initialization that an IP requires
1820 * after all of the other IPs have been initialized or something that needs to happen
1821 * late in the init process.
1822 * Returns 0 on success, negative error code on failure.
1823 */
06ec9070 1824static int amdgpu_device_ip_late_init(struct amdgpu_device *adev)
2dc80b00
S
1825{
1826 int i = 0, r;
1827
1828 for (i = 0; i < adev->num_ip_blocks; i++) {
73f847db 1829 if (!adev->ip_blocks[i].status.hw)
2dc80b00
S
1830 continue;
1831 if (adev->ip_blocks[i].version->funcs->late_init) {
1832 r = adev->ip_blocks[i].version->funcs->late_init((void *)adev);
1833 if (r) {
1834 DRM_ERROR("late_init of IP block <%s> failed %d\n",
1835 adev->ip_blocks[i].version->funcs->name, r);
1836 return r;
1837 }
2dc80b00 1838 }
73f847db 1839 adev->ip_blocks[i].status.late_initialized = true;
2dc80b00
S
1840 }
1841
1112a46b
RZ
1842 amdgpu_device_set_cg_state(adev, AMD_CG_STATE_GATE);
1843 amdgpu_device_set_pg_state(adev, AMD_PG_STATE_GATE);
916ac57f 1844
2c773de2
S
1845 queue_delayed_work(system_wq, &adev->late_init_work,
1846 msecs_to_jiffies(AMDGPU_RESUME_MS));
d38ceaf9 1847
06ec9070 1848 amdgpu_device_fill_reset_magic(adev);
d38ceaf9
AD
1849
1850 return 0;
1851}
1852
e3ecdffa
AD
1853/**
1854 * amdgpu_device_ip_fini - run fini for hardware IPs
1855 *
1856 * @adev: amdgpu_device pointer
1857 *
1858 * Main teardown pass for hardware IPs. The list of all the hardware
1859 * IPs that make up the asic is walked and the hw_fini and sw_fini callbacks
1860 * are run. hw_fini tears down the hardware associated with each IP
1861 * and sw_fini tears down any software state associated with each IP.
1862 * Returns 0 on success, negative error code on failure.
1863 */
06ec9070 1864static int amdgpu_device_ip_fini(struct amdgpu_device *adev)
d38ceaf9
AD
1865{
1866 int i, r;
1867
a82400b5
AG
1868 if (adev->gmc.xgmi.num_physical_nodes > 1)
1869 amdgpu_xgmi_remove_device(adev);
1870
1884734a 1871 amdgpu_amdkfd_device_fini(adev);
05df1f01
RZ
1872
1873 amdgpu_device_set_pg_state(adev, AMD_PG_STATE_UNGATE);
fdd34271
RZ
1874 amdgpu_device_set_cg_state(adev, AMD_CG_STATE_UNGATE);
1875
3e96dbfd
AD
1876 /* need to disable SMC first */
1877 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 1878 if (!adev->ip_blocks[i].status.hw)
3e96dbfd 1879 continue;
fdd34271 1880 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) {
a1255107 1881 r = adev->ip_blocks[i].version->funcs->hw_fini((void *)adev);
3e96dbfd
AD
1882 /* XXX handle errors */
1883 if (r) {
1884 DRM_DEBUG("hw_fini of IP block <%s> failed %d\n",
a1255107 1885 adev->ip_blocks[i].version->funcs->name, r);
3e96dbfd 1886 }
a1255107 1887 adev->ip_blocks[i].status.hw = false;
3e96dbfd
AD
1888 break;
1889 }
1890 }
1891
d38ceaf9 1892 for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
a1255107 1893 if (!adev->ip_blocks[i].status.hw)
d38ceaf9 1894 continue;
8201a67a 1895
a1255107 1896 r = adev->ip_blocks[i].version->funcs->hw_fini((void *)adev);
d38ceaf9 1897 /* XXX handle errors */
2c1a2784 1898 if (r) {
a1255107
AD
1899 DRM_DEBUG("hw_fini of IP block <%s> failed %d\n",
1900 adev->ip_blocks[i].version->funcs->name, r);
2c1a2784 1901 }
8201a67a 1902
a1255107 1903 adev->ip_blocks[i].status.hw = false;
d38ceaf9
AD
1904 }
1905
9950cda2 1906
d38ceaf9 1907 for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
a1255107 1908 if (!adev->ip_blocks[i].status.sw)
d38ceaf9 1909 continue;
c12aba3a
ML
1910
1911 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) {
c8963ea4 1912 amdgpu_ucode_free_bo(adev);
1e256e27 1913 amdgpu_free_static_csa(&adev->virt.csa_obj);
c12aba3a
ML
1914 amdgpu_device_wb_fini(adev);
1915 amdgpu_device_vram_scratch_fini(adev);
1916 }
1917
a1255107 1918 r = adev->ip_blocks[i].version->funcs->sw_fini((void *)adev);
d38ceaf9 1919 /* XXX handle errors */
2c1a2784 1920 if (r) {
a1255107
AD
1921 DRM_DEBUG("sw_fini of IP block <%s> failed %d\n",
1922 adev->ip_blocks[i].version->funcs->name, r);
2c1a2784 1923 }
a1255107
AD
1924 adev->ip_blocks[i].status.sw = false;
1925 adev->ip_blocks[i].status.valid = false;
d38ceaf9
AD
1926 }
1927
a6dcfd9c 1928 for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
a1255107 1929 if (!adev->ip_blocks[i].status.late_initialized)
8a2eef1d 1930 continue;
a1255107
AD
1931 if (adev->ip_blocks[i].version->funcs->late_fini)
1932 adev->ip_blocks[i].version->funcs->late_fini((void *)adev);
1933 adev->ip_blocks[i].status.late_initialized = false;
a6dcfd9c
ML
1934 }
1935
030308fc 1936 if (amdgpu_sriov_vf(adev))
24136135
ML
1937 if (amdgpu_virt_release_full_gpu(adev, false))
1938 DRM_ERROR("failed to release exclusive mode on fini\n");
2493664f 1939
d38ceaf9
AD
1940 return 0;
1941}
1942
b55c9e7a
EQ
1943static int amdgpu_device_enable_mgpu_fan_boost(void)
1944{
1945 struct amdgpu_gpu_instance *gpu_ins;
1946 struct amdgpu_device *adev;
1947 int i, ret = 0;
1948
1949 mutex_lock(&mgpu_info.mutex);
1950
1951 /*
1952 * MGPU fan boost feature should be enabled
1953 * only when there are two or more dGPUs in
1954 * the system
1955 */
1956 if (mgpu_info.num_dgpu < 2)
1957 goto out;
1958
1959 for (i = 0; i < mgpu_info.num_dgpu; i++) {
1960 gpu_ins = &(mgpu_info.gpu_ins[i]);
1961 adev = gpu_ins->adev;
1962 if (!(adev->flags & AMD_IS_APU) &&
1963 !gpu_ins->mgpu_fan_enabled &&
1964 adev->powerplay.pp_funcs &&
1965 adev->powerplay.pp_funcs->enable_mgpu_fan_boost) {
1966 ret = amdgpu_dpm_enable_mgpu_fan_boost(adev);
1967 if (ret)
1968 break;
1969
1970 gpu_ins->mgpu_fan_enabled = 1;
1971 }
1972 }
1973
1974out:
1975 mutex_unlock(&mgpu_info.mutex);
1976
1977 return ret;
1978}
1979
e3ecdffa 1980/**
1112a46b 1981 * amdgpu_device_ip_late_init_func_handler - work handler for IB tests and other delayed late-init work
e3ecdffa 1982 *
1112a46b 1983 * @work: work_struct.
e3ecdffa 1984 */
06ec9070 1985static void amdgpu_device_ip_late_init_func_handler(struct work_struct *work)
2dc80b00
S
1986{
1987 struct amdgpu_device *adev =
1988 container_of(work, struct amdgpu_device, late_init_work.work);
916ac57f
RZ
1989 int r;
1990
1991 r = amdgpu_ib_ring_tests(adev);
1992 if (r)
1993 DRM_ERROR("ib ring test failed (%d).\n", r);
b55c9e7a
EQ
1994
1995 r = amdgpu_device_enable_mgpu_fan_boost();
1996 if (r)
1997 DRM_ERROR("enable mgpu fan boost failed (%d).\n", r);
2dc80b00
S
1998}
1999
1e317b99
RZ
2000static void amdgpu_device_delay_enable_gfx_off(struct work_struct *work)
2001{
2002 struct amdgpu_device *adev =
2003 container_of(work, struct amdgpu_device, gfx.gfx_off_delay_work.work);
2004
2005 mutex_lock(&adev->gfx.gfx_off_mutex);
2006 if (!adev->gfx.gfx_off_state && !adev->gfx.gfx_off_req_count) {
2007 if (!amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_GFX, true))
2008 adev->gfx.gfx_off_state = true;
2009 }
2010 mutex_unlock(&adev->gfx.gfx_off_mutex);
2011}
2012
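/*
 * Illustrative sketch (not part of this file): the "disallow" side that pairs
 * with the delayed-work handler above. The helper name is hypothetical; the
 * real driver exposes this through its own gfx-off control entry point. It
 * shows how gfx_off_req_count keeps GFXOFF from being entered while a client
 * needs the GFX block powered.
 */
static void example_gfx_off_disallow(struct amdgpu_device *adev)
{
	mutex_lock(&adev->gfx.gfx_off_mutex);

	adev->gfx.gfx_off_req_count++;	/* one more user needs GFX powered */

	/* if GFXOFF is currently engaged, bring the GFX block back up now */
	if (adev->gfx.gfx_off_state &&
	    !amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_GFX, false))
		adev->gfx.gfx_off_state = false;

	mutex_unlock(&adev->gfx.gfx_off_mutex);
}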
e3ecdffa 2013/**
e7854a03 2014 * amdgpu_device_ip_suspend_phase1 - run suspend for hardware IPs (phase 1)
e3ecdffa
AD
2015 *
2016 * @adev: amdgpu_device pointer
2017 *
2018 * Suspend phase 1: clockgating and powergating are disabled and the
2019 * suspend callbacks are run for the display (DCE) hardware IPs only,
2020 * putting their hardware and software state into a state suitable for
2021 * suspend. The remaining IPs are handled in phase 2.
2022 * Returns 0 on success, negative error code on failure.
2023 */
e7854a03
AD
2024static int amdgpu_device_ip_suspend_phase1(struct amdgpu_device *adev)
2025{
2026 int i, r;
2027
05df1f01 2028 amdgpu_device_set_pg_state(adev, AMD_PG_STATE_UNGATE);
fdd34271 2029 amdgpu_device_set_cg_state(adev, AMD_CG_STATE_UNGATE);
05df1f01 2030
e7854a03
AD
2031 for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
2032 if (!adev->ip_blocks[i].status.valid)
2033 continue;
2034 /* displays are handled separately */
2035 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_DCE) {
e7854a03
AD
2036 /* XXX handle errors */
2037 r = adev->ip_blocks[i].version->funcs->suspend(adev);
2038 /* XXX handle errors */
2039 if (r) {
2040 DRM_ERROR("suspend of IP block <%s> failed %d\n",
2041 adev->ip_blocks[i].version->funcs->name, r);
2042 }
2043 }
2044 }
2045
e7854a03
AD
2046 return 0;
2047}
2048
2049/**
2050 * amdgpu_device_ip_suspend_phase2 - run suspend for hardware IPs (phase 2)
2051 *
2052 * @adev: amdgpu_device pointer
2053 *
2054 * Suspend phase 2: the list of all the hardware IPs that make up the
2055 * asic is walked in reverse and the suspend callbacks are run for every
2056 * IP except the displays handled in phase 1, putting the hardware and
2057 * software state in each IP into a state suitable for suspend.
2058 * Returns 0 on success, negative error code on failure.
2059 */
2060static int amdgpu_device_ip_suspend_phase2(struct amdgpu_device *adev)
d38ceaf9
AD
2061{
2062 int i, r;
2063
2064 for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
a1255107 2065 if (!adev->ip_blocks[i].status.valid)
d38ceaf9 2066 continue;
e7854a03
AD
2067 /* displays are handled in phase1 */
2068 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_DCE)
2069 continue;
d38ceaf9 2070 /* XXX handle errors */
a1255107 2071 r = adev->ip_blocks[i].version->funcs->suspend(adev);
d38ceaf9 2072 /* XXX handle errors */
2c1a2784 2073 if (r) {
a1255107
AD
2074 DRM_ERROR("suspend of IP block <%s> failed %d\n",
2075 adev->ip_blocks[i].version->funcs->name, r);
2c1a2784 2076 }
d38ceaf9
AD
2077 }
2078
2079 return 0;
2080}
2081
e7854a03
AD
2082/**
2083 * amdgpu_device_ip_suspend - run suspend for hardware IPs
2084 *
2085 * @adev: amdgpu_device pointer
2086 *
2087 * Main suspend function for hardware IPs. The list of all the hardware
2088 * IPs that make up the asic is walked, clockgating is disabled and the
2089 * suspend callbacks are run. suspend puts the hardware and software state
2090 * in each IP into a state suitable for suspend.
2091 * Returns 0 on success, negative error code on failure.
2092 */
2093int amdgpu_device_ip_suspend(struct amdgpu_device *adev)
2094{
2095 int r;
2096
e7819644
YT
2097 if (amdgpu_sriov_vf(adev))
2098 amdgpu_virt_request_full_gpu(adev, false);
2099
e7854a03
AD
2100 r = amdgpu_device_ip_suspend_phase1(adev);
2101 if (r)
2102 return r;
2103 r = amdgpu_device_ip_suspend_phase2(adev);
2104
e7819644
YT
2105 if (amdgpu_sriov_vf(adev))
2106 amdgpu_virt_release_full_gpu(adev, false);
2107
e7854a03
AD
2108 return r;
2109}
2110
06ec9070 2111static int amdgpu_device_ip_reinit_early_sriov(struct amdgpu_device *adev)
a90ad3c2
ML
2112{
2113 int i, r;
2114
2cb681b6
ML
2115 static enum amd_ip_block_type ip_order[] = {
2116 AMD_IP_BLOCK_TYPE_GMC,
2117 AMD_IP_BLOCK_TYPE_COMMON,
39186aef 2118 AMD_IP_BLOCK_TYPE_PSP,
2cb681b6
ML
2119 AMD_IP_BLOCK_TYPE_IH,
2120 };
a90ad3c2 2121
2cb681b6
ML
2122 for (i = 0; i < ARRAY_SIZE(ip_order); i++) {
2123 int j;
2124 struct amdgpu_ip_block *block;
a90ad3c2 2125
2cb681b6
ML
2126 for (j = 0; j < adev->num_ip_blocks; j++) {
2127 block = &adev->ip_blocks[j];
2128
2129 if (block->version->type != ip_order[i] ||
2130 !block->status.valid)
2131 continue;
2132
2133 r = block->version->funcs->hw_init(adev);
3f48c681 2134 DRM_INFO("RE-INIT: %s %s\n", block->version->funcs->name, r?"failed":"succeeded");
c41d1cf6
ML
2135 if (r)
2136 return r;
a90ad3c2
ML
2137 }
2138 }
2139
2140 return 0;
2141}
2142
06ec9070 2143static int amdgpu_device_ip_reinit_late_sriov(struct amdgpu_device *adev)
a90ad3c2
ML
2144{
2145 int i, r;
2146
2cb681b6
ML
2147 static enum amd_ip_block_type ip_order[] = {
2148 AMD_IP_BLOCK_TYPE_SMC,
2149 AMD_IP_BLOCK_TYPE_DCE,
2150 AMD_IP_BLOCK_TYPE_GFX,
2151 AMD_IP_BLOCK_TYPE_SDMA,
257deb8c
FM
2152 AMD_IP_BLOCK_TYPE_UVD,
2153 AMD_IP_BLOCK_TYPE_VCE
2cb681b6 2154 };
a90ad3c2 2155
2cb681b6
ML
2156 for (i = 0; i < ARRAY_SIZE(ip_order); i++) {
2157 int j;
2158 struct amdgpu_ip_block *block;
a90ad3c2 2159
2cb681b6
ML
2160 for (j = 0; j < adev->num_ip_blocks; j++) {
2161 block = &adev->ip_blocks[j];
2162
2163 if (block->version->type != ip_order[i] ||
2164 !block->status.valid)
2165 continue;
2166
2167 r = block->version->funcs->hw_init(adev);
3f48c681 2168 DRM_INFO("RE-INIT: %s %s\n", block->version->funcs->name, r?"failed":"succeeded");
c41d1cf6
ML
2169 if (r)
2170 return r;
a90ad3c2
ML
2171 }
2172 }
2173
2174 return 0;
2175}
2176
e3ecdffa
AD
2177/**
2178 * amdgpu_device_ip_resume_phase1 - run resume for hardware IPs
2179 *
2180 * @adev: amdgpu_device pointer
2181 *
2182 * First resume function for hardware IPs. The list of all the hardware
2183 * IPs that make up the asic is walked and the resume callbacks are run for
2184 * COMMON, GMC, and IH. resume puts the hardware into a functional state
2185 * after a suspend and updates the software state as necessary. This
2186 * function is also used for restoring the GPU after a GPU reset.
2187 * Returns 0 on success, negative error code on failure.
2188 */
06ec9070 2189static int amdgpu_device_ip_resume_phase1(struct amdgpu_device *adev)
d38ceaf9
AD
2190{
2191 int i, r;
2192
a90ad3c2
ML
2193 for (i = 0; i < adev->num_ip_blocks; i++) {
2194 if (!adev->ip_blocks[i].status.valid)
2195 continue;
a90ad3c2 2196 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
e3ecdffa
AD
2197 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
2198 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH) {
fcf0649f
CZ
2199 r = adev->ip_blocks[i].version->funcs->resume(adev);
2200 if (r) {
2201 DRM_ERROR("resume of IP block <%s> failed %d\n",
2202 adev->ip_blocks[i].version->funcs->name, r);
2203 return r;
2204 }
a90ad3c2
ML
2205 }
2206 }
2207
2208 return 0;
2209}
2210
e3ecdffa
AD
2211/**
2212 * amdgpu_device_ip_resume_phase2 - run resume for hardware IPs
2213 *
2214 * @adev: amdgpu_device pointer
2215 *
2216 * Second resume function for hardware IPs. The list of all the hardware
2217 * IPs that make up the asic is walked and the resume callbacks are run for
2218 * all blocks except COMMON, GMC, and IH. resume puts the hardware into a
2219 * functional state after a suspend and updates the software state as
2220 * necessary. This function is also used for restoring the GPU after a GPU
2221 * reset.
2222 * Returns 0 on success, negative error code on failure.
2223 */
06ec9070 2224static int amdgpu_device_ip_resume_phase2(struct amdgpu_device *adev)
d38ceaf9
AD
2225{
2226 int i, r;
2227
2228 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 2229 if (!adev->ip_blocks[i].status.valid)
d38ceaf9 2230 continue;
fcf0649f 2231 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
e3ecdffa 2232 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
7a3e0bb2
RZ
2233 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH ||
2234 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP)
fcf0649f 2235 continue;
a1255107 2236 r = adev->ip_blocks[i].version->funcs->resume(adev);
2c1a2784 2237 if (r) {
a1255107
AD
2238 DRM_ERROR("resume of IP block <%s> failed %d\n",
2239 adev->ip_blocks[i].version->funcs->name, r);
d38ceaf9 2240 return r;
2c1a2784 2241 }
d38ceaf9
AD
2242 }
2243
2244 return 0;
2245}
2246
e3ecdffa
AD
2247/**
2248 * amdgpu_device_ip_resume - run resume for hardware IPs
2249 *
2250 * @adev: amdgpu_device pointer
2251 *
2252 * Main resume function for hardware IPs. The hardware IPs
2253 * are split into two resume functions because they are
2254 * also used in recovering from a GPU reset and some additional
2255 * steps need to be taken between them. In this case (S3/S4) they are
2256 * run sequentially.
2257 * Returns 0 on success, negative error code on failure.
2258 */
06ec9070 2259static int amdgpu_device_ip_resume(struct amdgpu_device *adev)
fcf0649f
CZ
2260{
2261 int r;
2262
06ec9070 2263 r = amdgpu_device_ip_resume_phase1(adev);
fcf0649f
CZ
2264 if (r)
2265 return r;
7a3e0bb2
RZ
2266
2267 r = amdgpu_device_fw_loading(adev);
2268 if (r)
2269 return r;
2270
06ec9070 2271 r = amdgpu_device_ip_resume_phase2(adev);
fcf0649f
CZ
2272
2273 return r;
2274}
2275
e3ecdffa
AD
2276/**
2277 * amdgpu_device_detect_sriov_bios - determine if the board supports SR-IOV
2278 *
2279 * @adev: amdgpu_device pointer
2280 *
2281 * Query the VBIOS data tables to determine if the board supports SR-IOV.
2282 */
4e99a44e 2283static void amdgpu_device_detect_sriov_bios(struct amdgpu_device *adev)
048765ad 2284{
6867e1b5
ML
2285 if (amdgpu_sriov_vf(adev)) {
2286 if (adev->is_atom_fw) {
2287 if (amdgpu_atomfirmware_gpu_supports_virtualization(adev))
2288 adev->virt.caps |= AMDGPU_SRIOV_CAPS_SRIOV_VBIOS;
2289 } else {
2290 if (amdgpu_atombios_has_gpu_virtualization_table(adev))
2291 adev->virt.caps |= AMDGPU_SRIOV_CAPS_SRIOV_VBIOS;
2292 }
2293
2294 if (!(adev->virt.caps & AMDGPU_SRIOV_CAPS_SRIOV_VBIOS))
2295 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_NO_VBIOS, 0, 0);
a5bde2f9 2296 }
048765ad
AR
2297}
2298
e3ecdffa
AD
2299/**
2300 * amdgpu_device_asic_has_dc_support - determine if DC supports the asic
2301 *
2302 * @asic_type: AMD asic type
2303 *
2304 * Check if there is DC (new modesetting infrastructure) support for an asic.
2305 * Returns true if DC has support, false if not.
2306 */
4562236b
HW
2307bool amdgpu_device_asic_has_dc_support(enum amd_asic_type asic_type)
2308{
2309 switch (asic_type) {
2310#if defined(CONFIG_DRM_AMD_DC)
2311 case CHIP_BONAIRE:
0d6fbccb 2312 case CHIP_KAVERI:
367e6687
AD
2313 case CHIP_KABINI:
2314 case CHIP_MULLINS:
d9fda248
HW
2315 /*
2316 * We have systems in the wild with these ASICs that require
2317 * LVDS and VGA support which is not supported with DC.
2318 *
2319 * Fallback to the non-DC driver here by default so as not to
2320 * cause regressions.
2321 */
2322 return amdgpu_dc > 0;
2323 case CHIP_HAWAII:
4562236b
HW
2324 case CHIP_CARRIZO:
2325 case CHIP_STONEY:
4562236b 2326 case CHIP_POLARIS10:
675fd32b 2327 case CHIP_POLARIS11:
2c8ad2d5 2328 case CHIP_POLARIS12:
675fd32b 2329 case CHIP_VEGAM:
4562236b
HW
2330 case CHIP_TONGA:
2331 case CHIP_FIJI:
42f8ffa1 2332 case CHIP_VEGA10:
dca7b401 2333 case CHIP_VEGA12:
c6034aa2 2334 case CHIP_VEGA20:
dc37a9a0 2335#if defined(CONFIG_DRM_AMD_DC_DCN1_0)
fd187853 2336 case CHIP_RAVEN:
42f8ffa1 2337#endif
fd187853 2338 return amdgpu_dc != 0;
4562236b
HW
2339#endif
2340 default:
2341 return false;
2342 }
2343}
2344
2345/**
2346 * amdgpu_device_has_dc_support - check if dc is supported
2347 *
2348 * @adev: amdgpu_device pointer
2349 *
2350 * Returns true for supported, false for not supported
2351 */
2352bool amdgpu_device_has_dc_support(struct amdgpu_device *adev)
2353{
2555039d
XY
2354 if (amdgpu_sriov_vf(adev))
2355 return false;
2356
4562236b
HW
2357 return amdgpu_device_asic_has_dc_support(adev->asic_type);
2358}
2359
d4535e2c
AG
2360
2361static void amdgpu_device_xgmi_reset_func(struct work_struct *__work)
2362{
2363 struct amdgpu_device *adev =
2364 container_of(__work, struct amdgpu_device, xgmi_reset_work);
2365
2366 adev->asic_reset_res = amdgpu_asic_reset(adev);
2367 if (adev->asic_reset_res)
2368 DRM_WARN("ASIC reset failed with error %d for drm dev %s",
2369 adev->asic_reset_res, adev->ddev->unique);
2370}
2371
2372
d38ceaf9
AD
2373/**
2374 * amdgpu_device_init - initialize the driver
2375 *
2376 * @adev: amdgpu_device pointer
87e3f136 2377 * @ddev: drm dev pointer
d38ceaf9
AD
2378 * @pdev: pci dev pointer
2379 * @flags: driver flags
2380 *
2381 * Initializes the driver info and hw (all asics).
2382 * Returns 0 for success or an error on failure.
2383 * Called at driver startup.
2384 */
2385int amdgpu_device_init(struct amdgpu_device *adev,
2386 struct drm_device *ddev,
2387 struct pci_dev *pdev,
2388 uint32_t flags)
2389{
2390 int r, i;
2391 bool runtime = false;
95844d20 2392 u32 max_MBps;
d38ceaf9
AD
2393
2394 adev->shutdown = false;
2395 adev->dev = &pdev->dev;
2396 adev->ddev = ddev;
2397 adev->pdev = pdev;
2398 adev->flags = flags;
2f7d10b3 2399 adev->asic_type = flags & AMD_ASIC_MASK;
d38ceaf9 2400 adev->usec_timeout = AMDGPU_MAX_USEC_TIMEOUT;
593aa2d2
SL
2401 if (amdgpu_emu_mode == 1)
2402 adev->usec_timeout *= 2;
770d13b1 2403 adev->gmc.gart_size = 512 * 1024 * 1024;
d38ceaf9
AD
2404 adev->accel_working = false;
2405 adev->num_rings = 0;
2406 adev->mman.buffer_funcs = NULL;
2407 adev->mman.buffer_funcs_ring = NULL;
2408 adev->vm_manager.vm_pte_funcs = NULL;
3798e9a6 2409 adev->vm_manager.vm_pte_num_rqs = 0;
132f34e4 2410 adev->gmc.gmc_funcs = NULL;
f54d1867 2411 adev->fence_context = dma_fence_context_alloc(AMDGPU_MAX_RINGS);
b8866c26 2412 bitmap_zero(adev->gfx.pipe_reserve_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
d38ceaf9
AD
2413
2414 adev->smc_rreg = &amdgpu_invalid_rreg;
2415 adev->smc_wreg = &amdgpu_invalid_wreg;
2416 adev->pcie_rreg = &amdgpu_invalid_rreg;
2417 adev->pcie_wreg = &amdgpu_invalid_wreg;
36b9a952
HR
2418 adev->pciep_rreg = &amdgpu_invalid_rreg;
2419 adev->pciep_wreg = &amdgpu_invalid_wreg;
d38ceaf9
AD
2420 adev->uvd_ctx_rreg = &amdgpu_invalid_rreg;
2421 adev->uvd_ctx_wreg = &amdgpu_invalid_wreg;
2422 adev->didt_rreg = &amdgpu_invalid_rreg;
2423 adev->didt_wreg = &amdgpu_invalid_wreg;
ccdbb20a
RZ
2424 adev->gc_cac_rreg = &amdgpu_invalid_rreg;
2425 adev->gc_cac_wreg = &amdgpu_invalid_wreg;
d38ceaf9
AD
2426 adev->audio_endpt_rreg = &amdgpu_block_invalid_rreg;
2427 adev->audio_endpt_wreg = &amdgpu_block_invalid_wreg;
2428
3e39ab90
AD
2429 DRM_INFO("initializing kernel modesetting (%s 0x%04X:0x%04X 0x%04X:0x%04X 0x%02X).\n",
2430 amdgpu_asic_name[adev->asic_type], pdev->vendor, pdev->device,
2431 pdev->subsystem_vendor, pdev->subsystem_device, pdev->revision);
d38ceaf9
AD
2432
2433 /* mutex initializations are all done here so we
2434 * can recall functions without having locking issues */
d38ceaf9 2435 atomic_set(&adev->irq.ih.lock, 0);
0e5ca0d1 2436 mutex_init(&adev->firmware.mutex);
d38ceaf9
AD
2437 mutex_init(&adev->pm.mutex);
2438 mutex_init(&adev->gfx.gpu_clock_mutex);
2439 mutex_init(&adev->srbm_mutex);
b8866c26 2440 mutex_init(&adev->gfx.pipe_reserve_mutex);
d23ee13f 2441 mutex_init(&adev->gfx.gfx_off_mutex);
d38ceaf9 2442 mutex_init(&adev->grbm_idx_mutex);
d38ceaf9 2443 mutex_init(&adev->mn_lock);
e23b74aa 2444 mutex_init(&adev->virt.vf_errors.lock);
d38ceaf9 2445 hash_init(adev->mn_hash);
13a752e3 2446 mutex_init(&adev->lock_reset);
d38ceaf9 2447
06ec9070 2448 amdgpu_device_check_arguments(adev);
d38ceaf9 2449
d38ceaf9
AD
2450 spin_lock_init(&adev->mmio_idx_lock);
2451 spin_lock_init(&adev->smc_idx_lock);
2452 spin_lock_init(&adev->pcie_idx_lock);
2453 spin_lock_init(&adev->uvd_ctx_idx_lock);
2454 spin_lock_init(&adev->didt_idx_lock);
ccdbb20a 2455 spin_lock_init(&adev->gc_cac_idx_lock);
16abb5d2 2456 spin_lock_init(&adev->se_cac_idx_lock);
d38ceaf9 2457 spin_lock_init(&adev->audio_endpt_idx_lock);
95844d20 2458 spin_lock_init(&adev->mm_stats.lock);
d38ceaf9 2459
0c4e7fa5
CZ
2460 INIT_LIST_HEAD(&adev->shadow_list);
2461 mutex_init(&adev->shadow_list_lock);
2462
795f2813
AR
2463 INIT_LIST_HEAD(&adev->ring_lru_list);
2464 spin_lock_init(&adev->ring_lru_list_lock);
2465
06ec9070
AD
2466 INIT_DELAYED_WORK(&adev->late_init_work,
2467 amdgpu_device_ip_late_init_func_handler);
1e317b99
RZ
2468 INIT_DELAYED_WORK(&adev->gfx.gfx_off_delay_work,
2469 amdgpu_device_delay_enable_gfx_off);
2dc80b00 2470
d4535e2c
AG
2471 INIT_WORK(&adev->xgmi_reset_work, amdgpu_device_xgmi_reset_func);
2472
d23ee13f 2473 adev->gfx.gfx_off_req_count = 1;
b1ddf548
RZ
2474 adev->pm.ac_power = power_supply_is_system_supplied() > 0 ? true : false;
2475
0fa49558
AX
2476 /* Registers mapping */
2477 /* TODO: block userspace mapping of io register */
da69c161
KW
2478 if (adev->asic_type >= CHIP_BONAIRE) {
2479 adev->rmmio_base = pci_resource_start(adev->pdev, 5);
2480 adev->rmmio_size = pci_resource_len(adev->pdev, 5);
2481 } else {
2482 adev->rmmio_base = pci_resource_start(adev->pdev, 2);
2483 adev->rmmio_size = pci_resource_len(adev->pdev, 2);
2484 }
d38ceaf9 2485
d38ceaf9
AD
2486 adev->rmmio = ioremap(adev->rmmio_base, adev->rmmio_size);
2487 if (adev->rmmio == NULL) {
2488 return -ENOMEM;
2489 }
2490 DRM_INFO("register mmio base: 0x%08X\n", (uint32_t)adev->rmmio_base);
2491 DRM_INFO("register mmio size: %u\n", (unsigned)adev->rmmio_size);
2492
d38ceaf9
AD
2493 /* io port mapping */
2494 for (i = 0; i < DEVICE_COUNT_RESOURCE; i++) {
2495 if (pci_resource_flags(adev->pdev, i) & IORESOURCE_IO) {
2496 adev->rio_mem_size = pci_resource_len(adev->pdev, i);
2497 adev->rio_mem = pci_iomap(adev->pdev, i, adev->rio_mem_size);
2498 break;
2499 }
2500 }
2501 if (adev->rio_mem == NULL)
b64a18c5 2502 DRM_INFO("PCI I/O BAR is not found.\n");
d38ceaf9 2503
5494d864
AD
2504 amdgpu_device_get_pcie_info(adev);
2505
d38ceaf9 2506 /* early init functions */
06ec9070 2507 r = amdgpu_device_ip_early_init(adev);
d38ceaf9
AD
2508 if (r)
2509 return r;
2510
6585661d
OZ
2511 /* doorbell bar mapping and doorbell index init*/
2512 amdgpu_device_doorbell_init(adev);
2513
d38ceaf9
AD
2514 /* if we have > 1 VGA cards, then disable the amdgpu VGA resources */
2515 /* this will fail for cards that aren't VGA class devices, just
2516 * ignore it */
06ec9070 2517 vga_client_register(adev->pdev, adev, NULL, amdgpu_device_vga_set_decode);
d38ceaf9 2518
e9bef455 2519 if (amdgpu_device_is_px(ddev))
d38ceaf9 2520 runtime = true;
84c8b22e
LW
2521 if (!pci_is_thunderbolt_attached(adev->pdev))
2522 vga_switcheroo_register_client(adev->pdev,
2523 &amdgpu_switcheroo_ops, runtime);
d38ceaf9
AD
2524 if (runtime)
2525 vga_switcheroo_init_domain_pm_ops(adev->dev, &adev->vga_pm_domain);
2526
9475a943
SL
2527 if (amdgpu_emu_mode == 1) {
2528 /* post the asic on emulation mode */
2529 emu_soc_asic_init(adev);
bfca0289 2530 goto fence_driver_init;
9475a943 2531 }
bfca0289 2532
d38ceaf9 2533 /* Read BIOS */
83ba126a
AD
2534 if (!amdgpu_get_bios(adev)) {
2535 r = -EINVAL;
2536 goto failed;
2537 }
f7e9e9fe 2538
d38ceaf9 2539 r = amdgpu_atombios_init(adev);
2c1a2784
AD
2540 if (r) {
2541 dev_err(adev->dev, "amdgpu_atombios_init failed\n");
e23b74aa 2542 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_INIT_FAIL, 0, 0);
83ba126a 2543 goto failed;
2c1a2784 2544 }
d38ceaf9 2545
4e99a44e
ML
2546 /* detect if we are with an SRIOV vbios */
2547 amdgpu_device_detect_sriov_bios(adev);
048765ad 2548
d38ceaf9 2549 /* Post card if necessary */
39c640c0 2550 if (amdgpu_device_need_post(adev)) {
d38ceaf9 2551 if (!adev->bios) {
bec86378 2552 dev_err(adev->dev, "no vBIOS found\n");
83ba126a
AD
2553 r = -EINVAL;
2554 goto failed;
d38ceaf9 2555 }
bec86378 2556 DRM_INFO("GPU posting now...\n");
4e99a44e
ML
2557 r = amdgpu_atom_asic_init(adev->mode_info.atom_context);
2558 if (r) {
2559 dev_err(adev->dev, "gpu post error!\n");
2560 goto failed;
2561 }
d38ceaf9
AD
2562 }
2563
88b64e95
AD
2564 if (adev->is_atom_fw) {
2565 /* Initialize clocks */
2566 r = amdgpu_atomfirmware_get_clock_info(adev);
2567 if (r) {
2568 dev_err(adev->dev, "amdgpu_atomfirmware_get_clock_info failed\n");
e23b74aa 2569 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_GET_CLOCK_FAIL, 0, 0);
88b64e95
AD
2570 goto failed;
2571 }
2572 } else {
a5bde2f9
AD
2573 /* Initialize clocks */
2574 r = amdgpu_atombios_get_clock_info(adev);
2575 if (r) {
2576 dev_err(adev->dev, "amdgpu_atombios_get_clock_info failed\n");
e23b74aa 2577 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_GET_CLOCK_FAIL, 0, 0);
89041940 2578 goto failed;
a5bde2f9
AD
2579 }
2580 /* init i2c buses */
4562236b
HW
2581 if (!amdgpu_device_has_dc_support(adev))
2582 amdgpu_atombios_i2c_init(adev);
2c1a2784 2583 }
d38ceaf9 2584
bfca0289 2585fence_driver_init:
d38ceaf9
AD
2586 /* Fence driver */
2587 r = amdgpu_fence_driver_init(adev);
2c1a2784
AD
2588 if (r) {
2589 dev_err(adev->dev, "amdgpu_fence_driver_init failed\n");
e23b74aa 2590 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_FENCE_INIT_FAIL, 0, 0);
83ba126a 2591 goto failed;
2c1a2784 2592 }
d38ceaf9
AD
2593
2594 /* init the mode config */
2595 drm_mode_config_init(adev->ddev);
2596
06ec9070 2597 r = amdgpu_device_ip_init(adev);
d38ceaf9 2598 if (r) {
8840a387 2599 /* failed in exclusive mode due to timeout */
2600 if (amdgpu_sriov_vf(adev) &&
2601 !amdgpu_sriov_runtime(adev) &&
2602 amdgpu_virt_mmio_blocked(adev) &&
2603 !amdgpu_virt_wait_reset(adev)) {
2604 dev_err(adev->dev, "VF exclusive mode timeout\n");
1daee8b4
PD
2605 /* Don't send request since VF is inactive. */
2606 adev->virt.caps &= ~AMDGPU_SRIOV_CAPS_RUNTIME;
2607 adev->virt.ops = NULL;
8840a387 2608 r = -EAGAIN;
2609 goto failed;
2610 }
06ec9070 2611 dev_err(adev->dev, "amdgpu_device_ip_init failed\n");
e23b74aa 2612 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_AMDGPU_INIT_FAIL, 0, 0);
83ba126a 2613 goto failed;
d38ceaf9
AD
2614 }
2615
2616 adev->accel_working = true;
2617
e59c0205
AX
2618 amdgpu_vm_check_compute_bug(adev);
2619
95844d20
MO
2620 /* Initialize the buffer migration limit. */
2621 if (amdgpu_moverate >= 0)
2622 max_MBps = amdgpu_moverate;
2623 else
2624 max_MBps = 8; /* Allow 8 MB/s. */
2625 /* Get a log2 for easy divisions. */
2626 adev->mm_stats.log2_max_MBps = ilog2(max(1u, max_MBps));
2627
d38ceaf9
AD
2628 r = amdgpu_ib_pool_init(adev);
2629 if (r) {
2630 dev_err(adev->dev, "IB initialization failed (%d).\n", r);
e23b74aa 2631 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_IB_INIT_FAIL, 0, r);
83ba126a 2632 goto failed;
d38ceaf9
AD
2633 }
2634
2dc8f81e
HC
2635 if (amdgpu_sriov_vf(adev))
2636 amdgpu_virt_init_data_exchange(adev);
2637
9bc92b9c
ML
2638 amdgpu_fbdev_init(adev);
2639
d2f52ac8
RZ
2640 r = amdgpu_pm_sysfs_init(adev);
2641 if (r)
2642 DRM_ERROR("registering pm debugfs failed (%d).\n", r);
2643
75758255 2644 r = amdgpu_debugfs_gem_init(adev);
3f14e623 2645 if (r)
d38ceaf9 2646 DRM_ERROR("registering gem debugfs failed (%d).\n", r);
d38ceaf9
AD
2647
2648 r = amdgpu_debugfs_regs_init(adev);
3f14e623 2649 if (r)
d38ceaf9 2650 DRM_ERROR("registering register debugfs failed (%d).\n", r);
d38ceaf9 2651
50ab2533 2652 r = amdgpu_debugfs_firmware_init(adev);
3f14e623 2653 if (r)
50ab2533 2654 DRM_ERROR("registering firmware debugfs failed (%d).\n", r);
50ab2533 2655
763efb6c 2656 r = amdgpu_debugfs_init(adev);
db95e218 2657 if (r)
763efb6c 2658 DRM_ERROR("Creating debugfs files failed (%d).\n", r);
db95e218 2659
d38ceaf9
AD
2660 if ((amdgpu_testing & 1)) {
2661 if (adev->accel_working)
2662 amdgpu_test_moves(adev);
2663 else
2664 DRM_INFO("amdgpu: acceleration disabled, skipping move tests\n");
2665 }
d38ceaf9
AD
2666 if (amdgpu_benchmarking) {
2667 if (adev->accel_working)
2668 amdgpu_benchmark(adev, amdgpu_benchmarking);
2669 else
2670 DRM_INFO("amdgpu: acceleration disabled, skipping benchmarks\n");
2671 }
2672
2673 /* enable clockgating, etc. after ib tests, etc. since some blocks require
2674 * explicit gating rather than handling it automatically.
2675 */
06ec9070 2676 r = amdgpu_device_ip_late_init(adev);
2c1a2784 2677 if (r) {
06ec9070 2678 dev_err(adev->dev, "amdgpu_device_ip_late_init failed\n");
e23b74aa 2679 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_AMDGPU_LATE_INIT_FAIL, 0, r);
83ba126a 2680 goto failed;
2c1a2784 2681 }
d38ceaf9
AD
2682
2683 return 0;
83ba126a
AD
2684
2685failed:
89041940 2686 amdgpu_vf_error_trans_all(adev);
83ba126a
AD
2687 if (runtime)
2688 vga_switcheroo_fini_domain_pm_ops(adev->dev);
8840a387 2689
83ba126a 2690 return r;
d38ceaf9
AD
2691}
2692
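/*
 * Illustrative sketch (not part of this file): roughly how the KMS load path
 * calls amdgpu_device_init() at probe time. The allocation and error handling
 * here are simplified stand-ins for the real amdgpu_driver_load_kms() in
 * amdgpu_kms.c.
 */
static int example_driver_load(struct drm_device *dev, unsigned long flags)
{
	struct amdgpu_device *adev;
	int r;

	adev = kzalloc(sizeof(*adev), GFP_KERNEL);
	if (!adev)
		return -ENOMEM;

	dev->dev_private = (void *)adev;

	/* initializes driver info and hw for all asics */
	r = amdgpu_device_init(adev, dev, dev->pdev, flags);
	if (r)
		kfree(adev);

	return r;
}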
d38ceaf9
AD
2693/**
2694 * amdgpu_device_fini - tear down the driver
2695 *
2696 * @adev: amdgpu_device pointer
2697 *
2698 * Tear down the driver info (all asics).
2699 * Called at driver shutdown.
2700 */
2701void amdgpu_device_fini(struct amdgpu_device *adev)
2702{
2703 int r;
2704
2705 DRM_INFO("amdgpu: finishing device.\n");
2706 adev->shutdown = true;
e5b03032
ML
2707 /* disable all interrupts */
2708 amdgpu_irq_disable_all(adev);
ff97cba8
ML
2709 if (adev->mode_info.mode_config_initialized){
2710 if (!amdgpu_device_has_dc_support(adev))
c2d88e06 2711 drm_helper_force_disable_all(adev->ddev);
ff97cba8
ML
2712 else
2713 drm_atomic_helper_shutdown(adev->ddev);
2714 }
d38ceaf9
AD
2715 amdgpu_ib_pool_fini(adev);
2716 amdgpu_fence_driver_fini(adev);
58e955d9 2717 amdgpu_pm_sysfs_fini(adev);
d38ceaf9 2718 amdgpu_fbdev_fini(adev);
06ec9070 2719 r = amdgpu_device_ip_fini(adev);
ab4fe3e1
HR
2720 if (adev->firmware.gpu_info_fw) {
2721 release_firmware(adev->firmware.gpu_info_fw);
2722 adev->firmware.gpu_info_fw = NULL;
2723 }
d38ceaf9 2724 adev->accel_working = false;
2dc80b00 2725 cancel_delayed_work_sync(&adev->late_init_work);
d38ceaf9 2726 /* free i2c buses */
4562236b
HW
2727 if (!amdgpu_device_has_dc_support(adev))
2728 amdgpu_i2c_fini(adev);
bfca0289
SL
2729
2730 if (amdgpu_emu_mode != 1)
2731 amdgpu_atombios_fini(adev);
2732
d38ceaf9
AD
2733 kfree(adev->bios);
2734 adev->bios = NULL;
84c8b22e
LW
2735 if (!pci_is_thunderbolt_attached(adev->pdev))
2736 vga_switcheroo_unregister_client(adev->pdev);
83ba126a
AD
2737 if (adev->flags & AMD_IS_PX)
2738 vga_switcheroo_fini_domain_pm_ops(adev->dev);
d38ceaf9
AD
2739 vga_client_register(adev->pdev, NULL, NULL, NULL);
2740 if (adev->rio_mem)
2741 pci_iounmap(adev->pdev, adev->rio_mem);
2742 adev->rio_mem = NULL;
2743 iounmap(adev->rmmio);
2744 adev->rmmio = NULL;
06ec9070 2745 amdgpu_device_doorbell_fini(adev);
d38ceaf9 2746 amdgpu_debugfs_regs_cleanup(adev);
d38ceaf9
AD
2747}
2748
2749
2750/*
2751 * Suspend & resume.
2752 */
2753/**
810ddc3a 2754 * amdgpu_device_suspend - initiate device suspend
d38ceaf9 2755 *
87e3f136
DP
2756 * @dev: drm dev pointer
2757 * @suspend: suspend state
2758 * @fbcon : notify the fbdev of suspend
d38ceaf9
AD
2759 *
2760 * Puts the hw in the suspend state (all asics).
2761 * Returns 0 for success or an error on failure.
2762 * Called at driver suspend.
2763 */
810ddc3a 2764int amdgpu_device_suspend(struct drm_device *dev, bool suspend, bool fbcon)
d38ceaf9
AD
2765{
2766 struct amdgpu_device *adev;
2767 struct drm_crtc *crtc;
2768 struct drm_connector *connector;
5ceb54c6 2769 int r;
d38ceaf9
AD
2770
2771 if (dev == NULL || dev->dev_private == NULL) {
2772 return -ENODEV;
2773 }
2774
2775 adev = dev->dev_private;
2776
2777 if (dev->switch_power_state == DRM_SWITCH_POWER_OFF)
2778 return 0;
2779
44779b43 2780 adev->in_suspend = true;
d38ceaf9
AD
2781 drm_kms_helper_poll_disable(dev);
2782
5f818173
S
2783 if (fbcon)
2784 amdgpu_fbdev_set_suspend(adev, 1);
2785
a5459475
RZ
2786 cancel_delayed_work_sync(&adev->late_init_work);
2787
4562236b
HW
2788 if (!amdgpu_device_has_dc_support(adev)) {
2789 /* turn off display hw */
2790 drm_modeset_lock_all(dev);
2791 list_for_each_entry(connector, &dev->mode_config.connector_list, head) {
2792 drm_helper_connector_dpms(connector, DRM_MODE_DPMS_OFF);
2793 }
2794 drm_modeset_unlock_all(dev);
fe1053b7
AD
2795 /* unpin the front buffers and cursors */
2796 list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) {
2797 struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
2798 struct drm_framebuffer *fb = crtc->primary->fb;
2799 struct amdgpu_bo *robj;
2800
2801 if (amdgpu_crtc->cursor_bo) {
2802 struct amdgpu_bo *aobj = gem_to_amdgpu_bo(amdgpu_crtc->cursor_bo);
2803 r = amdgpu_bo_reserve(aobj, true);
2804 if (r == 0) {
2805 amdgpu_bo_unpin(aobj);
2806 amdgpu_bo_unreserve(aobj);
2807 }
756e6880 2808 }
756e6880 2809
fe1053b7
AD
2810 if (fb == NULL || fb->obj[0] == NULL) {
2811 continue;
2812 }
2813 robj = gem_to_amdgpu_bo(fb->obj[0]);
2814 /* don't unpin kernel fb objects */
2815 if (!amdgpu_fbdev_robj_is_fb(adev, robj)) {
2816 r = amdgpu_bo_reserve(robj, true);
2817 if (r == 0) {
2818 amdgpu_bo_unpin(robj);
2819 amdgpu_bo_unreserve(robj);
2820 }
d38ceaf9
AD
2821 }
2822 }
2823 }
fe1053b7
AD
2824
2825 amdgpu_amdkfd_suspend(adev);
2826
2827 r = amdgpu_device_ip_suspend_phase1(adev);
2828
d38ceaf9
AD
2829 /* evict vram memory */
2830 amdgpu_bo_evict_vram(adev);
2831
5ceb54c6 2832 amdgpu_fence_driver_suspend(adev);
d38ceaf9 2833
fe1053b7 2834 r = amdgpu_device_ip_suspend_phase2(adev);
d38ceaf9 2835
a0a71e49
AD
2836 /* evict remaining vram memory
2837 * This second call to evict vram is to evict the gart page table
2838 * using the CPU.
2839 */
d38ceaf9
AD
2840 amdgpu_bo_evict_vram(adev);
2841
2842 pci_save_state(dev->pdev);
2843 if (suspend) {
2844 /* Shut down the device */
2845 pci_disable_device(dev->pdev);
2846 pci_set_power_state(dev->pdev, PCI_D3hot);
74b0b157 2847 } else {
2848 r = amdgpu_asic_reset(adev);
2849 if (r)
2850 DRM_ERROR("amdgpu asic reset failed\n");
d38ceaf9
AD
2851 }
2852
d38ceaf9
AD
2853 return 0;
2854}
2855
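/*
 * Illustrative sketch (not part of this file): a simplified dev_pm_ops
 * suspend hook that ends up in amdgpu_device_suspend() above. The real hook
 * lives in amdgpu_drv.c; the exact drvdata plumbing shown here is an
 * assumption.
 */
static int example_pmops_suspend(struct device *dev)
{
	struct pci_dev *pdev = to_pci_dev(dev);
	struct drm_device *drm_dev = pci_get_drvdata(pdev);

	/* suspend == true: power the device down; fbcon == true: notify fbdev */
	return amdgpu_device_suspend(drm_dev, true, true);
}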
2856/**
810ddc3a 2857 * amdgpu_device_resume - initiate device resume
d38ceaf9 2858 *
87e3f136
DP
2859 * @dev: drm dev pointer
2860 * @resume: resume state
2861 * @fbcon : notify the fbdev of resume
d38ceaf9
AD
2862 *
2863 * Bring the hw back to operating state (all asics).
2864 * Returns 0 for success or an error on failure.
2865 * Called at driver resume.
2866 */
810ddc3a 2867int amdgpu_device_resume(struct drm_device *dev, bool resume, bool fbcon)
d38ceaf9
AD
2868{
2869 struct drm_connector *connector;
2870 struct amdgpu_device *adev = dev->dev_private;
756e6880 2871 struct drm_crtc *crtc;
03161a6e 2872 int r = 0;
d38ceaf9
AD
2873
2874 if (dev->switch_power_state == DRM_SWITCH_POWER_OFF)
2875 return 0;
2876
d38ceaf9
AD
2877 if (resume) {
2878 pci_set_power_state(dev->pdev, PCI_D0);
2879 pci_restore_state(dev->pdev);
74b0b157 2880 r = pci_enable_device(dev->pdev);
03161a6e 2881 if (r)
4d3b9ae5 2882 return r;
d38ceaf9
AD
2883 }
2884
2885 /* post card */
39c640c0 2886 if (amdgpu_device_need_post(adev)) {
74b0b157 2887 r = amdgpu_atom_asic_init(adev->mode_info.atom_context);
2888 if (r)
2889 DRM_ERROR("amdgpu asic init failed\n");
2890 }
d38ceaf9 2891
06ec9070 2892 r = amdgpu_device_ip_resume(adev);
e6707218 2893 if (r) {
06ec9070 2894 DRM_ERROR("amdgpu_device_ip_resume failed (%d).\n", r);
4d3b9ae5 2895 return r;
e6707218 2896 }
5ceb54c6
AD
2897 amdgpu_fence_driver_resume(adev);
2898
d38ceaf9 2899
06ec9070 2900 r = amdgpu_device_ip_late_init(adev);
03161a6e 2901 if (r)
4d3b9ae5 2902 return r;
d38ceaf9 2903
fe1053b7
AD
2904 if (!amdgpu_device_has_dc_support(adev)) {
2905 /* pin cursors */
2906 list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) {
2907 struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
2908
2909 if (amdgpu_crtc->cursor_bo) {
2910 struct amdgpu_bo *aobj = gem_to_amdgpu_bo(amdgpu_crtc->cursor_bo);
2911 r = amdgpu_bo_reserve(aobj, true);
2912 if (r == 0) {
2913 r = amdgpu_bo_pin(aobj, AMDGPU_GEM_DOMAIN_VRAM);
2914 if (r != 0)
2915 DRM_ERROR("Failed to pin cursor BO (%d)\n", r);
2916 amdgpu_crtc->cursor_addr = amdgpu_bo_gpu_offset(aobj);
2917 amdgpu_bo_unreserve(aobj);
2918 }
756e6880
AD
2919 }
2920 }
2921 }
ba997709
YZ
2922 r = amdgpu_amdkfd_resume(adev);
2923 if (r)
2924 return r;
756e6880 2925
96a5d8d4
LL
2926 /* Make sure IB tests flushed */
2927 flush_delayed_work(&adev->late_init_work);
2928
d38ceaf9
AD
2929 /* blat the mode back in */
2930 if (fbcon) {
4562236b
HW
2931 if (!amdgpu_device_has_dc_support(adev)) {
2932 /* pre DCE11 */
2933 drm_helper_resume_force_mode(dev);
2934
2935 /* turn on display hw */
2936 drm_modeset_lock_all(dev);
2937 list_for_each_entry(connector, &dev->mode_config.connector_list, head) {
2938 drm_helper_connector_dpms(connector, DRM_MODE_DPMS_ON);
2939 }
2940 drm_modeset_unlock_all(dev);
d38ceaf9 2941 }
4d3b9ae5 2942 amdgpu_fbdev_set_suspend(adev, 0);
d38ceaf9
AD
2943 }
2944
2945 drm_kms_helper_poll_enable(dev);
23a1a9e5
L
2946
2947 /*
2948 * Most of the connector probing functions try to acquire runtime pm
2949 * refs to ensure that the GPU is powered on when connector polling is
2950 * performed. Since we're calling this from a runtime PM callback,
2951 * trying to acquire rpm refs will cause us to deadlock.
2952 *
2953 * Since we're guaranteed to be holding the rpm lock, it's safe to
2954 * temporarily disable the rpm helpers so this doesn't deadlock us.
2955 */
2956#ifdef CONFIG_PM
2957 dev->dev->power.disable_depth++;
2958#endif
4562236b
HW
2959 if (!amdgpu_device_has_dc_support(adev))
2960 drm_helper_hpd_irq_event(dev);
2961 else
2962 drm_kms_helper_hotplug_event(dev);
23a1a9e5
L
2963#ifdef CONFIG_PM
2964 dev->dev->power.disable_depth--;
2965#endif
44779b43
RZ
2966 adev->in_suspend = false;
2967
4d3b9ae5 2968 return 0;
d38ceaf9
AD
2969}
2970
e3ecdffa
AD
2971/**
2972 * amdgpu_device_ip_check_soft_reset - did soft reset succeed
2973 *
2974 * @adev: amdgpu_device pointer
2975 *
2976 * The list of all the hardware IPs that make up the asic is walked and
2977 * the check_soft_reset callbacks are run. check_soft_reset determines
2978 * if the asic is still hung or not.
2979 * Returns true if any of the IPs are still in a hung state, false if not.
2980 */
06ec9070 2981static bool amdgpu_device_ip_check_soft_reset(struct amdgpu_device *adev)
63fbf42f
CZ
2982{
2983 int i;
2984 bool asic_hang = false;
2985
f993d628
ML
2986 if (amdgpu_sriov_vf(adev))
2987 return true;
2988
8bc04c29
AD
2989 if (amdgpu_asic_need_full_reset(adev))
2990 return true;
2991
63fbf42f 2992 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 2993 if (!adev->ip_blocks[i].status.valid)
63fbf42f 2994 continue;
a1255107
AD
2995 if (adev->ip_blocks[i].version->funcs->check_soft_reset)
2996 adev->ip_blocks[i].status.hang =
2997 adev->ip_blocks[i].version->funcs->check_soft_reset(adev);
2998 if (adev->ip_blocks[i].status.hang) {
2999 DRM_INFO("IP block:%s is hung!\n", adev->ip_blocks[i].version->funcs->name);
63fbf42f
CZ
3000 asic_hang = true;
3001 }
3002 }
3003 return asic_hang;
3004}
3005
e3ecdffa
AD
3006/**
3007 * amdgpu_device_ip_pre_soft_reset - prepare for soft reset
3008 *
3009 * @adev: amdgpu_device pointer
3010 *
3011 * The list of all the hardware IPs that make up the asic is walked and the
3012 * pre_soft_reset callbacks are run if the block is hung. pre_soft_reset
3013 * handles any IP specific hardware or software state changes that are
3014 * necessary for a soft reset to succeed.
3015 * Returns 0 on success, negative error code on failure.
3016 */
06ec9070 3017static int amdgpu_device_ip_pre_soft_reset(struct amdgpu_device *adev)
d31a501e
CZ
3018{
3019 int i, r = 0;
3020
3021 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 3022 if (!adev->ip_blocks[i].status.valid)
d31a501e 3023 continue;
a1255107
AD
3024 if (adev->ip_blocks[i].status.hang &&
3025 adev->ip_blocks[i].version->funcs->pre_soft_reset) {
3026 r = adev->ip_blocks[i].version->funcs->pre_soft_reset(adev);
d31a501e
CZ
3027 if (r)
3028 return r;
3029 }
3030 }
3031
3032 return 0;
3033}
3034
e3ecdffa
AD
3035/**
3036 * amdgpu_device_ip_need_full_reset - check if a full asic reset is needed
3037 *
3038 * @adev: amdgpu_device pointer
3039 *
3040 * Some hardware IPs cannot be soft reset. If they are hung, a full gpu
3041 * reset is necessary to recover.
3042 * Returns true if a full asic reset is required, false if not.
3043 */
06ec9070 3044static bool amdgpu_device_ip_need_full_reset(struct amdgpu_device *adev)
35d782fe 3045{
da146d3b
AD
3046 int i;
3047
8bc04c29
AD
3048 if (amdgpu_asic_need_full_reset(adev))
3049 return true;
3050
da146d3b 3051 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 3052 if (!adev->ip_blocks[i].status.valid)
da146d3b 3053 continue;
a1255107
AD
3054 if ((adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) ||
3055 (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) ||
3056 (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_ACP) ||
98512bb8
KW
3057 (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_DCE) ||
3058 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP) {
a1255107 3059 if (adev->ip_blocks[i].status.hang) {
da146d3b
AD
3060 DRM_INFO("Some block need full reset!\n");
3061 return true;
3062 }
3063 }
35d782fe
CZ
3064 }
3065 return false;
3066}
3067
e3ecdffa
AD
3068/**
3069 * amdgpu_device_ip_soft_reset - do a soft reset
3070 *
3071 * @adev: amdgpu_device pointer
3072 *
3073 * The list of all the hardware IPs that make up the asic is walked and the
3074 * soft_reset callbacks are run if the block is hung. soft_reset handles any
3075 * IP specific hardware or software state changes that are necessary to soft
3076 * reset the IP.
3077 * Returns 0 on success, negative error code on failure.
3078 */
06ec9070 3079static int amdgpu_device_ip_soft_reset(struct amdgpu_device *adev)
35d782fe
CZ
3080{
3081 int i, r = 0;
3082
3083 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 3084 if (!adev->ip_blocks[i].status.valid)
35d782fe 3085 continue;
a1255107
AD
3086 if (adev->ip_blocks[i].status.hang &&
3087 adev->ip_blocks[i].version->funcs->soft_reset) {
3088 r = adev->ip_blocks[i].version->funcs->soft_reset(adev);
35d782fe
CZ
3089 if (r)
3090 return r;
3091 }
3092 }
3093
3094 return 0;
3095}
3096
e3ecdffa
AD
3097/**
3098 * amdgpu_device_ip_post_soft_reset - clean up from soft reset
3099 *
3100 * @adev: amdgpu_device pointer
3101 *
3102 * The list of all the hardware IPs that make up the asic is walked and the
3103 * post_soft_reset callbacks are run if the asic was hung. post_soft_reset
3104 * handles any IP specific hardware or software state changes that are
3105 * necessary after the IP has been soft reset.
3106 * Returns 0 on success, negative error code on failure.
3107 */
06ec9070 3108static int amdgpu_device_ip_post_soft_reset(struct amdgpu_device *adev)
35d782fe
CZ
3109{
3110 int i, r = 0;
3111
3112 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 3113 if (!adev->ip_blocks[i].status.valid)
35d782fe 3114 continue;
a1255107
AD
3115 if (adev->ip_blocks[i].status.hang &&
3116 adev->ip_blocks[i].version->funcs->post_soft_reset)
3117 r = adev->ip_blocks[i].version->funcs->post_soft_reset(adev);
35d782fe
CZ
3118 if (r)
3119 return r;
3120 }
3121
3122 return 0;
3123}
3124
e3ecdffa 3125/**
c33adbc7 3126 * amdgpu_device_recover_vram - Recover some VRAM contents
e3ecdffa
AD
3127 *
3128 * @adev: amdgpu_device pointer
3129 *
3130 * Restores the contents of VRAM buffers from the shadows in GTT. Used to
3131 * restore things like GPUVM page tables after a GPU reset where
3132 * the contents of VRAM might be lost.
403009bf
CK
3133 *
3134 * Returns:
3135 * 0 on success, negative error code on failure.
e3ecdffa 3136 */
c33adbc7 3137static int amdgpu_device_recover_vram(struct amdgpu_device *adev)
c41d1cf6 3138{
c41d1cf6 3139 struct dma_fence *fence = NULL, *next = NULL;
403009bf
CK
3140 struct amdgpu_bo *shadow;
3141 long r = 1, tmo;
c41d1cf6
ML
3142
3143 if (amdgpu_sriov_runtime(adev))
b045d3af 3144 tmo = msecs_to_jiffies(8000);
c41d1cf6
ML
3145 else
3146 tmo = msecs_to_jiffies(100);
3147
3148 DRM_INFO("recover vram bo from shadow start\n");
3149 mutex_lock(&adev->shadow_list_lock);
403009bf
CK
3150 list_for_each_entry(shadow, &adev->shadow_list, shadow_list) {
3151
3152 /* No need to recover an evicted BO */
3153 if (shadow->tbo.mem.mem_type != TTM_PL_TT ||
3154 shadow->parent->tbo.mem.mem_type != TTM_PL_VRAM)
3155 continue;
3156
3157 r = amdgpu_bo_restore_shadow(shadow, &next);
3158 if (r)
3159 break;
3160
c41d1cf6
ML
3161 if (fence) {
3162 r = dma_fence_wait_timeout(fence, false, tmo);
403009bf
CK
3163 dma_fence_put(fence);
3164 fence = next;
3165 if (r <= 0)
c41d1cf6 3166 break;
403009bf
CK
3167 } else {
3168 fence = next;
c41d1cf6 3169 }
c41d1cf6
ML
3170 }
3171 mutex_unlock(&adev->shadow_list_lock);
3172
403009bf
CK
3173 if (fence)
3174 tmo = dma_fence_wait_timeout(fence, false, tmo);
c41d1cf6
ML
3175 dma_fence_put(fence);
3176
403009bf 3177 if (r <= 0 || tmo <= 0) {
c41d1cf6 3178 DRM_ERROR("recover vram bo from shadow failed\n");
403009bf
CK
3179 return -EIO;
3180 }
c41d1cf6 3181
403009bf
CK
3182 DRM_INFO("recover vram bo from shadow done\n");
3183 return 0;
c41d1cf6
ML
3184}
3185
a90ad3c2 3186
e3ecdffa 3187/**
06ec9070 3188 * amdgpu_device_reset_sriov - reset ASIC for SR-IOV vf
5740682e
ML
3189 *
3190 * @adev: amdgpu device pointer
87e3f136 3191 * @from_hypervisor: request from hypervisor
5740682e
ML
3192 *
3193 * Do VF FLR and reinitialize the ASIC.
3f48c681 3194 * Returns 0 on success, negative error code on failure.
e3ecdffa
AD
3195 */
3196static int amdgpu_device_reset_sriov(struct amdgpu_device *adev,
3197 bool from_hypervisor)
5740682e
ML
3198{
3199 int r;
3200
3201 if (from_hypervisor)
3202 r = amdgpu_virt_request_full_gpu(adev, true);
3203 else
3204 r = amdgpu_virt_reset_gpu(adev);
3205 if (r)
3206 return r;
a90ad3c2
ML
3207
3208 /* Resume IP prior to SMC */
06ec9070 3209 r = amdgpu_device_ip_reinit_early_sriov(adev);
5740682e
ML
3210 if (r)
3211 goto error;
a90ad3c2
ML
3212
3213 /* we need recover gart prior to run SMC/CP/SDMA resume */
c1c7ce8f 3214 amdgpu_gtt_mgr_recover(&adev->mman.bdev.man[TTM_PL_TT]);
a90ad3c2 3215
7a3e0bb2
RZ
3216 r = amdgpu_device_fw_loading(adev);
3217 if (r)
3218 return r;
3219
a90ad3c2 3220 /* now we are okay to resume SMC/CP/SDMA */
06ec9070 3221 r = amdgpu_device_ip_reinit_late_sriov(adev);
5740682e
ML
3222 if (r)
3223 goto error;
a90ad3c2
ML
3224
3225 amdgpu_irq_gpu_reset_resume_helper(adev);
5740682e 3226 r = amdgpu_ib_ring_tests(adev);
a90ad3c2 3227
abc34253
ED
3228error:
3229 amdgpu_virt_release_full_gpu(adev, true);
c41d1cf6
ML
3230 if (!r && adev->virt.gim_feature & AMDGIM_FEATURE_GIM_FLR_VRAMLOST) {
3231 atomic_inc(&adev->vram_lost_counter);
c33adbc7 3232 r = amdgpu_device_recover_vram(adev);
a90ad3c2
ML
3233 }
3234
3235 return r;
3236}
3237
12938fad
CK
3238/**
3239 * amdgpu_device_should_recover_gpu - check if we should try GPU recovery
3240 *
3241 * @adev: amdgpu device pointer
3242 *
3243 * Check amdgpu_gpu_recovery and SRIOV status to see if we should try to recover
3244 * a hung GPU.
3245 */
3246bool amdgpu_device_should_recover_gpu(struct amdgpu_device *adev)
3247{
3248 if (!amdgpu_device_ip_check_soft_reset(adev)) {
3249 DRM_INFO("Timeout, but no hardware hang detected.\n");
3250 return false;
3251 }
3252
3ba7b418
AG
3253 if (amdgpu_gpu_recovery == 0)
3254 goto disabled;
3255
3256 if (amdgpu_sriov_vf(adev))
3257 return true;
3258
3259 if (amdgpu_gpu_recovery == -1) {
3260 switch (adev->asic_type) {
fc42d47c
AG
3261 case CHIP_BONAIRE:
3262 case CHIP_HAWAII:
3ba7b418
AG
3263 case CHIP_TOPAZ:
3264 case CHIP_TONGA:
3265 case CHIP_FIJI:
3266 case CHIP_POLARIS10:
3267 case CHIP_POLARIS11:
3268 case CHIP_POLARIS12:
3269 case CHIP_VEGAM:
3270 case CHIP_VEGA20:
3271 case CHIP_VEGA10:
3272 case CHIP_VEGA12:
3273 break;
3274 default:
3275 goto disabled;
3276 }
12938fad
CK
3277 }
3278
3279 return true;
3ba7b418
AG
3280
3281disabled:
3282 DRM_INFO("GPU recovery disabled.\n");
3283 return false;
12938fad
CK
3284}
3285
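/*
 * Illustrative sketch (not part of this file): the typical caller of
 * amdgpu_device_should_recover_gpu(). A job timeout handler (see
 * amdgpu_job.c) checks it before kicking off the recovery path implemented
 * below; the handler shown here is a simplified stand-in.
 */
static void example_job_timedout(struct amdgpu_device *adev, struct amdgpu_job *job)
{
	if (amdgpu_device_should_recover_gpu(adev))
		amdgpu_device_gpu_recover(adev, job);
	else
		DRM_INFO("GPU recovery disabled, leaving the job hung.\n");
}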
5c6dd71e 3286
26bc5340
AG
3287static int amdgpu_device_pre_asic_reset(struct amdgpu_device *adev,
3288 struct amdgpu_job *job,
3289 bool *need_full_reset_arg)
3290{
3291 int i, r = 0;
3292 bool need_full_reset = *need_full_reset_arg;
71182665 3293
71182665 3294 /* block all schedulers and reset given job's ring */
0875dc9e
CZ
3295 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
3296 struct amdgpu_ring *ring = adev->rings[i];
3297
51687759 3298 if (!ring || !ring->sched.thread)
0875dc9e 3299 continue;
5740682e 3300
71182665
ML
3301 kthread_park(ring->sched.thread);
3302
734afd4b 3303 if (job && job->base.sched != &ring->sched)
5740682e
ML
3304 continue;
3305
67ccea60 3306 drm_sched_hw_job_reset(&ring->sched, job ? &job->base : NULL);
5740682e 3307
2f9d4084
ML
3308 /* after all hw jobs are reset, hw fence is meaningless, so force_completion */
3309 amdgpu_fence_driver_force_completion(ring);
0875dc9e 3310 }
d38ceaf9 3311
26bc5340
AG
3312
3313
3314 if (!amdgpu_sriov_vf(adev)) {
3315
3316 if (!need_full_reset)
3317 need_full_reset = amdgpu_device_ip_need_full_reset(adev);
3318
3319 if (!need_full_reset) {
3320 amdgpu_device_ip_pre_soft_reset(adev);
3321 r = amdgpu_device_ip_soft_reset(adev);
3322 amdgpu_device_ip_post_soft_reset(adev);
3323 if (r || amdgpu_device_ip_check_soft_reset(adev)) {
3324 DRM_INFO("soft reset failed, will fallback to full reset!\n");
3325 need_full_reset = true;
3326 }
3327 }
3328
3329 if (need_full_reset)
3330 r = amdgpu_device_ip_suspend(adev);
3331
3332 *need_full_reset_arg = need_full_reset;
3333 }
3334
3335 return r;
3336}
3337
3338static int amdgpu_do_asic_reset(struct amdgpu_hive_info *hive,
3339 struct list_head *device_list_handle,
3340 bool *need_full_reset_arg)
3341{
3342 struct amdgpu_device *tmp_adev = NULL;
3343 bool need_full_reset = *need_full_reset_arg, vram_lost = false;
3344 int r = 0;
3345
3346 /*
3347 * ASIC reset has to be done on all XGMI hive nodes ASAP
3348 * to allow proper links negotiation in FW (within 1 sec)
3349 */
3350 if (need_full_reset) {
3351 list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) {
d4535e2c
AG
3352 /* For XGMI run all resets in parallel to speed up the process */
3353 if (tmp_adev->gmc.xgmi.num_physical_nodes > 1) {
3354 if (!queue_work(system_highpri_wq, &tmp_adev->xgmi_reset_work))
3355 r = -EALREADY;
3356 } else
3357 r = amdgpu_asic_reset(tmp_adev);
3358
3359 if (r) {
3360 DRM_ERROR("ASIC reset failed with error %d for drm dev %s",
26bc5340 3361 r, tmp_adev->ddev->unique);
d4535e2c
AG
3362 break;
3363 }
3364 }
3365
3366 /* For XGMI wait for all PSP resets to complete before proceed */
3367 if (!r) {
3368 list_for_each_entry(tmp_adev, device_list_handle,
3369 gmc.xgmi.head) {
3370 if (tmp_adev->gmc.xgmi.num_physical_nodes > 1) {
3371 flush_work(&tmp_adev->xgmi_reset_work);
3372 r = tmp_adev->asic_reset_res;
3373 if (r)
3374 break;
3375 }
3376 }
26bc5340
AG
3377 }
3378 }
3379
3380
3381 list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) {
3382 if (need_full_reset) {
3383 /* post card */
3384 if (amdgpu_atom_asic_init(tmp_adev->mode_info.atom_context))
3385 DRM_WARN("asic atom init failed!");
3386
3387 if (!r) {
3388 dev_info(tmp_adev->dev, "GPU reset succeeded, trying to resume\n");
3389 r = amdgpu_device_ip_resume_phase1(tmp_adev);
3390 if (r)
3391 goto out;
3392
3393 vram_lost = amdgpu_device_check_vram_lost(tmp_adev);
3394 if (vram_lost) {
3395 DRM_ERROR("VRAM is lost!\n");
3396 atomic_inc(&tmp_adev->vram_lost_counter);
3397 }
3398
3399 r = amdgpu_gtt_mgr_recover(
3400 &tmp_adev->mman.bdev.man[TTM_PL_TT]);
3401 if (r)
3402 goto out;
3403
3404 r = amdgpu_device_fw_loading(tmp_adev);
3405 if (r)
3406 return r;
3407
3408 r = amdgpu_device_ip_resume_phase2(tmp_adev);
3409 if (r)
3410 goto out;
3411
3412 if (vram_lost)
3413 amdgpu_device_fill_reset_magic(tmp_adev);
3414
3415 /* Update PSP FW topology after reset */
3416 if (hive && tmp_adev->gmc.xgmi.num_physical_nodes > 1)
3417 r = amdgpu_xgmi_update_topology(hive, tmp_adev);
3418 }
3419 }
3420
3421
3422out:
3423 if (!r) {
3424 amdgpu_irq_gpu_reset_resume_helper(tmp_adev);
3425 r = amdgpu_ib_ring_tests(tmp_adev);
3426 if (r) {
3427 dev_err(tmp_adev->dev, "ib ring test failed (%d).\n", r);
3428 r = amdgpu_device_ip_suspend(tmp_adev);
3429 need_full_reset = true;
3430 r = -EAGAIN;
3431 goto end;
3432 }
3433 }
3434
3435 if (!r)
3436 r = amdgpu_device_recover_vram(tmp_adev);
3437 else
3438 tmp_adev->asic_reset_res = r;
3439 }
3440
3441end:
3442 *need_full_reset_arg = need_full_reset;
3443 return r;
3444}
3445
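/*
 * Illustrative sketch (not part of the driver, kept under #if 0): the
 * fan-out/fan-in pattern used above for parallel XGMI resets, reduced to
 * its essentials. queue_work(), flush_work(), INIT_WORK() and
 * system_highpri_wq are real kernel interfaces; the struct and the
 * do_node_reset()/reset_all_nodes() names are hypothetical placeholders.
 */
#if 0
struct node_reset_ctx {
	struct work_struct work;
	int result;
};

static void node_reset_func(struct work_struct *work)
{
	struct node_reset_ctx *ctx = container_of(work, struct node_reset_ctx, work);

	/* do_node_reset() stands in for the per-node ASIC reset. */
	ctx->result = do_node_reset(ctx);
}

static int reset_all_nodes(struct node_reset_ctx *nodes, int count)
{
	int i, r = 0;

	/* Fan out: one high-priority work item per node. */
	for (i = 0; i < count; i++) {
		INIT_WORK(&nodes[i].work, node_reset_func);
		if (!queue_work(system_highpri_wq, &nodes[i].work))
			return -EALREADY;	/* already pending, as above */
	}

	/* Fan in: wait for every node, keep the first error. */
	for (i = 0; i < count; i++) {
		flush_work(&nodes[i].work);
		if (!r)
			r = nodes[i].result;
	}
	return r;
}
#endif
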
static void amdgpu_device_post_asic_reset(struct amdgpu_device *adev,
					  struct amdgpu_job *job)
{
	int i;

	for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
		struct amdgpu_ring *ring = adev->rings[i];

		if (!ring || !ring->sched.thread)
			continue;

		/* Only need to recover the scheduler of the given job's
		 * ring, or of all rings when @job is NULL, after the
		 * reset above has completed.
		 */
		if ((!job || job->base.sched == &ring->sched) && !adev->asic_reset_res)
			drm_sched_job_recovery(&ring->sched);

		kthread_unpark(ring->sched.thread);
	}

	if (!amdgpu_device_has_dc_support(adev)) {
		drm_helper_resume_force_mode(adev->ddev);
	}

	adev->asic_reset_res = 0;
}

static void amdgpu_device_lock_adev(struct amdgpu_device *adev)
{
	mutex_lock(&adev->lock_reset);
	atomic_inc(&adev->gpu_reset_counter);
	adev->in_gpu_reset = 1;
	/* Block kfd: SRIOV would do it separately */
	if (!amdgpu_sriov_vf(adev))
		amdgpu_amdkfd_pre_reset(adev);
}

static void amdgpu_device_unlock_adev(struct amdgpu_device *adev)
{
	/* Unlock kfd: SRIOV would do it separately */
	if (!amdgpu_sriov_vf(adev))
		amdgpu_amdkfd_post_reset(adev);
	amdgpu_vf_error_trans_all(adev);
	adev->in_gpu_reset = 0;
	mutex_unlock(&adev->lock_reset);
}
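
/*
 * Illustrative sketch (not part of the driver, kept under #if 0): the two
 * helpers above are meant to bracket every per-device step of recovery.
 * A hypothetical single-device caller would look like this; it is a
 * simplified view that omits the -EAGAIN retry and XGMI hive handling
 * which amdgpu_device_gpu_recover() below adds on top.
 */
#if 0
static int example_single_device_reset(struct amdgpu_device *adev,
				       struct amdgpu_job *job)
{
	struct list_head device_list;
	bool need_full_reset = false;
	int r;

	INIT_LIST_HEAD(&device_list);
	list_add_tail(&adev->gmc.xgmi.head, &device_list);

	amdgpu_device_lock_adev(adev);		/* serialize, block KFD */
	r = amdgpu_device_pre_asic_reset(adev, job, &need_full_reset);
	if (!r)
		r = amdgpu_do_asic_reset(NULL, &device_list, &need_full_reset);
	amdgpu_device_post_asic_reset(adev, job);
	amdgpu_device_unlock_adev(adev);	/* resume KFD, drop lock */

	return r;
}
#endif
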

/**
 * amdgpu_device_gpu_recover - reset the asic and recover scheduler
 *
 * @adev: amdgpu device pointer
 * @job: which job triggered the hang
 *
 * Attempt to reset the GPU if it has hung (all asics).
 * Attempt to do soft-reset or full-reset and reinitialize the ASIC.
 * Returns 0 for success or an error on failure.
 */
int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
			      struct amdgpu_job *job)
{
	int r;
	struct amdgpu_hive_info *hive = NULL;
	bool need_full_reset = false;
	struct amdgpu_device *tmp_adev = NULL;
	struct list_head device_list, *device_list_handle = NULL;

	INIT_LIST_HEAD(&device_list);

	dev_info(adev->dev, "GPU reset begin!\n");

	/*
	 * In case of XGMI hive disallow concurrent resets to be triggered
	 * by different nodes. No point also since the one node already executing
	 * reset will also reset all the other nodes in the hive.
	 */
	hive = amdgpu_get_xgmi_hive(adev);
	if (hive && adev->gmc.xgmi.num_physical_nodes > 1 &&
	    !mutex_trylock(&hive->hive_lock))
		return 0;

	/* Start with adev pre asic reset first for soft reset check. */
	amdgpu_device_lock_adev(adev);
	r = amdgpu_device_pre_asic_reset(adev,
					 job,
					 &need_full_reset);
	if (r) {
		/* TODO Should we stop? */
		DRM_ERROR("GPU pre asic reset failed with error, %d for drm dev, %s",
			  r, adev->ddev->unique);
		adev->asic_reset_res = r;
	}

	/* Build list of devices to reset */
	if (need_full_reset && adev->gmc.xgmi.num_physical_nodes > 1) {
		if (!hive) {
			amdgpu_device_unlock_adev(adev);
			return -ENODEV;
		}

		/*
		 * In case we are in XGMI hive mode device reset is done for all the
		 * nodes in the hive to retrain all XGMI links and hence the reset
		 * sequence is executed in loop on all nodes.
		 */
		device_list_handle = &hive->device_list;
	} else {
		list_add_tail(&adev->gmc.xgmi.head, &device_list);
		device_list_handle = &device_list;
	}

retry:	/* Rest of adevs pre asic reset from XGMI hive. */
	list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) {

		if (tmp_adev == adev)
			continue;

		amdgpu_device_lock_adev(tmp_adev);
		r = amdgpu_device_pre_asic_reset(tmp_adev,
						 NULL,
						 &need_full_reset);
		/* TODO Should we stop? */
		if (r) {
			DRM_ERROR("GPU pre asic reset failed with error, %d for drm dev, %s",
				  r, tmp_adev->ddev->unique);
			tmp_adev->asic_reset_res = r;
		}
	}

	/* Actual ASIC resets if needed. */
	/* TODO Implement XGMI hive reset logic for SRIOV */
	if (amdgpu_sriov_vf(adev)) {
		r = amdgpu_device_reset_sriov(adev, job ? false : true);
		if (r)
			adev->asic_reset_res = r;
	} else {
		r = amdgpu_do_asic_reset(hive, device_list_handle, &need_full_reset);
		if (r && r == -EAGAIN)
			goto retry;
	}

	/* Post ASIC reset for all devs. */
	list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) {
		amdgpu_device_post_asic_reset(tmp_adev, tmp_adev == adev ? job : NULL);

		if (r) {
			/* bad news, how to tell it to userspace? */
			dev_info(tmp_adev->dev, "GPU reset(%d) failed\n", atomic_read(&adev->gpu_reset_counter));
			amdgpu_vf_error_put(tmp_adev, AMDGIM_ERROR_VF_GPU_RESET_FAIL, 0, r);
		} else {
			dev_info(tmp_adev->dev, "GPU reset(%d) succeeded!\n", atomic_read(&adev->gpu_reset_counter));
		}

		amdgpu_device_unlock_adev(tmp_adev);
	}

	if (hive && adev->gmc.xgmi.num_physical_nodes > 1)
		mutex_unlock(&hive->hive_lock);

	if (r)
		dev_info(adev->dev, "GPU reset end with ret = %d\n", r);

	return r;
}

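/*
 * Illustrative sketch (not part of this file, kept under #if 0): a
 * plausible caller of amdgpu_device_gpu_recover(), modeled on the
 * driver's scheduler timeout callback. Treat the details as an
 * assumption, since that handler lives in amdgpu_job.c rather than here;
 * to_amdgpu_job()/to_amdgpu_ring() are existing driver helpers.
 */
#if 0
static void example_job_timedout(struct drm_sched_job *s_job)
{
	struct amdgpu_job *job = to_amdgpu_job(s_job);
	struct amdgpu_ring *ring = to_amdgpu_ring(s_job->sched);

	DRM_ERROR("ring %s timeout, signaled seq=%u, emitted seq=%u\n",
		  s_job->sched->name,
		  atomic_read(&ring->fence_drv.last_seq),
		  ring->fence_drv.sync_seq);

	/* Hand the hung job to the common recovery path above. */
	amdgpu_device_gpu_recover(ring->adev, job);
}
#endif
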
/**
 * amdgpu_device_get_pcie_info - fetch pcie info about the PCIE slot
 *
 * @adev: amdgpu_device pointer
 *
 * Fetches and stores in the driver the PCIE capabilities (gen speed
 * and lanes) of the slot the device is in. Handles APUs and
 * virtualized environments where PCIE config space may not be available.
 */
static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev)
{
	struct pci_dev *pdev;
	enum pci_bus_speed speed_cap;
	enum pcie_link_width link_width;

	if (amdgpu_pcie_gen_cap)
		adev->pm.pcie_gen_mask = amdgpu_pcie_gen_cap;

	if (amdgpu_pcie_lane_cap)
		adev->pm.pcie_mlw_mask = amdgpu_pcie_lane_cap;

	/* covers APUs as well */
	if (pci_is_root_bus(adev->pdev->bus)) {
		if (adev->pm.pcie_gen_mask == 0)
			adev->pm.pcie_gen_mask = AMDGPU_DEFAULT_PCIE_GEN_MASK;
		if (adev->pm.pcie_mlw_mask == 0)
			adev->pm.pcie_mlw_mask = AMDGPU_DEFAULT_PCIE_MLW_MASK;
		return;
	}

	if (adev->pm.pcie_gen_mask == 0) {
		/* asic caps */
		pdev = adev->pdev;
		speed_cap = pcie_get_speed_cap(pdev);
		if (speed_cap == PCI_SPEED_UNKNOWN) {
			adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
						   CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
						   CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3);
		} else {
			if (speed_cap == PCIE_SPEED_16_0GT)
				adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
							   CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
							   CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3 |
							   CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN4);
			else if (speed_cap == PCIE_SPEED_8_0GT)
				adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
							   CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
							   CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3);
			else if (speed_cap == PCIE_SPEED_5_0GT)
				adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
							   CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2);
			else
				adev->pm.pcie_gen_mask |= CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1;
		}
		/* platform caps */
		pdev = adev->ddev->pdev->bus->self;
		speed_cap = pcie_get_speed_cap(pdev);
		if (speed_cap == PCI_SPEED_UNKNOWN) {
			adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
						   CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2);
		} else {
			if (speed_cap == PCIE_SPEED_16_0GT)
				adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
							   CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2 |
							   CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3 |
							   CAIL_PCIE_LINK_SPEED_SUPPORT_GEN4);
			else if (speed_cap == PCIE_SPEED_8_0GT)
				adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
							   CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2 |
							   CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3);
			else if (speed_cap == PCIE_SPEED_5_0GT)
				adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
							   CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2);
			else
				adev->pm.pcie_gen_mask |= CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1;
		}
	}
	if (adev->pm.pcie_mlw_mask == 0) {
		pdev = adev->ddev->pdev->bus->self;
		link_width = pcie_get_width_cap(pdev);
		if (link_width == PCIE_LNK_WIDTH_UNKNOWN) {
			adev->pm.pcie_mlw_mask |= AMDGPU_DEFAULT_PCIE_MLW_MASK;
		} else {
			switch (link_width) {
			case PCIE_LNK_X32:
				adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X32 |
							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X16 |
							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 |
							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
				break;
			case PCIE_LNK_X16:
				adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X16 |
							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 |
							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
				break;
			case PCIE_LNK_X12:
				adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 |
							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
				break;
			case PCIE_LNK_X8:
				adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
				break;
			case PCIE_LNK_X4:
				adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
				break;
			case PCIE_LNK_X2:
				adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
				break;
			case PCIE_LNK_X1:
				adev->pm.pcie_mlw_mask = CAIL_PCIE_LINK_WIDTH_SUPPORT_X1;
				break;
			default:
				break;
			}
		}
	}
}
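
/*
 * Illustrative sketch (not part of the driver, kept under #if 0): how the
 * masks computed above are typically consumed. Power-management code can
 * test individual CAIL_* capability bits before programming a link
 * configuration; the helper name below is hypothetical.
 */
#if 0
static bool example_link_supports_pcie_gen3_x16(struct amdgpu_device *adev)
{
	/* Both the speed and the width capability must be present. */
	return (adev->pm.pcie_gen_mask & CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3) &&
	       (adev->pm.pcie_mlw_mask & CAIL_PCIE_LINK_WIDTH_SUPPORT_X16);
}
#endif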