drm/amdgpu: fix build error without CONFIG_HSA_AMD
[linux-2.6-block.git] / drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
/*
 * Copyright 2008 Advanced Micro Devices, Inc.
 * Copyright 2008 Red Hat Inc.
 * Copyright 2009 Jerome Glisse.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 * Authors: Dave Airlie
 *          Alex Deucher
 *          Jerome Glisse
 */
#include <linux/power_supply.h>
#include <linux/kthread.h>
#include <linux/module.h>
#include <linux/console.h>
#include <linux/slab.h>

#include <drm/drm_atomic_helper.h>
#include <drm/drm_probe_helper.h>
#include <drm/amdgpu_drm.h>
#include <linux/vgaarb.h>
#include <linux/vga_switcheroo.h>
#include <linux/efi.h>
#include "amdgpu.h"
#include "amdgpu_trace.h"
#include "amdgpu_i2c.h"
#include "atom.h"
#include "amdgpu_atombios.h"
#include "amdgpu_atomfirmware.h"
#include "amd_pcie.h"
#ifdef CONFIG_DRM_AMDGPU_SI
#include "si.h"
#endif
#ifdef CONFIG_DRM_AMDGPU_CIK
#include "cik.h"
#endif
#include "vi.h"
#include "soc15.h"
#include "nv.h"
#include "bif/bif_4_1_d.h"
#include <linux/pci.h>
#include <linux/firmware.h>
#include "amdgpu_vf_error.h"

#include "amdgpu_amdkfd.h"
#include "amdgpu_pm.h"

#include "amdgpu_xgmi.h"
#include "amdgpu_ras.h"
#include "amdgpu_pmu.h"

#include <linux/suspend.h>

MODULE_FIRMWARE("amdgpu/vega10_gpu_info.bin");
MODULE_FIRMWARE("amdgpu/vega12_gpu_info.bin");
MODULE_FIRMWARE("amdgpu/raven_gpu_info.bin");
MODULE_FIRMWARE("amdgpu/picasso_gpu_info.bin");
MODULE_FIRMWARE("amdgpu/raven2_gpu_info.bin");
MODULE_FIRMWARE("amdgpu/arcturus_gpu_info.bin");
MODULE_FIRMWARE("amdgpu/renoir_gpu_info.bin");
MODULE_FIRMWARE("amdgpu/navi10_gpu_info.bin");
MODULE_FIRMWARE("amdgpu/navi14_gpu_info.bin");
MODULE_FIRMWARE("amdgpu/navi12_gpu_info.bin");

#define AMDGPU_RESUME_MS		2000

const char *amdgpu_asic_name[] = {
	"TAHITI",
	"PITCAIRN",
	"VERDE",
	"OLAND",
	"HAINAN",
	"BONAIRE",
	"KAVERI",
	"KABINI",
	"HAWAII",
	"MULLINS",
	"TOPAZ",
	"TONGA",
	"FIJI",
	"CARRIZO",
	"STONEY",
	"POLARIS10",
	"POLARIS11",
	"POLARIS12",
	"VEGAM",
	"VEGA10",
	"VEGA12",
	"VEGA20",
	"RAVEN",
	"ARCTURUS",
	"RENOIR",
	"NAVI10",
	"NAVI14",
	"NAVI12",
	"LAST",
};

/**
 * DOC: pcie_replay_count
 *
 * The amdgpu driver provides a sysfs API for reporting the total number
 * of PCIe replays (NAKs).
 * The file pcie_replay_count is used for this and returns the total
 * number of replays as a sum of the NAKs generated and NAKs received.
 */

static ssize_t amdgpu_device_get_pcie_replay_count(struct device *dev,
		struct device_attribute *attr, char *buf)
{
	struct drm_device *ddev = dev_get_drvdata(dev);
	struct amdgpu_device *adev = ddev->dev_private;
	uint64_t cnt = amdgpu_asic_get_pcie_replay_count(adev);

	return snprintf(buf, PAGE_SIZE, "%llu\n", cnt);
}

static DEVICE_ATTR(pcie_replay_count, S_IRUGO,
		amdgpu_device_get_pcie_replay_count, NULL);

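/*
 * Illustrative usage only (not part of the driver): with the attribute bound
 * to the device, userspace would typically read it with something like
 *   cat /sys/bus/pci/devices/0000:03:00.0/pcie_replay_count
 * where the PCI address is a placeholder that varies per system.
 */
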
static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev);

/**
 * amdgpu_device_is_px - Is the device a dGPU with HG/PX power control
 *
 * @dev: drm_device pointer
 *
 * Returns true if the device is a dGPU with HG/PX power control,
 * otherwise return false.
 */
bool amdgpu_device_is_px(struct drm_device *dev)
{
	struct amdgpu_device *adev = dev->dev_private;

	if (adev->flags & AMD_IS_PX)
		return true;
	return false;
}

/*
 * MMIO register access helper functions.
 */
/**
 * amdgpu_mm_rreg - read a memory mapped IO register
 *
 * @adev: amdgpu_device pointer
 * @reg: dword aligned register offset
 * @acc_flags: access flags which require special behavior
 *
 * Returns the 32 bit value from the offset specified.
 */
uint32_t amdgpu_mm_rreg(struct amdgpu_device *adev, uint32_t reg,
			uint32_t acc_flags)
{
	uint32_t ret;

	if (!(acc_flags & AMDGPU_REGS_NO_KIQ) && amdgpu_sriov_runtime(adev))
		return amdgpu_virt_kiq_rreg(adev, reg);

	if ((reg * 4) < adev->rmmio_size && !(acc_flags & AMDGPU_REGS_IDX))
		ret = readl(((void __iomem *)adev->rmmio) + (reg * 4));
	else {
		unsigned long flags;

		spin_lock_irqsave(&adev->mmio_idx_lock, flags);
		writel((reg * 4), ((void __iomem *)adev->rmmio) + (mmMM_INDEX * 4));
		ret = readl(((void __iomem *)adev->rmmio) + (mmMM_DATA * 4));
		spin_unlock_irqrestore(&adev->mmio_idx_lock, flags);
	}
	trace_amdgpu_mm_rreg(adev->pdev->device, reg, ret);
	return ret;
}

/*
 * MMIO register read with bytes helper function
 * @offset: byte offset from MMIO start
 */

/**
 * amdgpu_mm_rreg8 - read a memory mapped IO register
 *
 * @adev: amdgpu_device pointer
 * @offset: byte aligned register offset
 *
 * Returns the 8 bit value from the offset specified.
 */
uint8_t amdgpu_mm_rreg8(struct amdgpu_device *adev, uint32_t offset) {
	if (offset < adev->rmmio_size)
		return (readb(adev->rmmio + offset));
	BUG();
}

/*
 * MMIO register write with bytes helper function
 * @offset: byte offset from MMIO start
 * @value: the value to be written to the register
 */
/**
 * amdgpu_mm_wreg8 - write a memory mapped IO register
 *
 * @adev: amdgpu_device pointer
 * @offset: byte aligned register offset
 * @value: 8 bit value to write
 *
 * Writes the value specified to the offset specified.
 */
void amdgpu_mm_wreg8(struct amdgpu_device *adev, uint32_t offset, uint8_t value) {
	if (offset < adev->rmmio_size)
		writeb(value, adev->rmmio + offset);
	else
		BUG();
}

/**
 * amdgpu_mm_wreg - write to a memory mapped IO register
 *
 * @adev: amdgpu_device pointer
 * @reg: dword aligned register offset
 * @v: 32 bit value to write to the register
 * @acc_flags: access flags which require special behavior
 *
 * Writes the value specified to the offset specified.
 */
void amdgpu_mm_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v,
		    uint32_t acc_flags)
{
	trace_amdgpu_mm_wreg(adev->pdev->device, reg, v);

	if (adev->asic_type >= CHIP_VEGA10 && reg == 0) {
		adev->last_mm_index = v;
	}

	if (!(acc_flags & AMDGPU_REGS_NO_KIQ) && amdgpu_sriov_runtime(adev))
		return amdgpu_virt_kiq_wreg(adev, reg, v);

	if ((reg * 4) < adev->rmmio_size && !(acc_flags & AMDGPU_REGS_IDX))
		writel(v, ((void __iomem *)adev->rmmio) + (reg * 4));
	else {
		unsigned long flags;

		spin_lock_irqsave(&adev->mmio_idx_lock, flags);
		writel((reg * 4), ((void __iomem *)adev->rmmio) + (mmMM_INDEX * 4));
		writel(v, ((void __iomem *)adev->rmmio) + (mmMM_DATA * 4));
		spin_unlock_irqrestore(&adev->mmio_idx_lock, flags);
	}

	if (adev->asic_type >= CHIP_VEGA10 && reg == 1 && adev->last_mm_index == 0x5702C) {
		udelay(500);
	}
}

/**
 * amdgpu_io_rreg - read an IO register
 *
 * @adev: amdgpu_device pointer
 * @reg: dword aligned register offset
 *
 * Returns the 32 bit value from the offset specified.
 */
u32 amdgpu_io_rreg(struct amdgpu_device *adev, u32 reg)
{
	if ((reg * 4) < adev->rio_mem_size)
		return ioread32(adev->rio_mem + (reg * 4));
	else {
		iowrite32((reg * 4), adev->rio_mem + (mmMM_INDEX * 4));
		return ioread32(adev->rio_mem + (mmMM_DATA * 4));
	}
}

/**
 * amdgpu_io_wreg - write to an IO register
 *
 * @adev: amdgpu_device pointer
 * @reg: dword aligned register offset
 * @v: 32 bit value to write to the register
 *
 * Writes the value specified to the offset specified.
 */
void amdgpu_io_wreg(struct amdgpu_device *adev, u32 reg, u32 v)
{
	if (adev->asic_type >= CHIP_VEGA10 && reg == 0) {
		adev->last_mm_index = v;
	}

	if ((reg * 4) < adev->rio_mem_size)
		iowrite32(v, adev->rio_mem + (reg * 4));
	else {
		iowrite32((reg * 4), adev->rio_mem + (mmMM_INDEX * 4));
		iowrite32(v, adev->rio_mem + (mmMM_DATA * 4));
	}

	if (adev->asic_type >= CHIP_VEGA10 && reg == 1 && adev->last_mm_index == 0x5702C) {
		udelay(500);
	}
}

/**
 * amdgpu_mm_rdoorbell - read a doorbell dword
 *
 * @adev: amdgpu_device pointer
 * @index: doorbell index
 *
 * Returns the value in the doorbell aperture at the
 * requested doorbell index (CIK).
 */
u32 amdgpu_mm_rdoorbell(struct amdgpu_device *adev, u32 index)
{
	if (index < adev->doorbell.num_doorbells) {
		return readl(adev->doorbell.ptr + index);
	} else {
		DRM_ERROR("reading beyond doorbell aperture: 0x%08x!\n", index);
		return 0;
	}
}

/**
 * amdgpu_mm_wdoorbell - write a doorbell dword
 *
 * @adev: amdgpu_device pointer
 * @index: doorbell index
 * @v: value to write
 *
 * Writes @v to the doorbell aperture at the
 * requested doorbell index (CIK).
 */
void amdgpu_mm_wdoorbell(struct amdgpu_device *adev, u32 index, u32 v)
{
	if (index < adev->doorbell.num_doorbells) {
		writel(v, adev->doorbell.ptr + index);
	} else {
		DRM_ERROR("writing beyond doorbell aperture: 0x%08x!\n", index);
	}
}

/**
 * amdgpu_mm_rdoorbell64 - read a doorbell Qword
 *
 * @adev: amdgpu_device pointer
 * @index: doorbell index
 *
 * Returns the value in the doorbell aperture at the
 * requested doorbell index (VEGA10+).
 */
u64 amdgpu_mm_rdoorbell64(struct amdgpu_device *adev, u32 index)
{
	if (index < adev->doorbell.num_doorbells) {
		return atomic64_read((atomic64_t *)(adev->doorbell.ptr + index));
	} else {
		DRM_ERROR("reading beyond doorbell aperture: 0x%08x!\n", index);
		return 0;
	}
}

/**
 * amdgpu_mm_wdoorbell64 - write a doorbell Qword
 *
 * @adev: amdgpu_device pointer
 * @index: doorbell index
 * @v: value to write
 *
 * Writes @v to the doorbell aperture at the
 * requested doorbell index (VEGA10+).
 */
void amdgpu_mm_wdoorbell64(struct amdgpu_device *adev, u32 index, u64 v)
{
	if (index < adev->doorbell.num_doorbells) {
		atomic64_set((atomic64_t *)(adev->doorbell.ptr + index), v);
	} else {
		DRM_ERROR("writing beyond doorbell aperture: 0x%08x!\n", index);
	}
}

/**
 * amdgpu_invalid_rreg - dummy reg read function
 *
 * @adev: amdgpu device pointer
 * @reg: offset of register
 *
 * Dummy register read function. Used for register blocks
 * that certain asics don't have (all asics).
 * Returns the value in the register.
 */
static uint32_t amdgpu_invalid_rreg(struct amdgpu_device *adev, uint32_t reg)
{
	DRM_ERROR("Invalid callback to read register 0x%04X\n", reg);
	BUG();
	return 0;
}

/**
 * amdgpu_invalid_wreg - dummy reg write function
 *
 * @adev: amdgpu device pointer
 * @reg: offset of register
 * @v: value to write to the register
 *
 * Dummy register write function. Used for register blocks
 * that certain asics don't have (all asics).
 */
static void amdgpu_invalid_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v)
{
	DRM_ERROR("Invalid callback to write register 0x%04X with 0x%08X\n",
		  reg, v);
	BUG();
}

/**
 * amdgpu_invalid_rreg64 - dummy 64 bit reg read function
 *
 * @adev: amdgpu device pointer
 * @reg: offset of register
 *
 * Dummy register read function. Used for register blocks
 * that certain asics don't have (all asics).
 * Returns the value in the register.
 */
static uint64_t amdgpu_invalid_rreg64(struct amdgpu_device *adev, uint32_t reg)
{
	DRM_ERROR("Invalid callback to read 64 bit register 0x%04X\n", reg);
	BUG();
	return 0;
}

/**
 * amdgpu_invalid_wreg64 - dummy reg write function
 *
 * @adev: amdgpu device pointer
 * @reg: offset of register
 * @v: value to write to the register
 *
 * Dummy register write function. Used for register blocks
 * that certain asics don't have (all asics).
 */
static void amdgpu_invalid_wreg64(struct amdgpu_device *adev, uint32_t reg, uint64_t v)
{
	DRM_ERROR("Invalid callback to write 64 bit register 0x%04X with 0x%08llX\n",
		  reg, v);
	BUG();
}

/**
 * amdgpu_block_invalid_rreg - dummy reg read function
 *
 * @adev: amdgpu device pointer
 * @block: offset of instance
 * @reg: offset of register
 *
 * Dummy register read function. Used for register blocks
 * that certain asics don't have (all asics).
 * Returns the value in the register.
 */
static uint32_t amdgpu_block_invalid_rreg(struct amdgpu_device *adev,
					  uint32_t block, uint32_t reg)
{
	DRM_ERROR("Invalid callback to read register 0x%04X in block 0x%04X\n",
		  reg, block);
	BUG();
	return 0;
}

/**
 * amdgpu_block_invalid_wreg - dummy reg write function
 *
 * @adev: amdgpu device pointer
 * @block: offset of instance
 * @reg: offset of register
 * @v: value to write to the register
 *
 * Dummy register write function. Used for register blocks
 * that certain asics don't have (all asics).
 */
static void amdgpu_block_invalid_wreg(struct amdgpu_device *adev,
				      uint32_t block,
				      uint32_t reg, uint32_t v)
{
	DRM_ERROR("Invalid block callback to write register 0x%04X in block 0x%04X with 0x%08X\n",
		  reg, block, v);
	BUG();
}

/**
 * amdgpu_device_vram_scratch_init - allocate the VRAM scratch page
 *
 * @adev: amdgpu device pointer
 *
 * Allocates a scratch page of VRAM for use by various things in the
 * driver.
 */
static int amdgpu_device_vram_scratch_init(struct amdgpu_device *adev)
{
	return amdgpu_bo_create_kernel(adev, AMDGPU_GPU_PAGE_SIZE,
				       PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM,
				       &adev->vram_scratch.robj,
				       &adev->vram_scratch.gpu_addr,
				       (void **)&adev->vram_scratch.ptr);
}

/**
 * amdgpu_device_vram_scratch_fini - Free the VRAM scratch page
 *
 * @adev: amdgpu device pointer
 *
 * Frees the VRAM scratch page.
 */
static void amdgpu_device_vram_scratch_fini(struct amdgpu_device *adev)
{
	amdgpu_bo_free_kernel(&adev->vram_scratch.robj, NULL, NULL);
}

/**
 * amdgpu_device_program_register_sequence - program an array of registers.
 *
 * @adev: amdgpu_device pointer
 * @registers: pointer to the register array
 * @array_size: size of the register array
 *
 * Programs an array of registers with AND and OR masks.
 * This is a helper for setting golden registers.
 */
void amdgpu_device_program_register_sequence(struct amdgpu_device *adev,
					     const u32 *registers,
					     const u32 array_size)
{
	u32 tmp, reg, and_mask, or_mask;
	int i;

	if (array_size % 3)
		return;

	for (i = 0; i < array_size; i += 3) {
		reg = registers[i + 0];
		and_mask = registers[i + 1];
		or_mask = registers[i + 2];

		if (and_mask == 0xffffffff) {
			tmp = or_mask;
		} else {
			tmp = RREG32(reg);
			tmp &= ~and_mask;
			if (adev->family >= AMDGPU_FAMILY_AI)
				tmp |= (or_mask & and_mask);
			else
				tmp |= or_mask;
		}
		WREG32(reg, tmp);
	}
}

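/*
 * Illustrative golden-register table for the helper above (mmSOME_REG is a
 * hypothetical register define, not a real one): entries are
 * {register, AND mask, OR mask} triples, so the array length must be a
 * multiple of 3.
 *
 *   static const u32 example_golden_settings[] = {
 *           mmSOME_REG, 0xffffff00, 0x00000012,
 *   };
 *   amdgpu_device_program_register_sequence(adev, example_golden_settings,
 *                                           ARRAY_SIZE(example_golden_settings));
 */
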
/**
 * amdgpu_device_pci_config_reset - reset the GPU
 *
 * @adev: amdgpu_device pointer
 *
 * Resets the GPU using the pci config reset sequence.
 * Only applicable to asics prior to vega10.
 */
void amdgpu_device_pci_config_reset(struct amdgpu_device *adev)
{
	pci_write_config_dword(adev->pdev, 0x7c, AMDGPU_ASIC_RESET_DATA);
}

/*
 * GPU doorbell aperture helper functions.
 */
/**
 * amdgpu_device_doorbell_init - Init doorbell driver information.
 *
 * @adev: amdgpu_device pointer
 *
 * Init doorbell driver information (CIK)
 * Returns 0 on success, error on failure.
 */
static int amdgpu_device_doorbell_init(struct amdgpu_device *adev)
{

	/* No doorbell on SI hardware generation */
	if (adev->asic_type < CHIP_BONAIRE) {
		adev->doorbell.base = 0;
		adev->doorbell.size = 0;
		adev->doorbell.num_doorbells = 0;
		adev->doorbell.ptr = NULL;
		return 0;
	}

	if (pci_resource_flags(adev->pdev, 2) & IORESOURCE_UNSET)
		return -EINVAL;

	amdgpu_asic_init_doorbell_index(adev);

	/* doorbell bar mapping */
	adev->doorbell.base = pci_resource_start(adev->pdev, 2);
	adev->doorbell.size = pci_resource_len(adev->pdev, 2);

	adev->doorbell.num_doorbells = min_t(u32, adev->doorbell.size / sizeof(u32),
					     adev->doorbell_index.max_assignment+1);
	if (adev->doorbell.num_doorbells == 0)
		return -EINVAL;

	/* For Vega, reserve and map two pages on the doorbell BAR since the
	 * SDMA paging queue doorbell uses the second page. The
	 * AMDGPU_DOORBELL64_MAX_ASSIGNMENT definition assumes all the
	 * doorbells are in the first page, so with the paging queue enabled,
	 * num_doorbells is extended by one page (0x400 dwords).
	 */
	if (adev->asic_type >= CHIP_VEGA10)
		adev->doorbell.num_doorbells += 0x400;

	adev->doorbell.ptr = ioremap(adev->doorbell.base,
				     adev->doorbell.num_doorbells *
				     sizeof(u32));
	if (adev->doorbell.ptr == NULL)
		return -ENOMEM;

	return 0;
}

/**
 * amdgpu_device_doorbell_fini - Tear down doorbell driver information.
 *
 * @adev: amdgpu_device pointer
 *
 * Tear down doorbell driver information (CIK)
 */
static void amdgpu_device_doorbell_fini(struct amdgpu_device *adev)
{
	iounmap(adev->doorbell.ptr);
	adev->doorbell.ptr = NULL;
}

/*
 * amdgpu_device_wb_*()
 * Writeback is the method by which the GPU updates special pages in memory
 * with the status of certain GPU events (fences, ring pointers, etc.).
 */

/**
 * amdgpu_device_wb_fini - Disable Writeback and free memory
 *
 * @adev: amdgpu_device pointer
 *
 * Disables Writeback and frees the Writeback memory (all asics).
 * Used at driver shutdown.
 */
static void amdgpu_device_wb_fini(struct amdgpu_device *adev)
{
	if (adev->wb.wb_obj) {
		amdgpu_bo_free_kernel(&adev->wb.wb_obj,
				      &adev->wb.gpu_addr,
				      (void **)&adev->wb.wb);
		adev->wb.wb_obj = NULL;
	}
}

/**
 * amdgpu_device_wb_init - Init Writeback driver info and allocate memory
 *
 * @adev: amdgpu_device pointer
 *
 * Initializes writeback and allocates writeback memory (all asics).
 * Used at driver startup.
 * Returns 0 on success or an -error on failure.
 */
static int amdgpu_device_wb_init(struct amdgpu_device *adev)
{
	int r;

	if (adev->wb.wb_obj == NULL) {
		/* AMDGPU_MAX_WB * sizeof(uint32_t) * 8 = AMDGPU_MAX_WB 256bit slots */
		r = amdgpu_bo_create_kernel(adev, AMDGPU_MAX_WB * sizeof(uint32_t) * 8,
					    PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT,
					    &adev->wb.wb_obj, &adev->wb.gpu_addr,
					    (void **)&adev->wb.wb);
		if (r) {
			dev_warn(adev->dev, "(%d) create WB bo failed\n", r);
			return r;
		}

		adev->wb.num_wb = AMDGPU_MAX_WB;
		memset(&adev->wb.used, 0, sizeof(adev->wb.used));

		/* clear wb memory */
		memset((char *)adev->wb.wb, 0, AMDGPU_MAX_WB * sizeof(uint32_t) * 8);
	}

	return 0;
}

/**
 * amdgpu_device_wb_get - Allocate a wb entry
 *
 * @adev: amdgpu_device pointer
 * @wb: wb index
 *
 * Allocate a wb slot for use by the driver (all asics).
 * Returns 0 on success or -EINVAL on failure.
 */
int amdgpu_device_wb_get(struct amdgpu_device *adev, u32 *wb)
{
	unsigned long offset = find_first_zero_bit(adev->wb.used, adev->wb.num_wb);

	if (offset < adev->wb.num_wb) {
		__set_bit(offset, adev->wb.used);
		*wb = offset << 3; /* convert to dw offset */
		return 0;
	} else {
		return -EINVAL;
	}
}

/**
 * amdgpu_device_wb_free - Free a wb entry
 *
 * @adev: amdgpu_device pointer
 * @wb: wb index
 *
 * Free a wb slot allocated for use by the driver (all asics)
 */
void amdgpu_device_wb_free(struct amdgpu_device *adev, u32 wb)
{
	wb >>= 3;
	if (wb < adev->wb.num_wb)
		__clear_bit(wb, adev->wb.used);
}

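/*
 * Illustrative use of the writeback helpers above (not taken from the
 * driver): the GPU writes a status dword to wb_gpu_addr and the CPU reads
 * it back through adev->wb.wb[wb].
 *
 *   u32 wb;
 *
 *   if (!amdgpu_device_wb_get(adev, &wb)) {
 *           u64 wb_gpu_addr = adev->wb.gpu_addr + (wb * 4);
 *           ...
 *           amdgpu_device_wb_free(adev, wb);
 *   }
 */
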
/**
 * amdgpu_device_resize_fb_bar - try to resize FB BAR
 *
 * @adev: amdgpu_device pointer
 *
 * Try to resize FB BAR to make all VRAM CPU accessible. We try very hard not
 * to fail, but if any of the BARs is not accessible after the size we abort
 * driver loading by returning -ENODEV.
 */
int amdgpu_device_resize_fb_bar(struct amdgpu_device *adev)
{
	u64 space_needed = roundup_pow_of_two(adev->gmc.real_vram_size);
	u32 rbar_size = order_base_2(((space_needed >> 20) | 1)) - 1;
	struct pci_bus *root;
	struct resource *res;
	unsigned i;
	u16 cmd;
	int r;

	/* Bypass for VF */
	if (amdgpu_sriov_vf(adev))
		return 0;

	/* Check if the root BUS has 64bit memory resources */
	root = adev->pdev->bus;
	while (root->parent)
		root = root->parent;

	pci_bus_for_each_resource(root, res, i) {
		if (res && res->flags & (IORESOURCE_MEM | IORESOURCE_MEM_64) &&
		    res->start > 0x100000000ull)
			break;
	}

	/* Trying to resize is pointless without a root hub window above 4GB */
	if (!res)
		return 0;

	/* Disable memory decoding while we change the BAR addresses and size */
	pci_read_config_word(adev->pdev, PCI_COMMAND, &cmd);
	pci_write_config_word(adev->pdev, PCI_COMMAND,
			      cmd & ~PCI_COMMAND_MEMORY);

	/* Free the VRAM and doorbell BAR, we most likely need to move both. */
	amdgpu_device_doorbell_fini(adev);
	if (adev->asic_type >= CHIP_BONAIRE)
		pci_release_resource(adev->pdev, 2);

	pci_release_resource(adev->pdev, 0);

	r = pci_resize_resource(adev->pdev, 0, rbar_size);
	if (r == -ENOSPC)
		DRM_INFO("Not enough PCI address space for a large BAR.");
	else if (r && r != -ENOTSUPP)
		DRM_ERROR("Problem resizing BAR0 (%d).", r);

	pci_assign_unassigned_bus_resources(adev->pdev->bus);

	/* When the doorbell or fb BAR isn't available we have no chance of
	 * using the device.
	 */
	r = amdgpu_device_doorbell_init(adev);
	if (r || (pci_resource_flags(adev->pdev, 0) & IORESOURCE_UNSET))
		return -ENODEV;

	pci_write_config_word(adev->pdev, PCI_COMMAND, cmd);

	return 0;
}

/*
 * GPU helpers function.
 */
/**
 * amdgpu_device_need_post - check if the hw needs post or not
 *
 * @adev: amdgpu_device pointer
 *
 * Check if the asic has been initialized (all asics) at driver startup
 * or if a post is needed because a hw reset was performed.
 * Returns true if post is needed or false if not.
 */
bool amdgpu_device_need_post(struct amdgpu_device *adev)
{
	uint32_t reg;

	if (amdgpu_sriov_vf(adev))
		return false;

	if (amdgpu_passthrough(adev)) {
		/* for FIJI: In whole GPU pass-through virtualization case, after VM reboot
		 * some old smc fw still need the driver to do a vPost, otherwise the gpu
		 * hangs, while smc fw versions above 22.15 don't have this flaw, so we
		 * force vPost to be executed for smc versions below 22.15
		 */
		if (adev->asic_type == CHIP_FIJI) {
			int err;
			uint32_t fw_ver;
			err = request_firmware(&adev->pm.fw, "amdgpu/fiji_smc.bin", adev->dev);
			/* force vPost if error occurred */
			if (err)
				return true;

			fw_ver = *((uint32_t *)adev->pm.fw->data + 69);
			if (fw_ver < 0x00160e00)
				return true;
		}
	}

	if (adev->has_hw_reset) {
		adev->has_hw_reset = false;
		return true;
	}

	/* bios scratch used on CIK+ */
	if (adev->asic_type >= CHIP_BONAIRE)
		return amdgpu_atombios_scratch_need_asic_init(adev);

	/* check MEM_SIZE for older asics */
	reg = amdgpu_asic_get_config_memsize(adev);

	if ((reg != 0) && (reg != 0xffffffff))
		return false;

	return true;
}

/* if we get transitioned to only one device, take VGA back */
/**
 * amdgpu_device_vga_set_decode - enable/disable vga decode
 *
 * @cookie: amdgpu_device pointer
 * @state: enable/disable vga decode
 *
 * Enable/disable vga decode (all asics).
 * Returns VGA resource flags.
 */
static unsigned int amdgpu_device_vga_set_decode(void *cookie, bool state)
{
	struct amdgpu_device *adev = cookie;
	amdgpu_asic_set_vga_state(adev, state);
	if (state)
		return VGA_RSRC_LEGACY_IO | VGA_RSRC_LEGACY_MEM |
		       VGA_RSRC_NORMAL_IO | VGA_RSRC_NORMAL_MEM;
	else
		return VGA_RSRC_NORMAL_IO | VGA_RSRC_NORMAL_MEM;
}

/**
 * amdgpu_device_check_block_size - validate the vm block size
 *
 * @adev: amdgpu_device pointer
 *
 * Validates the vm block size specified via module parameter.
 * The vm block size defines number of bits in page table versus page directory,
 * a page is 4KB so we have 12 bits offset, minimum 9 bits in the
 * page table and the remaining bits are in the page directory.
 */
static void amdgpu_device_check_block_size(struct amdgpu_device *adev)
{
	/* defines number of bits in page table versus page directory,
	 * a page is 4KB so we have 12 bits offset, minimum 9 bits in the
	 * page table and the remaining bits are in the page directory */
	if (amdgpu_vm_block_size == -1)
		return;

	if (amdgpu_vm_block_size < 9) {
		dev_warn(adev->dev, "VM page table size (%d) too small\n",
			 amdgpu_vm_block_size);
		amdgpu_vm_block_size = -1;
	}
}

/**
 * amdgpu_device_check_vm_size - validate the vm size
 *
 * @adev: amdgpu_device pointer
 *
 * Validates the vm size in GB specified via module parameter.
 * The VM size is the size of the GPU virtual memory space in GB.
 */
static void amdgpu_device_check_vm_size(struct amdgpu_device *adev)
{
	/* no need to check the default value */
	if (amdgpu_vm_size == -1)
		return;

	if (amdgpu_vm_size < 1) {
		dev_warn(adev->dev, "VM size (%d) too small, min is 1GB\n",
			 amdgpu_vm_size);
		amdgpu_vm_size = -1;
	}
}

static void amdgpu_device_check_smu_prv_buffer_size(struct amdgpu_device *adev)
{
	struct sysinfo si;
	bool is_os_64 = (sizeof(void *) == 8) ? true : false;
	uint64_t total_memory;
	uint64_t dram_size_seven_GB = 0x1B8000000;
	uint64_t dram_size_three_GB = 0xB8000000;

	if (amdgpu_smu_memory_pool_size == 0)
		return;

	if (!is_os_64) {
		DRM_WARN("Not 64-bit OS, feature not supported\n");
		goto def_value;
	}
	si_meminfo(&si);
	total_memory = (uint64_t)si.totalram * si.mem_unit;

	if ((amdgpu_smu_memory_pool_size == 1) ||
	    (amdgpu_smu_memory_pool_size == 2)) {
		if (total_memory < dram_size_three_GB)
			goto def_value1;
	} else if ((amdgpu_smu_memory_pool_size == 4) ||
		   (amdgpu_smu_memory_pool_size == 8)) {
		if (total_memory < dram_size_seven_GB)
			goto def_value1;
	} else {
		DRM_WARN("Smu memory pool size not supported\n");
		goto def_value;
	}
	adev->pm.smu_prv_buffer_size = amdgpu_smu_memory_pool_size << 28;

	return;

def_value1:
	DRM_WARN("Not enough system memory\n");
def_value:
	adev->pm.smu_prv_buffer_size = 0;
}

/**
 * amdgpu_device_check_arguments - validate module params
 *
 * @adev: amdgpu_device pointer
 *
 * Validates certain module parameters and updates
 * the associated values used by the driver (all asics).
 */
static int amdgpu_device_check_arguments(struct amdgpu_device *adev)
{
	int ret = 0;

	if (amdgpu_sched_jobs < 4) {
		dev_warn(adev->dev, "sched jobs (%d) must be at least 4\n",
			 amdgpu_sched_jobs);
		amdgpu_sched_jobs = 4;
	} else if (!is_power_of_2(amdgpu_sched_jobs)) {
		dev_warn(adev->dev, "sched jobs (%d) must be a power of 2\n",
			 amdgpu_sched_jobs);
		amdgpu_sched_jobs = roundup_pow_of_two(amdgpu_sched_jobs);
	}

	if (amdgpu_gart_size != -1 && amdgpu_gart_size < 32) {
		/* gart size must be greater or equal to 32M */
		dev_warn(adev->dev, "gart size (%d) too small\n",
			 amdgpu_gart_size);
		amdgpu_gart_size = -1;
	}

	if (amdgpu_gtt_size != -1 && amdgpu_gtt_size < 32) {
		/* gtt size must be greater or equal to 32M */
		dev_warn(adev->dev, "gtt size (%d) too small\n",
			 amdgpu_gtt_size);
		amdgpu_gtt_size = -1;
	}

	/* valid range is between 4 and 9 inclusive */
	if (amdgpu_vm_fragment_size != -1 &&
	    (amdgpu_vm_fragment_size > 9 || amdgpu_vm_fragment_size < 4)) {
		dev_warn(adev->dev, "valid range is between 4 and 9\n");
		amdgpu_vm_fragment_size = -1;
	}

	amdgpu_device_check_smu_prv_buffer_size(adev);

	amdgpu_device_check_vm_size(adev);

	amdgpu_device_check_block_size(adev);

	ret = amdgpu_device_get_job_timeout_settings(adev);
	if (ret) {
		dev_err(adev->dev, "invalid lockup_timeout parameter syntax\n");
		return ret;
	}

	adev->firmware.load_type = amdgpu_ucode_get_load_type(adev, amdgpu_fw_load_type);

	return ret;
}

/**
 * amdgpu_switcheroo_set_state - set switcheroo state
 *
 * @pdev: pci dev pointer
 * @state: vga_switcheroo state
 *
 * Callback for the switcheroo driver. Suspends or resumes the
 * asics before or after it is powered up using ACPI methods.
 */
static void amdgpu_switcheroo_set_state(struct pci_dev *pdev, enum vga_switcheroo_state state)
{
	struct drm_device *dev = pci_get_drvdata(pdev);

	if (amdgpu_device_is_px(dev) && state == VGA_SWITCHEROO_OFF)
		return;

	if (state == VGA_SWITCHEROO_ON) {
		pr_info("amdgpu: switched on\n");
		/* don't suspend or resume card normally */
		dev->switch_power_state = DRM_SWITCH_POWER_CHANGING;

		amdgpu_device_resume(dev, true, true);

		dev->switch_power_state = DRM_SWITCH_POWER_ON;
		drm_kms_helper_poll_enable(dev);
	} else {
		pr_info("amdgpu: switched off\n");
		drm_kms_helper_poll_disable(dev);
		dev->switch_power_state = DRM_SWITCH_POWER_CHANGING;
		amdgpu_device_suspend(dev, true, true);
		dev->switch_power_state = DRM_SWITCH_POWER_OFF;
	}
}

/**
 * amdgpu_switcheroo_can_switch - see if switcheroo state can change
 *
 * @pdev: pci dev pointer
 *
 * Callback for the switcheroo driver. Check if the switcheroo
 * state can be changed.
 * Returns true if the state can be changed, false if not.
 */
static bool amdgpu_switcheroo_can_switch(struct pci_dev *pdev)
{
	struct drm_device *dev = pci_get_drvdata(pdev);

	/*
	 * FIXME: open_count is protected by drm_global_mutex but that would lead to
	 * locking inversion with the driver load path. And the access here is
	 * completely racy anyway. So don't bother with locking for now.
	 */
	return dev->open_count == 0;
}

static const struct vga_switcheroo_client_ops amdgpu_switcheroo_ops = {
	.set_gpu_state = amdgpu_switcheroo_set_state,
	.reprobe = NULL,
	.can_switch = amdgpu_switcheroo_can_switch,
};

/**
 * amdgpu_device_ip_set_clockgating_state - set the CG state
 *
 * @dev: amdgpu_device pointer
 * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
 * @state: clockgating state (gate or ungate)
 *
 * Sets the requested clockgating state for all instances of
 * the hardware IP specified.
 * Returns the error code from the last instance.
 */
int amdgpu_device_ip_set_clockgating_state(void *dev,
					   enum amd_ip_block_type block_type,
					   enum amd_clockgating_state state)
{
	struct amdgpu_device *adev = dev;
	int i, r = 0;

	for (i = 0; i < adev->num_ip_blocks; i++) {
		if (!adev->ip_blocks[i].status.valid)
			continue;
		if (adev->ip_blocks[i].version->type != block_type)
			continue;
		if (!adev->ip_blocks[i].version->funcs->set_clockgating_state)
			continue;
		r = adev->ip_blocks[i].version->funcs->set_clockgating_state(
			(void *)adev, state);
		if (r)
			DRM_ERROR("set_clockgating_state of IP block <%s> failed %d\n",
				  adev->ip_blocks[i].version->funcs->name, r);
	}
	return r;
}

/**
 * amdgpu_device_ip_set_powergating_state - set the PG state
 *
 * @dev: amdgpu_device pointer
 * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
 * @state: powergating state (gate or ungate)
 *
 * Sets the requested powergating state for all instances of
 * the hardware IP specified.
 * Returns the error code from the last instance.
 */
int amdgpu_device_ip_set_powergating_state(void *dev,
					   enum amd_ip_block_type block_type,
					   enum amd_powergating_state state)
{
	struct amdgpu_device *adev = dev;
	int i, r = 0;

	for (i = 0; i < adev->num_ip_blocks; i++) {
		if (!adev->ip_blocks[i].status.valid)
			continue;
		if (adev->ip_blocks[i].version->type != block_type)
			continue;
		if (!adev->ip_blocks[i].version->funcs->set_powergating_state)
			continue;
		r = adev->ip_blocks[i].version->funcs->set_powergating_state(
			(void *)adev, state);
		if (r)
			DRM_ERROR("set_powergating_state of IP block <%s> failed %d\n",
				  adev->ip_blocks[i].version->funcs->name, r);
	}
	return r;
}

/**
 * amdgpu_device_ip_get_clockgating_state - get the CG state
 *
 * @adev: amdgpu_device pointer
 * @flags: clockgating feature flags
 *
 * Walks the list of IPs on the device and updates the clockgating
 * flags for each IP.
 * Updates @flags with the feature flags for each hardware IP where
 * clockgating is enabled.
 */
void amdgpu_device_ip_get_clockgating_state(struct amdgpu_device *adev,
					    u32 *flags)
{
	int i;

	for (i = 0; i < adev->num_ip_blocks; i++) {
		if (!adev->ip_blocks[i].status.valid)
			continue;
		if (adev->ip_blocks[i].version->funcs->get_clockgating_state)
			adev->ip_blocks[i].version->funcs->get_clockgating_state((void *)adev, flags);
	}
}

/**
 * amdgpu_device_ip_wait_for_idle - wait for idle
 *
 * @adev: amdgpu_device pointer
 * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
 *
 * Waits for the requested hardware IP to be idle.
 * Returns 0 for success or a negative error code on failure.
 */
int amdgpu_device_ip_wait_for_idle(struct amdgpu_device *adev,
				   enum amd_ip_block_type block_type)
{
	int i, r;

	for (i = 0; i < adev->num_ip_blocks; i++) {
		if (!adev->ip_blocks[i].status.valid)
			continue;
		if (adev->ip_blocks[i].version->type == block_type) {
			r = adev->ip_blocks[i].version->funcs->wait_for_idle((void *)adev);
			if (r)
				return r;
			break;
		}
	}
	return 0;

}

/**
 * amdgpu_device_ip_is_idle - is the hardware IP idle
 *
 * @adev: amdgpu_device pointer
 * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
 *
 * Check if the hardware IP is idle or not.
 * Returns true if the IP is idle, false if not.
 */
bool amdgpu_device_ip_is_idle(struct amdgpu_device *adev,
			      enum amd_ip_block_type block_type)
{
	int i;

	for (i = 0; i < adev->num_ip_blocks; i++) {
		if (!adev->ip_blocks[i].status.valid)
			continue;
		if (adev->ip_blocks[i].version->type == block_type)
			return adev->ip_blocks[i].version->funcs->is_idle((void *)adev);
	}
	return true;

}

/**
 * amdgpu_device_ip_get_ip_block - get a hw IP pointer
 *
 * @adev: amdgpu_device pointer
 * @type: Type of hardware IP (SMU, GFX, UVD, etc.)
 *
 * Returns a pointer to the hardware IP block structure
 * if it exists for the asic, otherwise NULL.
 */
struct amdgpu_ip_block *
amdgpu_device_ip_get_ip_block(struct amdgpu_device *adev,
			      enum amd_ip_block_type type)
{
	int i;

	for (i = 0; i < adev->num_ip_blocks; i++)
		if (adev->ip_blocks[i].version->type == type)
			return &adev->ip_blocks[i];

	return NULL;
}

/**
 * amdgpu_device_ip_block_version_cmp
 *
 * @adev: amdgpu_device pointer
 * @type: enum amd_ip_block_type
 * @major: major version
 * @minor: minor version
 *
 * return 0 if equal or greater
 * return 1 if smaller or the ip_block doesn't exist
 */
int amdgpu_device_ip_block_version_cmp(struct amdgpu_device *adev,
				       enum amd_ip_block_type type,
				       u32 major, u32 minor)
{
	struct amdgpu_ip_block *ip_block = amdgpu_device_ip_get_ip_block(adev, type);

	if (ip_block && ((ip_block->version->major > major) ||
			((ip_block->version->major == major) &&
			(ip_block->version->minor >= minor))))
		return 0;

	return 1;
}

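/*
 * Illustrative check using the helper above (the block type and version are
 * arbitrary examples, not a requirement of the driver): this evaluates to
 * true when the GFX IP on this asic is at least version 8.0.
 *
 *   if (!amdgpu_device_ip_block_version_cmp(adev, AMD_IP_BLOCK_TYPE_GFX, 8, 0))
 *           ... GFX IP is version 8.0 or newer ...
 */
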
/**
 * amdgpu_device_ip_block_add
 *
 * @adev: amdgpu_device pointer
 * @ip_block_version: pointer to the IP to add
 *
 * Adds the IP block driver information to the collection of IPs
 * on the asic.
 */
int amdgpu_device_ip_block_add(struct amdgpu_device *adev,
			       const struct amdgpu_ip_block_version *ip_block_version)
{
	if (!ip_block_version)
		return -EINVAL;

	DRM_INFO("add ip block number %d <%s>\n", adev->num_ip_blocks,
		  ip_block_version->funcs->name);

	adev->ip_blocks[adev->num_ip_blocks++].version = ip_block_version;

	return 0;
}

/**
 * amdgpu_device_enable_virtual_display - enable virtual display feature
 *
 * @adev: amdgpu_device pointer
 *
 * Enables the virtual display feature if the user has enabled it via
 * the module parameter virtual_display. This feature provides a virtual
 * display hardware on headless boards or in virtualized environments.
 * This function parses and validates the configuration string specified by
 * the user and configures the virtual display configuration (number of
 * virtual connectors, crtcs, etc.) specified.
 */
static void amdgpu_device_enable_virtual_display(struct amdgpu_device *adev)
{
	adev->enable_virtual_display = false;

	if (amdgpu_virtual_display) {
		struct drm_device *ddev = adev->ddev;
		const char *pci_address_name = pci_name(ddev->pdev);
		char *pciaddstr, *pciaddstr_tmp, *pciaddname_tmp, *pciaddname;

		pciaddstr = kstrdup(amdgpu_virtual_display, GFP_KERNEL);
		pciaddstr_tmp = pciaddstr;
		while ((pciaddname_tmp = strsep(&pciaddstr_tmp, ";"))) {
			pciaddname = strsep(&pciaddname_tmp, ",");
			if (!strcmp("all", pciaddname)
			    || !strcmp(pci_address_name, pciaddname)) {
				long num_crtc;
				int res = -1;

				adev->enable_virtual_display = true;

				if (pciaddname_tmp)
					res = kstrtol(pciaddname_tmp, 10,
						      &num_crtc);

				if (!res) {
					if (num_crtc < 1)
						num_crtc = 1;
					if (num_crtc > 6)
						num_crtc = 6;
					adev->mode_info.num_crtc = num_crtc;
				} else {
					adev->mode_info.num_crtc = 1;
				}
				break;
			}
		}

		DRM_INFO("virtual display string:%s, %s:virtual_display:%d, num_crtc:%d\n",
			 amdgpu_virtual_display, pci_address_name,
			 adev->enable_virtual_display, adev->mode_info.num_crtc);

		kfree(pciaddstr);
	}
}

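/*
 * Illustrative module-parameter strings parsed by the function above (the
 * PCI address is a placeholder); entries follow the
 * "address,crtc_count;address,crtc_count;..." format handled by the parser:
 *
 *   amdgpu.virtual_display=0000:03:00.0,2   - two virtual crtcs on that GPU
 *   amdgpu.virtual_display=all              - enable on all amdgpu devices
 */
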
/**
 * amdgpu_device_parse_gpu_info_fw - parse gpu info firmware
 *
 * @adev: amdgpu_device pointer
 *
 * Parses the asic configuration parameters specified in the gpu info
 * firmware and makes them available to the driver for use in configuring
 * the asic.
 * Returns 0 on success, -EINVAL on failure.
 */
static int amdgpu_device_parse_gpu_info_fw(struct amdgpu_device *adev)
{
	const char *chip_name;
	char fw_name[30];
	int err;
	const struct gpu_info_firmware_header_v1_0 *hdr;

	adev->firmware.gpu_info_fw = NULL;

	switch (adev->asic_type) {
	case CHIP_TOPAZ:
	case CHIP_TONGA:
	case CHIP_FIJI:
	case CHIP_POLARIS10:
	case CHIP_POLARIS11:
	case CHIP_POLARIS12:
	case CHIP_VEGAM:
	case CHIP_CARRIZO:
	case CHIP_STONEY:
#ifdef CONFIG_DRM_AMDGPU_SI
	case CHIP_VERDE:
	case CHIP_TAHITI:
	case CHIP_PITCAIRN:
	case CHIP_OLAND:
	case CHIP_HAINAN:
#endif
#ifdef CONFIG_DRM_AMDGPU_CIK
	case CHIP_BONAIRE:
	case CHIP_HAWAII:
	case CHIP_KAVERI:
	case CHIP_KABINI:
	case CHIP_MULLINS:
#endif
	case CHIP_VEGA20:
	default:
		return 0;
	case CHIP_VEGA10:
		chip_name = "vega10";
		break;
	case CHIP_VEGA12:
		chip_name = "vega12";
		break;
	case CHIP_RAVEN:
		if (adev->rev_id >= 8)
			chip_name = "raven2";
		else if (adev->pdev->device == 0x15d8)
			chip_name = "picasso";
		else
			chip_name = "raven";
		break;
	case CHIP_ARCTURUS:
		chip_name = "arcturus";
		break;
	case CHIP_RENOIR:
		chip_name = "renoir";
		break;
	case CHIP_NAVI10:
		chip_name = "navi10";
		break;
	case CHIP_NAVI14:
		chip_name = "navi14";
		break;
	case CHIP_NAVI12:
		chip_name = "navi12";
		break;
	}

	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_gpu_info.bin", chip_name);
	err = request_firmware(&adev->firmware.gpu_info_fw, fw_name, adev->dev);
	if (err) {
		dev_err(adev->dev,
			"Failed to load gpu_info firmware \"%s\"\n",
			fw_name);
		goto out;
	}
	err = amdgpu_ucode_validate(adev->firmware.gpu_info_fw);
	if (err) {
		dev_err(adev->dev,
			"Failed to validate gpu_info firmware \"%s\"\n",
			fw_name);
		goto out;
	}

	hdr = (const struct gpu_info_firmware_header_v1_0 *)adev->firmware.gpu_info_fw->data;
	amdgpu_ucode_print_gpu_info_hdr(&hdr->header);

	switch (hdr->version_major) {
	case 1:
	{
		const struct gpu_info_firmware_v1_0 *gpu_info_fw =
			(const struct gpu_info_firmware_v1_0 *)(adev->firmware.gpu_info_fw->data +
								le32_to_cpu(hdr->header.ucode_array_offset_bytes));

		adev->gfx.config.max_shader_engines = le32_to_cpu(gpu_info_fw->gc_num_se);
		adev->gfx.config.max_cu_per_sh = le32_to_cpu(gpu_info_fw->gc_num_cu_per_sh);
		adev->gfx.config.max_sh_per_se = le32_to_cpu(gpu_info_fw->gc_num_sh_per_se);
		adev->gfx.config.max_backends_per_se = le32_to_cpu(gpu_info_fw->gc_num_rb_per_se);
		adev->gfx.config.max_texture_channel_caches =
			le32_to_cpu(gpu_info_fw->gc_num_tccs);
		adev->gfx.config.max_gprs = le32_to_cpu(gpu_info_fw->gc_num_gprs);
		adev->gfx.config.max_gs_threads = le32_to_cpu(gpu_info_fw->gc_num_max_gs_thds);
		adev->gfx.config.gs_vgt_table_depth = le32_to_cpu(gpu_info_fw->gc_gs_table_depth);
		adev->gfx.config.gs_prim_buffer_depth = le32_to_cpu(gpu_info_fw->gc_gsprim_buff_depth);
		adev->gfx.config.double_offchip_lds_buf =
			le32_to_cpu(gpu_info_fw->gc_double_offchip_lds_buffer);
		adev->gfx.cu_info.wave_front_size = le32_to_cpu(gpu_info_fw->gc_wave_size);
		adev->gfx.cu_info.max_waves_per_simd =
			le32_to_cpu(gpu_info_fw->gc_max_waves_per_simd);
		adev->gfx.cu_info.max_scratch_slots_per_cu =
			le32_to_cpu(gpu_info_fw->gc_max_scratch_slots_per_cu);
		adev->gfx.cu_info.lds_size = le32_to_cpu(gpu_info_fw->gc_lds_size);
		if (hdr->version_minor >= 1) {
			const struct gpu_info_firmware_v1_1 *gpu_info_fw =
				(const struct gpu_info_firmware_v1_1 *)(adev->firmware.gpu_info_fw->data +
									le32_to_cpu(hdr->header.ucode_array_offset_bytes));
			adev->gfx.config.num_sc_per_sh =
				le32_to_cpu(gpu_info_fw->num_sc_per_sh);
			adev->gfx.config.num_packer_per_sc =
				le32_to_cpu(gpu_info_fw->num_packer_per_sc);
		}
#ifdef CONFIG_DRM_AMD_DC_DCN2_0
		if (hdr->version_minor == 2) {
			const struct gpu_info_firmware_v1_2 *gpu_info_fw =
				(const struct gpu_info_firmware_v1_2 *)(adev->firmware.gpu_info_fw->data +
									le32_to_cpu(hdr->header.ucode_array_offset_bytes));
			adev->dm.soc_bounding_box = &gpu_info_fw->soc_bounding_box;
		}
#endif
		break;
	}
	default:
		dev_err(adev->dev,
			"Unsupported gpu_info table %d\n", hdr->header.ucode_version);
		err = -EINVAL;
		goto out;
	}
out:
	return err;
}

/**
 * amdgpu_device_ip_early_init - run early init for hardware IPs
 *
 * @adev: amdgpu_device pointer
 *
 * Early initialization pass for hardware IPs. The hardware IPs that make
 * up each asic are discovered, and each IP's early_init callback is run. This
 * is the first stage in initializing the asic.
 * Returns 0 on success, negative error code on failure.
 */
static int amdgpu_device_ip_early_init(struct amdgpu_device *adev)
{
	int i, r;

	amdgpu_device_enable_virtual_display(adev);

	switch (adev->asic_type) {
	case CHIP_TOPAZ:
	case CHIP_TONGA:
	case CHIP_FIJI:
	case CHIP_POLARIS10:
	case CHIP_POLARIS11:
	case CHIP_POLARIS12:
	case CHIP_VEGAM:
	case CHIP_CARRIZO:
	case CHIP_STONEY:
		if (adev->asic_type == CHIP_CARRIZO || adev->asic_type == CHIP_STONEY)
			adev->family = AMDGPU_FAMILY_CZ;
		else
			adev->family = AMDGPU_FAMILY_VI;

		r = vi_set_ip_blocks(adev);
		if (r)
			return r;
		break;
#ifdef CONFIG_DRM_AMDGPU_SI
	case CHIP_VERDE:
	case CHIP_TAHITI:
	case CHIP_PITCAIRN:
	case CHIP_OLAND:
	case CHIP_HAINAN:
		adev->family = AMDGPU_FAMILY_SI;
		r = si_set_ip_blocks(adev);
		if (r)
			return r;
		break;
#endif
#ifdef CONFIG_DRM_AMDGPU_CIK
	case CHIP_BONAIRE:
	case CHIP_HAWAII:
	case CHIP_KAVERI:
	case CHIP_KABINI:
	case CHIP_MULLINS:
		if ((adev->asic_type == CHIP_BONAIRE) || (adev->asic_type == CHIP_HAWAII))
			adev->family = AMDGPU_FAMILY_CI;
		else
			adev->family = AMDGPU_FAMILY_KV;

		r = cik_set_ip_blocks(adev);
		if (r)
			return r;
		break;
#endif
	case CHIP_VEGA10:
	case CHIP_VEGA12:
	case CHIP_VEGA20:
	case CHIP_RAVEN:
	case CHIP_ARCTURUS:
	case CHIP_RENOIR:
		if (adev->asic_type == CHIP_RAVEN ||
		    adev->asic_type == CHIP_RENOIR)
			adev->family = AMDGPU_FAMILY_RV;
		else
			adev->family = AMDGPU_FAMILY_AI;

		r = soc15_set_ip_blocks(adev);
		if (r)
			return r;
		break;
	case CHIP_NAVI10:
	case CHIP_NAVI14:
	case CHIP_NAVI12:
		adev->family = AMDGPU_FAMILY_NV;

		r = nv_set_ip_blocks(adev);
		if (r)
			return r;
		break;
	default:
		/* FIXME: not supported yet */
		return -EINVAL;
	}

	r = amdgpu_device_parse_gpu_info_fw(adev);
	if (r)
		return r;

	amdgpu_amdkfd_device_probe(adev);

	if (amdgpu_sriov_vf(adev)) {
		r = amdgpu_virt_request_full_gpu(adev, true);
		if (r)
			return -EAGAIN;
	}

	adev->pm.pp_feature = amdgpu_pp_feature_mask;
	if (amdgpu_sriov_vf(adev)
	#ifdef CONFIG_HSA_AMD
	    || sched_policy == KFD_SCHED_POLICY_NO_HWS
	#endif
	    )
		adev->pm.pp_feature &= ~PP_GFXOFF_MASK;

	for (i = 0; i < adev->num_ip_blocks; i++) {
		if ((amdgpu_ip_block_mask & (1 << i)) == 0) {
			DRM_ERROR("disabled ip block: %d <%s>\n",
				  i, adev->ip_blocks[i].version->funcs->name);
			adev->ip_blocks[i].status.valid = false;
		} else {
			if (adev->ip_blocks[i].version->funcs->early_init) {
				r = adev->ip_blocks[i].version->funcs->early_init((void *)adev);
				if (r == -ENOENT) {
					adev->ip_blocks[i].status.valid = false;
				} else if (r) {
					DRM_ERROR("early_init of IP block <%s> failed %d\n",
						  adev->ip_blocks[i].version->funcs->name, r);
					return r;
				} else {
					adev->ip_blocks[i].status.valid = true;
				}
			} else {
				adev->ip_blocks[i].status.valid = true;
			}
		}
		/* get the vbios after the asic_funcs are set up */
		if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON) {
			/* Read BIOS */
			if (!amdgpu_get_bios(adev))
				return -EINVAL;

			r = amdgpu_atombios_init(adev);
			if (r) {
				dev_err(adev->dev, "amdgpu_atombios_init failed\n");
				amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_INIT_FAIL, 0, 0);
				return r;
			}
		}
	}

	adev->cg_flags &= amdgpu_cg_mask;
	adev->pg_flags &= amdgpu_pg_mask;

	return 0;
}

static int amdgpu_device_ip_hw_init_phase1(struct amdgpu_device *adev)
{
	int i, r;

	for (i = 0; i < adev->num_ip_blocks; i++) {
		if (!adev->ip_blocks[i].status.sw)
			continue;
		if (adev->ip_blocks[i].status.hw)
			continue;
		if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
		    (amdgpu_sriov_vf(adev) && (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP)) ||
		    adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH) {
			r = adev->ip_blocks[i].version->funcs->hw_init(adev);
			if (r) {
				DRM_ERROR("hw_init of IP block <%s> failed %d\n",
					  adev->ip_blocks[i].version->funcs->name, r);
				return r;
			}
			adev->ip_blocks[i].status.hw = true;
		}
	}

	return 0;
}

static int amdgpu_device_ip_hw_init_phase2(struct amdgpu_device *adev)
{
	int i, r;

	for (i = 0; i < adev->num_ip_blocks; i++) {
		if (!adev->ip_blocks[i].status.sw)
			continue;
		if (adev->ip_blocks[i].status.hw)
			continue;
		r = adev->ip_blocks[i].version->funcs->hw_init(adev);
		if (r) {
			DRM_ERROR("hw_init of IP block <%s> failed %d\n",
				  adev->ip_blocks[i].version->funcs->name, r);
			return r;
		}
		adev->ip_blocks[i].status.hw = true;
	}

	return 0;
}

static int amdgpu_device_fw_loading(struct amdgpu_device *adev)
{
	int r = 0;
	int i;
	uint32_t smu_version;

	if (adev->asic_type >= CHIP_VEGA10) {
		for (i = 0; i < adev->num_ip_blocks; i++) {
			if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_PSP)
				continue;

			/* no need to do the fw loading again if already done */
			if (adev->ip_blocks[i].status.hw == true)
				break;

			if (adev->in_gpu_reset || adev->in_suspend) {
				r = adev->ip_blocks[i].version->funcs->resume(adev);
				if (r) {
					DRM_ERROR("resume of IP block <%s> failed %d\n",
						  adev->ip_blocks[i].version->funcs->name, r);
					return r;
				}
			} else {
				r = adev->ip_blocks[i].version->funcs->hw_init(adev);
				if (r) {
					DRM_ERROR("hw_init of IP block <%s> failed %d\n",
						  adev->ip_blocks[i].version->funcs->name, r);
					return r;
				}
			}

			adev->ip_blocks[i].status.hw = true;
			break;
		}
	}

	r = amdgpu_pm_load_smu_firmware(adev, &smu_version);

	return r;
}

e3ecdffa
AD
1763/**
1764 * amdgpu_device_ip_init - run init for hardware IPs
1765 *
1766 * @adev: amdgpu_device pointer
1767 *
1768 * Main initialization pass for hardware IPs. The list of all the hardware
1769 * IPs that make up the asic is walked and the sw_init and hw_init callbacks
1770 * are run. sw_init initializes the software state associated with each IP
1771 * and hw_init initializes the hardware associated with each IP.
1772 * Returns 0 on success, negative error code on failure.
1773 */
06ec9070 1774static int amdgpu_device_ip_init(struct amdgpu_device *adev)
d38ceaf9
AD
1775{
1776 int i, r;
1777
c030f2e4 1778 r = amdgpu_ras_init(adev);
1779 if (r)
1780 return r;
1781
d38ceaf9 1782 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 1783 if (!adev->ip_blocks[i].status.valid)
d38ceaf9 1784 continue;
a1255107 1785 r = adev->ip_blocks[i].version->funcs->sw_init((void *)adev);
2c1a2784 1786 if (r) {
a1255107
AD
1787 DRM_ERROR("sw_init of IP block <%s> failed %d\n",
1788 adev->ip_blocks[i].version->funcs->name, r);
72d3f592 1789 goto init_failed;
2c1a2784 1790 }
a1255107 1791 adev->ip_blocks[i].status.sw = true;
bfca0289 1792
d38ceaf9 1793 /* need to do gmc hw init early so we can allocate gpu mem */
a1255107 1794 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) {
06ec9070 1795 r = amdgpu_device_vram_scratch_init(adev);
2c1a2784
AD
1796 if (r) {
1797 DRM_ERROR("amdgpu_vram_scratch_init failed %d\n", r);
72d3f592 1798 goto init_failed;
2c1a2784 1799 }
a1255107 1800 r = adev->ip_blocks[i].version->funcs->hw_init((void *)adev);
2c1a2784
AD
1801 if (r) {
1802 DRM_ERROR("hw_init %d failed %d\n", i, r);
72d3f592 1803 goto init_failed;
2c1a2784 1804 }
06ec9070 1805 r = amdgpu_device_wb_init(adev);
2c1a2784 1806 if (r) {
06ec9070 1807 DRM_ERROR("amdgpu_device_wb_init failed %d\n", r);
72d3f592 1808 goto init_failed;
2c1a2784 1809 }
a1255107 1810 adev->ip_blocks[i].status.hw = true;
2493664f
ML
1811
1812 /* right after GMC hw init, we create CSA */
f92d5c61 1813 if (amdgpu_mcbp || amdgpu_sriov_vf(adev)) {
1e256e27
RZ
1814 r = amdgpu_allocate_static_csa(adev, &adev->virt.csa_obj,
1815 AMDGPU_GEM_DOMAIN_VRAM,
1816 AMDGPU_CSA_SIZE);
2493664f
ML
1817 if (r) {
1818 DRM_ERROR("allocate CSA failed %d\n", r);
72d3f592 1819 goto init_failed;
2493664f
ML
1820 }
1821 }
d38ceaf9
AD
1822 }
1823 }
1824
533aed27
AG
1825 r = amdgpu_ib_pool_init(adev);
1826 if (r) {
1827 dev_err(adev->dev, "IB initialization failed (%d).\n", r);
1828 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_IB_INIT_FAIL, 0, r);
1829 goto init_failed;
1830 }
1831
c8963ea4
RZ
1832 r = amdgpu_ucode_create_bo(adev); /* create ucode bo when sw_init complete*/
1833 if (r)
72d3f592 1834 goto init_failed;
0a4f2520
RZ
1835
1836 r = amdgpu_device_ip_hw_init_phase1(adev);
1837 if (r)
72d3f592 1838 goto init_failed;
0a4f2520 1839
7a3e0bb2
RZ
1840 r = amdgpu_device_fw_loading(adev);
1841 if (r)
72d3f592 1842 goto init_failed;
7a3e0bb2 1843
0a4f2520
RZ
1844 r = amdgpu_device_ip_hw_init_phase2(adev);
1845 if (r)
72d3f592 1846 goto init_failed;
d38ceaf9 1847
3e2e2ab5
HZ
1848 if (adev->gmc.xgmi.num_physical_nodes > 1)
1849 amdgpu_xgmi_add_device(adev);
1884734a 1850 amdgpu_amdkfd_device_init(adev);
c6332b97 1851
72d3f592 1852init_failed:
d3c117e5 1853 if (amdgpu_sriov_vf(adev)) {
72d3f592
ED
1854 if (!r)
1855 amdgpu_virt_init_data_exchange(adev);
c6332b97 1856 amdgpu_virt_release_full_gpu(adev, true);
d3c117e5 1857 }
c6332b97 1858
72d3f592 1859 return r;
d38ceaf9
AD
1860}
1861
e3ecdffa
AD
1862/**
1863 * amdgpu_device_fill_reset_magic - writes reset magic to gart pointer
1864 *
1865 * @adev: amdgpu_device pointer
1866 *
1867 * Writes a reset magic value to the gart pointer in VRAM. The driver calls
1868 * this function before a GPU reset. If the value is retained after a
1869 * GPU reset, VRAM has not been lost. Some GPU resets may destroy VRAM contents.
1870 */
06ec9070 1871static void amdgpu_device_fill_reset_magic(struct amdgpu_device *adev)
0c49e0b8
CZ
1872{
1873 memcpy(adev->reset_magic, adev->gart.ptr, AMDGPU_RESET_MAGIC_NUM);
1874}
1875
e3ecdffa
AD
1876/**
1877 * amdgpu_device_check_vram_lost - check if vram is valid
1878 *
1879 * @adev: amdgpu_device pointer
1880 *
1881 * Checks the reset magic value written to the gart pointer in VRAM.
1882 * The driver calls this after a GPU reset to see if the contents of
1883 * VRAM have been lost or not.
1884 * Returns true if VRAM is lost, false if not.
1885 */
06ec9070 1886static bool amdgpu_device_check_vram_lost(struct amdgpu_device *adev)
0c49e0b8
CZ
1887{
1888 return !!memcmp(adev->gart.ptr, adev->reset_magic,
1889 AMDGPU_RESET_MAGIC_NUM);
1890}
1891
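
A minimal sketch (not part of this file) of how the two helpers above are paired around a reset; the reset step and the surrounding caller are placeholders, and amdgpu_device_recover_vram() is the shadow-based restore defined further down in this file.

	/* before the reset: snapshot the reset magic from the GART pointer */
	amdgpu_device_fill_reset_magic(adev);

	/* ... ASIC reset happens here ... */

	/* after the reset: a mismatch means VRAM contents were lost */
	if (amdgpu_device_check_vram_lost(adev))
		r = amdgpu_device_recover_vram(adev);
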
e3ecdffa 1892/**
1112a46b 1893 * amdgpu_device_set_cg_state - set clockgating for amdgpu device
e3ecdffa
AD
1894 *
1895 * @adev: amdgpu_device pointer
1896 *
e3ecdffa 1897 * The list of all the hardware IPs that make up the asic is walked and the
1112a46b
RZ
1898 * set_clockgating_state callbacks are run.
1899 * During late init this pass enables clockgating for the hardware IPs;
1900 * during fini or suspend it disables clockgating.
e3ecdffa
AD
1901 * Returns 0 on success, negative error code on failure.
1902 */
fdd34271 1903
1112a46b
RZ
1904static int amdgpu_device_set_cg_state(struct amdgpu_device *adev,
1905 enum amd_clockgating_state state)
d38ceaf9 1906{
1112a46b 1907 int i, j, r;
d38ceaf9 1908
4a2ba394
SL
1909 if (amdgpu_emu_mode == 1)
1910 return 0;
1911
1112a46b
RZ
1912 for (j = 0; j < adev->num_ip_blocks; j++) {
1913 i = state == AMD_CG_STATE_GATE ? j : adev->num_ip_blocks - j - 1;
a2d31dc3 1914 if (!adev->ip_blocks[i].status.late_initialized)
d38ceaf9 1915 continue;
4a446d55 1916 /* skip CG for VCE/UVD, it's handled specially */
a1255107 1917 if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_UVD &&
57716327 1918 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCE &&
34319b32 1919 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCN &&
57716327 1920 adev->ip_blocks[i].version->funcs->set_clockgating_state) {
4a446d55 1921 /* enable clockgating to save power */
a1255107 1922 r = adev->ip_blocks[i].version->funcs->set_clockgating_state((void *)adev,
1112a46b 1923 state);
4a446d55
AD
1924 if (r) {
1925 DRM_ERROR("set_clockgating_state(gate) of IP block <%s> failed %d\n",
a1255107 1926 adev->ip_blocks[i].version->funcs->name, r);
4a446d55
AD
1927 return r;
1928 }
b0b00ff1 1929 }
d38ceaf9 1930 }
06b18f61 1931
c9f96fd5
RZ
1932 return 0;
1933}
1934
1112a46b 1935static int amdgpu_device_set_pg_state(struct amdgpu_device *adev, enum amd_powergating_state state)
c9f96fd5 1936{
1112a46b 1937 int i, j, r;
06b18f61 1938
c9f96fd5
RZ
1939 if (amdgpu_emu_mode == 1)
1940 return 0;
1941
1112a46b
RZ
1942 for (j = 0; j < adev->num_ip_blocks; j++) {
1943 i = state == AMD_PG_STATE_GATE ? j : adev->num_ip_blocks - j - 1;
a2d31dc3 1944 if (!adev->ip_blocks[i].status.late_initialized)
c9f96fd5
RZ
1945 continue;
1946 /* skip CG for VCE/UVD, it's handled specially */
1947 if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_UVD &&
1948 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCE &&
1949 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCN &&
1950 adev->ip_blocks[i].version->funcs->set_powergating_state) {
1951 /* enable powergating to save power */
1952 r = adev->ip_blocks[i].version->funcs->set_powergating_state((void *)adev,
1112a46b 1953 state);
c9f96fd5
RZ
1954 if (r) {
1955 DRM_ERROR("set_powergating_state(gate) of IP block <%s> failed %d\n",
1956 adev->ip_blocks[i].version->funcs->name, r);
1957 return r;
1958 }
1959 }
1960 }
2dc80b00
S
1961 return 0;
1962}
1963
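
A short worked example of the index mapping both walkers above rely on (hypothetical block count, illustration only): gating visits the IP blocks in discovery order, while ungating visits them in reverse, so whatever was gated first is ungated last.

	enum amd_clockgating_state state = AMD_CG_STATE_UNGATE;
	int num_ip_blocks = 4, i, j;

	for (j = 0; j < num_ip_blocks; j++) {
		i = (state == AMD_CG_STATE_GATE) ? j : num_ip_blocks - j - 1;
		/* GATE:   i = 0, 1, 2, 3  (forward)  */
		/* UNGATE: i = 3, 2, 1, 0  (reverse)  */
	}
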
beff74bc
AD
1964static int amdgpu_device_enable_mgpu_fan_boost(void)
1965{
1966 struct amdgpu_gpu_instance *gpu_ins;
1967 struct amdgpu_device *adev;
1968 int i, ret = 0;
1969
1970 mutex_lock(&mgpu_info.mutex);
1971
1972 /*
1973 * MGPU fan boost feature should be enabled
1974 * only when there are two or more dGPUs in
1975 * the system
1976 */
1977 if (mgpu_info.num_dgpu < 2)
1978 goto out;
1979
1980 for (i = 0; i < mgpu_info.num_dgpu; i++) {
1981 gpu_ins = &(mgpu_info.gpu_ins[i]);
1982 adev = gpu_ins->adev;
1983 if (!(adev->flags & AMD_IS_APU) &&
1984 !gpu_ins->mgpu_fan_enabled &&
1985 adev->powerplay.pp_funcs &&
1986 adev->powerplay.pp_funcs->enable_mgpu_fan_boost) {
1987 ret = amdgpu_dpm_enable_mgpu_fan_boost(adev);
1988 if (ret)
1989 break;
1990
1991 gpu_ins->mgpu_fan_enabled = 1;
1992 }
1993 }
1994
1995out:
1996 mutex_unlock(&mgpu_info.mutex);
1997
1998 return ret;
1999}
2000
e3ecdffa
AD
2001/**
2002 * amdgpu_device_ip_late_init - run late init for hardware IPs
2003 *
2004 * @adev: amdgpu_device pointer
2005 *
2006 * Late initialization pass for hardware IPs. The list of all the hardware
2007 * IPs that make up the asic is walked and the late_init callbacks are run.
2008 * late_init covers any special initialization that an IP requires
2009 * after all of the IPs have been initialized or something that needs to happen
2010 * late in the init process.
2011 * Returns 0 on success, negative error code on failure.
2012 */
06ec9070 2013static int amdgpu_device_ip_late_init(struct amdgpu_device *adev)
2dc80b00
S
2014{
2015 int i = 0, r;
2016
2017 for (i = 0; i < adev->num_ip_blocks; i++) {
73f847db 2018 if (!adev->ip_blocks[i].status.hw)
2dc80b00
S
2019 continue;
2020 if (adev->ip_blocks[i].version->funcs->late_init) {
2021 r = adev->ip_blocks[i].version->funcs->late_init((void *)adev);
2022 if (r) {
2023 DRM_ERROR("late_init of IP block <%s> failed %d\n",
2024 adev->ip_blocks[i].version->funcs->name, r);
2025 return r;
2026 }
2dc80b00 2027 }
73f847db 2028 adev->ip_blocks[i].status.late_initialized = true;
2dc80b00
S
2029 }
2030
1112a46b
RZ
2031 amdgpu_device_set_cg_state(adev, AMD_CG_STATE_GATE);
2032 amdgpu_device_set_pg_state(adev, AMD_PG_STATE_GATE);
916ac57f 2033
06ec9070 2034 amdgpu_device_fill_reset_magic(adev);
d38ceaf9 2035
beff74bc
AD
2036 r = amdgpu_device_enable_mgpu_fan_boost();
2037 if (r)
2038 DRM_ERROR("enable mgpu fan boost failed (%d).\n", r);
2039
2040 /* set to low pstate by default */
2041 amdgpu_xgmi_set_pstate(adev, 0);
2042
d38ceaf9
AD
2043 return 0;
2044}
2045
e3ecdffa
AD
2046/**
2047 * amdgpu_device_ip_fini - run fini for hardware IPs
2048 *
2049 * @adev: amdgpu_device pointer
2050 *
2051 * Main teardown pass for hardware IPs. The list of all the hardware
2052 * IPs that make up the asic is walked and the hw_fini and sw_fini callbacks
2053 * are run. hw_fini tears down the hardware associated with each IP
2054 * and sw_fini tears down any software state associated with each IP.
2055 * Returns 0 on success, negative error code on failure.
2056 */
06ec9070 2057static int amdgpu_device_ip_fini(struct amdgpu_device *adev)
d38ceaf9
AD
2058{
2059 int i, r;
2060
c030f2e4 2061 amdgpu_ras_pre_fini(adev);
2062
a82400b5
AG
2063 if (adev->gmc.xgmi.num_physical_nodes > 1)
2064 amdgpu_xgmi_remove_device(adev);
2065
1884734a 2066 amdgpu_amdkfd_device_fini(adev);
05df1f01
RZ
2067
2068 amdgpu_device_set_pg_state(adev, AMD_PG_STATE_UNGATE);
fdd34271
RZ
2069 amdgpu_device_set_cg_state(adev, AMD_CG_STATE_UNGATE);
2070
3e96dbfd
AD
2071 /* need to disable SMC first */
2072 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 2073 if (!adev->ip_blocks[i].status.hw)
3e96dbfd 2074 continue;
fdd34271 2075 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) {
a1255107 2076 r = adev->ip_blocks[i].version->funcs->hw_fini((void *)adev);
3e96dbfd
AD
2077 /* XXX handle errors */
2078 if (r) {
2079 DRM_DEBUG("hw_fini of IP block <%s> failed %d\n",
a1255107 2080 adev->ip_blocks[i].version->funcs->name, r);
3e96dbfd 2081 }
a1255107 2082 adev->ip_blocks[i].status.hw = false;
3e96dbfd
AD
2083 break;
2084 }
2085 }
2086
d38ceaf9 2087 for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
a1255107 2088 if (!adev->ip_blocks[i].status.hw)
d38ceaf9 2089 continue;
8201a67a 2090
a1255107 2091 r = adev->ip_blocks[i].version->funcs->hw_fini((void *)adev);
d38ceaf9 2092 /* XXX handle errors */
2c1a2784 2093 if (r) {
a1255107
AD
2094 DRM_DEBUG("hw_fini of IP block <%s> failed %d\n",
2095 adev->ip_blocks[i].version->funcs->name, r);
2c1a2784 2096 }
8201a67a 2097
a1255107 2098 adev->ip_blocks[i].status.hw = false;
d38ceaf9
AD
2099 }
2100
9950cda2 2101
d38ceaf9 2102 for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
a1255107 2103 if (!adev->ip_blocks[i].status.sw)
d38ceaf9 2104 continue;
c12aba3a
ML
2105
2106 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) {
c8963ea4 2107 amdgpu_ucode_free_bo(adev);
1e256e27 2108 amdgpu_free_static_csa(&adev->virt.csa_obj);
c12aba3a
ML
2109 amdgpu_device_wb_fini(adev);
2110 amdgpu_device_vram_scratch_fini(adev);
533aed27 2111 amdgpu_ib_pool_fini(adev);
c12aba3a
ML
2112 }
2113
a1255107 2114 r = adev->ip_blocks[i].version->funcs->sw_fini((void *)adev);
d38ceaf9 2115 /* XXX handle errors */
2c1a2784 2116 if (r) {
a1255107
AD
2117 DRM_DEBUG("sw_fini of IP block <%s> failed %d\n",
2118 adev->ip_blocks[i].version->funcs->name, r);
2c1a2784 2119 }
a1255107
AD
2120 adev->ip_blocks[i].status.sw = false;
2121 adev->ip_blocks[i].status.valid = false;
d38ceaf9
AD
2122 }
2123
a6dcfd9c 2124 for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
a1255107 2125 if (!adev->ip_blocks[i].status.late_initialized)
8a2eef1d 2126 continue;
a1255107
AD
2127 if (adev->ip_blocks[i].version->funcs->late_fini)
2128 adev->ip_blocks[i].version->funcs->late_fini((void *)adev);
2129 adev->ip_blocks[i].status.late_initialized = false;
a6dcfd9c
ML
2130 }
2131
c030f2e4 2132 amdgpu_ras_fini(adev);
2133
030308fc 2134 if (amdgpu_sriov_vf(adev))
24136135
ML
2135 if (amdgpu_virt_release_full_gpu(adev, false))
2136 DRM_ERROR("failed to release exclusive mode on fini\n");
2493664f 2137
d38ceaf9
AD
2138 return 0;
2139}
2140
e3ecdffa 2141/**
beff74bc 2142 * amdgpu_device_delayed_init_work_handler - work handler for IB tests
e3ecdffa 2143 *
1112a46b 2144 * @work: work_struct.
e3ecdffa 2145 */
beff74bc 2146static void amdgpu_device_delayed_init_work_handler(struct work_struct *work)
2dc80b00
S
2147{
2148 struct amdgpu_device *adev =
beff74bc 2149 container_of(work, struct amdgpu_device, delayed_init_work.work);
916ac57f
RZ
2150 int r;
2151
2152 r = amdgpu_ib_ring_tests(adev);
2153 if (r)
2154 DRM_ERROR("ib ring test failed (%d).\n", r);
2dc80b00
S
2155}
2156
1e317b99
RZ
2157static void amdgpu_device_delay_enable_gfx_off(struct work_struct *work)
2158{
2159 struct amdgpu_device *adev =
2160 container_of(work, struct amdgpu_device, gfx.gfx_off_delay_work.work);
2161
2162 mutex_lock(&adev->gfx.gfx_off_mutex);
2163 if (!adev->gfx.gfx_off_state && !adev->gfx.gfx_off_req_count) {
2164 if (!amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_GFX, true))
2165 adev->gfx.gfx_off_state = true;
2166 }
2167 mutex_unlock(&adev->gfx.gfx_off_mutex);
2168}
2169
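
For context, a sketch of how this delayed handler is normally driven; it assumes the amdgpu_gfx_off_ctrl() helper from amdgpu_gfx.c in trees of this vintage, and the call sites are illustrative. Blocks that need the GFX core powered take a reference, and GFXOFF is only re-enabled, after the delay, once every reference has been dropped (the count starts at 1 in amdgpu_device_init() below, so GFXOFF stays off until init releases that initial reference).

	/* keep GFX powered while it is needed: bumps gfx_off_req_count and
	 * immediately disables GFXOFF if it was enabled */
	amdgpu_gfx_off_ctrl(adev, false);

	/* ... work that requires the GFX core ... */

	/* drop the reference: when gfx_off_req_count reaches zero,
	 * gfx_off_delay_work is scheduled and the handler above
	 * re-enables GFXOFF through the SMU */
	amdgpu_gfx_off_ctrl(adev, true);
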
e3ecdffa 2170/**
e7854a03 2171 * amdgpu_device_ip_suspend_phase1 - run suspend for hardware IPs (phase 1)
e3ecdffa
AD
2172 *
2173 * @adev: amdgpu_device pointer
2174 *
2175 * Main suspend function for hardware IPs. The list of all the hardware
2176 * IPs that make up the asic is walked, clockgating is disabled and the
2177 * suspend callbacks are run. suspend puts the hardware and software state
2178 * in each IP into a state suitable for suspend.
2179 * Returns 0 on success, negative error code on failure.
2180 */
e7854a03
AD
2181static int amdgpu_device_ip_suspend_phase1(struct amdgpu_device *adev)
2182{
2183 int i, r;
2184
05df1f01 2185 amdgpu_device_set_pg_state(adev, AMD_PG_STATE_UNGATE);
fdd34271 2186 amdgpu_device_set_cg_state(adev, AMD_CG_STATE_UNGATE);
05df1f01 2187
e7854a03
AD
2188 for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
2189 if (!adev->ip_blocks[i].status.valid)
2190 continue;
2191 /* displays are handled separately */
2192 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_DCE) {
e7854a03
AD
2193 /* XXX handle errors */
2194 r = adev->ip_blocks[i].version->funcs->suspend(adev);
2195 /* XXX handle errors */
2196 if (r) {
2197 DRM_ERROR("suspend of IP block <%s> failed %d\n",
2198 adev->ip_blocks[i].version->funcs->name, r);
482f0e53 2199 return r;
e7854a03 2200 }
482f0e53 2201 adev->ip_blocks[i].status.hw = false;
e7854a03
AD
2202 }
2203 }
2204
e7854a03
AD
2205 return 0;
2206}
2207
2208/**
2209 * amdgpu_device_ip_suspend_phase2 - run suspend for hardware IPs (phase 2)
2210 *
2211 * @adev: amdgpu_device pointer
2212 *
2213 * Main suspend function for hardware IPs. The list of all the hardware
2214 * IPs that make up the asic is walked, clockgating is disabled and the
2215 * suspend callbacks are run. suspend puts the hardware and software state
2216 * in each IP into a state suitable for suspend.
2217 * Returns 0 on success, negative error code on failure.
2218 */
2219static int amdgpu_device_ip_suspend_phase2(struct amdgpu_device *adev)
d38ceaf9
AD
2220{
2221 int i, r;
2222
2223 for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
a1255107 2224 if (!adev->ip_blocks[i].status.valid)
d38ceaf9 2225 continue;
e7854a03
AD
2226 /* displays are handled in phase1 */
2227 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_DCE)
2228 continue;
d38ceaf9 2229 /* XXX handle errors */
a1255107 2230 r = adev->ip_blocks[i].version->funcs->suspend(adev);
d38ceaf9 2231 /* XXX handle errors */
2c1a2784 2232 if (r) {
a1255107
AD
2233 DRM_ERROR("suspend of IP block <%s> failed %d\n",
2234 adev->ip_blocks[i].version->funcs->name, r);
2c1a2784 2235 }
876923fb 2236 adev->ip_blocks[i].status.hw = false;
a3a09142
AD
2237 /* handle putting the SMC in the appropriate state */
2238 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) {
2239 if (is_support_sw_smu(adev)) {
2240 /* todo */
2241 } else if (adev->powerplay.pp_funcs &&
482f0e53 2242 adev->powerplay.pp_funcs->set_mp1_state) {
a3a09142
AD
2243 r = adev->powerplay.pp_funcs->set_mp1_state(
2244 adev->powerplay.pp_handle,
2245 adev->mp1_state);
2246 if (r) {
2247 DRM_ERROR("SMC failed to set mp1 state %d, %d\n",
2248 adev->mp1_state, r);
482f0e53 2249 return r;
a3a09142
AD
2250 }
2251 }
2252 }
b5507c7e
AG
2253
2254 adev->ip_blocks[i].status.hw = false;
d38ceaf9
AD
2255 }
2256
2257 return 0;
2258}
2259
e7854a03
AD
2260/**
2261 * amdgpu_device_ip_suspend - run suspend for hardware IPs
2262 *
2263 * @adev: amdgpu_device pointer
2264 *
2265 * Main suspend function for hardware IPs. The list of all the hardware
2266 * IPs that make up the asic is walked, clockgating is disabled and the
2267 * suspend callbacks are run. suspend puts the hardware and software state
2268 * in each IP into a state suitable for suspend.
2269 * Returns 0 on success, negative error code on failure.
2270 */
2271int amdgpu_device_ip_suspend(struct amdgpu_device *adev)
2272{
2273 int r;
2274
e7819644
YT
2275 if (amdgpu_sriov_vf(adev))
2276 amdgpu_virt_request_full_gpu(adev, false);
2277
e7854a03
AD
2278 r = amdgpu_device_ip_suspend_phase1(adev);
2279 if (r)
2280 return r;
2281 r = amdgpu_device_ip_suspend_phase2(adev);
2282
e7819644
YT
2283 if (amdgpu_sriov_vf(adev))
2284 amdgpu_virt_release_full_gpu(adev, false);
2285
e7854a03
AD
2286 return r;
2287}
2288
06ec9070 2289static int amdgpu_device_ip_reinit_early_sriov(struct amdgpu_device *adev)
a90ad3c2
ML
2290{
2291 int i, r;
2292
2cb681b6
ML
2293 static enum amd_ip_block_type ip_order[] = {
2294 AMD_IP_BLOCK_TYPE_GMC,
2295 AMD_IP_BLOCK_TYPE_COMMON,
39186aef 2296 AMD_IP_BLOCK_TYPE_PSP,
2cb681b6
ML
2297 AMD_IP_BLOCK_TYPE_IH,
2298 };
a90ad3c2 2299
2cb681b6
ML
2300 for (i = 0; i < ARRAY_SIZE(ip_order); i++) {
2301 int j;
2302 struct amdgpu_ip_block *block;
a90ad3c2 2303
2cb681b6
ML
2304 for (j = 0; j < adev->num_ip_blocks; j++) {
2305 block = &adev->ip_blocks[j];
2306
482f0e53 2307 block->status.hw = false;
2cb681b6
ML
2308 if (block->version->type != ip_order[i] ||
2309 !block->status.valid)
2310 continue;
2311
2312 r = block->version->funcs->hw_init(adev);
0aaeefcc 2313 DRM_INFO("RE-INIT-early: %s %s\n", block->version->funcs->name, r?"failed":"succeeded");
c41d1cf6
ML
2314 if (r)
2315 return r;
482f0e53 2316 block->status.hw = true;
a90ad3c2
ML
2317 }
2318 }
2319
2320 return 0;
2321}
2322
06ec9070 2323static int amdgpu_device_ip_reinit_late_sriov(struct amdgpu_device *adev)
a90ad3c2
ML
2324{
2325 int i, r;
2326
2cb681b6
ML
2327 static enum amd_ip_block_type ip_order[] = {
2328 AMD_IP_BLOCK_TYPE_SMC,
2329 AMD_IP_BLOCK_TYPE_DCE,
2330 AMD_IP_BLOCK_TYPE_GFX,
2331 AMD_IP_BLOCK_TYPE_SDMA,
257deb8c
FM
2332 AMD_IP_BLOCK_TYPE_UVD,
2333 AMD_IP_BLOCK_TYPE_VCE
2cb681b6 2334 };
a90ad3c2 2335
2cb681b6
ML
2336 for (i = 0; i < ARRAY_SIZE(ip_order); i++) {
2337 int j;
2338 struct amdgpu_ip_block *block;
a90ad3c2 2339
2cb681b6
ML
2340 for (j = 0; j < adev->num_ip_blocks; j++) {
2341 block = &adev->ip_blocks[j];
2342
2343 if (block->version->type != ip_order[i] ||
482f0e53
ML
2344 !block->status.valid ||
2345 block->status.hw)
2cb681b6
ML
2346 continue;
2347
2348 r = block->version->funcs->hw_init(adev);
0aaeefcc 2349 DRM_INFO("RE-INIT-late: %s %s\n", block->version->funcs->name, r?"failed":"succeeded");
c41d1cf6
ML
2350 if (r)
2351 return r;
482f0e53 2352 block->status.hw = true;
a90ad3c2
ML
2353 }
2354 }
2355
2356 return 0;
2357}
2358
e3ecdffa
AD
2359/**
2360 * amdgpu_device_ip_resume_phase1 - run resume for hardware IPs
2361 *
2362 * @adev: amdgpu_device pointer
2363 *
2364 * First resume function for hardware IPs. The list of all the hardware
2365 * IPs that make up the asic is walked and the resume callbacks are run for
2366 * COMMON, GMC, and IH. resume puts the hardware into a functional state
2367 * after a suspend and updates the software state as necessary. This
2368 * function is also used for restoring the GPU after a GPU reset.
2369 * Returns 0 on success, negative error code on failure.
2370 */
06ec9070 2371static int amdgpu_device_ip_resume_phase1(struct amdgpu_device *adev)
d38ceaf9
AD
2372{
2373 int i, r;
2374
a90ad3c2 2375 for (i = 0; i < adev->num_ip_blocks; i++) {
482f0e53 2376 if (!adev->ip_blocks[i].status.valid || adev->ip_blocks[i].status.hw)
a90ad3c2 2377 continue;
a90ad3c2 2378 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
e3ecdffa
AD
2379 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
2380 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH) {
482f0e53 2381
fcf0649f
CZ
2382 r = adev->ip_blocks[i].version->funcs->resume(adev);
2383 if (r) {
2384 DRM_ERROR("resume of IP block <%s> failed %d\n",
2385 adev->ip_blocks[i].version->funcs->name, r);
2386 return r;
2387 }
482f0e53 2388 adev->ip_blocks[i].status.hw = true;
a90ad3c2
ML
2389 }
2390 }
2391
2392 return 0;
2393}
2394
e3ecdffa
AD
2395/**
2396 * amdgpu_device_ip_resume_phase2 - run resume for hardware IPs
2397 *
2398 * @adev: amdgpu_device pointer
2399 *
2400 * Second resume function for hardware IPs. The list of all the hardware
2401 * IPs that make up the asic is walked and the resume callbacks are run for
2402 * all blocks except COMMON, GMC, and IH. resume puts the hardware into a
2403 * functional state after a suspend and updates the software state as
2404 * necessary. This function is also used for restoring the GPU after a GPU
2405 * reset.
2406 * Returns 0 on success, negative error code on failure.
2407 */
06ec9070 2408static int amdgpu_device_ip_resume_phase2(struct amdgpu_device *adev)
d38ceaf9
AD
2409{
2410 int i, r;
2411
2412 for (i = 0; i < adev->num_ip_blocks; i++) {
482f0e53 2413 if (!adev->ip_blocks[i].status.valid || adev->ip_blocks[i].status.hw)
d38ceaf9 2414 continue;
fcf0649f 2415 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
e3ecdffa 2416 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
7a3e0bb2
RZ
2417 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH ||
2418 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP)
fcf0649f 2419 continue;
a1255107 2420 r = adev->ip_blocks[i].version->funcs->resume(adev);
2c1a2784 2421 if (r) {
a1255107
AD
2422 DRM_ERROR("resume of IP block <%s> failed %d\n",
2423 adev->ip_blocks[i].version->funcs->name, r);
d38ceaf9 2424 return r;
2c1a2784 2425 }
482f0e53 2426 adev->ip_blocks[i].status.hw = true;
d38ceaf9
AD
2427 }
2428
2429 return 0;
2430}
2431
e3ecdffa
AD
2432/**
2433 * amdgpu_device_ip_resume - run resume for hardware IPs
2434 *
2435 * @adev: amdgpu_device pointer
2436 *
2437 * Main resume function for hardware IPs. The hardware IPs
2438 * are split into two resume functions because they are
2439 * also used in recovering from a GPU reset and some additional
2440 * steps need to be taken between them. In this case (S3/S4) they are
2441 * run sequentially.
2442 * Returns 0 on success, negative error code on failure.
2443 */
06ec9070 2444static int amdgpu_device_ip_resume(struct amdgpu_device *adev)
fcf0649f
CZ
2445{
2446 int r;
2447
06ec9070 2448 r = amdgpu_device_ip_resume_phase1(adev);
fcf0649f
CZ
2449 if (r)
2450 return r;
7a3e0bb2
RZ
2451
2452 r = amdgpu_device_fw_loading(adev);
2453 if (r)
2454 return r;
2455
06ec9070 2456 r = amdgpu_device_ip_resume_phase2(adev);
fcf0649f
CZ
2457
2458 return r;
2459}
2460
e3ecdffa
AD
2461/**
2462 * amdgpu_device_detect_sriov_bios - determine if the board supports SR-IOV
2463 *
2464 * @adev: amdgpu_device pointer
2465 *
2466 * Query the VBIOS data tables to determine if the board supports SR-IOV.
2467 */
4e99a44e 2468static void amdgpu_device_detect_sriov_bios(struct amdgpu_device *adev)
048765ad 2469{
6867e1b5
ML
2470 if (amdgpu_sriov_vf(adev)) {
2471 if (adev->is_atom_fw) {
2472 if (amdgpu_atomfirmware_gpu_supports_virtualization(adev))
2473 adev->virt.caps |= AMDGPU_SRIOV_CAPS_SRIOV_VBIOS;
2474 } else {
2475 if (amdgpu_atombios_has_gpu_virtualization_table(adev))
2476 adev->virt.caps |= AMDGPU_SRIOV_CAPS_SRIOV_VBIOS;
2477 }
2478
2479 if (!(adev->virt.caps & AMDGPU_SRIOV_CAPS_SRIOV_VBIOS))
2480 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_NO_VBIOS, 0, 0);
a5bde2f9 2481 }
048765ad
AR
2482}
2483
e3ecdffa
AD
2484/**
2485 * amdgpu_device_asic_has_dc_support - determine if DC supports the asic
2486 *
2487 * @asic_type: AMD asic type
2488 *
2489 * Check if there is DC (new modesetting infrastructure) support for an asic.
2490 * Returns true if DC has support, false if not.
2491 */
4562236b
HW
2492bool amdgpu_device_asic_has_dc_support(enum amd_asic_type asic_type)
2493{
2494 switch (asic_type) {
2495#if defined(CONFIG_DRM_AMD_DC)
2496 case CHIP_BONAIRE:
0d6fbccb 2497 case CHIP_KAVERI:
367e6687
AD
2498 case CHIP_KABINI:
2499 case CHIP_MULLINS:
d9fda248
HW
2500 /*
2501 * We have systems in the wild with these ASICs that require
2502 * LVDS and VGA support which is not supported with DC.
2503 *
2504 * Fallback to the non-DC driver here by default so as not to
2505 * cause regressions.
2506 */
2507 return amdgpu_dc > 0;
2508 case CHIP_HAWAII:
4562236b
HW
2509 case CHIP_CARRIZO:
2510 case CHIP_STONEY:
4562236b 2511 case CHIP_POLARIS10:
675fd32b 2512 case CHIP_POLARIS11:
2c8ad2d5 2513 case CHIP_POLARIS12:
675fd32b 2514 case CHIP_VEGAM:
4562236b
HW
2515 case CHIP_TONGA:
2516 case CHIP_FIJI:
42f8ffa1 2517 case CHIP_VEGA10:
dca7b401 2518 case CHIP_VEGA12:
c6034aa2 2519 case CHIP_VEGA20:
dc37a9a0 2520#if defined(CONFIG_DRM_AMD_DC_DCN1_0)
fd187853 2521 case CHIP_RAVEN:
b4f199c7
HW
2522#endif
2523#if defined(CONFIG_DRM_AMD_DC_DCN2_0)
2524 case CHIP_NAVI10:
8fceceb6 2525 case CHIP_NAVI14:
078655d9 2526 case CHIP_NAVI12:
e1c14c43
RL
2527#endif
2528#if defined(CONFIG_DRM_AMD_DC_DCN2_1)
2529 case CHIP_RENOIR:
42f8ffa1 2530#endif
fd187853 2531 return amdgpu_dc != 0;
4562236b
HW
2532#endif
2533 default:
2534 return false;
2535 }
2536}
2537
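
A short worked example of the difference between the two return statements above, assuming the usual amdgpu.dc module-parameter default of -1 (auto):

	amdgpu_dc = -1 (auto):   CHIP_KAVERI -> false (legacy display path),  CHIP_VEGA10 -> true
	amdgpu_dc =  1 (force):  CHIP_KAVERI -> true,                         CHIP_VEGA10 -> true
	amdgpu_dc =  0 (off):    CHIP_KAVERI -> false,                        CHIP_VEGA10 -> false
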
2538/**
2539 * amdgpu_device_has_dc_support - check if dc is supported
2540 *
2541 * @adev: amdgpu_device_pointer
2542 *
2543 * Returns true for supported, false for not supported
2544 */
2545bool amdgpu_device_has_dc_support(struct amdgpu_device *adev)
2546{
2555039d
XY
2547 if (amdgpu_sriov_vf(adev))
2548 return false;
2549
4562236b
HW
2550 return amdgpu_device_asic_has_dc_support(adev->asic_type);
2551}
2552
d4535e2c
AG
2553
2554static void amdgpu_device_xgmi_reset_func(struct work_struct *__work)
2555{
2556 struct amdgpu_device *adev =
2557 container_of(__work, struct amdgpu_device, xgmi_reset_work);
2558
2559 adev->asic_reset_res = amdgpu_asic_reset(adev);
2560 if (adev->asic_reset_res)
fed184e9 2561 DRM_WARN("ASIC reset failed with error, %d for drm dev, %s",
d4535e2c
AG
2562 adev->asic_reset_res, adev->ddev->unique);
2563}
2564
2565
d38ceaf9
AD
2566/**
2567 * amdgpu_device_init - initialize the driver
2568 *
2569 * @adev: amdgpu_device pointer
87e3f136 2570 * @ddev: drm dev pointer
d38ceaf9
AD
2571 * @pdev: pci dev pointer
2572 * @flags: driver flags
2573 *
2574 * Initializes the driver info and hw (all asics).
2575 * Returns 0 for success or an error on failure.
2576 * Called at driver startup.
2577 */
2578int amdgpu_device_init(struct amdgpu_device *adev,
2579 struct drm_device *ddev,
2580 struct pci_dev *pdev,
2581 uint32_t flags)
2582{
2583 int r, i;
2584 bool runtime = false;
95844d20 2585 u32 max_MBps;
d38ceaf9
AD
2586
2587 adev->shutdown = false;
2588 adev->dev = &pdev->dev;
2589 adev->ddev = ddev;
2590 adev->pdev = pdev;
2591 adev->flags = flags;
4e66d7d2
YZ
2592
2593 if (amdgpu_force_asic_type >= 0 && amdgpu_force_asic_type < CHIP_LAST)
2594 adev->asic_type = amdgpu_force_asic_type;
2595 else
2596 adev->asic_type = flags & AMD_ASIC_MASK;
2597
d38ceaf9 2598 adev->usec_timeout = AMDGPU_MAX_USEC_TIMEOUT;
593aa2d2
SL
2599 if (amdgpu_emu_mode == 1)
2600 adev->usec_timeout *= 2;
770d13b1 2601 adev->gmc.gart_size = 512 * 1024 * 1024;
d38ceaf9
AD
2602 adev->accel_working = false;
2603 adev->num_rings = 0;
2604 adev->mman.buffer_funcs = NULL;
2605 adev->mman.buffer_funcs_ring = NULL;
2606 adev->vm_manager.vm_pte_funcs = NULL;
3798e9a6 2607 adev->vm_manager.vm_pte_num_rqs = 0;
132f34e4 2608 adev->gmc.gmc_funcs = NULL;
f54d1867 2609 adev->fence_context = dma_fence_context_alloc(AMDGPU_MAX_RINGS);
b8866c26 2610 bitmap_zero(adev->gfx.pipe_reserve_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
d38ceaf9
AD
2611
2612 adev->smc_rreg = &amdgpu_invalid_rreg;
2613 adev->smc_wreg = &amdgpu_invalid_wreg;
2614 adev->pcie_rreg = &amdgpu_invalid_rreg;
2615 adev->pcie_wreg = &amdgpu_invalid_wreg;
36b9a952
HR
2616 adev->pciep_rreg = &amdgpu_invalid_rreg;
2617 adev->pciep_wreg = &amdgpu_invalid_wreg;
4fa1c6a6
TZ
2618 adev->pcie_rreg64 = &amdgpu_invalid_rreg64;
2619 adev->pcie_wreg64 = &amdgpu_invalid_wreg64;
d38ceaf9
AD
2620 adev->uvd_ctx_rreg = &amdgpu_invalid_rreg;
2621 adev->uvd_ctx_wreg = &amdgpu_invalid_wreg;
2622 adev->didt_rreg = &amdgpu_invalid_rreg;
2623 adev->didt_wreg = &amdgpu_invalid_wreg;
ccdbb20a
RZ
2624 adev->gc_cac_rreg = &amdgpu_invalid_rreg;
2625 adev->gc_cac_wreg = &amdgpu_invalid_wreg;
d38ceaf9
AD
2626 adev->audio_endpt_rreg = &amdgpu_block_invalid_rreg;
2627 adev->audio_endpt_wreg = &amdgpu_block_invalid_wreg;
2628
3e39ab90
AD
2629 DRM_INFO("initializing kernel modesetting (%s 0x%04X:0x%04X 0x%04X:0x%04X 0x%02X).\n",
2630 amdgpu_asic_name[adev->asic_type], pdev->vendor, pdev->device,
2631 pdev->subsystem_vendor, pdev->subsystem_device, pdev->revision);
d38ceaf9
AD
2632
2633 /* mutex initializations are all done here so we
2634 * can recall functions without having locking issues */
d38ceaf9 2635 atomic_set(&adev->irq.ih.lock, 0);
0e5ca0d1 2636 mutex_init(&adev->firmware.mutex);
d38ceaf9
AD
2637 mutex_init(&adev->pm.mutex);
2638 mutex_init(&adev->gfx.gpu_clock_mutex);
2639 mutex_init(&adev->srbm_mutex);
b8866c26 2640 mutex_init(&adev->gfx.pipe_reserve_mutex);
d23ee13f 2641 mutex_init(&adev->gfx.gfx_off_mutex);
d38ceaf9 2642 mutex_init(&adev->grbm_idx_mutex);
d38ceaf9 2643 mutex_init(&adev->mn_lock);
e23b74aa 2644 mutex_init(&adev->virt.vf_errors.lock);
d38ceaf9 2645 hash_init(adev->mn_hash);
13a752e3 2646 mutex_init(&adev->lock_reset);
bb5a2bdf 2647 mutex_init(&adev->virt.dpm_mutex);
32eaeae0 2648 mutex_init(&adev->psp.mutex);
d38ceaf9 2649
912dfc84
EQ
2650 r = amdgpu_device_check_arguments(adev);
2651 if (r)
2652 return r;
d38ceaf9 2653
d38ceaf9
AD
2654 spin_lock_init(&adev->mmio_idx_lock);
2655 spin_lock_init(&adev->smc_idx_lock);
2656 spin_lock_init(&adev->pcie_idx_lock);
2657 spin_lock_init(&adev->uvd_ctx_idx_lock);
2658 spin_lock_init(&adev->didt_idx_lock);
ccdbb20a 2659 spin_lock_init(&adev->gc_cac_idx_lock);
16abb5d2 2660 spin_lock_init(&adev->se_cac_idx_lock);
d38ceaf9 2661 spin_lock_init(&adev->audio_endpt_idx_lock);
95844d20 2662 spin_lock_init(&adev->mm_stats.lock);
d38ceaf9 2663
0c4e7fa5
CZ
2664 INIT_LIST_HEAD(&adev->shadow_list);
2665 mutex_init(&adev->shadow_list_lock);
2666
795f2813
AR
2667 INIT_LIST_HEAD(&adev->ring_lru_list);
2668 spin_lock_init(&adev->ring_lru_list_lock);
2669
beff74bc
AD
2670 INIT_DELAYED_WORK(&adev->delayed_init_work,
2671 amdgpu_device_delayed_init_work_handler);
1e317b99
RZ
2672 INIT_DELAYED_WORK(&adev->gfx.gfx_off_delay_work,
2673 amdgpu_device_delay_enable_gfx_off);
2dc80b00 2674
d4535e2c
AG
2675 INIT_WORK(&adev->xgmi_reset_work, amdgpu_device_xgmi_reset_func);
2676
d23ee13f 2677 adev->gfx.gfx_off_req_count = 1;
b1ddf548
RZ
2678 adev->pm.ac_power = power_supply_is_system_supplied() > 0 ? true : false;
2679
0fa49558
AX
2680 /* Registers mapping */
2681 /* TODO: block userspace mapping of io register */
da69c161
KW
2682 if (adev->asic_type >= CHIP_BONAIRE) {
2683 adev->rmmio_base = pci_resource_start(adev->pdev, 5);
2684 adev->rmmio_size = pci_resource_len(adev->pdev, 5);
2685 } else {
2686 adev->rmmio_base = pci_resource_start(adev->pdev, 2);
2687 adev->rmmio_size = pci_resource_len(adev->pdev, 2);
2688 }
d38ceaf9 2689
d38ceaf9
AD
2690 adev->rmmio = ioremap(adev->rmmio_base, adev->rmmio_size);
2691 if (adev->rmmio == NULL) {
2692 return -ENOMEM;
2693 }
2694 DRM_INFO("register mmio base: 0x%08X\n", (uint32_t)adev->rmmio_base);
2695 DRM_INFO("register mmio size: %u\n", (unsigned)adev->rmmio_size);
2696
d38ceaf9
AD
2697 /* io port mapping */
2698 for (i = 0; i < DEVICE_COUNT_RESOURCE; i++) {
2699 if (pci_resource_flags(adev->pdev, i) & IORESOURCE_IO) {
2700 adev->rio_mem_size = pci_resource_len(adev->pdev, i);
2701 adev->rio_mem = pci_iomap(adev->pdev, i, adev->rio_mem_size);
2702 break;
2703 }
2704 }
2705 if (adev->rio_mem == NULL)
b64a18c5 2706 DRM_INFO("PCI I/O BAR is not found.\n");
d38ceaf9 2707
b2109d8e
JX
2708 /* enable PCIE atomic ops */
2709 r = pci_enable_atomic_ops_to_root(adev->pdev,
2710 PCI_EXP_DEVCAP2_ATOMIC_COMP32 |
2711 PCI_EXP_DEVCAP2_ATOMIC_COMP64);
2712 if (r) {
2713 adev->have_atomics_support = false;
2714 DRM_INFO("PCIE atomic ops is not supported\n");
2715 } else {
2716 adev->have_atomics_support = true;
2717 }
2718
5494d864
AD
2719 amdgpu_device_get_pcie_info(adev);
2720
b239c017
JX
2721 if (amdgpu_mcbp)
2722 DRM_INFO("MCBP is enabled\n");
2723
5f84cc63
JX
2724 if (amdgpu_mes && adev->asic_type >= CHIP_NAVI10)
2725 adev->enable_mes = true;
2726
f54eeab4 2727 if (amdgpu_discovery && adev->asic_type >= CHIP_NAVI10) {
a190d1c7
XY
2728 r = amdgpu_discovery_init(adev);
2729 if (r) {
2730 dev_err(adev->dev, "amdgpu_discovery_init failed\n");
2731 return r;
2732 }
2733 }
2734
d38ceaf9 2735 /* early init functions */
06ec9070 2736 r = amdgpu_device_ip_early_init(adev);
d38ceaf9
AD
2737 if (r)
2738 return r;
2739
6585661d
OZ
2740 /* doorbell bar mapping and doorbell index init*/
2741 amdgpu_device_doorbell_init(adev);
2742
d38ceaf9
AD
2743 /* if we have > 1 VGA cards, then disable the amdgpu VGA resources */
2744 /* this will fail for cards that aren't VGA class devices, just
2745 * ignore it */
06ec9070 2746 vga_client_register(adev->pdev, adev, NULL, amdgpu_device_vga_set_decode);
d38ceaf9 2747
e9bef455 2748 if (amdgpu_device_is_px(ddev))
d38ceaf9 2749 runtime = true;
84c8b22e
LW
2750 if (!pci_is_thunderbolt_attached(adev->pdev))
2751 vga_switcheroo_register_client(adev->pdev,
2752 &amdgpu_switcheroo_ops, runtime);
d38ceaf9
AD
2753 if (runtime)
2754 vga_switcheroo_init_domain_pm_ops(adev->dev, &adev->vga_pm_domain);
2755
9475a943
SL
2756 if (amdgpu_emu_mode == 1) {
2757 /* post the asic on emulation mode */
2758 emu_soc_asic_init(adev);
bfca0289 2759 goto fence_driver_init;
9475a943 2760 }
bfca0289 2761
4e99a44e
ML
2762 /* detect if we are with an SRIOV vbios */
2763 amdgpu_device_detect_sriov_bios(adev);
048765ad 2764
95e8e59e
AD
2765 /* check if we need to reset the asic
2766 * E.g., driver was not cleanly unloaded previously, etc.
2767 */
f14899fd 2768 if (!amdgpu_sriov_vf(adev) && amdgpu_asic_need_reset_on_init(adev)) {
95e8e59e
AD
2769 r = amdgpu_asic_reset(adev);
2770 if (r) {
2771 dev_err(adev->dev, "asic reset on init failed\n");
2772 goto failed;
2773 }
2774 }
2775
d38ceaf9 2776 /* Post card if necessary */
39c640c0 2777 if (amdgpu_device_need_post(adev)) {
d38ceaf9 2778 if (!adev->bios) {
bec86378 2779 dev_err(adev->dev, "no vBIOS found\n");
83ba126a
AD
2780 r = -EINVAL;
2781 goto failed;
d38ceaf9 2782 }
bec86378 2783 DRM_INFO("GPU posting now...\n");
4e99a44e
ML
2784 r = amdgpu_atom_asic_init(adev->mode_info.atom_context);
2785 if (r) {
2786 dev_err(adev->dev, "gpu post error!\n");
2787 goto failed;
2788 }
d38ceaf9
AD
2789 }
2790
88b64e95
AD
2791 if (adev->is_atom_fw) {
2792 /* Initialize clocks */
2793 r = amdgpu_atomfirmware_get_clock_info(adev);
2794 if (r) {
2795 dev_err(adev->dev, "amdgpu_atomfirmware_get_clock_info failed\n");
e23b74aa 2796 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_GET_CLOCK_FAIL, 0, 0);
88b64e95
AD
2797 goto failed;
2798 }
2799 } else {
a5bde2f9
AD
2800 /* Initialize clocks */
2801 r = amdgpu_atombios_get_clock_info(adev);
2802 if (r) {
2803 dev_err(adev->dev, "amdgpu_atombios_get_clock_info failed\n");
e23b74aa 2804 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_GET_CLOCK_FAIL, 0, 0);
89041940 2805 goto failed;
a5bde2f9
AD
2806 }
2807 /* init i2c buses */
4562236b
HW
2808 if (!amdgpu_device_has_dc_support(adev))
2809 amdgpu_atombios_i2c_init(adev);
2c1a2784 2810 }
d38ceaf9 2811
bfca0289 2812fence_driver_init:
d38ceaf9
AD
2813 /* Fence driver */
2814 r = amdgpu_fence_driver_init(adev);
2c1a2784
AD
2815 if (r) {
2816 dev_err(adev->dev, "amdgpu_fence_driver_init failed\n");
e23b74aa 2817 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_FENCE_INIT_FAIL, 0, 0);
83ba126a 2818 goto failed;
2c1a2784 2819 }
d38ceaf9
AD
2820
2821 /* init the mode config */
2822 drm_mode_config_init(adev->ddev);
2823
06ec9070 2824 r = amdgpu_device_ip_init(adev);
d38ceaf9 2825 if (r) {
8840a387 2826 /* failed in exclusive mode due to timeout */
2827 if (amdgpu_sriov_vf(adev) &&
2828 !amdgpu_sriov_runtime(adev) &&
2829 amdgpu_virt_mmio_blocked(adev) &&
2830 !amdgpu_virt_wait_reset(adev)) {
2831 dev_err(adev->dev, "VF exclusive mode timeout\n");
1daee8b4
PD
2832 /* Don't send request since VF is inactive. */
2833 adev->virt.caps &= ~AMDGPU_SRIOV_CAPS_RUNTIME;
2834 adev->virt.ops = NULL;
8840a387 2835 r = -EAGAIN;
2836 goto failed;
2837 }
06ec9070 2838 dev_err(adev->dev, "amdgpu_device_ip_init failed\n");
e23b74aa 2839 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_AMDGPU_INIT_FAIL, 0, 0);
72d3f592
ED
2840 if (amdgpu_virt_request_full_gpu(adev, false))
2841 amdgpu_virt_release_full_gpu(adev, false);
83ba126a 2842 goto failed;
d38ceaf9
AD
2843 }
2844
2845 adev->accel_working = true;
2846
e59c0205
AX
2847 amdgpu_vm_check_compute_bug(adev);
2848
95844d20
MO
2849 /* Initialize the buffer migration limit. */
2850 if (amdgpu_moverate >= 0)
2851 max_MBps = amdgpu_moverate;
2852 else
2853 max_MBps = 8; /* Allow 8 MB/s. */
2854 /* Get a log2 for easy divisions. */
2855 adev->mm_stats.log2_max_MBps = ilog2(max(1u, max_MBps));
2856
9bc92b9c
ML
2857 amdgpu_fbdev_init(adev);
2858
e9bc1bf7
YT
2859 if (amdgpu_sriov_vf(adev) && amdgim_is_hwperf(adev))
2860 amdgpu_pm_virt_sysfs_init(adev);
2861
d2f52ac8
RZ
2862 r = amdgpu_pm_sysfs_init(adev);
2863 if (r)
2864 DRM_ERROR("registering pm debugfs failed (%d).\n", r);
2865
5bb23532
OM
2866 r = amdgpu_ucode_sysfs_init(adev);
2867 if (r)
2868 DRM_ERROR("Creating firmware sysfs failed (%d).\n", r);
2869
75758255 2870 r = amdgpu_debugfs_gem_init(adev);
3f14e623 2871 if (r)
d38ceaf9 2872 DRM_ERROR("registering gem debugfs failed (%d).\n", r);
d38ceaf9
AD
2873
2874 r = amdgpu_debugfs_regs_init(adev);
3f14e623 2875 if (r)
d38ceaf9 2876 DRM_ERROR("registering register debugfs failed (%d).\n", r);
d38ceaf9 2877
50ab2533 2878 r = amdgpu_debugfs_firmware_init(adev);
3f14e623 2879 if (r)
50ab2533 2880 DRM_ERROR("registering firmware debugfs failed (%d).\n", r);
50ab2533 2881
763efb6c 2882 r = amdgpu_debugfs_init(adev);
db95e218 2883 if (r)
763efb6c 2884 DRM_ERROR("Creating debugfs files failed (%d).\n", r);
db95e218 2885
d38ceaf9
AD
2886 if ((amdgpu_testing & 1)) {
2887 if (adev->accel_working)
2888 amdgpu_test_moves(adev);
2889 else
2890 DRM_INFO("amdgpu: acceleration disabled, skipping move tests\n");
2891 }
d38ceaf9
AD
2892 if (amdgpu_benchmarking) {
2893 if (adev->accel_working)
2894 amdgpu_benchmark(adev, amdgpu_benchmarking);
2895 else
2896 DRM_INFO("amdgpu: acceleration disabled, skipping benchmarks\n");
2897 }
2898
2899 /* enable clockgating, etc. after ib tests, etc. since some blocks require
2900 * explicit gating rather than handling it automatically.
2901 */
06ec9070 2902 r = amdgpu_device_ip_late_init(adev);
2c1a2784 2903 if (r) {
06ec9070 2904 dev_err(adev->dev, "amdgpu_device_ip_late_init failed\n");
e23b74aa 2905 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_AMDGPU_LATE_INIT_FAIL, 0, r);
83ba126a 2906 goto failed;
2c1a2784 2907 }
d38ceaf9 2908
108c6a63 2909 /* must succeed. */
511fdbc3 2910 amdgpu_ras_resume(adev);
108c6a63 2911
beff74bc
AD
2912 queue_delayed_work(system_wq, &adev->delayed_init_work,
2913 msecs_to_jiffies(AMDGPU_RESUME_MS));
2914
dcea6e65
KR
2915 r = device_create_file(adev->dev, &dev_attr_pcie_replay_count);
2916 if (r) {
2917 dev_err(adev->dev, "Could not create pcie_replay_count");
2918 return r;
2919 }
108c6a63 2920
d155bef0
AB
2921 if (IS_ENABLED(CONFIG_PERF_EVENTS))
2922 r = amdgpu_pmu_init(adev);
9c7c85f7
JK
2923 if (r)
2924 dev_err(adev->dev, "amdgpu_pmu_init failed\n");
2925
d38ceaf9 2926 return 0;
83ba126a
AD
2927
2928failed:
89041940 2929 amdgpu_vf_error_trans_all(adev);
83ba126a
AD
2930 if (runtime)
2931 vga_switcheroo_fini_domain_pm_ops(adev->dev);
8840a387 2932
83ba126a 2933 return r;
d38ceaf9
AD
2934}
2935
d38ceaf9
AD
2936/**
2937 * amdgpu_device_fini - tear down the driver
2938 *
2939 * @adev: amdgpu_device pointer
2940 *
2941 * Tear down the driver info (all asics).
2942 * Called at driver shutdown.
2943 */
2944void amdgpu_device_fini(struct amdgpu_device *adev)
2945{
2946 int r;
2947
2948 DRM_INFO("amdgpu: finishing device.\n");
2949 adev->shutdown = true;
e5b03032
ML
2950 /* disable all interrupts */
2951 amdgpu_irq_disable_all(adev);
ff97cba8
ML
2952 if (adev->mode_info.mode_config_initialized){
2953 if (!amdgpu_device_has_dc_support(adev))
c2d88e06 2954 drm_helper_force_disable_all(adev->ddev);
ff97cba8
ML
2955 else
2956 drm_atomic_helper_shutdown(adev->ddev);
2957 }
d38ceaf9 2958 amdgpu_fence_driver_fini(adev);
58e955d9 2959 amdgpu_pm_sysfs_fini(adev);
d38ceaf9 2960 amdgpu_fbdev_fini(adev);
06ec9070 2961 r = amdgpu_device_ip_fini(adev);
ab4fe3e1
HR
2962 if (adev->firmware.gpu_info_fw) {
2963 release_firmware(adev->firmware.gpu_info_fw);
2964 adev->firmware.gpu_info_fw = NULL;
2965 }
d38ceaf9 2966 adev->accel_working = false;
beff74bc 2967 cancel_delayed_work_sync(&adev->delayed_init_work);
d38ceaf9 2968 /* free i2c buses */
4562236b
HW
2969 if (!amdgpu_device_has_dc_support(adev))
2970 amdgpu_i2c_fini(adev);
bfca0289
SL
2971
2972 if (amdgpu_emu_mode != 1)
2973 amdgpu_atombios_fini(adev);
2974
d38ceaf9
AD
2975 kfree(adev->bios);
2976 adev->bios = NULL;
84c8b22e
LW
2977 if (!pci_is_thunderbolt_attached(adev->pdev))
2978 vga_switcheroo_unregister_client(adev->pdev);
83ba126a
AD
2979 if (adev->flags & AMD_IS_PX)
2980 vga_switcheroo_fini_domain_pm_ops(adev->dev);
d38ceaf9
AD
2981 vga_client_register(adev->pdev, NULL, NULL, NULL);
2982 if (adev->rio_mem)
2983 pci_iounmap(adev->pdev, adev->rio_mem);
2984 adev->rio_mem = NULL;
2985 iounmap(adev->rmmio);
2986 adev->rmmio = NULL;
06ec9070 2987 amdgpu_device_doorbell_fini(adev);
e9bc1bf7
YT
2988 if (amdgpu_sriov_vf(adev) && amdgim_is_hwperf(adev))
2989 amdgpu_pm_virt_sysfs_fini(adev);
2990
d38ceaf9 2991 amdgpu_debugfs_regs_cleanup(adev);
dcea6e65 2992 device_remove_file(adev->dev, &dev_attr_pcie_replay_count);
5bb23532 2993 amdgpu_ucode_sysfs_fini(adev);
d155bef0
AB
2994 if (IS_ENABLED(CONFIG_PERF_EVENTS))
2995 amdgpu_pmu_fini(adev);
6698a3d0 2996 amdgpu_debugfs_preempt_cleanup(adev);
f54eeab4 2997 if (amdgpu_discovery && adev->asic_type >= CHIP_NAVI10)
a190d1c7 2998 amdgpu_discovery_fini(adev);
d38ceaf9
AD
2999}
3000
3001
3002/*
3003 * Suspend & resume.
3004 */
3005/**
810ddc3a 3006 * amdgpu_device_suspend - initiate device suspend
d38ceaf9 3007 *
87e3f136
DP
3008 * @dev: drm dev pointer
3009 * @suspend: suspend state
3010 * @fbcon : notify the fbdev of suspend
d38ceaf9
AD
3011 *
3012 * Puts the hw in the suspend state (all asics).
3013 * Returns 0 for success or an error on failure.
3014 * Called at driver suspend.
3015 */
810ddc3a 3016int amdgpu_device_suspend(struct drm_device *dev, bool suspend, bool fbcon)
d38ceaf9
AD
3017{
3018 struct amdgpu_device *adev;
3019 struct drm_crtc *crtc;
3020 struct drm_connector *connector;
5ceb54c6 3021 int r;
d38ceaf9
AD
3022
3023 if (dev == NULL || dev->dev_private == NULL) {
3024 return -ENODEV;
3025 }
3026
3027 adev = dev->dev_private;
3028
3029 if (dev->switch_power_state == DRM_SWITCH_POWER_OFF)
3030 return 0;
3031
44779b43 3032 adev->in_suspend = true;
d38ceaf9
AD
3033 drm_kms_helper_poll_disable(dev);
3034
5f818173
S
3035 if (fbcon)
3036 amdgpu_fbdev_set_suspend(adev, 1);
3037
beff74bc 3038 cancel_delayed_work_sync(&adev->delayed_init_work);
a5459475 3039
4562236b
HW
3040 if (!amdgpu_device_has_dc_support(adev)) {
3041 /* turn off display hw */
3042 drm_modeset_lock_all(dev);
3043 list_for_each_entry(connector, &dev->mode_config.connector_list, head) {
3044 drm_helper_connector_dpms(connector, DRM_MODE_DPMS_OFF);
3045 }
3046 drm_modeset_unlock_all(dev);
fe1053b7
AD
3047 /* unpin the front buffers and cursors */
3048 list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) {
3049 struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
3050 struct drm_framebuffer *fb = crtc->primary->fb;
3051 struct amdgpu_bo *robj;
3052
91334223 3053 if (amdgpu_crtc->cursor_bo && !adev->enable_virtual_display) {
fe1053b7
AD
3054 struct amdgpu_bo *aobj = gem_to_amdgpu_bo(amdgpu_crtc->cursor_bo);
3055 r = amdgpu_bo_reserve(aobj, true);
3056 if (r == 0) {
3057 amdgpu_bo_unpin(aobj);
3058 amdgpu_bo_unreserve(aobj);
3059 }
756e6880 3060 }
756e6880 3061
fe1053b7
AD
3062 if (fb == NULL || fb->obj[0] == NULL) {
3063 continue;
3064 }
3065 robj = gem_to_amdgpu_bo(fb->obj[0]);
3066 /* don't unpin kernel fb objects */
3067 if (!amdgpu_fbdev_robj_is_fb(adev, robj)) {
3068 r = amdgpu_bo_reserve(robj, true);
3069 if (r == 0) {
3070 amdgpu_bo_unpin(robj);
3071 amdgpu_bo_unreserve(robj);
3072 }
d38ceaf9
AD
3073 }
3074 }
3075 }
fe1053b7
AD
3076
3077 amdgpu_amdkfd_suspend(adev);
3078
5e6932fe 3079 amdgpu_ras_suspend(adev);
3080
fe1053b7
AD
3081 r = amdgpu_device_ip_suspend_phase1(adev);
3082
d38ceaf9
AD
3083 /* evict vram memory */
3084 amdgpu_bo_evict_vram(adev);
3085
5ceb54c6 3086 amdgpu_fence_driver_suspend(adev);
d38ceaf9 3087
fe1053b7 3088 r = amdgpu_device_ip_suspend_phase2(adev);
d38ceaf9 3089
a0a71e49
AD
3090 /* evict remaining vram memory
3091 * This second call to evict vram is to evict the gart page table
3092 * using the CPU.
3093 */
d38ceaf9
AD
3094 amdgpu_bo_evict_vram(adev);
3095
3096 pci_save_state(dev->pdev);
3097 if (suspend) {
3098 /* Shut down the device */
3099 pci_disable_device(dev->pdev);
3100 pci_set_power_state(dev->pdev, PCI_D3hot);
74b0b157 3101 } else {
3102 r = amdgpu_asic_reset(adev);
3103 if (r)
3104 DRM_ERROR("amdgpu asic reset failed\n");
d38ceaf9
AD
3105 }
3106
d38ceaf9
AD
3107 return 0;
3108}
3109
3110/**
810ddc3a 3111 * amdgpu_device_resume - initiate device resume
d38ceaf9 3112 *
87e3f136
DP
3113 * @dev: drm dev pointer
3114 * @resume: resume state
3115 * @fbcon : notify the fbdev of resume
d38ceaf9
AD
3116 *
3117 * Bring the hw back to operating state (all asics).
3118 * Returns 0 for success or an error on failure.
3119 * Called at driver resume.
3120 */
810ddc3a 3121int amdgpu_device_resume(struct drm_device *dev, bool resume, bool fbcon)
d38ceaf9
AD
3122{
3123 struct drm_connector *connector;
3124 struct amdgpu_device *adev = dev->dev_private;
756e6880 3125 struct drm_crtc *crtc;
03161a6e 3126 int r = 0;
d38ceaf9
AD
3127
3128 if (dev->switch_power_state == DRM_SWITCH_POWER_OFF)
3129 return 0;
3130
d38ceaf9
AD
3131 if (resume) {
3132 pci_set_power_state(dev->pdev, PCI_D0);
3133 pci_restore_state(dev->pdev);
74b0b157 3134 r = pci_enable_device(dev->pdev);
03161a6e 3135 if (r)
4d3b9ae5 3136 return r;
d38ceaf9
AD
3137 }
3138
3139 /* post card */
39c640c0 3140 if (amdgpu_device_need_post(adev)) {
74b0b157 3141 r = amdgpu_atom_asic_init(adev->mode_info.atom_context);
3142 if (r)
3143 DRM_ERROR("amdgpu asic init failed\n");
3144 }
d38ceaf9 3145
06ec9070 3146 r = amdgpu_device_ip_resume(adev);
e6707218 3147 if (r) {
06ec9070 3148 DRM_ERROR("amdgpu_device_ip_resume failed (%d).\n", r);
4d3b9ae5 3149 return r;
e6707218 3150 }
5ceb54c6
AD
3151 amdgpu_fence_driver_resume(adev);
3152
d38ceaf9 3153
06ec9070 3154 r = amdgpu_device_ip_late_init(adev);
03161a6e 3155 if (r)
4d3b9ae5 3156 return r;
d38ceaf9 3157
beff74bc
AD
3158 queue_delayed_work(system_wq, &adev->delayed_init_work,
3159 msecs_to_jiffies(AMDGPU_RESUME_MS));
3160
fe1053b7
AD
3161 if (!amdgpu_device_has_dc_support(adev)) {
3162 /* pin cursors */
3163 list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) {
3164 struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
3165
91334223 3166 if (amdgpu_crtc->cursor_bo && !adev->enable_virtual_display) {
fe1053b7
AD
3167 struct amdgpu_bo *aobj = gem_to_amdgpu_bo(amdgpu_crtc->cursor_bo);
3168 r = amdgpu_bo_reserve(aobj, true);
3169 if (r == 0) {
3170 r = amdgpu_bo_pin(aobj, AMDGPU_GEM_DOMAIN_VRAM);
3171 if (r != 0)
3172 DRM_ERROR("Failed to pin cursor BO (%d)\n", r);
3173 amdgpu_crtc->cursor_addr = amdgpu_bo_gpu_offset(aobj);
3174 amdgpu_bo_unreserve(aobj);
3175 }
756e6880
AD
3176 }
3177 }
3178 }
ba997709
YZ
3179 r = amdgpu_amdkfd_resume(adev);
3180 if (r)
3181 return r;
756e6880 3182
96a5d8d4 3183 /* Make sure IB tests flushed */
beff74bc 3184 flush_delayed_work(&adev->delayed_init_work);
96a5d8d4 3185
d38ceaf9
AD
3186 /* blat the mode back in */
3187 if (fbcon) {
4562236b
HW
3188 if (!amdgpu_device_has_dc_support(adev)) {
3189 /* pre DCE11 */
3190 drm_helper_resume_force_mode(dev);
3191
3192 /* turn on display hw */
3193 drm_modeset_lock_all(dev);
3194 list_for_each_entry(connector, &dev->mode_config.connector_list, head) {
3195 drm_helper_connector_dpms(connector, DRM_MODE_DPMS_ON);
3196 }
3197 drm_modeset_unlock_all(dev);
d38ceaf9 3198 }
4d3b9ae5 3199 amdgpu_fbdev_set_suspend(adev, 0);
d38ceaf9
AD
3200 }
3201
3202 drm_kms_helper_poll_enable(dev);
23a1a9e5 3203
5e6932fe 3204 amdgpu_ras_resume(adev);
3205
23a1a9e5
L
3206 /*
3207 * Most of the connector probing functions try to acquire runtime pm
3208 * refs to ensure that the GPU is powered on when connector polling is
3209 * performed. Since we're calling this from a runtime PM callback,
3210 * trying to acquire rpm refs will cause us to deadlock.
3211 *
3212 * Since we're guaranteed to be holding the rpm lock, it's safe to
3213 * temporarily disable the rpm helpers so this doesn't deadlock us.
3214 */
3215#ifdef CONFIG_PM
3216 dev->dev->power.disable_depth++;
3217#endif
4562236b
HW
3218 if (!amdgpu_device_has_dc_support(adev))
3219 drm_helper_hpd_irq_event(dev);
3220 else
3221 drm_kms_helper_hotplug_event(dev);
23a1a9e5
L
3222#ifdef CONFIG_PM
3223 dev->dev->power.disable_depth--;
3224#endif
44779b43
RZ
3225 adev->in_suspend = false;
3226
4d3b9ae5 3227 return 0;
d38ceaf9
AD
3228}
3229
e3ecdffa
AD
3230/**
3231 * amdgpu_device_ip_check_soft_reset - did soft reset succeed
3232 *
3233 * @adev: amdgpu_device pointer
3234 *
3235 * The list of all the hardware IPs that make up the asic is walked and
3236 * the check_soft_reset callbacks are run. check_soft_reset determines
3237 * if the asic is still hung or not.
3238 * Returns true if any of the IPs are still in a hung state, false if not.
3239 */
06ec9070 3240static bool amdgpu_device_ip_check_soft_reset(struct amdgpu_device *adev)
63fbf42f
CZ
3241{
3242 int i;
3243 bool asic_hang = false;
3244
f993d628
ML
3245 if (amdgpu_sriov_vf(adev))
3246 return true;
3247
8bc04c29
AD
3248 if (amdgpu_asic_need_full_reset(adev))
3249 return true;
3250
63fbf42f 3251 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 3252 if (!adev->ip_blocks[i].status.valid)
63fbf42f 3253 continue;
a1255107
AD
3254 if (adev->ip_blocks[i].version->funcs->check_soft_reset)
3255 adev->ip_blocks[i].status.hang =
3256 adev->ip_blocks[i].version->funcs->check_soft_reset(adev);
3257 if (adev->ip_blocks[i].status.hang) {
3258 DRM_INFO("IP block:%s is hung!\n", adev->ip_blocks[i].version->funcs->name);
63fbf42f
CZ
3259 asic_hang = true;
3260 }
3261 }
3262 return asic_hang;
3263}
3264
e3ecdffa
AD
3265/**
3266 * amdgpu_device_ip_pre_soft_reset - prepare for soft reset
3267 *
3268 * @adev: amdgpu_device pointer
3269 *
3270 * The list of all the hardware IPs that make up the asic is walked and the
3271 * pre_soft_reset callbacks are run if the block is hung. pre_soft_reset
3272 * handles any IP specific hardware or software state changes that are
3273 * necessary for a soft reset to succeed.
3274 * Returns 0 on success, negative error code on failure.
3275 */
06ec9070 3276static int amdgpu_device_ip_pre_soft_reset(struct amdgpu_device *adev)
d31a501e
CZ
3277{
3278 int i, r = 0;
3279
3280 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 3281 if (!adev->ip_blocks[i].status.valid)
d31a501e 3282 continue;
a1255107
AD
3283 if (adev->ip_blocks[i].status.hang &&
3284 adev->ip_blocks[i].version->funcs->pre_soft_reset) {
3285 r = adev->ip_blocks[i].version->funcs->pre_soft_reset(adev);
d31a501e
CZ
3286 if (r)
3287 return r;
3288 }
3289 }
3290
3291 return 0;
3292}
3293
e3ecdffa
AD
3294/**
3295 * amdgpu_device_ip_need_full_reset - check if a full asic reset is needed
3296 *
3297 * @adev: amdgpu_device pointer
3298 *
3299 * Some hardware IPs cannot be soft reset. If they are hung, a full gpu
3300 * reset is necessary to recover.
3301 * Returns true if a full asic reset is required, false if not.
3302 */
06ec9070 3303static bool amdgpu_device_ip_need_full_reset(struct amdgpu_device *adev)
35d782fe 3304{
da146d3b
AD
3305 int i;
3306
8bc04c29
AD
3307 if (amdgpu_asic_need_full_reset(adev))
3308 return true;
3309
da146d3b 3310 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 3311 if (!adev->ip_blocks[i].status.valid)
da146d3b 3312 continue;
a1255107
AD
3313 if ((adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) ||
3314 (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) ||
3315 (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_ACP) ||
98512bb8
KW
3316 (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_DCE) ||
3317 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP) {
a1255107 3318 if (adev->ip_blocks[i].status.hang) {
da146d3b
AD
3319 DRM_INFO("Some block need full reset!\n");
3320 return true;
3321 }
3322 }
35d782fe
CZ
3323 }
3324 return false;
3325}
3326
e3ecdffa
AD
3327/**
3328 * amdgpu_device_ip_soft_reset - do a soft reset
3329 *
3330 * @adev: amdgpu_device pointer
3331 *
3332 * The list of all the hardware IPs that make up the asic is walked and the
3333 * soft_reset callbacks are run if the block is hung. soft_reset handles any
3334 * IP specific hardware or software state changes that are necessary to soft
3335 * reset the IP.
3336 * Returns 0 on success, negative error code on failure.
3337 */
06ec9070 3338static int amdgpu_device_ip_soft_reset(struct amdgpu_device *adev)
35d782fe
CZ
3339{
3340 int i, r = 0;
3341
3342 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 3343 if (!adev->ip_blocks[i].status.valid)
35d782fe 3344 continue;
a1255107
AD
3345 if (adev->ip_blocks[i].status.hang &&
3346 adev->ip_blocks[i].version->funcs->soft_reset) {
3347 r = adev->ip_blocks[i].version->funcs->soft_reset(adev);
35d782fe
CZ
3348 if (r)
3349 return r;
3350 }
3351 }
3352
3353 return 0;
3354}
3355
e3ecdffa
AD
3356/**
3357 * amdgpu_device_ip_post_soft_reset - clean up from soft reset
3358 *
3359 * @adev: amdgpu_device pointer
3360 *
3361 * The list of all the hardware IPs that make up the asic is walked and the
3362 * post_soft_reset callbacks are run if the asic was hung. post_soft_reset
3363 * handles any IP specific hardware or software state changes that are
3364 * necessary after the IP has been soft reset.
3365 * Returns 0 on success, negative error code on failure.
3366 */
06ec9070 3367static int amdgpu_device_ip_post_soft_reset(struct amdgpu_device *adev)
35d782fe
CZ
3368{
3369 int i, r = 0;
3370
3371 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 3372 if (!adev->ip_blocks[i].status.valid)
35d782fe 3373 continue;
a1255107
AD
3374 if (adev->ip_blocks[i].status.hang &&
3375 adev->ip_blocks[i].version->funcs->post_soft_reset)
3376 r = adev->ip_blocks[i].version->funcs->post_soft_reset(adev);
35d782fe
CZ
3377 if (r)
3378 return r;
3379 }
3380
3381 return 0;
3382}
3383
e3ecdffa 3384/**
c33adbc7 3385 * amdgpu_device_recover_vram - Recover some VRAM contents
e3ecdffa
AD
3386 *
3387 * @adev: amdgpu_device pointer
3388 *
3389 * Restores the contents of VRAM buffers from the shadows in GTT. Used to
3390 * restore things like GPUVM page tables after a GPU reset where
3391 * the contents of VRAM might be lost.
403009bf
CK
3392 *
3393 * Returns:
3394 * 0 on success, negative error code on failure.
e3ecdffa 3395 */
c33adbc7 3396static int amdgpu_device_recover_vram(struct amdgpu_device *adev)
c41d1cf6 3397{
c41d1cf6 3398 struct dma_fence *fence = NULL, *next = NULL;
403009bf
CK
3399 struct amdgpu_bo *shadow;
3400 long r = 1, tmo;
c41d1cf6
ML
3401
3402 if (amdgpu_sriov_runtime(adev))
b045d3af 3403 tmo = msecs_to_jiffies(8000);
c41d1cf6
ML
3404 else
3405 tmo = msecs_to_jiffies(100);
3406
3407 DRM_INFO("recover vram bo from shadow start\n");
3408 mutex_lock(&adev->shadow_list_lock);
403009bf
CK
3409 list_for_each_entry(shadow, &adev->shadow_list, shadow_list) {
3410
3411 /* No need to recover an evicted BO */
3412 if (shadow->tbo.mem.mem_type != TTM_PL_TT ||
b575f10d 3413 shadow->tbo.mem.start == AMDGPU_BO_INVALID_OFFSET ||
403009bf
CK
3414 shadow->parent->tbo.mem.mem_type != TTM_PL_VRAM)
3415 continue;
3416
3417 r = amdgpu_bo_restore_shadow(shadow, &next);
3418 if (r)
3419 break;
3420
c41d1cf6 3421 if (fence) {
1712fb1a 3422 tmo = dma_fence_wait_timeout(fence, false, tmo);
403009bf
CK
3423 dma_fence_put(fence);
3424 fence = next;
1712fb1a 3425 if (tmo == 0) {
3426 r = -ETIMEDOUT;
c41d1cf6 3427 break;
1712fb1a 3428 } else if (tmo < 0) {
3429 r = tmo;
3430 break;
3431 }
403009bf
CK
3432 } else {
3433 fence = next;
c41d1cf6 3434 }
c41d1cf6
ML
3435 }
3436 mutex_unlock(&adev->shadow_list_lock);
3437
403009bf
CK
3438 if (fence)
3439 tmo = dma_fence_wait_timeout(fence, false, tmo);
c41d1cf6
ML
3440 dma_fence_put(fence);
3441
1712fb1a 3442 if (r < 0 || tmo <= 0) {
3443 DRM_ERROR("recover vram bo from shadow failed, r is %ld, tmo is %ld\n", r, tmo);
403009bf
CK
3444 return -EIO;
3445 }
c41d1cf6 3446
403009bf
CK
3447 DRM_INFO("recover vram bo from shadow done\n");
3448 return 0;
c41d1cf6
ML
3449}
3450
a90ad3c2 3451
e3ecdffa 3452/**
06ec9070 3453 * amdgpu_device_reset_sriov - reset ASIC for SR-IOV vf
5740682e
ML
3454 *
3455 * @adev: amdgpu device pointer
87e3f136 3456 * @from_hypervisor: request from hypervisor
5740682e
ML
3457 *
3458 * Do a VF FLR and reinitialize the ASIC.
3f48c681 3459 * Returns 0 on success, negative error code on failure.
e3ecdffa
AD
3460 */
3461static int amdgpu_device_reset_sriov(struct amdgpu_device *adev,
3462 bool from_hypervisor)
5740682e
ML
3463{
3464 int r;
3465
3466 if (from_hypervisor)
3467 r = amdgpu_virt_request_full_gpu(adev, true);
3468 else
3469 r = amdgpu_virt_reset_gpu(adev);
3470 if (r)
3471 return r;
a90ad3c2 3472
f81e8d53
WL
3473 amdgpu_amdkfd_pre_reset(adev);
3474
a90ad3c2 3475 /* Resume IP prior to SMC */
06ec9070 3476 r = amdgpu_device_ip_reinit_early_sriov(adev);
5740682e
ML
3477 if (r)
3478 goto error;
a90ad3c2
ML
3479
3480	/* we need to recover the GART prior to resuming SMC/CP/SDMA */
c1c7ce8f 3481 amdgpu_gtt_mgr_recover(&adev->mman.bdev.man[TTM_PL_TT]);
a90ad3c2 3482
7a3e0bb2
RZ
3483 r = amdgpu_device_fw_loading(adev);
3484 if (r)
3485 return r;
3486
a90ad3c2 3487 /* now we are okay to resume SMC/CP/SDMA */
06ec9070 3488 r = amdgpu_device_ip_reinit_late_sriov(adev);
5740682e
ML
3489 if (r)
3490 goto error;
a90ad3c2
ML
3491
3492 amdgpu_irq_gpu_reset_resume_helper(adev);
5740682e 3493 r = amdgpu_ib_ring_tests(adev);
f81e8d53 3494 amdgpu_amdkfd_post_reset(adev);
a90ad3c2 3495
abc34253 3496error:
d3c117e5 3497 amdgpu_virt_init_data_exchange(adev);
abc34253 3498 amdgpu_virt_release_full_gpu(adev, true);
c41d1cf6 3499 if (!r && adev->virt.gim_feature & AMDGIM_FEATURE_GIM_FLR_VRAMLOST) {
e3526257 3500 amdgpu_inc_vram_lost(adev);
c33adbc7 3501 r = amdgpu_device_recover_vram(adev);
a90ad3c2
ML
3502 }
3503
3504 return r;
3505}
3506
12938fad
CK
3507/**
3508 * amdgpu_device_should_recover_gpu - check if we should try GPU recovery
3509 *
3510 * @adev: amdgpu device pointer
3511 *
3512 * Check amdgpu_gpu_recovery and SRIOV status to see if we should try to recover
3513 * a hung GPU.
3514 */
3515bool amdgpu_device_should_recover_gpu(struct amdgpu_device *adev)
3516{
3517 if (!amdgpu_device_ip_check_soft_reset(adev)) {
3518 DRM_INFO("Timeout, but no hardware hang detected.\n");
3519 return false;
3520 }
3521
3ba7b418
AG
3522 if (amdgpu_gpu_recovery == 0)
3523 goto disabled;
3524
3525 if (amdgpu_sriov_vf(adev))
3526 return true;
3527
3528 if (amdgpu_gpu_recovery == -1) {
3529 switch (adev->asic_type) {
fc42d47c
AG
3530 case CHIP_BONAIRE:
3531 case CHIP_HAWAII:
3ba7b418
AG
3532 case CHIP_TOPAZ:
3533 case CHIP_TONGA:
3534 case CHIP_FIJI:
3535 case CHIP_POLARIS10:
3536 case CHIP_POLARIS11:
3537 case CHIP_POLARIS12:
3538 case CHIP_VEGAM:
3539 case CHIP_VEGA20:
3540 case CHIP_VEGA10:
3541 case CHIP_VEGA12:
c43b849f 3542 case CHIP_RAVEN:
3ba7b418
AG
3543 break;
3544 default:
3545 goto disabled;
3546 }
12938fad
CK
3547 }
3548
3549 return true;
3ba7b418
AG
3550
3551disabled:
3552 DRM_INFO("GPU recovery disabled.\n");
3553 return false;
12938fad
CK
3554}
3555
5c6dd71e 3556
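/**
 * amdgpu_device_pre_asic_reset - prepare the device for an ASIC reset
 *
 * @adev: amdgpu_device pointer
 * @job: the job that triggered the timeout, or NULL
 * @need_full_reset_arg: in/out flag indicating whether a full reset is needed
 *
 * Force-completes the hardware fences of all rings, increases the guilty
 * job's karma and, on bare metal, tries an IP soft reset first; if the soft
 * reset fails or a full reset is already required, the IP blocks are
 * suspended so a full ASIC reset can follow.
 * Returns 0 on success, negative error code on failure.
 */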
26bc5340
AG
3557static int amdgpu_device_pre_asic_reset(struct amdgpu_device *adev,
3558 struct amdgpu_job *job,
3559 bool *need_full_reset_arg)
3560{
3561 int i, r = 0;
3562 bool need_full_reset = *need_full_reset_arg;
71182665 3563
71182665 3564	/* schedulers are already stopped by the caller; force complete each ring's hw fences */
0875dc9e
CZ
3565 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
3566 struct amdgpu_ring *ring = adev->rings[i];
3567
51687759 3568 if (!ring || !ring->sched.thread)
0875dc9e 3569 continue;
5740682e 3570
2f9d4084
ML
3571 /* after all hw jobs are reset, hw fence is meaningless, so force_completion */
3572 amdgpu_fence_driver_force_completion(ring);
0875dc9e 3573 }
d38ceaf9 3574
222b5f04
AG
3575	if (job)
3576 drm_sched_increase_karma(&job->base);
3577
1d721ed6 3578 /* Don't suspend on bare metal if we are not going to HW reset the ASIC */
26bc5340
AG
3579 if (!amdgpu_sriov_vf(adev)) {
3580
3581 if (!need_full_reset)
3582 need_full_reset = amdgpu_device_ip_need_full_reset(adev);
3583
3584 if (!need_full_reset) {
3585 amdgpu_device_ip_pre_soft_reset(adev);
3586 r = amdgpu_device_ip_soft_reset(adev);
3587 amdgpu_device_ip_post_soft_reset(adev);
3588 if (r || amdgpu_device_ip_check_soft_reset(adev)) {
3589 DRM_INFO("soft reset failed, will fallback to full reset!\n");
3590 need_full_reset = true;
3591 }
3592 }
3593
3594 if (need_full_reset)
3595 r = amdgpu_device_ip_suspend(adev);
3596
3597 *need_full_reset_arg = need_full_reset;
3598 }
3599
3600 return r;
3601}
3602
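/**
 * amdgpu_do_asic_reset - perform the actual ASIC reset for one or more devices
 *
 * @hive: XGMI hive the device belongs to, or NULL
 * @device_list_handle: list of devices to reset (all hive members or just one)
 * @need_full_reset_arg: in/out flag indicating whether a full reset is needed
 *
 * When a full reset is required, resets every device in the list (in parallel
 * for XGMI hives), re-posts the card, resumes the IP blocks, recovers the GTT
 * manager, reloads firmware and restores VRAM contents from shadow BOs.
 * Returns 0 on success, -EAGAIN if the IB ring tests failed and the reset
 * should be retried, or another negative error code on failure.
 */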
3603static int amdgpu_do_asic_reset(struct amdgpu_hive_info *hive,
3604 struct list_head *device_list_handle,
3605 bool *need_full_reset_arg)
3606{
3607 struct amdgpu_device *tmp_adev = NULL;
3608 bool need_full_reset = *need_full_reset_arg, vram_lost = false;
3609 int r = 0;
3610
3611 /*
3612	 * ASIC reset has to be done on all XGMI hive nodes ASAP
3613	 * to allow proper link negotiation in FW (within 1 sec)
3614 */
3615 if (need_full_reset) {
3616 list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) {
d4535e2c
AG
3617 /* For XGMI run all resets in parallel to speed up the process */
3618 if (tmp_adev->gmc.xgmi.num_physical_nodes > 1) {
3619 if (!queue_work(system_highpri_wq, &tmp_adev->xgmi_reset_work))
3620 r = -EALREADY;
3621 } else
3622 r = amdgpu_asic_reset(tmp_adev);
3623
3624 if (r) {
fed184e9 3625 DRM_ERROR("ASIC reset failed with error, %d for drm dev, %s",
26bc5340 3626 r, tmp_adev->ddev->unique);
d4535e2c
AG
3627 break;
3628 }
3629 }
3630
3631 /* For XGMI wait for all PSP resets to complete before proceed */
3632 if (!r) {
3633 list_for_each_entry(tmp_adev, device_list_handle,
3634 gmc.xgmi.head) {
3635 if (tmp_adev->gmc.xgmi.num_physical_nodes > 1) {
3636 flush_work(&tmp_adev->xgmi_reset_work);
3637 r = tmp_adev->asic_reset_res;
3638 if (r)
3639 break;
3640 }
3641 }
26bc5340
AG
3642 }
3643 }
3644
3645
3646 list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) {
3647 if (need_full_reset) {
3648 /* post card */
3649 if (amdgpu_atom_asic_init(tmp_adev->mode_info.atom_context))
3650 DRM_WARN("asic atom init failed!");
3651
3652 if (!r) {
3653 dev_info(tmp_adev->dev, "GPU reset succeeded, trying to resume\n");
3654 r = amdgpu_device_ip_resume_phase1(tmp_adev);
3655 if (r)
3656 goto out;
3657
3658 vram_lost = amdgpu_device_check_vram_lost(tmp_adev);
3659 if (vram_lost) {
77e7f829 3660 DRM_INFO("VRAM is lost due to GPU reset!\n");
e3526257 3661 amdgpu_inc_vram_lost(tmp_adev);
26bc5340
AG
3662 }
3663
3664 r = amdgpu_gtt_mgr_recover(
3665 &tmp_adev->mman.bdev.man[TTM_PL_TT]);
3666 if (r)
3667 goto out;
3668
3669 r = amdgpu_device_fw_loading(tmp_adev);
3670 if (r)
3671 return r;
3672
3673 r = amdgpu_device_ip_resume_phase2(tmp_adev);
3674 if (r)
3675 goto out;
3676
3677 if (vram_lost)
3678 amdgpu_device_fill_reset_magic(tmp_adev);
3679
fdafb359
EQ
3680 /*
3681			 * Add this ASIC back as tracked since the reset already
3682			 * completed successfully.
3683 */
3684 amdgpu_register_gpu_instance(tmp_adev);
3685
7c04ca50 3686 r = amdgpu_device_ip_late_init(tmp_adev);
3687 if (r)
3688 goto out;
3689
e79a04d5 3690 /* must succeed. */
511fdbc3 3691 amdgpu_ras_resume(tmp_adev);
e79a04d5 3692
26bc5340
AG
3693 /* Update PSP FW topology after reset */
3694 if (hive && tmp_adev->gmc.xgmi.num_physical_nodes > 1)
3695 r = amdgpu_xgmi_update_topology(hive, tmp_adev);
3696 }
3697 }
3698
3699
3700out:
3701 if (!r) {
3702 amdgpu_irq_gpu_reset_resume_helper(tmp_adev);
3703 r = amdgpu_ib_ring_tests(tmp_adev);
3704 if (r) {
3705 dev_err(tmp_adev->dev, "ib ring test failed (%d).\n", r);
3706 r = amdgpu_device_ip_suspend(tmp_adev);
3707 need_full_reset = true;
3708 r = -EAGAIN;
3709 goto end;
3710 }
3711 }
3712
3713 if (!r)
3714 r = amdgpu_device_recover_vram(tmp_adev);
3715 else
3716 tmp_adev->asic_reset_res = r;
3717 }
3718
3719end:
3720 *need_full_reset_arg = need_full_reset;
3721 return r;
3722}
3723
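/**
 * amdgpu_device_lock_adev - take the per-device reset lock
 *
 * @adev: amdgpu_device pointer
 * @trylock: if true, bail out instead of blocking when the lock is contended
 *
 * Takes adev->lock_reset, increases the GPU reset counter, marks the device
 * as being in reset and sets the MP1 state expected by the selected reset
 * method.
 * Returns true if the lock was acquired, false otherwise.
 */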
1d721ed6 3724static bool amdgpu_device_lock_adev(struct amdgpu_device *adev, bool trylock)
26bc5340 3725{
1d721ed6
AG
3726 if (trylock) {
3727 if (!mutex_trylock(&adev->lock_reset))
3728 return false;
3729 } else
3730 mutex_lock(&adev->lock_reset);
5740682e 3731
26bc5340
AG
3732 atomic_inc(&adev->gpu_reset_counter);
3733 adev->in_gpu_reset = 1;
a3a09142
AD
3734 switch (amdgpu_asic_reset_method(adev)) {
3735 case AMD_RESET_METHOD_MODE1:
3736 adev->mp1_state = PP_MP1_STATE_SHUTDOWN;
3737 break;
3738 case AMD_RESET_METHOD_MODE2:
3739 adev->mp1_state = PP_MP1_STATE_RESET;
3740 break;
3741 default:
3742 adev->mp1_state = PP_MP1_STATE_NONE;
3743 break;
3744 }
1d721ed6
AG
3745
3746 return true;
26bc5340 3747}
d38ceaf9 3748
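/**
 * amdgpu_device_unlock_adev - release the per-device reset lock
 *
 * @adev: amdgpu_device pointer
 *
 * Reports any accumulated VF errors, restores the MP1 state, clears the
 * in-reset flag and releases adev->lock_reset.
 */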
26bc5340
AG
3749static void amdgpu_device_unlock_adev(struct amdgpu_device *adev)
3750{
89041940 3751 amdgpu_vf_error_trans_all(adev);
a3a09142 3752 adev->mp1_state = PP_MP1_STATE_NONE;
13a752e3
ML
3753 adev->in_gpu_reset = 0;
3754 mutex_unlock(&adev->lock_reset);
26bc5340
AG
3755}
3756
26bc5340
AG
3757/**
3758 * amdgpu_device_gpu_recover - reset the asic and recover scheduler
3759 *
3760 * @adev: amdgpu device pointer
3761 * @job: which job triggered the hang
3762 *
3763 * Attempt to reset the GPU if it has hung (all asics).
3764 * Attempt to do a soft reset or full reset and reinitialize the ASIC.
3765 * Returns 0 for success or an error on failure.
3766 */
3767
3768int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
3769 struct amdgpu_job *job)
3770{
1d721ed6
AG
3771 struct list_head device_list, *device_list_handle = NULL;
3772 bool need_full_reset, job_signaled;
26bc5340 3773 struct amdgpu_hive_info *hive = NULL;
26bc5340 3774 struct amdgpu_device *tmp_adev = NULL;
1d721ed6 3775 int i, r = 0;
7c6e68c7 3776 bool in_ras_intr = amdgpu_ras_intr_triggered();
26bc5340 3777
d5ea093e
AG
3778 /*
3779 * Flush RAM to disk so that after reboot
3780	 * the user can read the log and see why the system rebooted.
3781 */
3782 if (in_ras_intr && amdgpu_ras_get_context(adev)->reboot) {
3783
3784 DRM_WARN("Emergency reboot.");
3785
3786 ksys_sync_helper();
3787 emergency_restart();
3788 }
3789
1d721ed6 3790 need_full_reset = job_signaled = false;
26bc5340
AG
3791 INIT_LIST_HEAD(&device_list);
3792
7c6e68c7 3793	dev_info(adev->dev, "GPU %s begin!\n", in_ras_intr ? "jobs stop" : "reset");
26bc5340 3794
beff74bc 3795 cancel_delayed_work_sync(&adev->delayed_init_work);
c53e4db7 3796
1d721ed6
AG
3797 hive = amdgpu_get_xgmi_hive(adev, false);
3798
26bc5340 3799 /*
1d721ed6
AG
3800	 * Here we trylock to avoid a chain of resets executing, triggered
3801	 * either by jobs on different adevs in an XGMI hive or by jobs on
3802	 * different schedulers of the same device, while this TO handler is running.
3803	 * We always reset all schedulers for a device and all devices in an XGMI
3804	 * hive, so that should take care of them too.
26bc5340 3805 */
1d721ed6
AG
3806
3807 if (hive && !mutex_trylock(&hive->reset_lock)) {
3808 DRM_INFO("Bailing on TDR for s_job:%llx, hive: %llx as another already in progress",
0b2d2c2e 3809 job ? job->base.id : -1, hive->hive_id);
26bc5340 3810 return 0;
1d721ed6 3811 }
26bc5340
AG
3812
3813 /* Start with adev pre asic reset first for soft reset check.*/
1d721ed6
AG
3814 if (!amdgpu_device_lock_adev(adev, !hive)) {
3815 DRM_INFO("Bailing on TDR for s_job:%llx, as another already in progress",
0b2d2c2e 3816 job ? job->base.id : -1);
1d721ed6 3817 return 0;
26bc5340
AG
3818 }
3819
7c6e68c7
AG
3820 /* Block kfd: SRIOV would do it separately */
3821 if (!amdgpu_sriov_vf(adev))
3822 amdgpu_amdkfd_pre_reset(adev);
3823
26bc5340 3824 /* Build list of devices to reset */
1d721ed6 3825 if (adev->gmc.xgmi.num_physical_nodes > 1) {
26bc5340 3826 if (!hive) {
7c6e68c7
AG
3827			/* unlock kfd: SRIOV would do it separately */
3828 if (!amdgpu_sriov_vf(adev))
3829 amdgpu_amdkfd_post_reset(adev);
26bc5340
AG
3830 amdgpu_device_unlock_adev(adev);
3831 return -ENODEV;
3832 }
3833
3834 /*
3835 * In case we are in XGMI hive mode device reset is done for all the
3836 * nodes in the hive to retrain all XGMI links and hence the reset
3837 * sequence is executed in loop on all nodes.
3838 */
3839 device_list_handle = &hive->device_list;
3840 } else {
3841 list_add_tail(&adev->gmc.xgmi.head, &device_list);
3842 device_list_handle = &device_list;
3843 }
3844
1d721ed6
AG
3845 /* block all schedulers and reset given job's ring */
3846 list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) {
7c6e68c7 3847 if (tmp_adev != adev) {
12ffa55d 3848 amdgpu_device_lock_adev(tmp_adev, false);
7c6e68c7
AG
3849 if (!amdgpu_sriov_vf(tmp_adev))
3850 amdgpu_amdkfd_pre_reset(tmp_adev);
3851 }
3852
12ffa55d
AG
3853 /*
3854		 * Mark these ASICs to be reset as untracked first,
3855		 * and add them back after the reset completes.
3856 */
3857 amdgpu_unregister_gpu_instance(tmp_adev);
3858
f1c1314b 3859 /* disable ras on ALL IPs */
7c6e68c7 3860 if (!in_ras_intr && amdgpu_device_ip_need_full_reset(tmp_adev))
f1c1314b 3861 amdgpu_ras_suspend(tmp_adev);
3862
1d721ed6
AG
3863 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
3864 struct amdgpu_ring *ring = tmp_adev->rings[i];
3865
3866 if (!ring || !ring->sched.thread)
3867 continue;
3868
0b2d2c2e 3869 drm_sched_stop(&ring->sched, job ? &job->base : NULL);
7c6e68c7
AG
3870
3871 if (in_ras_intr)
3872 amdgpu_job_stop_all_jobs_on_sched(&ring->sched);
1d721ed6
AG
3873 }
3874 }
3875
3876
7c6e68c7
AG
3877 if (in_ras_intr)
3878 goto skip_sched_resume;
3879
1d721ed6
AG
3880 /*
3881 * Must check guilty signal here since after this point all old
3882 * HW fences are force signaled.
3883 *
3884 * job->base holds a reference to parent fence
3885 */
3886 if (job && job->base.s_fence->parent &&
3887 dma_fence_is_signaled(job->base.s_fence->parent))
3888 job_signaled = true;
3889
1d721ed6
AG
3890 if (job_signaled) {
3891 dev_info(adev->dev, "Guilty job already signaled, skipping HW reset");
3892 goto skip_hw_reset;
3893 }
3894
3895
3896	/* Guilty job will be freed after this */
0b2d2c2e 3897 r = amdgpu_device_pre_asic_reset(adev, job, &need_full_reset);
1d721ed6
AG
3898 if (r) {
3899 /*TODO Should we stop ?*/
3900 DRM_ERROR("GPU pre asic reset failed with err, %d for drm dev, %s ",
3901 r, adev->ddev->unique);
3902 adev->asic_reset_res = r;
3903 }
3904
26bc5340
AG
3905retry: /* Rest of adevs pre asic reset from XGMI hive. */
3906 list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) {
3907
3908 if (tmp_adev == adev)
3909 continue;
3910
26bc5340
AG
3911 r = amdgpu_device_pre_asic_reset(tmp_adev,
3912 NULL,
3913 &need_full_reset);
3914 /*TODO Should we stop ?*/
3915 if (r) {
3916 DRM_ERROR("GPU pre asic reset failed with err, %d for drm dev, %s ",
3917 r, tmp_adev->ddev->unique);
3918 tmp_adev->asic_reset_res = r;
3919 }
3920 }
3921
3922 /* Actual ASIC resets if needed.*/
3923 /* TODO Implement XGMI hive reset logic for SRIOV */
3924 if (amdgpu_sriov_vf(adev)) {
3925 r = amdgpu_device_reset_sriov(adev, job ? false : true);
3926 if (r)
3927 adev->asic_reset_res = r;
3928 } else {
3929 r = amdgpu_do_asic_reset(hive, device_list_handle, &need_full_reset);
3930 if (r && r == -EAGAIN)
3931 goto retry;
3932 }
3933
1d721ed6
AG
3934skip_hw_reset:
3935
26bc5340
AG
3936	/* Post ASIC reset for all devs. */
3937 list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) {
7c6e68c7 3938
1d721ed6
AG
3939 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
3940 struct amdgpu_ring *ring = tmp_adev->rings[i];
3941
3942 if (!ring || !ring->sched.thread)
3943 continue;
3944
3945			/* No point in resubmitting jobs if we didn't HW reset */
3946 if (!tmp_adev->asic_reset_res && !job_signaled)
3947 drm_sched_resubmit_jobs(&ring->sched);
3948
3949 drm_sched_start(&ring->sched, !tmp_adev->asic_reset_res);
3950 }
3951
3952 if (!amdgpu_device_has_dc_support(tmp_adev) && !job_signaled) {
3953 drm_helper_resume_force_mode(tmp_adev->ddev);
3954 }
3955
3956 tmp_adev->asic_reset_res = 0;
26bc5340
AG
3957
3958 if (r) {
3959 /* bad news, how to tell it to userspace ? */
12ffa55d 3960 dev_info(tmp_adev->dev, "GPU reset(%d) failed\n", atomic_read(&tmp_adev->gpu_reset_counter));
26bc5340
AG
3961 amdgpu_vf_error_put(tmp_adev, AMDGIM_ERROR_VF_GPU_RESET_FAIL, 0, r);
3962 } else {
12ffa55d 3963 dev_info(tmp_adev->dev, "GPU reset(%d) succeeded!\n", atomic_read(&tmp_adev->gpu_reset_counter));
26bc5340 3964 }
7c6e68c7 3965 }
26bc5340 3966
7c6e68c7
AG
3967skip_sched_resume:
3968 list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) {
3969		/* unlock kfd: SRIOV would do it separately */
3970 if (!in_ras_intr && !amdgpu_sriov_vf(tmp_adev))
3971 amdgpu_amdkfd_post_reset(tmp_adev);
26bc5340
AG
3972 amdgpu_device_unlock_adev(tmp_adev);
3973 }
3974
1d721ed6 3975 if (hive)
22d6575b 3976 mutex_unlock(&hive->reset_lock);
26bc5340
AG
3977
3978 if (r)
3979 dev_info(adev->dev, "GPU reset end with ret = %d\n", r);
d38ceaf9
AD
3980 return r;
3981}
3982
e3ecdffa
AD
3983/**
3984 * amdgpu_device_get_pcie_info - fetch PCIE info about the PCIE slot
3985 *
3986 * @adev: amdgpu_device pointer
3987 *
3988 * Fetches and stores in the driver the PCIE capabilities (gen speed
3989 * and lanes) of the slot the device is in. Handles APUs and
3990 * virtualized environments where PCIE config space may not be available.
3991 */
5494d864 3992static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev)
d0dd7f0c 3993{
5d9a6330 3994 struct pci_dev *pdev;
c5313457
HK
3995 enum pci_bus_speed speed_cap, platform_speed_cap;
3996 enum pcie_link_width platform_link_width;
d0dd7f0c 3997
cd474ba0
AD
3998 if (amdgpu_pcie_gen_cap)
3999 adev->pm.pcie_gen_mask = amdgpu_pcie_gen_cap;
d0dd7f0c 4000
cd474ba0
AD
4001 if (amdgpu_pcie_lane_cap)
4002 adev->pm.pcie_mlw_mask = amdgpu_pcie_lane_cap;
d0dd7f0c 4003
cd474ba0
AD
4004 /* covers APUs as well */
4005 if (pci_is_root_bus(adev->pdev->bus)) {
4006 if (adev->pm.pcie_gen_mask == 0)
4007 adev->pm.pcie_gen_mask = AMDGPU_DEFAULT_PCIE_GEN_MASK;
4008 if (adev->pm.pcie_mlw_mask == 0)
4009 adev->pm.pcie_mlw_mask = AMDGPU_DEFAULT_PCIE_MLW_MASK;
d0dd7f0c 4010 return;
cd474ba0 4011 }
d0dd7f0c 4012
c5313457
HK
4013 if (adev->pm.pcie_gen_mask && adev->pm.pcie_mlw_mask)
4014 return;
4015
dbaa922b
AD
4016 pcie_bandwidth_available(adev->pdev, NULL,
4017 &platform_speed_cap, &platform_link_width);
c5313457 4018
cd474ba0 4019 if (adev->pm.pcie_gen_mask == 0) {
5d9a6330
AD
4020 /* asic caps */
4021 pdev = adev->pdev;
4022 speed_cap = pcie_get_speed_cap(pdev);
4023 if (speed_cap == PCI_SPEED_UNKNOWN) {
4024 adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
cd474ba0
AD
4025 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
4026 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3);
cd474ba0 4027 } else {
5d9a6330
AD
4028 if (speed_cap == PCIE_SPEED_16_0GT)
4029 adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
4030 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
4031 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3 |
4032 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN4);
4033 else if (speed_cap == PCIE_SPEED_8_0GT)
4034 adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
4035 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
4036 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3);
4037 else if (speed_cap == PCIE_SPEED_5_0GT)
4038 adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
4039 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2);
4040 else
4041 adev->pm.pcie_gen_mask |= CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1;
4042 }
4043 /* platform caps */
c5313457 4044 if (platform_speed_cap == PCI_SPEED_UNKNOWN) {
5d9a6330
AD
4045 adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
4046 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2);
4047 } else {
c5313457 4048 if (platform_speed_cap == PCIE_SPEED_16_0GT)
5d9a6330
AD
4049 adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
4050 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2 |
4051 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3 |
4052 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN4);
c5313457 4053 else if (platform_speed_cap == PCIE_SPEED_8_0GT)
5d9a6330
AD
4054 adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
4055 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2 |
4056 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3);
c5313457 4057 else if (platform_speed_cap == PCIE_SPEED_5_0GT)
5d9a6330
AD
4058 adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
4059 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2);
4060 else
4061 adev->pm.pcie_gen_mask |= CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1;
4062
cd474ba0
AD
4063 }
4064 }
4065 if (adev->pm.pcie_mlw_mask == 0) {
c5313457 4066 if (platform_link_width == PCIE_LNK_WIDTH_UNKNOWN) {
5d9a6330
AD
4067 adev->pm.pcie_mlw_mask |= AMDGPU_DEFAULT_PCIE_MLW_MASK;
4068 } else {
c5313457 4069 switch (platform_link_width) {
5d9a6330 4070 case PCIE_LNK_X32:
cd474ba0
AD
4071 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X32 |
4072 CAIL_PCIE_LINK_WIDTH_SUPPORT_X16 |
4073 CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 |
4074 CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
4075 CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
4076 CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
4077 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
4078 break;
5d9a6330 4079 case PCIE_LNK_X16:
cd474ba0
AD
4080 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X16 |
4081 CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 |
4082 CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
4083 CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
4084 CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
4085 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
4086 break;
5d9a6330 4087 case PCIE_LNK_X12:
cd474ba0
AD
4088 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 |
4089 CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
4090 CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
4091 CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
4092 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
4093 break;
5d9a6330 4094 case PCIE_LNK_X8:
cd474ba0
AD
4095 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
4096 CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
4097 CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
4098 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
4099 break;
5d9a6330 4100 case PCIE_LNK_X4:
cd474ba0
AD
4101 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
4102 CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
4103 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
4104 break;
5d9a6330 4105 case PCIE_LNK_X2:
cd474ba0
AD
4106 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
4107 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
4108 break;
5d9a6330 4109 case PCIE_LNK_X1:
cd474ba0
AD
4110 adev->pm.pcie_mlw_mask = CAIL_PCIE_LINK_WIDTH_SUPPORT_X1;
4111 break;
4112 default:
4113 break;
4114 }
d0dd7f0c
AD
4115 }
4116 }
4117}
d38ceaf9 4118