/*
 * Copyright 2008 Advanced Micro Devices, Inc.
 * Copyright 2008 Red Hat Inc.
 * Copyright 2009 Jerome Glisse.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 * Authors: Dave Airlie
 *          Alex Deucher
 *          Jerome Glisse
 */
#include <linux/power_supply.h>
#include <linux/kthread.h>
#include <linux/module.h>
#include <linux/console.h>
#include <linux/slab.h>

#include <drm/drm_atomic_helper.h>
#include <drm/drm_probe_helper.h>
#include <drm/amdgpu_drm.h>
#include <linux/vgaarb.h>
#include <linux/vga_switcheroo.h>
#include <linux/efi.h>
#include "amdgpu.h"
#include "amdgpu_trace.h"
#include "amdgpu_i2c.h"
#include "atom.h"
#include "amdgpu_atombios.h"
#include "amdgpu_atomfirmware.h"
#include "amd_pcie.h"
#ifdef CONFIG_DRM_AMDGPU_SI
#include "si.h"
#endif
#ifdef CONFIG_DRM_AMDGPU_CIK
#include "cik.h"
#endif
#include "vi.h"
#include "soc15.h"
#include "nv.h"
#include "bif/bif_4_1_d.h"
#include <linux/pci.h>
#include <linux/firmware.h>
#include "amdgpu_vf_error.h"

#include "amdgpu_amdkfd.h"
#include "amdgpu_pm.h"

#include "amdgpu_xgmi.h"
#include "amdgpu_ras.h"
#include "amdgpu_pmu.h"

#include <linux/suspend.h>

MODULE_FIRMWARE("amdgpu/vega10_gpu_info.bin");
MODULE_FIRMWARE("amdgpu/vega12_gpu_info.bin");
MODULE_FIRMWARE("amdgpu/raven_gpu_info.bin");
MODULE_FIRMWARE("amdgpu/picasso_gpu_info.bin");
MODULE_FIRMWARE("amdgpu/raven2_gpu_info.bin");
MODULE_FIRMWARE("amdgpu/arcturus_gpu_info.bin");
MODULE_FIRMWARE("amdgpu/renoir_gpu_info.bin");
MODULE_FIRMWARE("amdgpu/navi10_gpu_info.bin");
MODULE_FIRMWARE("amdgpu/navi14_gpu_info.bin");
MODULE_FIRMWARE("amdgpu/navi12_gpu_info.bin");

#define AMDGPU_RESUME_MS		2000

const char *amdgpu_asic_name[] = {
	"TAHITI",
	"PITCAIRN",
	"VERDE",
	"OLAND",
	"HAINAN",
	"BONAIRE",
	"KAVERI",
	"KABINI",
	"HAWAII",
	"MULLINS",
	"TOPAZ",
	"TONGA",
	"FIJI",
	"CARRIZO",
	"STONEY",
	"POLARIS10",
	"POLARIS11",
	"POLARIS12",
	"VEGAM",
	"VEGA10",
	"VEGA12",
	"VEGA20",
	"RAVEN",
	"ARCTURUS",
	"RENOIR",
	"NAVI10",
	"NAVI14",
	"NAVI12",
	"LAST",
};

/**
 * DOC: pcie_replay_count
 *
 * The amdgpu driver provides a sysfs API for reporting the total number
 * of PCIe replays (NAKs).
 * The file pcie_replay_count is used for this and returns the total
 * number of replays as a sum of the NAKs generated and NAKs received.
 */

static ssize_t amdgpu_device_get_pcie_replay_count(struct device *dev,
		struct device_attribute *attr, char *buf)
{
	struct drm_device *ddev = dev_get_drvdata(dev);
	struct amdgpu_device *adev = ddev->dev_private;
	uint64_t cnt = amdgpu_asic_get_pcie_replay_count(adev);

	return snprintf(buf, PAGE_SIZE, "%llu\n", cnt);
}

static DEVICE_ATTR(pcie_replay_count, S_IRUGO,
		amdgpu_device_get_pcie_replay_count, NULL);
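
/*
 * Illustrative sketch (assumption, not part of the driver): the attribute
 * above is registered in sysfs, so userspace can read the replay count from
 * a path such as /sys/class/drm/card0/device/pcie_replay_count, e.g.:
 *
 *	FILE *f = fopen("/sys/class/drm/card0/device/pcie_replay_count", "r");
 *	unsigned long long replays = 0;
 *
 *	if (f) {
 *		fscanf(f, "%llu", &replays);
 *		fclose(f);
 *	}
 *
 * The exact card index in the path is hypothetical and depends on the system.
 */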
static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev);

/**
 * amdgpu_device_is_px - Is the device a dGPU with HG/PX power control
 *
 * @dev: drm_device pointer
 *
 * Returns true if the device is a dGPU with HG/PX power control,
 * otherwise return false.
 */
bool amdgpu_device_is_px(struct drm_device *dev)
{
	struct amdgpu_device *adev = dev->dev_private;

	if (adev->flags & AMD_IS_PX)
		return true;
	return false;
}

/*
 * MMIO register access helper functions.
 */
/**
 * amdgpu_mm_rreg - read a memory mapped IO register
 *
 * @adev: amdgpu_device pointer
 * @reg: dword aligned register offset
 * @acc_flags: access flags which require special behavior
 *
 * Returns the 32 bit value from the offset specified.
 */
uint32_t amdgpu_mm_rreg(struct amdgpu_device *adev, uint32_t reg,
			uint32_t acc_flags)
{
	uint32_t ret;

	if (!(acc_flags & AMDGPU_REGS_NO_KIQ) && amdgpu_sriov_runtime(adev))
		return amdgpu_virt_kiq_rreg(adev, reg);

	if ((reg * 4) < adev->rmmio_size && !(acc_flags & AMDGPU_REGS_IDX))
		ret = readl(((void __iomem *)adev->rmmio) + (reg * 4));
	else {
		unsigned long flags;

		spin_lock_irqsave(&adev->mmio_idx_lock, flags);
		writel((reg * 4), ((void __iomem *)adev->rmmio) + (mmMM_INDEX * 4));
		ret = readl(((void __iomem *)adev->rmmio) + (mmMM_DATA * 4));
		spin_unlock_irqrestore(&adev->mmio_idx_lock, flags);
	}
	trace_amdgpu_mm_rreg(adev->pdev->device, reg, ret);
	return ret;
}

/*
 * MMIO register read with bytes helper functions
 * @offset: byte offset from MMIO start
 */

/**
 * amdgpu_mm_rreg8 - read a memory mapped IO register
 *
 * @adev: amdgpu_device pointer
 * @offset: byte aligned register offset
 *
 * Returns the 8 bit value from the offset specified.
 */
uint8_t amdgpu_mm_rreg8(struct amdgpu_device *adev, uint32_t offset)
{
	if (offset < adev->rmmio_size)
		return (readb(adev->rmmio + offset));
	BUG();
}

/*
 * MMIO register write with bytes helper functions
 * @offset: byte offset from MMIO start
 * @value: the value to be written to the register
 */
/**
 * amdgpu_mm_wreg8 - write to a memory mapped IO register
 *
 * @adev: amdgpu_device pointer
 * @offset: byte aligned register offset
 * @value: 8 bit value to write
 *
 * Writes the value specified to the offset specified.
 */
void amdgpu_mm_wreg8(struct amdgpu_device *adev, uint32_t offset, uint8_t value)
{
	if (offset < adev->rmmio_size)
		writeb(value, adev->rmmio + offset);
	else
		BUG();
}

/**
 * amdgpu_mm_wreg - write to a memory mapped IO register
 *
 * @adev: amdgpu_device pointer
 * @reg: dword aligned register offset
 * @v: 32 bit value to write to the register
 * @acc_flags: access flags which require special behavior
 *
 * Writes the value specified to the offset specified.
 */
void amdgpu_mm_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v,
		    uint32_t acc_flags)
{
	trace_amdgpu_mm_wreg(adev->pdev->device, reg, v);

	if (adev->asic_type >= CHIP_VEGA10 && reg == 0) {
		adev->last_mm_index = v;
	}

	if (!(acc_flags & AMDGPU_REGS_NO_KIQ) && amdgpu_sriov_runtime(adev))
		return amdgpu_virt_kiq_wreg(adev, reg, v);

	if ((reg * 4) < adev->rmmio_size && !(acc_flags & AMDGPU_REGS_IDX))
		writel(v, ((void __iomem *)adev->rmmio) + (reg * 4));
	else {
		unsigned long flags;

		spin_lock_irqsave(&adev->mmio_idx_lock, flags);
		writel((reg * 4), ((void __iomem *)adev->rmmio) + (mmMM_INDEX * 4));
		writel(v, ((void __iomem *)adev->rmmio) + (mmMM_DATA * 4));
		spin_unlock_irqrestore(&adev->mmio_idx_lock, flags);
	}

	if (adev->asic_type >= CHIP_VEGA10 && reg == 1 && adev->last_mm_index == 0x5702C) {
		udelay(500);
	}
}
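
/*
 * Illustrative sketch (assumption, not part of the driver): most callers go
 * through the RREG32()/WREG32()-style macros rather than calling the helpers
 * directly, but the underlying pattern is simply:
 *
 *	uint32_t val = amdgpu_mm_rreg(adev, reg_offset, 0);
 *	amdgpu_mm_wreg(adev, reg_offset, val | some_bit, 0);
 *
 * Offsets beyond the mapped rmmio window fall back to the indirect
 * mmMM_INDEX/mmMM_DATA pair, serialized by adev->mmio_idx_lock as shown above.
 * reg_offset and some_bit are placeholders for illustration only.
 */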
/**
 * amdgpu_io_rreg - read an IO register
 *
 * @adev: amdgpu_device pointer
 * @reg: dword aligned register offset
 *
 * Returns the 32 bit value from the offset specified.
 */
u32 amdgpu_io_rreg(struct amdgpu_device *adev, u32 reg)
{
	if ((reg * 4) < adev->rio_mem_size)
		return ioread32(adev->rio_mem + (reg * 4));
	else {
		iowrite32((reg * 4), adev->rio_mem + (mmMM_INDEX * 4));
		return ioread32(adev->rio_mem + (mmMM_DATA * 4));
	}
}

/**
 * amdgpu_io_wreg - write to an IO register
 *
 * @adev: amdgpu_device pointer
 * @reg: dword aligned register offset
 * @v: 32 bit value to write to the register
 *
 * Writes the value specified to the offset specified.
 */
void amdgpu_io_wreg(struct amdgpu_device *adev, u32 reg, u32 v)
{
	if (adev->asic_type >= CHIP_VEGA10 && reg == 0) {
		adev->last_mm_index = v;
	}

	if ((reg * 4) < adev->rio_mem_size)
		iowrite32(v, adev->rio_mem + (reg * 4));
	else {
		iowrite32((reg * 4), adev->rio_mem + (mmMM_INDEX * 4));
		iowrite32(v, adev->rio_mem + (mmMM_DATA * 4));
	}

	if (adev->asic_type >= CHIP_VEGA10 && reg == 1 && adev->last_mm_index == 0x5702C) {
		udelay(500);
	}
}

/**
 * amdgpu_mm_rdoorbell - read a doorbell dword
 *
 * @adev: amdgpu_device pointer
 * @index: doorbell index
 *
 * Returns the value in the doorbell aperture at the
 * requested doorbell index (CIK).
 */
u32 amdgpu_mm_rdoorbell(struct amdgpu_device *adev, u32 index)
{
	if (index < adev->doorbell.num_doorbells) {
		return readl(adev->doorbell.ptr + index);
	} else {
		DRM_ERROR("reading beyond doorbell aperture: 0x%08x!\n", index);
		return 0;
	}
}

/**
 * amdgpu_mm_wdoorbell - write a doorbell dword
 *
 * @adev: amdgpu_device pointer
 * @index: doorbell index
 * @v: value to write
 *
 * Writes @v to the doorbell aperture at the
 * requested doorbell index (CIK).
 */
void amdgpu_mm_wdoorbell(struct amdgpu_device *adev, u32 index, u32 v)
{
	if (index < adev->doorbell.num_doorbells) {
		writel(v, adev->doorbell.ptr + index);
	} else {
		DRM_ERROR("writing beyond doorbell aperture: 0x%08x!\n", index);
	}
}

/**
 * amdgpu_mm_rdoorbell64 - read a doorbell Qword
 *
 * @adev: amdgpu_device pointer
 * @index: doorbell index
 *
 * Returns the value in the doorbell aperture at the
 * requested doorbell index (VEGA10+).
 */
u64 amdgpu_mm_rdoorbell64(struct amdgpu_device *adev, u32 index)
{
	if (index < adev->doorbell.num_doorbells) {
		return atomic64_read((atomic64_t *)(adev->doorbell.ptr + index));
	} else {
		DRM_ERROR("reading beyond doorbell aperture: 0x%08x!\n", index);
		return 0;
	}
}

/**
 * amdgpu_mm_wdoorbell64 - write a doorbell Qword
 *
 * @adev: amdgpu_device pointer
 * @index: doorbell index
 * @v: value to write
 *
 * Writes @v to the doorbell aperture at the
 * requested doorbell index (VEGA10+).
 */
void amdgpu_mm_wdoorbell64(struct amdgpu_device *adev, u32 index, u64 v)
{
	if (index < adev->doorbell.num_doorbells) {
		atomic64_set((atomic64_t *)(adev->doorbell.ptr + index), v);
	} else {
		DRM_ERROR("writing beyond doorbell aperture: 0x%08x!\n", index);
	}
}
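
/*
 * Illustrative sketch (assumption, not part of the driver): ring code
 * typically publishes its write pointer through one of the doorbell helpers
 * above, along the lines of:
 *
 *	if (ring->use_doorbell)
 *		amdgpu_mm_wdoorbell64(adev, ring->doorbell_index, ring->wptr);
 *
 * The ring fields are shown only to sketch the calling pattern; see the
 * actual ring implementations for the real details.
 */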
/**
 * amdgpu_invalid_rreg - dummy reg read function
 *
 * @adev: amdgpu device pointer
 * @reg: offset of register
 *
 * Dummy register read function.  Used for register blocks
 * that certain asics don't have (all asics).
 * Returns the value in the register.
 */
static uint32_t amdgpu_invalid_rreg(struct amdgpu_device *adev, uint32_t reg)
{
	DRM_ERROR("Invalid callback to read register 0x%04X\n", reg);
	BUG();
	return 0;
}

/**
 * amdgpu_invalid_wreg - dummy reg write function
 *
 * @adev: amdgpu device pointer
 * @reg: offset of register
 * @v: value to write to the register
 *
 * Dummy register write function.  Used for register blocks
 * that certain asics don't have (all asics).
 */
static void amdgpu_invalid_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v)
{
	DRM_ERROR("Invalid callback to write register 0x%04X with 0x%08X\n",
		  reg, v);
	BUG();
}

/**
 * amdgpu_invalid_rreg64 - dummy 64 bit reg read function
 *
 * @adev: amdgpu device pointer
 * @reg: offset of register
 *
 * Dummy register read function.  Used for register blocks
 * that certain asics don't have (all asics).
 * Returns the value in the register.
 */
static uint64_t amdgpu_invalid_rreg64(struct amdgpu_device *adev, uint32_t reg)
{
	DRM_ERROR("Invalid callback to read 64 bit register 0x%04X\n", reg);
	BUG();
	return 0;
}

/**
 * amdgpu_invalid_wreg64 - dummy 64 bit reg write function
 *
 * @adev: amdgpu device pointer
 * @reg: offset of register
 * @v: value to write to the register
 *
 * Dummy register write function.  Used for register blocks
 * that certain asics don't have (all asics).
 */
static void amdgpu_invalid_wreg64(struct amdgpu_device *adev, uint32_t reg, uint64_t v)
{
	DRM_ERROR("Invalid callback to write 64 bit register 0x%04X with 0x%08llX\n",
		  reg, v);
	BUG();
}

/**
 * amdgpu_block_invalid_rreg - dummy reg read function
 *
 * @adev: amdgpu device pointer
 * @block: offset of instance
 * @reg: offset of register
 *
 * Dummy register read function.  Used for register blocks
 * that certain asics don't have (all asics).
 * Returns the value in the register.
 */
static uint32_t amdgpu_block_invalid_rreg(struct amdgpu_device *adev,
					  uint32_t block, uint32_t reg)
{
	DRM_ERROR("Invalid callback to read register 0x%04X in block 0x%04X\n",
		  reg, block);
	BUG();
	return 0;
}

/**
 * amdgpu_block_invalid_wreg - dummy reg write function
 *
 * @adev: amdgpu device pointer
 * @block: offset of instance
 * @reg: offset of register
 * @v: value to write to the register
 *
 * Dummy register write function.  Used for register blocks
 * that certain asics don't have (all asics).
 */
static void amdgpu_block_invalid_wreg(struct amdgpu_device *adev,
				      uint32_t block,
				      uint32_t reg, uint32_t v)
{
	DRM_ERROR("Invalid block callback to write register 0x%04X in block 0x%04X with 0x%08X\n",
		  reg, block, v);
	BUG();
}

/**
 * amdgpu_device_vram_scratch_init - allocate the VRAM scratch page
 *
 * @adev: amdgpu device pointer
 *
 * Allocates a scratch page of VRAM for use by various things in the
 * driver.
 */
static int amdgpu_device_vram_scratch_init(struct amdgpu_device *adev)
{
	return amdgpu_bo_create_kernel(adev, AMDGPU_GPU_PAGE_SIZE,
				       PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM,
				       &adev->vram_scratch.robj,
				       &adev->vram_scratch.gpu_addr,
				       (void **)&adev->vram_scratch.ptr);
}

/**
 * amdgpu_device_vram_scratch_fini - Free the VRAM scratch page
 *
 * @adev: amdgpu device pointer
 *
 * Frees the VRAM scratch page.
 */
static void amdgpu_device_vram_scratch_fini(struct amdgpu_device *adev)
{
	amdgpu_bo_free_kernel(&adev->vram_scratch.robj, NULL, NULL);
}

/**
 * amdgpu_device_program_register_sequence - program an array of registers.
 *
 * @adev: amdgpu_device pointer
 * @registers: pointer to the register array
 * @array_size: size of the register array
 *
 * Programs an array of registers with AND and OR masks.
 * This is a helper for setting golden registers.
 */
void amdgpu_device_program_register_sequence(struct amdgpu_device *adev,
					     const u32 *registers,
					     const u32 array_size)
{
	u32 tmp, reg, and_mask, or_mask;
	int i;

	if (array_size % 3)
		return;

	for (i = 0; i < array_size; i += 3) {
		reg = registers[i + 0];
		and_mask = registers[i + 1];
		or_mask = registers[i + 2];

		if (and_mask == 0xffffffff) {
			tmp = or_mask;
		} else {
			tmp = RREG32(reg);
			tmp &= ~and_mask;
			if (adev->family >= AMDGPU_FAMILY_AI)
				tmp |= (or_mask & and_mask);
			else
				tmp |= or_mask;
		}
		WREG32(reg, tmp);
	}
}
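
/*
 * Illustrative sketch (assumption, register offsets and masks are made up):
 * callers hand amdgpu_device_program_register_sequence() a flat array of
 * {reg, and_mask, or_mask} triples, e.g.:
 *
 *	static const u32 example_golden_settings[] = {
 *		0x1234, 0xffffffff, 0x00000001,	// full overwrite
 *		0x5678, 0x0000ff00, 0x00002100,	// read-modify-write of one byte
 *	};
 *
 *	amdgpu_device_program_register_sequence(adev, example_golden_settings,
 *						ARRAY_SIZE(example_golden_settings));
 */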
/**
 * amdgpu_device_pci_config_reset - reset the GPU
 *
 * @adev: amdgpu_device pointer
 *
 * Resets the GPU using the pci config reset sequence.
 * Only applicable to asics prior to vega10.
 */
void amdgpu_device_pci_config_reset(struct amdgpu_device *adev)
{
	pci_write_config_dword(adev->pdev, 0x7c, AMDGPU_ASIC_RESET_DATA);
}

/*
 * GPU doorbell aperture helpers function.
 */
/**
 * amdgpu_device_doorbell_init - Init doorbell driver information.
 *
 * @adev: amdgpu_device pointer
 *
 * Init doorbell driver information (CIK)
 * Returns 0 on success, error on failure.
 */
static int amdgpu_device_doorbell_init(struct amdgpu_device *adev)
{

	/* No doorbell on SI hardware generation */
	if (adev->asic_type < CHIP_BONAIRE) {
		adev->doorbell.base = 0;
		adev->doorbell.size = 0;
		adev->doorbell.num_doorbells = 0;
		adev->doorbell.ptr = NULL;
		return 0;
	}

	if (pci_resource_flags(adev->pdev, 2) & IORESOURCE_UNSET)
		return -EINVAL;

	amdgpu_asic_init_doorbell_index(adev);

	/* doorbell bar mapping */
	adev->doorbell.base = pci_resource_start(adev->pdev, 2);
	adev->doorbell.size = pci_resource_len(adev->pdev, 2);

	adev->doorbell.num_doorbells = min_t(u32, adev->doorbell.size / sizeof(u32),
					     adev->doorbell_index.max_assignment+1);
	if (adev->doorbell.num_doorbells == 0)
		return -EINVAL;

	/* For Vega, reserve and map two pages on doorbell BAR since SDMA
	 * paging queue doorbell use the second page. The
	 * AMDGPU_DOORBELL64_MAX_ASSIGNMENT definition assumes all the
	 * doorbells are in the first page. So with paging queue enabled,
	 * the max num_doorbells should + 1 page (0x400 in dword)
	 */
	if (adev->asic_type >= CHIP_VEGA10)
		adev->doorbell.num_doorbells += 0x400;

	adev->doorbell.ptr = ioremap(adev->doorbell.base,
				     adev->doorbell.num_doorbells *
				     sizeof(u32));
	if (adev->doorbell.ptr == NULL)
		return -ENOMEM;

	return 0;
}

/**
 * amdgpu_device_doorbell_fini - Tear down doorbell driver information.
 *
 * @adev: amdgpu_device pointer
 *
 * Tear down doorbell driver information (CIK)
 */
static void amdgpu_device_doorbell_fini(struct amdgpu_device *adev)
{
	iounmap(adev->doorbell.ptr);
	adev->doorbell.ptr = NULL;
}



/*
 * amdgpu_device_wb_*()
 * Writeback is the method by which the GPU updates special pages in memory
 * with the status of certain GPU events (fences, ring pointers, etc.).
 */

/**
 * amdgpu_device_wb_fini - Disable Writeback and free memory
 *
 * @adev: amdgpu_device pointer
 *
 * Disables Writeback and frees the Writeback memory (all asics).
 * Used at driver shutdown.
 */
static void amdgpu_device_wb_fini(struct amdgpu_device *adev)
{
	if (adev->wb.wb_obj) {
		amdgpu_bo_free_kernel(&adev->wb.wb_obj,
				      &adev->wb.gpu_addr,
				      (void **)&adev->wb.wb);
		adev->wb.wb_obj = NULL;
	}
}

/**
 * amdgpu_device_wb_init - Init Writeback driver info and allocate memory
 *
 * @adev: amdgpu_device pointer
 *
 * Initializes writeback and allocates writeback memory (all asics).
 * Used at driver startup.
 * Returns 0 on success or an -error on failure.
 */
static int amdgpu_device_wb_init(struct amdgpu_device *adev)
{
	int r;

	if (adev->wb.wb_obj == NULL) {
		/* AMDGPU_MAX_WB * sizeof(uint32_t) * 8 = AMDGPU_MAX_WB 256bit slots */
		r = amdgpu_bo_create_kernel(adev, AMDGPU_MAX_WB * sizeof(uint32_t) * 8,
					    PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT,
					    &adev->wb.wb_obj, &adev->wb.gpu_addr,
					    (void **)&adev->wb.wb);
		if (r) {
			dev_warn(adev->dev, "(%d) create WB bo failed\n", r);
			return r;
		}

		adev->wb.num_wb = AMDGPU_MAX_WB;
		memset(&adev->wb.used, 0, sizeof(adev->wb.used));

		/* clear wb memory */
		memset((char *)adev->wb.wb, 0, AMDGPU_MAX_WB * sizeof(uint32_t) * 8);
	}

	return 0;
}

/**
 * amdgpu_device_wb_get - Allocate a wb entry
 *
 * @adev: amdgpu_device pointer
 * @wb: wb index
 *
 * Allocate a wb slot for use by the driver (all asics).
 * Returns 0 on success or -EINVAL on failure.
 */
int amdgpu_device_wb_get(struct amdgpu_device *adev, u32 *wb)
{
	unsigned long offset = find_first_zero_bit(adev->wb.used, adev->wb.num_wb);

	if (offset < adev->wb.num_wb) {
		__set_bit(offset, adev->wb.used);
		*wb = offset << 3; /* convert to dw offset */
		return 0;
	} else {
		return -EINVAL;
	}
}

/**
 * amdgpu_device_wb_free - Free a wb entry
 *
 * @adev: amdgpu_device pointer
 * @wb: wb index
 *
 * Free a wb slot allocated for use by the driver (all asics)
 */
void amdgpu_device_wb_free(struct amdgpu_device *adev, u32 wb)
{
	wb >>= 3;
	if (wb < adev->wb.num_wb)
		__clear_bit(wb, adev->wb.used);
}
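
/*
 * Illustrative sketch (assumption, not part of the driver): a typical
 * consumer reserves a writeback slot, points the hardware at the GPU address
 * of that slot, reads the CPU copy through adev->wb.wb, and frees the slot on
 * teardown:
 *
 *	u32 wb_idx;
 *
 *	if (!amdgpu_device_wb_get(adev, &wb_idx)) {
 *		u64 gpu_addr = adev->wb.gpu_addr + (wb_idx * 4);
 *		u32 val = le32_to_cpu(adev->wb.wb[wb_idx]);
 *
 *		...
 *		amdgpu_device_wb_free(adev, wb_idx);
 *	}
 */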
/**
 * amdgpu_device_resize_fb_bar - try to resize FB BAR
 *
 * @adev: amdgpu_device pointer
 *
 * Try to resize FB BAR to make all VRAM CPU accessible. We try very hard not
 * to fail, but if any of the BARs is not accessible after the size we abort
 * driver loading by returning -ENODEV.
 */
int amdgpu_device_resize_fb_bar(struct amdgpu_device *adev)
{
	u64 space_needed = roundup_pow_of_two(adev->gmc.real_vram_size);
	u32 rbar_size = order_base_2(((space_needed >> 20) | 1)) - 1;
	struct pci_bus *root;
	struct resource *res;
	unsigned i;
	u16 cmd;
	int r;

	/* Bypass for VF */
	if (amdgpu_sriov_vf(adev))
		return 0;

	/* Check if the root BUS has 64bit memory resources */
	root = adev->pdev->bus;
	while (root->parent)
		root = root->parent;

	pci_bus_for_each_resource(root, res, i) {
		if (res && res->flags & (IORESOURCE_MEM | IORESOURCE_MEM_64) &&
		    res->start > 0x100000000ull)
			break;
	}

	/* Trying to resize is pointless without a root hub window above 4GB */
	if (!res)
		return 0;

	/* Disable memory decoding while we change the BAR addresses and size */
	pci_read_config_word(adev->pdev, PCI_COMMAND, &cmd);
	pci_write_config_word(adev->pdev, PCI_COMMAND,
			      cmd & ~PCI_COMMAND_MEMORY);

	/* Free the VRAM and doorbell BAR, we most likely need to move both. */
	amdgpu_device_doorbell_fini(adev);
	if (adev->asic_type >= CHIP_BONAIRE)
		pci_release_resource(adev->pdev, 2);

	pci_release_resource(adev->pdev, 0);

	r = pci_resize_resource(adev->pdev, 0, rbar_size);
	if (r == -ENOSPC)
		DRM_INFO("Not enough PCI address space for a large BAR.");
	else if (r && r != -ENOTSUPP)
		DRM_ERROR("Problem resizing BAR0 (%d).", r);

	pci_assign_unassigned_bus_resources(adev->pdev->bus);

	/* When the doorbell or fb BAR isn't available we have no chance of
	 * using the device.
	 */
	r = amdgpu_device_doorbell_init(adev);
	if (r || (pci_resource_flags(adev->pdev, 0) & IORESOURCE_UNSET))
		return -ENODEV;

	pci_write_config_word(adev->pdev, PCI_COMMAND, cmd);

	return 0;
}

/*
 * GPU helpers function.
 */
/**
 * amdgpu_device_need_post - check if the hw need post or not
 *
 * @adev: amdgpu_device pointer
 *
 * Check if the asic has been initialized (all asics) at driver startup
 * or post is needed if hw reset is performed.
 * Returns true if need or false if not.
 */
bool amdgpu_device_need_post(struct amdgpu_device *adev)
{
	uint32_t reg;

	if (amdgpu_sriov_vf(adev))
		return false;

	if (amdgpu_passthrough(adev)) {
		/* for FIJI: In whole GPU pass-through virtualization case, after VM reboot
		 * some old smc fw still need driver do vPost otherwise gpu hang, while
		 * those smc fw version above 22.15 doesn't have this flaw, so we force
		 * vpost executed for smc version below 22.15
		 */
		if (adev->asic_type == CHIP_FIJI) {
			int err;
			uint32_t fw_ver;

			err = request_firmware(&adev->pm.fw, "amdgpu/fiji_smc.bin", adev->dev);
			/* force vPost if error occurred */
			if (err)
				return true;

			fw_ver = *((uint32_t *)adev->pm.fw->data + 69);
			if (fw_ver < 0x00160e00)
				return true;
		}
	}

	if (adev->has_hw_reset) {
		adev->has_hw_reset = false;
		return true;
	}

	/* bios scratch used on CIK+ */
	if (adev->asic_type >= CHIP_BONAIRE)
		return amdgpu_atombios_scratch_need_asic_init(adev);

	/* check MEM_SIZE for older asics */
	reg = amdgpu_asic_get_config_memsize(adev);

	if ((reg != 0) && (reg != 0xffffffff))
		return false;

	return true;
}
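
/*
 * Illustrative sketch (assumption): later during device init the result of
 * amdgpu_device_need_post() typically gates posting the card via the vbios,
 * roughly:
 *
 *	if (amdgpu_device_need_post(adev)) {
 *		r = amdgpu_atom_asic_init(adev->mode_info.atom_context);
 *		if (r)
 *			dev_err(adev->dev, "gpu post error!\n");
 *	}
 *
 * The real call site lives further down in this file and may differ in detail.
 */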
/* if we get transitioned to only one device, take VGA back */
/**
 * amdgpu_device_vga_set_decode - enable/disable vga decode
 *
 * @cookie: amdgpu_device pointer
 * @state: enable/disable vga decode
 *
 * Enable/disable vga decode (all asics).
 * Returns VGA resource flags.
 */
static unsigned int amdgpu_device_vga_set_decode(void *cookie, bool state)
{
	struct amdgpu_device *adev = cookie;

	amdgpu_asic_set_vga_state(adev, state);
	if (state)
		return VGA_RSRC_LEGACY_IO | VGA_RSRC_LEGACY_MEM |
		       VGA_RSRC_NORMAL_IO | VGA_RSRC_NORMAL_MEM;
	else
		return VGA_RSRC_NORMAL_IO | VGA_RSRC_NORMAL_MEM;
}

/**
 * amdgpu_device_check_block_size - validate the vm block size
 *
 * @adev: amdgpu_device pointer
 *
 * Validates the vm block size specified via module parameter.
 * The vm block size defines number of bits in page table versus page directory,
 * a page is 4KB so we have 12 bits offset, minimum 9 bits in the
 * page table and the remaining bits are in the page directory.
 */
static void amdgpu_device_check_block_size(struct amdgpu_device *adev)
{
	/* defines number of bits in page table versus page directory,
	 * a page is 4KB so we have 12 bits offset, minimum 9 bits in the
	 * page table and the remaining bits are in the page directory */
	if (amdgpu_vm_block_size == -1)
		return;

	if (amdgpu_vm_block_size < 9) {
		dev_warn(adev->dev, "VM page table size (%d) too small\n",
			 amdgpu_vm_block_size);
		amdgpu_vm_block_size = -1;
	}
}

/**
 * amdgpu_device_check_vm_size - validate the vm size
 *
 * @adev: amdgpu_device pointer
 *
 * Validates the vm size in GB specified via module parameter.
 * The VM size is the size of the GPU virtual memory space in GB.
 */
static void amdgpu_device_check_vm_size(struct amdgpu_device *adev)
{
	/* no need to check the default value */
	if (amdgpu_vm_size == -1)
		return;

	if (amdgpu_vm_size < 1) {
		dev_warn(adev->dev, "VM size (%d) too small, min is 1GB\n",
			 amdgpu_vm_size);
		amdgpu_vm_size = -1;
	}
}

static void amdgpu_device_check_smu_prv_buffer_size(struct amdgpu_device *adev)
{
	struct sysinfo si;
	bool is_os_64 = (sizeof(void *) == 8) ? true : false;
	uint64_t total_memory;
	uint64_t dram_size_seven_GB = 0x1B8000000;
	uint64_t dram_size_three_GB = 0xB8000000;

	if (amdgpu_smu_memory_pool_size == 0)
		return;

	if (!is_os_64) {
		DRM_WARN("Not 64-bit OS, feature not supported\n");
		goto def_value;
	}
	si_meminfo(&si);
	total_memory = (uint64_t)si.totalram * si.mem_unit;

	if ((amdgpu_smu_memory_pool_size == 1) ||
	    (amdgpu_smu_memory_pool_size == 2)) {
		if (total_memory < dram_size_three_GB)
			goto def_value1;
	} else if ((amdgpu_smu_memory_pool_size == 4) ||
		   (amdgpu_smu_memory_pool_size == 8)) {
		if (total_memory < dram_size_seven_GB)
			goto def_value1;
	} else {
		DRM_WARN("Smu memory pool size not supported\n");
		goto def_value;
	}
	adev->pm.smu_prv_buffer_size = amdgpu_smu_memory_pool_size << 28;

	return;

def_value1:
	DRM_WARN("Not enough system memory\n");
def_value:
	adev->pm.smu_prv_buffer_size = 0;
}

/**
 * amdgpu_device_check_arguments - validate module params
 *
 * @adev: amdgpu_device pointer
 *
 * Validates certain module parameters and updates
 * the associated values used by the driver (all asics).
 */
static int amdgpu_device_check_arguments(struct amdgpu_device *adev)
{
	int ret = 0;

	if (amdgpu_sched_jobs < 4) {
		dev_warn(adev->dev, "sched jobs (%d) must be at least 4\n",
			 amdgpu_sched_jobs);
		amdgpu_sched_jobs = 4;
	} else if (!is_power_of_2(amdgpu_sched_jobs)) {
		dev_warn(adev->dev, "sched jobs (%d) must be a power of 2\n",
			 amdgpu_sched_jobs);
		amdgpu_sched_jobs = roundup_pow_of_two(amdgpu_sched_jobs);
	}

	if (amdgpu_gart_size != -1 && amdgpu_gart_size < 32) {
		/* gart size must be greater or equal to 32M */
		dev_warn(adev->dev, "gart size (%d) too small\n",
			 amdgpu_gart_size);
		amdgpu_gart_size = -1;
	}

	if (amdgpu_gtt_size != -1 && amdgpu_gtt_size < 32) {
		/* gtt size must be greater or equal to 32M */
		dev_warn(adev->dev, "gtt size (%d) too small\n",
			 amdgpu_gtt_size);
		amdgpu_gtt_size = -1;
	}

	/* valid range is between 4 and 9 inclusive */
	if (amdgpu_vm_fragment_size != -1 &&
	    (amdgpu_vm_fragment_size > 9 || amdgpu_vm_fragment_size < 4)) {
		dev_warn(adev->dev, "valid range is between 4 and 9\n");
		amdgpu_vm_fragment_size = -1;
	}

	amdgpu_device_check_smu_prv_buffer_size(adev);

	amdgpu_device_check_vm_size(adev);

	amdgpu_device_check_block_size(adev);

	ret = amdgpu_device_get_job_timeout_settings(adev);
	if (ret) {
		dev_err(adev->dev, "invalid lockup_timeout parameter syntax\n");
		return ret;
	}

	adev->firmware.load_type = amdgpu_ucode_get_load_type(adev, amdgpu_fw_load_type);

	return ret;
}

/**
 * amdgpu_switcheroo_set_state - set switcheroo state
 *
 * @pdev: pci dev pointer
 * @state: vga_switcheroo state
 *
 * Callback for the switcheroo driver. Suspends or resumes
 * the asics before or after they are powered up using ACPI methods.
 */
static void amdgpu_switcheroo_set_state(struct pci_dev *pdev, enum vga_switcheroo_state state)
{
	struct drm_device *dev = pci_get_drvdata(pdev);

	if (amdgpu_device_is_px(dev) && state == VGA_SWITCHEROO_OFF)
		return;

	if (state == VGA_SWITCHEROO_ON) {
		pr_info("amdgpu: switched on\n");
		/* don't suspend or resume card normally */
		dev->switch_power_state = DRM_SWITCH_POWER_CHANGING;

		amdgpu_device_resume(dev, true, true);

		dev->switch_power_state = DRM_SWITCH_POWER_ON;
		drm_kms_helper_poll_enable(dev);
	} else {
		pr_info("amdgpu: switched off\n");
		drm_kms_helper_poll_disable(dev);
		dev->switch_power_state = DRM_SWITCH_POWER_CHANGING;
		amdgpu_device_suspend(dev, true, true);
		dev->switch_power_state = DRM_SWITCH_POWER_OFF;
	}
}

/**
 * amdgpu_switcheroo_can_switch - see if switcheroo state can change
 *
 * @pdev: pci dev pointer
 *
 * Callback for the switcheroo driver. Check if the switcheroo
 * state can be changed.
 * Returns true if the state can be changed, false if not.
 */
static bool amdgpu_switcheroo_can_switch(struct pci_dev *pdev)
{
	struct drm_device *dev = pci_get_drvdata(pdev);

	/*
	 * FIXME: open_count is protected by drm_global_mutex but that would lead to
	 * locking inversion with the driver load path. And the access here is
	 * completely racy anyway. So don't bother with locking for now.
	 */
	return dev->open_count == 0;
}

static const struct vga_switcheroo_client_ops amdgpu_switcheroo_ops = {
	.set_gpu_state = amdgpu_switcheroo_set_state,
	.reprobe = NULL,
	.can_switch = amdgpu_switcheroo_can_switch,
};

/**
 * amdgpu_device_ip_set_clockgating_state - set the CG state
 *
 * @dev: amdgpu_device pointer
 * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
 * @state: clockgating state (gate or ungate)
 *
 * Sets the requested clockgating state for all instances of
 * the hardware IP specified.
 * Returns the error code from the last instance.
 */
int amdgpu_device_ip_set_clockgating_state(void *dev,
					   enum amd_ip_block_type block_type,
					   enum amd_clockgating_state state)
{
	struct amdgpu_device *adev = dev;
	int i, r = 0;

	for (i = 0; i < adev->num_ip_blocks; i++) {
		if (!adev->ip_blocks[i].status.valid)
			continue;
		if (adev->ip_blocks[i].version->type != block_type)
			continue;
		if (!adev->ip_blocks[i].version->funcs->set_clockgating_state)
			continue;
		r = adev->ip_blocks[i].version->funcs->set_clockgating_state(
			(void *)adev, state);
		if (r)
			DRM_ERROR("set_clockgating_state of IP block <%s> failed %d\n",
				  adev->ip_blocks[i].version->funcs->name, r);
	}
	return r;
}
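
/*
 * Illustrative sketch (assumption): IP code usually toggles gating on another
 * block through this wrapper (and its powergating counterpart below), e.g.:
 *
 *	amdgpu_device_ip_set_clockgating_state(adev, AMD_IP_BLOCK_TYPE_UVD,
 *					       AMD_CG_STATE_GATE);
 *
 * The wrapper walks every registered IP block, only acts on instances whose
 * type matches, and returns the last error seen.
 */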
/**
 * amdgpu_device_ip_set_powergating_state - set the PG state
 *
 * @dev: amdgpu_device pointer
 * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
 * @state: powergating state (gate or ungate)
 *
 * Sets the requested powergating state for all instances of
 * the hardware IP specified.
 * Returns the error code from the last instance.
 */
int amdgpu_device_ip_set_powergating_state(void *dev,
					   enum amd_ip_block_type block_type,
					   enum amd_powergating_state state)
{
	struct amdgpu_device *adev = dev;
	int i, r = 0;

	for (i = 0; i < adev->num_ip_blocks; i++) {
		if (!adev->ip_blocks[i].status.valid)
			continue;
		if (adev->ip_blocks[i].version->type != block_type)
			continue;
		if (!adev->ip_blocks[i].version->funcs->set_powergating_state)
			continue;
		r = adev->ip_blocks[i].version->funcs->set_powergating_state(
			(void *)adev, state);
		if (r)
			DRM_ERROR("set_powergating_state of IP block <%s> failed %d\n",
				  adev->ip_blocks[i].version->funcs->name, r);
	}
	return r;
}

/**
 * amdgpu_device_ip_get_clockgating_state - get the CG state
 *
 * @adev: amdgpu_device pointer
 * @flags: clockgating feature flags
 *
 * Walks the list of IPs on the device and updates the clockgating
 * flags for each IP.
 * Updates @flags with the feature flags for each hardware IP where
 * clockgating is enabled.
 */
void amdgpu_device_ip_get_clockgating_state(struct amdgpu_device *adev,
					    u32 *flags)
{
	int i;

	for (i = 0; i < adev->num_ip_blocks; i++) {
		if (!adev->ip_blocks[i].status.valid)
			continue;
		if (adev->ip_blocks[i].version->funcs->get_clockgating_state)
			adev->ip_blocks[i].version->funcs->get_clockgating_state((void *)adev, flags);
	}
}

/**
 * amdgpu_device_ip_wait_for_idle - wait for idle
 *
 * @adev: amdgpu_device pointer
 * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
 *
 * Waits for the requested hardware IP to be idle.
 * Returns 0 for success or a negative error code on failure.
 */
int amdgpu_device_ip_wait_for_idle(struct amdgpu_device *adev,
				   enum amd_ip_block_type block_type)
{
	int i, r;

	for (i = 0; i < adev->num_ip_blocks; i++) {
		if (!adev->ip_blocks[i].status.valid)
			continue;
		if (adev->ip_blocks[i].version->type == block_type) {
			r = adev->ip_blocks[i].version->funcs->wait_for_idle((void *)adev);
			if (r)
				return r;
			break;
		}
	}
	return 0;

}
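
/*
 * Illustrative sketch (assumption): before reprogramming a block, callers can
 * pair amdgpu_device_ip_wait_for_idle() above with amdgpu_device_ip_is_idle()
 * below, e.g.:
 *
 *	if (!amdgpu_device_ip_is_idle(adev, AMD_IP_BLOCK_TYPE_GFX))
 *		amdgpu_device_ip_wait_for_idle(adev, AMD_IP_BLOCK_TYPE_GFX);
 */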
/**
 * amdgpu_device_ip_is_idle - is the hardware IP idle
 *
 * @adev: amdgpu_device pointer
 * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
 *
 * Check if the hardware IP is idle or not.
 * Returns true if the IP is idle, false if not.
 */
bool amdgpu_device_ip_is_idle(struct amdgpu_device *adev,
			      enum amd_ip_block_type block_type)
{
	int i;

	for (i = 0; i < adev->num_ip_blocks; i++) {
		if (!adev->ip_blocks[i].status.valid)
			continue;
		if (adev->ip_blocks[i].version->type == block_type)
			return adev->ip_blocks[i].version->funcs->is_idle((void *)adev);
	}
	return true;

}

/**
 * amdgpu_device_ip_get_ip_block - get a hw IP pointer
 *
 * @adev: amdgpu_device pointer
 * @type: Type of hardware IP (SMU, GFX, UVD, etc.)
 *
 * Returns a pointer to the hardware IP block structure
 * if it exists for the asic, otherwise NULL.
 */
struct amdgpu_ip_block *
amdgpu_device_ip_get_ip_block(struct amdgpu_device *adev,
			      enum amd_ip_block_type type)
{
	int i;

	for (i = 0; i < adev->num_ip_blocks; i++)
		if (adev->ip_blocks[i].version->type == type)
			return &adev->ip_blocks[i];

	return NULL;
}

/**
 * amdgpu_device_ip_block_version_cmp
 *
 * @adev: amdgpu_device pointer
 * @type: enum amd_ip_block_type
 * @major: major version
 * @minor: minor version
 *
 * return 0 if equal or greater
 * return 1 if smaller or the ip_block doesn't exist
 */
int amdgpu_device_ip_block_version_cmp(struct amdgpu_device *adev,
				       enum amd_ip_block_type type,
				       u32 major, u32 minor)
{
	struct amdgpu_ip_block *ip_block = amdgpu_device_ip_get_ip_block(adev, type);

	if (ip_block && ((ip_block->version->major > major) ||
			((ip_block->version->major == major) &&
			(ip_block->version->minor >= minor))))
		return 0;

	return 1;
}
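
/*
 * Illustrative sketch (assumption): asic setup code can key behaviour off the
 * IP version registered for this device, e.g.:
 *
 *	if (!amdgpu_device_ip_block_version_cmp(adev, AMD_IP_BLOCK_TYPE_GFX,
 *						8, 0)) {
 *		// GFX IP is version 8.0 or newer on this asic
 *	}
 */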
/**
 * amdgpu_device_ip_block_add
 *
 * @adev: amdgpu_device pointer
 * @ip_block_version: pointer to the IP to add
 *
 * Adds the IP block driver information to the collection of IPs
 * on the asic.
 */
int amdgpu_device_ip_block_add(struct amdgpu_device *adev,
			       const struct amdgpu_ip_block_version *ip_block_version)
{
	if (!ip_block_version)
		return -EINVAL;

	DRM_INFO("add ip block number %d <%s>\n", adev->num_ip_blocks,
		  ip_block_version->funcs->name);

	adev->ip_blocks[adev->num_ip_blocks++].version = ip_block_version;

	return 0;
}

/**
 * amdgpu_device_enable_virtual_display - enable virtual display feature
 *
 * @adev: amdgpu_device pointer
 *
 * Enables the virtual display feature if the user has enabled it via
 * the module parameter virtual_display. This feature provides a virtual
 * display hardware on headless boards or in virtualized environments.
 * This function parses and validates the configuration string specified by
 * the user and configures the virtual display configuration (number of
 * virtual connectors, crtcs, etc.) specified.
 */
static void amdgpu_device_enable_virtual_display(struct amdgpu_device *adev)
{
	adev->enable_virtual_display = false;

	if (amdgpu_virtual_display) {
		struct drm_device *ddev = adev->ddev;
		const char *pci_address_name = pci_name(ddev->pdev);
		char *pciaddstr, *pciaddstr_tmp, *pciaddname_tmp, *pciaddname;

		pciaddstr = kstrdup(amdgpu_virtual_display, GFP_KERNEL);
		pciaddstr_tmp = pciaddstr;
		while ((pciaddname_tmp = strsep(&pciaddstr_tmp, ";"))) {
			pciaddname = strsep(&pciaddname_tmp, ",");
			if (!strcmp("all", pciaddname)
			    || !strcmp(pci_address_name, pciaddname)) {
				long num_crtc;
				int res = -1;

				adev->enable_virtual_display = true;

				if (pciaddname_tmp)
					res = kstrtol(pciaddname_tmp, 10,
						      &num_crtc);

				if (!res) {
					if (num_crtc < 1)
						num_crtc = 1;
					if (num_crtc > 6)
						num_crtc = 6;
					adev->mode_info.num_crtc = num_crtc;
				} else {
					adev->mode_info.num_crtc = 1;
				}
				break;
			}
		}

		DRM_INFO("virtual display string:%s, %s:virtual_display:%d, num_crtc:%d\n",
			 amdgpu_virtual_display, pci_address_name,
			 adev->enable_virtual_display, adev->mode_info.num_crtc);

		kfree(pciaddstr);
	}
}

/**
 * amdgpu_device_parse_gpu_info_fw - parse gpu info firmware
 *
 * @adev: amdgpu_device pointer
 *
 * Parses the asic configuration parameters specified in the gpu info
 * firmware and makes them available to the driver for use in configuring
 * the asic.
 * Returns 0 on success, -EINVAL on failure.
 */
static int amdgpu_device_parse_gpu_info_fw(struct amdgpu_device *adev)
{
	const char *chip_name;
	char fw_name[30];
	int err;
	const struct gpu_info_firmware_header_v1_0 *hdr;

	adev->firmware.gpu_info_fw = NULL;

	switch (adev->asic_type) {
	case CHIP_TOPAZ:
	case CHIP_TONGA:
	case CHIP_FIJI:
	case CHIP_POLARIS10:
	case CHIP_POLARIS11:
	case CHIP_POLARIS12:
	case CHIP_VEGAM:
	case CHIP_CARRIZO:
	case CHIP_STONEY:
#ifdef CONFIG_DRM_AMDGPU_SI
	case CHIP_VERDE:
	case CHIP_TAHITI:
	case CHIP_PITCAIRN:
	case CHIP_OLAND:
	case CHIP_HAINAN:
#endif
#ifdef CONFIG_DRM_AMDGPU_CIK
	case CHIP_BONAIRE:
	case CHIP_HAWAII:
	case CHIP_KAVERI:
	case CHIP_KABINI:
	case CHIP_MULLINS:
#endif
	case CHIP_VEGA20:
	default:
		return 0;
	case CHIP_VEGA10:
		chip_name = "vega10";
		break;
	case CHIP_VEGA12:
		chip_name = "vega12";
		break;
	case CHIP_RAVEN:
		if (adev->rev_id >= 8)
			chip_name = "raven2";
		else if (adev->pdev->device == 0x15d8)
			chip_name = "picasso";
		else
			chip_name = "raven";
		break;
	case CHIP_ARCTURUS:
		chip_name = "arcturus";
		break;
	case CHIP_RENOIR:
		chip_name = "renoir";
		break;
	case CHIP_NAVI10:
		chip_name = "navi10";
		break;
	case CHIP_NAVI14:
		chip_name = "navi14";
		break;
	case CHIP_NAVI12:
		chip_name = "navi12";
		break;
	}

	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_gpu_info.bin", chip_name);
	err = request_firmware(&adev->firmware.gpu_info_fw, fw_name, adev->dev);
	if (err) {
		dev_err(adev->dev,
			"Failed to load gpu_info firmware \"%s\"\n",
			fw_name);
		goto out;
	}
	err = amdgpu_ucode_validate(adev->firmware.gpu_info_fw);
	if (err) {
		dev_err(adev->dev,
			"Failed to validate gpu_info firmware \"%s\"\n",
			fw_name);
		goto out;
	}

	hdr = (const struct gpu_info_firmware_header_v1_0 *)adev->firmware.gpu_info_fw->data;
	amdgpu_ucode_print_gpu_info_hdr(&hdr->header);

	switch (hdr->version_major) {
	case 1:
	{
		const struct gpu_info_firmware_v1_0 *gpu_info_fw =
			(const struct gpu_info_firmware_v1_0 *)(adev->firmware.gpu_info_fw->data +
								le32_to_cpu(hdr->header.ucode_array_offset_bytes));

		adev->gfx.config.max_shader_engines = le32_to_cpu(gpu_info_fw->gc_num_se);
		adev->gfx.config.max_cu_per_sh = le32_to_cpu(gpu_info_fw->gc_num_cu_per_sh);
		adev->gfx.config.max_sh_per_se = le32_to_cpu(gpu_info_fw->gc_num_sh_per_se);
		adev->gfx.config.max_backends_per_se = le32_to_cpu(gpu_info_fw->gc_num_rb_per_se);
		adev->gfx.config.max_texture_channel_caches =
			le32_to_cpu(gpu_info_fw->gc_num_tccs);
		adev->gfx.config.max_gprs = le32_to_cpu(gpu_info_fw->gc_num_gprs);
		adev->gfx.config.max_gs_threads = le32_to_cpu(gpu_info_fw->gc_num_max_gs_thds);
		adev->gfx.config.gs_vgt_table_depth = le32_to_cpu(gpu_info_fw->gc_gs_table_depth);
		adev->gfx.config.gs_prim_buffer_depth = le32_to_cpu(gpu_info_fw->gc_gsprim_buff_depth);
		adev->gfx.config.double_offchip_lds_buf =
			le32_to_cpu(gpu_info_fw->gc_double_offchip_lds_buffer);
		adev->gfx.cu_info.wave_front_size = le32_to_cpu(gpu_info_fw->gc_wave_size);
		adev->gfx.cu_info.max_waves_per_simd =
			le32_to_cpu(gpu_info_fw->gc_max_waves_per_simd);
		adev->gfx.cu_info.max_scratch_slots_per_cu =
			le32_to_cpu(gpu_info_fw->gc_max_scratch_slots_per_cu);
		adev->gfx.cu_info.lds_size = le32_to_cpu(gpu_info_fw->gc_lds_size);
		if (hdr->version_minor >= 1) {
			const struct gpu_info_firmware_v1_1 *gpu_info_fw =
				(const struct gpu_info_firmware_v1_1 *)(adev->firmware.gpu_info_fw->data +
									le32_to_cpu(hdr->header.ucode_array_offset_bytes));
			adev->gfx.config.num_sc_per_sh =
				le32_to_cpu(gpu_info_fw->num_sc_per_sh);
			adev->gfx.config.num_packer_per_sc =
				le32_to_cpu(gpu_info_fw->num_packer_per_sc);
		}
#ifdef CONFIG_DRM_AMD_DC_DCN2_0
		if (hdr->version_minor == 2) {
			const struct gpu_info_firmware_v1_2 *gpu_info_fw =
				(const struct gpu_info_firmware_v1_2 *)(adev->firmware.gpu_info_fw->data +
									le32_to_cpu(hdr->header.ucode_array_offset_bytes));
			adev->dm.soc_bounding_box = &gpu_info_fw->soc_bounding_box;
		}
#endif
		break;
	}
	default:
		dev_err(adev->dev,
			"Unsupported gpu_info table %d\n", hdr->header.ucode_version);
		err = -EINVAL;
		goto out;
	}
out:
	return err;
}

/**
 * amdgpu_device_ip_early_init - run early init for hardware IPs
 *
 * @adev: amdgpu_device pointer
 *
 * Early initialization pass for hardware IPs. The hardware IPs that make
 * up each asic are discovered, and each IP's early_init callback is run. This
 * is the first stage in initializing the asic.
 * Returns 0 on success, negative error code on failure.
 */
static int amdgpu_device_ip_early_init(struct amdgpu_device *adev)
{
	int i, r;

	amdgpu_device_enable_virtual_display(adev);

	switch (adev->asic_type) {
	case CHIP_TOPAZ:
	case CHIP_TONGA:
	case CHIP_FIJI:
	case CHIP_POLARIS10:
	case CHIP_POLARIS11:
	case CHIP_POLARIS12:
	case CHIP_VEGAM:
	case CHIP_CARRIZO:
	case CHIP_STONEY:
		if (adev->asic_type == CHIP_CARRIZO || adev->asic_type == CHIP_STONEY)
			adev->family = AMDGPU_FAMILY_CZ;
		else
			adev->family = AMDGPU_FAMILY_VI;

		r = vi_set_ip_blocks(adev);
		if (r)
			return r;
		break;
#ifdef CONFIG_DRM_AMDGPU_SI
	case CHIP_VERDE:
	case CHIP_TAHITI:
	case CHIP_PITCAIRN:
	case CHIP_OLAND:
	case CHIP_HAINAN:
		adev->family = AMDGPU_FAMILY_SI;
		r = si_set_ip_blocks(adev);
		if (r)
			return r;
		break;
#endif
#ifdef CONFIG_DRM_AMDGPU_CIK
	case CHIP_BONAIRE:
	case CHIP_HAWAII:
	case CHIP_KAVERI:
	case CHIP_KABINI:
	case CHIP_MULLINS:
		if ((adev->asic_type == CHIP_BONAIRE) || (adev->asic_type == CHIP_HAWAII))
			adev->family = AMDGPU_FAMILY_CI;
		else
			adev->family = AMDGPU_FAMILY_KV;

		r = cik_set_ip_blocks(adev);
		if (r)
			return r;
		break;
#endif
	case CHIP_VEGA10:
	case CHIP_VEGA12:
	case CHIP_VEGA20:
	case CHIP_RAVEN:
	case CHIP_ARCTURUS:
	case CHIP_RENOIR:
		if (adev->asic_type == CHIP_RAVEN ||
		    adev->asic_type == CHIP_RENOIR)
			adev->family = AMDGPU_FAMILY_RV;
		else
			adev->family = AMDGPU_FAMILY_AI;

		r = soc15_set_ip_blocks(adev);
		if (r)
			return r;
		break;
	case CHIP_NAVI10:
	case CHIP_NAVI14:
	case CHIP_NAVI12:
		adev->family = AMDGPU_FAMILY_NV;

		r = nv_set_ip_blocks(adev);
		if (r)
			return r;
		break;
	default:
		/* FIXME: not supported yet */
		return -EINVAL;
	}

	r = amdgpu_device_parse_gpu_info_fw(adev);
	if (r)
		return r;

	amdgpu_amdkfd_device_probe(adev);

	if (amdgpu_sriov_vf(adev)) {
		r = amdgpu_virt_request_full_gpu(adev, true);
		if (r)
			return -EAGAIN;
	}

	adev->pm.pp_feature = amdgpu_pp_feature_mask;
	if (amdgpu_sriov_vf(adev))
		adev->pm.pp_feature &= ~PP_GFXOFF_MASK;

	for (i = 0; i < adev->num_ip_blocks; i++) {
		if ((amdgpu_ip_block_mask & (1 << i)) == 0) {
			DRM_ERROR("disabled ip block: %d <%s>\n",
				  i, adev->ip_blocks[i].version->funcs->name);
			adev->ip_blocks[i].status.valid = false;
		} else {
			if (adev->ip_blocks[i].version->funcs->early_init) {
				r = adev->ip_blocks[i].version->funcs->early_init((void *)adev);
				if (r == -ENOENT) {
					adev->ip_blocks[i].status.valid = false;
				} else if (r) {
					DRM_ERROR("early_init of IP block <%s> failed %d\n",
						  adev->ip_blocks[i].version->funcs->name, r);
					return r;
				} else {
					adev->ip_blocks[i].status.valid = true;
				}
			} else {
				adev->ip_blocks[i].status.valid = true;
			}
		}
		/* get the vbios after the asic_funcs are set up */
		if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON) {
			/* Read BIOS */
			if (!amdgpu_get_bios(adev))
				return -EINVAL;

			r = amdgpu_atombios_init(adev);
			if (r) {
				dev_err(adev->dev, "amdgpu_atombios_init failed\n");
				amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_INIT_FAIL, 0, 0);
				return r;
			}
		}
	}

	adev->cg_flags &= amdgpu_cg_mask;
	adev->pg_flags &= amdgpu_pg_mask;

	return 0;
}

static int amdgpu_device_ip_hw_init_phase1(struct amdgpu_device *adev)
{
	int i, r;

	for (i = 0; i < adev->num_ip_blocks; i++) {
		if (!adev->ip_blocks[i].status.sw)
			continue;
		if (adev->ip_blocks[i].status.hw)
			continue;
		if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
		    (amdgpu_sriov_vf(adev) && (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP)) ||
		    adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH) {
			r = adev->ip_blocks[i].version->funcs->hw_init(adev);
			if (r) {
				DRM_ERROR("hw_init of IP block <%s> failed %d\n",
					  adev->ip_blocks[i].version->funcs->name, r);
				return r;
			}
			adev->ip_blocks[i].status.hw = true;
		}
	}

	return 0;
}

static int amdgpu_device_ip_hw_init_phase2(struct amdgpu_device *adev)
{
	int i, r;

	for (i = 0; i < adev->num_ip_blocks; i++) {
		if (!adev->ip_blocks[i].status.sw)
			continue;
		if (adev->ip_blocks[i].status.hw)
			continue;
		r = adev->ip_blocks[i].version->funcs->hw_init(adev);
		if (r) {
			DRM_ERROR("hw_init of IP block <%s> failed %d\n",
				  adev->ip_blocks[i].version->funcs->name, r);
			return r;
		}
		adev->ip_blocks[i].status.hw = true;
	}

	return 0;
}

static int amdgpu_device_fw_loading(struct amdgpu_device *adev)
{
	int r = 0;
	int i;
	uint32_t smu_version;

	if (adev->asic_type >= CHIP_VEGA10) {
		for (i = 0; i < adev->num_ip_blocks; i++) {
			if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_PSP)
				continue;

			/* no need to do the fw loading again if already done*/
			if (adev->ip_blocks[i].status.hw == true)
				break;

			if (adev->in_gpu_reset || adev->in_suspend) {
				r = adev->ip_blocks[i].version->funcs->resume(adev);
				if (r) {
					DRM_ERROR("resume of IP block <%s> failed %d\n",
						  adev->ip_blocks[i].version->funcs->name, r);
					return r;
				}
			} else {
				r = adev->ip_blocks[i].version->funcs->hw_init(adev);
				if (r) {
					DRM_ERROR("hw_init of IP block <%s> failed %d\n",
						  adev->ip_blocks[i].version->funcs->name, r);
					return r;
				}
			}

			adev->ip_blocks[i].status.hw = true;
			break;
		}
	}

	r = amdgpu_pm_load_smu_firmware(adev, &smu_version);

	return r;
}
1759/**
1760 * amdgpu_device_ip_init - run init for hardware IPs
1761 *
1762 * @adev: amdgpu_device pointer
1763 *
1764 * Main initialization pass for hardware IPs. The list of all the hardware
1765 * IPs that make up the asic is walked and the sw_init and hw_init callbacks
1766 * are run. sw_init initializes the software state associated with each IP
1767 * and hw_init initializes the hardware associated with each IP.
1768 * Returns 0 on success, negative error code on failure.
1769 */
06ec9070 1770static int amdgpu_device_ip_init(struct amdgpu_device *adev)
d38ceaf9
AD
1771{
1772 int i, r;
1773
c030f2e4 1774 r = amdgpu_ras_init(adev);
1775 if (r)
1776 return r;
1777
d38ceaf9 1778 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 1779 if (!adev->ip_blocks[i].status.valid)
d38ceaf9 1780 continue;
a1255107 1781 r = adev->ip_blocks[i].version->funcs->sw_init((void *)adev);
2c1a2784 1782 if (r) {
a1255107
AD
1783 DRM_ERROR("sw_init of IP block <%s> failed %d\n",
1784 adev->ip_blocks[i].version->funcs->name, r);
72d3f592 1785 goto init_failed;
2c1a2784 1786 }
a1255107 1787 adev->ip_blocks[i].status.sw = true;
bfca0289 1788
d38ceaf9 1789 /* need to do gmc hw init early so we can allocate gpu mem */
a1255107 1790 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) {
06ec9070 1791 r = amdgpu_device_vram_scratch_init(adev);
2c1a2784
AD
1792 if (r) {
1793 DRM_ERROR("amdgpu_vram_scratch_init failed %d\n", r);
72d3f592 1794 goto init_failed;
2c1a2784 1795 }
a1255107 1796 r = adev->ip_blocks[i].version->funcs->hw_init((void *)adev);
2c1a2784
AD
1797 if (r) {
1798 DRM_ERROR("hw_init %d failed %d\n", i, r);
72d3f592 1799 goto init_failed;
2c1a2784 1800 }
06ec9070 1801 r = amdgpu_device_wb_init(adev);
2c1a2784 1802 if (r) {
06ec9070 1803 DRM_ERROR("amdgpu_device_wb_init failed %d\n", r);
72d3f592 1804 goto init_failed;
2c1a2784 1805 }
a1255107 1806 adev->ip_blocks[i].status.hw = true;
2493664f
ML
1807
1808 /* right after GMC hw init, we create CSA */
f92d5c61 1809 if (amdgpu_mcbp || amdgpu_sriov_vf(adev)) {
1e256e27
RZ
1810 r = amdgpu_allocate_static_csa(adev, &adev->virt.csa_obj,
1811 AMDGPU_GEM_DOMAIN_VRAM,
1812 AMDGPU_CSA_SIZE);
2493664f
ML
1813 if (r) {
1814 DRM_ERROR("allocate CSA failed %d\n", r);
72d3f592 1815 goto init_failed;
2493664f
ML
1816 }
1817 }
d38ceaf9
AD
1818 }
1819 }
1820
533aed27
AG
1821 r = amdgpu_ib_pool_init(adev);
1822 if (r) {
1823 dev_err(adev->dev, "IB initialization failed (%d).\n", r);
1824 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_IB_INIT_FAIL, 0, r);
1825 goto init_failed;
1826 }
1827
c8963ea4
RZ
1828 r = amdgpu_ucode_create_bo(adev); /* create ucode bo when sw_init complete*/
1829 if (r)
72d3f592 1830 goto init_failed;
0a4f2520
RZ
1831
1832 r = amdgpu_device_ip_hw_init_phase1(adev);
1833 if (r)
72d3f592 1834 goto init_failed;
0a4f2520 1835
7a3e0bb2
RZ
1836 r = amdgpu_device_fw_loading(adev);
1837 if (r)
72d3f592 1838 goto init_failed;
7a3e0bb2 1839
0a4f2520
RZ
1840 r = amdgpu_device_ip_hw_init_phase2(adev);
1841 if (r)
72d3f592 1842 goto init_failed;
d38ceaf9 1843
3e2e2ab5
HZ
1844 if (adev->gmc.xgmi.num_physical_nodes > 1)
1845 amdgpu_xgmi_add_device(adev);
1884734a 1846 amdgpu_amdkfd_device_init(adev);
c6332b97 1847
72d3f592 1848init_failed:
d3c117e5 1849 if (amdgpu_sriov_vf(adev)) {
72d3f592
ED
1850 if (!r)
1851 amdgpu_virt_init_data_exchange(adev);
c6332b97 1852 amdgpu_virt_release_full_gpu(adev, true);
d3c117e5 1853 }
c6332b97 1854
72d3f592 1855 return r;
d38ceaf9
AD
1856}
1857
e3ecdffa
AD
1858/**
1859 * amdgpu_device_fill_reset_magic - writes reset magic to gart pointer
1860 *
1861 * @adev: amdgpu_device pointer
1862 *
 1863 * Saves the current start of the GART table in VRAM as a reset magic value.
 1864 * The driver calls this function before a GPU reset. If the value is retained
 1865 * after the reset, VRAM has not been lost. Some GPU resets may destroy VRAM contents.
1866 */
06ec9070 1867static void amdgpu_device_fill_reset_magic(struct amdgpu_device *adev)
0c49e0b8
CZ
1868{
1869 memcpy(adev->reset_magic, adev->gart.ptr, AMDGPU_RESET_MAGIC_NUM);
1870}
1871
e3ecdffa
AD
1872/**
1873 * amdgpu_device_check_vram_lost - check if vram is valid
1874 *
1875 * @adev: amdgpu_device pointer
1876 *
1877 * Checks the reset magic value written to the gart pointer in VRAM.
1878 * The driver calls this after a GPU reset to see if the contents of
 1879 * VRAM were lost or not.
 1880 * Returns true if VRAM is lost, false if not.
1881 */
06ec9070 1882static bool amdgpu_device_check_vram_lost(struct amdgpu_device *adev)
0c49e0b8
CZ
1883{
1884 return !!memcmp(adev->gart.ptr, adev->reset_magic,
1885 AMDGPU_RESET_MAGIC_NUM);
1886}
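/*
 * Illustrative sketch (an assumption, simplified from the GPU reset paths
 * later in this file): the two helpers above are meant to be used as a pair
 * around an ASIC reset:
 *
 *	amdgpu_device_fill_reset_magic(adev);	save the magic before the reset
 *	r = amdgpu_asic_reset(adev);
 *	...
 *	if (amdgpu_device_check_vram_lost(adev))
 *		DRM_INFO("VRAM is lost due to GPU reset!\n");
 */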
1887
e3ecdffa 1888/**
1112a46b 1889 * amdgpu_device_set_cg_state - set clockgating for amdgpu device
e3ecdffa
AD
1890 *
1891 * @adev: amdgpu_device pointer
1892 *
e3ecdffa 1893 * The list of all the hardware IPs that make up the asic is walked and the
1112a46b
RZ
1894 * set_clockgating_state callbacks are run.
 1895 * During late init this is called with the gate state to enable clockgating;
 1896 * during fini or suspend it is called with the ungate state to disable it.
e3ecdffa
AD
1897 * Returns 0 on success, negative error code on failure.
1898 */
fdd34271 1899
1112a46b
RZ
1900static int amdgpu_device_set_cg_state(struct amdgpu_device *adev,
1901 enum amd_clockgating_state state)
d38ceaf9 1902{
1112a46b 1903 int i, j, r;
d38ceaf9 1904
4a2ba394
SL
1905 if (amdgpu_emu_mode == 1)
1906 return 0;
1907
1112a46b
RZ
1908 for (j = 0; j < adev->num_ip_blocks; j++) {
1909 i = state == AMD_CG_STATE_GATE ? j : adev->num_ip_blocks - j - 1;
a2d31dc3 1910 if (!adev->ip_blocks[i].status.late_initialized)
d38ceaf9 1911 continue;
4a446d55 1912 /* skip CG for VCE/UVD, it's handled specially */
a1255107 1913 if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_UVD &&
57716327 1914 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCE &&
34319b32 1915 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCN &&
57716327 1916 adev->ip_blocks[i].version->funcs->set_clockgating_state) {
4a446d55 1917 /* enable clockgating to save power */
a1255107 1918 r = adev->ip_blocks[i].version->funcs->set_clockgating_state((void *)adev,
1112a46b 1919 state);
4a446d55
AD
1920 if (r) {
1921 DRM_ERROR("set_clockgating_state(gate) of IP block <%s> failed %d\n",
a1255107 1922 adev->ip_blocks[i].version->funcs->name, r);
4a446d55
AD
1923 return r;
1924 }
b0b00ff1 1925 }
d38ceaf9 1926 }
06b18f61 1927
c9f96fd5
RZ
1928 return 0;
1929}
1930
1112a46b 1931static int amdgpu_device_set_pg_state(struct amdgpu_device *adev, enum amd_powergating_state state)
c9f96fd5 1932{
1112a46b 1933 int i, j, r;
06b18f61 1934
c9f96fd5
RZ
1935 if (amdgpu_emu_mode == 1)
1936 return 0;
1937
1112a46b
RZ
1938 for (j = 0; j < adev->num_ip_blocks; j++) {
1939 i = state == AMD_PG_STATE_GATE ? j : adev->num_ip_blocks - j - 1;
a2d31dc3 1940 if (!adev->ip_blocks[i].status.late_initialized)
c9f96fd5
RZ
1941 continue;
 1942 /* skip PG for VCE/UVD, it's handled specially */
1943 if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_UVD &&
1944 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCE &&
1945 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCN &&
1946 adev->ip_blocks[i].version->funcs->set_powergating_state) {
1947 /* enable powergating to save power */
1948 r = adev->ip_blocks[i].version->funcs->set_powergating_state((void *)adev,
1112a46b 1949 state);
c9f96fd5
RZ
1950 if (r) {
1951 DRM_ERROR("set_powergating_state(gate) of IP block <%s> failed %d\n",
1952 adev->ip_blocks[i].version->funcs->name, r);
1953 return r;
1954 }
1955 }
1956 }
2dc80b00
S
1957 return 0;
1958}
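/*
 * Note on ordering (drawn from the callers below in this file): late init
 * gates clockgating before powergating, while fini/suspend ungate in the
 * reverse order:
 *
 *	amdgpu_device_set_cg_state(adev, AMD_CG_STATE_GATE);
 *	amdgpu_device_set_pg_state(adev, AMD_PG_STATE_GATE);
 *	...
 *	amdgpu_device_set_pg_state(adev, AMD_PG_STATE_UNGATE);
 *	amdgpu_device_set_cg_state(adev, AMD_CG_STATE_UNGATE);
 */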
1959
beff74bc
AD
1960static int amdgpu_device_enable_mgpu_fan_boost(void)
1961{
1962 struct amdgpu_gpu_instance *gpu_ins;
1963 struct amdgpu_device *adev;
1964 int i, ret = 0;
1965
1966 mutex_lock(&mgpu_info.mutex);
1967
1968 /*
1969 * MGPU fan boost feature should be enabled
1970 * only when there are two or more dGPUs in
1971 * the system
1972 */
1973 if (mgpu_info.num_dgpu < 2)
1974 goto out;
1975
1976 for (i = 0; i < mgpu_info.num_dgpu; i++) {
1977 gpu_ins = &(mgpu_info.gpu_ins[i]);
1978 adev = gpu_ins->adev;
1979 if (!(adev->flags & AMD_IS_APU) &&
1980 !gpu_ins->mgpu_fan_enabled &&
1981 adev->powerplay.pp_funcs &&
1982 adev->powerplay.pp_funcs->enable_mgpu_fan_boost) {
1983 ret = amdgpu_dpm_enable_mgpu_fan_boost(adev);
1984 if (ret)
1985 break;
1986
1987 gpu_ins->mgpu_fan_enabled = 1;
1988 }
1989 }
1990
1991out:
1992 mutex_unlock(&mgpu_info.mutex);
1993
1994 return ret;
1995}
1996
e3ecdffa
AD
1997/**
1998 * amdgpu_device_ip_late_init - run late init for hardware IPs
1999 *
2000 * @adev: amdgpu_device pointer
2001 *
2002 * Late initialization pass for hardware IPs. The list of all the hardware
2003 * IPs that make up the asic is walked and the late_init callbacks are run.
2004 * late_init covers any special initialization that an IP requires
 2005 * after all of the IPs have been initialized or something that needs to happen
2006 * late in the init process.
2007 * Returns 0 on success, negative error code on failure.
2008 */
06ec9070 2009static int amdgpu_device_ip_late_init(struct amdgpu_device *adev)
2dc80b00
S
2010{
2011 int i = 0, r;
2012
2013 for (i = 0; i < adev->num_ip_blocks; i++) {
73f847db 2014 if (!adev->ip_blocks[i].status.hw)
2dc80b00
S
2015 continue;
2016 if (adev->ip_blocks[i].version->funcs->late_init) {
2017 r = adev->ip_blocks[i].version->funcs->late_init((void *)adev);
2018 if (r) {
2019 DRM_ERROR("late_init of IP block <%s> failed %d\n",
2020 adev->ip_blocks[i].version->funcs->name, r);
2021 return r;
2022 }
2dc80b00 2023 }
73f847db 2024 adev->ip_blocks[i].status.late_initialized = true;
2dc80b00
S
2025 }
2026
1112a46b
RZ
2027 amdgpu_device_set_cg_state(adev, AMD_CG_STATE_GATE);
2028 amdgpu_device_set_pg_state(adev, AMD_PG_STATE_GATE);
916ac57f 2029
06ec9070 2030 amdgpu_device_fill_reset_magic(adev);
d38ceaf9 2031
beff74bc
AD
2032 r = amdgpu_device_enable_mgpu_fan_boost();
2033 if (r)
2034 DRM_ERROR("enable mgpu fan boost failed (%d).\n", r);
2035
2036 /* set to low pstate by default */
2037 amdgpu_xgmi_set_pstate(adev, 0);
2038
d38ceaf9
AD
2039 return 0;
2040}
2041
e3ecdffa
AD
2042/**
2043 * amdgpu_device_ip_fini - run fini for hardware IPs
2044 *
2045 * @adev: amdgpu_device pointer
2046 *
2047 * Main teardown pass for hardware IPs. The list of all the hardware
2048 * IPs that make up the asic is walked and the hw_fini and sw_fini callbacks
2049 * are run. hw_fini tears down the hardware associated with each IP
2050 * and sw_fini tears down any software state associated with each IP.
2051 * Returns 0 on success, negative error code on failure.
2052 */
06ec9070 2053static int amdgpu_device_ip_fini(struct amdgpu_device *adev)
d38ceaf9
AD
2054{
2055 int i, r;
2056
c030f2e4 2057 amdgpu_ras_pre_fini(adev);
2058
a82400b5
AG
2059 if (adev->gmc.xgmi.num_physical_nodes > 1)
2060 amdgpu_xgmi_remove_device(adev);
2061
1884734a 2062 amdgpu_amdkfd_device_fini(adev);
05df1f01
RZ
2063
2064 amdgpu_device_set_pg_state(adev, AMD_PG_STATE_UNGATE);
fdd34271
RZ
2065 amdgpu_device_set_cg_state(adev, AMD_CG_STATE_UNGATE);
2066
3e96dbfd
AD
2067 /* need to disable SMC first */
2068 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 2069 if (!adev->ip_blocks[i].status.hw)
3e96dbfd 2070 continue;
fdd34271 2071 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) {
a1255107 2072 r = adev->ip_blocks[i].version->funcs->hw_fini((void *)adev);
3e96dbfd
AD
2073 /* XXX handle errors */
2074 if (r) {
2075 DRM_DEBUG("hw_fini of IP block <%s> failed %d\n",
a1255107 2076 adev->ip_blocks[i].version->funcs->name, r);
3e96dbfd 2077 }
a1255107 2078 adev->ip_blocks[i].status.hw = false;
3e96dbfd
AD
2079 break;
2080 }
2081 }
2082
d38ceaf9 2083 for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
a1255107 2084 if (!adev->ip_blocks[i].status.hw)
d38ceaf9 2085 continue;
8201a67a 2086
a1255107 2087 r = adev->ip_blocks[i].version->funcs->hw_fini((void *)adev);
d38ceaf9 2088 /* XXX handle errors */
2c1a2784 2089 if (r) {
a1255107
AD
2090 DRM_DEBUG("hw_fini of IP block <%s> failed %d\n",
2091 adev->ip_blocks[i].version->funcs->name, r);
2c1a2784 2092 }
8201a67a 2093
a1255107 2094 adev->ip_blocks[i].status.hw = false;
d38ceaf9
AD
2095 }
2096
9950cda2 2097
d38ceaf9 2098 for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
a1255107 2099 if (!adev->ip_blocks[i].status.sw)
d38ceaf9 2100 continue;
c12aba3a
ML
2101
2102 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) {
c8963ea4 2103 amdgpu_ucode_free_bo(adev);
1e256e27 2104 amdgpu_free_static_csa(&adev->virt.csa_obj);
c12aba3a
ML
2105 amdgpu_device_wb_fini(adev);
2106 amdgpu_device_vram_scratch_fini(adev);
533aed27 2107 amdgpu_ib_pool_fini(adev);
c12aba3a
ML
2108 }
2109
a1255107 2110 r = adev->ip_blocks[i].version->funcs->sw_fini((void *)adev);
d38ceaf9 2111 /* XXX handle errors */
2c1a2784 2112 if (r) {
a1255107
AD
2113 DRM_DEBUG("sw_fini of IP block <%s> failed %d\n",
2114 adev->ip_blocks[i].version->funcs->name, r);
2c1a2784 2115 }
a1255107
AD
2116 adev->ip_blocks[i].status.sw = false;
2117 adev->ip_blocks[i].status.valid = false;
d38ceaf9
AD
2118 }
2119
a6dcfd9c 2120 for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
a1255107 2121 if (!adev->ip_blocks[i].status.late_initialized)
8a2eef1d 2122 continue;
a1255107
AD
2123 if (adev->ip_blocks[i].version->funcs->late_fini)
2124 adev->ip_blocks[i].version->funcs->late_fini((void *)adev);
2125 adev->ip_blocks[i].status.late_initialized = false;
a6dcfd9c
ML
2126 }
2127
c030f2e4 2128 amdgpu_ras_fini(adev);
2129
030308fc 2130 if (amdgpu_sriov_vf(adev))
24136135
ML
2131 if (amdgpu_virt_release_full_gpu(adev, false))
2132 DRM_ERROR("failed to release exclusive mode on fini\n");
2493664f 2133
d38ceaf9
AD
2134 return 0;
2135}
2136
e3ecdffa 2137/**
beff74bc 2138 * amdgpu_device_delayed_init_work_handler - work handler for IB tests
e3ecdffa 2139 *
1112a46b 2140 * @work: work_struct.
e3ecdffa 2141 */
beff74bc 2142static void amdgpu_device_delayed_init_work_handler(struct work_struct *work)
2dc80b00
S
2143{
2144 struct amdgpu_device *adev =
beff74bc 2145 container_of(work, struct amdgpu_device, delayed_init_work.work);
916ac57f
RZ
2146 int r;
2147
2148 r = amdgpu_ib_ring_tests(adev);
2149 if (r)
2150 DRM_ERROR("ib ring test failed (%d).\n", r);
2dc80b00
S
2151}
2152
1e317b99
RZ
2153static void amdgpu_device_delay_enable_gfx_off(struct work_struct *work)
2154{
2155 struct amdgpu_device *adev =
2156 container_of(work, struct amdgpu_device, gfx.gfx_off_delay_work.work);
2157
2158 mutex_lock(&adev->gfx.gfx_off_mutex);
2159 if (!adev->gfx.gfx_off_state && !adev->gfx.gfx_off_req_count) {
2160 if (!amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_GFX, true))
2161 adev->gfx.gfx_off_state = true;
2162 }
2163 mutex_unlock(&adev->gfx.gfx_off_mutex);
2164}
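/*
 * Sketch of how this delayed work is normally driven (hypothetical call site;
 * the request/release helper amdgpu_gfx_off_ctrl() lives in amdgpu_gfx.c):
 *
 *	amdgpu_gfx_off_ctrl(adev, false);	disallow GFXOFF (gfx_off_req_count++)
 *	... submit work that needs the GFX block powered ...
 *	amdgpu_gfx_off_ctrl(adev, true);	allow GFXOFF again; once the count
 *						drops to zero this handler is scheduled
 */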
2165
e3ecdffa 2166/**
e7854a03 2167 * amdgpu_device_ip_suspend_phase1 - run suspend for hardware IPs (phase 1)
e3ecdffa
AD
2168 *
2169 * @adev: amdgpu_device pointer
2170 *
 2171 * First phase of hardware IP suspend. Clockgating and powergating are
 2172 * disabled and the suspend callbacks are run for the display (DCE) blocks
 2173 * only; the remaining IPs are handled in phase 2. suspend puts the hardware
 2174 * and software state in each IP into a state suitable for suspend.
2175 * Returns 0 on success, negative error code on failure.
2176 */
e7854a03
AD
2177static int amdgpu_device_ip_suspend_phase1(struct amdgpu_device *adev)
2178{
2179 int i, r;
2180
05df1f01 2181 amdgpu_device_set_pg_state(adev, AMD_PG_STATE_UNGATE);
fdd34271 2182 amdgpu_device_set_cg_state(adev, AMD_CG_STATE_UNGATE);
05df1f01 2183
e7854a03
AD
2184 for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
2185 if (!adev->ip_blocks[i].status.valid)
2186 continue;
2187 /* displays are handled separately */
2188 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_DCE) {
e7854a03
AD
2189 /* XXX handle errors */
2190 r = adev->ip_blocks[i].version->funcs->suspend(adev);
2191 /* XXX handle errors */
2192 if (r) {
2193 DRM_ERROR("suspend of IP block <%s> failed %d\n",
2194 adev->ip_blocks[i].version->funcs->name, r);
482f0e53 2195 return r;
e7854a03 2196 }
482f0e53 2197 adev->ip_blocks[i].status.hw = false;
e7854a03
AD
2198 }
2199 }
2200
e7854a03
AD
2201 return 0;
2202}
2203
2204/**
2205 * amdgpu_device_ip_suspend_phase2 - run suspend for hardware IPs (phase 2)
2206 *
2207 * @adev: amdgpu_device pointer
2208 *
 2209 * Second phase of hardware IP suspend. The list of all the hardware IPs that
 2210 * make up the asic is walked and the suspend callbacks are run for every
 2211 * block except the displays (handled in phase 1). suspend puts the hardware
 2212 * and software state in each IP into a state suitable for suspend.
2213 * Returns 0 on success, negative error code on failure.
2214 */
2215static int amdgpu_device_ip_suspend_phase2(struct amdgpu_device *adev)
d38ceaf9
AD
2216{
2217 int i, r;
2218
2219 for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
a1255107 2220 if (!adev->ip_blocks[i].status.valid)
d38ceaf9 2221 continue;
e7854a03
AD
2222 /* displays are handled in phase1 */
2223 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_DCE)
2224 continue;
d38ceaf9 2225 /* XXX handle errors */
a1255107 2226 r = adev->ip_blocks[i].version->funcs->suspend(adev);
d38ceaf9 2227 /* XXX handle errors */
2c1a2784 2228 if (r) {
a1255107
AD
2229 DRM_ERROR("suspend of IP block <%s> failed %d\n",
2230 adev->ip_blocks[i].version->funcs->name, r);
2c1a2784 2231 }
876923fb 2232 adev->ip_blocks[i].status.hw = false;
a3a09142
AD
2233 /* handle putting the SMC in the appropriate state */
2234 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) {
2235 if (is_support_sw_smu(adev)) {
2236 /* todo */
2237 } else if (adev->powerplay.pp_funcs &&
482f0e53 2238 adev->powerplay.pp_funcs->set_mp1_state) {
a3a09142
AD
2239 r = adev->powerplay.pp_funcs->set_mp1_state(
2240 adev->powerplay.pp_handle,
2241 adev->mp1_state);
2242 if (r) {
2243 DRM_ERROR("SMC failed to set mp1 state %d, %d\n",
2244 adev->mp1_state, r);
482f0e53 2245 return r;
a3a09142
AD
2246 }
2247 }
2248 }
b5507c7e
AG
2249
2250 adev->ip_blocks[i].status.hw = false;
d38ceaf9
AD
2251 }
2252
2253 return 0;
2254}
2255
e7854a03
AD
2256/**
2257 * amdgpu_device_ip_suspend - run suspend for hardware IPs
2258 *
2259 * @adev: amdgpu_device pointer
2260 *
2261 * Main suspend function for hardware IPs. The list of all the hardware
2262 * IPs that make up the asic is walked, clockgating is disabled and the
2263 * suspend callbacks are run. suspend puts the hardware and software state
2264 * in each IP into a state suitable for suspend.
2265 * Returns 0 on success, negative error code on failure.
2266 */
2267int amdgpu_device_ip_suspend(struct amdgpu_device *adev)
2268{
2269 int r;
2270
e7819644
YT
2271 if (amdgpu_sriov_vf(adev))
2272 amdgpu_virt_request_full_gpu(adev, false);
2273
e7854a03
AD
2274 r = amdgpu_device_ip_suspend_phase1(adev);
2275 if (r)
2276 return r;
2277 r = amdgpu_device_ip_suspend_phase2(adev);
2278
e7819644
YT
2279 if (amdgpu_sriov_vf(adev))
2280 amdgpu_virt_release_full_gpu(adev, false);
2281
e7854a03
AD
2282 return r;
2283}
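/*
 * Illustrative sketch (an assumption, heavily simplified from the bare-metal
 * reset path later in this file): a full GPU reset suspends the IPs, resets
 * the ASIC and then resumes the IPs:
 *
 *	amdgpu_device_ip_suspend(adev);
 *	r = amdgpu_asic_reset(adev);
 *	if (!r)
 *		r = amdgpu_device_ip_resume(adev);
 */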
2284
06ec9070 2285static int amdgpu_device_ip_reinit_early_sriov(struct amdgpu_device *adev)
a90ad3c2
ML
2286{
2287 int i, r;
2288
2cb681b6
ML
2289 static enum amd_ip_block_type ip_order[] = {
2290 AMD_IP_BLOCK_TYPE_GMC,
2291 AMD_IP_BLOCK_TYPE_COMMON,
39186aef 2292 AMD_IP_BLOCK_TYPE_PSP,
2cb681b6
ML
2293 AMD_IP_BLOCK_TYPE_IH,
2294 };
a90ad3c2 2295
2cb681b6
ML
2296 for (i = 0; i < ARRAY_SIZE(ip_order); i++) {
2297 int j;
2298 struct amdgpu_ip_block *block;
a90ad3c2 2299
2cb681b6
ML
2300 for (j = 0; j < adev->num_ip_blocks; j++) {
2301 block = &adev->ip_blocks[j];
2302
482f0e53 2303 block->status.hw = false;
2cb681b6
ML
2304 if (block->version->type != ip_order[i] ||
2305 !block->status.valid)
2306 continue;
2307
2308 r = block->version->funcs->hw_init(adev);
0aaeefcc 2309 DRM_INFO("RE-INIT-early: %s %s\n", block->version->funcs->name, r ? "failed" : "succeeded");
c41d1cf6
ML
2310 if (r)
2311 return r;
482f0e53 2312 block->status.hw = true;
a90ad3c2
ML
2313 }
2314 }
2315
2316 return 0;
2317}
2318
06ec9070 2319static int amdgpu_device_ip_reinit_late_sriov(struct amdgpu_device *adev)
a90ad3c2
ML
2320{
2321 int i, r;
2322
2cb681b6
ML
2323 static enum amd_ip_block_type ip_order[] = {
2324 AMD_IP_BLOCK_TYPE_SMC,
2325 AMD_IP_BLOCK_TYPE_DCE,
2326 AMD_IP_BLOCK_TYPE_GFX,
2327 AMD_IP_BLOCK_TYPE_SDMA,
257deb8c
FM
2328 AMD_IP_BLOCK_TYPE_UVD,
2329 AMD_IP_BLOCK_TYPE_VCE
2cb681b6 2330 };
a90ad3c2 2331
2cb681b6
ML
2332 for (i = 0; i < ARRAY_SIZE(ip_order); i++) {
2333 int j;
2334 struct amdgpu_ip_block *block;
a90ad3c2 2335
2cb681b6
ML
2336 for (j = 0; j < adev->num_ip_blocks; j++) {
2337 block = &adev->ip_blocks[j];
2338
2339 if (block->version->type != ip_order[i] ||
482f0e53
ML
2340 !block->status.valid ||
2341 block->status.hw)
2cb681b6
ML
2342 continue;
2343
2344 r = block->version->funcs->hw_init(adev);
0aaeefcc 2345 DRM_INFO("RE-INIT-late: %s %s\n", block->version->funcs->name, r ? "failed" : "succeeded");
c41d1cf6
ML
2346 if (r)
2347 return r;
482f0e53 2348 block->status.hw = true;
a90ad3c2
ML
2349 }
2350 }
2351
2352 return 0;
2353}
2354
e3ecdffa
AD
2355/**
2356 * amdgpu_device_ip_resume_phase1 - run resume for hardware IPs
2357 *
2358 * @adev: amdgpu_device pointer
2359 *
2360 * First resume function for hardware IPs. The list of all the hardware
2361 * IPs that make up the asic is walked and the resume callbacks are run for
2362 * COMMON, GMC, and IH. resume puts the hardware into a functional state
2363 * after a suspend and updates the software state as necessary. This
2364 * function is also used for restoring the GPU after a GPU reset.
2365 * Returns 0 on success, negative error code on failure.
2366 */
06ec9070 2367static int amdgpu_device_ip_resume_phase1(struct amdgpu_device *adev)
d38ceaf9
AD
2368{
2369 int i, r;
2370
a90ad3c2 2371 for (i = 0; i < adev->num_ip_blocks; i++) {
482f0e53 2372 if (!adev->ip_blocks[i].status.valid || adev->ip_blocks[i].status.hw)
a90ad3c2 2373 continue;
a90ad3c2 2374 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
e3ecdffa
AD
2375 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
2376 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH) {
482f0e53 2377
fcf0649f
CZ
2378 r = adev->ip_blocks[i].version->funcs->resume(adev);
2379 if (r) {
2380 DRM_ERROR("resume of IP block <%s> failed %d\n",
2381 adev->ip_blocks[i].version->funcs->name, r);
2382 return r;
2383 }
482f0e53 2384 adev->ip_blocks[i].status.hw = true;
a90ad3c2
ML
2385 }
2386 }
2387
2388 return 0;
2389}
2390
e3ecdffa
AD
2391/**
2392 * amdgpu_device_ip_resume_phase2 - run resume for hardware IPs
2393 *
2394 * @adev: amdgpu_device pointer
2395 *
 2396 * Second resume function for hardware IPs. The list of all the hardware
 2397 * IPs that make up the asic is walked and the resume callbacks are run for
 2398 * all blocks except COMMON, GMC, IH, and PSP. resume puts the hardware into a
2399 * functional state after a suspend and updates the software state as
2400 * necessary. This function is also used for restoring the GPU after a GPU
2401 * reset.
2402 * Returns 0 on success, negative error code on failure.
2403 */
06ec9070 2404static int amdgpu_device_ip_resume_phase2(struct amdgpu_device *adev)
d38ceaf9
AD
2405{
2406 int i, r;
2407
2408 for (i = 0; i < adev->num_ip_blocks; i++) {
482f0e53 2409 if (!adev->ip_blocks[i].status.valid || adev->ip_blocks[i].status.hw)
d38ceaf9 2410 continue;
fcf0649f 2411 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
e3ecdffa 2412 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
7a3e0bb2
RZ
2413 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH ||
2414 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP)
fcf0649f 2415 continue;
a1255107 2416 r = adev->ip_blocks[i].version->funcs->resume(adev);
2c1a2784 2417 if (r) {
a1255107
AD
2418 DRM_ERROR("resume of IP block <%s> failed %d\n",
2419 adev->ip_blocks[i].version->funcs->name, r);
d38ceaf9 2420 return r;
2c1a2784 2421 }
482f0e53 2422 adev->ip_blocks[i].status.hw = true;
d38ceaf9
AD
2423 }
2424
2425 return 0;
2426}
2427
e3ecdffa
AD
2428/**
2429 * amdgpu_device_ip_resume - run resume for hardware IPs
2430 *
2431 * @adev: amdgpu_device pointer
2432 *
2433 * Main resume function for hardware IPs. The hardware IPs
 2434 * are split into two resume functions because they are
 2435 * also used in recovering from a GPU reset and some additional
 2436 * steps need to be taken between them. In this case (S3/S4) they are
2437 * run sequentially.
2438 * Returns 0 on success, negative error code on failure.
2439 */
06ec9070 2440static int amdgpu_device_ip_resume(struct amdgpu_device *adev)
fcf0649f
CZ
2441{
2442 int r;
2443
06ec9070 2444 r = amdgpu_device_ip_resume_phase1(adev);
fcf0649f
CZ
2445 if (r)
2446 return r;
7a3e0bb2
RZ
2447
2448 r = amdgpu_device_fw_loading(adev);
2449 if (r)
2450 return r;
2451
06ec9070 2452 r = amdgpu_device_ip_resume_phase2(adev);
fcf0649f
CZ
2453
2454 return r;
2455}
2456
e3ecdffa
AD
2457/**
2458 * amdgpu_device_detect_sriov_bios - determine if the board supports SR-IOV
2459 *
2460 * @adev: amdgpu_device pointer
2461 *
2462 * Query the VBIOS data tables to determine if the board supports SR-IOV.
2463 */
4e99a44e 2464static void amdgpu_device_detect_sriov_bios(struct amdgpu_device *adev)
048765ad 2465{
6867e1b5
ML
2466 if (amdgpu_sriov_vf(adev)) {
2467 if (adev->is_atom_fw) {
2468 if (amdgpu_atomfirmware_gpu_supports_virtualization(adev))
2469 adev->virt.caps |= AMDGPU_SRIOV_CAPS_SRIOV_VBIOS;
2470 } else {
2471 if (amdgpu_atombios_has_gpu_virtualization_table(adev))
2472 adev->virt.caps |= AMDGPU_SRIOV_CAPS_SRIOV_VBIOS;
2473 }
2474
2475 if (!(adev->virt.caps & AMDGPU_SRIOV_CAPS_SRIOV_VBIOS))
2476 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_NO_VBIOS, 0, 0);
a5bde2f9 2477 }
048765ad
AR
2478}
2479
e3ecdffa
AD
2480/**
2481 * amdgpu_device_asic_has_dc_support - determine if DC supports the asic
2482 *
2483 * @asic_type: AMD asic type
2484 *
2485 * Check if there is DC (new modesetting infrastructre) support for an asic.
2486 * returns true if DC has support, false if not.
2487 */
4562236b
HW
2488bool amdgpu_device_asic_has_dc_support(enum amd_asic_type asic_type)
2489{
2490 switch (asic_type) {
2491#if defined(CONFIG_DRM_AMD_DC)
2492 case CHIP_BONAIRE:
0d6fbccb 2493 case CHIP_KAVERI:
367e6687
AD
2494 case CHIP_KABINI:
2495 case CHIP_MULLINS:
d9fda248
HW
2496 /*
2497 * We have systems in the wild with these ASICs that require
2498 * LVDS and VGA support which is not supported with DC.
2499 *
2500 * Fallback to the non-DC driver here by default so as not to
2501 * cause regressions.
2502 */
2503 return amdgpu_dc > 0;
2504 case CHIP_HAWAII:
4562236b
HW
2505 case CHIP_CARRIZO:
2506 case CHIP_STONEY:
4562236b 2507 case CHIP_POLARIS10:
675fd32b 2508 case CHIP_POLARIS11:
2c8ad2d5 2509 case CHIP_POLARIS12:
675fd32b 2510 case CHIP_VEGAM:
4562236b
HW
2511 case CHIP_TONGA:
2512 case CHIP_FIJI:
42f8ffa1 2513 case CHIP_VEGA10:
dca7b401 2514 case CHIP_VEGA12:
c6034aa2 2515 case CHIP_VEGA20:
dc37a9a0 2516#if defined(CONFIG_DRM_AMD_DC_DCN1_0)
fd187853 2517 case CHIP_RAVEN:
b4f199c7
HW
2518#endif
2519#if defined(CONFIG_DRM_AMD_DC_DCN2_0)
2520 case CHIP_NAVI10:
8fceceb6 2521 case CHIP_NAVI14:
078655d9 2522 case CHIP_NAVI12:
e1c14c43
RL
2523#endif
2524#if defined(CONFIG_DRM_AMD_DC_DCN2_1)
2525 case CHIP_RENOIR:
42f8ffa1 2526#endif
fd187853 2527 return amdgpu_dc != 0;
4562236b
HW
2528#endif
2529 default:
2530 return false;
2531 }
2532}
2533
2534/**
2535 * amdgpu_device_has_dc_support - check if dc is supported
2536 *
 2537 * @adev: amdgpu_device pointer
2538 *
2539 * Returns true for supported, false for not supported
2540 */
2541bool amdgpu_device_has_dc_support(struct amdgpu_device *adev)
2542{
2555039d
XY
2543 if (amdgpu_sriov_vf(adev))
2544 return false;
2545
4562236b
HW
2546 return amdgpu_device_asic_has_dc_support(adev->asic_type);
2547}
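/*
 * Typical use (sketch): callers pick between the new display core (DC) path
 * and the legacy DCE path, e.g. as amdgpu_device_init() does below for the
 * i2c buses:
 *
 *	if (!amdgpu_device_has_dc_support(adev))
 *		amdgpu_atombios_i2c_init(adev);
 */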
2548
d4535e2c
AG
2549
2550static void amdgpu_device_xgmi_reset_func(struct work_struct *__work)
2551{
2552 struct amdgpu_device *adev =
2553 container_of(__work, struct amdgpu_device, xgmi_reset_work);
2554
2555 adev->asic_reset_res = amdgpu_asic_reset(adev);
2556 if (adev->asic_reset_res)
fed184e9 2557 DRM_WARN("ASIC reset failed with error %d for drm dev %s",
d4535e2c
AG
2558 adev->asic_reset_res, adev->ddev->unique);
2559}
2560
2561
d38ceaf9
AD
2562/**
2563 * amdgpu_device_init - initialize the driver
2564 *
2565 * @adev: amdgpu_device pointer
87e3f136 2566 * @ddev: drm dev pointer
d38ceaf9
AD
2567 * @pdev: pci dev pointer
2568 * @flags: driver flags
2569 *
2570 * Initializes the driver info and hw (all asics).
2571 * Returns 0 for success or an error on failure.
2572 * Called at driver startup.
2573 */
2574int amdgpu_device_init(struct amdgpu_device *adev,
2575 struct drm_device *ddev,
2576 struct pci_dev *pdev,
2577 uint32_t flags)
2578{
2579 int r, i;
2580 bool runtime = false;
95844d20 2581 u32 max_MBps;
d38ceaf9
AD
2582
2583 adev->shutdown = false;
2584 adev->dev = &pdev->dev;
2585 adev->ddev = ddev;
2586 adev->pdev = pdev;
2587 adev->flags = flags;
2f7d10b3 2588 adev->asic_type = flags & AMD_ASIC_MASK;
d38ceaf9 2589 adev->usec_timeout = AMDGPU_MAX_USEC_TIMEOUT;
593aa2d2
SL
2590 if (amdgpu_emu_mode == 1)
2591 adev->usec_timeout *= 2;
770d13b1 2592 adev->gmc.gart_size = 512 * 1024 * 1024;
d38ceaf9
AD
2593 adev->accel_working = false;
2594 adev->num_rings = 0;
2595 adev->mman.buffer_funcs = NULL;
2596 adev->mman.buffer_funcs_ring = NULL;
2597 adev->vm_manager.vm_pte_funcs = NULL;
3798e9a6 2598 adev->vm_manager.vm_pte_num_rqs = 0;
132f34e4 2599 adev->gmc.gmc_funcs = NULL;
f54d1867 2600 adev->fence_context = dma_fence_context_alloc(AMDGPU_MAX_RINGS);
b8866c26 2601 bitmap_zero(adev->gfx.pipe_reserve_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
d38ceaf9
AD
2602
2603 adev->smc_rreg = &amdgpu_invalid_rreg;
2604 adev->smc_wreg = &amdgpu_invalid_wreg;
2605 adev->pcie_rreg = &amdgpu_invalid_rreg;
2606 adev->pcie_wreg = &amdgpu_invalid_wreg;
36b9a952
HR
2607 adev->pciep_rreg = &amdgpu_invalid_rreg;
2608 adev->pciep_wreg = &amdgpu_invalid_wreg;
4fa1c6a6
TZ
2609 adev->pcie_rreg64 = &amdgpu_invalid_rreg64;
2610 adev->pcie_wreg64 = &amdgpu_invalid_wreg64;
d38ceaf9
AD
2611 adev->uvd_ctx_rreg = &amdgpu_invalid_rreg;
2612 adev->uvd_ctx_wreg = &amdgpu_invalid_wreg;
2613 adev->didt_rreg = &amdgpu_invalid_rreg;
2614 adev->didt_wreg = &amdgpu_invalid_wreg;
ccdbb20a
RZ
2615 adev->gc_cac_rreg = &amdgpu_invalid_rreg;
2616 adev->gc_cac_wreg = &amdgpu_invalid_wreg;
d38ceaf9
AD
2617 adev->audio_endpt_rreg = &amdgpu_block_invalid_rreg;
2618 adev->audio_endpt_wreg = &amdgpu_block_invalid_wreg;
2619
3e39ab90
AD
2620 DRM_INFO("initializing kernel modesetting (%s 0x%04X:0x%04X 0x%04X:0x%04X 0x%02X).\n",
2621 amdgpu_asic_name[adev->asic_type], pdev->vendor, pdev->device,
2622 pdev->subsystem_vendor, pdev->subsystem_device, pdev->revision);
d38ceaf9
AD
2623
 2624 /* mutex initializations are all done here so we
 2625 * can call these functions again without locking issues */
d38ceaf9 2626 atomic_set(&adev->irq.ih.lock, 0);
0e5ca0d1 2627 mutex_init(&adev->firmware.mutex);
d38ceaf9
AD
2628 mutex_init(&adev->pm.mutex);
2629 mutex_init(&adev->gfx.gpu_clock_mutex);
2630 mutex_init(&adev->srbm_mutex);
b8866c26 2631 mutex_init(&adev->gfx.pipe_reserve_mutex);
d23ee13f 2632 mutex_init(&adev->gfx.gfx_off_mutex);
d38ceaf9 2633 mutex_init(&adev->grbm_idx_mutex);
d38ceaf9 2634 mutex_init(&adev->mn_lock);
e23b74aa 2635 mutex_init(&adev->virt.vf_errors.lock);
d38ceaf9 2636 hash_init(adev->mn_hash);
13a752e3 2637 mutex_init(&adev->lock_reset);
bb5a2bdf 2638 mutex_init(&adev->virt.dpm_mutex);
32eaeae0 2639 mutex_init(&adev->psp.mutex);
d38ceaf9 2640
912dfc84
EQ
2641 r = amdgpu_device_check_arguments(adev);
2642 if (r)
2643 return r;
d38ceaf9 2644
d38ceaf9
AD
2645 spin_lock_init(&adev->mmio_idx_lock);
2646 spin_lock_init(&adev->smc_idx_lock);
2647 spin_lock_init(&adev->pcie_idx_lock);
2648 spin_lock_init(&adev->uvd_ctx_idx_lock);
2649 spin_lock_init(&adev->didt_idx_lock);
ccdbb20a 2650 spin_lock_init(&adev->gc_cac_idx_lock);
16abb5d2 2651 spin_lock_init(&adev->se_cac_idx_lock);
d38ceaf9 2652 spin_lock_init(&adev->audio_endpt_idx_lock);
95844d20 2653 spin_lock_init(&adev->mm_stats.lock);
d38ceaf9 2654
0c4e7fa5
CZ
2655 INIT_LIST_HEAD(&adev->shadow_list);
2656 mutex_init(&adev->shadow_list_lock);
2657
795f2813
AR
2658 INIT_LIST_HEAD(&adev->ring_lru_list);
2659 spin_lock_init(&adev->ring_lru_list_lock);
2660
beff74bc
AD
2661 INIT_DELAYED_WORK(&adev->delayed_init_work,
2662 amdgpu_device_delayed_init_work_handler);
1e317b99
RZ
2663 INIT_DELAYED_WORK(&adev->gfx.gfx_off_delay_work,
2664 amdgpu_device_delay_enable_gfx_off);
2dc80b00 2665
d4535e2c
AG
2666 INIT_WORK(&adev->xgmi_reset_work, amdgpu_device_xgmi_reset_func);
2667
d23ee13f 2668 adev->gfx.gfx_off_req_count = 1;
b1ddf548
RZ
2669 adev->pm.ac_power = power_supply_is_system_supplied() > 0 ? true : false;
2670
0fa49558
AX
2671 /* Registers mapping */
2672 /* TODO: block userspace mapping of io register */
da69c161
KW
2673 if (adev->asic_type >= CHIP_BONAIRE) {
2674 adev->rmmio_base = pci_resource_start(adev->pdev, 5);
2675 adev->rmmio_size = pci_resource_len(adev->pdev, 5);
2676 } else {
2677 adev->rmmio_base = pci_resource_start(adev->pdev, 2);
2678 adev->rmmio_size = pci_resource_len(adev->pdev, 2);
2679 }
d38ceaf9 2680
d38ceaf9
AD
2681 adev->rmmio = ioremap(adev->rmmio_base, adev->rmmio_size);
2682 if (adev->rmmio == NULL) {
2683 return -ENOMEM;
2684 }
2685 DRM_INFO("register mmio base: 0x%08X\n", (uint32_t)adev->rmmio_base);
2686 DRM_INFO("register mmio size: %u\n", (unsigned)adev->rmmio_size);
2687
d38ceaf9
AD
2688 /* io port mapping */
2689 for (i = 0; i < DEVICE_COUNT_RESOURCE; i++) {
2690 if (pci_resource_flags(adev->pdev, i) & IORESOURCE_IO) {
2691 adev->rio_mem_size = pci_resource_len(adev->pdev, i);
2692 adev->rio_mem = pci_iomap(adev->pdev, i, adev->rio_mem_size);
2693 break;
2694 }
2695 }
2696 if (adev->rio_mem == NULL)
b64a18c5 2697 DRM_INFO("PCI I/O BAR is not found.\n");
d38ceaf9 2698
b2109d8e
JX
2699 /* enable PCIE atomic ops */
2700 r = pci_enable_atomic_ops_to_root(adev->pdev,
2701 PCI_EXP_DEVCAP2_ATOMIC_COMP32 |
2702 PCI_EXP_DEVCAP2_ATOMIC_COMP64);
2703 if (r) {
2704 adev->have_atomics_support = false;
2705 DRM_INFO("PCIE atomic ops is not supported\n");
2706 } else {
2707 adev->have_atomics_support = true;
2708 }
2709
5494d864
AD
2710 amdgpu_device_get_pcie_info(adev);
2711
b239c017
JX
2712 if (amdgpu_mcbp)
2713 DRM_INFO("MCBP is enabled\n");
2714
5f84cc63
JX
2715 if (amdgpu_mes && adev->asic_type >= CHIP_NAVI10)
2716 adev->enable_mes = true;
2717
f54eeab4 2718 if (amdgpu_discovery && adev->asic_type >= CHIP_NAVI10) {
a190d1c7
XY
2719 r = amdgpu_discovery_init(adev);
2720 if (r) {
2721 dev_err(adev->dev, "amdgpu_discovery_init failed\n");
2722 return r;
2723 }
2724 }
2725
d38ceaf9 2726 /* early init functions */
06ec9070 2727 r = amdgpu_device_ip_early_init(adev);
d38ceaf9
AD
2728 if (r)
2729 return r;
2730
6585661d
OZ
2731 /* doorbell bar mapping and doorbell index init*/
2732 amdgpu_device_doorbell_init(adev);
2733
d38ceaf9
AD
2734 /* if we have > 1 VGA cards, then disable the amdgpu VGA resources */
2735 /* this will fail for cards that aren't VGA class devices, just
2736 * ignore it */
06ec9070 2737 vga_client_register(adev->pdev, adev, NULL, amdgpu_device_vga_set_decode);
d38ceaf9 2738
e9bef455 2739 if (amdgpu_device_is_px(ddev))
d38ceaf9 2740 runtime = true;
84c8b22e
LW
2741 if (!pci_is_thunderbolt_attached(adev->pdev))
2742 vga_switcheroo_register_client(adev->pdev,
2743 &amdgpu_switcheroo_ops, runtime);
d38ceaf9
AD
2744 if (runtime)
2745 vga_switcheroo_init_domain_pm_ops(adev->dev, &adev->vga_pm_domain);
2746
9475a943
SL
2747 if (amdgpu_emu_mode == 1) {
2748 /* post the asic on emulation mode */
2749 emu_soc_asic_init(adev);
bfca0289 2750 goto fence_driver_init;
9475a943 2751 }
bfca0289 2752
4e99a44e
ML
2753 /* detect if we are with an SRIOV vbios */
2754 amdgpu_device_detect_sriov_bios(adev);
048765ad 2755
95e8e59e
AD
2756 /* check if we need to reset the asic
2757 * E.g., driver was not cleanly unloaded previously, etc.
2758 */
f14899fd 2759 if (!amdgpu_sriov_vf(adev) && amdgpu_asic_need_reset_on_init(adev)) {
95e8e59e
AD
2760 r = amdgpu_asic_reset(adev);
2761 if (r) {
2762 dev_err(adev->dev, "asic reset on init failed\n");
2763 goto failed;
2764 }
2765 }
2766
d38ceaf9 2767 /* Post card if necessary */
39c640c0 2768 if (amdgpu_device_need_post(adev)) {
d38ceaf9 2769 if (!adev->bios) {
bec86378 2770 dev_err(adev->dev, "no vBIOS found\n");
83ba126a
AD
2771 r = -EINVAL;
2772 goto failed;
d38ceaf9 2773 }
bec86378 2774 DRM_INFO("GPU posting now...\n");
4e99a44e
ML
2775 r = amdgpu_atom_asic_init(adev->mode_info.atom_context);
2776 if (r) {
2777 dev_err(adev->dev, "gpu post error!\n");
2778 goto failed;
2779 }
d38ceaf9
AD
2780 }
2781
88b64e95
AD
2782 if (adev->is_atom_fw) {
2783 /* Initialize clocks */
2784 r = amdgpu_atomfirmware_get_clock_info(adev);
2785 if (r) {
2786 dev_err(adev->dev, "amdgpu_atomfirmware_get_clock_info failed\n");
e23b74aa 2787 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_GET_CLOCK_FAIL, 0, 0);
88b64e95
AD
2788 goto failed;
2789 }
2790 } else {
a5bde2f9
AD
2791 /* Initialize clocks */
2792 r = amdgpu_atombios_get_clock_info(adev);
2793 if (r) {
2794 dev_err(adev->dev, "amdgpu_atombios_get_clock_info failed\n");
e23b74aa 2795 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_GET_CLOCK_FAIL, 0, 0);
89041940 2796 goto failed;
a5bde2f9
AD
2797 }
2798 /* init i2c buses */
4562236b
HW
2799 if (!amdgpu_device_has_dc_support(adev))
2800 amdgpu_atombios_i2c_init(adev);
2c1a2784 2801 }
d38ceaf9 2802
bfca0289 2803fence_driver_init:
d38ceaf9
AD
2804 /* Fence driver */
2805 r = amdgpu_fence_driver_init(adev);
2c1a2784
AD
2806 if (r) {
2807 dev_err(adev->dev, "amdgpu_fence_driver_init failed\n");
e23b74aa 2808 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_FENCE_INIT_FAIL, 0, 0);
83ba126a 2809 goto failed;
2c1a2784 2810 }
d38ceaf9
AD
2811
2812 /* init the mode config */
2813 drm_mode_config_init(adev->ddev);
2814
06ec9070 2815 r = amdgpu_device_ip_init(adev);
d38ceaf9 2816 if (r) {
8840a387 2817 /* failed in exclusive mode due to timeout */
2818 if (amdgpu_sriov_vf(adev) &&
2819 !amdgpu_sriov_runtime(adev) &&
2820 amdgpu_virt_mmio_blocked(adev) &&
2821 !amdgpu_virt_wait_reset(adev)) {
2822 dev_err(adev->dev, "VF exclusive mode timeout\n");
1daee8b4
PD
2823 /* Don't send request since VF is inactive. */
2824 adev->virt.caps &= ~AMDGPU_SRIOV_CAPS_RUNTIME;
2825 adev->virt.ops = NULL;
8840a387 2826 r = -EAGAIN;
2827 goto failed;
2828 }
06ec9070 2829 dev_err(adev->dev, "amdgpu_device_ip_init failed\n");
e23b74aa 2830 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_AMDGPU_INIT_FAIL, 0, 0);
72d3f592
ED
2831 if (amdgpu_virt_request_full_gpu(adev, false))
2832 amdgpu_virt_release_full_gpu(adev, false);
83ba126a 2833 goto failed;
d38ceaf9
AD
2834 }
2835
2836 adev->accel_working = true;
2837
e59c0205
AX
2838 amdgpu_vm_check_compute_bug(adev);
2839
95844d20
MO
2840 /* Initialize the buffer migration limit. */
2841 if (amdgpu_moverate >= 0)
2842 max_MBps = amdgpu_moverate;
2843 else
2844 max_MBps = 8; /* Allow 8 MB/s. */
2845 /* Get a log2 for easy divisions. */
2846 adev->mm_stats.log2_max_MBps = ilog2(max(1u, max_MBps));
2847
9bc92b9c
ML
2848 amdgpu_fbdev_init(adev);
2849
e9bc1bf7
YT
2850 if (amdgpu_sriov_vf(adev) && amdgim_is_hwperf(adev))
2851 amdgpu_pm_virt_sysfs_init(adev);
2852
d2f52ac8
RZ
2853 r = amdgpu_pm_sysfs_init(adev);
2854 if (r)
2855 DRM_ERROR("registering pm debugfs failed (%d).\n", r);
2856
5bb23532
OM
2857 r = amdgpu_ucode_sysfs_init(adev);
2858 if (r)
2859 DRM_ERROR("Creating firmware sysfs failed (%d).\n", r);
2860
75758255 2861 r = amdgpu_debugfs_gem_init(adev);
3f14e623 2862 if (r)
d38ceaf9 2863 DRM_ERROR("registering gem debugfs failed (%d).\n", r);
d38ceaf9
AD
2864
2865 r = amdgpu_debugfs_regs_init(adev);
3f14e623 2866 if (r)
d38ceaf9 2867 DRM_ERROR("registering register debugfs failed (%d).\n", r);
d38ceaf9 2868
50ab2533 2869 r = amdgpu_debugfs_firmware_init(adev);
3f14e623 2870 if (r)
50ab2533 2871 DRM_ERROR("registering firmware debugfs failed (%d).\n", r);
50ab2533 2872
763efb6c 2873 r = amdgpu_debugfs_init(adev);
db95e218 2874 if (r)
763efb6c 2875 DRM_ERROR("Creating debugfs files failed (%d).\n", r);
db95e218 2876
d38ceaf9
AD
2877 if ((amdgpu_testing & 1)) {
2878 if (adev->accel_working)
2879 amdgpu_test_moves(adev);
2880 else
2881 DRM_INFO("amdgpu: acceleration disabled, skipping move tests\n");
2882 }
d38ceaf9
AD
2883 if (amdgpu_benchmarking) {
2884 if (adev->accel_working)
2885 amdgpu_benchmark(adev, amdgpu_benchmarking);
2886 else
2887 DRM_INFO("amdgpu: acceleration disabled, skipping benchmarks\n");
2888 }
2889
2890 /* enable clockgating, etc. after ib tests, etc. since some blocks require
2891 * explicit gating rather than handling it automatically.
2892 */
06ec9070 2893 r = amdgpu_device_ip_late_init(adev);
2c1a2784 2894 if (r) {
06ec9070 2895 dev_err(adev->dev, "amdgpu_device_ip_late_init failed\n");
e23b74aa 2896 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_AMDGPU_LATE_INIT_FAIL, 0, r);
83ba126a 2897 goto failed;
2c1a2784 2898 }
d38ceaf9 2899
108c6a63 2900 /* must succeed. */
511fdbc3 2901 amdgpu_ras_resume(adev);
108c6a63 2902
beff74bc
AD
2903 queue_delayed_work(system_wq, &adev->delayed_init_work,
2904 msecs_to_jiffies(AMDGPU_RESUME_MS));
2905
dcea6e65
KR
2906 r = device_create_file(adev->dev, &dev_attr_pcie_replay_count);
2907 if (r) {
2908 dev_err(adev->dev, "Could not create pcie_replay_count");
2909 return r;
2910 }
108c6a63 2911
d155bef0
AB
2912 if (IS_ENABLED(CONFIG_PERF_EVENTS))
2913 r = amdgpu_pmu_init(adev);
9c7c85f7
JK
2914 if (r)
2915 dev_err(adev->dev, "amdgpu_pmu_init failed\n");
2916
d38ceaf9 2917 return 0;
83ba126a
AD
2918
2919failed:
89041940 2920 amdgpu_vf_error_trans_all(adev);
83ba126a
AD
2921 if (runtime)
2922 vga_switcheroo_fini_domain_pm_ops(adev->dev);
8840a387 2923
83ba126a 2924 return r;
d38ceaf9
AD
2925}
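/*
 * Sketch of the expected call site (an assumption modeled on the KMS driver
 * load path in amdgpu_kms.c, which is not part of this file):
 *
 *	adev = kzalloc(sizeof(struct amdgpu_device), GFP_KERNEL);
 *	if (!adev)
 *		return -ENOMEM;
 *	r = amdgpu_device_init(adev, dev, dev->pdev, flags);
 *	if (r)
 *		goto err_free;
 */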
2926
d38ceaf9
AD
2927/**
2928 * amdgpu_device_fini - tear down the driver
2929 *
2930 * @adev: amdgpu_device pointer
2931 *
2932 * Tear down the driver info (all asics).
2933 * Called at driver shutdown.
2934 */
2935void amdgpu_device_fini(struct amdgpu_device *adev)
2936{
2937 int r;
2938
2939 DRM_INFO("amdgpu: finishing device.\n");
2940 adev->shutdown = true;
e5b03032
ML
2941 /* disable all interrupts */
2942 amdgpu_irq_disable_all(adev);
ff97cba8
ML
2943 if (adev->mode_info.mode_config_initialized){
2944 if (!amdgpu_device_has_dc_support(adev))
c2d88e06 2945 drm_helper_force_disable_all(adev->ddev);
ff97cba8
ML
2946 else
2947 drm_atomic_helper_shutdown(adev->ddev);
2948 }
d38ceaf9 2949 amdgpu_fence_driver_fini(adev);
58e955d9 2950 amdgpu_pm_sysfs_fini(adev);
d38ceaf9 2951 amdgpu_fbdev_fini(adev);
06ec9070 2952 r = amdgpu_device_ip_fini(adev);
ab4fe3e1
HR
2953 if (adev->firmware.gpu_info_fw) {
2954 release_firmware(adev->firmware.gpu_info_fw);
2955 adev->firmware.gpu_info_fw = NULL;
2956 }
d38ceaf9 2957 adev->accel_working = false;
beff74bc 2958 cancel_delayed_work_sync(&adev->delayed_init_work);
d38ceaf9 2959 /* free i2c buses */
4562236b
HW
2960 if (!amdgpu_device_has_dc_support(adev))
2961 amdgpu_i2c_fini(adev);
bfca0289
SL
2962
2963 if (amdgpu_emu_mode != 1)
2964 amdgpu_atombios_fini(adev);
2965
d38ceaf9
AD
2966 kfree(adev->bios);
2967 adev->bios = NULL;
84c8b22e
LW
2968 if (!pci_is_thunderbolt_attached(adev->pdev))
2969 vga_switcheroo_unregister_client(adev->pdev);
83ba126a
AD
2970 if (adev->flags & AMD_IS_PX)
2971 vga_switcheroo_fini_domain_pm_ops(adev->dev);
d38ceaf9
AD
2972 vga_client_register(adev->pdev, NULL, NULL, NULL);
2973 if (adev->rio_mem)
2974 pci_iounmap(adev->pdev, adev->rio_mem);
2975 adev->rio_mem = NULL;
2976 iounmap(adev->rmmio);
2977 adev->rmmio = NULL;
06ec9070 2978 amdgpu_device_doorbell_fini(adev);
e9bc1bf7
YT
2979 if (amdgpu_sriov_vf(adev) && amdgim_is_hwperf(adev))
2980 amdgpu_pm_virt_sysfs_fini(adev);
2981
d38ceaf9 2982 amdgpu_debugfs_regs_cleanup(adev);
dcea6e65 2983 device_remove_file(adev->dev, &dev_attr_pcie_replay_count);
5bb23532 2984 amdgpu_ucode_sysfs_fini(adev);
d155bef0
AB
2985 if (IS_ENABLED(CONFIG_PERF_EVENTS))
2986 amdgpu_pmu_fini(adev);
6698a3d0 2987 amdgpu_debugfs_preempt_cleanup(adev);
f54eeab4 2988 if (amdgpu_discovery && adev->asic_type >= CHIP_NAVI10)
a190d1c7 2989 amdgpu_discovery_fini(adev);
d38ceaf9
AD
2990}
2991
2992
2993/*
2994 * Suspend & resume.
2995 */
2996/**
810ddc3a 2997 * amdgpu_device_suspend - initiate device suspend
d38ceaf9 2998 *
87e3f136
DP
2999 * @dev: drm dev pointer
3000 * @suspend: suspend state
 3001 * @fbcon: notify the fbdev of suspend
d38ceaf9
AD
3002 *
3003 * Puts the hw in the suspend state (all asics).
3004 * Returns 0 for success or an error on failure.
3005 * Called at driver suspend.
3006 */
810ddc3a 3007int amdgpu_device_suspend(struct drm_device *dev, bool suspend, bool fbcon)
d38ceaf9
AD
3008{
3009 struct amdgpu_device *adev;
3010 struct drm_crtc *crtc;
3011 struct drm_connector *connector;
5ceb54c6 3012 int r;
d38ceaf9
AD
3013
3014 if (dev == NULL || dev->dev_private == NULL) {
3015 return -ENODEV;
3016 }
3017
3018 adev = dev->dev_private;
3019
3020 if (dev->switch_power_state == DRM_SWITCH_POWER_OFF)
3021 return 0;
3022
44779b43 3023 adev->in_suspend = true;
d38ceaf9
AD
3024 drm_kms_helper_poll_disable(dev);
3025
5f818173
S
3026 if (fbcon)
3027 amdgpu_fbdev_set_suspend(adev, 1);
3028
beff74bc 3029 cancel_delayed_work_sync(&adev->delayed_init_work);
a5459475 3030
4562236b
HW
3031 if (!amdgpu_device_has_dc_support(adev)) {
3032 /* turn off display hw */
3033 drm_modeset_lock_all(dev);
3034 list_for_each_entry(connector, &dev->mode_config.connector_list, head) {
3035 drm_helper_connector_dpms(connector, DRM_MODE_DPMS_OFF);
3036 }
3037 drm_modeset_unlock_all(dev);
fe1053b7
AD
3038 /* unpin the front buffers and cursors */
3039 list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) {
3040 struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
3041 struct drm_framebuffer *fb = crtc->primary->fb;
3042 struct amdgpu_bo *robj;
3043
91334223 3044 if (amdgpu_crtc->cursor_bo && !adev->enable_virtual_display) {
fe1053b7
AD
3045 struct amdgpu_bo *aobj = gem_to_amdgpu_bo(amdgpu_crtc->cursor_bo);
3046 r = amdgpu_bo_reserve(aobj, true);
3047 if (r == 0) {
3048 amdgpu_bo_unpin(aobj);
3049 amdgpu_bo_unreserve(aobj);
3050 }
756e6880 3051 }
756e6880 3052
fe1053b7
AD
3053 if (fb == NULL || fb->obj[0] == NULL) {
3054 continue;
3055 }
3056 robj = gem_to_amdgpu_bo(fb->obj[0]);
3057 /* don't unpin kernel fb objects */
3058 if (!amdgpu_fbdev_robj_is_fb(adev, robj)) {
3059 r = amdgpu_bo_reserve(robj, true);
3060 if (r == 0) {
3061 amdgpu_bo_unpin(robj);
3062 amdgpu_bo_unreserve(robj);
3063 }
d38ceaf9
AD
3064 }
3065 }
3066 }
fe1053b7
AD
3067
3068 amdgpu_amdkfd_suspend(adev);
3069
5e6932fe 3070 amdgpu_ras_suspend(adev);
3071
fe1053b7
AD
3072 r = amdgpu_device_ip_suspend_phase1(adev);
3073
d38ceaf9
AD
3074 /* evict vram memory */
3075 amdgpu_bo_evict_vram(adev);
3076
5ceb54c6 3077 amdgpu_fence_driver_suspend(adev);
d38ceaf9 3078
fe1053b7 3079 r = amdgpu_device_ip_suspend_phase2(adev);
d38ceaf9 3080
a0a71e49
AD
3081 /* evict remaining vram memory
3082 * This second call to evict vram is to evict the gart page table
3083 * using the CPU.
3084 */
d38ceaf9
AD
3085 amdgpu_bo_evict_vram(adev);
3086
3087 pci_save_state(dev->pdev);
3088 if (suspend) {
3089 /* Shut down the device */
3090 pci_disable_device(dev->pdev);
3091 pci_set_power_state(dev->pdev, PCI_D3hot);
74b0b157 3092 } else {
3093 r = amdgpu_asic_reset(adev);
3094 if (r)
3095 DRM_ERROR("amdgpu asic reset failed\n");
d38ceaf9
AD
3096 }
3097
d38ceaf9
AD
3098 return 0;
3099}
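/*
 * Example caller (an assumption mirroring the PM callbacks in amdgpu_drv.c):
 * system suspend passes suspend=true and fbcon=true so the device is put into
 * D3hot and the fbdev is notified:
 *
 *	r = amdgpu_device_suspend(drm_dev, true, true);
 */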
3100
3101/**
810ddc3a 3102 * amdgpu_device_resume - initiate device resume
d38ceaf9 3103 *
87e3f136
DP
3104 * @dev: drm dev pointer
3105 * @resume: resume state
 3106 * @fbcon: notify the fbdev of resume
d38ceaf9
AD
3107 *
3108 * Bring the hw back to operating state (all asics).
3109 * Returns 0 for success or an error on failure.
3110 * Called at driver resume.
3111 */
810ddc3a 3112int amdgpu_device_resume(struct drm_device *dev, bool resume, bool fbcon)
d38ceaf9
AD
3113{
3114 struct drm_connector *connector;
3115 struct amdgpu_device *adev = dev->dev_private;
756e6880 3116 struct drm_crtc *crtc;
03161a6e 3117 int r = 0;
d38ceaf9
AD
3118
3119 if (dev->switch_power_state == DRM_SWITCH_POWER_OFF)
3120 return 0;
3121
d38ceaf9
AD
3122 if (resume) {
3123 pci_set_power_state(dev->pdev, PCI_D0);
3124 pci_restore_state(dev->pdev);
74b0b157 3125 r = pci_enable_device(dev->pdev);
03161a6e 3126 if (r)
4d3b9ae5 3127 return r;
d38ceaf9
AD
3128 }
3129
3130 /* post card */
39c640c0 3131 if (amdgpu_device_need_post(adev)) {
74b0b157 3132 r = amdgpu_atom_asic_init(adev->mode_info.atom_context);
3133 if (r)
3134 DRM_ERROR("amdgpu asic init failed\n");
3135 }
d38ceaf9 3136
06ec9070 3137 r = amdgpu_device_ip_resume(adev);
e6707218 3138 if (r) {
06ec9070 3139 DRM_ERROR("amdgpu_device_ip_resume failed (%d).\n", r);
4d3b9ae5 3140 return r;
e6707218 3141 }
5ceb54c6
AD
3142 amdgpu_fence_driver_resume(adev);
3143
d38ceaf9 3144
06ec9070 3145 r = amdgpu_device_ip_late_init(adev);
03161a6e 3146 if (r)
4d3b9ae5 3147 return r;
d38ceaf9 3148
beff74bc
AD
3149 queue_delayed_work(system_wq, &adev->delayed_init_work,
3150 msecs_to_jiffies(AMDGPU_RESUME_MS));
3151
fe1053b7
AD
3152 if (!amdgpu_device_has_dc_support(adev)) {
3153 /* pin cursors */
3154 list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) {
3155 struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
3156
91334223 3157 if (amdgpu_crtc->cursor_bo && !adev->enable_virtual_display) {
fe1053b7
AD
3158 struct amdgpu_bo *aobj = gem_to_amdgpu_bo(amdgpu_crtc->cursor_bo);
3159 r = amdgpu_bo_reserve(aobj, true);
3160 if (r == 0) {
3161 r = amdgpu_bo_pin(aobj, AMDGPU_GEM_DOMAIN_VRAM);
3162 if (r != 0)
3163 DRM_ERROR("Failed to pin cursor BO (%d)\n", r);
3164 amdgpu_crtc->cursor_addr = amdgpu_bo_gpu_offset(aobj);
3165 amdgpu_bo_unreserve(aobj);
3166 }
756e6880
AD
3167 }
3168 }
3169 }
ba997709
YZ
3170 r = amdgpu_amdkfd_resume(adev);
3171 if (r)
3172 return r;
756e6880 3173
96a5d8d4 3174 /* Make sure IB tests flushed */
beff74bc 3175 flush_delayed_work(&adev->delayed_init_work);
96a5d8d4 3176
d38ceaf9
AD
3177 /* blat the mode back in */
3178 if (fbcon) {
4562236b
HW
3179 if (!amdgpu_device_has_dc_support(adev)) {
3180 /* pre DCE11 */
3181 drm_helper_resume_force_mode(dev);
3182
3183 /* turn on display hw */
3184 drm_modeset_lock_all(dev);
3185 list_for_each_entry(connector, &dev->mode_config.connector_list, head) {
3186 drm_helper_connector_dpms(connector, DRM_MODE_DPMS_ON);
3187 }
3188 drm_modeset_unlock_all(dev);
d38ceaf9 3189 }
4d3b9ae5 3190 amdgpu_fbdev_set_suspend(adev, 0);
d38ceaf9
AD
3191 }
3192
3193 drm_kms_helper_poll_enable(dev);
23a1a9e5 3194
5e6932fe 3195 amdgpu_ras_resume(adev);
3196
23a1a9e5
L
3197 /*
3198 * Most of the connector probing functions try to acquire runtime pm
3199 * refs to ensure that the GPU is powered on when connector polling is
3200 * performed. Since we're calling this from a runtime PM callback,
3201 * trying to acquire rpm refs will cause us to deadlock.
3202 *
3203 * Since we're guaranteed to be holding the rpm lock, it's safe to
3204 * temporarily disable the rpm helpers so this doesn't deadlock us.
3205 */
3206#ifdef CONFIG_PM
3207 dev->dev->power.disable_depth++;
3208#endif
4562236b
HW
3209 if (!amdgpu_device_has_dc_support(adev))
3210 drm_helper_hpd_irq_event(dev);
3211 else
3212 drm_kms_helper_hotplug_event(dev);
23a1a9e5
L
3213#ifdef CONFIG_PM
3214 dev->dev->power.disable_depth--;
3215#endif
44779b43
RZ
3216 adev->in_suspend = false;
3217
4d3b9ae5 3218 return 0;
d38ceaf9
AD
3219}
3220
e3ecdffa
AD
3221/**
3222 * amdgpu_device_ip_check_soft_reset - did soft reset succeed
3223 *
3224 * @adev: amdgpu_device pointer
3225 *
3226 * The list of all the hardware IPs that make up the asic is walked and
3227 * the check_soft_reset callbacks are run. check_soft_reset determines
3228 * if the asic is still hung or not.
3229 * Returns true if any of the IPs are still in a hung state, false if not.
3230 */
06ec9070 3231static bool amdgpu_device_ip_check_soft_reset(struct amdgpu_device *adev)
63fbf42f
CZ
3232{
3233 int i;
3234 bool asic_hang = false;
3235
f993d628
ML
3236 if (amdgpu_sriov_vf(adev))
3237 return true;
3238
8bc04c29
AD
3239 if (amdgpu_asic_need_full_reset(adev))
3240 return true;
3241
63fbf42f 3242 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 3243 if (!adev->ip_blocks[i].status.valid)
63fbf42f 3244 continue;
a1255107
AD
3245 if (adev->ip_blocks[i].version->funcs->check_soft_reset)
3246 adev->ip_blocks[i].status.hang =
3247 adev->ip_blocks[i].version->funcs->check_soft_reset(adev);
3248 if (adev->ip_blocks[i].status.hang) {
3249 DRM_INFO("IP block:%s is hung!\n", adev->ip_blocks[i].version->funcs->name);
63fbf42f
CZ
3250 asic_hang = true;
3251 }
3252 }
3253 return asic_hang;
3254}
3255
e3ecdffa
AD
3256/**
3257 * amdgpu_device_ip_pre_soft_reset - prepare for soft reset
3258 *
3259 * @adev: amdgpu_device pointer
3260 *
3261 * The list of all the hardware IPs that make up the asic is walked and the
3262 * pre_soft_reset callbacks are run if the block is hung. pre_soft_reset
3263 * handles any IP specific hardware or software state changes that are
3264 * necessary for a soft reset to succeed.
3265 * Returns 0 on success, negative error code on failure.
3266 */
06ec9070 3267static int amdgpu_device_ip_pre_soft_reset(struct amdgpu_device *adev)
d31a501e
CZ
3268{
3269 int i, r = 0;
3270
3271 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 3272 if (!adev->ip_blocks[i].status.valid)
d31a501e 3273 continue;
a1255107
AD
3274 if (adev->ip_blocks[i].status.hang &&
3275 adev->ip_blocks[i].version->funcs->pre_soft_reset) {
3276 r = adev->ip_blocks[i].version->funcs->pre_soft_reset(adev);
d31a501e
CZ
3277 if (r)
3278 return r;
3279 }
3280 }
3281
3282 return 0;
3283}
3284
e3ecdffa
AD
3285/**
3286 * amdgpu_device_ip_need_full_reset - check if a full asic reset is needed
3287 *
3288 * @adev: amdgpu_device pointer
3289 *
3290 * Some hardware IPs cannot be soft reset. If they are hung, a full gpu
3291 * reset is necessary to recover.
3292 * Returns true if a full asic reset is required, false if not.
3293 */
06ec9070 3294static bool amdgpu_device_ip_need_full_reset(struct amdgpu_device *adev)
35d782fe 3295{
da146d3b
AD
3296 int i;
3297
8bc04c29
AD
3298 if (amdgpu_asic_need_full_reset(adev))
3299 return true;
3300
da146d3b 3301 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 3302 if (!adev->ip_blocks[i].status.valid)
da146d3b 3303 continue;
a1255107
AD
3304 if ((adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) ||
3305 (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) ||
3306 (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_ACP) ||
98512bb8
KW
3307 (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_DCE) ||
3308 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP) {
a1255107 3309 if (adev->ip_blocks[i].status.hang) {
da146d3b
AD
3310 DRM_INFO("Some block need full reset!\n");
3311 return true;
3312 }
3313 }
35d782fe
CZ
3314 }
3315 return false;
3316}
3317
e3ecdffa
AD
3318/**
3319 * amdgpu_device_ip_soft_reset - do a soft reset
3320 *
3321 * @adev: amdgpu_device pointer
3322 *
3323 * The list of all the hardware IPs that make up the asic is walked and the
3324 * soft_reset callbacks are run if the block is hung. soft_reset handles any
3325 * IP specific hardware or software state changes that are necessary to soft
3326 * reset the IP.
3327 * Returns 0 on success, negative error code on failure.
3328 */
06ec9070 3329static int amdgpu_device_ip_soft_reset(struct amdgpu_device *adev)
35d782fe
CZ
3330{
3331 int i, r = 0;
3332
3333 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 3334 if (!adev->ip_blocks[i].status.valid)
35d782fe 3335 continue;
a1255107
AD
3336 if (adev->ip_blocks[i].status.hang &&
3337 adev->ip_blocks[i].version->funcs->soft_reset) {
3338 r = adev->ip_blocks[i].version->funcs->soft_reset(adev);
35d782fe
CZ
3339 if (r)
3340 return r;
3341 }
3342 }
3343
3344 return 0;
3345}

/**
 * amdgpu_device_ip_post_soft_reset - clean up from soft reset
 *
 * @adev: amdgpu_device pointer
 *
 * The list of all the hardware IPs that make up the asic is walked and the
 * post_soft_reset callbacks are run if the asic was hung. post_soft_reset
 * handles any IP specific hardware or software state changes that are
 * necessary after the IP has been soft reset.
 * Returns 0 on success, negative error code on failure.
 */
static int amdgpu_device_ip_post_soft_reset(struct amdgpu_device *adev)
{
	int i, r = 0;

	for (i = 0; i < adev->num_ip_blocks; i++) {
		if (!adev->ip_blocks[i].status.valid)
			continue;
		if (adev->ip_blocks[i].status.hang &&
		    adev->ip_blocks[i].version->funcs->post_soft_reset)
			r = adev->ip_blocks[i].version->funcs->post_soft_reset(adev);
		if (r)
			return r;
	}

	return 0;
}

/**
 * amdgpu_device_recover_vram - Recover some VRAM contents
 *
 * @adev: amdgpu_device pointer
 *
 * Restores the contents of VRAM buffers from the shadows in GTT. Used to
 * restore things like GPUVM page tables after a GPU reset where
 * the contents of VRAM might be lost.
 *
 * Returns:
 * 0 on success, negative error code on failure.
 */
static int amdgpu_device_recover_vram(struct amdgpu_device *adev)
{
	struct dma_fence *fence = NULL, *next = NULL;
	struct amdgpu_bo *shadow;
	long r = 1, tmo;

	if (amdgpu_sriov_runtime(adev))
		tmo = msecs_to_jiffies(8000);
	else
		tmo = msecs_to_jiffies(100);

	DRM_INFO("recover vram bo from shadow start\n");
	mutex_lock(&adev->shadow_list_lock);
	list_for_each_entry(shadow, &adev->shadow_list, shadow_list) {

		/* No need to recover an evicted BO */
		if (shadow->tbo.mem.mem_type != TTM_PL_TT ||
		    shadow->tbo.mem.start == AMDGPU_BO_INVALID_OFFSET ||
		    shadow->parent->tbo.mem.mem_type != TTM_PL_VRAM)
			continue;

		r = amdgpu_bo_restore_shadow(shadow, &next);
		if (r)
			break;

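		/*
		 * Pipeline the restores: the copy for this BO has already been
		 * queued above, so wait here only on the fence of the previous
		 * restore while the current one runs.
		 */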
		if (fence) {
			tmo = dma_fence_wait_timeout(fence, false, tmo);
			dma_fence_put(fence);
			fence = next;
			if (tmo == 0) {
				r = -ETIMEDOUT;
				break;
			} else if (tmo < 0) {
				r = tmo;
				break;
			}
		} else {
			fence = next;
		}
	}
	mutex_unlock(&adev->shadow_list_lock);

	if (fence)
		tmo = dma_fence_wait_timeout(fence, false, tmo);
	dma_fence_put(fence);

	if (r < 0 || tmo <= 0) {
		DRM_ERROR("recover vram bo from shadow failed, r is %ld, tmo is %ld\n", r, tmo);
		return -EIO;
	}

	DRM_INFO("recover vram bo from shadow done\n");
	return 0;
}


/**
 * amdgpu_device_reset_sriov - reset ASIC for SR-IOV vf
 *
 * @adev: amdgpu device pointer
 * @from_hypervisor: request from hypervisor
 *
 * Do a VF FLR and reinitialize the ASIC.
 * Returns 0 on success, negative error code on failure.
 */
static int amdgpu_device_reset_sriov(struct amdgpu_device *adev,
				     bool from_hypervisor)
{
	int r;

	if (from_hypervisor)
		r = amdgpu_virt_request_full_gpu(adev, true);
	else
		r = amdgpu_virt_reset_gpu(adev);
	if (r)
		return r;

	amdgpu_amdkfd_pre_reset(adev);

	/* Resume IP prior to SMC */
	r = amdgpu_device_ip_reinit_early_sriov(adev);
	if (r)
		goto error;

	/* we need to recover the GART prior to running SMC/CP/SDMA resume */
	amdgpu_gtt_mgr_recover(&adev->mman.bdev.man[TTM_PL_TT]);

	r = amdgpu_device_fw_loading(adev);
	if (r)
		return r;

	/* now we are okay to resume SMC/CP/SDMA */
	r = amdgpu_device_ip_reinit_late_sriov(adev);
	if (r)
		goto error;

	amdgpu_irq_gpu_reset_resume_helper(adev);
	r = amdgpu_ib_ring_tests(adev);
	amdgpu_amdkfd_post_reset(adev);

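	/*
	 * Both the success fall-through and the error path end here: re-arm
	 * the VF<->PF data exchange, hand full GPU control back to the host,
	 * and restore VRAM from the GTT shadows if the FLR clobbered it.
	 */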
error:
	amdgpu_virt_init_data_exchange(adev);
	amdgpu_virt_release_full_gpu(adev, true);
	if (!r && adev->virt.gim_feature & AMDGIM_FEATURE_GIM_FLR_VRAMLOST) {
		amdgpu_inc_vram_lost(adev);
		r = amdgpu_device_recover_vram(adev);
	}

	return r;
}

/**
 * amdgpu_device_should_recover_gpu - check if we should try GPU recovery
 *
 * @adev: amdgpu device pointer
 *
 * Check amdgpu_gpu_recovery and SRIOV status to see if we should try to recover
 * a hung GPU.
 */
bool amdgpu_device_should_recover_gpu(struct amdgpu_device *adev)
{
	if (!amdgpu_device_ip_check_soft_reset(adev)) {
		DRM_INFO("Timeout, but no hardware hang detected.\n");
		return false;
	}

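	/*
	 * amdgpu_gpu_recovery module parameter: 0 disables recovery, 1 forces
	 * it on, and -1 (auto) enables it only for the ASICs listed below.
	 * SR-IOV VFs always attempt recovery.
	 */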
	if (amdgpu_gpu_recovery == 0)
		goto disabled;

	if (amdgpu_sriov_vf(adev))
		return true;

	if (amdgpu_gpu_recovery == -1) {
		switch (adev->asic_type) {
		case CHIP_BONAIRE:
		case CHIP_HAWAII:
		case CHIP_TOPAZ:
		case CHIP_TONGA:
		case CHIP_FIJI:
		case CHIP_POLARIS10:
		case CHIP_POLARIS11:
		case CHIP_POLARIS12:
		case CHIP_VEGAM:
		case CHIP_VEGA20:
		case CHIP_VEGA10:
		case CHIP_VEGA12:
		case CHIP_RAVEN:
			break;
		default:
			goto disabled;
		}
	}

	return true;

disabled:
	DRM_INFO("GPU recovery disabled.\n");
	return false;
}


static int amdgpu_device_pre_asic_reset(struct amdgpu_device *adev,
					struct amdgpu_job *job,
					bool *need_full_reset_arg)
{
	int i, r = 0;
	bool need_full_reset = *need_full_reset_arg;

	/* block all schedulers and reset given job's ring */
	for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
		struct amdgpu_ring *ring = adev->rings[i];

		if (!ring || !ring->sched.thread)
			continue;

		/* after all hw jobs are reset, hw fence is meaningless, so force_completion */
		amdgpu_fence_driver_force_completion(ring);
	}

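	/*
	 * Bump the offending job's karma so the scheduler can treat its
	 * context as guilty and not re-run its jobs after the reset.
	 */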
	if (job)
		drm_sched_increase_karma(&job->base);

	/* Don't suspend on bare metal if we are not going to HW reset the ASIC */
	if (!amdgpu_sriov_vf(adev)) {

		if (!need_full_reset)
			need_full_reset = amdgpu_device_ip_need_full_reset(adev);

		if (!need_full_reset) {
			amdgpu_device_ip_pre_soft_reset(adev);
			r = amdgpu_device_ip_soft_reset(adev);
			amdgpu_device_ip_post_soft_reset(adev);
			if (r || amdgpu_device_ip_check_soft_reset(adev)) {
				DRM_INFO("soft reset failed, will fallback to full reset!\n");
				need_full_reset = true;
			}
		}

		if (need_full_reset)
			r = amdgpu_device_ip_suspend(adev);

		*need_full_reset_arg = need_full_reset;
	}

	return r;
}

static int amdgpu_do_asic_reset(struct amdgpu_hive_info *hive,
				struct list_head *device_list_handle,
				bool *need_full_reset_arg)
{
	struct amdgpu_device *tmp_adev = NULL;
	bool need_full_reset = *need_full_reset_arg, vram_lost = false;
	int r = 0;

	/*
	 * ASIC reset has to be done on all XGMI hive nodes ASAP
	 * to allow proper links negotiation in FW (within 1 sec)
	 */
	if (need_full_reset) {
		list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) {
			/* For XGMI run all resets in parallel to speed up the process */
			if (tmp_adev->gmc.xgmi.num_physical_nodes > 1) {
				if (!queue_work(system_highpri_wq, &tmp_adev->xgmi_reset_work))
					r = -EALREADY;
			} else
				r = amdgpu_asic_reset(tmp_adev);

			if (r) {
				DRM_ERROR("ASIC reset failed with error, %d for drm dev, %s",
					  r, tmp_adev->ddev->unique);
				break;
			}
		}

		/* For XGMI wait for all PSP resets to complete before proceeding */
		if (!r) {
			list_for_each_entry(tmp_adev, device_list_handle,
					    gmc.xgmi.head) {
				if (tmp_adev->gmc.xgmi.num_physical_nodes > 1) {
					flush_work(&tmp_adev->xgmi_reset_work);
					r = tmp_adev->asic_reset_res;
					if (r)
						break;
				}
			}
		}
	}

	list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) {
		if (need_full_reset) {
			/* post card */
			if (amdgpu_atom_asic_init(tmp_adev->mode_info.atom_context))
				DRM_WARN("asic atom init failed!");

			if (!r) {
				dev_info(tmp_adev->dev, "GPU reset succeeded, trying to resume\n");
				r = amdgpu_device_ip_resume_phase1(tmp_adev);
				if (r)
					goto out;

				vram_lost = amdgpu_device_check_vram_lost(tmp_adev);
				if (vram_lost) {
					DRM_INFO("VRAM is lost due to GPU reset!\n");
					amdgpu_inc_vram_lost(tmp_adev);
				}

				r = amdgpu_gtt_mgr_recover(
					&tmp_adev->mman.bdev.man[TTM_PL_TT]);
				if (r)
					goto out;

				r = amdgpu_device_fw_loading(tmp_adev);
				if (r)
					return r;

				r = amdgpu_device_ip_resume_phase2(tmp_adev);
				if (r)
					goto out;

				if (vram_lost)
					amdgpu_device_fill_reset_magic(tmp_adev);

				/*
				 * Add this ASIC back as tracked since the
				 * reset already completed successfully.
				 */
				amdgpu_register_gpu_instance(tmp_adev);

				r = amdgpu_device_ip_late_init(tmp_adev);
				if (r)
					goto out;

				/* must succeed. */
				amdgpu_ras_resume(tmp_adev);

				/* Update PSP FW topology after reset */
				if (hive && tmp_adev->gmc.xgmi.num_physical_nodes > 1)
					r = amdgpu_xgmi_update_topology(hive, tmp_adev);
			}
		}

out:
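		/*
		 * Validate the reset with IB ring tests. If they fail, the IP
		 * blocks are suspended again and -EAGAIN is returned so the
		 * caller retries the whole sequence with a full reset.
		 */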
		if (!r) {
			amdgpu_irq_gpu_reset_resume_helper(tmp_adev);
			r = amdgpu_ib_ring_tests(tmp_adev);
			if (r) {
				dev_err(tmp_adev->dev, "ib ring test failed (%d).\n", r);
				r = amdgpu_device_ip_suspend(tmp_adev);
				need_full_reset = true;
				r = -EAGAIN;
				goto end;
			}
		}

		if (!r)
			r = amdgpu_device_recover_vram(tmp_adev);
		else
			tmp_adev->asic_reset_res = r;
	}

end:
	*need_full_reset_arg = need_full_reset;
	return r;
}

static bool amdgpu_device_lock_adev(struct amdgpu_device *adev, bool trylock)
{
	if (trylock) {
		if (!mutex_trylock(&adev->lock_reset))
			return false;
	} else
		mutex_lock(&adev->lock_reset);

	atomic_inc(&adev->gpu_reset_counter);
	adev->in_gpu_reset = 1;
	switch (amdgpu_asic_reset_method(adev)) {
	case AMD_RESET_METHOD_MODE1:
		adev->mp1_state = PP_MP1_STATE_SHUTDOWN;
		break;
	case AMD_RESET_METHOD_MODE2:
		adev->mp1_state = PP_MP1_STATE_RESET;
		break;
	default:
		adev->mp1_state = PP_MP1_STATE_NONE;
		break;
	}

	return true;
}

static void amdgpu_device_unlock_adev(struct amdgpu_device *adev)
{
	amdgpu_vf_error_trans_all(adev);
	adev->mp1_state = PP_MP1_STATE_NONE;
	adev->in_gpu_reset = 0;
	mutex_unlock(&adev->lock_reset);
}

/**
 * amdgpu_device_gpu_recover - reset the asic and recover scheduler
 *
 * @adev: amdgpu device pointer
 * @job: which job triggered the hang
 *
 * Attempt to reset the GPU if it has hung (all asics).
 * Attempt to do soft-reset or full-reset and reinitialize the ASIC.
 * Returns 0 for success or an error on failure.
 */

int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
			      struct amdgpu_job *job)
{
	struct list_head device_list, *device_list_handle = NULL;
	bool need_full_reset, job_signaled;
	struct amdgpu_hive_info *hive = NULL;
	struct amdgpu_device *tmp_adev = NULL;
	int i, r = 0;
	bool in_ras_intr = amdgpu_ras_intr_triggered();

	/*
	 * Flush RAM to disk so that after reboot
	 * the user can read log and see why the system rebooted.
	 */
	if (in_ras_intr && amdgpu_ras_get_context(adev)->reboot) {

		DRM_WARN("Emergency reboot.");

		ksys_sync_helper();
		emergency_restart();
	}

	need_full_reset = job_signaled = false;
	INIT_LIST_HEAD(&device_list);

	dev_info(adev->dev, "GPU %s begin!\n", in_ras_intr ? "jobs stop" : "reset");

	cancel_delayed_work_sync(&adev->delayed_init_work);

	hive = amdgpu_get_xgmi_hive(adev, false);

	/*
	 * Here we trylock to avoid a chain of resets executing from
	 * either jobs triggered on different adevs in the XGMI hive or jobs on
	 * different schedulers for the same device while this TO handler is running.
	 * We always reset all schedulers for the device and all devices for the
	 * XGMI hive, so that should take care of them too.
	 */

	if (hive && !mutex_trylock(&hive->reset_lock)) {
		DRM_INFO("Bailing on TDR for s_job:%llx, hive: %llx as another already in progress",
			 job ? job->base.id : -1, hive->hive_id);
		return 0;
	}

	/* Start with adev pre asic reset first for soft reset check. */
	if (!amdgpu_device_lock_adev(adev, !hive)) {
		DRM_INFO("Bailing on TDR for s_job:%llx, as another already in progress",
			 job ? job->base.id : -1);
		return 0;
	}

	/* Block kfd: SRIOV would do it separately */
	if (!amdgpu_sriov_vf(adev))
		amdgpu_amdkfd_pre_reset(adev);

	/* Build list of devices to reset */
	if (adev->gmc.xgmi.num_physical_nodes > 1) {
		if (!hive) {
			/* unlock kfd: SRIOV would do it separately */
			if (!amdgpu_sriov_vf(adev))
				amdgpu_amdkfd_post_reset(adev);
			amdgpu_device_unlock_adev(adev);
			return -ENODEV;
		}

		/*
		 * In case we are in XGMI hive mode device reset is done for all the
		 * nodes in the hive to retrain all XGMI links and hence the reset
		 * sequence is executed in loop on all nodes.
		 */
		device_list_handle = &hive->device_list;
	} else {
		list_add_tail(&adev->gmc.xgmi.head, &device_list);
		device_list_handle = &device_list;
	}

	/* block all schedulers and reset given job's ring */
	list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) {
		if (tmp_adev != adev) {
			amdgpu_device_lock_adev(tmp_adev, false);
			if (!amdgpu_sriov_vf(tmp_adev))
				amdgpu_amdkfd_pre_reset(tmp_adev);
		}

		/*
		 * Mark these ASICs to be reset as untracked first,
		 * and add them back after the reset completes.
		 */
		amdgpu_unregister_gpu_instance(tmp_adev);

		/* disable ras on ALL IPs */
		if (!in_ras_intr && amdgpu_device_ip_need_full_reset(tmp_adev))
			amdgpu_ras_suspend(tmp_adev);

		for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
			struct amdgpu_ring *ring = tmp_adev->rings[i];

			if (!ring || !ring->sched.thread)
				continue;

			drm_sched_stop(&ring->sched, job ? &job->base : NULL);

			if (in_ras_intr)
				amdgpu_job_stop_all_jobs_on_sched(&ring->sched);
		}
	}

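	/*
	 * For a RAS fatal-error interrupt no HW reset is attempted here: the
	 * schedulers stay stopped with their jobs drained and the locks are
	 * simply dropped at the end of this function.
	 */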
	if (in_ras_intr)
		goto skip_sched_resume;

	/*
	 * Must check guilty signal here since after this point all old
	 * HW fences are force signaled.
	 *
	 * job->base holds a reference to parent fence
	 */
	if (job && job->base.s_fence->parent &&
	    dma_fence_is_signaled(job->base.s_fence->parent))
		job_signaled = true;

	if (job_signaled) {
		dev_info(adev->dev, "Guilty job already signaled, skipping HW reset");
		goto skip_hw_reset;
	}

	/* Guilty job will be freed after this */
	r = amdgpu_device_pre_asic_reset(adev, job, &need_full_reset);
	if (r) {
		/* TODO Should we stop ? */
		DRM_ERROR("GPU pre asic reset failed with err, %d for drm dev, %s ",
			  r, adev->ddev->unique);
		adev->asic_reset_res = r;
	}

retry:	/* Rest of adevs pre asic reset from XGMI hive. */
	list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) {

		if (tmp_adev == adev)
			continue;

		r = amdgpu_device_pre_asic_reset(tmp_adev,
						 NULL,
						 &need_full_reset);
		/* TODO Should we stop ? */
		if (r) {
			DRM_ERROR("GPU pre asic reset failed with err, %d for drm dev, %s ",
				  r, tmp_adev->ddev->unique);
			tmp_adev->asic_reset_res = r;
		}
	}

	/* Actual ASIC resets if needed. */
	/* TODO Implement XGMI hive reset logic for SRIOV */
	if (amdgpu_sriov_vf(adev)) {
		r = amdgpu_device_reset_sriov(adev, job ? false : true);
		if (r)
			adev->asic_reset_res = r;
	} else {
		r = amdgpu_do_asic_reset(hive, device_list_handle, &need_full_reset);
		if (r && r == -EAGAIN)
			goto retry;
	}

skip_hw_reset:

	/* Post ASIC reset for all devs. */
	list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) {

		for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
			struct amdgpu_ring *ring = tmp_adev->rings[i];

			if (!ring || !ring->sched.thread)
				continue;

			/* No point to resubmit jobs if we didn't HW reset */
			if (!tmp_adev->asic_reset_res && !job_signaled)
				drm_sched_resubmit_jobs(&ring->sched);

			drm_sched_start(&ring->sched, !tmp_adev->asic_reset_res);
		}

		if (!amdgpu_device_has_dc_support(tmp_adev) && !job_signaled) {
			drm_helper_resume_force_mode(tmp_adev->ddev);
		}

		tmp_adev->asic_reset_res = 0;

		if (r) {
			/* bad news, how to tell it to userspace ? */
			dev_info(tmp_adev->dev, "GPU reset(%d) failed\n", atomic_read(&tmp_adev->gpu_reset_counter));
			amdgpu_vf_error_put(tmp_adev, AMDGIM_ERROR_VF_GPU_RESET_FAIL, 0, r);
		} else {
			dev_info(tmp_adev->dev, "GPU reset(%d) succeeded!\n", atomic_read(&tmp_adev->gpu_reset_counter));
		}
	}

skip_sched_resume:
	list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) {
		/* unlock kfd: SRIOV would do it separately */
		if (!in_ras_intr && !amdgpu_sriov_vf(tmp_adev))
			amdgpu_amdkfd_post_reset(tmp_adev);
		amdgpu_device_unlock_adev(tmp_adev);
	}

	if (hive)
		mutex_unlock(&hive->reset_lock);

	if (r)
		dev_info(adev->dev, "GPU reset end with ret = %d\n", r);
	return r;
}

/**
 * amdgpu_device_get_pcie_info - fetch PCIE info about the PCIE slot
 *
 * @adev: amdgpu_device pointer
 *
 * Fetches and stores in the driver the PCIE capabilities (gen speed
 * and lanes) of the slot the device is in. Handles APUs and
 * virtualized environments where PCIE config space may not be available.
 */
static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev)
{
	struct pci_dev *pdev;
	enum pci_bus_speed speed_cap, platform_speed_cap;
	enum pcie_link_width platform_link_width;

	if (amdgpu_pcie_gen_cap)
		adev->pm.pcie_gen_mask = amdgpu_pcie_gen_cap;

	if (amdgpu_pcie_lane_cap)
		adev->pm.pcie_mlw_mask = amdgpu_pcie_lane_cap;

	/* covers APUs as well */
	if (pci_is_root_bus(adev->pdev->bus)) {
		if (adev->pm.pcie_gen_mask == 0)
			adev->pm.pcie_gen_mask = AMDGPU_DEFAULT_PCIE_GEN_MASK;
		if (adev->pm.pcie_mlw_mask == 0)
			adev->pm.pcie_mlw_mask = AMDGPU_DEFAULT_PCIE_MLW_MASK;
		return;
	}

	if (adev->pm.pcie_gen_mask && adev->pm.pcie_mlw_mask)
		return;

	pcie_bandwidth_available(adev->pdev, NULL,
				 &platform_speed_cap, &platform_link_width);

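	/*
	 * The gen mask combines what the ASIC itself supports
	 * (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_*) with what the platform/slot
	 * supports (CAIL_PCIE_LINK_SPEED_SUPPORT_*).
	 */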
	if (adev->pm.pcie_gen_mask == 0) {
		/* asic caps */
		pdev = adev->pdev;
		speed_cap = pcie_get_speed_cap(pdev);
		if (speed_cap == PCI_SPEED_UNKNOWN) {
			adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
						   CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
						   CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3);
		} else {
			if (speed_cap == PCIE_SPEED_16_0GT)
				adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
							   CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
							   CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3 |
							   CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN4);
			else if (speed_cap == PCIE_SPEED_8_0GT)
				adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
							   CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
							   CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3);
			else if (speed_cap == PCIE_SPEED_5_0GT)
				adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
							   CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2);
			else
				adev->pm.pcie_gen_mask |= CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1;
		}
		/* platform caps */
		if (platform_speed_cap == PCI_SPEED_UNKNOWN) {
			adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
						   CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2);
		} else {
			if (platform_speed_cap == PCIE_SPEED_16_0GT)
				adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
							   CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2 |
							   CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3 |
							   CAIL_PCIE_LINK_SPEED_SUPPORT_GEN4);
			else if (platform_speed_cap == PCIE_SPEED_8_0GT)
				adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
							   CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2 |
							   CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3);
			else if (platform_speed_cap == PCIE_SPEED_5_0GT)
				adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
							   CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2);
			else
				adev->pm.pcie_gen_mask |= CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1;

		}
	}
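	/*
	 * The link width mask is derived from the platform report alone;
	 * the ASIC side is not queried here.
	 */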
	if (adev->pm.pcie_mlw_mask == 0) {
		if (platform_link_width == PCIE_LNK_WIDTH_UNKNOWN) {
			adev->pm.pcie_mlw_mask |= AMDGPU_DEFAULT_PCIE_MLW_MASK;
		} else {
			switch (platform_link_width) {
			case PCIE_LNK_X32:
				adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X32 |
							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X16 |
							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 |
							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
				break;
			case PCIE_LNK_X16:
				adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X16 |
							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 |
							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
				break;
			case PCIE_LNK_X12:
				adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 |
							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
				break;
			case PCIE_LNK_X8:
				adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
				break;
			case PCIE_LNK_X4:
				adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
				break;
			case PCIE_LNK_X2:
				adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
				break;
			case PCIE_LNK_X1:
				adev->pm.pcie_mlw_mask = CAIL_PCIE_LINK_WIDTH_SUPPORT_X1;
				break;
			default:
				break;
			}
		}
	}
}
