/*
 * Copyright 2008 Advanced Micro Devices, Inc.
 * Copyright 2008 Red Hat Inc.
 * Copyright 2009 Jerome Glisse.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 * Authors: Dave Airlie
 *          Alex Deucher
 *          Jerome Glisse
 */
#include <linux/power_supply.h>
#include <linux/kthread.h>
#include <linux/module.h>
#include <linux/console.h>
#include <linux/slab.h>

#include <drm/drm_atomic_helper.h>
#include <drm/drm_probe_helper.h>
#include <drm/amdgpu_drm.h>
#include <linux/vgaarb.h>
#include <linux/vga_switcheroo.h>
#include <linux/efi.h>
#include "amdgpu.h"
#include "amdgpu_trace.h"
#include "amdgpu_i2c.h"
#include "atom.h"
#include "amdgpu_atombios.h"
#include "amdgpu_atomfirmware.h"
#include "amd_pcie.h"
#ifdef CONFIG_DRM_AMDGPU_SI
#include "si.h"
#endif
#ifdef CONFIG_DRM_AMDGPU_CIK
#include "cik.h"
#endif
#include "vi.h"
#include "soc15.h"
#include "nv.h"
#include "bif/bif_4_1_d.h"
#include <linux/pci.h>
#include <linux/firmware.h>
#include "amdgpu_vf_error.h"

#include "amdgpu_amdkfd.h"
#include "amdgpu_pm.h"

#include "amdgpu_xgmi.h"
#include "amdgpu_ras.h"
#include "amdgpu_pmu.h"

#include <linux/suspend.h>

MODULE_FIRMWARE("amdgpu/vega10_gpu_info.bin");
MODULE_FIRMWARE("amdgpu/vega12_gpu_info.bin");
MODULE_FIRMWARE("amdgpu/raven_gpu_info.bin");
MODULE_FIRMWARE("amdgpu/picasso_gpu_info.bin");
MODULE_FIRMWARE("amdgpu/raven2_gpu_info.bin");
MODULE_FIRMWARE("amdgpu/arcturus_gpu_info.bin");
MODULE_FIRMWARE("amdgpu/renoir_gpu_info.bin");
MODULE_FIRMWARE("amdgpu/navi10_gpu_info.bin");
MODULE_FIRMWARE("amdgpu/navi14_gpu_info.bin");
MODULE_FIRMWARE("amdgpu/navi12_gpu_info.bin");

#define AMDGPU_RESUME_MS	2000

const char *amdgpu_asic_name[] = {
	"TAHITI",
	"PITCAIRN",
	"VERDE",
	"OLAND",
	"HAINAN",
	"BONAIRE",
	"KAVERI",
	"KABINI",
	"HAWAII",
	"MULLINS",
	"TOPAZ",
	"TONGA",
	"FIJI",
	"CARRIZO",
	"STONEY",
	"POLARIS10",
	"POLARIS11",
	"POLARIS12",
	"VEGAM",
	"VEGA10",
	"VEGA12",
	"VEGA20",
	"RAVEN",
	"ARCTURUS",
	"RENOIR",
	"NAVI10",
	"NAVI14",
	"NAVI12",
	"LAST",
};

/**
 * DOC: pcie_replay_count
 *
 * The amdgpu driver provides a sysfs API for reporting the total number
 * of PCIe replays (NAKs).
 * The file pcie_replay_count is used for this and returns the total
 * number of replays as a sum of the NAKs generated and NAKs received.
 */

static ssize_t amdgpu_device_get_pcie_replay_count(struct device *dev,
		struct device_attribute *attr, char *buf)
{
	struct drm_device *ddev = dev_get_drvdata(dev);
	struct amdgpu_device *adev = ddev->dev_private;
	uint64_t cnt = amdgpu_asic_get_pcie_replay_count(adev);

	return snprintf(buf, PAGE_SIZE, "%llu\n", cnt);
}

static DEVICE_ATTR(pcie_replay_count, S_IRUGO,
		amdgpu_device_get_pcie_replay_count, NULL);

static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev);

/**
 * amdgpu_device_supports_boco - Is the device a dGPU with HG/PX power control
 *
 * @dev: drm_device pointer
 *
 * Returns true if the device is a dGPU with HG/PX power control,
 * otherwise returns false.
 */
bool amdgpu_device_supports_boco(struct drm_device *dev)
{
	struct amdgpu_device *adev = dev->dev_private;

	if (adev->flags & AMD_IS_PX)
		return true;
	return false;
}

/**
 * amdgpu_device_supports_baco - Does the device support BACO
 *
 * @dev: drm_device pointer
 *
 * Returns true if the device supports BACO,
 * otherwise returns false.
 */
bool amdgpu_device_supports_baco(struct drm_device *dev)
{
	struct amdgpu_device *adev = dev->dev_private;

	return amdgpu_asic_supports_baco(adev);
}

/**
 * VRAM access helper functions.
 *
 * amdgpu_device_vram_access - read/write a buffer in vram
 *
 * @adev: amdgpu_device pointer
 * @pos: offset of the buffer in vram
 * @buf: virtual address of the buffer in system memory
 * @size: read/write size in bytes, @buf must hold at least @size bytes
 * @write: true - write to vram, otherwise - read from vram
 */
void amdgpu_device_vram_access(struct amdgpu_device *adev, loff_t pos,
			       uint32_t *buf, size_t size, bool write)
{
	uint64_t last;
	unsigned long flags;

	last = size - 4;
	for (last += pos; pos <= last; pos += 4) {
		spin_lock_irqsave(&adev->mmio_idx_lock, flags);
		WREG32_NO_KIQ(mmMM_INDEX, ((uint32_t)pos) | 0x80000000);
		WREG32_NO_KIQ(mmMM_INDEX_HI, pos >> 31);
		if (write)
			WREG32_NO_KIQ(mmMM_DATA, *buf++);
		else
			*buf++ = RREG32_NO_KIQ(mmMM_DATA);
		spin_unlock_irqrestore(&adev->mmio_idx_lock, flags);
	}
}

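/*
 * Usage sketch (illustrative only, not part of the driver): a caller that
 * wants to copy a few dwords out of VRAM into a system memory buffer could
 * invoke the helper like below; "my_vram_offset" and "my_buf" are
 * hypothetical names.
 *
 *	uint32_t my_buf[4];
 *
 *	amdgpu_device_vram_access(adev, my_vram_offset, my_buf,
 *				  sizeof(my_buf), false);
 */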
/*
 * MMIO register access helper functions.
 */
204/**
205 * amdgpu_mm_rreg - read a memory mapped IO register
206 *
207 * @adev: amdgpu_device pointer
208 * @reg: dword aligned register offset
209 * @acc_flags: access flags which require special behavior
210 *
211 * Returns the 32 bit value from the offset specified.
212 */
d38ceaf9 213uint32_t amdgpu_mm_rreg(struct amdgpu_device *adev, uint32_t reg,
15d72fd7 214 uint32_t acc_flags)
d38ceaf9 215{
216 uint32_t ret;
217
43ca8efa 218 if (!(acc_flags & AMDGPU_REGS_NO_KIQ) && amdgpu_sriov_runtime(adev))
bc992ba5 219 return amdgpu_virt_kiq_rreg(adev, reg);
bc992ba5 220
15d72fd7 221 if ((reg * 4) < adev->rmmio_size && !(acc_flags & AMDGPU_REGS_IDX))
f4b373f4 222 ret = readl(((void __iomem *)adev->rmmio) + (reg * 4));
223 else {
224 unsigned long flags;
225
226 spin_lock_irqsave(&adev->mmio_idx_lock, flags);
227 writel((reg * 4), ((void __iomem *)adev->rmmio) + (mmMM_INDEX * 4));
228 ret = readl(((void __iomem *)adev->rmmio) + (mmMM_DATA * 4));
229 spin_unlock_irqrestore(&adev->mmio_idx_lock, flags);
d38ceaf9 230 }
231 trace_amdgpu_mm_rreg(adev->pdev->device, reg, ret);
232 return ret;
233}
234
/*
 * MMIO register read with bytes helper functions
 * @offset: bytes offset from MMIO start
 */

241/**
242 * amdgpu_mm_rreg8 - read a memory mapped IO register
243 *
244 * @adev: amdgpu_device pointer
245 * @offset: byte aligned register offset
246 *
247 * Returns the 8 bit value from the offset specified.
248 */
249uint8_t amdgpu_mm_rreg8(struct amdgpu_device *adev, uint32_t offset) {
250 if (offset < adev->rmmio_size)
251 return (readb(adev->rmmio + offset));
252 BUG();
253}
254
/*
 * MMIO register write with bytes helper functions
 * @offset: bytes offset from MMIO start
 * @value: the value to be written to the register
 */
/**
 * amdgpu_mm_wreg8 - write to a memory mapped IO register
 *
 * @adev: amdgpu_device pointer
 * @offset: byte aligned register offset
 * @value: 8 bit value to write
 *
 * Writes the value specified to the offset specified.
 */
270void amdgpu_mm_wreg8(struct amdgpu_device *adev, uint32_t offset, uint8_t value) {
271 if (offset < adev->rmmio_size)
272 writeb(value, adev->rmmio + offset);
273 else
274 BUG();
275}
276
277/**
278 * amdgpu_mm_wreg - write to a memory mapped IO register
279 *
280 * @adev: amdgpu_device pointer
281 * @reg: dword aligned register offset
282 * @v: 32 bit value to write to the register
283 * @acc_flags: access flags which require special behavior
284 *
285 * Writes the value specified to the offset specified.
286 */
d38ceaf9 287void amdgpu_mm_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v,
15d72fd7 288 uint32_t acc_flags)
d38ceaf9 289{
f4b373f4 290 trace_amdgpu_mm_wreg(adev->pdev->device, reg, v);
4e99a44e 291
292 if (adev->asic_type >= CHIP_VEGA10 && reg == 0) {
293 adev->last_mm_index = v;
294 }
295
43ca8efa 296 if (!(acc_flags & AMDGPU_REGS_NO_KIQ) && amdgpu_sriov_runtime(adev))
bc992ba5 297 return amdgpu_virt_kiq_wreg(adev, reg, v);
bc992ba5 298
15d72fd7 299 if ((reg * 4) < adev->rmmio_size && !(acc_flags & AMDGPU_REGS_IDX))
300 writel(v, ((void __iomem *)adev->rmmio) + (reg * 4));
301 else {
302 unsigned long flags;
303
304 spin_lock_irqsave(&adev->mmio_idx_lock, flags);
305 writel((reg * 4), ((void __iomem *)adev->rmmio) + (mmMM_INDEX * 4));
306 writel(v, ((void __iomem *)adev->rmmio) + (mmMM_DATA * 4));
307 spin_unlock_irqrestore(&adev->mmio_idx_lock, flags);
308 }
309
310 if (adev->asic_type >= CHIP_VEGA10 && reg == 1 && adev->last_mm_index == 0x5702C) {
311 udelay(500);
312 }
313}
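/*
 * Illustrative note (a sketch, not taken from this file): most callers do not
 * use amdgpu_mm_rreg()/amdgpu_mm_wreg() directly but go through the register
 * access macros (RREG32()/WREG32() and friends, defined in amdgpu.h), e.g.
 * with a hypothetical register name:
 *
 *	tmp = RREG32(mmMY_REG);
 *	tmp |= 0x1;
 *	WREG32(mmMY_REG, tmp);
 *
 * which expand to calls into these helpers with default access flags.
 */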
314
315/**
316 * amdgpu_io_rreg - read an IO register
317 *
318 * @adev: amdgpu_device pointer
319 * @reg: dword aligned register offset
320 *
321 * Returns the 32 bit value from the offset specified.
322 */
323u32 amdgpu_io_rreg(struct amdgpu_device *adev, u32 reg)
324{
325 if ((reg * 4) < adev->rio_mem_size)
326 return ioread32(adev->rio_mem + (reg * 4));
327 else {
328 iowrite32((reg * 4), adev->rio_mem + (mmMM_INDEX * 4));
329 return ioread32(adev->rio_mem + (mmMM_DATA * 4));
330 }
331}
332
333/**
334 * amdgpu_io_wreg - write to an IO register
335 *
336 * @adev: amdgpu_device pointer
337 * @reg: dword aligned register offset
338 * @v: 32 bit value to write to the register
339 *
340 * Writes the value specified to the offset specified.
341 */
342void amdgpu_io_wreg(struct amdgpu_device *adev, u32 reg, u32 v)
343{
344 if (adev->asic_type >= CHIP_VEGA10 && reg == 0) {
345 adev->last_mm_index = v;
346 }
347
348 if ((reg * 4) < adev->rio_mem_size)
349 iowrite32(v, adev->rio_mem + (reg * 4));
350 else {
351 iowrite32((reg * 4), adev->rio_mem + (mmMM_INDEX * 4));
352 iowrite32(v, adev->rio_mem + (mmMM_DATA * 4));
353 }
354
355 if (adev->asic_type >= CHIP_VEGA10 && reg == 1 && adev->last_mm_index == 0x5702C) {
356 udelay(500);
357 }
358}
359
360/**
361 * amdgpu_mm_rdoorbell - read a doorbell dword
362 *
363 * @adev: amdgpu_device pointer
364 * @index: doorbell index
365 *
366 * Returns the value in the doorbell aperture at the
367 * requested doorbell index (CIK).
368 */
369u32 amdgpu_mm_rdoorbell(struct amdgpu_device *adev, u32 index)
370{
371 if (index < adev->doorbell.num_doorbells) {
372 return readl(adev->doorbell.ptr + index);
373 } else {
374 DRM_ERROR("reading beyond doorbell aperture: 0x%08x!\n", index);
375 return 0;
376 }
377}
378
379/**
380 * amdgpu_mm_wdoorbell - write a doorbell dword
381 *
382 * @adev: amdgpu_device pointer
383 * @index: doorbell index
384 * @v: value to write
385 *
386 * Writes @v to the doorbell aperture at the
387 * requested doorbell index (CIK).
388 */
389void amdgpu_mm_wdoorbell(struct amdgpu_device *adev, u32 index, u32 v)
390{
391 if (index < adev->doorbell.num_doorbells) {
392 writel(v, adev->doorbell.ptr + index);
393 } else {
394 DRM_ERROR("writing beyond doorbell aperture: 0x%08x!\n", index);
395 }
396}
397
398/**
399 * amdgpu_mm_rdoorbell64 - read a doorbell Qword
400 *
401 * @adev: amdgpu_device pointer
402 * @index: doorbell index
403 *
404 * Returns the value in the doorbell aperture at the
405 * requested doorbell index (VEGA10+).
406 */
407u64 amdgpu_mm_rdoorbell64(struct amdgpu_device *adev, u32 index)
408{
409 if (index < adev->doorbell.num_doorbells) {
410 return atomic64_read((atomic64_t *)(adev->doorbell.ptr + index));
411 } else {
412 DRM_ERROR("reading beyond doorbell aperture: 0x%08x!\n", index);
413 return 0;
414 }
415}
416
417/**
418 * amdgpu_mm_wdoorbell64 - write a doorbell Qword
419 *
420 * @adev: amdgpu_device pointer
421 * @index: doorbell index
422 * @v: value to write
423 *
424 * Writes @v to the doorbell aperture at the
425 * requested doorbell index (VEGA10+).
426 */
427void amdgpu_mm_wdoorbell64(struct amdgpu_device *adev, u32 index, u64 v)
428{
429 if (index < adev->doorbell.num_doorbells) {
430 atomic64_set((atomic64_t *)(adev->doorbell.ptr + index), v);
431 } else {
432 DRM_ERROR("writing beyond doorbell aperture: 0x%08x!\n", index);
433 }
434}
435
436/**
437 * amdgpu_invalid_rreg - dummy reg read function
438 *
439 * @adev: amdgpu device pointer
440 * @reg: offset of register
441 *
442 * Dummy register read function. Used for register blocks
443 * that certain asics don't have (all asics).
444 * Returns the value in the register.
445 */
446static uint32_t amdgpu_invalid_rreg(struct amdgpu_device *adev, uint32_t reg)
447{
448 DRM_ERROR("Invalid callback to read register 0x%04X\n", reg);
449 BUG();
450 return 0;
451}
452
/**
 * amdgpu_invalid_wreg - dummy reg write function
 *
 * @adev: amdgpu device pointer
 * @reg: offset of register
 * @v: value to write to the register
 *
 * Dummy register write function. Used for register blocks
 * that certain asics don't have (all asics).
 */
463static void amdgpu_invalid_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v)
464{
465 DRM_ERROR("Invalid callback to write register 0x%04X with 0x%08X\n",
466 reg, v);
467 BUG();
468}
469
470/**
471 * amdgpu_invalid_rreg64 - dummy 64 bit reg read function
472 *
473 * @adev: amdgpu device pointer
474 * @reg: offset of register
475 *
476 * Dummy register read function. Used for register blocks
477 * that certain asics don't have (all asics).
478 * Returns the value in the register.
479 */
480static uint64_t amdgpu_invalid_rreg64(struct amdgpu_device *adev, uint32_t reg)
481{
482 DRM_ERROR("Invalid callback to read 64 bit register 0x%04X\n", reg);
483 BUG();
484 return 0;
485}
486
/**
 * amdgpu_invalid_wreg64 - dummy reg write function
 *
 * @adev: amdgpu device pointer
 * @reg: offset of register
 * @v: value to write to the register
 *
 * Dummy register write function. Used for register blocks
 * that certain asics don't have (all asics).
 */
497static void amdgpu_invalid_wreg64(struct amdgpu_device *adev, uint32_t reg, uint64_t v)
498{
499 DRM_ERROR("Invalid callback to write 64 bit register 0x%04X with 0x%08llX\n",
500 reg, v);
501 BUG();
502}
503
504/**
505 * amdgpu_block_invalid_rreg - dummy reg read function
506 *
507 * @adev: amdgpu device pointer
508 * @block: offset of instance
509 * @reg: offset of register
510 *
511 * Dummy register read function. Used for register blocks
512 * that certain asics don't have (all asics).
513 * Returns the value in the register.
514 */
515static uint32_t amdgpu_block_invalid_rreg(struct amdgpu_device *adev,
516 uint32_t block, uint32_t reg)
517{
518 DRM_ERROR("Invalid callback to read register 0x%04X in block 0x%04X\n",
519 reg, block);
520 BUG();
521 return 0;
522}
523
/**
 * amdgpu_block_invalid_wreg - dummy reg write function
 *
 * @adev: amdgpu device pointer
 * @block: offset of instance
 * @reg: offset of register
 * @v: value to write to the register
 *
 * Dummy register write function. Used for register blocks
 * that certain asics don't have (all asics).
 */
535static void amdgpu_block_invalid_wreg(struct amdgpu_device *adev,
536 uint32_t block,
537 uint32_t reg, uint32_t v)
538{
539 DRM_ERROR("Invalid block callback to write register 0x%04X in block 0x%04X with 0x%08X\n",
540 reg, block, v);
541 BUG();
542}
543
544/**
545 * amdgpu_device_vram_scratch_init - allocate the VRAM scratch page
546 *
547 * @adev: amdgpu device pointer
548 *
549 * Allocates a scratch page of VRAM for use by various things in the
550 * driver.
551 */
06ec9070 552static int amdgpu_device_vram_scratch_init(struct amdgpu_device *adev)
d38ceaf9 553{
a4a02777
CK
554 return amdgpu_bo_create_kernel(adev, AMDGPU_GPU_PAGE_SIZE,
555 PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM,
556 &adev->vram_scratch.robj,
557 &adev->vram_scratch.gpu_addr,
558 (void **)&adev->vram_scratch.ptr);
d38ceaf9
AD
559}
560
561/**
562 * amdgpu_device_vram_scratch_fini - Free the VRAM scratch page
563 *
564 * @adev: amdgpu device pointer
565 *
566 * Frees the VRAM scratch page.
567 */
06ec9070 568static void amdgpu_device_vram_scratch_fini(struct amdgpu_device *adev)
d38ceaf9 569{
078af1a3 570 amdgpu_bo_free_kernel(&adev->vram_scratch.robj, NULL, NULL);
d38ceaf9
AD
571}
572
/**
 * amdgpu_device_program_register_sequence - program an array of registers.
 *
 * @adev: amdgpu_device pointer
 * @registers: pointer to the register array
 * @array_size: size of the register array
 *
 * Programs an array of registers with AND and OR masks.
 * This is a helper for setting golden registers.
 */
9c3f2b54
AD
583void amdgpu_device_program_register_sequence(struct amdgpu_device *adev,
584 const u32 *registers,
585 const u32 array_size)
d38ceaf9
AD
586{
587 u32 tmp, reg, and_mask, or_mask;
588 int i;
589
590 if (array_size % 3)
591 return;
592
593 for (i = 0; i < array_size; i +=3) {
594 reg = registers[i + 0];
595 and_mask = registers[i + 1];
596 or_mask = registers[i + 2];
597
598 if (and_mask == 0xffffffff) {
599 tmp = or_mask;
600 } else {
601 tmp = RREG32(reg);
602 tmp &= ~and_mask;
e0d07657
HZ
603 if (adev->family >= AMDGPU_FAMILY_AI)
604 tmp |= (or_mask & and_mask);
605 else
606 tmp |= or_mask;
d38ceaf9
AD
607 }
608 WREG32(reg, tmp);
609 }
610}
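/*
 * Illustrative use (hypothetical register names and values): golden settings
 * are passed as {register, AND mask, OR mask} triples, so a two-entry table
 * would look roughly like this:
 *
 *	static const u32 my_golden_settings[] = {
 *		mmMY_REG_A, 0xffffffff, 0x00000001,
 *		mmMY_REG_B, 0x0000ff00, 0x00003200,
 *	};
 *
 *	amdgpu_device_program_register_sequence(adev, my_golden_settings,
 *						ARRAY_SIZE(my_golden_settings));
 */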
611
612/**
613 * amdgpu_device_pci_config_reset - reset the GPU
614 *
615 * @adev: amdgpu_device pointer
616 *
617 * Resets the GPU using the pci config reset sequence.
618 * Only applicable to asics prior to vega10.
619 */
8111c387 620void amdgpu_device_pci_config_reset(struct amdgpu_device *adev)
d38ceaf9
AD
621{
622 pci_write_config_dword(adev->pdev, 0x7c, AMDGPU_ASIC_RESET_DATA);
623}
624
625/*
626 * GPU doorbell aperture helpers function.
627 */
628/**
06ec9070 629 * amdgpu_device_doorbell_init - Init doorbell driver information.
d38ceaf9
AD
630 *
631 * @adev: amdgpu_device pointer
632 *
633 * Init doorbell driver information (CIK)
634 * Returns 0 on success, error on failure.
635 */
06ec9070 636static int amdgpu_device_doorbell_init(struct amdgpu_device *adev)
d38ceaf9 637{
6585661d 638
705e519e
CK
639 /* No doorbell on SI hardware generation */
640 if (adev->asic_type < CHIP_BONAIRE) {
641 adev->doorbell.base = 0;
642 adev->doorbell.size = 0;
643 adev->doorbell.num_doorbells = 0;
644 adev->doorbell.ptr = NULL;
645 return 0;
646 }
647
648 if (pci_resource_flags(adev->pdev, 2) & IORESOURCE_UNSET)
649 return -EINVAL;
650
22357775
AD
651 amdgpu_asic_init_doorbell_index(adev);
652
d38ceaf9
AD
653 /* doorbell bar mapping */
654 adev->doorbell.base = pci_resource_start(adev->pdev, 2);
655 adev->doorbell.size = pci_resource_len(adev->pdev, 2);
656
edf600da 657 adev->doorbell.num_doorbells = min_t(u32, adev->doorbell.size / sizeof(u32),
9564f192 658 adev->doorbell_index.max_assignment+1);
d38ceaf9
AD
659 if (adev->doorbell.num_doorbells == 0)
660 return -EINVAL;
661
	/* For Vega, reserve and map two pages on doorbell BAR since SDMA
	 * paging queue doorbells use the second page. The
	 * AMDGPU_DOORBELL64_MAX_ASSIGNMENT definition assumes all the
	 * doorbells are in the first page. So with paging queue enabled,
	 * the max num_doorbells should be incremented by one page (0x400 in dwords).
	 */
668 if (adev->asic_type >= CHIP_VEGA10)
88dc26e4 669 adev->doorbell.num_doorbells += 0x400;
ec3db8a6 670
8972e5d2
CK
671 adev->doorbell.ptr = ioremap(adev->doorbell.base,
672 adev->doorbell.num_doorbells *
673 sizeof(u32));
674 if (adev->doorbell.ptr == NULL)
d38ceaf9 675 return -ENOMEM;
d38ceaf9
AD
676
677 return 0;
678}
679
680/**
06ec9070 681 * amdgpu_device_doorbell_fini - Tear down doorbell driver information.
d38ceaf9
AD
682 *
683 * @adev: amdgpu_device pointer
684 *
685 * Tear down doorbell driver information (CIK)
686 */
06ec9070 687static void amdgpu_device_doorbell_fini(struct amdgpu_device *adev)
d38ceaf9
AD
688{
689 iounmap(adev->doorbell.ptr);
690 adev->doorbell.ptr = NULL;
691}
692
22cb0164 693
d38ceaf9
AD
694
/*
 * amdgpu_device_wb_*()
 * Writeback is the method by which the GPU updates special pages in memory
 * with the status of certain GPU events (fences, ring pointers, etc.).
 */
700
701/**
06ec9070 702 * amdgpu_device_wb_fini - Disable Writeback and free memory
d38ceaf9
AD
703 *
704 * @adev: amdgpu_device pointer
705 *
706 * Disables Writeback and frees the Writeback memory (all asics).
707 * Used at driver shutdown.
708 */
06ec9070 709static void amdgpu_device_wb_fini(struct amdgpu_device *adev)
710{
711 if (adev->wb.wb_obj) {
a76ed485
AD
712 amdgpu_bo_free_kernel(&adev->wb.wb_obj,
713 &adev->wb.gpu_addr,
714 (void **)&adev->wb.wb);
d38ceaf9
AD
715 adev->wb.wb_obj = NULL;
716 }
717}
718
/**
 * amdgpu_device_wb_init - Init Writeback driver info and allocate memory
 *
 * @adev: amdgpu_device pointer
 *
 * Initializes writeback and allocates writeback memory (all asics).
 * Used at driver startup.
 * Returns 0 on success or a negative error code on failure.
 */
06ec9070 728static int amdgpu_device_wb_init(struct amdgpu_device *adev)
d38ceaf9
AD
729{
730 int r;
731
732 if (adev->wb.wb_obj == NULL) {
97407b63
AD
733 /* AMDGPU_MAX_WB * sizeof(uint32_t) * 8 = AMDGPU_MAX_WB 256bit slots */
734 r = amdgpu_bo_create_kernel(adev, AMDGPU_MAX_WB * sizeof(uint32_t) * 8,
a76ed485
AD
735 PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT,
736 &adev->wb.wb_obj, &adev->wb.gpu_addr,
737 (void **)&adev->wb.wb);
d38ceaf9
AD
738 if (r) {
739 dev_warn(adev->dev, "(%d) create WB bo failed\n", r);
740 return r;
741 }
d38ceaf9
AD
742
743 adev->wb.num_wb = AMDGPU_MAX_WB;
744 memset(&adev->wb.used, 0, sizeof(adev->wb.used));
745
746 /* clear wb memory */
73469585 747 memset((char *)adev->wb.wb, 0, AMDGPU_MAX_WB * sizeof(uint32_t) * 8);
d38ceaf9
AD
748 }
749
750 return 0;
751}
752
753/**
131b4b36 754 * amdgpu_device_wb_get - Allocate a wb entry
d38ceaf9
AD
755 *
756 * @adev: amdgpu_device pointer
757 * @wb: wb index
758 *
759 * Allocate a wb slot for use by the driver (all asics).
760 * Returns 0 on success or -EINVAL on failure.
761 */
131b4b36 762int amdgpu_device_wb_get(struct amdgpu_device *adev, u32 *wb)
763{
764 unsigned long offset = find_first_zero_bit(adev->wb.used, adev->wb.num_wb);
d38ceaf9 765
97407b63 766 if (offset < adev->wb.num_wb) {
7014285a 767 __set_bit(offset, adev->wb.used);
63ae07ca 768 *wb = offset << 3; /* convert to dw offset */
0915fdbc
ML
769 return 0;
770 } else {
771 return -EINVAL;
772 }
773}
774
d38ceaf9 775/**
131b4b36 776 * amdgpu_device_wb_free - Free a wb entry
777 *
778 * @adev: amdgpu_device pointer
779 * @wb: wb index
780 *
781 * Free a wb slot allocated for use by the driver (all asics)
782 */
131b4b36 783void amdgpu_device_wb_free(struct amdgpu_device *adev, u32 wb)
d38ceaf9 784{
73469585 785 wb >>= 3;
d38ceaf9 786 if (wb < adev->wb.num_wb)
73469585 787 __clear_bit(wb, adev->wb.used);
788}
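/*
 * Illustrative pairing of the writeback helpers (a sketch, not actual driver
 * code): an IP block would typically grab a slot at init time and release it
 * at fini time; "my_wb" and "r" are hypothetical locals.
 *
 *	u32 my_wb;
 *	int r;
 *
 *	r = amdgpu_device_wb_get(adev, &my_wb);
 *	if (r)
 *		return r;
 *	...
 *	amdgpu_device_wb_free(adev, my_wb);
 */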
789
d6895ad3
CK
790/**
791 * amdgpu_device_resize_fb_bar - try to resize FB BAR
792 *
793 * @adev: amdgpu_device pointer
794 *
795 * Try to resize FB BAR to make all VRAM CPU accessible. We try very hard not
796 * to fail, but if any of the BARs is not accessible after the size we abort
797 * driver loading by returning -ENODEV.
798 */
799int amdgpu_device_resize_fb_bar(struct amdgpu_device *adev)
800{
770d13b1 801 u64 space_needed = roundup_pow_of_two(adev->gmc.real_vram_size);
d6895ad3 802 u32 rbar_size = order_base_2(((space_needed >> 20) | 1)) - 1;
31b8adab
CK
803 struct pci_bus *root;
804 struct resource *res;
805 unsigned i;
d6895ad3
CK
806 u16 cmd;
807 int r;
808
0c03b912 809 /* Bypass for VF */
810 if (amdgpu_sriov_vf(adev))
811 return 0;
812
31b8adab
CK
813 /* Check if the root BUS has 64bit memory resources */
814 root = adev->pdev->bus;
815 while (root->parent)
816 root = root->parent;
817
818 pci_bus_for_each_resource(root, res, i) {
0ebb7c54 819 if (res && res->flags & (IORESOURCE_MEM | IORESOURCE_MEM_64) &&
31b8adab
CK
820 res->start > 0x100000000ull)
821 break;
822 }
823
824 /* Trying to resize is pointless without a root hub window above 4GB */
825 if (!res)
826 return 0;
827
d6895ad3
CK
828 /* Disable memory decoding while we change the BAR addresses and size */
829 pci_read_config_word(adev->pdev, PCI_COMMAND, &cmd);
830 pci_write_config_word(adev->pdev, PCI_COMMAND,
831 cmd & ~PCI_COMMAND_MEMORY);
832
833 /* Free the VRAM and doorbell BAR, we most likely need to move both. */
06ec9070 834 amdgpu_device_doorbell_fini(adev);
d6895ad3
CK
835 if (adev->asic_type >= CHIP_BONAIRE)
836 pci_release_resource(adev->pdev, 2);
837
838 pci_release_resource(adev->pdev, 0);
839
840 r = pci_resize_resource(adev->pdev, 0, rbar_size);
841 if (r == -ENOSPC)
842 DRM_INFO("Not enough PCI address space for a large BAR.");
843 else if (r && r != -ENOTSUPP)
844 DRM_ERROR("Problem resizing BAR0 (%d).", r);
845
846 pci_assign_unassigned_bus_resources(adev->pdev->bus);
847
848 /* When the doorbell or fb BAR isn't available we have no chance of
849 * using the device.
850 */
06ec9070 851 r = amdgpu_device_doorbell_init(adev);
d6895ad3
CK
852 if (r || (pci_resource_flags(adev->pdev, 0) & IORESOURCE_UNSET))
853 return -ENODEV;
854
855 pci_write_config_word(adev->pdev, PCI_COMMAND, cmd);
856
857 return 0;
858}
a05502e5 859
860/*
861 * GPU helpers function.
862 */
/**
 * amdgpu_device_need_post - check if the hw needs post or not
 *
 * @adev: amdgpu_device pointer
 *
 * Check if the asic has been initialized (all asics) at driver startup
 * or if post is needed if a hw reset is performed.
 * Returns true if post is needed or false if not.
 */
39c640c0 872bool amdgpu_device_need_post(struct amdgpu_device *adev)
d38ceaf9
AD
873{
874 uint32_t reg;
875
bec86378
ML
876 if (amdgpu_sriov_vf(adev))
877 return false;
878
879 if (amdgpu_passthrough(adev)) {
		/* for FIJI: In whole GPU pass-through virtualization case, after VM reboot
		 * some old smc fw still needs the driver to do vPost, otherwise the gpu hangs;
		 * smc fw versions above 22.15 don't have this flaw, so we force
		 * vPost to be executed for smc versions below 22.15.
		 */
		if (adev->asic_type == CHIP_FIJI) {
			int err;
			uint32_t fw_ver;
			err = request_firmware(&adev->pm.fw, "amdgpu/fiji_smc.bin", adev->dev);
			/* force vPost if error occurred */
890 if (err)
891 return true;
892
893 fw_ver = *((uint32_t *)adev->pm.fw->data + 69);
1da2c326
ML
894 if (fw_ver < 0x00160e00)
895 return true;
bec86378 896 }
bec86378 897 }
91fe77eb 898
899 if (adev->has_hw_reset) {
900 adev->has_hw_reset = false;
901 return true;
902 }
903
904 /* bios scratch used on CIK+ */
905 if (adev->asic_type >= CHIP_BONAIRE)
906 return amdgpu_atombios_scratch_need_asic_init(adev);
907
908 /* check MEM_SIZE for older asics */
909 reg = amdgpu_asic_get_config_memsize(adev);
910
911 if ((reg != 0) && (reg != 0xffffffff))
912 return false;
913
914 return true;
bec86378
ML
915}
916
917/* if we get transitioned to only one device, take VGA back */
918/**
06ec9070 919 * amdgpu_device_vga_set_decode - enable/disable vga decode
d38ceaf9
AD
920 *
921 * @cookie: amdgpu_device pointer
922 * @state: enable/disable vga decode
923 *
924 * Enable/disable vga decode (all asics).
925 * Returns VGA resource flags.
926 */
06ec9070 927static unsigned int amdgpu_device_vga_set_decode(void *cookie, bool state)
d38ceaf9
AD
928{
929 struct amdgpu_device *adev = cookie;
930 amdgpu_asic_set_vga_state(adev, state);
931 if (state)
932 return VGA_RSRC_LEGACY_IO | VGA_RSRC_LEGACY_MEM |
933 VGA_RSRC_NORMAL_IO | VGA_RSRC_NORMAL_MEM;
934 else
935 return VGA_RSRC_NORMAL_IO | VGA_RSRC_NORMAL_MEM;
936}
937
938/**
939 * amdgpu_device_check_block_size - validate the vm block size
940 *
941 * @adev: amdgpu_device pointer
942 *
943 * Validates the vm block size specified via module parameter.
944 * The vm block size defines number of bits in page table versus page directory,
945 * a page is 4KB so we have 12 bits offset, minimum 9 bits in the
946 * page table and the remaining bits are in the page directory.
947 */
06ec9070 948static void amdgpu_device_check_block_size(struct amdgpu_device *adev)
a1adf8be
CZ
949{
950 /* defines number of bits in page table versus page directory,
951 * a page is 4KB so we have 12 bits offset, minimum 9 bits in the
952 * page table and the remaining bits are in the page directory */
bab4fee7
JZ
953 if (amdgpu_vm_block_size == -1)
954 return;
a1adf8be 955
bab4fee7 956 if (amdgpu_vm_block_size < 9) {
a1adf8be
CZ
957 dev_warn(adev->dev, "VM page table size (%d) too small\n",
958 amdgpu_vm_block_size);
97489129 959 amdgpu_vm_block_size = -1;
a1adf8be 960 }
a1adf8be
CZ
961}
962
963/**
964 * amdgpu_device_check_vm_size - validate the vm size
965 *
966 * @adev: amdgpu_device pointer
967 *
968 * Validates the vm size in GB specified via module parameter.
969 * The VM size is the size of the GPU virtual memory space in GB.
970 */
06ec9070 971static void amdgpu_device_check_vm_size(struct amdgpu_device *adev)
83ca145d 972{
64dab074
AD
973 /* no need to check the default value */
974 if (amdgpu_vm_size == -1)
975 return;
976
83ca145d
ZJ
977 if (amdgpu_vm_size < 1) {
978 dev_warn(adev->dev, "VM size (%d) too small, min is 1GB\n",
979 amdgpu_vm_size);
f3368128 980 amdgpu_vm_size = -1;
83ca145d 981 }
83ca145d
ZJ
982}
983
7951e376
RZ
984static void amdgpu_device_check_smu_prv_buffer_size(struct amdgpu_device *adev)
985{
986 struct sysinfo si;
987 bool is_os_64 = (sizeof(void *) == 8) ? true : false;
988 uint64_t total_memory;
989 uint64_t dram_size_seven_GB = 0x1B8000000;
990 uint64_t dram_size_three_GB = 0xB8000000;
991
992 if (amdgpu_smu_memory_pool_size == 0)
993 return;
994
995 if (!is_os_64) {
996 DRM_WARN("Not 64-bit OS, feature not supported\n");
997 goto def_value;
998 }
999 si_meminfo(&si);
1000 total_memory = (uint64_t)si.totalram * si.mem_unit;
1001
1002 if ((amdgpu_smu_memory_pool_size == 1) ||
1003 (amdgpu_smu_memory_pool_size == 2)) {
1004 if (total_memory < dram_size_three_GB)
1005 goto def_value1;
1006 } else if ((amdgpu_smu_memory_pool_size == 4) ||
1007 (amdgpu_smu_memory_pool_size == 8)) {
1008 if (total_memory < dram_size_seven_GB)
1009 goto def_value1;
1010 } else {
1011 DRM_WARN("Smu memory pool size not supported\n");
1012 goto def_value;
1013 }
1014 adev->pm.smu_prv_buffer_size = amdgpu_smu_memory_pool_size << 28;
1015
1016 return;
1017
1018def_value1:
	DRM_WARN("Not enough system memory\n");
1020def_value:
1021 adev->pm.smu_prv_buffer_size = 0;
1022}
1023
d38ceaf9 1024/**
06ec9070 1025 * amdgpu_device_check_arguments - validate module params
d38ceaf9
AD
1026 *
1027 * @adev: amdgpu_device pointer
1028 *
1029 * Validates certain module parameters and updates
1030 * the associated values used by the driver (all asics).
1031 */
912dfc84 1032static int amdgpu_device_check_arguments(struct amdgpu_device *adev)
d38ceaf9 1033{
912dfc84
EQ
1034 int ret = 0;
1035
5b011235
CZ
1036 if (amdgpu_sched_jobs < 4) {
1037 dev_warn(adev->dev, "sched jobs (%d) must be at least 4\n",
1038 amdgpu_sched_jobs);
1039 amdgpu_sched_jobs = 4;
76117507 1040 } else if (!is_power_of_2(amdgpu_sched_jobs)){
5b011235
CZ
1041 dev_warn(adev->dev, "sched jobs (%d) must be a power of 2\n",
1042 amdgpu_sched_jobs);
1043 amdgpu_sched_jobs = roundup_pow_of_two(amdgpu_sched_jobs);
1044 }
d38ceaf9 1045
83e74db6 1046 if (amdgpu_gart_size != -1 && amdgpu_gart_size < 32) {
f9321cc4
CK
1047 /* gart size must be greater or equal to 32M */
1048 dev_warn(adev->dev, "gart size (%d) too small\n",
1049 amdgpu_gart_size);
83e74db6 1050 amdgpu_gart_size = -1;
d38ceaf9
AD
1051 }
1052
36d38372 1053 if (amdgpu_gtt_size != -1 && amdgpu_gtt_size < 32) {
c4e1a13a 1054 /* gtt size must be greater or equal to 32M */
36d38372
CK
1055 dev_warn(adev->dev, "gtt size (%d) too small\n",
1056 amdgpu_gtt_size);
1057 amdgpu_gtt_size = -1;
d38ceaf9
AD
1058 }
1059
d07f14be
RH
1060 /* valid range is between 4 and 9 inclusive */
1061 if (amdgpu_vm_fragment_size != -1 &&
1062 (amdgpu_vm_fragment_size > 9 || amdgpu_vm_fragment_size < 4)) {
1063 dev_warn(adev->dev, "valid range is between 4 and 9\n");
1064 amdgpu_vm_fragment_size = -1;
1065 }
1066
7951e376
RZ
1067 amdgpu_device_check_smu_prv_buffer_size(adev);
1068
06ec9070 1069 amdgpu_device_check_vm_size(adev);
d38ceaf9 1070
06ec9070 1071 amdgpu_device_check_block_size(adev);
6a7f76e7 1072
19aede77 1073 adev->firmware.load_type = amdgpu_ucode_get_load_type(adev, amdgpu_fw_load_type);
912dfc84
EQ
1074
1075 return ret;
d38ceaf9
AD
1076}
1077
/**
 * amdgpu_switcheroo_set_state - set switcheroo state
 *
 * @pdev: pci dev pointer
 * @state: vga_switcheroo state
 *
 * Callback for the switcheroo driver. Suspends or resumes
 * the asics before or after they are powered up using ACPI methods.
 */
1087static void amdgpu_switcheroo_set_state(struct pci_dev *pdev, enum vga_switcheroo_state state)
1088{
1089 struct drm_device *dev = pci_get_drvdata(pdev);
de185019 1090 int r;
d38ceaf9 1091
31af062a 1092 if (amdgpu_device_supports_boco(dev) && state == VGA_SWITCHEROO_OFF)
d38ceaf9
AD
1093 return;
1094
1095 if (state == VGA_SWITCHEROO_ON) {
7ca85295 1096 pr_info("amdgpu: switched on\n");
d38ceaf9
AD
1097 /* don't suspend or resume card normally */
1098 dev->switch_power_state = DRM_SWITCH_POWER_CHANGING;
1099
de185019
AD
1100 pci_set_power_state(dev->pdev, PCI_D0);
1101 pci_restore_state(dev->pdev);
1102 r = pci_enable_device(dev->pdev);
1103 if (r)
1104 DRM_WARN("pci_enable_device failed (%d)\n", r);
1105 amdgpu_device_resume(dev, true);
d38ceaf9 1106
d38ceaf9
AD
1107 dev->switch_power_state = DRM_SWITCH_POWER_ON;
1108 drm_kms_helper_poll_enable(dev);
1109 } else {
7ca85295 1110 pr_info("amdgpu: switched off\n");
d38ceaf9
AD
1111 drm_kms_helper_poll_disable(dev);
1112 dev->switch_power_state = DRM_SWITCH_POWER_CHANGING;
de185019
AD
1113 amdgpu_device_suspend(dev, true);
1114 pci_save_state(dev->pdev);
1115 /* Shut down the device */
1116 pci_disable_device(dev->pdev);
1117 pci_set_power_state(dev->pdev, PCI_D3cold);
d38ceaf9
AD
1118 dev->switch_power_state = DRM_SWITCH_POWER_OFF;
1119 }
1120}
1121
/**
 * amdgpu_switcheroo_can_switch - see if switcheroo state can change
 *
 * @pdev: pci dev pointer
 *
 * Callback for the switcheroo driver. Check if the switcheroo
 * state can be changed.
 * Returns true if the state can be changed, false if not.
 */
1131static bool amdgpu_switcheroo_can_switch(struct pci_dev *pdev)
1132{
1133 struct drm_device *dev = pci_get_drvdata(pdev);
1134
1135 /*
1136 * FIXME: open_count is protected by drm_global_mutex but that would lead to
1137 * locking inversion with the driver load path. And the access here is
1138 * completely racy anyway. So don't bother with locking for now.
1139 */
1140 return dev->open_count == 0;
1141}
1142
1143static const struct vga_switcheroo_client_ops amdgpu_switcheroo_ops = {
1144 .set_gpu_state = amdgpu_switcheroo_set_state,
1145 .reprobe = NULL,
1146 .can_switch = amdgpu_switcheroo_can_switch,
1147};
1148
1149/**
1150 * amdgpu_device_ip_set_clockgating_state - set the CG state
1151 *
87e3f136 1152 * @dev: amdgpu_device pointer
e3ecdffa
AD
1153 * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
1154 * @state: clockgating state (gate or ungate)
1155 *
1156 * Sets the requested clockgating state for all instances of
1157 * the hardware IP specified.
1158 * Returns the error code from the last instance.
1159 */
43fa561f 1160int amdgpu_device_ip_set_clockgating_state(void *dev,
2990a1fc
AD
1161 enum amd_ip_block_type block_type,
1162 enum amd_clockgating_state state)
d38ceaf9 1163{
43fa561f 1164 struct amdgpu_device *adev = dev;
d38ceaf9
AD
1165 int i, r = 0;
1166
1167 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 1168 if (!adev->ip_blocks[i].status.valid)
9ecbe7f5 1169 continue;
c722865a
RZ
1170 if (adev->ip_blocks[i].version->type != block_type)
1171 continue;
1172 if (!adev->ip_blocks[i].version->funcs->set_clockgating_state)
1173 continue;
1174 r = adev->ip_blocks[i].version->funcs->set_clockgating_state(
1175 (void *)adev, state);
1176 if (r)
1177 DRM_ERROR("set_clockgating_state of IP block <%s> failed %d\n",
1178 adev->ip_blocks[i].version->funcs->name, r);
d38ceaf9
AD
1179 }
1180 return r;
1181}
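/*
 * Illustrative call (a sketch, not taken from this file): gating the GFX
 * block from another IP's code would look roughly like:
 *
 *	amdgpu_device_ip_set_clockgating_state(adev, AMD_IP_BLOCK_TYPE_GFX,
 *					       AMD_CG_STATE_GATE);
 *
 * AMD_IP_BLOCK_TYPE_GFX and AMD_CG_STATE_GATE come from amd_shared.h.
 */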
1182
1183/**
1184 * amdgpu_device_ip_set_powergating_state - set the PG state
1185 *
87e3f136 1186 * @dev: amdgpu_device pointer
e3ecdffa
AD
1187 * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
1188 * @state: powergating state (gate or ungate)
1189 *
1190 * Sets the requested powergating state for all instances of
1191 * the hardware IP specified.
1192 * Returns the error code from the last instance.
1193 */
43fa561f 1194int amdgpu_device_ip_set_powergating_state(void *dev,
2990a1fc
AD
1195 enum amd_ip_block_type block_type,
1196 enum amd_powergating_state state)
d38ceaf9 1197{
43fa561f 1198 struct amdgpu_device *adev = dev;
d38ceaf9
AD
1199 int i, r = 0;
1200
1201 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 1202 if (!adev->ip_blocks[i].status.valid)
9ecbe7f5 1203 continue;
c722865a
RZ
1204 if (adev->ip_blocks[i].version->type != block_type)
1205 continue;
1206 if (!adev->ip_blocks[i].version->funcs->set_powergating_state)
1207 continue;
1208 r = adev->ip_blocks[i].version->funcs->set_powergating_state(
1209 (void *)adev, state);
1210 if (r)
1211 DRM_ERROR("set_powergating_state of IP block <%s> failed %d\n",
1212 adev->ip_blocks[i].version->funcs->name, r);
d38ceaf9
AD
1213 }
1214 return r;
1215}
1216
1217/**
1218 * amdgpu_device_ip_get_clockgating_state - get the CG state
1219 *
1220 * @adev: amdgpu_device pointer
1221 * @flags: clockgating feature flags
1222 *
1223 * Walks the list of IPs on the device and updates the clockgating
1224 * flags for each IP.
1225 * Updates @flags with the feature flags for each hardware IP where
1226 * clockgating is enabled.
1227 */
2990a1fc
AD
1228void amdgpu_device_ip_get_clockgating_state(struct amdgpu_device *adev,
1229 u32 *flags)
6cb2d4e4
HR
1230{
1231 int i;
1232
1233 for (i = 0; i < adev->num_ip_blocks; i++) {
1234 if (!adev->ip_blocks[i].status.valid)
1235 continue;
1236 if (adev->ip_blocks[i].version->funcs->get_clockgating_state)
1237 adev->ip_blocks[i].version->funcs->get_clockgating_state((void *)adev, flags);
1238 }
1239}
1240
1241/**
1242 * amdgpu_device_ip_wait_for_idle - wait for idle
1243 *
1244 * @adev: amdgpu_device pointer
1245 * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
1246 *
1247 * Waits for the request hardware IP to be idle.
1248 * Returns 0 for success or a negative error code on failure.
1249 */
2990a1fc
AD
1250int amdgpu_device_ip_wait_for_idle(struct amdgpu_device *adev,
1251 enum amd_ip_block_type block_type)
5dbbb60b
AD
1252{
1253 int i, r;
1254
1255 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 1256 if (!adev->ip_blocks[i].status.valid)
9ecbe7f5 1257 continue;
a1255107
AD
1258 if (adev->ip_blocks[i].version->type == block_type) {
1259 r = adev->ip_blocks[i].version->funcs->wait_for_idle((void *)adev);
5dbbb60b
AD
1260 if (r)
1261 return r;
1262 break;
1263 }
1264 }
1265 return 0;
1266
1267}
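/*
 * Illustrative call (sketch only): waiting for the GMC block to go idle
 * before touching its registers might look like:
 *
 *	r = amdgpu_device_ip_wait_for_idle(adev, AMD_IP_BLOCK_TYPE_GMC);
 *	if (r)
 *		return r;
 */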
1268
1269/**
1270 * amdgpu_device_ip_is_idle - is the hardware IP idle
1271 *
1272 * @adev: amdgpu_device pointer
1273 * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
1274 *
1275 * Check if the hardware IP is idle or not.
1276 * Returns true if it the IP is idle, false if not.
1277 */
2990a1fc
AD
1278bool amdgpu_device_ip_is_idle(struct amdgpu_device *adev,
1279 enum amd_ip_block_type block_type)
5dbbb60b
AD
1280{
1281 int i;
1282
1283 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 1284 if (!adev->ip_blocks[i].status.valid)
9ecbe7f5 1285 continue;
a1255107
AD
1286 if (adev->ip_blocks[i].version->type == block_type)
1287 return adev->ip_blocks[i].version->funcs->is_idle((void *)adev);
5dbbb60b
AD
1288 }
1289 return true;
1290
1291}
1292
1293/**
1294 * amdgpu_device_ip_get_ip_block - get a hw IP pointer
1295 *
1296 * @adev: amdgpu_device pointer
87e3f136 1297 * @type: Type of hardware IP (SMU, GFX, UVD, etc.)
e3ecdffa
AD
1298 *
1299 * Returns a pointer to the hardware IP block structure
1300 * if it exists for the asic, otherwise NULL.
1301 */
2990a1fc
AD
1302struct amdgpu_ip_block *
1303amdgpu_device_ip_get_ip_block(struct amdgpu_device *adev,
1304 enum amd_ip_block_type type)
d38ceaf9
AD
1305{
1306 int i;
1307
1308 for (i = 0; i < adev->num_ip_blocks; i++)
a1255107 1309 if (adev->ip_blocks[i].version->type == type)
d38ceaf9
AD
1310 return &adev->ip_blocks[i];
1311
1312 return NULL;
1313}
1314
1315/**
2990a1fc 1316 * amdgpu_device_ip_block_version_cmp
d38ceaf9
AD
1317 *
1318 * @adev: amdgpu_device pointer
5fc3aeeb 1319 * @type: enum amd_ip_block_type
d38ceaf9
AD
1320 * @major: major version
1321 * @minor: minor version
1322 *
1323 * return 0 if equal or greater
1324 * return 1 if smaller or the ip_block doesn't exist
1325 */
2990a1fc
AD
1326int amdgpu_device_ip_block_version_cmp(struct amdgpu_device *adev,
1327 enum amd_ip_block_type type,
1328 u32 major, u32 minor)
d38ceaf9 1329{
2990a1fc 1330 struct amdgpu_ip_block *ip_block = amdgpu_device_ip_get_ip_block(adev, type);
d38ceaf9 1331
a1255107
AD
1332 if (ip_block && ((ip_block->version->major > major) ||
1333 ((ip_block->version->major == major) &&
1334 (ip_block->version->minor >= minor))))
d38ceaf9
AD
1335 return 0;
1336
1337 return 1;
1338}
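/*
 * Illustrative check (sketch only): code that needs at least GFX IP version
 * 9.0 could test it like this:
 *
 *	if (amdgpu_device_ip_block_version_cmp(adev, AMD_IP_BLOCK_TYPE_GFX,
 *					       9, 0) == 0) {
 *		... use GFX 9.0+ features ...
 *	}
 */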
1339
a1255107 1340/**
2990a1fc 1341 * amdgpu_device_ip_block_add
a1255107
AD
1342 *
1343 * @adev: amdgpu_device pointer
1344 * @ip_block_version: pointer to the IP to add
1345 *
1346 * Adds the IP block driver information to the collection of IPs
1347 * on the asic.
1348 */
2990a1fc
AD
1349int amdgpu_device_ip_block_add(struct amdgpu_device *adev,
1350 const struct amdgpu_ip_block_version *ip_block_version)
a1255107
AD
1351{
1352 if (!ip_block_version)
1353 return -EINVAL;
1354
e966a725 1355 DRM_INFO("add ip block number %d <%s>\n", adev->num_ip_blocks,
a0bae357
HR
1356 ip_block_version->funcs->name);
1357
a1255107
AD
1358 adev->ip_blocks[adev->num_ip_blocks++].version = ip_block_version;
1359
1360 return 0;
1361}
1362
/**
 * amdgpu_device_enable_virtual_display - enable virtual display feature
 *
 * @adev: amdgpu_device pointer
 *
 * Enables the virtual display feature if the user has enabled it via
 * the module parameter virtual_display. This feature provides virtual
 * display hardware on headless boards or in virtualized environments.
 * This function parses and validates the configuration string specified by
 * the user and configures the virtual display configuration (number of
 * virtual connectors, crtcs, etc.) specified.
 */
483ef985 1375static void amdgpu_device_enable_virtual_display(struct amdgpu_device *adev)
9accf2fd
ED
1376{
1377 adev->enable_virtual_display = false;
1378
1379 if (amdgpu_virtual_display) {
1380 struct drm_device *ddev = adev->ddev;
1381 const char *pci_address_name = pci_name(ddev->pdev);
0f66356d 1382 char *pciaddstr, *pciaddstr_tmp, *pciaddname_tmp, *pciaddname;
9accf2fd
ED
1383
1384 pciaddstr = kstrdup(amdgpu_virtual_display, GFP_KERNEL);
1385 pciaddstr_tmp = pciaddstr;
0f66356d
ED
1386 while ((pciaddname_tmp = strsep(&pciaddstr_tmp, ";"))) {
1387 pciaddname = strsep(&pciaddname_tmp, ",");
967de2a9
YT
1388 if (!strcmp("all", pciaddname)
1389 || !strcmp(pci_address_name, pciaddname)) {
0f66356d
ED
1390 long num_crtc;
1391 int res = -1;
1392
9accf2fd 1393 adev->enable_virtual_display = true;
0f66356d
ED
1394
1395 if (pciaddname_tmp)
1396 res = kstrtol(pciaddname_tmp, 10,
1397 &num_crtc);
1398
1399 if (!res) {
1400 if (num_crtc < 1)
1401 num_crtc = 1;
1402 if (num_crtc > 6)
1403 num_crtc = 6;
1404 adev->mode_info.num_crtc = num_crtc;
1405 } else {
1406 adev->mode_info.num_crtc = 1;
1407 }
9accf2fd
ED
1408 break;
1409 }
1410 }
1411
0f66356d
ED
1412 DRM_INFO("virtual display string:%s, %s:virtual_display:%d, num_crtc:%d\n",
1413 amdgpu_virtual_display, pci_address_name,
1414 adev->enable_virtual_display, adev->mode_info.num_crtc);
9accf2fd
ED
1415
1416 kfree(pciaddstr);
1417 }
1418}
1419
/**
 * amdgpu_device_parse_gpu_info_fw - parse gpu info firmware
 *
 * @adev: amdgpu_device pointer
 *
 * Parses the asic configuration parameters specified in the gpu info
 * firmware and makes them available to the driver for use in configuring
 * the asic.
 * Returns 0 on success, -EINVAL on failure.
 */
e2a75f88
AD
1430static int amdgpu_device_parse_gpu_info_fw(struct amdgpu_device *adev)
1431{
e2a75f88
AD
1432 const char *chip_name;
1433 char fw_name[30];
1434 int err;
1435 const struct gpu_info_firmware_header_v1_0 *hdr;
1436
ab4fe3e1
HR
1437 adev->firmware.gpu_info_fw = NULL;
1438
e2a75f88
AD
1439 switch (adev->asic_type) {
1440 case CHIP_TOPAZ:
1441 case CHIP_TONGA:
1442 case CHIP_FIJI:
e2a75f88 1443 case CHIP_POLARIS10:
cc07f18d 1444 case CHIP_POLARIS11:
e2a75f88 1445 case CHIP_POLARIS12:
cc07f18d 1446 case CHIP_VEGAM:
e2a75f88
AD
1447 case CHIP_CARRIZO:
1448 case CHIP_STONEY:
1449#ifdef CONFIG_DRM_AMDGPU_SI
1450 case CHIP_VERDE:
1451 case CHIP_TAHITI:
1452 case CHIP_PITCAIRN:
1453 case CHIP_OLAND:
1454 case CHIP_HAINAN:
1455#endif
1456#ifdef CONFIG_DRM_AMDGPU_CIK
1457 case CHIP_BONAIRE:
1458 case CHIP_HAWAII:
1459 case CHIP_KAVERI:
1460 case CHIP_KABINI:
1461 case CHIP_MULLINS:
1462#endif
27c0bc71 1463 case CHIP_VEGA20:
e2a75f88
AD
1464 default:
1465 return 0;
1466 case CHIP_VEGA10:
1467 chip_name = "vega10";
1468 break;
3f76dced
AD
1469 case CHIP_VEGA12:
1470 chip_name = "vega12";
1471 break;
2d2e5e7e 1472 case CHIP_RAVEN:
54c4d17e
FX
1473 if (adev->rev_id >= 8)
1474 chip_name = "raven2";
741deade
AD
1475 else if (adev->pdev->device == 0x15d8)
1476 chip_name = "picasso";
54c4d17e
FX
1477 else
1478 chip_name = "raven";
2d2e5e7e 1479 break;
65e60f6e
LM
1480 case CHIP_ARCTURUS:
1481 chip_name = "arcturus";
1482 break;
b51a26a0
HR
1483 case CHIP_RENOIR:
1484 chip_name = "renoir";
1485 break;
23c6268e
HR
1486 case CHIP_NAVI10:
1487 chip_name = "navi10";
1488 break;
ed42cfe1
XY
1489 case CHIP_NAVI14:
1490 chip_name = "navi14";
1491 break;
42b325e5
XY
1492 case CHIP_NAVI12:
1493 chip_name = "navi12";
1494 break;
e2a75f88
AD
1495 }
1496
1497 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_gpu_info.bin", chip_name);
ab4fe3e1 1498 err = request_firmware(&adev->firmware.gpu_info_fw, fw_name, adev->dev);
e2a75f88
AD
1499 if (err) {
1500 dev_err(adev->dev,
1501 "Failed to load gpu_info firmware \"%s\"\n",
1502 fw_name);
1503 goto out;
1504 }
ab4fe3e1 1505 err = amdgpu_ucode_validate(adev->firmware.gpu_info_fw);
e2a75f88
AD
1506 if (err) {
1507 dev_err(adev->dev,
1508 "Failed to validate gpu_info firmware \"%s\"\n",
1509 fw_name);
1510 goto out;
1511 }
1512
ab4fe3e1 1513 hdr = (const struct gpu_info_firmware_header_v1_0 *)adev->firmware.gpu_info_fw->data;
e2a75f88
AD
1514 amdgpu_ucode_print_gpu_info_hdr(&hdr->header);
1515
1516 switch (hdr->version_major) {
1517 case 1:
1518 {
1519 const struct gpu_info_firmware_v1_0 *gpu_info_fw =
ab4fe3e1 1520 (const struct gpu_info_firmware_v1_0 *)(adev->firmware.gpu_info_fw->data +
e2a75f88
AD
1521 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1522
ec51d3fa
XY
1523 if (amdgpu_discovery && adev->asic_type >= CHIP_NAVI10)
1524 goto parse_soc_bounding_box;
1525
b5ab16bf
AD
1526 adev->gfx.config.max_shader_engines = le32_to_cpu(gpu_info_fw->gc_num_se);
1527 adev->gfx.config.max_cu_per_sh = le32_to_cpu(gpu_info_fw->gc_num_cu_per_sh);
1528 adev->gfx.config.max_sh_per_se = le32_to_cpu(gpu_info_fw->gc_num_sh_per_se);
1529 adev->gfx.config.max_backends_per_se = le32_to_cpu(gpu_info_fw->gc_num_rb_per_se);
e2a75f88 1530 adev->gfx.config.max_texture_channel_caches =
b5ab16bf
AD
1531 le32_to_cpu(gpu_info_fw->gc_num_tccs);
1532 adev->gfx.config.max_gprs = le32_to_cpu(gpu_info_fw->gc_num_gprs);
1533 adev->gfx.config.max_gs_threads = le32_to_cpu(gpu_info_fw->gc_num_max_gs_thds);
1534 adev->gfx.config.gs_vgt_table_depth = le32_to_cpu(gpu_info_fw->gc_gs_table_depth);
1535 adev->gfx.config.gs_prim_buffer_depth = le32_to_cpu(gpu_info_fw->gc_gsprim_buff_depth);
e2a75f88 1536 adev->gfx.config.double_offchip_lds_buf =
b5ab16bf
AD
1537 le32_to_cpu(gpu_info_fw->gc_double_offchip_lds_buffer);
1538 adev->gfx.cu_info.wave_front_size = le32_to_cpu(gpu_info_fw->gc_wave_size);
51fd0370
HZ
1539 adev->gfx.cu_info.max_waves_per_simd =
1540 le32_to_cpu(gpu_info_fw->gc_max_waves_per_simd);
1541 adev->gfx.cu_info.max_scratch_slots_per_cu =
1542 le32_to_cpu(gpu_info_fw->gc_max_scratch_slots_per_cu);
1543 adev->gfx.cu_info.lds_size = le32_to_cpu(gpu_info_fw->gc_lds_size);
48321c3d 1544 if (hdr->version_minor >= 1) {
35c2e910
HZ
1545 const struct gpu_info_firmware_v1_1 *gpu_info_fw =
1546 (const struct gpu_info_firmware_v1_1 *)(adev->firmware.gpu_info_fw->data +
1547 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1548 adev->gfx.config.num_sc_per_sh =
1549 le32_to_cpu(gpu_info_fw->num_sc_per_sh);
1550 adev->gfx.config.num_packer_per_sc =
1551 le32_to_cpu(gpu_info_fw->num_packer_per_sc);
1552 }
ec51d3fa
XY
1553
parse_soc_bounding_box:
		/*
		 * soc bounding box info is not integrated in discovery table,
		 * we always need to parse it from gpu info firmware.
		 */
48321c3d
HW
1559 if (hdr->version_minor == 2) {
1560 const struct gpu_info_firmware_v1_2 *gpu_info_fw =
1561 (const struct gpu_info_firmware_v1_2 *)(adev->firmware.gpu_info_fw->data +
1562 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1563 adev->dm.soc_bounding_box = &gpu_info_fw->soc_bounding_box;
1564 }
e2a75f88
AD
1565 break;
1566 }
1567 default:
1568 dev_err(adev->dev,
1569 "Unsupported gpu_info table %d\n", hdr->header.ucode_version);
1570 err = -EINVAL;
1571 goto out;
1572 }
1573out:
e2a75f88
AD
1574 return err;
1575}
1576
/**
 * amdgpu_device_ip_early_init - run early init for hardware IPs
 *
 * @adev: amdgpu_device pointer
 *
 * Early initialization pass for hardware IPs. The hardware IPs that make
 * up each asic are discovered and each IP's early_init callback is run. This
 * is the first stage in initializing the asic.
 * Returns 0 on success, negative error code on failure.
 */
06ec9070 1587static int amdgpu_device_ip_early_init(struct amdgpu_device *adev)
d38ceaf9 1588{
aaa36a97 1589 int i, r;
d38ceaf9 1590
483ef985 1591 amdgpu_device_enable_virtual_display(adev);
a6be7570 1592
d38ceaf9 1593 switch (adev->asic_type) {
aaa36a97
AD
1594 case CHIP_TOPAZ:
1595 case CHIP_TONGA:
48299f95 1596 case CHIP_FIJI:
2cc0c0b5 1597 case CHIP_POLARIS10:
32cc7e53 1598 case CHIP_POLARIS11:
c4642a47 1599 case CHIP_POLARIS12:
32cc7e53 1600 case CHIP_VEGAM:
aaa36a97 1601 case CHIP_CARRIZO:
39bb0c92
SL
1602 case CHIP_STONEY:
1603 if (adev->asic_type == CHIP_CARRIZO || adev->asic_type == CHIP_STONEY)
aaa36a97
AD
1604 adev->family = AMDGPU_FAMILY_CZ;
1605 else
1606 adev->family = AMDGPU_FAMILY_VI;
1607
1608 r = vi_set_ip_blocks(adev);
1609 if (r)
1610 return r;
1611 break;
33f34802
KW
1612#ifdef CONFIG_DRM_AMDGPU_SI
1613 case CHIP_VERDE:
1614 case CHIP_TAHITI:
1615 case CHIP_PITCAIRN:
1616 case CHIP_OLAND:
1617 case CHIP_HAINAN:
295d0daf 1618 adev->family = AMDGPU_FAMILY_SI;
33f34802
KW
1619 r = si_set_ip_blocks(adev);
1620 if (r)
1621 return r;
1622 break;
1623#endif
a2e73f56
AD
1624#ifdef CONFIG_DRM_AMDGPU_CIK
1625 case CHIP_BONAIRE:
1626 case CHIP_HAWAII:
1627 case CHIP_KAVERI:
1628 case CHIP_KABINI:
1629 case CHIP_MULLINS:
1630 if ((adev->asic_type == CHIP_BONAIRE) || (adev->asic_type == CHIP_HAWAII))
1631 adev->family = AMDGPU_FAMILY_CI;
1632 else
1633 adev->family = AMDGPU_FAMILY_KV;
1634
1635 r = cik_set_ip_blocks(adev);
1636 if (r)
1637 return r;
1638 break;
1639#endif
e48a3cd9
AD
1640 case CHIP_VEGA10:
1641 case CHIP_VEGA12:
e4bd8170 1642 case CHIP_VEGA20:
e48a3cd9 1643 case CHIP_RAVEN:
61cf44c1 1644 case CHIP_ARCTURUS:
b51a26a0
HR
1645 case CHIP_RENOIR:
1646 if (adev->asic_type == CHIP_RAVEN ||
1647 adev->asic_type == CHIP_RENOIR)
2ca8a5d2
CZ
1648 adev->family = AMDGPU_FAMILY_RV;
1649 else
1650 adev->family = AMDGPU_FAMILY_AI;
460826e6
KW
1651
1652 r = soc15_set_ip_blocks(adev);
1653 if (r)
1654 return r;
1655 break;
0a5b8c7b 1656 case CHIP_NAVI10:
7ecb5cd4 1657 case CHIP_NAVI14:
4808cf9c 1658 case CHIP_NAVI12:
0a5b8c7b
HR
1659 adev->family = AMDGPU_FAMILY_NV;
1660
1661 r = nv_set_ip_blocks(adev);
1662 if (r)
1663 return r;
1664 break;
d38ceaf9
AD
1665 default:
1666 /* FIXME: not supported yet */
1667 return -EINVAL;
1668 }
1669
e2a75f88
AD
1670 r = amdgpu_device_parse_gpu_info_fw(adev);
1671 if (r)
1672 return r;
1673
ec51d3fa
XY
1674 if (amdgpu_discovery && adev->asic_type >= CHIP_NAVI10)
1675 amdgpu_discovery_get_gfx_info(adev);
1676
1884734a 1677 amdgpu_amdkfd_device_probe(adev);
1678
3149d9da
XY
1679 if (amdgpu_sriov_vf(adev)) {
1680 r = amdgpu_virt_request_full_gpu(adev, true);
1681 if (r)
5ffa61c1 1682 return -EAGAIN;
3149d9da
XY
1683 }
1684
3b94fb10 1685 adev->pm.pp_feature = amdgpu_pp_feature_mask;
a35ad98b 1686 if (amdgpu_sriov_vf(adev) || sched_policy == KFD_SCHED_POLICY_NO_HWS)
00544006 1687 adev->pm.pp_feature &= ~PP_GFXOFF_MASK;
00f54b97 1688
d38ceaf9
AD
1689 for (i = 0; i < adev->num_ip_blocks; i++) {
1690 if ((amdgpu_ip_block_mask & (1 << i)) == 0) {
ed8cf00c
HR
1691 DRM_ERROR("disabled ip block: %d <%s>\n",
1692 i, adev->ip_blocks[i].version->funcs->name);
a1255107 1693 adev->ip_blocks[i].status.valid = false;
d38ceaf9 1694 } else {
a1255107
AD
1695 if (adev->ip_blocks[i].version->funcs->early_init) {
1696 r = adev->ip_blocks[i].version->funcs->early_init((void *)adev);
2c1a2784 1697 if (r == -ENOENT) {
a1255107 1698 adev->ip_blocks[i].status.valid = false;
2c1a2784 1699 } else if (r) {
a1255107
AD
1700 DRM_ERROR("early_init of IP block <%s> failed %d\n",
1701 adev->ip_blocks[i].version->funcs->name, r);
d38ceaf9 1702 return r;
2c1a2784 1703 } else {
a1255107 1704 adev->ip_blocks[i].status.valid = true;
2c1a2784 1705 }
974e6b64 1706 } else {
a1255107 1707 adev->ip_blocks[i].status.valid = true;
d38ceaf9 1708 }
d38ceaf9 1709 }
21a249ca
AD
1710 /* get the vbios after the asic_funcs are set up */
1711 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON) {
1712 /* Read BIOS */
1713 if (!amdgpu_get_bios(adev))
1714 return -EINVAL;
1715
1716 r = amdgpu_atombios_init(adev);
1717 if (r) {
1718 dev_err(adev->dev, "amdgpu_atombios_init failed\n");
1719 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_INIT_FAIL, 0, 0);
1720 return r;
1721 }
1722 }
d38ceaf9
AD
1723 }
1724
395d1fb9
NH
1725 adev->cg_flags &= amdgpu_cg_mask;
1726 adev->pg_flags &= amdgpu_pg_mask;
1727
d38ceaf9
AD
1728 return 0;
1729}
1730
0a4f2520
RZ
1731static int amdgpu_device_ip_hw_init_phase1(struct amdgpu_device *adev)
1732{
1733 int i, r;
1734
1735 for (i = 0; i < adev->num_ip_blocks; i++) {
1736 if (!adev->ip_blocks[i].status.sw)
1737 continue;
1738 if (adev->ip_blocks[i].status.hw)
1739 continue;
1740 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
2d11fd3f 1741 (amdgpu_sriov_vf(adev) && (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP)) ||
0a4f2520
RZ
1742 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH) {
1743 r = adev->ip_blocks[i].version->funcs->hw_init(adev);
1744 if (r) {
1745 DRM_ERROR("hw_init of IP block <%s> failed %d\n",
1746 adev->ip_blocks[i].version->funcs->name, r);
1747 return r;
1748 }
1749 adev->ip_blocks[i].status.hw = true;
1750 }
1751 }
1752
1753 return 0;
1754}
1755
1756static int amdgpu_device_ip_hw_init_phase2(struct amdgpu_device *adev)
1757{
1758 int i, r;
1759
1760 for (i = 0; i < adev->num_ip_blocks; i++) {
1761 if (!adev->ip_blocks[i].status.sw)
1762 continue;
1763 if (adev->ip_blocks[i].status.hw)
1764 continue;
1765 r = adev->ip_blocks[i].version->funcs->hw_init(adev);
1766 if (r) {
1767 DRM_ERROR("hw_init of IP block <%s> failed %d\n",
1768 adev->ip_blocks[i].version->funcs->name, r);
1769 return r;
1770 }
1771 adev->ip_blocks[i].status.hw = true;
1772 }
1773
1774 return 0;
1775}
1776
7a3e0bb2
RZ
1777static int amdgpu_device_fw_loading(struct amdgpu_device *adev)
1778{
1779 int r = 0;
1780 int i;
80f41f84 1781 uint32_t smu_version;
7a3e0bb2
RZ
1782
1783 if (adev->asic_type >= CHIP_VEGA10) {
1784 for (i = 0; i < adev->num_ip_blocks; i++) {
482f0e53
ML
1785 if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_PSP)
1786 continue;
1787
 1788 /* no need to do the fw loading again if already done */
1789 if (adev->ip_blocks[i].status.hw == true)
1790 break;
1791
1792 if (adev->in_gpu_reset || adev->in_suspend) {
1793 r = adev->ip_blocks[i].version->funcs->resume(adev);
1794 if (r) {
1795 DRM_ERROR("resume of IP block <%s> failed %d\n",
7a3e0bb2 1796 adev->ip_blocks[i].version->funcs->name, r);
482f0e53
ML
1797 return r;
1798 }
1799 } else {
1800 r = adev->ip_blocks[i].version->funcs->hw_init(adev);
1801 if (r) {
1802 DRM_ERROR("hw_init of IP block <%s> failed %d\n",
1803 adev->ip_blocks[i].version->funcs->name, r);
1804 return r;
7a3e0bb2 1805 }
7a3e0bb2 1806 }
482f0e53
ML
1807
1808 adev->ip_blocks[i].status.hw = true;
1809 break;
7a3e0bb2
RZ
1810 }
1811 }
482f0e53 1812
8973d9ec
ED
1813 if (!amdgpu_sriov_vf(adev) || adev->asic_type == CHIP_TONGA)
1814 r = amdgpu_pm_load_smu_firmware(adev, &smu_version);
7a3e0bb2 1815
80f41f84 1816 return r;
7a3e0bb2
RZ
1817}
1818
e3ecdffa
AD
1819/**
1820 * amdgpu_device_ip_init - run init for hardware IPs
1821 *
1822 * @adev: amdgpu_device pointer
1823 *
1824 * Main initialization pass for hardware IPs. The list of all the hardware
1825 * IPs that make up the asic is walked and the sw_init and hw_init callbacks
1826 * are run. sw_init initializes the software state associated with each IP
1827 * and hw_init initializes the hardware associated with each IP.
1828 * Returns 0 on success, negative error code on failure.
1829 */
06ec9070 1830static int amdgpu_device_ip_init(struct amdgpu_device *adev)
d38ceaf9
AD
1831{
1832 int i, r;
1833
c030f2e4 1834 r = amdgpu_ras_init(adev);
1835 if (r)
1836 return r;
1837
d38ceaf9 1838 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 1839 if (!adev->ip_blocks[i].status.valid)
d38ceaf9 1840 continue;
a1255107 1841 r = adev->ip_blocks[i].version->funcs->sw_init((void *)adev);
2c1a2784 1842 if (r) {
a1255107
AD
1843 DRM_ERROR("sw_init of IP block <%s> failed %d\n",
1844 adev->ip_blocks[i].version->funcs->name, r);
72d3f592 1845 goto init_failed;
2c1a2784 1846 }
a1255107 1847 adev->ip_blocks[i].status.sw = true;
bfca0289 1848
d38ceaf9 1849 /* need to do gmc hw init early so we can allocate gpu mem */
a1255107 1850 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) {
06ec9070 1851 r = amdgpu_device_vram_scratch_init(adev);
2c1a2784
AD
1852 if (r) {
1853 DRM_ERROR("amdgpu_vram_scratch_init failed %d\n", r);
72d3f592 1854 goto init_failed;
2c1a2784 1855 }
a1255107 1856 r = adev->ip_blocks[i].version->funcs->hw_init((void *)adev);
2c1a2784
AD
1857 if (r) {
1858 DRM_ERROR("hw_init %d failed %d\n", i, r);
72d3f592 1859 goto init_failed;
2c1a2784 1860 }
06ec9070 1861 r = amdgpu_device_wb_init(adev);
2c1a2784 1862 if (r) {
06ec9070 1863 DRM_ERROR("amdgpu_device_wb_init failed %d\n", r);
72d3f592 1864 goto init_failed;
2c1a2784 1865 }
a1255107 1866 adev->ip_blocks[i].status.hw = true;
2493664f
ML
1867
1868 /* right after GMC hw init, we create CSA */
f92d5c61 1869 if (amdgpu_mcbp || amdgpu_sriov_vf(adev)) {
1e256e27
RZ
1870 r = amdgpu_allocate_static_csa(adev, &adev->virt.csa_obj,
1871 AMDGPU_GEM_DOMAIN_VRAM,
1872 AMDGPU_CSA_SIZE);
2493664f
ML
1873 if (r) {
1874 DRM_ERROR("allocate CSA failed %d\n", r);
72d3f592 1875 goto init_failed;
2493664f
ML
1876 }
1877 }
d38ceaf9
AD
1878 }
1879 }
1880
c9ffa427
YT
1881 if (amdgpu_sriov_vf(adev))
1882 amdgpu_virt_init_data_exchange(adev);
1883
533aed27
AG
1884 r = amdgpu_ib_pool_init(adev);
1885 if (r) {
1886 dev_err(adev->dev, "IB initialization failed (%d).\n", r);
1887 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_IB_INIT_FAIL, 0, r);
1888 goto init_failed;
1889 }
1890
c8963ea4
RZ
1891 r = amdgpu_ucode_create_bo(adev); /* create ucode bo when sw_init complete*/
1892 if (r)
72d3f592 1893 goto init_failed;
0a4f2520
RZ
1894
1895 r = amdgpu_device_ip_hw_init_phase1(adev);
1896 if (r)
72d3f592 1897 goto init_failed;
0a4f2520 1898
7a3e0bb2
RZ
1899 r = amdgpu_device_fw_loading(adev);
1900 if (r)
72d3f592 1901 goto init_failed;
7a3e0bb2 1902
0a4f2520
RZ
1903 r = amdgpu_device_ip_hw_init_phase2(adev);
1904 if (r)
72d3f592 1905 goto init_failed;
d38ceaf9 1906
121a2bc6
AG
1907 /*
 1908 * Retired pages will be loaded from eeprom and reserved here;
 1909 * this must be called after amdgpu_device_ip_hw_init_phase2 since
 1910 * for some ASICs the RAS EEPROM code relies on the SMU being fully
 1911 * functional for I2C communication, which is only true at this point.
 1912 * recovery_init may fail, but it can free all resources allocated by
 1913 * itself and its failure should not stop the amdgpu init process.
 1914 *
 1915 * Note: theoretically, this should be called before all vram allocations
 1916 * to protect retired pages from being abused.
1917 */
1918 amdgpu_ras_recovery_init(adev);
1919
3e2e2ab5
HZ
1920 if (adev->gmc.xgmi.num_physical_nodes > 1)
1921 amdgpu_xgmi_add_device(adev);
1884734a 1922 amdgpu_amdkfd_device_init(adev);
c6332b97 1923
72d3f592 1924init_failed:
c9ffa427 1925 if (amdgpu_sriov_vf(adev))
c6332b97 1926 amdgpu_virt_release_full_gpu(adev, true);
1927
72d3f592 1928 return r;
d38ceaf9
AD
1929}
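/*
 * Illustrative note (not part of the original file): the overall ordering in
 * amdgpu_device_ip_init() above is RAS init, then sw_init for every block
 * (with GMC hw brought up early for VRAM scratch/wb/CSA), then the IB pool
 * and ucode BO, then hw_init phase1 (COMMON, PSP under SR-IOV, IH), firmware
 * loading, and finally hw_init phase2 for the remaining blocks.
 */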
1930
e3ecdffa
AD
1931/**
1932 * amdgpu_device_fill_reset_magic - writes reset magic to gart pointer
1933 *
1934 * @adev: amdgpu_device pointer
1935 *
1936 * Writes a reset magic value to the gart pointer in VRAM. The driver calls
1937 * this function before a GPU reset. If the value is retained after a
 1938 * GPU reset, VRAM has not been lost. Some GPU resets may destroy VRAM contents.
1939 */
06ec9070 1940static void amdgpu_device_fill_reset_magic(struct amdgpu_device *adev)
0c49e0b8
CZ
1941{
1942 memcpy(adev->reset_magic, adev->gart.ptr, AMDGPU_RESET_MAGIC_NUM);
1943}
1944
e3ecdffa
AD
1945/**
1946 * amdgpu_device_check_vram_lost - check if vram is valid
1947 *
1948 * @adev: amdgpu_device pointer
1949 *
1950 * Checks the reset magic value written to the gart pointer in VRAM.
1951 * The driver calls this after a GPU reset to see if the contents of
 1952 * VRAM were lost or not.
 1953 * Returns true if VRAM is lost, false if not.
1954 */
06ec9070 1955static bool amdgpu_device_check_vram_lost(struct amdgpu_device *adev)
0c49e0b8
CZ
1956{
1957 return !!memcmp(adev->gart.ptr, adev->reset_magic,
1958 AMDGPU_RESET_MAGIC_NUM);
1959}
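/*
 * Illustrative sketch (not part of the original file): a hypothetical reset
 * path could pair the two helpers above as below, assuming locals 'r' and
 * 'vram_lost'; the real reset code in the driver does the equivalent.
 */
#if 0
	amdgpu_device_fill_reset_magic(adev);		/* snapshot before the reset */
	r = amdgpu_asic_reset(adev);			/* any full ASIC reset */
	vram_lost = amdgpu_device_check_vram_lost(adev);/* compare against the magic */
	if (vram_lost)
		DRM_INFO("VRAM contents were lost across the reset\n");
#endif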
1960
e3ecdffa 1961/**
1112a46b 1962 * amdgpu_device_set_cg_state - set clockgating for amdgpu device
e3ecdffa
AD
1963 *
1964 * @adev: amdgpu_device pointer
b8b72130 1965 * @state: clockgating state (gate or ungate)
e3ecdffa 1966 *
e3ecdffa 1967 * The list of all the hardware IPs that make up the asic is walked and the
1112a46b
RZ
1968 * set_clockgating_state callbacks are run.
 1969 * The late-init pass enables clockgating for hardware IPs;
 1970 * the fini and suspend passes disable it.
e3ecdffa
AD
1971 * Returns 0 on success, negative error code on failure.
1972 */
fdd34271 1973
1112a46b
RZ
1974static int amdgpu_device_set_cg_state(struct amdgpu_device *adev,
1975 enum amd_clockgating_state state)
d38ceaf9 1976{
1112a46b 1977 int i, j, r;
d38ceaf9 1978
4a2ba394
SL
1979 if (amdgpu_emu_mode == 1)
1980 return 0;
1981
1112a46b
RZ
1982 for (j = 0; j < adev->num_ip_blocks; j++) {
1983 i = state == AMD_CG_STATE_GATE ? j : adev->num_ip_blocks - j - 1;
a2d31dc3 1984 if (!adev->ip_blocks[i].status.late_initialized)
d38ceaf9 1985 continue;
4a446d55 1986 /* skip CG for VCE/UVD, it's handled specially */
a1255107 1987 if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_UVD &&
57716327 1988 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCE &&
34319b32 1989 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCN &&
52f2e779 1990 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_JPEG &&
57716327 1991 adev->ip_blocks[i].version->funcs->set_clockgating_state) {
4a446d55 1992 /* enable clockgating to save power */
a1255107 1993 r = adev->ip_blocks[i].version->funcs->set_clockgating_state((void *)adev,
1112a46b 1994 state);
4a446d55
AD
1995 if (r) {
1996 DRM_ERROR("set_clockgating_state(gate) of IP block <%s> failed %d\n",
a1255107 1997 adev->ip_blocks[i].version->funcs->name, r);
4a446d55
AD
1998 return r;
1999 }
b0b00ff1 2000 }
d38ceaf9 2001 }
06b18f61 2002
c9f96fd5
RZ
2003 return 0;
2004}
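/*
 * Illustrative note (not part of the original file): the index computation
 * above walks the IP list forward when gating and in reverse when ungating.
 * With four blocks and state != AMD_CG_STATE_GATE, j = 0,1,2,3 maps to
 * i = 3,2,1,0, so blocks are ungated in the opposite order they were gated.
 */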
2005
1112a46b 2006static int amdgpu_device_set_pg_state(struct amdgpu_device *adev, enum amd_powergating_state state)
c9f96fd5 2007{
1112a46b 2008 int i, j, r;
06b18f61 2009
c9f96fd5
RZ
2010 if (amdgpu_emu_mode == 1)
2011 return 0;
2012
1112a46b
RZ
2013 for (j = 0; j < adev->num_ip_blocks; j++) {
2014 i = state == AMD_PG_STATE_GATE ? j : adev->num_ip_blocks - j - 1;
a2d31dc3 2015 if (!adev->ip_blocks[i].status.late_initialized)
c9f96fd5
RZ
2016 continue;
2017 /* skip CG for VCE/UVD, it's handled specially */
2018 if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_UVD &&
2019 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCE &&
2020 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCN &&
52f2e779 2021 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_JPEG &&
c9f96fd5
RZ
2022 adev->ip_blocks[i].version->funcs->set_powergating_state) {
2023 /* enable powergating to save power */
2024 r = adev->ip_blocks[i].version->funcs->set_powergating_state((void *)adev,
1112a46b 2025 state);
c9f96fd5
RZ
2026 if (r) {
2027 DRM_ERROR("set_powergating_state(gate) of IP block <%s> failed %d\n",
2028 adev->ip_blocks[i].version->funcs->name, r);
2029 return r;
2030 }
2031 }
2032 }
2dc80b00
S
2033 return 0;
2034}
2035
beff74bc
AD
2036static int amdgpu_device_enable_mgpu_fan_boost(void)
2037{
2038 struct amdgpu_gpu_instance *gpu_ins;
2039 struct amdgpu_device *adev;
2040 int i, ret = 0;
2041
2042 mutex_lock(&mgpu_info.mutex);
2043
2044 /*
2045 * MGPU fan boost feature should be enabled
2046 * only when there are two or more dGPUs in
2047 * the system
2048 */
2049 if (mgpu_info.num_dgpu < 2)
2050 goto out;
2051
2052 for (i = 0; i < mgpu_info.num_dgpu; i++) {
2053 gpu_ins = &(mgpu_info.gpu_ins[i]);
2054 adev = gpu_ins->adev;
2055 if (!(adev->flags & AMD_IS_APU) &&
2056 !gpu_ins->mgpu_fan_enabled &&
2057 adev->powerplay.pp_funcs &&
2058 adev->powerplay.pp_funcs->enable_mgpu_fan_boost) {
2059 ret = amdgpu_dpm_enable_mgpu_fan_boost(adev);
2060 if (ret)
2061 break;
2062
2063 gpu_ins->mgpu_fan_enabled = 1;
2064 }
2065 }
2066
2067out:
2068 mutex_unlock(&mgpu_info.mutex);
2069
2070 return ret;
2071}
2072
e3ecdffa
AD
2073/**
2074 * amdgpu_device_ip_late_init - run late init for hardware IPs
2075 *
2076 * @adev: amdgpu_device pointer
2077 *
2078 * Late initialization pass for hardware IPs. The list of all the hardware
2079 * IPs that make up the asic is walked and the late_init callbacks are run.
2080 * late_init covers any special initialization that an IP requires
 2081 * after all of the IPs have been initialized or something that needs to happen
2082 * late in the init process.
2083 * Returns 0 on success, negative error code on failure.
2084 */
06ec9070 2085static int amdgpu_device_ip_late_init(struct amdgpu_device *adev)
2dc80b00 2086{
60599a03 2087 struct amdgpu_gpu_instance *gpu_instance;
2dc80b00
S
2088 int i = 0, r;
2089
2090 for (i = 0; i < adev->num_ip_blocks; i++) {
73f847db 2091 if (!adev->ip_blocks[i].status.hw)
2dc80b00
S
2092 continue;
2093 if (adev->ip_blocks[i].version->funcs->late_init) {
2094 r = adev->ip_blocks[i].version->funcs->late_init((void *)adev);
2095 if (r) {
2096 DRM_ERROR("late_init of IP block <%s> failed %d\n",
2097 adev->ip_blocks[i].version->funcs->name, r);
2098 return r;
2099 }
2dc80b00 2100 }
73f847db 2101 adev->ip_blocks[i].status.late_initialized = true;
2dc80b00
S
2102 }
2103
1112a46b
RZ
2104 amdgpu_device_set_cg_state(adev, AMD_CG_STATE_GATE);
2105 amdgpu_device_set_pg_state(adev, AMD_PG_STATE_GATE);
916ac57f 2106
06ec9070 2107 amdgpu_device_fill_reset_magic(adev);
d38ceaf9 2108
beff74bc
AD
2109 r = amdgpu_device_enable_mgpu_fan_boost();
2110 if (r)
2111 DRM_ERROR("enable mgpu fan boost failed (%d).\n", r);
2112
60599a03
EQ
2113
2114 if (adev->gmc.xgmi.num_physical_nodes > 1) {
2115 mutex_lock(&mgpu_info.mutex);
2116
2117 /*
2118 * Reset device p-state to low as this was booted with high.
2119 *
2120 * This should be performed only after all devices from the same
2121 * hive get initialized.
2122 *
 2123 * However, the number of devices in the hive is not known in advance;
 2124 * it is counted one by one as the devices initialize.
 2125 *
 2126 * So we wait until all XGMI interlinked devices have initialized.
2127 * This may bring some delays as those devices may come from
2128 * different hives. But that should be OK.
2129 */
2130 if (mgpu_info.num_dgpu == adev->gmc.xgmi.num_physical_nodes) {
2131 for (i = 0; i < mgpu_info.num_gpu; i++) {
2132 gpu_instance = &(mgpu_info.gpu_ins[i]);
2133 if (gpu_instance->adev->flags & AMD_IS_APU)
2134 continue;
2135
2136 r = amdgpu_xgmi_set_pstate(gpu_instance->adev, 0);
2137 if (r) {
2138 DRM_ERROR("pstate setting failed (%d).\n", r);
2139 break;
2140 }
2141 }
2142 }
2143
2144 mutex_unlock(&mgpu_info.mutex);
2145 }
2146
d38ceaf9
AD
2147 return 0;
2148}
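/*
 * Illustrative note (not part of the original file): the XGMI p-state reset
 * above only runs once the number of registered dGPUs matches
 * adev->gmc.xgmi.num_physical_nodes, e.g. only after the fourth device of a
 * 4-node hive has finished its init.
 */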
2149
e3ecdffa
AD
2150/**
2151 * amdgpu_device_ip_fini - run fini for hardware IPs
2152 *
2153 * @adev: amdgpu_device pointer
2154 *
2155 * Main teardown pass for hardware IPs. The list of all the hardware
2156 * IPs that make up the asic is walked and the hw_fini and sw_fini callbacks
2157 * are run. hw_fini tears down the hardware associated with each IP
2158 * and sw_fini tears down any software state associated with each IP.
2159 * Returns 0 on success, negative error code on failure.
2160 */
06ec9070 2161static int amdgpu_device_ip_fini(struct amdgpu_device *adev)
d38ceaf9
AD
2162{
2163 int i, r;
2164
c030f2e4 2165 amdgpu_ras_pre_fini(adev);
2166
a82400b5
AG
2167 if (adev->gmc.xgmi.num_physical_nodes > 1)
2168 amdgpu_xgmi_remove_device(adev);
2169
1884734a 2170 amdgpu_amdkfd_device_fini(adev);
05df1f01
RZ
2171
2172 amdgpu_device_set_pg_state(adev, AMD_PG_STATE_UNGATE);
fdd34271
RZ
2173 amdgpu_device_set_cg_state(adev, AMD_CG_STATE_UNGATE);
2174
3e96dbfd
AD
2175 /* need to disable SMC first */
2176 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 2177 if (!adev->ip_blocks[i].status.hw)
3e96dbfd 2178 continue;
fdd34271 2179 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) {
a1255107 2180 r = adev->ip_blocks[i].version->funcs->hw_fini((void *)adev);
3e96dbfd
AD
2181 /* XXX handle errors */
2182 if (r) {
2183 DRM_DEBUG("hw_fini of IP block <%s> failed %d\n",
a1255107 2184 adev->ip_blocks[i].version->funcs->name, r);
3e96dbfd 2185 }
a1255107 2186 adev->ip_blocks[i].status.hw = false;
3e96dbfd
AD
2187 break;
2188 }
2189 }
2190
d38ceaf9 2191 for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
a1255107 2192 if (!adev->ip_blocks[i].status.hw)
d38ceaf9 2193 continue;
8201a67a 2194
a1255107 2195 r = adev->ip_blocks[i].version->funcs->hw_fini((void *)adev);
d38ceaf9 2196 /* XXX handle errors */
2c1a2784 2197 if (r) {
a1255107
AD
2198 DRM_DEBUG("hw_fini of IP block <%s> failed %d\n",
2199 adev->ip_blocks[i].version->funcs->name, r);
2c1a2784 2200 }
8201a67a 2201
a1255107 2202 adev->ip_blocks[i].status.hw = false;
d38ceaf9
AD
2203 }
2204
9950cda2 2205
d38ceaf9 2206 for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
a1255107 2207 if (!adev->ip_blocks[i].status.sw)
d38ceaf9 2208 continue;
c12aba3a
ML
2209
2210 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) {
c8963ea4 2211 amdgpu_ucode_free_bo(adev);
1e256e27 2212 amdgpu_free_static_csa(&adev->virt.csa_obj);
c12aba3a
ML
2213 amdgpu_device_wb_fini(adev);
2214 amdgpu_device_vram_scratch_fini(adev);
533aed27 2215 amdgpu_ib_pool_fini(adev);
c12aba3a
ML
2216 }
2217
a1255107 2218 r = adev->ip_blocks[i].version->funcs->sw_fini((void *)adev);
d38ceaf9 2219 /* XXX handle errors */
2c1a2784 2220 if (r) {
a1255107
AD
2221 DRM_DEBUG("sw_fini of IP block <%s> failed %d\n",
2222 adev->ip_blocks[i].version->funcs->name, r);
2c1a2784 2223 }
a1255107
AD
2224 adev->ip_blocks[i].status.sw = false;
2225 adev->ip_blocks[i].status.valid = false;
d38ceaf9
AD
2226 }
2227
a6dcfd9c 2228 for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
a1255107 2229 if (!adev->ip_blocks[i].status.late_initialized)
8a2eef1d 2230 continue;
a1255107
AD
2231 if (adev->ip_blocks[i].version->funcs->late_fini)
2232 adev->ip_blocks[i].version->funcs->late_fini((void *)adev);
2233 adev->ip_blocks[i].status.late_initialized = false;
a6dcfd9c
ML
2234 }
2235
c030f2e4 2236 amdgpu_ras_fini(adev);
2237
030308fc 2238 if (amdgpu_sriov_vf(adev))
24136135
ML
2239 if (amdgpu_virt_release_full_gpu(adev, false))
2240 DRM_ERROR("failed to release exclusive mode on fini\n");
2493664f 2241
d38ceaf9
AD
2242 return 0;
2243}
2244
e3ecdffa 2245/**
beff74bc 2246 * amdgpu_device_delayed_init_work_handler - work handler for IB tests
e3ecdffa 2247 *
1112a46b 2248 * @work: work_struct.
e3ecdffa 2249 */
beff74bc 2250static void amdgpu_device_delayed_init_work_handler(struct work_struct *work)
2dc80b00
S
2251{
2252 struct amdgpu_device *adev =
beff74bc 2253 container_of(work, struct amdgpu_device, delayed_init_work.work);
916ac57f
RZ
2254 int r;
2255
2256 r = amdgpu_ib_ring_tests(adev);
2257 if (r)
2258 DRM_ERROR("ib ring test failed (%d).\n", r);
2dc80b00
S
2259}
2260
1e317b99
RZ
2261static void amdgpu_device_delay_enable_gfx_off(struct work_struct *work)
2262{
2263 struct amdgpu_device *adev =
2264 container_of(work, struct amdgpu_device, gfx.gfx_off_delay_work.work);
2265
2266 mutex_lock(&adev->gfx.gfx_off_mutex);
2267 if (!adev->gfx.gfx_off_state && !adev->gfx.gfx_off_req_count) {
2268 if (!amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_GFX, true))
2269 adev->gfx.gfx_off_state = true;
2270 }
2271 mutex_unlock(&adev->gfx.gfx_off_mutex);
2272}
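/*
 * Illustrative sketch (not part of the original file): GFXOFF is only armed
 * from this delayed work when no one holds a request.  A hypothetical caller
 * that needs the GFX core awake would bump gfx_off_req_count first (the real
 * interface for that lives elsewhere in the driver):
 */
#if 0
	mutex_lock(&adev->gfx.gfx_off_mutex);
	adev->gfx.gfx_off_req_count++;		/* keep GFX powered on */
	mutex_unlock(&adev->gfx.gfx_off_mutex);
	/* ... submit GFX work, then drop the request again ... */
#endif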
2273
e3ecdffa 2274/**
e7854a03 2275 * amdgpu_device_ip_suspend_phase1 - run suspend for hardware IPs (phase 1)
e3ecdffa
AD
2276 *
2277 * @adev: amdgpu_device pointer
2278 *
2279 * Main suspend function for hardware IPs. The list of all the hardware
2280 * IPs that make up the asic is walked, clockgating is disabled and the
2281 * suspend callbacks are run. suspend puts the hardware and software state
2282 * in each IP into a state suitable for suspend.
2283 * Returns 0 on success, negative error code on failure.
2284 */
e7854a03
AD
2285static int amdgpu_device_ip_suspend_phase1(struct amdgpu_device *adev)
2286{
2287 int i, r;
2288
05df1f01 2289 amdgpu_device_set_pg_state(adev, AMD_PG_STATE_UNGATE);
fdd34271 2290 amdgpu_device_set_cg_state(adev, AMD_CG_STATE_UNGATE);
05df1f01 2291
e7854a03
AD
2292 for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
2293 if (!adev->ip_blocks[i].status.valid)
2294 continue;
2295 /* displays are handled separately */
2296 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_DCE) {
e7854a03
AD
2297 /* XXX handle errors */
2298 r = adev->ip_blocks[i].version->funcs->suspend(adev);
2299 /* XXX handle errors */
2300 if (r) {
2301 DRM_ERROR("suspend of IP block <%s> failed %d\n",
2302 adev->ip_blocks[i].version->funcs->name, r);
482f0e53 2303 return r;
e7854a03 2304 }
482f0e53 2305 adev->ip_blocks[i].status.hw = false;
e7854a03
AD
2306 }
2307 }
2308
e7854a03
AD
2309 return 0;
2310}
2311
2312/**
2313 * amdgpu_device_ip_suspend_phase2 - run suspend for hardware IPs (phase 2)
2314 *
2315 * @adev: amdgpu_device pointer
2316 *
2317 * Main suspend function for hardware IPs. The list of all the hardware
2318 * IPs that make up the asic is walked, clockgating is disabled and the
2319 * suspend callbacks are run. suspend puts the hardware and software state
2320 * in each IP into a state suitable for suspend.
2321 * Returns 0 on success, negative error code on failure.
2322 */
2323static int amdgpu_device_ip_suspend_phase2(struct amdgpu_device *adev)
d38ceaf9
AD
2324{
2325 int i, r;
2326
2327 for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
a1255107 2328 if (!adev->ip_blocks[i].status.valid)
d38ceaf9 2329 continue;
e7854a03
AD
2330 /* displays are handled in phase1 */
2331 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_DCE)
2332 continue;
bff77e86
LM
2333 /* PSP lost connection when err_event_athub occurs */
2334 if (amdgpu_ras_intr_triggered() &&
2335 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP) {
2336 adev->ip_blocks[i].status.hw = false;
2337 continue;
2338 }
d38ceaf9 2339 /* XXX handle errors */
a1255107 2340 r = adev->ip_blocks[i].version->funcs->suspend(adev);
d38ceaf9 2341 /* XXX handle errors */
2c1a2784 2342 if (r) {
a1255107
AD
2343 DRM_ERROR("suspend of IP block <%s> failed %d\n",
2344 adev->ip_blocks[i].version->funcs->name, r);
2c1a2784 2345 }
876923fb 2346 adev->ip_blocks[i].status.hw = false;
a3a09142
AD
2347 /* handle putting the SMC in the appropriate state */
2348 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) {
2349 if (is_support_sw_smu(adev)) {
0e0b89c0 2350 r = smu_set_mp1_state(&adev->smu, adev->mp1_state);
a3a09142 2351 } else if (adev->powerplay.pp_funcs &&
482f0e53 2352 adev->powerplay.pp_funcs->set_mp1_state) {
a3a09142
AD
2353 r = adev->powerplay.pp_funcs->set_mp1_state(
2354 adev->powerplay.pp_handle,
2355 adev->mp1_state);
0e0b89c0
EQ
2356 }
2357 if (r) {
2358 DRM_ERROR("SMC failed to set mp1 state %d, %d\n",
2359 adev->mp1_state, r);
2360 return r;
a3a09142
AD
2361 }
2362 }
b5507c7e
AG
2363
2364 adev->ip_blocks[i].status.hw = false;
d38ceaf9
AD
2365 }
2366
2367 return 0;
2368}
2369
e7854a03
AD
2370/**
2371 * amdgpu_device_ip_suspend - run suspend for hardware IPs
2372 *
2373 * @adev: amdgpu_device pointer
2374 *
2375 * Main suspend function for hardware IPs. The list of all the hardware
2376 * IPs that make up the asic is walked, clockgating is disabled and the
2377 * suspend callbacks are run. suspend puts the hardware and software state
2378 * in each IP into a state suitable for suspend.
2379 * Returns 0 on success, negative error code on failure.
2380 */
2381int amdgpu_device_ip_suspend(struct amdgpu_device *adev)
2382{
2383 int r;
2384
e7819644
YT
2385 if (amdgpu_sriov_vf(adev))
2386 amdgpu_virt_request_full_gpu(adev, false);
2387
e7854a03
AD
2388 r = amdgpu_device_ip_suspend_phase1(adev);
2389 if (r)
2390 return r;
2391 r = amdgpu_device_ip_suspend_phase2(adev);
2392
e7819644
YT
2393 if (amdgpu_sriov_vf(adev))
2394 amdgpu_virt_release_full_gpu(adev, false);
2395
e7854a03
AD
2396 return r;
2397}
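/*
 * Illustrative note (not part of the original file): callers that need work
 * between the two phases do not use this wrapper; amdgpu_device_suspend()
 * later in this file runs phase1, evicts VRAM and suspends the fence driver,
 * and only then runs phase2.
 */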
2398
06ec9070 2399static int amdgpu_device_ip_reinit_early_sriov(struct amdgpu_device *adev)
a90ad3c2
ML
2400{
2401 int i, r;
2402
2cb681b6
ML
2403 static enum amd_ip_block_type ip_order[] = {
2404 AMD_IP_BLOCK_TYPE_GMC,
2405 AMD_IP_BLOCK_TYPE_COMMON,
39186aef 2406 AMD_IP_BLOCK_TYPE_PSP,
2cb681b6
ML
2407 AMD_IP_BLOCK_TYPE_IH,
2408 };
a90ad3c2 2409
2cb681b6
ML
2410 for (i = 0; i < ARRAY_SIZE(ip_order); i++) {
2411 int j;
2412 struct amdgpu_ip_block *block;
a90ad3c2 2413
2cb681b6
ML
2414 for (j = 0; j < adev->num_ip_blocks; j++) {
2415 block = &adev->ip_blocks[j];
2416
482f0e53 2417 block->status.hw = false;
2cb681b6
ML
2418 if (block->version->type != ip_order[i] ||
2419 !block->status.valid)
2420 continue;
2421
2422 r = block->version->funcs->hw_init(adev);
0aaeefcc 2423 DRM_INFO("RE-INIT-early: %s %s\n", block->version->funcs->name, r?"failed":"succeeded");
c41d1cf6
ML
2424 if (r)
2425 return r;
482f0e53 2426 block->status.hw = true;
a90ad3c2
ML
2427 }
2428 }
2429
2430 return 0;
2431}
2432
06ec9070 2433static int amdgpu_device_ip_reinit_late_sriov(struct amdgpu_device *adev)
a90ad3c2
ML
2434{
2435 int i, r;
2436
2cb681b6
ML
2437 static enum amd_ip_block_type ip_order[] = {
2438 AMD_IP_BLOCK_TYPE_SMC,
2439 AMD_IP_BLOCK_TYPE_DCE,
2440 AMD_IP_BLOCK_TYPE_GFX,
2441 AMD_IP_BLOCK_TYPE_SDMA,
257deb8c
FM
2442 AMD_IP_BLOCK_TYPE_UVD,
2443 AMD_IP_BLOCK_TYPE_VCE
2cb681b6 2444 };
a90ad3c2 2445
2cb681b6
ML
2446 for (i = 0; i < ARRAY_SIZE(ip_order); i++) {
2447 int j;
2448 struct amdgpu_ip_block *block;
a90ad3c2 2449
2cb681b6
ML
2450 for (j = 0; j < adev->num_ip_blocks; j++) {
2451 block = &adev->ip_blocks[j];
2452
2453 if (block->version->type != ip_order[i] ||
482f0e53
ML
2454 !block->status.valid ||
2455 block->status.hw)
2cb681b6
ML
2456 continue;
2457
2458 r = block->version->funcs->hw_init(adev);
0aaeefcc 2459 DRM_INFO("RE-INIT-late: %s %s\n", block->version->funcs->name, r?"failed":"succeeded");
c41d1cf6
ML
2460 if (r)
2461 return r;
482f0e53 2462 block->status.hw = true;
a90ad3c2
ML
2463 }
2464 }
2465
2466 return 0;
2467}
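/*
 * Illustrative note (not part of the original file): during SR-IOV recovery
 * the blocks are re-initialized in the two fixed orders above, i.e.
 * GMC -> COMMON -> PSP -> IH first, then SMC -> DCE -> GFX -> SDMA ->
 * UVD -> VCE, rather than in the normal adev->ip_blocks[] order.
 */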
2468
e3ecdffa
AD
2469/**
2470 * amdgpu_device_ip_resume_phase1 - run resume for hardware IPs
2471 *
2472 * @adev: amdgpu_device pointer
2473 *
2474 * First resume function for hardware IPs. The list of all the hardware
2475 * IPs that make up the asic is walked and the resume callbacks are run for
2476 * COMMON, GMC, and IH. resume puts the hardware into a functional state
2477 * after a suspend and updates the software state as necessary. This
2478 * function is also used for restoring the GPU after a GPU reset.
2479 * Returns 0 on success, negative error code on failure.
2480 */
06ec9070 2481static int amdgpu_device_ip_resume_phase1(struct amdgpu_device *adev)
d38ceaf9
AD
2482{
2483 int i, r;
2484
a90ad3c2 2485 for (i = 0; i < adev->num_ip_blocks; i++) {
482f0e53 2486 if (!adev->ip_blocks[i].status.valid || adev->ip_blocks[i].status.hw)
a90ad3c2 2487 continue;
a90ad3c2 2488 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
e3ecdffa
AD
2489 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
2490 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH) {
482f0e53 2491
fcf0649f
CZ
2492 r = adev->ip_blocks[i].version->funcs->resume(adev);
2493 if (r) {
2494 DRM_ERROR("resume of IP block <%s> failed %d\n",
2495 adev->ip_blocks[i].version->funcs->name, r);
2496 return r;
2497 }
482f0e53 2498 adev->ip_blocks[i].status.hw = true;
a90ad3c2
ML
2499 }
2500 }
2501
2502 return 0;
2503}
2504
e3ecdffa
AD
2505/**
2506 * amdgpu_device_ip_resume_phase2 - run resume for hardware IPs
2507 *
2508 * @adev: amdgpu_device pointer
2509 *
 2510 * Second resume function for hardware IPs. The list of all the hardware
2511 * IPs that make up the asic is walked and the resume callbacks are run for
2512 * all blocks except COMMON, GMC, and IH. resume puts the hardware into a
2513 * functional state after a suspend and updates the software state as
2514 * necessary. This function is also used for restoring the GPU after a GPU
2515 * reset.
2516 * Returns 0 on success, negative error code on failure.
2517 */
06ec9070 2518static int amdgpu_device_ip_resume_phase2(struct amdgpu_device *adev)
d38ceaf9
AD
2519{
2520 int i, r;
2521
2522 for (i = 0; i < adev->num_ip_blocks; i++) {
482f0e53 2523 if (!adev->ip_blocks[i].status.valid || adev->ip_blocks[i].status.hw)
d38ceaf9 2524 continue;
fcf0649f 2525 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
e3ecdffa 2526 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
7a3e0bb2
RZ
2527 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH ||
2528 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP)
fcf0649f 2529 continue;
a1255107 2530 r = adev->ip_blocks[i].version->funcs->resume(adev);
2c1a2784 2531 if (r) {
a1255107
AD
2532 DRM_ERROR("resume of IP block <%s> failed %d\n",
2533 adev->ip_blocks[i].version->funcs->name, r);
d38ceaf9 2534 return r;
2c1a2784 2535 }
482f0e53 2536 adev->ip_blocks[i].status.hw = true;
d38ceaf9
AD
2537 }
2538
2539 return 0;
2540}
2541
e3ecdffa
AD
2542/**
2543 * amdgpu_device_ip_resume - run resume for hardware IPs
2544 *
2545 * @adev: amdgpu_device pointer
2546 *
2547 * Main resume function for hardware IPs. The hardware IPs
 2548 * are split into two resume functions because they are
 2549 * also used in recovering from a GPU reset and some additional
 2550 * steps need to be taken between them. In this case (S3/S4) they are
2551 * run sequentially.
2552 * Returns 0 on success, negative error code on failure.
2553 */
06ec9070 2554static int amdgpu_device_ip_resume(struct amdgpu_device *adev)
fcf0649f
CZ
2555{
2556 int r;
2557
06ec9070 2558 r = amdgpu_device_ip_resume_phase1(adev);
fcf0649f
CZ
2559 if (r)
2560 return r;
7a3e0bb2
RZ
2561
2562 r = amdgpu_device_fw_loading(adev);
2563 if (r)
2564 return r;
2565
06ec9070 2566 r = amdgpu_device_ip_resume_phase2(adev);
fcf0649f
CZ
2567
2568 return r;
2569}
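/*
 * Illustrative note (not part of the original file): the resume sequence is
 * phase1 (COMMON, GMC, IH) -> amdgpu_device_fw_loading() -> phase2 for the
 * remaining blocks; GPU-reset recovery reuses the same two phases with
 * additional steps inserted between them elsewhere in the driver.
 */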
2570
e3ecdffa
AD
2571/**
2572 * amdgpu_device_detect_sriov_bios - determine if the board supports SR-IOV
2573 *
2574 * @adev: amdgpu_device pointer
2575 *
2576 * Query the VBIOS data tables to determine if the board supports SR-IOV.
2577 */
4e99a44e 2578static void amdgpu_device_detect_sriov_bios(struct amdgpu_device *adev)
048765ad 2579{
6867e1b5
ML
2580 if (amdgpu_sriov_vf(adev)) {
2581 if (adev->is_atom_fw) {
2582 if (amdgpu_atomfirmware_gpu_supports_virtualization(adev))
2583 adev->virt.caps |= AMDGPU_SRIOV_CAPS_SRIOV_VBIOS;
2584 } else {
2585 if (amdgpu_atombios_has_gpu_virtualization_table(adev))
2586 adev->virt.caps |= AMDGPU_SRIOV_CAPS_SRIOV_VBIOS;
2587 }
2588
2589 if (!(adev->virt.caps & AMDGPU_SRIOV_CAPS_SRIOV_VBIOS))
2590 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_NO_VBIOS, 0, 0);
a5bde2f9 2591 }
048765ad
AR
2592}
2593
e3ecdffa
AD
2594/**
2595 * amdgpu_device_asic_has_dc_support - determine if DC supports the asic
2596 *
2597 * @asic_type: AMD asic type
2598 *
 2599 * Check if there is DC (new modesetting infrastructure) support for an asic.
 2600 * Returns true if DC has support, false if not.
2601 */
4562236b
HW
2602bool amdgpu_device_asic_has_dc_support(enum amd_asic_type asic_type)
2603{
2604 switch (asic_type) {
2605#if defined(CONFIG_DRM_AMD_DC)
2606 case CHIP_BONAIRE:
0d6fbccb 2607 case CHIP_KAVERI:
367e6687
AD
2608 case CHIP_KABINI:
2609 case CHIP_MULLINS:
d9fda248
HW
2610 /*
2611 * We have systems in the wild with these ASICs that require
2612 * LVDS and VGA support which is not supported with DC.
2613 *
2614 * Fallback to the non-DC driver here by default so as not to
2615 * cause regressions.
2616 */
2617 return amdgpu_dc > 0;
2618 case CHIP_HAWAII:
4562236b
HW
2619 case CHIP_CARRIZO:
2620 case CHIP_STONEY:
4562236b 2621 case CHIP_POLARIS10:
675fd32b 2622 case CHIP_POLARIS11:
2c8ad2d5 2623 case CHIP_POLARIS12:
675fd32b 2624 case CHIP_VEGAM:
4562236b
HW
2625 case CHIP_TONGA:
2626 case CHIP_FIJI:
42f8ffa1 2627 case CHIP_VEGA10:
dca7b401 2628 case CHIP_VEGA12:
c6034aa2 2629 case CHIP_VEGA20:
b86a1aa3 2630#if defined(CONFIG_DRM_AMD_DC_DCN)
fd187853 2631 case CHIP_RAVEN:
b4f199c7 2632 case CHIP_NAVI10:
8fceceb6 2633 case CHIP_NAVI14:
078655d9 2634 case CHIP_NAVI12:
e1c14c43 2635 case CHIP_RENOIR:
42f8ffa1 2636#endif
fd187853 2637 return amdgpu_dc != 0;
4562236b
HW
2638#endif
2639 default:
93b09a9a
SS
2640 if (amdgpu_dc > 0)
2641 DRM_INFO("Display Core has been requested via kernel parameter "
2642 "but isn't supported by ASIC, ignoring\n");
4562236b
HW
2643 return false;
2644 }
2645}
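/*
 * Illustrative note (not part of the original file): amdgpu_dc comes from
 * the amdgpu.dc module parameter (assumed -1 auto / 0 off / 1 on), so e.g. a
 * Bonaire- or Kaveri-class board needs an explicit amdgpu.dc=1 to opt into
 * DC because of the LVDS/VGA fallback above, while amdgpu.dc=0 forces the
 * non-DC path on every ASIC.
 */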
2646
2647/**
2648 * amdgpu_device_has_dc_support - check if dc is supported
2649 *
2650 * @adev: amdgpu_device_pointer
2651 *
2652 * Returns true for supported, false for not supported
2653 */
2654bool amdgpu_device_has_dc_support(struct amdgpu_device *adev)
2655{
2555039d
XY
2656 if (amdgpu_sriov_vf(adev))
2657 return false;
2658
4562236b
HW
2659 return amdgpu_device_asic_has_dc_support(adev->asic_type);
2660}
2661
d4535e2c
AG
2662
2663static void amdgpu_device_xgmi_reset_func(struct work_struct *__work)
2664{
2665 struct amdgpu_device *adev =
2666 container_of(__work, struct amdgpu_device, xgmi_reset_work);
2667
ce316fa5
LM
2668 if (amdgpu_asic_reset_method(adev) == AMD_RESET_METHOD_BACO)
2669 adev->asic_reset_res = (adev->in_baco == false) ?
2670 amdgpu_device_baco_enter(adev->ddev) :
2671 amdgpu_device_baco_exit(adev->ddev);
2672 else
2673 adev->asic_reset_res = amdgpu_asic_reset(adev);
2674
d4535e2c 2675 if (adev->asic_reset_res)
fed184e9 2676 DRM_WARN("ASIC reset failed with error, %d for drm dev, %s",
d4535e2c
AG
2677 adev->asic_reset_res, adev->ddev->unique);
2678}
2679
71f98027
AD
2680static int amdgpu_device_get_job_timeout_settings(struct amdgpu_device *adev)
2681{
2682 char *input = amdgpu_lockup_timeout;
2683 char *timeout_setting = NULL;
2684 int index = 0;
2685 long timeout;
2686 int ret = 0;
2687
2688 /*
 2689 * By default the timeout for non-compute jobs is 10000 ms
 2690 * and there is no timeout enforced on compute jobs.
 2691 * In SR-IOV or passthrough mode, the timeout for compute
 2692 * jobs defaults to 10000 ms as well.
2693 */
2694 adev->gfx_timeout = msecs_to_jiffies(10000);
2695 adev->sdma_timeout = adev->video_timeout = adev->gfx_timeout;
2696 if (amdgpu_sriov_vf(adev) || amdgpu_passthrough(adev))
2697 adev->compute_timeout = adev->gfx_timeout;
2698 else
2699 adev->compute_timeout = MAX_SCHEDULE_TIMEOUT;
2700
f440ff44 2701 if (strnlen(input, AMDGPU_MAX_TIMEOUT_PARAM_LENGTH)) {
71f98027 2702 while ((timeout_setting = strsep(&input, ",")) &&
f440ff44 2703 strnlen(timeout_setting, AMDGPU_MAX_TIMEOUT_PARAM_LENGTH)) {
71f98027
AD
2704 ret = kstrtol(timeout_setting, 0, &timeout);
2705 if (ret)
2706 return ret;
2707
2708 if (timeout == 0) {
2709 index++;
2710 continue;
2711 } else if (timeout < 0) {
2712 timeout = MAX_SCHEDULE_TIMEOUT;
2713 } else {
2714 timeout = msecs_to_jiffies(timeout);
2715 }
2716
2717 switch (index++) {
2718 case 0:
2719 adev->gfx_timeout = timeout;
2720 break;
2721 case 1:
2722 adev->compute_timeout = timeout;
2723 break;
2724 case 2:
2725 adev->sdma_timeout = timeout;
2726 break;
2727 case 3:
2728 adev->video_timeout = timeout;
2729 break;
2730 default:
2731 break;
2732 }
2733 }
2734 /*
2735 * There is only one value specified and
2736 * it should apply to all non-compute jobs.
2737 */
bcccee89 2738 if (index == 1) {
71f98027 2739 adev->sdma_timeout = adev->video_timeout = adev->gfx_timeout;
bcccee89
ED
2740 if (amdgpu_sriov_vf(adev) || amdgpu_passthrough(adev))
2741 adev->compute_timeout = adev->gfx_timeout;
2742 }
71f98027
AD
2743 }
2744
2745 return ret;
2746}
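/*
 * Illustrative note (not part of the original file): the parser above takes
 * amdgpu_lockup_timeout (the amdgpu.lockup_timeout parameter) as up to four
 * comma-separated values in the order gfx, compute, sdma, video, e.g.
 *
 *	amdgpu.lockup_timeout=10000,60000,10000,10000
 *
 * A value of 0 keeps the default, a negative value means no timeout, and a
 * single value applies to all non-compute queues (and to compute as well
 * under SR-IOV/passthrough).
 */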
d4535e2c 2747
d38ceaf9
AD
2748/**
2749 * amdgpu_device_init - initialize the driver
2750 *
2751 * @adev: amdgpu_device pointer
87e3f136 2752 * @ddev: drm dev pointer
d38ceaf9
AD
2753 * @pdev: pci dev pointer
2754 * @flags: driver flags
2755 *
2756 * Initializes the driver info and hw (all asics).
2757 * Returns 0 for success or an error on failure.
2758 * Called at driver startup.
2759 */
2760int amdgpu_device_init(struct amdgpu_device *adev,
2761 struct drm_device *ddev,
2762 struct pci_dev *pdev,
2763 uint32_t flags)
2764{
2765 int r, i;
3840c5bc 2766 bool boco = false;
95844d20 2767 u32 max_MBps;
d38ceaf9
AD
2768
2769 adev->shutdown = false;
2770 adev->dev = &pdev->dev;
2771 adev->ddev = ddev;
2772 adev->pdev = pdev;
2773 adev->flags = flags;
4e66d7d2
YZ
2774
2775 if (amdgpu_force_asic_type >= 0 && amdgpu_force_asic_type < CHIP_LAST)
2776 adev->asic_type = amdgpu_force_asic_type;
2777 else
2778 adev->asic_type = flags & AMD_ASIC_MASK;
2779
d38ceaf9 2780 adev->usec_timeout = AMDGPU_MAX_USEC_TIMEOUT;
593aa2d2
SL
2781 if (amdgpu_emu_mode == 1)
2782 adev->usec_timeout *= 2;
770d13b1 2783 adev->gmc.gart_size = 512 * 1024 * 1024;
d38ceaf9
AD
2784 adev->accel_working = false;
2785 adev->num_rings = 0;
2786 adev->mman.buffer_funcs = NULL;
2787 adev->mman.buffer_funcs_ring = NULL;
2788 adev->vm_manager.vm_pte_funcs = NULL;
0c88b430 2789 adev->vm_manager.vm_pte_num_scheds = 0;
132f34e4 2790 adev->gmc.gmc_funcs = NULL;
f54d1867 2791 adev->fence_context = dma_fence_context_alloc(AMDGPU_MAX_RINGS);
b8866c26 2792 bitmap_zero(adev->gfx.pipe_reserve_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
d38ceaf9
AD
2793
2794 adev->smc_rreg = &amdgpu_invalid_rreg;
2795 adev->smc_wreg = &amdgpu_invalid_wreg;
2796 adev->pcie_rreg = &amdgpu_invalid_rreg;
2797 adev->pcie_wreg = &amdgpu_invalid_wreg;
36b9a952
HR
2798 adev->pciep_rreg = &amdgpu_invalid_rreg;
2799 adev->pciep_wreg = &amdgpu_invalid_wreg;
4fa1c6a6
TZ
2800 adev->pcie_rreg64 = &amdgpu_invalid_rreg64;
2801 adev->pcie_wreg64 = &amdgpu_invalid_wreg64;
d38ceaf9
AD
2802 adev->uvd_ctx_rreg = &amdgpu_invalid_rreg;
2803 adev->uvd_ctx_wreg = &amdgpu_invalid_wreg;
2804 adev->didt_rreg = &amdgpu_invalid_rreg;
2805 adev->didt_wreg = &amdgpu_invalid_wreg;
ccdbb20a
RZ
2806 adev->gc_cac_rreg = &amdgpu_invalid_rreg;
2807 adev->gc_cac_wreg = &amdgpu_invalid_wreg;
d38ceaf9
AD
2808 adev->audio_endpt_rreg = &amdgpu_block_invalid_rreg;
2809 adev->audio_endpt_wreg = &amdgpu_block_invalid_wreg;
2810
3e39ab90
AD
2811 DRM_INFO("initializing kernel modesetting (%s 0x%04X:0x%04X 0x%04X:0x%04X 0x%02X).\n",
2812 amdgpu_asic_name[adev->asic_type], pdev->vendor, pdev->device,
2813 pdev->subsystem_vendor, pdev->subsystem_device, pdev->revision);
d38ceaf9
AD
2814
 2815 /* mutex initializations are all done here so we
 2816 * can recall functions without running into locking issues */
d38ceaf9 2817 atomic_set(&adev->irq.ih.lock, 0);
0e5ca0d1 2818 mutex_init(&adev->firmware.mutex);
d38ceaf9
AD
2819 mutex_init(&adev->pm.mutex);
2820 mutex_init(&adev->gfx.gpu_clock_mutex);
2821 mutex_init(&adev->srbm_mutex);
b8866c26 2822 mutex_init(&adev->gfx.pipe_reserve_mutex);
d23ee13f 2823 mutex_init(&adev->gfx.gfx_off_mutex);
d38ceaf9 2824 mutex_init(&adev->grbm_idx_mutex);
d38ceaf9 2825 mutex_init(&adev->mn_lock);
e23b74aa 2826 mutex_init(&adev->virt.vf_errors.lock);
d38ceaf9 2827 hash_init(adev->mn_hash);
13a752e3 2828 mutex_init(&adev->lock_reset);
32eaeae0 2829 mutex_init(&adev->psp.mutex);
d38ceaf9 2830
912dfc84
EQ
2831 r = amdgpu_device_check_arguments(adev);
2832 if (r)
2833 return r;
d38ceaf9 2834
d38ceaf9
AD
2835 spin_lock_init(&adev->mmio_idx_lock);
2836 spin_lock_init(&adev->smc_idx_lock);
2837 spin_lock_init(&adev->pcie_idx_lock);
2838 spin_lock_init(&adev->uvd_ctx_idx_lock);
2839 spin_lock_init(&adev->didt_idx_lock);
ccdbb20a 2840 spin_lock_init(&adev->gc_cac_idx_lock);
16abb5d2 2841 spin_lock_init(&adev->se_cac_idx_lock);
d38ceaf9 2842 spin_lock_init(&adev->audio_endpt_idx_lock);
95844d20 2843 spin_lock_init(&adev->mm_stats.lock);
d38ceaf9 2844
0c4e7fa5
CZ
2845 INIT_LIST_HEAD(&adev->shadow_list);
2846 mutex_init(&adev->shadow_list_lock);
2847
795f2813
AR
2848 INIT_LIST_HEAD(&adev->ring_lru_list);
2849 spin_lock_init(&adev->ring_lru_list_lock);
2850
beff74bc
AD
2851 INIT_DELAYED_WORK(&adev->delayed_init_work,
2852 amdgpu_device_delayed_init_work_handler);
1e317b99
RZ
2853 INIT_DELAYED_WORK(&adev->gfx.gfx_off_delay_work,
2854 amdgpu_device_delay_enable_gfx_off);
2dc80b00 2855
d4535e2c
AG
2856 INIT_WORK(&adev->xgmi_reset_work, amdgpu_device_xgmi_reset_func);
2857
d23ee13f 2858 adev->gfx.gfx_off_req_count = 1;
b1ddf548
RZ
2859 adev->pm.ac_power = power_supply_is_system_supplied() > 0 ? true : false;
2860
0fa49558
AX
2861 /* Registers mapping */
2862 /* TODO: block userspace mapping of io register */
da69c161
KW
2863 if (adev->asic_type >= CHIP_BONAIRE) {
2864 adev->rmmio_base = pci_resource_start(adev->pdev, 5);
2865 adev->rmmio_size = pci_resource_len(adev->pdev, 5);
2866 } else {
2867 adev->rmmio_base = pci_resource_start(adev->pdev, 2);
2868 adev->rmmio_size = pci_resource_len(adev->pdev, 2);
2869 }
d38ceaf9 2870
d38ceaf9
AD
2871 adev->rmmio = ioremap(adev->rmmio_base, adev->rmmio_size);
2872 if (adev->rmmio == NULL) {
2873 return -ENOMEM;
2874 }
2875 DRM_INFO("register mmio base: 0x%08X\n", (uint32_t)adev->rmmio_base);
2876 DRM_INFO("register mmio size: %u\n", (unsigned)adev->rmmio_size);
2877
d38ceaf9
AD
2878 /* io port mapping */
2879 for (i = 0; i < DEVICE_COUNT_RESOURCE; i++) {
2880 if (pci_resource_flags(adev->pdev, i) & IORESOURCE_IO) {
2881 adev->rio_mem_size = pci_resource_len(adev->pdev, i);
2882 adev->rio_mem = pci_iomap(adev->pdev, i, adev->rio_mem_size);
2883 break;
2884 }
2885 }
2886 if (adev->rio_mem == NULL)
b64a18c5 2887 DRM_INFO("PCI I/O BAR is not found.\n");
d38ceaf9 2888
b2109d8e
JX
2889 /* enable PCIE atomic ops */
2890 r = pci_enable_atomic_ops_to_root(adev->pdev,
2891 PCI_EXP_DEVCAP2_ATOMIC_COMP32 |
2892 PCI_EXP_DEVCAP2_ATOMIC_COMP64);
2893 if (r) {
2894 adev->have_atomics_support = false;
2895 DRM_INFO("PCIE atomic ops is not supported\n");
2896 } else {
2897 adev->have_atomics_support = true;
2898 }
2899
5494d864
AD
2900 amdgpu_device_get_pcie_info(adev);
2901
b239c017
JX
2902 if (amdgpu_mcbp)
2903 DRM_INFO("MCBP is enabled\n");
2904
5f84cc63
JX
2905 if (amdgpu_mes && adev->asic_type >= CHIP_NAVI10)
2906 adev->enable_mes = true;
2907
f54eeab4 2908 if (amdgpu_discovery && adev->asic_type >= CHIP_NAVI10) {
a190d1c7
XY
2909 r = amdgpu_discovery_init(adev);
2910 if (r) {
2911 dev_err(adev->dev, "amdgpu_discovery_init failed\n");
2912 return r;
2913 }
2914 }
2915
d38ceaf9 2916 /* early init functions */
06ec9070 2917 r = amdgpu_device_ip_early_init(adev);
d38ceaf9
AD
2918 if (r)
2919 return r;
2920
df99ac0f
JZ
2921 r = amdgpu_device_get_job_timeout_settings(adev);
2922 if (r) {
2923 dev_err(adev->dev, "invalid lockup_timeout parameter syntax\n");
2924 return r;
2925 }
2926
6585661d
OZ
 2927 /* doorbell bar mapping and doorbell index init */
2928 amdgpu_device_doorbell_init(adev);
2929
d38ceaf9
AD
2930 /* if we have > 1 VGA cards, then disable the amdgpu VGA resources */
2931 /* this will fail for cards that aren't VGA class devices, just
2932 * ignore it */
06ec9070 2933 vga_client_register(adev->pdev, adev, NULL, amdgpu_device_vga_set_decode);
d38ceaf9 2934
31af062a 2935 if (amdgpu_device_supports_boco(ddev))
3840c5bc
AD
2936 boco = true;
2937 if (amdgpu_has_atpx() &&
2938 (amdgpu_is_atpx_hybrid() ||
2939 amdgpu_has_atpx_dgpu_power_cntl()) &&
2940 !pci_is_thunderbolt_attached(adev->pdev))
84c8b22e 2941 vga_switcheroo_register_client(adev->pdev,
3840c5bc
AD
2942 &amdgpu_switcheroo_ops, boco);
2943 if (boco)
d38ceaf9
AD
2944 vga_switcheroo_init_domain_pm_ops(adev->dev, &adev->vga_pm_domain);
2945
9475a943
SL
2946 if (amdgpu_emu_mode == 1) {
2947 /* post the asic on emulation mode */
2948 emu_soc_asic_init(adev);
bfca0289 2949 goto fence_driver_init;
9475a943 2950 }
bfca0289 2951
4e99a44e
ML
2952 /* detect if we are with an SRIOV vbios */
2953 amdgpu_device_detect_sriov_bios(adev);
048765ad 2954
95e8e59e
AD
2955 /* check if we need to reset the asic
2956 * E.g., driver was not cleanly unloaded previously, etc.
2957 */
f14899fd 2958 if (!amdgpu_sriov_vf(adev) && amdgpu_asic_need_reset_on_init(adev)) {
95e8e59e
AD
2959 r = amdgpu_asic_reset(adev);
2960 if (r) {
2961 dev_err(adev->dev, "asic reset on init failed\n");
2962 goto failed;
2963 }
2964 }
2965
d38ceaf9 2966 /* Post card if necessary */
39c640c0 2967 if (amdgpu_device_need_post(adev)) {
d38ceaf9 2968 if (!adev->bios) {
bec86378 2969 dev_err(adev->dev, "no vBIOS found\n");
83ba126a
AD
2970 r = -EINVAL;
2971 goto failed;
d38ceaf9 2972 }
bec86378 2973 DRM_INFO("GPU posting now...\n");
4e99a44e
ML
2974 r = amdgpu_atom_asic_init(adev->mode_info.atom_context);
2975 if (r) {
2976 dev_err(adev->dev, "gpu post error!\n");
2977 goto failed;
2978 }
d38ceaf9
AD
2979 }
2980
88b64e95
AD
2981 if (adev->is_atom_fw) {
2982 /* Initialize clocks */
2983 r = amdgpu_atomfirmware_get_clock_info(adev);
2984 if (r) {
2985 dev_err(adev->dev, "amdgpu_atomfirmware_get_clock_info failed\n");
e23b74aa 2986 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_GET_CLOCK_FAIL, 0, 0);
88b64e95
AD
2987 goto failed;
2988 }
2989 } else {
a5bde2f9
AD
2990 /* Initialize clocks */
2991 r = amdgpu_atombios_get_clock_info(adev);
2992 if (r) {
2993 dev_err(adev->dev, "amdgpu_atombios_get_clock_info failed\n");
e23b74aa 2994 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_GET_CLOCK_FAIL, 0, 0);
89041940 2995 goto failed;
a5bde2f9
AD
2996 }
2997 /* init i2c buses */
4562236b
HW
2998 if (!amdgpu_device_has_dc_support(adev))
2999 amdgpu_atombios_i2c_init(adev);
2c1a2784 3000 }
d38ceaf9 3001
bfca0289 3002fence_driver_init:
d38ceaf9
AD
3003 /* Fence driver */
3004 r = amdgpu_fence_driver_init(adev);
2c1a2784
AD
3005 if (r) {
3006 dev_err(adev->dev, "amdgpu_fence_driver_init failed\n");
e23b74aa 3007 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_FENCE_INIT_FAIL, 0, 0);
83ba126a 3008 goto failed;
2c1a2784 3009 }
d38ceaf9
AD
3010
3011 /* init the mode config */
3012 drm_mode_config_init(adev->ddev);
3013
06ec9070 3014 r = amdgpu_device_ip_init(adev);
d38ceaf9 3015 if (r) {
8840a387 3016 /* failed in exclusive mode due to timeout */
3017 if (amdgpu_sriov_vf(adev) &&
3018 !amdgpu_sriov_runtime(adev) &&
3019 amdgpu_virt_mmio_blocked(adev) &&
3020 !amdgpu_virt_wait_reset(adev)) {
3021 dev_err(adev->dev, "VF exclusive mode timeout\n");
1daee8b4
PD
3022 /* Don't send request since VF is inactive. */
3023 adev->virt.caps &= ~AMDGPU_SRIOV_CAPS_RUNTIME;
3024 adev->virt.ops = NULL;
8840a387 3025 r = -EAGAIN;
3026 goto failed;
3027 }
06ec9070 3028 dev_err(adev->dev, "amdgpu_device_ip_init failed\n");
e23b74aa 3029 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_AMDGPU_INIT_FAIL, 0, 0);
83ba126a 3030 goto failed;
d38ceaf9
AD
3031 }
3032
d7f72fe4
YZ
3033 DRM_DEBUG("SE %d, SH per SE %d, CU per SH %d, active_cu_number %d\n",
3034 adev->gfx.config.max_shader_engines,
3035 adev->gfx.config.max_sh_per_se,
3036 adev->gfx.config.max_cu_per_sh,
3037 adev->gfx.cu_info.number);
3038
f880799d
ND
3039 amdgpu_ctx_init_sched(adev);
3040
d38ceaf9
AD
3041 adev->accel_working = true;
3042
e59c0205
AX
3043 amdgpu_vm_check_compute_bug(adev);
3044
95844d20
MO
3045 /* Initialize the buffer migration limit. */
3046 if (amdgpu_moverate >= 0)
3047 max_MBps = amdgpu_moverate;
3048 else
3049 max_MBps = 8; /* Allow 8 MB/s. */
3050 /* Get a log2 for easy divisions. */
3051 adev->mm_stats.log2_max_MBps = ilog2(max(1u, max_MBps));
3052
9bc92b9c
ML
3053 amdgpu_fbdev_init(adev);
3054
d2f52ac8 3055 r = amdgpu_pm_sysfs_init(adev);
7c868b59
YT
3056 if (r) {
3057 adev->pm_sysfs_en = false;
d2f52ac8 3058 DRM_ERROR("registering pm debugfs failed (%d).\n", r);
7c868b59
YT
3059 } else
3060 adev->pm_sysfs_en = true;
d2f52ac8 3061
5bb23532 3062 r = amdgpu_ucode_sysfs_init(adev);
7c868b59
YT
3063 if (r) {
3064 adev->ucode_sysfs_en = false;
5bb23532 3065 DRM_ERROR("Creating firmware sysfs failed (%d).\n", r);
7c868b59
YT
3066 } else
3067 adev->ucode_sysfs_en = true;
5bb23532 3068
75758255 3069 r = amdgpu_debugfs_gem_init(adev);
3f14e623 3070 if (r)
d38ceaf9 3071 DRM_ERROR("registering gem debugfs failed (%d).\n", r);
d38ceaf9
AD
3072
3073 r = amdgpu_debugfs_regs_init(adev);
3f14e623 3074 if (r)
d38ceaf9 3075 DRM_ERROR("registering register debugfs failed (%d).\n", r);
d38ceaf9 3076
50ab2533 3077 r = amdgpu_debugfs_firmware_init(adev);
3f14e623 3078 if (r)
50ab2533 3079 DRM_ERROR("registering firmware debugfs failed (%d).\n", r);
50ab2533 3080
763efb6c 3081 r = amdgpu_debugfs_init(adev);
db95e218 3082 if (r)
763efb6c 3083 DRM_ERROR("Creating debugfs files failed (%d).\n", r);
db95e218 3084
d38ceaf9
AD
3085 if ((amdgpu_testing & 1)) {
3086 if (adev->accel_working)
3087 amdgpu_test_moves(adev);
3088 else
3089 DRM_INFO("amdgpu: acceleration disabled, skipping move tests\n");
3090 }
d38ceaf9
AD
3091 if (amdgpu_benchmarking) {
3092 if (adev->accel_working)
3093 amdgpu_benchmark(adev, amdgpu_benchmarking);
3094 else
3095 DRM_INFO("amdgpu: acceleration disabled, skipping benchmarks\n");
3096 }
3097
b0adca4d
EQ
3098 /*
3099 * Register gpu instance before amdgpu_device_enable_mgpu_fan_boost.
 3100 * Otherwise the mgpu fan boost feature will be skipped because the
 3101 * gpu instance count would still be too low.
3102 */
3103 amdgpu_register_gpu_instance(adev);
3104
d38ceaf9
AD
3105 /* enable clockgating, etc. after ib tests, etc. since some blocks require
3106 * explicit gating rather than handling it automatically.
3107 */
06ec9070 3108 r = amdgpu_device_ip_late_init(adev);
2c1a2784 3109 if (r) {
06ec9070 3110 dev_err(adev->dev, "amdgpu_device_ip_late_init failed\n");
e23b74aa 3111 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_AMDGPU_LATE_INIT_FAIL, 0, r);
83ba126a 3112 goto failed;
2c1a2784 3113 }
d38ceaf9 3114
108c6a63 3115 /* must succeed. */
511fdbc3 3116 amdgpu_ras_resume(adev);
108c6a63 3117
beff74bc
AD
3118 queue_delayed_work(system_wq, &adev->delayed_init_work,
3119 msecs_to_jiffies(AMDGPU_RESUME_MS));
3120
dcea6e65
KR
3121 r = device_create_file(adev->dev, &dev_attr_pcie_replay_count);
3122 if (r) {
3123 dev_err(adev->dev, "Could not create pcie_replay_count");
3124 return r;
3125 }
108c6a63 3126
d155bef0
AB
3127 if (IS_ENABLED(CONFIG_PERF_EVENTS))
3128 r = amdgpu_pmu_init(adev);
9c7c85f7
JK
3129 if (r)
3130 dev_err(adev->dev, "amdgpu_pmu_init failed\n");
3131
d38ceaf9 3132 return 0;
83ba126a
AD
3133
3134failed:
89041940 3135 amdgpu_vf_error_trans_all(adev);
3840c5bc 3136 if (boco)
83ba126a 3137 vga_switcheroo_fini_domain_pm_ops(adev->dev);
8840a387 3138
83ba126a 3139 return r;
d38ceaf9
AD
3140}
3141
d38ceaf9
AD
3142/**
3143 * amdgpu_device_fini - tear down the driver
3144 *
3145 * @adev: amdgpu_device pointer
3146 *
3147 * Tear down the driver info (all asics).
3148 * Called at driver shutdown.
3149 */
3150void amdgpu_device_fini(struct amdgpu_device *adev)
3151{
3152 int r;
3153
3154 DRM_INFO("amdgpu: finishing device.\n");
9f875167 3155 flush_delayed_work(&adev->delayed_init_work);
d0d13fe8 3156 adev->shutdown = true;
9f875167 3157
e5b03032
ML
3158 /* disable all interrupts */
3159 amdgpu_irq_disable_all(adev);
ff97cba8
ML
3160 if (adev->mode_info.mode_config_initialized){
3161 if (!amdgpu_device_has_dc_support(adev))
c2d88e06 3162 drm_helper_force_disable_all(adev->ddev);
ff97cba8
ML
3163 else
3164 drm_atomic_helper_shutdown(adev->ddev);
3165 }
d38ceaf9 3166 amdgpu_fence_driver_fini(adev);
7c868b59
YT
3167 if (adev->pm_sysfs_en)
3168 amdgpu_pm_sysfs_fini(adev);
d38ceaf9 3169 amdgpu_fbdev_fini(adev);
06ec9070 3170 r = amdgpu_device_ip_fini(adev);
ab4fe3e1
HR
3171 if (adev->firmware.gpu_info_fw) {
3172 release_firmware(adev->firmware.gpu_info_fw);
3173 adev->firmware.gpu_info_fw = NULL;
3174 }
d38ceaf9
AD
3175 adev->accel_working = false;
3176 /* free i2c buses */
4562236b
HW
3177 if (!amdgpu_device_has_dc_support(adev))
3178 amdgpu_i2c_fini(adev);
bfca0289
SL
3179
3180 if (amdgpu_emu_mode != 1)
3181 amdgpu_atombios_fini(adev);
3182
d38ceaf9
AD
3183 kfree(adev->bios);
3184 adev->bios = NULL;
3840c5bc
AD
3185 if (amdgpu_has_atpx() &&
3186 (amdgpu_is_atpx_hybrid() ||
3187 amdgpu_has_atpx_dgpu_power_cntl()) &&
3188 !pci_is_thunderbolt_attached(adev->pdev))
84c8b22e 3189 vga_switcheroo_unregister_client(adev->pdev);
3840c5bc 3190 if (amdgpu_device_supports_boco(adev->ddev))
83ba126a 3191 vga_switcheroo_fini_domain_pm_ops(adev->dev);
d38ceaf9
AD
3192 vga_client_register(adev->pdev, NULL, NULL, NULL);
3193 if (adev->rio_mem)
3194 pci_iounmap(adev->pdev, adev->rio_mem);
3195 adev->rio_mem = NULL;
3196 iounmap(adev->rmmio);
3197 adev->rmmio = NULL;
06ec9070 3198 amdgpu_device_doorbell_fini(adev);
e9bc1bf7 3199
d38ceaf9 3200 amdgpu_debugfs_regs_cleanup(adev);
dcea6e65 3201 device_remove_file(adev->dev, &dev_attr_pcie_replay_count);
7c868b59
YT
3202 if (adev->ucode_sysfs_en)
3203 amdgpu_ucode_sysfs_fini(adev);
d155bef0
AB
3204 if (IS_ENABLED(CONFIG_PERF_EVENTS))
3205 amdgpu_pmu_fini(adev);
6698a3d0 3206 amdgpu_debugfs_preempt_cleanup(adev);
f54eeab4 3207 if (amdgpu_discovery && adev->asic_type >= CHIP_NAVI10)
a190d1c7 3208 amdgpu_discovery_fini(adev);
d38ceaf9
AD
3209}
3210
3211
3212/*
3213 * Suspend & resume.
3214 */
3215/**
810ddc3a 3216 * amdgpu_device_suspend - initiate device suspend
d38ceaf9 3217 *
87e3f136
DP
3218 * @dev: drm dev pointer
3219 * @suspend: suspend state
3220 * @fbcon : notify the fbdev of suspend
d38ceaf9
AD
3221 *
3222 * Puts the hw in the suspend state (all asics).
3223 * Returns 0 for success or an error on failure.
3224 * Called at driver suspend.
3225 */
de185019 3226int amdgpu_device_suspend(struct drm_device *dev, bool fbcon)
d38ceaf9
AD
3227{
3228 struct amdgpu_device *adev;
3229 struct drm_crtc *crtc;
3230 struct drm_connector *connector;
f8d2d39e 3231 struct drm_connector_list_iter iter;
5ceb54c6 3232 int r;
d38ceaf9
AD
3233
3234 if (dev == NULL || dev->dev_private == NULL) {
3235 return -ENODEV;
3236 }
3237
3238 adev = dev->dev_private;
3239
3240 if (dev->switch_power_state == DRM_SWITCH_POWER_OFF)
3241 return 0;
3242
44779b43 3243 adev->in_suspend = true;
d38ceaf9
AD
3244 drm_kms_helper_poll_disable(dev);
3245
5f818173
S
3246 if (fbcon)
3247 amdgpu_fbdev_set_suspend(adev, 1);
3248
beff74bc 3249 cancel_delayed_work_sync(&adev->delayed_init_work);
a5459475 3250
4562236b
HW
3251 if (!amdgpu_device_has_dc_support(adev)) {
3252 /* turn off display hw */
3253 drm_modeset_lock_all(dev);
f8d2d39e
LP
3254 drm_connector_list_iter_begin(dev, &iter);
3255 drm_for_each_connector_iter(connector, &iter)
3256 drm_helper_connector_dpms(connector,
3257 DRM_MODE_DPMS_OFF);
3258 drm_connector_list_iter_end(&iter);
4562236b 3259 drm_modeset_unlock_all(dev);
fe1053b7
AD
3260 /* unpin the front buffers and cursors */
3261 list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) {
3262 struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
3263 struct drm_framebuffer *fb = crtc->primary->fb;
3264 struct amdgpu_bo *robj;
3265
91334223 3266 if (amdgpu_crtc->cursor_bo && !adev->enable_virtual_display) {
fe1053b7
AD
3267 struct amdgpu_bo *aobj = gem_to_amdgpu_bo(amdgpu_crtc->cursor_bo);
3268 r = amdgpu_bo_reserve(aobj, true);
3269 if (r == 0) {
3270 amdgpu_bo_unpin(aobj);
3271 amdgpu_bo_unreserve(aobj);
3272 }
756e6880 3273 }
756e6880 3274
fe1053b7
AD
3275 if (fb == NULL || fb->obj[0] == NULL) {
3276 continue;
3277 }
3278 robj = gem_to_amdgpu_bo(fb->obj[0]);
3279 /* don't unpin kernel fb objects */
3280 if (!amdgpu_fbdev_robj_is_fb(adev, robj)) {
3281 r = amdgpu_bo_reserve(robj, true);
3282 if (r == 0) {
3283 amdgpu_bo_unpin(robj);
3284 amdgpu_bo_unreserve(robj);
3285 }
d38ceaf9
AD
3286 }
3287 }
3288 }
fe1053b7
AD
3289
3290 amdgpu_amdkfd_suspend(adev);
3291
5e6932fe 3292 amdgpu_ras_suspend(adev);
3293
fe1053b7
AD
3294 r = amdgpu_device_ip_suspend_phase1(adev);
3295
d38ceaf9
AD
3296 /* evict vram memory */
3297 amdgpu_bo_evict_vram(adev);
3298
5ceb54c6 3299 amdgpu_fence_driver_suspend(adev);
d38ceaf9 3300
fe1053b7 3301 r = amdgpu_device_ip_suspend_phase2(adev);
d38ceaf9 3302
a0a71e49
AD
3303 /* evict remaining vram memory
3304 * This second call to evict vram is to evict the gart page table
3305 * using the CPU.
3306 */
d38ceaf9
AD
3307 amdgpu_bo_evict_vram(adev);
3308
d38ceaf9
AD
3309 return 0;
3310}

/**
 * amdgpu_device_resume - initiate device resume
 *
 * @dev: drm dev pointer
 * @fbcon: notify the fbdev of resume
 *
 * Bring the hw back to operating state (all asics).
 * Returns 0 for success or an error on failure.
 * Called at driver resume.
 */
int amdgpu_device_resume(struct drm_device *dev, bool fbcon)
{
	struct drm_connector *connector;
	struct drm_connector_list_iter iter;
	struct amdgpu_device *adev = dev->dev_private;
	struct drm_crtc *crtc;
	int r = 0;

	if (dev->switch_power_state == DRM_SWITCH_POWER_OFF)
		return 0;

	/* post card */
	if (amdgpu_device_need_post(adev)) {
		r = amdgpu_atom_asic_init(adev->mode_info.atom_context);
		if (r)
			DRM_ERROR("amdgpu asic init failed\n");
	}

	r = amdgpu_device_ip_resume(adev);
	if (r) {
		DRM_ERROR("amdgpu_device_ip_resume failed (%d).\n", r);
		return r;
	}
	amdgpu_fence_driver_resume(adev);

	r = amdgpu_device_ip_late_init(adev);
	if (r)
		return r;

	queue_delayed_work(system_wq, &adev->delayed_init_work,
			   msecs_to_jiffies(AMDGPU_RESUME_MS));

	if (!amdgpu_device_has_dc_support(adev)) {
		/* pin cursors */
		list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) {
			struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);

			if (amdgpu_crtc->cursor_bo && !adev->enable_virtual_display) {
				struct amdgpu_bo *aobj = gem_to_amdgpu_bo(amdgpu_crtc->cursor_bo);
				r = amdgpu_bo_reserve(aobj, true);
				if (r == 0) {
					r = amdgpu_bo_pin(aobj, AMDGPU_GEM_DOMAIN_VRAM);
					if (r != 0)
						DRM_ERROR("Failed to pin cursor BO (%d)\n", r);
					amdgpu_crtc->cursor_addr = amdgpu_bo_gpu_offset(aobj);
					amdgpu_bo_unreserve(aobj);
				}
			}
		}
	}
	r = amdgpu_amdkfd_resume(adev);
	if (r)
		return r;

	/* Make sure IB tests flushed */
	flush_delayed_work(&adev->delayed_init_work);

	/* blat the mode back in */
	if (fbcon) {
		if (!amdgpu_device_has_dc_support(adev)) {
			/* pre DCE11 */
			drm_helper_resume_force_mode(dev);

			/* turn on display hw */
			drm_modeset_lock_all(dev);

			drm_connector_list_iter_begin(dev, &iter);
			drm_for_each_connector_iter(connector, &iter)
				drm_helper_connector_dpms(connector,
							  DRM_MODE_DPMS_ON);
			drm_connector_list_iter_end(&iter);

			drm_modeset_unlock_all(dev);
		}
		amdgpu_fbdev_set_suspend(adev, 0);
	}

	drm_kms_helper_poll_enable(dev);

	amdgpu_ras_resume(adev);

	/*
	 * Most of the connector probing functions try to acquire runtime pm
	 * refs to ensure that the GPU is powered on when connector polling is
	 * performed. Since we're calling this from a runtime PM callback,
	 * trying to acquire rpm refs will cause us to deadlock.
	 *
	 * Since we're guaranteed to be holding the rpm lock, it's safe to
	 * temporarily disable the rpm helpers so this doesn't deadlock us.
	 */
#ifdef CONFIG_PM
	dev->dev->power.disable_depth++;
#endif
	if (!amdgpu_device_has_dc_support(adev))
		drm_helper_hpd_irq_event(dev);
	else
		drm_kms_helper_hotplug_event(dev);
#ifdef CONFIG_PM
	dev->dev->power.disable_depth--;
#endif
	adev->in_suspend = false;

	return 0;
}
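
/*
 * Illustrative sketch (not part of the driver): a system-sleep resume
 * callback would typically look up the drm_device attached to the
 * struct device and hand it to amdgpu_device_resume(), asking it to
 * also wake the fbdev console. The callback name and the drvdata
 * convention below are assumptions for illustration only.
 *
 *	static int example_pmops_resume(struct device *dev)
 *	{
 *		struct drm_device *drm_dev = dev_get_drvdata(dev);
 *
 *		return amdgpu_device_resume(drm_dev, true);
 *	}
 */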

/**
 * amdgpu_device_ip_check_soft_reset - did soft reset succeed
 *
 * @adev: amdgpu_device pointer
 *
 * The list of all the hardware IPs that make up the asic is walked and
 * the check_soft_reset callbacks are run. check_soft_reset determines
 * if the asic is still hung or not.
 * Returns true if any of the IPs are still in a hung state, false if not.
 */
static bool amdgpu_device_ip_check_soft_reset(struct amdgpu_device *adev)
{
	int i;
	bool asic_hang = false;

	if (amdgpu_sriov_vf(adev))
		return true;

	if (amdgpu_asic_need_full_reset(adev))
		return true;

	for (i = 0; i < adev->num_ip_blocks; i++) {
		if (!adev->ip_blocks[i].status.valid)
			continue;
		if (adev->ip_blocks[i].version->funcs->check_soft_reset)
			adev->ip_blocks[i].status.hang =
				adev->ip_blocks[i].version->funcs->check_soft_reset(adev);
		if (adev->ip_blocks[i].status.hang) {
			DRM_INFO("IP block:%s is hung!\n", adev->ip_blocks[i].version->funcs->name);
			asic_hang = true;
		}
	}
	return asic_hang;
}

/**
 * amdgpu_device_ip_pre_soft_reset - prepare for soft reset
 *
 * @adev: amdgpu_device pointer
 *
 * The list of all the hardware IPs that make up the asic is walked and the
 * pre_soft_reset callbacks are run if the block is hung. pre_soft_reset
 * handles any IP specific hardware or software state changes that are
 * necessary for a soft reset to succeed.
 * Returns 0 on success, negative error code on failure.
 */
static int amdgpu_device_ip_pre_soft_reset(struct amdgpu_device *adev)
{
	int i, r = 0;

	for (i = 0; i < adev->num_ip_blocks; i++) {
		if (!adev->ip_blocks[i].status.valid)
			continue;
		if (adev->ip_blocks[i].status.hang &&
		    adev->ip_blocks[i].version->funcs->pre_soft_reset) {
			r = adev->ip_blocks[i].version->funcs->pre_soft_reset(adev);
			if (r)
				return r;
		}
	}

	return 0;
}

/**
 * amdgpu_device_ip_need_full_reset - check if a full asic reset is needed
 *
 * @adev: amdgpu_device pointer
 *
 * Some hardware IPs cannot be soft reset. If they are hung, a full gpu
 * reset is necessary to recover.
 * Returns true if a full asic reset is required, false if not.
 */
static bool amdgpu_device_ip_need_full_reset(struct amdgpu_device *adev)
{
	int i;

	if (amdgpu_asic_need_full_reset(adev))
		return true;

	for (i = 0; i < adev->num_ip_blocks; i++) {
		if (!adev->ip_blocks[i].status.valid)
			continue;
		if ((adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) ||
		    (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) ||
		    (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_ACP) ||
		    (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_DCE) ||
		     adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP) {
			if (adev->ip_blocks[i].status.hang) {
				DRM_INFO("Some block need full reset!\n");
				return true;
			}
		}
	}
	return false;
}

/**
 * amdgpu_device_ip_soft_reset - do a soft reset
 *
 * @adev: amdgpu_device pointer
 *
 * The list of all the hardware IPs that make up the asic is walked and the
 * soft_reset callbacks are run if the block is hung. soft_reset handles any
 * IP specific hardware or software state changes that are necessary to soft
 * reset the IP.
 * Returns 0 on success, negative error code on failure.
 */
static int amdgpu_device_ip_soft_reset(struct amdgpu_device *adev)
{
	int i, r = 0;

	for (i = 0; i < adev->num_ip_blocks; i++) {
		if (!adev->ip_blocks[i].status.valid)
			continue;
		if (adev->ip_blocks[i].status.hang &&
		    adev->ip_blocks[i].version->funcs->soft_reset) {
			r = adev->ip_blocks[i].version->funcs->soft_reset(adev);
			if (r)
				return r;
		}
	}

	return 0;
}

/**
 * amdgpu_device_ip_post_soft_reset - clean up from soft reset
 *
 * @adev: amdgpu_device pointer
 *
 * The list of all the hardware IPs that make up the asic is walked and the
 * post_soft_reset callbacks are run if the asic was hung. post_soft_reset
 * handles any IP specific hardware or software state changes that are
 * necessary after the IP has been soft reset.
 * Returns 0 on success, negative error code on failure.
 */
static int amdgpu_device_ip_post_soft_reset(struct amdgpu_device *adev)
{
	int i, r = 0;

	for (i = 0; i < adev->num_ip_blocks; i++) {
		if (!adev->ip_blocks[i].status.valid)
			continue;
		if (adev->ip_blocks[i].status.hang &&
		    adev->ip_blocks[i].version->funcs->post_soft_reset)
			r = adev->ip_blocks[i].version->funcs->post_soft_reset(adev);
		if (r)
			return r;
	}

	return 0;
}
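
/*
 * Illustrative sketch (not part of the driver): the four helpers above are
 * meant to be used together. A caller first asks whether anything is hung,
 * then brackets the per-IP soft reset with the pre/post hooks and re-checks
 * the hang state to decide whether to escalate to a full reset. This mirrors
 * how amdgpu_device_pre_asic_reset() further below uses them; the helper
 * name here is hypothetical.
 *
 *	static bool example_try_soft_reset(struct amdgpu_device *adev)
 *	{
 *		int r;
 *
 *		if (!amdgpu_device_ip_check_soft_reset(adev))
 *			return true;	// nothing is hung, nothing to do
 *
 *		amdgpu_device_ip_pre_soft_reset(adev);
 *		r = amdgpu_device_ip_soft_reset(adev);
 *		amdgpu_device_ip_post_soft_reset(adev);
 *
 *		// still hung (or the reset failed) -> caller needs a full reset
 *		return !r && !amdgpu_device_ip_check_soft_reset(adev);
 *	}
 */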

/**
 * amdgpu_device_recover_vram - Recover some VRAM contents
 *
 * @adev: amdgpu_device pointer
 *
 * Restores the contents of VRAM buffers from the shadows in GTT. Used to
 * restore things like GPUVM page tables after a GPU reset where
 * the contents of VRAM might be lost.
 *
 * Returns:
 * 0 on success, negative error code on failure.
 */
static int amdgpu_device_recover_vram(struct amdgpu_device *adev)
{
	struct dma_fence *fence = NULL, *next = NULL;
	struct amdgpu_bo *shadow;
	long r = 1, tmo;

	if (amdgpu_sriov_runtime(adev))
		tmo = msecs_to_jiffies(8000);
	else
		tmo = msecs_to_jiffies(100);

	DRM_INFO("recover vram bo from shadow start\n");
	mutex_lock(&adev->shadow_list_lock);
	list_for_each_entry(shadow, &adev->shadow_list, shadow_list) {

		/* No need to recover an evicted BO */
		if (shadow->tbo.mem.mem_type != TTM_PL_TT ||
		    shadow->tbo.mem.start == AMDGPU_BO_INVALID_OFFSET ||
		    shadow->parent->tbo.mem.mem_type != TTM_PL_VRAM)
			continue;

		r = amdgpu_bo_restore_shadow(shadow, &next);
		if (r)
			break;

		if (fence) {
			tmo = dma_fence_wait_timeout(fence, false, tmo);
			dma_fence_put(fence);
			fence = next;
			if (tmo == 0) {
				r = -ETIMEDOUT;
				break;
			} else if (tmo < 0) {
				r = tmo;
				break;
			}
		} else {
			fence = next;
		}
	}
	mutex_unlock(&adev->shadow_list_lock);

	if (fence)
		tmo = dma_fence_wait_timeout(fence, false, tmo);
	dma_fence_put(fence);

	if (r < 0 || tmo <= 0) {
		DRM_ERROR("recover vram bo from shadow failed, r is %ld, tmo is %ld\n", r, tmo);
		return -EIO;
	}

	DRM_INFO("recover vram bo from shadow done\n");
	return 0;
}


/**
 * amdgpu_device_reset_sriov - reset ASIC for SR-IOV vf
 *
 * @adev: amdgpu device pointer
 * @from_hypervisor: request from hypervisor
 *
 * Do a VF FLR and reinitialize the ASIC.
 * Returns 0 on success, negative error code on failure.
 */
static int amdgpu_device_reset_sriov(struct amdgpu_device *adev,
				     bool from_hypervisor)
{
	int r;

	if (from_hypervisor)
		r = amdgpu_virt_request_full_gpu(adev, true);
	else
		r = amdgpu_virt_reset_gpu(adev);
	if (r)
		return r;

	/* Resume IP prior to SMC */
	r = amdgpu_device_ip_reinit_early_sriov(adev);
	if (r)
		goto error;

	amdgpu_virt_init_data_exchange(adev);
	/* we need to recover gart prior to running SMC/CP/SDMA resume */
	amdgpu_gtt_mgr_recover(&adev->mman.bdev.man[TTM_PL_TT]);

	r = amdgpu_device_fw_loading(adev);
	if (r)
		return r;

	/* now we are okay to resume SMC/CP/SDMA */
	r = amdgpu_device_ip_reinit_late_sriov(adev);
	if (r)
		goto error;

	amdgpu_irq_gpu_reset_resume_helper(adev);
	r = amdgpu_ib_ring_tests(adev);
	amdgpu_amdkfd_post_reset(adev);

error:
	amdgpu_virt_release_full_gpu(adev, true);
	if (!r && adev->virt.gim_feature & AMDGIM_FEATURE_GIM_FLR_VRAMLOST) {
		amdgpu_inc_vram_lost(adev);
		r = amdgpu_device_recover_vram(adev);
	}

	return r;
}

/**
 * amdgpu_device_should_recover_gpu - check if we should try GPU recovery
 *
 * @adev: amdgpu device pointer
 *
 * Check amdgpu_gpu_recovery and SRIOV status to see if we should try to recover
 * a hung GPU.
 */
bool amdgpu_device_should_recover_gpu(struct amdgpu_device *adev)
{
	if (!amdgpu_device_ip_check_soft_reset(adev)) {
		DRM_INFO("Timeout, but no hardware hang detected.\n");
		return false;
	}

	if (amdgpu_gpu_recovery == 0)
		goto disabled;

	if (amdgpu_sriov_vf(adev))
		return true;

	if (amdgpu_gpu_recovery == -1) {
		switch (adev->asic_type) {
		case CHIP_BONAIRE:
		case CHIP_HAWAII:
		case CHIP_TOPAZ:
		case CHIP_TONGA:
		case CHIP_FIJI:
		case CHIP_POLARIS10:
		case CHIP_POLARIS11:
		case CHIP_POLARIS12:
		case CHIP_VEGAM:
		case CHIP_VEGA20:
		case CHIP_VEGA10:
		case CHIP_VEGA12:
		case CHIP_RAVEN:
			break;
		default:
			goto disabled;
		}
	}

	return true;

disabled:
	DRM_INFO("GPU recovery disabled.\n");
	return false;
}
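
/*
 * Illustrative sketch (not part of the driver): a job-timeout handler would
 * consult amdgpu_device_should_recover_gpu() before kicking off
 * amdgpu_device_gpu_recover() (defined further below), so that timeouts with
 * no detected hardware hang, or setups with recovery disabled, do not trigger
 * a full reset. The function and parameter names here are assumptions for
 * illustration.
 *
 *	static void example_handle_job_timeout(struct amdgpu_device *adev,
 *					       struct amdgpu_job *job)
 *	{
 *		if (amdgpu_device_should_recover_gpu(adev))
 *			amdgpu_device_gpu_recover(adev, job);
 *	}
 */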


static int amdgpu_device_pre_asic_reset(struct amdgpu_device *adev,
					struct amdgpu_job *job,
					bool *need_full_reset_arg)
{
	int i, r = 0;
	bool need_full_reset = *need_full_reset_arg;

	/* block all schedulers and reset given job's ring */
	for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
		struct amdgpu_ring *ring = adev->rings[i];

		if (!ring || !ring->sched.thread)
			continue;

		/* after all hw jobs are reset, hw fence is meaningless, so force_completion */
		amdgpu_fence_driver_force_completion(ring);
	}

	if (job)
		drm_sched_increase_karma(&job->base);

	/* Don't suspend on bare metal if we are not going to HW reset the ASIC */
	if (!amdgpu_sriov_vf(adev)) {

		if (!need_full_reset)
			need_full_reset = amdgpu_device_ip_need_full_reset(adev);

		if (!need_full_reset) {
			amdgpu_device_ip_pre_soft_reset(adev);
			r = amdgpu_device_ip_soft_reset(adev);
			amdgpu_device_ip_post_soft_reset(adev);
			if (r || amdgpu_device_ip_check_soft_reset(adev)) {
				DRM_INFO("soft reset failed, will fallback to full reset!\n");
				need_full_reset = true;
			}
		}

		if (need_full_reset)
			r = amdgpu_device_ip_suspend(adev);

		*need_full_reset_arg = need_full_reset;
	}

	return r;
}

static int amdgpu_do_asic_reset(struct amdgpu_device *adev,
				struct amdgpu_hive_info *hive,
				struct list_head *device_list_handle,
				bool *need_full_reset_arg)
{
	struct amdgpu_device *tmp_adev = NULL;
	bool need_full_reset = *need_full_reset_arg, vram_lost = false;
	int r = 0;
	int cpu = smp_processor_id();
	bool use_baco =
		(amdgpu_asic_reset_method(adev) == AMD_RESET_METHOD_BACO) ?
		true : false;

	/*
	 * ASIC reset has to be done on all XGMI hive nodes ASAP
	 * to allow proper links negotiation in FW (within 1 sec)
	 */
	if (need_full_reset) {
		list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) {
			/*
			 * For XGMI run all resets in parallel to speed up the
			 * process by scheduling the highpri wq on different
			 * cpus. For XGMI with baco reset, all nodes must enter
			 * baco within close proximity before any one exits.
			 */
			if (tmp_adev->gmc.xgmi.num_physical_nodes > 1) {
				if (!queue_work_on(cpu, system_highpri_wq,
						   &tmp_adev->xgmi_reset_work))
					r = -EALREADY;
				cpu = cpumask_next(cpu, cpu_online_mask);
			} else
				r = amdgpu_asic_reset(tmp_adev);
			if (r)
				break;
		}

		/* For XGMI wait for all work to complete before proceed */
		if (!r) {
			list_for_each_entry(tmp_adev, device_list_handle,
					    gmc.xgmi.head) {
				if (tmp_adev->gmc.xgmi.num_physical_nodes > 1) {
					flush_work(&tmp_adev->xgmi_reset_work);
					r = tmp_adev->asic_reset_res;
					if (r)
						break;
					if (use_baco)
						tmp_adev->in_baco = true;
				}
			}
		}

		/*
		 * For XGMI with baco reset, we need to exit the baco phase by
		 * scheduling xgmi_reset_work one more time. PSP reset and sGPU
		 * skip this phase. We do not assume that PSP reset and baco
		 * reset coexist within an XGMI hive.
		 */

		if (!r && use_baco) {
			cpu = smp_processor_id();
			list_for_each_entry(tmp_adev, device_list_handle,
					    gmc.xgmi.head) {
				if (tmp_adev->gmc.xgmi.num_physical_nodes > 1) {
					if (!queue_work_on(cpu,
						system_highpri_wq,
						&tmp_adev->xgmi_reset_work))
						r = -EALREADY;
					if (r)
						break;
					cpu = cpumask_next(cpu, cpu_online_mask);
				}
			}
		}

		if (!r && use_baco) {
			list_for_each_entry(tmp_adev, device_list_handle,
					    gmc.xgmi.head) {
				if (tmp_adev->gmc.xgmi.num_physical_nodes > 1) {
					flush_work(&tmp_adev->xgmi_reset_work);
					r = tmp_adev->asic_reset_res;
					if (r)
						break;
					tmp_adev->in_baco = false;
				}
			}
		}

		if (r) {
			DRM_ERROR("ASIC reset failed with error, %d for drm dev, %s",
				  r, tmp_adev->ddev->unique);
			goto end;
		}
	}

	if (!r && amdgpu_ras_intr_triggered())
		amdgpu_ras_intr_cleared();

	list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) {
		if (need_full_reset) {
			/* post card */
			if (amdgpu_atom_asic_init(tmp_adev->mode_info.atom_context))
				DRM_WARN("asic atom init failed!");

			if (!r) {
				dev_info(tmp_adev->dev, "GPU reset succeeded, trying to resume\n");
				r = amdgpu_device_ip_resume_phase1(tmp_adev);
				if (r)
					goto out;

				vram_lost = amdgpu_device_check_vram_lost(tmp_adev);
				if (vram_lost) {
					DRM_INFO("VRAM is lost due to GPU reset!\n");
					amdgpu_inc_vram_lost(tmp_adev);
				}

				r = amdgpu_gtt_mgr_recover(
					&tmp_adev->mman.bdev.man[TTM_PL_TT]);
				if (r)
					goto out;

				r = amdgpu_device_fw_loading(tmp_adev);
				if (r)
					return r;

				r = amdgpu_device_ip_resume_phase2(tmp_adev);
				if (r)
					goto out;

				if (vram_lost)
					amdgpu_device_fill_reset_magic(tmp_adev);

				/*
				 * Add this ASIC back as tracked since the reset
				 * has already completed successfully.
				 */
				amdgpu_register_gpu_instance(tmp_adev);

				r = amdgpu_device_ip_late_init(tmp_adev);
				if (r)
					goto out;

				/* must succeed. */
				amdgpu_ras_resume(tmp_adev);

				/* Update PSP FW topology after reset */
				if (hive && tmp_adev->gmc.xgmi.num_physical_nodes > 1)
					r = amdgpu_xgmi_update_topology(hive, tmp_adev);
			}
		}


out:
		if (!r) {
			amdgpu_irq_gpu_reset_resume_helper(tmp_adev);
			r = amdgpu_ib_ring_tests(tmp_adev);
			if (r) {
				dev_err(tmp_adev->dev, "ib ring test failed (%d).\n", r);
				r = amdgpu_device_ip_suspend(tmp_adev);
				need_full_reset = true;
				r = -EAGAIN;
				goto end;
			}
		}

		if (!r)
			r = amdgpu_device_recover_vram(tmp_adev);
		else
			tmp_adev->asic_reset_res = r;
	}

end:
	*need_full_reset_arg = need_full_reset;
	return r;
}

static bool amdgpu_device_lock_adev(struct amdgpu_device *adev, bool trylock)
{
	if (trylock) {
		if (!mutex_trylock(&adev->lock_reset))
			return false;
	} else
		mutex_lock(&adev->lock_reset);

	atomic_inc(&adev->gpu_reset_counter);
	adev->in_gpu_reset = 1;
	switch (amdgpu_asic_reset_method(adev)) {
	case AMD_RESET_METHOD_MODE1:
		adev->mp1_state = PP_MP1_STATE_SHUTDOWN;
		break;
	case AMD_RESET_METHOD_MODE2:
		adev->mp1_state = PP_MP1_STATE_RESET;
		break;
	default:
		adev->mp1_state = PP_MP1_STATE_NONE;
		break;
	}

	return true;
}

static void amdgpu_device_unlock_adev(struct amdgpu_device *adev)
{
	amdgpu_vf_error_trans_all(adev);
	adev->mp1_state = PP_MP1_STATE_NONE;
	adev->in_gpu_reset = 0;
	mutex_unlock(&adev->lock_reset);
}

/**
 * amdgpu_device_gpu_recover - reset the asic and recover scheduler
 *
 * @adev: amdgpu device pointer
 * @job: which job triggered the hang
 *
 * Attempt to reset the GPU if it has hung (all asics).
 * Attempt to do soft-reset or full-reset and reinitialize the ASIC.
 * Returns 0 for success or an error on failure.
 */

int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
			      struct amdgpu_job *job)
{
	struct list_head device_list, *device_list_handle = NULL;
	bool need_full_reset, job_signaled;
	struct amdgpu_hive_info *hive = NULL;
	struct amdgpu_device *tmp_adev = NULL;
	int i, r = 0;
	bool in_ras_intr = amdgpu_ras_intr_triggered();
	bool use_baco =
		(amdgpu_asic_reset_method(adev) == AMD_RESET_METHOD_BACO) ?
		true : false;

	/*
	 * Flush RAM to disk so that after reboot
	 * the user can read the log and see why the system rebooted.
	 */
	if (in_ras_intr && !use_baco && amdgpu_ras_get_context(adev)->reboot) {

		DRM_WARN("Emergency reboot.");

		ksys_sync_helper();
		emergency_restart();
	}

	need_full_reset = job_signaled = false;
	INIT_LIST_HEAD(&device_list);

	dev_info(adev->dev, "GPU %s begin!\n",
		(in_ras_intr && !use_baco) ? "jobs stop":"reset");

	cancel_delayed_work_sync(&adev->delayed_init_work);

	hive = amdgpu_get_xgmi_hive(adev, false);

	/*
	 * Here we trylock to avoid a chain of resets executing, triggered
	 * either by jobs on different adevs in an XGMI hive or by jobs on
	 * different schedulers for the same device, while this TO handler
	 * is running.
	 * We always reset all schedulers for a device and all devices for
	 * an XGMI hive, so that should take care of them too.
	 */

	if (hive && !mutex_trylock(&hive->reset_lock)) {
		DRM_INFO("Bailing on TDR for s_job:%llx, hive: %llx as another already in progress",
			  job ? job->base.id : -1, hive->hive_id);
		return 0;
	}

	/* Start with adev pre asic reset first for soft reset check. */
	if (!amdgpu_device_lock_adev(adev, !hive)) {
		DRM_INFO("Bailing on TDR for s_job:%llx, as another already in progress",
			  job ? job->base.id : -1);
		return 0;
	}

	/* Block kfd: SRIOV would do it separately */
	if (!amdgpu_sriov_vf(adev))
		amdgpu_amdkfd_pre_reset(adev);

	/* Build list of devices to reset */
	if (adev->gmc.xgmi.num_physical_nodes > 1) {
		if (!hive) {
			/* unlock kfd: SRIOV would do it separately */
			if (!amdgpu_sriov_vf(adev))
				amdgpu_amdkfd_post_reset(adev);
			amdgpu_device_unlock_adev(adev);
			return -ENODEV;
		}

		/*
		 * In case we are in XGMI hive mode device reset is done for all the
		 * nodes in the hive to retrain all XGMI links and hence the reset
		 * sequence is executed in loop on all nodes.
		 */
		device_list_handle = &hive->device_list;
	} else {
		list_add_tail(&adev->gmc.xgmi.head, &device_list);
		device_list_handle = &device_list;
	}

	/* block all schedulers and reset given job's ring */
	list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) {
		if (tmp_adev != adev) {
			amdgpu_device_lock_adev(tmp_adev, false);
			if (!amdgpu_sriov_vf(tmp_adev))
				amdgpu_amdkfd_pre_reset(tmp_adev);
		}

		/*
		 * Mark these ASICs to be reset as untracked first,
		 * and add them back after the reset has completed.
		 */
		amdgpu_unregister_gpu_instance(tmp_adev);

		/* disable ras on ALL IPs */
		if (!(in_ras_intr && !use_baco) &&
		      amdgpu_device_ip_need_full_reset(tmp_adev))
			amdgpu_ras_suspend(tmp_adev);

		for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
			struct amdgpu_ring *ring = tmp_adev->rings[i];

			if (!ring || !ring->sched.thread)
				continue;

			drm_sched_stop(&ring->sched, job ? &job->base : NULL);

			if (in_ras_intr && !use_baco)
				amdgpu_job_stop_all_jobs_on_sched(&ring->sched);
		}
	}


	if (in_ras_intr && !use_baco)
		goto skip_sched_resume;

	/*
	 * Must check guilty signal here since after this point all old
	 * HW fences are force signaled.
	 *
	 * job->base holds a reference to parent fence
	 */
	if (job && job->base.s_fence->parent &&
	    dma_fence_is_signaled(job->base.s_fence->parent))
		job_signaled = true;

	if (job_signaled) {
		dev_info(adev->dev, "Guilty job already signaled, skipping HW reset");
		goto skip_hw_reset;
	}


	/* Guilty job will be freed after this */
	r = amdgpu_device_pre_asic_reset(adev, job, &need_full_reset);
	if (r) {
		/* TODO Should we stop ? */
		DRM_ERROR("GPU pre asic reset failed with err, %d for drm dev, %s ",
			  r, adev->ddev->unique);
		adev->asic_reset_res = r;
	}

retry:	/* Rest of adevs pre asic reset from XGMI hive. */
	list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) {

		if (tmp_adev == adev)
			continue;

		r = amdgpu_device_pre_asic_reset(tmp_adev,
						 NULL,
						 &need_full_reset);
		/* TODO Should we stop ? */
		if (r) {
			DRM_ERROR("GPU pre asic reset failed with err, %d for drm dev, %s ",
				  r, tmp_adev->ddev->unique);
			tmp_adev->asic_reset_res = r;
		}
	}

	/* Actual ASIC resets if needed. */
	/* TODO Implement XGMI hive reset logic for SRIOV */
	if (amdgpu_sriov_vf(adev)) {
		r = amdgpu_device_reset_sriov(adev, job ? false : true);
		if (r)
			adev->asic_reset_res = r;
	} else {
		r = amdgpu_do_asic_reset(adev, hive, device_list_handle,
					 &need_full_reset);
		if (r && r == -EAGAIN)
			goto retry;
	}

skip_hw_reset:

	/* Post ASIC reset for all devs. */
	list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) {

		for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
			struct amdgpu_ring *ring = tmp_adev->rings[i];

			if (!ring || !ring->sched.thread)
				continue;

			/* No point to resubmit jobs if we didn't HW reset */
			if (!tmp_adev->asic_reset_res && !job_signaled)
				drm_sched_resubmit_jobs(&ring->sched);

			drm_sched_start(&ring->sched, !tmp_adev->asic_reset_res);
		}

		if (!amdgpu_device_has_dc_support(tmp_adev) && !job_signaled) {
			drm_helper_resume_force_mode(tmp_adev->ddev);
		}

		tmp_adev->asic_reset_res = 0;

		if (r) {
			/* bad news, how to tell it to userspace ? */
			dev_info(tmp_adev->dev, "GPU reset(%d) failed\n", atomic_read(&tmp_adev->gpu_reset_counter));
			amdgpu_vf_error_put(tmp_adev, AMDGIM_ERROR_VF_GPU_RESET_FAIL, 0, r);
		} else {
			dev_info(tmp_adev->dev, "GPU reset(%d) succeeded!\n", atomic_read(&tmp_adev->gpu_reset_counter));
		}
	}

skip_sched_resume:
	list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) {
		/* unlock kfd: SRIOV would do it separately */
		if (!(in_ras_intr && !use_baco) && !amdgpu_sriov_vf(tmp_adev))
			amdgpu_amdkfd_post_reset(tmp_adev);
		amdgpu_device_unlock_adev(tmp_adev);
	}

	if (hive)
		mutex_unlock(&hive->reset_lock);

	if (r)
		dev_info(adev->dev, "GPU reset end with ret = %d\n", r);
	return r;
}

/**
 * amdgpu_device_get_pcie_info - fetch pcie info about the PCIE slot
 *
 * @adev: amdgpu_device pointer
 *
 * Fetches and stores in the driver the PCIE capabilities (gen speed
 * and lanes) of the slot the device is in. Handles APUs and
 * virtualized environments where PCIE config space may not be available.
 */
static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev)
{
	struct pci_dev *pdev;
	enum pci_bus_speed speed_cap, platform_speed_cap;
	enum pcie_link_width platform_link_width;

	if (amdgpu_pcie_gen_cap)
		adev->pm.pcie_gen_mask = amdgpu_pcie_gen_cap;

	if (amdgpu_pcie_lane_cap)
		adev->pm.pcie_mlw_mask = amdgpu_pcie_lane_cap;

	/* covers APUs as well */
	if (pci_is_root_bus(adev->pdev->bus)) {
		if (adev->pm.pcie_gen_mask == 0)
			adev->pm.pcie_gen_mask = AMDGPU_DEFAULT_PCIE_GEN_MASK;
		if (adev->pm.pcie_mlw_mask == 0)
			adev->pm.pcie_mlw_mask = AMDGPU_DEFAULT_PCIE_MLW_MASK;
		return;
	}

	if (adev->pm.pcie_gen_mask && adev->pm.pcie_mlw_mask)
		return;

	pcie_bandwidth_available(adev->pdev, NULL,
				 &platform_speed_cap, &platform_link_width);

	if (adev->pm.pcie_gen_mask == 0) {
		/* asic caps */
		pdev = adev->pdev;
		speed_cap = pcie_get_speed_cap(pdev);
		if (speed_cap == PCI_SPEED_UNKNOWN) {
			adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
						   CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
						   CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3);
		} else {
			if (speed_cap == PCIE_SPEED_16_0GT)
				adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
							   CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
							   CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3 |
							   CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN4);
			else if (speed_cap == PCIE_SPEED_8_0GT)
				adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
							   CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
							   CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3);
			else if (speed_cap == PCIE_SPEED_5_0GT)
				adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
							   CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2);
			else
				adev->pm.pcie_gen_mask |= CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1;
		}
		/* platform caps */
		if (platform_speed_cap == PCI_SPEED_UNKNOWN) {
			adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
						   CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2);
		} else {
			if (platform_speed_cap == PCIE_SPEED_16_0GT)
				adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
							   CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2 |
							   CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3 |
							   CAIL_PCIE_LINK_SPEED_SUPPORT_GEN4);
			else if (platform_speed_cap == PCIE_SPEED_8_0GT)
				adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
							   CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2 |
							   CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3);
			else if (platform_speed_cap == PCIE_SPEED_5_0GT)
				adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
							   CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2);
			else
				adev->pm.pcie_gen_mask |= CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1;

		}
	}
	if (adev->pm.pcie_mlw_mask == 0) {
		if (platform_link_width == PCIE_LNK_WIDTH_UNKNOWN) {
			adev->pm.pcie_mlw_mask |= AMDGPU_DEFAULT_PCIE_MLW_MASK;
		} else {
			switch (platform_link_width) {
			case PCIE_LNK_X32:
				adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X32 |
							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X16 |
							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 |
							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
				break;
			case PCIE_LNK_X16:
				adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X16 |
							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 |
							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
				break;
			case PCIE_LNK_X12:
				adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 |
							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
				break;
			case PCIE_LNK_X8:
				adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
				break;
			case PCIE_LNK_X4:
				adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
				break;
			case PCIE_LNK_X2:
				adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
				break;
			case PCIE_LNK_X1:
				adev->pm.pcie_mlw_mask = CAIL_PCIE_LINK_WIDTH_SUPPORT_X1;
				break;
			default:
				break;
			}
		}
	}
}
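
/*
 * Illustrative sketch (not part of the driver): once
 * amdgpu_device_get_pcie_info() has run, adev->pm.pcie_gen_mask and
 * adev->pm.pcie_mlw_mask are plain bitmasks, so a caller can test for a
 * capability with a simple AND. The helper name is hypothetical; the
 * CAIL_* flags are the ones used above.
 *
 *	static bool example_platform_supports_pcie_gen3(struct amdgpu_device *adev)
 *	{
 *		return !!(adev->pm.pcie_gen_mask &
 *			  CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3);
 *	}
 */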

int amdgpu_device_baco_enter(struct drm_device *dev)
{
	struct amdgpu_device *adev = dev->dev_private;
	struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);

	if (!amdgpu_device_supports_baco(adev->ddev))
		return -ENOTSUPP;

	if (ras && ras->supported)
		adev->nbio.funcs->enable_doorbell_interrupt(adev, false);

	if (is_support_sw_smu(adev)) {
		struct smu_context *smu = &adev->smu;
		int ret;

		ret = smu_baco_enter(smu);
		if (ret)
			return ret;
	} else {
		void *pp_handle = adev->powerplay.pp_handle;
		const struct amd_pm_funcs *pp_funcs = adev->powerplay.pp_funcs;

		if (!pp_funcs || !pp_funcs->get_asic_baco_state || !pp_funcs->set_asic_baco_state)
			return -ENOENT;

		/* enter BACO state */
		if (pp_funcs->set_asic_baco_state(pp_handle, 1))
			return -EIO;
	}

	return 0;
}

int amdgpu_device_baco_exit(struct drm_device *dev)
{
	struct amdgpu_device *adev = dev->dev_private;
	struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);

	if (!amdgpu_device_supports_baco(adev->ddev))
		return -ENOTSUPP;

	if (is_support_sw_smu(adev)) {
		struct smu_context *smu = &adev->smu;
		int ret;

		ret = smu_baco_exit(smu);
		if (ret)
			return ret;

	} else {
		void *pp_handle = adev->powerplay.pp_handle;
		const struct amd_pm_funcs *pp_funcs = adev->powerplay.pp_funcs;

		if (!pp_funcs || !pp_funcs->get_asic_baco_state || !pp_funcs->set_asic_baco_state)
			return -ENOENT;

		/* exit BACO state */
		if (pp_funcs->set_asic_baco_state(pp_handle, 0))
			return -EIO;
	}

	if (ras && ras->supported)
		adev->nbio.funcs->enable_doorbell_interrupt(adev, true);

	return 0;
}
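
/*
 * Illustrative sketch (not part of the driver): a runtime-PM pair built on
 * the two helpers above. Both return -ENOTSUPP on boards without BACO
 * support, so a caller can fall back to another power-off method. The
 * wrapper names below are hypothetical.
 *
 *	static int example_runtime_suspend(struct drm_device *dev)
 *	{
 *		return amdgpu_device_baco_enter(dev);
 *	}
 *
 *	static int example_runtime_resume(struct drm_device *dev)
 *	{
 *		return amdgpu_device_baco_exit(dev);
 *	}
 */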