d38ceaf9
AD
1/*
2 * Copyright 2008 Advanced Micro Devices, Inc.
3 * Copyright 2008 Red Hat Inc.
4 * Copyright 2009 Jerome Glisse.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 * and/or sell copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
20 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
21 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
22 * OTHER DEALINGS IN THE SOFTWARE.
23 *
24 * Authors: Dave Airlie
25 * Alex Deucher
26 * Jerome Glisse
27 */
b1ddf548 28#include <linux/power_supply.h>
0875dc9e 29#include <linux/kthread.h>
fdf2f6c5 30#include <linux/module.h>
d38ceaf9
AD
31#include <linux/console.h>
32#include <linux/slab.h>
fdf2f6c5 33
4562236b 34#include <drm/drm_atomic_helper.h>
fcd70cd3 35#include <drm/drm_probe_helper.h>
d38ceaf9
AD
36#include <drm/amdgpu_drm.h>
37#include <linux/vgaarb.h>
38#include <linux/vga_switcheroo.h>
39#include <linux/efi.h>
40#include "amdgpu.h"
f4b373f4 41#include "amdgpu_trace.h"
d38ceaf9
AD
42#include "amdgpu_i2c.h"
43#include "atom.h"
44#include "amdgpu_atombios.h"
a5bde2f9 45#include "amdgpu_atomfirmware.h"
d0dd7f0c 46#include "amd_pcie.h"
33f34802
KW
47#ifdef CONFIG_DRM_AMDGPU_SI
48#include "si.h"
49#endif
a2e73f56
AD
50#ifdef CONFIG_DRM_AMDGPU_CIK
51#include "cik.h"
52#endif
aaa36a97 53#include "vi.h"
460826e6 54#include "soc15.h"
0a5b8c7b 55#include "nv.h"
d38ceaf9 56#include "bif/bif_4_1_d.h"
9accf2fd 57#include <linux/pci.h>
bec86378 58#include <linux/firmware.h>
89041940 59#include "amdgpu_vf_error.h"
d38ceaf9 60
ba997709 61#include "amdgpu_amdkfd.h"
d2f52ac8 62#include "amdgpu_pm.h"
d38ceaf9 63
5183411b 64#include "amdgpu_xgmi.h"
c030f2e4 65#include "amdgpu_ras.h"
9c7c85f7 66#include "amdgpu_pmu.h"
bd607166 67#include "amdgpu_fru_eeprom.h"
5183411b 68
d5ea093e 69#include <linux/suspend.h>
c6a6e2db 70#include <drm/task_barrier.h>
3f12acc8 71#include <linux/pm_runtime.h>
d5ea093e 72
e2a75f88 73MODULE_FIRMWARE("amdgpu/vega10_gpu_info.bin");
3f76dced 74MODULE_FIRMWARE("amdgpu/vega12_gpu_info.bin");
2d2e5e7e 75MODULE_FIRMWARE("amdgpu/raven_gpu_info.bin");
ad5a67a7 76MODULE_FIRMWARE("amdgpu/picasso_gpu_info.bin");
54c4d17e 77MODULE_FIRMWARE("amdgpu/raven2_gpu_info.bin");
65e60f6e 78MODULE_FIRMWARE("amdgpu/arcturus_gpu_info.bin");
b51a26a0 79MODULE_FIRMWARE("amdgpu/renoir_gpu_info.bin");
23c6268e 80MODULE_FIRMWARE("amdgpu/navi10_gpu_info.bin");
ed42cfe1 81MODULE_FIRMWARE("amdgpu/navi14_gpu_info.bin");
42b325e5 82MODULE_FIRMWARE("amdgpu/navi12_gpu_info.bin");
e2a75f88 83
2dc80b00
S
84#define AMDGPU_RESUME_MS 2000
85
050091ab 86const char *amdgpu_asic_name[] = {
da69c161
KW
87 "TAHITI",
88 "PITCAIRN",
89 "VERDE",
90 "OLAND",
91 "HAINAN",
d38ceaf9
AD
92 "BONAIRE",
93 "KAVERI",
94 "KABINI",
95 "HAWAII",
96 "MULLINS",
97 "TOPAZ",
98 "TONGA",
48299f95 99 "FIJI",
d38ceaf9 100 "CARRIZO",
139f4917 101 "STONEY",
2cc0c0b5
FC
102 "POLARIS10",
103 "POLARIS11",
c4642a47 104 "POLARIS12",
48ff108d 105 "VEGAM",
d4196f01 106 "VEGA10",
8fab806a 107 "VEGA12",
956fcddc 108 "VEGA20",
2ca8a5d2 109 "RAVEN",
d6c3b24e 110 "ARCTURUS",
1eee4228 111 "RENOIR",
852a6626 112 "NAVI10",
87dbad02 113 "NAVI14",
9802f5d7 114 "NAVI12",
d38ceaf9
AD
115 "LAST",
116};
117
dcea6e65
KR
118/**
119 * DOC: pcie_replay_count
120 *
121 * The amdgpu driver provides a sysfs API for reporting the total number
122 * of PCIe replays (NAKs)
123 * The file pcie_replay_count is used for this and returns the total
124 * number of replays as a sum of the NAKs generated and NAKs received
125 */
126
127static ssize_t amdgpu_device_get_pcie_replay_count(struct device *dev,
128 struct device_attribute *attr, char *buf)
129{
130 struct drm_device *ddev = dev_get_drvdata(dev);
131 struct amdgpu_device *adev = ddev->dev_private;
132 uint64_t cnt = amdgpu_asic_get_pcie_replay_count(adev);
133
134 return snprintf(buf, PAGE_SIZE, "%llu\n", cnt);
135}
136
137static DEVICE_ATTR(pcie_replay_count, S_IRUGO,
138 amdgpu_device_get_pcie_replay_count, NULL);
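/*
 * Illustrative note (not part of the original file): pcie_replay_count is a
 * plain device attribute, so once registered it is expected to be readable
 * from the device's sysfs directory, e.g. a path of the form
 * /sys/bus/pci/devices/<domain:bus:dev.fn>/pcie_replay_count.  The exact
 * path depends on how the attribute is hooked up at device init.
 */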
139
5494d864
AD
140static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev);
141
bd607166
KR
142/**
143 * DOC: product_name
144 *
145 * The amdgpu driver provides a sysfs API for reporting the product name
146 * for the device
147 * The file product_name is used for this and returns the product name
148 * as returned from the FRU.
149 * NOTE: This is only available for certain server cards
150 */
151
152static ssize_t amdgpu_device_get_product_name(struct device *dev,
153 struct device_attribute *attr, char *buf)
154{
155 struct drm_device *ddev = dev_get_drvdata(dev);
156 struct amdgpu_device *adev = ddev->dev_private;
157
158 return snprintf(buf, PAGE_SIZE, "%s\n", adev->product_name);
159}
160
161static DEVICE_ATTR(product_name, S_IRUGO,
162 amdgpu_device_get_product_name, NULL);
163
164/**
165 * DOC: product_number
166 *
167 * The amdgpu driver provides a sysfs API for reporting the part number
168 * for the device
169 * The file product_number is used for this and returns the part number
170 * as returned from the FRU.
171 * NOTE: This is only available for certain server cards
172 */
173
174static ssize_t amdgpu_device_get_product_number(struct device *dev,
175 struct device_attribute *attr, char *buf)
176{
177 struct drm_device *ddev = dev_get_drvdata(dev);
178 struct amdgpu_device *adev = ddev->dev_private;
179
180 return snprintf(buf, PAGE_SIZE, "%s\n", adev->product_number);
181}
182
183static DEVICE_ATTR(product_number, S_IRUGO,
184 amdgpu_device_get_product_number, NULL);
185
186/**
187 * DOC: serial_number
188 *
189 * The amdgpu driver provides a sysfs API for reporting the serial number
190 * for the device
191 * The file serial_number is used for this and returns the serial number
192 * as returned from the FRU.
193 * NOTE: This is only available for certain server cards
194 */
195
196static ssize_t amdgpu_device_get_serial_number(struct device *dev,
197 struct device_attribute *attr, char *buf)
198{
199 struct drm_device *ddev = dev_get_drvdata(dev);
200 struct amdgpu_device *adev = ddev->dev_private;
201
202 return snprintf(buf, PAGE_SIZE, "%s\n", adev->serial);
203}
204
205static DEVICE_ATTR(serial_number, S_IRUGO,
206 amdgpu_device_get_serial_number, NULL);
207
e3ecdffa 208/**
31af062a 209 * amdgpu_device_supports_boco - Is the device a dGPU with HG/PX power control
e3ecdffa
AD
210 *
211 * @dev: drm_device pointer
212 *
213 * Returns true if the device is a dGPU with HG/PX power control,
214 * otherwise returns false.
215 */
31af062a 216bool amdgpu_device_supports_boco(struct drm_device *dev)
d38ceaf9
AD
217{
218 struct amdgpu_device *adev = dev->dev_private;
219
2f7d10b3 220 if (adev->flags & AMD_IS_PX)
d38ceaf9
AD
221 return true;
222 return false;
223}
224
a69cba42
AD
225/**
226 * amdgpu_device_supports_baco - Does the device support BACO
227 *
228 * @dev: drm_device pointer
229 *
230 * Returns true if the device supports BACO,
231 * otherwise returns false.
232 */
233bool amdgpu_device_supports_baco(struct drm_device *dev)
234{
235 struct amdgpu_device *adev = dev->dev_private;
236
237 return amdgpu_asic_supports_baco(adev);
238}
239
e35e2b11
TY
240/**
241 * VRAM access helper functions.
242 *
243 * amdgpu_device_vram_access - read/write a buffer in vram
244 *
245 * @adev: amdgpu_device pointer
246 * @pos: offset of the buffer in vram
247 * @buf: virtual address of the buffer in system memory
248 * @size: read/write size in bytes; the buffer at @buf must hold at least @size bytes
249 * @write: true - write to vram, otherwise - read from vram
250 */
251void amdgpu_device_vram_access(struct amdgpu_device *adev, loff_t pos,
252 uint32_t *buf, size_t size, bool write)
253{
e35e2b11 254 unsigned long flags;
ce05ac56
CK
255 uint32_t hi = ~0;
256 uint64_t last;
257
9d11eb0d
CK
258
259#ifdef CONFIG_64BIT
260 last = min(pos + size, adev->gmc.visible_vram_size);
261 if (last > pos) {
262 void __iomem *addr = adev->mman.aper_base_kaddr + pos;
263 size_t count = last - pos;
264
265 if (write) {
266 memcpy_toio(addr, buf, count);
267 mb();
268 amdgpu_asic_flush_hdp(adev, NULL);
269 } else {
270 amdgpu_asic_invalidate_hdp(adev, NULL);
271 mb();
272 memcpy_fromio(buf, addr, count);
273 }
274
275 if (count == size)
276 return;
277
278 pos += count;
279 buf += count / 4;
280 size -= count;
281 }
282#endif
283
ce05ac56
CK
284 spin_lock_irqsave(&adev->mmio_idx_lock, flags);
285 for (last = pos + size; pos < last; pos += 4) {
286 uint32_t tmp = pos >> 31;
e35e2b11 287
e35e2b11 288 WREG32_NO_KIQ(mmMM_INDEX, ((uint32_t)pos) | 0x80000000);
ce05ac56
CK
289 if (tmp != hi) {
290 WREG32_NO_KIQ(mmMM_INDEX_HI, tmp);
291 hi = tmp;
292 }
e35e2b11
TY
293 if (write)
294 WREG32_NO_KIQ(mmMM_DATA, *buf++);
295 else
296 *buf++ = RREG32_NO_KIQ(mmMM_DATA);
e35e2b11 297 }
ce05ac56 298 spin_unlock_irqrestore(&adev->mmio_idx_lock, flags);
e35e2b11
TY
299}
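/*
 * Minimal usage sketch (illustrative only, error handling and any locking of
 * the caller's choosing omitted): copy the first 16 dwords of VRAM into a
 * local buffer for inspection, then write them back unchanged.
 *
 *	uint32_t data[16];
 *
 *	amdgpu_device_vram_access(adev, 0, data, sizeof(data), false);
 *	amdgpu_device_vram_access(adev, 0, data, sizeof(data), true);
 */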
300
d38ceaf9 301/*
2eee0229 302 * device register access helper functions.
d38ceaf9 303 */
e3ecdffa 304/**
2eee0229 305 * amdgpu_device_rreg - read a register
e3ecdffa
AD
306 *
307 * @adev: amdgpu_device pointer
308 * @reg: dword aligned register offset
309 * @acc_flags: access flags which require special behavior
310 *
311 * Returns the 32 bit value from the offset specified.
312 */
2eee0229
HZ
313uint32_t amdgpu_device_rreg(struct amdgpu_device *adev, uint32_t reg,
314 uint32_t acc_flags)
d38ceaf9 315{
f4b373f4
TSD
316 uint32_t ret;
317
f384ff95 318 if (!(acc_flags & AMDGPU_REGS_NO_KIQ) && amdgpu_sriov_runtime(adev))
d33a99c4 319 return amdgpu_kiq_rreg(adev, reg);
bc992ba5 320
ec59847e 321 if ((reg * 4) < adev->rmmio_size)
f4b373f4 322 ret = readl(((void __iomem *)adev->rmmio) + (reg * 4));
2eee0229
HZ
323 else
324 ret = adev->pcie_rreg(adev, (reg * 4));
325 trace_amdgpu_device_rreg(adev->pdev->device, reg, ret);
f4b373f4 326 return ret;
d38ceaf9
AD
327}
328
421a2a30
ML
329/*
330 * MMIO register read with byte offset helper functions
331 * @offset: byte offset from MMIO start
332 *
333*/
334
e3ecdffa
AD
335/**
336 * amdgpu_mm_rreg8 - read a memory mapped IO register
337 *
338 * @adev: amdgpu_device pointer
339 * @offset: byte aligned register offset
340 *
341 * Returns the 8 bit value from the offset specified.
342 */
421a2a30
ML
343uint8_t amdgpu_mm_rreg8(struct amdgpu_device *adev, uint32_t offset) {
344 if (offset < adev->rmmio_size)
345 return (readb(adev->rmmio + offset));
346 BUG();
347}
348
349/*
350 * MMIO register write with byte offset helper functions
351 * @offset: byte offset from MMIO start
352 * @value: the value to be written to the register
353 *
354*/
e3ecdffa
AD
355/**
356 * amdgpu_mm_wreg8 - write a memory mapped IO register
357 *
358 * @adev: amdgpu_device pointer
359 * @offset: byte aligned register offset
360 * @value: 8 bit value to write
361 *
362 * Writes the value specified to the offset specified.
363 */
421a2a30
ML
364void amdgpu_mm_wreg8(struct amdgpu_device *adev, uint32_t offset, uint8_t value) {
365 if (offset < adev->rmmio_size)
366 writeb(value, adev->rmmio + offset);
367 else
368 BUG();
369}
370
2eee0229
HZ
371static inline void amdgpu_device_wreg_no_kiq(struct amdgpu_device *adev, uint32_t reg,
372 uint32_t v, uint32_t acc_flags)
2e0cc4d4 373{
2eee0229 374 trace_amdgpu_device_wreg(adev->pdev->device, reg, v);
2e0cc4d4 375
ec59847e 376 if ((reg * 4) < adev->rmmio_size)
2e0cc4d4 377 writel(v, ((void __iomem *)adev->rmmio) + (reg * 4));
2eee0229
HZ
378 else
379 adev->pcie_wreg(adev, (reg * 4), v);
2e0cc4d4
ML
380}
381
e3ecdffa 382/**
2eee0229 383 * amdgpu_device_wreg - write to a register
e3ecdffa
AD
384 *
385 * @adev: amdgpu_device pointer
386 * @reg: dword aligned register offset
387 * @v: 32 bit value to write to the register
388 * @acc_flags: access flags which require special behavior
389 *
390 * Writes the value specified to the offset specified.
391 */
2eee0229
HZ
392void amdgpu_device_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v,
393 uint32_t acc_flags)
d38ceaf9 394{
f384ff95 395 if (!(acc_flags & AMDGPU_REGS_NO_KIQ) && amdgpu_sriov_runtime(adev))
d33a99c4 396 return amdgpu_kiq_wreg(adev, reg, v);
bc992ba5 397
2eee0229 398 amdgpu_device_wreg_no_kiq(adev, reg, v, acc_flags);
2e0cc4d4 399}
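/*
 * Illustrative read-modify-write sketch using these helpers.  The register
 * offset below is a hypothetical placeholder; most callers go through the
 * RREG32()/WREG32() style macros that wrap these functions.
 *
 *	u32 tmp;
 *
 *	tmp = amdgpu_device_rreg(adev, reg_offset, 0);
 *	tmp |= 0x1;
 *	amdgpu_device_wreg(adev, reg_offset, tmp, 0);
 */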
d38ceaf9 400
2e0cc4d4
ML
401/*
402 * amdgpu_mm_wreg_mmio_rlc - write register either with mmio or with RLC path if in range
403 *
404 * This function is invoked only for debugfs register access.
405 */
406void amdgpu_mm_wreg_mmio_rlc(struct amdgpu_device *adev, uint32_t reg, uint32_t v,
407 uint32_t acc_flags)
408{
409 if (amdgpu_sriov_fullaccess(adev) &&
410 adev->gfx.rlc.funcs &&
411 adev->gfx.rlc.funcs->is_rlcg_access_range) {
47ed4e1c 412
2e0cc4d4
ML
413 if (adev->gfx.rlc.funcs->is_rlcg_access_range(adev, reg))
414 return adev->gfx.rlc.funcs->rlcg_wreg(adev, reg, v);
47ed4e1c 415 }
2e0cc4d4 416
2eee0229 417 amdgpu_device_wreg_no_kiq(adev, reg, v, acc_flags);
d38ceaf9
AD
418}
419
e3ecdffa
AD
420/**
421 * amdgpu_io_rreg - read an IO register
422 *
423 * @adev: amdgpu_device pointer
424 * @reg: dword aligned register offset
425 *
426 * Returns the 32 bit value from the offset specified.
427 */
d38ceaf9
AD
428u32 amdgpu_io_rreg(struct amdgpu_device *adev, u32 reg)
429{
430 if ((reg * 4) < adev->rio_mem_size)
431 return ioread32(adev->rio_mem + (reg * 4));
432 else {
433 iowrite32((reg * 4), adev->rio_mem + (mmMM_INDEX * 4));
434 return ioread32(adev->rio_mem + (mmMM_DATA * 4));
435 }
436}
437
e3ecdffa
AD
438/**
439 * amdgpu_io_wreg - write to an IO register
440 *
441 * @adev: amdgpu_device pointer
442 * @reg: dword aligned register offset
443 * @v: 32 bit value to write to the register
444 *
445 * Writes the value specified to the offset specified.
446 */
d38ceaf9
AD
447void amdgpu_io_wreg(struct amdgpu_device *adev, u32 reg, u32 v)
448{
d38ceaf9
AD
449 if ((reg * 4) < adev->rio_mem_size)
450 iowrite32(v, adev->rio_mem + (reg * 4));
451 else {
452 iowrite32((reg * 4), adev->rio_mem + (mmMM_INDEX * 4));
453 iowrite32(v, adev->rio_mem + (mmMM_DATA * 4));
454 }
455}
456
457/**
458 * amdgpu_mm_rdoorbell - read a doorbell dword
459 *
460 * @adev: amdgpu_device pointer
461 * @index: doorbell index
462 *
463 * Returns the value in the doorbell aperture at the
464 * requested doorbell index (CIK).
465 */
466u32 amdgpu_mm_rdoorbell(struct amdgpu_device *adev, u32 index)
467{
468 if (index < adev->doorbell.num_doorbells) {
469 return readl(adev->doorbell.ptr + index);
470 } else {
471 DRM_ERROR("reading beyond doorbell aperture: 0x%08x!\n", index);
472 return 0;
473 }
474}
475
476/**
477 * amdgpu_mm_wdoorbell - write a doorbell dword
478 *
479 * @adev: amdgpu_device pointer
480 * @index: doorbell index
481 * @v: value to write
482 *
483 * Writes @v to the doorbell aperture at the
484 * requested doorbell index (CIK).
485 */
486void amdgpu_mm_wdoorbell(struct amdgpu_device *adev, u32 index, u32 v)
487{
488 if (index < adev->doorbell.num_doorbells) {
489 writel(v, adev->doorbell.ptr + index);
490 } else {
491 DRM_ERROR("writing beyond doorbell aperture: 0x%08x!\n", index);
492 }
493}
494
832be404
KW
495/**
496 * amdgpu_mm_rdoorbell64 - read a doorbell Qword
497 *
498 * @adev: amdgpu_device pointer
499 * @index: doorbell index
500 *
501 * Returns the value in the doorbell aperture at the
502 * requested doorbell index (VEGA10+).
503 */
504u64 amdgpu_mm_rdoorbell64(struct amdgpu_device *adev, u32 index)
505{
506 if (index < adev->doorbell.num_doorbells) {
507 return atomic64_read((atomic64_t *)(adev->doorbell.ptr + index));
508 } else {
509 DRM_ERROR("reading beyond doorbell aperture: 0x%08x!\n", index);
510 return 0;
511 }
512}
513
514/**
515 * amdgpu_mm_wdoorbell64 - write a doorbell Qword
516 *
517 * @adev: amdgpu_device pointer
518 * @index: doorbell index
519 * @v: value to write
520 *
521 * Writes @v to the doorbell aperture at the
522 * requested doorbell index (VEGA10+).
523 */
524void amdgpu_mm_wdoorbell64(struct amdgpu_device *adev, u32 index, u64 v)
525{
526 if (index < adev->doorbell.num_doorbells) {
527 atomic64_set((atomic64_t *)(adev->doorbell.ptr + index), v);
528 } else {
529 DRM_ERROR("writing beyond doorbell aperture: 0x%08x!\n", index);
530 }
531}
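/*
 * Illustrative usage sketch: a ring typically kicks the GPU by writing its
 * updated write pointer to its assigned doorbell slot.  The field names
 * below are only meant as an example of a caller, not a definition of the
 * ring interface.
 *
 *	amdgpu_mm_wdoorbell64(adev, ring->doorbell_index, ring->wptr);
 */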
532
d38ceaf9
AD
533/**
534 * amdgpu_invalid_rreg - dummy reg read function
535 *
536 * @adev: amdgpu device pointer
537 * @reg: offset of register
538 *
539 * Dummy register read function. Used for register blocks
540 * that certain asics don't have (all asics).
541 * Returns the value in the register.
542 */
543static uint32_t amdgpu_invalid_rreg(struct amdgpu_device *adev, uint32_t reg)
544{
545 DRM_ERROR("Invalid callback to read register 0x%04X\n", reg);
546 BUG();
547 return 0;
548}
549
550/**
551 * amdgpu_invalid_wreg - dummy reg write function
552 *
553 * @adev: amdgpu device pointer
554 * @reg: offset of register
555 * @v: value to write to the register
556 *
557 * Dummy register write function. Used for register blocks
558 * that certain asics don't have (all asics).
559 */
560static void amdgpu_invalid_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v)
561{
562 DRM_ERROR("Invalid callback to write register 0x%04X with 0x%08X\n",
563 reg, v);
564 BUG();
565}
566
4fa1c6a6
TZ
567/**
568 * amdgpu_invalid_rreg64 - dummy 64 bit reg read function
569 *
570 * @adev: amdgpu device pointer
571 * @reg: offset of register
572 *
573 * Dummy register read function. Used for register blocks
574 * that certain asics don't have (all asics).
575 * Returns the value in the register.
576 */
577static uint64_t amdgpu_invalid_rreg64(struct amdgpu_device *adev, uint32_t reg)
578{
579 DRM_ERROR("Invalid callback to read 64 bit register 0x%04X\n", reg);
580 BUG();
581 return 0;
582}
583
584/**
585 * amdgpu_invalid_wreg64 - dummy reg write function
586 *
587 * @adev: amdgpu device pointer
588 * @reg: offset of register
589 * @v: value to write to the register
590 *
591 * Dummy register write function. Used for register blocks
592 * that certain asics don't have (all asics).
593 */
594static void amdgpu_invalid_wreg64(struct amdgpu_device *adev, uint32_t reg, uint64_t v)
595{
596 DRM_ERROR("Invalid callback to write 64 bit register 0x%04X with 0x%08llX\n",
597 reg, v);
598 BUG();
599}
600
d38ceaf9
AD
601/**
602 * amdgpu_block_invalid_rreg - dummy reg read function
603 *
604 * @adev: amdgpu device pointer
605 * @block: offset of instance
606 * @reg: offset of register
607 *
608 * Dummy register read function. Used for register blocks
609 * that certain asics don't have (all asics).
610 * Returns the value in the register.
611 */
612static uint32_t amdgpu_block_invalid_rreg(struct amdgpu_device *adev,
613 uint32_t block, uint32_t reg)
614{
615 DRM_ERROR("Invalid callback to read register 0x%04X in block 0x%04X\n",
616 reg, block);
617 BUG();
618 return 0;
619}
620
621/**
622 * amdgpu_block_invalid_wreg - dummy reg write function
623 *
624 * @adev: amdgpu device pointer
625 * @block: offset of instance
626 * @reg: offset of register
627 * @v: value to write to the register
628 *
629 * Dummy register write function. Used for register blocks
630 * that certain asics don't have (all asics).
631 */
632static void amdgpu_block_invalid_wreg(struct amdgpu_device *adev,
633 uint32_t block,
634 uint32_t reg, uint32_t v)
635{
636 DRM_ERROR("Invalid block callback to write register 0x%04X in block 0x%04X with 0x%08X\n",
637 reg, block, v);
638 BUG();
639}
640
e3ecdffa
AD
641/**
642 * amdgpu_device_vram_scratch_init - allocate the VRAM scratch page
643 *
644 * @adev: amdgpu device pointer
645 *
646 * Allocates a scratch page of VRAM for use by various things in the
647 * driver.
648 */
06ec9070 649static int amdgpu_device_vram_scratch_init(struct amdgpu_device *adev)
d38ceaf9 650{
a4a02777
CK
651 return amdgpu_bo_create_kernel(adev, AMDGPU_GPU_PAGE_SIZE,
652 PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM,
653 &adev->vram_scratch.robj,
654 &adev->vram_scratch.gpu_addr,
655 (void **)&adev->vram_scratch.ptr);
d38ceaf9
AD
656}
657
e3ecdffa
AD
658/**
659 * amdgpu_device_vram_scratch_fini - Free the VRAM scratch page
660 *
661 * @adev: amdgpu device pointer
662 *
663 * Frees the VRAM scratch page.
664 */
06ec9070 665static void amdgpu_device_vram_scratch_fini(struct amdgpu_device *adev)
d38ceaf9 666{
078af1a3 667 amdgpu_bo_free_kernel(&adev->vram_scratch.robj, NULL, NULL);
d38ceaf9
AD
668}
669
670/**
9c3f2b54 671 * amdgpu_device_program_register_sequence - program an array of registers.
d38ceaf9
AD
672 *
673 * @adev: amdgpu_device pointer
674 * @registers: pointer to the register array
675 * @array_size: size of the register array
676 *
677 * Programs an array of registers with AND and OR masks.
678 * This is a helper for setting golden registers.
679 */
9c3f2b54
AD
680void amdgpu_device_program_register_sequence(struct amdgpu_device *adev,
681 const u32 *registers,
682 const u32 array_size)
d38ceaf9
AD
683{
684 u32 tmp, reg, and_mask, or_mask;
685 int i;
686
687 if (array_size % 3)
688 return;
689
690 for (i = 0; i < array_size; i +=3) {
691 reg = registers[i + 0];
692 and_mask = registers[i + 1];
693 or_mask = registers[i + 2];
694
695 if (and_mask == 0xffffffff) {
696 tmp = or_mask;
697 } else {
698 tmp = RREG32(reg);
699 tmp &= ~and_mask;
e0d07657
HZ
700 if (adev->family >= AMDGPU_FAMILY_AI)
701 tmp |= (or_mask & and_mask);
702 else
703 tmp |= or_mask;
d38ceaf9
AD
704 }
705 WREG32(reg, tmp);
706 }
707}
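/*
 * Illustrative usage sketch: golden settings are passed as a flat array of
 * {offset, and_mask, or_mask} triples.  The offsets and masks below are
 * made-up placeholders, not real golden settings.
 *
 *	static const u32 example_golden_settings[] = {
 *		0x1234, 0xffffffff, 0x00000001,
 *		0x5678, 0x0000ff00, 0x00003200,
 *	};
 *
 *	amdgpu_device_program_register_sequence(adev, example_golden_settings,
 *						ARRAY_SIZE(example_golden_settings));
 */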
708
e3ecdffa
AD
709/**
710 * amdgpu_device_pci_config_reset - reset the GPU
711 *
712 * @adev: amdgpu_device pointer
713 *
714 * Resets the GPU using the pci config reset sequence.
715 * Only applicable to asics prior to vega10.
716 */
8111c387 717void amdgpu_device_pci_config_reset(struct amdgpu_device *adev)
d38ceaf9
AD
718{
719 pci_write_config_dword(adev->pdev, 0x7c, AMDGPU_ASIC_RESET_DATA);
720}
721
722/*
723 * GPU doorbell aperture helper functions.
724 */
725/**
06ec9070 726 * amdgpu_device_doorbell_init - Init doorbell driver information.
d38ceaf9
AD
727 *
728 * @adev: amdgpu_device pointer
729 *
730 * Init doorbell driver information (CIK)
731 * Returns 0 on success, error on failure.
732 */
06ec9070 733static int amdgpu_device_doorbell_init(struct amdgpu_device *adev)
d38ceaf9 734{
6585661d 735
705e519e
CK
736 /* No doorbell on SI hardware generation */
737 if (adev->asic_type < CHIP_BONAIRE) {
738 adev->doorbell.base = 0;
739 adev->doorbell.size = 0;
740 adev->doorbell.num_doorbells = 0;
741 adev->doorbell.ptr = NULL;
742 return 0;
743 }
744
d6895ad3
CK
745 if (pci_resource_flags(adev->pdev, 2) & IORESOURCE_UNSET)
746 return -EINVAL;
747
22357775
AD
748 amdgpu_asic_init_doorbell_index(adev);
749
d38ceaf9
AD
750 /* doorbell bar mapping */
751 adev->doorbell.base = pci_resource_start(adev->pdev, 2);
752 adev->doorbell.size = pci_resource_len(adev->pdev, 2);
753
edf600da 754 adev->doorbell.num_doorbells = min_t(u32, adev->doorbell.size / sizeof(u32),
9564f192 755 adev->doorbell_index.max_assignment+1);
d38ceaf9
AD
756 if (adev->doorbell.num_doorbells == 0)
757 return -EINVAL;
758
ec3db8a6 759 /* For Vega, reserve and map two pages on doorbell BAR since SDMA
88dc26e4
OZ
760 * paging queue doorbells use the second page. The
761 * AMDGPU_DOORBELL64_MAX_ASSIGNMENT definition assumes all the
762 * doorbells are in the first page. So with the paging queue enabled,
763 * num_doorbells needs to grow by one extra page (0x400 in dwords).
ec3db8a6
PY
764 */
765 if (adev->asic_type >= CHIP_VEGA10)
88dc26e4 766 adev->doorbell.num_doorbells += 0x400;
ec3db8a6 767
8972e5d2
CK
768 adev->doorbell.ptr = ioremap(adev->doorbell.base,
769 adev->doorbell.num_doorbells *
770 sizeof(u32));
771 if (adev->doorbell.ptr == NULL)
d38ceaf9 772 return -ENOMEM;
d38ceaf9
AD
773
774 return 0;
775}
776
777/**
06ec9070 778 * amdgpu_device_doorbell_fini - Tear down doorbell driver information.
d38ceaf9
AD
779 *
780 * @adev: amdgpu_device pointer
781 *
782 * Tear down doorbell driver information (CIK)
783 */
06ec9070 784static void amdgpu_device_doorbell_fini(struct amdgpu_device *adev)
d38ceaf9
AD
785{
786 iounmap(adev->doorbell.ptr);
787 adev->doorbell.ptr = NULL;
788}
789
22cb0164 790
d38ceaf9
AD
791
792/*
06ec9070 793 * amdgpu_device_wb_*()
455a7bc2 794 * Writeback is the method by which the GPU updates special pages in memory
ea81a173 795 * with the status of certain GPU events (fences, ring pointers, etc.).
d38ceaf9
AD
796 */
797
798/**
06ec9070 799 * amdgpu_device_wb_fini - Disable Writeback and free memory
d38ceaf9
AD
800 *
801 * @adev: amdgpu_device pointer
802 *
803 * Disables Writeback and frees the Writeback memory (all asics).
804 * Used at driver shutdown.
805 */
06ec9070 806static void amdgpu_device_wb_fini(struct amdgpu_device *adev)
d38ceaf9
AD
807{
808 if (adev->wb.wb_obj) {
a76ed485
AD
809 amdgpu_bo_free_kernel(&adev->wb.wb_obj,
810 &adev->wb.gpu_addr,
811 (void **)&adev->wb.wb);
d38ceaf9
AD
812 adev->wb.wb_obj = NULL;
813 }
814}
815
816/**
06ec9070 817 * amdgpu_device_wb_init- Init Writeback driver info and allocate memory
d38ceaf9
AD
818 *
819 * @adev: amdgpu_device pointer
820 *
455a7bc2 821 * Initializes writeback and allocates writeback memory (all asics).
d38ceaf9
AD
822 * Used at driver startup.
823 * Returns 0 on success or a negative error code on failure.
824 */
06ec9070 825static int amdgpu_device_wb_init(struct amdgpu_device *adev)
d38ceaf9
AD
826{
827 int r;
828
829 if (adev->wb.wb_obj == NULL) {
97407b63
AD
830 /* AMDGPU_MAX_WB * sizeof(uint32_t) * 8 = AMDGPU_MAX_WB 256bit slots */
831 r = amdgpu_bo_create_kernel(adev, AMDGPU_MAX_WB * sizeof(uint32_t) * 8,
a76ed485
AD
832 PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT,
833 &adev->wb.wb_obj, &adev->wb.gpu_addr,
834 (void **)&adev->wb.wb);
d38ceaf9
AD
835 if (r) {
836 dev_warn(adev->dev, "(%d) create WB bo failed\n", r);
837 return r;
838 }
d38ceaf9
AD
839
840 adev->wb.num_wb = AMDGPU_MAX_WB;
841 memset(&adev->wb.used, 0, sizeof(adev->wb.used));
842
843 /* clear wb memory */
73469585 844 memset((char *)adev->wb.wb, 0, AMDGPU_MAX_WB * sizeof(uint32_t) * 8);
d38ceaf9
AD
845 }
846
847 return 0;
848}
849
850/**
131b4b36 851 * amdgpu_device_wb_get - Allocate a wb entry
d38ceaf9
AD
852 *
853 * @adev: amdgpu_device pointer
854 * @wb: wb index
855 *
856 * Allocate a wb slot for use by the driver (all asics).
857 * Returns 0 on success or -EINVAL on failure.
858 */
131b4b36 859int amdgpu_device_wb_get(struct amdgpu_device *adev, u32 *wb)
d38ceaf9
AD
860{
861 unsigned long offset = find_first_zero_bit(adev->wb.used, adev->wb.num_wb);
d38ceaf9 862
97407b63 863 if (offset < adev->wb.num_wb) {
7014285a 864 __set_bit(offset, adev->wb.used);
63ae07ca 865 *wb = offset << 3; /* convert to dw offset */
0915fdbc
ML
866 return 0;
867 } else {
868 return -EINVAL;
869 }
870}
871
d38ceaf9 872/**
131b4b36 873 * amdgpu_device_wb_free - Free a wb entry
d38ceaf9
AD
874 *
875 * @adev: amdgpu_device pointer
876 * @wb: wb index
877 *
878 * Free a wb slot allocated for use by the driver (all asics)
879 */
131b4b36 880void amdgpu_device_wb_free(struct amdgpu_device *adev, u32 wb)
d38ceaf9 881{
73469585 882 wb >>= 3;
d38ceaf9 883 if (wb < adev->wb.num_wb)
73469585 884 __clear_bit(wb, adev->wb.used);
d38ceaf9
AD
885}
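/*
 * Illustrative usage sketch: a caller grabs a writeback slot, points the GPU
 * at it (typically through a ring or fence setup path), and releases it when
 * done.  Per the dword-offset convention above, the CPU view of the slot is
 * adev->wb.wb[wb] and the GPU address is adev->wb.gpu_addr + wb * 4.
 *
 *	u32 wb;
 *	int r;
 *
 *	r = amdgpu_device_wb_get(adev, &wb);
 *	if (r)
 *		return r;
 *	...
 *	amdgpu_device_wb_free(adev, wb);
 */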
886
d6895ad3
CK
887/**
888 * amdgpu_device_resize_fb_bar - try to resize FB BAR
889 *
890 * @adev: amdgpu_device pointer
891 *
892 * Try to resize FB BAR to make all VRAM CPU accessible. We try very hard not
893 * to fail, but if any of the BARs is not accessible after the resize we abort
894 * driver loading by returning -ENODEV.
895 */
896int amdgpu_device_resize_fb_bar(struct amdgpu_device *adev)
897{
770d13b1 898 u64 space_needed = roundup_pow_of_two(adev->gmc.real_vram_size);
d6895ad3 899 u32 rbar_size = order_base_2(((space_needed >> 20) | 1)) - 1;
31b8adab
CK
900 struct pci_bus *root;
901 struct resource *res;
902 unsigned i;
d6895ad3
CK
903 u16 cmd;
904 int r;
905
0c03b912 906 /* Bypass for VF */
907 if (amdgpu_sriov_vf(adev))
908 return 0;
909
31b8adab
CK
910 /* Check if the root BUS has 64bit memory resources */
911 root = adev->pdev->bus;
912 while (root->parent)
913 root = root->parent;
914
915 pci_bus_for_each_resource(root, res, i) {
0ebb7c54 916 if (res && res->flags & (IORESOURCE_MEM | IORESOURCE_MEM_64) &&
31b8adab
CK
917 res->start > 0x100000000ull)
918 break;
919 }
920
921 /* Trying to resize is pointless without a root hub window above 4GB */
922 if (!res)
923 return 0;
924
d6895ad3
CK
925 /* Disable memory decoding while we change the BAR addresses and size */
926 pci_read_config_word(adev->pdev, PCI_COMMAND, &cmd);
927 pci_write_config_word(adev->pdev, PCI_COMMAND,
928 cmd & ~PCI_COMMAND_MEMORY);
929
930 /* Free the VRAM and doorbell BAR, we most likely need to move both. */
06ec9070 931 amdgpu_device_doorbell_fini(adev);
d6895ad3
CK
932 if (adev->asic_type >= CHIP_BONAIRE)
933 pci_release_resource(adev->pdev, 2);
934
935 pci_release_resource(adev->pdev, 0);
936
937 r = pci_resize_resource(adev->pdev, 0, rbar_size);
938 if (r == -ENOSPC)
939 DRM_INFO("Not enough PCI address space for a large BAR.");
940 else if (r && r != -ENOTSUPP)
941 DRM_ERROR("Problem resizing BAR0 (%d).", r);
942
943 pci_assign_unassigned_bus_resources(adev->pdev->bus);
944
945 /* When the doorbell or fb BAR isn't available we have no chance of
946 * using the device.
947 */
06ec9070 948 r = amdgpu_device_doorbell_init(adev);
d6895ad3
CK
949 if (r || (pci_resource_flags(adev->pdev, 0) & IORESOURCE_UNSET))
950 return -ENODEV;
951
952 pci_write_config_word(adev->pdev, PCI_COMMAND, cmd);
953
954 return 0;
955}
a05502e5 956
d38ceaf9
AD
957/*
958 * GPU helpers function.
959 */
960/**
39c640c0 961 * amdgpu_device_need_post - check if the hw need post or not
d38ceaf9
AD
962 *
963 * @adev: amdgpu_device pointer
964 *
c836fec5
JQ
965 * Check if the asic needs to be posted (all asics), either because it has
966 * not been initialized at driver startup or because a hw reset was performed.
967 * Returns true if post is needed, false if not.
d38ceaf9 968 */
39c640c0 969bool amdgpu_device_need_post(struct amdgpu_device *adev)
d38ceaf9
AD
970{
971 uint32_t reg;
972
bec86378
ML
973 if (amdgpu_sriov_vf(adev))
974 return false;
975
976 if (amdgpu_passthrough(adev)) {
1da2c326
ML
977 /* for FIJI: In the whole-GPU pass-through virtualization case, after a VM reboot
978 * some old SMC firmware still needs the driver to do vPost, otherwise the gpu hangs.
979 * SMC firmware versions above 22.15 don't have this flaw, so we only force
980 * vPost for SMC versions below 22.15.
bec86378
ML
981 */
982 if (adev->asic_type == CHIP_FIJI) {
983 int err;
984 uint32_t fw_ver;
985 err = request_firmware(&adev->pm.fw, "amdgpu/fiji_smc.bin", adev->dev);
986 /* force vPost if an error occurred */
987 if (err)
988 return true;
989
990 fw_ver = *((uint32_t *)adev->pm.fw->data + 69);
1da2c326
ML
991 if (fw_ver < 0x00160e00)
992 return true;
bec86378 993 }
bec86378 994 }
91fe77eb 995
996 if (adev->has_hw_reset) {
997 adev->has_hw_reset = false;
998 return true;
999 }
1000
1001 /* bios scratch used on CIK+ */
1002 if (adev->asic_type >= CHIP_BONAIRE)
1003 return amdgpu_atombios_scratch_need_asic_init(adev);
1004
1005 /* check MEM_SIZE for older asics */
1006 reg = amdgpu_asic_get_config_memsize(adev);
1007
1008 if ((reg != 0) && (reg != 0xffffffff))
1009 return false;
1010
1011 return true;
bec86378
ML
1012}
1013
d38ceaf9
AD
1014/* if we get transitioned to only one device, take VGA back */
1015/**
06ec9070 1016 * amdgpu_device_vga_set_decode - enable/disable vga decode
d38ceaf9
AD
1017 *
1018 * @cookie: amdgpu_device pointer
1019 * @state: enable/disable vga decode
1020 *
1021 * Enable/disable vga decode (all asics).
1022 * Returns VGA resource flags.
1023 */
06ec9070 1024static unsigned int amdgpu_device_vga_set_decode(void *cookie, bool state)
d38ceaf9
AD
1025{
1026 struct amdgpu_device *adev = cookie;
1027 amdgpu_asic_set_vga_state(adev, state);
1028 if (state)
1029 return VGA_RSRC_LEGACY_IO | VGA_RSRC_LEGACY_MEM |
1030 VGA_RSRC_NORMAL_IO | VGA_RSRC_NORMAL_MEM;
1031 else
1032 return VGA_RSRC_NORMAL_IO | VGA_RSRC_NORMAL_MEM;
1033}
1034
e3ecdffa
AD
1035/**
1036 * amdgpu_device_check_block_size - validate the vm block size
1037 *
1038 * @adev: amdgpu_device pointer
1039 *
1040 * Validates the vm block size specified via module parameter.
1041 * The vm block size defines number of bits in page table versus page directory,
1042 * a page is 4KB so we have 12 bits offset, minimum 9 bits in the
1043 * page table and the remaining bits are in the page directory.
1044 */
06ec9070 1045static void amdgpu_device_check_block_size(struct amdgpu_device *adev)
a1adf8be
CZ
1046{
1047 /* defines number of bits in page table versus page directory,
1048 * a page is 4KB so we have 12 bits offset, minimum 9 bits in the
1049 * page table and the remaining bits are in the page directory */
bab4fee7
JZ
1050 if (amdgpu_vm_block_size == -1)
1051 return;
a1adf8be 1052
bab4fee7 1053 if (amdgpu_vm_block_size < 9) {
a1adf8be
CZ
1054 dev_warn(adev->dev, "VM page table size (%d) too small\n",
1055 amdgpu_vm_block_size);
97489129 1056 amdgpu_vm_block_size = -1;
a1adf8be 1057 }
a1adf8be
CZ
1058}
1059
e3ecdffa
AD
1060/**
1061 * amdgpu_device_check_vm_size - validate the vm size
1062 *
1063 * @adev: amdgpu_device pointer
1064 *
1065 * Validates the vm size in GB specified via module parameter.
1066 * The VM size is the size of the GPU virtual memory space in GB.
1067 */
06ec9070 1068static void amdgpu_device_check_vm_size(struct amdgpu_device *adev)
83ca145d 1069{
64dab074
AD
1070 /* no need to check the default value */
1071 if (amdgpu_vm_size == -1)
1072 return;
1073
83ca145d
ZJ
1074 if (amdgpu_vm_size < 1) {
1075 dev_warn(adev->dev, "VM size (%d) too small, min is 1GB\n",
1076 amdgpu_vm_size);
f3368128 1077 amdgpu_vm_size = -1;
83ca145d 1078 }
83ca145d
ZJ
1079}
1080
7951e376
RZ
1081static void amdgpu_device_check_smu_prv_buffer_size(struct amdgpu_device *adev)
1082{
1083 struct sysinfo si;
a9d4fe2f 1084 bool is_os_64 = (sizeof(void *) == 8);
7951e376
RZ
1085 uint64_t total_memory;
1086 uint64_t dram_size_seven_GB = 0x1B8000000;
1087 uint64_t dram_size_three_GB = 0xB8000000;
1088
1089 if (amdgpu_smu_memory_pool_size == 0)
1090 return;
1091
1092 if (!is_os_64) {
1093 DRM_WARN("Not 64-bit OS, feature not supported\n");
1094 goto def_value;
1095 }
1096 si_meminfo(&si);
1097 total_memory = (uint64_t)si.totalram * si.mem_unit;
1098
1099 if ((amdgpu_smu_memory_pool_size == 1) ||
1100 (amdgpu_smu_memory_pool_size == 2)) {
1101 if (total_memory < dram_size_three_GB)
1102 goto def_value1;
1103 } else if ((amdgpu_smu_memory_pool_size == 4) ||
1104 (amdgpu_smu_memory_pool_size == 8)) {
1105 if (total_memory < dram_size_seven_GB)
1106 goto def_value1;
1107 } else {
1108 DRM_WARN("Smu memory pool size not supported\n");
1109 goto def_value;
1110 }
1111 adev->pm.smu_prv_buffer_size = amdgpu_smu_memory_pool_size << 28;
1112
1113 return;
1114
1115def_value1:
1116 DRM_WARN("Not enough system memory\n");
1117def_value:
1118 adev->pm.smu_prv_buffer_size = 0;
1119}
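/*
 * Worked example of the shift used above (illustrative): the module parameter
 * amdgpu_smu_memory_pool_size is given in units of 256 MiB, so the supported
 * values map to
 *
 *	1 << 28 = 256 MiB,  2 << 28 = 512 MiB,
 *	4 << 28 = 1 GiB,    8 << 28 = 2 GiB,
 *
 * and the 3 GiB / 7 GiB system RAM checks above gate the smaller and larger
 * pool sizes respectively.
 */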
1120
d38ceaf9 1121/**
06ec9070 1122 * amdgpu_device_check_arguments - validate module params
d38ceaf9
AD
1123 *
1124 * @adev: amdgpu_device pointer
1125 *
1126 * Validates certain module parameters and updates
1127 * the associated values used by the driver (all asics).
1128 */
912dfc84 1129static int amdgpu_device_check_arguments(struct amdgpu_device *adev)
d38ceaf9 1130{
5b011235
CZ
1131 if (amdgpu_sched_jobs < 4) {
1132 dev_warn(adev->dev, "sched jobs (%d) must be at least 4\n",
1133 amdgpu_sched_jobs);
1134 amdgpu_sched_jobs = 4;
76117507 1135 } else if (!is_power_of_2(amdgpu_sched_jobs)){
5b011235
CZ
1136 dev_warn(adev->dev, "sched jobs (%d) must be a power of 2\n",
1137 amdgpu_sched_jobs);
1138 amdgpu_sched_jobs = roundup_pow_of_two(amdgpu_sched_jobs);
1139 }
d38ceaf9 1140
83e74db6 1141 if (amdgpu_gart_size != -1 && amdgpu_gart_size < 32) {
f9321cc4
CK
1142 /* gart size must be greater or equal to 32M */
1143 dev_warn(adev->dev, "gart size (%d) too small\n",
1144 amdgpu_gart_size);
83e74db6 1145 amdgpu_gart_size = -1;
d38ceaf9
AD
1146 }
1147
36d38372 1148 if (amdgpu_gtt_size != -1 && amdgpu_gtt_size < 32) {
c4e1a13a 1149 /* gtt size must be greater or equal to 32M */
36d38372
CK
1150 dev_warn(adev->dev, "gtt size (%d) too small\n",
1151 amdgpu_gtt_size);
1152 amdgpu_gtt_size = -1;
d38ceaf9
AD
1153 }
1154
d07f14be
RH
1155 /* valid range is between 4 and 9 inclusive */
1156 if (amdgpu_vm_fragment_size != -1 &&
1157 (amdgpu_vm_fragment_size > 9 || amdgpu_vm_fragment_size < 4)) {
1158 dev_warn(adev->dev, "valid range is between 4 and 9\n");
1159 amdgpu_vm_fragment_size = -1;
1160 }
1161
7951e376
RZ
1162 amdgpu_device_check_smu_prv_buffer_size(adev);
1163
06ec9070 1164 amdgpu_device_check_vm_size(adev);
d38ceaf9 1165
06ec9070 1166 amdgpu_device_check_block_size(adev);
6a7f76e7 1167
19aede77 1168 adev->firmware.load_type = amdgpu_ucode_get_load_type(adev, amdgpu_fw_load_type);
912dfc84 1169
c6252390 1170 amdgpu_gmc_tmz_set(adev);
01a8dcec 1171
e3c00faa 1172 return 0;
d38ceaf9
AD
1173}
1174
1175/**
1176 * amdgpu_switcheroo_set_state - set switcheroo state
1177 *
1178 * @pdev: pci dev pointer
1694467b 1179 * @state: vga_switcheroo state
d38ceaf9
AD
1180 *
1181 * Callback for the switcheroo driver. Suspends or resumes
1182 * the asic before or after it is powered up using ACPI methods.
1183 */
1184static void amdgpu_switcheroo_set_state(struct pci_dev *pdev, enum vga_switcheroo_state state)
1185{
1186 struct drm_device *dev = pci_get_drvdata(pdev);
de185019 1187 int r;
d38ceaf9 1188
31af062a 1189 if (amdgpu_device_supports_boco(dev) && state == VGA_SWITCHEROO_OFF)
d38ceaf9
AD
1190 return;
1191
1192 if (state == VGA_SWITCHEROO_ON) {
dd4fa6c1 1193 pr_info("switched on\n");
d38ceaf9
AD
1194 /* don't suspend or resume card normally */
1195 dev->switch_power_state = DRM_SWITCH_POWER_CHANGING;
1196
de185019
AD
1197 pci_set_power_state(dev->pdev, PCI_D0);
1198 pci_restore_state(dev->pdev);
1199 r = pci_enable_device(dev->pdev);
1200 if (r)
1201 DRM_WARN("pci_enable_device failed (%d)\n", r);
1202 amdgpu_device_resume(dev, true);
d38ceaf9 1203
d38ceaf9
AD
1204 dev->switch_power_state = DRM_SWITCH_POWER_ON;
1205 drm_kms_helper_poll_enable(dev);
1206 } else {
dd4fa6c1 1207 pr_info("switched off\n");
d38ceaf9
AD
1208 drm_kms_helper_poll_disable(dev);
1209 dev->switch_power_state = DRM_SWITCH_POWER_CHANGING;
de185019
AD
1210 amdgpu_device_suspend(dev, true);
1211 pci_save_state(dev->pdev);
1212 /* Shut down the device */
1213 pci_disable_device(dev->pdev);
1214 pci_set_power_state(dev->pdev, PCI_D3cold);
d38ceaf9
AD
1215 dev->switch_power_state = DRM_SWITCH_POWER_OFF;
1216 }
1217}
1218
1219/**
1220 * amdgpu_switcheroo_can_switch - see if switcheroo state can change
1221 *
1222 * @pdev: pci dev pointer
1223 *
1224 * Callback for the switcheroo driver. Check if the switcheroo
1225 * state can be changed.
1226 * Returns true if the state can be changed, false if not.
1227 */
1228static bool amdgpu_switcheroo_can_switch(struct pci_dev *pdev)
1229{
1230 struct drm_device *dev = pci_get_drvdata(pdev);
1231
1232 /*
1233 * FIXME: open_count is protected by drm_global_mutex but that would lead to
1234 * locking inversion with the driver load path. And the access here is
1235 * completely racy anyway. So don't bother with locking for now.
1236 */
7e13ad89 1237 return atomic_read(&dev->open_count) == 0;
d38ceaf9
AD
1238}
1239
1240static const struct vga_switcheroo_client_ops amdgpu_switcheroo_ops = {
1241 .set_gpu_state = amdgpu_switcheroo_set_state,
1242 .reprobe = NULL,
1243 .can_switch = amdgpu_switcheroo_can_switch,
1244};
1245
e3ecdffa
AD
1246/**
1247 * amdgpu_device_ip_set_clockgating_state - set the CG state
1248 *
87e3f136 1249 * @dev: amdgpu_device pointer
e3ecdffa
AD
1250 * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
1251 * @state: clockgating state (gate or ungate)
1252 *
1253 * Sets the requested clockgating state for all instances of
1254 * the hardware IP specified.
1255 * Returns the error code from the last instance.
1256 */
43fa561f 1257int amdgpu_device_ip_set_clockgating_state(void *dev,
2990a1fc
AD
1258 enum amd_ip_block_type block_type,
1259 enum amd_clockgating_state state)
d38ceaf9 1260{
43fa561f 1261 struct amdgpu_device *adev = dev;
d38ceaf9
AD
1262 int i, r = 0;
1263
1264 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 1265 if (!adev->ip_blocks[i].status.valid)
9ecbe7f5 1266 continue;
c722865a
RZ
1267 if (adev->ip_blocks[i].version->type != block_type)
1268 continue;
1269 if (!adev->ip_blocks[i].version->funcs->set_clockgating_state)
1270 continue;
1271 r = adev->ip_blocks[i].version->funcs->set_clockgating_state(
1272 (void *)adev, state);
1273 if (r)
1274 DRM_ERROR("set_clockgating_state of IP block <%s> failed %d\n",
1275 adev->ip_blocks[i].version->funcs->name, r);
d38ceaf9
AD
1276 }
1277 return r;
1278}
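/*
 * Illustrative usage sketch: request clock gating for every GFX IP instance
 * on the device.  Which block type and state a caller picks is entirely up
 * to that caller; this is only an example invocation.
 *
 *	amdgpu_device_ip_set_clockgating_state(adev, AMD_IP_BLOCK_TYPE_GFX,
 *					       AMD_CG_STATE_GATE);
 */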
1279
e3ecdffa
AD
1280/**
1281 * amdgpu_device_ip_set_powergating_state - set the PG state
1282 *
87e3f136 1283 * @dev: amdgpu_device pointer
e3ecdffa
AD
1284 * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
1285 * @state: powergating state (gate or ungate)
1286 *
1287 * Sets the requested powergating state for all instances of
1288 * the hardware IP specified.
1289 * Returns the error code from the last instance.
1290 */
43fa561f 1291int amdgpu_device_ip_set_powergating_state(void *dev,
2990a1fc
AD
1292 enum amd_ip_block_type block_type,
1293 enum amd_powergating_state state)
d38ceaf9 1294{
43fa561f 1295 struct amdgpu_device *adev = dev;
d38ceaf9
AD
1296 int i, r = 0;
1297
1298 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 1299 if (!adev->ip_blocks[i].status.valid)
9ecbe7f5 1300 continue;
c722865a
RZ
1301 if (adev->ip_blocks[i].version->type != block_type)
1302 continue;
1303 if (!adev->ip_blocks[i].version->funcs->set_powergating_state)
1304 continue;
1305 r = adev->ip_blocks[i].version->funcs->set_powergating_state(
1306 (void *)adev, state);
1307 if (r)
1308 DRM_ERROR("set_powergating_state of IP block <%s> failed %d\n",
1309 adev->ip_blocks[i].version->funcs->name, r);
d38ceaf9
AD
1310 }
1311 return r;
1312}
1313
e3ecdffa
AD
1314/**
1315 * amdgpu_device_ip_get_clockgating_state - get the CG state
1316 *
1317 * @adev: amdgpu_device pointer
1318 * @flags: clockgating feature flags
1319 *
1320 * Walks the list of IPs on the device and updates the clockgating
1321 * flags for each IP.
1322 * Updates @flags with the feature flags for each hardware IP where
1323 * clockgating is enabled.
1324 */
2990a1fc
AD
1325void amdgpu_device_ip_get_clockgating_state(struct amdgpu_device *adev,
1326 u32 *flags)
6cb2d4e4
HR
1327{
1328 int i;
1329
1330 for (i = 0; i < adev->num_ip_blocks; i++) {
1331 if (!adev->ip_blocks[i].status.valid)
1332 continue;
1333 if (adev->ip_blocks[i].version->funcs->get_clockgating_state)
1334 adev->ip_blocks[i].version->funcs->get_clockgating_state((void *)adev, flags);
1335 }
1336}
1337
e3ecdffa
AD
1338/**
1339 * amdgpu_device_ip_wait_for_idle - wait for idle
1340 *
1341 * @adev: amdgpu_device pointer
1342 * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
1343 *
1344 * Waits for the requested hardware IP to be idle.
1345 * Returns 0 for success or a negative error code on failure.
1346 */
2990a1fc
AD
1347int amdgpu_device_ip_wait_for_idle(struct amdgpu_device *adev,
1348 enum amd_ip_block_type block_type)
5dbbb60b
AD
1349{
1350 int i, r;
1351
1352 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 1353 if (!adev->ip_blocks[i].status.valid)
9ecbe7f5 1354 continue;
a1255107
AD
1355 if (adev->ip_blocks[i].version->type == block_type) {
1356 r = adev->ip_blocks[i].version->funcs->wait_for_idle((void *)adev);
5dbbb60b
AD
1357 if (r)
1358 return r;
1359 break;
1360 }
1361 }
1362 return 0;
1363
1364}
1365
e3ecdffa
AD
1366/**
1367 * amdgpu_device_ip_is_idle - is the hardware IP idle
1368 *
1369 * @adev: amdgpu_device pointer
1370 * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
1371 *
1372 * Check if the hardware IP is idle or not.
1373 * Returns true if the IP is idle, false if not.
1374 */
2990a1fc
AD
1375bool amdgpu_device_ip_is_idle(struct amdgpu_device *adev,
1376 enum amd_ip_block_type block_type)
5dbbb60b
AD
1377{
1378 int i;
1379
1380 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 1381 if (!adev->ip_blocks[i].status.valid)
9ecbe7f5 1382 continue;
a1255107
AD
1383 if (adev->ip_blocks[i].version->type == block_type)
1384 return adev->ip_blocks[i].version->funcs->is_idle((void *)adev);
5dbbb60b
AD
1385 }
1386 return true;
1387
1388}
1389
e3ecdffa
AD
1390/**
1391 * amdgpu_device_ip_get_ip_block - get a hw IP pointer
1392 *
1393 * @adev: amdgpu_device pointer
87e3f136 1394 * @type: Type of hardware IP (SMU, GFX, UVD, etc.)
e3ecdffa
AD
1395 *
1396 * Returns a pointer to the hardware IP block structure
1397 * if it exists for the asic, otherwise NULL.
1398 */
2990a1fc
AD
1399struct amdgpu_ip_block *
1400amdgpu_device_ip_get_ip_block(struct amdgpu_device *adev,
1401 enum amd_ip_block_type type)
d38ceaf9
AD
1402{
1403 int i;
1404
1405 for (i = 0; i < adev->num_ip_blocks; i++)
a1255107 1406 if (adev->ip_blocks[i].version->type == type)
d38ceaf9
AD
1407 return &adev->ip_blocks[i];
1408
1409 return NULL;
1410}
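/*
 * Illustrative usage sketch: look up the GFX IP block and branch on its
 * major version before taking a version-specific path.  The version check
 * is an arbitrary example and handle_gfx10_plus() is a hypothetical helper.
 *
 *	struct amdgpu_ip_block *ip_block;
 *
 *	ip_block = amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_GFX);
 *	if (ip_block && ip_block->version->major >= 10)
 *		handle_gfx10_plus(adev);
 */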
1411
1412/**
2990a1fc 1413 * amdgpu_device_ip_block_version_cmp
d38ceaf9
AD
1414 *
1415 * @adev: amdgpu_device pointer
5fc3aeeb 1416 * @type: enum amd_ip_block_type
d38ceaf9
AD
1417 * @major: major version
1418 * @minor: minor version
1419 *
1420 * return 0 if equal or greater
1421 * return 1 if smaller or the ip_block doesn't exist
1422 */
2990a1fc
AD
1423int amdgpu_device_ip_block_version_cmp(struct amdgpu_device *adev,
1424 enum amd_ip_block_type type,
1425 u32 major, u32 minor)
d38ceaf9 1426{
2990a1fc 1427 struct amdgpu_ip_block *ip_block = amdgpu_device_ip_get_ip_block(adev, type);
d38ceaf9 1428
a1255107
AD
1429 if (ip_block && ((ip_block->version->major > major) ||
1430 ((ip_block->version->major == major) &&
1431 (ip_block->version->minor >= minor))))
d38ceaf9
AD
1432 return 0;
1433
1434 return 1;
1435}
1436
a1255107 1437/**
2990a1fc 1438 * amdgpu_device_ip_block_add
a1255107
AD
1439 *
1440 * @adev: amdgpu_device pointer
1441 * @ip_block_version: pointer to the IP to add
1442 *
1443 * Adds the IP block driver information to the collection of IPs
1444 * on the asic.
1445 */
2990a1fc
AD
1446int amdgpu_device_ip_block_add(struct amdgpu_device *adev,
1447 const struct amdgpu_ip_block_version *ip_block_version)
a1255107
AD
1448{
1449 if (!ip_block_version)
1450 return -EINVAL;
1451
e966a725 1452 DRM_INFO("add ip block number %d <%s>\n", adev->num_ip_blocks,
a0bae357
HR
1453 ip_block_version->funcs->name);
1454
a1255107
AD
1455 adev->ip_blocks[adev->num_ip_blocks++].version = ip_block_version;
1456
1457 return 0;
1458}
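/*
 * Illustrative usage sketch: the per-asic setup functions
 * (vi_set_ip_blocks(), soc15_set_ip_blocks(), etc., called from
 * amdgpu_device_ip_early_init() below) build the IP list with a series of
 * calls along these lines (block names are examples):
 *
 *	amdgpu_device_ip_block_add(adev, &gfx_v8_0_ip_block);
 *	amdgpu_device_ip_block_add(adev, &sdma_v3_0_ip_block);
 */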
1459
e3ecdffa
AD
1460/**
1461 * amdgpu_device_enable_virtual_display - enable virtual display feature
1462 *
1463 * @adev: amdgpu_device pointer
1464 *
1465 * Enables the virtual display feature if the user has enabled it via
1466 * the module parameter virtual_display. This feature provides a virtual
1467 * display hardware on headless boards or in virtualized environments.
1468 * This function parses and validates the configuration string specified by
1469 * the user and configures the virtual display configuration (number of
1470 * virtual connectors, crtcs, etc.) specified.
1471 */
483ef985 1472static void amdgpu_device_enable_virtual_display(struct amdgpu_device *adev)
9accf2fd
ED
1473{
1474 adev->enable_virtual_display = false;
1475
1476 if (amdgpu_virtual_display) {
1477 struct drm_device *ddev = adev->ddev;
1478 const char *pci_address_name = pci_name(ddev->pdev);
0f66356d 1479 char *pciaddstr, *pciaddstr_tmp, *pciaddname_tmp, *pciaddname;
9accf2fd
ED
1480
1481 pciaddstr = kstrdup(amdgpu_virtual_display, GFP_KERNEL);
1482 pciaddstr_tmp = pciaddstr;
0f66356d
ED
1483 while ((pciaddname_tmp = strsep(&pciaddstr_tmp, ";"))) {
1484 pciaddname = strsep(&pciaddname_tmp, ",");
967de2a9
YT
1485 if (!strcmp("all", pciaddname)
1486 || !strcmp(pci_address_name, pciaddname)) {
0f66356d
ED
1487 long num_crtc;
1488 int res = -1;
1489
9accf2fd 1490 adev->enable_virtual_display = true;
0f66356d
ED
1491
1492 if (pciaddname_tmp)
1493 res = kstrtol(pciaddname_tmp, 10,
1494 &num_crtc);
1495
1496 if (!res) {
1497 if (num_crtc < 1)
1498 num_crtc = 1;
1499 if (num_crtc > 6)
1500 num_crtc = 6;
1501 adev->mode_info.num_crtc = num_crtc;
1502 } else {
1503 adev->mode_info.num_crtc = 1;
1504 }
9accf2fd
ED
1505 break;
1506 }
1507 }
1508
0f66356d
ED
1509 DRM_INFO("virtual display string:%s, %s:virtual_display:%d, num_crtc:%d\n",
1510 amdgpu_virtual_display, pci_address_name,
1511 adev->enable_virtual_display, adev->mode_info.num_crtc);
9accf2fd
ED
1512
1513 kfree(pciaddstr);
1514 }
1515}
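/*
 * Illustrative parameter format, as parsed above (values are examples): the
 * string is a ';'-separated list of <pci address>[,<num crtc>] entries, so
 *
 *	amdgpu.virtual_display=0000:03:00.0,2
 *
 * would enable two virtual crtcs on the device at 0000:03:00.0, while
 * "all,1" would enable one virtual crtc on every amdgpu device.
 */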
1516
e3ecdffa
AD
1517/**
1518 * amdgpu_device_parse_gpu_info_fw - parse gpu info firmware
1519 *
1520 * @adev: amdgpu_device pointer
1521 *
1522 * Parses the asic configuration parameters specified in the gpu info
1523 * firmware and makes them available to the driver for use in configuring
1524 * the asic.
1525 * Returns 0 on success, -EINVAL on failure.
1526 */
e2a75f88
AD
1527static int amdgpu_device_parse_gpu_info_fw(struct amdgpu_device *adev)
1528{
e2a75f88
AD
1529 const char *chip_name;
1530 char fw_name[30];
1531 int err;
1532 const struct gpu_info_firmware_header_v1_0 *hdr;
1533
ab4fe3e1
HR
1534 adev->firmware.gpu_info_fw = NULL;
1535
e2a75f88 1536 switch (adev->asic_type) {
e2a75f88
AD
1537#ifdef CONFIG_DRM_AMDGPU_SI
1538 case CHIP_VERDE:
1539 case CHIP_TAHITI:
1540 case CHIP_PITCAIRN:
1541 case CHIP_OLAND:
1542 case CHIP_HAINAN:
1543#endif
1544#ifdef CONFIG_DRM_AMDGPU_CIK
1545 case CHIP_BONAIRE:
1546 case CHIP_HAWAII:
1547 case CHIP_KAVERI:
1548 case CHIP_KABINI:
1549 case CHIP_MULLINS:
1550#endif
da87c30b
AD
1551 case CHIP_TOPAZ:
1552 case CHIP_TONGA:
1553 case CHIP_FIJI:
1554 case CHIP_POLARIS10:
1555 case CHIP_POLARIS11:
1556 case CHIP_POLARIS12:
1557 case CHIP_VEGAM:
1558 case CHIP_CARRIZO:
1559 case CHIP_STONEY:
27c0bc71 1560 case CHIP_VEGA20:
e2a75f88
AD
1561 default:
1562 return 0;
1563 case CHIP_VEGA10:
1564 chip_name = "vega10";
1565 break;
3f76dced
AD
1566 case CHIP_VEGA12:
1567 chip_name = "vega12";
1568 break;
2d2e5e7e 1569 case CHIP_RAVEN:
54f78a76 1570 if (adev->apu_flags & AMD_APU_IS_RAVEN2)
54c4d17e 1571 chip_name = "raven2";
54f78a76 1572 else if (adev->apu_flags & AMD_APU_IS_PICASSO)
741deade 1573 chip_name = "picasso";
54c4d17e
FX
1574 else
1575 chip_name = "raven";
2d2e5e7e 1576 break;
65e60f6e
LM
1577 case CHIP_ARCTURUS:
1578 chip_name = "arcturus";
1579 break;
b51a26a0
HR
1580 case CHIP_RENOIR:
1581 chip_name = "renoir";
1582 break;
23c6268e
HR
1583 case CHIP_NAVI10:
1584 chip_name = "navi10";
1585 break;
ed42cfe1
XY
1586 case CHIP_NAVI14:
1587 chip_name = "navi14";
1588 break;
42b325e5
XY
1589 case CHIP_NAVI12:
1590 chip_name = "navi12";
1591 break;
e2a75f88
AD
1592 }
1593
1594 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_gpu_info.bin", chip_name);
ab4fe3e1 1595 err = request_firmware(&adev->firmware.gpu_info_fw, fw_name, adev->dev);
e2a75f88
AD
1596 if (err) {
1597 dev_err(adev->dev,
1598 "Failed to load gpu_info firmware \"%s\"\n",
1599 fw_name);
1600 goto out;
1601 }
ab4fe3e1 1602 err = amdgpu_ucode_validate(adev->firmware.gpu_info_fw);
e2a75f88
AD
1603 if (err) {
1604 dev_err(adev->dev,
1605 "Failed to validate gpu_info firmware \"%s\"\n",
1606 fw_name);
1607 goto out;
1608 }
1609
ab4fe3e1 1610 hdr = (const struct gpu_info_firmware_header_v1_0 *)adev->firmware.gpu_info_fw->data;
e2a75f88
AD
1611 amdgpu_ucode_print_gpu_info_hdr(&hdr->header);
1612
1613 switch (hdr->version_major) {
1614 case 1:
1615 {
1616 const struct gpu_info_firmware_v1_0 *gpu_info_fw =
ab4fe3e1 1617 (const struct gpu_info_firmware_v1_0 *)(adev->firmware.gpu_info_fw->data +
e2a75f88
AD
1618 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1619
6ba57b7a
AD
1620 if (amdgpu_discovery && adev->asic_type >= CHIP_NAVI10) {
1621 amdgpu_discovery_get_gfx_info(adev);
ec51d3fa 1622 goto parse_soc_bounding_box;
6ba57b7a 1623 }
ec51d3fa 1624
b5ab16bf
AD
1625 adev->gfx.config.max_shader_engines = le32_to_cpu(gpu_info_fw->gc_num_se);
1626 adev->gfx.config.max_cu_per_sh = le32_to_cpu(gpu_info_fw->gc_num_cu_per_sh);
1627 adev->gfx.config.max_sh_per_se = le32_to_cpu(gpu_info_fw->gc_num_sh_per_se);
1628 adev->gfx.config.max_backends_per_se = le32_to_cpu(gpu_info_fw->gc_num_rb_per_se);
e2a75f88 1629 adev->gfx.config.max_texture_channel_caches =
b5ab16bf
AD
1630 le32_to_cpu(gpu_info_fw->gc_num_tccs);
1631 adev->gfx.config.max_gprs = le32_to_cpu(gpu_info_fw->gc_num_gprs);
1632 adev->gfx.config.max_gs_threads = le32_to_cpu(gpu_info_fw->gc_num_max_gs_thds);
1633 adev->gfx.config.gs_vgt_table_depth = le32_to_cpu(gpu_info_fw->gc_gs_table_depth);
1634 adev->gfx.config.gs_prim_buffer_depth = le32_to_cpu(gpu_info_fw->gc_gsprim_buff_depth);
e2a75f88 1635 adev->gfx.config.double_offchip_lds_buf =
b5ab16bf
AD
1636 le32_to_cpu(gpu_info_fw->gc_double_offchip_lds_buffer);
1637 adev->gfx.cu_info.wave_front_size = le32_to_cpu(gpu_info_fw->gc_wave_size);
51fd0370
HZ
1638 adev->gfx.cu_info.max_waves_per_simd =
1639 le32_to_cpu(gpu_info_fw->gc_max_waves_per_simd);
1640 adev->gfx.cu_info.max_scratch_slots_per_cu =
1641 le32_to_cpu(gpu_info_fw->gc_max_scratch_slots_per_cu);
1642 adev->gfx.cu_info.lds_size = le32_to_cpu(gpu_info_fw->gc_lds_size);
48321c3d 1643 if (hdr->version_minor >= 1) {
35c2e910
HZ
1644 const struct gpu_info_firmware_v1_1 *gpu_info_fw =
1645 (const struct gpu_info_firmware_v1_1 *)(adev->firmware.gpu_info_fw->data +
1646 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1647 adev->gfx.config.num_sc_per_sh =
1648 le32_to_cpu(gpu_info_fw->num_sc_per_sh);
1649 adev->gfx.config.num_packer_per_sc =
1650 le32_to_cpu(gpu_info_fw->num_packer_per_sc);
1651 }
ec51d3fa
XY
1652
1653parse_soc_bounding_box:
ec51d3fa
XY
1654 /*
1655 * soc bounding box info is not integrated in the discovery table,
1656 * we always need to parse it from gpu info firmware.
1657 */
48321c3d
HW
1658 if (hdr->version_minor == 2) {
1659 const struct gpu_info_firmware_v1_2 *gpu_info_fw =
1660 (const struct gpu_info_firmware_v1_2 *)(adev->firmware.gpu_info_fw->data +
1661 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1662 adev->dm.soc_bounding_box = &gpu_info_fw->soc_bounding_box;
1663 }
e2a75f88
AD
1664 break;
1665 }
1666 default:
1667 dev_err(adev->dev,
1668 "Unsupported gpu_info table %d\n", hdr->header.ucode_version);
1669 err = -EINVAL;
1670 goto out;
1671 }
1672out:
e2a75f88
AD
1673 return err;
1674}
1675
e3ecdffa
AD
1676/**
1677 * amdgpu_device_ip_early_init - run early init for hardware IPs
1678 *
1679 * @adev: amdgpu_device pointer
1680 *
1681 * Early initialization pass for hardware IPs. The hardware IPs that make
1682 * up each asic are discovered and each IP's early_init callback is run. This
1683 * is the first stage in initializing the asic.
1684 * Returns 0 on success, negative error code on failure.
1685 */
06ec9070 1686static int amdgpu_device_ip_early_init(struct amdgpu_device *adev)
d38ceaf9 1687{
aaa36a97 1688 int i, r;
d38ceaf9 1689
483ef985 1690 amdgpu_device_enable_virtual_display(adev);
a6be7570 1691
d38ceaf9 1692 switch (adev->asic_type) {
33f34802
KW
1693#ifdef CONFIG_DRM_AMDGPU_SI
1694 case CHIP_VERDE:
1695 case CHIP_TAHITI:
1696 case CHIP_PITCAIRN:
1697 case CHIP_OLAND:
1698 case CHIP_HAINAN:
295d0daf 1699 adev->family = AMDGPU_FAMILY_SI;
33f34802
KW
1700 r = si_set_ip_blocks(adev);
1701 if (r)
1702 return r;
1703 break;
1704#endif
a2e73f56
AD
1705#ifdef CONFIG_DRM_AMDGPU_CIK
1706 case CHIP_BONAIRE:
1707 case CHIP_HAWAII:
1708 case CHIP_KAVERI:
1709 case CHIP_KABINI:
1710 case CHIP_MULLINS:
e1ad2d53 1711 if (adev->flags & AMD_IS_APU)
a2e73f56 1712 adev->family = AMDGPU_FAMILY_KV;
e1ad2d53
AD
1713 else
1714 adev->family = AMDGPU_FAMILY_CI;
a2e73f56
AD
1715
1716 r = cik_set_ip_blocks(adev);
1717 if (r)
1718 return r;
1719 break;
1720#endif
da87c30b
AD
1721 case CHIP_TOPAZ:
1722 case CHIP_TONGA:
1723 case CHIP_FIJI:
1724 case CHIP_POLARIS10:
1725 case CHIP_POLARIS11:
1726 case CHIP_POLARIS12:
1727 case CHIP_VEGAM:
1728 case CHIP_CARRIZO:
1729 case CHIP_STONEY:
1730 if (adev->flags & AMD_IS_APU)
1731 adev->family = AMDGPU_FAMILY_CZ;
1732 else
1733 adev->family = AMDGPU_FAMILY_VI;
1734
1735 r = vi_set_ip_blocks(adev);
1736 if (r)
1737 return r;
1738 break;
e48a3cd9
AD
1739 case CHIP_VEGA10:
1740 case CHIP_VEGA12:
e4bd8170 1741 case CHIP_VEGA20:
e48a3cd9 1742 case CHIP_RAVEN:
61cf44c1 1743 case CHIP_ARCTURUS:
b51a26a0 1744 case CHIP_RENOIR:
70534d1e 1745 if (adev->flags & AMD_IS_APU)
2ca8a5d2
CZ
1746 adev->family = AMDGPU_FAMILY_RV;
1747 else
1748 adev->family = AMDGPU_FAMILY_AI;
460826e6
KW
1749
1750 r = soc15_set_ip_blocks(adev);
1751 if (r)
1752 return r;
1753 break;
0a5b8c7b 1754 case CHIP_NAVI10:
7ecb5cd4 1755 case CHIP_NAVI14:
4808cf9c 1756 case CHIP_NAVI12:
0a5b8c7b
HR
1757 adev->family = AMDGPU_FAMILY_NV;
1758
1759 r = nv_set_ip_blocks(adev);
1760 if (r)
1761 return r;
1762 break;
d38ceaf9
AD
1763 default:
1764 /* FIXME: not supported yet */
1765 return -EINVAL;
1766 }
1767
1884734a 1768 amdgpu_amdkfd_device_probe(adev);
1769
3149d9da 1770 if (amdgpu_sriov_vf(adev)) {
122078de
ML
1771 /* handle vbios stuff prior to full access mode for the new handshake */
1772 if (adev->virt.req_init_data_ver == 1) {
1773 if (!amdgpu_get_bios(adev)) {
1774 DRM_ERROR("failed to get vbios\n");
1775 return -EINVAL;
1776 }
1777
1778 r = amdgpu_atombios_init(adev);
1779 if (r) {
1780 dev_err(adev->dev, "amdgpu_atombios_init failed\n");
1781 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_INIT_FAIL, 0, 0);
1782 return r;
1783 }
1784 }
2f294132 1785 }
122078de 1786
2f294132
ML
1787 /* we need to send REQ_GPU here for the legacy handshake, otherwise the vbios
1788 * will not be prepared by the host for this VF */
1789 if (amdgpu_sriov_vf(adev) && adev->virt.req_init_data_ver < 1) {
3149d9da
XY
1790 r = amdgpu_virt_request_full_gpu(adev, true);
1791 if (r)
2f294132 1792 return r;
3149d9da
XY
1793 }
1794
3b94fb10 1795 adev->pm.pp_feature = amdgpu_pp_feature_mask;
a35ad98b 1796 if (amdgpu_sriov_vf(adev) || sched_policy == KFD_SCHED_POLICY_NO_HWS)
00544006 1797 adev->pm.pp_feature &= ~PP_GFXOFF_MASK;
00f54b97 1798
d38ceaf9
AD
1799 for (i = 0; i < adev->num_ip_blocks; i++) {
1800 if ((amdgpu_ip_block_mask & (1 << i)) == 0) {
ed8cf00c
HR
1801 DRM_ERROR("disabled ip block: %d <%s>\n",
1802 i, adev->ip_blocks[i].version->funcs->name);
a1255107 1803 adev->ip_blocks[i].status.valid = false;
d38ceaf9 1804 } else {
a1255107
AD
1805 if (adev->ip_blocks[i].version->funcs->early_init) {
1806 r = adev->ip_blocks[i].version->funcs->early_init((void *)adev);
2c1a2784 1807 if (r == -ENOENT) {
a1255107 1808 adev->ip_blocks[i].status.valid = false;
2c1a2784 1809 } else if (r) {
a1255107
AD
1810 DRM_ERROR("early_init of IP block <%s> failed %d\n",
1811 adev->ip_blocks[i].version->funcs->name, r);
d38ceaf9 1812 return r;
2c1a2784 1813 } else {
a1255107 1814 adev->ip_blocks[i].status.valid = true;
2c1a2784 1815 }
974e6b64 1816 } else {
a1255107 1817 adev->ip_blocks[i].status.valid = true;
d38ceaf9 1818 }
d38ceaf9 1819 }
21a249ca
AD
1820 /* get the vbios after the asic_funcs are set up */
1821 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON) {
6e29c227
AD
1822 r = amdgpu_device_parse_gpu_info_fw(adev);
1823 if (r)
1824 return r;
1825
122078de
ML
1826 /* skip vbios handling for new handshake */
1827 if (amdgpu_sriov_vf(adev) && adev->virt.req_init_data_ver == 1)
1828 continue;
1829
21a249ca
AD
1830 /* Read BIOS */
1831 if (!amdgpu_get_bios(adev))
1832 return -EINVAL;
1833
1834 r = amdgpu_atombios_init(adev);
1835 if (r) {
1836 dev_err(adev->dev, "amdgpu_atombios_init failed\n");
1837 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_INIT_FAIL, 0, 0);
1838 return r;
1839 }
1840 }
d38ceaf9
AD
1841 }
1842
395d1fb9
NH
1843 adev->cg_flags &= amdgpu_cg_mask;
1844 adev->pg_flags &= amdgpu_pg_mask;
1845
d38ceaf9
AD
1846 return 0;
1847}
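/*
 * Illustration of the ip_block_mask handling above (example value only,
 * the block ordering is asic specific): amdgpu_ip_block_mask is indexed by
 * the position of each IP block, so loading the module with e.g.
 *
 *   modprobe amdgpu ip_block_mask=0xfffffffd
 *
 * clears bit 1 and marks the second IP block invalid; its early_init and
 * all later callbacks are then skipped.
 */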
1848
0a4f2520
RZ
1849static int amdgpu_device_ip_hw_init_phase1(struct amdgpu_device *adev)
1850{
1851 int i, r;
1852
1853 for (i = 0; i < adev->num_ip_blocks; i++) {
1854 if (!adev->ip_blocks[i].status.sw)
1855 continue;
1856 if (adev->ip_blocks[i].status.hw)
1857 continue;
1858 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
2d11fd3f 1859 (amdgpu_sriov_vf(adev) && (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP)) ||
0a4f2520
RZ
1860 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH) {
1861 r = adev->ip_blocks[i].version->funcs->hw_init(adev);
1862 if (r) {
1863 DRM_ERROR("hw_init of IP block <%s> failed %d\n",
1864 adev->ip_blocks[i].version->funcs->name, r);
1865 return r;
1866 }
1867 adev->ip_blocks[i].status.hw = true;
1868 }
1869 }
1870
1871 return 0;
1872}
1873
1874static int amdgpu_device_ip_hw_init_phase2(struct amdgpu_device *adev)
1875{
1876 int i, r;
1877
1878 for (i = 0; i < adev->num_ip_blocks; i++) {
1879 if (!adev->ip_blocks[i].status.sw)
1880 continue;
1881 if (adev->ip_blocks[i].status.hw)
1882 continue;
1883 r = adev->ip_blocks[i].version->funcs->hw_init(adev);
1884 if (r) {
1885 DRM_ERROR("hw_init of IP block <%s> failed %d\n",
1886 adev->ip_blocks[i].version->funcs->name, r);
1887 return r;
1888 }
1889 adev->ip_blocks[i].status.hw = true;
1890 }
1891
1892 return 0;
1893}
1894
7a3e0bb2
RZ
1895static int amdgpu_device_fw_loading(struct amdgpu_device *adev)
1896{
1897 int r = 0;
1898 int i;
80f41f84 1899 uint32_t smu_version;
7a3e0bb2
RZ
1900
1901 if (adev->asic_type >= CHIP_VEGA10) {
1902 for (i = 0; i < adev->num_ip_blocks; i++) {
482f0e53
ML
1903 if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_PSP)
1904 continue;
1905
1906 /* no need to do the fw loading again if already done */
1907 if (adev->ip_blocks[i].status.hw == true)
1908 break;
1909
1910 if (adev->in_gpu_reset || adev->in_suspend) {
1911 r = adev->ip_blocks[i].version->funcs->resume(adev);
1912 if (r) {
1913 DRM_ERROR("resume of IP block <%s> failed %d\n",
7a3e0bb2 1914 adev->ip_blocks[i].version->funcs->name, r);
482f0e53
ML
1915 return r;
1916 }
1917 } else {
1918 r = adev->ip_blocks[i].version->funcs->hw_init(adev);
1919 if (r) {
1920 DRM_ERROR("hw_init of IP block <%s> failed %d\n",
1921 adev->ip_blocks[i].version->funcs->name, r);
1922 return r;
7a3e0bb2 1923 }
7a3e0bb2 1924 }
482f0e53
ML
1925
1926 adev->ip_blocks[i].status.hw = true;
1927 break;
7a3e0bb2
RZ
1928 }
1929 }
482f0e53 1930
8973d9ec
ED
1931 if (!amdgpu_sriov_vf(adev) || adev->asic_type == CHIP_TONGA)
1932 r = amdgpu_pm_load_smu_firmware(adev, &smu_version);
7a3e0bb2 1933
80f41f84 1934 return r;
7a3e0bb2
RZ
1935}
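/*
 * Rough call-order sketch for the hw bring-up helpers above, as used from
 * amdgpu_device_ip_init() below (paraphrased, not an exhaustive list):
 *
 *   amdgpu_device_ip_hw_init_phase1()  - COMMON, IH (and PSP on SR-IOV)
 *   amdgpu_device_fw_loading()         - PSP resume/hw_init, then SMU fw
 *   amdgpu_device_ip_hw_init_phase2()  - all remaining IP blocks
 *
 * Splitting hw_init this way lets firmware be loaded after the baseline
 * blocks are alive but before the blocks that depend on that firmware.
 */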
1936
e3ecdffa
AD
1937/**
1938 * amdgpu_device_ip_init - run init for hardware IPs
1939 *
1940 * @adev: amdgpu_device pointer
1941 *
1942 * Main initialization pass for hardware IPs. The list of all the hardware
1943 * IPs that make up the asic is walked and the sw_init and hw_init callbacks
1944 * are run. sw_init initializes the software state associated with each IP
1945 * and hw_init initializes the hardware associated with each IP.
1946 * Returns 0 on success, negative error code on failure.
1947 */
06ec9070 1948static int amdgpu_device_ip_init(struct amdgpu_device *adev)
d38ceaf9
AD
1949{
1950 int i, r;
1951
c030f2e4 1952 r = amdgpu_ras_init(adev);
1953 if (r)
1954 return r;
1955
2f294132
ML
1956 if (amdgpu_sriov_vf(adev) && adev->virt.req_init_data_ver > 0) {
1957 r = amdgpu_virt_request_full_gpu(adev, true);
1958 if (r)
1959 return -EAGAIN;
1960 }
1961
d38ceaf9 1962 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 1963 if (!adev->ip_blocks[i].status.valid)
d38ceaf9 1964 continue;
a1255107 1965 r = adev->ip_blocks[i].version->funcs->sw_init((void *)adev);
2c1a2784 1966 if (r) {
a1255107
AD
1967 DRM_ERROR("sw_init of IP block <%s> failed %d\n",
1968 adev->ip_blocks[i].version->funcs->name, r);
72d3f592 1969 goto init_failed;
2c1a2784 1970 }
a1255107 1971 adev->ip_blocks[i].status.sw = true;
bfca0289 1972
d38ceaf9 1973 /* need to do gmc hw init early so we can allocate gpu mem */
a1255107 1974 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) {
06ec9070 1975 r = amdgpu_device_vram_scratch_init(adev);
2c1a2784
AD
1976 if (r) {
1977 DRM_ERROR("amdgpu_vram_scratch_init failed %d\n", r);
72d3f592 1978 goto init_failed;
2c1a2784 1979 }
a1255107 1980 r = adev->ip_blocks[i].version->funcs->hw_init((void *)adev);
2c1a2784
AD
1981 if (r) {
1982 DRM_ERROR("hw_init %d failed %d\n", i, r);
72d3f592 1983 goto init_failed;
2c1a2784 1984 }
06ec9070 1985 r = amdgpu_device_wb_init(adev);
2c1a2784 1986 if (r) {
06ec9070 1987 DRM_ERROR("amdgpu_device_wb_init failed %d\n", r);
72d3f592 1988 goto init_failed;
2c1a2784 1989 }
a1255107 1990 adev->ip_blocks[i].status.hw = true;
2493664f
ML
1991
1992 /* right after GMC hw init, we create CSA */
f92d5c61 1993 if (amdgpu_mcbp || amdgpu_sriov_vf(adev)) {
1e256e27
RZ
1994 r = amdgpu_allocate_static_csa(adev, &adev->virt.csa_obj,
1995 AMDGPU_GEM_DOMAIN_VRAM,
1996 AMDGPU_CSA_SIZE);
2493664f
ML
1997 if (r) {
1998 DRM_ERROR("allocate CSA failed %d\n", r);
72d3f592 1999 goto init_failed;
2493664f
ML
2000 }
2001 }
d38ceaf9
AD
2002 }
2003 }
2004
c9ffa427
YT
2005 if (amdgpu_sriov_vf(adev))
2006 amdgpu_virt_init_data_exchange(adev);
2007
533aed27
AG
2008 r = amdgpu_ib_pool_init(adev);
2009 if (r) {
2010 dev_err(adev->dev, "IB initialization failed (%d).\n", r);
2011 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_IB_INIT_FAIL, 0, r);
2012 goto init_failed;
2013 }
2014
c8963ea4
RZ
2015 r = amdgpu_ucode_create_bo(adev); /* create ucode bo when sw_init complete*/
2016 if (r)
72d3f592 2017 goto init_failed;
0a4f2520
RZ
2018
2019 r = amdgpu_device_ip_hw_init_phase1(adev);
2020 if (r)
72d3f592 2021 goto init_failed;
0a4f2520 2022
7a3e0bb2
RZ
2023 r = amdgpu_device_fw_loading(adev);
2024 if (r)
72d3f592 2025 goto init_failed;
7a3e0bb2 2026
0a4f2520
RZ
2027 r = amdgpu_device_ip_hw_init_phase2(adev);
2028 if (r)
72d3f592 2029 goto init_failed;
d38ceaf9 2030
121a2bc6
AG
2031 /*
2032 * retired pages will be loaded from eeprom and reserved here,
2033 * it should be called after amdgpu_device_ip_hw_init_phase2 since
2034 * for some ASICs the RAS EEPROM code relies on SMU fully functioning
2035 * for I2C communication, which is only true at this point.
2036 * recovery_init may fail, but it can free all resources allocated by
2037 * itself and its failure should not stop amdgpu init process.
2038 *
2039 * Note: theoretically, this should be called before all vram allocations
2040 * to protect retired pages from abuse
2041 */
2042 amdgpu_ras_recovery_init(adev);
2043
3e2e2ab5
HZ
2044 if (adev->gmc.xgmi.num_physical_nodes > 1)
2045 amdgpu_xgmi_add_device(adev);
1884734a 2046 amdgpu_amdkfd_device_init(adev);
c6332b97 2047
bd607166
KR
2048 amdgpu_fru_get_product_info(adev);
2049
72d3f592 2050init_failed:
c9ffa427 2051 if (amdgpu_sriov_vf(adev))
c6332b97 2052 amdgpu_virt_release_full_gpu(adev, true);
2053
72d3f592 2054 return r;
d38ceaf9
AD
2055}
2056
e3ecdffa
AD
2057/**
2058 * amdgpu_device_fill_reset_magic - writes reset magic to gart pointer
2059 *
2060 * @adev: amdgpu_device pointer
2061 *
2062 * Writes a reset magic value to the gart pointer in VRAM. The driver calls
2063 * this function before a GPU reset. If the value is retained after a
2064 * GPU reset, VRAM has not been lost. Some GPU resets may destroy VRAM contents.
2065 */
06ec9070 2066static void amdgpu_device_fill_reset_magic(struct amdgpu_device *adev)
0c49e0b8
CZ
2067{
2068 memcpy(adev->reset_magic, adev->gart.ptr, AMDGPU_RESET_MAGIC_NUM);
2069}
2070
e3ecdffa
AD
2071/**
2072 * amdgpu_device_check_vram_lost - check if vram is valid
2073 *
2074 * @adev: amdgpu_device pointer
2075 *
2076 * Checks the reset magic value written to the gart pointer in VRAM.
2077 * The driver calls this after a GPU reset to see if the contents of
2078 * VRAM were lost or not.
2079 * Returns true if vram is lost, false if not.
2080 */
06ec9070 2081static bool amdgpu_device_check_vram_lost(struct amdgpu_device *adev)
0c49e0b8 2082{
dadce777
EQ
2083 if (memcmp(adev->gart.ptr, adev->reset_magic,
2084 AMDGPU_RESET_MAGIC_NUM))
2085 return true;
2086
2087 if (!adev->in_gpu_reset)
2088 return false;
2089
2090 /*
2091 * For all ASICs with baco/mode1 reset, the VRAM is
2092 * always assumed to be lost.
2093 */
2094 switch (amdgpu_asic_reset_method(adev)) {
2095 case AMD_RESET_METHOD_BACO:
2096 case AMD_RESET_METHOD_MODE1:
2097 return true;
2098 default:
2099 return false;
2100 }
0c49e0b8
CZ
2101}
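/*
 * Minimal usage sketch for the reset-magic pair above (simplified view of
 * the typical reset path):
 *
 *   amdgpu_device_fill_reset_magic(adev);            // before the reset
 *   ... asic reset ...
 *   vram_lost = amdgpu_device_check_vram_lost(adev); // after the reset
 *   if (vram_lost)
 *           ... re-post the asic and restore VRAM contents ...
 *
 * A mismatching magic, or any BACO/mode1 reset while in_gpu_reset is set,
 * is treated as lost VRAM.
 */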
2102
e3ecdffa 2103/**
1112a46b 2104 * amdgpu_device_set_cg_state - set clockgating for amdgpu device
e3ecdffa
AD
2105 *
2106 * @adev: amdgpu_device pointer
b8b72130 2107 * @state: clockgating state (gate or ungate)
e3ecdffa 2108 *
e3ecdffa 2109 * The list of all the hardware IPs that make up the asic is walked and the
1112a46b
RZ
2110 * set_clockgating_state callbacks are run.
2111 * During late init this enables clockgating for the hardware IPs; during
2112 * fini or suspend it disables clockgating.
e3ecdffa
AD
2113 * Returns 0 on success, negative error code on failure.
2114 */
fdd34271 2115
1112a46b
RZ
2116static int amdgpu_device_set_cg_state(struct amdgpu_device *adev,
2117 enum amd_clockgating_state state)
d38ceaf9 2118{
1112a46b 2119 int i, j, r;
d38ceaf9 2120
4a2ba394
SL
2121 if (amdgpu_emu_mode == 1)
2122 return 0;
2123
1112a46b
RZ
2124 for (j = 0; j < adev->num_ip_blocks; j++) {
2125 i = state == AMD_CG_STATE_GATE ? j : adev->num_ip_blocks - j - 1;
a2d31dc3 2126 if (!adev->ip_blocks[i].status.late_initialized)
d38ceaf9 2127 continue;
4a446d55 2128 /* skip CG for VCE/UVD, it's handled specially */
a1255107 2129 if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_UVD &&
57716327 2130 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCE &&
34319b32 2131 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCN &&
52f2e779 2132 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_JPEG &&
57716327 2133 adev->ip_blocks[i].version->funcs->set_clockgating_state) {
4a446d55 2134 /* enable clockgating to save power */
a1255107 2135 r = adev->ip_blocks[i].version->funcs->set_clockgating_state((void *)adev,
1112a46b 2136 state);
4a446d55
AD
2137 if (r) {
2138 DRM_ERROR("set_clockgating_state(gate) of IP block <%s> failed %d\n",
a1255107 2139 adev->ip_blocks[i].version->funcs->name, r);
4a446d55
AD
2140 return r;
2141 }
b0b00ff1 2142 }
d38ceaf9 2143 }
06b18f61 2144
c9f96fd5
RZ
2145 return 0;
2146}
2147
1112a46b 2148static int amdgpu_device_set_pg_state(struct amdgpu_device *adev, enum amd_powergating_state state)
c9f96fd5 2149{
1112a46b 2150 int i, j, r;
06b18f61 2151
c9f96fd5
RZ
2152 if (amdgpu_emu_mode == 1)
2153 return 0;
2154
1112a46b
RZ
2155 for (j = 0; j < adev->num_ip_blocks; j++) {
2156 i = state == AMD_PG_STATE_GATE ? j : adev->num_ip_blocks - j - 1;
a2d31dc3 2157 if (!adev->ip_blocks[i].status.late_initialized)
c9f96fd5
RZ
2158 continue;
2159 /* skip PG for VCE/UVD, it's handled specially */
2160 if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_UVD &&
2161 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCE &&
2162 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCN &&
52f2e779 2163 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_JPEG &&
c9f96fd5
RZ
2164 adev->ip_blocks[i].version->funcs->set_powergating_state) {
2165 /* enable powergating to save power */
2166 r = adev->ip_blocks[i].version->funcs->set_powergating_state((void *)adev,
1112a46b 2167 state);
c9f96fd5
RZ
2168 if (r) {
2169 DRM_ERROR("set_powergating_state(gate) of IP block <%s> failed %d\n",
2170 adev->ip_blocks[i].version->funcs->name, r);
2171 return r;
2172 }
2173 }
2174 }
2dc80b00
S
2175 return 0;
2176}
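/*
 * Note on ordering in the two helpers above: when gating
 * (AMD_CG_STATE_GATE / AMD_PG_STATE_GATE) the IP list is walked front to
 * back, when ungating it is walked back to front, i.e.
 *
 *   i = gate ? j : adev->num_ip_blocks - j - 1;
 *
 * so clock/power gating is torn down in the reverse of the order in which
 * it was applied.
 */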
2177
beff74bc
AD
2178static int amdgpu_device_enable_mgpu_fan_boost(void)
2179{
2180 struct amdgpu_gpu_instance *gpu_ins;
2181 struct amdgpu_device *adev;
2182 int i, ret = 0;
2183
2184 mutex_lock(&mgpu_info.mutex);
2185
2186 /*
2187 * MGPU fan boost feature should be enabled
2188 * only when there are two or more dGPUs in
2189 * the system
2190 */
2191 if (mgpu_info.num_dgpu < 2)
2192 goto out;
2193
2194 for (i = 0; i < mgpu_info.num_dgpu; i++) {
2195 gpu_ins = &(mgpu_info.gpu_ins[i]);
2196 adev = gpu_ins->adev;
2197 if (!(adev->flags & AMD_IS_APU) &&
2198 !gpu_ins->mgpu_fan_enabled &&
2199 adev->powerplay.pp_funcs &&
2200 adev->powerplay.pp_funcs->enable_mgpu_fan_boost) {
2201 ret = amdgpu_dpm_enable_mgpu_fan_boost(adev);
2202 if (ret)
2203 break;
2204
2205 gpu_ins->mgpu_fan_enabled = 1;
2206 }
2207 }
2208
2209out:
2210 mutex_unlock(&mgpu_info.mutex);
2211
2212 return ret;
2213}
2214
e3ecdffa
AD
2215/**
2216 * amdgpu_device_ip_late_init - run late init for hardware IPs
2217 *
2218 * @adev: amdgpu_device pointer
2219 *
2220 * Late initialization pass for hardware IPs. The list of all the hardware
2221 * IPs that make up the asic is walked and the late_init callbacks are run.
2222 * late_init covers any special initialization that an IP requires
2223 * after all of the IPs have been initialized or something that needs to happen
2224 * late in the init process.
2225 * Returns 0 on success, negative error code on failure.
2226 */
06ec9070 2227static int amdgpu_device_ip_late_init(struct amdgpu_device *adev)
2dc80b00 2228{
60599a03 2229 struct amdgpu_gpu_instance *gpu_instance;
2dc80b00
S
2230 int i = 0, r;
2231
2232 for (i = 0; i < adev->num_ip_blocks; i++) {
73f847db 2233 if (!adev->ip_blocks[i].status.hw)
2dc80b00
S
2234 continue;
2235 if (adev->ip_blocks[i].version->funcs->late_init) {
2236 r = adev->ip_blocks[i].version->funcs->late_init((void *)adev);
2237 if (r) {
2238 DRM_ERROR("late_init of IP block <%s> failed %d\n",
2239 adev->ip_blocks[i].version->funcs->name, r);
2240 return r;
2241 }
2dc80b00 2242 }
73f847db 2243 adev->ip_blocks[i].status.late_initialized = true;
2dc80b00
S
2244 }
2245
a891d239
DL
2246 amdgpu_ras_set_error_query_ready(adev, true);
2247
1112a46b
RZ
2248 amdgpu_device_set_cg_state(adev, AMD_CG_STATE_GATE);
2249 amdgpu_device_set_pg_state(adev, AMD_PG_STATE_GATE);
916ac57f 2250
06ec9070 2251 amdgpu_device_fill_reset_magic(adev);
d38ceaf9 2252
beff74bc
AD
2253 r = amdgpu_device_enable_mgpu_fan_boost();
2254 if (r)
2255 DRM_ERROR("enable mgpu fan boost failed (%d).\n", r);
2256
60599a03
EQ
2257
2258 if (adev->gmc.xgmi.num_physical_nodes > 1) {
2259 mutex_lock(&mgpu_info.mutex);
2260
2261 /*
2262 * Reset device p-state to low, as the device boots in the high p-state.
2263 *
2264 * This should be performed only after all devices from the same
2265 * hive get initialized.
2266 *
2267 * However, the number of devices in a hive is not known in advance,
2268 * as it is counted one by one while the devices initialize.
2269 *
2270 * So, we wait for all XGMI interlinked devices initialized.
2271 * This may bring some delays as those devices may come from
2272 * different hives. But that should be OK.
2273 */
2274 if (mgpu_info.num_dgpu == adev->gmc.xgmi.num_physical_nodes) {
2275 for (i = 0; i < mgpu_info.num_gpu; i++) {
2276 gpu_instance = &(mgpu_info.gpu_ins[i]);
2277 if (gpu_instance->adev->flags & AMD_IS_APU)
2278 continue;
2279
d84a430d
JK
2280 r = amdgpu_xgmi_set_pstate(gpu_instance->adev,
2281 AMDGPU_XGMI_PSTATE_MIN);
60599a03
EQ
2282 if (r) {
2283 DRM_ERROR("pstate setting failed (%d).\n", r);
2284 break;
2285 }
2286 }
2287 }
2288
2289 mutex_unlock(&mgpu_info.mutex);
2290 }
2291
d38ceaf9
AD
2292 return 0;
2293}
2294
e3ecdffa
AD
2295/**
2296 * amdgpu_device_ip_fini - run fini for hardware IPs
2297 *
2298 * @adev: amdgpu_device pointer
2299 *
2300 * Main teardown pass for hardware IPs. The list of all the hardware
2301 * IPs that make up the asic is walked and the hw_fini and sw_fini callbacks
2302 * are run. hw_fini tears down the hardware associated with each IP
2303 * and sw_fini tears down any software state associated with each IP.
2304 * Returns 0 on success, negative error code on failure.
2305 */
06ec9070 2306static int amdgpu_device_ip_fini(struct amdgpu_device *adev)
d38ceaf9
AD
2307{
2308 int i, r;
2309
c030f2e4 2310 amdgpu_ras_pre_fini(adev);
2311
a82400b5
AG
2312 if (adev->gmc.xgmi.num_physical_nodes > 1)
2313 amdgpu_xgmi_remove_device(adev);
2314
1884734a 2315 amdgpu_amdkfd_device_fini(adev);
05df1f01
RZ
2316
2317 amdgpu_device_set_pg_state(adev, AMD_PG_STATE_UNGATE);
fdd34271
RZ
2318 amdgpu_device_set_cg_state(adev, AMD_CG_STATE_UNGATE);
2319
3e96dbfd
AD
2320 /* need to disable SMC first */
2321 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 2322 if (!adev->ip_blocks[i].status.hw)
3e96dbfd 2323 continue;
fdd34271 2324 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) {
a1255107 2325 r = adev->ip_blocks[i].version->funcs->hw_fini((void *)adev);
3e96dbfd
AD
2326 /* XXX handle errors */
2327 if (r) {
2328 DRM_DEBUG("hw_fini of IP block <%s> failed %d\n",
a1255107 2329 adev->ip_blocks[i].version->funcs->name, r);
3e96dbfd 2330 }
a1255107 2331 adev->ip_blocks[i].status.hw = false;
3e96dbfd
AD
2332 break;
2333 }
2334 }
2335
d38ceaf9 2336 for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
a1255107 2337 if (!adev->ip_blocks[i].status.hw)
d38ceaf9 2338 continue;
8201a67a 2339
a1255107 2340 r = adev->ip_blocks[i].version->funcs->hw_fini((void *)adev);
d38ceaf9 2341 /* XXX handle errors */
2c1a2784 2342 if (r) {
a1255107
AD
2343 DRM_DEBUG("hw_fini of IP block <%s> failed %d\n",
2344 adev->ip_blocks[i].version->funcs->name, r);
2c1a2784 2345 }
8201a67a 2346
a1255107 2347 adev->ip_blocks[i].status.hw = false;
d38ceaf9
AD
2348 }
2349
9950cda2 2350
d38ceaf9 2351 for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
a1255107 2352 if (!adev->ip_blocks[i].status.sw)
d38ceaf9 2353 continue;
c12aba3a
ML
2354
2355 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) {
c8963ea4 2356 amdgpu_ucode_free_bo(adev);
1e256e27 2357 amdgpu_free_static_csa(&adev->virt.csa_obj);
c12aba3a
ML
2358 amdgpu_device_wb_fini(adev);
2359 amdgpu_device_vram_scratch_fini(adev);
533aed27 2360 amdgpu_ib_pool_fini(adev);
c12aba3a
ML
2361 }
2362
a1255107 2363 r = adev->ip_blocks[i].version->funcs->sw_fini((void *)adev);
d38ceaf9 2364 /* XXX handle errors */
2c1a2784 2365 if (r) {
a1255107
AD
2366 DRM_DEBUG("sw_fini of IP block <%s> failed %d\n",
2367 adev->ip_blocks[i].version->funcs->name, r);
2c1a2784 2368 }
a1255107
AD
2369 adev->ip_blocks[i].status.sw = false;
2370 adev->ip_blocks[i].status.valid = false;
d38ceaf9
AD
2371 }
2372
a6dcfd9c 2373 for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
a1255107 2374 if (!adev->ip_blocks[i].status.late_initialized)
8a2eef1d 2375 continue;
a1255107
AD
2376 if (adev->ip_blocks[i].version->funcs->late_fini)
2377 adev->ip_blocks[i].version->funcs->late_fini((void *)adev);
2378 adev->ip_blocks[i].status.late_initialized = false;
a6dcfd9c
ML
2379 }
2380
c030f2e4 2381 amdgpu_ras_fini(adev);
2382
030308fc 2383 if (amdgpu_sriov_vf(adev))
24136135
ML
2384 if (amdgpu_virt_release_full_gpu(adev, false))
2385 DRM_ERROR("failed to release exclusive mode on fini\n");
2493664f 2386
d38ceaf9
AD
2387 return 0;
2388}
2389
e3ecdffa 2390/**
beff74bc 2391 * amdgpu_device_delayed_init_work_handler - work handler for IB tests
e3ecdffa 2392 *
1112a46b 2393 * @work: work_struct.
e3ecdffa 2394 */
beff74bc 2395static void amdgpu_device_delayed_init_work_handler(struct work_struct *work)
2dc80b00
S
2396{
2397 struct amdgpu_device *adev =
beff74bc 2398 container_of(work, struct amdgpu_device, delayed_init_work.work);
916ac57f
RZ
2399 int r;
2400
2401 r = amdgpu_ib_ring_tests(adev);
2402 if (r)
2403 DRM_ERROR("ib ring test failed (%d).\n", r);
2dc80b00
S
2404}
2405
1e317b99
RZ
2406static void amdgpu_device_delay_enable_gfx_off(struct work_struct *work)
2407{
2408 struct amdgpu_device *adev =
2409 container_of(work, struct amdgpu_device, gfx.gfx_off_delay_work.work);
2410
2411 mutex_lock(&adev->gfx.gfx_off_mutex);
2412 if (!adev->gfx.gfx_off_state && !adev->gfx.gfx_off_req_count) {
2413 if (!amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_GFX, true))
2414 adev->gfx.gfx_off_state = true;
2415 }
2416 mutex_unlock(&adev->gfx.gfx_off_mutex);
2417}
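/*
 * GFXOFF is only actually entered here when no client still holds a
 * request (gfx_off_req_count == 0) and the state is not already set.
 * The work item itself is presumably queued with a delay by the gfx-off
 * control path (amdgpu_gfx_off_ctrl() in amdgpu_gfx.c), so a burst of
 * enable/disable requests collapses into a single SMU message.
 */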
2418
e3ecdffa 2419/**
e7854a03 2420 * amdgpu_device_ip_suspend_phase1 - run suspend for hardware IPs (phase 1)
e3ecdffa
AD
2421 *
2422 * @adev: amdgpu_device pointer
2423 *
2424 * Main suspend function for hardware IPs. The list of all the hardware
2425 * IPs that make up the asic is walked, clockgating is disabled and the
2426 * suspend callbacks are run. suspend puts the hardware and software state
2427 * in each IP into a state suitable for suspend.
2428 * Returns 0 on success, negative error code on failure.
2429 */
e7854a03
AD
2430static int amdgpu_device_ip_suspend_phase1(struct amdgpu_device *adev)
2431{
2432 int i, r;
2433
ced1ba97
PL
2434 amdgpu_device_set_pg_state(adev, AMD_PG_STATE_UNGATE);
2435 amdgpu_device_set_cg_state(adev, AMD_CG_STATE_UNGATE);
05df1f01 2436
e7854a03
AD
2437 for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
2438 if (!adev->ip_blocks[i].status.valid)
2439 continue;
2440 /* displays are handled separately */
2441 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_DCE) {
e7854a03
AD
2442 /* XXX handle errors */
2443 r = adev->ip_blocks[i].version->funcs->suspend(adev);
2444 /* XXX handle errors */
2445 if (r) {
2446 DRM_ERROR("suspend of IP block <%s> failed %d\n",
2447 adev->ip_blocks[i].version->funcs->name, r);
482f0e53 2448 return r;
e7854a03 2449 }
482f0e53 2450 adev->ip_blocks[i].status.hw = false;
e7854a03
AD
2451 }
2452 }
2453
e7854a03
AD
2454 return 0;
2455}
2456
2457/**
2458 * amdgpu_device_ip_suspend_phase2 - run suspend for hardware IPs (phase 2)
2459 *
2460 * @adev: amdgpu_device pointer
2461 *
2462 * Main suspend function for hardware IPs. The list of all the hardware
2463 * IPs that make up the asic is walked, clockgating is disabled and the
2464 * suspend callbacks are run. suspend puts the hardware and software state
2465 * in each IP into a state suitable for suspend.
2466 * Returns 0 on success, negative error code on failure.
2467 */
2468static int amdgpu_device_ip_suspend_phase2(struct amdgpu_device *adev)
d38ceaf9
AD
2469{
2470 int i, r;
2471
2472 for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
a1255107 2473 if (!adev->ip_blocks[i].status.valid)
d38ceaf9 2474 continue;
e7854a03
AD
2475 /* displays are handled in phase1 */
2476 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_DCE)
2477 continue;
bff77e86
LM
2478 /* PSP lost connection when err_event_athub occurs */
2479 if (amdgpu_ras_intr_triggered() &&
2480 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP) {
2481 adev->ip_blocks[i].status.hw = false;
2482 continue;
2483 }
d38ceaf9 2484 /* XXX handle errors */
a1255107 2485 r = adev->ip_blocks[i].version->funcs->suspend(adev);
d38ceaf9 2486 /* XXX handle errors */
2c1a2784 2487 if (r) {
a1255107
AD
2488 DRM_ERROR("suspend of IP block <%s> failed %d\n",
2489 adev->ip_blocks[i].version->funcs->name, r);
2c1a2784 2490 }
876923fb 2491 adev->ip_blocks[i].status.hw = false;
a3a09142 2492 /* handle putting the SMC in the appropriate state */
86b93fd6
JZ
2493 if (!amdgpu_sriov_vf(adev)) {
2494 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) {
2495 r = amdgpu_dpm_set_mp1_state(adev, adev->mp1_state);
2496 if (r) {
2497 DRM_ERROR("SMC failed to set mp1 state %d, %d\n",
2498 adev->mp1_state, r);
2499 return r;
2500 }
a3a09142
AD
2501 }
2502 }
b5507c7e 2503 adev->ip_blocks[i].status.hw = false;
d38ceaf9
AD
2504 }
2505
2506 return 0;
2507}
2508
e7854a03
AD
2509/**
2510 * amdgpu_device_ip_suspend - run suspend for hardware IPs
2511 *
2512 * @adev: amdgpu_device pointer
2513 *
2514 * Main suspend function for hardware IPs. The list of all the hardware
2515 * IPs that make up the asic is walked, clockgating is disabled and the
2516 * suspend callbacks are run. suspend puts the hardware and software state
2517 * in each IP into a state suitable for suspend.
2518 * Returns 0 on success, negative error code on failure.
2519 */
2520int amdgpu_device_ip_suspend(struct amdgpu_device *adev)
2521{
2522 int r;
2523
e7819644
YT
2524 if (amdgpu_sriov_vf(adev))
2525 amdgpu_virt_request_full_gpu(adev, false);
2526
e7854a03
AD
2527 r = amdgpu_device_ip_suspend_phase1(adev);
2528 if (r)
2529 return r;
2530 r = amdgpu_device_ip_suspend_phase2(adev);
2531
e7819644
YT
2532 if (amdgpu_sriov_vf(adev))
2533 amdgpu_virt_release_full_gpu(adev, false);
2534
e7854a03
AD
2535 return r;
2536}
2537
06ec9070 2538static int amdgpu_device_ip_reinit_early_sriov(struct amdgpu_device *adev)
a90ad3c2
ML
2539{
2540 int i, r;
2541
2cb681b6
ML
2542 static enum amd_ip_block_type ip_order[] = {
2543 AMD_IP_BLOCK_TYPE_GMC,
2544 AMD_IP_BLOCK_TYPE_COMMON,
39186aef 2545 AMD_IP_BLOCK_TYPE_PSP,
2cb681b6
ML
2546 AMD_IP_BLOCK_TYPE_IH,
2547 };
a90ad3c2 2548
2cb681b6
ML
2549 for (i = 0; i < ARRAY_SIZE(ip_order); i++) {
2550 int j;
2551 struct amdgpu_ip_block *block;
a90ad3c2 2552
2cb681b6
ML
2553 for (j = 0; j < adev->num_ip_blocks; j++) {
2554 block = &adev->ip_blocks[j];
2555
482f0e53 2556 block->status.hw = false;
2cb681b6
ML
2557 if (block->version->type != ip_order[i] ||
2558 !block->status.valid)
2559 continue;
2560
2561 r = block->version->funcs->hw_init(adev);
0aaeefcc 2562 DRM_INFO("RE-INIT-early: %s %s\n", block->version->funcs->name, r ? "failed" : "succeeded");
c41d1cf6
ML
2563 if (r)
2564 return r;
482f0e53 2565 block->status.hw = true;
a90ad3c2
ML
2566 }
2567 }
2568
2569 return 0;
2570}
2571
06ec9070 2572static int amdgpu_device_ip_reinit_late_sriov(struct amdgpu_device *adev)
a90ad3c2
ML
2573{
2574 int i, r;
2575
2cb681b6
ML
2576 static enum amd_ip_block_type ip_order[] = {
2577 AMD_IP_BLOCK_TYPE_SMC,
2578 AMD_IP_BLOCK_TYPE_DCE,
2579 AMD_IP_BLOCK_TYPE_GFX,
2580 AMD_IP_BLOCK_TYPE_SDMA,
257deb8c 2581 AMD_IP_BLOCK_TYPE_UVD,
d83c7a07
JJ
2582 AMD_IP_BLOCK_TYPE_VCE,
2583 AMD_IP_BLOCK_TYPE_VCN
2cb681b6 2584 };
a90ad3c2 2585
2cb681b6
ML
2586 for (i = 0; i < ARRAY_SIZE(ip_order); i++) {
2587 int j;
2588 struct amdgpu_ip_block *block;
a90ad3c2 2589
2cb681b6
ML
2590 for (j = 0; j < adev->num_ip_blocks; j++) {
2591 block = &adev->ip_blocks[j];
2592
2593 if (block->version->type != ip_order[i] ||
482f0e53
ML
2594 !block->status.valid ||
2595 block->status.hw)
2cb681b6
ML
2596 continue;
2597
895bd048
JZ
2598 if (block->version->type == AMD_IP_BLOCK_TYPE_SMC)
2599 r = block->version->funcs->resume(adev);
2600 else
2601 r = block->version->funcs->hw_init(adev);
2602
0aaeefcc 2603 DRM_INFO("RE-INIT-late: %s %s\n", block->version->funcs->name, r ? "failed" : "succeeded");
c41d1cf6
ML
2604 if (r)
2605 return r;
482f0e53 2606 block->status.hw = true;
a90ad3c2
ML
2607 }
2608 }
2609
2610 return 0;
2611}
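/*
 * The two SR-IOV re-init helpers above intentionally use fixed ip_order
 * tables instead of the normal block list order:
 *   early: GMC -> COMMON -> PSP -> IH
 *   late:  SMC -> DCE -> GFX -> SDMA -> UVD -> VCE -> VCN
 * In the late pass only the SMC block is brought back via resume();
 * everything else is re-run through hw_init().
 */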
2612
e3ecdffa
AD
2613/**
2614 * amdgpu_device_ip_resume_phase1 - run resume for hardware IPs
2615 *
2616 * @adev: amdgpu_device pointer
2617 *
2618 * First resume function for hardware IPs. The list of all the hardware
2619 * IPs that make up the asic is walked and the resume callbacks are run for
2620 * COMMON, GMC, and IH. resume puts the hardware into a functional state
2621 * after a suspend and updates the software state as necessary. This
2622 * function is also used for restoring the GPU after a GPU reset.
2623 * Returns 0 on success, negative error code on failure.
2624 */
06ec9070 2625static int amdgpu_device_ip_resume_phase1(struct amdgpu_device *adev)
d38ceaf9
AD
2626{
2627 int i, r;
2628
a90ad3c2 2629 for (i = 0; i < adev->num_ip_blocks; i++) {
482f0e53 2630 if (!adev->ip_blocks[i].status.valid || adev->ip_blocks[i].status.hw)
a90ad3c2 2631 continue;
a90ad3c2 2632 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
e3ecdffa
AD
2633 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
2634 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH) {
482f0e53 2635
fcf0649f
CZ
2636 r = adev->ip_blocks[i].version->funcs->resume(adev);
2637 if (r) {
2638 DRM_ERROR("resume of IP block <%s> failed %d\n",
2639 adev->ip_blocks[i].version->funcs->name, r);
2640 return r;
2641 }
482f0e53 2642 adev->ip_blocks[i].status.hw = true;
a90ad3c2
ML
2643 }
2644 }
2645
2646 return 0;
2647}
2648
e3ecdffa
AD
2649/**
2650 * amdgpu_device_ip_resume_phase2 - run resume for hardware IPs
2651 *
2652 * @adev: amdgpu_device pointer
2653 *
2654 * Second resume function for hardware IPs. The list of all the hardware
2655 * IPs that make up the asic is walked and the resume callbacks are run for
2656 * all blocks except COMMON, GMC, and IH. resume puts the hardware into a
2657 * functional state after a suspend and updates the software state as
2658 * necessary. This function is also used for restoring the GPU after a GPU
2659 * reset.
2660 * Returns 0 on success, negative error code on failure.
2661 */
06ec9070 2662static int amdgpu_device_ip_resume_phase2(struct amdgpu_device *adev)
d38ceaf9
AD
2663{
2664 int i, r;
2665
2666 for (i = 0; i < adev->num_ip_blocks; i++) {
482f0e53 2667 if (!adev->ip_blocks[i].status.valid || adev->ip_blocks[i].status.hw)
d38ceaf9 2668 continue;
fcf0649f 2669 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
e3ecdffa 2670 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
7a3e0bb2
RZ
2671 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH ||
2672 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP)
fcf0649f 2673 continue;
a1255107 2674 r = adev->ip_blocks[i].version->funcs->resume(adev);
2c1a2784 2675 if (r) {
a1255107
AD
2676 DRM_ERROR("resume of IP block <%s> failed %d\n",
2677 adev->ip_blocks[i].version->funcs->name, r);
d38ceaf9 2678 return r;
2c1a2784 2679 }
482f0e53 2680 adev->ip_blocks[i].status.hw = true;
d38ceaf9
AD
2681 }
2682
2683 return 0;
2684}
2685
e3ecdffa
AD
2686/**
2687 * amdgpu_device_ip_resume - run resume for hardware IPs
2688 *
2689 * @adev: amdgpu_device pointer
2690 *
2691 * Main resume function for hardware IPs. The hardware IPs
2692 * are split into two resume functions because they are
2693 * also used in recovering from a GPU reset and some additional
2694 * steps need to be taken between them. In this case (S3/S4) they are
2695 * run sequentially.
2696 * Returns 0 on success, negative error code on failure.
2697 */
06ec9070 2698static int amdgpu_device_ip_resume(struct amdgpu_device *adev)
fcf0649f
CZ
2699{
2700 int r;
2701
06ec9070 2702 r = amdgpu_device_ip_resume_phase1(adev);
fcf0649f
CZ
2703 if (r)
2704 return r;
7a3e0bb2
RZ
2705
2706 r = amdgpu_device_fw_loading(adev);
2707 if (r)
2708 return r;
2709
06ec9070 2710 r = amdgpu_device_ip_resume_phase2(adev);
fcf0649f
CZ
2711
2712 return r;
2713}
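/*
 * amdgpu_device_ip_resume() therefore mirrors the init path: phase1
 * (COMMON/GMC/IH), then firmware loading, then phase2 for the remaining
 * blocks. GPU-reset recovery reuses the same two phases but inserts
 * additional steps between them.
 */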
2714
e3ecdffa
AD
2715/**
2716 * amdgpu_device_detect_sriov_bios - determine if the board supports SR-IOV
2717 *
2718 * @adev: amdgpu_device pointer
2719 *
2720 * Query the VBIOS data tables to determine if the board supports SR-IOV.
2721 */
4e99a44e 2722static void amdgpu_device_detect_sriov_bios(struct amdgpu_device *adev)
048765ad 2723{
6867e1b5
ML
2724 if (amdgpu_sriov_vf(adev)) {
2725 if (adev->is_atom_fw) {
2726 if (amdgpu_atomfirmware_gpu_supports_virtualization(adev))
2727 adev->virt.caps |= AMDGPU_SRIOV_CAPS_SRIOV_VBIOS;
2728 } else {
2729 if (amdgpu_atombios_has_gpu_virtualization_table(adev))
2730 adev->virt.caps |= AMDGPU_SRIOV_CAPS_SRIOV_VBIOS;
2731 }
2732
2733 if (!(adev->virt.caps & AMDGPU_SRIOV_CAPS_SRIOV_VBIOS))
2734 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_NO_VBIOS, 0, 0);
a5bde2f9 2735 }
048765ad
AR
2736}
2737
e3ecdffa
AD
2738/**
2739 * amdgpu_device_asic_has_dc_support - determine if DC supports the asic
2740 *
2741 * @asic_type: AMD asic type
2742 *
2743 * Check if there is DC (new modesetting infrastructure) support for an asic.
2744 * Returns true if DC has support, false if not.
2745 */
4562236b
HW
2746bool amdgpu_device_asic_has_dc_support(enum amd_asic_type asic_type)
2747{
2748 switch (asic_type) {
2749#if defined(CONFIG_DRM_AMD_DC)
2750 case CHIP_BONAIRE:
0d6fbccb 2751 case CHIP_KAVERI:
367e6687
AD
2752 case CHIP_KABINI:
2753 case CHIP_MULLINS:
d9fda248
HW
2754 /*
2755 * We have systems in the wild with these ASICs that require
2756 * LVDS and VGA support which is not supported with DC.
2757 *
2758 * Fallback to the non-DC driver here by default so as not to
2759 * cause regressions.
2760 */
2761 return amdgpu_dc > 0;
2762 case CHIP_HAWAII:
4562236b
HW
2763 case CHIP_CARRIZO:
2764 case CHIP_STONEY:
4562236b 2765 case CHIP_POLARIS10:
675fd32b 2766 case CHIP_POLARIS11:
2c8ad2d5 2767 case CHIP_POLARIS12:
675fd32b 2768 case CHIP_VEGAM:
4562236b
HW
2769 case CHIP_TONGA:
2770 case CHIP_FIJI:
42f8ffa1 2771 case CHIP_VEGA10:
dca7b401 2772 case CHIP_VEGA12:
c6034aa2 2773 case CHIP_VEGA20:
b86a1aa3 2774#if defined(CONFIG_DRM_AMD_DC_DCN)
fd187853 2775 case CHIP_RAVEN:
b4f199c7 2776 case CHIP_NAVI10:
8fceceb6 2777 case CHIP_NAVI14:
078655d9 2778 case CHIP_NAVI12:
e1c14c43 2779 case CHIP_RENOIR:
42f8ffa1 2780#endif
fd187853 2781 return amdgpu_dc != 0;
4562236b
HW
2782#endif
2783 default:
93b09a9a
SS
2784 if (amdgpu_dc > 0)
2785 DRM_INFO("Display Core has been requested via kernel parameter "
2786 "but isn't supported by ASIC, ignoring\n");
4562236b
HW
2787 return false;
2788 }
2789}
2790
2791/**
2792 * amdgpu_device_has_dc_support - check if dc is supported
2793 *
2794 * @adev: amdgpu_device pointer
2795 *
2796 * Returns true for supported, false for not supported
2797 */
2798bool amdgpu_device_has_dc_support(struct amdgpu_device *adev)
2799{
2555039d
XY
2800 if (amdgpu_sriov_vf(adev))
2801 return false;
2802
4562236b
HW
2803 return amdgpu_device_asic_has_dc_support(adev->asic_type);
2804}
2805
d4535e2c
AG
2806
2807static void amdgpu_device_xgmi_reset_func(struct work_struct *__work)
2808{
2809 struct amdgpu_device *adev =
2810 container_of(__work, struct amdgpu_device, xgmi_reset_work);
c6a6e2db 2811 struct amdgpu_hive_info *hive = amdgpu_get_xgmi_hive(adev, 0);
d4535e2c 2812
c6a6e2db
AG
2813 /* It's a bug to not have a hive within this function */
2814 if (WARN_ON(!hive))
2815 return;
2816
2817 /*
2818 * Use task barrier to synchronize all xgmi reset works across the
2819 * hive. task_barrier_enter and task_barrier_exit will block
2820 * until all the threads running the xgmi reset works reach
2821 * those points. task_barrier_full will do both blocks.
2822 */
2823 if (amdgpu_asic_reset_method(adev) == AMD_RESET_METHOD_BACO) {
2824
2825 task_barrier_enter(&hive->tb);
2826 adev->asic_reset_res = amdgpu_device_baco_enter(adev->ddev);
2827
2828 if (adev->asic_reset_res)
2829 goto fail;
2830
2831 task_barrier_exit(&hive->tb);
2832 adev->asic_reset_res = amdgpu_device_baco_exit(adev->ddev);
2833
2834 if (adev->asic_reset_res)
2835 goto fail;
43c4d576
JC
2836
2837 if (adev->mmhub.funcs && adev->mmhub.funcs->reset_ras_error_count)
2838 adev->mmhub.funcs->reset_ras_error_count(adev);
c6a6e2db
AG
2839 } else {
2840
2841 task_barrier_full(&hive->tb);
2842 adev->asic_reset_res = amdgpu_asic_reset(adev);
2843 }
ce316fa5 2844
c6a6e2db 2845fail:
d4535e2c 2846 if (adev->asic_reset_res)
fed184e9 2847 DRM_WARN("ASIC reset failed with error, %d for drm dev, %s",
d4535e2c
AG
2848 adev->asic_reset_res, adev->ddev->unique);
2849}
2850
71f98027
AD
2851static int amdgpu_device_get_job_timeout_settings(struct amdgpu_device *adev)
2852{
2853 char *input = amdgpu_lockup_timeout;
2854 char *timeout_setting = NULL;
2855 int index = 0;
2856 long timeout;
2857 int ret = 0;
2858
2859 /*
2860 * By default the timeout for non-compute jobs is 10000 msec.
2861 * And there is no timeout enforced on compute jobs.
2862 * In SR-IOV or passthrough mode, the timeout for compute
b7b2a316 2863 * jobs is 60000 msec by default.
71f98027
AD
2864 */
2865 adev->gfx_timeout = msecs_to_jiffies(10000);
2866 adev->sdma_timeout = adev->video_timeout = adev->gfx_timeout;
2867 if (amdgpu_sriov_vf(adev) || amdgpu_passthrough(adev))
b7b2a316 2868 adev->compute_timeout = msecs_to_jiffies(60000);
71f98027
AD
2869 else
2870 adev->compute_timeout = MAX_SCHEDULE_TIMEOUT;
2871
f440ff44 2872 if (strnlen(input, AMDGPU_MAX_TIMEOUT_PARAM_LENGTH)) {
71f98027 2873 while ((timeout_setting = strsep(&input, ",")) &&
f440ff44 2874 strnlen(timeout_setting, AMDGPU_MAX_TIMEOUT_PARAM_LENGTH)) {
71f98027
AD
2875 ret = kstrtol(timeout_setting, 0, &timeout);
2876 if (ret)
2877 return ret;
2878
2879 if (timeout == 0) {
2880 index++;
2881 continue;
2882 } else if (timeout < 0) {
2883 timeout = MAX_SCHEDULE_TIMEOUT;
2884 } else {
2885 timeout = msecs_to_jiffies(timeout);
2886 }
2887
2888 switch (index++) {
2889 case 0:
2890 adev->gfx_timeout = timeout;
2891 break;
2892 case 1:
2893 adev->compute_timeout = timeout;
2894 break;
2895 case 2:
2896 adev->sdma_timeout = timeout;
2897 break;
2898 case 3:
2899 adev->video_timeout = timeout;
2900 break;
2901 default:
2902 break;
2903 }
2904 }
2905 /*
2906 * There is only one value specified and
2907 * it should apply to all non-compute jobs.
2908 */
bcccee89 2909 if (index == 1) {
71f98027 2910 adev->sdma_timeout = adev->video_timeout = adev->gfx_timeout;
bcccee89
ED
2911 if (amdgpu_sriov_vf(adev) || amdgpu_passthrough(adev))
2912 adev->compute_timeout = adev->gfx_timeout;
2913 }
71f98027
AD
2914 }
2915
2916 return ret;
2917}
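/*
 * Example of the lockup_timeout parsing above (values are illustrative):
 *
 *   amdgpu.lockup_timeout=10000,60000,10000,10000
 *
 * maps, in order, to the gfx, compute, sdma and video timeouts in msec;
 * 0 keeps the default for that slot and a negative value means no timeout.
 * A single value applies to every non-compute queue (and to compute as
 * well under SR-IOV/passthrough).
 */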
d4535e2c 2918
77f3a5cd
ND
2919static const struct attribute *amdgpu_dev_attributes[] = {
2920 &dev_attr_product_name.attr,
2921 &dev_attr_product_number.attr,
2922 &dev_attr_serial_number.attr,
2923 &dev_attr_pcie_replay_count.attr,
2924 NULL
2925};
2926
d38ceaf9
AD
2927/**
2928 * amdgpu_device_init - initialize the driver
2929 *
2930 * @adev: amdgpu_device pointer
87e3f136 2931 * @ddev: drm dev pointer
d38ceaf9
AD
2932 * @pdev: pci dev pointer
2933 * @flags: driver flags
2934 *
2935 * Initializes the driver info and hw (all asics).
2936 * Returns 0 for success or an error on failure.
2937 * Called at driver startup.
2938 */
2939int amdgpu_device_init(struct amdgpu_device *adev,
2940 struct drm_device *ddev,
2941 struct pci_dev *pdev,
2942 uint32_t flags)
2943{
2944 int r, i;
3840c5bc 2945 bool boco = false;
95844d20 2946 u32 max_MBps;
d38ceaf9
AD
2947
2948 adev->shutdown = false;
2949 adev->dev = &pdev->dev;
2950 adev->ddev = ddev;
2951 adev->pdev = pdev;
2952 adev->flags = flags;
4e66d7d2
YZ
2953
2954 if (amdgpu_force_asic_type >= 0 && amdgpu_force_asic_type < CHIP_LAST)
2955 adev->asic_type = amdgpu_force_asic_type;
2956 else
2957 adev->asic_type = flags & AMD_ASIC_MASK;
2958
d38ceaf9 2959 adev->usec_timeout = AMDGPU_MAX_USEC_TIMEOUT;
593aa2d2 2960 if (amdgpu_emu_mode == 1)
8bdab6bb 2961 adev->usec_timeout *= 10;
770d13b1 2962 adev->gmc.gart_size = 512 * 1024 * 1024;
d38ceaf9
AD
2963 adev->accel_working = false;
2964 adev->num_rings = 0;
2965 adev->mman.buffer_funcs = NULL;
2966 adev->mman.buffer_funcs_ring = NULL;
2967 adev->vm_manager.vm_pte_funcs = NULL;
0c88b430 2968 adev->vm_manager.vm_pte_num_scheds = 0;
132f34e4 2969 adev->gmc.gmc_funcs = NULL;
f54d1867 2970 adev->fence_context = dma_fence_context_alloc(AMDGPU_MAX_RINGS);
b8866c26 2971 bitmap_zero(adev->gfx.pipe_reserve_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
d38ceaf9
AD
2972
2973 adev->smc_rreg = &amdgpu_invalid_rreg;
2974 adev->smc_wreg = &amdgpu_invalid_wreg;
2975 adev->pcie_rreg = &amdgpu_invalid_rreg;
2976 adev->pcie_wreg = &amdgpu_invalid_wreg;
36b9a952
HR
2977 adev->pciep_rreg = &amdgpu_invalid_rreg;
2978 adev->pciep_wreg = &amdgpu_invalid_wreg;
4fa1c6a6
TZ
2979 adev->pcie_rreg64 = &amdgpu_invalid_rreg64;
2980 adev->pcie_wreg64 = &amdgpu_invalid_wreg64;
d38ceaf9
AD
2981 adev->uvd_ctx_rreg = &amdgpu_invalid_rreg;
2982 adev->uvd_ctx_wreg = &amdgpu_invalid_wreg;
2983 adev->didt_rreg = &amdgpu_invalid_rreg;
2984 adev->didt_wreg = &amdgpu_invalid_wreg;
ccdbb20a
RZ
2985 adev->gc_cac_rreg = &amdgpu_invalid_rreg;
2986 adev->gc_cac_wreg = &amdgpu_invalid_wreg;
d38ceaf9
AD
2987 adev->audio_endpt_rreg = &amdgpu_block_invalid_rreg;
2988 adev->audio_endpt_wreg = &amdgpu_block_invalid_wreg;
2989
3e39ab90
AD
2990 DRM_INFO("initializing kernel modesetting (%s 0x%04X:0x%04X 0x%04X:0x%04X 0x%02X).\n",
2991 amdgpu_asic_name[adev->asic_type], pdev->vendor, pdev->device,
2992 pdev->subsystem_vendor, pdev->subsystem_device, pdev->revision);
d38ceaf9
AD
2993
2994 /* mutex initializations are all done here so we
2995 * can call these functions without locking issues */
d38ceaf9 2996 atomic_set(&adev->irq.ih.lock, 0);
0e5ca0d1 2997 mutex_init(&adev->firmware.mutex);
d38ceaf9
AD
2998 mutex_init(&adev->pm.mutex);
2999 mutex_init(&adev->gfx.gpu_clock_mutex);
3000 mutex_init(&adev->srbm_mutex);
b8866c26 3001 mutex_init(&adev->gfx.pipe_reserve_mutex);
d23ee13f 3002 mutex_init(&adev->gfx.gfx_off_mutex);
d38ceaf9 3003 mutex_init(&adev->grbm_idx_mutex);
d38ceaf9 3004 mutex_init(&adev->mn_lock);
e23b74aa 3005 mutex_init(&adev->virt.vf_errors.lock);
d38ceaf9 3006 hash_init(adev->mn_hash);
13a752e3 3007 mutex_init(&adev->lock_reset);
32eaeae0 3008 mutex_init(&adev->psp.mutex);
bd052211 3009 mutex_init(&adev->notifier_lock);
d38ceaf9 3010
912dfc84
EQ
3011 r = amdgpu_device_check_arguments(adev);
3012 if (r)
3013 return r;
d38ceaf9 3014
d38ceaf9
AD
3015 spin_lock_init(&adev->mmio_idx_lock);
3016 spin_lock_init(&adev->smc_idx_lock);
3017 spin_lock_init(&adev->pcie_idx_lock);
3018 spin_lock_init(&adev->uvd_ctx_idx_lock);
3019 spin_lock_init(&adev->didt_idx_lock);
ccdbb20a 3020 spin_lock_init(&adev->gc_cac_idx_lock);
16abb5d2 3021 spin_lock_init(&adev->se_cac_idx_lock);
d38ceaf9 3022 spin_lock_init(&adev->audio_endpt_idx_lock);
95844d20 3023 spin_lock_init(&adev->mm_stats.lock);
d38ceaf9 3024
0c4e7fa5
CZ
3025 INIT_LIST_HEAD(&adev->shadow_list);
3026 mutex_init(&adev->shadow_list_lock);
3027
beff74bc
AD
3028 INIT_DELAYED_WORK(&adev->delayed_init_work,
3029 amdgpu_device_delayed_init_work_handler);
1e317b99
RZ
3030 INIT_DELAYED_WORK(&adev->gfx.gfx_off_delay_work,
3031 amdgpu_device_delay_enable_gfx_off);
2dc80b00 3032
d4535e2c
AG
3033 INIT_WORK(&adev->xgmi_reset_work, amdgpu_device_xgmi_reset_func);
3034
d23ee13f 3035 adev->gfx.gfx_off_req_count = 1;
b6e79d9a 3036 adev->pm.ac_power = power_supply_is_system_supplied() > 0;
b1ddf548 3037
0fa49558
AX
3038 /* Registers mapping */
3039 /* TODO: block userspace mapping of io register */
da69c161
KW
3040 if (adev->asic_type >= CHIP_BONAIRE) {
3041 adev->rmmio_base = pci_resource_start(adev->pdev, 5);
3042 adev->rmmio_size = pci_resource_len(adev->pdev, 5);
3043 } else {
3044 adev->rmmio_base = pci_resource_start(adev->pdev, 2);
3045 adev->rmmio_size = pci_resource_len(adev->pdev, 2);
3046 }
d38ceaf9 3047
d38ceaf9
AD
3048 adev->rmmio = ioremap(adev->rmmio_base, adev->rmmio_size);
3049 if (adev->rmmio == NULL) {
3050 return -ENOMEM;
3051 }
3052 DRM_INFO("register mmio base: 0x%08X\n", (uint32_t)adev->rmmio_base);
3053 DRM_INFO("register mmio size: %u\n", (unsigned)adev->rmmio_size);
3054
d38ceaf9
AD
3055 /* io port mapping */
3056 for (i = 0; i < DEVICE_COUNT_RESOURCE; i++) {
3057 if (pci_resource_flags(adev->pdev, i) & IORESOURCE_IO) {
3058 adev->rio_mem_size = pci_resource_len(adev->pdev, i);
3059 adev->rio_mem = pci_iomap(adev->pdev, i, adev->rio_mem_size);
3060 break;
3061 }
3062 }
3063 if (adev->rio_mem == NULL)
b64a18c5 3064 DRM_INFO("PCI I/O BAR is not found.\n");
d38ceaf9 3065
b2109d8e
JX
3066 /* enable PCIE atomic ops */
3067 r = pci_enable_atomic_ops_to_root(adev->pdev,
3068 PCI_EXP_DEVCAP2_ATOMIC_COMP32 |
3069 PCI_EXP_DEVCAP2_ATOMIC_COMP64);
3070 if (r) {
3071 adev->have_atomics_support = false;
3072 DRM_INFO("PCIE atomic ops is not supported\n");
3073 } else {
3074 adev->have_atomics_support = true;
3075 }
3076
5494d864
AD
3077 amdgpu_device_get_pcie_info(adev);
3078
b239c017
JX
3079 if (amdgpu_mcbp)
3080 DRM_INFO("MCBP is enabled\n");
3081
5f84cc63
JX
3082 if (amdgpu_mes && adev->asic_type >= CHIP_NAVI10)
3083 adev->enable_mes = true;
3084
3aa0115d
ML
3085 /* detect hw virtualization here */
3086 amdgpu_detect_virtualization(adev);
3087
dffa11b4
ML
3088 r = amdgpu_device_get_job_timeout_settings(adev);
3089 if (r) {
3090 dev_err(adev->dev, "invalid lockup_timeout parameter syntax\n");
3091 return r;
a190d1c7
XY
3092 }
3093
d38ceaf9 3094 /* early init functions */
06ec9070 3095 r = amdgpu_device_ip_early_init(adev);
d38ceaf9
AD
3096 if (r)
3097 return r;
3098
6585661d
OZ
3099 /* doorbell bar mapping and doorbell index init*/
3100 amdgpu_device_doorbell_init(adev);
3101
d38ceaf9
AD
3102 /* if we have > 1 VGA cards, then disable the amdgpu VGA resources */
3103 /* this will fail for cards that aren't VGA class devices, just
3104 * ignore it */
06ec9070 3105 vga_client_register(adev->pdev, adev, NULL, amdgpu_device_vga_set_decode);
d38ceaf9 3106
31af062a 3107 if (amdgpu_device_supports_boco(ddev))
3840c5bc
AD
3108 boco = true;
3109 if (amdgpu_has_atpx() &&
3110 (amdgpu_is_atpx_hybrid() ||
3111 amdgpu_has_atpx_dgpu_power_cntl()) &&
3112 !pci_is_thunderbolt_attached(adev->pdev))
84c8b22e 3113 vga_switcheroo_register_client(adev->pdev,
3840c5bc
AD
3114 &amdgpu_switcheroo_ops, boco);
3115 if (boco)
d38ceaf9
AD
3116 vga_switcheroo_init_domain_pm_ops(adev->dev, &adev->vga_pm_domain);
3117
9475a943
SL
3118 if (amdgpu_emu_mode == 1) {
3119 /* post the asic on emulation mode */
3120 emu_soc_asic_init(adev);
bfca0289 3121 goto fence_driver_init;
9475a943 3122 }
bfca0289 3123
4e99a44e
ML
3124 /* detect if we are with an SRIOV vbios */
3125 amdgpu_device_detect_sriov_bios(adev);
048765ad 3126
95e8e59e
AD
3127 /* check if we need to reset the asic
3128 * E.g., driver was not cleanly unloaded previously, etc.
3129 */
f14899fd 3130 if (!amdgpu_sriov_vf(adev) && amdgpu_asic_need_reset_on_init(adev)) {
95e8e59e
AD
3131 r = amdgpu_asic_reset(adev);
3132 if (r) {
3133 dev_err(adev->dev, "asic reset on init failed\n");
3134 goto failed;
3135 }
3136 }
3137
d38ceaf9 3138 /* Post card if necessary */
39c640c0 3139 if (amdgpu_device_need_post(adev)) {
d38ceaf9 3140 if (!adev->bios) {
bec86378 3141 dev_err(adev->dev, "no vBIOS found\n");
83ba126a
AD
3142 r = -EINVAL;
3143 goto failed;
d38ceaf9 3144 }
bec86378 3145 DRM_INFO("GPU posting now...\n");
4e99a44e
ML
3146 r = amdgpu_atom_asic_init(adev->mode_info.atom_context);
3147 if (r) {
3148 dev_err(adev->dev, "gpu post error!\n");
3149 goto failed;
3150 }
d38ceaf9
AD
3151 }
3152
88b64e95
AD
3153 if (adev->is_atom_fw) {
3154 /* Initialize clocks */
3155 r = amdgpu_atomfirmware_get_clock_info(adev);
3156 if (r) {
3157 dev_err(adev->dev, "amdgpu_atomfirmware_get_clock_info failed\n");
e23b74aa 3158 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_GET_CLOCK_FAIL, 0, 0);
88b64e95
AD
3159 goto failed;
3160 }
3161 } else {
a5bde2f9
AD
3162 /* Initialize clocks */
3163 r = amdgpu_atombios_get_clock_info(adev);
3164 if (r) {
3165 dev_err(adev->dev, "amdgpu_atombios_get_clock_info failed\n");
e23b74aa 3166 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_GET_CLOCK_FAIL, 0, 0);
89041940 3167 goto failed;
a5bde2f9
AD
3168 }
3169 /* init i2c buses */
4562236b
HW
3170 if (!amdgpu_device_has_dc_support(adev))
3171 amdgpu_atombios_i2c_init(adev);
2c1a2784 3172 }
d38ceaf9 3173
bfca0289 3174fence_driver_init:
d38ceaf9
AD
3175 /* Fence driver */
3176 r = amdgpu_fence_driver_init(adev);
2c1a2784
AD
3177 if (r) {
3178 dev_err(adev->dev, "amdgpu_fence_driver_init failed\n");
e23b74aa 3179 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_FENCE_INIT_FAIL, 0, 0);
83ba126a 3180 goto failed;
2c1a2784 3181 }
d38ceaf9
AD
3182
3183 /* init the mode config */
3184 drm_mode_config_init(adev->ddev);
3185
06ec9070 3186 r = amdgpu_device_ip_init(adev);
d38ceaf9 3187 if (r) {
8840a387 3188 /* failed in exclusive mode due to timeout */
3189 if (amdgpu_sriov_vf(adev) &&
3190 !amdgpu_sriov_runtime(adev) &&
3191 amdgpu_virt_mmio_blocked(adev) &&
3192 !amdgpu_virt_wait_reset(adev)) {
3193 dev_err(adev->dev, "VF exclusive mode timeout\n");
1daee8b4
PD
3194 /* Don't send request since VF is inactive. */
3195 adev->virt.caps &= ~AMDGPU_SRIOV_CAPS_RUNTIME;
3196 adev->virt.ops = NULL;
8840a387 3197 r = -EAGAIN;
3198 goto failed;
3199 }
06ec9070 3200 dev_err(adev->dev, "amdgpu_device_ip_init failed\n");
e23b74aa 3201 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_AMDGPU_INIT_FAIL, 0, 0);
83ba126a 3202 goto failed;
d38ceaf9
AD
3203 }
3204
d69b8971
YZ
3205 dev_info(adev->dev,
3206 "SE %d, SH per SE %d, CU per SH %d, active_cu_number %d\n",
d7f72fe4
YZ
3207 adev->gfx.config.max_shader_engines,
3208 adev->gfx.config.max_sh_per_se,
3209 adev->gfx.config.max_cu_per_sh,
3210 adev->gfx.cu_info.number);
3211
d38ceaf9
AD
3212 adev->accel_working = true;
3213
e59c0205
AX
3214 amdgpu_vm_check_compute_bug(adev);
3215
95844d20
MO
3216 /* Initialize the buffer migration limit. */
3217 if (amdgpu_moverate >= 0)
3218 max_MBps = amdgpu_moverate;
3219 else
3220 max_MBps = 8; /* Allow 8 MB/s. */
3221 /* Get a log2 for easy divisions. */
3222 adev->mm_stats.log2_max_MBps = ilog2(max(1u, max_MBps));
3223
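 /*
  * With the default of 8 MB/s this yields log2_max_MBps = 3 (ilog2(8)),
  * so the migration throttling code can apply the limit with shifts
  * rather than divisions, which is what the "log2 for easy divisions"
  * note above refers to.
  */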
9bc92b9c
ML
3224 amdgpu_fbdev_init(adev);
3225
d2f52ac8 3226 r = amdgpu_pm_sysfs_init(adev);
7c868b59
YT
3227 if (r) {
3228 adev->pm_sysfs_en = false;
d2f52ac8 3229 DRM_ERROR("registering pm debugfs failed (%d).\n", r);
7c868b59
YT
3230 } else
3231 adev->pm_sysfs_en = true;
d2f52ac8 3232
5bb23532 3233 r = amdgpu_ucode_sysfs_init(adev);
7c868b59
YT
3234 if (r) {
3235 adev->ucode_sysfs_en = false;
5bb23532 3236 DRM_ERROR("Creating firmware sysfs failed (%d).\n", r);
7c868b59
YT
3237 } else
3238 adev->ucode_sysfs_en = true;
5bb23532 3239
d38ceaf9
AD
3240 if ((amdgpu_testing & 1)) {
3241 if (adev->accel_working)
3242 amdgpu_test_moves(adev);
3243 else
3244 DRM_INFO("amdgpu: acceleration disabled, skipping move tests\n");
3245 }
d38ceaf9
AD
3246 if (amdgpu_benchmarking) {
3247 if (adev->accel_working)
3248 amdgpu_benchmark(adev, amdgpu_benchmarking);
3249 else
3250 DRM_INFO("amdgpu: acceleration disabled, skipping benchmarks\n");
3251 }
3252
b0adca4d
EQ
3253 /*
3254 * Register gpu instance before amdgpu_device_enable_mgpu_fan_boost.
3255 * Otherwise the mgpu fan boost feature will be skipped due to the
3256 * gpu instance is counted less.
3257 */
3258 amdgpu_register_gpu_instance(adev);
3259
d38ceaf9
AD
3260 /* enable clockgating, etc. after ib tests, etc. since some blocks require
3261 * explicit gating rather than handling it automatically.
3262 */
06ec9070 3263 r = amdgpu_device_ip_late_init(adev);
2c1a2784 3264 if (r) {
06ec9070 3265 dev_err(adev->dev, "amdgpu_device_ip_late_init failed\n");
e23b74aa 3266 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_AMDGPU_LATE_INIT_FAIL, 0, r);
83ba126a 3267 goto failed;
2c1a2784 3268 }
d38ceaf9 3269
108c6a63 3270 /* must succeed. */
511fdbc3 3271 amdgpu_ras_resume(adev);
108c6a63 3272
beff74bc
AD
3273 queue_delayed_work(system_wq, &adev->delayed_init_work,
3274 msecs_to_jiffies(AMDGPU_RESUME_MS));
3275
77f3a5cd 3276 r = sysfs_create_files(&adev->dev->kobj, amdgpu_dev_attributes);
bd607166 3277 if (r) {
77f3a5cd 3278 dev_err(adev->dev, "Could not create amdgpu device attr\n");
bd607166
KR
3279 return r;
3280 }
3281
d155bef0
AB
3282 if (IS_ENABLED(CONFIG_PERF_EVENTS))
3283 r = amdgpu_pmu_init(adev);
9c7c85f7
JK
3284 if (r)
3285 dev_err(adev->dev, "amdgpu_pmu_init failed\n");
3286
d38ceaf9 3287 return 0;
83ba126a
AD
3288
3289failed:
89041940 3290 amdgpu_vf_error_trans_all(adev);
3840c5bc 3291 if (boco)
83ba126a 3292 vga_switcheroo_fini_domain_pm_ops(adev->dev);
8840a387 3293
83ba126a 3294 return r;
d38ceaf9
AD
3295}
3296
d38ceaf9
AD
3297/**
3298 * amdgpu_device_fini - tear down the driver
3299 *
3300 * @adev: amdgpu_device pointer
3301 *
3302 * Tear down the driver info (all asics).
3303 * Called at driver shutdown.
3304 */
3305void amdgpu_device_fini(struct amdgpu_device *adev)
3306{
3307 int r;
3308
3309 DRM_INFO("amdgpu: finishing device.\n");
9f875167 3310 flush_delayed_work(&adev->delayed_init_work);
d0d13fe8 3311 adev->shutdown = true;
9f875167 3312
752c683d
ML
3313 /* make sure IB test finished before entering exclusive mode
3314 * to avoid preemption on IB test
3315 */
3316 if (amdgpu_sriov_vf(adev))
3317 amdgpu_virt_request_full_gpu(adev, false);
3318
e5b03032
ML
3319 /* disable all interrupts */
3320 amdgpu_irq_disable_all(adev);
ff97cba8
ML
3321 if (adev->mode_info.mode_config_initialized){
3322 if (!amdgpu_device_has_dc_support(adev))
c2d88e06 3323 drm_helper_force_disable_all(adev->ddev);
ff97cba8
ML
3324 else
3325 drm_atomic_helper_shutdown(adev->ddev);
3326 }
d38ceaf9 3327 amdgpu_fence_driver_fini(adev);
7c868b59
YT
3328 if (adev->pm_sysfs_en)
3329 amdgpu_pm_sysfs_fini(adev);
d38ceaf9 3330 amdgpu_fbdev_fini(adev);
06ec9070 3331 r = amdgpu_device_ip_fini(adev);
ab4fe3e1
HR
3332 if (adev->firmware.gpu_info_fw) {
3333 release_firmware(adev->firmware.gpu_info_fw);
3334 adev->firmware.gpu_info_fw = NULL;
3335 }
d38ceaf9
AD
3336 adev->accel_working = false;
3337 /* free i2c buses */
4562236b
HW
3338 if (!amdgpu_device_has_dc_support(adev))
3339 amdgpu_i2c_fini(adev);
bfca0289
SL
3340
3341 if (amdgpu_emu_mode != 1)
3342 amdgpu_atombios_fini(adev);
3343
d38ceaf9
AD
3344 kfree(adev->bios);
3345 adev->bios = NULL;
3840c5bc
AD
3346 if (amdgpu_has_atpx() &&
3347 (amdgpu_is_atpx_hybrid() ||
3348 amdgpu_has_atpx_dgpu_power_cntl()) &&
3349 !pci_is_thunderbolt_attached(adev->pdev))
84c8b22e 3350 vga_switcheroo_unregister_client(adev->pdev);
3840c5bc 3351 if (amdgpu_device_supports_boco(adev->ddev))
83ba126a 3352 vga_switcheroo_fini_domain_pm_ops(adev->dev);
d38ceaf9
AD
3353 vga_client_register(adev->pdev, NULL, NULL, NULL);
3354 if (adev->rio_mem)
3355 pci_iounmap(adev->pdev, adev->rio_mem);
3356 adev->rio_mem = NULL;
3357 iounmap(adev->rmmio);
3358 adev->rmmio = NULL;
06ec9070 3359 amdgpu_device_doorbell_fini(adev);
e9bc1bf7 3360
7c868b59
YT
3361 if (adev->ucode_sysfs_en)
3362 amdgpu_ucode_sysfs_fini(adev);
77f3a5cd
ND
3363
3364 sysfs_remove_files(&adev->dev->kobj, amdgpu_dev_attributes);
d155bef0
AB
3365 if (IS_ENABLED(CONFIG_PERF_EVENTS))
3366 amdgpu_pmu_fini(adev);
f54eeab4 3367 if (amdgpu_discovery && adev->asic_type >= CHIP_NAVI10)
a190d1c7 3368 amdgpu_discovery_fini(adev);
d38ceaf9
AD
3369}
3370
3371
3372/*
3373 * Suspend & resume.
3374 */
3375/**
810ddc3a 3376 * amdgpu_device_suspend - initiate device suspend
d38ceaf9 3377 *
87e3f136
DP
3378 * @dev: drm dev pointer
 3379 * @fbcon: notify the fbdev of suspend
d38ceaf9
AD
3381 *
3382 * Puts the hw in the suspend state (all asics).
3383 * Returns 0 for success or an error on failure.
3384 * Called at driver suspend.
3385 */
de185019 3386int amdgpu_device_suspend(struct drm_device *dev, bool fbcon)
d38ceaf9
AD
3387{
3388 struct amdgpu_device *adev;
3389 struct drm_crtc *crtc;
3390 struct drm_connector *connector;
f8d2d39e 3391 struct drm_connector_list_iter iter;
5ceb54c6 3392 int r;
d38ceaf9
AD
3393
3394 if (dev == NULL || dev->dev_private == NULL) {
3395 return -ENODEV;
3396 }
3397
3398 adev = dev->dev_private;
3399
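	/* nothing to do if the device is already powered off via vga_switcheroo */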
3400 if (dev->switch_power_state == DRM_SWITCH_POWER_OFF)
3401 return 0;
3402
44779b43 3403 adev->in_suspend = true;
d38ceaf9
AD
3404 drm_kms_helper_poll_disable(dev);
3405
5f818173
S
3406 if (fbcon)
3407 amdgpu_fbdev_set_suspend(adev, 1);
3408
beff74bc 3409 cancel_delayed_work_sync(&adev->delayed_init_work);
a5459475 3410
4562236b
HW
3411 if (!amdgpu_device_has_dc_support(adev)) {
3412 /* turn off display hw */
3413 drm_modeset_lock_all(dev);
f8d2d39e
LP
3414 drm_connector_list_iter_begin(dev, &iter);
3415 drm_for_each_connector_iter(connector, &iter)
3416 drm_helper_connector_dpms(connector,
3417 DRM_MODE_DPMS_OFF);
3418 drm_connector_list_iter_end(&iter);
4562236b 3419 drm_modeset_unlock_all(dev);
fe1053b7
AD
3420 /* unpin the front buffers and cursors */
3421 list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) {
3422 struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
3423 struct drm_framebuffer *fb = crtc->primary->fb;
3424 struct amdgpu_bo *robj;
3425
91334223 3426 if (amdgpu_crtc->cursor_bo && !adev->enable_virtual_display) {
fe1053b7
AD
3427 struct amdgpu_bo *aobj = gem_to_amdgpu_bo(amdgpu_crtc->cursor_bo);
3428 r = amdgpu_bo_reserve(aobj, true);
3429 if (r == 0) {
3430 amdgpu_bo_unpin(aobj);
3431 amdgpu_bo_unreserve(aobj);
3432 }
756e6880 3433 }
756e6880 3434
fe1053b7
AD
3435 if (fb == NULL || fb->obj[0] == NULL) {
3436 continue;
3437 }
3438 robj = gem_to_amdgpu_bo(fb->obj[0]);
3439 /* don't unpin kernel fb objects */
3440 if (!amdgpu_fbdev_robj_is_fb(adev, robj)) {
3441 r = amdgpu_bo_reserve(robj, true);
3442 if (r == 0) {
3443 amdgpu_bo_unpin(robj);
3444 amdgpu_bo_unreserve(robj);
3445 }
d38ceaf9
AD
3446 }
3447 }
3448 }
fe1053b7 3449
5e6932fe 3450 amdgpu_ras_suspend(adev);
3451
fe1053b7
AD
3452 r = amdgpu_device_ip_suspend_phase1(adev);
3453
94fa5660
EQ
3454 amdgpu_amdkfd_suspend(adev, !fbcon);
3455
d38ceaf9
AD
3456 /* evict vram memory */
3457 amdgpu_bo_evict_vram(adev);
3458
5ceb54c6 3459 amdgpu_fence_driver_suspend(adev);
d38ceaf9 3460
fe1053b7 3461 r = amdgpu_device_ip_suspend_phase2(adev);
d38ceaf9 3462
a0a71e49
AD
3463 /* evict remaining vram memory
3464 * This second call to evict vram is to evict the gart page table
3465 * using the CPU.
3466 */
d38ceaf9
AD
3467 amdgpu_bo_evict_vram(adev);
3468
d38ceaf9
AD
3469 return 0;
3470}
3471
3472/**
810ddc3a 3473 * amdgpu_device_resume - initiate device resume
d38ceaf9 3474 *
87e3f136
DP
3475 * @dev: drm dev pointer
 3476 * @fbcon: notify the fbdev of resume
d38ceaf9
AD
3478 *
3479 * Bring the hw back to operating state (all asics).
3480 * Returns 0 for success or an error on failure.
3481 * Called at driver resume.
3482 */
de185019 3483int amdgpu_device_resume(struct drm_device *dev, bool fbcon)
d38ceaf9
AD
3484{
3485 struct drm_connector *connector;
f8d2d39e 3486 struct drm_connector_list_iter iter;
d38ceaf9 3487 struct amdgpu_device *adev = dev->dev_private;
756e6880 3488 struct drm_crtc *crtc;
03161a6e 3489 int r = 0;
d38ceaf9
AD
3490
3491 if (dev->switch_power_state == DRM_SWITCH_POWER_OFF)
3492 return 0;
3493
d38ceaf9 3494 /* post card */
39c640c0 3495 if (amdgpu_device_need_post(adev)) {
74b0b157 3496 r = amdgpu_atom_asic_init(adev->mode_info.atom_context);
3497 if (r)
3498 DRM_ERROR("amdgpu asic init failed\n");
3499 }
d38ceaf9 3500
06ec9070 3501 r = amdgpu_device_ip_resume(adev);
e6707218 3502 if (r) {
06ec9070 3503 DRM_ERROR("amdgpu_device_ip_resume failed (%d).\n", r);
4d3b9ae5 3504 return r;
e6707218 3505 }
5ceb54c6
AD
3506 amdgpu_fence_driver_resume(adev);
3507
d38ceaf9 3508
06ec9070 3509 r = amdgpu_device_ip_late_init(adev);
03161a6e 3510 if (r)
4d3b9ae5 3511 return r;
d38ceaf9 3512
beff74bc
AD
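	/* re-queue the delayed init work (IB ring tests etc.); it is flushed
	 * below before the display modes are restored.
	 */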
3513 queue_delayed_work(system_wq, &adev->delayed_init_work,
3514 msecs_to_jiffies(AMDGPU_RESUME_MS));
3515
fe1053b7
AD
3516 if (!amdgpu_device_has_dc_support(adev)) {
3517 /* pin cursors */
3518 list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) {
3519 struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
3520
91334223 3521 if (amdgpu_crtc->cursor_bo && !adev->enable_virtual_display) {
fe1053b7
AD
3522 struct amdgpu_bo *aobj = gem_to_amdgpu_bo(amdgpu_crtc->cursor_bo);
3523 r = amdgpu_bo_reserve(aobj, true);
3524 if (r == 0) {
3525 r = amdgpu_bo_pin(aobj, AMDGPU_GEM_DOMAIN_VRAM);
3526 if (r != 0)
3527 DRM_ERROR("Failed to pin cursor BO (%d)\n", r);
3528 amdgpu_crtc->cursor_addr = amdgpu_bo_gpu_offset(aobj);
3529 amdgpu_bo_unreserve(aobj);
3530 }
756e6880
AD
3531 }
3532 }
3533 }
9593f4d6 3534 r = amdgpu_amdkfd_resume(adev, !fbcon);
ba997709
YZ
3535 if (r)
3536 return r;
756e6880 3537
96a5d8d4 3538 /* Make sure IB tests flushed */
beff74bc 3539 flush_delayed_work(&adev->delayed_init_work);
96a5d8d4 3540
d38ceaf9
AD
3541 /* blat the mode back in */
3542 if (fbcon) {
4562236b
HW
3543 if (!amdgpu_device_has_dc_support(adev)) {
3544 /* pre DCE11 */
3545 drm_helper_resume_force_mode(dev);
3546
3547 /* turn on display hw */
3548 drm_modeset_lock_all(dev);
f8d2d39e
LP
3549
3550 drm_connector_list_iter_begin(dev, &iter);
3551 drm_for_each_connector_iter(connector, &iter)
3552 drm_helper_connector_dpms(connector,
3553 DRM_MODE_DPMS_ON);
3554 drm_connector_list_iter_end(&iter);
3555
4562236b 3556 drm_modeset_unlock_all(dev);
d38ceaf9 3557 }
4d3b9ae5 3558 amdgpu_fbdev_set_suspend(adev, 0);
d38ceaf9
AD
3559 }
3560
3561 drm_kms_helper_poll_enable(dev);
23a1a9e5 3562
5e6932fe 3563 amdgpu_ras_resume(adev);
3564
23a1a9e5
L
3565 /*
3566 * Most of the connector probing functions try to acquire runtime pm
3567 * refs to ensure that the GPU is powered on when connector polling is
3568 * performed. Since we're calling this from a runtime PM callback,
3569 * trying to acquire rpm refs will cause us to deadlock.
3570 *
3571 * Since we're guaranteed to be holding the rpm lock, it's safe to
3572 * temporarily disable the rpm helpers so this doesn't deadlock us.
3573 */
3574#ifdef CONFIG_PM
3575 dev->dev->power.disable_depth++;
3576#endif
4562236b
HW
3577 if (!amdgpu_device_has_dc_support(adev))
3578 drm_helper_hpd_irq_event(dev);
3579 else
3580 drm_kms_helper_hotplug_event(dev);
23a1a9e5
L
3581#ifdef CONFIG_PM
3582 dev->dev->power.disable_depth--;
3583#endif
44779b43
RZ
3584 adev->in_suspend = false;
3585
4d3b9ae5 3586 return 0;
d38ceaf9
AD
3587}
3588
e3ecdffa
AD
3589/**
3590 * amdgpu_device_ip_check_soft_reset - did soft reset succeed
3591 *
3592 * @adev: amdgpu_device pointer
3593 *
3594 * The list of all the hardware IPs that make up the asic is walked and
3595 * the check_soft_reset callbacks are run. check_soft_reset determines
3596 * if the asic is still hung or not.
3597 * Returns true if any of the IPs are still in a hung state, false if not.
3598 */
06ec9070 3599static bool amdgpu_device_ip_check_soft_reset(struct amdgpu_device *adev)
63fbf42f
CZ
3600{
3601 int i;
3602 bool asic_hang = false;
3603
f993d628
ML
3604 if (amdgpu_sriov_vf(adev))
3605 return true;
3606
8bc04c29
AD
3607 if (amdgpu_asic_need_full_reset(adev))
3608 return true;
3609
63fbf42f 3610 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 3611 if (!adev->ip_blocks[i].status.valid)
63fbf42f 3612 continue;
a1255107
AD
3613 if (adev->ip_blocks[i].version->funcs->check_soft_reset)
3614 adev->ip_blocks[i].status.hang =
3615 adev->ip_blocks[i].version->funcs->check_soft_reset(adev);
3616 if (adev->ip_blocks[i].status.hang) {
3617 DRM_INFO("IP block:%s is hung!\n", adev->ip_blocks[i].version->funcs->name);
63fbf42f
CZ
3618 asic_hang = true;
3619 }
3620 }
3621 return asic_hang;
3622}
3623
e3ecdffa
AD
3624/**
3625 * amdgpu_device_ip_pre_soft_reset - prepare for soft reset
3626 *
3627 * @adev: amdgpu_device pointer
3628 *
3629 * The list of all the hardware IPs that make up the asic is walked and the
3630 * pre_soft_reset callbacks are run if the block is hung. pre_soft_reset
3631 * handles any IP specific hardware or software state changes that are
3632 * necessary for a soft reset to succeed.
3633 * Returns 0 on success, negative error code on failure.
3634 */
06ec9070 3635static int amdgpu_device_ip_pre_soft_reset(struct amdgpu_device *adev)
d31a501e
CZ
3636{
3637 int i, r = 0;
3638
3639 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 3640 if (!adev->ip_blocks[i].status.valid)
d31a501e 3641 continue;
a1255107
AD
3642 if (adev->ip_blocks[i].status.hang &&
3643 adev->ip_blocks[i].version->funcs->pre_soft_reset) {
3644 r = adev->ip_blocks[i].version->funcs->pre_soft_reset(adev);
d31a501e
CZ
3645 if (r)
3646 return r;
3647 }
3648 }
3649
3650 return 0;
3651}
3652
e3ecdffa
AD
3653/**
3654 * amdgpu_device_ip_need_full_reset - check if a full asic reset is needed
3655 *
3656 * @adev: amdgpu_device pointer
3657 *
3658 * Some hardware IPs cannot be soft reset. If they are hung, a full gpu
3659 * reset is necessary to recover.
3660 * Returns true if a full asic reset is required, false if not.
3661 */
06ec9070 3662static bool amdgpu_device_ip_need_full_reset(struct amdgpu_device *adev)
35d782fe 3663{
da146d3b
AD
3664 int i;
3665
8bc04c29
AD
3666 if (amdgpu_asic_need_full_reset(adev))
3667 return true;
3668
da146d3b 3669 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 3670 if (!adev->ip_blocks[i].status.valid)
da146d3b 3671 continue;
a1255107
AD
3672 if ((adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) ||
3673 (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) ||
3674 (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_ACP) ||
98512bb8
KW
3675 (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_DCE) ||
3676 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP) {
a1255107 3677 if (adev->ip_blocks[i].status.hang) {
da146d3b
AD
3678 DRM_INFO("Some block need full reset!\n");
3679 return true;
3680 }
3681 }
35d782fe
CZ
3682 }
3683 return false;
3684}
3685
e3ecdffa
AD
3686/**
3687 * amdgpu_device_ip_soft_reset - do a soft reset
3688 *
3689 * @adev: amdgpu_device pointer
3690 *
3691 * The list of all the hardware IPs that make up the asic is walked and the
3692 * soft_reset callbacks are run if the block is hung. soft_reset handles any
3693 * IP specific hardware or software state changes that are necessary to soft
3694 * reset the IP.
3695 * Returns 0 on success, negative error code on failure.
3696 */
06ec9070 3697static int amdgpu_device_ip_soft_reset(struct amdgpu_device *adev)
35d782fe
CZ
3698{
3699 int i, r = 0;
3700
3701 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 3702 if (!adev->ip_blocks[i].status.valid)
35d782fe 3703 continue;
a1255107
AD
3704 if (adev->ip_blocks[i].status.hang &&
3705 adev->ip_blocks[i].version->funcs->soft_reset) {
3706 r = adev->ip_blocks[i].version->funcs->soft_reset(adev);
35d782fe
CZ
3707 if (r)
3708 return r;
3709 }
3710 }
3711
3712 return 0;
3713}
3714
e3ecdffa
AD
3715/**
3716 * amdgpu_device_ip_post_soft_reset - clean up from soft reset
3717 *
3718 * @adev: amdgpu_device pointer
3719 *
3720 * The list of all the hardware IPs that make up the asic is walked and the
3721 * post_soft_reset callbacks are run if the asic was hung. post_soft_reset
3722 * handles any IP specific hardware or software state changes that are
3723 * necessary after the IP has been soft reset.
3724 * Returns 0 on success, negative error code on failure.
3725 */
06ec9070 3726static int amdgpu_device_ip_post_soft_reset(struct amdgpu_device *adev)
35d782fe
CZ
3727{
3728 int i, r = 0;
3729
3730 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 3731 if (!adev->ip_blocks[i].status.valid)
35d782fe 3732 continue;
a1255107
AD
3733 if (adev->ip_blocks[i].status.hang &&
3734 adev->ip_blocks[i].version->funcs->post_soft_reset)
3735 r = adev->ip_blocks[i].version->funcs->post_soft_reset(adev);
35d782fe
CZ
3736 if (r)
3737 return r;
3738 }
3739
3740 return 0;
3741}
3742
e3ecdffa 3743/**
c33adbc7 3744 * amdgpu_device_recover_vram - Recover some VRAM contents
e3ecdffa
AD
3745 *
3746 * @adev: amdgpu_device pointer
3747 *
3748 * Restores the contents of VRAM buffers from the shadows in GTT. Used to
3749 * restore things like GPUVM page tables after a GPU reset where
3750 * the contents of VRAM might be lost.
403009bf
CK
3751 *
3752 * Returns:
3753 * 0 on success, negative error code on failure.
e3ecdffa 3754 */
c33adbc7 3755static int amdgpu_device_recover_vram(struct amdgpu_device *adev)
c41d1cf6 3756{
c41d1cf6 3757 struct dma_fence *fence = NULL, *next = NULL;
403009bf
CK
3758 struct amdgpu_bo *shadow;
3759 long r = 1, tmo;
c41d1cf6
ML
3760
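	/* pick the per-fence wait timeout; SR-IOV guests get a much longer budget */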
3761 if (amdgpu_sriov_runtime(adev))
b045d3af 3762 tmo = msecs_to_jiffies(8000);
c41d1cf6
ML
3763 else
3764 tmo = msecs_to_jiffies(100);
3765
3766 DRM_INFO("recover vram bo from shadow start\n");
3767 mutex_lock(&adev->shadow_list_lock);
403009bf
CK
3768 list_for_each_entry(shadow, &adev->shadow_list, shadow_list) {
3769
3770 /* No need to recover an evicted BO */
3771 if (shadow->tbo.mem.mem_type != TTM_PL_TT ||
b575f10d 3772 shadow->tbo.mem.start == AMDGPU_BO_INVALID_OFFSET ||
403009bf
CK
3773 shadow->parent->tbo.mem.mem_type != TTM_PL_VRAM)
3774 continue;
3775
3776 r = amdgpu_bo_restore_shadow(shadow, &next);
3777 if (r)
3778 break;
3779
c41d1cf6 3780 if (fence) {
1712fb1a 3781 tmo = dma_fence_wait_timeout(fence, false, tmo);
403009bf
CK
3782 dma_fence_put(fence);
3783 fence = next;
1712fb1a 3784 if (tmo == 0) {
3785 r = -ETIMEDOUT;
c41d1cf6 3786 break;
1712fb1a 3787 } else if (tmo < 0) {
3788 r = tmo;
3789 break;
3790 }
403009bf
CK
3791 } else {
3792 fence = next;
c41d1cf6 3793 }
c41d1cf6
ML
3794 }
3795 mutex_unlock(&adev->shadow_list_lock);
3796
403009bf
CK
3797 if (fence)
3798 tmo = dma_fence_wait_timeout(fence, false, tmo);
c41d1cf6
ML
3799 dma_fence_put(fence);
3800
1712fb1a 3801 if (r < 0 || tmo <= 0) {
3802 DRM_ERROR("recover vram bo from shadow failed, r is %ld, tmo is %ld\n", r, tmo);
403009bf
CK
3803 return -EIO;
3804 }
c41d1cf6 3805
403009bf
CK
3806 DRM_INFO("recover vram bo from shadow done\n");
3807 return 0;
c41d1cf6
ML
3808}
3809
a90ad3c2 3810
e3ecdffa 3811/**
06ec9070 3812 * amdgpu_device_reset_sriov - reset ASIC for SR-IOV vf
5740682e
ML
3813 *
3814 * @adev: amdgpu device pointer
87e3f136 3815 * @from_hypervisor: request from hypervisor
5740682e
ML
3816 *
 3817 * Do a VF FLR and reinitialize the ASIC.
 3818 * Returns 0 on success, negative error code on failure.
e3ecdffa
AD
3819 */
3820static int amdgpu_device_reset_sriov(struct amdgpu_device *adev,
3821 bool from_hypervisor)
5740682e
ML
3822{
3823 int r;
3824
3825 if (from_hypervisor)
3826 r = amdgpu_virt_request_full_gpu(adev, true);
3827 else
3828 r = amdgpu_virt_reset_gpu(adev);
3829 if (r)
3830 return r;
a90ad3c2 3831
b639c22c
JZ
3832 amdgpu_amdkfd_pre_reset(adev);
3833
a90ad3c2 3834 /* Resume IP prior to SMC */
06ec9070 3835 r = amdgpu_device_ip_reinit_early_sriov(adev);
5740682e
ML
3836 if (r)
3837 goto error;
a90ad3c2 3838
c9ffa427 3839 amdgpu_virt_init_data_exchange(adev);
a90ad3c2 3840 /* we need recover gart prior to run SMC/CP/SDMA resume */
c1c7ce8f 3841 amdgpu_gtt_mgr_recover(&adev->mman.bdev.man[TTM_PL_TT]);
a90ad3c2 3842
7a3e0bb2
RZ
3843 r = amdgpu_device_fw_loading(adev);
3844 if (r)
3845 return r;
3846
a90ad3c2 3847 /* now we are okay to resume SMC/CP/SDMA */
06ec9070 3848 r = amdgpu_device_ip_reinit_late_sriov(adev);
5740682e
ML
3849 if (r)
3850 goto error;
a90ad3c2
ML
3851
3852 amdgpu_irq_gpu_reset_resume_helper(adev);
5740682e 3853 r = amdgpu_ib_ring_tests(adev);
f81e8d53 3854 amdgpu_amdkfd_post_reset(adev);
a90ad3c2 3855
abc34253
ED
3856error:
3857 amdgpu_virt_release_full_gpu(adev, true);
c41d1cf6 3858 if (!r && adev->virt.gim_feature & AMDGIM_FEATURE_GIM_FLR_VRAMLOST) {
e3526257 3859 amdgpu_inc_vram_lost(adev);
c33adbc7 3860 r = amdgpu_device_recover_vram(adev);
a90ad3c2
ML
3861 }
3862
3863 return r;
3864}
3865
12938fad
CK
3866/**
3867 * amdgpu_device_should_recover_gpu - check if we should try GPU recovery
3868 *
3869 * @adev: amdgpu device pointer
3870 *
3871 * Check amdgpu_gpu_recovery and SRIOV status to see if we should try to recover
3872 * a hung GPU.
3873 */
3874bool amdgpu_device_should_recover_gpu(struct amdgpu_device *adev)
3875{
3876 if (!amdgpu_device_ip_check_soft_reset(adev)) {
3877 DRM_INFO("Timeout, but no hardware hang detected.\n");
3878 return false;
3879 }
3880
3ba7b418
AG
3881 if (amdgpu_gpu_recovery == 0)
3882 goto disabled;
3883
3884 if (amdgpu_sriov_vf(adev))
3885 return true;
3886
3887 if (amdgpu_gpu_recovery == -1) {
3888 switch (adev->asic_type) {
fc42d47c
AG
3889 case CHIP_BONAIRE:
3890 case CHIP_HAWAII:
3ba7b418
AG
3891 case CHIP_TOPAZ:
3892 case CHIP_TONGA:
3893 case CHIP_FIJI:
3894 case CHIP_POLARIS10:
3895 case CHIP_POLARIS11:
3896 case CHIP_POLARIS12:
3897 case CHIP_VEGAM:
3898 case CHIP_VEGA20:
3899 case CHIP_VEGA10:
3900 case CHIP_VEGA12:
c43b849f 3901 case CHIP_RAVEN:
e9d4cf91 3902 case CHIP_ARCTURUS:
2cb44fb0 3903 case CHIP_RENOIR:
658c6639
AD
3904 case CHIP_NAVI10:
3905 case CHIP_NAVI14:
3906 case CHIP_NAVI12:
3ba7b418
AG
3907 break;
3908 default:
3909 goto disabled;
3910 }
12938fad
CK
3911 }
3912
3913 return true;
3ba7b418
AG
3914
3915disabled:
3916 DRM_INFO("GPU recovery disabled.\n");
3917 return false;
12938fad
CK
3918}
3919
5c6dd71e 3920
26bc5340
AG
3921static int amdgpu_device_pre_asic_reset(struct amdgpu_device *adev,
3922 struct amdgpu_job *job,
3923 bool *need_full_reset_arg)
3924{
3925 int i, r = 0;
3926 bool need_full_reset = *need_full_reset_arg;
71182665 3927
728e7e0c
JZ
3928 amdgpu_debugfs_wait_dump(adev);
3929
71182665 3930 /* block all schedulers and reset given job's ring */
0875dc9e
CZ
3931 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
3932 struct amdgpu_ring *ring = adev->rings[i];
3933
51687759 3934 if (!ring || !ring->sched.thread)
0875dc9e 3935 continue;
5740682e 3936
2f9d4084
ML
3937 /* after all hw jobs are reset, hw fence is meaningless, so force_completion */
3938 amdgpu_fence_driver_force_completion(ring);
0875dc9e 3939 }
d38ceaf9 3940
222b5f04
AG
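	/* bump the offending job's karma so the scheduler can flag its context as guilty */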
 3941	if (job)
3942 drm_sched_increase_karma(&job->base);
3943
1d721ed6 3944 /* Don't suspend on bare metal if we are not going to HW reset the ASIC */
26bc5340
AG
3945 if (!amdgpu_sriov_vf(adev)) {
3946
3947 if (!need_full_reset)
3948 need_full_reset = amdgpu_device_ip_need_full_reset(adev);
3949
3950 if (!need_full_reset) {
3951 amdgpu_device_ip_pre_soft_reset(adev);
3952 r = amdgpu_device_ip_soft_reset(adev);
3953 amdgpu_device_ip_post_soft_reset(adev);
3954 if (r || amdgpu_device_ip_check_soft_reset(adev)) {
3955 DRM_INFO("soft reset failed, will fallback to full reset!\n");
3956 need_full_reset = true;
3957 }
3958 }
3959
3960 if (need_full_reset)
3961 r = amdgpu_device_ip_suspend(adev);
3962
3963 *need_full_reset_arg = need_full_reset;
3964 }
3965
3966 return r;
3967}
3968
041a62bc 3969static int amdgpu_do_asic_reset(struct amdgpu_hive_info *hive,
26bc5340
AG
3970 struct list_head *device_list_handle,
3971 bool *need_full_reset_arg)
3972{
3973 struct amdgpu_device *tmp_adev = NULL;
3974 bool need_full_reset = *need_full_reset_arg, vram_lost = false;
3975 int r = 0;
3976
3977 /*
 3978	 * ASIC reset has to be done on all XGMI hive nodes ASAP
 3979	 * to allow proper link negotiation in FW (within 1 sec)
3980 */
3981 if (need_full_reset) {
3982 list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) {
041a62bc 3983 /* For XGMI run all resets in parallel to speed up the process */
d4535e2c 3984 if (tmp_adev->gmc.xgmi.num_physical_nodes > 1) {
c96cf282 3985 if (!queue_work(system_unbound_wq, &tmp_adev->xgmi_reset_work))
d4535e2c
AG
3986 r = -EALREADY;
3987 } else
3988 r = amdgpu_asic_reset(tmp_adev);
d4535e2c 3989
041a62bc
AG
3990 if (r) {
3991 DRM_ERROR("ASIC reset failed with error, %d for drm dev, %s",
3992 r, tmp_adev->ddev->unique);
3993 break;
ce316fa5
LM
3994 }
3995 }
3996
041a62bc
AG
3997 /* For XGMI wait for all resets to complete before proceed */
3998 if (!r) {
ce316fa5
LM
3999 list_for_each_entry(tmp_adev, device_list_handle,
4000 gmc.xgmi.head) {
4001 if (tmp_adev->gmc.xgmi.num_physical_nodes > 1) {
4002 flush_work(&tmp_adev->xgmi_reset_work);
4003 r = tmp_adev->asic_reset_res;
4004 if (r)
4005 break;
ce316fa5
LM
4006 }
4007 }
4008 }
ce316fa5 4009 }
26bc5340 4010
43c4d576
JC
4011 if (!r && amdgpu_ras_intr_triggered()) {
4012 list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) {
4013 if (tmp_adev->mmhub.funcs &&
4014 tmp_adev->mmhub.funcs->reset_ras_error_count)
4015 tmp_adev->mmhub.funcs->reset_ras_error_count(tmp_adev);
4016 }
4017
00eaa571 4018 amdgpu_ras_intr_cleared();
43c4d576 4019 }
00eaa571 4020
26bc5340
AG
4021 list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) {
4022 if (need_full_reset) {
4023 /* post card */
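			/* i.e. re-execute the ASIC init tables from the VBIOS after the full reset */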
4024 if (amdgpu_atom_asic_init(tmp_adev->mode_info.atom_context))
4025 DRM_WARN("asic atom init failed!");
4026
4027 if (!r) {
4028 dev_info(tmp_adev->dev, "GPU reset succeeded, trying to resume\n");
4029 r = amdgpu_device_ip_resume_phase1(tmp_adev);
4030 if (r)
4031 goto out;
4032
4033 vram_lost = amdgpu_device_check_vram_lost(tmp_adev);
4034 if (vram_lost) {
77e7f829 4035 DRM_INFO("VRAM is lost due to GPU reset!\n");
e3526257 4036 amdgpu_inc_vram_lost(tmp_adev);
26bc5340
AG
4037 }
4038
4039 r = amdgpu_gtt_mgr_recover(
4040 &tmp_adev->mman.bdev.man[TTM_PL_TT]);
4041 if (r)
4042 goto out;
4043
4044 r = amdgpu_device_fw_loading(tmp_adev);
4045 if (r)
4046 return r;
4047
4048 r = amdgpu_device_ip_resume_phase2(tmp_adev);
4049 if (r)
4050 goto out;
4051
4052 if (vram_lost)
4053 amdgpu_device_fill_reset_magic(tmp_adev);
4054
fdafb359
EQ
4055 /*
 4056				 * Add this ASIC back as tracked since the reset has
 4057				 * already completed successfully.
4058 */
4059 amdgpu_register_gpu_instance(tmp_adev);
4060
7c04ca50 4061 r = amdgpu_device_ip_late_init(tmp_adev);
4062 if (r)
4063 goto out;
4064
565d1941
EQ
4065 amdgpu_fbdev_set_suspend(tmp_adev, 0);
4066
e79a04d5 4067 /* must succeed. */
511fdbc3 4068 amdgpu_ras_resume(tmp_adev);
e79a04d5 4069
26bc5340
AG
4070 /* Update PSP FW topology after reset */
4071 if (hive && tmp_adev->gmc.xgmi.num_physical_nodes > 1)
4072 r = amdgpu_xgmi_update_topology(hive, tmp_adev);
4073 }
4074 }
4075
4076
4077out:
4078 if (!r) {
4079 amdgpu_irq_gpu_reset_resume_helper(tmp_adev);
4080 r = amdgpu_ib_ring_tests(tmp_adev);
4081 if (r) {
4082 dev_err(tmp_adev->dev, "ib ring test failed (%d).\n", r);
4083 r = amdgpu_device_ip_suspend(tmp_adev);
4084 need_full_reset = true;
4085 r = -EAGAIN;
4086 goto end;
4087 }
4088 }
4089
4090 if (!r)
4091 r = amdgpu_device_recover_vram(tmp_adev);
4092 else
4093 tmp_adev->asic_reset_res = r;
4094 }
4095
4096end:
4097 *need_full_reset_arg = need_full_reset;
4098 return r;
4099}
4100
1d721ed6 4101static bool amdgpu_device_lock_adev(struct amdgpu_device *adev, bool trylock)
26bc5340 4102{
1d721ed6
AG
4103 if (trylock) {
4104 if (!mutex_trylock(&adev->lock_reset))
4105 return false;
4106 } else
4107 mutex_lock(&adev->lock_reset);
5740682e 4108
26bc5340 4109 atomic_inc(&adev->gpu_reset_counter);
2a9b90ae 4110 adev->in_gpu_reset = true;
a3a09142
AD
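	/* select the MP1 state that matches the reset method about to be used */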
4111 switch (amdgpu_asic_reset_method(adev)) {
4112 case AMD_RESET_METHOD_MODE1:
4113 adev->mp1_state = PP_MP1_STATE_SHUTDOWN;
4114 break;
4115 case AMD_RESET_METHOD_MODE2:
4116 adev->mp1_state = PP_MP1_STATE_RESET;
4117 break;
4118 default:
4119 adev->mp1_state = PP_MP1_STATE_NONE;
4120 break;
4121 }
1d721ed6
AG
4122
4123 return true;
26bc5340 4124}
d38ceaf9 4125
26bc5340
AG
4126static void amdgpu_device_unlock_adev(struct amdgpu_device *adev)
4127{
89041940 4128 amdgpu_vf_error_trans_all(adev);
a3a09142 4129 adev->mp1_state = PP_MP1_STATE_NONE;
2a9b90ae 4130 adev->in_gpu_reset = false;
13a752e3 4131 mutex_unlock(&adev->lock_reset);
26bc5340
AG
4132}
4133
3f12acc8
EQ
4134static void amdgpu_device_resume_display_audio(struct amdgpu_device *adev)
4135{
4136 struct pci_dev *p = NULL;
4137
4138 p = pci_get_domain_bus_and_slot(pci_domain_nr(adev->pdev->bus),
4139 adev->pdev->bus->number, 1);
4140 if (p) {
4141 pm_runtime_enable(&(p->dev));
4142 pm_runtime_resume(&(p->dev));
4143 }
4144}
4145
4146static int amdgpu_device_suspend_display_audio(struct amdgpu_device *adev)
4147{
4148 enum amd_reset_method reset_method;
4149 struct pci_dev *p = NULL;
4150 u64 expires;
4151
4152 /*
4153 * For now, only BACO and mode1 reset are confirmed
 4154	 * to suffer from the audio issue if the audio device is not properly suspended.
4155 */
4156 reset_method = amdgpu_asic_reset_method(adev);
4157 if ((reset_method != AMD_RESET_METHOD_BACO) &&
4158 (reset_method != AMD_RESET_METHOD_MODE1))
4159 return -EINVAL;
4160
4161 p = pci_get_domain_bus_and_slot(pci_domain_nr(adev->pdev->bus),
4162 adev->pdev->bus->number, 1);
4163 if (!p)
4164 return -ENODEV;
4165
4166 expires = pm_runtime_autosuspend_expiration(&(p->dev));
4167 if (!expires)
4168 /*
4169 * If we cannot get the audio device autosuspend delay,
 4170		 * a fixed 4S interval will be used. Since 3S is the audio
 4171		 * controller's default autosuspend delay setting, the 4S
 4172		 * used here is guaranteed to cover that.
4173 */
54b7feb9 4174 expires = ktime_get_mono_fast_ns() + NSEC_PER_SEC * 4ULL;
3f12acc8
EQ
4175
4176 while (!pm_runtime_status_suspended(&(p->dev))) {
4177 if (!pm_runtime_suspend(&(p->dev)))
4178 break;
4179
4180 if (expires < ktime_get_mono_fast_ns()) {
4181 dev_warn(adev->dev, "failed to suspend display audio\n");
4182 /* TODO: abort the succeeding gpu reset? */
4183 return -ETIMEDOUT;
4184 }
4185 }
4186
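	/* keep runtime PM from waking the audio device behind our back during
	 * the reset; re-enabled in amdgpu_device_resume_display_audio().
	 */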
4187 pm_runtime_disable(&(p->dev));
4188
4189 return 0;
4190}
4191
26bc5340
AG
4192/**
4193 * amdgpu_device_gpu_recover - reset the asic and recover scheduler
4194 *
4195 * @adev: amdgpu device pointer
4196 * @job: which job trigger hang
4197 *
4198 * Attempt to reset the GPU if it has hung (all asics).
4199 * Attempt to do soft-reset or full-reset and reinitialize Asic
4200 * Returns 0 for success or an error on failure.
4201 */
4202
4203int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
4204 struct amdgpu_job *job)
4205{
1d721ed6 4206 struct list_head device_list, *device_list_handle = NULL;
7dd8c205
EQ
4207 bool need_full_reset = false;
4208 bool job_signaled = false;
26bc5340 4209 struct amdgpu_hive_info *hive = NULL;
26bc5340 4210 struct amdgpu_device *tmp_adev = NULL;
1d721ed6 4211 int i, r = 0;
7c6e68c7 4212 bool in_ras_intr = amdgpu_ras_intr_triggered();
b823821f
LM
4213 bool use_baco =
4214 (amdgpu_asic_reset_method(adev) == AMD_RESET_METHOD_BACO) ?
4215 true : false;
3f12acc8 4216 bool audio_suspended = false;
26bc5340 4217
d5ea093e
AG
4218 /*
4219 * Flush RAM to disk so that after reboot
 4220	 * the user can read the log and see why the system rebooted.
4221 */
b823821f 4222 if (in_ras_intr && !use_baco && amdgpu_ras_get_context(adev)->reboot) {
d5ea093e
AG
4223
4224 DRM_WARN("Emergency reboot.");
4225
4226 ksys_sync_helper();
4227 emergency_restart();
4228 }
4229
b823821f
LM
4230 dev_info(adev->dev, "GPU %s begin!\n",
4231 (in_ras_intr && !use_baco) ? "jobs stop":"reset");
26bc5340
AG
4232
4233 /*
1d721ed6
AG
4234 * Here we trylock to avoid chain of resets executing from
4235 * either trigger by jobs on different adevs in XGMI hive or jobs on
4236 * different schedulers for same device while this TO handler is running.
4237 * We always reset all schedulers for device and all devices for XGMI
4238 * hive so that should take care of them too.
26bc5340 4239 */
7dd8c205 4240 hive = amdgpu_get_xgmi_hive(adev, true);
1d721ed6
AG
4241 if (hive && !mutex_trylock(&hive->reset_lock)) {
4242 DRM_INFO("Bailing on TDR for s_job:%llx, hive: %llx as another already in progress",
0b2d2c2e 4243 job ? job->base.id : -1, hive->hive_id);
9e94d22c 4244 mutex_unlock(&hive->hive_lock);
26bc5340 4245 return 0;
1d721ed6 4246 }
26bc5340 4247
9e94d22c
EQ
4248 /*
4249 * Build list of devices to reset.
4250 * In case we are in XGMI hive mode, resort the device list
4251 * to put adev in the 1st position.
4252 */
4253 INIT_LIST_HEAD(&device_list);
4254 if (adev->gmc.xgmi.num_physical_nodes > 1) {
4255 if (!hive)
26bc5340 4256 return -ENODEV;
9e94d22c
EQ
4257 if (!list_is_first(&adev->gmc.xgmi.head, &hive->device_list))
4258 list_rotate_to_front(&adev->gmc.xgmi.head, &hive->device_list);
26bc5340
AG
4259 device_list_handle = &hive->device_list;
4260 } else {
4261 list_add_tail(&adev->gmc.xgmi.head, &device_list);
4262 device_list_handle = &device_list;
4263 }
4264
1d721ed6
AG
4265 /* block all schedulers and reset given job's ring */
4266 list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) {
9e94d22c
EQ
4267 if (!amdgpu_device_lock_adev(tmp_adev, !hive)) {
4268 DRM_INFO("Bailing on TDR for s_job:%llx, as another already in progress",
4269 job ? job->base.id : -1);
4270 mutex_unlock(&hive->hive_lock);
4271 return 0;
7c6e68c7
AG
4272 }
4273
3f12acc8
EQ
4274 /*
4275 * Try to put the audio codec into suspend state
 4276		 * before the gpu reset starts.
4277 *
 4278		 * The power domain of the graphics device is
 4279		 * shared with the AZ power domain. Without this,
4280 * we may change the audio hardware from behind
4281 * the audio driver's back. That will trigger
4282 * some audio codec errors.
4283 */
4284 if (!amdgpu_device_suspend_display_audio(tmp_adev))
4285 audio_suspended = true;
4286
9e94d22c
EQ
4287 amdgpu_ras_set_error_query_ready(tmp_adev, false);
4288
52fb44cf
EQ
4289 cancel_delayed_work_sync(&tmp_adev->delayed_init_work);
4290
9e94d22c
EQ
4291 if (!amdgpu_sriov_vf(tmp_adev))
4292 amdgpu_amdkfd_pre_reset(tmp_adev);
4293
12ffa55d
AG
4294 /*
 4295		 * Mark these ASICs to be reset as untracked first,
 4296		 * and add them back after the reset completes.
4297 */
4298 amdgpu_unregister_gpu_instance(tmp_adev);
4299
a2f63ee8 4300 amdgpu_fbdev_set_suspend(tmp_adev, 1);
565d1941 4301
f1c1314b 4302 /* disable ras on ALL IPs */
b823821f
LM
4303 if (!(in_ras_intr && !use_baco) &&
4304 amdgpu_device_ip_need_full_reset(tmp_adev))
f1c1314b 4305 amdgpu_ras_suspend(tmp_adev);
4306
1d721ed6
AG
4307 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
4308 struct amdgpu_ring *ring = tmp_adev->rings[i];
4309
4310 if (!ring || !ring->sched.thread)
4311 continue;
4312
0b2d2c2e 4313 drm_sched_stop(&ring->sched, job ? &job->base : NULL);
7c6e68c7 4314
b823821f 4315 if (in_ras_intr && !use_baco)
7c6e68c7 4316 amdgpu_job_stop_all_jobs_on_sched(&ring->sched);
1d721ed6
AG
4317 }
4318 }
4319
b823821f 4320 if (in_ras_intr && !use_baco)
7c6e68c7
AG
4321 goto skip_sched_resume;
4322
1d721ed6
AG
4323 /*
4324 * Must check guilty signal here since after this point all old
4325 * HW fences are force signaled.
4326 *
4327 * job->base holds a reference to parent fence
4328 */
4329 if (job && job->base.s_fence->parent &&
7dd8c205 4330 dma_fence_is_signaled(job->base.s_fence->parent)) {
1d721ed6 4331 job_signaled = true;
1d721ed6
AG
4332 dev_info(adev->dev, "Guilty job already signaled, skipping HW reset");
4333 goto skip_hw_reset;
4334 }
4335
26bc5340
AG
4336retry: /* Rest of adevs pre asic reset from XGMI hive. */
4337 list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) {
26bc5340
AG
4338 r = amdgpu_device_pre_asic_reset(tmp_adev,
4339 NULL,
4340 &need_full_reset);
 4341		/* TODO: Should we stop? */
4342 if (r) {
4343 DRM_ERROR("GPU pre asic reset failed with err, %d for drm dev, %s ",
4344 r, tmp_adev->ddev->unique);
4345 tmp_adev->asic_reset_res = r;
4346 }
4347 }
4348
4349 /* Actual ASIC resets if needed.*/
4350 /* TODO Implement XGMI hive reset logic for SRIOV */
4351 if (amdgpu_sriov_vf(adev)) {
4352 r = amdgpu_device_reset_sriov(adev, job ? false : true);
4353 if (r)
4354 adev->asic_reset_res = r;
4355 } else {
041a62bc 4356 r = amdgpu_do_asic_reset(hive, device_list_handle, &need_full_reset);
26bc5340
AG
4357 if (r && r == -EAGAIN)
4358 goto retry;
4359 }
4360
1d721ed6
AG
4361skip_hw_reset:
4362
26bc5340
AG
 4363	/* Post ASIC reset for all devs. */
4364 list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) {
7c6e68c7 4365
1d721ed6
AG
4366 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
4367 struct amdgpu_ring *ring = tmp_adev->rings[i];
4368
4369 if (!ring || !ring->sched.thread)
4370 continue;
4371
 4372			/* No point in resubmitting jobs if we didn't HW reset */
4373 if (!tmp_adev->asic_reset_res && !job_signaled)
4374 drm_sched_resubmit_jobs(&ring->sched);
4375
4376 drm_sched_start(&ring->sched, !tmp_adev->asic_reset_res);
4377 }
4378
4379 if (!amdgpu_device_has_dc_support(tmp_adev) && !job_signaled) {
4380 drm_helper_resume_force_mode(tmp_adev->ddev);
4381 }
4382
4383 tmp_adev->asic_reset_res = 0;
26bc5340
AG
4384
4385 if (r) {
 4386			/* bad news, how do we tell it to userspace? */
12ffa55d 4387 dev_info(tmp_adev->dev, "GPU reset(%d) failed\n", atomic_read(&tmp_adev->gpu_reset_counter));
26bc5340
AG
4388 amdgpu_vf_error_put(tmp_adev, AMDGIM_ERROR_VF_GPU_RESET_FAIL, 0, r);
4389 } else {
12ffa55d 4390 dev_info(tmp_adev->dev, "GPU reset(%d) succeeded!\n", atomic_read(&tmp_adev->gpu_reset_counter));
26bc5340 4391 }
7c6e68c7 4392 }
26bc5340 4393
7c6e68c7
AG
4394skip_sched_resume:
4395 list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) {
 4396		/* unlock kfd: SRIOV would do it separately */
b823821f 4397 if (!(in_ras_intr && !use_baco) && !amdgpu_sriov_vf(tmp_adev))
7c6e68c7 4398 amdgpu_amdkfd_post_reset(tmp_adev);
3f12acc8
EQ
4399 if (audio_suspended)
4400 amdgpu_device_resume_display_audio(tmp_adev);
26bc5340
AG
4401 amdgpu_device_unlock_adev(tmp_adev);
4402 }
4403
9e94d22c 4404 if (hive) {
22d6575b 4405 mutex_unlock(&hive->reset_lock);
9e94d22c
EQ
4406 mutex_unlock(&hive->hive_lock);
4407 }
26bc5340
AG
4408
4409 if (r)
4410 dev_info(adev->dev, "GPU reset end with ret = %d\n", r);
d38ceaf9
AD
4411 return r;
4412}
4413
e3ecdffa
AD
4414/**
 4415 * amdgpu_device_get_pcie_info - fetch pcie info about the PCIE slot
4416 *
4417 * @adev: amdgpu_device pointer
4418 *
 4419 * Fetches and stores in the driver the PCIE capabilities (gen speed
4420 * and lanes) of the slot the device is in. Handles APUs and
4421 * virtualized environments where PCIE config space may not be available.
4422 */
5494d864 4423static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev)
d0dd7f0c 4424{
5d9a6330 4425 struct pci_dev *pdev;
c5313457
HK
4426 enum pci_bus_speed speed_cap, platform_speed_cap;
4427 enum pcie_link_width platform_link_width;
d0dd7f0c 4428
cd474ba0
AD
4429 if (amdgpu_pcie_gen_cap)
4430 adev->pm.pcie_gen_mask = amdgpu_pcie_gen_cap;
d0dd7f0c 4431
cd474ba0
AD
4432 if (amdgpu_pcie_lane_cap)
4433 adev->pm.pcie_mlw_mask = amdgpu_pcie_lane_cap;
d0dd7f0c 4434
cd474ba0
AD
4435 /* covers APUs as well */
4436 if (pci_is_root_bus(adev->pdev->bus)) {
4437 if (adev->pm.pcie_gen_mask == 0)
4438 adev->pm.pcie_gen_mask = AMDGPU_DEFAULT_PCIE_GEN_MASK;
4439 if (adev->pm.pcie_mlw_mask == 0)
4440 adev->pm.pcie_mlw_mask = AMDGPU_DEFAULT_PCIE_MLW_MASK;
d0dd7f0c 4441 return;
cd474ba0 4442 }
d0dd7f0c 4443
c5313457
HK
4444 if (adev->pm.pcie_gen_mask && adev->pm.pcie_mlw_mask)
4445 return;
4446
dbaa922b
AD
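	/* platform caps come from pcie_bandwidth_available(), which reports the
	 * slowest link in the chain between the root port and the device.
	 */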
4447 pcie_bandwidth_available(adev->pdev, NULL,
4448 &platform_speed_cap, &platform_link_width);
c5313457 4449
cd474ba0 4450 if (adev->pm.pcie_gen_mask == 0) {
5d9a6330
AD
4451 /* asic caps */
4452 pdev = adev->pdev;
4453 speed_cap = pcie_get_speed_cap(pdev);
4454 if (speed_cap == PCI_SPEED_UNKNOWN) {
4455 adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
cd474ba0
AD
4456 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
4457 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3);
cd474ba0 4458 } else {
5d9a6330
AD
4459 if (speed_cap == PCIE_SPEED_16_0GT)
4460 adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
4461 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
4462 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3 |
4463 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN4);
4464 else if (speed_cap == PCIE_SPEED_8_0GT)
4465 adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
4466 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
4467 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3);
4468 else if (speed_cap == PCIE_SPEED_5_0GT)
4469 adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
4470 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2);
4471 else
4472 adev->pm.pcie_gen_mask |= CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1;
4473 }
4474 /* platform caps */
c5313457 4475 if (platform_speed_cap == PCI_SPEED_UNKNOWN) {
5d9a6330
AD
4476 adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
4477 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2);
4478 } else {
c5313457 4479 if (platform_speed_cap == PCIE_SPEED_16_0GT)
5d9a6330
AD
4480 adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
4481 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2 |
4482 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3 |
4483 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN4);
c5313457 4484 else if (platform_speed_cap == PCIE_SPEED_8_0GT)
5d9a6330
AD
4485 adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
4486 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2 |
4487 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3);
c5313457 4488 else if (platform_speed_cap == PCIE_SPEED_5_0GT)
5d9a6330
AD
4489 adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
4490 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2);
4491 else
4492 adev->pm.pcie_gen_mask |= CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1;
4493
cd474ba0
AD
4494 }
4495 }
4496 if (adev->pm.pcie_mlw_mask == 0) {
c5313457 4497 if (platform_link_width == PCIE_LNK_WIDTH_UNKNOWN) {
5d9a6330
AD
4498 adev->pm.pcie_mlw_mask |= AMDGPU_DEFAULT_PCIE_MLW_MASK;
4499 } else {
c5313457 4500 switch (platform_link_width) {
5d9a6330 4501 case PCIE_LNK_X32:
cd474ba0
AD
4502 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X32 |
4503 CAIL_PCIE_LINK_WIDTH_SUPPORT_X16 |
4504 CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 |
4505 CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
4506 CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
4507 CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
4508 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
4509 break;
5d9a6330 4510 case PCIE_LNK_X16:
cd474ba0
AD
4511 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X16 |
4512 CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 |
4513 CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
4514 CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
4515 CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
4516 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
4517 break;
5d9a6330 4518 case PCIE_LNK_X12:
cd474ba0
AD
4519 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 |
4520 CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
4521 CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
4522 CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
4523 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
4524 break;
5d9a6330 4525 case PCIE_LNK_X8:
cd474ba0
AD
4526 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
4527 CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
4528 CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
4529 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
4530 break;
5d9a6330 4531 case PCIE_LNK_X4:
cd474ba0
AD
4532 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
4533 CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
4534 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
4535 break;
5d9a6330 4536 case PCIE_LNK_X2:
cd474ba0
AD
4537 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
4538 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
4539 break;
5d9a6330 4540 case PCIE_LNK_X1:
cd474ba0
AD
4541 adev->pm.pcie_mlw_mask = CAIL_PCIE_LINK_WIDTH_SUPPORT_X1;
4542 break;
4543 default:
4544 break;
4545 }
d0dd7f0c
AD
4546 }
4547 }
4548}
d38ceaf9 4549
361dbd01
AD
4550int amdgpu_device_baco_enter(struct drm_device *dev)
4551{
4552 struct amdgpu_device *adev = dev->dev_private;
7a22677b 4553 struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
361dbd01
AD
4554
4555 if (!amdgpu_device_supports_baco(adev->ddev))
4556 return -ENOTSUPP;
4557
7a22677b
LM
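	/* on RAS-capable parts, doorbell interrupts are turned off before
	 * entering BACO and re-enabled in amdgpu_device_baco_exit().
	 */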
4558 if (ras && ras->supported)
4559 adev->nbio.funcs->enable_doorbell_interrupt(adev, false);
4560
9530273e 4561 return amdgpu_dpm_baco_enter(adev);
361dbd01
AD
4562}
4563
4564int amdgpu_device_baco_exit(struct drm_device *dev)
4565{
4566 struct amdgpu_device *adev = dev->dev_private;
7a22677b 4567 struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
9530273e 4568 int ret = 0;
361dbd01
AD
4569
4570 if (!amdgpu_device_supports_baco(adev->ddev))
4571 return -ENOTSUPP;
4572
9530273e
EQ
4573 ret = amdgpu_dpm_baco_exit(adev);
4574 if (ret)
4575 return ret;
7a22677b
LM
4576
4577 if (ras && ras->supported)
4578 adev->nbio.funcs->enable_doorbell_interrupt(adev, true);
4579
4580 return 0;
361dbd01 4581}