/*
 * Copyright 2008 Advanced Micro Devices, Inc.
 * Copyright 2008 Red Hat Inc.
 * Copyright 2009 Jerome Glisse.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 * Authors: Dave Airlie
 *          Alex Deucher
 *          Jerome Glisse
 */
#include <linux/power_supply.h>
#include <linux/kthread.h>
#include <linux/module.h>
#include <linux/console.h>
#include <linux/slab.h>

#include <drm/drm_atomic_helper.h>
#include <drm/drm_probe_helper.h>
#include <drm/amdgpu_drm.h>
#include <linux/vgaarb.h>
#include <linux/vga_switcheroo.h>
#include <linux/efi.h>
#include "amdgpu.h"
#include "amdgpu_trace.h"
#include "amdgpu_i2c.h"
#include "atom.h"
#include "amdgpu_atombios.h"
#include "amdgpu_atomfirmware.h"
#include "amd_pcie.h"
#ifdef CONFIG_DRM_AMDGPU_SI
#include "si.h"
#endif
#ifdef CONFIG_DRM_AMDGPU_CIK
#include "cik.h"
#endif
#include "vi.h"
#include "soc15.h"
#include "nv.h"
#include "bif/bif_4_1_d.h"
#include <linux/pci.h>
#include <linux/firmware.h>
#include "amdgpu_vf_error.h"

#include "amdgpu_amdkfd.h"
#include "amdgpu_pm.h"

#include "amdgpu_xgmi.h"
#include "amdgpu_ras.h"
#include "amdgpu_pmu.h"
#include "amdgpu_fru_eeprom.h"

#include <linux/suspend.h>
#include <drm/task_barrier.h>
#include <linux/pm_runtime.h>

MODULE_FIRMWARE("amdgpu/vega10_gpu_info.bin");
MODULE_FIRMWARE("amdgpu/vega12_gpu_info.bin");
MODULE_FIRMWARE("amdgpu/raven_gpu_info.bin");
MODULE_FIRMWARE("amdgpu/picasso_gpu_info.bin");
MODULE_FIRMWARE("amdgpu/raven2_gpu_info.bin");
MODULE_FIRMWARE("amdgpu/arcturus_gpu_info.bin");
MODULE_FIRMWARE("amdgpu/renoir_gpu_info.bin");
MODULE_FIRMWARE("amdgpu/navi10_gpu_info.bin");
MODULE_FIRMWARE("amdgpu/navi14_gpu_info.bin");
MODULE_FIRMWARE("amdgpu/navi12_gpu_info.bin");
MODULE_FIRMWARE("amdgpu/sienna_cichlid_gpu_info.bin");

#define AMDGPU_RESUME_MS		2000

const char *amdgpu_asic_name[] = {
	"TAHITI",
	"PITCAIRN",
	"VERDE",
	"OLAND",
	"HAINAN",
	"BONAIRE",
	"KAVERI",
	"KABINI",
	"HAWAII",
	"MULLINS",
	"TOPAZ",
	"TONGA",
	"FIJI",
	"CARRIZO",
	"STONEY",
	"POLARIS10",
	"POLARIS11",
	"POLARIS12",
	"VEGAM",
	"VEGA10",
	"VEGA12",
	"VEGA20",
	"RAVEN",
	"ARCTURUS",
	"RENOIR",
	"NAVI10",
	"NAVI14",
	"NAVI12",
	"SIENNA_CICHLID",
	"LAST",
};

/**
 * DOC: pcie_replay_count
 *
 * The amdgpu driver provides a sysfs API for reporting the total number
 * of PCIe replays (NAKs).
 * The file pcie_replay_count is used for this and returns the total
 * number of replays as a sum of the NAKs generated and NAKs received.
 */

static ssize_t amdgpu_device_get_pcie_replay_count(struct device *dev,
		struct device_attribute *attr, char *buf)
{
	struct drm_device *ddev = dev_get_drvdata(dev);
	struct amdgpu_device *adev = ddev->dev_private;
	uint64_t cnt = amdgpu_asic_get_pcie_replay_count(adev);

	return snprintf(buf, PAGE_SIZE, "%llu\n", cnt);
}

static DEVICE_ATTR(pcie_replay_count, S_IRUGO,
		amdgpu_device_get_pcie_replay_count, NULL);

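/*
 * Illustrative only, not part of the original file: the attribute above is
 * read from userspace through sysfs, e.g. (path assumed from the usual
 * DRM/PCI sysfs layout, adjust the card index for your system):
 *
 *   cat /sys/class/drm/card0/device/pcie_replay_count
 */
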
static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev);

/**
 * DOC: product_name
 *
 * The amdgpu driver provides a sysfs API for reporting the product name
 * for the device.
 * The file product_name is used for this and returns the product name
 * as returned from the FRU.
 * NOTE: This is only available for certain server cards
 */

static ssize_t amdgpu_device_get_product_name(struct device *dev,
		struct device_attribute *attr, char *buf)
{
	struct drm_device *ddev = dev_get_drvdata(dev);
	struct amdgpu_device *adev = ddev->dev_private;

	return snprintf(buf, PAGE_SIZE, "%s\n", adev->product_name);
}

static DEVICE_ATTR(product_name, S_IRUGO,
		amdgpu_device_get_product_name, NULL);

/**
 * DOC: product_number
 *
 * The amdgpu driver provides a sysfs API for reporting the part number
 * for the device.
 * The file product_number is used for this and returns the part number
 * as returned from the FRU.
 * NOTE: This is only available for certain server cards
 */

static ssize_t amdgpu_device_get_product_number(struct device *dev,
		struct device_attribute *attr, char *buf)
{
	struct drm_device *ddev = dev_get_drvdata(dev);
	struct amdgpu_device *adev = ddev->dev_private;

	return snprintf(buf, PAGE_SIZE, "%s\n", adev->product_number);
}

static DEVICE_ATTR(product_number, S_IRUGO,
		amdgpu_device_get_product_number, NULL);

/**
 * DOC: serial_number
 *
 * The amdgpu driver provides a sysfs API for reporting the serial number
 * for the device.
 * The file serial_number is used for this and returns the serial number
 * as returned from the FRU.
 * NOTE: This is only available for certain server cards
 */

static ssize_t amdgpu_device_get_serial_number(struct device *dev,
		struct device_attribute *attr, char *buf)
{
	struct drm_device *ddev = dev_get_drvdata(dev);
	struct amdgpu_device *adev = ddev->dev_private;

	return snprintf(buf, PAGE_SIZE, "%s\n", adev->serial);
}

static DEVICE_ATTR(serial_number, S_IRUGO,
		amdgpu_device_get_serial_number, NULL);

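/*
 * Illustrative only: like pcie_replay_count, the FRU-derived attributes are
 * plain sysfs files in the same (assumed) device directory:
 *
 *   cat /sys/class/drm/card0/device/product_name
 *   cat /sys/class/drm/card0/device/product_number
 *   cat /sys/class/drm/card0/device/serial_number
 */
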
/**
 * amdgpu_device_supports_boco - Is the device a dGPU with HG/PX power control
 *
 * @dev: drm_device pointer
 *
 * Returns true if the device is a dGPU with HG/PX power control,
 * otherwise returns false.
 */
bool amdgpu_device_supports_boco(struct drm_device *dev)
{
	struct amdgpu_device *adev = dev->dev_private;

	if (adev->flags & AMD_IS_PX)
		return true;
	return false;
}

/**
 * amdgpu_device_supports_baco - Does the device support BACO
 *
 * @dev: drm_device pointer
 *
 * Returns true if the device supports BACO,
 * otherwise returns false.
 */
bool amdgpu_device_supports_baco(struct drm_device *dev)
{
	struct amdgpu_device *adev = dev->dev_private;

	return amdgpu_asic_supports_baco(adev);
}

/**
 * VRAM access helper functions.
 *
 * amdgpu_device_vram_access - read/write a buffer in vram
 *
 * @adev: amdgpu_device pointer
 * @pos: offset of the buffer in vram
 * @buf: virtual address of the buffer in system memory
 * @size: read/write size in bytes, sizeof(@buf) must be >= @size
 * @write: true - write to vram, otherwise - read from vram
 */
void amdgpu_device_vram_access(struct amdgpu_device *adev, loff_t pos,
			       uint32_t *buf, size_t size, bool write)
{
	unsigned long flags;
	uint32_t hi = ~0;
	uint64_t last;

#ifdef CONFIG_64BIT
	last = min(pos + size, adev->gmc.visible_vram_size);
	if (last > pos) {
		void __iomem *addr = adev->mman.aper_base_kaddr + pos;
		size_t count = last - pos;

		if (write) {
			memcpy_toio(addr, buf, count);
			mb();
			amdgpu_asic_flush_hdp(adev, NULL);
		} else {
			amdgpu_asic_invalidate_hdp(adev, NULL);
			mb();
			memcpy_fromio(buf, addr, count);
		}

		if (count == size)
			return;

		pos += count;
		buf += count / 4;
		size -= count;
	}
#endif

	spin_lock_irqsave(&adev->mmio_idx_lock, flags);
	for (last = pos + size; pos < last; pos += 4) {
		uint32_t tmp = pos >> 31;

		WREG32_NO_KIQ(mmMM_INDEX, ((uint32_t)pos) | 0x80000000);
		if (tmp != hi) {
			WREG32_NO_KIQ(mmMM_INDEX_HI, tmp);
			hi = tmp;
		}
		if (write)
			WREG32_NO_KIQ(mmMM_DATA, *buf++);
		else
			*buf++ = RREG32_NO_KIQ(mmMM_DATA);
	}
	spin_unlock_irqrestore(&adev->mmio_idx_lock, flags);
}

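/*
 * Illustrative usage sketch (assumed caller, not from this file): copy the
 * first 16 bytes of VRAM into a local buffer.
 *
 *	uint32_t data[4];
 *
 *	amdgpu_device_vram_access(adev, 0, data, sizeof(data), false);
 */
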
/*
 * device register access helper functions.
 */
/**
 * amdgpu_device_rreg - read a register
 *
 * @adev: amdgpu_device pointer
 * @reg: dword aligned register offset
 * @acc_flags: access flags which require special behavior
 *
 * Returns the 32 bit value from the offset specified.
 */
uint32_t amdgpu_device_rreg(struct amdgpu_device *adev, uint32_t reg,
			    uint32_t acc_flags)
{
	uint32_t ret;

	if (!(acc_flags & AMDGPU_REGS_NO_KIQ) && amdgpu_sriov_runtime(adev))
		return amdgpu_kiq_rreg(adev, reg);

	if ((reg * 4) < adev->rmmio_size)
		ret = readl(((void __iomem *)adev->rmmio) + (reg * 4));
	else
		ret = adev->pcie_rreg(adev, (reg * 4));
	trace_amdgpu_device_rreg(adev->pdev->device, reg, ret);
	return ret;
}

/*
 * MMIO register read with bytes helper functions
 * @offset: byte offset from MMIO start
 */

/**
 * amdgpu_mm_rreg8 - read a memory mapped IO register
 *
 * @adev: amdgpu_device pointer
 * @offset: byte aligned register offset
 *
 * Returns the 8 bit value from the offset specified.
 */
uint8_t amdgpu_mm_rreg8(struct amdgpu_device *adev, uint32_t offset)
{
	if (offset < adev->rmmio_size)
		return (readb(adev->rmmio + offset));
	BUG();
}

/*
 * MMIO register write with bytes helper functions
 * @offset: byte offset from MMIO start
 * @value: the value to be written to the register
 */

/**
 * amdgpu_mm_wreg8 - write to a memory mapped IO register
 *
 * @adev: amdgpu_device pointer
 * @offset: byte aligned register offset
 * @value: 8 bit value to write
 *
 * Writes the value specified to the offset specified.
 */
void amdgpu_mm_wreg8(struct amdgpu_device *adev, uint32_t offset, uint8_t value)
{
	if (offset < adev->rmmio_size)
		writeb(value, adev->rmmio + offset);
	else
		BUG();
}

static inline void amdgpu_device_wreg_no_kiq(struct amdgpu_device *adev, uint32_t reg,
					     uint32_t v, uint32_t acc_flags)
{
	trace_amdgpu_device_wreg(adev->pdev->device, reg, v);

	if ((reg * 4) < adev->rmmio_size)
		writel(v, ((void __iomem *)adev->rmmio) + (reg * 4));
	else
		adev->pcie_wreg(adev, (reg * 4), v);
}

/**
 * amdgpu_device_wreg - write to a register
 *
 * @adev: amdgpu_device pointer
 * @reg: dword aligned register offset
 * @v: 32 bit value to write to the register
 * @acc_flags: access flags which require special behavior
 *
 * Writes the value specified to the offset specified.
 */
void amdgpu_device_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v,
			uint32_t acc_flags)
{
	if (!(acc_flags & AMDGPU_REGS_NO_KIQ) && amdgpu_sriov_runtime(adev))
		return amdgpu_kiq_wreg(adev, reg, v);

	amdgpu_device_wreg_no_kiq(adev, reg, v, acc_flags);
}

/*
 * amdgpu_mm_wreg_mmio_rlc - write register either with mmio or with RLC path if in range
 *
 * This function is invoked only for the debugfs register access.
 */
void amdgpu_mm_wreg_mmio_rlc(struct amdgpu_device *adev, uint32_t reg, uint32_t v,
			     uint32_t acc_flags)
{
	if (amdgpu_sriov_fullaccess(adev) &&
	    adev->gfx.rlc.funcs &&
	    adev->gfx.rlc.funcs->is_rlcg_access_range) {

		if (adev->gfx.rlc.funcs->is_rlcg_access_range(adev, reg))
			return adev->gfx.rlc.funcs->rlcg_wreg(adev, reg, v);
	}

	amdgpu_device_wreg_no_kiq(adev, reg, v, acc_flags);
}

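/*
 * Illustrative note, not part of the original file: callers normally use the
 * RREG32()/WREG32() style macros (assumed to wrap amdgpu_device_rreg()/
 * amdgpu_device_wreg() in amdgpu.h) rather than calling these helpers
 * directly. Passing AMDGPU_REGS_NO_KIQ in @acc_flags skips the KIQ round
 * trip that is otherwise used when running as an SR-IOV guest.
 */
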
/**
 * amdgpu_io_rreg - read an IO register
 *
 * @adev: amdgpu_device pointer
 * @reg: dword aligned register offset
 *
 * Returns the 32 bit value from the offset specified.
 */
u32 amdgpu_io_rreg(struct amdgpu_device *adev, u32 reg)
{
	if ((reg * 4) < adev->rio_mem_size)
		return ioread32(adev->rio_mem + (reg * 4));
	else {
		iowrite32((reg * 4), adev->rio_mem + (mmMM_INDEX * 4));
		return ioread32(adev->rio_mem + (mmMM_DATA * 4));
	}
}

/**
 * amdgpu_io_wreg - write to an IO register
 *
 * @adev: amdgpu_device pointer
 * @reg: dword aligned register offset
 * @v: 32 bit value to write to the register
 *
 * Writes the value specified to the offset specified.
 */
void amdgpu_io_wreg(struct amdgpu_device *adev, u32 reg, u32 v)
{
	if ((reg * 4) < adev->rio_mem_size)
		iowrite32(v, adev->rio_mem + (reg * 4));
	else {
		iowrite32((reg * 4), adev->rio_mem + (mmMM_INDEX * 4));
		iowrite32(v, adev->rio_mem + (mmMM_DATA * 4));
	}
}

/**
 * amdgpu_mm_rdoorbell - read a doorbell dword
 *
 * @adev: amdgpu_device pointer
 * @index: doorbell index
 *
 * Returns the value in the doorbell aperture at the
 * requested doorbell index (CIK).
 */
u32 amdgpu_mm_rdoorbell(struct amdgpu_device *adev, u32 index)
{
	if (index < adev->doorbell.num_doorbells) {
		return readl(adev->doorbell.ptr + index);
	} else {
		DRM_ERROR("reading beyond doorbell aperture: 0x%08x!\n", index);
		return 0;
	}
}

/**
 * amdgpu_mm_wdoorbell - write a doorbell dword
 *
 * @adev: amdgpu_device pointer
 * @index: doorbell index
 * @v: value to write
 *
 * Writes @v to the doorbell aperture at the
 * requested doorbell index (CIK).
 */
void amdgpu_mm_wdoorbell(struct amdgpu_device *adev, u32 index, u32 v)
{
	if (index < adev->doorbell.num_doorbells) {
		writel(v, adev->doorbell.ptr + index);
	} else {
		DRM_ERROR("writing beyond doorbell aperture: 0x%08x!\n", index);
	}
}

/**
 * amdgpu_mm_rdoorbell64 - read a doorbell Qword
 *
 * @adev: amdgpu_device pointer
 * @index: doorbell index
 *
 * Returns the value in the doorbell aperture at the
 * requested doorbell index (VEGA10+).
 */
u64 amdgpu_mm_rdoorbell64(struct amdgpu_device *adev, u32 index)
{
	if (index < adev->doorbell.num_doorbells) {
		return atomic64_read((atomic64_t *)(adev->doorbell.ptr + index));
	} else {
		DRM_ERROR("reading beyond doorbell aperture: 0x%08x!\n", index);
		return 0;
	}
}

/**
 * amdgpu_mm_wdoorbell64 - write a doorbell Qword
 *
 * @adev: amdgpu_device pointer
 * @index: doorbell index
 * @v: value to write
 *
 * Writes @v to the doorbell aperture at the
 * requested doorbell index (VEGA10+).
 */
void amdgpu_mm_wdoorbell64(struct amdgpu_device *adev, u32 index, u64 v)
{
	if (index < adev->doorbell.num_doorbells) {
		atomic64_set((atomic64_t *)(adev->doorbell.ptr + index), v);
	} else {
		DRM_ERROR("writing beyond doorbell aperture: 0x%08x!\n", index);
	}
}

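/*
 * Illustrative note, not part of the original file: ring code usually reaches
 * these helpers through RDOORBELL32()/WDOORBELL32() style convenience macros
 * (assumed to be defined in amdgpu.h), e.g. kicking a ring by publishing its
 * write pointer (ring is a hypothetical struct amdgpu_ring pointer here):
 *
 *	amdgpu_mm_wdoorbell(adev, ring->doorbell_index,
 *			    lower_32_bits(ring->wptr));
 */
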
/**
 * amdgpu_invalid_rreg - dummy reg read function
 *
 * @adev: amdgpu device pointer
 * @reg: offset of register
 *
 * Dummy register read function. Used for register blocks
 * that certain asics don't have (all asics).
 * Returns the value in the register.
 */
static uint32_t amdgpu_invalid_rreg(struct amdgpu_device *adev, uint32_t reg)
{
	DRM_ERROR("Invalid callback to read register 0x%04X\n", reg);
	BUG();
	return 0;
}

/**
 * amdgpu_invalid_wreg - dummy reg write function
 *
 * @adev: amdgpu device pointer
 * @reg: offset of register
 * @v: value to write to the register
 *
 * Dummy register write function. Used for register blocks
 * that certain asics don't have (all asics).
 */
static void amdgpu_invalid_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v)
{
	DRM_ERROR("Invalid callback to write register 0x%04X with 0x%08X\n",
		  reg, v);
	BUG();
}

/**
 * amdgpu_invalid_rreg64 - dummy 64 bit reg read function
 *
 * @adev: amdgpu device pointer
 * @reg: offset of register
 *
 * Dummy register read function. Used for register blocks
 * that certain asics don't have (all asics).
 * Returns the value in the register.
 */
static uint64_t amdgpu_invalid_rreg64(struct amdgpu_device *adev, uint32_t reg)
{
	DRM_ERROR("Invalid callback to read 64 bit register 0x%04X\n", reg);
	BUG();
	return 0;
}

/**
 * amdgpu_invalid_wreg64 - dummy reg write function
 *
 * @adev: amdgpu device pointer
 * @reg: offset of register
 * @v: value to write to the register
 *
 * Dummy register write function. Used for register blocks
 * that certain asics don't have (all asics).
 */
static void amdgpu_invalid_wreg64(struct amdgpu_device *adev, uint32_t reg, uint64_t v)
{
	DRM_ERROR("Invalid callback to write 64 bit register 0x%04X with 0x%08llX\n",
		  reg, v);
	BUG();
}

/**
 * amdgpu_block_invalid_rreg - dummy reg read function
 *
 * @adev: amdgpu device pointer
 * @block: offset of instance
 * @reg: offset of register
 *
 * Dummy register read function. Used for register blocks
 * that certain asics don't have (all asics).
 * Returns the value in the register.
 */
static uint32_t amdgpu_block_invalid_rreg(struct amdgpu_device *adev,
					  uint32_t block, uint32_t reg)
{
	DRM_ERROR("Invalid callback to read register 0x%04X in block 0x%04X\n",
		  reg, block);
	BUG();
	return 0;
}

/**
 * amdgpu_block_invalid_wreg - dummy reg write function
 *
 * @adev: amdgpu device pointer
 * @block: offset of instance
 * @reg: offset of register
 * @v: value to write to the register
 *
 * Dummy register write function. Used for register blocks
 * that certain asics don't have (all asics).
 */
static void amdgpu_block_invalid_wreg(struct amdgpu_device *adev,
				      uint32_t block,
				      uint32_t reg, uint32_t v)
{
	DRM_ERROR("Invalid block callback to write register 0x%04X in block 0x%04X with 0x%08X\n",
		  reg, block, v);
	BUG();
}

/**
 * amdgpu_device_vram_scratch_init - allocate the VRAM scratch page
 *
 * @adev: amdgpu device pointer
 *
 * Allocates a scratch page of VRAM for use by various things in the
 * driver.
 */
static int amdgpu_device_vram_scratch_init(struct amdgpu_device *adev)
{
	return amdgpu_bo_create_kernel(adev, AMDGPU_GPU_PAGE_SIZE,
				       PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM,
				       &adev->vram_scratch.robj,
				       &adev->vram_scratch.gpu_addr,
				       (void **)&adev->vram_scratch.ptr);
}

/**
 * amdgpu_device_vram_scratch_fini - Free the VRAM scratch page
 *
 * @adev: amdgpu device pointer
 *
 * Frees the VRAM scratch page.
 */
static void amdgpu_device_vram_scratch_fini(struct amdgpu_device *adev)
{
	amdgpu_bo_free_kernel(&adev->vram_scratch.robj, NULL, NULL);
}

/**
 * amdgpu_device_program_register_sequence - program an array of registers.
 *
 * @adev: amdgpu_device pointer
 * @registers: pointer to the register array
 * @array_size: size of the register array
 *
 * Programs an array of registers with AND and OR masks.
 * This is a helper for setting golden registers.
 */
void amdgpu_device_program_register_sequence(struct amdgpu_device *adev,
					     const u32 *registers,
					     const u32 array_size)
{
	u32 tmp, reg, and_mask, or_mask;
	int i;

	if (array_size % 3)
		return;

	for (i = 0; i < array_size; i += 3) {
		reg = registers[i + 0];
		and_mask = registers[i + 1];
		or_mask = registers[i + 2];

		if (and_mask == 0xffffffff) {
			tmp = or_mask;
		} else {
			tmp = RREG32(reg);
			tmp &= ~and_mask;
			if (adev->family >= AMDGPU_FAMILY_AI)
				tmp |= (or_mask & and_mask);
			else
				tmp |= or_mask;
		}
		WREG32(reg, tmp);
	}
}

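/*
 * Illustrative usage sketch, not from this file: @registers is consumed as
 * {offset, AND mask, OR mask} triplets; an AND mask of 0xffffffff writes the
 * OR value verbatim. The register name below is hypothetical.
 *
 *	static const u32 golden_settings_example[] = {
 *		mmEXAMPLE_REG, 0xffff0fff, 0x00000100,
 *	};
 *
 *	amdgpu_device_program_register_sequence(adev, golden_settings_example,
 *						ARRAY_SIZE(golden_settings_example));
 */
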
/**
 * amdgpu_device_pci_config_reset - reset the GPU
 *
 * @adev: amdgpu_device pointer
 *
 * Resets the GPU using the pci config reset sequence.
 * Only applicable to asics prior to vega10.
 */
void amdgpu_device_pci_config_reset(struct amdgpu_device *adev)
{
	pci_write_config_dword(adev->pdev, 0x7c, AMDGPU_ASIC_RESET_DATA);
}

/*
 * GPU doorbell aperture helper functions.
 */
/**
 * amdgpu_device_doorbell_init - Init doorbell driver information.
 *
 * @adev: amdgpu_device pointer
 *
 * Init doorbell driver information (CIK)
 * Returns 0 on success, error on failure.
 */
static int amdgpu_device_doorbell_init(struct amdgpu_device *adev)
{

	/* No doorbell on SI hardware generation */
	if (adev->asic_type < CHIP_BONAIRE) {
		adev->doorbell.base = 0;
		adev->doorbell.size = 0;
		adev->doorbell.num_doorbells = 0;
		adev->doorbell.ptr = NULL;
		return 0;
	}

	if (pci_resource_flags(adev->pdev, 2) & IORESOURCE_UNSET)
		return -EINVAL;

	amdgpu_asic_init_doorbell_index(adev);

	/* doorbell bar mapping */
	adev->doorbell.base = pci_resource_start(adev->pdev, 2);
	adev->doorbell.size = pci_resource_len(adev->pdev, 2);

	adev->doorbell.num_doorbells = min_t(u32, adev->doorbell.size / sizeof(u32),
					     adev->doorbell_index.max_assignment + 1);
	if (adev->doorbell.num_doorbells == 0)
		return -EINVAL;

	/* For Vega, reserve and map two pages on the doorbell BAR since the
	 * SDMA paging queue doorbell uses the second page. The
	 * AMDGPU_DOORBELL64_MAX_ASSIGNMENT definition assumes all the
	 * doorbells are in the first page, so with the paging queue enabled
	 * num_doorbells needs one additional page (0x400 dwords).
	 */
	if (adev->asic_type >= CHIP_VEGA10)
		adev->doorbell.num_doorbells += 0x400;

	adev->doorbell.ptr = ioremap(adev->doorbell.base,
				     adev->doorbell.num_doorbells *
				     sizeof(u32));
	if (adev->doorbell.ptr == NULL)
		return -ENOMEM;

	return 0;
}

/**
 * amdgpu_device_doorbell_fini - Tear down doorbell driver information.
 *
 * @adev: amdgpu_device pointer
 *
 * Tear down doorbell driver information (CIK)
 */
static void amdgpu_device_doorbell_fini(struct amdgpu_device *adev)
{
	iounmap(adev->doorbell.ptr);
	adev->doorbell.ptr = NULL;
}

/*
 * amdgpu_device_wb_*()
 * Writeback is the method by which the GPU updates special pages in memory
 * with the status of certain GPU events (fences, ring pointers, etc.).
 */

/**
 * amdgpu_device_wb_fini - Disable Writeback and free memory
 *
 * @adev: amdgpu_device pointer
 *
 * Disables Writeback and frees the Writeback memory (all asics).
 * Used at driver shutdown.
 */
static void amdgpu_device_wb_fini(struct amdgpu_device *adev)
{
	if (adev->wb.wb_obj) {
		amdgpu_bo_free_kernel(&adev->wb.wb_obj,
				      &adev->wb.gpu_addr,
				      (void **)&adev->wb.wb);
		adev->wb.wb_obj = NULL;
	}
}

/**
 * amdgpu_device_wb_init - Init Writeback driver info and allocate memory
 *
 * @adev: amdgpu_device pointer
 *
 * Initializes writeback and allocates writeback memory (all asics).
 * Used at driver startup.
 * Returns 0 on success or a negative error code on failure.
 */
static int amdgpu_device_wb_init(struct amdgpu_device *adev)
{
	int r;

	if (adev->wb.wb_obj == NULL) {
		/* AMDGPU_MAX_WB * sizeof(uint32_t) * 8 = AMDGPU_MAX_WB 256bit slots */
		r = amdgpu_bo_create_kernel(adev, AMDGPU_MAX_WB * sizeof(uint32_t) * 8,
					    PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT,
					    &adev->wb.wb_obj, &adev->wb.gpu_addr,
					    (void **)&adev->wb.wb);
		if (r) {
			dev_warn(adev->dev, "(%d) create WB bo failed\n", r);
			return r;
		}

		adev->wb.num_wb = AMDGPU_MAX_WB;
		memset(&adev->wb.used, 0, sizeof(adev->wb.used));

		/* clear wb memory */
		memset((char *)adev->wb.wb, 0, AMDGPU_MAX_WB * sizeof(uint32_t) * 8);
	}

	return 0;
}

/**
 * amdgpu_device_wb_get - Allocate a wb entry
 *
 * @adev: amdgpu_device pointer
 * @wb: wb index
 *
 * Allocate a wb slot for use by the driver (all asics).
 * Returns 0 on success or -EINVAL on failure.
 */
int amdgpu_device_wb_get(struct amdgpu_device *adev, u32 *wb)
{
	unsigned long offset = find_first_zero_bit(adev->wb.used, adev->wb.num_wb);

	if (offset < adev->wb.num_wb) {
		__set_bit(offset, adev->wb.used);
		*wb = offset << 3; /* convert to dw offset */
		return 0;
	} else {
		return -EINVAL;
	}
}

/**
 * amdgpu_device_wb_free - Free a wb entry
 *
 * @adev: amdgpu_device pointer
 * @wb: wb index
 *
 * Free a wb slot allocated for use by the driver (all asics)
 */
void amdgpu_device_wb_free(struct amdgpu_device *adev, u32 wb)
{
	wb >>= 3;
	if (wb < adev->wb.num_wb)
		__clear_bit(wb, adev->wb.used);
}

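/*
 * Illustrative usage sketch (assumed caller, not from this file): ring setup
 * typically grabs a writeback slot for a fence or rptr shadow and releases it
 * on teardown. The returned value is a dword offset into adev->wb.wb.
 *
 *	u32 wb;
 *	int r;
 *
 *	r = amdgpu_device_wb_get(adev, &wb);
 *	if (r)
 *		return r;
 *	...
 *	amdgpu_device_wb_free(adev, wb);
 */
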
/**
 * amdgpu_device_resize_fb_bar - try to resize FB BAR
 *
 * @adev: amdgpu_device pointer
 *
 * Try to resize FB BAR to make all VRAM CPU accessible. We try very hard not
 * to fail, but if any of the BARs is not accessible after the resize we abort
 * driver loading by returning -ENODEV.
 */
int amdgpu_device_resize_fb_bar(struct amdgpu_device *adev)
{
	u64 space_needed = roundup_pow_of_two(adev->gmc.real_vram_size);
	u32 rbar_size = order_base_2(((space_needed >> 20) | 1)) - 1;
	struct pci_bus *root;
	struct resource *res;
	unsigned i;
	u16 cmd;
	int r;

	/* Bypass for VF */
	if (amdgpu_sriov_vf(adev))
		return 0;

	/* Check if the root BUS has 64bit memory resources */
	root = adev->pdev->bus;
	while (root->parent)
		root = root->parent;

	pci_bus_for_each_resource(root, res, i) {
		if (res && res->flags & (IORESOURCE_MEM | IORESOURCE_MEM_64) &&
		    res->start > 0x100000000ull)
			break;
	}

	/* Trying to resize is pointless without a root hub window above 4GB */
	if (!res)
		return 0;

	/* Disable memory decoding while we change the BAR addresses and size */
	pci_read_config_word(adev->pdev, PCI_COMMAND, &cmd);
	pci_write_config_word(adev->pdev, PCI_COMMAND,
			      cmd & ~PCI_COMMAND_MEMORY);

	/* Free the VRAM and doorbell BAR, we most likely need to move both. */
	amdgpu_device_doorbell_fini(adev);
	if (adev->asic_type >= CHIP_BONAIRE)
		pci_release_resource(adev->pdev, 2);

	pci_release_resource(adev->pdev, 0);

	r = pci_resize_resource(adev->pdev, 0, rbar_size);
	if (r == -ENOSPC)
		DRM_INFO("Not enough PCI address space for a large BAR.");
	else if (r && r != -ENOTSUPP)
		DRM_ERROR("Problem resizing BAR0 (%d).", r);

	pci_assign_unassigned_bus_resources(adev->pdev->bus);

	/* When the doorbell or fb BAR isn't available we have no chance of
	 * using the device.
	 */
	r = amdgpu_device_doorbell_init(adev);
	if (r || (pci_resource_flags(adev->pdev, 0) & IORESOURCE_UNSET))
		return -ENODEV;

	pci_write_config_word(adev->pdev, PCI_COMMAND, cmd);

	return 0;
}

/*
 * GPU helper functions.
 */
/**
 * amdgpu_device_need_post - check if the hw need post or not
 *
 * @adev: amdgpu_device pointer
 *
 * Check if the asic has been initialized (all asics) at driver startup
 * or if a post is needed because a hw reset was performed.
 * Returns true if a post is needed, false if not.
 */
bool amdgpu_device_need_post(struct amdgpu_device *adev)
{
	uint32_t reg;

	if (amdgpu_sriov_vf(adev))
		return false;

	if (amdgpu_passthrough(adev)) {
		/* for FIJI: In the whole GPU pass-through virtualization case, after a
		 * VM reboot some old smc fw still needs the driver to do a vPost,
		 * otherwise the gpu hangs. smc fw versions above 22.15 don't have this
		 * flaw, so we force vPost to be executed for smc versions below 22.15.
		 */
		if (adev->asic_type == CHIP_FIJI) {
			int err;
			uint32_t fw_ver;

			err = request_firmware(&adev->pm.fw, "amdgpu/fiji_smc.bin", adev->dev);
			/* force vPost if an error occurred */
			if (err)
				return true;

			fw_ver = *((uint32_t *)adev->pm.fw->data + 69);
			if (fw_ver < 0x00160e00)
				return true;
		}
	}

	if (adev->has_hw_reset) {
		adev->has_hw_reset = false;
		return true;
	}

	/* bios scratch used on CIK+ */
	if (adev->asic_type >= CHIP_BONAIRE)
		return amdgpu_atombios_scratch_need_asic_init(adev);

	/* check MEM_SIZE for older asics */
	reg = amdgpu_asic_get_config_memsize(adev);

	if ((reg != 0) && (reg != 0xffffffff))
		return false;

	return true;
}

/* if we get transitioned to only one device, take VGA back */
/**
 * amdgpu_device_vga_set_decode - enable/disable vga decode
 *
 * @cookie: amdgpu_device pointer
 * @state: enable/disable vga decode
 *
 * Enable/disable vga decode (all asics).
 * Returns VGA resource flags.
 */
static unsigned int amdgpu_device_vga_set_decode(void *cookie, bool state)
{
	struct amdgpu_device *adev = cookie;

	amdgpu_asic_set_vga_state(adev, state);
	if (state)
		return VGA_RSRC_LEGACY_IO | VGA_RSRC_LEGACY_MEM |
		       VGA_RSRC_NORMAL_IO | VGA_RSRC_NORMAL_MEM;
	else
		return VGA_RSRC_NORMAL_IO | VGA_RSRC_NORMAL_MEM;
}

/**
 * amdgpu_device_check_block_size - validate the vm block size
 *
 * @adev: amdgpu_device pointer
 *
 * Validates the vm block size specified via module parameter.
 * The vm block size defines the number of bits in page table versus page directory,
 * a page is 4KB so we have 12 bits offset, minimum 9 bits in the
 * page table and the remaining bits are in the page directory.
 */
static void amdgpu_device_check_block_size(struct amdgpu_device *adev)
{
	/* defines number of bits in page table versus page directory,
	 * a page is 4KB so we have 12 bits offset, minimum 9 bits in the
	 * page table and the remaining bits are in the page directory */
	if (amdgpu_vm_block_size == -1)
		return;

	if (amdgpu_vm_block_size < 9) {
		dev_warn(adev->dev, "VM page table size (%d) too small\n",
			 amdgpu_vm_block_size);
		amdgpu_vm_block_size = -1;
	}
}

/**
 * amdgpu_device_check_vm_size - validate the vm size
 *
 * @adev: amdgpu_device pointer
 *
 * Validates the vm size in GB specified via module parameter.
 * The VM size is the size of the GPU virtual memory space in GB.
 */
static void amdgpu_device_check_vm_size(struct amdgpu_device *adev)
{
	/* no need to check the default value */
	if (amdgpu_vm_size == -1)
		return;

	if (amdgpu_vm_size < 1) {
		dev_warn(adev->dev, "VM size (%d) too small, min is 1GB\n",
			 amdgpu_vm_size);
		amdgpu_vm_size = -1;
	}
}

static void amdgpu_device_check_smu_prv_buffer_size(struct amdgpu_device *adev)
{
	struct sysinfo si;
	bool is_os_64 = (sizeof(void *) == 8);
	uint64_t total_memory;
	uint64_t dram_size_seven_GB = 0x1B8000000;
	uint64_t dram_size_three_GB = 0xB8000000;

	if (amdgpu_smu_memory_pool_size == 0)
		return;

	if (!is_os_64) {
		DRM_WARN("Not 64-bit OS, feature not supported\n");
		goto def_value;
	}
	si_meminfo(&si);
	total_memory = (uint64_t)si.totalram * si.mem_unit;

	if ((amdgpu_smu_memory_pool_size == 1) ||
	    (amdgpu_smu_memory_pool_size == 2)) {
		if (total_memory < dram_size_three_GB)
			goto def_value1;
	} else if ((amdgpu_smu_memory_pool_size == 4) ||
		   (amdgpu_smu_memory_pool_size == 8)) {
		if (total_memory < dram_size_seven_GB)
			goto def_value1;
	} else {
		DRM_WARN("Smu memory pool size not supported\n");
		goto def_value;
	}
	adev->pm.smu_prv_buffer_size = amdgpu_smu_memory_pool_size << 28;

	return;

def_value1:
	DRM_WARN("Not enough system memory\n");
def_value:
	adev->pm.smu_prv_buffer_size = 0;
}

/**
 * amdgpu_device_check_arguments - validate module params
 *
 * @adev: amdgpu_device pointer
 *
 * Validates certain module parameters and updates
 * the associated values used by the driver (all asics).
 */
static int amdgpu_device_check_arguments(struct amdgpu_device *adev)
{
	if (amdgpu_sched_jobs < 4) {
		dev_warn(adev->dev, "sched jobs (%d) must be at least 4\n",
			 amdgpu_sched_jobs);
		amdgpu_sched_jobs = 4;
	} else if (!is_power_of_2(amdgpu_sched_jobs)) {
		dev_warn(adev->dev, "sched jobs (%d) must be a power of 2\n",
			 amdgpu_sched_jobs);
		amdgpu_sched_jobs = roundup_pow_of_two(amdgpu_sched_jobs);
	}

	if (amdgpu_gart_size != -1 && amdgpu_gart_size < 32) {
		/* gart size must be greater or equal to 32M */
		dev_warn(adev->dev, "gart size (%d) too small\n",
			 amdgpu_gart_size);
		amdgpu_gart_size = -1;
	}

	if (amdgpu_gtt_size != -1 && amdgpu_gtt_size < 32) {
		/* gtt size must be greater or equal to 32M */
		dev_warn(adev->dev, "gtt size (%d) too small\n",
			 amdgpu_gtt_size);
		amdgpu_gtt_size = -1;
	}

	/* valid range is between 4 and 9 inclusive */
	if (amdgpu_vm_fragment_size != -1 &&
	    (amdgpu_vm_fragment_size > 9 || amdgpu_vm_fragment_size < 4)) {
		dev_warn(adev->dev, "valid range is between 4 and 9\n");
		amdgpu_vm_fragment_size = -1;
	}

	amdgpu_device_check_smu_prv_buffer_size(adev);

	amdgpu_device_check_vm_size(adev);

	amdgpu_device_check_block_size(adev);

	adev->firmware.load_type = amdgpu_ucode_get_load_type(adev, amdgpu_fw_load_type);

	amdgpu_gmc_tmz_set(adev);

	return 0;
}

/**
 * amdgpu_switcheroo_set_state - set switcheroo state
 *
 * @pdev: pci dev pointer
 * @state: vga_switcheroo state
 *
 * Callback for the switcheroo driver. Suspends or resumes the
 * asics before or after it is powered up using ACPI methods.
 */
static void amdgpu_switcheroo_set_state(struct pci_dev *pdev, enum vga_switcheroo_state state)
{
	struct drm_device *dev = pci_get_drvdata(pdev);
	int r;

	if (amdgpu_device_supports_boco(dev) && state == VGA_SWITCHEROO_OFF)
		return;

	if (state == VGA_SWITCHEROO_ON) {
		pr_info("switched on\n");
		/* don't suspend or resume card normally */
		dev->switch_power_state = DRM_SWITCH_POWER_CHANGING;

		pci_set_power_state(dev->pdev, PCI_D0);
		pci_restore_state(dev->pdev);
		r = pci_enable_device(dev->pdev);
		if (r)
			DRM_WARN("pci_enable_device failed (%d)\n", r);
		amdgpu_device_resume(dev, true);

		dev->switch_power_state = DRM_SWITCH_POWER_ON;
		drm_kms_helper_poll_enable(dev);
	} else {
		pr_info("switched off\n");
		drm_kms_helper_poll_disable(dev);
		dev->switch_power_state = DRM_SWITCH_POWER_CHANGING;
		amdgpu_device_suspend(dev, true);
		pci_save_state(dev->pdev);
		/* Shut down the device */
		pci_disable_device(dev->pdev);
		pci_set_power_state(dev->pdev, PCI_D3cold);
		dev->switch_power_state = DRM_SWITCH_POWER_OFF;
	}
}

/**
 * amdgpu_switcheroo_can_switch - see if switcheroo state can change
 *
 * @pdev: pci dev pointer
 *
 * Callback for the switcheroo driver. Check if the switcheroo
 * state can be changed.
 * Returns true if the state can be changed, false if not.
 */
static bool amdgpu_switcheroo_can_switch(struct pci_dev *pdev)
{
	struct drm_device *dev = pci_get_drvdata(pdev);

	/*
	 * FIXME: open_count is protected by drm_global_mutex but that would lead to
	 * locking inversion with the driver load path. And the access here is
	 * completely racy anyway. So don't bother with locking for now.
	 */
	return atomic_read(&dev->open_count) == 0;
}

static const struct vga_switcheroo_client_ops amdgpu_switcheroo_ops = {
	.set_gpu_state = amdgpu_switcheroo_set_state,
	.reprobe = NULL,
	.can_switch = amdgpu_switcheroo_can_switch,
};

/**
 * amdgpu_device_ip_set_clockgating_state - set the CG state
 *
 * @dev: amdgpu_device pointer
 * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
 * @state: clockgating state (gate or ungate)
 *
 * Sets the requested clockgating state for all instances of
 * the hardware IP specified.
 * Returns the error code from the last instance.
 */
int amdgpu_device_ip_set_clockgating_state(void *dev,
					   enum amd_ip_block_type block_type,
					   enum amd_clockgating_state state)
{
	struct amdgpu_device *adev = dev;
	int i, r = 0;

	for (i = 0; i < adev->num_ip_blocks; i++) {
		if (!adev->ip_blocks[i].status.valid)
			continue;
		if (adev->ip_blocks[i].version->type != block_type)
			continue;
		if (!adev->ip_blocks[i].version->funcs->set_clockgating_state)
			continue;
		r = adev->ip_blocks[i].version->funcs->set_clockgating_state(
			(void *)adev, state);
		if (r)
			DRM_ERROR("set_clockgating_state of IP block <%s> failed %d\n",
				  adev->ip_blocks[i].version->funcs->name, r);
	}
	return r;
}

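/*
 * Illustrative usage sketch (assumed caller, not from this file): gate
 * clockgating for every GFX IP instance; the enum values are assumed to be
 * the standard amd_shared.h ones.
 *
 *	amdgpu_device_ip_set_clockgating_state(adev, AMD_IP_BLOCK_TYPE_GFX,
 *					       AMD_CG_STATE_GATE);
 */
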
/**
 * amdgpu_device_ip_set_powergating_state - set the PG state
 *
 * @dev: amdgpu_device pointer
 * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
 * @state: powergating state (gate or ungate)
 *
 * Sets the requested powergating state for all instances of
 * the hardware IP specified.
 * Returns the error code from the last instance.
 */
int amdgpu_device_ip_set_powergating_state(void *dev,
					   enum amd_ip_block_type block_type,
					   enum amd_powergating_state state)
{
	struct amdgpu_device *adev = dev;
	int i, r = 0;

	for (i = 0; i < adev->num_ip_blocks; i++) {
		if (!adev->ip_blocks[i].status.valid)
			continue;
		if (adev->ip_blocks[i].version->type != block_type)
			continue;
		if (!adev->ip_blocks[i].version->funcs->set_powergating_state)
			continue;
		r = adev->ip_blocks[i].version->funcs->set_powergating_state(
			(void *)adev, state);
		if (r)
			DRM_ERROR("set_powergating_state of IP block <%s> failed %d\n",
				  adev->ip_blocks[i].version->funcs->name, r);
	}
	return r;
}

/**
 * amdgpu_device_ip_get_clockgating_state - get the CG state
 *
 * @adev: amdgpu_device pointer
 * @flags: clockgating feature flags
 *
 * Walks the list of IPs on the device and updates the clockgating
 * flags for each IP.
 * Updates @flags with the feature flags for each hardware IP where
 * clockgating is enabled.
 */
void amdgpu_device_ip_get_clockgating_state(struct amdgpu_device *adev,
					    u32 *flags)
{
	int i;

	for (i = 0; i < adev->num_ip_blocks; i++) {
		if (!adev->ip_blocks[i].status.valid)
			continue;
		if (adev->ip_blocks[i].version->funcs->get_clockgating_state)
			adev->ip_blocks[i].version->funcs->get_clockgating_state((void *)adev, flags);
	}
}

/**
 * amdgpu_device_ip_wait_for_idle - wait for idle
 *
 * @adev: amdgpu_device pointer
 * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
 *
 * Waits for the requested hardware IP to be idle.
 * Returns 0 for success or a negative error code on failure.
 */
int amdgpu_device_ip_wait_for_idle(struct amdgpu_device *adev,
				   enum amd_ip_block_type block_type)
{
	int i, r;

	for (i = 0; i < adev->num_ip_blocks; i++) {
		if (!adev->ip_blocks[i].status.valid)
			continue;
		if (adev->ip_blocks[i].version->type == block_type) {
			r = adev->ip_blocks[i].version->funcs->wait_for_idle((void *)adev);
			if (r)
				return r;
			break;
		}
	}
	return 0;

}

/**
 * amdgpu_device_ip_is_idle - is the hardware IP idle
 *
 * @adev: amdgpu_device pointer
 * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
 *
 * Check if the hardware IP is idle or not.
 * Returns true if the IP is idle, false if not.
 */
bool amdgpu_device_ip_is_idle(struct amdgpu_device *adev,
			      enum amd_ip_block_type block_type)
{
	int i;

	for (i = 0; i < adev->num_ip_blocks; i++) {
		if (!adev->ip_blocks[i].status.valid)
			continue;
		if (adev->ip_blocks[i].version->type == block_type)
			return adev->ip_blocks[i].version->funcs->is_idle((void *)adev);
	}
	return true;

}

/**
 * amdgpu_device_ip_get_ip_block - get a hw IP pointer
 *
 * @adev: amdgpu_device pointer
 * @type: Type of hardware IP (SMU, GFX, UVD, etc.)
 *
 * Returns a pointer to the hardware IP block structure
 * if it exists for the asic, otherwise NULL.
 */
struct amdgpu_ip_block *
amdgpu_device_ip_get_ip_block(struct amdgpu_device *adev,
			      enum amd_ip_block_type type)
{
	int i;

	for (i = 0; i < adev->num_ip_blocks; i++)
		if (adev->ip_blocks[i].version->type == type)
			return &adev->ip_blocks[i];

	return NULL;
}

/**
 * amdgpu_device_ip_block_version_cmp
 *
 * @adev: amdgpu_device pointer
 * @type: enum amd_ip_block_type
 * @major: major version
 * @minor: minor version
 *
 * return 0 if equal or greater
 * return 1 if smaller or the ip_block doesn't exist
 */
int amdgpu_device_ip_block_version_cmp(struct amdgpu_device *adev,
				       enum amd_ip_block_type type,
				       u32 major, u32 minor)
{
	struct amdgpu_ip_block *ip_block = amdgpu_device_ip_get_ip_block(adev, type);

	if (ip_block && ((ip_block->version->major > major) ||
			((ip_block->version->major == major) &&
			(ip_block->version->minor >= minor))))
		return 0;

	return 1;
}

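/*
 * Illustrative usage sketch (assumed caller, not from this file): gate a
 * workaround on a minimum IP version; the 8.1 value is only an example.
 *
 *	if (!amdgpu_device_ip_block_version_cmp(adev, AMD_IP_BLOCK_TYPE_GFX,
 *						8, 1))
 *		...	(the GFX IP block is version 8.1 or newer)
 */
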
/**
 * amdgpu_device_ip_block_add
 *
 * @adev: amdgpu_device pointer
 * @ip_block_version: pointer to the IP to add
 *
 * Adds the IP block driver information to the collection of IPs
 * on the asic.
 */
int amdgpu_device_ip_block_add(struct amdgpu_device *adev,
			       const struct amdgpu_ip_block_version *ip_block_version)
{
	if (!ip_block_version)
		return -EINVAL;

	DRM_INFO("add ip block number %d <%s>\n", adev->num_ip_blocks,
		  ip_block_version->funcs->name);

	adev->ip_blocks[adev->num_ip_blocks++].version = ip_block_version;

	return 0;
}

/**
 * amdgpu_device_enable_virtual_display - enable virtual display feature
 *
 * @adev: amdgpu_device pointer
 *
 * Enables the virtual display feature if the user has enabled it via
 * the module parameter virtual_display. This feature provides a virtual
 * display hardware on headless boards or in virtualized environments.
 * This function parses and validates the configuration string specified by
 * the user and configures the virtual display configuration (number of
 * virtual connectors, crtcs, etc.) specified.
 */
static void amdgpu_device_enable_virtual_display(struct amdgpu_device *adev)
{
	adev->enable_virtual_display = false;

	if (amdgpu_virtual_display) {
		struct drm_device *ddev = adev->ddev;
		const char *pci_address_name = pci_name(ddev->pdev);
		char *pciaddstr, *pciaddstr_tmp, *pciaddname_tmp, *pciaddname;

		pciaddstr = kstrdup(amdgpu_virtual_display, GFP_KERNEL);
		pciaddstr_tmp = pciaddstr;
		while ((pciaddname_tmp = strsep(&pciaddstr_tmp, ";"))) {
			pciaddname = strsep(&pciaddname_tmp, ",");
			if (!strcmp("all", pciaddname)
			    || !strcmp(pci_address_name, pciaddname)) {
				long num_crtc;
				int res = -1;

				adev->enable_virtual_display = true;

				if (pciaddname_tmp)
					res = kstrtol(pciaddname_tmp, 10,
						      &num_crtc);

				if (!res) {
					if (num_crtc < 1)
						num_crtc = 1;
					if (num_crtc > 6)
						num_crtc = 6;
					adev->mode_info.num_crtc = num_crtc;
				} else {
					adev->mode_info.num_crtc = 1;
				}
				break;
			}
		}

		DRM_INFO("virtual display string:%s, %s:virtual_display:%d, num_crtc:%d\n",
			 amdgpu_virtual_display, pci_address_name,
			 adev->enable_virtual_display, adev->mode_info.num_crtc);

		kfree(pciaddstr);
	}
}

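/*
 * Illustrative only: the virtual_display module parameter parsed above is a
 * ";"-separated list of "<pci address>,<number of crtcs>" entries, or "all"
 * to match every device, e.g. (the address is just an example):
 *
 *   modprobe amdgpu virtual_display=0000:04:00.0,2
 */
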
/**
 * amdgpu_device_parse_gpu_info_fw - parse gpu info firmware
 *
 * @adev: amdgpu_device pointer
 *
 * Parses the asic configuration parameters specified in the gpu info
 * firmware and makes them available to the driver for use in configuring
 * the asic.
 * Returns 0 on success, -EINVAL on failure.
 */
static int amdgpu_device_parse_gpu_info_fw(struct amdgpu_device *adev)
{
	const char *chip_name;
	char fw_name[40];
	int err;
	const struct gpu_info_firmware_header_v1_0 *hdr;

	adev->firmware.gpu_info_fw = NULL;

	if (adev->discovery_bin) {
		amdgpu_discovery_get_gfx_info(adev);

		/*
		 * FIXME: The bounding box is still needed by Navi12, so
		 * temporarily read it from gpu_info firmware. Should be dropped
		 * when DAL no longer needs it.
		 */
		if (adev->asic_type != CHIP_NAVI12)
			return 0;
	}

	switch (adev->asic_type) {
#ifdef CONFIG_DRM_AMDGPU_SI
	case CHIP_VERDE:
	case CHIP_TAHITI:
	case CHIP_PITCAIRN:
	case CHIP_OLAND:
	case CHIP_HAINAN:
#endif
#ifdef CONFIG_DRM_AMDGPU_CIK
	case CHIP_BONAIRE:
	case CHIP_HAWAII:
	case CHIP_KAVERI:
	case CHIP_KABINI:
	case CHIP_MULLINS:
#endif
	case CHIP_TOPAZ:
	case CHIP_TONGA:
	case CHIP_FIJI:
	case CHIP_POLARIS10:
	case CHIP_POLARIS11:
	case CHIP_POLARIS12:
	case CHIP_VEGAM:
	case CHIP_CARRIZO:
	case CHIP_STONEY:
	case CHIP_VEGA20:
	default:
		return 0;
	case CHIP_VEGA10:
		chip_name = "vega10";
		break;
	case CHIP_VEGA12:
		chip_name = "vega12";
		break;
	case CHIP_RAVEN:
		if (adev->apu_flags & AMD_APU_IS_RAVEN2)
			chip_name = "raven2";
		else if (adev->apu_flags & AMD_APU_IS_PICASSO)
			chip_name = "picasso";
		else
			chip_name = "raven";
		break;
	case CHIP_ARCTURUS:
		chip_name = "arcturus";
		break;
	case CHIP_RENOIR:
		chip_name = "renoir";
		break;
	case CHIP_NAVI10:
		chip_name = "navi10";
		break;
	case CHIP_NAVI14:
		chip_name = "navi14";
		break;
	case CHIP_NAVI12:
		chip_name = "navi12";
		break;
	case CHIP_SIENNA_CICHLID:
		chip_name = "sienna_cichlid";
		break;
	}

	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_gpu_info.bin", chip_name);
	err = request_firmware(&adev->firmware.gpu_info_fw, fw_name, adev->dev);
	if (err) {
		dev_err(adev->dev,
			"Failed to load gpu_info firmware \"%s\"\n",
			fw_name);
		goto out;
	}
	err = amdgpu_ucode_validate(adev->firmware.gpu_info_fw);
	if (err) {
		dev_err(adev->dev,
			"Failed to validate gpu_info firmware \"%s\"\n",
			fw_name);
		goto out;
	}

	hdr = (const struct gpu_info_firmware_header_v1_0 *)adev->firmware.gpu_info_fw->data;
	amdgpu_ucode_print_gpu_info_hdr(&hdr->header);

	switch (hdr->version_major) {
	case 1:
	{
		const struct gpu_info_firmware_v1_0 *gpu_info_fw =
			(const struct gpu_info_firmware_v1_0 *)(adev->firmware.gpu_info_fw->data +
								le32_to_cpu(hdr->header.ucode_array_offset_bytes));

		/*
		 * Should be dropped when DAL no longer needs it.
		 */
		if (adev->asic_type == CHIP_NAVI12)
			goto parse_soc_bounding_box;

		adev->gfx.config.max_shader_engines = le32_to_cpu(gpu_info_fw->gc_num_se);
		adev->gfx.config.max_cu_per_sh = le32_to_cpu(gpu_info_fw->gc_num_cu_per_sh);
		adev->gfx.config.max_sh_per_se = le32_to_cpu(gpu_info_fw->gc_num_sh_per_se);
		adev->gfx.config.max_backends_per_se = le32_to_cpu(gpu_info_fw->gc_num_rb_per_se);
		adev->gfx.config.max_texture_channel_caches =
			le32_to_cpu(gpu_info_fw->gc_num_tccs);
		adev->gfx.config.max_gprs = le32_to_cpu(gpu_info_fw->gc_num_gprs);
		adev->gfx.config.max_gs_threads = le32_to_cpu(gpu_info_fw->gc_num_max_gs_thds);
		adev->gfx.config.gs_vgt_table_depth = le32_to_cpu(gpu_info_fw->gc_gs_table_depth);
		adev->gfx.config.gs_prim_buffer_depth = le32_to_cpu(gpu_info_fw->gc_gsprim_buff_depth);
		adev->gfx.config.double_offchip_lds_buf =
			le32_to_cpu(gpu_info_fw->gc_double_offchip_lds_buffer);
		adev->gfx.cu_info.wave_front_size = le32_to_cpu(gpu_info_fw->gc_wave_size);
		adev->gfx.cu_info.max_waves_per_simd =
			le32_to_cpu(gpu_info_fw->gc_max_waves_per_simd);
		adev->gfx.cu_info.max_scratch_slots_per_cu =
			le32_to_cpu(gpu_info_fw->gc_max_scratch_slots_per_cu);
		adev->gfx.cu_info.lds_size = le32_to_cpu(gpu_info_fw->gc_lds_size);
		if (hdr->version_minor >= 1) {
			const struct gpu_info_firmware_v1_1 *gpu_info_fw =
				(const struct gpu_info_firmware_v1_1 *)(adev->firmware.gpu_info_fw->data +
									le32_to_cpu(hdr->header.ucode_array_offset_bytes));
			adev->gfx.config.num_sc_per_sh =
				le32_to_cpu(gpu_info_fw->num_sc_per_sh);
			adev->gfx.config.num_packer_per_sc =
				le32_to_cpu(gpu_info_fw->num_packer_per_sc);
		}

parse_soc_bounding_box:
		/*
		 * soc bounding box info is not integrated in the discovery table,
		 * we always need to parse it from gpu info firmware if needed.
		 */
		if (hdr->version_minor == 2) {
			const struct gpu_info_firmware_v1_2 *gpu_info_fw =
				(const struct gpu_info_firmware_v1_2 *)(adev->firmware.gpu_info_fw->data +
									le32_to_cpu(hdr->header.ucode_array_offset_bytes));
			adev->dm.soc_bounding_box = &gpu_info_fw->soc_bounding_box;
		}
		break;
	}
	default:
		dev_err(adev->dev,
			"Unsupported gpu_info table %d\n", hdr->header.ucode_version);
		err = -EINVAL;
		goto out;
	}
out:
	return err;
}

e3ecdffa
AD
1694/**
1695 * amdgpu_device_ip_early_init - run early init for hardware IPs
1696 *
1697 * @adev: amdgpu_device pointer
1698 *
1699 * Early initialization pass for hardware IPs. The hardware IPs that make
1700 * up each asic are discovered each IP's early_init callback is run. This
1701 * is the first stage in initializing the asic.
1702 * Returns 0 on success, negative error code on failure.
1703 */
06ec9070 1704static int amdgpu_device_ip_early_init(struct amdgpu_device *adev)
d38ceaf9 1705{
aaa36a97 1706 int i, r;
d38ceaf9 1707
483ef985 1708 amdgpu_device_enable_virtual_display(adev);
a6be7570 1709
d38ceaf9 1710 switch (adev->asic_type) {
33f34802
KW
1711#ifdef CONFIG_DRM_AMDGPU_SI
1712 case CHIP_VERDE:
1713 case CHIP_TAHITI:
1714 case CHIP_PITCAIRN:
1715 case CHIP_OLAND:
1716 case CHIP_HAINAN:
295d0daf 1717 adev->family = AMDGPU_FAMILY_SI;
33f34802
KW
1718 r = si_set_ip_blocks(adev);
1719 if (r)
1720 return r;
1721 break;
1722#endif
a2e73f56
AD
1723#ifdef CONFIG_DRM_AMDGPU_CIK
1724 case CHIP_BONAIRE:
1725 case CHIP_HAWAII:
1726 case CHIP_KAVERI:
1727 case CHIP_KABINI:
1728 case CHIP_MULLINS:
e1ad2d53 1729 if (adev->flags & AMD_IS_APU)
a2e73f56 1730 adev->family = AMDGPU_FAMILY_KV;
e1ad2d53
AD
1731 else
1732 adev->family = AMDGPU_FAMILY_CI;
a2e73f56
AD
1733
1734 r = cik_set_ip_blocks(adev);
1735 if (r)
1736 return r;
1737 break;
1738#endif
da87c30b
AD
1739 case CHIP_TOPAZ:
1740 case CHIP_TONGA:
1741 case CHIP_FIJI:
1742 case CHIP_POLARIS10:
1743 case CHIP_POLARIS11:
1744 case CHIP_POLARIS12:
1745 case CHIP_VEGAM:
1746 case CHIP_CARRIZO:
1747 case CHIP_STONEY:
1748 if (adev->flags & AMD_IS_APU)
1749 adev->family = AMDGPU_FAMILY_CZ;
1750 else
1751 adev->family = AMDGPU_FAMILY_VI;
1752
1753 r = vi_set_ip_blocks(adev);
1754 if (r)
1755 return r;
1756 break;
e48a3cd9
AD
1757 case CHIP_VEGA10:
1758 case CHIP_VEGA12:
e4bd8170 1759 case CHIP_VEGA20:
e48a3cd9 1760 case CHIP_RAVEN:
61cf44c1 1761 case CHIP_ARCTURUS:
b51a26a0 1762 case CHIP_RENOIR:
70534d1e 1763 if (adev->flags & AMD_IS_APU)
2ca8a5d2
CZ
1764 adev->family = AMDGPU_FAMILY_RV;
1765 else
1766 adev->family = AMDGPU_FAMILY_AI;
460826e6
KW
1767
1768 r = soc15_set_ip_blocks(adev);
1769 if (r)
1770 return r;
1771 break;
0a5b8c7b 1772 case CHIP_NAVI10:
7ecb5cd4 1773 case CHIP_NAVI14:
4808cf9c 1774 case CHIP_NAVI12:
11e8aef5 1775 case CHIP_SIENNA_CICHLID:
0a5b8c7b
HR
1776 adev->family = AMDGPU_FAMILY_NV;
1777
1778 r = nv_set_ip_blocks(adev);
1779 if (r)
1780 return r;
1781 break;
d38ceaf9
AD
1782 default:
1783 /* FIXME: not supported yet */
1784 return -EINVAL;
1785 }
1786
1884734a 1787 amdgpu_amdkfd_device_probe(adev);
1788
3149d9da 1789 if (amdgpu_sriov_vf(adev)) {
122078de
ML
 1791	 /* handle vbios stuff prior to full access mode for the new handshake */
1791 if (adev->virt.req_init_data_ver == 1) {
1792 if (!amdgpu_get_bios(adev)) {
1793 DRM_ERROR("failed to get vbios\n");
1794 return -EINVAL;
1795 }
1796
1797 r = amdgpu_atombios_init(adev);
1798 if (r) {
1799 dev_err(adev->dev, "amdgpu_atombios_init failed\n");
1800 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_INIT_FAIL, 0, 0);
1801 return r;
1802 }
1803 }
2f294132 1804 }
122078de 1805
2f294132
ML
 1806	 /* we need to send REQ_GPU here for the legacy handshake, otherwise the vbios
 1807	 * will not be prepared by the host for this VF */
1808 if (amdgpu_sriov_vf(adev) && adev->virt.req_init_data_ver < 1) {
3149d9da
XY
1809 r = amdgpu_virt_request_full_gpu(adev, true);
1810 if (r)
2f294132 1811 return r;
3149d9da
XY
1812 }
1813
3b94fb10 1814 adev->pm.pp_feature = amdgpu_pp_feature_mask;
a35ad98b 1815 if (amdgpu_sriov_vf(adev) || sched_policy == KFD_SCHED_POLICY_NO_HWS)
00544006 1816 adev->pm.pp_feature &= ~PP_GFXOFF_MASK;
00f54b97 1817
d38ceaf9
AD
1818 for (i = 0; i < adev->num_ip_blocks; i++) {
1819 if ((amdgpu_ip_block_mask & (1 << i)) == 0) {
ed8cf00c
HR
1820 DRM_ERROR("disabled ip block: %d <%s>\n",
1821 i, adev->ip_blocks[i].version->funcs->name);
a1255107 1822 adev->ip_blocks[i].status.valid = false;
d38ceaf9 1823 } else {
a1255107
AD
1824 if (adev->ip_blocks[i].version->funcs->early_init) {
1825 r = adev->ip_blocks[i].version->funcs->early_init((void *)adev);
2c1a2784 1826 if (r == -ENOENT) {
a1255107 1827 adev->ip_blocks[i].status.valid = false;
2c1a2784 1828 } else if (r) {
a1255107
AD
1829 DRM_ERROR("early_init of IP block <%s> failed %d\n",
1830 adev->ip_blocks[i].version->funcs->name, r);
d38ceaf9 1831 return r;
2c1a2784 1832 } else {
a1255107 1833 adev->ip_blocks[i].status.valid = true;
2c1a2784 1834 }
974e6b64 1835 } else {
a1255107 1836 adev->ip_blocks[i].status.valid = true;
d38ceaf9 1837 }
d38ceaf9 1838 }
21a249ca
AD
1839 /* get the vbios after the asic_funcs are set up */
1840 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON) {
6e29c227
AD
1841 r = amdgpu_device_parse_gpu_info_fw(adev);
1842 if (r)
1843 return r;
1844
122078de
ML
1845 /* skip vbios handling for new handshake */
1846 if (amdgpu_sriov_vf(adev) && adev->virt.req_init_data_ver == 1)
1847 continue;
1848
21a249ca
AD
1849 /* Read BIOS */
1850 if (!amdgpu_get_bios(adev))
1851 return -EINVAL;
1852
1853 r = amdgpu_atombios_init(adev);
1854 if (r) {
1855 dev_err(adev->dev, "amdgpu_atombios_init failed\n");
1856 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_INIT_FAIL, 0, 0);
1857 return r;
1858 }
1859 }
d38ceaf9
AD
1860 }
1861
395d1fb9
NH
1862 adev->cg_flags &= amdgpu_cg_mask;
1863 adev->pg_flags &= amdgpu_pg_mask;
1864
d38ceaf9
AD
1865 return 0;
1866}
1867
0a4f2520
RZ
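/**
 * amdgpu_device_ip_hw_init_phase1 - run hw_init for the early hardware IPs
 *
 * @adev: amdgpu_device pointer
 *
 * First hardware init pass. Walks the IP blocks and runs hw_init for the
 * blocks that must come up before firmware loading: COMMON, IH and, when
 * running under SR-IOV, PSP. Blocks whose sw_init has not completed or that
 * are already in the hw state are skipped.
 * Returns 0 on success, negative error code on failure.
 */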
1868static int amdgpu_device_ip_hw_init_phase1(struct amdgpu_device *adev)
1869{
1870 int i, r;
1871
1872 for (i = 0; i < adev->num_ip_blocks; i++) {
1873 if (!adev->ip_blocks[i].status.sw)
1874 continue;
1875 if (adev->ip_blocks[i].status.hw)
1876 continue;
1877 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
2d11fd3f 1878 (amdgpu_sriov_vf(adev) && (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP)) ||
0a4f2520
RZ
1879 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH) {
1880 r = adev->ip_blocks[i].version->funcs->hw_init(adev);
1881 if (r) {
1882 DRM_ERROR("hw_init of IP block <%s> failed %d\n",
1883 adev->ip_blocks[i].version->funcs->name, r);
1884 return r;
1885 }
1886 adev->ip_blocks[i].status.hw = true;
1887 }
1888 }
1889
1890 return 0;
1891}
1892
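/**
 * amdgpu_device_ip_hw_init_phase2 - run hw_init for the remaining hardware IPs
 *
 * @adev: amdgpu_device pointer
 *
 * Second hardware init pass. Runs hw_init for every IP block that has
 * completed sw_init but has not been brought up yet, i.e. everything not
 * already handled in phase 1 or by the firmware loading step.
 * Returns 0 on success, negative error code on failure.
 */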
1893static int amdgpu_device_ip_hw_init_phase2(struct amdgpu_device *adev)
1894{
1895 int i, r;
1896
1897 for (i = 0; i < adev->num_ip_blocks; i++) {
1898 if (!adev->ip_blocks[i].status.sw)
1899 continue;
1900 if (adev->ip_blocks[i].status.hw)
1901 continue;
1902 r = adev->ip_blocks[i].version->funcs->hw_init(adev);
1903 if (r) {
1904 DRM_ERROR("hw_init of IP block <%s> failed %d\n",
1905 adev->ip_blocks[i].version->funcs->name, r);
1906 return r;
1907 }
1908 adev->ip_blocks[i].status.hw = true;
1909 }
1910
1911 return 0;
1912}
1913
7a3e0bb2
RZ
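/**
 * amdgpu_device_fw_loading - kick off firmware loading via PSP/SMU
 *
 * @adev: amdgpu_device pointer
 *
 * For VEGA10 and newer ASICs, brings up the PSP block (hw_init on a fresh
 * start, resume when coming back from a GPU reset or suspend) so it can load
 * the other firmwares, then loads the SMU firmware on bare metal (and on
 * TONGA even under SR-IOV).
 * Returns 0 on success, negative error code on failure.
 */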
1914static int amdgpu_device_fw_loading(struct amdgpu_device *adev)
1915{
1916 int r = 0;
1917 int i;
80f41f84 1918 uint32_t smu_version;
7a3e0bb2
RZ
1919
1920 if (adev->asic_type >= CHIP_VEGA10) {
1921 for (i = 0; i < adev->num_ip_blocks; i++) {
482f0e53
ML
1922 if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_PSP)
1923 continue;
1924
 1925	 /* no need to do the fw loading again if already done */
1926 if (adev->ip_blocks[i].status.hw == true)
1927 break;
1928
1929 if (adev->in_gpu_reset || adev->in_suspend) {
1930 r = adev->ip_blocks[i].version->funcs->resume(adev);
1931 if (r) {
1932 DRM_ERROR("resume of IP block <%s> failed %d\n",
7a3e0bb2 1933 adev->ip_blocks[i].version->funcs->name, r);
482f0e53
ML
1934 return r;
1935 }
1936 } else {
1937 r = adev->ip_blocks[i].version->funcs->hw_init(adev);
1938 if (r) {
1939 DRM_ERROR("hw_init of IP block <%s> failed %d\n",
1940 adev->ip_blocks[i].version->funcs->name, r);
1941 return r;
7a3e0bb2 1942 }
7a3e0bb2 1943 }
482f0e53
ML
1944
1945 adev->ip_blocks[i].status.hw = true;
1946 break;
7a3e0bb2
RZ
1947 }
1948 }
482f0e53 1949
8973d9ec
ED
1950 if (!amdgpu_sriov_vf(adev) || adev->asic_type == CHIP_TONGA)
1951 r = amdgpu_pm_load_smu_firmware(adev, &smu_version);
7a3e0bb2 1952
80f41f84 1953 return r;
7a3e0bb2
RZ
1954}
1955
e3ecdffa
AD
1956/**
1957 * amdgpu_device_ip_init - run init for hardware IPs
1958 *
1959 * @adev: amdgpu_device pointer
1960 *
1961 * Main initialization pass for hardware IPs. The list of all the hardware
1962 * IPs that make up the asic is walked and the sw_init and hw_init callbacks
1963 * are run. sw_init initializes the software state associated with each IP
1964 * and hw_init initializes the hardware associated with each IP.
1965 * Returns 0 on success, negative error code on failure.
1966 */
06ec9070 1967static int amdgpu_device_ip_init(struct amdgpu_device *adev)
d38ceaf9
AD
1968{
1969 int i, r;
1970
c030f2e4 1971 r = amdgpu_ras_init(adev);
1972 if (r)
1973 return r;
1974
2f294132
ML
1975 if (amdgpu_sriov_vf(adev) && adev->virt.req_init_data_ver > 0) {
1976 r = amdgpu_virt_request_full_gpu(adev, true);
1977 if (r)
1978 return -EAGAIN;
1979 }
1980
d38ceaf9 1981 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 1982 if (!adev->ip_blocks[i].status.valid)
d38ceaf9 1983 continue;
a1255107 1984 r = adev->ip_blocks[i].version->funcs->sw_init((void *)adev);
2c1a2784 1985 if (r) {
a1255107
AD
1986 DRM_ERROR("sw_init of IP block <%s> failed %d\n",
1987 adev->ip_blocks[i].version->funcs->name, r);
72d3f592 1988 goto init_failed;
2c1a2784 1989 }
a1255107 1990 adev->ip_blocks[i].status.sw = true;
bfca0289 1991
d38ceaf9 1992 /* need to do gmc hw init early so we can allocate gpu mem */
a1255107 1993 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) {
06ec9070 1994 r = amdgpu_device_vram_scratch_init(adev);
2c1a2784
AD
1995 if (r) {
1996 DRM_ERROR("amdgpu_vram_scratch_init failed %d\n", r);
72d3f592 1997 goto init_failed;
2c1a2784 1998 }
a1255107 1999 r = adev->ip_blocks[i].version->funcs->hw_init((void *)adev);
2c1a2784
AD
2000 if (r) {
2001 DRM_ERROR("hw_init %d failed %d\n", i, r);
72d3f592 2002 goto init_failed;
2c1a2784 2003 }
06ec9070 2004 r = amdgpu_device_wb_init(adev);
2c1a2784 2005 if (r) {
06ec9070 2006 DRM_ERROR("amdgpu_device_wb_init failed %d\n", r);
72d3f592 2007 goto init_failed;
2c1a2784 2008 }
a1255107 2009 adev->ip_blocks[i].status.hw = true;
2493664f
ML
2010
2011 /* right after GMC hw init, we create CSA */
f92d5c61 2012 if (amdgpu_mcbp || amdgpu_sriov_vf(adev)) {
1e256e27
RZ
2013 r = amdgpu_allocate_static_csa(adev, &adev->virt.csa_obj,
2014 AMDGPU_GEM_DOMAIN_VRAM,
2015 AMDGPU_CSA_SIZE);
2493664f
ML
2016 if (r) {
2017 DRM_ERROR("allocate CSA failed %d\n", r);
72d3f592 2018 goto init_failed;
2493664f
ML
2019 }
2020 }
d38ceaf9
AD
2021 }
2022 }
2023
c9ffa427
YT
2024 if (amdgpu_sriov_vf(adev))
2025 amdgpu_virt_init_data_exchange(adev);
2026
533aed27
AG
2027 r = amdgpu_ib_pool_init(adev);
2028 if (r) {
2029 dev_err(adev->dev, "IB initialization failed (%d).\n", r);
2030 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_IB_INIT_FAIL, 0, r);
2031 goto init_failed;
2032 }
2033
c8963ea4
RZ
 2034	 r = amdgpu_ucode_create_bo(adev); /* create ucode bo when sw_init complete */
2035 if (r)
72d3f592 2036 goto init_failed;
0a4f2520
RZ
2037
2038 r = amdgpu_device_ip_hw_init_phase1(adev);
2039 if (r)
72d3f592 2040 goto init_failed;
0a4f2520 2041
7a3e0bb2
RZ
2042 r = amdgpu_device_fw_loading(adev);
2043 if (r)
72d3f592 2044 goto init_failed;
7a3e0bb2 2045
0a4f2520
RZ
2046 r = amdgpu_device_ip_hw_init_phase2(adev);
2047 if (r)
72d3f592 2048 goto init_failed;
d38ceaf9 2049
121a2bc6
AG
2050 /*
2051 * retired pages will be loaded from eeprom and reserved here,
2052 * it should be called after amdgpu_device_ip_hw_init_phase2 since
2053 * for some ASICs the RAS EEPROM code relies on SMU fully functioning
 2054	 * for I2C communication, which is only true at this point.
2055 * recovery_init may fail, but it can free all resources allocated by
2056 * itself and its failure should not stop amdgpu init process.
2057 *
2058 * Note: theoretically, this should be called before all vram allocations
 2059	 * to protect the retired pages from being abused
2060 */
2061 amdgpu_ras_recovery_init(adev);
2062
3e2e2ab5
HZ
2063 if (adev->gmc.xgmi.num_physical_nodes > 1)
2064 amdgpu_xgmi_add_device(adev);
1884734a 2065 amdgpu_amdkfd_device_init(adev);
c6332b97 2066
bd607166
KR
2067 amdgpu_fru_get_product_info(adev);
2068
72d3f592 2069init_failed:
c9ffa427 2070 if (amdgpu_sriov_vf(adev))
c6332b97 2071 amdgpu_virt_release_full_gpu(adev, true);
2072
72d3f592 2073 return r;
d38ceaf9
AD
2074}
2075
e3ecdffa
AD
2076/**
2077 * amdgpu_device_fill_reset_magic - writes reset magic to gart pointer
2078 *
2079 * @adev: amdgpu_device pointer
2080 *
2081 * Writes a reset magic value to the gart pointer in VRAM. The driver calls
2082 * this function before a GPU reset. If the value is retained after a
 2083	 * GPU reset, VRAM has not been lost. Some GPU resets may destroy VRAM contents.
2084 */
06ec9070 2085static void amdgpu_device_fill_reset_magic(struct amdgpu_device *adev)
0c49e0b8
CZ
2086{
2087 memcpy(adev->reset_magic, adev->gart.ptr, AMDGPU_RESET_MAGIC_NUM);
2088}
2089
e3ecdffa
AD
2090/**
2091 * amdgpu_device_check_vram_lost - check if vram is valid
2092 *
2093 * @adev: amdgpu_device pointer
2094 *
2095 * Checks the reset magic value written to the gart pointer in VRAM.
2096 * The driver calls this after a GPU reset to see if the contents of
2097 * VRAM is lost or now.
2098 * returns true if vram is lost, false if not.
2099 */
06ec9070 2100static bool amdgpu_device_check_vram_lost(struct amdgpu_device *adev)
0c49e0b8 2101{
dadce777
EQ
2102 if (memcmp(adev->gart.ptr, adev->reset_magic,
2103 AMDGPU_RESET_MAGIC_NUM))
2104 return true;
2105
2106 if (!adev->in_gpu_reset)
2107 return false;
2108
2109 /*
2110 * For all ASICs with baco/mode1 reset, the VRAM is
2111 * always assumed to be lost.
2112 */
2113 switch (amdgpu_asic_reset_method(adev)) {
2114 case AMD_RESET_METHOD_BACO:
2115 case AMD_RESET_METHOD_MODE1:
2116 return true;
2117 default:
2118 return false;
2119 }
0c49e0b8
CZ
2120}
2121
e3ecdffa 2122/**
1112a46b 2123 * amdgpu_device_set_cg_state - set clockgating for amdgpu device
e3ecdffa
AD
2124 *
2125 * @adev: amdgpu_device pointer
b8b72130 2126 * @state: clockgating state (gate or ungate)
e3ecdffa 2127 *
e3ecdffa 2128 * The list of all the hardware IPs that make up the asic is walked and the
1112a46b
RZ
 2129	 * set_clockgating_state callbacks are run.
 2130	 * During late init this pass enables clockgating for the hardware IPs;
 2131	 * during fini or suspend it disables clockgating.
e3ecdffa
AD
2132 * Returns 0 on success, negative error code on failure.
2133 */
fdd34271 2134
1112a46b
RZ
2135static int amdgpu_device_set_cg_state(struct amdgpu_device *adev,
2136 enum amd_clockgating_state state)
d38ceaf9 2137{
1112a46b 2138 int i, j, r;
d38ceaf9 2139
4a2ba394
SL
2140 if (amdgpu_emu_mode == 1)
2141 return 0;
2142
1112a46b
RZ
2143 for (j = 0; j < adev->num_ip_blocks; j++) {
2144 i = state == AMD_CG_STATE_GATE ? j : adev->num_ip_blocks - j - 1;
a2d31dc3 2145 if (!adev->ip_blocks[i].status.late_initialized)
d38ceaf9 2146 continue;
4a446d55 2147 /* skip CG for VCE/UVD, it's handled specially */
a1255107 2148 if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_UVD &&
57716327 2149 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCE &&
34319b32 2150 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCN &&
52f2e779 2151 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_JPEG &&
57716327 2152 adev->ip_blocks[i].version->funcs->set_clockgating_state) {
4a446d55 2153 /* enable clockgating to save power */
a1255107 2154 r = adev->ip_blocks[i].version->funcs->set_clockgating_state((void *)adev,
1112a46b 2155 state);
4a446d55
AD
2156 if (r) {
2157 DRM_ERROR("set_clockgating_state(gate) of IP block <%s> failed %d\n",
a1255107 2158 adev->ip_blocks[i].version->funcs->name, r);
4a446d55
AD
2159 return r;
2160 }
b0b00ff1 2161 }
d38ceaf9 2162 }
06b18f61 2163
c9f96fd5
RZ
2164 return 0;
2165}
2166
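/**
 * amdgpu_device_set_pg_state - set powergating for amdgpu device
 *
 * @adev: amdgpu_device pointer
 * @state: powergating state (gate or ungate)
 *
 * Walks the list of IP blocks (forward when gating, in reverse when
 * ungating) and runs the set_powergating_state callbacks, skipping
 * UVD/VCE/VCN/JPEG which are handled specially.
 * Returns 0 on success, negative error code on failure.
 */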
1112a46b 2167static int amdgpu_device_set_pg_state(struct amdgpu_device *adev, enum amd_powergating_state state)
c9f96fd5 2168{
1112a46b 2169 int i, j, r;
06b18f61 2170
c9f96fd5
RZ
2171 if (amdgpu_emu_mode == 1)
2172 return 0;
2173
1112a46b
RZ
2174 for (j = 0; j < adev->num_ip_blocks; j++) {
2175 i = state == AMD_PG_STATE_GATE ? j : adev->num_ip_blocks - j - 1;
a2d31dc3 2176 if (!adev->ip_blocks[i].status.late_initialized)
c9f96fd5
RZ
2177 continue;
2178 /* skip CG for VCE/UVD, it's handled specially */
2179 if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_UVD &&
2180 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCE &&
2181 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCN &&
52f2e779 2182 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_JPEG &&
c9f96fd5
RZ
2183 adev->ip_blocks[i].version->funcs->set_powergating_state) {
2184 /* enable powergating to save power */
2185 r = adev->ip_blocks[i].version->funcs->set_powergating_state((void *)adev,
1112a46b 2186 state);
c9f96fd5
RZ
2187 if (r) {
2188 DRM_ERROR("set_powergating_state(gate) of IP block <%s> failed %d\n",
2189 adev->ip_blocks[i].version->funcs->name, r);
2190 return r;
2191 }
2192 }
2193 }
2dc80b00
S
2194 return 0;
2195}
2196
beff74bc
AD
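/**
 * amdgpu_device_enable_mgpu_fan_boost - enable the fan boost feature on multi-GPU systems
 *
 * Enables the MGPU fan boost feature through powerplay for every registered
 * dGPU that supports it, but only when two or more dGPUs are present in the
 * system.
 * Returns 0 on success, negative error code on failure.
 */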
2197static int amdgpu_device_enable_mgpu_fan_boost(void)
2198{
2199 struct amdgpu_gpu_instance *gpu_ins;
2200 struct amdgpu_device *adev;
2201 int i, ret = 0;
2202
2203 mutex_lock(&mgpu_info.mutex);
2204
2205 /*
2206 * MGPU fan boost feature should be enabled
2207 * only when there are two or more dGPUs in
2208 * the system
2209 */
2210 if (mgpu_info.num_dgpu < 2)
2211 goto out;
2212
2213 for (i = 0; i < mgpu_info.num_dgpu; i++) {
2214 gpu_ins = &(mgpu_info.gpu_ins[i]);
2215 adev = gpu_ins->adev;
2216 if (!(adev->flags & AMD_IS_APU) &&
2217 !gpu_ins->mgpu_fan_enabled &&
2218 adev->powerplay.pp_funcs &&
2219 adev->powerplay.pp_funcs->enable_mgpu_fan_boost) {
2220 ret = amdgpu_dpm_enable_mgpu_fan_boost(adev);
2221 if (ret)
2222 break;
2223
2224 gpu_ins->mgpu_fan_enabled = 1;
2225 }
2226 }
2227
2228out:
2229 mutex_unlock(&mgpu_info.mutex);
2230
2231 return ret;
2232}
2233
e3ecdffa
AD
2234/**
2235 * amdgpu_device_ip_late_init - run late init for hardware IPs
2236 *
2237 * @adev: amdgpu_device pointer
2238 *
2239 * Late initialization pass for hardware IPs. The list of all the hardware
2240 * IPs that make up the asic is walked and the late_init callbacks are run.
2241 * late_init covers any special initialization that an IP requires
 2242	 * after all of them have been initialized or something that needs to happen
2243 * late in the init process.
2244 * Returns 0 on success, negative error code on failure.
2245 */
06ec9070 2246static int amdgpu_device_ip_late_init(struct amdgpu_device *adev)
2dc80b00 2247{
60599a03 2248 struct amdgpu_gpu_instance *gpu_instance;
2dc80b00
S
2249 int i = 0, r;
2250
2251 for (i = 0; i < adev->num_ip_blocks; i++) {
73f847db 2252 if (!adev->ip_blocks[i].status.hw)
2dc80b00
S
2253 continue;
2254 if (adev->ip_blocks[i].version->funcs->late_init) {
2255 r = adev->ip_blocks[i].version->funcs->late_init((void *)adev);
2256 if (r) {
2257 DRM_ERROR("late_init of IP block <%s> failed %d\n",
2258 adev->ip_blocks[i].version->funcs->name, r);
2259 return r;
2260 }
2dc80b00 2261 }
73f847db 2262 adev->ip_blocks[i].status.late_initialized = true;
2dc80b00
S
2263 }
2264
a891d239
DL
2265 amdgpu_ras_set_error_query_ready(adev, true);
2266
1112a46b
RZ
2267 amdgpu_device_set_cg_state(adev, AMD_CG_STATE_GATE);
2268 amdgpu_device_set_pg_state(adev, AMD_PG_STATE_GATE);
916ac57f 2269
06ec9070 2270 amdgpu_device_fill_reset_magic(adev);
d38ceaf9 2271
beff74bc
AD
2272 r = amdgpu_device_enable_mgpu_fan_boost();
2273 if (r)
2274 DRM_ERROR("enable mgpu fan boost failed (%d).\n", r);
2275
60599a03
EQ
2276
2277 if (adev->gmc.xgmi.num_physical_nodes > 1) {
2278 mutex_lock(&mgpu_info.mutex);
2279
2280 /*
2281 * Reset device p-state to low as this was booted with high.
2282 *
2283 * This should be performed only after all devices from the same
2284 * hive get initialized.
2285 *
 2286	 * However, the number of devices in the hive is not known in advance,
 2287	 * as it is counted one by one while the devices are initialized.
2288 *
2289 * So, we wait for all XGMI interlinked devices initialized.
2290 * This may bring some delays as those devices may come from
2291 * different hives. But that should be OK.
2292 */
2293 if (mgpu_info.num_dgpu == adev->gmc.xgmi.num_physical_nodes) {
2294 for (i = 0; i < mgpu_info.num_gpu; i++) {
2295 gpu_instance = &(mgpu_info.gpu_ins[i]);
2296 if (gpu_instance->adev->flags & AMD_IS_APU)
2297 continue;
2298
d84a430d
JK
2299 r = amdgpu_xgmi_set_pstate(gpu_instance->adev,
2300 AMDGPU_XGMI_PSTATE_MIN);
60599a03
EQ
2301 if (r) {
2302 DRM_ERROR("pstate setting failed (%d).\n", r);
2303 break;
2304 }
2305 }
2306 }
2307
2308 mutex_unlock(&mgpu_info.mutex);
2309 }
2310
d38ceaf9
AD
2311 return 0;
2312}
2313
e3ecdffa
AD
2314/**
2315 * amdgpu_device_ip_fini - run fini for hardware IPs
2316 *
2317 * @adev: amdgpu_device pointer
2318 *
2319 * Main teardown pass for hardware IPs. The list of all the hardware
2320 * IPs that make up the asic is walked and the hw_fini and sw_fini callbacks
2321 * are run. hw_fini tears down the hardware associated with each IP
2322 * and sw_fini tears down any software state associated with each IP.
2323 * Returns 0 on success, negative error code on failure.
2324 */
06ec9070 2325static int amdgpu_device_ip_fini(struct amdgpu_device *adev)
d38ceaf9
AD
2326{
2327 int i, r;
2328
5278a159
SY
2329 if (amdgpu_sriov_vf(adev) && adev->virt.ras_init_done)
2330 amdgpu_virt_release_ras_err_handler_data(adev);
2331
c030f2e4 2332 amdgpu_ras_pre_fini(adev);
2333
a82400b5
AG
2334 if (adev->gmc.xgmi.num_physical_nodes > 1)
2335 amdgpu_xgmi_remove_device(adev);
2336
1884734a 2337 amdgpu_amdkfd_device_fini(adev);
05df1f01
RZ
2338
2339 amdgpu_device_set_pg_state(adev, AMD_PG_STATE_UNGATE);
fdd34271
RZ
2340 amdgpu_device_set_cg_state(adev, AMD_CG_STATE_UNGATE);
2341
3e96dbfd
AD
2342 /* need to disable SMC first */
2343 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 2344 if (!adev->ip_blocks[i].status.hw)
3e96dbfd 2345 continue;
fdd34271 2346 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) {
a1255107 2347 r = adev->ip_blocks[i].version->funcs->hw_fini((void *)adev);
3e96dbfd
AD
2348 /* XXX handle errors */
2349 if (r) {
2350 DRM_DEBUG("hw_fini of IP block <%s> failed %d\n",
a1255107 2351 adev->ip_blocks[i].version->funcs->name, r);
3e96dbfd 2352 }
a1255107 2353 adev->ip_blocks[i].status.hw = false;
3e96dbfd
AD
2354 break;
2355 }
2356 }
2357
d38ceaf9 2358 for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
a1255107 2359 if (!adev->ip_blocks[i].status.hw)
d38ceaf9 2360 continue;
8201a67a 2361
a1255107 2362 r = adev->ip_blocks[i].version->funcs->hw_fini((void *)adev);
d38ceaf9 2363 /* XXX handle errors */
2c1a2784 2364 if (r) {
a1255107
AD
2365 DRM_DEBUG("hw_fini of IP block <%s> failed %d\n",
2366 adev->ip_blocks[i].version->funcs->name, r);
2c1a2784 2367 }
8201a67a 2368
a1255107 2369 adev->ip_blocks[i].status.hw = false;
d38ceaf9
AD
2370 }
2371
9950cda2 2372
d38ceaf9 2373 for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
a1255107 2374 if (!adev->ip_blocks[i].status.sw)
d38ceaf9 2375 continue;
c12aba3a
ML
2376
2377 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) {
c8963ea4 2378 amdgpu_ucode_free_bo(adev);
1e256e27 2379 amdgpu_free_static_csa(&adev->virt.csa_obj);
c12aba3a
ML
2380 amdgpu_device_wb_fini(adev);
2381 amdgpu_device_vram_scratch_fini(adev);
533aed27 2382 amdgpu_ib_pool_fini(adev);
c12aba3a
ML
2383 }
2384
a1255107 2385 r = adev->ip_blocks[i].version->funcs->sw_fini((void *)adev);
d38ceaf9 2386 /* XXX handle errors */
2c1a2784 2387 if (r) {
a1255107
AD
2388 DRM_DEBUG("sw_fini of IP block <%s> failed %d\n",
2389 adev->ip_blocks[i].version->funcs->name, r);
2c1a2784 2390 }
a1255107
AD
2391 adev->ip_blocks[i].status.sw = false;
2392 adev->ip_blocks[i].status.valid = false;
d38ceaf9
AD
2393 }
2394
a6dcfd9c 2395 for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
a1255107 2396 if (!adev->ip_blocks[i].status.late_initialized)
8a2eef1d 2397 continue;
a1255107
AD
2398 if (adev->ip_blocks[i].version->funcs->late_fini)
2399 adev->ip_blocks[i].version->funcs->late_fini((void *)adev);
2400 adev->ip_blocks[i].status.late_initialized = false;
a6dcfd9c
ML
2401 }
2402
c030f2e4 2403 amdgpu_ras_fini(adev);
2404
030308fc 2405 if (amdgpu_sriov_vf(adev))
24136135
ML
2406 if (amdgpu_virt_release_full_gpu(adev, false))
2407 DRM_ERROR("failed to release exclusive mode on fini\n");
2493664f 2408
d38ceaf9
AD
2409 return 0;
2410}
2411
e3ecdffa 2412/**
beff74bc 2413 * amdgpu_device_delayed_init_work_handler - work handler for IB tests
e3ecdffa 2414 *
1112a46b 2415 * @work: work_struct.
e3ecdffa 2416 */
beff74bc 2417static void amdgpu_device_delayed_init_work_handler(struct work_struct *work)
2dc80b00
S
2418{
2419 struct amdgpu_device *adev =
beff74bc 2420 container_of(work, struct amdgpu_device, delayed_init_work.work);
916ac57f
RZ
2421 int r;
2422
2423 r = amdgpu_ib_ring_tests(adev);
2424 if (r)
2425 DRM_ERROR("ib ring test failed (%d).\n", r);
2dc80b00
S
2426}
2427
1e317b99
RZ
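/**
 * amdgpu_device_delay_enable_gfx_off - work handler that enables GFXOFF
 *
 * @work: work_struct
 *
 * Delayed work handler. Asks the SMU to power-gate the GFX block once no
 * GFXOFF disable requests are outstanding (gfx_off_req_count is zero) and
 * the block is not already in the GFXOFF state.
 */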
2428static void amdgpu_device_delay_enable_gfx_off(struct work_struct *work)
2429{
2430 struct amdgpu_device *adev =
2431 container_of(work, struct amdgpu_device, gfx.gfx_off_delay_work.work);
2432
2433 mutex_lock(&adev->gfx.gfx_off_mutex);
2434 if (!adev->gfx.gfx_off_state && !adev->gfx.gfx_off_req_count) {
2435 if (!amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_GFX, true))
2436 adev->gfx.gfx_off_state = true;
2437 }
2438 mutex_unlock(&adev->gfx.gfx_off_mutex);
2439}
2440
e3ecdffa 2441/**
e7854a03 2442 * amdgpu_device_ip_suspend_phase1 - run suspend for hardware IPs (phase 1)
e3ecdffa
AD
2443 *
2444 * @adev: amdgpu_device pointer
2445 *
2446 * Main suspend function for hardware IPs. The list of all the hardware
2447 * IPs that make up the asic is walked, clockgating is disabled and the
2448 * suspend callbacks are run. suspend puts the hardware and software state
2449 * in each IP into a state suitable for suspend.
2450 * Returns 0 on success, negative error code on failure.
2451 */
e7854a03
AD
2452static int amdgpu_device_ip_suspend_phase1(struct amdgpu_device *adev)
2453{
2454 int i, r;
2455
ced1ba97
PL
2456 amdgpu_device_set_pg_state(adev, AMD_PG_STATE_UNGATE);
2457 amdgpu_device_set_cg_state(adev, AMD_CG_STATE_UNGATE);
05df1f01 2458
e7854a03
AD
2459 for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
2460 if (!adev->ip_blocks[i].status.valid)
2461 continue;
2462 /* displays are handled separately */
2463 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_DCE) {
e7854a03
AD
2464 /* XXX handle errors */
2465 r = adev->ip_blocks[i].version->funcs->suspend(adev);
2466 /* XXX handle errors */
2467 if (r) {
2468 DRM_ERROR("suspend of IP block <%s> failed %d\n",
2469 adev->ip_blocks[i].version->funcs->name, r);
482f0e53 2470 return r;
e7854a03 2471 }
482f0e53 2472 adev->ip_blocks[i].status.hw = false;
e7854a03
AD
2473 }
2474 }
2475
e7854a03
AD
2476 return 0;
2477}
2478
2479/**
2480 * amdgpu_device_ip_suspend_phase2 - run suspend for hardware IPs (phase 2)
2481 *
2482 * @adev: amdgpu_device pointer
2483 *
2484 * Main suspend function for hardware IPs. The list of all the hardware
2485 * IPs that make up the asic is walked, clockgating is disabled and the
2486 * suspend callbacks are run. suspend puts the hardware and software state
2487 * in each IP into a state suitable for suspend.
2488 * Returns 0 on success, negative error code on failure.
2489 */
2490static int amdgpu_device_ip_suspend_phase2(struct amdgpu_device *adev)
d38ceaf9
AD
2491{
2492 int i, r;
2493
2494 for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
a1255107 2495 if (!adev->ip_blocks[i].status.valid)
d38ceaf9 2496 continue;
e7854a03
AD
2497 /* displays are handled in phase1 */
2498 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_DCE)
2499 continue;
bff77e86
LM
2500 /* PSP lost connection when err_event_athub occurs */
2501 if (amdgpu_ras_intr_triggered() &&
2502 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP) {
2503 adev->ip_blocks[i].status.hw = false;
2504 continue;
2505 }
d38ceaf9 2506 /* XXX handle errors */
a1255107 2507 r = adev->ip_blocks[i].version->funcs->suspend(adev);
d38ceaf9 2508 /* XXX handle errors */
2c1a2784 2509 if (r) {
a1255107
AD
2510 DRM_ERROR("suspend of IP block <%s> failed %d\n",
2511 adev->ip_blocks[i].version->funcs->name, r);
2c1a2784 2512 }
876923fb 2513 adev->ip_blocks[i].status.hw = false;
a3a09142 2514 /* handle putting the SMC in the appropriate state */
86b93fd6
JZ
 2515	 if (!amdgpu_sriov_vf(adev)) {
2516 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) {
2517 r = amdgpu_dpm_set_mp1_state(adev, adev->mp1_state);
2518 if (r) {
2519 DRM_ERROR("SMC failed to set mp1 state %d, %d\n",
2520 adev->mp1_state, r);
2521 return r;
2522 }
a3a09142
AD
2523 }
2524 }
b5507c7e 2525 adev->ip_blocks[i].status.hw = false;
d38ceaf9
AD
2526 }
2527
2528 return 0;
2529}
2530
e7854a03
AD
2531/**
2532 * amdgpu_device_ip_suspend - run suspend for hardware IPs
2533 *
2534 * @adev: amdgpu_device pointer
2535 *
2536 * Main suspend function for hardware IPs. The list of all the hardware
2537 * IPs that make up the asic is walked, clockgating is disabled and the
2538 * suspend callbacks are run. suspend puts the hardware and software state
2539 * in each IP into a state suitable for suspend.
2540 * Returns 0 on success, negative error code on failure.
2541 */
2542int amdgpu_device_ip_suspend(struct amdgpu_device *adev)
2543{
2544 int r;
2545
e7819644
YT
2546 if (amdgpu_sriov_vf(adev))
2547 amdgpu_virt_request_full_gpu(adev, false);
2548
e7854a03
AD
2549 r = amdgpu_device_ip_suspend_phase1(adev);
2550 if (r)
2551 return r;
2552 r = amdgpu_device_ip_suspend_phase2(adev);
2553
e7819644
YT
2554 if (amdgpu_sriov_vf(adev))
2555 amdgpu_virt_release_full_gpu(adev, false);
2556
e7854a03
AD
2557 return r;
2558}
2559
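/**
 * amdgpu_device_ip_reinit_early_sriov - reinit early hardware IPs after a VF reset
 *
 * @adev: amdgpu_device pointer
 *
 * Re-runs hw_init in a fixed order (GMC, COMMON, PSP, IH) for the valid IP
 * blocks of an SR-IOV virtual function, marking each block's hw status as it
 * is brought back up.
 * Returns 0 on success, negative error code on failure.
 */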
06ec9070 2560static int amdgpu_device_ip_reinit_early_sriov(struct amdgpu_device *adev)
a90ad3c2
ML
2561{
2562 int i, r;
2563
2cb681b6
ML
2564 static enum amd_ip_block_type ip_order[] = {
2565 AMD_IP_BLOCK_TYPE_GMC,
2566 AMD_IP_BLOCK_TYPE_COMMON,
39186aef 2567 AMD_IP_BLOCK_TYPE_PSP,
2cb681b6
ML
2568 AMD_IP_BLOCK_TYPE_IH,
2569 };
a90ad3c2 2570
2cb681b6
ML
2571 for (i = 0; i < ARRAY_SIZE(ip_order); i++) {
2572 int j;
2573 struct amdgpu_ip_block *block;
a90ad3c2 2574
2cb681b6
ML
2575 for (j = 0; j < adev->num_ip_blocks; j++) {
2576 block = &adev->ip_blocks[j];
2577
482f0e53 2578 block->status.hw = false;
2cb681b6
ML
2579 if (block->version->type != ip_order[i] ||
2580 !block->status.valid)
2581 continue;
2582
2583 r = block->version->funcs->hw_init(adev);
0aaeefcc	 2584	 DRM_INFO("RE-INIT-early: %s %s\n", block->version->funcs->name, r ? "failed" : "succeeded");
c41d1cf6
ML
2585 if (r)
2586 return r;
482f0e53 2587 block->status.hw = true;
a90ad3c2
ML
2588 }
2589 }
2590
2591 return 0;
2592}
2593
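/**
 * amdgpu_device_ip_reinit_late_sriov - reinit the remaining hardware IPs after a VF reset
 *
 * @adev: amdgpu_device pointer
 *
 * Re-initializes the rest of the IP blocks of an SR-IOV virtual function in a
 * fixed order (SMC, DCE, GFX, SDMA, UVD, VCE, VCN). SMC is resumed rather
 * than hw_init'ed; the other blocks go through hw_init.
 * Returns 0 on success, negative error code on failure.
 */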
06ec9070 2594static int amdgpu_device_ip_reinit_late_sriov(struct amdgpu_device *adev)
a90ad3c2
ML
2595{
2596 int i, r;
2597
2cb681b6
ML
2598 static enum amd_ip_block_type ip_order[] = {
2599 AMD_IP_BLOCK_TYPE_SMC,
2600 AMD_IP_BLOCK_TYPE_DCE,
2601 AMD_IP_BLOCK_TYPE_GFX,
2602 AMD_IP_BLOCK_TYPE_SDMA,
257deb8c 2603 AMD_IP_BLOCK_TYPE_UVD,
d83c7a07
JJ
2604 AMD_IP_BLOCK_TYPE_VCE,
2605 AMD_IP_BLOCK_TYPE_VCN
2cb681b6 2606 };
a90ad3c2 2607
2cb681b6
ML
2608 for (i = 0; i < ARRAY_SIZE(ip_order); i++) {
2609 int j;
2610 struct amdgpu_ip_block *block;
a90ad3c2 2611
2cb681b6
ML
2612 for (j = 0; j < adev->num_ip_blocks; j++) {
2613 block = &adev->ip_blocks[j];
2614
2615 if (block->version->type != ip_order[i] ||
482f0e53
ML
2616 !block->status.valid ||
2617 block->status.hw)
2cb681b6
ML
2618 continue;
2619
895bd048
JZ
2620 if (block->version->type == AMD_IP_BLOCK_TYPE_SMC)
2621 r = block->version->funcs->resume(adev);
2622 else
2623 r = block->version->funcs->hw_init(adev);
2624
0aaeefcc	 2625	 DRM_INFO("RE-INIT-late: %s %s\n", block->version->funcs->name, r ? "failed" : "succeeded");
c41d1cf6
ML
2626 if (r)
2627 return r;
482f0e53 2628 block->status.hw = true;
a90ad3c2
ML
2629 }
2630 }
2631
2632 return 0;
2633}
2634
e3ecdffa
AD
2635/**
2636 * amdgpu_device_ip_resume_phase1 - run resume for hardware IPs
2637 *
2638 * @adev: amdgpu_device pointer
2639 *
2640 * First resume function for hardware IPs. The list of all the hardware
2641 * IPs that make up the asic is walked and the resume callbacks are run for
2642 * COMMON, GMC, and IH. resume puts the hardware into a functional state
2643 * after a suspend and updates the software state as necessary. This
2644 * function is also used for restoring the GPU after a GPU reset.
2645 * Returns 0 on success, negative error code on failure.
2646 */
06ec9070 2647static int amdgpu_device_ip_resume_phase1(struct amdgpu_device *adev)
d38ceaf9
AD
2648{
2649 int i, r;
2650
a90ad3c2 2651 for (i = 0; i < adev->num_ip_blocks; i++) {
482f0e53 2652 if (!adev->ip_blocks[i].status.valid || adev->ip_blocks[i].status.hw)
a90ad3c2 2653 continue;
a90ad3c2 2654 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
e3ecdffa
AD
2655 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
2656 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH) {
482f0e53 2657
fcf0649f
CZ
2658 r = adev->ip_blocks[i].version->funcs->resume(adev);
2659 if (r) {
2660 DRM_ERROR("resume of IP block <%s> failed %d\n",
2661 adev->ip_blocks[i].version->funcs->name, r);
2662 return r;
2663 }
482f0e53 2664 adev->ip_blocks[i].status.hw = true;
a90ad3c2
ML
2665 }
2666 }
2667
2668 return 0;
2669}
2670
e3ecdffa
AD
2671/**
2672 * amdgpu_device_ip_resume_phase2 - run resume for hardware IPs
2673 *
2674 * @adev: amdgpu_device pointer
2675 *
 2676	 * Second resume function for hardware IPs. The list of all the hardware
2677 * IPs that make up the asic is walked and the resume callbacks are run for
2678 * all blocks except COMMON, GMC, and IH. resume puts the hardware into a
2679 * functional state after a suspend and updates the software state as
2680 * necessary. This function is also used for restoring the GPU after a GPU
2681 * reset.
2682 * Returns 0 on success, negative error code on failure.
2683 */
06ec9070 2684static int amdgpu_device_ip_resume_phase2(struct amdgpu_device *adev)
d38ceaf9
AD
2685{
2686 int i, r;
2687
2688 for (i = 0; i < adev->num_ip_blocks; i++) {
482f0e53 2689 if (!adev->ip_blocks[i].status.valid || adev->ip_blocks[i].status.hw)
d38ceaf9 2690 continue;
fcf0649f 2691 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
e3ecdffa 2692 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
7a3e0bb2
RZ
2693 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH ||
2694 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP)
fcf0649f 2695 continue;
a1255107 2696 r = adev->ip_blocks[i].version->funcs->resume(adev);
2c1a2784 2697 if (r) {
a1255107
AD
2698 DRM_ERROR("resume of IP block <%s> failed %d\n",
2699 adev->ip_blocks[i].version->funcs->name, r);
d38ceaf9 2700 return r;
2c1a2784 2701 }
482f0e53 2702 adev->ip_blocks[i].status.hw = true;
d38ceaf9
AD
2703 }
2704
2705 return 0;
2706}
2707
e3ecdffa
AD
2708/**
2709 * amdgpu_device_ip_resume - run resume for hardware IPs
2710 *
2711 * @adev: amdgpu_device pointer
2712 *
2713 * Main resume function for hardware IPs. The hardware IPs
 2714	 * are split into two resume functions because they are
 2715	 * also used in recovering from a GPU reset and some additional
 2716	 * steps need to be taken between them. In this case (S3/S4) they are
2717 * run sequentially.
2718 * Returns 0 on success, negative error code on failure.
2719 */
06ec9070 2720static int amdgpu_device_ip_resume(struct amdgpu_device *adev)
fcf0649f
CZ
2721{
2722 int r;
2723
06ec9070 2724 r = amdgpu_device_ip_resume_phase1(adev);
fcf0649f
CZ
2725 if (r)
2726 return r;
7a3e0bb2
RZ
2727
2728 r = amdgpu_device_fw_loading(adev);
2729 if (r)
2730 return r;
2731
06ec9070 2732 r = amdgpu_device_ip_resume_phase2(adev);
fcf0649f
CZ
2733
2734 return r;
2735}
2736
e3ecdffa
AD
2737/**
2738 * amdgpu_device_detect_sriov_bios - determine if the board supports SR-IOV
2739 *
2740 * @adev: amdgpu_device pointer
2741 *
2742 * Query the VBIOS data tables to determine if the board supports SR-IOV.
2743 */
4e99a44e 2744static void amdgpu_device_detect_sriov_bios(struct amdgpu_device *adev)
048765ad 2745{
6867e1b5
ML
2746 if (amdgpu_sriov_vf(adev)) {
2747 if (adev->is_atom_fw) {
2748 if (amdgpu_atomfirmware_gpu_supports_virtualization(adev))
2749 adev->virt.caps |= AMDGPU_SRIOV_CAPS_SRIOV_VBIOS;
2750 } else {
2751 if (amdgpu_atombios_has_gpu_virtualization_table(adev))
2752 adev->virt.caps |= AMDGPU_SRIOV_CAPS_SRIOV_VBIOS;
2753 }
2754
2755 if (!(adev->virt.caps & AMDGPU_SRIOV_CAPS_SRIOV_VBIOS))
2756 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_NO_VBIOS, 0, 0);
a5bde2f9 2757 }
048765ad
AR
2758}
2759
e3ecdffa
AD
2760/**
2761 * amdgpu_device_asic_has_dc_support - determine if DC supports the asic
2762 *
2763 * @asic_type: AMD asic type
2764 *
 2765	 * Check if there is DC (new modesetting infrastructure) support for an asic.
 2766	 * Returns true if DC has support, false if not.
2767 */
4562236b
HW
2768bool amdgpu_device_asic_has_dc_support(enum amd_asic_type asic_type)
2769{
2770 switch (asic_type) {
2771#if defined(CONFIG_DRM_AMD_DC)
2772 case CHIP_BONAIRE:
0d6fbccb 2773 case CHIP_KAVERI:
367e6687
AD
2774 case CHIP_KABINI:
2775 case CHIP_MULLINS:
d9fda248
HW
2776 /*
2777 * We have systems in the wild with these ASICs that require
2778 * LVDS and VGA support which is not supported with DC.
2779 *
 2780	 * Fall back to the non-DC driver here by default so as not to
2781 * cause regressions.
2782 */
2783 return amdgpu_dc > 0;
2784 case CHIP_HAWAII:
4562236b
HW
2785 case CHIP_CARRIZO:
2786 case CHIP_STONEY:
4562236b 2787 case CHIP_POLARIS10:
675fd32b 2788 case CHIP_POLARIS11:
2c8ad2d5 2789 case CHIP_POLARIS12:
675fd32b 2790 case CHIP_VEGAM:
4562236b
HW
2791 case CHIP_TONGA:
2792 case CHIP_FIJI:
42f8ffa1 2793 case CHIP_VEGA10:
dca7b401 2794 case CHIP_VEGA12:
c6034aa2 2795 case CHIP_VEGA20:
b86a1aa3 2796#if defined(CONFIG_DRM_AMD_DC_DCN)
fd187853 2797 case CHIP_RAVEN:
b4f199c7 2798 case CHIP_NAVI10:
8fceceb6 2799 case CHIP_NAVI14:
078655d9 2800 case CHIP_NAVI12:
e1c14c43 2801 case CHIP_RENOIR:
81d9bfb8
JFZ
2802#endif
2803#if defined(CONFIG_DRM_AMD_DC_DCN3_0)
2804 case CHIP_SIENNA_CICHLID:
42f8ffa1 2805#endif
fd187853 2806 return amdgpu_dc != 0;
4562236b
HW
2807#endif
2808 default:
93b09a9a
SS
2809 if (amdgpu_dc > 0)
2810 DRM_INFO("Display Core has been requested via kernel parameter "
2811 "but isn't supported by ASIC, ignoring\n");
4562236b
HW
2812 return false;
2813 }
2814}
2815
2816/**
2817 * amdgpu_device_has_dc_support - check if dc is supported
2818 *
 2819	 * @adev: amdgpu_device pointer
2820 *
2821 * Returns true for supported, false for not supported
2822 */
2823bool amdgpu_device_has_dc_support(struct amdgpu_device *adev)
2824{
2555039d
XY
2825 if (amdgpu_sriov_vf(adev))
2826 return false;
2827
4562236b
HW
2828 return amdgpu_device_asic_has_dc_support(adev->asic_type);
2829}
2830
d4535e2c
AG
2831
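/**
 * amdgpu_device_xgmi_reset_func - reset work handler for devices in an XGMI hive
 *
 * @__work: work_struct
 *
 * Work handler used when resetting all the GPUs of an XGMI hive. For the BACO
 * reset method, a task barrier keeps the per-device BACO enter/exit steps in
 * lock step across the hive; for other methods a full barrier is taken and a
 * regular ASIC reset is performed. The result is stored in asic_reset_res.
 */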
2832static void amdgpu_device_xgmi_reset_func(struct work_struct *__work)
2833{
2834 struct amdgpu_device *adev =
2835 container_of(__work, struct amdgpu_device, xgmi_reset_work);
c6a6e2db 2836 struct amdgpu_hive_info *hive = amdgpu_get_xgmi_hive(adev, 0);
d4535e2c 2837
c6a6e2db
AG
2838 /* It's a bug to not have a hive within this function */
2839 if (WARN_ON(!hive))
2840 return;
2841
2842 /*
2843 * Use task barrier to synchronize all xgmi reset works across the
2844 * hive. task_barrier_enter and task_barrier_exit will block
2845 * until all the threads running the xgmi reset works reach
2846 * those points. task_barrier_full will do both blocks.
2847 */
2848 if (amdgpu_asic_reset_method(adev) == AMD_RESET_METHOD_BACO) {
2849
2850 task_barrier_enter(&hive->tb);
2851 adev->asic_reset_res = amdgpu_device_baco_enter(adev->ddev);
2852
2853 if (adev->asic_reset_res)
2854 goto fail;
2855
2856 task_barrier_exit(&hive->tb);
2857 adev->asic_reset_res = amdgpu_device_baco_exit(adev->ddev);
2858
2859 if (adev->asic_reset_res)
2860 goto fail;
43c4d576
JC
2861
2862 if (adev->mmhub.funcs && adev->mmhub.funcs->reset_ras_error_count)
2863 adev->mmhub.funcs->reset_ras_error_count(adev);
c6a6e2db
AG
2864 } else {
2865
2866 task_barrier_full(&hive->tb);
2867 adev->asic_reset_res = amdgpu_asic_reset(adev);
2868 }
ce316fa5 2869
c6a6e2db 2870fail:
d4535e2c 2871 if (adev->asic_reset_res)
fed184e9 2872 DRM_WARN("ASIC reset failed with error, %d for drm dev, %s",
d4535e2c
AG
2873 adev->asic_reset_res, adev->ddev->unique);
2874}
2875
71f98027
AD
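/**
 * amdgpu_device_get_job_timeout_settings - parse the lockup_timeout module parameter
 *
 * @adev: amdgpu_device pointer
 *
 * Parses the comma separated lockup_timeout values in the order
 * gfx, compute, sdma, video. A value of 0 keeps the default for that queue
 * type and a negative value means no timeout (MAX_SCHEDULE_TIMEOUT). When a
 * single value is given it is applied to all non-compute queues,
 * e.g. amdgpu.lockup_timeout=10000,60000,10000,10000 or
 * amdgpu.lockup_timeout=10000.
 * Returns 0 on success, or the kstrtol() error code on a malformed value.
 */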
2876static int amdgpu_device_get_job_timeout_settings(struct amdgpu_device *adev)
2877{
2878 char *input = amdgpu_lockup_timeout;
2879 char *timeout_setting = NULL;
2880 int index = 0;
2881 long timeout;
2882 int ret = 0;
2883
2884 /*
 2885	 * By default the timeout for non-compute jobs is 10000 ms
 2886	 * and there is no timeout enforced on compute jobs.
 2887	 * In SR-IOV or passthrough mode, the timeout for compute
b7b2a316	 2888	 * jobs is 60000 ms by default.
71f98027
AD
2889 */
2890 adev->gfx_timeout = msecs_to_jiffies(10000);
2891 adev->sdma_timeout = adev->video_timeout = adev->gfx_timeout;
2892 if (amdgpu_sriov_vf(adev) || amdgpu_passthrough(adev))
b7b2a316 2893 adev->compute_timeout = msecs_to_jiffies(60000);
71f98027
AD
2894 else
2895 adev->compute_timeout = MAX_SCHEDULE_TIMEOUT;
2896
f440ff44 2897 if (strnlen(input, AMDGPU_MAX_TIMEOUT_PARAM_LENGTH)) {
71f98027 2898 while ((timeout_setting = strsep(&input, ",")) &&
f440ff44 2899 strnlen(timeout_setting, AMDGPU_MAX_TIMEOUT_PARAM_LENGTH)) {
71f98027
AD
2900 ret = kstrtol(timeout_setting, 0, &timeout);
2901 if (ret)
2902 return ret;
2903
2904 if (timeout == 0) {
2905 index++;
2906 continue;
2907 } else if (timeout < 0) {
2908 timeout = MAX_SCHEDULE_TIMEOUT;
2909 } else {
2910 timeout = msecs_to_jiffies(timeout);
2911 }
2912
2913 switch (index++) {
2914 case 0:
2915 adev->gfx_timeout = timeout;
2916 break;
2917 case 1:
2918 adev->compute_timeout = timeout;
2919 break;
2920 case 2:
2921 adev->sdma_timeout = timeout;
2922 break;
2923 case 3:
2924 adev->video_timeout = timeout;
2925 break;
2926 default:
2927 break;
2928 }
2929 }
2930 /*
2931 * There is only one value specified and
2932 * it should apply to all non-compute jobs.
2933 */
bcccee89 2934 if (index == 1) {
71f98027 2935 adev->sdma_timeout = adev->video_timeout = adev->gfx_timeout;
bcccee89
ED
2936 if (amdgpu_sriov_vf(adev) || amdgpu_passthrough(adev))
2937 adev->compute_timeout = adev->gfx_timeout;
2938 }
71f98027
AD
2939 }
2940
2941 return ret;
2942}
d4535e2c 2943
77f3a5cd
ND
2944static const struct attribute *amdgpu_dev_attributes[] = {
2945 &dev_attr_product_name.attr,
2946 &dev_attr_product_number.attr,
2947 &dev_attr_serial_number.attr,
2948 &dev_attr_pcie_replay_count.attr,
2949 NULL
2950};
2951
d38ceaf9
AD
2952/**
2953 * amdgpu_device_init - initialize the driver
2954 *
2955 * @adev: amdgpu_device pointer
87e3f136 2956 * @ddev: drm dev pointer
d38ceaf9
AD
2957 * @pdev: pci dev pointer
2958 * @flags: driver flags
2959 *
2960 * Initializes the driver info and hw (all asics).
2961 * Returns 0 for success or an error on failure.
2962 * Called at driver startup.
2963 */
2964int amdgpu_device_init(struct amdgpu_device *adev,
2965 struct drm_device *ddev,
2966 struct pci_dev *pdev,
2967 uint32_t flags)
2968{
2969 int r, i;
3840c5bc 2970 bool boco = false;
95844d20 2971 u32 max_MBps;
d38ceaf9
AD
2972
2973 adev->shutdown = false;
2974 adev->dev = &pdev->dev;
2975 adev->ddev = ddev;
2976 adev->pdev = pdev;
2977 adev->flags = flags;
4e66d7d2
YZ
2978
2979 if (amdgpu_force_asic_type >= 0 && amdgpu_force_asic_type < CHIP_LAST)
2980 adev->asic_type = amdgpu_force_asic_type;
2981 else
2982 adev->asic_type = flags & AMD_ASIC_MASK;
2983
d38ceaf9 2984 adev->usec_timeout = AMDGPU_MAX_USEC_TIMEOUT;
593aa2d2 2985 if (amdgpu_emu_mode == 1)
8bdab6bb 2986 adev->usec_timeout *= 10;
770d13b1 2987 adev->gmc.gart_size = 512 * 1024 * 1024;
d38ceaf9
AD
2988 adev->accel_working = false;
2989 adev->num_rings = 0;
2990 adev->mman.buffer_funcs = NULL;
2991 adev->mman.buffer_funcs_ring = NULL;
2992 adev->vm_manager.vm_pte_funcs = NULL;
0c88b430 2993 adev->vm_manager.vm_pte_num_scheds = 0;
132f34e4 2994 adev->gmc.gmc_funcs = NULL;
f54d1867 2995 adev->fence_context = dma_fence_context_alloc(AMDGPU_MAX_RINGS);
b8866c26 2996 bitmap_zero(adev->gfx.pipe_reserve_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
d38ceaf9
AD
2997
2998 adev->smc_rreg = &amdgpu_invalid_rreg;
2999 adev->smc_wreg = &amdgpu_invalid_wreg;
3000 adev->pcie_rreg = &amdgpu_invalid_rreg;
3001 adev->pcie_wreg = &amdgpu_invalid_wreg;
36b9a952
HR
3002 adev->pciep_rreg = &amdgpu_invalid_rreg;
3003 adev->pciep_wreg = &amdgpu_invalid_wreg;
4fa1c6a6
TZ
3004 adev->pcie_rreg64 = &amdgpu_invalid_rreg64;
3005 adev->pcie_wreg64 = &amdgpu_invalid_wreg64;
d38ceaf9
AD
3006 adev->uvd_ctx_rreg = &amdgpu_invalid_rreg;
3007 adev->uvd_ctx_wreg = &amdgpu_invalid_wreg;
3008 adev->didt_rreg = &amdgpu_invalid_rreg;
3009 adev->didt_wreg = &amdgpu_invalid_wreg;
ccdbb20a
RZ
3010 adev->gc_cac_rreg = &amdgpu_invalid_rreg;
3011 adev->gc_cac_wreg = &amdgpu_invalid_wreg;
d38ceaf9
AD
3012 adev->audio_endpt_rreg = &amdgpu_block_invalid_rreg;
3013 adev->audio_endpt_wreg = &amdgpu_block_invalid_wreg;
3014
3e39ab90
AD
3015 DRM_INFO("initializing kernel modesetting (%s 0x%04X:0x%04X 0x%04X:0x%04X 0x%02X).\n",
3016 amdgpu_asic_name[adev->asic_type], pdev->vendor, pdev->device,
3017 pdev->subsystem_vendor, pdev->subsystem_device, pdev->revision);
d38ceaf9
AD
3018
 3019	 /* mutex initializations are all done here so we
 3020	 * can recall functions without locking issues */
d38ceaf9 3021 atomic_set(&adev->irq.ih.lock, 0);
0e5ca0d1 3022 mutex_init(&adev->firmware.mutex);
d38ceaf9
AD
3023 mutex_init(&adev->pm.mutex);
3024 mutex_init(&adev->gfx.gpu_clock_mutex);
3025 mutex_init(&adev->srbm_mutex);
b8866c26 3026 mutex_init(&adev->gfx.pipe_reserve_mutex);
d23ee13f 3027 mutex_init(&adev->gfx.gfx_off_mutex);
d38ceaf9 3028 mutex_init(&adev->grbm_idx_mutex);
d38ceaf9 3029 mutex_init(&adev->mn_lock);
e23b74aa 3030 mutex_init(&adev->virt.vf_errors.lock);
d38ceaf9 3031 hash_init(adev->mn_hash);
13a752e3 3032 mutex_init(&adev->lock_reset);
32eaeae0 3033 mutex_init(&adev->psp.mutex);
bd052211 3034 mutex_init(&adev->notifier_lock);
d38ceaf9 3035
912dfc84
EQ
3036 r = amdgpu_device_check_arguments(adev);
3037 if (r)
3038 return r;
d38ceaf9 3039
d38ceaf9
AD
3040 spin_lock_init(&adev->mmio_idx_lock);
3041 spin_lock_init(&adev->smc_idx_lock);
3042 spin_lock_init(&adev->pcie_idx_lock);
3043 spin_lock_init(&adev->uvd_ctx_idx_lock);
3044 spin_lock_init(&adev->didt_idx_lock);
ccdbb20a 3045 spin_lock_init(&adev->gc_cac_idx_lock);
16abb5d2 3046 spin_lock_init(&adev->se_cac_idx_lock);
d38ceaf9 3047 spin_lock_init(&adev->audio_endpt_idx_lock);
95844d20 3048 spin_lock_init(&adev->mm_stats.lock);
d38ceaf9 3049
0c4e7fa5
CZ
3050 INIT_LIST_HEAD(&adev->shadow_list);
3051 mutex_init(&adev->shadow_list_lock);
3052
beff74bc
AD
3053 INIT_DELAYED_WORK(&adev->delayed_init_work,
3054 amdgpu_device_delayed_init_work_handler);
1e317b99
RZ
3055 INIT_DELAYED_WORK(&adev->gfx.gfx_off_delay_work,
3056 amdgpu_device_delay_enable_gfx_off);
2dc80b00 3057
d4535e2c
AG
3058 INIT_WORK(&adev->xgmi_reset_work, amdgpu_device_xgmi_reset_func);
3059
d23ee13f 3060 adev->gfx.gfx_off_req_count = 1;
b6e79d9a 3061 adev->pm.ac_power = power_supply_is_system_supplied() > 0;
b1ddf548 3062
b265bdbd
EQ
3063 atomic_set(&adev->throttling_logging_enabled, 1);
3064 /*
3065 * If throttling continues, logging will be performed every minute
3066 * to avoid log flooding. "-1" is subtracted since the thermal
3067 * throttling interrupt comes every second. Thus, the total logging
 3068	 * interval is 59 seconds (ratelimited printk interval) + 1 (waiting
3069 * for throttling interrupt) = 60 seconds.
3070 */
3071 ratelimit_state_init(&adev->throttling_logging_rs, (60 - 1) * HZ, 1);
3072 ratelimit_set_flags(&adev->throttling_logging_rs, RATELIMIT_MSG_ON_RELEASE);
3073
0fa49558
AX
3074 /* Registers mapping */
3075 /* TODO: block userspace mapping of io register */
da69c161
KW
3076 if (adev->asic_type >= CHIP_BONAIRE) {
3077 adev->rmmio_base = pci_resource_start(adev->pdev, 5);
3078 adev->rmmio_size = pci_resource_len(adev->pdev, 5);
3079 } else {
3080 adev->rmmio_base = pci_resource_start(adev->pdev, 2);
3081 adev->rmmio_size = pci_resource_len(adev->pdev, 2);
3082 }
d38ceaf9 3083
d38ceaf9
AD
3084 adev->rmmio = ioremap(adev->rmmio_base, adev->rmmio_size);
3085 if (adev->rmmio == NULL) {
3086 return -ENOMEM;
3087 }
3088 DRM_INFO("register mmio base: 0x%08X\n", (uint32_t)adev->rmmio_base);
3089 DRM_INFO("register mmio size: %u\n", (unsigned)adev->rmmio_size);
3090
d38ceaf9
AD
3091 /* io port mapping */
3092 for (i = 0; i < DEVICE_COUNT_RESOURCE; i++) {
3093 if (pci_resource_flags(adev->pdev, i) & IORESOURCE_IO) {
3094 adev->rio_mem_size = pci_resource_len(adev->pdev, i);
3095 adev->rio_mem = pci_iomap(adev->pdev, i, adev->rio_mem_size);
3096 break;
3097 }
3098 }
3099 if (adev->rio_mem == NULL)
b64a18c5 3100 DRM_INFO("PCI I/O BAR is not found.\n");
d38ceaf9 3101
b2109d8e
JX
3102 /* enable PCIE atomic ops */
3103 r = pci_enable_atomic_ops_to_root(adev->pdev,
3104 PCI_EXP_DEVCAP2_ATOMIC_COMP32 |
3105 PCI_EXP_DEVCAP2_ATOMIC_COMP64);
3106 if (r) {
3107 adev->have_atomics_support = false;
3108 DRM_INFO("PCIE atomic ops is not supported\n");
3109 } else {
3110 adev->have_atomics_support = true;
3111 }
3112
5494d864
AD
3113 amdgpu_device_get_pcie_info(adev);
3114
b239c017
JX
3115 if (amdgpu_mcbp)
3116 DRM_INFO("MCBP is enabled\n");
3117
5f84cc63
JX
3118 if (amdgpu_mes && adev->asic_type >= CHIP_NAVI10)
3119 adev->enable_mes = true;
3120
3aa0115d
ML
3121 /* detect hw virtualization here */
3122 amdgpu_detect_virtualization(adev);
3123
dffa11b4
ML
3124 r = amdgpu_device_get_job_timeout_settings(adev);
3125 if (r) {
3126 dev_err(adev->dev, "invalid lockup_timeout parameter syntax\n");
3127 return r;
a190d1c7
XY
3128 }
3129
d38ceaf9 3130 /* early init functions */
06ec9070 3131 r = amdgpu_device_ip_early_init(adev);
d38ceaf9
AD
3132 if (r)
3133 return r;
3134
6585661d
OZ
3135 /* doorbell bar mapping and doorbell index init*/
3136 amdgpu_device_doorbell_init(adev);
3137
d38ceaf9
AD
3138 /* if we have > 1 VGA cards, then disable the amdgpu VGA resources */
3139 /* this will fail for cards that aren't VGA class devices, just
3140 * ignore it */
06ec9070 3141 vga_client_register(adev->pdev, adev, NULL, amdgpu_device_vga_set_decode);
d38ceaf9 3142
31af062a 3143 if (amdgpu_device_supports_boco(ddev))
3840c5bc
AD
3144 boco = true;
3145 if (amdgpu_has_atpx() &&
3146 (amdgpu_is_atpx_hybrid() ||
3147 amdgpu_has_atpx_dgpu_power_cntl()) &&
3148 !pci_is_thunderbolt_attached(adev->pdev))
84c8b22e 3149 vga_switcheroo_register_client(adev->pdev,
3840c5bc
AD
3150 &amdgpu_switcheroo_ops, boco);
3151 if (boco)
d38ceaf9
AD
3152 vga_switcheroo_init_domain_pm_ops(adev->dev, &adev->vga_pm_domain);
3153
9475a943
SL
3154 if (amdgpu_emu_mode == 1) {
3155 /* post the asic on emulation mode */
3156 emu_soc_asic_init(adev);
bfca0289 3157 goto fence_driver_init;
9475a943 3158 }
bfca0289 3159
4e99a44e
ML
3160 /* detect if we are with an SRIOV vbios */
3161 amdgpu_device_detect_sriov_bios(adev);
048765ad 3162
95e8e59e
AD
3163 /* check if we need to reset the asic
3164 * E.g., driver was not cleanly unloaded previously, etc.
3165 */
f14899fd 3166 if (!amdgpu_sriov_vf(adev) && amdgpu_asic_need_reset_on_init(adev)) {
95e8e59e
AD
3167 r = amdgpu_asic_reset(adev);
3168 if (r) {
3169 dev_err(adev->dev, "asic reset on init failed\n");
3170 goto failed;
3171 }
3172 }
3173
d38ceaf9 3174 /* Post card if necessary */
39c640c0 3175 if (amdgpu_device_need_post(adev)) {
d38ceaf9 3176 if (!adev->bios) {
bec86378 3177 dev_err(adev->dev, "no vBIOS found\n");
83ba126a
AD
3178 r = -EINVAL;
3179 goto failed;
d38ceaf9 3180 }
bec86378 3181 DRM_INFO("GPU posting now...\n");
4e99a44e
ML
3182 r = amdgpu_atom_asic_init(adev->mode_info.atom_context);
3183 if (r) {
3184 dev_err(adev->dev, "gpu post error!\n");
3185 goto failed;
3186 }
d38ceaf9
AD
3187 }
3188
88b64e95
AD
3189 if (adev->is_atom_fw) {
3190 /* Initialize clocks */
3191 r = amdgpu_atomfirmware_get_clock_info(adev);
3192 if (r) {
3193 dev_err(adev->dev, "amdgpu_atomfirmware_get_clock_info failed\n");
e23b74aa 3194 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_GET_CLOCK_FAIL, 0, 0);
88b64e95
AD
3195 goto failed;
3196 }
3197 } else {
a5bde2f9
AD
3198 /* Initialize clocks */
3199 r = amdgpu_atombios_get_clock_info(adev);
3200 if (r) {
3201 dev_err(adev->dev, "amdgpu_atombios_get_clock_info failed\n");
e23b74aa 3202 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_GET_CLOCK_FAIL, 0, 0);
89041940 3203 goto failed;
a5bde2f9
AD
3204 }
3205 /* init i2c buses */
4562236b
HW
3206 if (!amdgpu_device_has_dc_support(adev))
3207 amdgpu_atombios_i2c_init(adev);
2c1a2784 3208 }
d38ceaf9 3209
bfca0289 3210fence_driver_init:
d38ceaf9
AD
3211 /* Fence driver */
3212 r = amdgpu_fence_driver_init(adev);
2c1a2784
AD
3213 if (r) {
3214 dev_err(adev->dev, "amdgpu_fence_driver_init failed\n");
e23b74aa 3215 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_FENCE_INIT_FAIL, 0, 0);
83ba126a 3216 goto failed;
2c1a2784 3217 }
d38ceaf9
AD
3218
3219 /* init the mode config */
3220 drm_mode_config_init(adev->ddev);
3221
06ec9070 3222 r = amdgpu_device_ip_init(adev);
d38ceaf9 3223 if (r) {
8840a387 3224 /* failed in exclusive mode due to timeout */
3225 if (amdgpu_sriov_vf(adev) &&
3226 !amdgpu_sriov_runtime(adev) &&
3227 amdgpu_virt_mmio_blocked(adev) &&
3228 !amdgpu_virt_wait_reset(adev)) {
3229 dev_err(adev->dev, "VF exclusive mode timeout\n");
1daee8b4
PD
3230 /* Don't send request since VF is inactive. */
3231 adev->virt.caps &= ~AMDGPU_SRIOV_CAPS_RUNTIME;
3232 adev->virt.ops = NULL;
8840a387 3233 r = -EAGAIN;
3234 goto failed;
3235 }
06ec9070 3236 dev_err(adev->dev, "amdgpu_device_ip_init failed\n");
e23b74aa 3237 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_AMDGPU_INIT_FAIL, 0, 0);
83ba126a 3238 goto failed;
d38ceaf9
AD
3239 }
3240
d69b8971
YZ
3241 dev_info(adev->dev,
3242 "SE %d, SH per SE %d, CU per SH %d, active_cu_number %d\n",
d7f72fe4
YZ
3243 adev->gfx.config.max_shader_engines,
3244 adev->gfx.config.max_sh_per_se,
3245 adev->gfx.config.max_cu_per_sh,
3246 adev->gfx.cu_info.number);
3247
d38ceaf9
AD
3248 adev->accel_working = true;
3249
e59c0205
AX
3250 amdgpu_vm_check_compute_bug(adev);
3251
95844d20
MO
3252 /* Initialize the buffer migration limit. */
3253 if (amdgpu_moverate >= 0)
3254 max_MBps = amdgpu_moverate;
3255 else
3256 max_MBps = 8; /* Allow 8 MB/s. */
3257 /* Get a log2 for easy divisions. */
3258 adev->mm_stats.log2_max_MBps = ilog2(max(1u, max_MBps));
3259
9bc92b9c
ML
3260 amdgpu_fbdev_init(adev);
3261
d2f52ac8 3262 r = amdgpu_pm_sysfs_init(adev);
7c868b59
YT
3263 if (r) {
3264 adev->pm_sysfs_en = false;
d2f52ac8 3265 DRM_ERROR("registering pm debugfs failed (%d).\n", r);
7c868b59
YT
3266 } else
3267 adev->pm_sysfs_en = true;
d2f52ac8 3268
5bb23532 3269 r = amdgpu_ucode_sysfs_init(adev);
7c868b59
YT
3270 if (r) {
3271 adev->ucode_sysfs_en = false;
5bb23532 3272 DRM_ERROR("Creating firmware sysfs failed (%d).\n", r);
7c868b59
YT
3273 } else
3274 adev->ucode_sysfs_en = true;
5bb23532 3275
d38ceaf9
AD
3276 if ((amdgpu_testing & 1)) {
3277 if (adev->accel_working)
3278 amdgpu_test_moves(adev);
3279 else
3280 DRM_INFO("amdgpu: acceleration disabled, skipping move tests\n");
3281 }
d38ceaf9
AD
3282 if (amdgpu_benchmarking) {
3283 if (adev->accel_working)
3284 amdgpu_benchmark(adev, amdgpu_benchmarking);
3285 else
3286 DRM_INFO("amdgpu: acceleration disabled, skipping benchmarks\n");
3287 }
3288
b0adca4d
EQ
3289 /*
3290 * Register gpu instance before amdgpu_device_enable_mgpu_fan_boost.
3291 * Otherwise the mgpu fan boost feature will be skipped due to the
3292 * gpu instance is counted less.
3293 */
3294 amdgpu_register_gpu_instance(adev);
3295
d38ceaf9
AD
3296 /* enable clockgating, etc. after ib tests, etc. since some blocks require
3297 * explicit gating rather than handling it automatically.
3298 */
06ec9070 3299 r = amdgpu_device_ip_late_init(adev);
2c1a2784 3300 if (r) {
06ec9070 3301 dev_err(adev->dev, "amdgpu_device_ip_late_init failed\n");
e23b74aa 3302 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_AMDGPU_LATE_INIT_FAIL, 0, r);
83ba126a 3303 goto failed;
2c1a2784 3304 }
d38ceaf9 3305
108c6a63 3306 /* must succeed. */
511fdbc3 3307 amdgpu_ras_resume(adev);
108c6a63 3308
beff74bc
AD
3309 queue_delayed_work(system_wq, &adev->delayed_init_work,
3310 msecs_to_jiffies(AMDGPU_RESUME_MS));
3311
77f3a5cd 3312 r = sysfs_create_files(&adev->dev->kobj, amdgpu_dev_attributes);
bd607166 3313 if (r) {
77f3a5cd 3314 dev_err(adev->dev, "Could not create amdgpu device attr\n");
bd607166
KR
3315 return r;
3316 }
3317
d155bef0
AB
3318 if (IS_ENABLED(CONFIG_PERF_EVENTS))
3319 r = amdgpu_pmu_init(adev);
9c7c85f7
JK
3320 if (r)
3321 dev_err(adev->dev, "amdgpu_pmu_init failed\n");
3322
d38ceaf9 3323 return 0;
83ba126a
AD
3324
3325failed:
89041940 3326 amdgpu_vf_error_trans_all(adev);
3840c5bc 3327 if (boco)
83ba126a 3328 vga_switcheroo_fini_domain_pm_ops(adev->dev);
8840a387 3329
83ba126a 3330 return r;
d38ceaf9
AD
3331}
3332
d38ceaf9
AD
3333/**
3334 * amdgpu_device_fini - tear down the driver
3335 *
3336 * @adev: amdgpu_device pointer
3337 *
3338 * Tear down the driver info (all asics).
3339 * Called at driver shutdown.
3340 */
3341void amdgpu_device_fini(struct amdgpu_device *adev)
3342{
3343 int r;
3344
3345 DRM_INFO("amdgpu: finishing device.\n");
9f875167 3346 flush_delayed_work(&adev->delayed_init_work);
d0d13fe8 3347 adev->shutdown = true;
9f875167 3348
752c683d
ML
3349 /* make sure IB test finished before entering exclusive mode
3350 * to avoid preemption on IB test
3351 * */
3352 if (amdgpu_sriov_vf(adev))
3353 amdgpu_virt_request_full_gpu(adev, false);
3354
e5b03032
ML
3355 /* disable all interrupts */
3356 amdgpu_irq_disable_all(adev);
ff97cba8
ML
3357 if (adev->mode_info.mode_config_initialized){
3358 if (!amdgpu_device_has_dc_support(adev))
c2d88e06 3359 drm_helper_force_disable_all(adev->ddev);
ff97cba8
ML
3360 else
3361 drm_atomic_helper_shutdown(adev->ddev);
3362 }
d38ceaf9 3363 amdgpu_fence_driver_fini(adev);
7c868b59
YT
3364 if (adev->pm_sysfs_en)
3365 amdgpu_pm_sysfs_fini(adev);
d38ceaf9 3366 amdgpu_fbdev_fini(adev);
06ec9070 3367 r = amdgpu_device_ip_fini(adev);
ab4fe3e1
HR
3368 if (adev->firmware.gpu_info_fw) {
3369 release_firmware(adev->firmware.gpu_info_fw);
3370 adev->firmware.gpu_info_fw = NULL;
3371 }
d38ceaf9
AD
3372 adev->accel_working = false;
3373 /* free i2c buses */
4562236b
HW
3374 if (!amdgpu_device_has_dc_support(adev))
3375 amdgpu_i2c_fini(adev);
bfca0289
SL
3376
3377 if (amdgpu_emu_mode != 1)
3378 amdgpu_atombios_fini(adev);
3379
d38ceaf9
AD
3380 kfree(adev->bios);
3381 adev->bios = NULL;
3840c5bc
AD
3382 if (amdgpu_has_atpx() &&
3383 (amdgpu_is_atpx_hybrid() ||
3384 amdgpu_has_atpx_dgpu_power_cntl()) &&
3385 !pci_is_thunderbolt_attached(adev->pdev))
84c8b22e 3386 vga_switcheroo_unregister_client(adev->pdev);
3840c5bc 3387 if (amdgpu_device_supports_boco(adev->ddev))
83ba126a 3388 vga_switcheroo_fini_domain_pm_ops(adev->dev);
d38ceaf9
AD
3389 vga_client_register(adev->pdev, NULL, NULL, NULL);
3390 if (adev->rio_mem)
3391 pci_iounmap(adev->pdev, adev->rio_mem);
3392 adev->rio_mem = NULL;
3393 iounmap(adev->rmmio);
3394 adev->rmmio = NULL;
06ec9070 3395 amdgpu_device_doorbell_fini(adev);
e9bc1bf7 3396
7c868b59
YT
3397 if (adev->ucode_sysfs_en)
3398 amdgpu_ucode_sysfs_fini(adev);
77f3a5cd
ND
3399
3400 sysfs_remove_files(&adev->dev->kobj, amdgpu_dev_attributes);
d155bef0
AB
3401 if (IS_ENABLED(CONFIG_PERF_EVENTS))
3402 amdgpu_pmu_fini(adev);
4292b0b2 3403 if (adev->discovery_bin)
a190d1c7 3404 amdgpu_discovery_fini(adev);
d38ceaf9
AD
3405}
3406
3407
3408/*
3409 * Suspend & resume.
3410 */
3411/**
810ddc3a 3412 * amdgpu_device_suspend - initiate device suspend
d38ceaf9 3413 *
87e3f136
DP
3414 * @dev: drm dev pointer
3415 * @fbcon: notify the fbdev of suspend
d38ceaf9
AD
3417 *
3418 * Puts the hw in the suspend state (all asics).
3419 * Returns 0 for success or an error on failure.
3420 * Called at driver suspend.
3421 */
de185019 3422int amdgpu_device_suspend(struct drm_device *dev, bool fbcon)
d38ceaf9
AD
3423{
3424 struct amdgpu_device *adev;
3425 struct drm_crtc *crtc;
3426 struct drm_connector *connector;
f8d2d39e 3427 struct drm_connector_list_iter iter;
5ceb54c6 3428 int r;
d38ceaf9
AD
3429
3430 if (dev == NULL || dev->dev_private == NULL) {
3431 return -ENODEV;
3432 }
3433
3434 adev = dev->dev_private;
3435
3436 if (dev->switch_power_state == DRM_SWITCH_POWER_OFF)
3437 return 0;
3438
44779b43 3439 adev->in_suspend = true;
d38ceaf9
AD
3440 drm_kms_helper_poll_disable(dev);
3441
5f818173
S
3442 if (fbcon)
3443 amdgpu_fbdev_set_suspend(adev, 1);
3444
beff74bc 3445 cancel_delayed_work_sync(&adev->delayed_init_work);
a5459475 3446
4562236b
HW
3447 if (!amdgpu_device_has_dc_support(adev)) {
3448 /* turn off display hw */
3449 drm_modeset_lock_all(dev);
f8d2d39e
LP
3450 drm_connector_list_iter_begin(dev, &iter);
3451 drm_for_each_connector_iter(connector, &iter)
3452 drm_helper_connector_dpms(connector,
3453 DRM_MODE_DPMS_OFF);
3454 drm_connector_list_iter_end(&iter);
4562236b 3455 drm_modeset_unlock_all(dev);
fe1053b7
AD
3456 /* unpin the front buffers and cursors */
3457 list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) {
3458 struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
3459 struct drm_framebuffer *fb = crtc->primary->fb;
3460 struct amdgpu_bo *robj;
3461
91334223 3462 if (amdgpu_crtc->cursor_bo && !adev->enable_virtual_display) {
fe1053b7
AD
3463 struct amdgpu_bo *aobj = gem_to_amdgpu_bo(amdgpu_crtc->cursor_bo);
3464 r = amdgpu_bo_reserve(aobj, true);
3465 if (r == 0) {
3466 amdgpu_bo_unpin(aobj);
3467 amdgpu_bo_unreserve(aobj);
3468 }
756e6880 3469 }
756e6880 3470
fe1053b7
AD
3471 if (fb == NULL || fb->obj[0] == NULL) {
3472 continue;
3473 }
3474 robj = gem_to_amdgpu_bo(fb->obj[0]);
3475 /* don't unpin kernel fb objects */
3476 if (!amdgpu_fbdev_robj_is_fb(adev, robj)) {
3477 r = amdgpu_bo_reserve(robj, true);
3478 if (r == 0) {
3479 amdgpu_bo_unpin(robj);
3480 amdgpu_bo_unreserve(robj);
3481 }
d38ceaf9
AD
3482 }
3483 }
3484 }
fe1053b7 3485
5e6932fe 3486 amdgpu_ras_suspend(adev);
3487
fe1053b7
AD
3488 r = amdgpu_device_ip_suspend_phase1(adev);
3489
94fa5660
EQ
3490 amdgpu_amdkfd_suspend(adev, !fbcon);
3491
d38ceaf9
AD
3492 /* evict vram memory */
3493 amdgpu_bo_evict_vram(adev);
3494
5ceb54c6 3495 amdgpu_fence_driver_suspend(adev);
d38ceaf9 3496
fe1053b7 3497 r = amdgpu_device_ip_suspend_phase2(adev);
d38ceaf9 3498
a0a71e49
AD
3499 /* evict remaining vram memory
3500 * This second call to evict vram is to evict the gart page table
3501 * using the CPU.
3502 */
d38ceaf9
AD
3503 amdgpu_bo_evict_vram(adev);
3504
d38ceaf9
AD
3505 return 0;
3506}
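/*
 * A hedged usage sketch (assumed caller in amdgpu_drv.c, not part of this
 * file): the system PM callbacks are expected to wrap this helper roughly as
 *
 *	static int amdgpu_pmops_suspend(struct device *dev)
 *	{
 *		struct drm_device *drm_dev = dev_get_drvdata(dev);
 *
 *		return amdgpu_device_suspend(drm_dev, true);
 *	}
 *
 * with fbcon = true so the fbdev console is notified of the suspend.
 */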
3507
3508/**
810ddc3a 3509 * amdgpu_device_resume - initiate device resume
d38ceaf9 3510 *
87e3f136
DP
3511 * @dev: drm dev pointer
3512 * @fbcon: notify the fbdev of resume
d38ceaf9
AD
3514 *
3515 * Bring the hw back to operating state (all asics).
3516 * Returns 0 for success or an error on failure.
3517 * Called at driver resume.
3518 */
de185019 3519int amdgpu_device_resume(struct drm_device *dev, bool fbcon)
d38ceaf9
AD
3520{
3521 struct drm_connector *connector;
f8d2d39e 3522 struct drm_connector_list_iter iter;
d38ceaf9 3523 struct amdgpu_device *adev = dev->dev_private;
756e6880 3524 struct drm_crtc *crtc;
03161a6e 3525 int r = 0;
d38ceaf9
AD
3526
3527 if (dev->switch_power_state == DRM_SWITCH_POWER_OFF)
3528 return 0;
3529
d38ceaf9 3530 /* post card */
39c640c0 3531 if (amdgpu_device_need_post(adev)) {
74b0b157 3532 r = amdgpu_atom_asic_init(adev->mode_info.atom_context);
3533 if (r)
3534 DRM_ERROR("amdgpu asic init failed\n");
3535 }
d38ceaf9 3536
06ec9070 3537 r = amdgpu_device_ip_resume(adev);
e6707218 3538 if (r) {
06ec9070 3539 DRM_ERROR("amdgpu_device_ip_resume failed (%d).\n", r);
4d3b9ae5 3540 return r;
e6707218 3541 }
5ceb54c6
AD
3542 amdgpu_fence_driver_resume(adev);
3543
d38ceaf9 3544
06ec9070 3545 r = amdgpu_device_ip_late_init(adev);
03161a6e 3546 if (r)
4d3b9ae5 3547 return r;
d38ceaf9 3548
beff74bc
AD
3549 queue_delayed_work(system_wq, &adev->delayed_init_work,
3550 msecs_to_jiffies(AMDGPU_RESUME_MS));
3551
fe1053b7
AD
3552 if (!amdgpu_device_has_dc_support(adev)) {
3553 /* pin cursors */
3554 list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) {
3555 struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
3556
91334223 3557 if (amdgpu_crtc->cursor_bo && !adev->enable_virtual_display) {
fe1053b7
AD
3558 struct amdgpu_bo *aobj = gem_to_amdgpu_bo(amdgpu_crtc->cursor_bo);
3559 r = amdgpu_bo_reserve(aobj, true);
3560 if (r == 0) {
3561 r = amdgpu_bo_pin(aobj, AMDGPU_GEM_DOMAIN_VRAM);
3562 if (r != 0)
3563 DRM_ERROR("Failed to pin cursor BO (%d)\n", r);
3564 amdgpu_crtc->cursor_addr = amdgpu_bo_gpu_offset(aobj);
3565 amdgpu_bo_unreserve(aobj);
3566 }
756e6880
AD
3567 }
3568 }
3569 }
9593f4d6 3570 r = amdgpu_amdkfd_resume(adev, !fbcon);
ba997709
YZ
3571 if (r)
3572 return r;
756e6880 3573
96a5d8d4 3574 /* Make sure IB tests flushed */
beff74bc 3575 flush_delayed_work(&adev->delayed_init_work);
96a5d8d4 3576
d38ceaf9
AD
3577 /* blat the mode back in */
3578 if (fbcon) {
4562236b
HW
3579 if (!amdgpu_device_has_dc_support(adev)) {
3580 /* pre DCE11 */
3581 drm_helper_resume_force_mode(dev);
3582
3583 /* turn on display hw */
3584 drm_modeset_lock_all(dev);
f8d2d39e
LP
3585
3586 drm_connector_list_iter_begin(dev, &iter);
3587 drm_for_each_connector_iter(connector, &iter)
3588 drm_helper_connector_dpms(connector,
3589 DRM_MODE_DPMS_ON);
3590 drm_connector_list_iter_end(&iter);
3591
4562236b 3592 drm_modeset_unlock_all(dev);
d38ceaf9 3593 }
4d3b9ae5 3594 amdgpu_fbdev_set_suspend(adev, 0);
d38ceaf9
AD
3595 }
3596
3597 drm_kms_helper_poll_enable(dev);
23a1a9e5 3598
5e6932fe 3599 amdgpu_ras_resume(adev);
3600
23a1a9e5
L
3601 /*
3602 * Most of the connector probing functions try to acquire runtime pm
3603 * refs to ensure that the GPU is powered on when connector polling is
3604 * performed. Since we're calling this from a runtime PM callback,
3605 * trying to acquire rpm refs will cause us to deadlock.
3606 *
3607 * Since we're guaranteed to be holding the rpm lock, it's safe to
3608 * temporarily disable the rpm helpers so this doesn't deadlock us.
3609 */
3610#ifdef CONFIG_PM
3611 dev->dev->power.disable_depth++;
3612#endif
4562236b
HW
3613 if (!amdgpu_device_has_dc_support(adev))
3614 drm_helper_hpd_irq_event(dev);
3615 else
3616 drm_kms_helper_hotplug_event(dev);
23a1a9e5
L
3617#ifdef CONFIG_PM
3618 dev->dev->power.disable_depth--;
3619#endif
44779b43
RZ
3620 adev->in_suspend = false;
3621
4d3b9ae5 3622 return 0;
d38ceaf9
AD
3623}
3624
e3ecdffa
AD
3625/**
3626 * amdgpu_device_ip_check_soft_reset - did soft reset succeed
3627 *
3628 * @adev: amdgpu_device pointer
3629 *
3630 * The list of all the hardware IPs that make up the asic is walked and
3631 * the check_soft_reset callbacks are run. check_soft_reset determines
3632 * if the asic is still hung or not.
3633 * Returns true if any of the IPs are still in a hung state, false if not.
3634 */
06ec9070 3635static bool amdgpu_device_ip_check_soft_reset(struct amdgpu_device *adev)
63fbf42f
CZ
3636{
3637 int i;
3638 bool asic_hang = false;
3639
f993d628
ML
3640 if (amdgpu_sriov_vf(adev))
3641 return true;
3642
8bc04c29
AD
3643 if (amdgpu_asic_need_full_reset(adev))
3644 return true;
3645
63fbf42f 3646 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 3647 if (!adev->ip_blocks[i].status.valid)
63fbf42f 3648 continue;
a1255107
AD
3649 if (adev->ip_blocks[i].version->funcs->check_soft_reset)
3650 adev->ip_blocks[i].status.hang =
3651 adev->ip_blocks[i].version->funcs->check_soft_reset(adev);
3652 if (adev->ip_blocks[i].status.hang) {
3653 DRM_INFO("IP block:%s is hung!\n", adev->ip_blocks[i].version->funcs->name);
63fbf42f
CZ
3654 asic_hang = true;
3655 }
3656 }
3657 return asic_hang;
3658}
3659
e3ecdffa
AD
3660/**
3661 * amdgpu_device_ip_pre_soft_reset - prepare for soft reset
3662 *
3663 * @adev: amdgpu_device pointer
3664 *
3665 * The list of all the hardware IPs that make up the asic is walked and the
3666 * pre_soft_reset callbacks are run if the block is hung. pre_soft_reset
3667 * handles any IP specific hardware or software state changes that are
3668 * necessary for a soft reset to succeed.
3669 * Returns 0 on success, negative error code on failure.
3670 */
06ec9070 3671static int amdgpu_device_ip_pre_soft_reset(struct amdgpu_device *adev)
d31a501e
CZ
3672{
3673 int i, r = 0;
3674
3675 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 3676 if (!adev->ip_blocks[i].status.valid)
d31a501e 3677 continue;
a1255107
AD
3678 if (adev->ip_blocks[i].status.hang &&
3679 adev->ip_blocks[i].version->funcs->pre_soft_reset) {
3680 r = adev->ip_blocks[i].version->funcs->pre_soft_reset(adev);
d31a501e
CZ
3681 if (r)
3682 return r;
3683 }
3684 }
3685
3686 return 0;
3687}
3688
e3ecdffa
AD
3689/**
3690 * amdgpu_device_ip_need_full_reset - check if a full asic reset is needed
3691 *
3692 * @adev: amdgpu_device pointer
3693 *
3694 * Some hardware IPs cannot be soft reset. If they are hung, a full gpu
3695 * reset is necessary to recover.
3696 * Returns true if a full asic reset is required, false if not.
3697 */
06ec9070 3698static bool amdgpu_device_ip_need_full_reset(struct amdgpu_device *adev)
35d782fe 3699{
da146d3b
AD
3700 int i;
3701
8bc04c29
AD
3702 if (amdgpu_asic_need_full_reset(adev))
3703 return true;
3704
da146d3b 3705 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 3706 if (!adev->ip_blocks[i].status.valid)
da146d3b 3707 continue;
a1255107
AD
3708 if ((adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) ||
3709 (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) ||
3710 (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_ACP) ||
98512bb8
KW
3711 (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_DCE) ||
3712 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP) {
a1255107 3713 if (adev->ip_blocks[i].status.hang) {
da146d3b
AD
3714 DRM_INFO("Some blocks need a full reset!\n");
3715 return true;
3716 }
3717 }
35d782fe
CZ
3718 }
3719 return false;
3720}
3721
e3ecdffa
AD
3722/**
3723 * amdgpu_device_ip_soft_reset - do a soft reset
3724 *
3725 * @adev: amdgpu_device pointer
3726 *
3727 * The list of all the hardware IPs that make up the asic is walked and the
3728 * soft_reset callbacks are run if the block is hung. soft_reset handles any
3729 * IP specific hardware or software state changes that are necessary to soft
3730 * reset the IP.
3731 * Returns 0 on success, negative error code on failure.
3732 */
06ec9070 3733static int amdgpu_device_ip_soft_reset(struct amdgpu_device *adev)
35d782fe
CZ
3734{
3735 int i, r = 0;
3736
3737 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 3738 if (!adev->ip_blocks[i].status.valid)
35d782fe 3739 continue;
a1255107
AD
3740 if (adev->ip_blocks[i].status.hang &&
3741 adev->ip_blocks[i].version->funcs->soft_reset) {
3742 r = adev->ip_blocks[i].version->funcs->soft_reset(adev);
35d782fe
CZ
3743 if (r)
3744 return r;
3745 }
3746 }
3747
3748 return 0;
3749}
3750
e3ecdffa
AD
3751/**
3752 * amdgpu_device_ip_post_soft_reset - clean up from soft reset
3753 *
3754 * @adev: amdgpu_device pointer
3755 *
3756 * The list of all the hardware IPs that make up the asic is walked and the
3757 * post_soft_reset callbacks are run if the asic was hung. post_soft_reset
3758 * handles any IP specific hardware or software state changes that are
3759 * necessary after the IP has been soft reset.
3760 * Returns 0 on success, negative error code on failure.
3761 */
06ec9070 3762static int amdgpu_device_ip_post_soft_reset(struct amdgpu_device *adev)
35d782fe
CZ
3763{
3764 int i, r = 0;
3765
3766 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 3767 if (!adev->ip_blocks[i].status.valid)
35d782fe 3768 continue;
a1255107
AD
3769 if (adev->ip_blocks[i].status.hang &&
3770 adev->ip_blocks[i].version->funcs->post_soft_reset)
3771 r = adev->ip_blocks[i].version->funcs->post_soft_reset(adev);
35d782fe
CZ
3772 if (r)
3773 return r;
3774 }
3775
3776 return 0;
3777}
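/*
 * Summary (derived from the helpers above and their caller below): the soft
 * reset protocol per hung IP block is check_soft_reset() -> pre_soft_reset()
 * -> soft_reset() -> post_soft_reset(), keyed off ip_blocks[i].status.hang;
 * amdgpu_device_pre_asic_reset() strings these together and falls back to a
 * full reset if the soft reset does not clear the hang.
 */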
3778
e3ecdffa 3779/**
c33adbc7 3780 * amdgpu_device_recover_vram - Recover some VRAM contents
e3ecdffa
AD
3781 *
3782 * @adev: amdgpu_device pointer
3783 *
3784 * Restores the contents of VRAM buffers from the shadows in GTT. Used to
3785 * restore things like GPUVM page tables after a GPU reset where
3786 * the contents of VRAM might be lost.
403009bf
CK
3787 *
3788 * Returns:
3789 * 0 on success, negative error code on failure.
e3ecdffa 3790 */
c33adbc7 3791static int amdgpu_device_recover_vram(struct amdgpu_device *adev)
c41d1cf6 3792{
c41d1cf6 3793 struct dma_fence *fence = NULL, *next = NULL;
403009bf
CK
3794 struct amdgpu_bo *shadow;
3795 long r = 1, tmo;
c41d1cf6
ML
3796
3797 if (amdgpu_sriov_runtime(adev))
b045d3af 3798 tmo = msecs_to_jiffies(8000);
c41d1cf6
ML
3799 else
3800 tmo = msecs_to_jiffies(100);
3801
3802 DRM_INFO("recover vram bo from shadow start\n");
3803 mutex_lock(&adev->shadow_list_lock);
403009bf
CK
3804 list_for_each_entry(shadow, &adev->shadow_list, shadow_list) {
3805
3806 /* No need to recover an evicted BO */
3807 if (shadow->tbo.mem.mem_type != TTM_PL_TT ||
b575f10d 3808 shadow->tbo.mem.start == AMDGPU_BO_INVALID_OFFSET ||
403009bf
CK
3809 shadow->parent->tbo.mem.mem_type != TTM_PL_VRAM)
3810 continue;
3811
3812 r = amdgpu_bo_restore_shadow(shadow, &next);
3813 if (r)
3814 break;
3815
c41d1cf6 3816 if (fence) {
1712fb1a 3817 tmo = dma_fence_wait_timeout(fence, false, tmo);
403009bf
CK
3818 dma_fence_put(fence);
3819 fence = next;
1712fb1a 3820 if (tmo == 0) {
3821 r = -ETIMEDOUT;
c41d1cf6 3822 break;
1712fb1a 3823 } else if (tmo < 0) {
3824 r = tmo;
3825 break;
3826 }
403009bf
CK
3827 } else {
3828 fence = next;
c41d1cf6 3829 }
c41d1cf6
ML
3830 }
3831 mutex_unlock(&adev->shadow_list_lock);
3832
403009bf
CK
3833 if (fence)
3834 tmo = dma_fence_wait_timeout(fence, false, tmo);
c41d1cf6
ML
3835 dma_fence_put(fence);
3836
1712fb1a 3837 if (r < 0 || tmo <= 0) {
3838 DRM_ERROR("recover vram bo from shadow failed, r is %ld, tmo is %ld\n", r, tmo);
403009bf
CK
3839 return -EIO;
3840 }
c41d1cf6 3841
403009bf
CK
3842 DRM_INFO("recover vram bo from shadow done\n");
3843 return 0;
c41d1cf6
ML
3844}
3845
a90ad3c2 3846
e3ecdffa 3847/**
06ec9070 3848 * amdgpu_device_reset_sriov - reset ASIC for SR-IOV vf
5740682e
ML
3849 *
3850 * @adev: amdgpu device pointer
87e3f136 3851 * @from_hypervisor: request from hypervisor
5740682e
ML
3852 *
3853 * do a VF FLR and reinitialize the ASIC
3f48c681 3854 * Returns 0 if it succeeded, otherwise a negative error code.
e3ecdffa
AD
3855 */
3856static int amdgpu_device_reset_sriov(struct amdgpu_device *adev,
3857 bool from_hypervisor)
5740682e
ML
3858{
3859 int r;
3860
3861 if (from_hypervisor)
3862 r = amdgpu_virt_request_full_gpu(adev, true);
3863 else
3864 r = amdgpu_virt_reset_gpu(adev);
3865 if (r)
3866 return r;
a90ad3c2 3867
b639c22c
JZ
3868 amdgpu_amdkfd_pre_reset(adev);
3869
a90ad3c2 3870 /* Resume IP prior to SMC */
06ec9070 3871 r = amdgpu_device_ip_reinit_early_sriov(adev);
5740682e
ML
3872 if (r)
3873 goto error;
a90ad3c2 3874
c9ffa427 3875 amdgpu_virt_init_data_exchange(adev);
a90ad3c2 3876 /* we need recover gart prior to run SMC/CP/SDMA resume */
c1c7ce8f 3877 amdgpu_gtt_mgr_recover(&adev->mman.bdev.man[TTM_PL_TT]);
a90ad3c2 3878
7a3e0bb2
RZ
3879 r = amdgpu_device_fw_loading(adev);
3880 if (r)
3881 return r;
3882
a90ad3c2 3883 /* now we are okay to resume SMC/CP/SDMA */
06ec9070 3884 r = amdgpu_device_ip_reinit_late_sriov(adev);
5740682e
ML
3885 if (r)
3886 goto error;
a90ad3c2
ML
3887
3888 amdgpu_irq_gpu_reset_resume_helper(adev);
5740682e 3889 r = amdgpu_ib_ring_tests(adev);
f81e8d53 3890 amdgpu_amdkfd_post_reset(adev);
a90ad3c2 3891
abc34253
ED
3892error:
3893 amdgpu_virt_release_full_gpu(adev, true);
c41d1cf6 3894 if (!r && adev->virt.gim_feature & AMDGIM_FEATURE_GIM_FLR_VRAMLOST) {
e3526257 3895 amdgpu_inc_vram_lost(adev);
c33adbc7 3896 r = amdgpu_device_recover_vram(adev);
a90ad3c2
ML
3897 }
3898
3899 return r;
3900}
3901
12938fad
CK
3902/**
3903 * amdgpu_device_should_recover_gpu - check if we should try GPU recovery
3904 *
3905 * @adev: amdgpu device pointer
3906 *
3907 * Check amdgpu_gpu_recovery and SRIOV status to see if we should try to recover
3908 * a hung GPU.
3909 */
3910bool amdgpu_device_should_recover_gpu(struct amdgpu_device *adev)
3911{
3912 if (!amdgpu_device_ip_check_soft_reset(adev)) {
3913 DRM_INFO("Timeout, but no hardware hang detected.\n");
3914 return false;
3915 }
3916
3ba7b418
AG
3917 if (amdgpu_gpu_recovery == 0)
3918 goto disabled;
3919
3920 if (amdgpu_sriov_vf(adev))
3921 return true;
3922
3923 if (amdgpu_gpu_recovery == -1) {
3924 switch (adev->asic_type) {
fc42d47c
AG
3925 case CHIP_BONAIRE:
3926 case CHIP_HAWAII:
3ba7b418
AG
3927 case CHIP_TOPAZ:
3928 case CHIP_TONGA:
3929 case CHIP_FIJI:
3930 case CHIP_POLARIS10:
3931 case CHIP_POLARIS11:
3932 case CHIP_POLARIS12:
3933 case CHIP_VEGAM:
3934 case CHIP_VEGA20:
3935 case CHIP_VEGA10:
3936 case CHIP_VEGA12:
c43b849f 3937 case CHIP_RAVEN:
e9d4cf91 3938 case CHIP_ARCTURUS:
2cb44fb0 3939 case CHIP_RENOIR:
658c6639
AD
3940 case CHIP_NAVI10:
3941 case CHIP_NAVI14:
3942 case CHIP_NAVI12:
3ba7b418
AG
3943 break;
3944 default:
3945 goto disabled;
3946 }
12938fad
CK
3947 }
3948
3949 return true;
3ba7b418
AG
3950
3951disabled:
3952 DRM_INFO("GPU recovery disabled.\n");
3953 return false;
12938fad
CK
3954}
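/*
 * A hedged usage sketch (assumed caller in amdgpu_job.c, not part of this
 * file): the ring timeout handler is expected to gate recovery on this check,
 * roughly
 *
 *	if (amdgpu_device_should_recover_gpu(ring->adev))
 *		amdgpu_device_gpu_recover(ring->adev, job);
 *	else
 *		drm_sched_suspend_timeout(&ring->sched);
 */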
3955
5c6dd71e 3956
26bc5340
AG
3957static int amdgpu_device_pre_asic_reset(struct amdgpu_device *adev,
3958 struct amdgpu_job *job,
3959 bool *need_full_reset_arg)
3960{
3961 int i, r = 0;
3962 bool need_full_reset = *need_full_reset_arg;
71182665 3963
728e7e0c
JZ
3964 amdgpu_debugfs_wait_dump(adev);
3965
71182665 3966 /* block all schedulers and reset given job's ring */
0875dc9e
CZ
3967 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
3968 struct amdgpu_ring *ring = adev->rings[i];
3969
51687759 3970 if (!ring || !ring->sched.thread)
0875dc9e 3971 continue;
5740682e 3972
2f9d4084
ML
3973 /* after all hw jobs are reset, hw fence is meaningless, so force_completion */
3974 amdgpu_fence_driver_force_completion(ring);
0875dc9e 3975 }
d38ceaf9 3976
222b5f04
AG
3977 if (job)
3978 drm_sched_increase_karma(&job->base);
3979
1d721ed6 3980 /* Don't suspend on bare metal if we are not going to HW reset the ASIC */
26bc5340
AG
3981 if (!amdgpu_sriov_vf(adev)) {
3982
3983 if (!need_full_reset)
3984 need_full_reset = amdgpu_device_ip_need_full_reset(adev);
3985
3986 if (!need_full_reset) {
3987 amdgpu_device_ip_pre_soft_reset(adev);
3988 r = amdgpu_device_ip_soft_reset(adev);
3989 amdgpu_device_ip_post_soft_reset(adev);
3990 if (r || amdgpu_device_ip_check_soft_reset(adev)) {
3991 DRM_INFO("soft reset failed, will fallback to full reset!\n");
3992 need_full_reset = true;
3993 }
3994 }
3995
3996 if (need_full_reset)
3997 r = amdgpu_device_ip_suspend(adev);
3998
3999 *need_full_reset_arg = need_full_reset;
4000 }
4001
4002 return r;
4003}
4004
041a62bc 4005static int amdgpu_do_asic_reset(struct amdgpu_hive_info *hive,
26bc5340
AG
4006 struct list_head *device_list_handle,
4007 bool *need_full_reset_arg)
4008{
4009 struct amdgpu_device *tmp_adev = NULL;
4010 bool need_full_reset = *need_full_reset_arg, vram_lost = false;
4011 int r = 0;
4012
4013 /*
4014 * ASIC reset has to be done on all XGMI hive nodes ASAP
4015 * to allow proper link negotiation in FW (within 1 sec)
4016 */
4017 if (need_full_reset) {
4018 list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) {
041a62bc 4019 /* For XGMI run all resets in parallel to speed up the process */
d4535e2c 4020 if (tmp_adev->gmc.xgmi.num_physical_nodes > 1) {
c96cf282 4021 if (!queue_work(system_unbound_wq, &tmp_adev->xgmi_reset_work))
d4535e2c
AG
4022 r = -EALREADY;
4023 } else
4024 r = amdgpu_asic_reset(tmp_adev);
d4535e2c 4025
041a62bc
AG
4026 if (r) {
4027 DRM_ERROR("ASIC reset failed with error, %d for drm dev, %s",
4028 r, tmp_adev->ddev->unique);
4029 break;
ce316fa5
LM
4030 }
4031 }
4032
041a62bc
AG
4033 /* For XGMI wait for all resets to complete before proceed */
4034 if (!r) {
ce316fa5
LM
4035 list_for_each_entry(tmp_adev, device_list_handle,
4036 gmc.xgmi.head) {
4037 if (tmp_adev->gmc.xgmi.num_physical_nodes > 1) {
4038 flush_work(&tmp_adev->xgmi_reset_work);
4039 r = tmp_adev->asic_reset_res;
4040 if (r)
4041 break;
ce316fa5
LM
4042 }
4043 }
4044 }
ce316fa5 4045 }
26bc5340 4046
43c4d576
JC
4047 if (!r && amdgpu_ras_intr_triggered()) {
4048 list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) {
4049 if (tmp_adev->mmhub.funcs &&
4050 tmp_adev->mmhub.funcs->reset_ras_error_count)
4051 tmp_adev->mmhub.funcs->reset_ras_error_count(tmp_adev);
4052 }
4053
00eaa571 4054 amdgpu_ras_intr_cleared();
43c4d576 4055 }
00eaa571 4056
26bc5340
AG
4057 list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) {
4058 if (need_full_reset) {
4059 /* post card */
4060 if (amdgpu_atom_asic_init(tmp_adev->mode_info.atom_context))
4061 DRM_WARN("asic atom init failed!");
4062
4063 if (!r) {
4064 dev_info(tmp_adev->dev, "GPU reset succeeded, trying to resume\n");
4065 r = amdgpu_device_ip_resume_phase1(tmp_adev);
4066 if (r)
4067 goto out;
4068
4069 vram_lost = amdgpu_device_check_vram_lost(tmp_adev);
4070 if (vram_lost) {
77e7f829 4071 DRM_INFO("VRAM is lost due to GPU reset!\n");
e3526257 4072 amdgpu_inc_vram_lost(tmp_adev);
26bc5340
AG
4073 }
4074
4075 r = amdgpu_gtt_mgr_recover(
4076 &tmp_adev->mman.bdev.man[TTM_PL_TT]);
4077 if (r)
4078 goto out;
4079
4080 r = amdgpu_device_fw_loading(tmp_adev);
4081 if (r)
4082 return r;
4083
4084 r = amdgpu_device_ip_resume_phase2(tmp_adev);
4085 if (r)
4086 goto out;
4087
4088 if (vram_lost)
4089 amdgpu_device_fill_reset_magic(tmp_adev);
4090
fdafb359
EQ
4091 /*
4092 * Add this ASIC as tracked as reset was already
4093 * complete successfully.
4094 */
4095 amdgpu_register_gpu_instance(tmp_adev);
4096
7c04ca50 4097 r = amdgpu_device_ip_late_init(tmp_adev);
4098 if (r)
4099 goto out;
4100
565d1941
EQ
4101 amdgpu_fbdev_set_suspend(tmp_adev, 0);
4102
e79a04d5 4103 /* must succeed. */
511fdbc3 4104 amdgpu_ras_resume(tmp_adev);
e79a04d5 4105
26bc5340
AG
4106 /* Update PSP FW topology after reset */
4107 if (hive && tmp_adev->gmc.xgmi.num_physical_nodes > 1)
4108 r = amdgpu_xgmi_update_topology(hive, tmp_adev);
4109 }
4110 }
4111
4112
4113out:
4114 if (!r) {
4115 amdgpu_irq_gpu_reset_resume_helper(tmp_adev);
4116 r = amdgpu_ib_ring_tests(tmp_adev);
4117 if (r) {
4118 dev_err(tmp_adev->dev, "ib ring test failed (%d).\n", r);
4119 r = amdgpu_device_ip_suspend(tmp_adev);
4120 need_full_reset = true;
4121 r = -EAGAIN;
4122 goto end;
4123 }
4124 }
4125
4126 if (!r)
4127 r = amdgpu_device_recover_vram(tmp_adev);
4128 else
4129 tmp_adev->asic_reset_res = r;
4130 }
4131
4132end:
4133 *need_full_reset_arg = need_full_reset;
4134 return r;
4135}
4136
1d721ed6 4137static bool amdgpu_device_lock_adev(struct amdgpu_device *adev, bool trylock)
26bc5340 4138{
1d721ed6
AG
4139 if (trylock) {
4140 if (!mutex_trylock(&adev->lock_reset))
4141 return false;
4142 } else
4143 mutex_lock(&adev->lock_reset);
5740682e 4144
26bc5340 4145 atomic_inc(&adev->gpu_reset_counter);
2a9b90ae 4146 adev->in_gpu_reset = true;
a3a09142
AD
4147 switch (amdgpu_asic_reset_method(adev)) {
4148 case AMD_RESET_METHOD_MODE1:
4149 adev->mp1_state = PP_MP1_STATE_SHUTDOWN;
4150 break;
4151 case AMD_RESET_METHOD_MODE2:
4152 adev->mp1_state = PP_MP1_STATE_RESET;
4153 break;
4154 default:
4155 adev->mp1_state = PP_MP1_STATE_NONE;
4156 break;
4157 }
1d721ed6
AG
4158
4159 return true;
26bc5340 4160}
d38ceaf9 4161
26bc5340
AG
4162static void amdgpu_device_unlock_adev(struct amdgpu_device *adev)
4163{
89041940 4164 amdgpu_vf_error_trans_all(adev);
a3a09142 4165 adev->mp1_state = PP_MP1_STATE_NONE;
2a9b90ae 4166 adev->in_gpu_reset = false;
13a752e3 4167 mutex_unlock(&adev->lock_reset);
26bc5340
AG
4168}
4169
3f12acc8
EQ
4170static void amdgpu_device_resume_display_audio(struct amdgpu_device *adev)
4171{
4172 struct pci_dev *p = NULL;
4173
4174 p = pci_get_domain_bus_and_slot(pci_domain_nr(adev->pdev->bus),
4175 adev->pdev->bus->number, 1);
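	/*
	 * Note: devfn 1 on the GPU's own domain/bus is the device's HDMI/DP
	 * audio PCI function, i.e. the codec that shares the power domain with
	 * the GPU (see the AZ power domain comment in the reset path below).
	 */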
4176 if (p) {
4177 pm_runtime_enable(&(p->dev));
4178 pm_runtime_resume(&(p->dev));
4179 }
4180}
4181
4182static int amdgpu_device_suspend_display_audio(struct amdgpu_device *adev)
4183{
4184 enum amd_reset_method reset_method;
4185 struct pci_dev *p = NULL;
4186 u64 expires;
4187
4188 /*
4189 * For now, only BACO and mode1 reset are confirmed
4190 * to suffer from the audio issue when not properly suspended.
4191 */
4192 reset_method = amdgpu_asic_reset_method(adev);
4193 if ((reset_method != AMD_RESET_METHOD_BACO) &&
4194 (reset_method != AMD_RESET_METHOD_MODE1))
4195 return -EINVAL;
4196
4197 p = pci_get_domain_bus_and_slot(pci_domain_nr(adev->pdev->bus),
4198 adev->pdev->bus->number, 1);
4199 if (!p)
4200 return -ENODEV;
4201
4202 expires = pm_runtime_autosuspend_expiration(&(p->dev));
4203 if (!expires)
4204 /*
4205 * If we cannot get the audio device autosuspend delay,
4206 * fall back to a fixed 4s interval. Since 3s is the audio
4207 * controller's default autosuspend delay setting, the 4s
4208 * used here is guaranteed to cover it.
4209 */
54b7feb9 4210 expires = ktime_get_mono_fast_ns() + NSEC_PER_SEC * 4ULL;
3f12acc8
EQ
4211
4212 while (!pm_runtime_status_suspended(&(p->dev))) {
4213 if (!pm_runtime_suspend(&(p->dev)))
4214 break;
4215
4216 if (expires < ktime_get_mono_fast_ns()) {
4217 dev_warn(adev->dev, "failed to suspend display audio\n");
4218 /* TODO: abort the succeeding gpu reset? */
4219 return -ETIMEDOUT;
4220 }
4221 }
4222
4223 pm_runtime_disable(&(p->dev));
4224
4225 return 0;
4226}
4227
26bc5340
AG
4228/**
4229 * amdgpu_device_gpu_recover - reset the asic and recover scheduler
4230 *
4231 * @adev: amdgpu device pointer
4232 * @job: which job triggered the hang
4233 *
4234 * Attempt to reset the GPU if it has hung (all asics).
4235 * Attempt to do a soft reset or a full reset and reinitialize the ASIC.
4236 * Returns 0 for success or an error on failure.
4237 */
4238
4239int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
4240 struct amdgpu_job *job)
4241{
1d721ed6 4242 struct list_head device_list, *device_list_handle = NULL;
7dd8c205
EQ
4243 bool need_full_reset = false;
4244 bool job_signaled = false;
26bc5340 4245 struct amdgpu_hive_info *hive = NULL;
26bc5340 4246 struct amdgpu_device *tmp_adev = NULL;
1d721ed6 4247 int i, r = 0;
7c6e68c7 4248 bool in_ras_intr = amdgpu_ras_intr_triggered();
b823821f
LM
4249 bool use_baco =
4250 (amdgpu_asic_reset_method(adev) == AMD_RESET_METHOD_BACO) ?
4251 true : false;
3f12acc8 4252 bool audio_suspended = false;
26bc5340 4253
d5ea093e
AG
4254 /*
4255 * Flush RAM to disk so that after reboot
4256 * the user can read the log and see why the system rebooted.
4257 */
b823821f 4258 if (in_ras_intr && !use_baco && amdgpu_ras_get_context(adev)->reboot) {
d5ea093e
AG
4259
4260 DRM_WARN("Emergency reboot.");
4261
4262 ksys_sync_helper();
4263 emergency_restart();
4264 }
4265
b823821f
LM
4266 dev_info(adev->dev, "GPU %s begin!\n",
4267 (in_ras_intr && !use_baco) ? "jobs stop":"reset");
26bc5340
AG
4268
4269 /*
1d721ed6
AG
4270 * Here we trylock to avoid a chain of resets executing from either a
4271 * trigger by jobs on different adevs in an XGMI hive, or jobs on different
4272 * schedulers for the same device, while this TO handler is running.
4273 * We always reset all schedulers for a device and all devices in the XGMI
4274 * hive, so that should take care of them too.
26bc5340 4275 */
7dd8c205 4276 hive = amdgpu_get_xgmi_hive(adev, true);
1d721ed6
AG
4277 if (hive && !mutex_trylock(&hive->reset_lock)) {
4278 DRM_INFO("Bailing on TDR for s_job:%llx, hive: %llx as another already in progress",
0b2d2c2e 4279 job ? job->base.id : -1, hive->hive_id);
9e94d22c 4280 mutex_unlock(&hive->hive_lock);
26bc5340 4281 return 0;
1d721ed6 4282 }
26bc5340 4283
9e94d22c
EQ
4284 /*
4285 * Build list of devices to reset.
4286 * In case we are in XGMI hive mode, resort the device list
4287 * to put adev in the 1st position.
4288 */
4289 INIT_LIST_HEAD(&device_list);
4290 if (adev->gmc.xgmi.num_physical_nodes > 1) {
4291 if (!hive)
26bc5340 4292 return -ENODEV;
9e94d22c
EQ
4293 if (!list_is_first(&adev->gmc.xgmi.head, &hive->device_list))
4294 list_rotate_to_front(&adev->gmc.xgmi.head, &hive->device_list);
26bc5340
AG
4295 device_list_handle = &hive->device_list;
4296 } else {
4297 list_add_tail(&adev->gmc.xgmi.head, &device_list);
4298 device_list_handle = &device_list;
4299 }
4300
1d721ed6
AG
4301 /* block all schedulers and reset given job's ring */
4302 list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) {
9e94d22c
EQ
4303 if (!amdgpu_device_lock_adev(tmp_adev, !hive)) {
4304 DRM_INFO("Bailing on TDR for s_job:%llx, as another already in progress",
4305 job ? job->base.id : -1);
4306 mutex_unlock(&hive->hive_lock);
4307 return 0;
7c6e68c7
AG
4308 }
4309
3f12acc8
EQ
4310 /*
4311 * Try to put the audio codec into suspend state
4312 * before gpu reset started.
4313 *
4314 * This is because the power domain of the graphics device
4315 * is shared with the AZ power domain. Without this,
4316 * we may change the audio hardware from behind
4317 * the audio driver's back. That will trigger
4318 * some audio codec errors.
4319 */
4320 if (!amdgpu_device_suspend_display_audio(tmp_adev))
4321 audio_suspended = true;
4322
9e94d22c
EQ
4323 amdgpu_ras_set_error_query_ready(tmp_adev, false);
4324
52fb44cf
EQ
4325 cancel_delayed_work_sync(&tmp_adev->delayed_init_work);
4326
9e94d22c
EQ
4327 if (!amdgpu_sriov_vf(tmp_adev))
4328 amdgpu_amdkfd_pre_reset(tmp_adev);
4329
12ffa55d
AG
4330 /*
4331 * Mark these ASICs to be reset as untracked first,
4332 * and add them back after the reset has completed.
4333 */
4334 amdgpu_unregister_gpu_instance(tmp_adev);
4335
a2f63ee8 4336 amdgpu_fbdev_set_suspend(tmp_adev, 1);
565d1941 4337
f1c1314b 4338 /* disable ras on ALL IPs */
b823821f
LM
4339 if (!(in_ras_intr && !use_baco) &&
4340 amdgpu_device_ip_need_full_reset(tmp_adev))
f1c1314b 4341 amdgpu_ras_suspend(tmp_adev);
4342
1d721ed6
AG
4343 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
4344 struct amdgpu_ring *ring = tmp_adev->rings[i];
4345
4346 if (!ring || !ring->sched.thread)
4347 continue;
4348
0b2d2c2e 4349 drm_sched_stop(&ring->sched, job ? &job->base : NULL);
7c6e68c7 4350
b823821f 4351 if (in_ras_intr && !use_baco)
7c6e68c7 4352 amdgpu_job_stop_all_jobs_on_sched(&ring->sched);
1d721ed6
AG
4353 }
4354 }
4355
b823821f 4356 if (in_ras_intr && !use_baco)
7c6e68c7
AG
4357 goto skip_sched_resume;
4358
1d721ed6
AG
4359 /*
4360 * Must check guilty signal here since after this point all old
4361 * HW fences are force signaled.
4362 *
4363 * job->base holds a reference to parent fence
4364 */
4365 if (job && job->base.s_fence->parent &&
7dd8c205 4366 dma_fence_is_signaled(job->base.s_fence->parent)) {
1d721ed6 4367 job_signaled = true;
1d721ed6
AG
4368 dev_info(adev->dev, "Guilty job already signaled, skipping HW reset");
4369 goto skip_hw_reset;
4370 }
4371
26bc5340
AG
4372retry: /* Rest of adevs pre asic reset from XGMI hive. */
4373 list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) {
26bc5340
AG
4374 r = amdgpu_device_pre_asic_reset(tmp_adev,
4375 NULL,
4376 &need_full_reset);
4377 /*TODO Should we stop ?*/
4378 if (r) {
4379 DRM_ERROR("GPU pre asic reset failed with err, %d for drm dev, %s ",
4380 r, tmp_adev->ddev->unique);
4381 tmp_adev->asic_reset_res = r;
4382 }
4383 }
4384
4385 /* Actual ASIC resets if needed.*/
4386 /* TODO Implement XGMI hive reset logic for SRIOV */
4387 if (amdgpu_sriov_vf(adev)) {
4388 r = amdgpu_device_reset_sriov(adev, job ? false : true);
4389 if (r)
4390 adev->asic_reset_res = r;
4391 } else {
041a62bc 4392 r = amdgpu_do_asic_reset(hive, device_list_handle, &need_full_reset);
26bc5340
AG
4393 if (r && r == -EAGAIN)
4394 goto retry;
4395 }
4396
1d721ed6
AG
4397skip_hw_reset:
4398
26bc5340
AG
4399 /* Post ASIC reset for all devs .*/
4400 list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) {
7c6e68c7 4401
1d721ed6
AG
4402 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
4403 struct amdgpu_ring *ring = tmp_adev->rings[i];
4404
4405 if (!ring || !ring->sched.thread)
4406 continue;
4407
4408 /* No point to resubmit jobs if we didn't HW reset*/
4409 if (!tmp_adev->asic_reset_res && !job_signaled)
4410 drm_sched_resubmit_jobs(&ring->sched);
4411
4412 drm_sched_start(&ring->sched, !tmp_adev->asic_reset_res);
4413 }
4414
4415 if (!amdgpu_device_has_dc_support(tmp_adev) && !job_signaled) {
4416 drm_helper_resume_force_mode(tmp_adev->ddev);
4417 }
4418
4419 tmp_adev->asic_reset_res = 0;
26bc5340
AG
4420
4421 if (r) {
4422 /* bad news, how to tell it to userspace ? */
12ffa55d 4423 dev_info(tmp_adev->dev, "GPU reset(%d) failed\n", atomic_read(&tmp_adev->gpu_reset_counter));
26bc5340
AG
4424 amdgpu_vf_error_put(tmp_adev, AMDGIM_ERROR_VF_GPU_RESET_FAIL, 0, r);
4425 } else {
12ffa55d 4426 dev_info(tmp_adev->dev, "GPU reset(%d) succeeded!\n", atomic_read(&tmp_adev->gpu_reset_counter));
26bc5340 4427 }
7c6e68c7 4428 }
26bc5340 4429
7c6e68c7
AG
4430skip_sched_resume:
4431 list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) {
4432 /*unlock kfd: SRIOV would do it separately */
b823821f 4433 if (!(in_ras_intr && !use_baco) && !amdgpu_sriov_vf(tmp_adev))
7c6e68c7 4434 amdgpu_amdkfd_post_reset(tmp_adev);
3f12acc8
EQ
4435 if (audio_suspended)
4436 amdgpu_device_resume_display_audio(tmp_adev);
26bc5340
AG
4437 amdgpu_device_unlock_adev(tmp_adev);
4438 }
4439
9e94d22c 4440 if (hive) {
22d6575b 4441 mutex_unlock(&hive->reset_lock);
9e94d22c
EQ
4442 mutex_unlock(&hive->hive_lock);
4443 }
26bc5340
AG
4444
4445 if (r)
4446 dev_info(adev->dev, "GPU reset end with ret = %d\n", r);
d38ceaf9
AD
4447 return r;
4448}
4449
e3ecdffa
AD
4450/**
4451 * amdgpu_device_get_pcie_info - fetch pcie info about the PCIE slot
4452 *
4453 * @adev: amdgpu_device pointer
4454 *
4455 * Fetches and stores in the driver the PCIE capabilities (gen speed
4456 * and lanes) of the slot the device is in. Handles APUs and
4457 * virtualized environments where PCIE config space may not be available.
4458 */
5494d864 4459static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev)
d0dd7f0c 4460{
5d9a6330 4461 struct pci_dev *pdev;
c5313457
HK
4462 enum pci_bus_speed speed_cap, platform_speed_cap;
4463 enum pcie_link_width platform_link_width;
d0dd7f0c 4464
cd474ba0
AD
4465 if (amdgpu_pcie_gen_cap)
4466 adev->pm.pcie_gen_mask = amdgpu_pcie_gen_cap;
d0dd7f0c 4467
cd474ba0
AD
4468 if (amdgpu_pcie_lane_cap)
4469 adev->pm.pcie_mlw_mask = amdgpu_pcie_lane_cap;
d0dd7f0c 4470
cd474ba0
AD
4471 /* covers APUs as well */
4472 if (pci_is_root_bus(adev->pdev->bus)) {
4473 if (adev->pm.pcie_gen_mask == 0)
4474 adev->pm.pcie_gen_mask = AMDGPU_DEFAULT_PCIE_GEN_MASK;
4475 if (adev->pm.pcie_mlw_mask == 0)
4476 adev->pm.pcie_mlw_mask = AMDGPU_DEFAULT_PCIE_MLW_MASK;
d0dd7f0c 4477 return;
cd474ba0 4478 }
d0dd7f0c 4479
c5313457
HK
4480 if (adev->pm.pcie_gen_mask && adev->pm.pcie_mlw_mask)
4481 return;
4482
dbaa922b
AD
4483 pcie_bandwidth_available(adev->pdev, NULL,
4484 &platform_speed_cap, &platform_link_width);
c5313457 4485
cd474ba0 4486 if (adev->pm.pcie_gen_mask == 0) {
5d9a6330
AD
4487 /* asic caps */
4488 pdev = adev->pdev;
4489 speed_cap = pcie_get_speed_cap(pdev);
4490 if (speed_cap == PCI_SPEED_UNKNOWN) {
4491 adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
cd474ba0
AD
4492 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
4493 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3);
cd474ba0 4494 } else {
5d9a6330
AD
4495 if (speed_cap == PCIE_SPEED_16_0GT)
4496 adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
4497 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
4498 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3 |
4499 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN4);
4500 else if (speed_cap == PCIE_SPEED_8_0GT)
4501 adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
4502 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
4503 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3);
4504 else if (speed_cap == PCIE_SPEED_5_0GT)
4505 adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
4506 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2);
4507 else
4508 adev->pm.pcie_gen_mask |= CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1;
4509 }
4510 /* platform caps */
c5313457 4511 if (platform_speed_cap == PCI_SPEED_UNKNOWN) {
5d9a6330
AD
4512 adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
4513 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2);
4514 } else {
c5313457 4515 if (platform_speed_cap == PCIE_SPEED_16_0GT)
5d9a6330
AD
4516 adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
4517 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2 |
4518 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3 |
4519 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN4);
c5313457 4520 else if (platform_speed_cap == PCIE_SPEED_8_0GT)
5d9a6330
AD
4521 adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
4522 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2 |
4523 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3);
c5313457 4524 else if (platform_speed_cap == PCIE_SPEED_5_0GT)
5d9a6330
AD
4525 adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
4526 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2);
4527 else
4528 adev->pm.pcie_gen_mask |= CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1;
4529
cd474ba0
AD
4530 }
4531 }
4532 if (adev->pm.pcie_mlw_mask == 0) {
c5313457 4533 if (platform_link_width == PCIE_LNK_WIDTH_UNKNOWN) {
5d9a6330
AD
4534 adev->pm.pcie_mlw_mask |= AMDGPU_DEFAULT_PCIE_MLW_MASK;
4535 } else {
c5313457 4536 switch (platform_link_width) {
5d9a6330 4537 case PCIE_LNK_X32:
cd474ba0
AD
4538 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X32 |
4539 CAIL_PCIE_LINK_WIDTH_SUPPORT_X16 |
4540 CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 |
4541 CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
4542 CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
4543 CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
4544 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
4545 break;
5d9a6330 4546 case PCIE_LNK_X16:
cd474ba0
AD
4547 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X16 |
4548 CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 |
4549 CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
4550 CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
4551 CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
4552 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
4553 break;
5d9a6330 4554 case PCIE_LNK_X12:
cd474ba0
AD
4555 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 |
4556 CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
4557 CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
4558 CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
4559 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
4560 break;
5d9a6330 4561 case PCIE_LNK_X8:
cd474ba0
AD
4562 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
4563 CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
4564 CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
4565 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
4566 break;
5d9a6330 4567 case PCIE_LNK_X4:
cd474ba0
AD
4568 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
4569 CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
4570 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
4571 break;
5d9a6330 4572 case PCIE_LNK_X2:
cd474ba0
AD
4573 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
4574 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
4575 break;
5d9a6330 4576 case PCIE_LNK_X1:
cd474ba0
AD
4577 adev->pm.pcie_mlw_mask = CAIL_PCIE_LINK_WIDTH_SUPPORT_X1;
4578 break;
4579 default:
4580 break;
4581 }
d0dd7f0c
AD
4582 }
4583 }
4584}
d38ceaf9 4585
361dbd01
AD
4586int amdgpu_device_baco_enter(struct drm_device *dev)
4587{
4588 struct amdgpu_device *adev = dev->dev_private;
7a22677b 4589 struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
361dbd01
AD
4590
4591 if (!amdgpu_device_supports_baco(adev->ddev))
4592 return -ENOTSUPP;
4593
7a22677b
LM
4594 if (ras && ras->supported)
4595 adev->nbio.funcs->enable_doorbell_interrupt(adev, false);
4596
9530273e 4597 return amdgpu_dpm_baco_enter(adev);
361dbd01
AD
4598}
4599
4600int amdgpu_device_baco_exit(struct drm_device *dev)
4601{
4602 struct amdgpu_device *adev = dev->dev_private;
7a22677b 4603 struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
9530273e 4604 int ret = 0;
361dbd01
AD
4605
4606 if (!amdgpu_device_supports_baco(adev->ddev))
4607 return -ENOTSUPP;
4608
9530273e
EQ
4609 ret = amdgpu_dpm_baco_exit(adev);
4610 if (ret)
4611 return ret;
7a22677b
LM
4612
4613 if (ras && ras->supported)
4614 adev->nbio.funcs->enable_doorbell_interrupt(adev, true);
4615
4616 return 0;
361dbd01 4617}
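/*
 * A hedged usage sketch (assumed callers in amdgpu_drv.c's runtime PM hooks,
 * not part of this file): BACO entry/exit is expected to be driven roughly as
 *
 *	ret = amdgpu_device_baco_enter(drm_dev);   on runtime suspend, and
 *	ret = amdgpu_device_baco_exit(drm_dev);    on runtime resume,
 *
 * in each case only when amdgpu_device_supports_baco(adev->ddev) reports that
 * the ASIC supports BACO at all.
 */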