drm/amdgpu: move discovery gfx config fetching
drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
/*
 * Copyright 2008 Advanced Micro Devices, Inc.
 * Copyright 2008 Red Hat Inc.
 * Copyright 2009 Jerome Glisse.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 * Authors: Dave Airlie
 *          Alex Deucher
 *          Jerome Glisse
 */
#include <linux/power_supply.h>
#include <linux/kthread.h>
#include <linux/module.h>
#include <linux/console.h>
#include <linux/slab.h>

#include <drm/drm_atomic_helper.h>
#include <drm/drm_probe_helper.h>
#include <drm/amdgpu_drm.h>
#include <linux/vgaarb.h>
#include <linux/vga_switcheroo.h>
#include <linux/efi.h>
#include "amdgpu.h"
#include "amdgpu_trace.h"
#include "amdgpu_i2c.h"
#include "atom.h"
#include "amdgpu_atombios.h"
#include "amdgpu_atomfirmware.h"
#include "amd_pcie.h"
#ifdef CONFIG_DRM_AMDGPU_SI
#include "si.h"
#endif
#ifdef CONFIG_DRM_AMDGPU_CIK
#include "cik.h"
#endif
#include "vi.h"
#include "soc15.h"
#include "nv.h"
#include "bif/bif_4_1_d.h"
#include <linux/pci.h>
#include <linux/firmware.h>
#include "amdgpu_vf_error.h"

#include "amdgpu_amdkfd.h"
#include "amdgpu_pm.h"

#include "amdgpu_xgmi.h"
#include "amdgpu_ras.h"
#include "amdgpu_pmu.h"
#include "amdgpu_fru_eeprom.h"

#include <linux/suspend.h>
#include <drm/task_barrier.h>
#include <linux/pm_runtime.h>

MODULE_FIRMWARE("amdgpu/vega10_gpu_info.bin");
MODULE_FIRMWARE("amdgpu/vega12_gpu_info.bin");
MODULE_FIRMWARE("amdgpu/raven_gpu_info.bin");
MODULE_FIRMWARE("amdgpu/picasso_gpu_info.bin");
MODULE_FIRMWARE("amdgpu/raven2_gpu_info.bin");
MODULE_FIRMWARE("amdgpu/arcturus_gpu_info.bin");
MODULE_FIRMWARE("amdgpu/renoir_gpu_info.bin");
MODULE_FIRMWARE("amdgpu/navi10_gpu_info.bin");
MODULE_FIRMWARE("amdgpu/navi14_gpu_info.bin");
MODULE_FIRMWARE("amdgpu/navi12_gpu_info.bin");

#define AMDGPU_RESUME_MS	2000

const char *amdgpu_asic_name[] = {
	"TAHITI",
	"PITCAIRN",
	"VERDE",
	"OLAND",
	"HAINAN",
	"BONAIRE",
	"KAVERI",
	"KABINI",
	"HAWAII",
	"MULLINS",
	"TOPAZ",
	"TONGA",
	"FIJI",
	"CARRIZO",
	"STONEY",
	"POLARIS10",
	"POLARIS11",
	"POLARIS12",
	"VEGAM",
	"VEGA10",
	"VEGA12",
	"VEGA20",
	"RAVEN",
	"ARCTURUS",
	"RENOIR",
	"NAVI10",
	"NAVI14",
	"NAVI12",
	"LAST",
};

/**
 * DOC: pcie_replay_count
 *
 * The amdgpu driver provides a sysfs API for reporting the total number
 * of PCIe replays (NAKs).
 * The file pcie_replay_count is used for this and returns the total
 * number of replays as a sum of the NAKs generated and NAKs received.
 */

static ssize_t amdgpu_device_get_pcie_replay_count(struct device *dev,
		struct device_attribute *attr, char *buf)
{
	struct drm_device *ddev = dev_get_drvdata(dev);
	struct amdgpu_device *adev = ddev->dev_private;
	uint64_t cnt = amdgpu_asic_get_pcie_replay_count(adev);

	return snprintf(buf, PAGE_SIZE, "%llu\n", cnt);
}

static DEVICE_ATTR(pcie_replay_count, S_IRUGO,
		amdgpu_device_get_pcie_replay_count, NULL);

static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev);

/**
 * DOC: product_name
 *
 * The amdgpu driver provides a sysfs API for reporting the product name
 * for the device.
 * The file product_name is used for this and returns the product name
 * as returned from the FRU.
 * NOTE: This is only available for certain server cards
 */

static ssize_t amdgpu_device_get_product_name(struct device *dev,
		struct device_attribute *attr, char *buf)
{
	struct drm_device *ddev = dev_get_drvdata(dev);
	struct amdgpu_device *adev = ddev->dev_private;

	return snprintf(buf, PAGE_SIZE, "%s\n", adev->product_name);
}

static DEVICE_ATTR(product_name, S_IRUGO,
		amdgpu_device_get_product_name, NULL);

/**
 * DOC: product_number
 *
 * The amdgpu driver provides a sysfs API for reporting the part number
 * for the device.
 * The file product_number is used for this and returns the part number
 * as returned from the FRU.
 * NOTE: This is only available for certain server cards
 */

static ssize_t amdgpu_device_get_product_number(struct device *dev,
		struct device_attribute *attr, char *buf)
{
	struct drm_device *ddev = dev_get_drvdata(dev);
	struct amdgpu_device *adev = ddev->dev_private;

	return snprintf(buf, PAGE_SIZE, "%s\n", adev->product_number);
}

static DEVICE_ATTR(product_number, S_IRUGO,
		amdgpu_device_get_product_number, NULL);

/**
 * DOC: serial_number
 *
 * The amdgpu driver provides a sysfs API for reporting the serial number
 * for the device.
 * The file serial_number is used for this and returns the serial number
 * as returned from the FRU.
 * NOTE: This is only available for certain server cards
 */

static ssize_t amdgpu_device_get_serial_number(struct device *dev,
		struct device_attribute *attr, char *buf)
{
	struct drm_device *ddev = dev_get_drvdata(dev);
	struct amdgpu_device *adev = ddev->dev_private;

	return snprintf(buf, PAGE_SIZE, "%s\n", adev->serial);
}

static DEVICE_ATTR(serial_number, S_IRUGO,
		amdgpu_device_get_serial_number, NULL);

/**
 * amdgpu_device_supports_boco - Is the device a dGPU with HG/PX power control
 *
 * @dev: drm_device pointer
 *
 * Returns true if the device is a dGPU with HG/PX power control,
 * otherwise return false.
 */
bool amdgpu_device_supports_boco(struct drm_device *dev)
{
	struct amdgpu_device *adev = dev->dev_private;

	if (adev->flags & AMD_IS_PX)
		return true;
	return false;
}

/**
 * amdgpu_device_supports_baco - Does the device support BACO
 *
 * @dev: drm_device pointer
 *
 * Returns true if the device supports BACO,
 * otherwise return false.
 */
bool amdgpu_device_supports_baco(struct drm_device *dev)
{
	struct amdgpu_device *adev = dev->dev_private;

	return amdgpu_asic_supports_baco(adev);
}

/**
 * VRAM access helper functions.
 *
 * amdgpu_device_vram_access - read/write a buffer in vram
 *
 * @adev: amdgpu_device pointer
 * @pos: offset of the buffer in vram
 * @buf: virtual address of the buffer in system memory
 * @size: read/write size, sizeof(@buf) must > @size
 * @write: true - write to vram, otherwise - read from vram
 */
void amdgpu_device_vram_access(struct amdgpu_device *adev, loff_t pos,
			       uint32_t *buf, size_t size, bool write)
{
	unsigned long flags;
	uint32_t hi = ~0;
	uint64_t last;

#ifdef CONFIG_64BIT
	last = min(pos + size, adev->gmc.visible_vram_size);
	if (last > pos) {
		void __iomem *addr = adev->mman.aper_base_kaddr + pos;
		size_t count = last - pos;

		if (write) {
			memcpy_toio(addr, buf, count);
			mb();
			amdgpu_asic_flush_hdp(adev, NULL);
		} else {
			amdgpu_asic_invalidate_hdp(adev, NULL);
			mb();
			memcpy_fromio(buf, addr, count);
		}

		if (count == size)
			return;

		pos += count;
		buf += count / 4;
		size -= count;
	}
#endif

	spin_lock_irqsave(&adev->mmio_idx_lock, flags);
	for (last = pos + size; pos < last; pos += 4) {
		uint32_t tmp = pos >> 31;

		WREG32_NO_KIQ(mmMM_INDEX, ((uint32_t)pos) | 0x80000000);
		if (tmp != hi) {
			WREG32_NO_KIQ(mmMM_INDEX_HI, tmp);
			hi = tmp;
		}
		if (write)
			WREG32_NO_KIQ(mmMM_DATA, *buf++);
		else
			*buf++ = RREG32_NO_KIQ(mmMM_DATA);
	}
	spin_unlock_irqrestore(&adev->mmio_idx_lock, flags);
}

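/*
 * Illustrative usage sketch (added for this edit, not part of the upstream
 * file): reading a few dwords back from VRAM with the helper above.  The
 * offset, size and wrapper function are hypothetical.
 */
#if 0
static void example_dump_vram_words(struct amdgpu_device *adev)
{
	uint32_t data[4];

	/* read 16 bytes starting at VRAM offset 0x1000 */
	amdgpu_device_vram_access(adev, 0x1000, data, sizeof(data), false);

	DRM_INFO("VRAM[0x1000]: %08x %08x %08x %08x\n",
		 data[0], data[1], data[2], data[3]);

	/* write the same words back */
	amdgpu_device_vram_access(adev, 0x1000, data, sizeof(data), true);
}
#endif
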
/*
 * device register access helper functions.
 */
/**
 * amdgpu_device_rreg - read a register
 *
 * @adev: amdgpu_device pointer
 * @reg: dword aligned register offset
 * @acc_flags: access flags which require special behavior
 *
 * Returns the 32 bit value from the offset specified.
 */
uint32_t amdgpu_device_rreg(struct amdgpu_device *adev, uint32_t reg,
			    uint32_t acc_flags)
{
	uint32_t ret;

	if (!(acc_flags & AMDGPU_REGS_NO_KIQ) && amdgpu_sriov_runtime(adev))
		return amdgpu_kiq_rreg(adev, reg);

	if ((reg * 4) < adev->rmmio_size)
		ret = readl(((void __iomem *)adev->rmmio) + (reg * 4));
	else
		ret = adev->pcie_rreg(adev, (reg * 4));
	trace_amdgpu_device_rreg(adev->pdev->device, reg, ret);
	return ret;
}

/*
 * MMIO register read with bytes helper functions
 * @offset: bytes offset from MMIO start
 */

/**
 * amdgpu_mm_rreg8 - read a memory mapped IO register
 *
 * @adev: amdgpu_device pointer
 * @offset: byte aligned register offset
 *
 * Returns the 8 bit value from the offset specified.
 */
uint8_t amdgpu_mm_rreg8(struct amdgpu_device *adev, uint32_t offset)
{
	if (offset < adev->rmmio_size)
		return (readb(adev->rmmio + offset));
	BUG();
}

/*
 * MMIO register write with bytes helper functions
 * @offset: bytes offset from MMIO start
 * @value: the value want to be written to the register
 */
/**
 * amdgpu_mm_wreg8 - write to a memory mapped IO register
 *
 * @adev: amdgpu_device pointer
 * @offset: byte aligned register offset
 * @value: 8 bit value to write
 *
 * Writes the value specified to the offset specified.
 */
void amdgpu_mm_wreg8(struct amdgpu_device *adev, uint32_t offset, uint8_t value)
{
	if (offset < adev->rmmio_size)
		writeb(value, adev->rmmio + offset);
	else
		BUG();
}

static inline void amdgpu_device_wreg_no_kiq(struct amdgpu_device *adev, uint32_t reg,
					     uint32_t v, uint32_t acc_flags)
{
	trace_amdgpu_device_wreg(adev->pdev->device, reg, v);

	if ((reg * 4) < adev->rmmio_size)
		writel(v, ((void __iomem *)adev->rmmio) + (reg * 4));
	else
		adev->pcie_wreg(adev, (reg * 4), v);
}

/**
 * amdgpu_device_wreg - write to a register
 *
 * @adev: amdgpu_device pointer
 * @reg: dword aligned register offset
 * @v: 32 bit value to write to the register
 * @acc_flags: access flags which require special behavior
 *
 * Writes the value specified to the offset specified.
 */
void amdgpu_device_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v,
			uint32_t acc_flags)
{
	if (!(acc_flags & AMDGPU_REGS_NO_KIQ) && amdgpu_sriov_runtime(adev))
		return amdgpu_kiq_wreg(adev, reg, v);

	amdgpu_device_wreg_no_kiq(adev, reg, v, acc_flags);
}

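/*
 * Illustrative sketch (not in the original file) of a read-modify-write
 * through the dword helpers above.  The register offset and the bit being
 * set are made up; real callers normally go through the RREG32()/WREG32()
 * style macros that wrap these functions.
 */
#if 0
static void example_rmw_register(struct amdgpu_device *adev)
{
	uint32_t reg = 0x1234;	/* hypothetical dword-aligned offset */
	uint32_t v;

	/* AMDGPU_REGS_NO_KIQ bypasses the KIQ path even under SR-IOV runtime */
	v = amdgpu_device_rreg(adev, reg, AMDGPU_REGS_NO_KIQ);
	v |= 0x1;
	amdgpu_device_wreg(adev, reg, v, AMDGPU_REGS_NO_KIQ);
}
#endif
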
/*
 * amdgpu_mm_wreg_mmio_rlc - write a register either through MMIO or through
 * the RLC path if it is in the RLC access range
 *
 * This function is invoked only for the debugfs register access path.
 */
void amdgpu_mm_wreg_mmio_rlc(struct amdgpu_device *adev, uint32_t reg, uint32_t v,
			     uint32_t acc_flags)
{
	if (amdgpu_sriov_fullaccess(adev) &&
	    adev->gfx.rlc.funcs &&
	    adev->gfx.rlc.funcs->is_rlcg_access_range) {

		if (adev->gfx.rlc.funcs->is_rlcg_access_range(adev, reg))
			return adev->gfx.rlc.funcs->rlcg_wreg(adev, reg, v);
	}

	amdgpu_device_wreg_no_kiq(adev, reg, v, acc_flags);
}

/**
 * amdgpu_io_rreg - read an IO register
 *
 * @adev: amdgpu_device pointer
 * @reg: dword aligned register offset
 *
 * Returns the 32 bit value from the offset specified.
 */
u32 amdgpu_io_rreg(struct amdgpu_device *adev, u32 reg)
{
	if ((reg * 4) < adev->rio_mem_size)
		return ioread32(adev->rio_mem + (reg * 4));
	else {
		iowrite32((reg * 4), adev->rio_mem + (mmMM_INDEX * 4));
		return ioread32(adev->rio_mem + (mmMM_DATA * 4));
	}
}

/**
 * amdgpu_io_wreg - write to an IO register
 *
 * @adev: amdgpu_device pointer
 * @reg: dword aligned register offset
 * @v: 32 bit value to write to the register
 *
 * Writes the value specified to the offset specified.
 */
void amdgpu_io_wreg(struct amdgpu_device *adev, u32 reg, u32 v)
{
	if ((reg * 4) < adev->rio_mem_size)
		iowrite32(v, adev->rio_mem + (reg * 4));
	else {
		iowrite32((reg * 4), adev->rio_mem + (mmMM_INDEX * 4));
		iowrite32(v, adev->rio_mem + (mmMM_DATA * 4));
	}
}

/**
 * amdgpu_mm_rdoorbell - read a doorbell dword
 *
 * @adev: amdgpu_device pointer
 * @index: doorbell index
 *
 * Returns the value in the doorbell aperture at the
 * requested doorbell index (CIK).
 */
u32 amdgpu_mm_rdoorbell(struct amdgpu_device *adev, u32 index)
{
	if (index < adev->doorbell.num_doorbells) {
		return readl(adev->doorbell.ptr + index);
	} else {
		DRM_ERROR("reading beyond doorbell aperture: 0x%08x!\n", index);
		return 0;
	}
}

/**
 * amdgpu_mm_wdoorbell - write a doorbell dword
 *
 * @adev: amdgpu_device pointer
 * @index: doorbell index
 * @v: value to write
 *
 * Writes @v to the doorbell aperture at the
 * requested doorbell index (CIK).
 */
void amdgpu_mm_wdoorbell(struct amdgpu_device *adev, u32 index, u32 v)
{
	if (index < adev->doorbell.num_doorbells) {
		writel(v, adev->doorbell.ptr + index);
	} else {
		DRM_ERROR("writing beyond doorbell aperture: 0x%08x!\n", index);
	}
}

/**
 * amdgpu_mm_rdoorbell64 - read a doorbell Qword
 *
 * @adev: amdgpu_device pointer
 * @index: doorbell index
 *
 * Returns the value in the doorbell aperture at the
 * requested doorbell index (VEGA10+).
 */
u64 amdgpu_mm_rdoorbell64(struct amdgpu_device *adev, u32 index)
{
	if (index < adev->doorbell.num_doorbells) {
		return atomic64_read((atomic64_t *)(adev->doorbell.ptr + index));
	} else {
		DRM_ERROR("reading beyond doorbell aperture: 0x%08x!\n", index);
		return 0;
	}
}

/**
 * amdgpu_mm_wdoorbell64 - write a doorbell Qword
 *
 * @adev: amdgpu_device pointer
 * @index: doorbell index
 * @v: value to write
 *
 * Writes @v to the doorbell aperture at the
 * requested doorbell index (VEGA10+).
 */
void amdgpu_mm_wdoorbell64(struct amdgpu_device *adev, u32 index, u64 v)
{
	if (index < adev->doorbell.num_doorbells) {
		atomic64_set((atomic64_t *)(adev->doorbell.ptr + index), v);
	} else {
		DRM_ERROR("writing beyond doorbell aperture: 0x%08x!\n", index);
	}
}

/**
 * amdgpu_invalid_rreg - dummy reg read function
 *
 * @adev: amdgpu device pointer
 * @reg: offset of register
 *
 * Dummy register read function.  Used for register blocks
 * that certain asics don't have (all asics).
 * Returns the value in the register.
 */
static uint32_t amdgpu_invalid_rreg(struct amdgpu_device *adev, uint32_t reg)
{
	DRM_ERROR("Invalid callback to read register 0x%04X\n", reg);
	BUG();
	return 0;
}

/**
 * amdgpu_invalid_wreg - dummy reg write function
 *
 * @adev: amdgpu device pointer
 * @reg: offset of register
 * @v: value to write to the register
 *
 * Dummy register write function.  Used for register blocks
 * that certain asics don't have (all asics).
 */
static void amdgpu_invalid_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v)
{
	DRM_ERROR("Invalid callback to write register 0x%04X with 0x%08X\n",
		  reg, v);
	BUG();
}

/**
 * amdgpu_invalid_rreg64 - dummy 64 bit reg read function
 *
 * @adev: amdgpu device pointer
 * @reg: offset of register
 *
 * Dummy register read function.  Used for register blocks
 * that certain asics don't have (all asics).
 * Returns the value in the register.
 */
static uint64_t amdgpu_invalid_rreg64(struct amdgpu_device *adev, uint32_t reg)
{
	DRM_ERROR("Invalid callback to read 64 bit register 0x%04X\n", reg);
	BUG();
	return 0;
}

/**
 * amdgpu_invalid_wreg64 - dummy 64 bit reg write function
 *
 * @adev: amdgpu device pointer
 * @reg: offset of register
 * @v: value to write to the register
 *
 * Dummy register write function.  Used for register blocks
 * that certain asics don't have (all asics).
 */
static void amdgpu_invalid_wreg64(struct amdgpu_device *adev, uint32_t reg, uint64_t v)
{
	DRM_ERROR("Invalid callback to write 64 bit register 0x%04X with 0x%08llX\n",
		  reg, v);
	BUG();
}

/**
 * amdgpu_block_invalid_rreg - dummy reg read function
 *
 * @adev: amdgpu device pointer
 * @block: offset of instance
 * @reg: offset of register
 *
 * Dummy register read function.  Used for register blocks
 * that certain asics don't have (all asics).
 * Returns the value in the register.
 */
static uint32_t amdgpu_block_invalid_rreg(struct amdgpu_device *adev,
					  uint32_t block, uint32_t reg)
{
	DRM_ERROR("Invalid callback to read register 0x%04X in block 0x%04X\n",
		  reg, block);
	BUG();
	return 0;
}

/**
 * amdgpu_block_invalid_wreg - dummy reg write function
 *
 * @adev: amdgpu device pointer
 * @block: offset of instance
 * @reg: offset of register
 * @v: value to write to the register
 *
 * Dummy register write function.  Used for register blocks
 * that certain asics don't have (all asics).
 */
static void amdgpu_block_invalid_wreg(struct amdgpu_device *adev,
				      uint32_t block,
				      uint32_t reg, uint32_t v)
{
	DRM_ERROR("Invalid block callback to write register 0x%04X in block 0x%04X with 0x%08X\n",
		  reg, block, v);
	BUG();
}

/**
 * amdgpu_device_vram_scratch_init - allocate the VRAM scratch page
 *
 * @adev: amdgpu device pointer
 *
 * Allocates a scratch page of VRAM for use by various things in the
 * driver.
 */
static int amdgpu_device_vram_scratch_init(struct amdgpu_device *adev)
{
	return amdgpu_bo_create_kernel(adev, AMDGPU_GPU_PAGE_SIZE,
				       PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM,
				       &adev->vram_scratch.robj,
				       &adev->vram_scratch.gpu_addr,
				       (void **)&adev->vram_scratch.ptr);
}

/**
 * amdgpu_device_vram_scratch_fini - Free the VRAM scratch page
 *
 * @adev: amdgpu device pointer
 *
 * Frees the VRAM scratch page.
 */
static void amdgpu_device_vram_scratch_fini(struct amdgpu_device *adev)
{
	amdgpu_bo_free_kernel(&adev->vram_scratch.robj, NULL, NULL);
}

/**
 * amdgpu_device_program_register_sequence - program an array of registers.
 *
 * @adev: amdgpu_device pointer
 * @registers: pointer to the register array
 * @array_size: size of the register array
 *
 * Programs an array of registers with AND and OR masks.
 * This is a helper for setting golden registers.
 */
void amdgpu_device_program_register_sequence(struct amdgpu_device *adev,
					     const u32 *registers,
					     const u32 array_size)
{
	u32 tmp, reg, and_mask, or_mask;
	int i;

	if (array_size % 3)
		return;

	for (i = 0; i < array_size; i += 3) {
		reg = registers[i + 0];
		and_mask = registers[i + 1];
		or_mask = registers[i + 2];

		if (and_mask == 0xffffffff) {
			tmp = or_mask;
		} else {
			tmp = RREG32(reg);
			tmp &= ~and_mask;
			if (adev->family >= AMDGPU_FAMILY_AI)
				tmp |= (or_mask & and_mask);
			else
				tmp |= or_mask;
		}
		WREG32(reg, tmp);
	}
}

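/*
 * Illustrative golden-register table (added for clarity, not part of the
 * upstream file).  The offsets and masks are invented; real tables live in
 * the per-asic files.  Each triplet is (register offset, AND mask, OR mask):
 * bits covered by the AND mask are cleared and the OR bits are set, while an
 * AND mask of 0xffffffff replaces the whole register.
 */
#if 0
static const u32 example_golden_settings[] = {
	/* reg,   and_mask,   or_mask */
	0x263e, 0xffffffff, 0x12011003,	/* full register replace */
	0x2664, 0x000000ff, 0x00000044,	/* masked update */
};

static void example_program_golden(struct amdgpu_device *adev)
{
	amdgpu_device_program_register_sequence(adev,
						example_golden_settings,
						ARRAY_SIZE(example_golden_settings));
}
#endif
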
/**
 * amdgpu_device_pci_config_reset - reset the GPU
 *
 * @adev: amdgpu_device pointer
 *
 * Resets the GPU using the pci config reset sequence.
 * Only applicable to asics prior to vega10.
 */
void amdgpu_device_pci_config_reset(struct amdgpu_device *adev)
{
	pci_write_config_dword(adev->pdev, 0x7c, AMDGPU_ASIC_RESET_DATA);
}

/*
 * GPU doorbell aperture helpers function.
 */
/**
 * amdgpu_device_doorbell_init - Init doorbell driver information.
 *
 * @adev: amdgpu_device pointer
 *
 * Init doorbell driver information (CIK)
 * Returns 0 on success, error on failure.
 */
static int amdgpu_device_doorbell_init(struct amdgpu_device *adev)
{

	/* No doorbell on SI hardware generation */
	if (adev->asic_type < CHIP_BONAIRE) {
		adev->doorbell.base = 0;
		adev->doorbell.size = 0;
		adev->doorbell.num_doorbells = 0;
		adev->doorbell.ptr = NULL;
		return 0;
	}

	if (pci_resource_flags(adev->pdev, 2) & IORESOURCE_UNSET)
		return -EINVAL;

	amdgpu_asic_init_doorbell_index(adev);

	/* doorbell bar mapping */
	adev->doorbell.base = pci_resource_start(adev->pdev, 2);
	adev->doorbell.size = pci_resource_len(adev->pdev, 2);

	adev->doorbell.num_doorbells = min_t(u32, adev->doorbell.size / sizeof(u32),
					     adev->doorbell_index.max_assignment+1);
	if (adev->doorbell.num_doorbells == 0)
		return -EINVAL;

	/* For Vega, reserve and map two pages on doorbell BAR since SDMA
	 * paging queue doorbell use the second page. The
	 * AMDGPU_DOORBELL64_MAX_ASSIGNMENT definition assumes all the
	 * doorbells are in the first page. So with paging queue enabled,
	 * the max num_doorbells should + 1 page (0x400 in dword)
	 */
	if (adev->asic_type >= CHIP_VEGA10)
		adev->doorbell.num_doorbells += 0x400;

	adev->doorbell.ptr = ioremap(adev->doorbell.base,
				     adev->doorbell.num_doorbells *
				     sizeof(u32));
	if (adev->doorbell.ptr == NULL)
		return -ENOMEM;

	return 0;
}

/**
 * amdgpu_device_doorbell_fini - Tear down doorbell driver information.
 *
 * @adev: amdgpu_device pointer
 *
 * Tear down doorbell driver information (CIK)
 */
static void amdgpu_device_doorbell_fini(struct amdgpu_device *adev)
{
	iounmap(adev->doorbell.ptr);
	adev->doorbell.ptr = NULL;
}


/*
 * amdgpu_device_wb_*()
 * Writeback is the method by which the GPU updates special pages in memory
 * with the status of certain GPU events (fences, ring pointers, etc.).
 */

/**
 * amdgpu_device_wb_fini - Disable Writeback and free memory
 *
 * @adev: amdgpu_device pointer
 *
 * Disables Writeback and frees the Writeback memory (all asics).
 * Used at driver shutdown.
 */
static void amdgpu_device_wb_fini(struct amdgpu_device *adev)
{
	if (adev->wb.wb_obj) {
		amdgpu_bo_free_kernel(&adev->wb.wb_obj,
				      &adev->wb.gpu_addr,
				      (void **)&adev->wb.wb);
		adev->wb.wb_obj = NULL;
	}
}

/**
 * amdgpu_device_wb_init - Init Writeback driver info and allocate memory
 *
 * @adev: amdgpu_device pointer
 *
 * Initializes writeback and allocates writeback memory (all asics).
 * Used at driver startup.
 * Returns 0 on success or an -error on failure.
 */
static int amdgpu_device_wb_init(struct amdgpu_device *adev)
{
	int r;

	if (adev->wb.wb_obj == NULL) {
		/* AMDGPU_MAX_WB * sizeof(uint32_t) * 8 = AMDGPU_MAX_WB 256bit slots */
		r = amdgpu_bo_create_kernel(adev, AMDGPU_MAX_WB * sizeof(uint32_t) * 8,
					    PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT,
					    &adev->wb.wb_obj, &adev->wb.gpu_addr,
					    (void **)&adev->wb.wb);
		if (r) {
			dev_warn(adev->dev, "(%d) create WB bo failed\n", r);
			return r;
		}

		adev->wb.num_wb = AMDGPU_MAX_WB;
		memset(&adev->wb.used, 0, sizeof(adev->wb.used));

		/* clear wb memory */
		memset((char *)adev->wb.wb, 0, AMDGPU_MAX_WB * sizeof(uint32_t) * 8);
	}

	return 0;
}

/**
 * amdgpu_device_wb_get - Allocate a wb entry
 *
 * @adev: amdgpu_device pointer
 * @wb: wb index
 *
 * Allocate a wb slot for use by the driver (all asics).
 * Returns 0 on success or -EINVAL on failure.
 */
int amdgpu_device_wb_get(struct amdgpu_device *adev, u32 *wb)
{
	unsigned long offset = find_first_zero_bit(adev->wb.used, adev->wb.num_wb);

	if (offset < adev->wb.num_wb) {
		__set_bit(offset, adev->wb.used);
		*wb = offset << 3; /* convert to dw offset */
		return 0;
	} else {
		return -EINVAL;
	}
}

/**
 * amdgpu_device_wb_free - Free a wb entry
 *
 * @adev: amdgpu_device pointer
 * @wb: wb index
 *
 * Free a wb slot allocated for use by the driver (all asics)
 */
void amdgpu_device_wb_free(struct amdgpu_device *adev, u32 wb)
{
	wb >>= 3;
	if (wb < adev->wb.num_wb)
		__clear_bit(wb, adev->wb.used);
}

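/*
 * Illustrative sketch (not in the original file) of the writeback slot
 * lifecycle.  The GPU and CPU addresses are derived the same way the ring
 * code does; the surrounding function is hypothetical.
 */
#if 0
static int example_use_wb_slot(struct amdgpu_device *adev)
{
	u32 wb;
	u64 gpu_addr;
	volatile u32 *cpu_ptr;
	int r;

	r = amdgpu_device_wb_get(adev, &wb);	/* wb is a dword offset */
	if (r)
		return r;

	gpu_addr = adev->wb.gpu_addr + (wb * 4);
	cpu_ptr = &adev->wb.wb[wb];
	*cpu_ptr = 0;	/* the GPU later writes a status value here */

	amdgpu_device_wb_free(adev, wb);
	return 0;
}
#endif
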
/**
 * amdgpu_device_resize_fb_bar - try to resize FB BAR
 *
 * @adev: amdgpu_device pointer
 *
 * Try to resize FB BAR to make all VRAM CPU accessible. We try very hard not
 * to fail, but if any of the BARs is not accessible after the size we abort
 * driver loading by returning -ENODEV.
 */
int amdgpu_device_resize_fb_bar(struct amdgpu_device *adev)
{
	u64 space_needed = roundup_pow_of_two(adev->gmc.real_vram_size);
	u32 rbar_size = order_base_2(((space_needed >> 20) | 1)) - 1;
	struct pci_bus *root;
	struct resource *res;
	unsigned i;
	u16 cmd;
	int r;

	/* Bypass for VF */
	if (amdgpu_sriov_vf(adev))
		return 0;

	/* Check if the root BUS has 64bit memory resources */
	root = adev->pdev->bus;
	while (root->parent)
		root = root->parent;

	pci_bus_for_each_resource(root, res, i) {
		if (res && res->flags & (IORESOURCE_MEM | IORESOURCE_MEM_64) &&
		    res->start > 0x100000000ull)
			break;
	}

	/* Trying to resize is pointless without a root hub window above 4GB */
	if (!res)
		return 0;

	/* Disable memory decoding while we change the BAR addresses and size */
	pci_read_config_word(adev->pdev, PCI_COMMAND, &cmd);
	pci_write_config_word(adev->pdev, PCI_COMMAND,
			      cmd & ~PCI_COMMAND_MEMORY);

	/* Free the VRAM and doorbell BAR, we most likely need to move both. */
	amdgpu_device_doorbell_fini(adev);
	if (adev->asic_type >= CHIP_BONAIRE)
		pci_release_resource(adev->pdev, 2);

	pci_release_resource(adev->pdev, 0);

	r = pci_resize_resource(adev->pdev, 0, rbar_size);
	if (r == -ENOSPC)
		DRM_INFO("Not enough PCI address space for a large BAR.");
	else if (r && r != -ENOTSUPP)
		DRM_ERROR("Problem resizing BAR0 (%d).", r);

	pci_assign_unassigned_bus_resources(adev->pdev->bus);

	/* When the doorbell or fb BAR isn't available we have no chance of
	 * using the device.
	 */
	r = amdgpu_device_doorbell_init(adev);
	if (r || (pci_resource_flags(adev->pdev, 0) & IORESOURCE_UNSET))
		return -ENODEV;

	pci_write_config_word(adev->pdev, PCI_COMMAND, cmd);

	return 0;
}

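/*
 * Worked example of the rbar_size math above (added for clarity; the VRAM
 * size is hypothetical): for an 8 GB part, space_needed rounds up to 8 GB,
 * (space_needed >> 20) is 8192 MB, order_base_2(8192 | 1) is 14, so
 * rbar_size is 13.  In the PCI resizable-BAR encoding a value n selects a
 * 2^(n + 20) byte window, and 2^33 bytes is exactly 8 GB.  The "| 1" bumps
 * exact powers of two up one step so the window never ends up smaller than
 * the VRAM size.
 */
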
/*
 * GPU helpers function.
 */
/**
 * amdgpu_device_need_post - check if the hw need post or not
 *
 * @adev: amdgpu_device pointer
 *
 * Check if the asic has been initialized (all asics) at driver startup
 * or post is needed if hw reset is performed.
 * Returns true if need or false if not.
 */
bool amdgpu_device_need_post(struct amdgpu_device *adev)
{
	uint32_t reg;

	if (amdgpu_sriov_vf(adev))
		return false;

	if (amdgpu_passthrough(adev)) {
		/* for FIJI: In whole GPU pass-through virtualization case, after VM reboot
		 * some old smc fw still need driver do vPost otherwise gpu hang, while
		 * those smc fw version above 22.15 doesn't have this flaw, so we force
		 * vpost executed for smc version below 22.15
		 */
		if (adev->asic_type == CHIP_FIJI) {
			int err;
			uint32_t fw_ver;

			err = request_firmware(&adev->pm.fw, "amdgpu/fiji_smc.bin", adev->dev);
			/* force vPost if error occurred */
			if (err)
				return true;

			fw_ver = *((uint32_t *)adev->pm.fw->data + 69);
			if (fw_ver < 0x00160e00)
				return true;
		}
	}

	if (adev->has_hw_reset) {
		adev->has_hw_reset = false;
		return true;
	}

	/* bios scratch used on CIK+ */
	if (adev->asic_type >= CHIP_BONAIRE)
		return amdgpu_atombios_scratch_need_asic_init(adev);

	/* check MEM_SIZE for older asics */
	reg = amdgpu_asic_get_config_memsize(adev);

	if ((reg != 0) && (reg != 0xffffffff))
		return false;

	return true;
}

/* if we get transitioned to only one device, take VGA back */
/**
 * amdgpu_device_vga_set_decode - enable/disable vga decode
 *
 * @cookie: amdgpu_device pointer
 * @state: enable/disable vga decode
 *
 * Enable/disable vga decode (all asics).
 * Returns VGA resource flags.
 */
static unsigned int amdgpu_device_vga_set_decode(void *cookie, bool state)
{
	struct amdgpu_device *adev = cookie;
	amdgpu_asic_set_vga_state(adev, state);
	if (state)
		return VGA_RSRC_LEGACY_IO | VGA_RSRC_LEGACY_MEM |
		       VGA_RSRC_NORMAL_IO | VGA_RSRC_NORMAL_MEM;
	else
		return VGA_RSRC_NORMAL_IO | VGA_RSRC_NORMAL_MEM;
}

/**
 * amdgpu_device_check_block_size - validate the vm block size
 *
 * @adev: amdgpu_device pointer
 *
 * Validates the vm block size specified via module parameter.
 * The vm block size defines number of bits in page table versus page directory,
 * a page is 4KB so we have 12 bits offset, minimum 9 bits in the
 * page table and the remaining bits are in the page directory.
 */
static void amdgpu_device_check_block_size(struct amdgpu_device *adev)
{
	/* defines number of bits in page table versus page directory,
	 * a page is 4KB so we have 12 bits offset, minimum 9 bits in the
	 * page table and the remaining bits are in the page directory */
	if (amdgpu_vm_block_size == -1)
		return;

	if (amdgpu_vm_block_size < 9) {
		dev_warn(adev->dev, "VM page table size (%d) too small\n",
			 amdgpu_vm_block_size);
		amdgpu_vm_block_size = -1;
	}
}

/**
 * amdgpu_device_check_vm_size - validate the vm size
 *
 * @adev: amdgpu_device pointer
 *
 * Validates the vm size in GB specified via module parameter.
 * The VM size is the size of the GPU virtual memory space in GB.
 */
static void amdgpu_device_check_vm_size(struct amdgpu_device *adev)
{
	/* no need to check the default value */
	if (amdgpu_vm_size == -1)
		return;

	if (amdgpu_vm_size < 1) {
		dev_warn(adev->dev, "VM size (%d) too small, min is 1GB\n",
			 amdgpu_vm_size);
		amdgpu_vm_size = -1;
	}
}

static void amdgpu_device_check_smu_prv_buffer_size(struct amdgpu_device *adev)
{
	struct sysinfo si;
	bool is_os_64 = (sizeof(void *) == 8);
	uint64_t total_memory;
	uint64_t dram_size_seven_GB = 0x1B8000000;
	uint64_t dram_size_three_GB = 0xB8000000;

	if (amdgpu_smu_memory_pool_size == 0)
		return;

	if (!is_os_64) {
		DRM_WARN("Not 64-bit OS, feature not supported\n");
		goto def_value;
	}
	si_meminfo(&si);
	total_memory = (uint64_t)si.totalram * si.mem_unit;

	if ((amdgpu_smu_memory_pool_size == 1) ||
	    (amdgpu_smu_memory_pool_size == 2)) {
		if (total_memory < dram_size_three_GB)
			goto def_value1;
	} else if ((amdgpu_smu_memory_pool_size == 4) ||
		   (amdgpu_smu_memory_pool_size == 8)) {
		if (total_memory < dram_size_seven_GB)
			goto def_value1;
	} else {
		DRM_WARN("Smu memory pool size not supported\n");
		goto def_value;
	}
	adev->pm.smu_prv_buffer_size = amdgpu_smu_memory_pool_size << 28;

	return;

def_value1:
	DRM_WARN("No enough system memory\n");
def_value:
	adev->pm.smu_prv_buffer_size = 0;
}

/**
 * amdgpu_device_check_arguments - validate module params
 *
 * @adev: amdgpu_device pointer
 *
 * Validates certain module parameters and updates
 * the associated values used by the driver (all asics).
 */
static int amdgpu_device_check_arguments(struct amdgpu_device *adev)
{
	if (amdgpu_sched_jobs < 4) {
		dev_warn(adev->dev, "sched jobs (%d) must be at least 4\n",
			 amdgpu_sched_jobs);
		amdgpu_sched_jobs = 4;
	} else if (!is_power_of_2(amdgpu_sched_jobs)) {
		dev_warn(adev->dev, "sched jobs (%d) must be a power of 2\n",
			 amdgpu_sched_jobs);
		amdgpu_sched_jobs = roundup_pow_of_two(amdgpu_sched_jobs);
	}

	if (amdgpu_gart_size != -1 && amdgpu_gart_size < 32) {
		/* gart size must be greater or equal to 32M */
		dev_warn(adev->dev, "gart size (%d) too small\n",
			 amdgpu_gart_size);
		amdgpu_gart_size = -1;
	}

	if (amdgpu_gtt_size != -1 && amdgpu_gtt_size < 32) {
		/* gtt size must be greater or equal to 32M */
		dev_warn(adev->dev, "gtt size (%d) too small\n",
			 amdgpu_gtt_size);
		amdgpu_gtt_size = -1;
	}

	/* valid range is between 4 and 9 inclusive */
	if (amdgpu_vm_fragment_size != -1 &&
	    (amdgpu_vm_fragment_size > 9 || amdgpu_vm_fragment_size < 4)) {
		dev_warn(adev->dev, "valid range is between 4 and 9\n");
		amdgpu_vm_fragment_size = -1;
	}

	amdgpu_device_check_smu_prv_buffer_size(adev);

	amdgpu_device_check_vm_size(adev);

	amdgpu_device_check_block_size(adev);

	adev->firmware.load_type = amdgpu_ucode_get_load_type(adev, amdgpu_fw_load_type);

	amdgpu_gmc_tmz_set(adev);

	return 0;
}

/**
 * amdgpu_switcheroo_set_state - set switcheroo state
 *
 * @pdev: pci dev pointer
 * @state: vga_switcheroo state
 *
 * Callback for the switcheroo driver.  Suspends or resumes
 * the asic before or after it is powered up using ACPI methods.
 */
static void amdgpu_switcheroo_set_state(struct pci_dev *pdev, enum vga_switcheroo_state state)
{
	struct drm_device *dev = pci_get_drvdata(pdev);
	int r;

	if (amdgpu_device_supports_boco(dev) && state == VGA_SWITCHEROO_OFF)
		return;

	if (state == VGA_SWITCHEROO_ON) {
		pr_info("switched on\n");
		/* don't suspend or resume card normally */
		dev->switch_power_state = DRM_SWITCH_POWER_CHANGING;

		pci_set_power_state(dev->pdev, PCI_D0);
		pci_restore_state(dev->pdev);
		r = pci_enable_device(dev->pdev);
		if (r)
			DRM_WARN("pci_enable_device failed (%d)\n", r);
		amdgpu_device_resume(dev, true);

		dev->switch_power_state = DRM_SWITCH_POWER_ON;
		drm_kms_helper_poll_enable(dev);
	} else {
		pr_info("switched off\n");
		drm_kms_helper_poll_disable(dev);
		dev->switch_power_state = DRM_SWITCH_POWER_CHANGING;
		amdgpu_device_suspend(dev, true);
		pci_save_state(dev->pdev);
		/* Shut down the device */
		pci_disable_device(dev->pdev);
		pci_set_power_state(dev->pdev, PCI_D3cold);
		dev->switch_power_state = DRM_SWITCH_POWER_OFF;
	}
}

/**
 * amdgpu_switcheroo_can_switch - see if switcheroo state can change
 *
 * @pdev: pci dev pointer
 *
 * Callback for the switcheroo driver.  Check if the switcheroo
 * state can be changed.
 * Returns true if the state can be changed, false if not.
 */
static bool amdgpu_switcheroo_can_switch(struct pci_dev *pdev)
{
	struct drm_device *dev = pci_get_drvdata(pdev);

	/*
	 * FIXME: open_count is protected by drm_global_mutex but that would lead to
	 * locking inversion with the driver load path. And the access here is
	 * completely racy anyway. So don't bother with locking for now.
	 */
	return atomic_read(&dev->open_count) == 0;
}

static const struct vga_switcheroo_client_ops amdgpu_switcheroo_ops = {
	.set_gpu_state = amdgpu_switcheroo_set_state,
	.reprobe = NULL,
	.can_switch = amdgpu_switcheroo_can_switch,
};

/**
 * amdgpu_device_ip_set_clockgating_state - set the CG state
 *
 * @dev: amdgpu_device pointer
 * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
 * @state: clockgating state (gate or ungate)
 *
 * Sets the requested clockgating state for all instances of
 * the hardware IP specified.
 * Returns the error code from the last instance.
 */
int amdgpu_device_ip_set_clockgating_state(void *dev,
					   enum amd_ip_block_type block_type,
					   enum amd_clockgating_state state)
{
	struct amdgpu_device *adev = dev;
	int i, r = 0;

	for (i = 0; i < adev->num_ip_blocks; i++) {
		if (!adev->ip_blocks[i].status.valid)
			continue;
		if (adev->ip_blocks[i].version->type != block_type)
			continue;
		if (!adev->ip_blocks[i].version->funcs->set_clockgating_state)
			continue;
		r = adev->ip_blocks[i].version->funcs->set_clockgating_state(
			(void *)adev, state);
		if (r)
			DRM_ERROR("set_clockgating_state of IP block <%s> failed %d\n",
				  adev->ip_blocks[i].version->funcs->name, r);
	}
	return r;
}

/**
 * amdgpu_device_ip_set_powergating_state - set the PG state
 *
 * @dev: amdgpu_device pointer
 * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
 * @state: powergating state (gate or ungate)
 *
 * Sets the requested powergating state for all instances of
 * the hardware IP specified.
 * Returns the error code from the last instance.
 */
int amdgpu_device_ip_set_powergating_state(void *dev,
					   enum amd_ip_block_type block_type,
					   enum amd_powergating_state state)
{
	struct amdgpu_device *adev = dev;
	int i, r = 0;

	for (i = 0; i < adev->num_ip_blocks; i++) {
		if (!adev->ip_blocks[i].status.valid)
			continue;
		if (adev->ip_blocks[i].version->type != block_type)
			continue;
		if (!adev->ip_blocks[i].version->funcs->set_powergating_state)
			continue;
		r = adev->ip_blocks[i].version->funcs->set_powergating_state(
			(void *)adev, state);
		if (r)
			DRM_ERROR("set_powergating_state of IP block <%s> failed %d\n",
				  adev->ip_blocks[i].version->funcs->name, r);
	}
	return r;
}

/**
 * amdgpu_device_ip_get_clockgating_state - get the CG state
 *
 * @adev: amdgpu_device pointer
 * @flags: clockgating feature flags
 *
 * Walks the list of IPs on the device and updates the clockgating
 * flags for each IP.
 * Updates @flags with the feature flags for each hardware IP where
 * clockgating is enabled.
 */
void amdgpu_device_ip_get_clockgating_state(struct amdgpu_device *adev,
					    u32 *flags)
{
	int i;

	for (i = 0; i < adev->num_ip_blocks; i++) {
		if (!adev->ip_blocks[i].status.valid)
			continue;
		if (adev->ip_blocks[i].version->funcs->get_clockgating_state)
			adev->ip_blocks[i].version->funcs->get_clockgating_state((void *)adev, flags);
	}
}

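/*
 * Illustrative call (not from the original file) showing how a power
 * management path might gate clockgating for one IP type; the choice of GFX
 * and the gate direction are arbitrary here.
 */
#if 0
static void example_gate_gfx_cg(struct amdgpu_device *adev)
{
	int r;

	r = amdgpu_device_ip_set_clockgating_state(adev,
						   AMD_IP_BLOCK_TYPE_GFX,
						   AMD_CG_STATE_GATE);
	if (r)
		DRM_WARN("failed to gate GFX clockgating (%d)\n", r);
}
#endif
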
/**
 * amdgpu_device_ip_wait_for_idle - wait for idle
 *
 * @adev: amdgpu_device pointer
 * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
 *
 * Waits for the requested hardware IP to be idle.
 * Returns 0 for success or a negative error code on failure.
 */
int amdgpu_device_ip_wait_for_idle(struct amdgpu_device *adev,
				   enum amd_ip_block_type block_type)
{
	int i, r;

	for (i = 0; i < adev->num_ip_blocks; i++) {
		if (!adev->ip_blocks[i].status.valid)
			continue;
		if (adev->ip_blocks[i].version->type == block_type) {
			r = adev->ip_blocks[i].version->funcs->wait_for_idle((void *)adev);
			if (r)
				return r;
			break;
		}
	}
	return 0;

}

/**
 * amdgpu_device_ip_is_idle - is the hardware IP idle
 *
 * @adev: amdgpu_device pointer
 * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
 *
 * Check if the hardware IP is idle or not.
 * Returns true if the IP is idle, false if not.
 */
bool amdgpu_device_ip_is_idle(struct amdgpu_device *adev,
			      enum amd_ip_block_type block_type)
{
	int i;

	for (i = 0; i < adev->num_ip_blocks; i++) {
		if (!adev->ip_blocks[i].status.valid)
			continue;
		if (adev->ip_blocks[i].version->type == block_type)
			return adev->ip_blocks[i].version->funcs->is_idle((void *)adev);
	}
	return true;

}

/**
 * amdgpu_device_ip_get_ip_block - get a hw IP pointer
 *
 * @adev: amdgpu_device pointer
 * @type: Type of hardware IP (SMU, GFX, UVD, etc.)
 *
 * Returns a pointer to the hardware IP block structure
 * if it exists for the asic, otherwise NULL.
 */
struct amdgpu_ip_block *
amdgpu_device_ip_get_ip_block(struct amdgpu_device *adev,
			      enum amd_ip_block_type type)
{
	int i;

	for (i = 0; i < adev->num_ip_blocks; i++)
		if (adev->ip_blocks[i].version->type == type)
			return &adev->ip_blocks[i];

	return NULL;
}

/**
 * amdgpu_device_ip_block_version_cmp
 *
 * @adev: amdgpu_device pointer
 * @type: enum amd_ip_block_type
 * @major: major version
 * @minor: minor version
 *
 * return 0 if equal or greater
 * return 1 if smaller or the ip_block doesn't exist
 */
int amdgpu_device_ip_block_version_cmp(struct amdgpu_device *adev,
				       enum amd_ip_block_type type,
				       u32 major, u32 minor)
{
	struct amdgpu_ip_block *ip_block = amdgpu_device_ip_get_ip_block(adev, type);

	if (ip_block && ((ip_block->version->major > major) ||
			((ip_block->version->major == major) &&
			(ip_block->version->minor >= minor))))
		return 0;

	return 1;
}

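/*
 * Illustrative sketch (added, not original) of how callers use the two
 * lookup helpers above; the SDMA block and the 4.1 version check are
 * arbitrary choices for the example.
 */
#if 0
static void example_query_sdma_block(struct amdgpu_device *adev)
{
	struct amdgpu_ip_block *ip_block;

	ip_block = amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_SDMA);
	if (!ip_block)
		return;

	/* 0 means the SDMA block is version 4.1 or newer */
	if (!amdgpu_device_ip_block_version_cmp(adev, AMD_IP_BLOCK_TYPE_SDMA, 4, 1))
		DRM_INFO("SDMA %d.%d is new enough\n",
			 ip_block->version->major, ip_block->version->minor);
}
#endif
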
/**
 * amdgpu_device_ip_block_add
 *
 * @adev: amdgpu_device pointer
 * @ip_block_version: pointer to the IP to add
 *
 * Adds the IP block driver information to the collection of IPs
 * on the asic.
 */
int amdgpu_device_ip_block_add(struct amdgpu_device *adev,
			       const struct amdgpu_ip_block_version *ip_block_version)
{
	if (!ip_block_version)
		return -EINVAL;

	DRM_INFO("add ip block number %d <%s>\n", adev->num_ip_blocks,
		  ip_block_version->funcs->name);

	adev->ip_blocks[adev->num_ip_blocks++].version = ip_block_version;

	return 0;
}

/**
 * amdgpu_device_enable_virtual_display - enable virtual display feature
 *
 * @adev: amdgpu_device pointer
 *
 * Enables the virtual display feature if the user has enabled it via
 * the module parameter virtual_display.  This feature provides a virtual
 * display hardware on headless boards or in virtualized environments.
 * This function parses and validates the configuration string specified by
 * the user and configures the virtual display configuration (number of
 * virtual connectors, crtcs, etc.) specified.
 */
static void amdgpu_device_enable_virtual_display(struct amdgpu_device *adev)
{
	adev->enable_virtual_display = false;

	if (amdgpu_virtual_display) {
		struct drm_device *ddev = adev->ddev;
		const char *pci_address_name = pci_name(ddev->pdev);
		char *pciaddstr, *pciaddstr_tmp, *pciaddname_tmp, *pciaddname;

		pciaddstr = kstrdup(amdgpu_virtual_display, GFP_KERNEL);
		pciaddstr_tmp = pciaddstr;
		while ((pciaddname_tmp = strsep(&pciaddstr_tmp, ";"))) {
			pciaddname = strsep(&pciaddname_tmp, ",");
			if (!strcmp("all", pciaddname)
			    || !strcmp(pci_address_name, pciaddname)) {
				long num_crtc;
				int res = -1;

				adev->enable_virtual_display = true;

				if (pciaddname_tmp)
					res = kstrtol(pciaddname_tmp, 10,
						      &num_crtc);

				if (!res) {
					if (num_crtc < 1)
						num_crtc = 1;
					if (num_crtc > 6)
						num_crtc = 6;
					adev->mode_info.num_crtc = num_crtc;
				} else {
					adev->mode_info.num_crtc = 1;
				}
				break;
			}
		}

		DRM_INFO("virtual display string:%s, %s:virtual_display:%d, num_crtc:%d\n",
			 amdgpu_virtual_display, pci_address_name,
			 adev->enable_virtual_display, adev->mode_info.num_crtc);

		kfree(pciaddstr);
	}
}

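/*
 * Example of the string format parsed above (added for illustration; the
 * PCI addresses are made up).  On the kernel command line,
 *
 *   amdgpu.virtual_display=0000:04:00.0,2;0000:05:00.0,1
 *
 * enables two virtual crtcs on the first device and one on the second, and
 *
 *   amdgpu.virtual_display=all,1
 *
 * matches every amdgpu device.
 */
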
/**
 * amdgpu_device_parse_gpu_info_fw - parse gpu info firmware
 *
 * @adev: amdgpu_device pointer
 *
 * Parses the asic configuration parameters specified in the gpu info
 * firmware and makes them available to the driver for use in configuring
 * the asic.
 * Returns 0 on success, -EINVAL on failure.
 */
static int amdgpu_device_parse_gpu_info_fw(struct amdgpu_device *adev)
{
	const char *chip_name;
	char fw_name[30];
	int err;
	const struct gpu_info_firmware_header_v1_0 *hdr;

	adev->firmware.gpu_info_fw = NULL;

	switch (adev->asic_type) {
	case CHIP_TOPAZ:
	case CHIP_TONGA:
	case CHIP_FIJI:
	case CHIP_POLARIS10:
	case CHIP_POLARIS11:
	case CHIP_POLARIS12:
	case CHIP_VEGAM:
	case CHIP_CARRIZO:
	case CHIP_STONEY:
#ifdef CONFIG_DRM_AMDGPU_SI
	case CHIP_VERDE:
	case CHIP_TAHITI:
	case CHIP_PITCAIRN:
	case CHIP_OLAND:
	case CHIP_HAINAN:
#endif
#ifdef CONFIG_DRM_AMDGPU_CIK
	case CHIP_BONAIRE:
	case CHIP_HAWAII:
	case CHIP_KAVERI:
	case CHIP_KABINI:
	case CHIP_MULLINS:
#endif
	case CHIP_VEGA20:
	default:
		return 0;
	case CHIP_VEGA10:
		chip_name = "vega10";
		break;
	case CHIP_VEGA12:
		chip_name = "vega12";
		break;
	case CHIP_RAVEN:
		if (adev->rev_id >= 8)
			chip_name = "raven2";
		else if (adev->pdev->device == 0x15d8)
			chip_name = "picasso";
		else
			chip_name = "raven";
		break;
	case CHIP_ARCTURUS:
		chip_name = "arcturus";
		break;
	case CHIP_RENOIR:
		chip_name = "renoir";
		break;
	case CHIP_NAVI10:
		chip_name = "navi10";
		break;
	case CHIP_NAVI14:
		chip_name = "navi14";
		break;
	case CHIP_NAVI12:
		chip_name = "navi12";
		break;
	}

	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_gpu_info.bin", chip_name);
	err = request_firmware(&adev->firmware.gpu_info_fw, fw_name, adev->dev);
	if (err) {
		dev_err(adev->dev,
			"Failed to load gpu_info firmware \"%s\"\n",
			fw_name);
		goto out;
	}
	err = amdgpu_ucode_validate(adev->firmware.gpu_info_fw);
	if (err) {
		dev_err(adev->dev,
			"Failed to validate gpu_info firmware \"%s\"\n",
			fw_name);
		goto out;
	}

	hdr = (const struct gpu_info_firmware_header_v1_0 *)adev->firmware.gpu_info_fw->data;
	amdgpu_ucode_print_gpu_info_hdr(&hdr->header);

	switch (hdr->version_major) {
	case 1:
	{
		const struct gpu_info_firmware_v1_0 *gpu_info_fw =
			(const struct gpu_info_firmware_v1_0 *)(adev->firmware.gpu_info_fw->data +
								le32_to_cpu(hdr->header.ucode_array_offset_bytes));

		if (amdgpu_discovery && adev->asic_type >= CHIP_NAVI10) {
			amdgpu_discovery_get_gfx_info(adev);
			goto parse_soc_bounding_box;
		}

		adev->gfx.config.max_shader_engines = le32_to_cpu(gpu_info_fw->gc_num_se);
		adev->gfx.config.max_cu_per_sh = le32_to_cpu(gpu_info_fw->gc_num_cu_per_sh);
		adev->gfx.config.max_sh_per_se = le32_to_cpu(gpu_info_fw->gc_num_sh_per_se);
		adev->gfx.config.max_backends_per_se = le32_to_cpu(gpu_info_fw->gc_num_rb_per_se);
		adev->gfx.config.max_texture_channel_caches =
			le32_to_cpu(gpu_info_fw->gc_num_tccs);
		adev->gfx.config.max_gprs = le32_to_cpu(gpu_info_fw->gc_num_gprs);
		adev->gfx.config.max_gs_threads = le32_to_cpu(gpu_info_fw->gc_num_max_gs_thds);
		adev->gfx.config.gs_vgt_table_depth = le32_to_cpu(gpu_info_fw->gc_gs_table_depth);
		adev->gfx.config.gs_prim_buffer_depth = le32_to_cpu(gpu_info_fw->gc_gsprim_buff_depth);
		adev->gfx.config.double_offchip_lds_buf =
			le32_to_cpu(gpu_info_fw->gc_double_offchip_lds_buffer);
		adev->gfx.cu_info.wave_front_size = le32_to_cpu(gpu_info_fw->gc_wave_size);
		adev->gfx.cu_info.max_waves_per_simd =
			le32_to_cpu(gpu_info_fw->gc_max_waves_per_simd);
		adev->gfx.cu_info.max_scratch_slots_per_cu =
			le32_to_cpu(gpu_info_fw->gc_max_scratch_slots_per_cu);
		adev->gfx.cu_info.lds_size = le32_to_cpu(gpu_info_fw->gc_lds_size);
		if (hdr->version_minor >= 1) {
			const struct gpu_info_firmware_v1_1 *gpu_info_fw =
				(const struct gpu_info_firmware_v1_1 *)(adev->firmware.gpu_info_fw->data +
									le32_to_cpu(hdr->header.ucode_array_offset_bytes));
			adev->gfx.config.num_sc_per_sh =
				le32_to_cpu(gpu_info_fw->num_sc_per_sh);
			adev->gfx.config.num_packer_per_sc =
				le32_to_cpu(gpu_info_fw->num_packer_per_sc);
		}

parse_soc_bounding_box:
		/*
		 * soc bounding box info is not integrated in discovery table,
		 * we always need to parse it from gpu info firmware.
		 */
		if (hdr->version_minor == 2) {
			const struct gpu_info_firmware_v1_2 *gpu_info_fw =
				(const struct gpu_info_firmware_v1_2 *)(adev->firmware.gpu_info_fw->data +
									le32_to_cpu(hdr->header.ucode_array_offset_bytes));
			adev->dm.soc_bounding_box = &gpu_info_fw->soc_bounding_box;
		}
		break;
	}
	default:
		dev_err(adev->dev,
			"Unsupported gpu_info table %d\n", hdr->header.ucode_version);
		err = -EINVAL;
		goto out;
	}
out:
	return err;
}

e3ecdffa
AD
1676/**
1677 * amdgpu_device_ip_early_init - run early init for hardware IPs
1678 *
1679 * @adev: amdgpu_device pointer
1680 *
1681 * Early initialization pass for hardware IPs. The hardware IPs that make
1682 * up each asic are discovered each IP's early_init callback is run. This
1683 * is the first stage in initializing the asic.
1684 * Returns 0 on success, negative error code on failure.
1685 */
06ec9070 1686static int amdgpu_device_ip_early_init(struct amdgpu_device *adev)
d38ceaf9 1687{
aaa36a97 1688 int i, r;
d38ceaf9 1689
483ef985 1690 amdgpu_device_enable_virtual_display(adev);
a6be7570 1691
d38ceaf9 1692 switch (adev->asic_type) {
aaa36a97
AD
1693 case CHIP_TOPAZ:
1694 case CHIP_TONGA:
48299f95 1695 case CHIP_FIJI:
2cc0c0b5 1696 case CHIP_POLARIS10:
32cc7e53 1697 case CHIP_POLARIS11:
c4642a47 1698 case CHIP_POLARIS12:
32cc7e53 1699 case CHIP_VEGAM:
aaa36a97 1700 case CHIP_CARRIZO:
39bb0c92
SL
1701 case CHIP_STONEY:
1702 if (adev->asic_type == CHIP_CARRIZO || adev->asic_type == CHIP_STONEY)
aaa36a97
AD
1703 adev->family = AMDGPU_FAMILY_CZ;
1704 else
1705 adev->family = AMDGPU_FAMILY_VI;
1706
1707 r = vi_set_ip_blocks(adev);
1708 if (r)
1709 return r;
1710 break;
33f34802
KW
1711#ifdef CONFIG_DRM_AMDGPU_SI
1712 case CHIP_VERDE:
1713 case CHIP_TAHITI:
1714 case CHIP_PITCAIRN:
1715 case CHIP_OLAND:
1716 case CHIP_HAINAN:
295d0daf 1717 adev->family = AMDGPU_FAMILY_SI;
33f34802
KW
1718 r = si_set_ip_blocks(adev);
1719 if (r)
1720 return r;
1721 break;
1722#endif
a2e73f56
AD
1723#ifdef CONFIG_DRM_AMDGPU_CIK
1724 case CHIP_BONAIRE:
1725 case CHIP_HAWAII:
1726 case CHIP_KAVERI:
1727 case CHIP_KABINI:
1728 case CHIP_MULLINS:
1729 if ((adev->asic_type == CHIP_BONAIRE) || (adev->asic_type == CHIP_HAWAII))
1730 adev->family = AMDGPU_FAMILY_CI;
1731 else
1732 adev->family = AMDGPU_FAMILY_KV;
1733
1734 r = cik_set_ip_blocks(adev);
1735 if (r)
1736 return r;
1737 break;
1738#endif
e48a3cd9
AD
1739 case CHIP_VEGA10:
1740 case CHIP_VEGA12:
e4bd8170 1741 case CHIP_VEGA20:
e48a3cd9 1742 case CHIP_RAVEN:
61cf44c1 1743 case CHIP_ARCTURUS:
b51a26a0
HR
1744 case CHIP_RENOIR:
1745 if (adev->asic_type == CHIP_RAVEN ||
1746 adev->asic_type == CHIP_RENOIR)
2ca8a5d2
CZ
1747 adev->family = AMDGPU_FAMILY_RV;
1748 else
1749 adev->family = AMDGPU_FAMILY_AI;
460826e6
KW
1750
1751 r = soc15_set_ip_blocks(adev);
1752 if (r)
1753 return r;
1754 break;
0a5b8c7b 1755 case CHIP_NAVI10:
7ecb5cd4 1756 case CHIP_NAVI14:
4808cf9c 1757 case CHIP_NAVI12:
0a5b8c7b
HR
1758 adev->family = AMDGPU_FAMILY_NV;
1759
1760 r = nv_set_ip_blocks(adev);
1761 if (r)
1762 return r;
1763 break;
d38ceaf9
AD
1764 default:
1765 /* FIXME: not supported yet */
1766 return -EINVAL;
1767 }
1768
e2a75f88
AD
1769 r = amdgpu_device_parse_gpu_info_fw(adev);
1770 if (r)
1771 return r;
1772
1884734a 1773 amdgpu_amdkfd_device_probe(adev);
1774
3149d9da 1775 if (amdgpu_sriov_vf(adev)) {
122078de
ML
1776 /* handle vbios stuff prior to full access mode for the new handshake */
1777 if (adev->virt.req_init_data_ver == 1) {
1778 if (!amdgpu_get_bios(adev)) {
1779 DRM_ERROR("failed to get vbios\n");
1780 return -EINVAL;
1781 }
1782
1783 r = amdgpu_atombios_init(adev);
1784 if (r) {
1785 dev_err(adev->dev, "amdgpu_atombios_init failed\n");
1786 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_INIT_FAIL, 0, 0);
1787 return r;
1788 }
1789 }
2f294132 1790 }
122078de 1791
2f294132
ML
1792 /* we need to send REQ_GPU here for the legacy handshake, otherwise the vbios
1793 * will not be prepared by the host for this VF */
1794 if (amdgpu_sriov_vf(adev) && adev->virt.req_init_data_ver < 1) {
3149d9da
XY
1795 r = amdgpu_virt_request_full_gpu(adev, true);
1796 if (r)
2f294132 1797 return r;
3149d9da
XY
1798 }
1799
3b94fb10 1800 adev->pm.pp_feature = amdgpu_pp_feature_mask;
a35ad98b 1801 if (amdgpu_sriov_vf(adev) || sched_policy == KFD_SCHED_POLICY_NO_HWS)
00544006 1802 adev->pm.pp_feature &= ~PP_GFXOFF_MASK;
00f54b97 1803
d38ceaf9
AD
1804 for (i = 0; i < adev->num_ip_blocks; i++) {
1805 if ((amdgpu_ip_block_mask & (1 << i)) == 0) {
ed8cf00c
HR
1806 DRM_ERROR("disabled ip block: %d <%s>\n",
1807 i, adev->ip_blocks[i].version->funcs->name);
a1255107 1808 adev->ip_blocks[i].status.valid = false;
d38ceaf9 1809 } else {
a1255107
AD
1810 if (adev->ip_blocks[i].version->funcs->early_init) {
1811 r = adev->ip_blocks[i].version->funcs->early_init((void *)adev);
2c1a2784 1812 if (r == -ENOENT) {
a1255107 1813 adev->ip_blocks[i].status.valid = false;
2c1a2784 1814 } else if (r) {
a1255107
AD
1815 DRM_ERROR("early_init of IP block <%s> failed %d\n",
1816 adev->ip_blocks[i].version->funcs->name, r);
d38ceaf9 1817 return r;
2c1a2784 1818 } else {
a1255107 1819 adev->ip_blocks[i].status.valid = true;
2c1a2784 1820 }
974e6b64 1821 } else {
a1255107 1822 adev->ip_blocks[i].status.valid = true;
d38ceaf9 1823 }
d38ceaf9 1824 }
21a249ca
AD
1825 /* get the vbios after the asic_funcs are set up */
1826 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON) {
122078de
ML
1827 /* skip vbios handling for new handshake */
1828 if (amdgpu_sriov_vf(adev) && adev->virt.req_init_data_ver == 1)
1829 continue;
1830
21a249ca
AD
1831 /* Read BIOS */
1832 if (!amdgpu_get_bios(adev))
1833 return -EINVAL;
1834
1835 r = amdgpu_atombios_init(adev);
1836 if (r) {
1837 dev_err(adev->dev, "amdgpu_atombios_init failed\n");
1838 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_INIT_FAIL, 0, 0);
1839 return r;
1840 }
1841 }
d38ceaf9
AD
1842 }
1843
395d1fb9
NH
1844 adev->cg_flags &= amdgpu_cg_mask;
1845 adev->pg_flags &= amdgpu_pg_mask;
1846
d38ceaf9
AD
1847 return 0;
1848}
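/*
 * Illustrative note (not from the driver source): the early-init loop above
 * honours the ip_block_mask module parameter, where bit i corresponds to
 * adev->ip_blocks[i].  Assuming the usual parameter naming, booting with
 * e.g. amdgpu.ip_block_mask=0xfffffffd would clear bit 1 and mark the second
 * discovered IP block invalid, so its sw_init/hw_init callbacks are skipped.
 */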
1849
0a4f2520
RZ
1850static int amdgpu_device_ip_hw_init_phase1(struct amdgpu_device *adev)
1851{
1852 int i, r;
1853
1854 for (i = 0; i < adev->num_ip_blocks; i++) {
1855 if (!adev->ip_blocks[i].status.sw)
1856 continue;
1857 if (adev->ip_blocks[i].status.hw)
1858 continue;
1859 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
2d11fd3f 1860 (amdgpu_sriov_vf(adev) && (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP)) ||
0a4f2520
RZ
1861 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH) {
1862 r = adev->ip_blocks[i].version->funcs->hw_init(adev);
1863 if (r) {
1864 DRM_ERROR("hw_init of IP block <%s> failed %d\n",
1865 adev->ip_blocks[i].version->funcs->name, r);
1866 return r;
1867 }
1868 adev->ip_blocks[i].status.hw = true;
1869 }
1870 }
1871
1872 return 0;
1873}
1874
1875static int amdgpu_device_ip_hw_init_phase2(struct amdgpu_device *adev)
1876{
1877 int i, r;
1878
1879 for (i = 0; i < adev->num_ip_blocks; i++) {
1880 if (!adev->ip_blocks[i].status.sw)
1881 continue;
1882 if (adev->ip_blocks[i].status.hw)
1883 continue;
1884 r = adev->ip_blocks[i].version->funcs->hw_init(adev);
1885 if (r) {
1886 DRM_ERROR("hw_init of IP block <%s> failed %d\n",
1887 adev->ip_blocks[i].version->funcs->name, r);
1888 return r;
1889 }
1890 adev->ip_blocks[i].status.hw = true;
1891 }
1892
1893 return 0;
1894}
1895
7a3e0bb2
RZ
1896static int amdgpu_device_fw_loading(struct amdgpu_device *adev)
1897{
1898 int r = 0;
1899 int i;
80f41f84 1900 uint32_t smu_version;
7a3e0bb2
RZ
1901
1902 if (adev->asic_type >= CHIP_VEGA10) {
1903 for (i = 0; i < adev->num_ip_blocks; i++) {
482f0e53
ML
1904 if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_PSP)
1905 continue;
1906
1907 /* no need to do the fw loading again if already done */
1908 if (adev->ip_blocks[i].status.hw == true)
1909 break;
1910
1911 if (adev->in_gpu_reset || adev->in_suspend) {
1912 r = adev->ip_blocks[i].version->funcs->resume(adev);
1913 if (r) {
1914 DRM_ERROR("resume of IP block <%s> failed %d\n",
7a3e0bb2 1915 adev->ip_blocks[i].version->funcs->name, r);
482f0e53
ML
1916 return r;
1917 }
1918 } else {
1919 r = adev->ip_blocks[i].version->funcs->hw_init(adev);
1920 if (r) {
1921 DRM_ERROR("hw_init of IP block <%s> failed %d\n",
1922 adev->ip_blocks[i].version->funcs->name, r);
1923 return r;
7a3e0bb2 1924 }
7a3e0bb2 1925 }
482f0e53
ML
1926
1927 adev->ip_blocks[i].status.hw = true;
1928 break;
7a3e0bb2
RZ
1929 }
1930 }
482f0e53 1931
8973d9ec
ED
1932 if (!amdgpu_sriov_vf(adev) || adev->asic_type == CHIP_TONGA)
1933 r = amdgpu_pm_load_smu_firmware(adev, &smu_version);
7a3e0bb2 1934
80f41f84 1935 return r;
7a3e0bb2
RZ
1936}
1937
e3ecdffa
AD
1938/**
1939 * amdgpu_device_ip_init - run init for hardware IPs
1940 *
1941 * @adev: amdgpu_device pointer
1942 *
1943 * Main initialization pass for hardware IPs. The list of all the hardware
1944 * IPs that make up the asic is walked and the sw_init and hw_init callbacks
1945 * are run. sw_init initializes the software state associated with each IP
1946 * and hw_init initializes the hardware associated with each IP.
1947 * Returns 0 on success, negative error code on failure.
1948 */
06ec9070 1949static int amdgpu_device_ip_init(struct amdgpu_device *adev)
d38ceaf9
AD
1950{
1951 int i, r;
1952
c030f2e4 1953 r = amdgpu_ras_init(adev);
1954 if (r)
1955 return r;
1956
2f294132
ML
1957 if (amdgpu_sriov_vf(adev) && adev->virt.req_init_data_ver > 0) {
1958 r = amdgpu_virt_request_full_gpu(adev, true);
1959 if (r)
1960 return -EAGAIN;
1961 }
1962
d38ceaf9 1963 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 1964 if (!adev->ip_blocks[i].status.valid)
d38ceaf9 1965 continue;
a1255107 1966 r = adev->ip_blocks[i].version->funcs->sw_init((void *)adev);
2c1a2784 1967 if (r) {
a1255107
AD
1968 DRM_ERROR("sw_init of IP block <%s> failed %d\n",
1969 adev->ip_blocks[i].version->funcs->name, r);
72d3f592 1970 goto init_failed;
2c1a2784 1971 }
a1255107 1972 adev->ip_blocks[i].status.sw = true;
bfca0289 1973
d38ceaf9 1974 /* need to do gmc hw init early so we can allocate gpu mem */
a1255107 1975 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) {
06ec9070 1976 r = amdgpu_device_vram_scratch_init(adev);
2c1a2784
AD
1977 if (r) {
1978 DRM_ERROR("amdgpu_vram_scratch_init failed %d\n", r);
72d3f592 1979 goto init_failed;
2c1a2784 1980 }
a1255107 1981 r = adev->ip_blocks[i].version->funcs->hw_init((void *)adev);
2c1a2784
AD
1982 if (r) {
1983 DRM_ERROR("hw_init %d failed %d\n", i, r);
72d3f592 1984 goto init_failed;
2c1a2784 1985 }
06ec9070 1986 r = amdgpu_device_wb_init(adev);
2c1a2784 1987 if (r) {
06ec9070 1988 DRM_ERROR("amdgpu_device_wb_init failed %d\n", r);
72d3f592 1989 goto init_failed;
2c1a2784 1990 }
a1255107 1991 adev->ip_blocks[i].status.hw = true;
2493664f
ML
1992
1993 /* right after GMC hw init, we create CSA */
f92d5c61 1994 if (amdgpu_mcbp || amdgpu_sriov_vf(adev)) {
1e256e27
RZ
1995 r = amdgpu_allocate_static_csa(adev, &adev->virt.csa_obj,
1996 AMDGPU_GEM_DOMAIN_VRAM,
1997 AMDGPU_CSA_SIZE);
2493664f
ML
1998 if (r) {
1999 DRM_ERROR("allocate CSA failed %d\n", r);
72d3f592 2000 goto init_failed;
2493664f
ML
2001 }
2002 }
d38ceaf9
AD
2003 }
2004 }
2005
c9ffa427
YT
2006 if (amdgpu_sriov_vf(adev))
2007 amdgpu_virt_init_data_exchange(adev);
2008
533aed27
AG
2009 r = amdgpu_ib_pool_init(adev);
2010 if (r) {
2011 dev_err(adev->dev, "IB initialization failed (%d).\n", r);
2012 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_IB_INIT_FAIL, 0, r);
2013 goto init_failed;
2014 }
2015
c8963ea4
RZ
2016 r = amdgpu_ucode_create_bo(adev); /* create ucode bo when sw_init complete */
2017 if (r)
72d3f592 2018 goto init_failed;
0a4f2520
RZ
2019
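	/*
	 * Hardware bring-up is split in two phases: phase1 initializes the
	 * COMMON and IH blocks (plus PSP under SR-IOV), then the microcode is
	 * loaded, and phase2 initializes the remaining blocks that depend on
	 * that firmware being in place.
	 */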
2020 r = amdgpu_device_ip_hw_init_phase1(adev);
2021 if (r)
72d3f592 2022 goto init_failed;
0a4f2520 2023
7a3e0bb2
RZ
2024 r = amdgpu_device_fw_loading(adev);
2025 if (r)
72d3f592 2026 goto init_failed;
7a3e0bb2 2027
0a4f2520
RZ
2028 r = amdgpu_device_ip_hw_init_phase2(adev);
2029 if (r)
72d3f592 2030 goto init_failed;
d38ceaf9 2031
121a2bc6
AG
2032 /*
2033 * retired pages will be loaded from eeprom and reserved here;
2034 * this should be called after amdgpu_device_ip_hw_init_phase2 since
2035 * for some ASICs the RAS EEPROM code relies on the SMU being fully
2036 * functional for I2C communication, which is only true at this point.
2037 * recovery_init may fail, but it can free all resources allocated by
2038 * itself and its failure should not stop the amdgpu init process.
2039 *
2040 * Note: theoretically, this should be called before all vram allocations
2041 * to protect retired pages from being reused.
2042 */
2043 amdgpu_ras_recovery_init(adev);
2044
3e2e2ab5
HZ
2045 if (adev->gmc.xgmi.num_physical_nodes > 1)
2046 amdgpu_xgmi_add_device(adev);
1884734a 2047 amdgpu_amdkfd_device_init(adev);
c6332b97 2048
bd607166
KR
2049 amdgpu_fru_get_product_info(adev);
2050
72d3f592 2051init_failed:
c9ffa427 2052 if (amdgpu_sriov_vf(adev))
c6332b97 2053 amdgpu_virt_release_full_gpu(adev, true);
2054
72d3f592 2055 return r;
d38ceaf9
AD
2056}
2057
e3ecdffa
AD
2058/**
2059 * amdgpu_device_fill_reset_magic - writes reset magic to gart pointer
2060 *
2061 * @adev: amdgpu_device pointer
2062 *
2063 * Writes a reset magic value to the gart pointer in VRAM. The driver calls
2064 * this function before a GPU reset. If the value is retained after a
2065 * GPU reset, VRAM has not been lost. Some GPU resets may destroy VRAM contents.
2066 */
06ec9070 2067static void amdgpu_device_fill_reset_magic(struct amdgpu_device *adev)
0c49e0b8
CZ
2068{
2069 memcpy(adev->reset_magic, adev->gart.ptr, AMDGPU_RESET_MAGIC_NUM);
2070}
2071
e3ecdffa
AD
2072/**
2073 * amdgpu_device_check_vram_lost - check if vram is valid
2074 *
2075 * @adev: amdgpu_device pointer
2076 *
2077 * Checks the reset magic value written to the gart pointer in VRAM.
2078 * The driver calls this after a GPU reset to see if the contents of
2079 * VRAM are lost or not.
2080 * Returns true if VRAM is lost, false if not.
2081 */
06ec9070 2082static bool amdgpu_device_check_vram_lost(struct amdgpu_device *adev)
0c49e0b8 2083{
dadce777
EQ
2084 if (memcmp(adev->gart.ptr, adev->reset_magic,
2085 AMDGPU_RESET_MAGIC_NUM))
2086 return true;
2087
2088 if (!adev->in_gpu_reset)
2089 return false;
2090
2091 /*
2092 * For all ASICs with baco/mode1 reset, the VRAM is
2093 * always assumed to be lost.
2094 */
2095 switch (amdgpu_asic_reset_method(adev)) {
2096 case AMD_RESET_METHOD_BACO:
2097 case AMD_RESET_METHOD_MODE1:
2098 return true;
2099 default:
2100 return false;
2101 }
0c49e0b8
CZ
2102}
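/*
 * Illustrative note (simplified, not from this file): the magic value is
 * written once via amdgpu_device_fill_reset_magic() during late init, and
 * the GPU reset path later calls amdgpu_device_check_vram_lost() to decide
 * whether buffers backed by VRAM must be treated as lost.
 */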
2103
e3ecdffa 2104/**
1112a46b 2105 * amdgpu_device_set_cg_state - set clockgating for amdgpu device
e3ecdffa
AD
2106 *
2107 * @adev: amdgpu_device pointer
b8b72130 2108 * @state: clockgating state (gate or ungate)
e3ecdffa 2109 *
e3ecdffa 2110 * The list of all the hardware IPs that make up the asic is walked and the
1112a46b
RZ
2111 * set_clockgating_state callbacks are run.
2112 * During the late init pass, clockgating is enabled for the hardware IPs;
2113 * during fini or suspend, it is disabled again.
e3ecdffa
AD
2114 * Returns 0 on success, negative error code on failure.
2115 */
fdd34271 2116
1112a46b
RZ
2117static int amdgpu_device_set_cg_state(struct amdgpu_device *adev,
2118 enum amd_clockgating_state state)
d38ceaf9 2119{
1112a46b 2120 int i, j, r;
d38ceaf9 2121
4a2ba394
SL
2122 if (amdgpu_emu_mode == 1)
2123 return 0;
2124
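	/*
	 * Walk the IP blocks in init order when gating and in reverse order
	 * when ungating, mirroring the hw init/fini ordering.
	 */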
1112a46b
RZ
2125 for (j = 0; j < adev->num_ip_blocks; j++) {
2126 i = state == AMD_CG_STATE_GATE ? j : adev->num_ip_blocks - j - 1;
a2d31dc3 2127 if (!adev->ip_blocks[i].status.late_initialized)
d38ceaf9 2128 continue;
4a446d55 2129 /* skip CG for VCE/UVD, it's handled specially */
a1255107 2130 if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_UVD &&
57716327 2131 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCE &&
34319b32 2132 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCN &&
52f2e779 2133 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_JPEG &&
57716327 2134 adev->ip_blocks[i].version->funcs->set_clockgating_state) {
4a446d55 2135 /* enable clockgating to save power */
a1255107 2136 r = adev->ip_blocks[i].version->funcs->set_clockgating_state((void *)adev,
1112a46b 2137 state);
4a446d55
AD
2138 if (r) {
2139 DRM_ERROR("set_clockgating_state(gate) of IP block <%s> failed %d\n",
a1255107 2140 adev->ip_blocks[i].version->funcs->name, r);
4a446d55
AD
2141 return r;
2142 }
b0b00ff1 2143 }
d38ceaf9 2144 }
06b18f61 2145
c9f96fd5
RZ
2146 return 0;
2147}
2148
1112a46b 2149static int amdgpu_device_set_pg_state(struct amdgpu_device *adev, enum amd_powergating_state state)
c9f96fd5 2150{
1112a46b 2151 int i, j, r;
06b18f61 2152
c9f96fd5
RZ
2153 if (amdgpu_emu_mode == 1)
2154 return 0;
2155
1112a46b
RZ
2156 for (j = 0; j < adev->num_ip_blocks; j++) {
2157 i = state == AMD_PG_STATE_GATE ? j : adev->num_ip_blocks - j - 1;
a2d31dc3 2158 if (!adev->ip_blocks[i].status.late_initialized)
c9f96fd5
RZ
2159 continue;
2160 /* skip PG for VCE/UVD, it's handled specially */
2161 if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_UVD &&
2162 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCE &&
2163 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCN &&
52f2e779 2164 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_JPEG &&
c9f96fd5
RZ
2165 adev->ip_blocks[i].version->funcs->set_powergating_state) {
2166 /* enable powergating to save power */
2167 r = adev->ip_blocks[i].version->funcs->set_powergating_state((void *)adev,
1112a46b 2168 state);
c9f96fd5
RZ
2169 if (r) {
2170 DRM_ERROR("set_powergating_state(gate) of IP block <%s> failed %d\n",
2171 adev->ip_blocks[i].version->funcs->name, r);
2172 return r;
2173 }
2174 }
2175 }
2dc80b00
S
2176 return 0;
2177}
2178
beff74bc
AD
2179static int amdgpu_device_enable_mgpu_fan_boost(void)
2180{
2181 struct amdgpu_gpu_instance *gpu_ins;
2182 struct amdgpu_device *adev;
2183 int i, ret = 0;
2184
2185 mutex_lock(&mgpu_info.mutex);
2186
2187 /*
2188 * MGPU fan boost feature should be enabled
2189 * only when there are two or more dGPUs in
2190 * the system
2191 */
2192 if (mgpu_info.num_dgpu < 2)
2193 goto out;
2194
2195 for (i = 0; i < mgpu_info.num_dgpu; i++) {
2196 gpu_ins = &(mgpu_info.gpu_ins[i]);
2197 adev = gpu_ins->adev;
2198 if (!(adev->flags & AMD_IS_APU) &&
2199 !gpu_ins->mgpu_fan_enabled &&
2200 adev->powerplay.pp_funcs &&
2201 adev->powerplay.pp_funcs->enable_mgpu_fan_boost) {
2202 ret = amdgpu_dpm_enable_mgpu_fan_boost(adev);
2203 if (ret)
2204 break;
2205
2206 gpu_ins->mgpu_fan_enabled = 1;
2207 }
2208 }
2209
2210out:
2211 mutex_unlock(&mgpu_info.mutex);
2212
2213 return ret;
2214}
2215
e3ecdffa
AD
2216/**
2217 * amdgpu_device_ip_late_init - run late init for hardware IPs
2218 *
2219 * @adev: amdgpu_device pointer
2220 *
2221 * Late initialization pass for hardware IPs. The list of all the hardware
2222 * IPs that make up the asic is walked and the late_init callbacks are run.
2223 * late_init covers any special initialization that an IP requires
2224 * after all of the IPs have been initialized or something that needs to happen
2225 * late in the init process.
2226 * Returns 0 on success, negative error code on failure.
2227 */
06ec9070 2228static int amdgpu_device_ip_late_init(struct amdgpu_device *adev)
2dc80b00 2229{
60599a03 2230 struct amdgpu_gpu_instance *gpu_instance;
2dc80b00
S
2231 int i = 0, r;
2232
2233 for (i = 0; i < adev->num_ip_blocks; i++) {
73f847db 2234 if (!adev->ip_blocks[i].status.hw)
2dc80b00
S
2235 continue;
2236 if (adev->ip_blocks[i].version->funcs->late_init) {
2237 r = adev->ip_blocks[i].version->funcs->late_init((void *)adev);
2238 if (r) {
2239 DRM_ERROR("late_init of IP block <%s> failed %d\n",
2240 adev->ip_blocks[i].version->funcs->name, r);
2241 return r;
2242 }
2dc80b00 2243 }
73f847db 2244 adev->ip_blocks[i].status.late_initialized = true;
2dc80b00
S
2245 }
2246
a891d239
DL
2247 amdgpu_ras_set_error_query_ready(adev, true);
2248
1112a46b
RZ
2249 amdgpu_device_set_cg_state(adev, AMD_CG_STATE_GATE);
2250 amdgpu_device_set_pg_state(adev, AMD_PG_STATE_GATE);
916ac57f 2251
06ec9070 2252 amdgpu_device_fill_reset_magic(adev);
d38ceaf9 2253
beff74bc
AD
2254 r = amdgpu_device_enable_mgpu_fan_boost();
2255 if (r)
2256 DRM_ERROR("enable mgpu fan boost failed (%d).\n", r);
2257
60599a03
EQ
2258
2259 if (adev->gmc.xgmi.num_physical_nodes > 1) {
2260 mutex_lock(&mgpu_info.mutex);
2261
2262 /*
2263 * Reset device p-state to low as this was booted with high.
2264 *
2265 * This should be performed only after all devices from the same
2266 * hive get initialized.
2267 *
2268 * However, the number of devices in the hive is not known in advance,
2269 * as it is counted one by one during device initialization.
2270 *
2271 * So we wait for all XGMI interlinked devices to be initialized.
2272 * This may bring some delay as those devices may come from
2273 * different hives. But that should be OK.
2274 */
2275 if (mgpu_info.num_dgpu == adev->gmc.xgmi.num_physical_nodes) {
2276 for (i = 0; i < mgpu_info.num_gpu; i++) {
2277 gpu_instance = &(mgpu_info.gpu_ins[i]);
2278 if (gpu_instance->adev->flags & AMD_IS_APU)
2279 continue;
2280
d84a430d
JK
2281 r = amdgpu_xgmi_set_pstate(gpu_instance->adev,
2282 AMDGPU_XGMI_PSTATE_MIN);
60599a03
EQ
2283 if (r) {
2284 DRM_ERROR("pstate setting failed (%d).\n", r);
2285 break;
2286 }
2287 }
2288 }
2289
2290 mutex_unlock(&mgpu_info.mutex);
2291 }
2292
d38ceaf9
AD
2293 return 0;
2294}
2295
e3ecdffa
AD
2296/**
2297 * amdgpu_device_ip_fini - run fini for hardware IPs
2298 *
2299 * @adev: amdgpu_device pointer
2300 *
2301 * Main teardown pass for hardware IPs. The list of all the hardware
2302 * IPs that make up the asic is walked and the hw_fini and sw_fini callbacks
2303 * are run. hw_fini tears down the hardware associated with each IP
2304 * and sw_fini tears down any software state associated with each IP.
2305 * Returns 0 on success, negative error code on failure.
2306 */
06ec9070 2307static int amdgpu_device_ip_fini(struct amdgpu_device *adev)
d38ceaf9
AD
2308{
2309 int i, r;
2310
c030f2e4 2311 amdgpu_ras_pre_fini(adev);
2312
a82400b5
AG
2313 if (adev->gmc.xgmi.num_physical_nodes > 1)
2314 amdgpu_xgmi_remove_device(adev);
2315
1884734a 2316 amdgpu_amdkfd_device_fini(adev);
05df1f01
RZ
2317
2318 amdgpu_device_set_pg_state(adev, AMD_PG_STATE_UNGATE);
fdd34271
RZ
2319 amdgpu_device_set_cg_state(adev, AMD_CG_STATE_UNGATE);
2320
3e96dbfd
AD
2321 /* need to disable SMC first */
2322 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 2323 if (!adev->ip_blocks[i].status.hw)
3e96dbfd 2324 continue;
fdd34271 2325 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) {
a1255107 2326 r = adev->ip_blocks[i].version->funcs->hw_fini((void *)adev);
3e96dbfd
AD
2327 /* XXX handle errors */
2328 if (r) {
2329 DRM_DEBUG("hw_fini of IP block <%s> failed %d\n",
a1255107 2330 adev->ip_blocks[i].version->funcs->name, r);
3e96dbfd 2331 }
a1255107 2332 adev->ip_blocks[i].status.hw = false;
3e96dbfd
AD
2333 break;
2334 }
2335 }
2336
d38ceaf9 2337 for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
a1255107 2338 if (!adev->ip_blocks[i].status.hw)
d38ceaf9 2339 continue;
8201a67a 2340
a1255107 2341 r = adev->ip_blocks[i].version->funcs->hw_fini((void *)adev);
d38ceaf9 2342 /* XXX handle errors */
2c1a2784 2343 if (r) {
a1255107
AD
2344 DRM_DEBUG("hw_fini of IP block <%s> failed %d\n",
2345 adev->ip_blocks[i].version->funcs->name, r);
2c1a2784 2346 }
8201a67a 2347
a1255107 2348 adev->ip_blocks[i].status.hw = false;
d38ceaf9
AD
2349 }
2350
9950cda2 2351
d38ceaf9 2352 for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
a1255107 2353 if (!adev->ip_blocks[i].status.sw)
d38ceaf9 2354 continue;
c12aba3a
ML
2355
2356 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) {
c8963ea4 2357 amdgpu_ucode_free_bo(adev);
1e256e27 2358 amdgpu_free_static_csa(&adev->virt.csa_obj);
c12aba3a
ML
2359 amdgpu_device_wb_fini(adev);
2360 amdgpu_device_vram_scratch_fini(adev);
533aed27 2361 amdgpu_ib_pool_fini(adev);
c12aba3a
ML
2362 }
2363
a1255107 2364 r = adev->ip_blocks[i].version->funcs->sw_fini((void *)adev);
d38ceaf9 2365 /* XXX handle errors */
2c1a2784 2366 if (r) {
a1255107
AD
2367 DRM_DEBUG("sw_fini of IP block <%s> failed %d\n",
2368 adev->ip_blocks[i].version->funcs->name, r);
2c1a2784 2369 }
a1255107
AD
2370 adev->ip_blocks[i].status.sw = false;
2371 adev->ip_blocks[i].status.valid = false;
d38ceaf9
AD
2372 }
2373
a6dcfd9c 2374 for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
a1255107 2375 if (!adev->ip_blocks[i].status.late_initialized)
8a2eef1d 2376 continue;
a1255107
AD
2377 if (adev->ip_blocks[i].version->funcs->late_fini)
2378 adev->ip_blocks[i].version->funcs->late_fini((void *)adev);
2379 adev->ip_blocks[i].status.late_initialized = false;
a6dcfd9c
ML
2380 }
2381
c030f2e4 2382 amdgpu_ras_fini(adev);
2383
030308fc 2384 if (amdgpu_sriov_vf(adev))
24136135
ML
2385 if (amdgpu_virt_release_full_gpu(adev, false))
2386 DRM_ERROR("failed to release exclusive mode on fini\n");
2493664f 2387
d38ceaf9
AD
2388 return 0;
2389}
2390
e3ecdffa 2391/**
beff74bc 2392 * amdgpu_device_delayed_init_work_handler - work handler for IB tests
e3ecdffa 2393 *
1112a46b 2394 * @work: work_struct.
e3ecdffa 2395 */
beff74bc 2396static void amdgpu_device_delayed_init_work_handler(struct work_struct *work)
2dc80b00
S
2397{
2398 struct amdgpu_device *adev =
beff74bc 2399 container_of(work, struct amdgpu_device, delayed_init_work.work);
916ac57f
RZ
2400 int r;
2401
2402 r = amdgpu_ib_ring_tests(adev);
2403 if (r)
2404 DRM_ERROR("ib ring test failed (%d).\n", r);
2dc80b00
S
2405}
2406
1e317b99
RZ
2407static void amdgpu_device_delay_enable_gfx_off(struct work_struct *work)
2408{
2409 struct amdgpu_device *adev =
2410 container_of(work, struct amdgpu_device, gfx.gfx_off_delay_work.work);
2411
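	/*
	 * Only enter GFXOFF if nothing currently holds a request against it
	 * (gfx_off_req_count == 0) and it is not already enabled.
	 */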
2412 mutex_lock(&adev->gfx.gfx_off_mutex);
2413 if (!adev->gfx.gfx_off_state && !adev->gfx.gfx_off_req_count) {
2414 if (!amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_GFX, true))
2415 adev->gfx.gfx_off_state = true;
2416 }
2417 mutex_unlock(&adev->gfx.gfx_off_mutex);
2418}
2419
e3ecdffa 2420/**
e7854a03 2421 * amdgpu_device_ip_suspend_phase1 - run suspend for hardware IPs (phase 1)
e3ecdffa
AD
2422 *
2423 * @adev: amdgpu_device pointer
2424 *
2425 * Main suspend function for hardware IPs. The list of all the hardware
2426 * IPs that make up the asic is walked, clockgating is disabled and the
2427 * suspend callbacks are run. suspend puts the hardware and software state
2428 * in each IP into a state suitable for suspend.
2429 * Returns 0 on success, negative error code on failure.
2430 */
e7854a03
AD
2431static int amdgpu_device_ip_suspend_phase1(struct amdgpu_device *adev)
2432{
2433 int i, r;
2434
ced1ba97
PL
2435 amdgpu_device_set_pg_state(adev, AMD_PG_STATE_UNGATE);
2436 amdgpu_device_set_cg_state(adev, AMD_CG_STATE_UNGATE);
05df1f01 2437
e7854a03
AD
2438 for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
2439 if (!adev->ip_blocks[i].status.valid)
2440 continue;
2441 /* displays are handled separately */
2442 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_DCE) {
e7854a03
AD
2443 /* XXX handle errors */
2444 r = adev->ip_blocks[i].version->funcs->suspend(adev);
2445 /* XXX handle errors */
2446 if (r) {
2447 DRM_ERROR("suspend of IP block <%s> failed %d\n",
2448 adev->ip_blocks[i].version->funcs->name, r);
482f0e53 2449 return r;
e7854a03 2450 }
482f0e53 2451 adev->ip_blocks[i].status.hw = false;
e7854a03
AD
2452 }
2453 }
2454
e7854a03
AD
2455 return 0;
2456}
2457
2458/**
2459 * amdgpu_device_ip_suspend_phase2 - run suspend for hardware IPs (phase 2)
2460 *
2461 * @adev: amdgpu_device pointer
2462 *
2463 * Main suspend function for hardware IPs. The list of all the hardware
2464 * IPs that make up the asic is walked, clockgating is disabled and the
2465 * suspend callbacks are run. suspend puts the hardware and software state
2466 * in each IP into a state suitable for suspend.
2467 * Returns 0 on success, negative error code on failure.
2468 */
2469static int amdgpu_device_ip_suspend_phase2(struct amdgpu_device *adev)
d38ceaf9
AD
2470{
2471 int i, r;
2472
2473 for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
a1255107 2474 if (!adev->ip_blocks[i].status.valid)
d38ceaf9 2475 continue;
e7854a03
AD
2476 /* displays are handled in phase1 */
2477 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_DCE)
2478 continue;
bff77e86
LM
2479 /* PSP lost connection when err_event_athub occurs */
2480 if (amdgpu_ras_intr_triggered() &&
2481 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP) {
2482 adev->ip_blocks[i].status.hw = false;
2483 continue;
2484 }
d38ceaf9 2485 /* XXX handle errors */
a1255107 2486 r = adev->ip_blocks[i].version->funcs->suspend(adev);
d38ceaf9 2487 /* XXX handle errors */
2c1a2784 2488 if (r) {
a1255107
AD
2489 DRM_ERROR("suspend of IP block <%s> failed %d\n",
2490 adev->ip_blocks[i].version->funcs->name, r);
2c1a2784 2491 }
876923fb 2492 adev->ip_blocks[i].status.hw = false;
a3a09142 2493 /* handle putting the SMC in the appropriate state */
86b93fd6
JZ
2494 if (!amdgpu_sriov_vf(adev)) {
2495 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) {
2496 r = amdgpu_dpm_set_mp1_state(adev, adev->mp1_state);
2497 if (r) {
2498 DRM_ERROR("SMC failed to set mp1 state %d, %d\n",
2499 adev->mp1_state, r);
2500 return r;
2501 }
a3a09142
AD
2502 }
2503 }
b5507c7e 2504 adev->ip_blocks[i].status.hw = false;
d38ceaf9
AD
2505 }
2506
2507 return 0;
2508}
2509
e7854a03
AD
2510/**
2511 * amdgpu_device_ip_suspend - run suspend for hardware IPs
2512 *
2513 * @adev: amdgpu_device pointer
2514 *
2515 * Main suspend function for hardware IPs. The list of all the hardware
2516 * IPs that make up the asic is walked, clockgating is disabled and the
2517 * suspend callbacks are run. suspend puts the hardware and software state
2518 * in each IP into a state suitable for suspend.
2519 * Returns 0 on success, negative error code on failure.
2520 */
2521int amdgpu_device_ip_suspend(struct amdgpu_device *adev)
2522{
2523 int r;
2524
e7819644
YT
2525 if (amdgpu_sriov_vf(adev))
2526 amdgpu_virt_request_full_gpu(adev, false);
2527
e7854a03
AD
2528 r = amdgpu_device_ip_suspend_phase1(adev);
2529 if (r)
2530 return r;
2531 r = amdgpu_device_ip_suspend_phase2(adev);
2532
e7819644
YT
2533 if (amdgpu_sriov_vf(adev))
2534 amdgpu_virt_release_full_gpu(adev, false);
2535
e7854a03
AD
2536 return r;
2537}
2538
06ec9070 2539static int amdgpu_device_ip_reinit_early_sriov(struct amdgpu_device *adev)
a90ad3c2
ML
2540{
2541 int i, r;
2542
2cb681b6
ML
2543 static enum amd_ip_block_type ip_order[] = {
2544 AMD_IP_BLOCK_TYPE_GMC,
2545 AMD_IP_BLOCK_TYPE_COMMON,
39186aef 2546 AMD_IP_BLOCK_TYPE_PSP,
2cb681b6
ML
2547 AMD_IP_BLOCK_TYPE_IH,
2548 };
a90ad3c2 2549
2cb681b6
ML
2550 for (i = 0; i < ARRAY_SIZE(ip_order); i++) {
2551 int j;
2552 struct amdgpu_ip_block *block;
a90ad3c2 2553
2cb681b6
ML
2554 for (j = 0; j < adev->num_ip_blocks; j++) {
2555 block = &adev->ip_blocks[j];
2556
482f0e53 2557 block->status.hw = false;
2cb681b6
ML
2558 if (block->version->type != ip_order[i] ||
2559 !block->status.valid)
2560 continue;
2561
2562 r = block->version->funcs->hw_init(adev);
0aaeefcc 2563 DRM_INFO("RE-INIT-early: %s %s\n", block->version->funcs->name, r ? "failed" : "succeeded");
c41d1cf6
ML
2564 if (r)
2565 return r;
482f0e53 2566 block->status.hw = true;
a90ad3c2
ML
2567 }
2568 }
2569
2570 return 0;
2571}
2572
06ec9070 2573static int amdgpu_device_ip_reinit_late_sriov(struct amdgpu_device *adev)
a90ad3c2
ML
2574{
2575 int i, r;
2576
2cb681b6
ML
2577 static enum amd_ip_block_type ip_order[] = {
2578 AMD_IP_BLOCK_TYPE_SMC,
2579 AMD_IP_BLOCK_TYPE_DCE,
2580 AMD_IP_BLOCK_TYPE_GFX,
2581 AMD_IP_BLOCK_TYPE_SDMA,
257deb8c 2582 AMD_IP_BLOCK_TYPE_UVD,
d83c7a07
JJ
2583 AMD_IP_BLOCK_TYPE_VCE,
2584 AMD_IP_BLOCK_TYPE_VCN
2cb681b6 2585 };
a90ad3c2 2586
2cb681b6
ML
2587 for (i = 0; i < ARRAY_SIZE(ip_order); i++) {
2588 int j;
2589 struct amdgpu_ip_block *block;
a90ad3c2 2590
2cb681b6
ML
2591 for (j = 0; j < adev->num_ip_blocks; j++) {
2592 block = &adev->ip_blocks[j];
2593
2594 if (block->version->type != ip_order[i] ||
482f0e53
ML
2595 !block->status.valid ||
2596 block->status.hw)
2cb681b6
ML
2597 continue;
2598
895bd048
JZ
2599 if (block->version->type == AMD_IP_BLOCK_TYPE_SMC)
2600 r = block->version->funcs->resume(adev);
2601 else
2602 r = block->version->funcs->hw_init(adev);
2603
0aaeefcc 2604 DRM_INFO("RE-INIT-late: %s %s\n", block->version->funcs->name, r ? "failed" : "succeeded");
c41d1cf6
ML
2605 if (r)
2606 return r;
482f0e53 2607 block->status.hw = true;
a90ad3c2
ML
2608 }
2609 }
2610
2611 return 0;
2612}
2613
e3ecdffa
AD
2614/**
2615 * amdgpu_device_ip_resume_phase1 - run resume for hardware IPs
2616 *
2617 * @adev: amdgpu_device pointer
2618 *
2619 * First resume function for hardware IPs. The list of all the hardware
2620 * IPs that make up the asic is walked and the resume callbacks are run for
2621 * COMMON, GMC, and IH. resume puts the hardware into a functional state
2622 * after a suspend and updates the software state as necessary. This
2623 * function is also used for restoring the GPU after a GPU reset.
2624 * Returns 0 on success, negative error code on failure.
2625 */
06ec9070 2626static int amdgpu_device_ip_resume_phase1(struct amdgpu_device *adev)
d38ceaf9
AD
2627{
2628 int i, r;
2629
a90ad3c2 2630 for (i = 0; i < adev->num_ip_blocks; i++) {
482f0e53 2631 if (!adev->ip_blocks[i].status.valid || adev->ip_blocks[i].status.hw)
a90ad3c2 2632 continue;
a90ad3c2 2633 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
e3ecdffa
AD
2634 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
2635 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH) {
482f0e53 2636
fcf0649f
CZ
2637 r = adev->ip_blocks[i].version->funcs->resume(adev);
2638 if (r) {
2639 DRM_ERROR("resume of IP block <%s> failed %d\n",
2640 adev->ip_blocks[i].version->funcs->name, r);
2641 return r;
2642 }
482f0e53 2643 adev->ip_blocks[i].status.hw = true;
a90ad3c2
ML
2644 }
2645 }
2646
2647 return 0;
2648}
2649
e3ecdffa
AD
2650/**
2651 * amdgpu_device_ip_resume_phase2 - run resume for hardware IPs
2652 *
2653 * @adev: amdgpu_device pointer
2654 *
2655 * Second resume function for hardware IPs. The list of all the hardware
2656 * IPs that make up the asic is walked and the resume callbacks are run for
2657 * all blocks except COMMON, GMC, and IH. resume puts the hardware into a
2658 * functional state after a suspend and updates the software state as
2659 * necessary. This function is also used for restoring the GPU after a GPU
2660 * reset.
2661 * Returns 0 on success, negative error code on failure.
2662 */
06ec9070 2663static int amdgpu_device_ip_resume_phase2(struct amdgpu_device *adev)
d38ceaf9
AD
2664{
2665 int i, r;
2666
2667 for (i = 0; i < adev->num_ip_blocks; i++) {
482f0e53 2668 if (!adev->ip_blocks[i].status.valid || adev->ip_blocks[i].status.hw)
d38ceaf9 2669 continue;
fcf0649f 2670 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
e3ecdffa 2671 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
7a3e0bb2
RZ
2672 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH ||
2673 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP)
fcf0649f 2674 continue;
a1255107 2675 r = adev->ip_blocks[i].version->funcs->resume(adev);
2c1a2784 2676 if (r) {
a1255107
AD
2677 DRM_ERROR("resume of IP block <%s> failed %d\n",
2678 adev->ip_blocks[i].version->funcs->name, r);
d38ceaf9 2679 return r;
2c1a2784 2680 }
482f0e53 2681 adev->ip_blocks[i].status.hw = true;
d38ceaf9
AD
2682 }
2683
2684 return 0;
2685}
2686
e3ecdffa
AD
2687/**
2688 * amdgpu_device_ip_resume - run resume for hardware IPs
2689 *
2690 * @adev: amdgpu_device pointer
2691 *
2692 * Main resume function for hardware IPs. The hardware IPs
2693 * are split into two resume functions because they are
2694 * also used in recovering from a GPU reset and some additional
2695 * steps need to be taken between them. In this case (S3/S4) they are
2696 * run sequentially.
2697 * Returns 0 on success, negative error code on failure.
2698 */
06ec9070 2699static int amdgpu_device_ip_resume(struct amdgpu_device *adev)
fcf0649f
CZ
2700{
2701 int r;
2702
06ec9070 2703 r = amdgpu_device_ip_resume_phase1(adev);
fcf0649f
CZ
2704 if (r)
2705 return r;
7a3e0bb2
RZ
2706
2707 r = amdgpu_device_fw_loading(adev);
2708 if (r)
2709 return r;
2710
06ec9070 2711 r = amdgpu_device_ip_resume_phase2(adev);
fcf0649f
CZ
2712
2713 return r;
2714}
2715
e3ecdffa
AD
2716/**
2717 * amdgpu_device_detect_sriov_bios - determine if the board supports SR-IOV
2718 *
2719 * @adev: amdgpu_device pointer
2720 *
2721 * Query the VBIOS data tables to determine if the board supports SR-IOV.
2722 */
4e99a44e 2723static void amdgpu_device_detect_sriov_bios(struct amdgpu_device *adev)
048765ad 2724{
6867e1b5
ML
2725 if (amdgpu_sriov_vf(adev)) {
2726 if (adev->is_atom_fw) {
2727 if (amdgpu_atomfirmware_gpu_supports_virtualization(adev))
2728 adev->virt.caps |= AMDGPU_SRIOV_CAPS_SRIOV_VBIOS;
2729 } else {
2730 if (amdgpu_atombios_has_gpu_virtualization_table(adev))
2731 adev->virt.caps |= AMDGPU_SRIOV_CAPS_SRIOV_VBIOS;
2732 }
2733
2734 if (!(adev->virt.caps & AMDGPU_SRIOV_CAPS_SRIOV_VBIOS))
2735 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_NO_VBIOS, 0, 0);
a5bde2f9 2736 }
048765ad
AR
2737}
2738
e3ecdffa
AD
2739/**
2740 * amdgpu_device_asic_has_dc_support - determine if DC supports the asic
2741 *
2742 * @asic_type: AMD asic type
2743 *
2744 * Check if there is DC (new modesetting infrastructure) support for an asic.
2745 * Returns true if DC has support, false if not.
2746 */
4562236b
HW
2747bool amdgpu_device_asic_has_dc_support(enum amd_asic_type asic_type)
2748{
2749 switch (asic_type) {
2750#if defined(CONFIG_DRM_AMD_DC)
2751 case CHIP_BONAIRE:
0d6fbccb 2752 case CHIP_KAVERI:
367e6687
AD
2753 case CHIP_KABINI:
2754 case CHIP_MULLINS:
d9fda248
HW
2755 /*
2756 * We have systems in the wild with these ASICs that require
2757 * LVDS and VGA support which is not supported with DC.
2758 *
2759 * Fallback to the non-DC driver here by default so as not to
2760 * cause regressions.
2761 */
2762 return amdgpu_dc > 0;
2763 case CHIP_HAWAII:
4562236b
HW
2764 case CHIP_CARRIZO:
2765 case CHIP_STONEY:
4562236b 2766 case CHIP_POLARIS10:
675fd32b 2767 case CHIP_POLARIS11:
2c8ad2d5 2768 case CHIP_POLARIS12:
675fd32b 2769 case CHIP_VEGAM:
4562236b
HW
2770 case CHIP_TONGA:
2771 case CHIP_FIJI:
42f8ffa1 2772 case CHIP_VEGA10:
dca7b401 2773 case CHIP_VEGA12:
c6034aa2 2774 case CHIP_VEGA20:
b86a1aa3 2775#if defined(CONFIG_DRM_AMD_DC_DCN)
fd187853 2776 case CHIP_RAVEN:
b4f199c7 2777 case CHIP_NAVI10:
8fceceb6 2778 case CHIP_NAVI14:
078655d9 2779 case CHIP_NAVI12:
e1c14c43 2780 case CHIP_RENOIR:
42f8ffa1 2781#endif
fd187853 2782 return amdgpu_dc != 0;
4562236b
HW
2783#endif
2784 default:
93b09a9a
SS
2785 if (amdgpu_dc > 0)
2786 DRM_INFO("Display Core has been requested via kernel parameter "
2787 "but isn't supported by ASIC, ignoring\n");
4562236b
HW
2788 return false;
2789 }
2790}
2791
2792/**
2793 * amdgpu_device_has_dc_support - check if dc is supported
2794 *
2795 * @adev: amdgpu_device pointer
2796 *
2797 * Returns true for supported, false for not supported
2798 */
2799bool amdgpu_device_has_dc_support(struct amdgpu_device *adev)
2800{
2555039d
XY
2801 if (amdgpu_sriov_vf(adev))
2802 return false;
2803
4562236b
HW
2804 return amdgpu_device_asic_has_dc_support(adev->asic_type);
2805}
2806
d4535e2c
AG
2807
2808static void amdgpu_device_xgmi_reset_func(struct work_struct *__work)
2809{
2810 struct amdgpu_device *adev =
2811 container_of(__work, struct amdgpu_device, xgmi_reset_work);
c6a6e2db 2812 struct amdgpu_hive_info *hive = amdgpu_get_xgmi_hive(adev, 0);
d4535e2c 2813
c6a6e2db
AG
2814 /* It's a bug to not have a hive within this function */
2815 if (WARN_ON(!hive))
2816 return;
2817
2818 /*
2819 * Use task barrier to synchronize all xgmi reset works across the
2820 * hive. task_barrier_enter and task_barrier_exit will block
2821 * until all the threads running the xgmi reset works reach
2822 * those points. task_barrier_full will do both blocks.
2823 */
2824 if (amdgpu_asic_reset_method(adev) == AMD_RESET_METHOD_BACO) {
2825
2826 task_barrier_enter(&hive->tb);
2827 adev->asic_reset_res = amdgpu_device_baco_enter(adev->ddev);
2828
2829 if (adev->asic_reset_res)
2830 goto fail;
2831
2832 task_barrier_exit(&hive->tb);
2833 adev->asic_reset_res = amdgpu_device_baco_exit(adev->ddev);
2834
2835 if (adev->asic_reset_res)
2836 goto fail;
43c4d576
JC
2837
2838 if (adev->mmhub.funcs && adev->mmhub.funcs->reset_ras_error_count)
2839 adev->mmhub.funcs->reset_ras_error_count(adev);
c6a6e2db
AG
2840 } else {
2841
2842 task_barrier_full(&hive->tb);
2843 adev->asic_reset_res = amdgpu_asic_reset(adev);
2844 }
ce316fa5 2845
c6a6e2db 2846fail:
d4535e2c 2847 if (adev->asic_reset_res)
fed184e9 2848 DRM_WARN("ASIC reset failed with error %d for drm dev %s",
d4535e2c
AG
2849 adev->asic_reset_res, adev->ddev->unique);
2850}
2851
71f98027
AD
2852static int amdgpu_device_get_job_timeout_settings(struct amdgpu_device *adev)
2853{
2854 char *input = amdgpu_lockup_timeout;
2855 char *timeout_setting = NULL;
2856 int index = 0;
2857 long timeout;
2858 int ret = 0;
2859
2860 /*
2861 * By default the timeout for non-compute jobs is 10000 ms,
2862 * and there is no timeout enforced on compute jobs.
2863 * In SR-IOV or passthrough mode, the timeout for compute
b7b2a316 2864 * jobs is 60000 ms by default.
71f98027
AD
2865 */
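	/*
	 * Illustrative example (assuming the usual module-parameter naming):
	 * amdgpu.lockup_timeout=10000,60000,10000,10000 sets the gfx, compute,
	 * sdma and video timeouts (in ms) in that order; a single value applies
	 * to all non-compute jobs.
	 */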
2866 adev->gfx_timeout = msecs_to_jiffies(10000);
2867 adev->sdma_timeout = adev->video_timeout = adev->gfx_timeout;
2868 if (amdgpu_sriov_vf(adev) || amdgpu_passthrough(adev))
b7b2a316 2869 adev->compute_timeout = msecs_to_jiffies(60000);
71f98027
AD
2870 else
2871 adev->compute_timeout = MAX_SCHEDULE_TIMEOUT;
2872
f440ff44 2873 if (strnlen(input, AMDGPU_MAX_TIMEOUT_PARAM_LENGTH)) {
71f98027 2874 while ((timeout_setting = strsep(&input, ",")) &&
f440ff44 2875 strnlen(timeout_setting, AMDGPU_MAX_TIMEOUT_PARAM_LENGTH)) {
71f98027
AD
2876 ret = kstrtol(timeout_setting, 0, &timeout);
2877 if (ret)
2878 return ret;
2879
2880 if (timeout == 0) {
2881 index++;
2882 continue;
2883 } else if (timeout < 0) {
2884 timeout = MAX_SCHEDULE_TIMEOUT;
2885 } else {
2886 timeout = msecs_to_jiffies(timeout);
2887 }
2888
2889 switch (index++) {
2890 case 0:
2891 adev->gfx_timeout = timeout;
2892 break;
2893 case 1:
2894 adev->compute_timeout = timeout;
2895 break;
2896 case 2:
2897 adev->sdma_timeout = timeout;
2898 break;
2899 case 3:
2900 adev->video_timeout = timeout;
2901 break;
2902 default:
2903 break;
2904 }
2905 }
2906 /*
2907 * There is only one value specified and
2908 * it should apply to all non-compute jobs.
2909 */
bcccee89 2910 if (index == 1) {
71f98027 2911 adev->sdma_timeout = adev->video_timeout = adev->gfx_timeout;
bcccee89
ED
2912 if (amdgpu_sriov_vf(adev) || amdgpu_passthrough(adev))
2913 adev->compute_timeout = adev->gfx_timeout;
2914 }
71f98027
AD
2915 }
2916
2917 return ret;
2918}
d4535e2c 2919
77f3a5cd
ND
2920static const struct attribute *amdgpu_dev_attributes[] = {
2921 &dev_attr_product_name.attr,
2922 &dev_attr_product_number.attr,
2923 &dev_attr_serial_number.attr,
2924 &dev_attr_pcie_replay_count.attr,
2925 NULL
2926};
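/*
 * Illustrative note (not part of the driver): these attributes are created
 * on the underlying PCI device in amdgpu_device_init(), so assuming card0 is
 * an amdgpu device they can be read as, e.g.:
 *
 *	cat /sys/class/drm/card0/device/serial_number
 */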
2927
d38ceaf9
AD
2928/**
2929 * amdgpu_device_init - initialize the driver
2930 *
2931 * @adev: amdgpu_device pointer
87e3f136 2932 * @ddev: drm dev pointer
d38ceaf9
AD
2933 * @pdev: pci dev pointer
2934 * @flags: driver flags
2935 *
2936 * Initializes the driver info and hw (all asics).
2937 * Returns 0 for success or an error on failure.
2938 * Called at driver startup.
2939 */
2940int amdgpu_device_init(struct amdgpu_device *adev,
2941 struct drm_device *ddev,
2942 struct pci_dev *pdev,
2943 uint32_t flags)
2944{
2945 int r, i;
3840c5bc 2946 bool boco = false;
95844d20 2947 u32 max_MBps;
d38ceaf9
AD
2948
2949 adev->shutdown = false;
2950 adev->dev = &pdev->dev;
2951 adev->ddev = ddev;
2952 adev->pdev = pdev;
2953 adev->flags = flags;
4e66d7d2
YZ
2954
2955 if (amdgpu_force_asic_type >= 0 && amdgpu_force_asic_type < CHIP_LAST)
2956 adev->asic_type = amdgpu_force_asic_type;
2957 else
2958 adev->asic_type = flags & AMD_ASIC_MASK;
2959
d38ceaf9 2960 adev->usec_timeout = AMDGPU_MAX_USEC_TIMEOUT;
593aa2d2 2961 if (amdgpu_emu_mode == 1)
8bdab6bb 2962 adev->usec_timeout *= 10;
770d13b1 2963 adev->gmc.gart_size = 512 * 1024 * 1024;
d38ceaf9
AD
2964 adev->accel_working = false;
2965 adev->num_rings = 0;
2966 adev->mman.buffer_funcs = NULL;
2967 adev->mman.buffer_funcs_ring = NULL;
2968 adev->vm_manager.vm_pte_funcs = NULL;
0c88b430 2969 adev->vm_manager.vm_pte_num_scheds = 0;
132f34e4 2970 adev->gmc.gmc_funcs = NULL;
f54d1867 2971 adev->fence_context = dma_fence_context_alloc(AMDGPU_MAX_RINGS);
b8866c26 2972 bitmap_zero(adev->gfx.pipe_reserve_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
d38ceaf9
AD
2973
2974 adev->smc_rreg = &amdgpu_invalid_rreg;
2975 adev->smc_wreg = &amdgpu_invalid_wreg;
2976 adev->pcie_rreg = &amdgpu_invalid_rreg;
2977 adev->pcie_wreg = &amdgpu_invalid_wreg;
36b9a952
HR
2978 adev->pciep_rreg = &amdgpu_invalid_rreg;
2979 adev->pciep_wreg = &amdgpu_invalid_wreg;
4fa1c6a6
TZ
2980 adev->pcie_rreg64 = &amdgpu_invalid_rreg64;
2981 adev->pcie_wreg64 = &amdgpu_invalid_wreg64;
d38ceaf9
AD
2982 adev->uvd_ctx_rreg = &amdgpu_invalid_rreg;
2983 adev->uvd_ctx_wreg = &amdgpu_invalid_wreg;
2984 adev->didt_rreg = &amdgpu_invalid_rreg;
2985 adev->didt_wreg = &amdgpu_invalid_wreg;
ccdbb20a
RZ
2986 adev->gc_cac_rreg = &amdgpu_invalid_rreg;
2987 adev->gc_cac_wreg = &amdgpu_invalid_wreg;
d38ceaf9
AD
2988 adev->audio_endpt_rreg = &amdgpu_block_invalid_rreg;
2989 adev->audio_endpt_wreg = &amdgpu_block_invalid_wreg;
2990
3e39ab90
AD
2991 DRM_INFO("initializing kernel modesetting (%s 0x%04X:0x%04X 0x%04X:0x%04X 0x%02X).\n",
2992 amdgpu_asic_name[adev->asic_type], pdev->vendor, pdev->device,
2993 pdev->subsystem_vendor, pdev->subsystem_device, pdev->revision);
d38ceaf9
AD
2994
2995 /* mutex initializations are all done here so we
2996 * can call these functions again later without locking issues */
d38ceaf9 2997 atomic_set(&adev->irq.ih.lock, 0);
0e5ca0d1 2998 mutex_init(&adev->firmware.mutex);
d38ceaf9
AD
2999 mutex_init(&adev->pm.mutex);
3000 mutex_init(&adev->gfx.gpu_clock_mutex);
3001 mutex_init(&adev->srbm_mutex);
b8866c26 3002 mutex_init(&adev->gfx.pipe_reserve_mutex);
d23ee13f 3003 mutex_init(&adev->gfx.gfx_off_mutex);
d38ceaf9 3004 mutex_init(&adev->grbm_idx_mutex);
d38ceaf9 3005 mutex_init(&adev->mn_lock);
e23b74aa 3006 mutex_init(&adev->virt.vf_errors.lock);
d38ceaf9 3007 hash_init(adev->mn_hash);
13a752e3 3008 mutex_init(&adev->lock_reset);
32eaeae0 3009 mutex_init(&adev->psp.mutex);
bd052211 3010 mutex_init(&adev->notifier_lock);
d38ceaf9 3011
912dfc84
EQ
3012 r = amdgpu_device_check_arguments(adev);
3013 if (r)
3014 return r;
d38ceaf9 3015
d38ceaf9
AD
3016 spin_lock_init(&adev->mmio_idx_lock);
3017 spin_lock_init(&adev->smc_idx_lock);
3018 spin_lock_init(&adev->pcie_idx_lock);
3019 spin_lock_init(&adev->uvd_ctx_idx_lock);
3020 spin_lock_init(&adev->didt_idx_lock);
ccdbb20a 3021 spin_lock_init(&adev->gc_cac_idx_lock);
16abb5d2 3022 spin_lock_init(&adev->se_cac_idx_lock);
d38ceaf9 3023 spin_lock_init(&adev->audio_endpt_idx_lock);
95844d20 3024 spin_lock_init(&adev->mm_stats.lock);
d38ceaf9 3025
0c4e7fa5
CZ
3026 INIT_LIST_HEAD(&adev->shadow_list);
3027 mutex_init(&adev->shadow_list_lock);
3028
beff74bc
AD
3029 INIT_DELAYED_WORK(&adev->delayed_init_work,
3030 amdgpu_device_delayed_init_work_handler);
1e317b99
RZ
3031 INIT_DELAYED_WORK(&adev->gfx.gfx_off_delay_work,
3032 amdgpu_device_delay_enable_gfx_off);
2dc80b00 3033
d4535e2c
AG
3034 INIT_WORK(&adev->xgmi_reset_work, amdgpu_device_xgmi_reset_func);
3035
d23ee13f 3036 adev->gfx.gfx_off_req_count = 1;
b6e79d9a 3037 adev->pm.ac_power = power_supply_is_system_supplied() > 0;
b1ddf548 3038
0fa49558
AX
3039 /* Registers mapping */
3040 /* TODO: block userspace mapping of io register */
da69c161
KW
3041 if (adev->asic_type >= CHIP_BONAIRE) {
3042 adev->rmmio_base = pci_resource_start(adev->pdev, 5);
3043 adev->rmmio_size = pci_resource_len(adev->pdev, 5);
3044 } else {
3045 adev->rmmio_base = pci_resource_start(adev->pdev, 2);
3046 adev->rmmio_size = pci_resource_len(adev->pdev, 2);
3047 }
d38ceaf9 3048
d38ceaf9
AD
3049 adev->rmmio = ioremap(adev->rmmio_base, adev->rmmio_size);
3050 if (adev->rmmio == NULL) {
3051 return -ENOMEM;
3052 }
3053 DRM_INFO("register mmio base: 0x%08X\n", (uint32_t)adev->rmmio_base);
3054 DRM_INFO("register mmio size: %u\n", (unsigned)adev->rmmio_size);
3055
d38ceaf9
AD
3056 /* io port mapping */
3057 for (i = 0; i < DEVICE_COUNT_RESOURCE; i++) {
3058 if (pci_resource_flags(adev->pdev, i) & IORESOURCE_IO) {
3059 adev->rio_mem_size = pci_resource_len(adev->pdev, i);
3060 adev->rio_mem = pci_iomap(adev->pdev, i, adev->rio_mem_size);
3061 break;
3062 }
3063 }
3064 if (adev->rio_mem == NULL)
b64a18c5 3065 DRM_INFO("PCI I/O BAR is not found.\n");
d38ceaf9 3066
b2109d8e
JX
3067 /* enable PCIE atomic ops */
3068 r = pci_enable_atomic_ops_to_root(adev->pdev,
3069 PCI_EXP_DEVCAP2_ATOMIC_COMP32 |
3070 PCI_EXP_DEVCAP2_ATOMIC_COMP64);
3071 if (r) {
3072 adev->have_atomics_support = false;
3073 DRM_INFO("PCIE atomic ops are not supported\n");
3074 } else {
3075 adev->have_atomics_support = true;
3076 }
3077
5494d864
AD
3078 amdgpu_device_get_pcie_info(adev);
3079
b239c017
JX
3080 if (amdgpu_mcbp)
3081 DRM_INFO("MCBP is enabled\n");
3082
5f84cc63
JX
3083 if (amdgpu_mes && adev->asic_type >= CHIP_NAVI10)
3084 adev->enable_mes = true;
3085
3aa0115d
ML
3086 /* detect hw virtualization here */
3087 amdgpu_detect_virtualization(adev);
3088
dffa11b4
ML
3089 r = amdgpu_device_get_job_timeout_settings(adev);
3090 if (r) {
3091 dev_err(adev->dev, "invalid lockup_timeout parameter syntax\n");
3092 return r;
a190d1c7
XY
3093 }
3094
d38ceaf9 3095 /* early init functions */
06ec9070 3096 r = amdgpu_device_ip_early_init(adev);
d38ceaf9
AD
3097 if (r)
3098 return r;
3099
6585661d
OZ
3100 /* doorbell bar mapping and doorbell index init*/
3101 amdgpu_device_doorbell_init(adev);
3102
d38ceaf9
AD
3103 /* if we have > 1 VGA card, then disable the amdgpu VGA resources */
3104 /* this will fail for cards that aren't VGA class devices; just
3105 * ignore it */
06ec9070 3106 vga_client_register(adev->pdev, adev, NULL, amdgpu_device_vga_set_decode);
d38ceaf9 3107
31af062a 3108 if (amdgpu_device_supports_boco(ddev))
3840c5bc
AD
3109 boco = true;
3110 if (amdgpu_has_atpx() &&
3111 (amdgpu_is_atpx_hybrid() ||
3112 amdgpu_has_atpx_dgpu_power_cntl()) &&
3113 !pci_is_thunderbolt_attached(adev->pdev))
84c8b22e 3114 vga_switcheroo_register_client(adev->pdev,
3840c5bc
AD
3115 &amdgpu_switcheroo_ops, boco);
3116 if (boco)
d38ceaf9
AD
3117 vga_switcheroo_init_domain_pm_ops(adev->dev, &adev->vga_pm_domain);
3118
9475a943
SL
3119 if (amdgpu_emu_mode == 1) {
3120 /* post the asic on emulation mode */
3121 emu_soc_asic_init(adev);
bfca0289 3122 goto fence_driver_init;
9475a943 3123 }
bfca0289 3124
4e99a44e
ML
3125 /* detect if we are with an SRIOV vbios */
3126 amdgpu_device_detect_sriov_bios(adev);
048765ad 3127
95e8e59e
AD
3128 /* check if we need to reset the asic
3129 * E.g., driver was not cleanly unloaded previously, etc.
3130 */
f14899fd 3131 if (!amdgpu_sriov_vf(adev) && amdgpu_asic_need_reset_on_init(adev)) {
95e8e59e
AD
3132 r = amdgpu_asic_reset(adev);
3133 if (r) {
3134 dev_err(adev->dev, "asic reset on init failed\n");
3135 goto failed;
3136 }
3137 }
3138
d38ceaf9 3139 /* Post card if necessary */
39c640c0 3140 if (amdgpu_device_need_post(adev)) {
d38ceaf9 3141 if (!adev->bios) {
bec86378 3142 dev_err(adev->dev, "no vBIOS found\n");
83ba126a
AD
3143 r = -EINVAL;
3144 goto failed;
d38ceaf9 3145 }
bec86378 3146 DRM_INFO("GPU posting now...\n");
4e99a44e
ML
3147 r = amdgpu_atom_asic_init(adev->mode_info.atom_context);
3148 if (r) {
3149 dev_err(adev->dev, "gpu post error!\n");
3150 goto failed;
3151 }
d38ceaf9
AD
3152 }
3153
88b64e95
AD
3154 if (adev->is_atom_fw) {
3155 /* Initialize clocks */
3156 r = amdgpu_atomfirmware_get_clock_info(adev);
3157 if (r) {
3158 dev_err(adev->dev, "amdgpu_atomfirmware_get_clock_info failed\n");
e23b74aa 3159 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_GET_CLOCK_FAIL, 0, 0);
88b64e95
AD
3160 goto failed;
3161 }
3162 } else {
a5bde2f9
AD
3163 /* Initialize clocks */
3164 r = amdgpu_atombios_get_clock_info(adev);
3165 if (r) {
3166 dev_err(adev->dev, "amdgpu_atombios_get_clock_info failed\n");
e23b74aa 3167 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_GET_CLOCK_FAIL, 0, 0);
89041940 3168 goto failed;
a5bde2f9
AD
3169 }
3170 /* init i2c buses */
4562236b
HW
3171 if (!amdgpu_device_has_dc_support(adev))
3172 amdgpu_atombios_i2c_init(adev);
2c1a2784 3173 }
d38ceaf9 3174
bfca0289 3175fence_driver_init:
d38ceaf9
AD
3176 /* Fence driver */
3177 r = amdgpu_fence_driver_init(adev);
2c1a2784
AD
3178 if (r) {
3179 dev_err(adev->dev, "amdgpu_fence_driver_init failed\n");
e23b74aa 3180 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_FENCE_INIT_FAIL, 0, 0);
83ba126a 3181 goto failed;
2c1a2784 3182 }
d38ceaf9
AD
3183
3184 /* init the mode config */
3185 drm_mode_config_init(adev->ddev);
3186
06ec9070 3187 r = amdgpu_device_ip_init(adev);
d38ceaf9 3188 if (r) {
8840a387 3189 /* failed in exclusive mode due to timeout */
3190 if (amdgpu_sriov_vf(adev) &&
3191 !amdgpu_sriov_runtime(adev) &&
3192 amdgpu_virt_mmio_blocked(adev) &&
3193 !amdgpu_virt_wait_reset(adev)) {
3194 dev_err(adev->dev, "VF exclusive mode timeout\n");
1daee8b4
PD
3195 /* Don't send request since VF is inactive. */
3196 adev->virt.caps &= ~AMDGPU_SRIOV_CAPS_RUNTIME;
3197 adev->virt.ops = NULL;
8840a387 3198 r = -EAGAIN;
3199 goto failed;
3200 }
06ec9070 3201 dev_err(adev->dev, "amdgpu_device_ip_init failed\n");
e23b74aa 3202 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_AMDGPU_INIT_FAIL, 0, 0);
83ba126a 3203 goto failed;
d38ceaf9
AD
3204 }
3205
d69b8971
YZ
3206 dev_info(adev->dev,
3207 "SE %d, SH per SE %d, CU per SH %d, active_cu_number %d\n",
d7f72fe4
YZ
3208 adev->gfx.config.max_shader_engines,
3209 adev->gfx.config.max_sh_per_se,
3210 adev->gfx.config.max_cu_per_sh,
3211 adev->gfx.cu_info.number);
3212
d38ceaf9
AD
3213 adev->accel_working = true;
3214
e59c0205
AX
3215 amdgpu_vm_check_compute_bug(adev);
3216
95844d20
MO
3217 /* Initialize the buffer migration limit. */
3218 if (amdgpu_moverate >= 0)
3219 max_MBps = amdgpu_moverate;
3220 else
3221 max_MBps = 8; /* Allow 8 MB/s. */
3222 /* Get a log2 for easy divisions. */
3223 adev->mm_stats.log2_max_MBps = ilog2(max(1u, max_MBps));
3224
9bc92b9c
ML
3225 amdgpu_fbdev_init(adev);
3226
d2f52ac8 3227 r = amdgpu_pm_sysfs_init(adev);
7c868b59
YT
3228 if (r) {
3229 adev->pm_sysfs_en = false;
d2f52ac8 3230 DRM_ERROR("registering pm debugfs failed (%d).\n", r);
7c868b59
YT
3231 } else
3232 adev->pm_sysfs_en = true;
d2f52ac8 3233
5bb23532 3234 r = amdgpu_ucode_sysfs_init(adev);
7c868b59
YT
3235 if (r) {
3236 adev->ucode_sysfs_en = false;
5bb23532 3237 DRM_ERROR("Creating firmware sysfs failed (%d).\n", r);
7c868b59
YT
3238 } else
3239 adev->ucode_sysfs_en = true;
5bb23532 3240
d38ceaf9
AD
3241 if ((amdgpu_testing & 1)) {
3242 if (adev->accel_working)
3243 amdgpu_test_moves(adev);
3244 else
3245 DRM_INFO("amdgpu: acceleration disabled, skipping move tests\n");
3246 }
d38ceaf9
AD
3247 if (amdgpu_benchmarking) {
3248 if (adev->accel_working)
3249 amdgpu_benchmark(adev, amdgpu_benchmarking);
3250 else
3251 DRM_INFO("amdgpu: acceleration disabled, skipping benchmarks\n");
3252 }
3253
b0adca4d
EQ
3254 /*
3255 * Register gpu instance before amdgpu_device_enable_mgpu_fan_boost.
3256 * Otherwise the mgpu fan boost feature will be skipped because the
3257 * gpu instance count would be too low.
3258 */
3259 amdgpu_register_gpu_instance(adev);
3260
d38ceaf9
AD
3261 /* enable clockgating, etc. after ib tests, etc. since some blocks require
3262 * explicit gating rather than handling it automatically.
3263 */
06ec9070 3264 r = amdgpu_device_ip_late_init(adev);
2c1a2784 3265 if (r) {
06ec9070 3266 dev_err(adev->dev, "amdgpu_device_ip_late_init failed\n");
e23b74aa 3267 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_AMDGPU_LATE_INIT_FAIL, 0, r);
83ba126a 3268 goto failed;
2c1a2784 3269 }
d38ceaf9 3270
108c6a63 3271 /* must succeed. */
511fdbc3 3272 amdgpu_ras_resume(adev);
108c6a63 3273
beff74bc
AD
3274 queue_delayed_work(system_wq, &adev->delayed_init_work,
3275 msecs_to_jiffies(AMDGPU_RESUME_MS));
3276
77f3a5cd 3277 r = sysfs_create_files(&adev->dev->kobj, amdgpu_dev_attributes);
bd607166 3278 if (r) {
77f3a5cd 3279 dev_err(adev->dev, "Could not create amdgpu device attr\n");
bd607166
KR
3280 return r;
3281 }
3282
d155bef0
AB
3283 if (IS_ENABLED(CONFIG_PERF_EVENTS))
3284 r = amdgpu_pmu_init(adev);
9c7c85f7
JK
3285 if (r)
3286 dev_err(adev->dev, "amdgpu_pmu_init failed\n");
3287
d38ceaf9 3288 return 0;
83ba126a
AD
3289
3290failed:
89041940 3291 amdgpu_vf_error_trans_all(adev);
3840c5bc 3292 if (boco)
83ba126a 3293 vga_switcheroo_fini_domain_pm_ops(adev->dev);
8840a387 3294
83ba126a 3295 return r;
d38ceaf9
AD
3296}
3297
d38ceaf9
AD
3298/**
3299 * amdgpu_device_fini - tear down the driver
3300 *
3301 * @adev: amdgpu_device pointer
3302 *
3303 * Tear down the driver info (all asics).
3304 * Called at driver shutdown.
3305 */
3306void amdgpu_device_fini(struct amdgpu_device *adev)
3307{
3308 int r;
3309
3310 DRM_INFO("amdgpu: finishing device.\n");
9f875167 3311 flush_delayed_work(&adev->delayed_init_work);
d0d13fe8 3312 adev->shutdown = true;
9f875167 3313
752c683d
ML
3314 /* make sure the IB tests have finished before entering exclusive mode
3315 * to avoid preemption during the IB tests
3316 */
3317 if (amdgpu_sriov_vf(adev))
3318 amdgpu_virt_request_full_gpu(adev, false);
3319
e5b03032
ML
3320 /* disable all interrupts */
3321 amdgpu_irq_disable_all(adev);
ff97cba8
ML
3322 if (adev->mode_info.mode_config_initialized){
3323 if (!amdgpu_device_has_dc_support(adev))
c2d88e06 3324 drm_helper_force_disable_all(adev->ddev);
ff97cba8
ML
3325 else
3326 drm_atomic_helper_shutdown(adev->ddev);
3327 }
d38ceaf9 3328 amdgpu_fence_driver_fini(adev);
7c868b59
YT
3329 if (adev->pm_sysfs_en)
3330 amdgpu_pm_sysfs_fini(adev);
d38ceaf9 3331 amdgpu_fbdev_fini(adev);
06ec9070 3332 r = amdgpu_device_ip_fini(adev);
ab4fe3e1
HR
3333 if (adev->firmware.gpu_info_fw) {
3334 release_firmware(adev->firmware.gpu_info_fw);
3335 adev->firmware.gpu_info_fw = NULL;
3336 }
d38ceaf9
AD
3337 adev->accel_working = false;
3338 /* free i2c buses */
4562236b
HW
3339 if (!amdgpu_device_has_dc_support(adev))
3340 amdgpu_i2c_fini(adev);
bfca0289
SL
3341
3342 if (amdgpu_emu_mode != 1)
3343 amdgpu_atombios_fini(adev);
3344
d38ceaf9
AD
3345 kfree(adev->bios);
3346 adev->bios = NULL;
3840c5bc
AD
3347 if (amdgpu_has_atpx() &&
3348 (amdgpu_is_atpx_hybrid() ||
3349 amdgpu_has_atpx_dgpu_power_cntl()) &&
3350 !pci_is_thunderbolt_attached(adev->pdev))
84c8b22e 3351 vga_switcheroo_unregister_client(adev->pdev);
3840c5bc 3352 if (amdgpu_device_supports_boco(adev->ddev))
83ba126a 3353 vga_switcheroo_fini_domain_pm_ops(adev->dev);
d38ceaf9
AD
3354 vga_client_register(adev->pdev, NULL, NULL, NULL);
3355 if (adev->rio_mem)
3356 pci_iounmap(adev->pdev, adev->rio_mem);
3357 adev->rio_mem = NULL;
3358 iounmap(adev->rmmio);
3359 adev->rmmio = NULL;
06ec9070 3360 amdgpu_device_doorbell_fini(adev);
e9bc1bf7 3361
7c868b59
YT
3362 if (adev->ucode_sysfs_en)
3363 amdgpu_ucode_sysfs_fini(adev);
77f3a5cd
ND
3364
3365 sysfs_remove_files(&adev->dev->kobj, amdgpu_dev_attributes);
d155bef0
AB
3366 if (IS_ENABLED(CONFIG_PERF_EVENTS))
3367 amdgpu_pmu_fini(adev);
f54eeab4 3368 if (amdgpu_discovery && adev->asic_type >= CHIP_NAVI10)
a190d1c7 3369 amdgpu_discovery_fini(adev);
d38ceaf9
AD
3370}
3371
3372
3373/*
3374 * Suspend & resume.
3375 */
3376/**
810ddc3a 3377 * amdgpu_device_suspend - initiate device suspend
d38ceaf9 3378 *
87e3f136
DP
3379 * @dev: drm dev pointer
3380 * @fbcon: notify the fbdev of suspend
d38ceaf9
AD
3382 *
3383 * Puts the hw in the suspend state (all asics).
3384 * Returns 0 for success or an error on failure.
3385 * Called at driver suspend.
3386 */
de185019 3387int amdgpu_device_suspend(struct drm_device *dev, bool fbcon)
d38ceaf9
AD
3388{
3389 struct amdgpu_device *adev;
3390 struct drm_crtc *crtc;
3391 struct drm_connector *connector;
f8d2d39e 3392 struct drm_connector_list_iter iter;
5ceb54c6 3393 int r;
d38ceaf9
AD
3394
3395 if (dev == NULL || dev->dev_private == NULL) {
3396 return -ENODEV;
3397 }
3398
3399 adev = dev->dev_private;
3400
3401 if (dev->switch_power_state == DRM_SWITCH_POWER_OFF)
3402 return 0;
3403
44779b43 3404 adev->in_suspend = true;
d38ceaf9
AD
3405 drm_kms_helper_poll_disable(dev);
3406
5f818173
S
3407 if (fbcon)
3408 amdgpu_fbdev_set_suspend(adev, 1);
3409
beff74bc 3410 cancel_delayed_work_sync(&adev->delayed_init_work);
a5459475 3411
4562236b
HW
3412 if (!amdgpu_device_has_dc_support(adev)) {
3413 /* turn off display hw */
3414 drm_modeset_lock_all(dev);
f8d2d39e
LP
3415 drm_connector_list_iter_begin(dev, &iter);
3416 drm_for_each_connector_iter(connector, &iter)
3417 drm_helper_connector_dpms(connector,
3418 DRM_MODE_DPMS_OFF);
3419 drm_connector_list_iter_end(&iter);
4562236b 3420 drm_modeset_unlock_all(dev);
fe1053b7
AD
3421 /* unpin the front buffers and cursors */
3422 list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) {
3423 struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
3424 struct drm_framebuffer *fb = crtc->primary->fb;
3425 struct amdgpu_bo *robj;
3426
91334223 3427 if (amdgpu_crtc->cursor_bo && !adev->enable_virtual_display) {
fe1053b7
AD
3428 struct amdgpu_bo *aobj = gem_to_amdgpu_bo(amdgpu_crtc->cursor_bo);
3429 r = amdgpu_bo_reserve(aobj, true);
3430 if (r == 0) {
3431 amdgpu_bo_unpin(aobj);
3432 amdgpu_bo_unreserve(aobj);
3433 }
756e6880 3434 }
756e6880 3435
fe1053b7
AD
3436 if (fb == NULL || fb->obj[0] == NULL) {
3437 continue;
3438 }
3439 robj = gem_to_amdgpu_bo(fb->obj[0]);
3440 /* don't unpin kernel fb objects */
3441 if (!amdgpu_fbdev_robj_is_fb(adev, robj)) {
3442 r = amdgpu_bo_reserve(robj, true);
3443 if (r == 0) {
3444 amdgpu_bo_unpin(robj);
3445 amdgpu_bo_unreserve(robj);
3446 }
d38ceaf9
AD
3447 }
3448 }
3449 }
fe1053b7 3450
5e6932fe 3451 amdgpu_ras_suspend(adev);
3452
fe1053b7
AD
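/* Suspend is split into two phases: phase 1 below quiesces the display
 * (DCE) blocks first; phase 2 suspends the remaining IPs after KFD is
 * stopped and VRAM has been evicted.
 */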
3453 r = amdgpu_device_ip_suspend_phase1(adev);
3454
94fa5660
EQ
3455 amdgpu_amdkfd_suspend(adev, !fbcon);
3456
d38ceaf9
AD
3457 /* evict vram memory */
3458 amdgpu_bo_evict_vram(adev);
3459
5ceb54c6 3460 amdgpu_fence_driver_suspend(adev);
d38ceaf9 3461
fe1053b7 3462 r = amdgpu_device_ip_suspend_phase2(adev);
d38ceaf9 3463
a0a71e49
AD
3464 /* evict remaining vram memory
3465 * This second call to evict vram is to evict the gart page table
3466 * using the CPU.
3467 */
d38ceaf9
AD
3468 amdgpu_bo_evict_vram(adev);
3469
d38ceaf9
AD
3470 return 0;
3471}
3472
3473/**
810ddc3a 3474 * amdgpu_device_resume - initiate device resume
d38ceaf9 3475 *
87e3f136
DP
3476 * @dev: drm dev pointer
3477 * @fbcon: notify the fbdev of resume
d38ceaf9
AD
3479 *
3480 * Bring the hw back to operating state (all asics).
3481 * Returns 0 for success or an error on failure.
3482 * Called at driver resume.
3483 */
de185019 3484int amdgpu_device_resume(struct drm_device *dev, bool fbcon)
d38ceaf9
AD
3485{
3486 struct drm_connector *connector;
f8d2d39e 3487 struct drm_connector_list_iter iter;
d38ceaf9 3488 struct amdgpu_device *adev = dev->dev_private;
756e6880 3489 struct drm_crtc *crtc;
03161a6e 3490 int r = 0;
d38ceaf9
AD
3491
3492 if (dev->switch_power_state == DRM_SWITCH_POWER_OFF)
3493 return 0;
3494
d38ceaf9 3495 /* post card */
39c640c0 3496 if (amdgpu_device_need_post(adev)) {
74b0b157 3497 r = amdgpu_atom_asic_init(adev->mode_info.atom_context);
3498 if (r)
3499 DRM_ERROR("amdgpu asic init failed\n");
3500 }
d38ceaf9 3501
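/* Bring the IP blocks back up (both resume phases plus firmware loading)
 * now that the ASIC has been re-posted.
 */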
06ec9070 3502 r = amdgpu_device_ip_resume(adev);
e6707218 3503 if (r) {
06ec9070 3504 DRM_ERROR("amdgpu_device_ip_resume failed (%d).\n", r);
4d3b9ae5 3505 return r;
e6707218 3506 }
5ceb54c6
AD
3507 amdgpu_fence_driver_resume(adev);
3508
d38ceaf9 3509
06ec9070 3510 r = amdgpu_device_ip_late_init(adev);
03161a6e 3511 if (r)
4d3b9ae5 3512 return r;
d38ceaf9 3513
beff74bc
AD
3514 queue_delayed_work(system_wq, &adev->delayed_init_work,
3515 msecs_to_jiffies(AMDGPU_RESUME_MS));
3516
fe1053b7
AD
3517 if (!amdgpu_device_has_dc_support(adev)) {
3518 /* pin cursors */
3519 list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) {
3520 struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
3521
91334223 3522 if (amdgpu_crtc->cursor_bo && !adev->enable_virtual_display) {
fe1053b7
AD
3523 struct amdgpu_bo *aobj = gem_to_amdgpu_bo(amdgpu_crtc->cursor_bo);
3524 r = amdgpu_bo_reserve(aobj, true);
3525 if (r == 0) {
3526 r = amdgpu_bo_pin(aobj, AMDGPU_GEM_DOMAIN_VRAM);
3527 if (r != 0)
3528 DRM_ERROR("Failed to pin cursor BO (%d)\n", r);
3529 amdgpu_crtc->cursor_addr = amdgpu_bo_gpu_offset(aobj);
3530 amdgpu_bo_unreserve(aobj);
3531 }
756e6880
AD
3532 }
3533 }
3534 }
9593f4d6 3535 r = amdgpu_amdkfd_resume(adev, !fbcon);
ba997709
YZ
3536 if (r)
3537 return r;
756e6880 3538
96a5d8d4 3540	/* Make sure the IB tests have been flushed */
beff74bc 3540 flush_delayed_work(&adev->delayed_init_work);
96a5d8d4 3541
d38ceaf9
AD
3542 /* blat the mode back in */
3543 if (fbcon) {
4562236b
HW
3544 if (!amdgpu_device_has_dc_support(adev)) {
3545 /* pre DCE11 */
3546 drm_helper_resume_force_mode(dev);
3547
3548 /* turn on display hw */
3549 drm_modeset_lock_all(dev);
f8d2d39e
LP
3550
3551 drm_connector_list_iter_begin(dev, &iter);
3552 drm_for_each_connector_iter(connector, &iter)
3553 drm_helper_connector_dpms(connector,
3554 DRM_MODE_DPMS_ON);
3555 drm_connector_list_iter_end(&iter);
3556
4562236b 3557 drm_modeset_unlock_all(dev);
d38ceaf9 3558 }
4d3b9ae5 3559 amdgpu_fbdev_set_suspend(adev, 0);
d38ceaf9
AD
3560 }
3561
3562 drm_kms_helper_poll_enable(dev);
23a1a9e5 3563
5e6932fe 3564 amdgpu_ras_resume(adev);
3565
23a1a9e5
L
3566 /*
3567 * Most of the connector probing functions try to acquire runtime pm
3568 * refs to ensure that the GPU is powered on when connector polling is
3569 * performed. Since we're calling this from a runtime PM callback,
3570 * trying to acquire rpm refs will cause us to deadlock.
3571 *
3572 * Since we're guaranteed to be holding the rpm lock, it's safe to
3573 * temporarily disable the rpm helpers so this doesn't deadlock us.
3574 */
3575#ifdef CONFIG_PM
3576 dev->dev->power.disable_depth++;
3577#endif
4562236b
HW
3578 if (!amdgpu_device_has_dc_support(adev))
3579 drm_helper_hpd_irq_event(dev);
3580 else
3581 drm_kms_helper_hotplug_event(dev);
23a1a9e5
L
3582#ifdef CONFIG_PM
3583 dev->dev->power.disable_depth--;
3584#endif
44779b43
RZ
3585 adev->in_suspend = false;
3586
4d3b9ae5 3587 return 0;
d38ceaf9
AD
3588}
3589
e3ecdffa
AD
3590/**
3591 * amdgpu_device_ip_check_soft_reset - did soft reset succeed
3592 *
3593 * @adev: amdgpu_device pointer
3594 *
3595 * The list of all the hardware IPs that make up the asic is walked and
3596 * the check_soft_reset callbacks are run. check_soft_reset determines
3597 * if the asic is still hung or not.
3598 * Returns true if any of the IPs are still in a hung state, false if not.
3599 */
06ec9070 3600static bool amdgpu_device_ip_check_soft_reset(struct amdgpu_device *adev)
63fbf42f
CZ
3601{
3602 int i;
3603 bool asic_hang = false;
3604
f993d628
ML
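/* Under SR-IOV the guest cannot soft-reset the ASIC; always report a
 * hang so amdgpu_device_should_recover_gpu() proceeds with recovery.
 */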
3605 if (amdgpu_sriov_vf(adev))
3606 return true;
3607
8bc04c29
AD
3608 if (amdgpu_asic_need_full_reset(adev))
3609 return true;
3610
63fbf42f 3611 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 3612 if (!adev->ip_blocks[i].status.valid)
63fbf42f 3613 continue;
a1255107
AD
3614 if (adev->ip_blocks[i].version->funcs->check_soft_reset)
3615 adev->ip_blocks[i].status.hang =
3616 adev->ip_blocks[i].version->funcs->check_soft_reset(adev);
3617 if (adev->ip_blocks[i].status.hang) {
3618 DRM_INFO("IP block:%s is hung!\n", adev->ip_blocks[i].version->funcs->name);
63fbf42f
CZ
3619 asic_hang = true;
3620 }
3621 }
3622 return asic_hang;
3623}
3624
e3ecdffa
AD
3625/**
3626 * amdgpu_device_ip_pre_soft_reset - prepare for soft reset
3627 *
3628 * @adev: amdgpu_device pointer
3629 *
3630 * The list of all the hardware IPs that make up the asic is walked and the
3631 * pre_soft_reset callbacks are run if the block is hung. pre_soft_reset
3632 * handles any IP specific hardware or software state changes that are
3633 * necessary for a soft reset to succeed.
3634 * Returns 0 on success, negative error code on failure.
3635 */
06ec9070 3636static int amdgpu_device_ip_pre_soft_reset(struct amdgpu_device *adev)
d31a501e
CZ
3637{
3638 int i, r = 0;
3639
3640 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 3641 if (!adev->ip_blocks[i].status.valid)
d31a501e 3642 continue;
a1255107
AD
3643 if (adev->ip_blocks[i].status.hang &&
3644 adev->ip_blocks[i].version->funcs->pre_soft_reset) {
3645 r = adev->ip_blocks[i].version->funcs->pre_soft_reset(adev);
d31a501e
CZ
3646 if (r)
3647 return r;
3648 }
3649 }
3650
3651 return 0;
3652}
3653
e3ecdffa
AD
3654/**
3655 * amdgpu_device_ip_need_full_reset - check if a full asic reset is needed
3656 *
3657 * @adev: amdgpu_device pointer
3658 *
3659 * Some hardware IPs cannot be soft reset. If they are hung, a full gpu
3660 * reset is necessary to recover.
3661 * Returns true if a full asic reset is required, false if not.
3662 */
06ec9070 3663static bool amdgpu_device_ip_need_full_reset(struct amdgpu_device *adev)
35d782fe 3664{
da146d3b
AD
3665 int i;
3666
8bc04c29
AD
3667 if (amdgpu_asic_need_full_reset(adev))
3668 return true;
3669
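/* A hang in GMC, SMC, ACP, DCE or PSP cannot be recovered with a per-IP
 * soft reset; any of them being hung forces a full ASIC reset.
 */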
da146d3b 3670 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 3671 if (!adev->ip_blocks[i].status.valid)
da146d3b 3672 continue;
a1255107
AD
3673 if ((adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) ||
3674 (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) ||
3675 (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_ACP) ||
98512bb8
KW
3676 (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_DCE) ||
3677 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP) {
a1255107 3678 if (adev->ip_blocks[i].status.hang) {
da146d3b
AD
3679				DRM_INFO("Some blocks need a full reset!\n");
3680 return true;
3681 }
3682 }
35d782fe
CZ
3683 }
3684 return false;
3685}
3686
e3ecdffa
AD
3687/**
3688 * amdgpu_device_ip_soft_reset - do a soft reset
3689 *
3690 * @adev: amdgpu_device pointer
3691 *
3692 * The list of all the hardware IPs that make up the asic is walked and the
3693 * soft_reset callbacks are run if the block is hung. soft_reset handles any
3694 * IP specific hardware or software state changes that are necessary to soft
3695 * reset the IP.
3696 * Returns 0 on success, negative error code on failure.
3697 */
06ec9070 3698static int amdgpu_device_ip_soft_reset(struct amdgpu_device *adev)
35d782fe
CZ
3699{
3700 int i, r = 0;
3701
3702 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 3703 if (!adev->ip_blocks[i].status.valid)
35d782fe 3704 continue;
a1255107
AD
3705 if (adev->ip_blocks[i].status.hang &&
3706 adev->ip_blocks[i].version->funcs->soft_reset) {
3707 r = adev->ip_blocks[i].version->funcs->soft_reset(adev);
35d782fe
CZ
3708 if (r)
3709 return r;
3710 }
3711 }
3712
3713 return 0;
3714}
3715
e3ecdffa
AD
3716/**
3717 * amdgpu_device_ip_post_soft_reset - clean up from soft reset
3718 *
3719 * @adev: amdgpu_device pointer
3720 *
3721 * The list of all the hardware IPs that make up the asic is walked and the
3722 * post_soft_reset callbacks are run if the asic was hung. post_soft_reset
3723 * handles any IP specific hardware or software state changes that are
3724 * necessary after the IP has been soft reset.
3725 * Returns 0 on success, negative error code on failure.
3726 */
06ec9070 3727static int amdgpu_device_ip_post_soft_reset(struct amdgpu_device *adev)
35d782fe
CZ
3728{
3729 int i, r = 0;
3730
3731 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 3732 if (!adev->ip_blocks[i].status.valid)
35d782fe 3733 continue;
a1255107
AD
3734 if (adev->ip_blocks[i].status.hang &&
3735 adev->ip_blocks[i].version->funcs->post_soft_reset)
3736 r = adev->ip_blocks[i].version->funcs->post_soft_reset(adev);
35d782fe
CZ
3737 if (r)
3738 return r;
3739 }
3740
3741 return 0;
3742}
3743
e3ecdffa 3744/**
c33adbc7 3745 * amdgpu_device_recover_vram - Recover some VRAM contents
e3ecdffa
AD
3746 *
3747 * @adev: amdgpu_device pointer
3748 *
3749 * Restores the contents of VRAM buffers from the shadows in GTT. Used to
3750 * restore things like GPUVM page tables after a GPU reset where
3751 * the contents of VRAM might be lost.
403009bf
CK
3752 *
3753 * Returns:
3754 * 0 on success, negative error code on failure.
e3ecdffa 3755 */
c33adbc7 3756static int amdgpu_device_recover_vram(struct amdgpu_device *adev)
c41d1cf6 3757{
c41d1cf6 3758 struct dma_fence *fence = NULL, *next = NULL;
403009bf
CK
3759 struct amdgpu_bo *shadow;
3760 long r = 1, tmo;
c41d1cf6
ML
3761
3762 if (amdgpu_sriov_runtime(adev))
b045d3af 3763 tmo = msecs_to_jiffies(8000);
c41d1cf6
ML
3764 else
3765 tmo = msecs_to_jiffies(100);
3766
3767 DRM_INFO("recover vram bo from shadow start\n");
3768 mutex_lock(&adev->shadow_list_lock);
403009bf
CK
3769 list_for_each_entry(shadow, &adev->shadow_list, shadow_list) {
3770
3771 /* No need to recover an evicted BO */
3772 if (shadow->tbo.mem.mem_type != TTM_PL_TT ||
b575f10d 3773 shadow->tbo.mem.start == AMDGPU_BO_INVALID_OFFSET ||
403009bf
CK
3774 shadow->parent->tbo.mem.mem_type != TTM_PL_VRAM)
3775 continue;
3776
3777 r = amdgpu_bo_restore_shadow(shadow, &next);
3778 if (r)
3779 break;
3780
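/* Restores are pipelined: the copy for this BO was queued above, and we
 * wait on the previous BO's fence while it runs. dma_fence_wait_timeout()
 * returns the remaining time, so the timeout budget is shared across all
 * restores.
 */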
c41d1cf6 3781 if (fence) {
1712fb1a 3782 tmo = dma_fence_wait_timeout(fence, false, tmo);
403009bf
CK
3783 dma_fence_put(fence);
3784 fence = next;
1712fb1a 3785 if (tmo == 0) {
3786 r = -ETIMEDOUT;
c41d1cf6 3787 break;
1712fb1a 3788 } else if (tmo < 0) {
3789 r = tmo;
3790 break;
3791 }
403009bf
CK
3792 } else {
3793 fence = next;
c41d1cf6 3794 }
c41d1cf6
ML
3795 }
3796 mutex_unlock(&adev->shadow_list_lock);
3797
403009bf
CK
3798 if (fence)
3799 tmo = dma_fence_wait_timeout(fence, false, tmo);
c41d1cf6
ML
3800 dma_fence_put(fence);
3801
1712fb1a 3802 if (r < 0 || tmo <= 0) {
3803 DRM_ERROR("recover vram bo from shadow failed, r is %ld, tmo is %ld\n", r, tmo);
403009bf
CK
3804 return -EIO;
3805 }
c41d1cf6 3806
403009bf
CK
3807 DRM_INFO("recover vram bo from shadow done\n");
3808 return 0;
c41d1cf6
ML
3809}
3810
a90ad3c2 3811
e3ecdffa 3812/**
06ec9070 3813 * amdgpu_device_reset_sriov - reset ASIC for SR-IOV vf
5740682e
ML
3814 *
3815 * @adev: amdgpu device pointer
87e3f136 3816 * @from_hypervisor: request from hypervisor
5740682e
ML
3817 *
3818 * do VF FLR and reinitialize the ASIC
3f48c681 3819 * Returns 0 on success, negative error code on failure.
e3ecdffa
AD
3820 */
3821static int amdgpu_device_reset_sriov(struct amdgpu_device *adev,
3822 bool from_hypervisor)
5740682e
ML
3823{
3824 int r;
3825
3826 if (from_hypervisor)
3827 r = amdgpu_virt_request_full_gpu(adev, true);
3828 else
3829 r = amdgpu_virt_reset_gpu(adev);
3830 if (r)
3831 return r;
a90ad3c2 3832
b639c22c
JZ
3833 amdgpu_amdkfd_pre_reset(adev);
3834
a90ad3c2 3835 /* Resume IP prior to SMC */
06ec9070 3836 r = amdgpu_device_ip_reinit_early_sriov(adev);
5740682e
ML
3837 if (r)
3838 goto error;
a90ad3c2 3839
c9ffa427 3840 amdgpu_virt_init_data_exchange(adev);
a90ad3c2 3841 /* we need recover gart prior to run SMC/CP/SDMA resume */
c1c7ce8f 3842 amdgpu_gtt_mgr_recover(&adev->mman.bdev.man[TTM_PL_TT]);
a90ad3c2 3843
7a3e0bb2
RZ
3844 r = amdgpu_device_fw_loading(adev);
3845 if (r)
3846 return r;
3847
a90ad3c2 3848 /* now we are okay to resume SMC/CP/SDMA */
06ec9070 3849 r = amdgpu_device_ip_reinit_late_sriov(adev);
5740682e
ML
3850 if (r)
3851 goto error;
a90ad3c2
ML
3852
3853 amdgpu_irq_gpu_reset_resume_helper(adev);
5740682e 3854 r = amdgpu_ib_ring_tests(adev);
f81e8d53 3855 amdgpu_amdkfd_post_reset(adev);
a90ad3c2 3856
abc34253
ED
3857error:
3858 amdgpu_virt_release_full_gpu(adev, true);
c41d1cf6 3859 if (!r && adev->virt.gim_feature & AMDGIM_FEATURE_GIM_FLR_VRAMLOST) {
e3526257 3860 amdgpu_inc_vram_lost(adev);
c33adbc7 3861 r = amdgpu_device_recover_vram(adev);
a90ad3c2
ML
3862 }
3863
3864 return r;
3865}
3866
12938fad
CK
3867/**
3868 * amdgpu_device_should_recover_gpu - check if we should try GPU recovery
3869 *
3870 * @adev: amdgpu device pointer
3871 *
3872 * Check amdgpu_gpu_recovery and SRIOV status to see if we should try to recover
3873 * a hung GPU.
3874 */
3875bool amdgpu_device_should_recover_gpu(struct amdgpu_device *adev)
3876{
3877 if (!amdgpu_device_ip_check_soft_reset(adev)) {
3878 DRM_INFO("Timeout, but no hardware hang detected.\n");
3879 return false;
3880 }
3881
3ba7b418
AG
3882 if (amdgpu_gpu_recovery == 0)
3883 goto disabled;
3884
3885 if (amdgpu_sriov_vf(adev))
3886 return true;
3887
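/* amdgpu_gpu_recovery: 0 disables recovery, 1 force-enables it, and -1
 * (auto) enables it only for the ASICs listed below.
 */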
3888 if (amdgpu_gpu_recovery == -1) {
3889 switch (adev->asic_type) {
fc42d47c
AG
3890 case CHIP_BONAIRE:
3891 case CHIP_HAWAII:
3ba7b418
AG
3892 case CHIP_TOPAZ:
3893 case CHIP_TONGA:
3894 case CHIP_FIJI:
3895 case CHIP_POLARIS10:
3896 case CHIP_POLARIS11:
3897 case CHIP_POLARIS12:
3898 case CHIP_VEGAM:
3899 case CHIP_VEGA20:
3900 case CHIP_VEGA10:
3901 case CHIP_VEGA12:
c43b849f 3902 case CHIP_RAVEN:
e9d4cf91 3903 case CHIP_ARCTURUS:
2cb44fb0 3904 case CHIP_RENOIR:
658c6639
AD
3905 case CHIP_NAVI10:
3906 case CHIP_NAVI14:
3907 case CHIP_NAVI12:
3ba7b418
AG
3908 break;
3909 default:
3910 goto disabled;
3911 }
12938fad
CK
3912 }
3913
3914 return true;
3ba7b418
AG
3915
3916disabled:
3917 DRM_INFO("GPU recovery disabled.\n");
3918 return false;
12938fad
CK
3919}
3920
5c6dd71e 3921
26bc5340
AG
3922static int amdgpu_device_pre_asic_reset(struct amdgpu_device *adev,
3923 struct amdgpu_job *job,
3924 bool *need_full_reset_arg)
3925{
3926 int i, r = 0;
3927 bool need_full_reset = *need_full_reset_arg;
71182665 3928
728e7e0c
JZ
3929 amdgpu_debugfs_wait_dump(adev);
3930
71182665 3931 /* block all schedulers and reset given job's ring */
0875dc9e
CZ
3932 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
3933 struct amdgpu_ring *ring = adev->rings[i];
3934
51687759 3935 if (!ring || !ring->sched.thread)
0875dc9e 3936 continue;
5740682e 3937
2f9d4084
ML
3938 /* after all hw jobs are reset, hw fence is meaningless, so force_completion */
3939 amdgpu_fence_driver_force_completion(ring);
0875dc9e 3940 }
d38ceaf9 3941
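/* Bump the karma of the job that triggered the timeout so the scheduler
 * can mark its context guilty if it keeps causing hangs.
 */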
222b5f04
AG
3942	if (job)
3943 drm_sched_increase_karma(&job->base);
3944
1d721ed6 3945 /* Don't suspend on bare metal if we are not going to HW reset the ASIC */
26bc5340
AG
3946 if (!amdgpu_sriov_vf(adev)) {
3947
3948 if (!need_full_reset)
3949 need_full_reset = amdgpu_device_ip_need_full_reset(adev);
3950
3951 if (!need_full_reset) {
3952 amdgpu_device_ip_pre_soft_reset(adev);
3953 r = amdgpu_device_ip_soft_reset(adev);
3954 amdgpu_device_ip_post_soft_reset(adev);
3955 if (r || amdgpu_device_ip_check_soft_reset(adev)) {
3956 DRM_INFO("soft reset failed, will fallback to full reset!\n");
3957 need_full_reset = true;
3958 }
3959 }
3960
3961 if (need_full_reset)
3962 r = amdgpu_device_ip_suspend(adev);
3963
3964 *need_full_reset_arg = need_full_reset;
3965 }
3966
3967 return r;
3968}
3969
041a62bc 3970static int amdgpu_do_asic_reset(struct amdgpu_hive_info *hive,
26bc5340
AG
3971 struct list_head *device_list_handle,
3972 bool *need_full_reset_arg)
3973{
3974 struct amdgpu_device *tmp_adev = NULL;
3975 bool need_full_reset = *need_full_reset_arg, vram_lost = false;
3976 int r = 0;
3977
3978 /*
3979	 * ASIC reset has to be done on all XGMI hive nodes ASAP
3980	 * to allow proper link negotiation in FW (within 1 sec)
3981 */
3982 if (need_full_reset) {
3983 list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) {
041a62bc 3984 /* For XGMI run all resets in parallel to speed up the process */
d4535e2c 3985 if (tmp_adev->gmc.xgmi.num_physical_nodes > 1) {
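/* queue_work() returns false if the reset work was already queued,
 * i.e. another reset is already in flight for this node.
 */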
c96cf282 3986 if (!queue_work(system_unbound_wq, &tmp_adev->xgmi_reset_work))
d4535e2c
AG
3987 r = -EALREADY;
3988 } else
3989 r = amdgpu_asic_reset(tmp_adev);
d4535e2c 3990
041a62bc
AG
3991 if (r) {
3992 DRM_ERROR("ASIC reset failed with error, %d for drm dev, %s",
3993 r, tmp_adev->ddev->unique);
3994 break;
ce316fa5
LM
3995 }
3996 }
3997
041a62bc
AG
3998 /* For XGMI wait for all resets to complete before proceed */
3999 if (!r) {
ce316fa5
LM
4000 list_for_each_entry(tmp_adev, device_list_handle,
4001 gmc.xgmi.head) {
4002 if (tmp_adev->gmc.xgmi.num_physical_nodes > 1) {
4003 flush_work(&tmp_adev->xgmi_reset_work);
4004 r = tmp_adev->asic_reset_res;
4005 if (r)
4006 break;
ce316fa5
LM
4007 }
4008 }
4009 }
ce316fa5 4010 }
26bc5340 4011
43c4d576
JC
4012 if (!r && amdgpu_ras_intr_triggered()) {
4013 list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) {
4014 if (tmp_adev->mmhub.funcs &&
4015 tmp_adev->mmhub.funcs->reset_ras_error_count)
4016 tmp_adev->mmhub.funcs->reset_ras_error_count(tmp_adev);
4017 }
4018
00eaa571 4019 amdgpu_ras_intr_cleared();
43c4d576 4020 }
00eaa571 4021
26bc5340
AG
4022 list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) {
4023 if (need_full_reset) {
4024 /* post card */
4025 if (amdgpu_atom_asic_init(tmp_adev->mode_info.atom_context))
4026 DRM_WARN("asic atom init failed!");
4027
4028 if (!r) {
4029 dev_info(tmp_adev->dev, "GPU reset succeeded, trying to resume\n");
4030 r = amdgpu_device_ip_resume_phase1(tmp_adev);
4031 if (r)
4032 goto out;
4033
4034 vram_lost = amdgpu_device_check_vram_lost(tmp_adev);
4035 if (vram_lost) {
77e7f829 4036 DRM_INFO("VRAM is lost due to GPU reset!\n");
e3526257 4037 amdgpu_inc_vram_lost(tmp_adev);
26bc5340
AG
4038 }
4039
4040 r = amdgpu_gtt_mgr_recover(
4041 &tmp_adev->mman.bdev.man[TTM_PL_TT]);
4042 if (r)
4043 goto out;
4044
4045 r = amdgpu_device_fw_loading(tmp_adev);
4046 if (r)
4047 return r;
4048
4049 r = amdgpu_device_ip_resume_phase2(tmp_adev);
4050 if (r)
4051 goto out;
4052
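/* VRAM contents were reinitialized, so refresh the magic that
 * amdgpu_device_check_vram_lost() will compare against on the next reset.
 */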
4053 if (vram_lost)
4054 amdgpu_device_fill_reset_magic(tmp_adev);
4055
fdafb359
EQ
4056 /*
4057 * Add this ASIC as tracked as reset was already
4058 * complete successfully.
4059 */
4060 amdgpu_register_gpu_instance(tmp_adev);
4061
7c04ca50 4062 r = amdgpu_device_ip_late_init(tmp_adev);
4063 if (r)
4064 goto out;
4065
565d1941
EQ
4066 amdgpu_fbdev_set_suspend(tmp_adev, 0);
4067
e79a04d5 4068 /* must succeed. */
511fdbc3 4069 amdgpu_ras_resume(tmp_adev);
e79a04d5 4070
26bc5340
AG
4071 /* Update PSP FW topology after reset */
4072 if (hive && tmp_adev->gmc.xgmi.num_physical_nodes > 1)
4073 r = amdgpu_xgmi_update_topology(hive, tmp_adev);
4074 }
4075 }
4076
4077
4078out:
4079 if (!r) {
4080 amdgpu_irq_gpu_reset_resume_helper(tmp_adev);
4081 r = amdgpu_ib_ring_tests(tmp_adev);
4082 if (r) {
4083 dev_err(tmp_adev->dev, "ib ring test failed (%d).\n", r);
4084 r = amdgpu_device_ip_suspend(tmp_adev);
4085 need_full_reset = true;
4086 r = -EAGAIN;
4087 goto end;
4088 }
4089 }
4090
4091 if (!r)
4092 r = amdgpu_device_recover_vram(tmp_adev);
4093 else
4094 tmp_adev->asic_reset_res = r;
4095 }
4096
4097end:
4098 *need_full_reset_arg = need_full_reset;
4099 return r;
4100}
4101
1d721ed6 4102static bool amdgpu_device_lock_adev(struct amdgpu_device *adev, bool trylock)
26bc5340 4103{
1d721ed6
AG
4104 if (trylock) {
4105 if (!mutex_trylock(&adev->lock_reset))
4106 return false;
4107 } else
4108 mutex_lock(&adev->lock_reset);
5740682e 4109
26bc5340 4110 atomic_inc(&adev->gpu_reset_counter);
2a9b90ae 4111 adev->in_gpu_reset = true;
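/* Record which reset method is in flight so the PM code can put the
 * MP1/SMU firmware into the matching state.
 */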
a3a09142
AD
4112 switch (amdgpu_asic_reset_method(adev)) {
4113 case AMD_RESET_METHOD_MODE1:
4114 adev->mp1_state = PP_MP1_STATE_SHUTDOWN;
4115 break;
4116 case AMD_RESET_METHOD_MODE2:
4117 adev->mp1_state = PP_MP1_STATE_RESET;
4118 break;
4119 default:
4120 adev->mp1_state = PP_MP1_STATE_NONE;
4121 break;
4122 }
1d721ed6
AG
4123
4124 return true;
26bc5340 4125}
d38ceaf9 4126
26bc5340
AG
4127static void amdgpu_device_unlock_adev(struct amdgpu_device *adev)
4128{
89041940 4129 amdgpu_vf_error_trans_all(adev);
a3a09142 4130 adev->mp1_state = PP_MP1_STATE_NONE;
2a9b90ae 4131 adev->in_gpu_reset = false;
13a752e3 4132 mutex_unlock(&adev->lock_reset);
26bc5340
AG
4133}
4134
3f12acc8
EQ
4135static void amdgpu_device_resume_display_audio(struct amdgpu_device *adev)
4136{
4137 struct pci_dev *p = NULL;
4138
4139 p = pci_get_domain_bus_and_slot(pci_domain_nr(adev->pdev->bus),
4140 adev->pdev->bus->number, 1);
4141 if (p) {
4142 pm_runtime_enable(&(p->dev));
4143 pm_runtime_resume(&(p->dev));
4144 }
4145}
4146
4147static int amdgpu_device_suspend_display_audio(struct amdgpu_device *adev)
4148{
4149 enum amd_reset_method reset_method;
4150 struct pci_dev *p = NULL;
4151 u64 expires;
4152
4153 /*
4154	 * For now, only BACO and mode1 reset are confirmed to
4155	 * suffer from the audio issue if the device is not properly suspended.
4156 */
4157 reset_method = amdgpu_asic_reset_method(adev);
4158 if ((reset_method != AMD_RESET_METHOD_BACO) &&
4159 (reset_method != AMD_RESET_METHOD_MODE1))
4160 return -EINVAL;
4161
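/* The HDA audio controller is function 1 on the same PCI device as the GPU. */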
4162 p = pci_get_domain_bus_and_slot(pci_domain_nr(adev->pdev->bus),
4163 adev->pdev->bus->number, 1);
4164 if (!p)
4165 return -ENODEV;
4166
4167 expires = pm_runtime_autosuspend_expiration(&(p->dev));
4168 if (!expires)
4169 /*
4170		 * If we cannot get the audio device autosuspend delay,
4171		 * a fixed 4s interval is used. Since 3s is the audio
4172		 * controller's default autosuspend delay, the 4s used
4173		 * here is guaranteed to cover it.
4174 */
54b7feb9 4175 expires = ktime_get_mono_fast_ns() + NSEC_PER_SEC * 4ULL;
3f12acc8
EQ
4176
4177 while (!pm_runtime_status_suspended(&(p->dev))) {
4178 if (!pm_runtime_suspend(&(p->dev)))
4179 break;
4180
4181 if (expires < ktime_get_mono_fast_ns()) {
4182 dev_warn(adev->dev, "failed to suspend display audio\n");
4183 /* TODO: abort the succeeding gpu reset? */
4184 return -ETIMEDOUT;
4185 }
4186 }
4187
4188 pm_runtime_disable(&(p->dev));
4189
4190 return 0;
4191}
4192
26bc5340
AG
4193/**
4194 * amdgpu_device_gpu_recover - reset the asic and recover scheduler
4195 *
4196 * @adev: amdgpu device pointer
4197 * @job: which job triggered the hang
4198 *
4199 * Attempt to reset the GPU if it has hung (all asics).
4200 * Attempt to do a soft reset or full reset and reinitialize the ASIC.
4201 * Returns 0 for success or an error on failure.
4202 */
4203
4204int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
4205 struct amdgpu_job *job)
4206{
1d721ed6 4207 struct list_head device_list, *device_list_handle = NULL;
7dd8c205
EQ
4208 bool need_full_reset = false;
4209 bool job_signaled = false;
26bc5340 4210 struct amdgpu_hive_info *hive = NULL;
26bc5340 4211 struct amdgpu_device *tmp_adev = NULL;
1d721ed6 4212 int i, r = 0;
7c6e68c7 4213 bool in_ras_intr = amdgpu_ras_intr_triggered();
b823821f
LM
4214	bool use_baco =
4215		(amdgpu_asic_reset_method(adev) == AMD_RESET_METHOD_BACO);
3f12acc8 4217 bool audio_suspended = false;
26bc5340 4218
d5ea093e
AG
4219 /*
4220 * Flush RAM to disk so that after reboot
4221	 * the user can read the log and see why the system rebooted.
4222 */
b823821f 4223 if (in_ras_intr && !use_baco && amdgpu_ras_get_context(adev)->reboot) {
d5ea093e
AG
4224
4225 DRM_WARN("Emergency reboot.");
4226
4227 ksys_sync_helper();
4228 emergency_restart();
4229 }
4230
b823821f
LM
4231 dev_info(adev->dev, "GPU %s begin!\n",
4232 (in_ras_intr && !use_baco) ? "jobs stop":"reset");
26bc5340
AG
4233
4234 /*
1d721ed6
AG
4235	 * Here we trylock to avoid a chain of resets executing concurrently,
4236	 * either triggered by jobs on different adevs in an XGMI hive or by jobs on
4237	 * different schedulers of the same device, while this timeout handler is running.
4238	 * We always reset all schedulers for a device and all devices in an XGMI
4239	 * hive, so that should take care of them too.
26bc5340 4240 */
7dd8c205 4241 hive = amdgpu_get_xgmi_hive(adev, true);
1d721ed6
AG
4242 if (hive && !mutex_trylock(&hive->reset_lock)) {
4243 DRM_INFO("Bailing on TDR for s_job:%llx, hive: %llx as another already in progress",
0b2d2c2e 4244 job ? job->base.id : -1, hive->hive_id);
9e94d22c 4245 mutex_unlock(&hive->hive_lock);
26bc5340 4246 return 0;
1d721ed6 4247 }
26bc5340 4248
9e94d22c
EQ
4249 /*
4250 * Build list of devices to reset.
4251 * In case we are in XGMI hive mode, resort the device list
4252 * to put adev in the 1st position.
4253 */
4254 INIT_LIST_HEAD(&device_list);
4255 if (adev->gmc.xgmi.num_physical_nodes > 1) {
4256 if (!hive)
26bc5340 4257 return -ENODEV;
9e94d22c
EQ
4258 if (!list_is_first(&adev->gmc.xgmi.head, &hive->device_list))
4259 list_rotate_to_front(&adev->gmc.xgmi.head, &hive->device_list);
26bc5340
AG
4260 device_list_handle = &hive->device_list;
4261 } else {
4262 list_add_tail(&adev->gmc.xgmi.head, &device_list);
4263 device_list_handle = &device_list;
4264 }
4265
1d721ed6
AG
4266 /* block all schedulers and reset given job's ring */
4267 list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) {
9e94d22c
EQ
4268 if (!amdgpu_device_lock_adev(tmp_adev, !hive)) {
4269 DRM_INFO("Bailing on TDR for s_job:%llx, as another already in progress",
4270 job ? job->base.id : -1);
4271 mutex_unlock(&hive->hive_lock);
4272 return 0;
7c6e68c7
AG
4273 }
4274
3f12acc8
EQ
4275 /*
4276		 * Try to put the audio codec into suspend state
4277		 * before the gpu reset is started.
4278		 *
4279		 * The power domain of the graphics device is
4280		 * shared with the AZ power domain. Without this,
4281		 * we may change the audio hardware from behind
4282		 * the audio driver's back and trigger
4283		 * audio codec errors.
4284 */
4285 if (!amdgpu_device_suspend_display_audio(tmp_adev))
4286 audio_suspended = true;
4287
9e94d22c
EQ
4288 amdgpu_ras_set_error_query_ready(tmp_adev, false);
4289
52fb44cf
EQ
4290 cancel_delayed_work_sync(&tmp_adev->delayed_init_work);
4291
9e94d22c
EQ
4292 if (!amdgpu_sriov_vf(tmp_adev))
4293 amdgpu_amdkfd_pre_reset(tmp_adev);
4294
12ffa55d
AG
4295 /*
4296		 * Mark these ASICs to be reset as untracked first,
4297		 * and add them back after the reset has completed.
4298 */
4299 amdgpu_unregister_gpu_instance(tmp_adev);
4300
a2f63ee8 4301 amdgpu_fbdev_set_suspend(tmp_adev, 1);
565d1941 4302
f1c1314b 4303 /* disable ras on ALL IPs */
b823821f
LM
4304 if (!(in_ras_intr && !use_baco) &&
4305 amdgpu_device_ip_need_full_reset(tmp_adev))
f1c1314b 4306 amdgpu_ras_suspend(tmp_adev);
4307
1d721ed6
AG
4308 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
4309 struct amdgpu_ring *ring = tmp_adev->rings[i];
4310
4311 if (!ring || !ring->sched.thread)
4312 continue;
4313
0b2d2c2e 4314 drm_sched_stop(&ring->sched, job ? &job->base : NULL);
7c6e68c7 4315
b823821f 4316 if (in_ras_intr && !use_baco)
7c6e68c7 4317 amdgpu_job_stop_all_jobs_on_sched(&ring->sched);
1d721ed6
AG
4318 }
4319 }
4320
b823821f 4321 if (in_ras_intr && !use_baco)
7c6e68c7
AG
4322 goto skip_sched_resume;
4323
1d721ed6
AG
4324 /*
4325 * Must check guilty signal here since after this point all old
4326 * HW fences are force signaled.
4327 *
4328 * job->base holds a reference to parent fence
4329 */
4330 if (job && job->base.s_fence->parent &&
7dd8c205 4331 dma_fence_is_signaled(job->base.s_fence->parent)) {
1d721ed6 4332 job_signaled = true;
1d721ed6
AG
4333 dev_info(adev->dev, "Guilty job already signaled, skipping HW reset");
4334 goto skip_hw_reset;
4335 }
4336
26bc5340
AG
4337retry: /* Rest of adevs pre asic reset from XGMI hive. */
4338 list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) {
26bc5340
AG
4339 r = amdgpu_device_pre_asic_reset(tmp_adev,
4340 NULL,
4341 &need_full_reset);
4342 /*TODO Should we stop ?*/
4343 if (r) {
4344 DRM_ERROR("GPU pre asic reset failed with err, %d for drm dev, %s ",
4345 r, tmp_adev->ddev->unique);
4346 tmp_adev->asic_reset_res = r;
4347 }
4348 }
4349
4350 /* Actual ASIC resets if needed.*/
4351 /* TODO Implement XGMI hive reset logic for SRIOV */
4352 if (amdgpu_sriov_vf(adev)) {
4353 r = amdgpu_device_reset_sriov(adev, job ? false : true);
4354 if (r)
4355 adev->asic_reset_res = r;
4356 } else {
041a62bc 4357 r = amdgpu_do_asic_reset(hive, device_list_handle, &need_full_reset);
26bc5340
AG
4358 if (r && r == -EAGAIN)
4359 goto retry;
4360 }
4361
1d721ed6
AG
4362skip_hw_reset:
4363
26bc5340
AG
4364 /* Post ASIC reset for all devs .*/
4365 list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) {
7c6e68c7 4366
1d721ed6
AG
4367 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
4368 struct amdgpu_ring *ring = tmp_adev->rings[i];
4369
4370 if (!ring || !ring->sched.thread)
4371 continue;
4372
4373 /* No point to resubmit jobs if we didn't HW reset*/
4374 if (!tmp_adev->asic_reset_res && !job_signaled)
4375 drm_sched_resubmit_jobs(&ring->sched);
4376
4377 drm_sched_start(&ring->sched, !tmp_adev->asic_reset_res);
4378 }
4379
4380 if (!amdgpu_device_has_dc_support(tmp_adev) && !job_signaled) {
4381 drm_helper_resume_force_mode(tmp_adev->ddev);
4382 }
4383
4384 tmp_adev->asic_reset_res = 0;
26bc5340
AG
4385
4386 if (r) {
4387 /* bad news, how to tell it to userspace ? */
12ffa55d 4388 dev_info(tmp_adev->dev, "GPU reset(%d) failed\n", atomic_read(&tmp_adev->gpu_reset_counter));
26bc5340
AG
4389 amdgpu_vf_error_put(tmp_adev, AMDGIM_ERROR_VF_GPU_RESET_FAIL, 0, r);
4390 } else {
12ffa55d 4391 dev_info(tmp_adev->dev, "GPU reset(%d) succeeded!\n", atomic_read(&tmp_adev->gpu_reset_counter));
26bc5340 4392 }
7c6e68c7 4393 }
26bc5340 4394
7c6e68c7
AG
4395skip_sched_resume:
4396 list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) {
4397 /*unlock kfd: SRIOV would do it separately */
b823821f 4398 if (!(in_ras_intr && !use_baco) && !amdgpu_sriov_vf(tmp_adev))
7c6e68c7 4399 amdgpu_amdkfd_post_reset(tmp_adev);
3f12acc8
EQ
4400 if (audio_suspended)
4401 amdgpu_device_resume_display_audio(tmp_adev);
26bc5340
AG
4402 amdgpu_device_unlock_adev(tmp_adev);
4403 }
4404
9e94d22c 4405 if (hive) {
22d6575b 4406 mutex_unlock(&hive->reset_lock);
9e94d22c
EQ
4407 mutex_unlock(&hive->hive_lock);
4408 }
26bc5340
AG
4409
4410 if (r)
4411 dev_info(adev->dev, "GPU reset end with ret = %d\n", r);
d38ceaf9
AD
4412 return r;
4413}
4414
e3ecdffa
AD
4415/**
4416 * amdgpu_device_get_pcie_info - fetch pcie info about the PCIE slot
4417 *
4418 * @adev: amdgpu_device pointer
4419 *
4420 * Fetches and stores in the driver the PCIE capabilities (gen speed
4421 * and lanes) of the slot the device is in. Handles APUs and
4422 * virtualized environments where PCIE config space may not be available.
4423 */
5494d864 4424static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev)
d0dd7f0c 4425{
5d9a6330 4426 struct pci_dev *pdev;
c5313457
HK
4427 enum pci_bus_speed speed_cap, platform_speed_cap;
4428 enum pcie_link_width platform_link_width;
d0dd7f0c 4429
cd474ba0
AD
4430 if (amdgpu_pcie_gen_cap)
4431 adev->pm.pcie_gen_mask = amdgpu_pcie_gen_cap;
d0dd7f0c 4432
cd474ba0
AD
4433 if (amdgpu_pcie_lane_cap)
4434 adev->pm.pcie_mlw_mask = amdgpu_pcie_lane_cap;
d0dd7f0c 4435
cd474ba0
AD
4436 /* covers APUs as well */
4437 if (pci_is_root_bus(adev->pdev->bus)) {
4438 if (adev->pm.pcie_gen_mask == 0)
4439 adev->pm.pcie_gen_mask = AMDGPU_DEFAULT_PCIE_GEN_MASK;
4440 if (adev->pm.pcie_mlw_mask == 0)
4441 adev->pm.pcie_mlw_mask = AMDGPU_DEFAULT_PCIE_MLW_MASK;
d0dd7f0c 4442 return;
cd474ba0 4443 }
d0dd7f0c 4444
c5313457
HK
4445 if (adev->pm.pcie_gen_mask && adev->pm.pcie_mlw_mask)
4446 return;
4447
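/* Query what the path up to the root port actually supports, as opposed
 * to what the ASIC itself is capable of.
 */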
dbaa922b
AD
4448 pcie_bandwidth_available(adev->pdev, NULL,
4449 &platform_speed_cap, &platform_link_width);
c5313457 4450
cd474ba0 4451 if (adev->pm.pcie_gen_mask == 0) {
5d9a6330
AD
4452 /* asic caps */
4453 pdev = adev->pdev;
4454 speed_cap = pcie_get_speed_cap(pdev);
4455 if (speed_cap == PCI_SPEED_UNKNOWN) {
4456 adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
cd474ba0
AD
4457 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
4458 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3);
cd474ba0 4459 } else {
5d9a6330
AD
4460 if (speed_cap == PCIE_SPEED_16_0GT)
4461 adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
4462 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
4463 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3 |
4464 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN4);
4465 else if (speed_cap == PCIE_SPEED_8_0GT)
4466 adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
4467 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
4468 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3);
4469 else if (speed_cap == PCIE_SPEED_5_0GT)
4470 adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
4471 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2);
4472 else
4473 adev->pm.pcie_gen_mask |= CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1;
4474 }
4475 /* platform caps */
c5313457 4476 if (platform_speed_cap == PCI_SPEED_UNKNOWN) {
5d9a6330
AD
4477 adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
4478 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2);
4479 } else {
c5313457 4480 if (platform_speed_cap == PCIE_SPEED_16_0GT)
5d9a6330
AD
4481 adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
4482 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2 |
4483 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3 |
4484 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN4);
c5313457 4485 else if (platform_speed_cap == PCIE_SPEED_8_0GT)
5d9a6330
AD
4486 adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
4487 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2 |
4488 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3);
c5313457 4489 else if (platform_speed_cap == PCIE_SPEED_5_0GT)
5d9a6330
AD
4490 adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
4491 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2);
4492 else
4493 adev->pm.pcie_gen_mask |= CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1;
4494
cd474ba0
AD
4495 }
4496 }
4497 if (adev->pm.pcie_mlw_mask == 0) {
c5313457 4498 if (platform_link_width == PCIE_LNK_WIDTH_UNKNOWN) {
5d9a6330
AD
4499 adev->pm.pcie_mlw_mask |= AMDGPU_DEFAULT_PCIE_MLW_MASK;
4500 } else {
c5313457 4501 switch (platform_link_width) {
5d9a6330 4502 case PCIE_LNK_X32:
cd474ba0
AD
4503 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X32 |
4504 CAIL_PCIE_LINK_WIDTH_SUPPORT_X16 |
4505 CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 |
4506 CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
4507 CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
4508 CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
4509 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
4510 break;
5d9a6330 4511 case PCIE_LNK_X16:
cd474ba0
AD
4512 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X16 |
4513 CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 |
4514 CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
4515 CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
4516 CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
4517 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
4518 break;
5d9a6330 4519 case PCIE_LNK_X12:
cd474ba0
AD
4520 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 |
4521 CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
4522 CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
4523 CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
4524 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
4525 break;
5d9a6330 4526 case PCIE_LNK_X8:
cd474ba0
AD
4527 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
4528 CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
4529 CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
4530 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
4531 break;
5d9a6330 4532 case PCIE_LNK_X4:
cd474ba0
AD
4533 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
4534 CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
4535 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
4536 break;
5d9a6330 4537 case PCIE_LNK_X2:
cd474ba0
AD
4538 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
4539 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
4540 break;
5d9a6330 4541 case PCIE_LNK_X1:
cd474ba0
AD
4542 adev->pm.pcie_mlw_mask = CAIL_PCIE_LINK_WIDTH_SUPPORT_X1;
4543 break;
4544 default:
4545 break;
4546 }
d0dd7f0c
AD
4547 }
4548 }
4549}
d38ceaf9 4550
361dbd01
AD
4551int amdgpu_device_baco_enter(struct drm_device *dev)
4552{
4553 struct amdgpu_device *adev = dev->dev_private;
7a22677b 4554 struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
361dbd01
AD
4555
4556 if (!amdgpu_device_supports_baco(adev->ddev))
4557 return -ENOTSUPP;
4558
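/* When RAS is supported, mask the doorbell interrupt while in BACO;
 * it is re-enabled in amdgpu_device_baco_exit().
 */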
7a22677b
LM
4559 if (ras && ras->supported)
4560 adev->nbio.funcs->enable_doorbell_interrupt(adev, false);
4561
9530273e 4562 return amdgpu_dpm_baco_enter(adev);
361dbd01
AD
4563}
4564
4565int amdgpu_device_baco_exit(struct drm_device *dev)
4566{
4567 struct amdgpu_device *adev = dev->dev_private;
7a22677b 4568 struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
9530273e 4569 int ret = 0;
361dbd01
AD
4570
4571 if (!amdgpu_device_supports_baco(adev->ddev))
4572 return -ENOTSUPP;
4573
9530273e
EQ
4574 ret = amdgpu_dpm_baco_exit(adev);
4575 if (ret)
4576 return ret;
7a22677b
LM
4577
4578 if (ras && ras->supported)
4579 adev->nbio.funcs->enable_doorbell_interrupt(adev, true);
4580
4581 return 0;
361dbd01 4582}