drm/amdgpu/display: give aux i2c buses more meaningful names
[linux-2.6-block.git] / drivers / gpu / drm / amd / amdgpu / amdgpu_device.c
CommitLineData
d38ceaf9
AD
1/*
2 * Copyright 2008 Advanced Micro Devices, Inc.
3 * Copyright 2008 Red Hat Inc.
4 * Copyright 2009 Jerome Glisse.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 * and/or sell copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
20 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
21 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
22 * OTHER DEALINGS IN THE SOFTWARE.
23 *
24 * Authors: Dave Airlie
25 * Alex Deucher
26 * Jerome Glisse
27 */
b1ddf548 28#include <linux/power_supply.h>
0875dc9e 29#include <linux/kthread.h>
fdf2f6c5 30#include <linux/module.h>
d38ceaf9
AD
31#include <linux/console.h>
32#include <linux/slab.h>
fdf2f6c5 33
4562236b 34#include <drm/drm_atomic_helper.h>
fcd70cd3 35#include <drm/drm_probe_helper.h>
d38ceaf9
AD
36#include <drm/amdgpu_drm.h>
37#include <linux/vgaarb.h>
38#include <linux/vga_switcheroo.h>
39#include <linux/efi.h>
40#include "amdgpu.h"
f4b373f4 41#include "amdgpu_trace.h"
d38ceaf9
AD
42#include "amdgpu_i2c.h"
43#include "atom.h"
44#include "amdgpu_atombios.h"
a5bde2f9 45#include "amdgpu_atomfirmware.h"
d0dd7f0c 46#include "amd_pcie.h"
33f34802
KW
47#ifdef CONFIG_DRM_AMDGPU_SI
48#include "si.h"
49#endif
a2e73f56
AD
50#ifdef CONFIG_DRM_AMDGPU_CIK
51#include "cik.h"
52#endif
aaa36a97 53#include "vi.h"
460826e6 54#include "soc15.h"
0a5b8c7b 55#include "nv.h"
d38ceaf9 56#include "bif/bif_4_1_d.h"
9accf2fd 57#include <linux/pci.h>
bec86378 58#include <linux/firmware.h>
89041940 59#include "amdgpu_vf_error.h"
d38ceaf9 60
ba997709 61#include "amdgpu_amdkfd.h"
d2f52ac8 62#include "amdgpu_pm.h"
d38ceaf9 63
5183411b 64#include "amdgpu_xgmi.h"
c030f2e4 65#include "amdgpu_ras.h"
9c7c85f7 66#include "amdgpu_pmu.h"
bd607166 67#include "amdgpu_fru_eeprom.h"
5183411b 68
d5ea093e 69#include <linux/suspend.h>
c6a6e2db 70#include <drm/task_barrier.h>
d5ea093e 71
e2a75f88 72MODULE_FIRMWARE("amdgpu/vega10_gpu_info.bin");
3f76dced 73MODULE_FIRMWARE("amdgpu/vega12_gpu_info.bin");
2d2e5e7e 74MODULE_FIRMWARE("amdgpu/raven_gpu_info.bin");
ad5a67a7 75MODULE_FIRMWARE("amdgpu/picasso_gpu_info.bin");
54c4d17e 76MODULE_FIRMWARE("amdgpu/raven2_gpu_info.bin");
65e60f6e 77MODULE_FIRMWARE("amdgpu/arcturus_gpu_info.bin");
b51a26a0 78MODULE_FIRMWARE("amdgpu/renoir_gpu_info.bin");
23c6268e 79MODULE_FIRMWARE("amdgpu/navi10_gpu_info.bin");
ed42cfe1 80MODULE_FIRMWARE("amdgpu/navi14_gpu_info.bin");
42b325e5 81MODULE_FIRMWARE("amdgpu/navi12_gpu_info.bin");
e2a75f88 82
2dc80b00
S
83#define AMDGPU_RESUME_MS 2000
84
050091ab 85const char *amdgpu_asic_name[] = {
da69c161
KW
86 "TAHITI",
87 "PITCAIRN",
88 "VERDE",
89 "OLAND",
90 "HAINAN",
d38ceaf9
AD
91 "BONAIRE",
92 "KAVERI",
93 "KABINI",
94 "HAWAII",
95 "MULLINS",
96 "TOPAZ",
97 "TONGA",
48299f95 98 "FIJI",
d38ceaf9 99 "CARRIZO",
139f4917 100 "STONEY",
2cc0c0b5
FC
101 "POLARIS10",
102 "POLARIS11",
c4642a47 103 "POLARIS12",
48ff108d 104 "VEGAM",
d4196f01 105 "VEGA10",
8fab806a 106 "VEGA12",
956fcddc 107 "VEGA20",
2ca8a5d2 108 "RAVEN",
d6c3b24e 109 "ARCTURUS",
1eee4228 110 "RENOIR",
852a6626 111 "NAVI10",
87dbad02 112 "NAVI14",
9802f5d7 113 "NAVI12",
d38ceaf9
AD
114 "LAST",
115};
116
dcea6e65
KR
117/**
118 * DOC: pcie_replay_count
119 *
120 * The amdgpu driver provides a sysfs API for reporting the total number
121 * of PCIe replays (NAKs)
122 * The file pcie_replay_count is used for this and returns the total
123 * number of replays as a sum of the NAKs generated and NAKs received
124 */
125
126static ssize_t amdgpu_device_get_pcie_replay_count(struct device *dev,
127 struct device_attribute *attr, char *buf)
128{
129 struct drm_device *ddev = dev_get_drvdata(dev);
130 struct amdgpu_device *adev = ddev->dev_private;
131 uint64_t cnt = amdgpu_asic_get_pcie_replay_count(adev);
132
133 return snprintf(buf, PAGE_SIZE, "%llu\n", cnt);
134}
135
136static DEVICE_ATTR(pcie_replay_count, S_IRUGO,
137 amdgpu_device_get_pcie_replay_count, NULL);
138
5494d864
AD
139static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev);
140
bd607166
KR
141/**
142 * DOC: product_name
143 *
144 * The amdgpu driver provides a sysfs API for reporting the product name
145 * for the device
146 * The file serial_number is used for this and returns the product name
147 * as returned from the FRU.
148 * NOTE: This is only available for certain server cards
149 */
150
151static ssize_t amdgpu_device_get_product_name(struct device *dev,
152 struct device_attribute *attr, char *buf)
153{
154 struct drm_device *ddev = dev_get_drvdata(dev);
155 struct amdgpu_device *adev = ddev->dev_private;
156
157 return snprintf(buf, PAGE_SIZE, "%s\n", adev->product_name);
158}
159
160static DEVICE_ATTR(product_name, S_IRUGO,
161 amdgpu_device_get_product_name, NULL);
162
163/**
164 * DOC: product_number
165 *
166 * The amdgpu driver provides a sysfs API for reporting the part number
167 * for the device
168 * The file serial_number is used for this and returns the part number
169 * as returned from the FRU.
170 * NOTE: This is only available for certain server cards
171 */
172
173static ssize_t amdgpu_device_get_product_number(struct device *dev,
174 struct device_attribute *attr, char *buf)
175{
176 struct drm_device *ddev = dev_get_drvdata(dev);
177 struct amdgpu_device *adev = ddev->dev_private;
178
179 return snprintf(buf, PAGE_SIZE, "%s\n", adev->product_number);
180}
181
182static DEVICE_ATTR(product_number, S_IRUGO,
183 amdgpu_device_get_product_number, NULL);
184
185/**
186 * DOC: serial_number
187 *
188 * The amdgpu driver provides a sysfs API for reporting the serial number
189 * for the device
190 * The file serial_number is used for this and returns the serial number
191 * as returned from the FRU.
192 * NOTE: This is only available for certain server cards
193 */
194
195static ssize_t amdgpu_device_get_serial_number(struct device *dev,
196 struct device_attribute *attr, char *buf)
197{
198 struct drm_device *ddev = dev_get_drvdata(dev);
199 struct amdgpu_device *adev = ddev->dev_private;
200
201 return snprintf(buf, PAGE_SIZE, "%s\n", adev->serial);
202}
203
204static DEVICE_ATTR(serial_number, S_IRUGO,
205 amdgpu_device_get_serial_number, NULL);
206
e3ecdffa 207/**
31af062a 208 * amdgpu_device_supports_boco - Is the device a dGPU with HG/PX power control
e3ecdffa
AD
209 *
210 * @dev: drm_device pointer
211 *
212 * Returns true if the device is a dGPU with HG/PX power control,
213 * otherwise return false.
214 */
31af062a 215bool amdgpu_device_supports_boco(struct drm_device *dev)
d38ceaf9
AD
216{
217 struct amdgpu_device *adev = dev->dev_private;
218
2f7d10b3 219 if (adev->flags & AMD_IS_PX)
d38ceaf9
AD
220 return true;
221 return false;
222}
223
a69cba42
AD
224/**
225 * amdgpu_device_supports_baco - Does the device support BACO
226 *
227 * @dev: drm_device pointer
228 *
229 * Returns true if the device supporte BACO,
230 * otherwise return false.
231 */
232bool amdgpu_device_supports_baco(struct drm_device *dev)
233{
234 struct amdgpu_device *adev = dev->dev_private;
235
236 return amdgpu_asic_supports_baco(adev);
237}
238
e35e2b11
TY
239/**
240 * VRAM access helper functions.
241 *
242 * amdgpu_device_vram_access - read/write a buffer in vram
243 *
244 * @adev: amdgpu_device pointer
245 * @pos: offset of the buffer in vram
246 * @buf: virtual address of the buffer in system memory
247 * @size: read/write size, sizeof(@buf) must > @size
248 * @write: true - write to vram, otherwise - read from vram
249 */
250void amdgpu_device_vram_access(struct amdgpu_device *adev, loff_t pos,
251 uint32_t *buf, size_t size, bool write)
252{
e35e2b11 253 unsigned long flags;
ce05ac56
CK
254 uint32_t hi = ~0;
255 uint64_t last;
256
257 spin_lock_irqsave(&adev->mmio_idx_lock, flags);
258 for (last = pos + size; pos < last; pos += 4) {
259 uint32_t tmp = pos >> 31;
e35e2b11 260
e35e2b11 261 WREG32_NO_KIQ(mmMM_INDEX, ((uint32_t)pos) | 0x80000000);
ce05ac56
CK
262 if (tmp != hi) {
263 WREG32_NO_KIQ(mmMM_INDEX_HI, tmp);
264 hi = tmp;
265 }
e35e2b11
TY
266 if (write)
267 WREG32_NO_KIQ(mmMM_DATA, *buf++);
268 else
269 *buf++ = RREG32_NO_KIQ(mmMM_DATA);
e35e2b11 270 }
ce05ac56 271 spin_unlock_irqrestore(&adev->mmio_idx_lock, flags);
e35e2b11
TY
272}
273
d38ceaf9 274/*
2eee0229 275 * device register access helper functions.
d38ceaf9 276 */
e3ecdffa 277/**
2eee0229 278 * amdgpu_device_rreg - read a register
e3ecdffa
AD
279 *
280 * @adev: amdgpu_device pointer
281 * @reg: dword aligned register offset
282 * @acc_flags: access flags which require special behavior
283 *
284 * Returns the 32 bit value from the offset specified.
285 */
2eee0229
HZ
286uint32_t amdgpu_device_rreg(struct amdgpu_device *adev, uint32_t reg,
287 uint32_t acc_flags)
d38ceaf9 288{
f4b373f4
TSD
289 uint32_t ret;
290
f384ff95 291 if (!(acc_flags & AMDGPU_REGS_NO_KIQ) && amdgpu_sriov_runtime(adev))
d33a99c4 292 return amdgpu_kiq_rreg(adev, reg);
bc992ba5 293
ec59847e 294 if ((reg * 4) < adev->rmmio_size)
f4b373f4 295 ret = readl(((void __iomem *)adev->rmmio) + (reg * 4));
2eee0229
HZ
296 else
297 ret = adev->pcie_rreg(adev, (reg * 4));
298 trace_amdgpu_device_rreg(adev->pdev->device, reg, ret);
f4b373f4 299 return ret;
d38ceaf9
AD
300}
301
421a2a30
ML
302/*
303 * MMIO register read with bytes helper functions
304 * @offset:bytes offset from MMIO start
305 *
306*/
307
e3ecdffa
AD
308/**
309 * amdgpu_mm_rreg8 - read a memory mapped IO register
310 *
311 * @adev: amdgpu_device pointer
312 * @offset: byte aligned register offset
313 *
314 * Returns the 8 bit value from the offset specified.
315 */
421a2a30
ML
316uint8_t amdgpu_mm_rreg8(struct amdgpu_device *adev, uint32_t offset) {
317 if (offset < adev->rmmio_size)
318 return (readb(adev->rmmio + offset));
319 BUG();
320}
321
322/*
323 * MMIO register write with bytes helper functions
324 * @offset:bytes offset from MMIO start
325 * @value: the value want to be written to the register
326 *
327*/
e3ecdffa
AD
328/**
329 * amdgpu_mm_wreg8 - read a memory mapped IO register
330 *
331 * @adev: amdgpu_device pointer
332 * @offset: byte aligned register offset
333 * @value: 8 bit value to write
334 *
335 * Writes the value specified to the offset specified.
336 */
421a2a30
ML
337void amdgpu_mm_wreg8(struct amdgpu_device *adev, uint32_t offset, uint8_t value) {
338 if (offset < adev->rmmio_size)
339 writeb(value, adev->rmmio + offset);
340 else
341 BUG();
342}
343
2eee0229
HZ
344void static inline amdgpu_device_wreg_no_kiq(struct amdgpu_device *adev, uint32_t reg,
345 uint32_t v, uint32_t acc_flags)
2e0cc4d4 346{
2eee0229 347 trace_amdgpu_device_wreg(adev->pdev->device, reg, v);
2e0cc4d4 348
ec59847e 349 if ((reg * 4) < adev->rmmio_size)
2e0cc4d4 350 writel(v, ((void __iomem *)adev->rmmio) + (reg * 4));
2eee0229
HZ
351 else
352 adev->pcie_wreg(adev, (reg * 4), v);
2e0cc4d4
ML
353}
354
e3ecdffa 355/**
2eee0229 356 * amdgpu_device_wreg - write to a register
e3ecdffa
AD
357 *
358 * @adev: amdgpu_device pointer
359 * @reg: dword aligned register offset
360 * @v: 32 bit value to write to the register
361 * @acc_flags: access flags which require special behavior
362 *
363 * Writes the value specified to the offset specified.
364 */
2eee0229
HZ
365void amdgpu_device_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v,
366 uint32_t acc_flags)
d38ceaf9 367{
f384ff95 368 if (!(acc_flags & AMDGPU_REGS_NO_KIQ) && amdgpu_sriov_runtime(adev))
d33a99c4 369 return amdgpu_kiq_wreg(adev, reg, v);
bc992ba5 370
2eee0229 371 amdgpu_device_wreg_no_kiq(adev, reg, v, acc_flags);
2e0cc4d4 372}
d38ceaf9 373
2e0cc4d4
ML
374/*
375 * amdgpu_mm_wreg_mmio_rlc - write register either with mmio or with RLC path if in range
376 *
377 * this function is invoked only the debugfs register access
378 * */
379void amdgpu_mm_wreg_mmio_rlc(struct amdgpu_device *adev, uint32_t reg, uint32_t v,
380 uint32_t acc_flags)
381{
382 if (amdgpu_sriov_fullaccess(adev) &&
383 adev->gfx.rlc.funcs &&
384 adev->gfx.rlc.funcs->is_rlcg_access_range) {
47ed4e1c 385
2e0cc4d4
ML
386 if (adev->gfx.rlc.funcs->is_rlcg_access_range(adev, reg))
387 return adev->gfx.rlc.funcs->rlcg_wreg(adev, reg, v);
47ed4e1c 388 }
2e0cc4d4 389
2eee0229 390 amdgpu_device_wreg_no_kiq(adev, reg, v, acc_flags);
d38ceaf9
AD
391}
392
e3ecdffa
AD
393/**
394 * amdgpu_io_rreg - read an IO register
395 *
396 * @adev: amdgpu_device pointer
397 * @reg: dword aligned register offset
398 *
399 * Returns the 32 bit value from the offset specified.
400 */
d38ceaf9
AD
401u32 amdgpu_io_rreg(struct amdgpu_device *adev, u32 reg)
402{
403 if ((reg * 4) < adev->rio_mem_size)
404 return ioread32(adev->rio_mem + (reg * 4));
405 else {
406 iowrite32((reg * 4), adev->rio_mem + (mmMM_INDEX * 4));
407 return ioread32(adev->rio_mem + (mmMM_DATA * 4));
408 }
409}
410
e3ecdffa
AD
411/**
412 * amdgpu_io_wreg - write to an IO register
413 *
414 * @adev: amdgpu_device pointer
415 * @reg: dword aligned register offset
416 * @v: 32 bit value to write to the register
417 *
418 * Writes the value specified to the offset specified.
419 */
d38ceaf9
AD
420void amdgpu_io_wreg(struct amdgpu_device *adev, u32 reg, u32 v)
421{
d38ceaf9
AD
422 if ((reg * 4) < adev->rio_mem_size)
423 iowrite32(v, adev->rio_mem + (reg * 4));
424 else {
425 iowrite32((reg * 4), adev->rio_mem + (mmMM_INDEX * 4));
426 iowrite32(v, adev->rio_mem + (mmMM_DATA * 4));
427 }
428}
429
430/**
431 * amdgpu_mm_rdoorbell - read a doorbell dword
432 *
433 * @adev: amdgpu_device pointer
434 * @index: doorbell index
435 *
436 * Returns the value in the doorbell aperture at the
437 * requested doorbell index (CIK).
438 */
439u32 amdgpu_mm_rdoorbell(struct amdgpu_device *adev, u32 index)
440{
441 if (index < adev->doorbell.num_doorbells) {
442 return readl(adev->doorbell.ptr + index);
443 } else {
444 DRM_ERROR("reading beyond doorbell aperture: 0x%08x!\n", index);
445 return 0;
446 }
447}
448
449/**
450 * amdgpu_mm_wdoorbell - write a doorbell dword
451 *
452 * @adev: amdgpu_device pointer
453 * @index: doorbell index
454 * @v: value to write
455 *
456 * Writes @v to the doorbell aperture at the
457 * requested doorbell index (CIK).
458 */
459void amdgpu_mm_wdoorbell(struct amdgpu_device *adev, u32 index, u32 v)
460{
461 if (index < adev->doorbell.num_doorbells) {
462 writel(v, adev->doorbell.ptr + index);
463 } else {
464 DRM_ERROR("writing beyond doorbell aperture: 0x%08x!\n", index);
465 }
466}
467
832be404
KW
468/**
469 * amdgpu_mm_rdoorbell64 - read a doorbell Qword
470 *
471 * @adev: amdgpu_device pointer
472 * @index: doorbell index
473 *
474 * Returns the value in the doorbell aperture at the
475 * requested doorbell index (VEGA10+).
476 */
477u64 amdgpu_mm_rdoorbell64(struct amdgpu_device *adev, u32 index)
478{
479 if (index < adev->doorbell.num_doorbells) {
480 return atomic64_read((atomic64_t *)(adev->doorbell.ptr + index));
481 } else {
482 DRM_ERROR("reading beyond doorbell aperture: 0x%08x!\n", index);
483 return 0;
484 }
485}
486
487/**
488 * amdgpu_mm_wdoorbell64 - write a doorbell Qword
489 *
490 * @adev: amdgpu_device pointer
491 * @index: doorbell index
492 * @v: value to write
493 *
494 * Writes @v to the doorbell aperture at the
495 * requested doorbell index (VEGA10+).
496 */
497void amdgpu_mm_wdoorbell64(struct amdgpu_device *adev, u32 index, u64 v)
498{
499 if (index < adev->doorbell.num_doorbells) {
500 atomic64_set((atomic64_t *)(adev->doorbell.ptr + index), v);
501 } else {
502 DRM_ERROR("writing beyond doorbell aperture: 0x%08x!\n", index);
503 }
504}
505
d38ceaf9
AD
506/**
507 * amdgpu_invalid_rreg - dummy reg read function
508 *
509 * @adev: amdgpu device pointer
510 * @reg: offset of register
511 *
512 * Dummy register read function. Used for register blocks
513 * that certain asics don't have (all asics).
514 * Returns the value in the register.
515 */
516static uint32_t amdgpu_invalid_rreg(struct amdgpu_device *adev, uint32_t reg)
517{
518 DRM_ERROR("Invalid callback to read register 0x%04X\n", reg);
519 BUG();
520 return 0;
521}
522
523/**
524 * amdgpu_invalid_wreg - dummy reg write function
525 *
526 * @adev: amdgpu device pointer
527 * @reg: offset of register
528 * @v: value to write to the register
529 *
530 * Dummy register read function. Used for register blocks
531 * that certain asics don't have (all asics).
532 */
533static void amdgpu_invalid_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v)
534{
535 DRM_ERROR("Invalid callback to write register 0x%04X with 0x%08X\n",
536 reg, v);
537 BUG();
538}
539
4fa1c6a6
TZ
540/**
541 * amdgpu_invalid_rreg64 - dummy 64 bit reg read function
542 *
543 * @adev: amdgpu device pointer
544 * @reg: offset of register
545 *
546 * Dummy register read function. Used for register blocks
547 * that certain asics don't have (all asics).
548 * Returns the value in the register.
549 */
550static uint64_t amdgpu_invalid_rreg64(struct amdgpu_device *adev, uint32_t reg)
551{
552 DRM_ERROR("Invalid callback to read 64 bit register 0x%04X\n", reg);
553 BUG();
554 return 0;
555}
556
557/**
558 * amdgpu_invalid_wreg64 - dummy reg write function
559 *
560 * @adev: amdgpu device pointer
561 * @reg: offset of register
562 * @v: value to write to the register
563 *
564 * Dummy register read function. Used for register blocks
565 * that certain asics don't have (all asics).
566 */
567static void amdgpu_invalid_wreg64(struct amdgpu_device *adev, uint32_t reg, uint64_t v)
568{
569 DRM_ERROR("Invalid callback to write 64 bit register 0x%04X with 0x%08llX\n",
570 reg, v);
571 BUG();
572}
573
d38ceaf9
AD
574/**
575 * amdgpu_block_invalid_rreg - dummy reg read function
576 *
577 * @adev: amdgpu device pointer
578 * @block: offset of instance
579 * @reg: offset of register
580 *
581 * Dummy register read function. Used for register blocks
582 * that certain asics don't have (all asics).
583 * Returns the value in the register.
584 */
585static uint32_t amdgpu_block_invalid_rreg(struct amdgpu_device *adev,
586 uint32_t block, uint32_t reg)
587{
588 DRM_ERROR("Invalid callback to read register 0x%04X in block 0x%04X\n",
589 reg, block);
590 BUG();
591 return 0;
592}
593
594/**
595 * amdgpu_block_invalid_wreg - dummy reg write function
596 *
597 * @adev: amdgpu device pointer
598 * @block: offset of instance
599 * @reg: offset of register
600 * @v: value to write to the register
601 *
602 * Dummy register read function. Used for register blocks
603 * that certain asics don't have (all asics).
604 */
605static void amdgpu_block_invalid_wreg(struct amdgpu_device *adev,
606 uint32_t block,
607 uint32_t reg, uint32_t v)
608{
609 DRM_ERROR("Invalid block callback to write register 0x%04X in block 0x%04X with 0x%08X\n",
610 reg, block, v);
611 BUG();
612}
613
e3ecdffa
AD
614/**
615 * amdgpu_device_vram_scratch_init - allocate the VRAM scratch page
616 *
617 * @adev: amdgpu device pointer
618 *
619 * Allocates a scratch page of VRAM for use by various things in the
620 * driver.
621 */
06ec9070 622static int amdgpu_device_vram_scratch_init(struct amdgpu_device *adev)
d38ceaf9 623{
a4a02777
CK
624 return amdgpu_bo_create_kernel(adev, AMDGPU_GPU_PAGE_SIZE,
625 PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM,
626 &adev->vram_scratch.robj,
627 &adev->vram_scratch.gpu_addr,
628 (void **)&adev->vram_scratch.ptr);
d38ceaf9
AD
629}
630
e3ecdffa
AD
631/**
632 * amdgpu_device_vram_scratch_fini - Free the VRAM scratch page
633 *
634 * @adev: amdgpu device pointer
635 *
636 * Frees the VRAM scratch page.
637 */
06ec9070 638static void amdgpu_device_vram_scratch_fini(struct amdgpu_device *adev)
d38ceaf9 639{
078af1a3 640 amdgpu_bo_free_kernel(&adev->vram_scratch.robj, NULL, NULL);
d38ceaf9
AD
641}
642
643/**
9c3f2b54 644 * amdgpu_device_program_register_sequence - program an array of registers.
d38ceaf9
AD
645 *
646 * @adev: amdgpu_device pointer
647 * @registers: pointer to the register array
648 * @array_size: size of the register array
649 *
650 * Programs an array or registers with and and or masks.
651 * This is a helper for setting golden registers.
652 */
9c3f2b54
AD
653void amdgpu_device_program_register_sequence(struct amdgpu_device *adev,
654 const u32 *registers,
655 const u32 array_size)
d38ceaf9
AD
656{
657 u32 tmp, reg, and_mask, or_mask;
658 int i;
659
660 if (array_size % 3)
661 return;
662
663 for (i = 0; i < array_size; i +=3) {
664 reg = registers[i + 0];
665 and_mask = registers[i + 1];
666 or_mask = registers[i + 2];
667
668 if (and_mask == 0xffffffff) {
669 tmp = or_mask;
670 } else {
671 tmp = RREG32(reg);
672 tmp &= ~and_mask;
e0d07657
HZ
673 if (adev->family >= AMDGPU_FAMILY_AI)
674 tmp |= (or_mask & and_mask);
675 else
676 tmp |= or_mask;
d38ceaf9
AD
677 }
678 WREG32(reg, tmp);
679 }
680}
681
e3ecdffa
AD
682/**
683 * amdgpu_device_pci_config_reset - reset the GPU
684 *
685 * @adev: amdgpu_device pointer
686 *
687 * Resets the GPU using the pci config reset sequence.
688 * Only applicable to asics prior to vega10.
689 */
8111c387 690void amdgpu_device_pci_config_reset(struct amdgpu_device *adev)
d38ceaf9
AD
691{
692 pci_write_config_dword(adev->pdev, 0x7c, AMDGPU_ASIC_RESET_DATA);
693}
694
695/*
696 * GPU doorbell aperture helpers function.
697 */
698/**
06ec9070 699 * amdgpu_device_doorbell_init - Init doorbell driver information.
d38ceaf9
AD
700 *
701 * @adev: amdgpu_device pointer
702 *
703 * Init doorbell driver information (CIK)
704 * Returns 0 on success, error on failure.
705 */
06ec9070 706static int amdgpu_device_doorbell_init(struct amdgpu_device *adev)
d38ceaf9 707{
6585661d 708
705e519e
CK
709 /* No doorbell on SI hardware generation */
710 if (adev->asic_type < CHIP_BONAIRE) {
711 adev->doorbell.base = 0;
712 adev->doorbell.size = 0;
713 adev->doorbell.num_doorbells = 0;
714 adev->doorbell.ptr = NULL;
715 return 0;
716 }
717
d6895ad3
CK
718 if (pci_resource_flags(adev->pdev, 2) & IORESOURCE_UNSET)
719 return -EINVAL;
720
22357775
AD
721 amdgpu_asic_init_doorbell_index(adev);
722
d38ceaf9
AD
723 /* doorbell bar mapping */
724 adev->doorbell.base = pci_resource_start(adev->pdev, 2);
725 adev->doorbell.size = pci_resource_len(adev->pdev, 2);
726
edf600da 727 adev->doorbell.num_doorbells = min_t(u32, adev->doorbell.size / sizeof(u32),
9564f192 728 adev->doorbell_index.max_assignment+1);
d38ceaf9
AD
729 if (adev->doorbell.num_doorbells == 0)
730 return -EINVAL;
731
ec3db8a6 732 /* For Vega, reserve and map two pages on doorbell BAR since SDMA
88dc26e4
OZ
733 * paging queue doorbell use the second page. The
734 * AMDGPU_DOORBELL64_MAX_ASSIGNMENT definition assumes all the
735 * doorbells are in the first page. So with paging queue enabled,
736 * the max num_doorbells should + 1 page (0x400 in dword)
ec3db8a6
PY
737 */
738 if (adev->asic_type >= CHIP_VEGA10)
88dc26e4 739 adev->doorbell.num_doorbells += 0x400;
ec3db8a6 740
8972e5d2
CK
741 adev->doorbell.ptr = ioremap(adev->doorbell.base,
742 adev->doorbell.num_doorbells *
743 sizeof(u32));
744 if (adev->doorbell.ptr == NULL)
d38ceaf9 745 return -ENOMEM;
d38ceaf9
AD
746
747 return 0;
748}
749
750/**
06ec9070 751 * amdgpu_device_doorbell_fini - Tear down doorbell driver information.
d38ceaf9
AD
752 *
753 * @adev: amdgpu_device pointer
754 *
755 * Tear down doorbell driver information (CIK)
756 */
06ec9070 757static void amdgpu_device_doorbell_fini(struct amdgpu_device *adev)
d38ceaf9
AD
758{
759 iounmap(adev->doorbell.ptr);
760 adev->doorbell.ptr = NULL;
761}
762
22cb0164 763
d38ceaf9
AD
764
765/*
06ec9070 766 * amdgpu_device_wb_*()
455a7bc2 767 * Writeback is the method by which the GPU updates special pages in memory
ea81a173 768 * with the status of certain GPU events (fences, ring pointers,etc.).
d38ceaf9
AD
769 */
770
771/**
06ec9070 772 * amdgpu_device_wb_fini - Disable Writeback and free memory
d38ceaf9
AD
773 *
774 * @adev: amdgpu_device pointer
775 *
776 * Disables Writeback and frees the Writeback memory (all asics).
777 * Used at driver shutdown.
778 */
06ec9070 779static void amdgpu_device_wb_fini(struct amdgpu_device *adev)
d38ceaf9
AD
780{
781 if (adev->wb.wb_obj) {
a76ed485
AD
782 amdgpu_bo_free_kernel(&adev->wb.wb_obj,
783 &adev->wb.gpu_addr,
784 (void **)&adev->wb.wb);
d38ceaf9
AD
785 adev->wb.wb_obj = NULL;
786 }
787}
788
789/**
06ec9070 790 * amdgpu_device_wb_init- Init Writeback driver info and allocate memory
d38ceaf9
AD
791 *
792 * @adev: amdgpu_device pointer
793 *
455a7bc2 794 * Initializes writeback and allocates writeback memory (all asics).
d38ceaf9
AD
795 * Used at driver startup.
796 * Returns 0 on success or an -error on failure.
797 */
06ec9070 798static int amdgpu_device_wb_init(struct amdgpu_device *adev)
d38ceaf9
AD
799{
800 int r;
801
802 if (adev->wb.wb_obj == NULL) {
97407b63
AD
803 /* AMDGPU_MAX_WB * sizeof(uint32_t) * 8 = AMDGPU_MAX_WB 256bit slots */
804 r = amdgpu_bo_create_kernel(adev, AMDGPU_MAX_WB * sizeof(uint32_t) * 8,
a76ed485
AD
805 PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT,
806 &adev->wb.wb_obj, &adev->wb.gpu_addr,
807 (void **)&adev->wb.wb);
d38ceaf9
AD
808 if (r) {
809 dev_warn(adev->dev, "(%d) create WB bo failed\n", r);
810 return r;
811 }
d38ceaf9
AD
812
813 adev->wb.num_wb = AMDGPU_MAX_WB;
814 memset(&adev->wb.used, 0, sizeof(adev->wb.used));
815
816 /* clear wb memory */
73469585 817 memset((char *)adev->wb.wb, 0, AMDGPU_MAX_WB * sizeof(uint32_t) * 8);
d38ceaf9
AD
818 }
819
820 return 0;
821}
822
823/**
131b4b36 824 * amdgpu_device_wb_get - Allocate a wb entry
d38ceaf9
AD
825 *
826 * @adev: amdgpu_device pointer
827 * @wb: wb index
828 *
829 * Allocate a wb slot for use by the driver (all asics).
830 * Returns 0 on success or -EINVAL on failure.
831 */
131b4b36 832int amdgpu_device_wb_get(struct amdgpu_device *adev, u32 *wb)
d38ceaf9
AD
833{
834 unsigned long offset = find_first_zero_bit(adev->wb.used, adev->wb.num_wb);
d38ceaf9 835
97407b63 836 if (offset < adev->wb.num_wb) {
7014285a 837 __set_bit(offset, adev->wb.used);
63ae07ca 838 *wb = offset << 3; /* convert to dw offset */
0915fdbc
ML
839 return 0;
840 } else {
841 return -EINVAL;
842 }
843}
844
d38ceaf9 845/**
131b4b36 846 * amdgpu_device_wb_free - Free a wb entry
d38ceaf9
AD
847 *
848 * @adev: amdgpu_device pointer
849 * @wb: wb index
850 *
851 * Free a wb slot allocated for use by the driver (all asics)
852 */
131b4b36 853void amdgpu_device_wb_free(struct amdgpu_device *adev, u32 wb)
d38ceaf9 854{
73469585 855 wb >>= 3;
d38ceaf9 856 if (wb < adev->wb.num_wb)
73469585 857 __clear_bit(wb, adev->wb.used);
d38ceaf9
AD
858}
859
d6895ad3
CK
860/**
861 * amdgpu_device_resize_fb_bar - try to resize FB BAR
862 *
863 * @adev: amdgpu_device pointer
864 *
865 * Try to resize FB BAR to make all VRAM CPU accessible. We try very hard not
866 * to fail, but if any of the BARs is not accessible after the size we abort
867 * driver loading by returning -ENODEV.
868 */
869int amdgpu_device_resize_fb_bar(struct amdgpu_device *adev)
870{
770d13b1 871 u64 space_needed = roundup_pow_of_two(adev->gmc.real_vram_size);
d6895ad3 872 u32 rbar_size = order_base_2(((space_needed >> 20) | 1)) - 1;
31b8adab
CK
873 struct pci_bus *root;
874 struct resource *res;
875 unsigned i;
d6895ad3
CK
876 u16 cmd;
877 int r;
878
0c03b912 879 /* Bypass for VF */
880 if (amdgpu_sriov_vf(adev))
881 return 0;
882
31b8adab
CK
883 /* Check if the root BUS has 64bit memory resources */
884 root = adev->pdev->bus;
885 while (root->parent)
886 root = root->parent;
887
888 pci_bus_for_each_resource(root, res, i) {
0ebb7c54 889 if (res && res->flags & (IORESOURCE_MEM | IORESOURCE_MEM_64) &&
31b8adab
CK
890 res->start > 0x100000000ull)
891 break;
892 }
893
894 /* Trying to resize is pointless without a root hub window above 4GB */
895 if (!res)
896 return 0;
897
d6895ad3
CK
898 /* Disable memory decoding while we change the BAR addresses and size */
899 pci_read_config_word(adev->pdev, PCI_COMMAND, &cmd);
900 pci_write_config_word(adev->pdev, PCI_COMMAND,
901 cmd & ~PCI_COMMAND_MEMORY);
902
903 /* Free the VRAM and doorbell BAR, we most likely need to move both. */
06ec9070 904 amdgpu_device_doorbell_fini(adev);
d6895ad3
CK
905 if (adev->asic_type >= CHIP_BONAIRE)
906 pci_release_resource(adev->pdev, 2);
907
908 pci_release_resource(adev->pdev, 0);
909
910 r = pci_resize_resource(adev->pdev, 0, rbar_size);
911 if (r == -ENOSPC)
912 DRM_INFO("Not enough PCI address space for a large BAR.");
913 else if (r && r != -ENOTSUPP)
914 DRM_ERROR("Problem resizing BAR0 (%d).", r);
915
916 pci_assign_unassigned_bus_resources(adev->pdev->bus);
917
918 /* When the doorbell or fb BAR isn't available we have no chance of
919 * using the device.
920 */
06ec9070 921 r = amdgpu_device_doorbell_init(adev);
d6895ad3
CK
922 if (r || (pci_resource_flags(adev->pdev, 0) & IORESOURCE_UNSET))
923 return -ENODEV;
924
925 pci_write_config_word(adev->pdev, PCI_COMMAND, cmd);
926
927 return 0;
928}
a05502e5 929
d38ceaf9
AD
930/*
931 * GPU helpers function.
932 */
933/**
39c640c0 934 * amdgpu_device_need_post - check if the hw need post or not
d38ceaf9
AD
935 *
936 * @adev: amdgpu_device pointer
937 *
c836fec5
JQ
938 * Check if the asic has been initialized (all asics) at driver startup
939 * or post is needed if hw reset is performed.
940 * Returns true if need or false if not.
d38ceaf9 941 */
39c640c0 942bool amdgpu_device_need_post(struct amdgpu_device *adev)
d38ceaf9
AD
943{
944 uint32_t reg;
945
bec86378
ML
946 if (amdgpu_sriov_vf(adev))
947 return false;
948
949 if (amdgpu_passthrough(adev)) {
1da2c326
ML
950 /* for FIJI: In whole GPU pass-through virtualization case, after VM reboot
951 * some old smc fw still need driver do vPost otherwise gpu hang, while
952 * those smc fw version above 22.15 doesn't have this flaw, so we force
953 * vpost executed for smc version below 22.15
bec86378
ML
954 */
955 if (adev->asic_type == CHIP_FIJI) {
956 int err;
957 uint32_t fw_ver;
958 err = request_firmware(&adev->pm.fw, "amdgpu/fiji_smc.bin", adev->dev);
959 /* force vPost if error occured */
960 if (err)
961 return true;
962
963 fw_ver = *((uint32_t *)adev->pm.fw->data + 69);
1da2c326
ML
964 if (fw_ver < 0x00160e00)
965 return true;
bec86378 966 }
bec86378 967 }
91fe77eb 968
969 if (adev->has_hw_reset) {
970 adev->has_hw_reset = false;
971 return true;
972 }
973
974 /* bios scratch used on CIK+ */
975 if (adev->asic_type >= CHIP_BONAIRE)
976 return amdgpu_atombios_scratch_need_asic_init(adev);
977
978 /* check MEM_SIZE for older asics */
979 reg = amdgpu_asic_get_config_memsize(adev);
980
981 if ((reg != 0) && (reg != 0xffffffff))
982 return false;
983
984 return true;
bec86378
ML
985}
986
d38ceaf9
AD
987/* if we get transitioned to only one device, take VGA back */
988/**
06ec9070 989 * amdgpu_device_vga_set_decode - enable/disable vga decode
d38ceaf9
AD
990 *
991 * @cookie: amdgpu_device pointer
992 * @state: enable/disable vga decode
993 *
994 * Enable/disable vga decode (all asics).
995 * Returns VGA resource flags.
996 */
06ec9070 997static unsigned int amdgpu_device_vga_set_decode(void *cookie, bool state)
d38ceaf9
AD
998{
999 struct amdgpu_device *adev = cookie;
1000 amdgpu_asic_set_vga_state(adev, state);
1001 if (state)
1002 return VGA_RSRC_LEGACY_IO | VGA_RSRC_LEGACY_MEM |
1003 VGA_RSRC_NORMAL_IO | VGA_RSRC_NORMAL_MEM;
1004 else
1005 return VGA_RSRC_NORMAL_IO | VGA_RSRC_NORMAL_MEM;
1006}
1007
e3ecdffa
AD
1008/**
1009 * amdgpu_device_check_block_size - validate the vm block size
1010 *
1011 * @adev: amdgpu_device pointer
1012 *
1013 * Validates the vm block size specified via module parameter.
1014 * The vm block size defines number of bits in page table versus page directory,
1015 * a page is 4KB so we have 12 bits offset, minimum 9 bits in the
1016 * page table and the remaining bits are in the page directory.
1017 */
06ec9070 1018static void amdgpu_device_check_block_size(struct amdgpu_device *adev)
a1adf8be
CZ
1019{
1020 /* defines number of bits in page table versus page directory,
1021 * a page is 4KB so we have 12 bits offset, minimum 9 bits in the
1022 * page table and the remaining bits are in the page directory */
bab4fee7
JZ
1023 if (amdgpu_vm_block_size == -1)
1024 return;
a1adf8be 1025
bab4fee7 1026 if (amdgpu_vm_block_size < 9) {
a1adf8be
CZ
1027 dev_warn(adev->dev, "VM page table size (%d) too small\n",
1028 amdgpu_vm_block_size);
97489129 1029 amdgpu_vm_block_size = -1;
a1adf8be 1030 }
a1adf8be
CZ
1031}
1032
e3ecdffa
AD
1033/**
1034 * amdgpu_device_check_vm_size - validate the vm size
1035 *
1036 * @adev: amdgpu_device pointer
1037 *
1038 * Validates the vm size in GB specified via module parameter.
1039 * The VM size is the size of the GPU virtual memory space in GB.
1040 */
06ec9070 1041static void amdgpu_device_check_vm_size(struct amdgpu_device *adev)
83ca145d 1042{
64dab074
AD
1043 /* no need to check the default value */
1044 if (amdgpu_vm_size == -1)
1045 return;
1046
83ca145d
ZJ
1047 if (amdgpu_vm_size < 1) {
1048 dev_warn(adev->dev, "VM size (%d) too small, min is 1GB\n",
1049 amdgpu_vm_size);
f3368128 1050 amdgpu_vm_size = -1;
83ca145d 1051 }
83ca145d
ZJ
1052}
1053
7951e376
RZ
1054static void amdgpu_device_check_smu_prv_buffer_size(struct amdgpu_device *adev)
1055{
1056 struct sysinfo si;
a9d4fe2f 1057 bool is_os_64 = (sizeof(void *) == 8);
7951e376
RZ
1058 uint64_t total_memory;
1059 uint64_t dram_size_seven_GB = 0x1B8000000;
1060 uint64_t dram_size_three_GB = 0xB8000000;
1061
1062 if (amdgpu_smu_memory_pool_size == 0)
1063 return;
1064
1065 if (!is_os_64) {
1066 DRM_WARN("Not 64-bit OS, feature not supported\n");
1067 goto def_value;
1068 }
1069 si_meminfo(&si);
1070 total_memory = (uint64_t)si.totalram * si.mem_unit;
1071
1072 if ((amdgpu_smu_memory_pool_size == 1) ||
1073 (amdgpu_smu_memory_pool_size == 2)) {
1074 if (total_memory < dram_size_three_GB)
1075 goto def_value1;
1076 } else if ((amdgpu_smu_memory_pool_size == 4) ||
1077 (amdgpu_smu_memory_pool_size == 8)) {
1078 if (total_memory < dram_size_seven_GB)
1079 goto def_value1;
1080 } else {
1081 DRM_WARN("Smu memory pool size not supported\n");
1082 goto def_value;
1083 }
1084 adev->pm.smu_prv_buffer_size = amdgpu_smu_memory_pool_size << 28;
1085
1086 return;
1087
1088def_value1:
1089 DRM_WARN("No enough system memory\n");
1090def_value:
1091 adev->pm.smu_prv_buffer_size = 0;
1092}
1093
d38ceaf9 1094/**
06ec9070 1095 * amdgpu_device_check_arguments - validate module params
d38ceaf9
AD
1096 *
1097 * @adev: amdgpu_device pointer
1098 *
1099 * Validates certain module parameters and updates
1100 * the associated values used by the driver (all asics).
1101 */
912dfc84 1102static int amdgpu_device_check_arguments(struct amdgpu_device *adev)
d38ceaf9 1103{
5b011235
CZ
1104 if (amdgpu_sched_jobs < 4) {
1105 dev_warn(adev->dev, "sched jobs (%d) must be at least 4\n",
1106 amdgpu_sched_jobs);
1107 amdgpu_sched_jobs = 4;
76117507 1108 } else if (!is_power_of_2(amdgpu_sched_jobs)){
5b011235
CZ
1109 dev_warn(adev->dev, "sched jobs (%d) must be a power of 2\n",
1110 amdgpu_sched_jobs);
1111 amdgpu_sched_jobs = roundup_pow_of_two(amdgpu_sched_jobs);
1112 }
d38ceaf9 1113
83e74db6 1114 if (amdgpu_gart_size != -1 && amdgpu_gart_size < 32) {
f9321cc4
CK
1115 /* gart size must be greater or equal to 32M */
1116 dev_warn(adev->dev, "gart size (%d) too small\n",
1117 amdgpu_gart_size);
83e74db6 1118 amdgpu_gart_size = -1;
d38ceaf9
AD
1119 }
1120
36d38372 1121 if (amdgpu_gtt_size != -1 && amdgpu_gtt_size < 32) {
c4e1a13a 1122 /* gtt size must be greater or equal to 32M */
36d38372
CK
1123 dev_warn(adev->dev, "gtt size (%d) too small\n",
1124 amdgpu_gtt_size);
1125 amdgpu_gtt_size = -1;
d38ceaf9
AD
1126 }
1127
d07f14be
RH
1128 /* valid range is between 4 and 9 inclusive */
1129 if (amdgpu_vm_fragment_size != -1 &&
1130 (amdgpu_vm_fragment_size > 9 || amdgpu_vm_fragment_size < 4)) {
1131 dev_warn(adev->dev, "valid range is between 4 and 9\n");
1132 amdgpu_vm_fragment_size = -1;
1133 }
1134
7951e376
RZ
1135 amdgpu_device_check_smu_prv_buffer_size(adev);
1136
06ec9070 1137 amdgpu_device_check_vm_size(adev);
d38ceaf9 1138
06ec9070 1139 amdgpu_device_check_block_size(adev);
6a7f76e7 1140
19aede77 1141 adev->firmware.load_type = amdgpu_ucode_get_load_type(adev, amdgpu_fw_load_type);
912dfc84 1142
e3c00faa 1143 return 0;
d38ceaf9
AD
1144}
1145
1146/**
1147 * amdgpu_switcheroo_set_state - set switcheroo state
1148 *
1149 * @pdev: pci dev pointer
1694467b 1150 * @state: vga_switcheroo state
d38ceaf9
AD
1151 *
1152 * Callback for the switcheroo driver. Suspends or resumes the
1153 * the asics before or after it is powered up using ACPI methods.
1154 */
1155static void amdgpu_switcheroo_set_state(struct pci_dev *pdev, enum vga_switcheroo_state state)
1156{
1157 struct drm_device *dev = pci_get_drvdata(pdev);
de185019 1158 int r;
d38ceaf9 1159
31af062a 1160 if (amdgpu_device_supports_boco(dev) && state == VGA_SWITCHEROO_OFF)
d38ceaf9
AD
1161 return;
1162
1163 if (state == VGA_SWITCHEROO_ON) {
dd4fa6c1 1164 pr_info("switched on\n");
d38ceaf9
AD
1165 /* don't suspend or resume card normally */
1166 dev->switch_power_state = DRM_SWITCH_POWER_CHANGING;
1167
de185019
AD
1168 pci_set_power_state(dev->pdev, PCI_D0);
1169 pci_restore_state(dev->pdev);
1170 r = pci_enable_device(dev->pdev);
1171 if (r)
1172 DRM_WARN("pci_enable_device failed (%d)\n", r);
1173 amdgpu_device_resume(dev, true);
d38ceaf9 1174
d38ceaf9
AD
1175 dev->switch_power_state = DRM_SWITCH_POWER_ON;
1176 drm_kms_helper_poll_enable(dev);
1177 } else {
dd4fa6c1 1178 pr_info("switched off\n");
d38ceaf9
AD
1179 drm_kms_helper_poll_disable(dev);
1180 dev->switch_power_state = DRM_SWITCH_POWER_CHANGING;
de185019
AD
1181 amdgpu_device_suspend(dev, true);
1182 pci_save_state(dev->pdev);
1183 /* Shut down the device */
1184 pci_disable_device(dev->pdev);
1185 pci_set_power_state(dev->pdev, PCI_D3cold);
d38ceaf9
AD
1186 dev->switch_power_state = DRM_SWITCH_POWER_OFF;
1187 }
1188}
1189
1190/**
1191 * amdgpu_switcheroo_can_switch - see if switcheroo state can change
1192 *
1193 * @pdev: pci dev pointer
1194 *
1195 * Callback for the switcheroo driver. Check of the switcheroo
1196 * state can be changed.
1197 * Returns true if the state can be changed, false if not.
1198 */
1199static bool amdgpu_switcheroo_can_switch(struct pci_dev *pdev)
1200{
1201 struct drm_device *dev = pci_get_drvdata(pdev);
1202
1203 /*
1204 * FIXME: open_count is protected by drm_global_mutex but that would lead to
1205 * locking inversion with the driver load path. And the access here is
1206 * completely racy anyway. So don't bother with locking for now.
1207 */
7e13ad89 1208 return atomic_read(&dev->open_count) == 0;
d38ceaf9
AD
1209}
1210
1211static const struct vga_switcheroo_client_ops amdgpu_switcheroo_ops = {
1212 .set_gpu_state = amdgpu_switcheroo_set_state,
1213 .reprobe = NULL,
1214 .can_switch = amdgpu_switcheroo_can_switch,
1215};
1216
e3ecdffa
AD
1217/**
1218 * amdgpu_device_ip_set_clockgating_state - set the CG state
1219 *
87e3f136 1220 * @dev: amdgpu_device pointer
e3ecdffa
AD
1221 * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
1222 * @state: clockgating state (gate or ungate)
1223 *
1224 * Sets the requested clockgating state for all instances of
1225 * the hardware IP specified.
1226 * Returns the error code from the last instance.
1227 */
43fa561f 1228int amdgpu_device_ip_set_clockgating_state(void *dev,
2990a1fc
AD
1229 enum amd_ip_block_type block_type,
1230 enum amd_clockgating_state state)
d38ceaf9 1231{
43fa561f 1232 struct amdgpu_device *adev = dev;
d38ceaf9
AD
1233 int i, r = 0;
1234
1235 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 1236 if (!adev->ip_blocks[i].status.valid)
9ecbe7f5 1237 continue;
c722865a
RZ
1238 if (adev->ip_blocks[i].version->type != block_type)
1239 continue;
1240 if (!adev->ip_blocks[i].version->funcs->set_clockgating_state)
1241 continue;
1242 r = adev->ip_blocks[i].version->funcs->set_clockgating_state(
1243 (void *)adev, state);
1244 if (r)
1245 DRM_ERROR("set_clockgating_state of IP block <%s> failed %d\n",
1246 adev->ip_blocks[i].version->funcs->name, r);
d38ceaf9
AD
1247 }
1248 return r;
1249}
1250
e3ecdffa
AD
1251/**
1252 * amdgpu_device_ip_set_powergating_state - set the PG state
1253 *
87e3f136 1254 * @dev: amdgpu_device pointer
e3ecdffa
AD
1255 * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
1256 * @state: powergating state (gate or ungate)
1257 *
1258 * Sets the requested powergating state for all instances of
1259 * the hardware IP specified.
1260 * Returns the error code from the last instance.
1261 */
43fa561f 1262int amdgpu_device_ip_set_powergating_state(void *dev,
2990a1fc
AD
1263 enum amd_ip_block_type block_type,
1264 enum amd_powergating_state state)
d38ceaf9 1265{
43fa561f 1266 struct amdgpu_device *adev = dev;
d38ceaf9
AD
1267 int i, r = 0;
1268
1269 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 1270 if (!adev->ip_blocks[i].status.valid)
9ecbe7f5 1271 continue;
c722865a
RZ
1272 if (adev->ip_blocks[i].version->type != block_type)
1273 continue;
1274 if (!adev->ip_blocks[i].version->funcs->set_powergating_state)
1275 continue;
1276 r = adev->ip_blocks[i].version->funcs->set_powergating_state(
1277 (void *)adev, state);
1278 if (r)
1279 DRM_ERROR("set_powergating_state of IP block <%s> failed %d\n",
1280 adev->ip_blocks[i].version->funcs->name, r);
d38ceaf9
AD
1281 }
1282 return r;
1283}
1284
e3ecdffa
AD
1285/**
1286 * amdgpu_device_ip_get_clockgating_state - get the CG state
1287 *
1288 * @adev: amdgpu_device pointer
1289 * @flags: clockgating feature flags
1290 *
1291 * Walks the list of IPs on the device and updates the clockgating
1292 * flags for each IP.
1293 * Updates @flags with the feature flags for each hardware IP where
1294 * clockgating is enabled.
1295 */
2990a1fc
AD
1296void amdgpu_device_ip_get_clockgating_state(struct amdgpu_device *adev,
1297 u32 *flags)
6cb2d4e4
HR
1298{
1299 int i;
1300
1301 for (i = 0; i < adev->num_ip_blocks; i++) {
1302 if (!adev->ip_blocks[i].status.valid)
1303 continue;
1304 if (adev->ip_blocks[i].version->funcs->get_clockgating_state)
1305 adev->ip_blocks[i].version->funcs->get_clockgating_state((void *)adev, flags);
1306 }
1307}
1308
e3ecdffa
AD
1309/**
1310 * amdgpu_device_ip_wait_for_idle - wait for idle
1311 *
1312 * @adev: amdgpu_device pointer
1313 * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
1314 *
1315 * Waits for the request hardware IP to be idle.
1316 * Returns 0 for success or a negative error code on failure.
1317 */
2990a1fc
AD
1318int amdgpu_device_ip_wait_for_idle(struct amdgpu_device *adev,
1319 enum amd_ip_block_type block_type)
5dbbb60b
AD
1320{
1321 int i, r;
1322
1323 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 1324 if (!adev->ip_blocks[i].status.valid)
9ecbe7f5 1325 continue;
a1255107
AD
1326 if (adev->ip_blocks[i].version->type == block_type) {
1327 r = adev->ip_blocks[i].version->funcs->wait_for_idle((void *)adev);
5dbbb60b
AD
1328 if (r)
1329 return r;
1330 break;
1331 }
1332 }
1333 return 0;
1334
1335}
1336
e3ecdffa
AD
1337/**
1338 * amdgpu_device_ip_is_idle - is the hardware IP idle
1339 *
1340 * @adev: amdgpu_device pointer
1341 * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
1342 *
1343 * Check if the hardware IP is idle or not.
1344 * Returns true if it the IP is idle, false if not.
1345 */
2990a1fc
AD
1346bool amdgpu_device_ip_is_idle(struct amdgpu_device *adev,
1347 enum amd_ip_block_type block_type)
5dbbb60b
AD
1348{
1349 int i;
1350
1351 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 1352 if (!adev->ip_blocks[i].status.valid)
9ecbe7f5 1353 continue;
a1255107
AD
1354 if (adev->ip_blocks[i].version->type == block_type)
1355 return adev->ip_blocks[i].version->funcs->is_idle((void *)adev);
5dbbb60b
AD
1356 }
1357 return true;
1358
1359}
1360
e3ecdffa
AD
1361/**
1362 * amdgpu_device_ip_get_ip_block - get a hw IP pointer
1363 *
1364 * @adev: amdgpu_device pointer
87e3f136 1365 * @type: Type of hardware IP (SMU, GFX, UVD, etc.)
e3ecdffa
AD
1366 *
1367 * Returns a pointer to the hardware IP block structure
1368 * if it exists for the asic, otherwise NULL.
1369 */
2990a1fc
AD
1370struct amdgpu_ip_block *
1371amdgpu_device_ip_get_ip_block(struct amdgpu_device *adev,
1372 enum amd_ip_block_type type)
d38ceaf9
AD
1373{
1374 int i;
1375
1376 for (i = 0; i < adev->num_ip_blocks; i++)
a1255107 1377 if (adev->ip_blocks[i].version->type == type)
d38ceaf9
AD
1378 return &adev->ip_blocks[i];
1379
1380 return NULL;
1381}
1382
1383/**
2990a1fc 1384 * amdgpu_device_ip_block_version_cmp
d38ceaf9
AD
1385 *
1386 * @adev: amdgpu_device pointer
5fc3aeeb 1387 * @type: enum amd_ip_block_type
d38ceaf9
AD
1388 * @major: major version
1389 * @minor: minor version
1390 *
1391 * return 0 if equal or greater
1392 * return 1 if smaller or the ip_block doesn't exist
1393 */
2990a1fc
AD
1394int amdgpu_device_ip_block_version_cmp(struct amdgpu_device *adev,
1395 enum amd_ip_block_type type,
1396 u32 major, u32 minor)
d38ceaf9 1397{
2990a1fc 1398 struct amdgpu_ip_block *ip_block = amdgpu_device_ip_get_ip_block(adev, type);
d38ceaf9 1399
a1255107
AD
1400 if (ip_block && ((ip_block->version->major > major) ||
1401 ((ip_block->version->major == major) &&
1402 (ip_block->version->minor >= minor))))
d38ceaf9
AD
1403 return 0;
1404
1405 return 1;
1406}
1407
a1255107 1408/**
2990a1fc 1409 * amdgpu_device_ip_block_add
a1255107
AD
1410 *
1411 * @adev: amdgpu_device pointer
1412 * @ip_block_version: pointer to the IP to add
1413 *
1414 * Adds the IP block driver information to the collection of IPs
1415 * on the asic.
1416 */
2990a1fc
AD
1417int amdgpu_device_ip_block_add(struct amdgpu_device *adev,
1418 const struct amdgpu_ip_block_version *ip_block_version)
a1255107
AD
1419{
1420 if (!ip_block_version)
1421 return -EINVAL;
1422
e966a725 1423 DRM_INFO("add ip block number %d <%s>\n", adev->num_ip_blocks,
a0bae357
HR
1424 ip_block_version->funcs->name);
1425
a1255107
AD
1426 adev->ip_blocks[adev->num_ip_blocks++].version = ip_block_version;
1427
1428 return 0;
1429}
1430
e3ecdffa
AD
1431/**
1432 * amdgpu_device_enable_virtual_display - enable virtual display feature
1433 *
1434 * @adev: amdgpu_device pointer
1435 *
1436 * Enabled the virtual display feature if the user has enabled it via
1437 * the module parameter virtual_display. This feature provides a virtual
1438 * display hardware on headless boards or in virtualized environments.
1439 * This function parses and validates the configuration string specified by
1440 * the user and configues the virtual display configuration (number of
1441 * virtual connectors, crtcs, etc.) specified.
1442 */
483ef985 1443static void amdgpu_device_enable_virtual_display(struct amdgpu_device *adev)
9accf2fd
ED
1444{
1445 adev->enable_virtual_display = false;
1446
1447 if (amdgpu_virtual_display) {
1448 struct drm_device *ddev = adev->ddev;
1449 const char *pci_address_name = pci_name(ddev->pdev);
0f66356d 1450 char *pciaddstr, *pciaddstr_tmp, *pciaddname_tmp, *pciaddname;
9accf2fd
ED
1451
1452 pciaddstr = kstrdup(amdgpu_virtual_display, GFP_KERNEL);
1453 pciaddstr_tmp = pciaddstr;
0f66356d
ED
1454 while ((pciaddname_tmp = strsep(&pciaddstr_tmp, ";"))) {
1455 pciaddname = strsep(&pciaddname_tmp, ",");
967de2a9
YT
1456 if (!strcmp("all", pciaddname)
1457 || !strcmp(pci_address_name, pciaddname)) {
0f66356d
ED
1458 long num_crtc;
1459 int res = -1;
1460
9accf2fd 1461 adev->enable_virtual_display = true;
0f66356d
ED
1462
1463 if (pciaddname_tmp)
1464 res = kstrtol(pciaddname_tmp, 10,
1465 &num_crtc);
1466
1467 if (!res) {
1468 if (num_crtc < 1)
1469 num_crtc = 1;
1470 if (num_crtc > 6)
1471 num_crtc = 6;
1472 adev->mode_info.num_crtc = num_crtc;
1473 } else {
1474 adev->mode_info.num_crtc = 1;
1475 }
9accf2fd
ED
1476 break;
1477 }
1478 }
1479
0f66356d
ED
1480 DRM_INFO("virtual display string:%s, %s:virtual_display:%d, num_crtc:%d\n",
1481 amdgpu_virtual_display, pci_address_name,
1482 adev->enable_virtual_display, adev->mode_info.num_crtc);
9accf2fd
ED
1483
1484 kfree(pciaddstr);
1485 }
1486}
1487
e3ecdffa
AD
1488/**
1489 * amdgpu_device_parse_gpu_info_fw - parse gpu info firmware
1490 *
1491 * @adev: amdgpu_device pointer
1492 *
1493 * Parses the asic configuration parameters specified in the gpu info
1494 * firmware and makes them availale to the driver for use in configuring
1495 * the asic.
1496 * Returns 0 on success, -EINVAL on failure.
1497 */
e2a75f88
AD
1498static int amdgpu_device_parse_gpu_info_fw(struct amdgpu_device *adev)
1499{
e2a75f88
AD
1500 const char *chip_name;
1501 char fw_name[30];
1502 int err;
1503 const struct gpu_info_firmware_header_v1_0 *hdr;
1504
ab4fe3e1
HR
1505 adev->firmware.gpu_info_fw = NULL;
1506
e2a75f88
AD
1507 switch (adev->asic_type) {
1508 case CHIP_TOPAZ:
1509 case CHIP_TONGA:
1510 case CHIP_FIJI:
e2a75f88 1511 case CHIP_POLARIS10:
cc07f18d 1512 case CHIP_POLARIS11:
e2a75f88 1513 case CHIP_POLARIS12:
cc07f18d 1514 case CHIP_VEGAM:
e2a75f88
AD
1515 case CHIP_CARRIZO:
1516 case CHIP_STONEY:
1517#ifdef CONFIG_DRM_AMDGPU_SI
1518 case CHIP_VERDE:
1519 case CHIP_TAHITI:
1520 case CHIP_PITCAIRN:
1521 case CHIP_OLAND:
1522 case CHIP_HAINAN:
1523#endif
1524#ifdef CONFIG_DRM_AMDGPU_CIK
1525 case CHIP_BONAIRE:
1526 case CHIP_HAWAII:
1527 case CHIP_KAVERI:
1528 case CHIP_KABINI:
1529 case CHIP_MULLINS:
1530#endif
27c0bc71 1531 case CHIP_VEGA20:
e2a75f88
AD
1532 default:
1533 return 0;
1534 case CHIP_VEGA10:
1535 chip_name = "vega10";
1536 break;
3f76dced
AD
1537 case CHIP_VEGA12:
1538 chip_name = "vega12";
1539 break;
2d2e5e7e 1540 case CHIP_RAVEN:
54c4d17e
FX
1541 if (adev->rev_id >= 8)
1542 chip_name = "raven2";
741deade
AD
1543 else if (adev->pdev->device == 0x15d8)
1544 chip_name = "picasso";
54c4d17e
FX
1545 else
1546 chip_name = "raven";
2d2e5e7e 1547 break;
65e60f6e
LM
1548 case CHIP_ARCTURUS:
1549 chip_name = "arcturus";
1550 break;
b51a26a0
HR
1551 case CHIP_RENOIR:
1552 chip_name = "renoir";
1553 break;
23c6268e
HR
1554 case CHIP_NAVI10:
1555 chip_name = "navi10";
1556 break;
ed42cfe1
XY
1557 case CHIP_NAVI14:
1558 chip_name = "navi14";
1559 break;
42b325e5
XY
1560 case CHIP_NAVI12:
1561 chip_name = "navi12";
1562 break;
e2a75f88
AD
1563 }
1564
1565 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_gpu_info.bin", chip_name);
ab4fe3e1 1566 err = request_firmware(&adev->firmware.gpu_info_fw, fw_name, adev->dev);
e2a75f88
AD
1567 if (err) {
1568 dev_err(adev->dev,
1569 "Failed to load gpu_info firmware \"%s\"\n",
1570 fw_name);
1571 goto out;
1572 }
ab4fe3e1 1573 err = amdgpu_ucode_validate(adev->firmware.gpu_info_fw);
e2a75f88
AD
1574 if (err) {
1575 dev_err(adev->dev,
1576 "Failed to validate gpu_info firmware \"%s\"\n",
1577 fw_name);
1578 goto out;
1579 }
1580
ab4fe3e1 1581 hdr = (const struct gpu_info_firmware_header_v1_0 *)adev->firmware.gpu_info_fw->data;
e2a75f88
AD
1582 amdgpu_ucode_print_gpu_info_hdr(&hdr->header);
1583
1584 switch (hdr->version_major) {
1585 case 1:
1586 {
1587 const struct gpu_info_firmware_v1_0 *gpu_info_fw =
ab4fe3e1 1588 (const struct gpu_info_firmware_v1_0 *)(adev->firmware.gpu_info_fw->data +
e2a75f88
AD
1589 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1590
ec51d3fa
XY
1591 if (amdgpu_discovery && adev->asic_type >= CHIP_NAVI10)
1592 goto parse_soc_bounding_box;
1593
b5ab16bf
AD
1594 adev->gfx.config.max_shader_engines = le32_to_cpu(gpu_info_fw->gc_num_se);
1595 adev->gfx.config.max_cu_per_sh = le32_to_cpu(gpu_info_fw->gc_num_cu_per_sh);
1596 adev->gfx.config.max_sh_per_se = le32_to_cpu(gpu_info_fw->gc_num_sh_per_se);
1597 adev->gfx.config.max_backends_per_se = le32_to_cpu(gpu_info_fw->gc_num_rb_per_se);
e2a75f88 1598 adev->gfx.config.max_texture_channel_caches =
b5ab16bf
AD
1599 le32_to_cpu(gpu_info_fw->gc_num_tccs);
1600 adev->gfx.config.max_gprs = le32_to_cpu(gpu_info_fw->gc_num_gprs);
1601 adev->gfx.config.max_gs_threads = le32_to_cpu(gpu_info_fw->gc_num_max_gs_thds);
1602 adev->gfx.config.gs_vgt_table_depth = le32_to_cpu(gpu_info_fw->gc_gs_table_depth);
1603 adev->gfx.config.gs_prim_buffer_depth = le32_to_cpu(gpu_info_fw->gc_gsprim_buff_depth);
e2a75f88 1604 adev->gfx.config.double_offchip_lds_buf =
b5ab16bf
AD
1605 le32_to_cpu(gpu_info_fw->gc_double_offchip_lds_buffer);
1606 adev->gfx.cu_info.wave_front_size = le32_to_cpu(gpu_info_fw->gc_wave_size);
51fd0370
HZ
1607 adev->gfx.cu_info.max_waves_per_simd =
1608 le32_to_cpu(gpu_info_fw->gc_max_waves_per_simd);
1609 adev->gfx.cu_info.max_scratch_slots_per_cu =
1610 le32_to_cpu(gpu_info_fw->gc_max_scratch_slots_per_cu);
1611 adev->gfx.cu_info.lds_size = le32_to_cpu(gpu_info_fw->gc_lds_size);
48321c3d 1612 if (hdr->version_minor >= 1) {
35c2e910
HZ
1613 const struct gpu_info_firmware_v1_1 *gpu_info_fw =
1614 (const struct gpu_info_firmware_v1_1 *)(adev->firmware.gpu_info_fw->data +
1615 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1616 adev->gfx.config.num_sc_per_sh =
1617 le32_to_cpu(gpu_info_fw->num_sc_per_sh);
1618 adev->gfx.config.num_packer_per_sc =
1619 le32_to_cpu(gpu_info_fw->num_packer_per_sc);
1620 }
ec51d3fa
XY
1621
1622parse_soc_bounding_box:
ec51d3fa
XY
1623 /*
1624 * soc bounding box info is not integrated in disocovery table,
1625 * we always need to parse it from gpu info firmware.
1626 */
48321c3d
HW
1627 if (hdr->version_minor == 2) {
1628 const struct gpu_info_firmware_v1_2 *gpu_info_fw =
1629 (const struct gpu_info_firmware_v1_2 *)(adev->firmware.gpu_info_fw->data +
1630 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1631 adev->dm.soc_bounding_box = &gpu_info_fw->soc_bounding_box;
1632 }
e2a75f88
AD
1633 break;
1634 }
1635 default:
1636 dev_err(adev->dev,
1637 "Unsupported gpu_info table %d\n", hdr->header.ucode_version);
1638 err = -EINVAL;
1639 goto out;
1640 }
1641out:
e2a75f88
AD
1642 return err;
1643}
1644
e3ecdffa
AD
1645/**
1646 * amdgpu_device_ip_early_init - run early init for hardware IPs
1647 *
1648 * @adev: amdgpu_device pointer
1649 *
1650 * Early initialization pass for hardware IPs. The hardware IPs that make
1651 * up each asic are discovered each IP's early_init callback is run. This
1652 * is the first stage in initializing the asic.
1653 * Returns 0 on success, negative error code on failure.
1654 */
06ec9070 1655static int amdgpu_device_ip_early_init(struct amdgpu_device *adev)
d38ceaf9 1656{
aaa36a97 1657 int i, r;
d38ceaf9 1658
483ef985 1659 amdgpu_device_enable_virtual_display(adev);
a6be7570 1660
d38ceaf9 1661 switch (adev->asic_type) {
aaa36a97
AD
1662 case CHIP_TOPAZ:
1663 case CHIP_TONGA:
48299f95 1664 case CHIP_FIJI:
2cc0c0b5 1665 case CHIP_POLARIS10:
32cc7e53 1666 case CHIP_POLARIS11:
c4642a47 1667 case CHIP_POLARIS12:
32cc7e53 1668 case CHIP_VEGAM:
aaa36a97 1669 case CHIP_CARRIZO:
39bb0c92
SL
1670 case CHIP_STONEY:
1671 if (adev->asic_type == CHIP_CARRIZO || adev->asic_type == CHIP_STONEY)
aaa36a97
AD
1672 adev->family = AMDGPU_FAMILY_CZ;
1673 else
1674 adev->family = AMDGPU_FAMILY_VI;
1675
1676 r = vi_set_ip_blocks(adev);
1677 if (r)
1678 return r;
1679 break;
33f34802
KW
1680#ifdef CONFIG_DRM_AMDGPU_SI
1681 case CHIP_VERDE:
1682 case CHIP_TAHITI:
1683 case CHIP_PITCAIRN:
1684 case CHIP_OLAND:
1685 case CHIP_HAINAN:
295d0daf 1686 adev->family = AMDGPU_FAMILY_SI;
33f34802
KW
1687 r = si_set_ip_blocks(adev);
1688 if (r)
1689 return r;
1690 break;
1691#endif
a2e73f56
AD
1692#ifdef CONFIG_DRM_AMDGPU_CIK
1693 case CHIP_BONAIRE:
1694 case CHIP_HAWAII:
1695 case CHIP_KAVERI:
1696 case CHIP_KABINI:
1697 case CHIP_MULLINS:
1698 if ((adev->asic_type == CHIP_BONAIRE) || (adev->asic_type == CHIP_HAWAII))
1699 adev->family = AMDGPU_FAMILY_CI;
1700 else
1701 adev->family = AMDGPU_FAMILY_KV;
1702
1703 r = cik_set_ip_blocks(adev);
1704 if (r)
1705 return r;
1706 break;
1707#endif
e48a3cd9
AD
1708 case CHIP_VEGA10:
1709 case CHIP_VEGA12:
e4bd8170 1710 case CHIP_VEGA20:
e48a3cd9 1711 case CHIP_RAVEN:
61cf44c1 1712 case CHIP_ARCTURUS:
b51a26a0
HR
1713 case CHIP_RENOIR:
1714 if (adev->asic_type == CHIP_RAVEN ||
1715 adev->asic_type == CHIP_RENOIR)
2ca8a5d2
CZ
1716 adev->family = AMDGPU_FAMILY_RV;
1717 else
1718 adev->family = AMDGPU_FAMILY_AI;
460826e6
KW
1719
1720 r = soc15_set_ip_blocks(adev);
1721 if (r)
1722 return r;
1723 break;
0a5b8c7b 1724 case CHIP_NAVI10:
7ecb5cd4 1725 case CHIP_NAVI14:
4808cf9c 1726 case CHIP_NAVI12:
0a5b8c7b
HR
1727 adev->family = AMDGPU_FAMILY_NV;
1728
1729 r = nv_set_ip_blocks(adev);
1730 if (r)
1731 return r;
1732 break;
d38ceaf9
AD
1733 default:
1734 /* FIXME: not supported yet */
1735 return -EINVAL;
1736 }
1737
e2a75f88
AD
1738 r = amdgpu_device_parse_gpu_info_fw(adev);
1739 if (r)
1740 return r;
1741
ec51d3fa
XY
1742 if (amdgpu_discovery && adev->asic_type >= CHIP_NAVI10)
1743 amdgpu_discovery_get_gfx_info(adev);
1744
1884734a 1745 amdgpu_amdkfd_device_probe(adev);
1746
3149d9da 1747 if (amdgpu_sriov_vf(adev)) {
122078de
ML
1748 /* handle vbios stuff prior full access mode for new handshake */
1749 if (adev->virt.req_init_data_ver == 1) {
1750 if (!amdgpu_get_bios(adev)) {
1751 DRM_ERROR("failed to get vbios\n");
1752 return -EINVAL;
1753 }
1754
1755 r = amdgpu_atombios_init(adev);
1756 if (r) {
1757 dev_err(adev->dev, "amdgpu_atombios_init failed\n");
1758 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_INIT_FAIL, 0, 0);
1759 return r;
1760 }
1761 }
2f294132 1762 }
122078de 1763
2f294132
ML
1764 /* we need to send REQ_GPU here for legacy handshaker otherwise the vbios
1765 * will not be prepared by host for this VF */
1766 if (amdgpu_sriov_vf(adev) && adev->virt.req_init_data_ver < 1) {
3149d9da
XY
1767 r = amdgpu_virt_request_full_gpu(adev, true);
1768 if (r)
2f294132 1769 return r;
3149d9da
XY
1770 }
1771
3b94fb10 1772 adev->pm.pp_feature = amdgpu_pp_feature_mask;
a35ad98b 1773 if (amdgpu_sriov_vf(adev) || sched_policy == KFD_SCHED_POLICY_NO_HWS)
00544006 1774 adev->pm.pp_feature &= ~PP_GFXOFF_MASK;
00f54b97 1775
d38ceaf9
AD
1776 for (i = 0; i < adev->num_ip_blocks; i++) {
1777 if ((amdgpu_ip_block_mask & (1 << i)) == 0) {
ed8cf00c
HR
1778 DRM_ERROR("disabled ip block: %d <%s>\n",
1779 i, adev->ip_blocks[i].version->funcs->name);
a1255107 1780 adev->ip_blocks[i].status.valid = false;
d38ceaf9 1781 } else {
a1255107
AD
1782 if (adev->ip_blocks[i].version->funcs->early_init) {
1783 r = adev->ip_blocks[i].version->funcs->early_init((void *)adev);
2c1a2784 1784 if (r == -ENOENT) {
a1255107 1785 adev->ip_blocks[i].status.valid = false;
2c1a2784 1786 } else if (r) {
a1255107
AD
1787 DRM_ERROR("early_init of IP block <%s> failed %d\n",
1788 adev->ip_blocks[i].version->funcs->name, r);
d38ceaf9 1789 return r;
2c1a2784 1790 } else {
a1255107 1791 adev->ip_blocks[i].status.valid = true;
2c1a2784 1792 }
974e6b64 1793 } else {
a1255107 1794 adev->ip_blocks[i].status.valid = true;
d38ceaf9 1795 }
d38ceaf9 1796 }
21a249ca
AD
1797 /* get the vbios after the asic_funcs are set up */
1798 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON) {
122078de
ML
1799 /* skip vbios handling for new handshake */
1800 if (amdgpu_sriov_vf(adev) && adev->virt.req_init_data_ver == 1)
1801 continue;
1802
21a249ca
AD
1803 /* Read BIOS */
1804 if (!amdgpu_get_bios(adev))
1805 return -EINVAL;
1806
1807 r = amdgpu_atombios_init(adev);
1808 if (r) {
1809 dev_err(adev->dev, "amdgpu_atombios_init failed\n");
1810 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_INIT_FAIL, 0, 0);
1811 return r;
1812 }
1813 }
d38ceaf9
AD
1814 }
1815
395d1fb9
NH
1816 adev->cg_flags &= amdgpu_cg_mask;
1817 adev->pg_flags &= amdgpu_pg_mask;
1818
d38ceaf9
AD
1819 return 0;
1820}
1821
0a4f2520
RZ
1822static int amdgpu_device_ip_hw_init_phase1(struct amdgpu_device *adev)
1823{
1824 int i, r;
1825
1826 for (i = 0; i < adev->num_ip_blocks; i++) {
1827 if (!adev->ip_blocks[i].status.sw)
1828 continue;
1829 if (adev->ip_blocks[i].status.hw)
1830 continue;
1831 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
2d11fd3f 1832 (amdgpu_sriov_vf(adev) && (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP)) ||
0a4f2520
RZ
1833 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH) {
1834 r = adev->ip_blocks[i].version->funcs->hw_init(adev);
1835 if (r) {
1836 DRM_ERROR("hw_init of IP block <%s> failed %d\n",
1837 adev->ip_blocks[i].version->funcs->name, r);
1838 return r;
1839 }
1840 adev->ip_blocks[i].status.hw = true;
1841 }
1842 }
1843
1844 return 0;
1845}
1846
1847static int amdgpu_device_ip_hw_init_phase2(struct amdgpu_device *adev)
1848{
1849 int i, r;
1850
1851 for (i = 0; i < adev->num_ip_blocks; i++) {
1852 if (!adev->ip_blocks[i].status.sw)
1853 continue;
1854 if (adev->ip_blocks[i].status.hw)
1855 continue;
1856 r = adev->ip_blocks[i].version->funcs->hw_init(adev);
1857 if (r) {
1858 DRM_ERROR("hw_init of IP block <%s> failed %d\n",
1859 adev->ip_blocks[i].version->funcs->name, r);
1860 return r;
1861 }
1862 adev->ip_blocks[i].status.hw = true;
1863 }
1864
1865 return 0;
1866}
1867
7a3e0bb2
RZ
1868static int amdgpu_device_fw_loading(struct amdgpu_device *adev)
1869{
1870 int r = 0;
1871 int i;
80f41f84 1872 uint32_t smu_version;
7a3e0bb2
RZ
1873
1874 if (adev->asic_type >= CHIP_VEGA10) {
1875 for (i = 0; i < adev->num_ip_blocks; i++) {
482f0e53
ML
1876 if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_PSP)
1877 continue;
1878
1879 /* no need to do the fw loading again if already done*/
1880 if (adev->ip_blocks[i].status.hw == true)
1881 break;
1882
1883 if (adev->in_gpu_reset || adev->in_suspend) {
1884 r = adev->ip_blocks[i].version->funcs->resume(adev);
1885 if (r) {
1886 DRM_ERROR("resume of IP block <%s> failed %d\n",
7a3e0bb2 1887 adev->ip_blocks[i].version->funcs->name, r);
482f0e53
ML
1888 return r;
1889 }
1890 } else {
1891 r = adev->ip_blocks[i].version->funcs->hw_init(adev);
1892 if (r) {
1893 DRM_ERROR("hw_init of IP block <%s> failed %d\n",
1894 adev->ip_blocks[i].version->funcs->name, r);
1895 return r;
7a3e0bb2 1896 }
7a3e0bb2 1897 }
482f0e53
ML
1898
1899 adev->ip_blocks[i].status.hw = true;
1900 break;
7a3e0bb2
RZ
1901 }
1902 }
482f0e53 1903
8973d9ec
ED
1904 if (!amdgpu_sriov_vf(adev) || adev->asic_type == CHIP_TONGA)
1905 r = amdgpu_pm_load_smu_firmware(adev, &smu_version);
7a3e0bb2 1906
80f41f84 1907 return r;
7a3e0bb2
RZ
1908}
1909
e3ecdffa
AD
1910/**
1911 * amdgpu_device_ip_init - run init for hardware IPs
1912 *
1913 * @adev: amdgpu_device pointer
1914 *
1915 * Main initialization pass for hardware IPs. The list of all the hardware
1916 * IPs that make up the asic is walked and the sw_init and hw_init callbacks
1917 * are run. sw_init initializes the software state associated with each IP
1918 * and hw_init initializes the hardware associated with each IP.
1919 * Returns 0 on success, negative error code on failure.
1920 */
06ec9070 1921static int amdgpu_device_ip_init(struct amdgpu_device *adev)
d38ceaf9
AD
1922{
1923 int i, r;
1924
c030f2e4 1925 r = amdgpu_ras_init(adev);
1926 if (r)
1927 return r;
1928
2f294132
ML
1929 if (amdgpu_sriov_vf(adev) && adev->virt.req_init_data_ver > 0) {
1930 r = amdgpu_virt_request_full_gpu(adev, true);
1931 if (r)
1932 return -EAGAIN;
1933 }
1934
d38ceaf9 1935 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 1936 if (!adev->ip_blocks[i].status.valid)
d38ceaf9 1937 continue;
a1255107 1938 r = adev->ip_blocks[i].version->funcs->sw_init((void *)adev);
2c1a2784 1939 if (r) {
a1255107
AD
1940 DRM_ERROR("sw_init of IP block <%s> failed %d\n",
1941 adev->ip_blocks[i].version->funcs->name, r);
72d3f592 1942 goto init_failed;
2c1a2784 1943 }
a1255107 1944 adev->ip_blocks[i].status.sw = true;
bfca0289 1945
d38ceaf9 1946 /* need to do gmc hw init early so we can allocate gpu mem */
a1255107 1947 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) {
06ec9070 1948 r = amdgpu_device_vram_scratch_init(adev);
2c1a2784
AD
1949 if (r) {
1950 DRM_ERROR("amdgpu_vram_scratch_init failed %d\n", r);
72d3f592 1951 goto init_failed;
2c1a2784 1952 }
a1255107 1953 r = adev->ip_blocks[i].version->funcs->hw_init((void *)adev);
2c1a2784
AD
1954 if (r) {
1955 DRM_ERROR("hw_init %d failed %d\n", i, r);
72d3f592 1956 goto init_failed;
2c1a2784 1957 }
06ec9070 1958 r = amdgpu_device_wb_init(adev);
2c1a2784 1959 if (r) {
06ec9070 1960 DRM_ERROR("amdgpu_device_wb_init failed %d\n", r);
72d3f592 1961 goto init_failed;
2c1a2784 1962 }
a1255107 1963 adev->ip_blocks[i].status.hw = true;
2493664f
ML
1964
1965 /* right after GMC hw init, we create CSA */
f92d5c61 1966 if (amdgpu_mcbp || amdgpu_sriov_vf(adev)) {
1e256e27
RZ
1967 r = amdgpu_allocate_static_csa(adev, &adev->virt.csa_obj,
1968 AMDGPU_GEM_DOMAIN_VRAM,
1969 AMDGPU_CSA_SIZE);
2493664f
ML
1970 if (r) {
1971 DRM_ERROR("allocate CSA failed %d\n", r);
72d3f592 1972 goto init_failed;
2493664f
ML
1973 }
1974 }
d38ceaf9
AD
1975 }
1976 }
1977
c9ffa427
YT
1978 if (amdgpu_sriov_vf(adev))
1979 amdgpu_virt_init_data_exchange(adev);
1980
533aed27
AG
1981 r = amdgpu_ib_pool_init(adev);
1982 if (r) {
1983 dev_err(adev->dev, "IB initialization failed (%d).\n", r);
1984 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_IB_INIT_FAIL, 0, r);
1985 goto init_failed;
1986 }
1987
c8963ea4
RZ
1988 r = amdgpu_ucode_create_bo(adev); /* create ucode bo when sw_init complete*/
1989 if (r)
72d3f592 1990 goto init_failed;
0a4f2520
RZ
1991
1992 r = amdgpu_device_ip_hw_init_phase1(adev);
1993 if (r)
72d3f592 1994 goto init_failed;
0a4f2520 1995
7a3e0bb2
RZ
1996 r = amdgpu_device_fw_loading(adev);
1997 if (r)
72d3f592 1998 goto init_failed;
7a3e0bb2 1999
0a4f2520
RZ
2000 r = amdgpu_device_ip_hw_init_phase2(adev);
2001 if (r)
72d3f592 2002 goto init_failed;
d38ceaf9 2003
121a2bc6
AG
2004 /*
2005 * retired pages will be loaded from eeprom and reserved here,
2006 * it should be called after amdgpu_device_ip_hw_init_phase2 since
2007 * for some ASICs the RAS EEPROM code relies on SMU fully functioning
2008 * for I2C communication which only true at this point.
2009 * recovery_init may fail, but it can free all resources allocated by
2010 * itself and its failure should not stop amdgpu init process.
2011 *
2012 * Note: theoretically, this should be called before all vram allocations
2013 * to protect retired page from abusing
2014 */
2015 amdgpu_ras_recovery_init(adev);
2016
3e2e2ab5
HZ
2017 if (adev->gmc.xgmi.num_physical_nodes > 1)
2018 amdgpu_xgmi_add_device(adev);
1884734a 2019 amdgpu_amdkfd_device_init(adev);
c6332b97 2020
bd607166
KR
2021 amdgpu_fru_get_product_info(adev);
2022
72d3f592 2023init_failed:
c9ffa427 2024 if (amdgpu_sriov_vf(adev))
c6332b97 2025 amdgpu_virt_release_full_gpu(adev, true);
2026
72d3f592 2027 return r;
d38ceaf9
AD
2028}
2029
e3ecdffa
AD
2030/**
2031 * amdgpu_device_fill_reset_magic - writes reset magic to gart pointer
2032 *
2033 * @adev: amdgpu_device pointer
2034 *
2035 * Writes a reset magic value to the gart pointer in VRAM. The driver calls
2036 * this function before a GPU reset. If the value is retained after a
2037 * GPU reset, VRAM has not been lost. Some GPU resets may destry VRAM contents.
2038 */
06ec9070 2039static void amdgpu_device_fill_reset_magic(struct amdgpu_device *adev)
0c49e0b8
CZ
2040{
2041 memcpy(adev->reset_magic, adev->gart.ptr, AMDGPU_RESET_MAGIC_NUM);
2042}
2043
e3ecdffa
AD
2044/**
2045 * amdgpu_device_check_vram_lost - check if vram is valid
2046 *
2047 * @adev: amdgpu_device pointer
2048 *
2049 * Checks the reset magic value written to the gart pointer in VRAM.
2050 * The driver calls this after a GPU reset to see if the contents of
2051 * VRAM is lost or now.
2052 * returns true if vram is lost, false if not.
2053 */
06ec9070 2054static bool amdgpu_device_check_vram_lost(struct amdgpu_device *adev)
0c49e0b8 2055{
dadce777
EQ
2056 if (memcmp(adev->gart.ptr, adev->reset_magic,
2057 AMDGPU_RESET_MAGIC_NUM))
2058 return true;
2059
2060 if (!adev->in_gpu_reset)
2061 return false;
2062
2063 /*
2064 * For all ASICs with baco/mode1 reset, the VRAM is
2065 * always assumed to be lost.
2066 */
2067 switch (amdgpu_asic_reset_method(adev)) {
2068 case AMD_RESET_METHOD_BACO:
2069 case AMD_RESET_METHOD_MODE1:
2070 return true;
2071 default:
2072 return false;
2073 }
0c49e0b8
CZ
2074}
2075
e3ecdffa 2076/**
1112a46b 2077 * amdgpu_device_set_cg_state - set clockgating for amdgpu device
e3ecdffa
AD
2078 *
2079 * @adev: amdgpu_device pointer
b8b72130 2080 * @state: clockgating state (gate or ungate)
e3ecdffa 2081 *
e3ecdffa 2082 * The list of all the hardware IPs that make up the asic is walked and the
1112a46b
RZ
2083 * set_clockgating_state callbacks are run.
2084 * Late initialization pass enabling clockgating for hardware IPs.
2085 * Fini or suspend, pass disabling clockgating for hardware IPs.
e3ecdffa
AD
2086 * Returns 0 on success, negative error code on failure.
2087 */
fdd34271 2088
1112a46b
RZ
2089static int amdgpu_device_set_cg_state(struct amdgpu_device *adev,
2090 enum amd_clockgating_state state)
d38ceaf9 2091{
1112a46b 2092 int i, j, r;
d38ceaf9 2093
4a2ba394
SL
2094 if (amdgpu_emu_mode == 1)
2095 return 0;
2096
1112a46b
RZ
2097 for (j = 0; j < adev->num_ip_blocks; j++) {
2098 i = state == AMD_CG_STATE_GATE ? j : adev->num_ip_blocks - j - 1;
a2d31dc3 2099 if (!adev->ip_blocks[i].status.late_initialized)
d38ceaf9 2100 continue;
4a446d55 2101 /* skip CG for VCE/UVD, it's handled specially */
a1255107 2102 if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_UVD &&
57716327 2103 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCE &&
34319b32 2104 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCN &&
52f2e779 2105 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_JPEG &&
57716327 2106 adev->ip_blocks[i].version->funcs->set_clockgating_state) {
4a446d55 2107 /* enable clockgating to save power */
a1255107 2108 r = adev->ip_blocks[i].version->funcs->set_clockgating_state((void *)adev,
1112a46b 2109 state);
4a446d55
AD
2110 if (r) {
2111 DRM_ERROR("set_clockgating_state(gate) of IP block <%s> failed %d\n",
a1255107 2112 adev->ip_blocks[i].version->funcs->name, r);
4a446d55
AD
2113 return r;
2114 }
b0b00ff1 2115 }
d38ceaf9 2116 }
06b18f61 2117
c9f96fd5
RZ
2118 return 0;
2119}
2120
1112a46b 2121static int amdgpu_device_set_pg_state(struct amdgpu_device *adev, enum amd_powergating_state state)
c9f96fd5 2122{
1112a46b 2123 int i, j, r;
06b18f61 2124
c9f96fd5
RZ
2125 if (amdgpu_emu_mode == 1)
2126 return 0;
2127
1112a46b
RZ
2128 for (j = 0; j < adev->num_ip_blocks; j++) {
2129 i = state == AMD_PG_STATE_GATE ? j : adev->num_ip_blocks - j - 1;
a2d31dc3 2130 if (!adev->ip_blocks[i].status.late_initialized)
c9f96fd5
RZ
2131 continue;
2132 /* skip CG for VCE/UVD, it's handled specially */
2133 if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_UVD &&
2134 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCE &&
2135 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCN &&
52f2e779 2136 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_JPEG &&
c9f96fd5
RZ
2137 adev->ip_blocks[i].version->funcs->set_powergating_state) {
2138 /* enable powergating to save power */
2139 r = adev->ip_blocks[i].version->funcs->set_powergating_state((void *)adev,
1112a46b 2140 state);
c9f96fd5
RZ
2141 if (r) {
2142 DRM_ERROR("set_powergating_state(gate) of IP block <%s> failed %d\n",
2143 adev->ip_blocks[i].version->funcs->name, r);
2144 return r;
2145 }
2146 }
2147 }
2dc80b00
S
2148 return 0;
2149}
2150
beff74bc
AD
2151static int amdgpu_device_enable_mgpu_fan_boost(void)
2152{
2153 struct amdgpu_gpu_instance *gpu_ins;
2154 struct amdgpu_device *adev;
2155 int i, ret = 0;
2156
2157 mutex_lock(&mgpu_info.mutex);
2158
2159 /*
2160 * MGPU fan boost feature should be enabled
2161 * only when there are two or more dGPUs in
2162 * the system
2163 */
2164 if (mgpu_info.num_dgpu < 2)
2165 goto out;
2166
2167 for (i = 0; i < mgpu_info.num_dgpu; i++) {
2168 gpu_ins = &(mgpu_info.gpu_ins[i]);
2169 adev = gpu_ins->adev;
2170 if (!(adev->flags & AMD_IS_APU) &&
2171 !gpu_ins->mgpu_fan_enabled &&
2172 adev->powerplay.pp_funcs &&
2173 adev->powerplay.pp_funcs->enable_mgpu_fan_boost) {
2174 ret = amdgpu_dpm_enable_mgpu_fan_boost(adev);
2175 if (ret)
2176 break;
2177
2178 gpu_ins->mgpu_fan_enabled = 1;
2179 }
2180 }
2181
2182out:
2183 mutex_unlock(&mgpu_info.mutex);
2184
2185 return ret;
2186}
2187
e3ecdffa
AD
2188/**
2189 * amdgpu_device_ip_late_init - run late init for hardware IPs
2190 *
2191 * @adev: amdgpu_device pointer
2192 *
2193 * Late initialization pass for hardware IPs. The list of all the hardware
2194 * IPs that make up the asic is walked and the late_init callbacks are run.
2195 * late_init covers any special initialization that an IP requires
2196 * after all of the have been initialized or something that needs to happen
2197 * late in the init process.
2198 * Returns 0 on success, negative error code on failure.
2199 */
06ec9070 2200static int amdgpu_device_ip_late_init(struct amdgpu_device *adev)
2dc80b00 2201{
60599a03 2202 struct amdgpu_gpu_instance *gpu_instance;
2dc80b00
S
2203 int i = 0, r;
2204
2205 for (i = 0; i < adev->num_ip_blocks; i++) {
73f847db 2206 if (!adev->ip_blocks[i].status.hw)
2dc80b00
S
2207 continue;
2208 if (adev->ip_blocks[i].version->funcs->late_init) {
2209 r = adev->ip_blocks[i].version->funcs->late_init((void *)adev);
2210 if (r) {
2211 DRM_ERROR("late_init of IP block <%s> failed %d\n",
2212 adev->ip_blocks[i].version->funcs->name, r);
2213 return r;
2214 }
2dc80b00 2215 }
73f847db 2216 adev->ip_blocks[i].status.late_initialized = true;
2dc80b00
S
2217 }
2218
1112a46b
RZ
2219 amdgpu_device_set_cg_state(adev, AMD_CG_STATE_GATE);
2220 amdgpu_device_set_pg_state(adev, AMD_PG_STATE_GATE);
916ac57f 2221
06ec9070 2222 amdgpu_device_fill_reset_magic(adev);
d38ceaf9 2223
beff74bc
AD
2224 r = amdgpu_device_enable_mgpu_fan_boost();
2225 if (r)
2226 DRM_ERROR("enable mgpu fan boost failed (%d).\n", r);
2227
60599a03
EQ
2228
2229 if (adev->gmc.xgmi.num_physical_nodes > 1) {
2230 mutex_lock(&mgpu_info.mutex);
2231
2232 /*
2233 * Reset device p-state to low as this was booted with high.
2234 *
2235 * This should be performed only after all devices from the same
2236 * hive get initialized.
2237 *
2238 * However, it's unknown how many device in the hive in advance.
2239 * As this is counted one by one during devices initializations.
2240 *
2241 * So, we wait for all XGMI interlinked devices initialized.
2242 * This may bring some delays as those devices may come from
2243 * different hives. But that should be OK.
2244 */
2245 if (mgpu_info.num_dgpu == adev->gmc.xgmi.num_physical_nodes) {
2246 for (i = 0; i < mgpu_info.num_gpu; i++) {
2247 gpu_instance = &(mgpu_info.gpu_ins[i]);
2248 if (gpu_instance->adev->flags & AMD_IS_APU)
2249 continue;
2250
2251 r = amdgpu_xgmi_set_pstate(gpu_instance->adev, 0);
2252 if (r) {
2253 DRM_ERROR("pstate setting failed (%d).\n", r);
2254 break;
2255 }
2256 }
2257 }
2258
2259 mutex_unlock(&mgpu_info.mutex);
2260 }
2261
d38ceaf9
AD
2262 return 0;
2263}
2264
e3ecdffa
AD
2265/**
2266 * amdgpu_device_ip_fini - run fini for hardware IPs
2267 *
2268 * @adev: amdgpu_device pointer
2269 *
2270 * Main teardown pass for hardware IPs. The list of all the hardware
2271 * IPs that make up the asic is walked and the hw_fini and sw_fini callbacks
2272 * are run. hw_fini tears down the hardware associated with each IP
2273 * and sw_fini tears down any software state associated with each IP.
2274 * Returns 0 on success, negative error code on failure.
2275 */
06ec9070 2276static int amdgpu_device_ip_fini(struct amdgpu_device *adev)
d38ceaf9
AD
2277{
2278 int i, r;
2279
c030f2e4 2280 amdgpu_ras_pre_fini(adev);
2281
a82400b5
AG
2282 if (adev->gmc.xgmi.num_physical_nodes > 1)
2283 amdgpu_xgmi_remove_device(adev);
2284
1884734a 2285 amdgpu_amdkfd_device_fini(adev);
05df1f01
RZ
2286
2287 amdgpu_device_set_pg_state(adev, AMD_PG_STATE_UNGATE);
fdd34271
RZ
2288 amdgpu_device_set_cg_state(adev, AMD_CG_STATE_UNGATE);
2289
3e96dbfd
AD
2290 /* need to disable SMC first */
2291 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 2292 if (!adev->ip_blocks[i].status.hw)
3e96dbfd 2293 continue;
fdd34271 2294 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) {
a1255107 2295 r = adev->ip_blocks[i].version->funcs->hw_fini((void *)adev);
3e96dbfd
AD
2296 /* XXX handle errors */
2297 if (r) {
2298 DRM_DEBUG("hw_fini of IP block <%s> failed %d\n",
a1255107 2299 adev->ip_blocks[i].version->funcs->name, r);
3e96dbfd 2300 }
a1255107 2301 adev->ip_blocks[i].status.hw = false;
3e96dbfd
AD
2302 break;
2303 }
2304 }
2305
d38ceaf9 2306 for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
a1255107 2307 if (!adev->ip_blocks[i].status.hw)
d38ceaf9 2308 continue;
8201a67a 2309
a1255107 2310 r = adev->ip_blocks[i].version->funcs->hw_fini((void *)adev);
d38ceaf9 2311 /* XXX handle errors */
2c1a2784 2312 if (r) {
a1255107
AD
2313 DRM_DEBUG("hw_fini of IP block <%s> failed %d\n",
2314 adev->ip_blocks[i].version->funcs->name, r);
2c1a2784 2315 }
8201a67a 2316
a1255107 2317 adev->ip_blocks[i].status.hw = false;
d38ceaf9
AD
2318 }
2319
9950cda2 2320
d38ceaf9 2321 for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
a1255107 2322 if (!adev->ip_blocks[i].status.sw)
d38ceaf9 2323 continue;
c12aba3a
ML
2324
2325 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) {
c8963ea4 2326 amdgpu_ucode_free_bo(adev);
1e256e27 2327 amdgpu_free_static_csa(&adev->virt.csa_obj);
c12aba3a
ML
2328 amdgpu_device_wb_fini(adev);
2329 amdgpu_device_vram_scratch_fini(adev);
533aed27 2330 amdgpu_ib_pool_fini(adev);
c12aba3a
ML
2331 }
2332
a1255107 2333 r = adev->ip_blocks[i].version->funcs->sw_fini((void *)adev);
d38ceaf9 2334 /* XXX handle errors */
2c1a2784 2335 if (r) {
a1255107
AD
2336 DRM_DEBUG("sw_fini of IP block <%s> failed %d\n",
2337 adev->ip_blocks[i].version->funcs->name, r);
2c1a2784 2338 }
a1255107
AD
2339 adev->ip_blocks[i].status.sw = false;
2340 adev->ip_blocks[i].status.valid = false;
d38ceaf9
AD
2341 }
2342
a6dcfd9c 2343 for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
a1255107 2344 if (!adev->ip_blocks[i].status.late_initialized)
8a2eef1d 2345 continue;
a1255107
AD
2346 if (adev->ip_blocks[i].version->funcs->late_fini)
2347 adev->ip_blocks[i].version->funcs->late_fini((void *)adev);
2348 adev->ip_blocks[i].status.late_initialized = false;
a6dcfd9c
ML
2349 }
2350
c030f2e4 2351 amdgpu_ras_fini(adev);
2352
030308fc 2353 if (amdgpu_sriov_vf(adev))
24136135
ML
2354 if (amdgpu_virt_release_full_gpu(adev, false))
2355 DRM_ERROR("failed to release exclusive mode on fini\n");
2493664f 2356
d38ceaf9
AD
2357 return 0;
2358}
2359
e3ecdffa 2360/**
beff74bc 2361 * amdgpu_device_delayed_init_work_handler - work handler for IB tests
e3ecdffa 2362 *
1112a46b 2363 * @work: work_struct.
e3ecdffa 2364 */
beff74bc 2365static void amdgpu_device_delayed_init_work_handler(struct work_struct *work)
2dc80b00
S
2366{
2367 struct amdgpu_device *adev =
beff74bc 2368 container_of(work, struct amdgpu_device, delayed_init_work.work);
916ac57f
RZ
2369 int r;
2370
2371 r = amdgpu_ib_ring_tests(adev);
2372 if (r)
2373 DRM_ERROR("ib ring test failed (%d).\n", r);
2dc80b00
S
2374}
2375
1e317b99
RZ
2376static void amdgpu_device_delay_enable_gfx_off(struct work_struct *work)
2377{
2378 struct amdgpu_device *adev =
2379 container_of(work, struct amdgpu_device, gfx.gfx_off_delay_work.work);
2380
2381 mutex_lock(&adev->gfx.gfx_off_mutex);
2382 if (!adev->gfx.gfx_off_state && !adev->gfx.gfx_off_req_count) {
2383 if (!amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_GFX, true))
2384 adev->gfx.gfx_off_state = true;
2385 }
2386 mutex_unlock(&adev->gfx.gfx_off_mutex);
2387}
2388
e3ecdffa 2389/**
e7854a03 2390 * amdgpu_device_ip_suspend_phase1 - run suspend for hardware IPs (phase 1)
e3ecdffa
AD
2391 *
2392 * @adev: amdgpu_device pointer
2393 *
2394 * Main suspend function for hardware IPs. The list of all the hardware
2395 * IPs that make up the asic is walked, clockgating is disabled and the
2396 * suspend callbacks are run. suspend puts the hardware and software state
2397 * in each IP into a state suitable for suspend.
2398 * Returns 0 on success, negative error code on failure.
2399 */
e7854a03
AD
2400static int amdgpu_device_ip_suspend_phase1(struct amdgpu_device *adev)
2401{
2402 int i, r;
2403
ced1ba97
PL
2404 amdgpu_device_set_pg_state(adev, AMD_PG_STATE_UNGATE);
2405 amdgpu_device_set_cg_state(adev, AMD_CG_STATE_UNGATE);
05df1f01 2406
e7854a03
AD
2407 for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
2408 if (!adev->ip_blocks[i].status.valid)
2409 continue;
2410 /* displays are handled separately */
2411 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_DCE) {
e7854a03
AD
2412 /* XXX handle errors */
2413 r = adev->ip_blocks[i].version->funcs->suspend(adev);
2414 /* XXX handle errors */
2415 if (r) {
2416 DRM_ERROR("suspend of IP block <%s> failed %d\n",
2417 adev->ip_blocks[i].version->funcs->name, r);
482f0e53 2418 return r;
e7854a03 2419 }
482f0e53 2420 adev->ip_blocks[i].status.hw = false;
e7854a03
AD
2421 }
2422 }
2423
e7854a03
AD
2424 return 0;
2425}
2426
2427/**
2428 * amdgpu_device_ip_suspend_phase2 - run suspend for hardware IPs (phase 2)
2429 *
2430 * @adev: amdgpu_device pointer
2431 *
2432 * Main suspend function for hardware IPs. The list of all the hardware
2433 * IPs that make up the asic is walked, clockgating is disabled and the
2434 * suspend callbacks are run. suspend puts the hardware and software state
2435 * in each IP into a state suitable for suspend.
2436 * Returns 0 on success, negative error code on failure.
2437 */
2438static int amdgpu_device_ip_suspend_phase2(struct amdgpu_device *adev)
d38ceaf9
AD
2439{
2440 int i, r;
2441
2442 for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
a1255107 2443 if (!adev->ip_blocks[i].status.valid)
d38ceaf9 2444 continue;
e7854a03
AD
2445 /* displays are handled in phase1 */
2446 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_DCE)
2447 continue;
bff77e86
LM
2448 /* PSP lost connection when err_event_athub occurs */
2449 if (amdgpu_ras_intr_triggered() &&
2450 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP) {
2451 adev->ip_blocks[i].status.hw = false;
2452 continue;
2453 }
d38ceaf9 2454 /* XXX handle errors */
a1255107 2455 r = adev->ip_blocks[i].version->funcs->suspend(adev);
d38ceaf9 2456 /* XXX handle errors */
2c1a2784 2457 if (r) {
a1255107
AD
2458 DRM_ERROR("suspend of IP block <%s> failed %d\n",
2459 adev->ip_blocks[i].version->funcs->name, r);
2c1a2784 2460 }
876923fb 2461 adev->ip_blocks[i].status.hw = false;
a3a09142 2462 /* handle putting the SMC in the appropriate state */
86b93fd6
JZ
2463 if(!amdgpu_sriov_vf(adev)){
2464 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) {
2465 r = amdgpu_dpm_set_mp1_state(adev, adev->mp1_state);
2466 if (r) {
2467 DRM_ERROR("SMC failed to set mp1 state %d, %d\n",
2468 adev->mp1_state, r);
2469 return r;
2470 }
a3a09142
AD
2471 }
2472 }
b5507c7e 2473 adev->ip_blocks[i].status.hw = false;
d38ceaf9
AD
2474 }
2475
2476 return 0;
2477}
2478
e7854a03
AD
2479/**
2480 * amdgpu_device_ip_suspend - run suspend for hardware IPs
2481 *
2482 * @adev: amdgpu_device pointer
2483 *
2484 * Main suspend function for hardware IPs. The list of all the hardware
2485 * IPs that make up the asic is walked, clockgating is disabled and the
2486 * suspend callbacks are run. suspend puts the hardware and software state
2487 * in each IP into a state suitable for suspend.
2488 * Returns 0 on success, negative error code on failure.
2489 */
2490int amdgpu_device_ip_suspend(struct amdgpu_device *adev)
2491{
2492 int r;
2493
e7819644
YT
2494 if (amdgpu_sriov_vf(adev))
2495 amdgpu_virt_request_full_gpu(adev, false);
2496
e7854a03
AD
2497 r = amdgpu_device_ip_suspend_phase1(adev);
2498 if (r)
2499 return r;
2500 r = amdgpu_device_ip_suspend_phase2(adev);
2501
e7819644
YT
2502 if (amdgpu_sriov_vf(adev))
2503 amdgpu_virt_release_full_gpu(adev, false);
2504
e7854a03
AD
2505 return r;
2506}
2507
06ec9070 2508static int amdgpu_device_ip_reinit_early_sriov(struct amdgpu_device *adev)
a90ad3c2
ML
2509{
2510 int i, r;
2511
2cb681b6
ML
2512 static enum amd_ip_block_type ip_order[] = {
2513 AMD_IP_BLOCK_TYPE_GMC,
2514 AMD_IP_BLOCK_TYPE_COMMON,
39186aef 2515 AMD_IP_BLOCK_TYPE_PSP,
2cb681b6
ML
2516 AMD_IP_BLOCK_TYPE_IH,
2517 };
a90ad3c2 2518
2cb681b6
ML
2519 for (i = 0; i < ARRAY_SIZE(ip_order); i++) {
2520 int j;
2521 struct amdgpu_ip_block *block;
a90ad3c2 2522
2cb681b6
ML
2523 for (j = 0; j < adev->num_ip_blocks; j++) {
2524 block = &adev->ip_blocks[j];
2525
482f0e53 2526 block->status.hw = false;
2cb681b6
ML
2527 if (block->version->type != ip_order[i] ||
2528 !block->status.valid)
2529 continue;
2530
2531 r = block->version->funcs->hw_init(adev);
0aaeefcc 2532 DRM_INFO("RE-INIT-early: %s %s\n", block->version->funcs->name, r?"failed":"succeeded");
c41d1cf6
ML
2533 if (r)
2534 return r;
482f0e53 2535 block->status.hw = true;
a90ad3c2
ML
2536 }
2537 }
2538
2539 return 0;
2540}
2541
06ec9070 2542static int amdgpu_device_ip_reinit_late_sriov(struct amdgpu_device *adev)
a90ad3c2
ML
2543{
2544 int i, r;
2545
2cb681b6
ML
2546 static enum amd_ip_block_type ip_order[] = {
2547 AMD_IP_BLOCK_TYPE_SMC,
2548 AMD_IP_BLOCK_TYPE_DCE,
2549 AMD_IP_BLOCK_TYPE_GFX,
2550 AMD_IP_BLOCK_TYPE_SDMA,
257deb8c 2551 AMD_IP_BLOCK_TYPE_UVD,
d83c7a07
JJ
2552 AMD_IP_BLOCK_TYPE_VCE,
2553 AMD_IP_BLOCK_TYPE_VCN
2cb681b6 2554 };
a90ad3c2 2555
2cb681b6
ML
2556 for (i = 0; i < ARRAY_SIZE(ip_order); i++) {
2557 int j;
2558 struct amdgpu_ip_block *block;
a90ad3c2 2559
2cb681b6
ML
2560 for (j = 0; j < adev->num_ip_blocks; j++) {
2561 block = &adev->ip_blocks[j];
2562
2563 if (block->version->type != ip_order[i] ||
482f0e53
ML
2564 !block->status.valid ||
2565 block->status.hw)
2cb681b6
ML
2566 continue;
2567
895bd048
JZ
2568 if (block->version->type == AMD_IP_BLOCK_TYPE_SMC)
2569 r = block->version->funcs->resume(adev);
2570 else
2571 r = block->version->funcs->hw_init(adev);
2572
0aaeefcc 2573 DRM_INFO("RE-INIT-late: %s %s\n", block->version->funcs->name, r?"failed":"succeeded");
c41d1cf6
ML
2574 if (r)
2575 return r;
482f0e53 2576 block->status.hw = true;
a90ad3c2
ML
2577 }
2578 }
2579
2580 return 0;
2581}
2582
e3ecdffa
AD
2583/**
2584 * amdgpu_device_ip_resume_phase1 - run resume for hardware IPs
2585 *
2586 * @adev: amdgpu_device pointer
2587 *
2588 * First resume function for hardware IPs. The list of all the hardware
2589 * IPs that make up the asic is walked and the resume callbacks are run for
2590 * COMMON, GMC, and IH. resume puts the hardware into a functional state
2591 * after a suspend and updates the software state as necessary. This
2592 * function is also used for restoring the GPU after a GPU reset.
2593 * Returns 0 on success, negative error code on failure.
2594 */
06ec9070 2595static int amdgpu_device_ip_resume_phase1(struct amdgpu_device *adev)
d38ceaf9
AD
2596{
2597 int i, r;
2598
a90ad3c2 2599 for (i = 0; i < adev->num_ip_blocks; i++) {
482f0e53 2600 if (!adev->ip_blocks[i].status.valid || adev->ip_blocks[i].status.hw)
a90ad3c2 2601 continue;
a90ad3c2 2602 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
e3ecdffa
AD
2603 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
2604 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH) {
482f0e53 2605
fcf0649f
CZ
2606 r = adev->ip_blocks[i].version->funcs->resume(adev);
2607 if (r) {
2608 DRM_ERROR("resume of IP block <%s> failed %d\n",
2609 adev->ip_blocks[i].version->funcs->name, r);
2610 return r;
2611 }
482f0e53 2612 adev->ip_blocks[i].status.hw = true;
a90ad3c2
ML
2613 }
2614 }
2615
2616 return 0;
2617}
2618
e3ecdffa
AD
2619/**
2620 * amdgpu_device_ip_resume_phase2 - run resume for hardware IPs
2621 *
2622 * @adev: amdgpu_device pointer
2623 *
2624 * First resume function for hardware IPs. The list of all the hardware
2625 * IPs that make up the asic is walked and the resume callbacks are run for
2626 * all blocks except COMMON, GMC, and IH. resume puts the hardware into a
2627 * functional state after a suspend and updates the software state as
2628 * necessary. This function is also used for restoring the GPU after a GPU
2629 * reset.
2630 * Returns 0 on success, negative error code on failure.
2631 */
06ec9070 2632static int amdgpu_device_ip_resume_phase2(struct amdgpu_device *adev)
d38ceaf9
AD
2633{
2634 int i, r;
2635
2636 for (i = 0; i < adev->num_ip_blocks; i++) {
482f0e53 2637 if (!adev->ip_blocks[i].status.valid || adev->ip_blocks[i].status.hw)
d38ceaf9 2638 continue;
fcf0649f 2639 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
e3ecdffa 2640 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
7a3e0bb2
RZ
2641 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH ||
2642 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP)
fcf0649f 2643 continue;
a1255107 2644 r = adev->ip_blocks[i].version->funcs->resume(adev);
2c1a2784 2645 if (r) {
a1255107
AD
2646 DRM_ERROR("resume of IP block <%s> failed %d\n",
2647 adev->ip_blocks[i].version->funcs->name, r);
d38ceaf9 2648 return r;
2c1a2784 2649 }
482f0e53 2650 adev->ip_blocks[i].status.hw = true;
d38ceaf9
AD
2651 }
2652
2653 return 0;
2654}
2655
e3ecdffa
AD
2656/**
2657 * amdgpu_device_ip_resume - run resume for hardware IPs
2658 *
2659 * @adev: amdgpu_device pointer
2660 *
2661 * Main resume function for hardware IPs. The hardware IPs
2662 * are split into two resume functions because they are
2663 * are also used in in recovering from a GPU reset and some additional
2664 * steps need to be take between them. In this case (S3/S4) they are
2665 * run sequentially.
2666 * Returns 0 on success, negative error code on failure.
2667 */
06ec9070 2668static int amdgpu_device_ip_resume(struct amdgpu_device *adev)
fcf0649f
CZ
2669{
2670 int r;
2671
06ec9070 2672 r = amdgpu_device_ip_resume_phase1(adev);
fcf0649f
CZ
2673 if (r)
2674 return r;
7a3e0bb2
RZ
2675
2676 r = amdgpu_device_fw_loading(adev);
2677 if (r)
2678 return r;
2679
06ec9070 2680 r = amdgpu_device_ip_resume_phase2(adev);
fcf0649f
CZ
2681
2682 return r;
2683}
2684
e3ecdffa
AD
2685/**
2686 * amdgpu_device_detect_sriov_bios - determine if the board supports SR-IOV
2687 *
2688 * @adev: amdgpu_device pointer
2689 *
2690 * Query the VBIOS data tables to determine if the board supports SR-IOV.
2691 */
4e99a44e 2692static void amdgpu_device_detect_sriov_bios(struct amdgpu_device *adev)
048765ad 2693{
6867e1b5
ML
2694 if (amdgpu_sriov_vf(adev)) {
2695 if (adev->is_atom_fw) {
2696 if (amdgpu_atomfirmware_gpu_supports_virtualization(adev))
2697 adev->virt.caps |= AMDGPU_SRIOV_CAPS_SRIOV_VBIOS;
2698 } else {
2699 if (amdgpu_atombios_has_gpu_virtualization_table(adev))
2700 adev->virt.caps |= AMDGPU_SRIOV_CAPS_SRIOV_VBIOS;
2701 }
2702
2703 if (!(adev->virt.caps & AMDGPU_SRIOV_CAPS_SRIOV_VBIOS))
2704 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_NO_VBIOS, 0, 0);
a5bde2f9 2705 }
048765ad
AR
2706}
2707
e3ecdffa
AD
2708/**
2709 * amdgpu_device_asic_has_dc_support - determine if DC supports the asic
2710 *
2711 * @asic_type: AMD asic type
2712 *
2713 * Check if there is DC (new modesetting infrastructre) support for an asic.
2714 * returns true if DC has support, false if not.
2715 */
4562236b
HW
2716bool amdgpu_device_asic_has_dc_support(enum amd_asic_type asic_type)
2717{
2718 switch (asic_type) {
2719#if defined(CONFIG_DRM_AMD_DC)
2720 case CHIP_BONAIRE:
0d6fbccb 2721 case CHIP_KAVERI:
367e6687
AD
2722 case CHIP_KABINI:
2723 case CHIP_MULLINS:
d9fda248
HW
2724 /*
2725 * We have systems in the wild with these ASICs that require
2726 * LVDS and VGA support which is not supported with DC.
2727 *
2728 * Fallback to the non-DC driver here by default so as not to
2729 * cause regressions.
2730 */
2731 return amdgpu_dc > 0;
2732 case CHIP_HAWAII:
4562236b
HW
2733 case CHIP_CARRIZO:
2734 case CHIP_STONEY:
4562236b 2735 case CHIP_POLARIS10:
675fd32b 2736 case CHIP_POLARIS11:
2c8ad2d5 2737 case CHIP_POLARIS12:
675fd32b 2738 case CHIP_VEGAM:
4562236b
HW
2739 case CHIP_TONGA:
2740 case CHIP_FIJI:
42f8ffa1 2741 case CHIP_VEGA10:
dca7b401 2742 case CHIP_VEGA12:
c6034aa2 2743 case CHIP_VEGA20:
b86a1aa3 2744#if defined(CONFIG_DRM_AMD_DC_DCN)
fd187853 2745 case CHIP_RAVEN:
b4f199c7 2746 case CHIP_NAVI10:
8fceceb6 2747 case CHIP_NAVI14:
078655d9 2748 case CHIP_NAVI12:
e1c14c43 2749 case CHIP_RENOIR:
42f8ffa1 2750#endif
fd187853 2751 return amdgpu_dc != 0;
4562236b
HW
2752#endif
2753 default:
93b09a9a
SS
2754 if (amdgpu_dc > 0)
2755 DRM_INFO("Display Core has been requested via kernel parameter "
2756 "but isn't supported by ASIC, ignoring\n");
4562236b
HW
2757 return false;
2758 }
2759}
2760
2761/**
2762 * amdgpu_device_has_dc_support - check if dc is supported
2763 *
2764 * @adev: amdgpu_device_pointer
2765 *
2766 * Returns true for supported, false for not supported
2767 */
2768bool amdgpu_device_has_dc_support(struct amdgpu_device *adev)
2769{
2555039d
XY
2770 if (amdgpu_sriov_vf(adev))
2771 return false;
2772
4562236b
HW
2773 return amdgpu_device_asic_has_dc_support(adev->asic_type);
2774}
2775
d4535e2c
AG
2776
2777static void amdgpu_device_xgmi_reset_func(struct work_struct *__work)
2778{
2779 struct amdgpu_device *adev =
2780 container_of(__work, struct amdgpu_device, xgmi_reset_work);
c6a6e2db 2781 struct amdgpu_hive_info *hive = amdgpu_get_xgmi_hive(adev, 0);
d4535e2c 2782
c6a6e2db
AG
2783 /* It's a bug to not have a hive within this function */
2784 if (WARN_ON(!hive))
2785 return;
2786
2787 /*
2788 * Use task barrier to synchronize all xgmi reset works across the
2789 * hive. task_barrier_enter and task_barrier_exit will block
2790 * until all the threads running the xgmi reset works reach
2791 * those points. task_barrier_full will do both blocks.
2792 */
2793 if (amdgpu_asic_reset_method(adev) == AMD_RESET_METHOD_BACO) {
2794
2795 task_barrier_enter(&hive->tb);
2796 adev->asic_reset_res = amdgpu_device_baco_enter(adev->ddev);
2797
2798 if (adev->asic_reset_res)
2799 goto fail;
2800
2801 task_barrier_exit(&hive->tb);
2802 adev->asic_reset_res = amdgpu_device_baco_exit(adev->ddev);
2803
2804 if (adev->asic_reset_res)
2805 goto fail;
43c4d576
JC
2806
2807 if (adev->mmhub.funcs && adev->mmhub.funcs->reset_ras_error_count)
2808 adev->mmhub.funcs->reset_ras_error_count(adev);
c6a6e2db
AG
2809 } else {
2810
2811 task_barrier_full(&hive->tb);
2812 adev->asic_reset_res = amdgpu_asic_reset(adev);
2813 }
ce316fa5 2814
c6a6e2db 2815fail:
d4535e2c 2816 if (adev->asic_reset_res)
fed184e9 2817 DRM_WARN("ASIC reset failed with error, %d for drm dev, %s",
d4535e2c
AG
2818 adev->asic_reset_res, adev->ddev->unique);
2819}
2820
71f98027
AD
2821static int amdgpu_device_get_job_timeout_settings(struct amdgpu_device *adev)
2822{
2823 char *input = amdgpu_lockup_timeout;
2824 char *timeout_setting = NULL;
2825 int index = 0;
2826 long timeout;
2827 int ret = 0;
2828
2829 /*
2830 * By default timeout for non compute jobs is 10000.
2831 * And there is no timeout enforced on compute jobs.
2832 * In SR-IOV or passthrough mode, timeout for compute
b7b2a316 2833 * jobs are 60000 by default.
71f98027
AD
2834 */
2835 adev->gfx_timeout = msecs_to_jiffies(10000);
2836 adev->sdma_timeout = adev->video_timeout = adev->gfx_timeout;
2837 if (amdgpu_sriov_vf(adev) || amdgpu_passthrough(adev))
b7b2a316 2838 adev->compute_timeout = msecs_to_jiffies(60000);
71f98027
AD
2839 else
2840 adev->compute_timeout = MAX_SCHEDULE_TIMEOUT;
2841
f440ff44 2842 if (strnlen(input, AMDGPU_MAX_TIMEOUT_PARAM_LENGTH)) {
71f98027 2843 while ((timeout_setting = strsep(&input, ",")) &&
f440ff44 2844 strnlen(timeout_setting, AMDGPU_MAX_TIMEOUT_PARAM_LENGTH)) {
71f98027
AD
2845 ret = kstrtol(timeout_setting, 0, &timeout);
2846 if (ret)
2847 return ret;
2848
2849 if (timeout == 0) {
2850 index++;
2851 continue;
2852 } else if (timeout < 0) {
2853 timeout = MAX_SCHEDULE_TIMEOUT;
2854 } else {
2855 timeout = msecs_to_jiffies(timeout);
2856 }
2857
2858 switch (index++) {
2859 case 0:
2860 adev->gfx_timeout = timeout;
2861 break;
2862 case 1:
2863 adev->compute_timeout = timeout;
2864 break;
2865 case 2:
2866 adev->sdma_timeout = timeout;
2867 break;
2868 case 3:
2869 adev->video_timeout = timeout;
2870 break;
2871 default:
2872 break;
2873 }
2874 }
2875 /*
2876 * There is only one value specified and
2877 * it should apply to all non-compute jobs.
2878 */
bcccee89 2879 if (index == 1) {
71f98027 2880 adev->sdma_timeout = adev->video_timeout = adev->gfx_timeout;
bcccee89
ED
2881 if (amdgpu_sriov_vf(adev) || amdgpu_passthrough(adev))
2882 adev->compute_timeout = adev->gfx_timeout;
2883 }
71f98027
AD
2884 }
2885
2886 return ret;
2887}
d4535e2c 2888
d38ceaf9
AD
2889/**
2890 * amdgpu_device_init - initialize the driver
2891 *
2892 * @adev: amdgpu_device pointer
87e3f136 2893 * @ddev: drm dev pointer
d38ceaf9
AD
2894 * @pdev: pci dev pointer
2895 * @flags: driver flags
2896 *
2897 * Initializes the driver info and hw (all asics).
2898 * Returns 0 for success or an error on failure.
2899 * Called at driver startup.
2900 */
2901int amdgpu_device_init(struct amdgpu_device *adev,
2902 struct drm_device *ddev,
2903 struct pci_dev *pdev,
2904 uint32_t flags)
2905{
2906 int r, i;
3840c5bc 2907 bool boco = false;
95844d20 2908 u32 max_MBps;
d38ceaf9
AD
2909
2910 adev->shutdown = false;
2911 adev->dev = &pdev->dev;
2912 adev->ddev = ddev;
2913 adev->pdev = pdev;
2914 adev->flags = flags;
4e66d7d2
YZ
2915
2916 if (amdgpu_force_asic_type >= 0 && amdgpu_force_asic_type < CHIP_LAST)
2917 adev->asic_type = amdgpu_force_asic_type;
2918 else
2919 adev->asic_type = flags & AMD_ASIC_MASK;
2920
d38ceaf9 2921 adev->usec_timeout = AMDGPU_MAX_USEC_TIMEOUT;
593aa2d2 2922 if (amdgpu_emu_mode == 1)
8bdab6bb 2923 adev->usec_timeout *= 10;
770d13b1 2924 adev->gmc.gart_size = 512 * 1024 * 1024;
d38ceaf9
AD
2925 adev->accel_working = false;
2926 adev->num_rings = 0;
2927 adev->mman.buffer_funcs = NULL;
2928 adev->mman.buffer_funcs_ring = NULL;
2929 adev->vm_manager.vm_pte_funcs = NULL;
0c88b430 2930 adev->vm_manager.vm_pte_num_scheds = 0;
132f34e4 2931 adev->gmc.gmc_funcs = NULL;
f54d1867 2932 adev->fence_context = dma_fence_context_alloc(AMDGPU_MAX_RINGS);
b8866c26 2933 bitmap_zero(adev->gfx.pipe_reserve_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
d38ceaf9
AD
2934
2935 adev->smc_rreg = &amdgpu_invalid_rreg;
2936 adev->smc_wreg = &amdgpu_invalid_wreg;
2937 adev->pcie_rreg = &amdgpu_invalid_rreg;
2938 adev->pcie_wreg = &amdgpu_invalid_wreg;
36b9a952
HR
2939 adev->pciep_rreg = &amdgpu_invalid_rreg;
2940 adev->pciep_wreg = &amdgpu_invalid_wreg;
4fa1c6a6
TZ
2941 adev->pcie_rreg64 = &amdgpu_invalid_rreg64;
2942 adev->pcie_wreg64 = &amdgpu_invalid_wreg64;
d38ceaf9
AD
2943 adev->uvd_ctx_rreg = &amdgpu_invalid_rreg;
2944 adev->uvd_ctx_wreg = &amdgpu_invalid_wreg;
2945 adev->didt_rreg = &amdgpu_invalid_rreg;
2946 adev->didt_wreg = &amdgpu_invalid_wreg;
ccdbb20a
RZ
2947 adev->gc_cac_rreg = &amdgpu_invalid_rreg;
2948 adev->gc_cac_wreg = &amdgpu_invalid_wreg;
d38ceaf9
AD
2949 adev->audio_endpt_rreg = &amdgpu_block_invalid_rreg;
2950 adev->audio_endpt_wreg = &amdgpu_block_invalid_wreg;
2951
3e39ab90
AD
2952 DRM_INFO("initializing kernel modesetting (%s 0x%04X:0x%04X 0x%04X:0x%04X 0x%02X).\n",
2953 amdgpu_asic_name[adev->asic_type], pdev->vendor, pdev->device,
2954 pdev->subsystem_vendor, pdev->subsystem_device, pdev->revision);
d38ceaf9
AD
2955
2956 /* mutex initialization are all done here so we
2957 * can recall function without having locking issues */
d38ceaf9 2958 atomic_set(&adev->irq.ih.lock, 0);
0e5ca0d1 2959 mutex_init(&adev->firmware.mutex);
d38ceaf9
AD
2960 mutex_init(&adev->pm.mutex);
2961 mutex_init(&adev->gfx.gpu_clock_mutex);
2962 mutex_init(&adev->srbm_mutex);
b8866c26 2963 mutex_init(&adev->gfx.pipe_reserve_mutex);
d23ee13f 2964 mutex_init(&adev->gfx.gfx_off_mutex);
d38ceaf9 2965 mutex_init(&adev->grbm_idx_mutex);
d38ceaf9 2966 mutex_init(&adev->mn_lock);
e23b74aa 2967 mutex_init(&adev->virt.vf_errors.lock);
d38ceaf9 2968 hash_init(adev->mn_hash);
13a752e3 2969 mutex_init(&adev->lock_reset);
32eaeae0 2970 mutex_init(&adev->psp.mutex);
bd052211 2971 mutex_init(&adev->notifier_lock);
d38ceaf9 2972
912dfc84
EQ
2973 r = amdgpu_device_check_arguments(adev);
2974 if (r)
2975 return r;
d38ceaf9 2976
d38ceaf9
AD
2977 spin_lock_init(&adev->mmio_idx_lock);
2978 spin_lock_init(&adev->smc_idx_lock);
2979 spin_lock_init(&adev->pcie_idx_lock);
2980 spin_lock_init(&adev->uvd_ctx_idx_lock);
2981 spin_lock_init(&adev->didt_idx_lock);
ccdbb20a 2982 spin_lock_init(&adev->gc_cac_idx_lock);
16abb5d2 2983 spin_lock_init(&adev->se_cac_idx_lock);
d38ceaf9 2984 spin_lock_init(&adev->audio_endpt_idx_lock);
95844d20 2985 spin_lock_init(&adev->mm_stats.lock);
d38ceaf9 2986
0c4e7fa5
CZ
2987 INIT_LIST_HEAD(&adev->shadow_list);
2988 mutex_init(&adev->shadow_list_lock);
2989
795f2813
AR
2990 INIT_LIST_HEAD(&adev->ring_lru_list);
2991 spin_lock_init(&adev->ring_lru_list_lock);
2992
beff74bc
AD
2993 INIT_DELAYED_WORK(&adev->delayed_init_work,
2994 amdgpu_device_delayed_init_work_handler);
1e317b99
RZ
2995 INIT_DELAYED_WORK(&adev->gfx.gfx_off_delay_work,
2996 amdgpu_device_delay_enable_gfx_off);
2dc80b00 2997
d4535e2c
AG
2998 INIT_WORK(&adev->xgmi_reset_work, amdgpu_device_xgmi_reset_func);
2999
d23ee13f 3000 adev->gfx.gfx_off_req_count = 1;
b1ddf548
RZ
3001 adev->pm.ac_power = power_supply_is_system_supplied() > 0 ? true : false;
3002
0fa49558
AX
3003 /* Registers mapping */
3004 /* TODO: block userspace mapping of io register */
da69c161
KW
3005 if (adev->asic_type >= CHIP_BONAIRE) {
3006 adev->rmmio_base = pci_resource_start(adev->pdev, 5);
3007 adev->rmmio_size = pci_resource_len(adev->pdev, 5);
3008 } else {
3009 adev->rmmio_base = pci_resource_start(adev->pdev, 2);
3010 adev->rmmio_size = pci_resource_len(adev->pdev, 2);
3011 }
d38ceaf9 3012
d38ceaf9
AD
3013 adev->rmmio = ioremap(adev->rmmio_base, adev->rmmio_size);
3014 if (adev->rmmio == NULL) {
3015 return -ENOMEM;
3016 }
3017 DRM_INFO("register mmio base: 0x%08X\n", (uint32_t)adev->rmmio_base);
3018 DRM_INFO("register mmio size: %u\n", (unsigned)adev->rmmio_size);
3019
d38ceaf9
AD
3020 /* io port mapping */
3021 for (i = 0; i < DEVICE_COUNT_RESOURCE; i++) {
3022 if (pci_resource_flags(adev->pdev, i) & IORESOURCE_IO) {
3023 adev->rio_mem_size = pci_resource_len(adev->pdev, i);
3024 adev->rio_mem = pci_iomap(adev->pdev, i, adev->rio_mem_size);
3025 break;
3026 }
3027 }
3028 if (adev->rio_mem == NULL)
b64a18c5 3029 DRM_INFO("PCI I/O BAR is not found.\n");
d38ceaf9 3030
b2109d8e
JX
3031 /* enable PCIE atomic ops */
3032 r = pci_enable_atomic_ops_to_root(adev->pdev,
3033 PCI_EXP_DEVCAP2_ATOMIC_COMP32 |
3034 PCI_EXP_DEVCAP2_ATOMIC_COMP64);
3035 if (r) {
3036 adev->have_atomics_support = false;
3037 DRM_INFO("PCIE atomic ops is not supported\n");
3038 } else {
3039 adev->have_atomics_support = true;
3040 }
3041
5494d864
AD
3042 amdgpu_device_get_pcie_info(adev);
3043
b239c017
JX
3044 if (amdgpu_mcbp)
3045 DRM_INFO("MCBP is enabled\n");
3046
5f84cc63
JX
3047 if (amdgpu_mes && adev->asic_type >= CHIP_NAVI10)
3048 adev->enable_mes = true;
3049
3aa0115d
ML
3050 /* detect hw virtualization here */
3051 amdgpu_detect_virtualization(adev);
3052
dffa11b4
ML
3053 r = amdgpu_device_get_job_timeout_settings(adev);
3054 if (r) {
3055 dev_err(adev->dev, "invalid lockup_timeout parameter syntax\n");
3056 return r;
a190d1c7
XY
3057 }
3058
d38ceaf9 3059 /* early init functions */
06ec9070 3060 r = amdgpu_device_ip_early_init(adev);
d38ceaf9
AD
3061 if (r)
3062 return r;
3063
6585661d
OZ
3064 /* doorbell bar mapping and doorbell index init*/
3065 amdgpu_device_doorbell_init(adev);
3066
d38ceaf9
AD
3067 /* if we have > 1 VGA cards, then disable the amdgpu VGA resources */
3068 /* this will fail for cards that aren't VGA class devices, just
3069 * ignore it */
06ec9070 3070 vga_client_register(adev->pdev, adev, NULL, amdgpu_device_vga_set_decode);
d38ceaf9 3071
31af062a 3072 if (amdgpu_device_supports_boco(ddev))
3840c5bc
AD
3073 boco = true;
3074 if (amdgpu_has_atpx() &&
3075 (amdgpu_is_atpx_hybrid() ||
3076 amdgpu_has_atpx_dgpu_power_cntl()) &&
3077 !pci_is_thunderbolt_attached(adev->pdev))
84c8b22e 3078 vga_switcheroo_register_client(adev->pdev,
3840c5bc
AD
3079 &amdgpu_switcheroo_ops, boco);
3080 if (boco)
d38ceaf9
AD
3081 vga_switcheroo_init_domain_pm_ops(adev->dev, &adev->vga_pm_domain);
3082
9475a943
SL
3083 if (amdgpu_emu_mode == 1) {
3084 /* post the asic on emulation mode */
3085 emu_soc_asic_init(adev);
bfca0289 3086 goto fence_driver_init;
9475a943 3087 }
bfca0289 3088
4e99a44e
ML
3089 /* detect if we are with an SRIOV vbios */
3090 amdgpu_device_detect_sriov_bios(adev);
048765ad 3091
95e8e59e
AD
3092 /* check if we need to reset the asic
3093 * E.g., driver was not cleanly unloaded previously, etc.
3094 */
f14899fd 3095 if (!amdgpu_sriov_vf(adev) && amdgpu_asic_need_reset_on_init(adev)) {
95e8e59e
AD
3096 r = amdgpu_asic_reset(adev);
3097 if (r) {
3098 dev_err(adev->dev, "asic reset on init failed\n");
3099 goto failed;
3100 }
3101 }
3102
d38ceaf9 3103 /* Post card if necessary */
39c640c0 3104 if (amdgpu_device_need_post(adev)) {
d38ceaf9 3105 if (!adev->bios) {
bec86378 3106 dev_err(adev->dev, "no vBIOS found\n");
83ba126a
AD
3107 r = -EINVAL;
3108 goto failed;
d38ceaf9 3109 }
bec86378 3110 DRM_INFO("GPU posting now...\n");
4e99a44e
ML
3111 r = amdgpu_atom_asic_init(adev->mode_info.atom_context);
3112 if (r) {
3113 dev_err(adev->dev, "gpu post error!\n");
3114 goto failed;
3115 }
d38ceaf9
AD
3116 }
3117
88b64e95
AD
3118 if (adev->is_atom_fw) {
3119 /* Initialize clocks */
3120 r = amdgpu_atomfirmware_get_clock_info(adev);
3121 if (r) {
3122 dev_err(adev->dev, "amdgpu_atomfirmware_get_clock_info failed\n");
e23b74aa 3123 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_GET_CLOCK_FAIL, 0, 0);
88b64e95
AD
3124 goto failed;
3125 }
3126 } else {
a5bde2f9
AD
3127 /* Initialize clocks */
3128 r = amdgpu_atombios_get_clock_info(adev);
3129 if (r) {
3130 dev_err(adev->dev, "amdgpu_atombios_get_clock_info failed\n");
e23b74aa 3131 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_GET_CLOCK_FAIL, 0, 0);
89041940 3132 goto failed;
a5bde2f9
AD
3133 }
3134 /* init i2c buses */
4562236b
HW
3135 if (!amdgpu_device_has_dc_support(adev))
3136 amdgpu_atombios_i2c_init(adev);
2c1a2784 3137 }
d38ceaf9 3138
bfca0289 3139fence_driver_init:
d38ceaf9
AD
3140 /* Fence driver */
3141 r = amdgpu_fence_driver_init(adev);
2c1a2784
AD
3142 if (r) {
3143 dev_err(adev->dev, "amdgpu_fence_driver_init failed\n");
e23b74aa 3144 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_FENCE_INIT_FAIL, 0, 0);
83ba126a 3145 goto failed;
2c1a2784 3146 }
d38ceaf9
AD
3147
3148 /* init the mode config */
3149 drm_mode_config_init(adev->ddev);
3150
06ec9070 3151 r = amdgpu_device_ip_init(adev);
d38ceaf9 3152 if (r) {
8840a387 3153 /* failed in exclusive mode due to timeout */
3154 if (amdgpu_sriov_vf(adev) &&
3155 !amdgpu_sriov_runtime(adev) &&
3156 amdgpu_virt_mmio_blocked(adev) &&
3157 !amdgpu_virt_wait_reset(adev)) {
3158 dev_err(adev->dev, "VF exclusive mode timeout\n");
1daee8b4
PD
3159 /* Don't send request since VF is inactive. */
3160 adev->virt.caps &= ~AMDGPU_SRIOV_CAPS_RUNTIME;
3161 adev->virt.ops = NULL;
8840a387 3162 r = -EAGAIN;
3163 goto failed;
3164 }
06ec9070 3165 dev_err(adev->dev, "amdgpu_device_ip_init failed\n");
e23b74aa 3166 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_AMDGPU_INIT_FAIL, 0, 0);
83ba126a 3167 goto failed;
d38ceaf9
AD
3168 }
3169
d7f72fe4
YZ
3170 DRM_DEBUG("SE %d, SH per SE %d, CU per SH %d, active_cu_number %d\n",
3171 adev->gfx.config.max_shader_engines,
3172 adev->gfx.config.max_sh_per_se,
3173 adev->gfx.config.max_cu_per_sh,
3174 adev->gfx.cu_info.number);
3175
d38ceaf9
AD
3176 adev->accel_working = true;
3177
e59c0205
AX
3178 amdgpu_vm_check_compute_bug(adev);
3179
95844d20
MO
3180 /* Initialize the buffer migration limit. */
3181 if (amdgpu_moverate >= 0)
3182 max_MBps = amdgpu_moverate;
3183 else
3184 max_MBps = 8; /* Allow 8 MB/s. */
3185 /* Get a log2 for easy divisions. */
3186 adev->mm_stats.log2_max_MBps = ilog2(max(1u, max_MBps));
3187
9bc92b9c
ML
3188 amdgpu_fbdev_init(adev);
3189
d2f52ac8 3190 r = amdgpu_pm_sysfs_init(adev);
7c868b59
YT
3191 if (r) {
3192 adev->pm_sysfs_en = false;
d2f52ac8 3193 DRM_ERROR("registering pm debugfs failed (%d).\n", r);
7c868b59
YT
3194 } else
3195 adev->pm_sysfs_en = true;
d2f52ac8 3196
5bb23532 3197 r = amdgpu_ucode_sysfs_init(adev);
7c868b59
YT
3198 if (r) {
3199 adev->ucode_sysfs_en = false;
5bb23532 3200 DRM_ERROR("Creating firmware sysfs failed (%d).\n", r);
7c868b59
YT
3201 } else
3202 adev->ucode_sysfs_en = true;
5bb23532 3203
d38ceaf9
AD
3204 if ((amdgpu_testing & 1)) {
3205 if (adev->accel_working)
3206 amdgpu_test_moves(adev);
3207 else
3208 DRM_INFO("amdgpu: acceleration disabled, skipping move tests\n");
3209 }
d38ceaf9
AD
3210 if (amdgpu_benchmarking) {
3211 if (adev->accel_working)
3212 amdgpu_benchmark(adev, amdgpu_benchmarking);
3213 else
3214 DRM_INFO("amdgpu: acceleration disabled, skipping benchmarks\n");
3215 }
3216
b0adca4d
EQ
3217 /*
3218 * Register gpu instance before amdgpu_device_enable_mgpu_fan_boost.
3219 * Otherwise the mgpu fan boost feature will be skipped due to the
3220 * gpu instance is counted less.
3221 */
3222 amdgpu_register_gpu_instance(adev);
3223
d38ceaf9
AD
3224 /* enable clockgating, etc. after ib tests, etc. since some blocks require
3225 * explicit gating rather than handling it automatically.
3226 */
06ec9070 3227 r = amdgpu_device_ip_late_init(adev);
2c1a2784 3228 if (r) {
06ec9070 3229 dev_err(adev->dev, "amdgpu_device_ip_late_init failed\n");
e23b74aa 3230 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_AMDGPU_LATE_INIT_FAIL, 0, r);
83ba126a 3231 goto failed;
2c1a2784 3232 }
d38ceaf9 3233
108c6a63 3234 /* must succeed. */
511fdbc3 3235 amdgpu_ras_resume(adev);
108c6a63 3236
beff74bc
AD
3237 queue_delayed_work(system_wq, &adev->delayed_init_work,
3238 msecs_to_jiffies(AMDGPU_RESUME_MS));
3239
dcea6e65
KR
3240 r = device_create_file(adev->dev, &dev_attr_pcie_replay_count);
3241 if (r) {
3242 dev_err(adev->dev, "Could not create pcie_replay_count");
3243 return r;
3244 }
108c6a63 3245
bd607166
KR
3246 r = device_create_file(adev->dev, &dev_attr_product_name);
3247 if (r) {
3248 dev_err(adev->dev, "Could not create product_name");
3249 return r;
3250 }
3251
3252 r = device_create_file(adev->dev, &dev_attr_product_number);
3253 if (r) {
3254 dev_err(adev->dev, "Could not create product_number");
3255 return r;
3256 }
3257
3258 r = device_create_file(adev->dev, &dev_attr_serial_number);
3259 if (r) {
3260 dev_err(adev->dev, "Could not create serial_number");
3261 return r;
3262 }
3263
d155bef0
AB
3264 if (IS_ENABLED(CONFIG_PERF_EVENTS))
3265 r = amdgpu_pmu_init(adev);
9c7c85f7
JK
3266 if (r)
3267 dev_err(adev->dev, "amdgpu_pmu_init failed\n");
3268
d38ceaf9 3269 return 0;
83ba126a
AD
3270
3271failed:
89041940 3272 amdgpu_vf_error_trans_all(adev);
3840c5bc 3273 if (boco)
83ba126a 3274 vga_switcheroo_fini_domain_pm_ops(adev->dev);
8840a387 3275
83ba126a 3276 return r;
d38ceaf9
AD
3277}
3278
d38ceaf9
AD
3279/**
3280 * amdgpu_device_fini - tear down the driver
3281 *
3282 * @adev: amdgpu_device pointer
3283 *
3284 * Tear down the driver info (all asics).
3285 * Called at driver shutdown.
3286 */
3287void amdgpu_device_fini(struct amdgpu_device *adev)
3288{
3289 int r;
3290
3291 DRM_INFO("amdgpu: finishing device.\n");
9f875167 3292 flush_delayed_work(&adev->delayed_init_work);
d0d13fe8 3293 adev->shutdown = true;
9f875167 3294
752c683d
ML
3295 /* make sure IB test finished before entering exclusive mode
3296 * to avoid preemption on IB test
3297 * */
3298 if (amdgpu_sriov_vf(adev))
3299 amdgpu_virt_request_full_gpu(adev, false);
3300
e5b03032
ML
3301 /* disable all interrupts */
3302 amdgpu_irq_disable_all(adev);
ff97cba8
ML
3303 if (adev->mode_info.mode_config_initialized){
3304 if (!amdgpu_device_has_dc_support(adev))
c2d88e06 3305 drm_helper_force_disable_all(adev->ddev);
ff97cba8
ML
3306 else
3307 drm_atomic_helper_shutdown(adev->ddev);
3308 }
d38ceaf9 3309 amdgpu_fence_driver_fini(adev);
7c868b59
YT
3310 if (adev->pm_sysfs_en)
3311 amdgpu_pm_sysfs_fini(adev);
d38ceaf9 3312 amdgpu_fbdev_fini(adev);
06ec9070 3313 r = amdgpu_device_ip_fini(adev);
ab4fe3e1
HR
3314 if (adev->firmware.gpu_info_fw) {
3315 release_firmware(adev->firmware.gpu_info_fw);
3316 adev->firmware.gpu_info_fw = NULL;
3317 }
d38ceaf9
AD
3318 adev->accel_working = false;
3319 /* free i2c buses */
4562236b
HW
3320 if (!amdgpu_device_has_dc_support(adev))
3321 amdgpu_i2c_fini(adev);
bfca0289
SL
3322
3323 if (amdgpu_emu_mode != 1)
3324 amdgpu_atombios_fini(adev);
3325
d38ceaf9
AD
3326 kfree(adev->bios);
3327 adev->bios = NULL;
3840c5bc
AD
3328 if (amdgpu_has_atpx() &&
3329 (amdgpu_is_atpx_hybrid() ||
3330 amdgpu_has_atpx_dgpu_power_cntl()) &&
3331 !pci_is_thunderbolt_attached(adev->pdev))
84c8b22e 3332 vga_switcheroo_unregister_client(adev->pdev);
3840c5bc 3333 if (amdgpu_device_supports_boco(adev->ddev))
83ba126a 3334 vga_switcheroo_fini_domain_pm_ops(adev->dev);
d38ceaf9
AD
3335 vga_client_register(adev->pdev, NULL, NULL, NULL);
3336 if (adev->rio_mem)
3337 pci_iounmap(adev->pdev, adev->rio_mem);
3338 adev->rio_mem = NULL;
3339 iounmap(adev->rmmio);
3340 adev->rmmio = NULL;
06ec9070 3341 amdgpu_device_doorbell_fini(adev);
e9bc1bf7 3342
dcea6e65 3343 device_remove_file(adev->dev, &dev_attr_pcie_replay_count);
7c868b59
YT
3344 if (adev->ucode_sysfs_en)
3345 amdgpu_ucode_sysfs_fini(adev);
bd607166
KR
3346 device_remove_file(adev->dev, &dev_attr_product_name);
3347 device_remove_file(adev->dev, &dev_attr_product_number);
3348 device_remove_file(adev->dev, &dev_attr_serial_number);
d155bef0
AB
3349 if (IS_ENABLED(CONFIG_PERF_EVENTS))
3350 amdgpu_pmu_fini(adev);
f54eeab4 3351 if (amdgpu_discovery && adev->asic_type >= CHIP_NAVI10)
a190d1c7 3352 amdgpu_discovery_fini(adev);
d38ceaf9
AD
3353}
3354
3355
3356/*
3357 * Suspend & resume.
3358 */
3359/**
810ddc3a 3360 * amdgpu_device_suspend - initiate device suspend
d38ceaf9 3361 *
87e3f136
DP
3362 * @dev: drm dev pointer
3363 * @suspend: suspend state
3364 * @fbcon : notify the fbdev of suspend
d38ceaf9
AD
3365 *
3366 * Puts the hw in the suspend state (all asics).
3367 * Returns 0 for success or an error on failure.
3368 * Called at driver suspend.
3369 */
de185019 3370int amdgpu_device_suspend(struct drm_device *dev, bool fbcon)
d38ceaf9
AD
3371{
3372 struct amdgpu_device *adev;
3373 struct drm_crtc *crtc;
3374 struct drm_connector *connector;
f8d2d39e 3375 struct drm_connector_list_iter iter;
5ceb54c6 3376 int r;
d38ceaf9
AD
3377
3378 if (dev == NULL || dev->dev_private == NULL) {
3379 return -ENODEV;
3380 }
3381
3382 adev = dev->dev_private;
3383
3384 if (dev->switch_power_state == DRM_SWITCH_POWER_OFF)
3385 return 0;
3386
44779b43 3387 adev->in_suspend = true;
d38ceaf9
AD
3388 drm_kms_helper_poll_disable(dev);
3389
5f818173
S
3390 if (fbcon)
3391 amdgpu_fbdev_set_suspend(adev, 1);
3392
beff74bc 3393 cancel_delayed_work_sync(&adev->delayed_init_work);
a5459475 3394
4562236b
HW
3395 if (!amdgpu_device_has_dc_support(adev)) {
3396 /* turn off display hw */
3397 drm_modeset_lock_all(dev);
f8d2d39e
LP
3398 drm_connector_list_iter_begin(dev, &iter);
3399 drm_for_each_connector_iter(connector, &iter)
3400 drm_helper_connector_dpms(connector,
3401 DRM_MODE_DPMS_OFF);
3402 drm_connector_list_iter_end(&iter);
4562236b 3403 drm_modeset_unlock_all(dev);
fe1053b7
AD
3404 /* unpin the front buffers and cursors */
3405 list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) {
3406 struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
3407 struct drm_framebuffer *fb = crtc->primary->fb;
3408 struct amdgpu_bo *robj;
3409
91334223 3410 if (amdgpu_crtc->cursor_bo && !adev->enable_virtual_display) {
fe1053b7
AD
3411 struct amdgpu_bo *aobj = gem_to_amdgpu_bo(amdgpu_crtc->cursor_bo);
3412 r = amdgpu_bo_reserve(aobj, true);
3413 if (r == 0) {
3414 amdgpu_bo_unpin(aobj);
3415 amdgpu_bo_unreserve(aobj);
3416 }
756e6880 3417 }
756e6880 3418
fe1053b7
AD
3419 if (fb == NULL || fb->obj[0] == NULL) {
3420 continue;
3421 }
3422 robj = gem_to_amdgpu_bo(fb->obj[0]);
3423 /* don't unpin kernel fb objects */
3424 if (!amdgpu_fbdev_robj_is_fb(adev, robj)) {
3425 r = amdgpu_bo_reserve(robj, true);
3426 if (r == 0) {
3427 amdgpu_bo_unpin(robj);
3428 amdgpu_bo_unreserve(robj);
3429 }
d38ceaf9
AD
3430 }
3431 }
3432 }
fe1053b7 3433
a23ca7f7
PL
3434 amdgpu_device_set_pg_state(adev, AMD_PG_STATE_UNGATE);
3435 amdgpu_device_set_cg_state(adev, AMD_CG_STATE_UNGATE);
3436
9593f4d6 3437 amdgpu_amdkfd_suspend(adev, !fbcon);
fe1053b7 3438
5e6932fe 3439 amdgpu_ras_suspend(adev);
3440
fe1053b7
AD
3441 r = amdgpu_device_ip_suspend_phase1(adev);
3442
d38ceaf9
AD
3443 /* evict vram memory */
3444 amdgpu_bo_evict_vram(adev);
3445
5ceb54c6 3446 amdgpu_fence_driver_suspend(adev);
d38ceaf9 3447
fe1053b7 3448 r = amdgpu_device_ip_suspend_phase2(adev);
d38ceaf9 3449
a0a71e49
AD
3450 /* evict remaining vram memory
3451 * This second call to evict vram is to evict the gart page table
3452 * using the CPU.
3453 */
d38ceaf9
AD
3454 amdgpu_bo_evict_vram(adev);
3455
d38ceaf9
AD
3456 return 0;
3457}
3458
3459/**
810ddc3a 3460 * amdgpu_device_resume - initiate device resume
d38ceaf9 3461 *
87e3f136
DP
3462 * @dev: drm dev pointer
3463 * @resume: resume state
3464 * @fbcon : notify the fbdev of resume
d38ceaf9
AD
3465 *
3466 * Bring the hw back to operating state (all asics).
3467 * Returns 0 for success or an error on failure.
3468 * Called at driver resume.
3469 */
de185019 3470int amdgpu_device_resume(struct drm_device *dev, bool fbcon)
d38ceaf9
AD
3471{
3472 struct drm_connector *connector;
f8d2d39e 3473 struct drm_connector_list_iter iter;
d38ceaf9 3474 struct amdgpu_device *adev = dev->dev_private;
756e6880 3475 struct drm_crtc *crtc;
03161a6e 3476 int r = 0;
d38ceaf9
AD
3477
3478 if (dev->switch_power_state == DRM_SWITCH_POWER_OFF)
3479 return 0;
3480
d38ceaf9 3481 /* post card */
39c640c0 3482 if (amdgpu_device_need_post(adev)) {
74b0b157 3483 r = amdgpu_atom_asic_init(adev->mode_info.atom_context);
3484 if (r)
3485 DRM_ERROR("amdgpu asic init failed\n");
3486 }
d38ceaf9 3487
06ec9070 3488 r = amdgpu_device_ip_resume(adev);
e6707218 3489 if (r) {
06ec9070 3490 DRM_ERROR("amdgpu_device_ip_resume failed (%d).\n", r);
4d3b9ae5 3491 return r;
e6707218 3492 }
5ceb54c6
AD
3493 amdgpu_fence_driver_resume(adev);
3494
d38ceaf9 3495
06ec9070 3496 r = amdgpu_device_ip_late_init(adev);
03161a6e 3497 if (r)
4d3b9ae5 3498 return r;
d38ceaf9 3499
beff74bc
AD
3500 queue_delayed_work(system_wq, &adev->delayed_init_work,
3501 msecs_to_jiffies(AMDGPU_RESUME_MS));
3502
fe1053b7
AD
3503 if (!amdgpu_device_has_dc_support(adev)) {
3504 /* pin cursors */
3505 list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) {
3506 struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
3507
91334223 3508 if (amdgpu_crtc->cursor_bo && !adev->enable_virtual_display) {
fe1053b7
AD
3509 struct amdgpu_bo *aobj = gem_to_amdgpu_bo(amdgpu_crtc->cursor_bo);
3510 r = amdgpu_bo_reserve(aobj, true);
3511 if (r == 0) {
3512 r = amdgpu_bo_pin(aobj, AMDGPU_GEM_DOMAIN_VRAM);
3513 if (r != 0)
3514 DRM_ERROR("Failed to pin cursor BO (%d)\n", r);
3515 amdgpu_crtc->cursor_addr = amdgpu_bo_gpu_offset(aobj);
3516 amdgpu_bo_unreserve(aobj);
3517 }
756e6880
AD
3518 }
3519 }
3520 }
9593f4d6 3521 r = amdgpu_amdkfd_resume(adev, !fbcon);
ba997709
YZ
3522 if (r)
3523 return r;
756e6880 3524
96a5d8d4 3525 /* Make sure IB tests flushed */
beff74bc 3526 flush_delayed_work(&adev->delayed_init_work);
96a5d8d4 3527
d38ceaf9
AD
3528 /* blat the mode back in */
3529 if (fbcon) {
4562236b
HW
3530 if (!amdgpu_device_has_dc_support(adev)) {
3531 /* pre DCE11 */
3532 drm_helper_resume_force_mode(dev);
3533
3534 /* turn on display hw */
3535 drm_modeset_lock_all(dev);
f8d2d39e
LP
3536
3537 drm_connector_list_iter_begin(dev, &iter);
3538 drm_for_each_connector_iter(connector, &iter)
3539 drm_helper_connector_dpms(connector,
3540 DRM_MODE_DPMS_ON);
3541 drm_connector_list_iter_end(&iter);
3542
4562236b 3543 drm_modeset_unlock_all(dev);
d38ceaf9 3544 }
4d3b9ae5 3545 amdgpu_fbdev_set_suspend(adev, 0);
d38ceaf9
AD
3546 }
3547
3548 drm_kms_helper_poll_enable(dev);
23a1a9e5 3549
5e6932fe 3550 amdgpu_ras_resume(adev);
3551
23a1a9e5
L
3552 /*
3553 * Most of the connector probing functions try to acquire runtime pm
3554 * refs to ensure that the GPU is powered on when connector polling is
3555 * performed. Since we're calling this from a runtime PM callback,
3556 * trying to acquire rpm refs will cause us to deadlock.
3557 *
3558 * Since we're guaranteed to be holding the rpm lock, it's safe to
3559 * temporarily disable the rpm helpers so this doesn't deadlock us.
3560 */
3561#ifdef CONFIG_PM
3562 dev->dev->power.disable_depth++;
3563#endif
4562236b
HW
3564 if (!amdgpu_device_has_dc_support(adev))
3565 drm_helper_hpd_irq_event(dev);
3566 else
3567 drm_kms_helper_hotplug_event(dev);
23a1a9e5
L
3568#ifdef CONFIG_PM
3569 dev->dev->power.disable_depth--;
3570#endif
44779b43
RZ
3571 adev->in_suspend = false;
3572
4d3b9ae5 3573 return 0;
d38ceaf9
AD
3574}
3575
e3ecdffa
AD
3576/**
3577 * amdgpu_device_ip_check_soft_reset - did soft reset succeed
3578 *
3579 * @adev: amdgpu_device pointer
3580 *
3581 * The list of all the hardware IPs that make up the asic is walked and
3582 * the check_soft_reset callbacks are run. check_soft_reset determines
3583 * if the asic is still hung or not.
3584 * Returns true if any of the IPs are still in a hung state, false if not.
3585 */
06ec9070 3586static bool amdgpu_device_ip_check_soft_reset(struct amdgpu_device *adev)
63fbf42f
CZ
3587{
3588 int i;
3589 bool asic_hang = false;
3590
f993d628
ML
3591 if (amdgpu_sriov_vf(adev))
3592 return true;
3593
8bc04c29
AD
3594 if (amdgpu_asic_need_full_reset(adev))
3595 return true;
3596
63fbf42f 3597 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 3598 if (!adev->ip_blocks[i].status.valid)
63fbf42f 3599 continue;
a1255107
AD
3600 if (adev->ip_blocks[i].version->funcs->check_soft_reset)
3601 adev->ip_blocks[i].status.hang =
3602 adev->ip_blocks[i].version->funcs->check_soft_reset(adev);
3603 if (adev->ip_blocks[i].status.hang) {
3604 DRM_INFO("IP block:%s is hung!\n", adev->ip_blocks[i].version->funcs->name);
63fbf42f
CZ
3605 asic_hang = true;
3606 }
3607 }
3608 return asic_hang;
3609}
3610
e3ecdffa
AD
3611/**
3612 * amdgpu_device_ip_pre_soft_reset - prepare for soft reset
3613 *
3614 * @adev: amdgpu_device pointer
3615 *
3616 * The list of all the hardware IPs that make up the asic is walked and the
3617 * pre_soft_reset callbacks are run if the block is hung. pre_soft_reset
3618 * handles any IP specific hardware or software state changes that are
3619 * necessary for a soft reset to succeed.
3620 * Returns 0 on success, negative error code on failure.
3621 */
06ec9070 3622static int amdgpu_device_ip_pre_soft_reset(struct amdgpu_device *adev)
d31a501e
CZ
3623{
3624 int i, r = 0;
3625
3626 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 3627 if (!adev->ip_blocks[i].status.valid)
d31a501e 3628 continue;
a1255107
AD
3629 if (adev->ip_blocks[i].status.hang &&
3630 adev->ip_blocks[i].version->funcs->pre_soft_reset) {
3631 r = adev->ip_blocks[i].version->funcs->pre_soft_reset(adev);
d31a501e
CZ
3632 if (r)
3633 return r;
3634 }
3635 }
3636
3637 return 0;
3638}
3639
e3ecdffa
AD
3640/**
3641 * amdgpu_device_ip_need_full_reset - check if a full asic reset is needed
3642 *
3643 * @adev: amdgpu_device pointer
3644 *
3645 * Some hardware IPs cannot be soft reset. If they are hung, a full gpu
3646 * reset is necessary to recover.
3647 * Returns true if a full asic reset is required, false if not.
3648 */
06ec9070 3649static bool amdgpu_device_ip_need_full_reset(struct amdgpu_device *adev)
35d782fe 3650{
da146d3b
AD
3651 int i;
3652
8bc04c29
AD
3653 if (amdgpu_asic_need_full_reset(adev))
3654 return true;
3655
da146d3b 3656 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 3657 if (!adev->ip_blocks[i].status.valid)
da146d3b 3658 continue;
a1255107
AD
3659 if ((adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) ||
3660 (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) ||
3661 (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_ACP) ||
98512bb8
KW
3662 (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_DCE) ||
3663 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP) {
a1255107 3664 if (adev->ip_blocks[i].status.hang) {
da146d3b
AD
3665 DRM_INFO("Some block need full reset!\n");
3666 return true;
3667 }
3668 }
35d782fe
CZ
3669 }
3670 return false;
3671}
3672
e3ecdffa
AD
3673/**
3674 * amdgpu_device_ip_soft_reset - do a soft reset
3675 *
3676 * @adev: amdgpu_device pointer
3677 *
3678 * The list of all the hardware IPs that make up the asic is walked and the
3679 * soft_reset callbacks are run if the block is hung. soft_reset handles any
3680 * IP specific hardware or software state changes that are necessary to soft
3681 * reset the IP.
3682 * Returns 0 on success, negative error code on failure.
3683 */
06ec9070 3684static int amdgpu_device_ip_soft_reset(struct amdgpu_device *adev)
35d782fe
CZ
3685{
3686 int i, r = 0;
3687
3688 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 3689 if (!adev->ip_blocks[i].status.valid)
35d782fe 3690 continue;
a1255107
AD
3691 if (adev->ip_blocks[i].status.hang &&
3692 adev->ip_blocks[i].version->funcs->soft_reset) {
3693 r = adev->ip_blocks[i].version->funcs->soft_reset(adev);
35d782fe
CZ
3694 if (r)
3695 return r;
3696 }
3697 }
3698
3699 return 0;
3700}
3701
e3ecdffa
AD
3702/**
3703 * amdgpu_device_ip_post_soft_reset - clean up from soft reset
3704 *
3705 * @adev: amdgpu_device pointer
3706 *
3707 * The list of all the hardware IPs that make up the asic is walked and the
3708 * post_soft_reset callbacks are run if the asic was hung. post_soft_reset
3709 * handles any IP specific hardware or software state changes that are
3710 * necessary after the IP has been soft reset.
3711 * Returns 0 on success, negative error code on failure.
3712 */
06ec9070 3713static int amdgpu_device_ip_post_soft_reset(struct amdgpu_device *adev)
35d782fe
CZ
3714{
3715 int i, r = 0;
3716
3717 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 3718 if (!adev->ip_blocks[i].status.valid)
35d782fe 3719 continue;
a1255107
AD
3720 if (adev->ip_blocks[i].status.hang &&
3721 adev->ip_blocks[i].version->funcs->post_soft_reset)
3722 r = adev->ip_blocks[i].version->funcs->post_soft_reset(adev);
35d782fe
CZ
3723 if (r)
3724 return r;
3725 }
3726
3727 return 0;
3728}
3729
e3ecdffa 3730/**
c33adbc7 3731 * amdgpu_device_recover_vram - Recover some VRAM contents
e3ecdffa
AD
3732 *
3733 * @adev: amdgpu_device pointer
3734 *
3735 * Restores the contents of VRAM buffers from the shadows in GTT. Used to
3736 * restore things like GPUVM page tables after a GPU reset where
3737 * the contents of VRAM might be lost.
403009bf
CK
3738 *
3739 * Returns:
3740 * 0 on success, negative error code on failure.
e3ecdffa 3741 */
c33adbc7 3742static int amdgpu_device_recover_vram(struct amdgpu_device *adev)
c41d1cf6 3743{
c41d1cf6 3744 struct dma_fence *fence = NULL, *next = NULL;
403009bf
CK
3745 struct amdgpu_bo *shadow;
3746 long r = 1, tmo;
c41d1cf6
ML
3747
3748 if (amdgpu_sriov_runtime(adev))
b045d3af 3749 tmo = msecs_to_jiffies(8000);
c41d1cf6
ML
3750 else
3751 tmo = msecs_to_jiffies(100);
3752
3753 DRM_INFO("recover vram bo from shadow start\n");
3754 mutex_lock(&adev->shadow_list_lock);
403009bf
CK
3755 list_for_each_entry(shadow, &adev->shadow_list, shadow_list) {
3756
3757 /* No need to recover an evicted BO */
3758 if (shadow->tbo.mem.mem_type != TTM_PL_TT ||
b575f10d 3759 shadow->tbo.mem.start == AMDGPU_BO_INVALID_OFFSET ||
403009bf
CK
3760 shadow->parent->tbo.mem.mem_type != TTM_PL_VRAM)
3761 continue;
3762
3763 r = amdgpu_bo_restore_shadow(shadow, &next);
3764 if (r)
3765 break;
3766
c41d1cf6 3767 if (fence) {
1712fb1a 3768 tmo = dma_fence_wait_timeout(fence, false, tmo);
403009bf
CK
3769 dma_fence_put(fence);
3770 fence = next;
1712fb1a 3771 if (tmo == 0) {
3772 r = -ETIMEDOUT;
c41d1cf6 3773 break;
1712fb1a 3774 } else if (tmo < 0) {
3775 r = tmo;
3776 break;
3777 }
403009bf
CK
3778 } else {
3779 fence = next;
c41d1cf6 3780 }
c41d1cf6
ML
3781 }
3782 mutex_unlock(&adev->shadow_list_lock);
3783
403009bf
CK
3784 if (fence)
3785 tmo = dma_fence_wait_timeout(fence, false, tmo);
c41d1cf6
ML
3786 dma_fence_put(fence);
3787
1712fb1a 3788 if (r < 0 || tmo <= 0) {
3789 DRM_ERROR("recover vram bo from shadow failed, r is %ld, tmo is %ld\n", r, tmo);
403009bf
CK
3790 return -EIO;
3791 }
c41d1cf6 3792
403009bf
CK
3793 DRM_INFO("recover vram bo from shadow done\n");
3794 return 0;
c41d1cf6
ML
3795}
3796
a90ad3c2 3797
e3ecdffa 3798/**
06ec9070 3799 * amdgpu_device_reset_sriov - reset ASIC for SR-IOV vf
5740682e
ML
3800 *
3801 * @adev: amdgpu device pointer
87e3f136 3802 * @from_hypervisor: request from hypervisor
5740682e
ML
3803 *
3804 * do VF FLR and reinitialize Asic
3f48c681 3805 * return 0 means succeeded otherwise failed
e3ecdffa
AD
3806 */
3807static int amdgpu_device_reset_sriov(struct amdgpu_device *adev,
3808 bool from_hypervisor)
5740682e
ML
3809{
3810 int r;
3811
3812 if (from_hypervisor)
3813 r = amdgpu_virt_request_full_gpu(adev, true);
3814 else
3815 r = amdgpu_virt_reset_gpu(adev);
3816 if (r)
3817 return r;
a90ad3c2 3818
b639c22c
JZ
3819 amdgpu_amdkfd_pre_reset(adev);
3820
a90ad3c2 3821 /* Resume IP prior to SMC */
06ec9070 3822 r = amdgpu_device_ip_reinit_early_sriov(adev);
5740682e
ML
3823 if (r)
3824 goto error;
a90ad3c2 3825
c9ffa427 3826 amdgpu_virt_init_data_exchange(adev);
a90ad3c2 3827 /* we need recover gart prior to run SMC/CP/SDMA resume */
c1c7ce8f 3828 amdgpu_gtt_mgr_recover(&adev->mman.bdev.man[TTM_PL_TT]);
a90ad3c2 3829
7a3e0bb2
RZ
3830 r = amdgpu_device_fw_loading(adev);
3831 if (r)
3832 return r;
3833
a90ad3c2 3834 /* now we are okay to resume SMC/CP/SDMA */
06ec9070 3835 r = amdgpu_device_ip_reinit_late_sriov(adev);
5740682e
ML
3836 if (r)
3837 goto error;
a90ad3c2
ML
3838
3839 amdgpu_irq_gpu_reset_resume_helper(adev);
5740682e 3840 r = amdgpu_ib_ring_tests(adev);
f81e8d53 3841 amdgpu_amdkfd_post_reset(adev);
a90ad3c2 3842
abc34253
ED
3843error:
3844 amdgpu_virt_release_full_gpu(adev, true);
c41d1cf6 3845 if (!r && adev->virt.gim_feature & AMDGIM_FEATURE_GIM_FLR_VRAMLOST) {
e3526257 3846 amdgpu_inc_vram_lost(adev);
c33adbc7 3847 r = amdgpu_device_recover_vram(adev);
a90ad3c2
ML
3848 }
3849
3850 return r;
3851}
3852
12938fad
CK
3853/**
3854 * amdgpu_device_should_recover_gpu - check if we should try GPU recovery
3855 *
3856 * @adev: amdgpu device pointer
3857 *
3858 * Check amdgpu_gpu_recovery and SRIOV status to see if we should try to recover
3859 * a hung GPU.
3860 */
3861bool amdgpu_device_should_recover_gpu(struct amdgpu_device *adev)
3862{
3863 if (!amdgpu_device_ip_check_soft_reset(adev)) {
3864 DRM_INFO("Timeout, but no hardware hang detected.\n");
3865 return false;
3866 }
3867
3ba7b418
AG
3868 if (amdgpu_gpu_recovery == 0)
3869 goto disabled;
3870
3871 if (amdgpu_sriov_vf(adev))
3872 return true;
3873
3874 if (amdgpu_gpu_recovery == -1) {
3875 switch (adev->asic_type) {
fc42d47c
AG
3876 case CHIP_BONAIRE:
3877 case CHIP_HAWAII:
3ba7b418
AG
3878 case CHIP_TOPAZ:
3879 case CHIP_TONGA:
3880 case CHIP_FIJI:
3881 case CHIP_POLARIS10:
3882 case CHIP_POLARIS11:
3883 case CHIP_POLARIS12:
3884 case CHIP_VEGAM:
3885 case CHIP_VEGA20:
3886 case CHIP_VEGA10:
3887 case CHIP_VEGA12:
c43b849f 3888 case CHIP_RAVEN:
e9d4cf91 3889 case CHIP_ARCTURUS:
2cb44fb0 3890 case CHIP_RENOIR:
658c6639
AD
3891 case CHIP_NAVI10:
3892 case CHIP_NAVI14:
3893 case CHIP_NAVI12:
3ba7b418
AG
3894 break;
3895 default:
3896 goto disabled;
3897 }
12938fad
CK
3898 }
3899
3900 return true;
3ba7b418
AG
3901
3902disabled:
3903 DRM_INFO("GPU recovery disabled.\n");
3904 return false;
12938fad
CK
3905}
3906
5c6dd71e 3907
26bc5340
AG
3908static int amdgpu_device_pre_asic_reset(struct amdgpu_device *adev,
3909 struct amdgpu_job *job,
3910 bool *need_full_reset_arg)
3911{
3912 int i, r = 0;
3913 bool need_full_reset = *need_full_reset_arg;
71182665 3914
71182665 3915 /* block all schedulers and reset given job's ring */
0875dc9e
CZ
3916 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
3917 struct amdgpu_ring *ring = adev->rings[i];
3918
51687759 3919 if (!ring || !ring->sched.thread)
0875dc9e 3920 continue;
5740682e 3921
2f9d4084
ML
3922 /* after all hw jobs are reset, hw fence is meaningless, so force_completion */
3923 amdgpu_fence_driver_force_completion(ring);
0875dc9e 3924 }
d38ceaf9 3925
222b5f04
AG
3926 if(job)
3927 drm_sched_increase_karma(&job->base);
3928
1d721ed6 3929 /* Don't suspend on bare metal if we are not going to HW reset the ASIC */
26bc5340
AG
3930 if (!amdgpu_sriov_vf(adev)) {
3931
3932 if (!need_full_reset)
3933 need_full_reset = amdgpu_device_ip_need_full_reset(adev);
3934
3935 if (!need_full_reset) {
3936 amdgpu_device_ip_pre_soft_reset(adev);
3937 r = amdgpu_device_ip_soft_reset(adev);
3938 amdgpu_device_ip_post_soft_reset(adev);
3939 if (r || amdgpu_device_ip_check_soft_reset(adev)) {
3940 DRM_INFO("soft reset failed, will fallback to full reset!\n");
3941 need_full_reset = true;
3942 }
3943 }
3944
3945 if (need_full_reset)
3946 r = amdgpu_device_ip_suspend(adev);
3947
3948 *need_full_reset_arg = need_full_reset;
3949 }
3950
3951 return r;
3952}
3953
041a62bc 3954static int amdgpu_do_asic_reset(struct amdgpu_hive_info *hive,
26bc5340
AG
3955 struct list_head *device_list_handle,
3956 bool *need_full_reset_arg)
3957{
3958 struct amdgpu_device *tmp_adev = NULL;
3959 bool need_full_reset = *need_full_reset_arg, vram_lost = false;
3960 int r = 0;
3961
3962 /*
3963 * ASIC reset has to be done on all HGMI hive nodes ASAP
3964 * to allow proper links negotiation in FW (within 1 sec)
3965 */
3966 if (need_full_reset) {
3967 list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) {
041a62bc 3968 /* For XGMI run all resets in parallel to speed up the process */
d4535e2c 3969 if (tmp_adev->gmc.xgmi.num_physical_nodes > 1) {
c96cf282 3970 if (!queue_work(system_unbound_wq, &tmp_adev->xgmi_reset_work))
d4535e2c
AG
3971 r = -EALREADY;
3972 } else
3973 r = amdgpu_asic_reset(tmp_adev);
d4535e2c 3974
041a62bc
AG
3975 if (r) {
3976 DRM_ERROR("ASIC reset failed with error, %d for drm dev, %s",
3977 r, tmp_adev->ddev->unique);
3978 break;
ce316fa5
LM
3979 }
3980 }
3981
041a62bc
AG
3982 /* For XGMI wait for all resets to complete before proceed */
3983 if (!r) {
ce316fa5
LM
3984 list_for_each_entry(tmp_adev, device_list_handle,
3985 gmc.xgmi.head) {
3986 if (tmp_adev->gmc.xgmi.num_physical_nodes > 1) {
3987 flush_work(&tmp_adev->xgmi_reset_work);
3988 r = tmp_adev->asic_reset_res;
3989 if (r)
3990 break;
ce316fa5
LM
3991 }
3992 }
3993 }
ce316fa5 3994 }
26bc5340 3995
43c4d576
JC
3996 if (!r && amdgpu_ras_intr_triggered()) {
3997 list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) {
3998 if (tmp_adev->mmhub.funcs &&
3999 tmp_adev->mmhub.funcs->reset_ras_error_count)
4000 tmp_adev->mmhub.funcs->reset_ras_error_count(tmp_adev);
4001 }
4002
00eaa571 4003 amdgpu_ras_intr_cleared();
43c4d576 4004 }
00eaa571 4005
26bc5340
AG
4006 list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) {
4007 if (need_full_reset) {
4008 /* post card */
4009 if (amdgpu_atom_asic_init(tmp_adev->mode_info.atom_context))
4010 DRM_WARN("asic atom init failed!");
4011
4012 if (!r) {
4013 dev_info(tmp_adev->dev, "GPU reset succeeded, trying to resume\n");
4014 r = amdgpu_device_ip_resume_phase1(tmp_adev);
4015 if (r)
4016 goto out;
4017
4018 vram_lost = amdgpu_device_check_vram_lost(tmp_adev);
4019 if (vram_lost) {
77e7f829 4020 DRM_INFO("VRAM is lost due to GPU reset!\n");
e3526257 4021 amdgpu_inc_vram_lost(tmp_adev);
26bc5340
AG
4022 }
4023
4024 r = amdgpu_gtt_mgr_recover(
4025 &tmp_adev->mman.bdev.man[TTM_PL_TT]);
4026 if (r)
4027 goto out;
4028
4029 r = amdgpu_device_fw_loading(tmp_adev);
4030 if (r)
4031 return r;
4032
4033 r = amdgpu_device_ip_resume_phase2(tmp_adev);
4034 if (r)
4035 goto out;
4036
4037 if (vram_lost)
4038 amdgpu_device_fill_reset_magic(tmp_adev);
4039
fdafb359
EQ
4040 /*
4041 * Add this ASIC as tracked as reset was already
4042 * complete successfully.
4043 */
4044 amdgpu_register_gpu_instance(tmp_adev);
4045
7c04ca50 4046 r = amdgpu_device_ip_late_init(tmp_adev);
4047 if (r)
4048 goto out;
4049
565d1941
EQ
4050 amdgpu_fbdev_set_suspend(tmp_adev, 0);
4051
e79a04d5 4052 /* must succeed. */
511fdbc3 4053 amdgpu_ras_resume(tmp_adev);
e79a04d5 4054
26bc5340
AG
4055 /* Update PSP FW topology after reset */
4056 if (hive && tmp_adev->gmc.xgmi.num_physical_nodes > 1)
4057 r = amdgpu_xgmi_update_topology(hive, tmp_adev);
4058 }
4059 }
4060
4061
4062out:
4063 if (!r) {
4064 amdgpu_irq_gpu_reset_resume_helper(tmp_adev);
4065 r = amdgpu_ib_ring_tests(tmp_adev);
4066 if (r) {
4067 dev_err(tmp_adev->dev, "ib ring test failed (%d).\n", r);
4068 r = amdgpu_device_ip_suspend(tmp_adev);
4069 need_full_reset = true;
4070 r = -EAGAIN;
4071 goto end;
4072 }
4073 }
4074
4075 if (!r)
4076 r = amdgpu_device_recover_vram(tmp_adev);
4077 else
4078 tmp_adev->asic_reset_res = r;
4079 }
4080
4081end:
4082 *need_full_reset_arg = need_full_reset;
4083 return r;
4084}
4085
1d721ed6 4086static bool amdgpu_device_lock_adev(struct amdgpu_device *adev, bool trylock)
26bc5340 4087{
1d721ed6
AG
4088 if (trylock) {
4089 if (!mutex_trylock(&adev->lock_reset))
4090 return false;
4091 } else
4092 mutex_lock(&adev->lock_reset);
5740682e 4093
26bc5340 4094 atomic_inc(&adev->gpu_reset_counter);
2a9b90ae 4095 adev->in_gpu_reset = true;
a3a09142
AD
4096 switch (amdgpu_asic_reset_method(adev)) {
4097 case AMD_RESET_METHOD_MODE1:
4098 adev->mp1_state = PP_MP1_STATE_SHUTDOWN;
4099 break;
4100 case AMD_RESET_METHOD_MODE2:
4101 adev->mp1_state = PP_MP1_STATE_RESET;
4102 break;
4103 default:
4104 adev->mp1_state = PP_MP1_STATE_NONE;
4105 break;
4106 }
1d721ed6
AG
4107
4108 return true;
26bc5340 4109}
d38ceaf9 4110
26bc5340
AG
4111static void amdgpu_device_unlock_adev(struct amdgpu_device *adev)
4112{
89041940 4113 amdgpu_vf_error_trans_all(adev);
a3a09142 4114 adev->mp1_state = PP_MP1_STATE_NONE;
2a9b90ae 4115 adev->in_gpu_reset = false;
13a752e3 4116 mutex_unlock(&adev->lock_reset);
26bc5340
AG
4117}
4118
26bc5340
AG
4119/**
4120 * amdgpu_device_gpu_recover - reset the asic and recover scheduler
4121 *
4122 * @adev: amdgpu device pointer
4123 * @job: which job trigger hang
4124 *
4125 * Attempt to reset the GPU if it has hung (all asics).
4126 * Attempt to do soft-reset or full-reset and reinitialize Asic
4127 * Returns 0 for success or an error on failure.
4128 */
4129
4130int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
4131 struct amdgpu_job *job)
4132{
1d721ed6
AG
4133 struct list_head device_list, *device_list_handle = NULL;
4134 bool need_full_reset, job_signaled;
26bc5340 4135 struct amdgpu_hive_info *hive = NULL;
26bc5340 4136 struct amdgpu_device *tmp_adev = NULL;
1d721ed6 4137 int i, r = 0;
7c6e68c7 4138 bool in_ras_intr = amdgpu_ras_intr_triggered();
b823821f
LM
4139 bool use_baco =
4140 (amdgpu_asic_reset_method(adev) == AMD_RESET_METHOD_BACO) ?
4141 true : false;
26bc5340 4142
d5ea093e
AG
4143 /*
4144 * Flush RAM to disk so that after reboot
4145 * the user can read log and see why the system rebooted.
4146 */
b823821f 4147 if (in_ras_intr && !use_baco && amdgpu_ras_get_context(adev)->reboot) {
d5ea093e
AG
4148
4149 DRM_WARN("Emergency reboot.");
4150
4151 ksys_sync_helper();
4152 emergency_restart();
4153 }
4154
1d721ed6 4155 need_full_reset = job_signaled = false;
26bc5340
AG
4156 INIT_LIST_HEAD(&device_list);
4157
61380faa
JC
4158 amdgpu_ras_set_error_query_ready(adev, false);
4159
b823821f
LM
4160 dev_info(adev->dev, "GPU %s begin!\n",
4161 (in_ras_intr && !use_baco) ? "jobs stop":"reset");
26bc5340 4162
beff74bc 4163 cancel_delayed_work_sync(&adev->delayed_init_work);
c53e4db7 4164
1d721ed6
AG
4165 hive = amdgpu_get_xgmi_hive(adev, false);
4166
26bc5340 4167 /*
1d721ed6
AG
4168 * Here we trylock to avoid chain of resets executing from
4169 * either trigger by jobs on different adevs in XGMI hive or jobs on
4170 * different schedulers for same device while this TO handler is running.
4171 * We always reset all schedulers for device and all devices for XGMI
4172 * hive so that should take care of them too.
26bc5340 4173 */
1d721ed6
AG
4174
4175 if (hive && !mutex_trylock(&hive->reset_lock)) {
4176 DRM_INFO("Bailing on TDR for s_job:%llx, hive: %llx as another already in progress",
0b2d2c2e 4177 job ? job->base.id : -1, hive->hive_id);
26bc5340 4178 return 0;
1d721ed6 4179 }
26bc5340
AG
4180
4181 /* Start with adev pre asic reset first for soft reset check.*/
1d721ed6
AG
4182 if (!amdgpu_device_lock_adev(adev, !hive)) {
4183 DRM_INFO("Bailing on TDR for s_job:%llx, as another already in progress",
0b2d2c2e 4184 job ? job->base.id : -1);
1d721ed6 4185 return 0;
26bc5340
AG
4186 }
4187
7c6e68c7
AG
4188 /* Block kfd: SRIOV would do it separately */
4189 if (!amdgpu_sriov_vf(adev))
4190 amdgpu_amdkfd_pre_reset(adev);
4191
26bc5340 4192 /* Build list of devices to reset */
1d721ed6 4193 if (adev->gmc.xgmi.num_physical_nodes > 1) {
26bc5340 4194 if (!hive) {
7c6e68c7
AG
4195 /*unlock kfd: SRIOV would do it separately */
4196 if (!amdgpu_sriov_vf(adev))
4197 amdgpu_amdkfd_post_reset(adev);
26bc5340
AG
4198 amdgpu_device_unlock_adev(adev);
4199 return -ENODEV;
4200 }
4201
4202 /*
4203 * In case we are in XGMI hive mode device reset is done for all the
4204 * nodes in the hive to retrain all XGMI links and hence the reset
4205 * sequence is executed in loop on all nodes.
4206 */
4207 device_list_handle = &hive->device_list;
4208 } else {
4209 list_add_tail(&adev->gmc.xgmi.head, &device_list);
4210 device_list_handle = &device_list;
4211 }
4212
1d721ed6
AG
4213 /* block all schedulers and reset given job's ring */
4214 list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) {
7c6e68c7 4215 if (tmp_adev != adev) {
61380faa 4216 amdgpu_ras_set_error_query_ready(tmp_adev, false);
12ffa55d 4217 amdgpu_device_lock_adev(tmp_adev, false);
7c6e68c7
AG
4218 if (!amdgpu_sriov_vf(tmp_adev))
4219 amdgpu_amdkfd_pre_reset(tmp_adev);
4220 }
4221
12ffa55d
AG
4222 /*
4223 * Mark these ASICs to be reseted as untracked first
4224 * And add them back after reset completed
4225 */
4226 amdgpu_unregister_gpu_instance(tmp_adev);
4227
565d1941
EQ
4228 amdgpu_fbdev_set_suspend(adev, 1);
4229
f1c1314b 4230 /* disable ras on ALL IPs */
b823821f
LM
4231 if (!(in_ras_intr && !use_baco) &&
4232 amdgpu_device_ip_need_full_reset(tmp_adev))
f1c1314b 4233 amdgpu_ras_suspend(tmp_adev);
4234
1d721ed6
AG
4235 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
4236 struct amdgpu_ring *ring = tmp_adev->rings[i];
4237
4238 if (!ring || !ring->sched.thread)
4239 continue;
4240
0b2d2c2e 4241 drm_sched_stop(&ring->sched, job ? &job->base : NULL);
7c6e68c7 4242
b823821f 4243 if (in_ras_intr && !use_baco)
7c6e68c7 4244 amdgpu_job_stop_all_jobs_on_sched(&ring->sched);
1d721ed6
AG
4245 }
4246 }
4247
4248
b823821f 4249 if (in_ras_intr && !use_baco)
7c6e68c7
AG
4250 goto skip_sched_resume;
4251
1d721ed6
AG
4252 /*
4253 * Must check guilty signal here since after this point all old
4254 * HW fences are force signaled.
4255 *
4256 * job->base holds a reference to parent fence
4257 */
4258 if (job && job->base.s_fence->parent &&
4259 dma_fence_is_signaled(job->base.s_fence->parent))
4260 job_signaled = true;
4261
1d721ed6
AG
4262 if (job_signaled) {
4263 dev_info(adev->dev, "Guilty job already signaled, skipping HW reset");
4264 goto skip_hw_reset;
4265 }
4266
4267
4268 /* Guilty job will be freed after this*/
0b2d2c2e 4269 r = amdgpu_device_pre_asic_reset(adev, job, &need_full_reset);
1d721ed6
AG
4270 if (r) {
4271 /*TODO Should we stop ?*/
4272 DRM_ERROR("GPU pre asic reset failed with err, %d for drm dev, %s ",
4273 r, adev->ddev->unique);
4274 adev->asic_reset_res = r;
4275 }
4276
26bc5340
AG
4277retry: /* Rest of adevs pre asic reset from XGMI hive. */
4278 list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) {
4279
4280 if (tmp_adev == adev)
4281 continue;
4282
26bc5340
AG
4283 r = amdgpu_device_pre_asic_reset(tmp_adev,
4284 NULL,
4285 &need_full_reset);
4286 /*TODO Should we stop ?*/
4287 if (r) {
4288 DRM_ERROR("GPU pre asic reset failed with err, %d for drm dev, %s ",
4289 r, tmp_adev->ddev->unique);
4290 tmp_adev->asic_reset_res = r;
4291 }
4292 }
4293
4294 /* Actual ASIC resets if needed.*/
4295 /* TODO Implement XGMI hive reset logic for SRIOV */
4296 if (amdgpu_sriov_vf(adev)) {
4297 r = amdgpu_device_reset_sriov(adev, job ? false : true);
4298 if (r)
4299 adev->asic_reset_res = r;
4300 } else {
041a62bc 4301 r = amdgpu_do_asic_reset(hive, device_list_handle, &need_full_reset);
26bc5340
AG
4302 if (r && r == -EAGAIN)
4303 goto retry;
4304 }
4305
1d721ed6
AG
4306skip_hw_reset:
4307
26bc5340
AG
4308 /* Post ASIC reset for all devs .*/
4309 list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) {
7c6e68c7 4310
1d721ed6
AG
4311 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
4312 struct amdgpu_ring *ring = tmp_adev->rings[i];
4313
4314 if (!ring || !ring->sched.thread)
4315 continue;
4316
4317 /* No point to resubmit jobs if we didn't HW reset*/
4318 if (!tmp_adev->asic_reset_res && !job_signaled)
4319 drm_sched_resubmit_jobs(&ring->sched);
4320
4321 drm_sched_start(&ring->sched, !tmp_adev->asic_reset_res);
4322 }
4323
4324 if (!amdgpu_device_has_dc_support(tmp_adev) && !job_signaled) {
4325 drm_helper_resume_force_mode(tmp_adev->ddev);
4326 }
4327
4328 tmp_adev->asic_reset_res = 0;
26bc5340
AG
4329
4330 if (r) {
4331 /* bad news, how to tell it to userspace ? */
12ffa55d 4332 dev_info(tmp_adev->dev, "GPU reset(%d) failed\n", atomic_read(&tmp_adev->gpu_reset_counter));
26bc5340
AG
4333 amdgpu_vf_error_put(tmp_adev, AMDGIM_ERROR_VF_GPU_RESET_FAIL, 0, r);
4334 } else {
12ffa55d 4335 dev_info(tmp_adev->dev, "GPU reset(%d) succeeded!\n", atomic_read(&tmp_adev->gpu_reset_counter));
26bc5340 4336 }
7c6e68c7 4337 }
26bc5340 4338
7c6e68c7
AG
4339skip_sched_resume:
4340 list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) {
4341 /*unlock kfd: SRIOV would do it separately */
b823821f 4342 if (!(in_ras_intr && !use_baco) && !amdgpu_sriov_vf(tmp_adev))
7c6e68c7 4343 amdgpu_amdkfd_post_reset(tmp_adev);
26bc5340
AG
4344 amdgpu_device_unlock_adev(tmp_adev);
4345 }
4346
1d721ed6 4347 if (hive)
22d6575b 4348 mutex_unlock(&hive->reset_lock);
26bc5340
AG
4349
4350 if (r)
4351 dev_info(adev->dev, "GPU reset end with ret = %d\n", r);
d38ceaf9
AD
4352 return r;
4353}
4354
e3ecdffa
AD
4355/**
4356 * amdgpu_device_get_pcie_info - fence pcie info about the PCIE slot
4357 *
4358 * @adev: amdgpu_device pointer
4359 *
4360 * Fetchs and stores in the driver the PCIE capabilities (gen speed
4361 * and lanes) of the slot the device is in. Handles APUs and
4362 * virtualized environments where PCIE config space may not be available.
4363 */
5494d864 4364static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev)
d0dd7f0c 4365{
5d9a6330 4366 struct pci_dev *pdev;
c5313457
HK
4367 enum pci_bus_speed speed_cap, platform_speed_cap;
4368 enum pcie_link_width platform_link_width;
d0dd7f0c 4369
cd474ba0
AD
4370 if (amdgpu_pcie_gen_cap)
4371 adev->pm.pcie_gen_mask = amdgpu_pcie_gen_cap;
d0dd7f0c 4372
cd474ba0
AD
4373 if (amdgpu_pcie_lane_cap)
4374 adev->pm.pcie_mlw_mask = amdgpu_pcie_lane_cap;
d0dd7f0c 4375
cd474ba0
AD
4376 /* covers APUs as well */
4377 if (pci_is_root_bus(adev->pdev->bus)) {
4378 if (adev->pm.pcie_gen_mask == 0)
4379 adev->pm.pcie_gen_mask = AMDGPU_DEFAULT_PCIE_GEN_MASK;
4380 if (adev->pm.pcie_mlw_mask == 0)
4381 adev->pm.pcie_mlw_mask = AMDGPU_DEFAULT_PCIE_MLW_MASK;
d0dd7f0c 4382 return;
cd474ba0 4383 }
d0dd7f0c 4384
c5313457
HK
4385 if (adev->pm.pcie_gen_mask && adev->pm.pcie_mlw_mask)
4386 return;
4387
dbaa922b
AD
4388 pcie_bandwidth_available(adev->pdev, NULL,
4389 &platform_speed_cap, &platform_link_width);
c5313457 4390
cd474ba0 4391 if (adev->pm.pcie_gen_mask == 0) {
5d9a6330
AD
4392 /* asic caps */
4393 pdev = adev->pdev;
4394 speed_cap = pcie_get_speed_cap(pdev);
4395 if (speed_cap == PCI_SPEED_UNKNOWN) {
4396 adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
cd474ba0
AD
4397 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
4398 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3);
cd474ba0 4399 } else {
5d9a6330
AD
4400 if (speed_cap == PCIE_SPEED_16_0GT)
4401 adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
4402 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
4403 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3 |
4404 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN4);
4405 else if (speed_cap == PCIE_SPEED_8_0GT)
4406 adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
4407 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
4408 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3);
4409 else if (speed_cap == PCIE_SPEED_5_0GT)
4410 adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
4411 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2);
4412 else
4413 adev->pm.pcie_gen_mask |= CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1;
4414 }
4415 /* platform caps */
c5313457 4416 if (platform_speed_cap == PCI_SPEED_UNKNOWN) {
5d9a6330
AD
4417 adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
4418 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2);
4419 } else {
c5313457 4420 if (platform_speed_cap == PCIE_SPEED_16_0GT)
5d9a6330
AD
4421 adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
4422 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2 |
4423 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3 |
4424 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN4);
c5313457 4425 else if (platform_speed_cap == PCIE_SPEED_8_0GT)
5d9a6330
AD
4426 adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
4427 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2 |
4428 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3);
c5313457 4429 else if (platform_speed_cap == PCIE_SPEED_5_0GT)
5d9a6330
AD
4430 adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
4431 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2);
4432 else
4433 adev->pm.pcie_gen_mask |= CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1;
4434
cd474ba0
AD
4435 }
4436 }
4437 if (adev->pm.pcie_mlw_mask == 0) {
c5313457 4438 if (platform_link_width == PCIE_LNK_WIDTH_UNKNOWN) {
5d9a6330
AD
4439 adev->pm.pcie_mlw_mask |= AMDGPU_DEFAULT_PCIE_MLW_MASK;
4440 } else {
c5313457 4441 switch (platform_link_width) {
5d9a6330 4442 case PCIE_LNK_X32:
cd474ba0
AD
4443 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X32 |
4444 CAIL_PCIE_LINK_WIDTH_SUPPORT_X16 |
4445 CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 |
4446 CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
4447 CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
4448 CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
4449 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
4450 break;
5d9a6330 4451 case PCIE_LNK_X16:
cd474ba0
AD
4452 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X16 |
4453 CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 |
4454 CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
4455 CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
4456 CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
4457 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
4458 break;
5d9a6330 4459 case PCIE_LNK_X12:
cd474ba0
AD
4460 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 |
4461 CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
4462 CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
4463 CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
4464 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
4465 break;
5d9a6330 4466 case PCIE_LNK_X8:
cd474ba0
AD
4467 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
4468 CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
4469 CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
4470 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
4471 break;
5d9a6330 4472 case PCIE_LNK_X4:
cd474ba0
AD
4473 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
4474 CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
4475 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
4476 break;
5d9a6330 4477 case PCIE_LNK_X2:
cd474ba0
AD
4478 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
4479 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
4480 break;
5d9a6330 4481 case PCIE_LNK_X1:
cd474ba0
AD
4482 adev->pm.pcie_mlw_mask = CAIL_PCIE_LINK_WIDTH_SUPPORT_X1;
4483 break;
4484 default:
4485 break;
4486 }
d0dd7f0c
AD
4487 }
4488 }
4489}
d38ceaf9 4490
361dbd01
AD
4491int amdgpu_device_baco_enter(struct drm_device *dev)
4492{
4493 struct amdgpu_device *adev = dev->dev_private;
7a22677b 4494 struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
361dbd01
AD
4495
4496 if (!amdgpu_device_supports_baco(adev->ddev))
4497 return -ENOTSUPP;
4498
7a22677b
LM
4499 if (ras && ras->supported)
4500 adev->nbio.funcs->enable_doorbell_interrupt(adev, false);
4501
9530273e 4502 return amdgpu_dpm_baco_enter(adev);
361dbd01
AD
4503}
4504
4505int amdgpu_device_baco_exit(struct drm_device *dev)
4506{
4507 struct amdgpu_device *adev = dev->dev_private;
7a22677b 4508 struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
9530273e 4509 int ret = 0;
361dbd01
AD
4510
4511 if (!amdgpu_device_supports_baco(adev->ddev))
4512 return -ENOTSUPP;
4513
9530273e
EQ
4514 ret = amdgpu_dpm_baco_exit(adev);
4515 if (ret)
4516 return ret;
7a22677b
LM
4517
4518 if (ras && ras->supported)
4519 adev->nbio.funcs->enable_doorbell_interrupt(adev, true);
4520
4521 return 0;
361dbd01 4522}