drm/amdgpu: correct fbdev suspend on gpu reset
[linux-2.6-block.git] / drivers / gpu / drm / amd / amdgpu / amdgpu_device.c
d38ceaf9
AD
1/*
2 * Copyright 2008 Advanced Micro Devices, Inc.
3 * Copyright 2008 Red Hat Inc.
4 * Copyright 2009 Jerome Glisse.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 * and/or sell copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
20 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
21 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
22 * OTHER DEALINGS IN THE SOFTWARE.
23 *
24 * Authors: Dave Airlie
25 * Alex Deucher
26 * Jerome Glisse
27 */
b1ddf548 28#include <linux/power_supply.h>
0875dc9e 29#include <linux/kthread.h>
fdf2f6c5 30#include <linux/module.h>
d38ceaf9
AD
31#include <linux/console.h>
32#include <linux/slab.h>
fdf2f6c5 33
4562236b 34#include <drm/drm_atomic_helper.h>
fcd70cd3 35#include <drm/drm_probe_helper.h>
d38ceaf9
AD
36#include <drm/amdgpu_drm.h>
37#include <linux/vgaarb.h>
38#include <linux/vga_switcheroo.h>
39#include <linux/efi.h>
40#include "amdgpu.h"
f4b373f4 41#include "amdgpu_trace.h"
d38ceaf9
AD
42#include "amdgpu_i2c.h"
43#include "atom.h"
44#include "amdgpu_atombios.h"
a5bde2f9 45#include "amdgpu_atomfirmware.h"
d0dd7f0c 46#include "amd_pcie.h"
33f34802
KW
47#ifdef CONFIG_DRM_AMDGPU_SI
48#include "si.h"
49#endif
a2e73f56
AD
50#ifdef CONFIG_DRM_AMDGPU_CIK
51#include "cik.h"
52#endif
aaa36a97 53#include "vi.h"
460826e6 54#include "soc15.h"
0a5b8c7b 55#include "nv.h"
d38ceaf9 56#include "bif/bif_4_1_d.h"
9accf2fd 57#include <linux/pci.h>
bec86378 58#include <linux/firmware.h>
89041940 59#include "amdgpu_vf_error.h"
d38ceaf9 60
ba997709 61#include "amdgpu_amdkfd.h"
d2f52ac8 62#include "amdgpu_pm.h"
d38ceaf9 63
5183411b 64#include "amdgpu_xgmi.h"
c030f2e4 65#include "amdgpu_ras.h"
9c7c85f7 66#include "amdgpu_pmu.h"
bd607166 67#include "amdgpu_fru_eeprom.h"
5183411b 68
d5ea093e 69#include <linux/suspend.h>
c6a6e2db 70#include <drm/task_barrier.h>
d5ea093e 71
e2a75f88 72MODULE_FIRMWARE("amdgpu/vega10_gpu_info.bin");
3f76dced 73MODULE_FIRMWARE("amdgpu/vega12_gpu_info.bin");
2d2e5e7e 74MODULE_FIRMWARE("amdgpu/raven_gpu_info.bin");
ad5a67a7 75MODULE_FIRMWARE("amdgpu/picasso_gpu_info.bin");
54c4d17e 76MODULE_FIRMWARE("amdgpu/raven2_gpu_info.bin");
65e60f6e 77MODULE_FIRMWARE("amdgpu/arcturus_gpu_info.bin");
b51a26a0 78MODULE_FIRMWARE("amdgpu/renoir_gpu_info.bin");
23c6268e 79MODULE_FIRMWARE("amdgpu/navi10_gpu_info.bin");
ed42cfe1 80MODULE_FIRMWARE("amdgpu/navi14_gpu_info.bin");
42b325e5 81MODULE_FIRMWARE("amdgpu/navi12_gpu_info.bin");
e2a75f88 82
2dc80b00
S
83#define AMDGPU_RESUME_MS 2000
84
050091ab 85const char *amdgpu_asic_name[] = {
da69c161
KW
86 "TAHITI",
87 "PITCAIRN",
88 "VERDE",
89 "OLAND",
90 "HAINAN",
d38ceaf9
AD
91 "BONAIRE",
92 "KAVERI",
93 "KABINI",
94 "HAWAII",
95 "MULLINS",
96 "TOPAZ",
97 "TONGA",
48299f95 98 "FIJI",
d38ceaf9 99 "CARRIZO",
139f4917 100 "STONEY",
2cc0c0b5
FC
101 "POLARIS10",
102 "POLARIS11",
c4642a47 103 "POLARIS12",
48ff108d 104 "VEGAM",
d4196f01 105 "VEGA10",
8fab806a 106 "VEGA12",
956fcddc 107 "VEGA20",
2ca8a5d2 108 "RAVEN",
d6c3b24e 109 "ARCTURUS",
1eee4228 110 "RENOIR",
852a6626 111 "NAVI10",
87dbad02 112 "NAVI14",
9802f5d7 113 "NAVI12",
d38ceaf9
AD
114 "LAST",
115};
116
dcea6e65
KR
117/**
118 * DOC: pcie_replay_count
119 *
120 * The amdgpu driver provides a sysfs API for reporting the total number
121 * of PCIe replays (NAKs).
122 * The file pcie_replay_count is used for this and returns the total
123 * number of replays as a sum of the NAKs generated and NAKs received.
124 */
125
126static ssize_t amdgpu_device_get_pcie_replay_count(struct device *dev,
127 struct device_attribute *attr, char *buf)
128{
129 struct drm_device *ddev = dev_get_drvdata(dev);
130 struct amdgpu_device *adev = ddev->dev_private;
131 uint64_t cnt = amdgpu_asic_get_pcie_replay_count(adev);
132
133 return snprintf(buf, PAGE_SIZE, "%llu\n", cnt);
134}
135
136static DEVICE_ATTR(pcie_replay_count, S_IRUGO,
137 amdgpu_device_get_pcie_replay_count, NULL);
138
5494d864
AD
139static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev);
140
bd607166
KR
141/**
142 * DOC: product_name
143 *
144 * The amdgpu driver provides a sysfs API for reporting the product name
145 * for the device.
146 * The file product_name is used for this and returns the product name
147 * as returned from the FRU.
148 * NOTE: This is only available for certain server cards
149 */
150
151static ssize_t amdgpu_device_get_product_name(struct device *dev,
152 struct device_attribute *attr, char *buf)
153{
154 struct drm_device *ddev = dev_get_drvdata(dev);
155 struct amdgpu_device *adev = ddev->dev_private;
156
157 return snprintf(buf, PAGE_SIZE, "%s\n", adev->product_name);
158}
159
160static DEVICE_ATTR(product_name, S_IRUGO,
161 amdgpu_device_get_product_name, NULL);
162
163/**
164 * DOC: product_number
165 *
166 * The amdgpu driver provides a sysfs API for reporting the part number
167 * for the device.
168 * The file product_number is used for this and returns the part number
169 * as returned from the FRU.
170 * NOTE: This is only available for certain server cards
171 */
172
173static ssize_t amdgpu_device_get_product_number(struct device *dev,
174 struct device_attribute *attr, char *buf)
175{
176 struct drm_device *ddev = dev_get_drvdata(dev);
177 struct amdgpu_device *adev = ddev->dev_private;
178
179 return snprintf(buf, PAGE_SIZE, "%s\n", adev->product_number);
180}
181
182static DEVICE_ATTR(product_number, S_IRUGO,
183 amdgpu_device_get_product_number, NULL);
184
185/**
186 * DOC: serial_number
187 *
188 * The amdgpu driver provides a sysfs API for reporting the serial number
189 * for the device
190 * The file serial_number is used for this and returns the serial number
191 * as returned from the FRU.
192 * NOTE: This is only available for certain server cards
193 */
194
195static ssize_t amdgpu_device_get_serial_number(struct device *dev,
196 struct device_attribute *attr, char *buf)
197{
198 struct drm_device *ddev = dev_get_drvdata(dev);
199 struct amdgpu_device *adev = ddev->dev_private;
200
201 return snprintf(buf, PAGE_SIZE, "%s\n", adev->serial);
202}
203
204static DEVICE_ATTR(serial_number, S_IRUGO,
205 amdgpu_device_get_serial_number, NULL);
206
e3ecdffa 207/**
31af062a 208 * amdgpu_device_supports_boco - Is the device a dGPU with HG/PX power control
e3ecdffa
AD
209 *
210 * @dev: drm_device pointer
211 *
212 * Returns true if the device is a dGPU with HG/PX power control,
213 * otherwise returns false.
214 */
31af062a 215bool amdgpu_device_supports_boco(struct drm_device *dev)
d38ceaf9
AD
216{
217 struct amdgpu_device *adev = dev->dev_private;
218
2f7d10b3 219 if (adev->flags & AMD_IS_PX)
d38ceaf9
AD
220 return true;
221 return false;
222}
223
a69cba42
AD
224/**
225 * amdgpu_device_supports_baco - Does the device support BACO
226 *
227 * @dev: drm_device pointer
228 *
229 * Returns true if the device supports BACO,
230 * otherwise returns false.
231 */
232bool amdgpu_device_supports_baco(struct drm_device *dev)
233{
234 struct amdgpu_device *adev = dev->dev_private;
235
236 return amdgpu_asic_supports_baco(adev);
237}
238
e35e2b11
TY
239/**
240 * VRAM access helper functions.
241 *
242 * amdgpu_device_vram_access - read/write a buffer in vram
243 *
244 * @adev: amdgpu_device pointer
245 * @pos: offset of the buffer in vram
246 * @buf: virtual address of the buffer in system memory
247 * @size: read/write size; the buffer at @buf must be at least @size bytes
248 * @write: true - write to vram, otherwise - read from vram
249 */
250void amdgpu_device_vram_access(struct amdgpu_device *adev, loff_t pos,
251 uint32_t *buf, size_t size, bool write)
252{
e35e2b11 253 unsigned long flags;
ce05ac56
CK
254 uint32_t hi = ~0;
255 uint64_t last;
256
257 spin_lock_irqsave(&adev->mmio_idx_lock, flags);
258 for (last = pos + size; pos < last; pos += 4) {
259 uint32_t tmp = pos >> 31;
e35e2b11 260
e35e2b11 261 WREG32_NO_KIQ(mmMM_INDEX, ((uint32_t)pos) | 0x80000000);
ce05ac56
CK
262 if (tmp != hi) {
263 WREG32_NO_KIQ(mmMM_INDEX_HI, tmp);
264 hi = tmp;
265 }
e35e2b11
TY
266 if (write)
267 WREG32_NO_KIQ(mmMM_DATA, *buf++);
268 else
269 *buf++ = RREG32_NO_KIQ(mmMM_DATA);
e35e2b11 270 }
ce05ac56 271 spin_unlock_irqrestore(&adev->mmio_idx_lock, flags);
e35e2b11
TY
272}
273
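/*
 * Illustrative sketch (not part of the driver source): how a caller might
 * use amdgpu_device_vram_access() above to peek at a single dword in VRAM.
 * The helper name and signature come from the function above; the wrapper
 * function itself is hypothetical.
 */
static uint32_t example_peek_vram_dword(struct amdgpu_device *adev, loff_t pos)
{
	uint32_t value = 0;

	/* read 4 bytes from VRAM at byte offset pos into value */
	amdgpu_device_vram_access(adev, pos, &value, sizeof(value), false);
	return value;
}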
d38ceaf9 274/*
2eee0229 275 * device register access helper functions.
d38ceaf9 276 */
e3ecdffa 277/**
2eee0229 278 * amdgpu_device_rreg - read a register
e3ecdffa
AD
279 *
280 * @adev: amdgpu_device pointer
281 * @reg: dword aligned register offset
282 * @acc_flags: access flags which require special behavior
283 *
284 * Returns the 32 bit value from the offset specified.
285 */
2eee0229
HZ
286uint32_t amdgpu_device_rreg(struct amdgpu_device *adev, uint32_t reg,
287 uint32_t acc_flags)
d38ceaf9 288{
f4b373f4
TSD
289 uint32_t ret;
290
f384ff95 291 if (!(acc_flags & AMDGPU_REGS_NO_KIQ) && amdgpu_sriov_runtime(adev))
d33a99c4 292 return amdgpu_kiq_rreg(adev, reg);
bc992ba5 293
ec59847e 294 if ((reg * 4) < adev->rmmio_size)
f4b373f4 295 ret = readl(((void __iomem *)adev->rmmio) + (reg * 4));
2eee0229
HZ
296 else
297 ret = adev->pcie_rreg(adev, (reg * 4));
298 trace_amdgpu_device_rreg(adev->pdev->device, reg, ret);
f4b373f4 299 return ret;
d38ceaf9
AD
300}
301
421a2a30
ML
302/*
303 * MMIO register read helper function for byte access
304 * @offset: byte offset from MMIO start
305 *
306*/
307
e3ecdffa
AD
308/**
309 * amdgpu_mm_rreg8 - read a memory mapped IO register
310 *
311 * @adev: amdgpu_device pointer
312 * @offset: byte aligned register offset
313 *
314 * Returns the 8 bit value from the offset specified.
315 */
421a2a30
ML
316uint8_t amdgpu_mm_rreg8(struct amdgpu_device *adev, uint32_t offset) {
317 if (offset < adev->rmmio_size)
318 return (readb(adev->rmmio + offset));
319 BUG();
320}
321
322/*
323 * MMIO register write helper function for byte access
324 * @offset: byte offset from MMIO start
325 * @value: the value to be written to the register
326 *
327*/
e3ecdffa
AD
328/**
329 * amdgpu_mm_wreg8 - write to a memory mapped IO register
330 *
331 * @adev: amdgpu_device pointer
332 * @offset: byte aligned register offset
333 * @value: 8 bit value to write
334 *
335 * Writes the value specified to the offset specified.
336 */
421a2a30
ML
337void amdgpu_mm_wreg8(struct amdgpu_device *adev, uint32_t offset, uint8_t value) {
338 if (offset < adev->rmmio_size)
339 writeb(value, adev->rmmio + offset);
340 else
341 BUG();
342}
343
2eee0229
HZ
344static inline void amdgpu_device_wreg_no_kiq(struct amdgpu_device *adev, uint32_t reg,
345 uint32_t v, uint32_t acc_flags)
2e0cc4d4 346{
2eee0229 347 trace_amdgpu_device_wreg(adev->pdev->device, reg, v);
2e0cc4d4 348
ec59847e 349 if ((reg * 4) < adev->rmmio_size)
2e0cc4d4 350 writel(v, ((void __iomem *)adev->rmmio) + (reg * 4));
2eee0229
HZ
351 else
352 adev->pcie_wreg(adev, (reg * 4), v);
2e0cc4d4
ML
353}
354
e3ecdffa 355/**
2eee0229 356 * amdgpu_device_wreg - write to a register
e3ecdffa
AD
357 *
358 * @adev: amdgpu_device pointer
359 * @reg: dword aligned register offset
360 * @v: 32 bit value to write to the register
361 * @acc_flags: access flags which require special behavior
362 *
363 * Writes the value specified to the offset specified.
364 */
2eee0229
HZ
365void amdgpu_device_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v,
366 uint32_t acc_flags)
d38ceaf9 367{
f384ff95 368 if (!(acc_flags & AMDGPU_REGS_NO_KIQ) && amdgpu_sriov_runtime(adev))
d33a99c4 369 return amdgpu_kiq_wreg(adev, reg, v);
bc992ba5 370
2eee0229 371 amdgpu_device_wreg_no_kiq(adev, reg, v, acc_flags);
2e0cc4d4 372}
d38ceaf9 373
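/*
 * Illustrative sketch (not part of the driver source): a read-modify-write
 * of a dword register through the helpers above. An acc_flags of 0 takes
 * the default path (routed through KIQ under SR-IOV at runtime), while
 * AMDGPU_REGS_NO_KIQ forces a direct MMIO access. The wrapper function is
 * hypothetical.
 */
static void example_set_reg_bits(struct amdgpu_device *adev, uint32_t reg,
				 uint32_t bits)
{
	uint32_t val;

	val = amdgpu_device_rreg(adev, reg, 0);
	val |= bits;
	amdgpu_device_wreg(adev, reg, val, 0);
}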
2e0cc4d4
ML
374/*
375 * amdgpu_mm_wreg_mmio_rlc - write register either with mmio or with RLC path if in range
376 *
377 * this function is invoked only for debugfs register access
378 */
379void amdgpu_mm_wreg_mmio_rlc(struct amdgpu_device *adev, uint32_t reg, uint32_t v,
380 uint32_t acc_flags)
381{
382 if (amdgpu_sriov_fullaccess(adev) &&
383 adev->gfx.rlc.funcs &&
384 adev->gfx.rlc.funcs->is_rlcg_access_range) {
47ed4e1c 385
2e0cc4d4
ML
386 if (adev->gfx.rlc.funcs->is_rlcg_access_range(adev, reg))
387 return adev->gfx.rlc.funcs->rlcg_wreg(adev, reg, v);
47ed4e1c 388 }
2e0cc4d4 389
2eee0229 390 amdgpu_device_wreg_no_kiq(adev, reg, v, acc_flags);
d38ceaf9
AD
391}
392
e3ecdffa
AD
393/**
394 * amdgpu_io_rreg - read an IO register
395 *
396 * @adev: amdgpu_device pointer
397 * @reg: dword aligned register offset
398 *
399 * Returns the 32 bit value from the offset specified.
400 */
d38ceaf9
AD
401u32 amdgpu_io_rreg(struct amdgpu_device *adev, u32 reg)
402{
403 if ((reg * 4) < adev->rio_mem_size)
404 return ioread32(adev->rio_mem + (reg * 4));
405 else {
406 iowrite32((reg * 4), adev->rio_mem + (mmMM_INDEX * 4));
407 return ioread32(adev->rio_mem + (mmMM_DATA * 4));
408 }
409}
410
e3ecdffa
AD
411/**
412 * amdgpu_io_wreg - write to an IO register
413 *
414 * @adev: amdgpu_device pointer
415 * @reg: dword aligned register offset
416 * @v: 32 bit value to write to the register
417 *
418 * Writes the value specified to the offset specified.
419 */
d38ceaf9
AD
420void amdgpu_io_wreg(struct amdgpu_device *adev, u32 reg, u32 v)
421{
d38ceaf9
AD
422 if ((reg * 4) < adev->rio_mem_size)
423 iowrite32(v, adev->rio_mem + (reg * 4));
424 else {
425 iowrite32((reg * 4), adev->rio_mem + (mmMM_INDEX * 4));
426 iowrite32(v, adev->rio_mem + (mmMM_DATA * 4));
427 }
428}
429
430/**
431 * amdgpu_mm_rdoorbell - read a doorbell dword
432 *
433 * @adev: amdgpu_device pointer
434 * @index: doorbell index
435 *
436 * Returns the value in the doorbell aperture at the
437 * requested doorbell index (CIK).
438 */
439u32 amdgpu_mm_rdoorbell(struct amdgpu_device *adev, u32 index)
440{
441 if (index < adev->doorbell.num_doorbells) {
442 return readl(adev->doorbell.ptr + index);
443 } else {
444 DRM_ERROR("reading beyond doorbell aperture: 0x%08x!\n", index);
445 return 0;
446 }
447}
448
449/**
450 * amdgpu_mm_wdoorbell - write a doorbell dword
451 *
452 * @adev: amdgpu_device pointer
453 * @index: doorbell index
454 * @v: value to write
455 *
456 * Writes @v to the doorbell aperture at the
457 * requested doorbell index (CIK).
458 */
459void amdgpu_mm_wdoorbell(struct amdgpu_device *adev, u32 index, u32 v)
460{
461 if (index < adev->doorbell.num_doorbells) {
462 writel(v, adev->doorbell.ptr + index);
463 } else {
464 DRM_ERROR("writing beyond doorbell aperture: 0x%08x!\n", index);
465 }
466}
467
832be404
KW
468/**
469 * amdgpu_mm_rdoorbell64 - read a doorbell Qword
470 *
471 * @adev: amdgpu_device pointer
472 * @index: doorbell index
473 *
474 * Returns the value in the doorbell aperture at the
475 * requested doorbell index (VEGA10+).
476 */
477u64 amdgpu_mm_rdoorbell64(struct amdgpu_device *adev, u32 index)
478{
479 if (index < adev->doorbell.num_doorbells) {
480 return atomic64_read((atomic64_t *)(adev->doorbell.ptr + index));
481 } else {
482 DRM_ERROR("reading beyond doorbell aperture: 0x%08x!\n", index);
483 return 0;
484 }
485}
486
487/**
488 * amdgpu_mm_wdoorbell64 - write a doorbell Qword
489 *
490 * @adev: amdgpu_device pointer
491 * @index: doorbell index
492 * @v: value to write
493 *
494 * Writes @v to the doorbell aperture at the
495 * requested doorbell index (VEGA10+).
496 */
497void amdgpu_mm_wdoorbell64(struct amdgpu_device *adev, u32 index, u64 v)
498{
499 if (index < adev->doorbell.num_doorbells) {
500 atomic64_set((atomic64_t *)(adev->doorbell.ptr + index), v);
501 } else {
502 DRM_ERROR("writing beyond doorbell aperture: 0x%08x!\n", index);
503 }
504}
505
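/*
 * Illustrative sketch (not part of the driver source): publishing a ring
 * write pointer through the doorbell helpers above. Real code takes the
 * doorbell index from adev->doorbell_index; here it is a parameter, and
 * the asic_type split (64-bit doorbells on VEGA10 and newer) is a
 * simplification for illustration.
 */
static void example_ring_doorbell(struct amdgpu_device *adev,
				  u32 doorbell_index, u64 wptr)
{
	if (adev->asic_type >= CHIP_VEGA10)
		amdgpu_mm_wdoorbell64(adev, doorbell_index, wptr);
	else
		amdgpu_mm_wdoorbell(adev, doorbell_index, (u32)wptr);
}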
d38ceaf9
AD
506/**
507 * amdgpu_invalid_rreg - dummy reg read function
508 *
509 * @adev: amdgpu device pointer
510 * @reg: offset of register
511 *
512 * Dummy register read function. Used for register blocks
513 * that certain asics don't have (all asics).
514 * Returns the value in the register.
515 */
516static uint32_t amdgpu_invalid_rreg(struct amdgpu_device *adev, uint32_t reg)
517{
518 DRM_ERROR("Invalid callback to read register 0x%04X\n", reg);
519 BUG();
520 return 0;
521}
522
523/**
524 * amdgpu_invalid_wreg - dummy reg write function
525 *
526 * @adev: amdgpu device pointer
527 * @reg: offset of register
528 * @v: value to write to the register
529 *
530 * Dummy register write function. Used for register blocks
531 * that certain asics don't have (all asics).
532 */
533static void amdgpu_invalid_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v)
534{
535 DRM_ERROR("Invalid callback to write register 0x%04X with 0x%08X\n",
536 reg, v);
537 BUG();
538}
539
4fa1c6a6
TZ
540/**
541 * amdgpu_invalid_rreg64 - dummy 64 bit reg read function
542 *
543 * @adev: amdgpu device pointer
544 * @reg: offset of register
545 *
546 * Dummy register read function. Used for register blocks
547 * that certain asics don't have (all asics).
548 * Returns the value in the register.
549 */
550static uint64_t amdgpu_invalid_rreg64(struct amdgpu_device *adev, uint32_t reg)
551{
552 DRM_ERROR("Invalid callback to read 64 bit register 0x%04X\n", reg);
553 BUG();
554 return 0;
555}
556
557/**
558 * amdgpu_invalid_wreg64 - dummy reg write function
559 *
560 * @adev: amdgpu device pointer
561 * @reg: offset of register
562 * @v: value to write to the register
563 *
564 * Dummy register write function. Used for register blocks
565 * that certain asics don't have (all asics).
566 */
567static void amdgpu_invalid_wreg64(struct amdgpu_device *adev, uint32_t reg, uint64_t v)
568{
569 DRM_ERROR("Invalid callback to write 64 bit register 0x%04X with 0x%08llX\n",
570 reg, v);
571 BUG();
572}
573
d38ceaf9
AD
574/**
575 * amdgpu_block_invalid_rreg - dummy reg read function
576 *
577 * @adev: amdgpu device pointer
578 * @block: offset of instance
579 * @reg: offset of register
580 *
581 * Dummy register read function. Used for register blocks
582 * that certain asics don't have (all asics).
583 * Returns the value in the register.
584 */
585static uint32_t amdgpu_block_invalid_rreg(struct amdgpu_device *adev,
586 uint32_t block, uint32_t reg)
587{
588 DRM_ERROR("Invalid callback to read register 0x%04X in block 0x%04X\n",
589 reg, block);
590 BUG();
591 return 0;
592}
593
594/**
595 * amdgpu_block_invalid_wreg - dummy reg write function
596 *
597 * @adev: amdgpu device pointer
598 * @block: offset of instance
599 * @reg: offset of register
600 * @v: value to write to the register
601 *
602 * Dummy register write function. Used for register blocks
603 * that certain asics don't have (all asics).
604 */
605static void amdgpu_block_invalid_wreg(struct amdgpu_device *adev,
606 uint32_t block,
607 uint32_t reg, uint32_t v)
608{
609 DRM_ERROR("Invalid block callback to write register 0x%04X in block 0x%04X with 0x%08X\n",
610 reg, block, v);
611 BUG();
612}
613
e3ecdffa
AD
614/**
615 * amdgpu_device_vram_scratch_init - allocate the VRAM scratch page
616 *
617 * @adev: amdgpu device pointer
618 *
619 * Allocates a scratch page of VRAM for use by various things in the
620 * driver.
621 */
06ec9070 622static int amdgpu_device_vram_scratch_init(struct amdgpu_device *adev)
d38ceaf9 623{
a4a02777
CK
624 return amdgpu_bo_create_kernel(adev, AMDGPU_GPU_PAGE_SIZE,
625 PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM,
626 &adev->vram_scratch.robj,
627 &adev->vram_scratch.gpu_addr,
628 (void **)&adev->vram_scratch.ptr);
d38ceaf9
AD
629}
630
e3ecdffa
AD
631/**
632 * amdgpu_device_vram_scratch_fini - Free the VRAM scratch page
633 *
634 * @adev: amdgpu device pointer
635 *
636 * Frees the VRAM scratch page.
637 */
06ec9070 638static void amdgpu_device_vram_scratch_fini(struct amdgpu_device *adev)
d38ceaf9 639{
078af1a3 640 amdgpu_bo_free_kernel(&adev->vram_scratch.robj, NULL, NULL);
d38ceaf9
AD
641}
642
643/**
9c3f2b54 644 * amdgpu_device_program_register_sequence - program an array of registers.
d38ceaf9
AD
645 *
646 * @adev: amdgpu_device pointer
647 * @registers: pointer to the register array
648 * @array_size: size of the register array
649 *
650 * Programs an array of registers with AND and OR masks.
651 * This is a helper for setting golden registers.
652 */
9c3f2b54
AD
653void amdgpu_device_program_register_sequence(struct amdgpu_device *adev,
654 const u32 *registers,
655 const u32 array_size)
d38ceaf9
AD
656{
657 u32 tmp, reg, and_mask, or_mask;
658 int i;
659
660 if (array_size % 3)
661 return;
662
663 for (i = 0; i < array_size; i +=3) {
664 reg = registers[i + 0];
665 and_mask = registers[i + 1];
666 or_mask = registers[i + 2];
667
668 if (and_mask == 0xffffffff) {
669 tmp = or_mask;
670 } else {
671 tmp = RREG32(reg);
672 tmp &= ~and_mask;
e0d07657
HZ
673 if (adev->family >= AMDGPU_FAMILY_AI)
674 tmp |= (or_mask & and_mask);
675 else
676 tmp |= or_mask;
d38ceaf9
AD
677 }
678 WREG32(reg, tmp);
679 }
680}
681
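/*
 * Illustrative sketch (not part of the driver source): a minimal golden
 * register table for amdgpu_device_program_register_sequence(). Each entry
 * is a {register, and_mask, or_mask} triple and the size passed in is the
 * number of u32 elements (a multiple of 3). The register offsets below are
 * placeholders, not real hardware registers.
 */
static const u32 example_golden_settings[] =
{
	/* reg,	and_mask, or_mask */
	0x0123, 0xffffffff, 0x00000001,	/* whole register replaced by or_mask */
	0x0456, 0x0000ff00, 0x00003400,	/* only the bits in and_mask are updated */
};

static void example_program_golden_regs(struct amdgpu_device *adev)
{
	amdgpu_device_program_register_sequence(adev,
						example_golden_settings,
						ARRAY_SIZE(example_golden_settings));
}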
e3ecdffa
AD
682/**
683 * amdgpu_device_pci_config_reset - reset the GPU
684 *
685 * @adev: amdgpu_device pointer
686 *
687 * Resets the GPU using the pci config reset sequence.
688 * Only applicable to asics prior to vega10.
689 */
8111c387 690void amdgpu_device_pci_config_reset(struct amdgpu_device *adev)
d38ceaf9
AD
691{
692 pci_write_config_dword(adev->pdev, 0x7c, AMDGPU_ASIC_RESET_DATA);
693}
694
695/*
696 * GPU doorbell aperture helpers function.
697 */
698/**
06ec9070 699 * amdgpu_device_doorbell_init - Init doorbell driver information.
d38ceaf9
AD
700 *
701 * @adev: amdgpu_device pointer
702 *
703 * Init doorbell driver information (CIK)
704 * Returns 0 on success, error on failure.
705 */
06ec9070 706static int amdgpu_device_doorbell_init(struct amdgpu_device *adev)
d38ceaf9 707{
6585661d 708
705e519e
CK
709 /* No doorbell on SI hardware generation */
710 if (adev->asic_type < CHIP_BONAIRE) {
711 adev->doorbell.base = 0;
712 adev->doorbell.size = 0;
713 adev->doorbell.num_doorbells = 0;
714 adev->doorbell.ptr = NULL;
715 return 0;
716 }
717
d6895ad3
CK
718 if (pci_resource_flags(adev->pdev, 2) & IORESOURCE_UNSET)
719 return -EINVAL;
720
22357775
AD
721 amdgpu_asic_init_doorbell_index(adev);
722
d38ceaf9
AD
723 /* doorbell bar mapping */
724 adev->doorbell.base = pci_resource_start(adev->pdev, 2);
725 adev->doorbell.size = pci_resource_len(adev->pdev, 2);
726
edf600da 727 adev->doorbell.num_doorbells = min_t(u32, adev->doorbell.size / sizeof(u32),
9564f192 728 adev->doorbell_index.max_assignment+1);
d38ceaf9
AD
729 if (adev->doorbell.num_doorbells == 0)
730 return -EINVAL;
731
ec3db8a6 732 /* For Vega, reserve and map two pages on doorbell BAR since SDMA
88dc26e4
OZ
733 * paging queue doorbells use the second page. The
734 * AMDGPU_DOORBELL64_MAX_ASSIGNMENT definition assumes all the
735 * doorbells are in the first page. So with the paging queue enabled,
736 * the max num_doorbells should be increased by one page (0x400 in dwords).
ec3db8a6
PY
737 */
738 if (adev->asic_type >= CHIP_VEGA10)
88dc26e4 739 adev->doorbell.num_doorbells += 0x400;
ec3db8a6 740
8972e5d2
CK
741 adev->doorbell.ptr = ioremap(adev->doorbell.base,
742 adev->doorbell.num_doorbells *
743 sizeof(u32));
744 if (adev->doorbell.ptr == NULL)
d38ceaf9 745 return -ENOMEM;
d38ceaf9
AD
746
747 return 0;
748}
749
750/**
06ec9070 751 * amdgpu_device_doorbell_fini - Tear down doorbell driver information.
d38ceaf9
AD
752 *
753 * @adev: amdgpu_device pointer
754 *
755 * Tear down doorbell driver information (CIK)
756 */
06ec9070 757static void amdgpu_device_doorbell_fini(struct amdgpu_device *adev)
d38ceaf9
AD
758{
759 iounmap(adev->doorbell.ptr);
760 adev->doorbell.ptr = NULL;
761}
762
22cb0164 763
d38ceaf9
AD
764
765/*
06ec9070 766 * amdgpu_device_wb_*()
455a7bc2 767 * Writeback is the method by which the GPU updates special pages in memory
ea81a173 768 * with the status of certain GPU events (fences, ring pointers,etc.).
d38ceaf9
AD
769 */
770
771/**
06ec9070 772 * amdgpu_device_wb_fini - Disable Writeback and free memory
d38ceaf9
AD
773 *
774 * @adev: amdgpu_device pointer
775 *
776 * Disables Writeback and frees the Writeback memory (all asics).
777 * Used at driver shutdown.
778 */
06ec9070 779static void amdgpu_device_wb_fini(struct amdgpu_device *adev)
d38ceaf9
AD
780{
781 if (adev->wb.wb_obj) {
a76ed485
AD
782 amdgpu_bo_free_kernel(&adev->wb.wb_obj,
783 &adev->wb.gpu_addr,
784 (void **)&adev->wb.wb);
d38ceaf9
AD
785 adev->wb.wb_obj = NULL;
786 }
787}
788
789/**
06ec9070 790 * amdgpu_device_wb_init- Init Writeback driver info and allocate memory
d38ceaf9
AD
791 *
792 * @adev: amdgpu_device pointer
793 *
455a7bc2 794 * Initializes writeback and allocates writeback memory (all asics).
d38ceaf9
AD
795 * Used at driver startup.
796 * Returns 0 on success or a negative error code on failure.
797 */
06ec9070 798static int amdgpu_device_wb_init(struct amdgpu_device *adev)
d38ceaf9
AD
799{
800 int r;
801
802 if (adev->wb.wb_obj == NULL) {
97407b63
AD
803 /* AMDGPU_MAX_WB * sizeof(uint32_t) * 8 = AMDGPU_MAX_WB 256bit slots */
804 r = amdgpu_bo_create_kernel(adev, AMDGPU_MAX_WB * sizeof(uint32_t) * 8,
a76ed485
AD
805 PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT,
806 &adev->wb.wb_obj, &adev->wb.gpu_addr,
807 (void **)&adev->wb.wb);
d38ceaf9
AD
808 if (r) {
809 dev_warn(adev->dev, "(%d) create WB bo failed\n", r);
810 return r;
811 }
d38ceaf9
AD
812
813 adev->wb.num_wb = AMDGPU_MAX_WB;
814 memset(&adev->wb.used, 0, sizeof(adev->wb.used));
815
816 /* clear wb memory */
73469585 817 memset((char *)adev->wb.wb, 0, AMDGPU_MAX_WB * sizeof(uint32_t) * 8);
d38ceaf9
AD
818 }
819
820 return 0;
821}
822
823/**
131b4b36 824 * amdgpu_device_wb_get - Allocate a wb entry
d38ceaf9
AD
825 *
826 * @adev: amdgpu_device pointer
827 * @wb: wb index
828 *
829 * Allocate a wb slot for use by the driver (all asics).
830 * Returns 0 on success or -EINVAL on failure.
831 */
131b4b36 832int amdgpu_device_wb_get(struct amdgpu_device *adev, u32 *wb)
d38ceaf9
AD
833{
834 unsigned long offset = find_first_zero_bit(adev->wb.used, adev->wb.num_wb);
d38ceaf9 835
97407b63 836 if (offset < adev->wb.num_wb) {
7014285a 837 __set_bit(offset, adev->wb.used);
63ae07ca 838 *wb = offset << 3; /* convert to dw offset */
0915fdbc
ML
839 return 0;
840 } else {
841 return -EINVAL;
842 }
843}
844
d38ceaf9 845/**
131b4b36 846 * amdgpu_device_wb_free - Free a wb entry
d38ceaf9
AD
847 *
848 * @adev: amdgpu_device pointer
849 * @wb: wb index
850 *
851 * Free a wb slot allocated for use by the driver (all asics)
852 */
131b4b36 853void amdgpu_device_wb_free(struct amdgpu_device *adev, u32 wb)
d38ceaf9 854{
73469585 855 wb >>= 3;
d38ceaf9 856 if (wb < adev->wb.num_wb)
73469585 857 __clear_bit(wb, adev->wb.used);
d38ceaf9
AD
858}
859
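/*
 * Illustrative sketch (not part of the driver source): typical use of the
 * writeback helpers above. The index returned by amdgpu_device_wb_get() is
 * a dword offset into the wb buffer, so the CPU view of the slot is
 * &adev->wb.wb[wb] and its GPU address is adev->wb.gpu_addr + wb * 4.
 * The wrapper function is hypothetical.
 */
static int example_use_wb_slot(struct amdgpu_device *adev)
{
	volatile uint32_t *cpu_addr;
	u64 gpu_addr;
	u32 wb;
	int r;

	r = amdgpu_device_wb_get(adev, &wb);
	if (r)
		return r;

	cpu_addr = &adev->wb.wb[wb];
	gpu_addr = adev->wb.gpu_addr + wb * 4;

	/* ... hand gpu_addr to the engine, poll *cpu_addr for the result ... */

	amdgpu_device_wb_free(adev, wb);
	return 0;
}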
d6895ad3
CK
860/**
861 * amdgpu_device_resize_fb_bar - try to resize FB BAR
862 *
863 * @adev: amdgpu_device pointer
864 *
865 * Try to resize FB BAR to make all VRAM CPU accessible. We try very hard not
866 * to fail, but if any of the BARs is not accessible after the resize we abort
867 * driver loading by returning -ENODEV.
868 */
869int amdgpu_device_resize_fb_bar(struct amdgpu_device *adev)
870{
770d13b1 871 u64 space_needed = roundup_pow_of_two(adev->gmc.real_vram_size);
d6895ad3 872 u32 rbar_size = order_base_2(((space_needed >> 20) | 1)) - 1;
31b8adab
CK
873 struct pci_bus *root;
874 struct resource *res;
875 unsigned i;
d6895ad3
CK
876 u16 cmd;
877 int r;
878
0c03b912 879 /* Bypass for VF */
880 if (amdgpu_sriov_vf(adev))
881 return 0;
882
31b8adab
CK
883 /* Check if the root BUS has 64bit memory resources */
884 root = adev->pdev->bus;
885 while (root->parent)
886 root = root->parent;
887
888 pci_bus_for_each_resource(root, res, i) {
0ebb7c54 889 if (res && res->flags & (IORESOURCE_MEM | IORESOURCE_MEM_64) &&
31b8adab
CK
890 res->start > 0x100000000ull)
891 break;
892 }
893
894 /* Trying to resize is pointless without a root hub window above 4GB */
895 if (!res)
896 return 0;
897
d6895ad3
CK
898 /* Disable memory decoding while we change the BAR addresses and size */
899 pci_read_config_word(adev->pdev, PCI_COMMAND, &cmd);
900 pci_write_config_word(adev->pdev, PCI_COMMAND,
901 cmd & ~PCI_COMMAND_MEMORY);
902
903 /* Free the VRAM and doorbell BAR, we most likely need to move both. */
06ec9070 904 amdgpu_device_doorbell_fini(adev);
d6895ad3
CK
905 if (adev->asic_type >= CHIP_BONAIRE)
906 pci_release_resource(adev->pdev, 2);
907
908 pci_release_resource(adev->pdev, 0);
909
910 r = pci_resize_resource(adev->pdev, 0, rbar_size);
911 if (r == -ENOSPC)
912 DRM_INFO("Not enough PCI address space for a large BAR.");
913 else if (r && r != -ENOTSUPP)
914 DRM_ERROR("Problem resizing BAR0 (%d).", r);
915
916 pci_assign_unassigned_bus_resources(adev->pdev->bus);
917
918 /* When the doorbell or fb BAR isn't available we have no chance of
919 * using the device.
920 */
06ec9070 921 r = amdgpu_device_doorbell_init(adev);
d6895ad3
CK
922 if (r || (pci_resource_flags(adev->pdev, 0) & IORESOURCE_UNSET))
923 return -ENODEV;
924
925 pci_write_config_word(adev->pdev, PCI_COMMAND, cmd);
926
927 return 0;
928}
a05502e5 929
d38ceaf9
AD
930/*
931 * GPU helpers function.
932 */
933/**
39c640c0 934 * amdgpu_device_need_post - check if the hw need post or not
d38ceaf9
AD
935 *
936 * @adev: amdgpu_device pointer
937 *
c836fec5
JQ
938 * Check if the asic has been initialized (all asics) at driver startup,
939 * or if a post is needed because a hw reset was performed.
940 * Returns true if post is needed, false if not.
d38ceaf9 941 */
39c640c0 942bool amdgpu_device_need_post(struct amdgpu_device *adev)
d38ceaf9
AD
943{
944 uint32_t reg;
945
bec86378
ML
946 if (amdgpu_sriov_vf(adev))
947 return false;
948
949 if (amdgpu_passthrough(adev)) {
1da2c326
ML
950 /* for FIJI: In whole GPU pass-through virtualization case, after VM reboot
951 * some old smc fw still needs the driver to do a vPost, otherwise the gpu hangs;
952 * smc fw versions above 22.15 don't have this flaw, so we force
953 * vPost to be executed for smc versions below 22.15
bec86378
ML
954 */
955 if (adev->asic_type == CHIP_FIJI) {
956 int err;
957 uint32_t fw_ver;
958 err = request_firmware(&adev->pm.fw, "amdgpu/fiji_smc.bin", adev->dev);
959 /* force vPost if an error occurred */
960 if (err)
961 return true;
962
963 fw_ver = *((uint32_t *)adev->pm.fw->data + 69);
1da2c326
ML
964 if (fw_ver < 0x00160e00)
965 return true;
bec86378 966 }
bec86378 967 }
91fe77eb 968
969 if (adev->has_hw_reset) {
970 adev->has_hw_reset = false;
971 return true;
972 }
973
974 /* bios scratch used on CIK+ */
975 if (adev->asic_type >= CHIP_BONAIRE)
976 return amdgpu_atombios_scratch_need_asic_init(adev);
977
978 /* check MEM_SIZE for older asics */
979 reg = amdgpu_asic_get_config_memsize(adev);
980
981 if ((reg != 0) && (reg != 0xffffffff))
982 return false;
983
984 return true;
bec86378
ML
985}
986
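/*
 * Illustrative sketch (not part of the driver source): the typical caller
 * pattern for amdgpu_device_need_post(), posting the asic through the
 * atombios tables only when required. amdgpu_atom_asic_init() is the
 * existing atombios entry point; the wrapper function is hypothetical.
 */
static int example_post_if_needed(struct amdgpu_device *adev)
{
	if (!amdgpu_device_need_post(adev))
		return 0;

	return amdgpu_atom_asic_init(adev->mode_info.atom_context);
}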
d38ceaf9
AD
987/* if we get transitioned to only one device, take VGA back */
988/**
06ec9070 989 * amdgpu_device_vga_set_decode - enable/disable vga decode
d38ceaf9
AD
990 *
991 * @cookie: amdgpu_device pointer
992 * @state: enable/disable vga decode
993 *
994 * Enable/disable vga decode (all asics).
995 * Returns VGA resource flags.
996 */
06ec9070 997static unsigned int amdgpu_device_vga_set_decode(void *cookie, bool state)
d38ceaf9
AD
998{
999 struct amdgpu_device *adev = cookie;
1000 amdgpu_asic_set_vga_state(adev, state);
1001 if (state)
1002 return VGA_RSRC_LEGACY_IO | VGA_RSRC_LEGACY_MEM |
1003 VGA_RSRC_NORMAL_IO | VGA_RSRC_NORMAL_MEM;
1004 else
1005 return VGA_RSRC_NORMAL_IO | VGA_RSRC_NORMAL_MEM;
1006}
1007
e3ecdffa
AD
1008/**
1009 * amdgpu_device_check_block_size - validate the vm block size
1010 *
1011 * @adev: amdgpu_device pointer
1012 *
1013 * Validates the vm block size specified via module parameter.
1014 * The vm block size defines the number of bits in the page table versus the page directory,
1015 * a page is 4KB so we have 12 bits offset, minimum 9 bits in the
1016 * page table and the remaining bits are in the page directory.
1017 */
06ec9070 1018static void amdgpu_device_check_block_size(struct amdgpu_device *adev)
a1adf8be
CZ
1019{
1020 /* defines number of bits in page table versus page directory,
1021 * a page is 4KB so we have 12 bits offset, minimum 9 bits in the
1022 * page table and the remaining bits are in the page directory */
bab4fee7
JZ
1023 if (amdgpu_vm_block_size == -1)
1024 return;
a1adf8be 1025
bab4fee7 1026 if (amdgpu_vm_block_size < 9) {
a1adf8be
CZ
1027 dev_warn(adev->dev, "VM page table size (%d) too small\n",
1028 amdgpu_vm_block_size);
97489129 1029 amdgpu_vm_block_size = -1;
a1adf8be 1030 }
a1adf8be
CZ
1031}
1032
e3ecdffa
AD
1033/**
1034 * amdgpu_device_check_vm_size - validate the vm size
1035 *
1036 * @adev: amdgpu_device pointer
1037 *
1038 * Validates the vm size in GB specified via module parameter.
1039 * The VM size is the size of the GPU virtual memory space in GB.
1040 */
06ec9070 1041static void amdgpu_device_check_vm_size(struct amdgpu_device *adev)
83ca145d 1042{
64dab074
AD
1043 /* no need to check the default value */
1044 if (amdgpu_vm_size == -1)
1045 return;
1046
83ca145d
ZJ
1047 if (amdgpu_vm_size < 1) {
1048 dev_warn(adev->dev, "VM size (%d) too small, min is 1GB\n",
1049 amdgpu_vm_size);
f3368128 1050 amdgpu_vm_size = -1;
83ca145d 1051 }
83ca145d
ZJ
1052}
1053
7951e376
RZ
1054static void amdgpu_device_check_smu_prv_buffer_size(struct amdgpu_device *adev)
1055{
1056 struct sysinfo si;
a9d4fe2f 1057 bool is_os_64 = (sizeof(void *) == 8);
7951e376
RZ
1058 uint64_t total_memory;
1059 uint64_t dram_size_seven_GB = 0x1B8000000;
1060 uint64_t dram_size_three_GB = 0xB8000000;
1061
1062 if (amdgpu_smu_memory_pool_size == 0)
1063 return;
1064
1065 if (!is_os_64) {
1066 DRM_WARN("Not 64-bit OS, feature not supported\n");
1067 goto def_value;
1068 }
1069 si_meminfo(&si);
1070 total_memory = (uint64_t)si.totalram * si.mem_unit;
1071
1072 if ((amdgpu_smu_memory_pool_size == 1) ||
1073 (amdgpu_smu_memory_pool_size == 2)) {
1074 if (total_memory < dram_size_three_GB)
1075 goto def_value1;
1076 } else if ((amdgpu_smu_memory_pool_size == 4) ||
1077 (amdgpu_smu_memory_pool_size == 8)) {
1078 if (total_memory < dram_size_seven_GB)
1079 goto def_value1;
1080 } else {
1081 DRM_WARN("Smu memory pool size not supported\n");
1082 goto def_value;
1083 }
1084 adev->pm.smu_prv_buffer_size = amdgpu_smu_memory_pool_size << 28;
1085
1086 return;
1087
1088def_value1:
1089 DRM_WARN("No enough system memory\n");
1090def_value:
1091 adev->pm.smu_prv_buffer_size = 0;
1092}
1093
d38ceaf9 1094/**
06ec9070 1095 * amdgpu_device_check_arguments - validate module params
d38ceaf9
AD
1096 *
1097 * @adev: amdgpu_device pointer
1098 *
1099 * Validates certain module parameters and updates
1100 * the associated values used by the driver (all asics).
1101 */
912dfc84 1102static int amdgpu_device_check_arguments(struct amdgpu_device *adev)
d38ceaf9 1103{
5b011235
CZ
1104 if (amdgpu_sched_jobs < 4) {
1105 dev_warn(adev->dev, "sched jobs (%d) must be at least 4\n",
1106 amdgpu_sched_jobs);
1107 amdgpu_sched_jobs = 4;
76117507 1108 } else if (!is_power_of_2(amdgpu_sched_jobs)){
5b011235
CZ
1109 dev_warn(adev->dev, "sched jobs (%d) must be a power of 2\n",
1110 amdgpu_sched_jobs);
1111 amdgpu_sched_jobs = roundup_pow_of_two(amdgpu_sched_jobs);
1112 }
d38ceaf9 1113
83e74db6 1114 if (amdgpu_gart_size != -1 && amdgpu_gart_size < 32) {
f9321cc4
CK
1115 /* gart size must be greater or equal to 32M */
1116 dev_warn(adev->dev, "gart size (%d) too small\n",
1117 amdgpu_gart_size);
83e74db6 1118 amdgpu_gart_size = -1;
d38ceaf9
AD
1119 }
1120
36d38372 1121 if (amdgpu_gtt_size != -1 && amdgpu_gtt_size < 32) {
c4e1a13a 1122 /* gtt size must be greater or equal to 32M */
36d38372
CK
1123 dev_warn(adev->dev, "gtt size (%d) too small\n",
1124 amdgpu_gtt_size);
1125 amdgpu_gtt_size = -1;
d38ceaf9
AD
1126 }
1127
d07f14be
RH
1128 /* valid range is between 4 and 9 inclusive */
1129 if (amdgpu_vm_fragment_size != -1 &&
1130 (amdgpu_vm_fragment_size > 9 || amdgpu_vm_fragment_size < 4)) {
1131 dev_warn(adev->dev, "valid range is between 4 and 9\n");
1132 amdgpu_vm_fragment_size = -1;
1133 }
1134
7951e376
RZ
1135 amdgpu_device_check_smu_prv_buffer_size(adev);
1136
06ec9070 1137 amdgpu_device_check_vm_size(adev);
d38ceaf9 1138
06ec9070 1139 amdgpu_device_check_block_size(adev);
6a7f76e7 1140
19aede77 1141 adev->firmware.load_type = amdgpu_ucode_get_load_type(adev, amdgpu_fw_load_type);
912dfc84 1142
e3c00faa 1143 return 0;
d38ceaf9
AD
1144}
1145
1146/**
1147 * amdgpu_switcheroo_set_state - set switcheroo state
1148 *
1149 * @pdev: pci dev pointer
1694467b 1150 * @state: vga_switcheroo state
d38ceaf9
AD
1151 *
1152 * Callback for the switcheroo driver. Suspends or resumes
1153 * the asics before or after it is powered up using ACPI methods.
1154 */
1155static void amdgpu_switcheroo_set_state(struct pci_dev *pdev, enum vga_switcheroo_state state)
1156{
1157 struct drm_device *dev = pci_get_drvdata(pdev);
de185019 1158 int r;
d38ceaf9 1159
31af062a 1160 if (amdgpu_device_supports_boco(dev) && state == VGA_SWITCHEROO_OFF)
d38ceaf9
AD
1161 return;
1162
1163 if (state == VGA_SWITCHEROO_ON) {
dd4fa6c1 1164 pr_info("switched on\n");
d38ceaf9
AD
1165 /* don't suspend or resume card normally */
1166 dev->switch_power_state = DRM_SWITCH_POWER_CHANGING;
1167
de185019
AD
1168 pci_set_power_state(dev->pdev, PCI_D0);
1169 pci_restore_state(dev->pdev);
1170 r = pci_enable_device(dev->pdev);
1171 if (r)
1172 DRM_WARN("pci_enable_device failed (%d)\n", r);
1173 amdgpu_device_resume(dev, true);
d38ceaf9 1174
d38ceaf9
AD
1175 dev->switch_power_state = DRM_SWITCH_POWER_ON;
1176 drm_kms_helper_poll_enable(dev);
1177 } else {
dd4fa6c1 1178 pr_info("switched off\n");
d38ceaf9
AD
1179 drm_kms_helper_poll_disable(dev);
1180 dev->switch_power_state = DRM_SWITCH_POWER_CHANGING;
de185019
AD
1181 amdgpu_device_suspend(dev, true);
1182 pci_save_state(dev->pdev);
1183 /* Shut down the device */
1184 pci_disable_device(dev->pdev);
1185 pci_set_power_state(dev->pdev, PCI_D3cold);
d38ceaf9
AD
1186 dev->switch_power_state = DRM_SWITCH_POWER_OFF;
1187 }
1188}
1189
1190/**
1191 * amdgpu_switcheroo_can_switch - see if switcheroo state can change
1192 *
1193 * @pdev: pci dev pointer
1194 *
1195 * Callback for the switcheroo driver. Check if the switcheroo
1196 * state can be changed.
1197 * Returns true if the state can be changed, false if not.
1198 */
1199static bool amdgpu_switcheroo_can_switch(struct pci_dev *pdev)
1200{
1201 struct drm_device *dev = pci_get_drvdata(pdev);
1202
1203 /*
1204 * FIXME: open_count is protected by drm_global_mutex but that would lead to
1205 * locking inversion with the driver load path. And the access here is
1206 * completely racy anyway. So don't bother with locking for now.
1207 */
7e13ad89 1208 return atomic_read(&dev->open_count) == 0;
d38ceaf9
AD
1209}
1210
1211static const struct vga_switcheroo_client_ops amdgpu_switcheroo_ops = {
1212 .set_gpu_state = amdgpu_switcheroo_set_state,
1213 .reprobe = NULL,
1214 .can_switch = amdgpu_switcheroo_can_switch,
1215};
1216
e3ecdffa
AD
1217/**
1218 * amdgpu_device_ip_set_clockgating_state - set the CG state
1219 *
87e3f136 1220 * @dev: amdgpu_device pointer
e3ecdffa
AD
1221 * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
1222 * @state: clockgating state (gate or ungate)
1223 *
1224 * Sets the requested clockgating state for all instances of
1225 * the hardware IP specified.
1226 * Returns the error code from the last instance.
1227 */
43fa561f 1228int amdgpu_device_ip_set_clockgating_state(void *dev,
2990a1fc
AD
1229 enum amd_ip_block_type block_type,
1230 enum amd_clockgating_state state)
d38ceaf9 1231{
43fa561f 1232 struct amdgpu_device *adev = dev;
d38ceaf9
AD
1233 int i, r = 0;
1234
1235 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 1236 if (!adev->ip_blocks[i].status.valid)
9ecbe7f5 1237 continue;
c722865a
RZ
1238 if (adev->ip_blocks[i].version->type != block_type)
1239 continue;
1240 if (!adev->ip_blocks[i].version->funcs->set_clockgating_state)
1241 continue;
1242 r = adev->ip_blocks[i].version->funcs->set_clockgating_state(
1243 (void *)adev, state);
1244 if (r)
1245 DRM_ERROR("set_clockgating_state of IP block <%s> failed %d\n",
1246 adev->ip_blocks[i].version->funcs->name, r);
d38ceaf9
AD
1247 }
1248 return r;
1249}
1250
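/*
 * Illustrative sketch (not part of the driver source): gating or ungating
 * GFX clocks through the helper above, using the existing amd_shared.h
 * enums. The wrapper function is hypothetical.
 */
static int example_toggle_gfx_clockgating(struct amdgpu_device *adev, bool gate)
{
	return amdgpu_device_ip_set_clockgating_state(adev,
					AMD_IP_BLOCK_TYPE_GFX,
					gate ? AMD_CG_STATE_GATE :
					       AMD_CG_STATE_UNGATE);
}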
e3ecdffa
AD
1251/**
1252 * amdgpu_device_ip_set_powergating_state - set the PG state
1253 *
87e3f136 1254 * @dev: amdgpu_device pointer
e3ecdffa
AD
1255 * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
1256 * @state: powergating state (gate or ungate)
1257 *
1258 * Sets the requested powergating state for all instances of
1259 * the hardware IP specified.
1260 * Returns the error code from the last instance.
1261 */
43fa561f 1262int amdgpu_device_ip_set_powergating_state(void *dev,
2990a1fc
AD
1263 enum amd_ip_block_type block_type,
1264 enum amd_powergating_state state)
d38ceaf9 1265{
43fa561f 1266 struct amdgpu_device *adev = dev;
d38ceaf9
AD
1267 int i, r = 0;
1268
1269 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 1270 if (!adev->ip_blocks[i].status.valid)
9ecbe7f5 1271 continue;
c722865a
RZ
1272 if (adev->ip_blocks[i].version->type != block_type)
1273 continue;
1274 if (!adev->ip_blocks[i].version->funcs->set_powergating_state)
1275 continue;
1276 r = adev->ip_blocks[i].version->funcs->set_powergating_state(
1277 (void *)adev, state);
1278 if (r)
1279 DRM_ERROR("set_powergating_state of IP block <%s> failed %d\n",
1280 adev->ip_blocks[i].version->funcs->name, r);
d38ceaf9
AD
1281 }
1282 return r;
1283}
1284
e3ecdffa
AD
1285/**
1286 * amdgpu_device_ip_get_clockgating_state - get the CG state
1287 *
1288 * @adev: amdgpu_device pointer
1289 * @flags: clockgating feature flags
1290 *
1291 * Walks the list of IPs on the device and updates the clockgating
1292 * flags for each IP.
1293 * Updates @flags with the feature flags for each hardware IP where
1294 * clockgating is enabled.
1295 */
2990a1fc
AD
1296void amdgpu_device_ip_get_clockgating_state(struct amdgpu_device *adev,
1297 u32 *flags)
6cb2d4e4
HR
1298{
1299 int i;
1300
1301 for (i = 0; i < adev->num_ip_blocks; i++) {
1302 if (!adev->ip_blocks[i].status.valid)
1303 continue;
1304 if (adev->ip_blocks[i].version->funcs->get_clockgating_state)
1305 adev->ip_blocks[i].version->funcs->get_clockgating_state((void *)adev, flags);
1306 }
1307}
1308
e3ecdffa
AD
1309/**
1310 * amdgpu_device_ip_wait_for_idle - wait for idle
1311 *
1312 * @adev: amdgpu_device pointer
1313 * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
1314 *
1315 * Waits for the requested hardware IP to be idle.
1316 * Returns 0 for success or a negative error code on failure.
1317 */
2990a1fc
AD
1318int amdgpu_device_ip_wait_for_idle(struct amdgpu_device *adev,
1319 enum amd_ip_block_type block_type)
5dbbb60b
AD
1320{
1321 int i, r;
1322
1323 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 1324 if (!adev->ip_blocks[i].status.valid)
9ecbe7f5 1325 continue;
a1255107
AD
1326 if (adev->ip_blocks[i].version->type == block_type) {
1327 r = adev->ip_blocks[i].version->funcs->wait_for_idle((void *)adev);
5dbbb60b
AD
1328 if (r)
1329 return r;
1330 break;
1331 }
1332 }
1333 return 0;
1334
1335}
1336
e3ecdffa
AD
1337/**
1338 * amdgpu_device_ip_is_idle - is the hardware IP idle
1339 *
1340 * @adev: amdgpu_device pointer
1341 * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
1342 *
1343 * Check if the hardware IP is idle or not.
1344 * Returns true if the IP is idle, false if not.
1345 */
2990a1fc
AD
1346bool amdgpu_device_ip_is_idle(struct amdgpu_device *adev,
1347 enum amd_ip_block_type block_type)
5dbbb60b
AD
1348{
1349 int i;
1350
1351 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 1352 if (!adev->ip_blocks[i].status.valid)
9ecbe7f5 1353 continue;
a1255107
AD
1354 if (adev->ip_blocks[i].version->type == block_type)
1355 return adev->ip_blocks[i].version->funcs->is_idle((void *)adev);
5dbbb60b
AD
1356 }
1357 return true;
1358
1359}
1360
e3ecdffa
AD
1361/**
1362 * amdgpu_device_ip_get_ip_block - get a hw IP pointer
1363 *
1364 * @adev: amdgpu_device pointer
87e3f136 1365 * @type: Type of hardware IP (SMU, GFX, UVD, etc.)
e3ecdffa
AD
1366 *
1367 * Returns a pointer to the hardware IP block structure
1368 * if it exists for the asic, otherwise NULL.
1369 */
2990a1fc
AD
1370struct amdgpu_ip_block *
1371amdgpu_device_ip_get_ip_block(struct amdgpu_device *adev,
1372 enum amd_ip_block_type type)
d38ceaf9
AD
1373{
1374 int i;
1375
1376 for (i = 0; i < adev->num_ip_blocks; i++)
a1255107 1377 if (adev->ip_blocks[i].version->type == type)
d38ceaf9
AD
1378 return &adev->ip_blocks[i];
1379
1380 return NULL;
1381}
1382
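/*
 * Illustrative sketch (not part of the driver source): looking up an IP
 * block with the helper above and reporting its version. The wrapper
 * function is hypothetical.
 */
static void example_log_psp_version(struct amdgpu_device *adev)
{
	struct amdgpu_ip_block *ip_block =
		amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_PSP);

	if (ip_block)
		DRM_INFO("PSP IP v%u.%u.%u\n", ip_block->version->major,
			 ip_block->version->minor, ip_block->version->rev);
}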
1383/**
2990a1fc 1384 * amdgpu_device_ip_block_version_cmp
d38ceaf9
AD
1385 *
1386 * @adev: amdgpu_device pointer
5fc3aeeb 1387 * @type: enum amd_ip_block_type
d38ceaf9
AD
1388 * @major: major version
1389 * @minor: minor version
1390 *
1391 * Returns 0 if the IP block version is equal to or greater than the requested version,
1392 * or 1 if it is smaller or the ip_block doesn't exist.
1393 */
2990a1fc
AD
1394int amdgpu_device_ip_block_version_cmp(struct amdgpu_device *adev,
1395 enum amd_ip_block_type type,
1396 u32 major, u32 minor)
d38ceaf9 1397{
2990a1fc 1398 struct amdgpu_ip_block *ip_block = amdgpu_device_ip_get_ip_block(adev, type);
d38ceaf9 1399
a1255107
AD
1400 if (ip_block && ((ip_block->version->major > major) ||
1401 ((ip_block->version->major == major) &&
1402 (ip_block->version->minor >= minor))))
d38ceaf9
AD
1403 return 0;
1404
1405 return 1;
1406}
1407
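/*
 * Illustrative sketch (not part of the driver source): branching on an IP
 * block version with the compare helper above. The block type enum is
 * real; the version numbers and wrapper function are arbitrary examples.
 */
static bool example_has_gmc_v8_or_newer(struct amdgpu_device *adev)
{
	/* returns 0 when the installed IP block is >= the requested version */
	return amdgpu_device_ip_block_version_cmp(adev, AMD_IP_BLOCK_TYPE_GMC,
						  8, 0) == 0;
}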
a1255107 1408/**
2990a1fc 1409 * amdgpu_device_ip_block_add
a1255107
AD
1410 *
1411 * @adev: amdgpu_device pointer
1412 * @ip_block_version: pointer to the IP to add
1413 *
1414 * Adds the IP block driver information to the collection of IPs
1415 * on the asic.
1416 */
2990a1fc
AD
1417int amdgpu_device_ip_block_add(struct amdgpu_device *adev,
1418 const struct amdgpu_ip_block_version *ip_block_version)
a1255107
AD
1419{
1420 if (!ip_block_version)
1421 return -EINVAL;
1422
e966a725 1423 DRM_INFO("add ip block number %d <%s>\n", adev->num_ip_blocks,
a0bae357
HR
1424 ip_block_version->funcs->name);
1425
a1255107
AD
1426 adev->ip_blocks[adev->num_ip_blocks++].version = ip_block_version;
1427
1428 return 0;
1429}
1430
e3ecdffa
AD
1431/**
1432 * amdgpu_device_enable_virtual_display - enable virtual display feature
1433 *
1434 * @adev: amdgpu_device pointer
1435 *
1436 * Enables the virtual display feature if the user has enabled it via
1437 * the module parameter virtual_display. This feature provides a virtual
1438 * display hardware on headless boards or in virtualized environments.
1439 * This function parses and validates the configuration string specified by
1440 * the user and configures the virtual display configuration (number of
1441 * virtual connectors, crtcs, etc.) specified.
1442 */
483ef985 1443static void amdgpu_device_enable_virtual_display(struct amdgpu_device *adev)
9accf2fd
ED
1444{
1445 adev->enable_virtual_display = false;
1446
1447 if (amdgpu_virtual_display) {
1448 struct drm_device *ddev = adev->ddev;
1449 const char *pci_address_name = pci_name(ddev->pdev);
0f66356d 1450 char *pciaddstr, *pciaddstr_tmp, *pciaddname_tmp, *pciaddname;
9accf2fd
ED
1451
1452 pciaddstr = kstrdup(amdgpu_virtual_display, GFP_KERNEL);
1453 pciaddstr_tmp = pciaddstr;
0f66356d
ED
1454 while ((pciaddname_tmp = strsep(&pciaddstr_tmp, ";"))) {
1455 pciaddname = strsep(&pciaddname_tmp, ",");
967de2a9
YT
1456 if (!strcmp("all", pciaddname)
1457 || !strcmp(pci_address_name, pciaddname)) {
0f66356d
ED
1458 long num_crtc;
1459 int res = -1;
1460
9accf2fd 1461 adev->enable_virtual_display = true;
0f66356d
ED
1462
1463 if (pciaddname_tmp)
1464 res = kstrtol(pciaddname_tmp, 10,
1465 &num_crtc);
1466
1467 if (!res) {
1468 if (num_crtc < 1)
1469 num_crtc = 1;
1470 if (num_crtc > 6)
1471 num_crtc = 6;
1472 adev->mode_info.num_crtc = num_crtc;
1473 } else {
1474 adev->mode_info.num_crtc = 1;
1475 }
9accf2fd
ED
1476 break;
1477 }
1478 }
1479
0f66356d
ED
1480 DRM_INFO("virtual display string:%s, %s:virtual_display:%d, num_crtc:%d\n",
1481 amdgpu_virtual_display, pci_address_name,
1482 adev->enable_virtual_display, adev->mode_info.num_crtc);
9accf2fd
ED
1483
1484 kfree(pciaddstr);
1485 }
1486}
1487
e3ecdffa
AD
1488/**
1489 * amdgpu_device_parse_gpu_info_fw - parse gpu info firmware
1490 *
1491 * @adev: amdgpu_device pointer
1492 *
1493 * Parses the asic configuration parameters specified in the gpu info
1494 * firmware and makes them available to the driver for use in configuring
1495 * the asic.
1496 * Returns 0 on success, -EINVAL on failure.
1497 */
e2a75f88
AD
1498static int amdgpu_device_parse_gpu_info_fw(struct amdgpu_device *adev)
1499{
e2a75f88
AD
1500 const char *chip_name;
1501 char fw_name[30];
1502 int err;
1503 const struct gpu_info_firmware_header_v1_0 *hdr;
1504
ab4fe3e1
HR
1505 adev->firmware.gpu_info_fw = NULL;
1506
e2a75f88
AD
1507 switch (adev->asic_type) {
1508 case CHIP_TOPAZ:
1509 case CHIP_TONGA:
1510 case CHIP_FIJI:
e2a75f88 1511 case CHIP_POLARIS10:
cc07f18d 1512 case CHIP_POLARIS11:
e2a75f88 1513 case CHIP_POLARIS12:
cc07f18d 1514 case CHIP_VEGAM:
e2a75f88
AD
1515 case CHIP_CARRIZO:
1516 case CHIP_STONEY:
1517#ifdef CONFIG_DRM_AMDGPU_SI
1518 case CHIP_VERDE:
1519 case CHIP_TAHITI:
1520 case CHIP_PITCAIRN:
1521 case CHIP_OLAND:
1522 case CHIP_HAINAN:
1523#endif
1524#ifdef CONFIG_DRM_AMDGPU_CIK
1525 case CHIP_BONAIRE:
1526 case CHIP_HAWAII:
1527 case CHIP_KAVERI:
1528 case CHIP_KABINI:
1529 case CHIP_MULLINS:
1530#endif
27c0bc71 1531 case CHIP_VEGA20:
e2a75f88
AD
1532 default:
1533 return 0;
1534 case CHIP_VEGA10:
1535 chip_name = "vega10";
1536 break;
3f76dced
AD
1537 case CHIP_VEGA12:
1538 chip_name = "vega12";
1539 break;
2d2e5e7e 1540 case CHIP_RAVEN:
54c4d17e
FX
1541 if (adev->rev_id >= 8)
1542 chip_name = "raven2";
741deade
AD
1543 else if (adev->pdev->device == 0x15d8)
1544 chip_name = "picasso";
54c4d17e
FX
1545 else
1546 chip_name = "raven";
2d2e5e7e 1547 break;
65e60f6e
LM
1548 case CHIP_ARCTURUS:
1549 chip_name = "arcturus";
1550 break;
b51a26a0
HR
1551 case CHIP_RENOIR:
1552 chip_name = "renoir";
1553 break;
23c6268e
HR
1554 case CHIP_NAVI10:
1555 chip_name = "navi10";
1556 break;
ed42cfe1
XY
1557 case CHIP_NAVI14:
1558 chip_name = "navi14";
1559 break;
42b325e5
XY
1560 case CHIP_NAVI12:
1561 chip_name = "navi12";
1562 break;
e2a75f88
AD
1563 }
1564
1565 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_gpu_info.bin", chip_name);
ab4fe3e1 1566 err = request_firmware(&adev->firmware.gpu_info_fw, fw_name, adev->dev);
e2a75f88
AD
1567 if (err) {
1568 dev_err(adev->dev,
1569 "Failed to load gpu_info firmware \"%s\"\n",
1570 fw_name);
1571 goto out;
1572 }
ab4fe3e1 1573 err = amdgpu_ucode_validate(adev->firmware.gpu_info_fw);
e2a75f88
AD
1574 if (err) {
1575 dev_err(adev->dev,
1576 "Failed to validate gpu_info firmware \"%s\"\n",
1577 fw_name);
1578 goto out;
1579 }
1580
ab4fe3e1 1581 hdr = (const struct gpu_info_firmware_header_v1_0 *)adev->firmware.gpu_info_fw->data;
e2a75f88
AD
1582 amdgpu_ucode_print_gpu_info_hdr(&hdr->header);
1583
1584 switch (hdr->version_major) {
1585 case 1:
1586 {
1587 const struct gpu_info_firmware_v1_0 *gpu_info_fw =
ab4fe3e1 1588 (const struct gpu_info_firmware_v1_0 *)(adev->firmware.gpu_info_fw->data +
e2a75f88
AD
1589 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1590
ec51d3fa
XY
1591 if (amdgpu_discovery && adev->asic_type >= CHIP_NAVI10)
1592 goto parse_soc_bounding_box;
1593
b5ab16bf
AD
1594 adev->gfx.config.max_shader_engines = le32_to_cpu(gpu_info_fw->gc_num_se);
1595 adev->gfx.config.max_cu_per_sh = le32_to_cpu(gpu_info_fw->gc_num_cu_per_sh);
1596 adev->gfx.config.max_sh_per_se = le32_to_cpu(gpu_info_fw->gc_num_sh_per_se);
1597 adev->gfx.config.max_backends_per_se = le32_to_cpu(gpu_info_fw->gc_num_rb_per_se);
e2a75f88 1598 adev->gfx.config.max_texture_channel_caches =
b5ab16bf
AD
1599 le32_to_cpu(gpu_info_fw->gc_num_tccs);
1600 adev->gfx.config.max_gprs = le32_to_cpu(gpu_info_fw->gc_num_gprs);
1601 adev->gfx.config.max_gs_threads = le32_to_cpu(gpu_info_fw->gc_num_max_gs_thds);
1602 adev->gfx.config.gs_vgt_table_depth = le32_to_cpu(gpu_info_fw->gc_gs_table_depth);
1603 adev->gfx.config.gs_prim_buffer_depth = le32_to_cpu(gpu_info_fw->gc_gsprim_buff_depth);
e2a75f88 1604 adev->gfx.config.double_offchip_lds_buf =
b5ab16bf
AD
1605 le32_to_cpu(gpu_info_fw->gc_double_offchip_lds_buffer);
1606 adev->gfx.cu_info.wave_front_size = le32_to_cpu(gpu_info_fw->gc_wave_size);
51fd0370
HZ
1607 adev->gfx.cu_info.max_waves_per_simd =
1608 le32_to_cpu(gpu_info_fw->gc_max_waves_per_simd);
1609 adev->gfx.cu_info.max_scratch_slots_per_cu =
1610 le32_to_cpu(gpu_info_fw->gc_max_scratch_slots_per_cu);
1611 adev->gfx.cu_info.lds_size = le32_to_cpu(gpu_info_fw->gc_lds_size);
48321c3d 1612 if (hdr->version_minor >= 1) {
35c2e910
HZ
1613 const struct gpu_info_firmware_v1_1 *gpu_info_fw =
1614 (const struct gpu_info_firmware_v1_1 *)(adev->firmware.gpu_info_fw->data +
1615 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1616 adev->gfx.config.num_sc_per_sh =
1617 le32_to_cpu(gpu_info_fw->num_sc_per_sh);
1618 adev->gfx.config.num_packer_per_sc =
1619 le32_to_cpu(gpu_info_fw->num_packer_per_sc);
1620 }
ec51d3fa
XY
1621
1622parse_soc_bounding_box:
ec51d3fa
XY
1623 /*
1624 * soc bounding box info is not integrated in the discovery table,
1625 * so we always need to parse it from the gpu info firmware.
1626 */
48321c3d
HW
1627 if (hdr->version_minor == 2) {
1628 const struct gpu_info_firmware_v1_2 *gpu_info_fw =
1629 (const struct gpu_info_firmware_v1_2 *)(adev->firmware.gpu_info_fw->data +
1630 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1631 adev->dm.soc_bounding_box = &gpu_info_fw->soc_bounding_box;
1632 }
e2a75f88
AD
1633 break;
1634 }
1635 default:
1636 dev_err(adev->dev,
1637 "Unsupported gpu_info table %d\n", hdr->header.ucode_version);
1638 err = -EINVAL;
1639 goto out;
1640 }
1641out:
e2a75f88
AD
1642 return err;
1643}
1644
e3ecdffa
AD
1645/**
1646 * amdgpu_device_ip_early_init - run early init for hardware IPs
1647 *
1648 * @adev: amdgpu_device pointer
1649 *
1650 * Early initialization pass for hardware IPs. The hardware IPs that make
1651 * up each asic are discovered, and each IP's early_init callback is run. This
1652 * is the first stage in initializing the asic.
1653 * Returns 0 on success, negative error code on failure.
1654 */
06ec9070 1655static int amdgpu_device_ip_early_init(struct amdgpu_device *adev)
d38ceaf9 1656{
aaa36a97 1657 int i, r;
d38ceaf9 1658
483ef985 1659 amdgpu_device_enable_virtual_display(adev);
a6be7570 1660
d38ceaf9 1661 switch (adev->asic_type) {
aaa36a97
AD
1662 case CHIP_TOPAZ:
1663 case CHIP_TONGA:
48299f95 1664 case CHIP_FIJI:
2cc0c0b5 1665 case CHIP_POLARIS10:
32cc7e53 1666 case CHIP_POLARIS11:
c4642a47 1667 case CHIP_POLARIS12:
32cc7e53 1668 case CHIP_VEGAM:
aaa36a97 1669 case CHIP_CARRIZO:
39bb0c92
SL
1670 case CHIP_STONEY:
1671 if (adev->asic_type == CHIP_CARRIZO || adev->asic_type == CHIP_STONEY)
aaa36a97
AD
1672 adev->family = AMDGPU_FAMILY_CZ;
1673 else
1674 adev->family = AMDGPU_FAMILY_VI;
1675
1676 r = vi_set_ip_blocks(adev);
1677 if (r)
1678 return r;
1679 break;
33f34802
KW
1680#ifdef CONFIG_DRM_AMDGPU_SI
1681 case CHIP_VERDE:
1682 case CHIP_TAHITI:
1683 case CHIP_PITCAIRN:
1684 case CHIP_OLAND:
1685 case CHIP_HAINAN:
295d0daf 1686 adev->family = AMDGPU_FAMILY_SI;
33f34802
KW
1687 r = si_set_ip_blocks(adev);
1688 if (r)
1689 return r;
1690 break;
1691#endif
a2e73f56
AD
1692#ifdef CONFIG_DRM_AMDGPU_CIK
1693 case CHIP_BONAIRE:
1694 case CHIP_HAWAII:
1695 case CHIP_KAVERI:
1696 case CHIP_KABINI:
1697 case CHIP_MULLINS:
1698 if ((adev->asic_type == CHIP_BONAIRE) || (adev->asic_type == CHIP_HAWAII))
1699 adev->family = AMDGPU_FAMILY_CI;
1700 else
1701 adev->family = AMDGPU_FAMILY_KV;
1702
1703 r = cik_set_ip_blocks(adev);
1704 if (r)
1705 return r;
1706 break;
1707#endif
e48a3cd9
AD
1708 case CHIP_VEGA10:
1709 case CHIP_VEGA12:
e4bd8170 1710 case CHIP_VEGA20:
e48a3cd9 1711 case CHIP_RAVEN:
61cf44c1 1712 case CHIP_ARCTURUS:
b51a26a0
HR
1713 case CHIP_RENOIR:
1714 if (adev->asic_type == CHIP_RAVEN ||
1715 adev->asic_type == CHIP_RENOIR)
2ca8a5d2
CZ
1716 adev->family = AMDGPU_FAMILY_RV;
1717 else
1718 adev->family = AMDGPU_FAMILY_AI;
460826e6
KW
1719
1720 r = soc15_set_ip_blocks(adev);
1721 if (r)
1722 return r;
1723 break;
0a5b8c7b 1724 case CHIP_NAVI10:
7ecb5cd4 1725 case CHIP_NAVI14:
4808cf9c 1726 case CHIP_NAVI12:
0a5b8c7b
HR
1727 adev->family = AMDGPU_FAMILY_NV;
1728
1729 r = nv_set_ip_blocks(adev);
1730 if (r)
1731 return r;
1732 break;
d38ceaf9
AD
1733 default:
1734 /* FIXME: not supported yet */
1735 return -EINVAL;
1736 }
1737
e2a75f88
AD
1738 r = amdgpu_device_parse_gpu_info_fw(adev);
1739 if (r)
1740 return r;
1741
ec51d3fa
XY
1742 if (amdgpu_discovery && adev->asic_type >= CHIP_NAVI10)
1743 amdgpu_discovery_get_gfx_info(adev);
1744
1884734a 1745 amdgpu_amdkfd_device_probe(adev);
1746
3149d9da 1747 if (amdgpu_sriov_vf(adev)) {
122078de
ML
1748 /* handle vbios stuff prior full access mode for new handshake */
1749 if (adev->virt.req_init_data_ver == 1) {
1750 if (!amdgpu_get_bios(adev)) {
1751 DRM_ERROR("failed to get vbios\n");
1752 return -EINVAL;
1753 }
1754
1755 r = amdgpu_atombios_init(adev);
1756 if (r) {
1757 dev_err(adev->dev, "amdgpu_atombios_init failed\n");
1758 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_INIT_FAIL, 0, 0);
1759 return r;
1760 }
1761 }
2f294132 1762 }
122078de 1763
2f294132
ML
1764	 /* we need to send REQ_GPU here for the legacy handshake, otherwise the vbios
1765	  * will not be prepared by the host for this VF */
1766 if (amdgpu_sriov_vf(adev) && adev->virt.req_init_data_ver < 1) {
3149d9da
XY
1767 r = amdgpu_virt_request_full_gpu(adev, true);
1768 if (r)
2f294132 1769 return r;
3149d9da
XY
1770 }
1771
3b94fb10 1772 adev->pm.pp_feature = amdgpu_pp_feature_mask;
a35ad98b 1773 if (amdgpu_sriov_vf(adev) || sched_policy == KFD_SCHED_POLICY_NO_HWS)
00544006 1774 adev->pm.pp_feature &= ~PP_GFXOFF_MASK;
00f54b97 1775
d38ceaf9
AD
1776 for (i = 0; i < adev->num_ip_blocks; i++) {
1777 if ((amdgpu_ip_block_mask & (1 << i)) == 0) {
ed8cf00c
HR
1778 DRM_ERROR("disabled ip block: %d <%s>\n",
1779 i, adev->ip_blocks[i].version->funcs->name);
a1255107 1780 adev->ip_blocks[i].status.valid = false;
d38ceaf9 1781 } else {
a1255107
AD
1782 if (adev->ip_blocks[i].version->funcs->early_init) {
1783 r = adev->ip_blocks[i].version->funcs->early_init((void *)adev);
2c1a2784 1784 if (r == -ENOENT) {
a1255107 1785 adev->ip_blocks[i].status.valid = false;
2c1a2784 1786 } else if (r) {
a1255107
AD
1787 DRM_ERROR("early_init of IP block <%s> failed %d\n",
1788 adev->ip_blocks[i].version->funcs->name, r);
d38ceaf9 1789 return r;
2c1a2784 1790 } else {
a1255107 1791 adev->ip_blocks[i].status.valid = true;
2c1a2784 1792 }
974e6b64 1793 } else {
a1255107 1794 adev->ip_blocks[i].status.valid = true;
d38ceaf9 1795 }
d38ceaf9 1796 }
21a249ca
AD
1797 /* get the vbios after the asic_funcs are set up */
1798 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON) {
122078de
ML
1799 /* skip vbios handling for new handshake */
1800 if (amdgpu_sriov_vf(adev) && adev->virt.req_init_data_ver == 1)
1801 continue;
1802
21a249ca
AD
1803 /* Read BIOS */
1804 if (!amdgpu_get_bios(adev))
1805 return -EINVAL;
1806
1807 r = amdgpu_atombios_init(adev);
1808 if (r) {
1809 dev_err(adev->dev, "amdgpu_atombios_init failed\n");
1810 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_INIT_FAIL, 0, 0);
1811 return r;
1812 }
1813 }
d38ceaf9
AD
1814 }
1815
395d1fb9
NH
1816 adev->cg_flags &= amdgpu_cg_mask;
1817 adev->pg_flags &= amdgpu_pg_mask;
1818
d38ceaf9
AD
1819 return 0;
1820}
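/*
 * amdgpu_device_ip_early_init() above only maps the ASIC to a family and
 * calls the matching per-generation set_ip_blocks() helper (si_/cik_/vi_/
 * soc15_/nv_set_ip_blocks); that helper registers the ip_blocks[] array
 * which every later init, suspend, resume and fini pass in this file
 * iterates over.
 */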
1821
0a4f2520
RZ
1822static int amdgpu_device_ip_hw_init_phase1(struct amdgpu_device *adev)
1823{
1824 int i, r;
1825
1826 for (i = 0; i < adev->num_ip_blocks; i++) {
1827 if (!adev->ip_blocks[i].status.sw)
1828 continue;
1829 if (adev->ip_blocks[i].status.hw)
1830 continue;
1831 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
2d11fd3f 1832 (amdgpu_sriov_vf(adev) && (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP)) ||
0a4f2520
RZ
1833 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH) {
1834 r = adev->ip_blocks[i].version->funcs->hw_init(adev);
1835 if (r) {
1836 DRM_ERROR("hw_init of IP block <%s> failed %d\n",
1837 adev->ip_blocks[i].version->funcs->name, r);
1838 return r;
1839 }
1840 adev->ip_blocks[i].status.hw = true;
1841 }
1842 }
1843
1844 return 0;
1845}
1846
1847static int amdgpu_device_ip_hw_init_phase2(struct amdgpu_device *adev)
1848{
1849 int i, r;
1850
1851 for (i = 0; i < adev->num_ip_blocks; i++) {
1852 if (!adev->ip_blocks[i].status.sw)
1853 continue;
1854 if (adev->ip_blocks[i].status.hw)
1855 continue;
1856 r = adev->ip_blocks[i].version->funcs->hw_init(adev);
1857 if (r) {
1858 DRM_ERROR("hw_init of IP block <%s> failed %d\n",
1859 adev->ip_blocks[i].version->funcs->name, r);
1860 return r;
1861 }
1862 adev->ip_blocks[i].status.hw = true;
1863 }
1864
1865 return 0;
1866}
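/*
 * Ordering note: amdgpu_device_ip_init() below runs phase1 first so that
 * the COMMON and IH blocks (and PSP under SR-IOV) are up before firmware
 * loading, then amdgpu_device_fw_loading(), then phase2 for the remaining
 * blocks:
 *
 *	amdgpu_device_ip_hw_init_phase1(adev);
 *	amdgpu_device_fw_loading(adev);
 *	amdgpu_device_ip_hw_init_phase2(adev);
 */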
1867
7a3e0bb2
RZ
1868static int amdgpu_device_fw_loading(struct amdgpu_device *adev)
1869{
1870 int r = 0;
1871 int i;
80f41f84 1872 uint32_t smu_version;
7a3e0bb2
RZ
1873
1874 if (adev->asic_type >= CHIP_VEGA10) {
1875 for (i = 0; i < adev->num_ip_blocks; i++) {
482f0e53
ML
1876 if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_PSP)
1877 continue;
1878
1879 /* no need to do the fw loading again if already done*/
1880 if (adev->ip_blocks[i].status.hw == true)
1881 break;
1882
1883 if (adev->in_gpu_reset || adev->in_suspend) {
1884 r = adev->ip_blocks[i].version->funcs->resume(adev);
1885 if (r) {
1886 DRM_ERROR("resume of IP block <%s> failed %d\n",
7a3e0bb2 1887 adev->ip_blocks[i].version->funcs->name, r);
482f0e53
ML
1888 return r;
1889 }
1890 } else {
1891 r = adev->ip_blocks[i].version->funcs->hw_init(adev);
1892 if (r) {
1893 DRM_ERROR("hw_init of IP block <%s> failed %d\n",
1894 adev->ip_blocks[i].version->funcs->name, r);
1895 return r;
7a3e0bb2 1896 }
7a3e0bb2 1897 }
482f0e53
ML
1898
1899 adev->ip_blocks[i].status.hw = true;
1900 break;
7a3e0bb2
RZ
1901 }
1902 }
482f0e53 1903
8973d9ec
ED
1904 if (!amdgpu_sriov_vf(adev) || adev->asic_type == CHIP_TONGA)
1905 r = amdgpu_pm_load_smu_firmware(adev, &smu_version);
7a3e0bb2 1906
80f41f84 1907 return r;
7a3e0bb2
RZ
1908}
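/*
 * In the helper above the PSP block goes through hw_init() on a fresh
 * boot but through its resume() callback when coming back from suspend or
 * a GPU reset; SMU firmware is only loaded directly by the driver on bare
 * metal (or on Tonga even under SR-IOV).
 */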
1909
e3ecdffa
AD
1910/**
1911 * amdgpu_device_ip_init - run init for hardware IPs
1912 *
1913 * @adev: amdgpu_device pointer
1914 *
1915 * Main initialization pass for hardware IPs. The list of all the hardware
1916 * IPs that make up the asic is walked and the sw_init and hw_init callbacks
1917 * are run. sw_init initializes the software state associated with each IP
1918 * and hw_init initializes the hardware associated with each IP.
1919 * Returns 0 on success, negative error code on failure.
1920 */
06ec9070 1921static int amdgpu_device_ip_init(struct amdgpu_device *adev)
d38ceaf9
AD
1922{
1923 int i, r;
1924
c030f2e4 1925 r = amdgpu_ras_init(adev);
1926 if (r)
1927 return r;
1928
2f294132
ML
1929 if (amdgpu_sriov_vf(adev) && adev->virt.req_init_data_ver > 0) {
1930 r = amdgpu_virt_request_full_gpu(adev, true);
1931 if (r)
1932 return -EAGAIN;
1933 }
1934
d38ceaf9 1935 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 1936 if (!adev->ip_blocks[i].status.valid)
d38ceaf9 1937 continue;
a1255107 1938 r = adev->ip_blocks[i].version->funcs->sw_init((void *)adev);
2c1a2784 1939 if (r) {
a1255107
AD
1940 DRM_ERROR("sw_init of IP block <%s> failed %d\n",
1941 adev->ip_blocks[i].version->funcs->name, r);
72d3f592 1942 goto init_failed;
2c1a2784 1943 }
a1255107 1944 adev->ip_blocks[i].status.sw = true;
bfca0289 1945
d38ceaf9 1946 /* need to do gmc hw init early so we can allocate gpu mem */
a1255107 1947 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) {
06ec9070 1948 r = amdgpu_device_vram_scratch_init(adev);
2c1a2784
AD
1949 if (r) {
1950 DRM_ERROR("amdgpu_vram_scratch_init failed %d\n", r);
72d3f592 1951 goto init_failed;
2c1a2784 1952 }
a1255107 1953 r = adev->ip_blocks[i].version->funcs->hw_init((void *)adev);
2c1a2784
AD
1954 if (r) {
1955 DRM_ERROR("hw_init %d failed %d\n", i, r);
72d3f592 1956 goto init_failed;
2c1a2784 1957 }
06ec9070 1958 r = amdgpu_device_wb_init(adev);
2c1a2784 1959 if (r) {
06ec9070 1960 DRM_ERROR("amdgpu_device_wb_init failed %d\n", r);
72d3f592 1961 goto init_failed;
2c1a2784 1962 }
a1255107 1963 adev->ip_blocks[i].status.hw = true;
2493664f
ML
1964
1965 /* right after GMC hw init, we create CSA */
f92d5c61 1966 if (amdgpu_mcbp || amdgpu_sriov_vf(adev)) {
1e256e27
RZ
1967 r = amdgpu_allocate_static_csa(adev, &adev->virt.csa_obj,
1968 AMDGPU_GEM_DOMAIN_VRAM,
1969 AMDGPU_CSA_SIZE);
2493664f
ML
1970 if (r) {
1971 DRM_ERROR("allocate CSA failed %d\n", r);
72d3f592 1972 goto init_failed;
2493664f
ML
1973 }
1974 }
d38ceaf9
AD
1975 }
1976 }
1977
c9ffa427
YT
1978 if (amdgpu_sriov_vf(adev))
1979 amdgpu_virt_init_data_exchange(adev);
1980
533aed27
AG
1981 r = amdgpu_ib_pool_init(adev);
1982 if (r) {
1983 dev_err(adev->dev, "IB initialization failed (%d).\n", r);
1984 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_IB_INIT_FAIL, 0, r);
1985 goto init_failed;
1986 }
1987
c8963ea4
RZ
1988 r = amdgpu_ucode_create_bo(adev); /* create ucode bo when sw_init complete*/
1989 if (r)
72d3f592 1990 goto init_failed;
0a4f2520
RZ
1991
1992 r = amdgpu_device_ip_hw_init_phase1(adev);
1993 if (r)
72d3f592 1994 goto init_failed;
0a4f2520 1995
7a3e0bb2
RZ
1996 r = amdgpu_device_fw_loading(adev);
1997 if (r)
72d3f592 1998 goto init_failed;
7a3e0bb2 1999
0a4f2520
RZ
2000 r = amdgpu_device_ip_hw_init_phase2(adev);
2001 if (r)
72d3f592 2002 goto init_failed;
d38ceaf9 2003
121a2bc6
AG
2004 /*
2005	 * retired pages will be loaded from eeprom and reserved here;
2006	 * this should be called after amdgpu_device_ip_hw_init_phase2 since,
2007	 * for some ASICs, the RAS EEPROM code relies on the SMU being fully
2008	 * functional for I2C communication, which is only true at this point.
2009	 * recovery_init may fail, but it can free all resources allocated by
2010	 * itself and its failure should not stop the amdgpu init process.
2011	 *
2012	 * Note: theoretically, this should be called before all vram allocations
2013	 * to keep the retired pages from being allocated and used again.
2014 */
2015 amdgpu_ras_recovery_init(adev);
2016
3e2e2ab5
HZ
2017 if (adev->gmc.xgmi.num_physical_nodes > 1)
2018 amdgpu_xgmi_add_device(adev);
1884734a 2019 amdgpu_amdkfd_device_init(adev);
c6332b97 2020
bd607166
KR
2021 amdgpu_fru_get_product_info(adev);
2022
72d3f592 2023init_failed:
c9ffa427 2024 if (amdgpu_sriov_vf(adev))
c6332b97 2025 amdgpu_virt_release_full_gpu(adev, true);
2026
72d3f592 2027 return r;
d38ceaf9
AD
2028}
2029
e3ecdffa
AD
2030/**
2031 * amdgpu_device_fill_reset_magic - writes reset magic to gart pointer
2032 *
2033 * @adev: amdgpu_device pointer
2034 *
2035 * Writes a reset magic value to the gart pointer in VRAM. The driver calls
2036 * this function before a GPU reset. If the value is retained after a
2037	 * GPU reset, VRAM has not been lost. Some GPU resets may destroy VRAM contents.
2038 */
06ec9070 2039static void amdgpu_device_fill_reset_magic(struct amdgpu_device *adev)
0c49e0b8
CZ
2040{
2041 memcpy(adev->reset_magic, adev->gart.ptr, AMDGPU_RESET_MAGIC_NUM);
2042}
2043
e3ecdffa
AD
2044/**
2045 * amdgpu_device_check_vram_lost - check if vram is valid
2046 *
2047 * @adev: amdgpu_device pointer
2048 *
2049 * Checks the reset magic value written to the gart pointer in VRAM.
2050 * The driver calls this after a GPU reset to see if the contents of
2051	 * VRAM have been lost or not.
2052 * returns true if vram is lost, false if not.
2053 */
06ec9070 2054static bool amdgpu_device_check_vram_lost(struct amdgpu_device *adev)
0c49e0b8 2055{
dadce777
EQ
2056 if (memcmp(adev->gart.ptr, adev->reset_magic,
2057 AMDGPU_RESET_MAGIC_NUM))
2058 return true;
2059
2060 if (!adev->in_gpu_reset)
2061 return false;
2062
2063 /*
2064 * For all ASICs with baco/mode1 reset, the VRAM is
2065 * always assumed to be lost.
2066 */
2067 switch (amdgpu_asic_reset_method(adev)) {
2068 case AMD_RESET_METHOD_BACO:
2069 case AMD_RESET_METHOD_MODE1:
2070 return true;
2071 default:
2072 return false;
2073 }
0c49e0b8
CZ
2074}
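/*
 * Sketch of how the two helpers above pair up: the magic is written once
 * the IPs are up (amdgpu_device_fill_reset_magic() is called from
 * amdgpu_device_ip_late_init()), and the GPU reset path later does
 * something like:
 *
 *	vram_lost = amdgpu_device_check_vram_lost(adev);
 */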
2075
e3ecdffa 2076/**
1112a46b 2077 * amdgpu_device_set_cg_state - set clockgating for amdgpu device
e3ecdffa
AD
2078 *
2079 * @adev: amdgpu_device pointer
b8b72130 2080 * @state: clockgating state (gate or ungate)
e3ecdffa 2081 *
e3ecdffa 2082 * The list of all the hardware IPs that make up the asic is walked and the
1112a46b
RZ
2083 * set_clockgating_state callbacks are run.
2084	 * During late init this pass enables clockgating for hardware IPs;
2085	 * during fini or suspend it disables clockgating for them.
e3ecdffa
AD
2086 * Returns 0 on success, negative error code on failure.
2087 */
fdd34271 2088
1112a46b
RZ
2089static int amdgpu_device_set_cg_state(struct amdgpu_device *adev,
2090 enum amd_clockgating_state state)
d38ceaf9 2091{
1112a46b 2092 int i, j, r;
d38ceaf9 2093
4a2ba394
SL
2094 if (amdgpu_emu_mode == 1)
2095 return 0;
2096
1112a46b
RZ
2097 for (j = 0; j < adev->num_ip_blocks; j++) {
2098 i = state == AMD_CG_STATE_GATE ? j : adev->num_ip_blocks - j - 1;
a2d31dc3 2099 if (!adev->ip_blocks[i].status.late_initialized)
d38ceaf9 2100 continue;
4a446d55 2101 /* skip CG for VCE/UVD, it's handled specially */
a1255107 2102 if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_UVD &&
57716327 2103 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCE &&
34319b32 2104 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCN &&
52f2e779 2105 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_JPEG &&
57716327 2106 adev->ip_blocks[i].version->funcs->set_clockgating_state) {
4a446d55 2107 /* enable clockgating to save power */
a1255107 2108 r = adev->ip_blocks[i].version->funcs->set_clockgating_state((void *)adev,
1112a46b 2109 state);
4a446d55
AD
2110 if (r) {
2111 DRM_ERROR("set_clockgating_state(gate) of IP block <%s> failed %d\n",
a1255107 2112 adev->ip_blocks[i].version->funcs->name, r);
4a446d55
AD
2113 return r;
2114 }
b0b00ff1 2115 }
d38ceaf9 2116 }
06b18f61 2117
c9f96fd5
RZ
2118 return 0;
2119}
2120
1112a46b 2121static int amdgpu_device_set_pg_state(struct amdgpu_device *adev, enum amd_powergating_state state)
c9f96fd5 2122{
1112a46b 2123 int i, j, r;
06b18f61 2124
c9f96fd5
RZ
2125 if (amdgpu_emu_mode == 1)
2126 return 0;
2127
1112a46b
RZ
2128 for (j = 0; j < adev->num_ip_blocks; j++) {
2129 i = state == AMD_PG_STATE_GATE ? j : adev->num_ip_blocks - j - 1;
a2d31dc3 2130 if (!adev->ip_blocks[i].status.late_initialized)
c9f96fd5
RZ
2131 continue;
2132 /* skip CG for VCE/UVD, it's handled specially */
2133 if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_UVD &&
2134 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCE &&
2135 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCN &&
52f2e779 2136 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_JPEG &&
c9f96fd5
RZ
2137 adev->ip_blocks[i].version->funcs->set_powergating_state) {
2138 /* enable powergating to save power */
2139 r = adev->ip_blocks[i].version->funcs->set_powergating_state((void *)adev,
1112a46b 2140 state);
c9f96fd5
RZ
2141 if (r) {
2142 DRM_ERROR("set_powergating_state(gate) of IP block <%s> failed %d\n",
2143 adev->ip_blocks[i].version->funcs->name, r);
2144 return r;
2145 }
2146 }
2147 }
2dc80b00
S
2148 return 0;
2149}
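/*
 * Both helpers above walk the IP list in a direction that depends on the
 * requested state: gating runs front to back (i == j), i.e. in init order,
 * while ungating runs back to front (i == num_ip_blocks - j - 1), i.e. in
 * fini order.
 */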
2150
beff74bc
AD
2151static int amdgpu_device_enable_mgpu_fan_boost(void)
2152{
2153 struct amdgpu_gpu_instance *gpu_ins;
2154 struct amdgpu_device *adev;
2155 int i, ret = 0;
2156
2157 mutex_lock(&mgpu_info.mutex);
2158
2159 /*
2160 * MGPU fan boost feature should be enabled
2161 * only when there are two or more dGPUs in
2162 * the system
2163 */
2164 if (mgpu_info.num_dgpu < 2)
2165 goto out;
2166
2167 for (i = 0; i < mgpu_info.num_dgpu; i++) {
2168 gpu_ins = &(mgpu_info.gpu_ins[i]);
2169 adev = gpu_ins->adev;
2170 if (!(adev->flags & AMD_IS_APU) &&
2171 !gpu_ins->mgpu_fan_enabled &&
2172 adev->powerplay.pp_funcs &&
2173 adev->powerplay.pp_funcs->enable_mgpu_fan_boost) {
2174 ret = amdgpu_dpm_enable_mgpu_fan_boost(adev);
2175 if (ret)
2176 break;
2177
2178 gpu_ins->mgpu_fan_enabled = 1;
2179 }
2180 }
2181
2182out:
2183 mutex_unlock(&mgpu_info.mutex);
2184
2185 return ret;
2186}
2187
e3ecdffa
AD
2188/**
2189 * amdgpu_device_ip_late_init - run late init for hardware IPs
2190 *
2191 * @adev: amdgpu_device pointer
2192 *
2193 * Late initialization pass for hardware IPs. The list of all the hardware
2194 * IPs that make up the asic is walked and the late_init callbacks are run.
2195 * late_init covers any special initialization that an IP requires
2196	 * after all of them have been initialized or something that needs to happen
2197 * late in the init process.
2198 * Returns 0 on success, negative error code on failure.
2199 */
06ec9070 2200static int amdgpu_device_ip_late_init(struct amdgpu_device *adev)
2dc80b00 2201{
60599a03 2202 struct amdgpu_gpu_instance *gpu_instance;
2dc80b00
S
2203 int i = 0, r;
2204
2205 for (i = 0; i < adev->num_ip_blocks; i++) {
73f847db 2206 if (!adev->ip_blocks[i].status.hw)
2dc80b00
S
2207 continue;
2208 if (adev->ip_blocks[i].version->funcs->late_init) {
2209 r = adev->ip_blocks[i].version->funcs->late_init((void *)adev);
2210 if (r) {
2211 DRM_ERROR("late_init of IP block <%s> failed %d\n",
2212 adev->ip_blocks[i].version->funcs->name, r);
2213 return r;
2214 }
2dc80b00 2215 }
73f847db 2216 adev->ip_blocks[i].status.late_initialized = true;
2dc80b00
S
2217 }
2218
1112a46b
RZ
2219 amdgpu_device_set_cg_state(adev, AMD_CG_STATE_GATE);
2220 amdgpu_device_set_pg_state(adev, AMD_PG_STATE_GATE);
916ac57f 2221
06ec9070 2222 amdgpu_device_fill_reset_magic(adev);
d38ceaf9 2223
beff74bc
AD
2224 r = amdgpu_device_enable_mgpu_fan_boost();
2225 if (r)
2226 DRM_ERROR("enable mgpu fan boost failed (%d).\n", r);
2227
60599a03
EQ
2228
2229 if (adev->gmc.xgmi.num_physical_nodes > 1) {
2230 mutex_lock(&mgpu_info.mutex);
2231
2232 /*
2233 * Reset device p-state to low as this was booted with high.
2234 *
2235 * This should be performed only after all devices from the same
2236 * hive get initialized.
2237 *
2238	 * However, the number of devices in the hive is not known in advance,
2239	 * as it is counted one by one while the devices initialize.
2240	 *
2241	 * So we wait until all XGMI interlinked devices have been initialized.
2242 * This may bring some delays as those devices may come from
2243 * different hives. But that should be OK.
2244 */
2245 if (mgpu_info.num_dgpu == adev->gmc.xgmi.num_physical_nodes) {
2246 for (i = 0; i < mgpu_info.num_gpu; i++) {
2247 gpu_instance = &(mgpu_info.gpu_ins[i]);
2248 if (gpu_instance->adev->flags & AMD_IS_APU)
2249 continue;
2250
d84a430d
JK
2251 r = amdgpu_xgmi_set_pstate(gpu_instance->adev,
2252 AMDGPU_XGMI_PSTATE_MIN);
60599a03
EQ
2253 if (r) {
2254 DRM_ERROR("pstate setting failed (%d).\n", r);
2255 break;
2256 }
2257 }
2258 }
2259
2260 mutex_unlock(&mgpu_info.mutex);
2261 }
2262
d38ceaf9
AD
2263 return 0;
2264}
2265
e3ecdffa
AD
2266/**
2267 * amdgpu_device_ip_fini - run fini for hardware IPs
2268 *
2269 * @adev: amdgpu_device pointer
2270 *
2271 * Main teardown pass for hardware IPs. The list of all the hardware
2272 * IPs that make up the asic is walked and the hw_fini and sw_fini callbacks
2273 * are run. hw_fini tears down the hardware associated with each IP
2274 * and sw_fini tears down any software state associated with each IP.
2275 * Returns 0 on success, negative error code on failure.
2276 */
06ec9070 2277static int amdgpu_device_ip_fini(struct amdgpu_device *adev)
d38ceaf9
AD
2278{
2279 int i, r;
2280
c030f2e4 2281 amdgpu_ras_pre_fini(adev);
2282
a82400b5
AG
2283 if (adev->gmc.xgmi.num_physical_nodes > 1)
2284 amdgpu_xgmi_remove_device(adev);
2285
1884734a 2286 amdgpu_amdkfd_device_fini(adev);
05df1f01
RZ
2287
2288 amdgpu_device_set_pg_state(adev, AMD_PG_STATE_UNGATE);
fdd34271
RZ
2289 amdgpu_device_set_cg_state(adev, AMD_CG_STATE_UNGATE);
2290
3e96dbfd
AD
2291 /* need to disable SMC first */
2292 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 2293 if (!adev->ip_blocks[i].status.hw)
3e96dbfd 2294 continue;
fdd34271 2295 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) {
a1255107 2296 r = adev->ip_blocks[i].version->funcs->hw_fini((void *)adev);
3e96dbfd
AD
2297 /* XXX handle errors */
2298 if (r) {
2299 DRM_DEBUG("hw_fini of IP block <%s> failed %d\n",
a1255107 2300 adev->ip_blocks[i].version->funcs->name, r);
3e96dbfd 2301 }
a1255107 2302 adev->ip_blocks[i].status.hw = false;
3e96dbfd
AD
2303 break;
2304 }
2305 }
2306
d38ceaf9 2307 for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
a1255107 2308 if (!adev->ip_blocks[i].status.hw)
d38ceaf9 2309 continue;
8201a67a 2310
a1255107 2311 r = adev->ip_blocks[i].version->funcs->hw_fini((void *)adev);
d38ceaf9 2312 /* XXX handle errors */
2c1a2784 2313 if (r) {
a1255107
AD
2314 DRM_DEBUG("hw_fini of IP block <%s> failed %d\n",
2315 adev->ip_blocks[i].version->funcs->name, r);
2c1a2784 2316 }
8201a67a 2317
a1255107 2318 adev->ip_blocks[i].status.hw = false;
d38ceaf9
AD
2319 }
2320
9950cda2 2321
d38ceaf9 2322 for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
a1255107 2323 if (!adev->ip_blocks[i].status.sw)
d38ceaf9 2324 continue;
c12aba3a
ML
2325
2326 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) {
c8963ea4 2327 amdgpu_ucode_free_bo(adev);
1e256e27 2328 amdgpu_free_static_csa(&adev->virt.csa_obj);
c12aba3a
ML
2329 amdgpu_device_wb_fini(adev);
2330 amdgpu_device_vram_scratch_fini(adev);
533aed27 2331 amdgpu_ib_pool_fini(adev);
c12aba3a
ML
2332 }
2333
a1255107 2334 r = adev->ip_blocks[i].version->funcs->sw_fini((void *)adev);
d38ceaf9 2335 /* XXX handle errors */
2c1a2784 2336 if (r) {
a1255107
AD
2337 DRM_DEBUG("sw_fini of IP block <%s> failed %d\n",
2338 adev->ip_blocks[i].version->funcs->name, r);
2c1a2784 2339 }
a1255107
AD
2340 adev->ip_blocks[i].status.sw = false;
2341 adev->ip_blocks[i].status.valid = false;
d38ceaf9
AD
2342 }
2343
a6dcfd9c 2344 for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
a1255107 2345 if (!adev->ip_blocks[i].status.late_initialized)
8a2eef1d 2346 continue;
a1255107
AD
2347 if (adev->ip_blocks[i].version->funcs->late_fini)
2348 adev->ip_blocks[i].version->funcs->late_fini((void *)adev);
2349 adev->ip_blocks[i].status.late_initialized = false;
a6dcfd9c
ML
2350 }
2351
c030f2e4 2352 amdgpu_ras_fini(adev);
2353
030308fc 2354 if (amdgpu_sriov_vf(adev))
24136135
ML
2355 if (amdgpu_virt_release_full_gpu(adev, false))
2356 DRM_ERROR("failed to release exclusive mode on fini\n");
2493664f 2357
d38ceaf9
AD
2358 return 0;
2359}
2360
e3ecdffa 2361/**
beff74bc 2362 * amdgpu_device_delayed_init_work_handler - work handler for IB tests
e3ecdffa 2363 *
1112a46b 2364 * @work: work_struct.
e3ecdffa 2365 */
beff74bc 2366static void amdgpu_device_delayed_init_work_handler(struct work_struct *work)
2dc80b00
S
2367{
2368 struct amdgpu_device *adev =
beff74bc 2369 container_of(work, struct amdgpu_device, delayed_init_work.work);
916ac57f
RZ
2370 int r;
2371
2372 r = amdgpu_ib_ring_tests(adev);
2373 if (r)
2374 DRM_ERROR("ib ring test failed (%d).\n", r);
2dc80b00
S
2375}
2376
1e317b99
RZ
2377static void amdgpu_device_delay_enable_gfx_off(struct work_struct *work)
2378{
2379 struct amdgpu_device *adev =
2380 container_of(work, struct amdgpu_device, gfx.gfx_off_delay_work.work);
2381
2382 mutex_lock(&adev->gfx.gfx_off_mutex);
2383 if (!adev->gfx.gfx_off_state && !adev->gfx.gfx_off_req_count) {
2384 if (!amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_GFX, true))
2385 adev->gfx.gfx_off_state = true;
2386 }
2387 mutex_unlock(&adev->gfx.gfx_off_mutex);
2388}
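/*
 * gfx_off_req_count acts as a hold-off count for GFXOFF: the delayed work
 * above only asks the SMU (via set_powergating_by_smu) to power the GFX
 * core down once no block holds a request (count == 0) and GFXOFF is not
 * already engaged.
 */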
2389
e3ecdffa 2390/**
e7854a03 2391 * amdgpu_device_ip_suspend_phase1 - run suspend for hardware IPs (phase 1)
e3ecdffa
AD
2392 *
2393 * @adev: amdgpu_device pointer
2394 *
2395 * Main suspend function for hardware IPs. The list of all the hardware
2396 * IPs that make up the asic is walked, clockgating is disabled and the
2397 * suspend callbacks are run. suspend puts the hardware and software state
2398 * in each IP into a state suitable for suspend.
2399 * Returns 0 on success, negative error code on failure.
2400 */
e7854a03
AD
2401static int amdgpu_device_ip_suspend_phase1(struct amdgpu_device *adev)
2402{
2403 int i, r;
2404
ced1ba97
PL
2405 amdgpu_device_set_pg_state(adev, AMD_PG_STATE_UNGATE);
2406 amdgpu_device_set_cg_state(adev, AMD_CG_STATE_UNGATE);
05df1f01 2407
e7854a03
AD
2408 for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
2409 if (!adev->ip_blocks[i].status.valid)
2410 continue;
2411 /* displays are handled separately */
2412 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_DCE) {
e7854a03
AD
2413 /* XXX handle errors */
2414 r = adev->ip_blocks[i].version->funcs->suspend(adev);
2415 /* XXX handle errors */
2416 if (r) {
2417 DRM_ERROR("suspend of IP block <%s> failed %d\n",
2418 adev->ip_blocks[i].version->funcs->name, r);
482f0e53 2419 return r;
e7854a03 2420 }
482f0e53 2421 adev->ip_blocks[i].status.hw = false;
e7854a03
AD
2422 }
2423 }
2424
e7854a03
AD
2425 return 0;
2426}
2427
2428/**
2429 * amdgpu_device_ip_suspend_phase2 - run suspend for hardware IPs (phase 2)
2430 *
2431 * @adev: amdgpu_device pointer
2432 *
2433 * Main suspend function for hardware IPs. The list of all the hardware
2434 * IPs that make up the asic is walked, clockgating is disabled and the
2435 * suspend callbacks are run. suspend puts the hardware and software state
2436 * in each IP into a state suitable for suspend.
2437 * Returns 0 on success, negative error code on failure.
2438 */
2439static int amdgpu_device_ip_suspend_phase2(struct amdgpu_device *adev)
d38ceaf9
AD
2440{
2441 int i, r;
2442
2443 for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
a1255107 2444 if (!adev->ip_blocks[i].status.valid)
d38ceaf9 2445 continue;
e7854a03
AD
2446 /* displays are handled in phase1 */
2447 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_DCE)
2448 continue;
bff77e86
LM
2449 /* PSP lost connection when err_event_athub occurs */
2450 if (amdgpu_ras_intr_triggered() &&
2451 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP) {
2452 adev->ip_blocks[i].status.hw = false;
2453 continue;
2454 }
d38ceaf9 2455 /* XXX handle errors */
a1255107 2456 r = adev->ip_blocks[i].version->funcs->suspend(adev);
d38ceaf9 2457 /* XXX handle errors */
2c1a2784 2458 if (r) {
a1255107
AD
2459 DRM_ERROR("suspend of IP block <%s> failed %d\n",
2460 adev->ip_blocks[i].version->funcs->name, r);
2c1a2784 2461 }
876923fb 2462 adev->ip_blocks[i].status.hw = false;
a3a09142 2463 /* handle putting the SMC in the appropriate state */
86b93fd6
JZ
2464 if(!amdgpu_sriov_vf(adev)){
2465 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) {
2466 r = amdgpu_dpm_set_mp1_state(adev, adev->mp1_state);
2467 if (r) {
2468 DRM_ERROR("SMC failed to set mp1 state %d, %d\n",
2469 adev->mp1_state, r);
2470 return r;
2471 }
a3a09142
AD
2472 }
2473 }
b5507c7e 2474 adev->ip_blocks[i].status.hw = false;
d38ceaf9
AD
2475 }
2476
2477 return 0;
2478}
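/*
 * Suspend is therefore done in two reverse-order sweeps over the IP list:
 * phase1 quiesces the display (DCE) blocks, phase2 suspends everything
 * else and, on bare metal, finally moves the SMC into the selected
 * mp1_state.
 */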
2479
e7854a03
AD
2480/**
2481 * amdgpu_device_ip_suspend - run suspend for hardware IPs
2482 *
2483 * @adev: amdgpu_device pointer
2484 *
2485 * Main suspend function for hardware IPs. The list of all the hardware
2486 * IPs that make up the asic is walked, clockgating is disabled and the
2487 * suspend callbacks are run. suspend puts the hardware and software state
2488 * in each IP into a state suitable for suspend.
2489 * Returns 0 on success, negative error code on failure.
2490 */
2491int amdgpu_device_ip_suspend(struct amdgpu_device *adev)
2492{
2493 int r;
2494
e7819644
YT
2495 if (amdgpu_sriov_vf(adev))
2496 amdgpu_virt_request_full_gpu(adev, false);
2497
e7854a03
AD
2498 r = amdgpu_device_ip_suspend_phase1(adev);
2499 if (r)
2500 return r;
2501 r = amdgpu_device_ip_suspend_phase2(adev);
2502
e7819644
YT
2503 if (amdgpu_sriov_vf(adev))
2504 amdgpu_virt_release_full_gpu(adev, false);
2505
e7854a03
AD
2506 return r;
2507}
2508
06ec9070 2509static int amdgpu_device_ip_reinit_early_sriov(struct amdgpu_device *adev)
a90ad3c2
ML
2510{
2511 int i, r;
2512
2cb681b6
ML
2513 static enum amd_ip_block_type ip_order[] = {
2514 AMD_IP_BLOCK_TYPE_GMC,
2515 AMD_IP_BLOCK_TYPE_COMMON,
39186aef 2516 AMD_IP_BLOCK_TYPE_PSP,
2cb681b6
ML
2517 AMD_IP_BLOCK_TYPE_IH,
2518 };
a90ad3c2 2519
2cb681b6
ML
2520 for (i = 0; i < ARRAY_SIZE(ip_order); i++) {
2521 int j;
2522 struct amdgpu_ip_block *block;
a90ad3c2 2523
2cb681b6
ML
2524 for (j = 0; j < adev->num_ip_blocks; j++) {
2525 block = &adev->ip_blocks[j];
2526
482f0e53 2527 block->status.hw = false;
2cb681b6
ML
2528 if (block->version->type != ip_order[i] ||
2529 !block->status.valid)
2530 continue;
2531
2532 r = block->version->funcs->hw_init(adev);
0aaeefcc 2533 DRM_INFO("RE-INIT-early: %s %s\n", block->version->funcs->name, r?"failed":"succeeded");
c41d1cf6
ML
2534 if (r)
2535 return r;
482f0e53 2536 block->status.hw = true;
a90ad3c2
ML
2537 }
2538 }
2539
2540 return 0;
2541}
2542
06ec9070 2543static int amdgpu_device_ip_reinit_late_sriov(struct amdgpu_device *adev)
a90ad3c2
ML
2544{
2545 int i, r;
2546
2cb681b6
ML
2547 static enum amd_ip_block_type ip_order[] = {
2548 AMD_IP_BLOCK_TYPE_SMC,
2549 AMD_IP_BLOCK_TYPE_DCE,
2550 AMD_IP_BLOCK_TYPE_GFX,
2551 AMD_IP_BLOCK_TYPE_SDMA,
257deb8c 2552 AMD_IP_BLOCK_TYPE_UVD,
d83c7a07
JJ
2553 AMD_IP_BLOCK_TYPE_VCE,
2554 AMD_IP_BLOCK_TYPE_VCN
2cb681b6 2555 };
a90ad3c2 2556
2cb681b6
ML
2557 for (i = 0; i < ARRAY_SIZE(ip_order); i++) {
2558 int j;
2559 struct amdgpu_ip_block *block;
a90ad3c2 2560
2cb681b6
ML
2561 for (j = 0; j < adev->num_ip_blocks; j++) {
2562 block = &adev->ip_blocks[j];
2563
2564 if (block->version->type != ip_order[i] ||
482f0e53
ML
2565 !block->status.valid ||
2566 block->status.hw)
2cb681b6
ML
2567 continue;
2568
895bd048
JZ
2569 if (block->version->type == AMD_IP_BLOCK_TYPE_SMC)
2570 r = block->version->funcs->resume(adev);
2571 else
2572 r = block->version->funcs->hw_init(adev);
2573
0aaeefcc 2574 DRM_INFO("RE-INIT-late: %s %s\n", block->version->funcs->name, r?"failed":"succeeded");
c41d1cf6
ML
2575 if (r)
2576 return r;
482f0e53 2577 block->status.hw = true;
a90ad3c2
ML
2578 }
2579 }
2580
2581 return 0;
2582}
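/*
 * The two SR-IOV re-init helpers above restore hardware in a fixed
 * IP-type order (GMC/COMMON/PSP/IH early, then SMC/DCE/GFX/SDMA/UVD/VCE/
 * VCN late) instead of walking ip_blocks[] directly, and the SMC is
 * resumed rather than re-initialized.
 */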
2583
e3ecdffa
AD
2584/**
2585 * amdgpu_device_ip_resume_phase1 - run resume for hardware IPs
2586 *
2587 * @adev: amdgpu_device pointer
2588 *
2589 * First resume function for hardware IPs. The list of all the hardware
2590 * IPs that make up the asic is walked and the resume callbacks are run for
2591 * COMMON, GMC, and IH. resume puts the hardware into a functional state
2592 * after a suspend and updates the software state as necessary. This
2593 * function is also used for restoring the GPU after a GPU reset.
2594 * Returns 0 on success, negative error code on failure.
2595 */
06ec9070 2596static int amdgpu_device_ip_resume_phase1(struct amdgpu_device *adev)
d38ceaf9
AD
2597{
2598 int i, r;
2599
a90ad3c2 2600 for (i = 0; i < adev->num_ip_blocks; i++) {
482f0e53 2601 if (!adev->ip_blocks[i].status.valid || adev->ip_blocks[i].status.hw)
a90ad3c2 2602 continue;
a90ad3c2 2603 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
e3ecdffa
AD
2604 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
2605 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH) {
482f0e53 2606
fcf0649f
CZ
2607 r = adev->ip_blocks[i].version->funcs->resume(adev);
2608 if (r) {
2609 DRM_ERROR("resume of IP block <%s> failed %d\n",
2610 adev->ip_blocks[i].version->funcs->name, r);
2611 return r;
2612 }
482f0e53 2613 adev->ip_blocks[i].status.hw = true;
a90ad3c2
ML
2614 }
2615 }
2616
2617 return 0;
2618}
2619
e3ecdffa
AD
2620/**
2621 * amdgpu_device_ip_resume_phase2 - run resume for hardware IPs
2622 *
2623 * @adev: amdgpu_device pointer
2624 *
2625	 * Second resume function for hardware IPs. The list of all the hardware
2626 * IPs that make up the asic is walked and the resume callbacks are run for
2627 * all blocks except COMMON, GMC, and IH. resume puts the hardware into a
2628 * functional state after a suspend and updates the software state as
2629 * necessary. This function is also used for restoring the GPU after a GPU
2630 * reset.
2631 * Returns 0 on success, negative error code on failure.
2632 */
06ec9070 2633static int amdgpu_device_ip_resume_phase2(struct amdgpu_device *adev)
d38ceaf9
AD
2634{
2635 int i, r;
2636
2637 for (i = 0; i < adev->num_ip_blocks; i++) {
482f0e53 2638 if (!adev->ip_blocks[i].status.valid || adev->ip_blocks[i].status.hw)
d38ceaf9 2639 continue;
fcf0649f 2640 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
e3ecdffa 2641 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
7a3e0bb2
RZ
2642 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH ||
2643 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP)
fcf0649f 2644 continue;
a1255107 2645 r = adev->ip_blocks[i].version->funcs->resume(adev);
2c1a2784 2646 if (r) {
a1255107
AD
2647 DRM_ERROR("resume of IP block <%s> failed %d\n",
2648 adev->ip_blocks[i].version->funcs->name, r);
d38ceaf9 2649 return r;
2c1a2784 2650 }
482f0e53 2651 adev->ip_blocks[i].status.hw = true;
d38ceaf9
AD
2652 }
2653
2654 return 0;
2655}
2656
e3ecdffa
AD
2657/**
2658 * amdgpu_device_ip_resume - run resume for hardware IPs
2659 *
2660 * @adev: amdgpu_device pointer
2661 *
2662 * Main resume function for hardware IPs. The hardware IPs
2663 * are split into two resume functions because they are
2664	 * also used in recovering from a GPU reset, and some additional
2665	 * steps need to be taken between them. In this case (S3/S4) they are
2666 * run sequentially.
2667 * Returns 0 on success, negative error code on failure.
2668 */
06ec9070 2669static int amdgpu_device_ip_resume(struct amdgpu_device *adev)
fcf0649f
CZ
2670{
2671 int r;
2672
06ec9070 2673 r = amdgpu_device_ip_resume_phase1(adev);
fcf0649f
CZ
2674 if (r)
2675 return r;
7a3e0bb2
RZ
2676
2677 r = amdgpu_device_fw_loading(adev);
2678 if (r)
2679 return r;
2680
06ec9070 2681 r = amdgpu_device_ip_resume_phase2(adev);
fcf0649f
CZ
2682
2683 return r;
2684}
2685
e3ecdffa
AD
2686/**
2687 * amdgpu_device_detect_sriov_bios - determine if the board supports SR-IOV
2688 *
2689 * @adev: amdgpu_device pointer
2690 *
2691 * Query the VBIOS data tables to determine if the board supports SR-IOV.
2692 */
4e99a44e 2693static void amdgpu_device_detect_sriov_bios(struct amdgpu_device *adev)
048765ad 2694{
6867e1b5
ML
2695 if (amdgpu_sriov_vf(adev)) {
2696 if (adev->is_atom_fw) {
2697 if (amdgpu_atomfirmware_gpu_supports_virtualization(adev))
2698 adev->virt.caps |= AMDGPU_SRIOV_CAPS_SRIOV_VBIOS;
2699 } else {
2700 if (amdgpu_atombios_has_gpu_virtualization_table(adev))
2701 adev->virt.caps |= AMDGPU_SRIOV_CAPS_SRIOV_VBIOS;
2702 }
2703
2704 if (!(adev->virt.caps & AMDGPU_SRIOV_CAPS_SRIOV_VBIOS))
2705 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_NO_VBIOS, 0, 0);
a5bde2f9 2706 }
048765ad
AR
2707}
2708
e3ecdffa
AD
2709/**
2710 * amdgpu_device_asic_has_dc_support - determine if DC supports the asic
2711 *
2712 * @asic_type: AMD asic type
2713 *
2714 * Check if there is DC (new modesetting infrastructre) support for an asic.
2715 * returns true if DC has support, false if not.
2716 */
4562236b
HW
2717bool amdgpu_device_asic_has_dc_support(enum amd_asic_type asic_type)
2718{
2719 switch (asic_type) {
2720#if defined(CONFIG_DRM_AMD_DC)
2721 case CHIP_BONAIRE:
0d6fbccb 2722 case CHIP_KAVERI:
367e6687
AD
2723 case CHIP_KABINI:
2724 case CHIP_MULLINS:
d9fda248
HW
2725 /*
2726 * We have systems in the wild with these ASICs that require
2727 * LVDS and VGA support which is not supported with DC.
2728 *
2729 * Fallback to the non-DC driver here by default so as not to
2730 * cause regressions.
2731 */
2732 return amdgpu_dc > 0;
2733 case CHIP_HAWAII:
4562236b
HW
2734 case CHIP_CARRIZO:
2735 case CHIP_STONEY:
4562236b 2736 case CHIP_POLARIS10:
675fd32b 2737 case CHIP_POLARIS11:
2c8ad2d5 2738 case CHIP_POLARIS12:
675fd32b 2739 case CHIP_VEGAM:
4562236b
HW
2740 case CHIP_TONGA:
2741 case CHIP_FIJI:
42f8ffa1 2742 case CHIP_VEGA10:
dca7b401 2743 case CHIP_VEGA12:
c6034aa2 2744 case CHIP_VEGA20:
b86a1aa3 2745#if defined(CONFIG_DRM_AMD_DC_DCN)
fd187853 2746 case CHIP_RAVEN:
b4f199c7 2747 case CHIP_NAVI10:
8fceceb6 2748 case CHIP_NAVI14:
078655d9 2749 case CHIP_NAVI12:
e1c14c43 2750 case CHIP_RENOIR:
42f8ffa1 2751#endif
fd187853 2752 return amdgpu_dc != 0;
4562236b
HW
2753#endif
2754 default:
93b09a9a
SS
2755 if (amdgpu_dc > 0)
2756 DRM_INFO("Display Core has been requested via kernel parameter "
2757 "but isn't supported by ASIC, ignoring\n");
4562236b
HW
2758 return false;
2759 }
2760}
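/*
 * The amdgpu.dc module parameter drives the checks above: on the older
 * CIK parts that may still need LVDS/VGA, DC is only used when explicitly
 * requested (amdgpu_dc > 0); on everything else it is used unless it is
 * explicitly disabled.
 */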
2761
2762/**
2763 * amdgpu_device_has_dc_support - check if dc is supported
2764 *
2765	 * @adev: amdgpu_device pointer
2766 *
2767 * Returns true for supported, false for not supported
2768 */
2769bool amdgpu_device_has_dc_support(struct amdgpu_device *adev)
2770{
2555039d
XY
2771 if (amdgpu_sriov_vf(adev))
2772 return false;
2773
4562236b
HW
2774 return amdgpu_device_asic_has_dc_support(adev->asic_type);
2775}
2776
d4535e2c
AG
2777
2778static void amdgpu_device_xgmi_reset_func(struct work_struct *__work)
2779{
2780 struct amdgpu_device *adev =
2781 container_of(__work, struct amdgpu_device, xgmi_reset_work);
c6a6e2db 2782 struct amdgpu_hive_info *hive = amdgpu_get_xgmi_hive(adev, 0);
d4535e2c 2783
c6a6e2db
AG
2784 /* It's a bug to not have a hive within this function */
2785 if (WARN_ON(!hive))
2786 return;
2787
2788 /*
2789 * Use task barrier to synchronize all xgmi reset works across the
2790 * hive. task_barrier_enter and task_barrier_exit will block
2791 * until all the threads running the xgmi reset works reach
2792 * those points. task_barrier_full will do both blocks.
2793 */
2794 if (amdgpu_asic_reset_method(adev) == AMD_RESET_METHOD_BACO) {
2795
2796 task_barrier_enter(&hive->tb);
2797 adev->asic_reset_res = amdgpu_device_baco_enter(adev->ddev);
2798
2799 if (adev->asic_reset_res)
2800 goto fail;
2801
2802 task_barrier_exit(&hive->tb);
2803 adev->asic_reset_res = amdgpu_device_baco_exit(adev->ddev);
2804
2805 if (adev->asic_reset_res)
2806 goto fail;
43c4d576
JC
2807
2808 if (adev->mmhub.funcs && adev->mmhub.funcs->reset_ras_error_count)
2809 adev->mmhub.funcs->reset_ras_error_count(adev);
c6a6e2db
AG
2810 } else {
2811
2812 task_barrier_full(&hive->tb);
2813 adev->asic_reset_res = amdgpu_asic_reset(adev);
2814 }
ce316fa5 2815
c6a6e2db 2816fail:
d4535e2c 2817 if (adev->asic_reset_res)
fed184e9 2818 DRM_WARN("ASIC reset failed with error, %d for drm dev, %s",
d4535e2c
AG
2819 adev->asic_reset_res, adev->ddev->unique);
2820}
2821
71f98027
AD
2822static int amdgpu_device_get_job_timeout_settings(struct amdgpu_device *adev)
2823{
2824 char *input = amdgpu_lockup_timeout;
2825 char *timeout_setting = NULL;
2826 int index = 0;
2827 long timeout;
2828 int ret = 0;
2829
2830 /*
2831	 * By default, the timeout for non-compute jobs is 10000 ms,
2832	 * and there is no timeout enforced on compute jobs.
2833	 * In SR-IOV or passthrough mode, the timeout for compute
2834	 * jobs is 60000 ms by default.
71f98027
AD
2835 */
2836 adev->gfx_timeout = msecs_to_jiffies(10000);
2837 adev->sdma_timeout = adev->video_timeout = adev->gfx_timeout;
2838 if (amdgpu_sriov_vf(adev) || amdgpu_passthrough(adev))
b7b2a316 2839 adev->compute_timeout = msecs_to_jiffies(60000);
71f98027
AD
2840 else
2841 adev->compute_timeout = MAX_SCHEDULE_TIMEOUT;
2842
f440ff44 2843 if (strnlen(input, AMDGPU_MAX_TIMEOUT_PARAM_LENGTH)) {
71f98027 2844 while ((timeout_setting = strsep(&input, ",")) &&
f440ff44 2845 strnlen(timeout_setting, AMDGPU_MAX_TIMEOUT_PARAM_LENGTH)) {
71f98027
AD
2846 ret = kstrtol(timeout_setting, 0, &timeout);
2847 if (ret)
2848 return ret;
2849
2850 if (timeout == 0) {
2851 index++;
2852 continue;
2853 } else if (timeout < 0) {
2854 timeout = MAX_SCHEDULE_TIMEOUT;
2855 } else {
2856 timeout = msecs_to_jiffies(timeout);
2857 }
2858
2859 switch (index++) {
2860 case 0:
2861 adev->gfx_timeout = timeout;
2862 break;
2863 case 1:
2864 adev->compute_timeout = timeout;
2865 break;
2866 case 2:
2867 adev->sdma_timeout = timeout;
2868 break;
2869 case 3:
2870 adev->video_timeout = timeout;
2871 break;
2872 default:
2873 break;
2874 }
2875 }
2876 /*
2877 * There is only one value specified and
2878 * it should apply to all non-compute jobs.
2879 */
bcccee89 2880 if (index == 1) {
71f98027 2881 adev->sdma_timeout = adev->video_timeout = adev->gfx_timeout;
bcccee89
ED
2882 if (amdgpu_sriov_vf(adev) || amdgpu_passthrough(adev))
2883 adev->compute_timeout = adev->gfx_timeout;
2884 }
71f98027
AD
2885 }
2886
2887 return ret;
2888}
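/*
 * Example (hypothetical values) of how the lockup_timeout string parsed
 * above is consumed; the comma separated entries map, in order, to gfx,
 * compute, sdma and video jobs, 0 keeps the default and a negative value
 * means no timeout:
 *
 *	amdgpu.lockup_timeout=10000		gfx/sdma/video = 10s
 *	amdgpu.lockup_timeout=10000,60000	gfx = 10s, compute = 60s
 *	amdgpu.lockup_timeout=0,-1		gfx default, compute never times out
 */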
d4535e2c 2889
d38ceaf9
AD
2890/**
2891 * amdgpu_device_init - initialize the driver
2892 *
2893 * @adev: amdgpu_device pointer
87e3f136 2894 * @ddev: drm dev pointer
d38ceaf9
AD
2895 * @pdev: pci dev pointer
2896 * @flags: driver flags
2897 *
2898 * Initializes the driver info and hw (all asics).
2899 * Returns 0 for success or an error on failure.
2900 * Called at driver startup.
2901 */
2902int amdgpu_device_init(struct amdgpu_device *adev,
2903 struct drm_device *ddev,
2904 struct pci_dev *pdev,
2905 uint32_t flags)
2906{
2907 int r, i;
3840c5bc 2908 bool boco = false;
95844d20 2909 u32 max_MBps;
d38ceaf9
AD
2910
2911 adev->shutdown = false;
2912 adev->dev = &pdev->dev;
2913 adev->ddev = ddev;
2914 adev->pdev = pdev;
2915 adev->flags = flags;
4e66d7d2
YZ
2916
2917 if (amdgpu_force_asic_type >= 0 && amdgpu_force_asic_type < CHIP_LAST)
2918 adev->asic_type = amdgpu_force_asic_type;
2919 else
2920 adev->asic_type = flags & AMD_ASIC_MASK;
2921
d38ceaf9 2922 adev->usec_timeout = AMDGPU_MAX_USEC_TIMEOUT;
593aa2d2 2923 if (amdgpu_emu_mode == 1)
8bdab6bb 2924 adev->usec_timeout *= 10;
770d13b1 2925 adev->gmc.gart_size = 512 * 1024 * 1024;
d38ceaf9
AD
2926 adev->accel_working = false;
2927 adev->num_rings = 0;
2928 adev->mman.buffer_funcs = NULL;
2929 adev->mman.buffer_funcs_ring = NULL;
2930 adev->vm_manager.vm_pte_funcs = NULL;
0c88b430 2931 adev->vm_manager.vm_pte_num_scheds = 0;
132f34e4 2932 adev->gmc.gmc_funcs = NULL;
f54d1867 2933 adev->fence_context = dma_fence_context_alloc(AMDGPU_MAX_RINGS);
b8866c26 2934 bitmap_zero(adev->gfx.pipe_reserve_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
d38ceaf9
AD
2935
2936 adev->smc_rreg = &amdgpu_invalid_rreg;
2937 adev->smc_wreg = &amdgpu_invalid_wreg;
2938 adev->pcie_rreg = &amdgpu_invalid_rreg;
2939 adev->pcie_wreg = &amdgpu_invalid_wreg;
36b9a952
HR
2940 adev->pciep_rreg = &amdgpu_invalid_rreg;
2941 adev->pciep_wreg = &amdgpu_invalid_wreg;
4fa1c6a6
TZ
2942 adev->pcie_rreg64 = &amdgpu_invalid_rreg64;
2943 adev->pcie_wreg64 = &amdgpu_invalid_wreg64;
d38ceaf9
AD
2944 adev->uvd_ctx_rreg = &amdgpu_invalid_rreg;
2945 adev->uvd_ctx_wreg = &amdgpu_invalid_wreg;
2946 adev->didt_rreg = &amdgpu_invalid_rreg;
2947 adev->didt_wreg = &amdgpu_invalid_wreg;
ccdbb20a
RZ
2948 adev->gc_cac_rreg = &amdgpu_invalid_rreg;
2949 adev->gc_cac_wreg = &amdgpu_invalid_wreg;
d38ceaf9
AD
2950 adev->audio_endpt_rreg = &amdgpu_block_invalid_rreg;
2951 adev->audio_endpt_wreg = &amdgpu_block_invalid_wreg;
2952
3e39ab90
AD
2953 DRM_INFO("initializing kernel modesetting (%s 0x%04X:0x%04X 0x%04X:0x%04X 0x%02X).\n",
2954 amdgpu_asic_name[adev->asic_type], pdev->vendor, pdev->device,
2955 pdev->subsystem_vendor, pdev->subsystem_device, pdev->revision);
d38ceaf9
AD
2956
2957	 /* mutex initialization is all done here so we
2958	  * can call these functions again later without locking issues */
d38ceaf9 2959 atomic_set(&adev->irq.ih.lock, 0);
0e5ca0d1 2960 mutex_init(&adev->firmware.mutex);
d38ceaf9
AD
2961 mutex_init(&adev->pm.mutex);
2962 mutex_init(&adev->gfx.gpu_clock_mutex);
2963 mutex_init(&adev->srbm_mutex);
b8866c26 2964 mutex_init(&adev->gfx.pipe_reserve_mutex);
d23ee13f 2965 mutex_init(&adev->gfx.gfx_off_mutex);
d38ceaf9 2966 mutex_init(&adev->grbm_idx_mutex);
d38ceaf9 2967 mutex_init(&adev->mn_lock);
e23b74aa 2968 mutex_init(&adev->virt.vf_errors.lock);
d38ceaf9 2969 hash_init(adev->mn_hash);
13a752e3 2970 mutex_init(&adev->lock_reset);
32eaeae0 2971 mutex_init(&adev->psp.mutex);
bd052211 2972 mutex_init(&adev->notifier_lock);
d38ceaf9 2973
912dfc84
EQ
2974 r = amdgpu_device_check_arguments(adev);
2975 if (r)
2976 return r;
d38ceaf9 2977
d38ceaf9
AD
2978 spin_lock_init(&adev->mmio_idx_lock);
2979 spin_lock_init(&adev->smc_idx_lock);
2980 spin_lock_init(&adev->pcie_idx_lock);
2981 spin_lock_init(&adev->uvd_ctx_idx_lock);
2982 spin_lock_init(&adev->didt_idx_lock);
ccdbb20a 2983 spin_lock_init(&adev->gc_cac_idx_lock);
16abb5d2 2984 spin_lock_init(&adev->se_cac_idx_lock);
d38ceaf9 2985 spin_lock_init(&adev->audio_endpt_idx_lock);
95844d20 2986 spin_lock_init(&adev->mm_stats.lock);
d38ceaf9 2987
0c4e7fa5
CZ
2988 INIT_LIST_HEAD(&adev->shadow_list);
2989 mutex_init(&adev->shadow_list_lock);
2990
beff74bc
AD
2991 INIT_DELAYED_WORK(&adev->delayed_init_work,
2992 amdgpu_device_delayed_init_work_handler);
1e317b99
RZ
2993 INIT_DELAYED_WORK(&adev->gfx.gfx_off_delay_work,
2994 amdgpu_device_delay_enable_gfx_off);
2dc80b00 2995
d4535e2c
AG
2996 INIT_WORK(&adev->xgmi_reset_work, amdgpu_device_xgmi_reset_func);
2997
d23ee13f 2998 adev->gfx.gfx_off_req_count = 1;
b1ddf548
RZ
2999 adev->pm.ac_power = power_supply_is_system_supplied() > 0 ? true : false;
3000
0fa49558
AX
3001 /* Registers mapping */
3002 /* TODO: block userspace mapping of io register */
da69c161
KW
3003 if (adev->asic_type >= CHIP_BONAIRE) {
3004 adev->rmmio_base = pci_resource_start(adev->pdev, 5);
3005 adev->rmmio_size = pci_resource_len(adev->pdev, 5);
3006 } else {
3007 adev->rmmio_base = pci_resource_start(adev->pdev, 2);
3008 adev->rmmio_size = pci_resource_len(adev->pdev, 2);
3009 }
d38ceaf9 3010
d38ceaf9
AD
3011 adev->rmmio = ioremap(adev->rmmio_base, adev->rmmio_size);
3012 if (adev->rmmio == NULL) {
3013 return -ENOMEM;
3014 }
3015 DRM_INFO("register mmio base: 0x%08X\n", (uint32_t)adev->rmmio_base);
3016 DRM_INFO("register mmio size: %u\n", (unsigned)adev->rmmio_size);
3017
d38ceaf9
AD
3018 /* io port mapping */
3019 for (i = 0; i < DEVICE_COUNT_RESOURCE; i++) {
3020 if (pci_resource_flags(adev->pdev, i) & IORESOURCE_IO) {
3021 adev->rio_mem_size = pci_resource_len(adev->pdev, i);
3022 adev->rio_mem = pci_iomap(adev->pdev, i, adev->rio_mem_size);
3023 break;
3024 }
3025 }
3026 if (adev->rio_mem == NULL)
b64a18c5 3027 DRM_INFO("PCI I/O BAR is not found.\n");
d38ceaf9 3028
b2109d8e
JX
3029 /* enable PCIE atomic ops */
3030 r = pci_enable_atomic_ops_to_root(adev->pdev,
3031 PCI_EXP_DEVCAP2_ATOMIC_COMP32 |
3032 PCI_EXP_DEVCAP2_ATOMIC_COMP64);
3033 if (r) {
3034 adev->have_atomics_support = false;
3035 DRM_INFO("PCIE atomic ops is not supported\n");
3036 } else {
3037 adev->have_atomics_support = true;
3038 }
3039
5494d864
AD
3040 amdgpu_device_get_pcie_info(adev);
3041
b239c017
JX
3042 if (amdgpu_mcbp)
3043 DRM_INFO("MCBP is enabled\n");
3044
5f84cc63
JX
3045 if (amdgpu_mes && adev->asic_type >= CHIP_NAVI10)
3046 adev->enable_mes = true;
3047
3aa0115d
ML
3048 /* detect hw virtualization here */
3049 amdgpu_detect_virtualization(adev);
3050
dffa11b4
ML
3051 r = amdgpu_device_get_job_timeout_settings(adev);
3052 if (r) {
3053 dev_err(adev->dev, "invalid lockup_timeout parameter syntax\n");
3054 return r;
a190d1c7
XY
3055 }
3056
d38ceaf9 3057 /* early init functions */
06ec9070 3058 r = amdgpu_device_ip_early_init(adev);
d38ceaf9
AD
3059 if (r)
3060 return r;
3061
6585661d
OZ
3062 /* doorbell bar mapping and doorbell index init*/
3063 amdgpu_device_doorbell_init(adev);
3064
d38ceaf9
AD
3065 /* if we have > 1 VGA cards, then disable the amdgpu VGA resources */
3066 /* this will fail for cards that aren't VGA class devices, just
3067 * ignore it */
06ec9070 3068 vga_client_register(adev->pdev, adev, NULL, amdgpu_device_vga_set_decode);
d38ceaf9 3069
31af062a 3070 if (amdgpu_device_supports_boco(ddev))
3840c5bc
AD
3071 boco = true;
3072 if (amdgpu_has_atpx() &&
3073 (amdgpu_is_atpx_hybrid() ||
3074 amdgpu_has_atpx_dgpu_power_cntl()) &&
3075 !pci_is_thunderbolt_attached(adev->pdev))
84c8b22e 3076 vga_switcheroo_register_client(adev->pdev,
3840c5bc
AD
3077 &amdgpu_switcheroo_ops, boco);
3078 if (boco)
d38ceaf9
AD
3079 vga_switcheroo_init_domain_pm_ops(adev->dev, &adev->vga_pm_domain);
3080
9475a943
SL
3081 if (amdgpu_emu_mode == 1) {
3082 /* post the asic on emulation mode */
3083 emu_soc_asic_init(adev);
bfca0289 3084 goto fence_driver_init;
9475a943 3085 }
bfca0289 3086
4e99a44e
ML
3087 /* detect if we are with an SRIOV vbios */
3088 amdgpu_device_detect_sriov_bios(adev);
048765ad 3089
95e8e59e
AD
3090 /* check if we need to reset the asic
3091 * E.g., driver was not cleanly unloaded previously, etc.
3092 */
f14899fd 3093 if (!amdgpu_sriov_vf(adev) && amdgpu_asic_need_reset_on_init(adev)) {
95e8e59e
AD
3094 r = amdgpu_asic_reset(adev);
3095 if (r) {
3096 dev_err(adev->dev, "asic reset on init failed\n");
3097 goto failed;
3098 }
3099 }
3100
d38ceaf9 3101 /* Post card if necessary */
39c640c0 3102 if (amdgpu_device_need_post(adev)) {
d38ceaf9 3103 if (!adev->bios) {
bec86378 3104 dev_err(adev->dev, "no vBIOS found\n");
83ba126a
AD
3105 r = -EINVAL;
3106 goto failed;
d38ceaf9 3107 }
bec86378 3108 DRM_INFO("GPU posting now...\n");
4e99a44e
ML
3109 r = amdgpu_atom_asic_init(adev->mode_info.atom_context);
3110 if (r) {
3111 dev_err(adev->dev, "gpu post error!\n");
3112 goto failed;
3113 }
d38ceaf9
AD
3114 }
3115
88b64e95
AD
3116 if (adev->is_atom_fw) {
3117 /* Initialize clocks */
3118 r = amdgpu_atomfirmware_get_clock_info(adev);
3119 if (r) {
3120 dev_err(adev->dev, "amdgpu_atomfirmware_get_clock_info failed\n");
e23b74aa 3121 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_GET_CLOCK_FAIL, 0, 0);
88b64e95
AD
3122 goto failed;
3123 }
3124 } else {
a5bde2f9
AD
3125 /* Initialize clocks */
3126 r = amdgpu_atombios_get_clock_info(adev);
3127 if (r) {
3128 dev_err(adev->dev, "amdgpu_atombios_get_clock_info failed\n");
e23b74aa 3129 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_GET_CLOCK_FAIL, 0, 0);
89041940 3130 goto failed;
a5bde2f9
AD
3131 }
3132 /* init i2c buses */
4562236b
HW
3133 if (!amdgpu_device_has_dc_support(adev))
3134 amdgpu_atombios_i2c_init(adev);
2c1a2784 3135 }
d38ceaf9 3136
bfca0289 3137fence_driver_init:
d38ceaf9
AD
3138 /* Fence driver */
3139 r = amdgpu_fence_driver_init(adev);
2c1a2784
AD
3140 if (r) {
3141 dev_err(adev->dev, "amdgpu_fence_driver_init failed\n");
e23b74aa 3142 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_FENCE_INIT_FAIL, 0, 0);
83ba126a 3143 goto failed;
2c1a2784 3144 }
d38ceaf9
AD
3145
3146 /* init the mode config */
3147 drm_mode_config_init(adev->ddev);
3148
06ec9070 3149 r = amdgpu_device_ip_init(adev);
d38ceaf9 3150 if (r) {
8840a387 3151 /* failed in exclusive mode due to timeout */
3152 if (amdgpu_sriov_vf(adev) &&
3153 !amdgpu_sriov_runtime(adev) &&
3154 amdgpu_virt_mmio_blocked(adev) &&
3155 !amdgpu_virt_wait_reset(adev)) {
3156 dev_err(adev->dev, "VF exclusive mode timeout\n");
1daee8b4
PD
3157 /* Don't send request since VF is inactive. */
3158 adev->virt.caps &= ~AMDGPU_SRIOV_CAPS_RUNTIME;
3159 adev->virt.ops = NULL;
8840a387 3160 r = -EAGAIN;
3161 goto failed;
3162 }
06ec9070 3163 dev_err(adev->dev, "amdgpu_device_ip_init failed\n");
e23b74aa 3164 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_AMDGPU_INIT_FAIL, 0, 0);
83ba126a 3165 goto failed;
d38ceaf9
AD
3166 }
3167
d69b8971
YZ
3168 dev_info(adev->dev,
3169 "SE %d, SH per SE %d, CU per SH %d, active_cu_number %d\n",
d7f72fe4
YZ
3170 adev->gfx.config.max_shader_engines,
3171 adev->gfx.config.max_sh_per_se,
3172 adev->gfx.config.max_cu_per_sh,
3173 adev->gfx.cu_info.number);
3174
d38ceaf9
AD
3175 adev->accel_working = true;
3176
e59c0205
AX
3177 amdgpu_vm_check_compute_bug(adev);
3178
95844d20
MO
3179 /* Initialize the buffer migration limit. */
3180 if (amdgpu_moverate >= 0)
3181 max_MBps = amdgpu_moverate;
3182 else
3183 max_MBps = 8; /* Allow 8 MB/s. */
3184 /* Get a log2 for easy divisions. */
3185 adev->mm_stats.log2_max_MBps = ilog2(max(1u, max_MBps));
3186
9bc92b9c
ML
3187 amdgpu_fbdev_init(adev);
3188
d2f52ac8 3189 r = amdgpu_pm_sysfs_init(adev);
7c868b59
YT
3190 if (r) {
3191 adev->pm_sysfs_en = false;
d2f52ac8 3192 DRM_ERROR("registering pm debugfs failed (%d).\n", r);
7c868b59
YT
3193 } else
3194 adev->pm_sysfs_en = true;
d2f52ac8 3195
5bb23532 3196 r = amdgpu_ucode_sysfs_init(adev);
7c868b59
YT
3197 if (r) {
3198 adev->ucode_sysfs_en = false;
5bb23532 3199 DRM_ERROR("Creating firmware sysfs failed (%d).\n", r);
7c868b59
YT
3200 } else
3201 adev->ucode_sysfs_en = true;
5bb23532 3202
d38ceaf9
AD
3203 if ((amdgpu_testing & 1)) {
3204 if (adev->accel_working)
3205 amdgpu_test_moves(adev);
3206 else
3207 DRM_INFO("amdgpu: acceleration disabled, skipping move tests\n");
3208 }
d38ceaf9
AD
3209 if (amdgpu_benchmarking) {
3210 if (adev->accel_working)
3211 amdgpu_benchmark(adev, amdgpu_benchmarking);
3212 else
3213 DRM_INFO("amdgpu: acceleration disabled, skipping benchmarks\n");
3214 }
3215
b0adca4d
EQ
3216 /*
3217 * Register gpu instance before amdgpu_device_enable_mgpu_fan_boost.
3218 * Otherwise the mgpu fan boost feature will be skipped due to the
3219 * gpu instance is counted less.
3220 */
3221 amdgpu_register_gpu_instance(adev);
3222
d38ceaf9
AD
3223 /* enable clockgating, etc. after ib tests, etc. since some blocks require
3224 * explicit gating rather than handling it automatically.
3225 */
06ec9070 3226 r = amdgpu_device_ip_late_init(adev);
2c1a2784 3227 if (r) {
06ec9070 3228 dev_err(adev->dev, "amdgpu_device_ip_late_init failed\n");
e23b74aa 3229 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_AMDGPU_LATE_INIT_FAIL, 0, r);
83ba126a 3230 goto failed;
2c1a2784 3231 }
d38ceaf9 3232
108c6a63 3233 /* must succeed. */
511fdbc3 3234 amdgpu_ras_resume(adev);
108c6a63 3235
beff74bc
AD
3236 queue_delayed_work(system_wq, &adev->delayed_init_work,
3237 msecs_to_jiffies(AMDGPU_RESUME_MS));
3238
dcea6e65
KR
3239 r = device_create_file(adev->dev, &dev_attr_pcie_replay_count);
3240 if (r) {
3241 dev_err(adev->dev, "Could not create pcie_replay_count");
3242 return r;
3243 }
108c6a63 3244
bd607166
KR
3245 r = device_create_file(adev->dev, &dev_attr_product_name);
3246 if (r) {
3247 dev_err(adev->dev, "Could not create product_name");
3248 return r;
3249 }
3250
3251 r = device_create_file(adev->dev, &dev_attr_product_number);
3252 if (r) {
3253 dev_err(adev->dev, "Could not create product_number");
3254 return r;
3255 }
3256
3257 r = device_create_file(adev->dev, &dev_attr_serial_number);
3258 if (r) {
3259 dev_err(adev->dev, "Could not create serial_number");
3260 return r;
3261 }
3262
d155bef0
AB
3263 if (IS_ENABLED(CONFIG_PERF_EVENTS))
3264 r = amdgpu_pmu_init(adev);
9c7c85f7
JK
3265 if (r)
3266 dev_err(adev->dev, "amdgpu_pmu_init failed\n");
3267
d38ceaf9 3268 return 0;
83ba126a
AD
3269
3270failed:
89041940 3271 amdgpu_vf_error_trans_all(adev);
3840c5bc 3272 if (boco)
83ba126a 3273 vga_switcheroo_fini_domain_pm_ops(adev->dev);
8840a387 3274
83ba126a 3275 return r;
d38ceaf9
AD
3276}
3277
d38ceaf9
AD
3278/**
3279 * amdgpu_device_fini - tear down the driver
3280 *
3281 * @adev: amdgpu_device pointer
3282 *
3283 * Tear down the driver info (all asics).
3284 * Called at driver shutdown.
3285 */
3286void amdgpu_device_fini(struct amdgpu_device *adev)
3287{
3288 int r;
3289
3290 DRM_INFO("amdgpu: finishing device.\n");
9f875167 3291 flush_delayed_work(&adev->delayed_init_work);
d0d13fe8 3292 adev->shutdown = true;
9f875167 3293
752c683d
ML
3294 /* make sure IB test finished before entering exclusive mode
3295 * to avoid preemption on IB test
3296 * */
3297 if (amdgpu_sriov_vf(adev))
3298 amdgpu_virt_request_full_gpu(adev, false);
3299
e5b03032
ML
3300 /* disable all interrupts */
3301 amdgpu_irq_disable_all(adev);
ff97cba8
ML
3302 if (adev->mode_info.mode_config_initialized){
3303 if (!amdgpu_device_has_dc_support(adev))
c2d88e06 3304 drm_helper_force_disable_all(adev->ddev);
ff97cba8
ML
3305 else
3306 drm_atomic_helper_shutdown(adev->ddev);
3307 }
d38ceaf9 3308 amdgpu_fence_driver_fini(adev);
7c868b59
YT
3309 if (adev->pm_sysfs_en)
3310 amdgpu_pm_sysfs_fini(adev);
d38ceaf9 3311 amdgpu_fbdev_fini(adev);
06ec9070 3312 r = amdgpu_device_ip_fini(adev);
ab4fe3e1
HR
3313 if (adev->firmware.gpu_info_fw) {
3314 release_firmware(adev->firmware.gpu_info_fw);
3315 adev->firmware.gpu_info_fw = NULL;
3316 }
d38ceaf9
AD
3317 adev->accel_working = false;
3318 /* free i2c buses */
4562236b
HW
3319 if (!amdgpu_device_has_dc_support(adev))
3320 amdgpu_i2c_fini(adev);
bfca0289
SL
3321
3322 if (amdgpu_emu_mode != 1)
3323 amdgpu_atombios_fini(adev);
3324
d38ceaf9
AD
3325 kfree(adev->bios);
3326 adev->bios = NULL;
3840c5bc
AD
3327 if (amdgpu_has_atpx() &&
3328 (amdgpu_is_atpx_hybrid() ||
3329 amdgpu_has_atpx_dgpu_power_cntl()) &&
3330 !pci_is_thunderbolt_attached(adev->pdev))
84c8b22e 3331 vga_switcheroo_unregister_client(adev->pdev);
3840c5bc 3332 if (amdgpu_device_supports_boco(adev->ddev))
83ba126a 3333 vga_switcheroo_fini_domain_pm_ops(adev->dev);
d38ceaf9
AD
3334 vga_client_register(adev->pdev, NULL, NULL, NULL);
3335 if (adev->rio_mem)
3336 pci_iounmap(adev->pdev, adev->rio_mem);
3337 adev->rio_mem = NULL;
3338 iounmap(adev->rmmio);
3339 adev->rmmio = NULL;
06ec9070 3340 amdgpu_device_doorbell_fini(adev);
e9bc1bf7 3341
dcea6e65 3342 device_remove_file(adev->dev, &dev_attr_pcie_replay_count);
7c868b59
YT
3343 if (adev->ucode_sysfs_en)
3344 amdgpu_ucode_sysfs_fini(adev);
bd607166
KR
3345 device_remove_file(adev->dev, &dev_attr_product_name);
3346 device_remove_file(adev->dev, &dev_attr_product_number);
3347 device_remove_file(adev->dev, &dev_attr_serial_number);
d155bef0
AB
3348 if (IS_ENABLED(CONFIG_PERF_EVENTS))
3349 amdgpu_pmu_fini(adev);
f54eeab4 3350 if (amdgpu_discovery && adev->asic_type >= CHIP_NAVI10)
a190d1c7 3351 amdgpu_discovery_fini(adev);
d38ceaf9
AD
3352}
3353
3354
3355/*
3356 * Suspend & resume.
3357 */
3358/**
810ddc3a 3359 * amdgpu_device_suspend - initiate device suspend
d38ceaf9 3360 *
87e3f136
DP
3361 * @dev: drm dev pointer
3362 * @fbcon: notify the fbdev of suspend
d38ceaf9
AD
3364 *
3365 * Puts the hw in the suspend state (all asics).
3366 * Returns 0 for success or an error on failure.
3367 * Called at driver suspend.
3368 */
de185019 3369int amdgpu_device_suspend(struct drm_device *dev, bool fbcon)
d38ceaf9
AD
3370{
3371 struct amdgpu_device *adev;
3372 struct drm_crtc *crtc;
3373 struct drm_connector *connector;
f8d2d39e 3374 struct drm_connector_list_iter iter;
5ceb54c6 3375 int r;
d38ceaf9
AD
3376
3377 if (dev == NULL || dev->dev_private == NULL) {
3378 return -ENODEV;
3379 }
3380
3381 adev = dev->dev_private;
3382
3383 if (dev->switch_power_state == DRM_SWITCH_POWER_OFF)
3384 return 0;
3385
44779b43 3386 adev->in_suspend = true;
d38ceaf9
AD
3387 drm_kms_helper_poll_disable(dev);
3388
5f818173
S
3389 if (fbcon)
3390 amdgpu_fbdev_set_suspend(adev, 1);
3391
beff74bc 3392 cancel_delayed_work_sync(&adev->delayed_init_work);
a5459475 3393
4562236b
HW
3394 if (!amdgpu_device_has_dc_support(adev)) {
3395 /* turn off display hw */
3396 drm_modeset_lock_all(dev);
f8d2d39e
LP
3397 drm_connector_list_iter_begin(dev, &iter);
3398 drm_for_each_connector_iter(connector, &iter)
3399 drm_helper_connector_dpms(connector,
3400 DRM_MODE_DPMS_OFF);
3401 drm_connector_list_iter_end(&iter);
4562236b 3402 drm_modeset_unlock_all(dev);
fe1053b7
AD
3403 /* unpin the front buffers and cursors */
3404 list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) {
3405 struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
3406 struct drm_framebuffer *fb = crtc->primary->fb;
3407 struct amdgpu_bo *robj;
3408
91334223 3409 if (amdgpu_crtc->cursor_bo && !adev->enable_virtual_display) {
fe1053b7
AD
3410 struct amdgpu_bo *aobj = gem_to_amdgpu_bo(amdgpu_crtc->cursor_bo);
3411 r = amdgpu_bo_reserve(aobj, true);
3412 if (r == 0) {
3413 amdgpu_bo_unpin(aobj);
3414 amdgpu_bo_unreserve(aobj);
3415 }
756e6880 3416 }
756e6880 3417
fe1053b7
AD
3418 if (fb == NULL || fb->obj[0] == NULL) {
3419 continue;
3420 }
3421 robj = gem_to_amdgpu_bo(fb->obj[0]);
3422 /* don't unpin kernel fb objects */
3423 if (!amdgpu_fbdev_robj_is_fb(adev, robj)) {
3424 r = amdgpu_bo_reserve(robj, true);
3425 if (r == 0) {
3426 amdgpu_bo_unpin(robj);
3427 amdgpu_bo_unreserve(robj);
3428 }
d38ceaf9
AD
3429 }
3430 }
3431 }
fe1053b7 3432
a23ca7f7
PL
3433 amdgpu_device_set_pg_state(adev, AMD_PG_STATE_UNGATE);
3434 amdgpu_device_set_cg_state(adev, AMD_CG_STATE_UNGATE);
3435
9593f4d6 3436 amdgpu_amdkfd_suspend(adev, !fbcon);
fe1053b7 3437
5e6932fe 3438 amdgpu_ras_suspend(adev);
3439
fe1053b7
AD
3440 r = amdgpu_device_ip_suspend_phase1(adev);
3441
d38ceaf9
AD
3442 /* evict vram memory */
3443 amdgpu_bo_evict_vram(adev);
3444
5ceb54c6 3445 amdgpu_fence_driver_suspend(adev);
d38ceaf9 3446
fe1053b7 3447 r = amdgpu_device_ip_suspend_phase2(adev);
d38ceaf9 3448
a0a71e49
AD
3449 /* evict remaining vram memory
3450 * This second call to evict vram is to evict the gart page table
3451 * using the CPU.
3452 */
d38ceaf9
AD
3453 amdgpu_bo_evict_vram(adev);
3454
d38ceaf9
AD
3455 return 0;
3456}
3457
3458/**
810ddc3a 3459 * amdgpu_device_resume - initiate device resume
d38ceaf9 3460 *
87e3f136
DP
3461 * @dev: drm dev pointer
3462 * @fbcon: notify the fbdev of resume
d38ceaf9
AD
3464 *
3465 * Bring the hw back to operating state (all asics).
3466 * Returns 0 for success or an error on failure.
3467 * Called at driver resume.
3468 */
de185019 3469int amdgpu_device_resume(struct drm_device *dev, bool fbcon)
d38ceaf9
AD
3470{
3471 struct drm_connector *connector;
f8d2d39e 3472 struct drm_connector_list_iter iter;
d38ceaf9 3473 struct amdgpu_device *adev = dev->dev_private;
756e6880 3474 struct drm_crtc *crtc;
03161a6e 3475 int r = 0;
d38ceaf9
AD
3476
3477 if (dev->switch_power_state == DRM_SWITCH_POWER_OFF)
3478 return 0;
3479
d38ceaf9 3480 /* post card */
39c640c0 3481 if (amdgpu_device_need_post(adev)) {
74b0b157 3482 r = amdgpu_atom_asic_init(adev->mode_info.atom_context);
3483 if (r)
3484 DRM_ERROR("amdgpu asic init failed\n");
3485 }
d38ceaf9 3486
06ec9070 3487 r = amdgpu_device_ip_resume(adev);
e6707218 3488 if (r) {
06ec9070 3489 DRM_ERROR("amdgpu_device_ip_resume failed (%d).\n", r);
4d3b9ae5 3490 return r;
e6707218 3491 }
5ceb54c6
AD
3492 amdgpu_fence_driver_resume(adev);
3493
d38ceaf9 3494
06ec9070 3495 r = amdgpu_device_ip_late_init(adev);
03161a6e 3496 if (r)
4d3b9ae5 3497 return r;
d38ceaf9 3498
beff74bc
AD
3499 queue_delayed_work(system_wq, &adev->delayed_init_work,
3500 msecs_to_jiffies(AMDGPU_RESUME_MS));
3501
fe1053b7
AD
3502 if (!amdgpu_device_has_dc_support(adev)) {
3503 /* pin cursors */
3504 list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) {
3505 struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
3506
91334223 3507 if (amdgpu_crtc->cursor_bo && !adev->enable_virtual_display) {
fe1053b7
AD
3508 struct amdgpu_bo *aobj = gem_to_amdgpu_bo(amdgpu_crtc->cursor_bo);
3509 r = amdgpu_bo_reserve(aobj, true);
3510 if (r == 0) {
3511 r = amdgpu_bo_pin(aobj, AMDGPU_GEM_DOMAIN_VRAM);
3512 if (r != 0)
3513 DRM_ERROR("Failed to pin cursor BO (%d)\n", r);
3514 amdgpu_crtc->cursor_addr = amdgpu_bo_gpu_offset(aobj);
3515 amdgpu_bo_unreserve(aobj);
3516 }
756e6880
AD
3517 }
3518 }
3519 }
9593f4d6 3520 r = amdgpu_amdkfd_resume(adev, !fbcon);
ba997709
YZ
3521 if (r)
3522 return r;
756e6880 3523
96a5d8d4 3524 /* Make sure IB tests flushed */
beff74bc 3525 flush_delayed_work(&adev->delayed_init_work);
96a5d8d4 3526
d38ceaf9
AD
3527 /* blat the mode back in */
3528 if (fbcon) {
4562236b
HW
3529 if (!amdgpu_device_has_dc_support(adev)) {
3530 /* pre DCE11 */
3531 drm_helper_resume_force_mode(dev);
3532
3533 /* turn on display hw */
3534 drm_modeset_lock_all(dev);
f8d2d39e
LP
3535
3536 drm_connector_list_iter_begin(dev, &iter);
3537 drm_for_each_connector_iter(connector, &iter)
3538 drm_helper_connector_dpms(connector,
3539 DRM_MODE_DPMS_ON);
3540 drm_connector_list_iter_end(&iter);
3541
4562236b 3542 drm_modeset_unlock_all(dev);
d38ceaf9 3543 }
4d3b9ae5 3544 amdgpu_fbdev_set_suspend(adev, 0);
d38ceaf9
AD
3545 }
3546
3547 drm_kms_helper_poll_enable(dev);
23a1a9e5 3548
5e6932fe 3549 amdgpu_ras_resume(adev);
3550
23a1a9e5
L
3551 /*
3552 * Most of the connector probing functions try to acquire runtime pm
3553 * refs to ensure that the GPU is powered on when connector polling is
3554 * performed. Since we're calling this from a runtime PM callback,
3555 * trying to acquire rpm refs will cause us to deadlock.
3556 *
3557 * Since we're guaranteed to be holding the rpm lock, it's safe to
3558 * temporarily disable the rpm helpers so this doesn't deadlock us.
3559 */
3560#ifdef CONFIG_PM
3561 dev->dev->power.disable_depth++;
3562#endif
4562236b
HW
3563 if (!amdgpu_device_has_dc_support(adev))
3564 drm_helper_hpd_irq_event(dev);
3565 else
3566 drm_kms_helper_hotplug_event(dev);
23a1a9e5
L
3567#ifdef CONFIG_PM
3568 dev->dev->power.disable_depth--;
3569#endif
44779b43
RZ
3570 adev->in_suspend = false;
3571
4d3b9ae5 3572 return 0;
d38ceaf9
AD
3573}
3574
e3ecdffa
AD
3575/**
3576 * amdgpu_device_ip_check_soft_reset - did soft reset succeed
3577 *
3578 * @adev: amdgpu_device pointer
3579 *
3580 * The list of all the hardware IPs that make up the asic is walked and
3581 * the check_soft_reset callbacks are run. check_soft_reset determines
3582 * if the asic is still hung or not.
3583 * Returns true if any of the IPs are still in a hung state, false if not.
3584 */
06ec9070 3585static bool amdgpu_device_ip_check_soft_reset(struct amdgpu_device *adev)
63fbf42f
CZ
3586{
3587 int i;
3588 bool asic_hang = false;
3589
f993d628
ML
3590 if (amdgpu_sriov_vf(adev))
3591 return true;
3592
8bc04c29
AD
3593 if (amdgpu_asic_need_full_reset(adev))
3594 return true;
3595
63fbf42f 3596 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 3597 if (!adev->ip_blocks[i].status.valid)
63fbf42f 3598 continue;
a1255107
AD
3599 if (adev->ip_blocks[i].version->funcs->check_soft_reset)
3600 adev->ip_blocks[i].status.hang =
3601 adev->ip_blocks[i].version->funcs->check_soft_reset(adev);
3602 if (adev->ip_blocks[i].status.hang) {
3603 DRM_INFO("IP block:%s is hung!\n", adev->ip_blocks[i].version->funcs->name);
63fbf42f
CZ
3604 asic_hang = true;
3605 }
3606 }
3607 return asic_hang;
3608}
3609
e3ecdffa
AD
3610/**
3611 * amdgpu_device_ip_pre_soft_reset - prepare for soft reset
3612 *
3613 * @adev: amdgpu_device pointer
3614 *
3615 * The list of all the hardware IPs that make up the asic is walked and the
3616 * pre_soft_reset callbacks are run if the block is hung. pre_soft_reset
3617 * handles any IP specific hardware or software state changes that are
3618 * necessary for a soft reset to succeed.
3619 * Returns 0 on success, negative error code on failure.
3620 */
06ec9070 3621static int amdgpu_device_ip_pre_soft_reset(struct amdgpu_device *adev)
d31a501e
CZ
3622{
3623 int i, r = 0;
3624
3625 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 3626 if (!adev->ip_blocks[i].status.valid)
d31a501e 3627 continue;
a1255107
AD
3628 if (adev->ip_blocks[i].status.hang &&
3629 adev->ip_blocks[i].version->funcs->pre_soft_reset) {
3630 r = adev->ip_blocks[i].version->funcs->pre_soft_reset(adev);
d31a501e
CZ
3631 if (r)
3632 return r;
3633 }
3634 }
3635
3636 return 0;
3637}
3638
e3ecdffa
AD
3639/**
3640 * amdgpu_device_ip_need_full_reset - check if a full asic reset is needed
3641 *
3642 * @adev: amdgpu_device pointer
3643 *
3644 * Some hardware IPs cannot be soft reset. If they are hung, a full gpu
3645 * reset is necessary to recover.
3646 * Returns true if a full asic reset is required, false if not.
3647 */
06ec9070 3648static bool amdgpu_device_ip_need_full_reset(struct amdgpu_device *adev)
35d782fe 3649{
da146d3b
AD
3650 int i;
3651
8bc04c29
AD
3652 if (amdgpu_asic_need_full_reset(adev))
3653 return true;
3654
da146d3b 3655 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 3656 if (!adev->ip_blocks[i].status.valid)
da146d3b 3657 continue;
a1255107
AD
3658 if ((adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) ||
3659 (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) ||
3660 (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_ACP) ||
98512bb8
KW
3661 (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_DCE) ||
3662 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP) {
a1255107 3663 if (adev->ip_blocks[i].status.hang) {
da146d3b
AD
3664 DRM_INFO("Some block need full reset!\n");
3665 return true;
3666 }
3667 }
35d782fe
CZ
3668 }
3669 return false;
3670}
3671
e3ecdffa
AD
3672/**
3673 * amdgpu_device_ip_soft_reset - do a soft reset
3674 *
3675 * @adev: amdgpu_device pointer
3676 *
3677 * The list of all the hardware IPs that make up the asic is walked and the
3678 * soft_reset callbacks are run if the block is hung. soft_reset handles any
3679 * IP specific hardware or software state changes that are necessary to soft
3680 * reset the IP.
3681 * Returns 0 on success, negative error code on failure.
3682 */
06ec9070 3683static int amdgpu_device_ip_soft_reset(struct amdgpu_device *adev)
35d782fe
CZ
3684{
3685 int i, r = 0;
3686
3687 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 3688 if (!adev->ip_blocks[i].status.valid)
35d782fe 3689 continue;
a1255107
AD
3690 if (adev->ip_blocks[i].status.hang &&
3691 adev->ip_blocks[i].version->funcs->soft_reset) {
3692 r = adev->ip_blocks[i].version->funcs->soft_reset(adev);
35d782fe
CZ
3693 if (r)
3694 return r;
3695 }
3696 }
3697
3698 return 0;
3699}
3700
e3ecdffa
AD
3701/**
3702 * amdgpu_device_ip_post_soft_reset - clean up from soft reset
3703 *
3704 * @adev: amdgpu_device pointer
3705 *
3706 * The list of all the hardware IPs that make up the asic is walked and the
3707 * post_soft_reset callbacks are run if the asic was hung. post_soft_reset
3708 * handles any IP specific hardware or software state changes that are
3709 * necessary after the IP has been soft reset.
3710 * Returns 0 on success, negative error code on failure.
3711 */
06ec9070 3712static int amdgpu_device_ip_post_soft_reset(struct amdgpu_device *adev)
35d782fe
CZ
3713{
3714 int i, r = 0;
3715
3716 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 3717 if (!adev->ip_blocks[i].status.valid)
35d782fe 3718 continue;
a1255107
AD
3719 if (adev->ip_blocks[i].status.hang &&
3720 adev->ip_blocks[i].version->funcs->post_soft_reset)
3721 r = adev->ip_blocks[i].version->funcs->post_soft_reset(adev);
35d782fe
CZ
3722 if (r)
3723 return r;
3724 }
3725
3726 return 0;
3727}
3728
e3ecdffa 3729/**
c33adbc7 3730 * amdgpu_device_recover_vram - Recover some VRAM contents
e3ecdffa
AD
3731 *
3732 * @adev: amdgpu_device pointer
3733 *
3734 * Restores the contents of VRAM buffers from the shadows in GTT. Used to
3735 * restore things like GPUVM page tables after a GPU reset where
3736 * the contents of VRAM might be lost.
403009bf
CK
3737 *
3738 * Returns:
3739 * 0 on success, negative error code on failure.
e3ecdffa 3740 */
c33adbc7 3741static int amdgpu_device_recover_vram(struct amdgpu_device *adev)
c41d1cf6 3742{
c41d1cf6 3743 struct dma_fence *fence = NULL, *next = NULL;
403009bf
CK
3744 struct amdgpu_bo *shadow;
3745 long r = 1, tmo;
c41d1cf6
ML
3746
3747 if (amdgpu_sriov_runtime(adev))
b045d3af 3748 tmo = msecs_to_jiffies(8000);
c41d1cf6
ML
3749 else
3750 tmo = msecs_to_jiffies(100);
3751
3752 DRM_INFO("recover vram bo from shadow start\n");
3753 mutex_lock(&adev->shadow_list_lock);
403009bf
CK
3754 list_for_each_entry(shadow, &adev->shadow_list, shadow_list) {
3755
3756 /* No need to recover an evicted BO */
3757 if (shadow->tbo.mem.mem_type != TTM_PL_TT ||
b575f10d 3758 shadow->tbo.mem.start == AMDGPU_BO_INVALID_OFFSET ||
403009bf
CK
3759 shadow->parent->tbo.mem.mem_type != TTM_PL_VRAM)
3760 continue;
3761
3762 r = amdgpu_bo_restore_shadow(shadow, &next);
3763 if (r)
3764 break;
3765
c41d1cf6 3766 if (fence) {
1712fb1a 3767 tmo = dma_fence_wait_timeout(fence, false, tmo);
403009bf
CK
3768 dma_fence_put(fence);
3769 fence = next;
1712fb1a 3770 if (tmo == 0) {
3771 r = -ETIMEDOUT;
c41d1cf6 3772 break;
1712fb1a 3773 } else if (tmo < 0) {
3774 r = tmo;
3775 break;
3776 }
403009bf
CK
3777 } else {
3778 fence = next;
c41d1cf6 3779 }
c41d1cf6
ML
3780 }
3781 mutex_unlock(&adev->shadow_list_lock);
3782
403009bf
CK
3783 if (fence)
3784 tmo = dma_fence_wait_timeout(fence, false, tmo);
c41d1cf6
ML
3785 dma_fence_put(fence);
3786
1712fb1a 3787 if (r < 0 || tmo <= 0) {
3788 DRM_ERROR("recover vram bo from shadow failed, r is %ld, tmo is %ld\n", r, tmo);
403009bf
CK
3789 return -EIO;
3790 }
c41d1cf6 3791
403009bf
CK
3792 DRM_INFO("recover vram bo from shadow done\n");
3793 return 0;
c41d1cf6
ML
3794}
3795
a90ad3c2 3796
e3ecdffa 3797/**
06ec9070 3798 * amdgpu_device_reset_sriov - reset ASIC for SR-IOV vf
5740682e
ML
3799 *
3800 * @adev: amdgpu device pointer
87e3f136 3801 * @from_hypervisor: request from hypervisor
5740682e
ML
3802 *
3803 * do VF FLR and reinitialize the ASIC
3f48c681 3804 * Returns 0 if it succeeded, otherwise a negative error code.
e3ecdffa
AD
3805 */
3806static int amdgpu_device_reset_sriov(struct amdgpu_device *adev,
3807 bool from_hypervisor)
5740682e
ML
3808{
3809 int r;
3810
3811 if (from_hypervisor)
3812 r = amdgpu_virt_request_full_gpu(adev, true);
3813 else
3814 r = amdgpu_virt_reset_gpu(adev);
3815 if (r)
3816 return r;
a90ad3c2 3817
b639c22c
JZ
3818 amdgpu_amdkfd_pre_reset(adev);
3819
a90ad3c2 3820 /* Resume IP prior to SMC */
06ec9070 3821 r = amdgpu_device_ip_reinit_early_sriov(adev);
5740682e
ML
3822 if (r)
3823 goto error;
a90ad3c2 3824
c9ffa427 3825 amdgpu_virt_init_data_exchange(adev);
a90ad3c2 3826 /* we need recover gart prior to run SMC/CP/SDMA resume */
c1c7ce8f 3827 amdgpu_gtt_mgr_recover(&adev->mman.bdev.man[TTM_PL_TT]);
a90ad3c2 3828
7a3e0bb2
RZ
3829 r = amdgpu_device_fw_loading(adev);
3830 if (r)
3831 return r;
3832
a90ad3c2 3833 /* now we are okay to resume SMC/CP/SDMA */
06ec9070 3834 r = amdgpu_device_ip_reinit_late_sriov(adev);
5740682e
ML
3835 if (r)
3836 goto error;
a90ad3c2
ML
3837
3838 amdgpu_irq_gpu_reset_resume_helper(adev);
5740682e 3839 r = amdgpu_ib_ring_tests(adev);
f81e8d53 3840 amdgpu_amdkfd_post_reset(adev);
a90ad3c2 3841
abc34253
ED
3842error:
3843 amdgpu_virt_release_full_gpu(adev, true);
c41d1cf6 3844 if (!r && adev->virt.gim_feature & AMDGIM_FEATURE_GIM_FLR_VRAMLOST) {
e3526257 3845 amdgpu_inc_vram_lost(adev);
c33adbc7 3846 r = amdgpu_device_recover_vram(adev);
a90ad3c2
ML
3847 }
3848
3849 return r;
3850}
3851
12938fad
CK
3852/**
3853 * amdgpu_device_should_recover_gpu - check if we should try GPU recovery
3854 *
3855 * @adev: amdgpu device pointer
3856 *
3857 * Check amdgpu_gpu_recovery and SRIOV status to see if we should try to recover
3858 * a hung GPU.
3859 */
3860bool amdgpu_device_should_recover_gpu(struct amdgpu_device *adev)
3861{
3862 if (!amdgpu_device_ip_check_soft_reset(adev)) {
3863 DRM_INFO("Timeout, but no hardware hang detected.\n");
3864 return false;
3865 }
3866
3ba7b418
AG
3867 if (amdgpu_gpu_recovery == 0)
3868 goto disabled;
3869
3870 if (amdgpu_sriov_vf(adev))
3871 return true;
3872
3873 if (amdgpu_gpu_recovery == -1) {
3874 switch (adev->asic_type) {
fc42d47c
AG
3875 case CHIP_BONAIRE:
3876 case CHIP_HAWAII:
3ba7b418
AG
3877 case CHIP_TOPAZ:
3878 case CHIP_TONGA:
3879 case CHIP_FIJI:
3880 case CHIP_POLARIS10:
3881 case CHIP_POLARIS11:
3882 case CHIP_POLARIS12:
3883 case CHIP_VEGAM:
3884 case CHIP_VEGA20:
3885 case CHIP_VEGA10:
3886 case CHIP_VEGA12:
c43b849f 3887 case CHIP_RAVEN:
e9d4cf91 3888 case CHIP_ARCTURUS:
2cb44fb0 3889 case CHIP_RENOIR:
658c6639
AD
3890 case CHIP_NAVI10:
3891 case CHIP_NAVI14:
3892 case CHIP_NAVI12:
3ba7b418
AG
3893 break;
3894 default:
3895 goto disabled;
3896 }
12938fad
CK
3897 }
3898
3899 return true;
3ba7b418
AG
3900
3901disabled:
3902 DRM_INFO("GPU recovery disabled.\n");
3903 return false;
12938fad
CK
3904}
3905
5c6dd71e 3906
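/*
 * amdgpu_device_pre_asic_reset - prepare a device for ASIC reset
 *
 * Force-completes the outstanding hardware fences on every ring and bumps
 * the guilty job's karma. On bare metal it additionally attempts an IP
 * soft reset first and, if that is not sufficient, suspends the IP blocks
 * so that a full ASIC reset can follow.
 */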
26bc5340
AG
3907static int amdgpu_device_pre_asic_reset(struct amdgpu_device *adev,
3908 struct amdgpu_job *job,
3909 bool *need_full_reset_arg)
3910{
3911 int i, r = 0;
3912 bool need_full_reset = *need_full_reset_arg;
71182665 3913
71182665 3914 /* block all schedulers and reset given job's ring */
0875dc9e
CZ
3915 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
3916 struct amdgpu_ring *ring = adev->rings[i];
3917
51687759 3918 if (!ring || !ring->sched.thread)
0875dc9e 3919 continue;
5740682e 3920
2f9d4084
ML
3921 /* after all hw jobs are reset, hw fence is meaningless, so force_completion */
3922 amdgpu_fence_driver_force_completion(ring);
0875dc9e 3923 }
d38ceaf9 3924
222b5f04
AG
3925 if (job)
3926 drm_sched_increase_karma(&job->base);
3927
1d721ed6 3928 /* Don't suspend on bare metal if we are not going to HW reset the ASIC */
26bc5340
AG
3929 if (!amdgpu_sriov_vf(adev)) {
3930
3931 if (!need_full_reset)
3932 need_full_reset = amdgpu_device_ip_need_full_reset(adev);
3933
3934 if (!need_full_reset) {
3935 amdgpu_device_ip_pre_soft_reset(adev);
3936 r = amdgpu_device_ip_soft_reset(adev);
3937 amdgpu_device_ip_post_soft_reset(adev);
3938 if (r || amdgpu_device_ip_check_soft_reset(adev)) {
3939 DRM_INFO("soft reset failed, will fallback to full reset!\n");
3940 need_full_reset = true;
3941 }
3942 }
3943
3944 if (need_full_reset)
3945 r = amdgpu_device_ip_suspend(adev);
3946
3947 *need_full_reset_arg = need_full_reset;
3948 }
3949
3950 return r;
3951}
3952
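/*
 * amdgpu_do_asic_reset - perform the ASIC reset and re-initialization
 *
 * Resets every device on the device list (in parallel for XGMI hives),
 * then for each device re-posts the vBIOS, resumes the IP blocks, recovers
 * the GTT, reloads firmware, restores lost VRAM contents where possible and
 * re-runs the IB tests. Returns -EAGAIN if the caller should retry with a
 * full reset.
 */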
041a62bc 3953static int amdgpu_do_asic_reset(struct amdgpu_hive_info *hive,
26bc5340
AG
3954 struct list_head *device_list_handle,
3955 bool *need_full_reset_arg)
3956{
3957 struct amdgpu_device *tmp_adev = NULL;
3958 bool need_full_reset = *need_full_reset_arg, vram_lost = false;
3959 int r = 0;
3960
3961 /*
3962 * ASIC reset has to be done on all XGMI hive nodes ASAP
3963 * to allow proper link negotiation in FW (within 1 sec)
3964 */
3965 if (need_full_reset) {
3966 list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) {
041a62bc 3967 /* For XGMI run all resets in parallel to speed up the process */
d4535e2c 3968 if (tmp_adev->gmc.xgmi.num_physical_nodes > 1) {
c96cf282 3969 if (!queue_work(system_unbound_wq, &tmp_adev->xgmi_reset_work))
d4535e2c
AG
3970 r = -EALREADY;
3971 } else
3972 r = amdgpu_asic_reset(tmp_adev);
d4535e2c 3973
041a62bc
AG
3974 if (r) {
3975 DRM_ERROR("ASIC reset failed with error, %d for drm dev, %s",
3976 r, tmp_adev->ddev->unique);
3977 break;
ce316fa5
LM
3978 }
3979 }
3980
041a62bc
AG
3981 /* For XGMI wait for all resets to complete before proceed */
3982 if (!r) {
ce316fa5
LM
3983 list_for_each_entry(tmp_adev, device_list_handle,
3984 gmc.xgmi.head) {
3985 if (tmp_adev->gmc.xgmi.num_physical_nodes > 1) {
3986 flush_work(&tmp_adev->xgmi_reset_work);
3987 r = tmp_adev->asic_reset_res;
3988 if (r)
3989 break;
ce316fa5
LM
3990 }
3991 }
3992 }
ce316fa5 3993 }
26bc5340 3994
43c4d576
JC
3995 if (!r && amdgpu_ras_intr_triggered()) {
3996 list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) {
3997 if (tmp_adev->mmhub.funcs &&
3998 tmp_adev->mmhub.funcs->reset_ras_error_count)
3999 tmp_adev->mmhub.funcs->reset_ras_error_count(tmp_adev);
4000 }
4001
00eaa571 4002 amdgpu_ras_intr_cleared();
43c4d576 4003 }
00eaa571 4004
26bc5340
AG
4005 list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) {
4006 if (need_full_reset) {
4007 /* post card */
4008 if (amdgpu_atom_asic_init(tmp_adev->mode_info.atom_context))
4009 DRM_WARN("asic atom init failed!");
4010
4011 if (!r) {
4012 dev_info(tmp_adev->dev, "GPU reset succeeded, trying to resume\n");
4013 r = amdgpu_device_ip_resume_phase1(tmp_adev);
4014 if (r)
4015 goto out;
4016
4017 vram_lost = amdgpu_device_check_vram_lost(tmp_adev);
4018 if (vram_lost) {
77e7f829 4019 DRM_INFO("VRAM is lost due to GPU reset!\n");
e3526257 4020 amdgpu_inc_vram_lost(tmp_adev);
26bc5340
AG
4021 }
4022
4023 r = amdgpu_gtt_mgr_recover(
4024 &tmp_adev->mman.bdev.man[TTM_PL_TT]);
4025 if (r)
4026 goto out;
4027
4028 r = amdgpu_device_fw_loading(tmp_adev);
4029 if (r)
4030 return r;
4031
4032 r = amdgpu_device_ip_resume_phase2(tmp_adev);
4033 if (r)
4034 goto out;
4035
4036 if (vram_lost)
4037 amdgpu_device_fill_reset_magic(tmp_adev);
4038
fdafb359
EQ
4039 /*
4040 * Add this ASIC back as tracked, since the reset has already
4041 * completed successfully.
4042 */
4043 amdgpu_register_gpu_instance(tmp_adev);
4044
7c04ca50 4045 r = amdgpu_device_ip_late_init(tmp_adev);
4046 if (r)
4047 goto out;
4048
565d1941
EQ
4049 amdgpu_fbdev_set_suspend(tmp_adev, 0);
4050
e79a04d5 4051 /* must succeed. */
511fdbc3 4052 amdgpu_ras_resume(tmp_adev);
e79a04d5 4053
26bc5340
AG
4054 /* Update PSP FW topology after reset */
4055 if (hive && tmp_adev->gmc.xgmi.num_physical_nodes > 1)
4056 r = amdgpu_xgmi_update_topology(hive, tmp_adev);
4057 }
4058 }
4059
4060
4061out:
4062 if (!r) {
4063 amdgpu_irq_gpu_reset_resume_helper(tmp_adev);
4064 r = amdgpu_ib_ring_tests(tmp_adev);
4065 if (r) {
4066 dev_err(tmp_adev->dev, "ib ring test failed (%d).\n", r);
4067 r = amdgpu_device_ip_suspend(tmp_adev);
4068 need_full_reset = true;
4069 r = -EAGAIN;
4070 goto end;
4071 }
4072 }
4073
4074 if (!r)
4075 r = amdgpu_device_recover_vram(tmp_adev);
4076 else
4077 tmp_adev->asic_reset_res = r;
4078 }
4079
4080end:
4081 *need_full_reset_arg = need_full_reset;
4082 return r;
4083}
4084
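/*
 * amdgpu_device_lock_adev - serialize GPU reset handling for one device
 *
 * Takes adev->lock_reset (with a trylock when requested), bumps the reset
 * counter, marks the device as being in reset and programs the MP1 state
 * expected by the selected reset method. Returns false only if the trylock
 * failed.
 */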
1d721ed6 4085static bool amdgpu_device_lock_adev(struct amdgpu_device *adev, bool trylock)
26bc5340 4086{
1d721ed6
AG
4087 if (trylock) {
4088 if (!mutex_trylock(&adev->lock_reset))
4089 return false;
4090 } else
4091 mutex_lock(&adev->lock_reset);
5740682e 4092
26bc5340 4093 atomic_inc(&adev->gpu_reset_counter);
2a9b90ae 4094 adev->in_gpu_reset = true;
a3a09142
AD
4095 switch (amdgpu_asic_reset_method(adev)) {
4096 case AMD_RESET_METHOD_MODE1:
4097 adev->mp1_state = PP_MP1_STATE_SHUTDOWN;
4098 break;
4099 case AMD_RESET_METHOD_MODE2:
4100 adev->mp1_state = PP_MP1_STATE_RESET;
4101 break;
4102 default:
4103 adev->mp1_state = PP_MP1_STATE_NONE;
4104 break;
4105 }
1d721ed6
AG
4106
4107 return true;
26bc5340 4108}
d38ceaf9 4109
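/* amdgpu_device_unlock_adev - revert amdgpu_device_lock_adev once the reset is done */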
26bc5340
AG
4110static void amdgpu_device_unlock_adev(struct amdgpu_device *adev)
4111{
89041940 4112 amdgpu_vf_error_trans_all(adev);
a3a09142 4113 adev->mp1_state = PP_MP1_STATE_NONE;
2a9b90ae 4114 adev->in_gpu_reset = false;
13a752e3 4115 mutex_unlock(&adev->lock_reset);
26bc5340
AG
4116}
4117
26bc5340
AG
4118/**
4119 * amdgpu_device_gpu_recover - reset the asic and recover scheduler
4120 *
4121 * @adev: amdgpu device pointer
4122 * @job: which job triggered the hang
4123 *
4124 * Attempt to reset the GPU if it has hung (all asics).
4125 * Attempt to do soft-reset or full-reset and reinitialize the ASIC.
4126 * Returns 0 for success or an error on failure.
4127 */
4128
4129int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
4130 struct amdgpu_job *job)
4131{
1d721ed6
AG
4132 struct list_head device_list, *device_list_handle = NULL;
4133 bool need_full_reset, job_signaled;
26bc5340 4134 struct amdgpu_hive_info *hive = NULL;
26bc5340 4135 struct amdgpu_device *tmp_adev = NULL;
1d721ed6 4136 int i, r = 0;
7c6e68c7 4137 bool in_ras_intr = amdgpu_ras_intr_triggered();
b823821f
LM
4138 bool use_baco =
4139 (amdgpu_asic_reset_method(adev) == AMD_RESET_METHOD_BACO);
26bc5340 4141
d5ea093e
AG
4142 /*
4143 * Flush RAM to disk so that after reboot
4144 * the user can read log and see why the system rebooted.
4145 */
b823821f 4146 if (in_ras_intr && !use_baco && amdgpu_ras_get_context(adev)->reboot) {
d5ea093e
AG
4147
4148 DRM_WARN("Emergency reboot.");
4149
4150 ksys_sync_helper();
4151 emergency_restart();
4152 }
4153
1d721ed6 4154 need_full_reset = job_signaled = false;
26bc5340
AG
4155 INIT_LIST_HEAD(&device_list);
4156
61380faa
JC
4157 amdgpu_ras_set_error_query_ready(adev, false);
4158
b823821f
LM
4159 dev_info(adev->dev, "GPU %s begin!\n",
4160 (in_ras_intr && !use_baco) ? "jobs stop":"reset");
26bc5340 4161
beff74bc 4162 cancel_delayed_work_sync(&adev->delayed_init_work);
c53e4db7 4163
1d721ed6
AG
4164 hive = amdgpu_get_xgmi_hive(adev, false);
4165
26bc5340 4166 /*
1d721ed6
AG
4167 * Here we trylock to avoid a chain of resets being triggered either
4168 * by jobs on different adevs in the XGMI hive or by jobs on different
4169 * schedulers for the same device while this TO handler is running.
4170 * We always reset all schedulers for device and all devices for XGMI
4171 * hive so that should take care of them too.
26bc5340 4172 */
1d721ed6
AG
4173
4174 if (hive && !mutex_trylock(&hive->reset_lock)) {
4175 DRM_INFO("Bailing on TDR for s_job:%llx, hive: %llx as another already in progress",
0b2d2c2e 4176 job ? job->base.id : -1, hive->hive_id);
26bc5340 4177 return 0;
1d721ed6 4178 }
26bc5340
AG
4179
4180 /* Start with adev pre asic reset first for soft reset check.*/
1d721ed6
AG
4181 if (!amdgpu_device_lock_adev(adev, !hive)) {
4182 DRM_INFO("Bailing on TDR for s_job:%llx, as another already in progress",
0b2d2c2e 4183 job ? job->base.id : -1);
1d721ed6 4184 return 0;
26bc5340
AG
4185 }
4186
7c6e68c7
AG
4187 /* Block kfd: SRIOV would do it separately */
4188 if (!amdgpu_sriov_vf(adev))
4189 amdgpu_amdkfd_pre_reset(adev);
4190
26bc5340 4191 /* Build list of devices to reset */
1d721ed6 4192 if (adev->gmc.xgmi.num_physical_nodes > 1) {
26bc5340 4193 if (!hive) {
7c6e68c7
AG
4194 /*unlock kfd: SRIOV would do it separately */
4195 if (!amdgpu_sriov_vf(adev))
4196 amdgpu_amdkfd_post_reset(adev);
26bc5340
AG
4197 amdgpu_device_unlock_adev(adev);
4198 return -ENODEV;
4199 }
4200
4201 /*
4202 * In case we are in XGMI hive mode device reset is done for all the
4203 * nodes in the hive to retrain all XGMI links and hence the reset
4204 * sequence is executed in loop on all nodes.
4205 */
4206 device_list_handle = &hive->device_list;
4207 } else {
4208 list_add_tail(&adev->gmc.xgmi.head, &device_list);
4209 device_list_handle = &device_list;
4210 }
4211
1d721ed6
AG
4212 /* block all schedulers and reset given job's ring */
4213 list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) {
7c6e68c7 4214 if (tmp_adev != adev) {
61380faa 4215 amdgpu_ras_set_error_query_ready(tmp_adev, false);
12ffa55d 4216 amdgpu_device_lock_adev(tmp_adev, false);
7c6e68c7
AG
4217 if (!amdgpu_sriov_vf(tmp_adev))
4218 amdgpu_amdkfd_pre_reset(tmp_adev);
4219 }
4220
12ffa55d
AG
4221 /*
4222 * Mark these ASICs to be reset as untracked first,
4223 * and add them back after the reset completes.
4224 */
4225 amdgpu_unregister_gpu_instance(tmp_adev);
4226
a2f63ee8 4227 amdgpu_fbdev_set_suspend(tmp_adev, 1);
565d1941 4228
f1c1314b 4229 /* disable ras on ALL IPs */
b823821f
LM
4230 if (!(in_ras_intr && !use_baco) &&
4231 amdgpu_device_ip_need_full_reset(tmp_adev))
f1c1314b 4232 amdgpu_ras_suspend(tmp_adev);
4233
1d721ed6
AG
4234 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
4235 struct amdgpu_ring *ring = tmp_adev->rings[i];
4236
4237 if (!ring || !ring->sched.thread)
4238 continue;
4239
0b2d2c2e 4240 drm_sched_stop(&ring->sched, job ? &job->base : NULL);
7c6e68c7 4241
b823821f 4242 if (in_ras_intr && !use_baco)
7c6e68c7 4243 amdgpu_job_stop_all_jobs_on_sched(&ring->sched);
1d721ed6
AG
4244 }
4245 }
4246
4247
b823821f 4248 if (in_ras_intr && !use_baco)
7c6e68c7
AG
4249 goto skip_sched_resume;
4250
1d721ed6
AG
4251 /*
4252 * Must check guilty signal here since after this point all old
4253 * HW fences are force signaled.
4254 *
4255 * job->base holds a reference to parent fence
4256 */
4257 if (job && job->base.s_fence->parent &&
4258 dma_fence_is_signaled(job->base.s_fence->parent))
4259 job_signaled = true;
4260
1d721ed6
AG
4261 if (job_signaled) {
4262 dev_info(adev->dev, "Guilty job already signaled, skipping HW reset");
4263 goto skip_hw_reset;
4264 }
4265
4266
4267 /* Guilty job will be freed after this*/
0b2d2c2e 4268 r = amdgpu_device_pre_asic_reset(adev, job, &need_full_reset);
1d721ed6
AG
4269 if (r) {
4270 /*TODO Should we stop ?*/
4271 DRM_ERROR("GPU pre asic reset failed with err, %d for drm dev, %s ",
4272 r, adev->ddev->unique);
4273 adev->asic_reset_res = r;
4274 }
4275
26bc5340
AG
4276retry: /* Rest of adevs pre asic reset from XGMI hive. */
4277 list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) {
4278
4279 if (tmp_adev == adev)
4280 continue;
4281
26bc5340
AG
4282 r = amdgpu_device_pre_asic_reset(tmp_adev,
4283 NULL,
4284 &need_full_reset);
4285 /*TODO Should we stop ?*/
4286 if (r) {
4287 DRM_ERROR("GPU pre asic reset failed with err, %d for drm dev, %s ",
4288 r, tmp_adev->ddev->unique);
4289 tmp_adev->asic_reset_res = r;
4290 }
4291 }
4292
4293 /* Actual ASIC resets if needed.*/
4294 /* TODO Implement XGMI hive reset logic for SRIOV */
4295 if (amdgpu_sriov_vf(adev)) {
4296 r = amdgpu_device_reset_sriov(adev, job ? false : true);
4297 if (r)
4298 adev->asic_reset_res = r;
4299 } else {
041a62bc 4300 r = amdgpu_do_asic_reset(hive, device_list_handle, &need_full_reset);
26bc5340
AG
4301 if (r && r == -EAGAIN)
4302 goto retry;
4303 }
4304
1d721ed6
AG
4305skip_hw_reset:
4306
26bc5340
AG
4307 /* Post ASIC reset for all devs .*/
4308 list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) {
7c6e68c7 4309
1d721ed6
AG
4310 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
4311 struct amdgpu_ring *ring = tmp_adev->rings[i];
4312
4313 if (!ring || !ring->sched.thread)
4314 continue;
4315
4316 /* No point in resubmitting jobs if we didn't HW reset */
4317 if (!tmp_adev->asic_reset_res && !job_signaled)
4318 drm_sched_resubmit_jobs(&ring->sched);
4319
4320 drm_sched_start(&ring->sched, !tmp_adev->asic_reset_res);
4321 }
4322
4323 if (!amdgpu_device_has_dc_support(tmp_adev) && !job_signaled) {
4324 drm_helper_resume_force_mode(tmp_adev->ddev);
4325 }
4326
4327 tmp_adev->asic_reset_res = 0;
26bc5340
AG
4328
4329 if (r) {
4330 /* bad news, how to tell it to userspace ? */
12ffa55d 4331 dev_info(tmp_adev->dev, "GPU reset(%d) failed\n", atomic_read(&tmp_adev->gpu_reset_counter));
26bc5340
AG
4332 amdgpu_vf_error_put(tmp_adev, AMDGIM_ERROR_VF_GPU_RESET_FAIL, 0, r);
4333 } else {
12ffa55d 4334 dev_info(tmp_adev->dev, "GPU reset(%d) succeeded!\n", atomic_read(&tmp_adev->gpu_reset_counter));
26bc5340 4335 }
7c6e68c7 4336 }
26bc5340 4337
7c6e68c7
AG
4338skip_sched_resume:
4339 list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) {
4340 /*unlock kfd: SRIOV would do it separately */
b823821f 4341 if (!(in_ras_intr && !use_baco) && !amdgpu_sriov_vf(tmp_adev))
7c6e68c7 4342 amdgpu_amdkfd_post_reset(tmp_adev);
26bc5340
AG
4343 amdgpu_device_unlock_adev(tmp_adev);
4344 }
4345
1d721ed6 4346 if (hive)
22d6575b 4347 mutex_unlock(&hive->reset_lock);
26bc5340
AG
4348
4349 if (r)
4350 dev_info(adev->dev, "GPU reset end with ret = %d\n", r);
d38ceaf9
AD
4351 return r;
4352}
4353
e3ecdffa
AD
4354/**
4355 * amdgpu_device_get_pcie_info - fetch pcie info about the PCIE slot
4356 *
4357 * @adev: amdgpu_device pointer
4358 *
4359 * Fetches and stores in the driver the PCIE capabilities (gen speed
4360 * and lanes) of the slot the device is in. Handles APUs and
4361 * virtualized environments where PCIE config space may not be available.
4362 */
5494d864 4363static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev)
d0dd7f0c 4364{
5d9a6330 4365 struct pci_dev *pdev;
c5313457
HK
4366 enum pci_bus_speed speed_cap, platform_speed_cap;
4367 enum pcie_link_width platform_link_width;
d0dd7f0c 4368
cd474ba0
AD
4369 if (amdgpu_pcie_gen_cap)
4370 adev->pm.pcie_gen_mask = amdgpu_pcie_gen_cap;
d0dd7f0c 4371
cd474ba0
AD
4372 if (amdgpu_pcie_lane_cap)
4373 adev->pm.pcie_mlw_mask = amdgpu_pcie_lane_cap;
d0dd7f0c 4374
cd474ba0
AD
4375 /* covers APUs as well */
4376 if (pci_is_root_bus(adev->pdev->bus)) {
4377 if (adev->pm.pcie_gen_mask == 0)
4378 adev->pm.pcie_gen_mask = AMDGPU_DEFAULT_PCIE_GEN_MASK;
4379 if (adev->pm.pcie_mlw_mask == 0)
4380 adev->pm.pcie_mlw_mask = AMDGPU_DEFAULT_PCIE_MLW_MASK;
d0dd7f0c 4381 return;
cd474ba0 4382 }
d0dd7f0c 4383
c5313457
HK
4384 if (adev->pm.pcie_gen_mask && adev->pm.pcie_mlw_mask)
4385 return;
4386
dbaa922b
AD
4387 pcie_bandwidth_available(adev->pdev, NULL,
4388 &platform_speed_cap, &platform_link_width);
c5313457 4389
cd474ba0 4390 if (adev->pm.pcie_gen_mask == 0) {
5d9a6330
AD
4391 /* asic caps */
4392 pdev = adev->pdev;
4393 speed_cap = pcie_get_speed_cap(pdev);
4394 if (speed_cap == PCI_SPEED_UNKNOWN) {
4395 adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
cd474ba0
AD
4396 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
4397 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3);
cd474ba0 4398 } else {
5d9a6330
AD
4399 if (speed_cap == PCIE_SPEED_16_0GT)
4400 adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
4401 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
4402 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3 |
4403 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN4);
4404 else if (speed_cap == PCIE_SPEED_8_0GT)
4405 adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
4406 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
4407 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3);
4408 else if (speed_cap == PCIE_SPEED_5_0GT)
4409 adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
4410 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2);
4411 else
4412 adev->pm.pcie_gen_mask |= CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1;
4413 }
4414 /* platform caps */
c5313457 4415 if (platform_speed_cap == PCI_SPEED_UNKNOWN) {
5d9a6330
AD
4416 adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
4417 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2);
4418 } else {
c5313457 4419 if (platform_speed_cap == PCIE_SPEED_16_0GT)
5d9a6330
AD
4420 adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
4421 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2 |
4422 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3 |
4423 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN4);
c5313457 4424 else if (platform_speed_cap == PCIE_SPEED_8_0GT)
5d9a6330
AD
4425 adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
4426 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2 |
4427 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3);
c5313457 4428 else if (platform_speed_cap == PCIE_SPEED_5_0GT)
5d9a6330
AD
4429 adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
4430 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2);
4431 else
4432 adev->pm.pcie_gen_mask |= CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1;
4433
cd474ba0
AD
4434 }
4435 }
4436 if (adev->pm.pcie_mlw_mask == 0) {
c5313457 4437 if (platform_link_width == PCIE_LNK_WIDTH_UNKNOWN) {
5d9a6330
AD
4438 adev->pm.pcie_mlw_mask |= AMDGPU_DEFAULT_PCIE_MLW_MASK;
4439 } else {
c5313457 4440 switch (platform_link_width) {
5d9a6330 4441 case PCIE_LNK_X32:
cd474ba0
AD
4442 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X32 |
4443 CAIL_PCIE_LINK_WIDTH_SUPPORT_X16 |
4444 CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 |
4445 CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
4446 CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
4447 CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
4448 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
4449 break;
5d9a6330 4450 case PCIE_LNK_X16:
cd474ba0
AD
4451 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X16 |
4452 CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 |
4453 CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
4454 CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
4455 CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
4456 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
4457 break;
5d9a6330 4458 case PCIE_LNK_X12:
cd474ba0
AD
4459 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 |
4460 CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
4461 CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
4462 CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
4463 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
4464 break;
5d9a6330 4465 case PCIE_LNK_X8:
cd474ba0
AD
4466 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
4467 CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
4468 CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
4469 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
4470 break;
5d9a6330 4471 case PCIE_LNK_X4:
cd474ba0
AD
4472 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
4473 CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
4474 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
4475 break;
5d9a6330 4476 case PCIE_LNK_X2:
cd474ba0
AD
4477 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
4478 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
4479 break;
5d9a6330 4480 case PCIE_LNK_X1:
cd474ba0
AD
4481 adev->pm.pcie_mlw_mask = CAIL_PCIE_LINK_WIDTH_SUPPORT_X1;
4482 break;
4483 default:
4484 break;
4485 }
d0dd7f0c
AD
4486 }
4487 }
4488}
d38ceaf9 4489
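/*
 * amdgpu_device_baco_enter - put the device into BACO (Bus Active, Chip Off)
 *
 * Disables the doorbell interrupt on RAS-capable parts and enters BACO via
 * the DPM interface. Returns 0 on success or -ENOTSUPP if the device does
 * not support BACO.
 */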
361dbd01
AD
4490int amdgpu_device_baco_enter(struct drm_device *dev)
4491{
4492 struct amdgpu_device *adev = dev->dev_private;
7a22677b 4493 struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
361dbd01
AD
4494
4495 if (!amdgpu_device_supports_baco(adev->ddev))
4496 return -ENOTSUPP;
4497
7a22677b
LM
4498 if (ras && ras->supported)
4499 adev->nbio.funcs->enable_doorbell_interrupt(adev, false);
4500
9530273e 4501 return amdgpu_dpm_baco_enter(adev);
361dbd01
AD
4502}
4503
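/*
 * amdgpu_device_baco_exit - bring the device back out of BACO
 *
 * Exits BACO via the DPM interface and re-enables the doorbell interrupt on
 * RAS-capable parts. Returns 0 on success or a negative error code.
 */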
4504int amdgpu_device_baco_exit(struct drm_device *dev)
4505{
4506 struct amdgpu_device *adev = dev->dev_private;
7a22677b 4507 struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
9530273e 4508 int ret = 0;
361dbd01
AD
4509
4510 if (!amdgpu_device_supports_baco(adev->ddev))
4511 return -ENOTSUPP;
4512
9530273e
EQ
4513 ret = amdgpu_dpm_baco_exit(adev);
4514 if (ret)
4515 return ret;
7a22677b
LM
4516
4517 if (ras && ras->supported)
4518 adev->nbio.funcs->enable_doorbell_interrupt(adev, true);
4519
4520 return 0;
361dbd01 4521}
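/*
 * Illustrative sketch (not part of this file): callers such as the runtime-PM
 * hooks in amdgpu_drv.c are expected to pair the two BACO helpers around the
 * period the GPU stays powered down, roughly:
 *
 *	if (amdgpu_device_supports_baco(drm_dev)) {
 *		r = amdgpu_device_baco_enter(drm_dev);
 *		if (r)
 *			return r;
 *		...			(device stays in BACO)
 *		r = amdgpu_device_baco_exit(drm_dev);
 *	}
 */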