/*
 * Copyright 2008 Advanced Micro Devices, Inc.
 * Copyright 2008 Red Hat Inc.
 * Copyright 2009 Jerome Glisse.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 * Authors: Dave Airlie
 *          Alex Deucher
 *          Jerome Glisse
 */
#include <linux/power_supply.h>
#include <linux/kthread.h>
#include <linux/module.h>
#include <linux/console.h>
#include <linux/slab.h>
#include <linux/iommu.h>
#include <linux/pci.h>
#include <linux/pci-p2pdma.h>
#include <linux/apple-gmux.h>

#include <drm/drm_aperture.h>
#include <drm/drm_atomic_helper.h>
#include <drm/drm_crtc_helper.h>
#include <drm/drm_fb_helper.h>
#include <drm/drm_probe_helper.h>
#include <drm/amdgpu_drm.h>
#include <linux/device.h>
#include <linux/vgaarb.h>
#include <linux/vga_switcheroo.h>
#include <linux/efi.h>
#include "amdgpu.h"
#include "amdgpu_trace.h"
#include "amdgpu_i2c.h"
#include "atom.h"
#include "amdgpu_atombios.h"
#include "amdgpu_atomfirmware.h"
#include "amd_pcie.h"
#ifdef CONFIG_DRM_AMDGPU_SI
#include "si.h"
#endif
#ifdef CONFIG_DRM_AMDGPU_CIK
#include "cik.h"
#endif
#include "vi.h"
#include "soc15.h"
#include "nv.h"
#include "bif/bif_4_1_d.h"
#include <linux/firmware.h>
#include "amdgpu_vf_error.h"

#include "amdgpu_amdkfd.h"
#include "amdgpu_pm.h"

#include "amdgpu_xgmi.h"
#include "amdgpu_ras.h"
#include "amdgpu_pmu.h"
#include "amdgpu_fru_eeprom.h"
#include "amdgpu_reset.h"
#include "amdgpu_virt.h"
#include "amdgpu_dev_coredump.h"

#include <linux/suspend.h>
#include <drm/task_barrier.h>
#include <linux/pm_runtime.h>

#include <drm/drm_drv.h>

#if IS_ENABLED(CONFIG_X86)
#include <asm/intel-family.h>
#endif

MODULE_FIRMWARE("amdgpu/vega10_gpu_info.bin");
MODULE_FIRMWARE("amdgpu/vega12_gpu_info.bin");
MODULE_FIRMWARE("amdgpu/raven_gpu_info.bin");
MODULE_FIRMWARE("amdgpu/picasso_gpu_info.bin");
MODULE_FIRMWARE("amdgpu/raven2_gpu_info.bin");
MODULE_FIRMWARE("amdgpu/arcturus_gpu_info.bin");
MODULE_FIRMWARE("amdgpu/navi12_gpu_info.bin");

#define AMDGPU_RESUME_MS                2000
#define AMDGPU_MAX_RETRY_LIMIT          2
#define AMDGPU_RETRY_SRIOV_RESET(r) ((r) == -EBUSY || (r) == -ETIMEDOUT || (r) == -EINVAL)
#define AMDGPU_PCIE_INDEX_FALLBACK (0x38 >> 2)
#define AMDGPU_PCIE_INDEX_HI_FALLBACK (0x44 >> 2)
#define AMDGPU_PCIE_DATA_FALLBACK (0x3C >> 2)

static const struct drm_driver amdgpu_kms_driver;

const char *amdgpu_asic_name[] = {
        "TAHITI",
        "PITCAIRN",
        "VERDE",
        "OLAND",
        "HAINAN",
        "BONAIRE",
        "KAVERI",
        "KABINI",
        "HAWAII",
        "MULLINS",
        "TOPAZ",
        "TONGA",
        "FIJI",
        "CARRIZO",
        "STONEY",
        "POLARIS10",
        "POLARIS11",
        "POLARIS12",
        "VEGAM",
        "VEGA10",
        "VEGA12",
        "VEGA20",
        "RAVEN",
        "ARCTURUS",
        "RENOIR",
        "ALDEBARAN",
        "NAVI10",
        "CYAN_SKILLFISH",
        "NAVI14",
        "NAVI12",
        "SIENNA_CICHLID",
        "NAVY_FLOUNDER",
        "VANGOGH",
        "DIMGREY_CAVEFISH",
        "BEIGE_GOBY",
        "YELLOW_CARP",
        "IP DISCOVERY",
        "LAST",
};

static inline void amdgpu_device_stop_pending_resets(struct amdgpu_device *adev);

/**
 * DOC: pcie_replay_count
 *
 * The amdgpu driver provides a sysfs API for reporting the total number
 * of PCIe replays (NAKs).
 * The file pcie_replay_count is used for this and returns the total
 * number of replays as the sum of the NAKs generated and NAKs received.
 */
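
/*
 * Example read from userspace (card index and value are hypothetical):
 *   $ cat /sys/class/drm/card0/device/pcie_replay_count
 *   0
 */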

static ssize_t amdgpu_device_get_pcie_replay_count(struct device *dev,
                struct device_attribute *attr, char *buf)
{
        struct drm_device *ddev = dev_get_drvdata(dev);
        struct amdgpu_device *adev = drm_to_adev(ddev);
        uint64_t cnt = amdgpu_asic_get_pcie_replay_count(adev);

        return sysfs_emit(buf, "%llu\n", cnt);
}

static DEVICE_ATTR(pcie_replay_count, 0444,
                amdgpu_device_get_pcie_replay_count, NULL);

static ssize_t amdgpu_sysfs_reg_state_get(struct file *f, struct kobject *kobj,
                                          struct bin_attribute *attr, char *buf,
                                          loff_t ppos, size_t count)
{
        struct device *dev = kobj_to_dev(kobj);
        struct drm_device *ddev = dev_get_drvdata(dev);
        struct amdgpu_device *adev = drm_to_adev(ddev);
        ssize_t bytes_read;

        switch (ppos) {
        case AMDGPU_SYS_REG_STATE_XGMI:
                bytes_read = amdgpu_asic_get_reg_state(
                        adev, AMDGPU_REG_STATE_TYPE_XGMI, buf, count);
                break;
        case AMDGPU_SYS_REG_STATE_WAFL:
                bytes_read = amdgpu_asic_get_reg_state(
                        adev, AMDGPU_REG_STATE_TYPE_WAFL, buf, count);
                break;
        case AMDGPU_SYS_REG_STATE_PCIE:
                bytes_read = amdgpu_asic_get_reg_state(
                        adev, AMDGPU_REG_STATE_TYPE_PCIE, buf, count);
                break;
        case AMDGPU_SYS_REG_STATE_USR:
                bytes_read = amdgpu_asic_get_reg_state(
                        adev, AMDGPU_REG_STATE_TYPE_USR, buf, count);
                break;
        case AMDGPU_SYS_REG_STATE_USR_1:
                bytes_read = amdgpu_asic_get_reg_state(
                        adev, AMDGPU_REG_STATE_TYPE_USR_1, buf, count);
                break;
        default:
                return -EINVAL;
        }

        return bytes_read;
}

BIN_ATTR(reg_state, 0444, amdgpu_sysfs_reg_state_get, NULL,
         AMDGPU_SYS_REG_STATE_END);
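
/*
 * The file offset of a read selects the register-state partition, so a
 * userspace reader seeks to one of the AMDGPU_SYS_REG_STATE_* offsets
 * before reading. A sketch (card index and offset value hypothetical):
 *   $ dd if=/sys/class/drm/card0/device/reg_state bs=1 \
 *        skip=$XGMI_OFFSET count=4096 status=none | xxd
 */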

int amdgpu_reg_state_sysfs_init(struct amdgpu_device *adev)
{
        int ret;

        if (!amdgpu_asic_get_reg_state_supported(adev))
                return 0;

        ret = sysfs_create_bin_file(&adev->dev->kobj, &bin_attr_reg_state);

        return ret;
}

void amdgpu_reg_state_sysfs_fini(struct amdgpu_device *adev)
{
        if (!amdgpu_asic_get_reg_state_supported(adev))
                return;
        sysfs_remove_bin_file(&adev->dev->kobj, &bin_attr_reg_state);
}

/**
 * DOC: board_info
 *
 * The amdgpu driver provides a sysfs API for giving board related information.
 * It provides the form factor information in the format
 *
 *   type : form factor
 *
 * Possible form factor values
 *
 * - "cem"     - PCIE CEM card
 * - "oam"     - Open Compute Accelerator Module
 * - "unknown" - Not known
 *
 */
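
/*
 * Example read from userspace (card index and value are hypothetical):
 *   $ cat /sys/class/drm/card0/device/board_info
 *   type : oam
 */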

static ssize_t amdgpu_device_get_board_info(struct device *dev,
                                            struct device_attribute *attr,
                                            char *buf)
{
        struct drm_device *ddev = dev_get_drvdata(dev);
        struct amdgpu_device *adev = drm_to_adev(ddev);
        enum amdgpu_pkg_type pkg_type = AMDGPU_PKG_TYPE_CEM;
        const char *pkg;

        if (adev->smuio.funcs && adev->smuio.funcs->get_pkg_type)
                pkg_type = adev->smuio.funcs->get_pkg_type(adev);

        switch (pkg_type) {
        case AMDGPU_PKG_TYPE_CEM:
                pkg = "cem";
                break;
        case AMDGPU_PKG_TYPE_OAM:
                pkg = "oam";
                break;
        default:
                pkg = "unknown";
                break;
        }

        return sysfs_emit(buf, "%s : %s\n", "type", pkg);
}

static DEVICE_ATTR(board_info, 0444, amdgpu_device_get_board_info, NULL);

static struct attribute *amdgpu_board_attrs[] = {
        &dev_attr_board_info.attr,
        NULL,
};

static umode_t amdgpu_board_attrs_is_visible(struct kobject *kobj,
                                             struct attribute *attr, int n)
{
        struct device *dev = kobj_to_dev(kobj);
        struct drm_device *ddev = dev_get_drvdata(dev);
        struct amdgpu_device *adev = drm_to_adev(ddev);

        if (adev->flags & AMD_IS_APU)
                return 0;

        return attr->mode;
}

static const struct attribute_group amdgpu_board_attrs_group = {
        .attrs = amdgpu_board_attrs,
        .is_visible = amdgpu_board_attrs_is_visible
};

static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev);


/**
 * amdgpu_device_supports_px - Is the device a dGPU with ATPX power control
 *
 * @dev: drm_device pointer
 *
 * Returns true if the device is a dGPU with ATPX power control,
 * otherwise returns false.
 */
bool amdgpu_device_supports_px(struct drm_device *dev)
{
        struct amdgpu_device *adev = drm_to_adev(dev);

        if ((adev->flags & AMD_IS_PX) && !amdgpu_is_atpx_hybrid())
                return true;
        return false;
}

/**
 * amdgpu_device_supports_boco - Is the device a dGPU with ACPI power resources
 *
 * @dev: drm_device pointer
 *
 * Returns true if the device is a dGPU with ACPI power control,
 * otherwise returns false.
 */
bool amdgpu_device_supports_boco(struct drm_device *dev)
{
        struct amdgpu_device *adev = drm_to_adev(dev);

        if (adev->has_pr3 ||
            ((adev->flags & AMD_IS_PX) && amdgpu_is_atpx_hybrid()))
                return true;
        return false;
}

/**
 * amdgpu_device_supports_baco - Does the device support BACO
 *
 * @dev: drm_device pointer
 *
 * Return:
 * 1 if the device supports BACO;
 * 3 if the device supports MACO (which only works if BACO is supported);
 * otherwise returns 0.
 */
int amdgpu_device_supports_baco(struct drm_device *dev)
{
        struct amdgpu_device *adev = drm_to_adev(dev);

        return amdgpu_asic_supports_baco(adev);
}
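
/*
 * The returned value is a support mask rather than a plain boolean, so
 * callers test the BACO_SUPPORT/MACO_SUPPORT bits, as the runtime pm
 * detection below does:
 *   int bamaco_support = amdgpu_device_supports_baco(dev);
 *
 *   if (bamaco_support & MACO_SUPPORT)
 *           ... BAMACO is usable ...
 *   else if (bamaco_support & BACO_SUPPORT)
 *           ... plain BACO only ...
 */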

void amdgpu_device_detect_runtime_pm_mode(struct amdgpu_device *adev)
{
        struct drm_device *dev;
        int bamaco_support;

        dev = adev_to_drm(adev);

        adev->pm.rpm_mode = AMDGPU_RUNPM_NONE;
        bamaco_support = amdgpu_device_supports_baco(dev);

        switch (amdgpu_runtime_pm) {
        case 2:
                if (bamaco_support & MACO_SUPPORT) {
                        adev->pm.rpm_mode = AMDGPU_RUNPM_BAMACO;
                        dev_info(adev->dev, "Forcing BAMACO for runtime pm\n");
                } else if (bamaco_support == BACO_SUPPORT) {
                        adev->pm.rpm_mode = AMDGPU_RUNPM_BACO;
                        dev_info(adev->dev, "Requested mode BAMACO not available, fallback to BACO\n");
                }
                break;
        case 1:
                if (bamaco_support & BACO_SUPPORT) {
                        adev->pm.rpm_mode = AMDGPU_RUNPM_BACO;
                        dev_info(adev->dev, "Forcing BACO for runtime pm\n");
                }
                break;
        case -1:
        case -2:
                if (amdgpu_device_supports_px(dev)) { /* enable PX as runtime mode */
                        adev->pm.rpm_mode = AMDGPU_RUNPM_PX;
                        dev_info(adev->dev, "Using ATPX for runtime pm\n");
                } else if (amdgpu_device_supports_boco(dev)) { /* enable boco as runtime mode */
                        adev->pm.rpm_mode = AMDGPU_RUNPM_BOCO;
                        dev_info(adev->dev, "Using BOCO for runtime pm\n");
                } else {
                        if (!bamaco_support)
                                goto no_runtime_pm;

                        switch (adev->asic_type) {
                        case CHIP_VEGA20:
                        case CHIP_ARCTURUS:
                                /* BACO is not supported on vega20 and arcturus */
                                break;
                        case CHIP_VEGA10:
                                /* enable BACO as runpm mode if noretry=0 */
                                if (!adev->gmc.noretry)
                                        adev->pm.rpm_mode = AMDGPU_RUNPM_BACO;
                                break;
                        default:
                                /* enable BACO as runpm mode on CI+ */
                                adev->pm.rpm_mode = AMDGPU_RUNPM_BACO;
                                break;
                        }

                        if (adev->pm.rpm_mode == AMDGPU_RUNPM_BACO) {
                                if (bamaco_support & MACO_SUPPORT) {
                                        adev->pm.rpm_mode = AMDGPU_RUNPM_BAMACO;
                                        dev_info(adev->dev, "Using BAMACO for runtime pm\n");
                                } else {
                                        dev_info(adev->dev, "Using BACO for runtime pm\n");
                                }
                        }
                }
                break;
        case 0:
                dev_info(adev->dev, "runtime pm is manually disabled\n");
                break;
        default:
                break;
        }

no_runtime_pm:
        if (adev->pm.rpm_mode == AMDGPU_RUNPM_NONE)
                dev_info(adev->dev, "Runtime PM not available\n");
}
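
/*
 * For reference, the amdgpu_runtime_pm module parameter values handled by
 * the switch above:
 *    2: force BAMACO (with BACO as fallback when MACO is unsupported)
 *    1: force BACO
 *    0: runtime pm disabled
 *   -1/-2: auto-detect (PX first, then BOCO, then BACO/BAMACO)
 */
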
/**
 * amdgpu_device_supports_smart_shift - Is the device a dGPU with
 * smart shift support
 *
 * @dev: drm_device pointer
 *
 * Returns true if the device is a dGPU with Smart Shift support,
 * otherwise returns false.
 */
bool amdgpu_device_supports_smart_shift(struct drm_device *dev)
{
        return (amdgpu_device_supports_boco(dev) &&
                amdgpu_acpi_is_power_shift_control_supported());
}

/*
 * VRAM access helper functions
 */

/**
 * amdgpu_device_mm_access - access vram by MM_INDEX/MM_DATA
 *
 * @adev: amdgpu_device pointer
 * @pos: offset of the buffer in vram
 * @buf: virtual address of the buffer in system memory
 * @size: read/write size; the buffer at @buf must be at least @size bytes
 * @write: true - write to vram, otherwise - read from vram
 */
void amdgpu_device_mm_access(struct amdgpu_device *adev, loff_t pos,
                             void *buf, size_t size, bool write)
{
        unsigned long flags;
        uint32_t hi = ~0, tmp = 0;
        uint32_t *data = buf;
        uint64_t last;
        int idx;

        if (!drm_dev_enter(adev_to_drm(adev), &idx))
                return;

        BUG_ON(!IS_ALIGNED(pos, 4) || !IS_ALIGNED(size, 4));

        spin_lock_irqsave(&adev->mmio_idx_lock, flags);
        for (last = pos + size; pos < last; pos += 4) {
                tmp = pos >> 31;

                WREG32_NO_KIQ(mmMM_INDEX, ((uint32_t)pos) | 0x80000000);
                if (tmp != hi) {
                        WREG32_NO_KIQ(mmMM_INDEX_HI, tmp);
                        hi = tmp;
                }
                if (write)
                        WREG32_NO_KIQ(mmMM_DATA, *data++);
                else
                        *data++ = RREG32_NO_KIQ(mmMM_DATA);
        }

        spin_unlock_irqrestore(&adev->mmio_idx_lock, flags);
        drm_dev_exit(idx);
}
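
/*
 * Minimal usage sketch (offset and direction hypothetical): read one dword
 * from VRAM through the MM_INDEX/MM_DATA window:
 *   uint32_t val;
 *
 *   amdgpu_device_mm_access(adev, 0x1000, &val, sizeof(val), false);
 */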

/**
 * amdgpu_device_aper_access - access vram via the vram aperture
 *
 * @adev: amdgpu_device pointer
 * @pos: offset of the buffer in vram
 * @buf: virtual address of the buffer in system memory
 * @size: read/write size; the buffer at @buf must be at least @size bytes
 * @write: true - write to vram, otherwise - read from vram
 *
 * Returns the number of bytes that have been transferred.
 */
size_t amdgpu_device_aper_access(struct amdgpu_device *adev, loff_t pos,
                                 void *buf, size_t size, bool write)
{
#ifdef CONFIG_64BIT
        void __iomem *addr;
        size_t count = 0;
        uint64_t last;

        if (!adev->mman.aper_base_kaddr)
                return 0;

        last = min(pos + size, adev->gmc.visible_vram_size);
        if (last > pos) {
                addr = adev->mman.aper_base_kaddr + pos;
                count = last - pos;

                if (write) {
                        memcpy_toio(addr, buf, count);
                        /* Make sure HDP write cache flush happens without any reordering
                         * after the system memory contents are sent over PCIe device
                         */
                        mb();
                        amdgpu_device_flush_hdp(adev, NULL);
                } else {
                        amdgpu_device_invalidate_hdp(adev, NULL);
                        /* Make sure HDP read cache is invalidated before issuing a read
                         * to the PCIe device
                         */
                        mb();
                        memcpy_fromio(buf, addr, count);
                }

        }

        return count;
#else
        return 0;
#endif
}

/**
 * amdgpu_device_vram_access - read/write a buffer in vram
 *
 * @adev: amdgpu_device pointer
 * @pos: offset of the buffer in vram
 * @buf: virtual address of the buffer in system memory
 * @size: read/write size; the buffer at @buf must be at least @size bytes
 * @write: true - write to vram, otherwise - read from vram
 */
void amdgpu_device_vram_access(struct amdgpu_device *adev, loff_t pos,
                               void *buf, size_t size, bool write)
{
        size_t count;

        /* try using the vram aperture to access vram first */
        count = amdgpu_device_aper_access(adev, pos, buf, size, write);
        size -= count;
        if (size) {
                /* use MM_INDEX/MM_DATA to access the rest of vram */
                pos += count;
                buf += count;
                amdgpu_device_mm_access(adev, pos, buf, size, write);
        }
}

/*
 * register access helper functions.
 */

/* Check if hw access should be skipped because of hotplug or device error */
bool amdgpu_device_skip_hw_access(struct amdgpu_device *adev)
{
        if (adev->no_hw_access)
                return true;

#ifdef CONFIG_LOCKDEP
        /*
         * This is a bit complicated to understand, so worth a comment. What we assert
         * here is that the GPU reset is not running on another thread in parallel.
         *
         * For this we trylock the read side of the reset semaphore; if that succeeds
         * we know that the reset is not running in parallel.
         *
         * If the trylock fails we assert that we are either already holding the read
         * side of the lock or are the reset thread itself and hold the write side of
         * the lock.
         */
        if (in_task()) {
                if (down_read_trylock(&adev->reset_domain->sem))
                        up_read(&adev->reset_domain->sem);
                else
                        lockdep_assert_held(&adev->reset_domain->sem);
        }
#endif
        return false;
}

/**
 * amdgpu_device_rreg - read a memory mapped IO or indirect register
 *
 * @adev: amdgpu_device pointer
 * @reg: dword aligned register offset
 * @acc_flags: access flags which require special behavior
 *
 * Returns the 32 bit value from the offset specified.
 */
uint32_t amdgpu_device_rreg(struct amdgpu_device *adev,
                            uint32_t reg, uint32_t acc_flags)
{
        uint32_t ret;

        if (amdgpu_device_skip_hw_access(adev))
                return 0;

        if ((reg * 4) < adev->rmmio_size) {
                if (!(acc_flags & AMDGPU_REGS_NO_KIQ) &&
                    amdgpu_sriov_runtime(adev) &&
                    down_read_trylock(&adev->reset_domain->sem)) {
                        ret = amdgpu_kiq_rreg(adev, reg, 0);
                        up_read(&adev->reset_domain->sem);
                } else {
                        ret = readl(((void __iomem *)adev->rmmio) + (reg * 4));
                }
        } else {
                ret = adev->pcie_rreg(adev, reg * 4);
        }

        trace_amdgpu_device_rreg(adev->pdev->device, reg, ret);

        return ret;
}
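
/*
 * Order of precedence implemented above: a KIQ access when running as an
 * SR-IOV VF at runtime, a direct MMIO readl() when the offset falls inside
 * the mapped register BAR, and the indirect pcie_rreg() path otherwise.
 * Most callers reach this through the RREG32()/WREG32() macro family rather
 * than calling it directly.
 */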

/*
 * Byte-granularity MMIO register read helpers
 * @offset: byte offset from MMIO start
 */

/**
 * amdgpu_mm_rreg8 - read a memory mapped IO register
 *
 * @adev: amdgpu_device pointer
 * @offset: byte aligned register offset
 *
 * Returns the 8 bit value from the offset specified.
 */
uint8_t amdgpu_mm_rreg8(struct amdgpu_device *adev, uint32_t offset)
{
        if (amdgpu_device_skip_hw_access(adev))
                return 0;

        if (offset < adev->rmmio_size)
                return (readb(adev->rmmio + offset));
        BUG();
}


/**
 * amdgpu_device_xcc_rreg - read a memory mapped IO or indirect register with specific XCC
 *
 * @adev: amdgpu_device pointer
 * @reg: dword aligned register offset
 * @acc_flags: access flags which require special behavior
 * @xcc_id: xcc accelerated compute core id
 *
 * Returns the 32 bit value from the offset specified.
 */
uint32_t amdgpu_device_xcc_rreg(struct amdgpu_device *adev,
                                uint32_t reg, uint32_t acc_flags,
                                uint32_t xcc_id)
{
        uint32_t ret, rlcg_flag;

        if (amdgpu_device_skip_hw_access(adev))
                return 0;

        if ((reg * 4) < adev->rmmio_size) {
                if (amdgpu_sriov_vf(adev) &&
                    !amdgpu_sriov_runtime(adev) &&
                    adev->gfx.rlc.rlcg_reg_access_supported &&
                    amdgpu_virt_get_rlcg_reg_access_flag(adev, acc_flags,
                                                         GC_HWIP, false,
                                                         &rlcg_flag)) {
                        ret = amdgpu_virt_rlcg_reg_rw(adev, reg, 0, rlcg_flag, xcc_id);
                } else if (!(acc_flags & AMDGPU_REGS_NO_KIQ) &&
                    amdgpu_sriov_runtime(adev) &&
                    down_read_trylock(&adev->reset_domain->sem)) {
                        ret = amdgpu_kiq_rreg(adev, reg, xcc_id);
                        up_read(&adev->reset_domain->sem);
                } else {
                        ret = readl(((void __iomem *)adev->rmmio) + (reg * 4));
                }
        } else {
                ret = adev->pcie_rreg(adev, reg * 4);
        }

        return ret;
}

/*
 * Byte-granularity MMIO register write helpers
 * @offset: byte offset from MMIO start
 * @value: the value to be written to the register
 */

/**
 * amdgpu_mm_wreg8 - write a memory mapped IO register
 *
 * @adev: amdgpu_device pointer
 * @offset: byte aligned register offset
 * @value: 8 bit value to write
 *
 * Writes the value specified to the offset specified.
 */
void amdgpu_mm_wreg8(struct amdgpu_device *adev, uint32_t offset, uint8_t value)
{
        if (amdgpu_device_skip_hw_access(adev))
                return;

        if (offset < adev->rmmio_size)
                writeb(value, adev->rmmio + offset);
        else
                BUG();
}

/**
 * amdgpu_device_wreg - write to a memory mapped IO or indirect register
 *
 * @adev: amdgpu_device pointer
 * @reg: dword aligned register offset
 * @v: 32 bit value to write to the register
 * @acc_flags: access flags which require special behavior
 *
 * Writes the value specified to the offset specified.
 */
void amdgpu_device_wreg(struct amdgpu_device *adev,
                        uint32_t reg, uint32_t v,
                        uint32_t acc_flags)
{
        if (amdgpu_device_skip_hw_access(adev))
                return;

        if ((reg * 4) < adev->rmmio_size) {
                if (!(acc_flags & AMDGPU_REGS_NO_KIQ) &&
                    amdgpu_sriov_runtime(adev) &&
                    down_read_trylock(&adev->reset_domain->sem)) {
                        amdgpu_kiq_wreg(adev, reg, v, 0);
                        up_read(&adev->reset_domain->sem);
                } else {
                        writel(v, ((void __iomem *)adev->rmmio) + (reg * 4));
                }
        } else {
                adev->pcie_wreg(adev, reg * 4, v);
        }

        trace_amdgpu_device_wreg(adev->pdev->device, reg, v);
}

/**
 * amdgpu_mm_wreg_mmio_rlc - write register either with direct/indirect mmio or with RLC path if in range
 *
 * @adev: amdgpu_device pointer
 * @reg: mmio/rlc register
 * @v: value to write
 * @xcc_id: xcc accelerated compute core id
 *
 * This function is invoked only for debugfs register access
 */
void amdgpu_mm_wreg_mmio_rlc(struct amdgpu_device *adev,
                             uint32_t reg, uint32_t v,
                             uint32_t xcc_id)
{
        if (amdgpu_device_skip_hw_access(adev))
                return;

        if (amdgpu_sriov_fullaccess(adev) &&
            adev->gfx.rlc.funcs &&
            adev->gfx.rlc.funcs->is_rlcg_access_range) {
                if (adev->gfx.rlc.funcs->is_rlcg_access_range(adev, reg))
                        return amdgpu_sriov_wreg(adev, reg, v, 0, 0, xcc_id);
        } else if ((reg * 4) >= adev->rmmio_size) {
                adev->pcie_wreg(adev, reg * 4, v);
        } else {
                writel(v, ((void __iomem *)adev->rmmio) + (reg * 4));
        }
}

/**
 * amdgpu_device_xcc_wreg - write to a memory mapped IO or indirect register with specific XCC
 *
 * @adev: amdgpu_device pointer
 * @reg: dword aligned register offset
 * @v: 32 bit value to write to the register
 * @acc_flags: access flags which require special behavior
 * @xcc_id: xcc accelerated compute core id
 *
 * Writes the value specified to the offset specified.
 */
void amdgpu_device_xcc_wreg(struct amdgpu_device *adev,
                        uint32_t reg, uint32_t v,
                        uint32_t acc_flags, uint32_t xcc_id)
{
        uint32_t rlcg_flag;

        if (amdgpu_device_skip_hw_access(adev))
                return;

        if ((reg * 4) < adev->rmmio_size) {
                if (amdgpu_sriov_vf(adev) &&
                    !amdgpu_sriov_runtime(adev) &&
                    adev->gfx.rlc.rlcg_reg_access_supported &&
                    amdgpu_virt_get_rlcg_reg_access_flag(adev, acc_flags,
                                                         GC_HWIP, true,
                                                         &rlcg_flag)) {
                        amdgpu_virt_rlcg_reg_rw(adev, reg, v, rlcg_flag, xcc_id);
                } else if (!(acc_flags & AMDGPU_REGS_NO_KIQ) &&
                    amdgpu_sriov_runtime(adev) &&
                    down_read_trylock(&adev->reset_domain->sem)) {
                        amdgpu_kiq_wreg(adev, reg, v, xcc_id);
                        up_read(&adev->reset_domain->sem);
                } else {
                        writel(v, ((void __iomem *)adev->rmmio) + (reg * 4));
                }
        } else {
                adev->pcie_wreg(adev, reg * 4, v);
        }
}

/**
 * amdgpu_device_indirect_rreg - read an indirect register
 *
 * @adev: amdgpu_device pointer
 * @reg_addr: indirect register address to read from
 *
 * Returns the value of indirect register @reg_addr
 */
u32 amdgpu_device_indirect_rreg(struct amdgpu_device *adev,
                                u32 reg_addr)
{
        unsigned long flags, pcie_index, pcie_data;
        void __iomem *pcie_index_offset;
        void __iomem *pcie_data_offset;
        u32 r;

        pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev);
        pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev);

        spin_lock_irqsave(&adev->pcie_idx_lock, flags);
        pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
        pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;

        writel(reg_addr, pcie_index_offset);
        readl(pcie_index_offset);
        r = readl(pcie_data_offset);
        spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);

        return r;
}
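
/*
 * The index/data pair above behaves like a banked register window: the
 * target address is written to the PCIE index register, flushed by reading
 * the index back, and the value is then accessed through the PCIE data
 * register, all under pcie_idx_lock so the pair stays consistent.
 */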

u32 amdgpu_device_indirect_rreg_ext(struct amdgpu_device *adev,
                                    u64 reg_addr)
{
        unsigned long flags, pcie_index, pcie_index_hi, pcie_data;
        u32 r;
        void __iomem *pcie_index_offset;
        void __iomem *pcie_index_hi_offset;
        void __iomem *pcie_data_offset;

        if (unlikely(!adev->nbio.funcs)) {
                pcie_index = AMDGPU_PCIE_INDEX_FALLBACK;
                pcie_data = AMDGPU_PCIE_DATA_FALLBACK;
        } else {
                pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev);
                pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev);
        }

        if (reg_addr >> 32) {
                if (unlikely(!adev->nbio.funcs))
                        pcie_index_hi = AMDGPU_PCIE_INDEX_HI_FALLBACK;
                else
                        pcie_index_hi = adev->nbio.funcs->get_pcie_index_hi_offset(adev);
        } else {
                pcie_index_hi = 0;
        }

        spin_lock_irqsave(&adev->pcie_idx_lock, flags);
        pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
        pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;
        if (pcie_index_hi != 0)
                pcie_index_hi_offset = (void __iomem *)adev->rmmio +
                                pcie_index_hi * 4;

        writel(reg_addr, pcie_index_offset);
        readl(pcie_index_offset);
        if (pcie_index_hi != 0) {
                writel((reg_addr >> 32) & 0xff, pcie_index_hi_offset);
                readl(pcie_index_hi_offset);
        }
        r = readl(pcie_data_offset);

        /* clear the high bits */
        if (pcie_index_hi != 0) {
                writel(0, pcie_index_hi_offset);
                readl(pcie_index_hi_offset);
        }

        spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);

        return r;
}

/**
 * amdgpu_device_indirect_rreg64 - read a 64 bit indirect register
 *
 * @adev: amdgpu_device pointer
 * @reg_addr: indirect register address to read from
 *
 * Returns the value of indirect register @reg_addr
 */
u64 amdgpu_device_indirect_rreg64(struct amdgpu_device *adev,
                                  u32 reg_addr)
{
        unsigned long flags, pcie_index, pcie_data;
        void __iomem *pcie_index_offset;
        void __iomem *pcie_data_offset;
        u64 r;

        pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev);
        pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev);

        spin_lock_irqsave(&adev->pcie_idx_lock, flags);
        pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
        pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;

        /* read low 32 bits */
        writel(reg_addr, pcie_index_offset);
        readl(pcie_index_offset);
        r = readl(pcie_data_offset);
        /* read high 32 bits */
        writel(reg_addr + 4, pcie_index_offset);
        readl(pcie_index_offset);
        r |= ((u64)readl(pcie_data_offset) << 32);
        spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);

        return r;
}

u64 amdgpu_device_indirect_rreg64_ext(struct amdgpu_device *adev,
                                      u64 reg_addr)
{
        unsigned long flags, pcie_index, pcie_data;
        unsigned long pcie_index_hi = 0;
        void __iomem *pcie_index_offset;
        void __iomem *pcie_index_hi_offset;
        void __iomem *pcie_data_offset;
        u64 r;

        pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev);
        pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev);
        if ((reg_addr >> 32) && (adev->nbio.funcs->get_pcie_index_hi_offset))
                pcie_index_hi = adev->nbio.funcs->get_pcie_index_hi_offset(adev);

        spin_lock_irqsave(&adev->pcie_idx_lock, flags);
        pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
        pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;
        if (pcie_index_hi != 0)
                pcie_index_hi_offset = (void __iomem *)adev->rmmio +
                        pcie_index_hi * 4;

        /* read low 32 bits */
        writel(reg_addr, pcie_index_offset);
        readl(pcie_index_offset);
        if (pcie_index_hi != 0) {
                writel((reg_addr >> 32) & 0xff, pcie_index_hi_offset);
                readl(pcie_index_hi_offset);
        }
        r = readl(pcie_data_offset);
        /* read high 32 bits */
        writel(reg_addr + 4, pcie_index_offset);
        readl(pcie_index_offset);
        if (pcie_index_hi != 0) {
                writel((reg_addr >> 32) & 0xff, pcie_index_hi_offset);
                readl(pcie_index_hi_offset);
        }
        r |= ((u64)readl(pcie_data_offset) << 32);

        /* clear the high bits */
        if (pcie_index_hi != 0) {
                writel(0, pcie_index_hi_offset);
                readl(pcie_index_hi_offset);
        }

        spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);

        return r;
}

/**
 * amdgpu_device_indirect_wreg - write an indirect register
 *
 * @adev: amdgpu_device pointer
 * @reg_addr: indirect register offset
 * @reg_data: indirect register data
 *
 */
void amdgpu_device_indirect_wreg(struct amdgpu_device *adev,
                                 u32 reg_addr, u32 reg_data)
{
        unsigned long flags, pcie_index, pcie_data;
        void __iomem *pcie_index_offset;
        void __iomem *pcie_data_offset;

        pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev);
        pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev);

        spin_lock_irqsave(&adev->pcie_idx_lock, flags);
        pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
        pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;

        writel(reg_addr, pcie_index_offset);
        readl(pcie_index_offset);
        writel(reg_data, pcie_data_offset);
        readl(pcie_data_offset);
        spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
}

void amdgpu_device_indirect_wreg_ext(struct amdgpu_device *adev,
                                     u64 reg_addr, u32 reg_data)
{
        unsigned long flags, pcie_index, pcie_index_hi, pcie_data;
        void __iomem *pcie_index_offset;
        void __iomem *pcie_index_hi_offset;
        void __iomem *pcie_data_offset;

        pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev);
        pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev);
        if ((reg_addr >> 32) && (adev->nbio.funcs->get_pcie_index_hi_offset))
                pcie_index_hi = adev->nbio.funcs->get_pcie_index_hi_offset(adev);
        else
                pcie_index_hi = 0;

        spin_lock_irqsave(&adev->pcie_idx_lock, flags);
        pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
        pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;
        if (pcie_index_hi != 0)
                pcie_index_hi_offset = (void __iomem *)adev->rmmio +
                                pcie_index_hi * 4;

        writel(reg_addr, pcie_index_offset);
        readl(pcie_index_offset);
        if (pcie_index_hi != 0) {
                writel((reg_addr >> 32) & 0xff, pcie_index_hi_offset);
                readl(pcie_index_hi_offset);
        }
        writel(reg_data, pcie_data_offset);
        readl(pcie_data_offset);

        /* clear the high bits */
        if (pcie_index_hi != 0) {
                writel(0, pcie_index_hi_offset);
                readl(pcie_index_hi_offset);
        }

        spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
}

/**
 * amdgpu_device_indirect_wreg64 - write a 64 bit indirect register
 *
 * @adev: amdgpu_device pointer
 * @reg_addr: indirect register offset
 * @reg_data: indirect register data
 *
 */
void amdgpu_device_indirect_wreg64(struct amdgpu_device *adev,
                                   u32 reg_addr, u64 reg_data)
{
        unsigned long flags, pcie_index, pcie_data;
        void __iomem *pcie_index_offset;
        void __iomem *pcie_data_offset;

        pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev);
        pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev);

        spin_lock_irqsave(&adev->pcie_idx_lock, flags);
        pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
        pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;

        /* write low 32 bits */
        writel(reg_addr, pcie_index_offset);
        readl(pcie_index_offset);
        writel((u32)(reg_data & 0xffffffffULL), pcie_data_offset);
        readl(pcie_data_offset);
        /* write high 32 bits */
        writel(reg_addr + 4, pcie_index_offset);
        readl(pcie_index_offset);
        writel((u32)(reg_data >> 32), pcie_data_offset);
        readl(pcie_data_offset);
        spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
}

void amdgpu_device_indirect_wreg64_ext(struct amdgpu_device *adev,
                                       u64 reg_addr, u64 reg_data)
{
        unsigned long flags, pcie_index, pcie_data;
        unsigned long pcie_index_hi = 0;
        void __iomem *pcie_index_offset;
        void __iomem *pcie_index_hi_offset;
        void __iomem *pcie_data_offset;

        pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev);
        pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev);
        if ((reg_addr >> 32) && (adev->nbio.funcs->get_pcie_index_hi_offset))
                pcie_index_hi = adev->nbio.funcs->get_pcie_index_hi_offset(adev);

        spin_lock_irqsave(&adev->pcie_idx_lock, flags);
        pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
        pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;
        if (pcie_index_hi != 0)
                pcie_index_hi_offset = (void __iomem *)adev->rmmio +
                                pcie_index_hi * 4;

        /* write low 32 bits */
        writel(reg_addr, pcie_index_offset);
        readl(pcie_index_offset);
        if (pcie_index_hi != 0) {
                writel((reg_addr >> 32) & 0xff, pcie_index_hi_offset);
                readl(pcie_index_hi_offset);
        }
        writel((u32)(reg_data & 0xffffffffULL), pcie_data_offset);
        readl(pcie_data_offset);
        /* write high 32 bits */
        writel(reg_addr + 4, pcie_index_offset);
        readl(pcie_index_offset);
        if (pcie_index_hi != 0) {
                writel((reg_addr >> 32) & 0xff, pcie_index_hi_offset);
                readl(pcie_index_hi_offset);
        }
        writel((u32)(reg_data >> 32), pcie_data_offset);
        readl(pcie_data_offset);

        /* clear the high bits */
        if (pcie_index_hi != 0) {
                writel(0, pcie_index_hi_offset);
                readl(pcie_index_hi_offset);
        }

        spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
}

/**
 * amdgpu_device_get_rev_id - query device rev_id
 *
 * @adev: amdgpu_device pointer
 *
 * Returns the device rev_id.
 */
u32 amdgpu_device_get_rev_id(struct amdgpu_device *adev)
{
        return adev->nbio.funcs->get_rev_id(adev);
}

/**
 * amdgpu_invalid_rreg - dummy reg read function
 *
 * @adev: amdgpu_device pointer
 * @reg: offset of register
 *
 * Dummy register read function.  Used for register blocks
 * that certain asics don't have (all asics).
 * Returns the value in the register.
 */
static uint32_t amdgpu_invalid_rreg(struct amdgpu_device *adev, uint32_t reg)
{
        DRM_ERROR("Invalid callback to read register 0x%04X\n", reg);
        BUG();
        return 0;
}

static uint32_t amdgpu_invalid_rreg_ext(struct amdgpu_device *adev, uint64_t reg)
{
        DRM_ERROR("Invalid callback to read register 0x%llX\n", reg);
        BUG();
        return 0;
}

/**
 * amdgpu_invalid_wreg - dummy reg write function
 *
 * @adev: amdgpu_device pointer
 * @reg: offset of register
 * @v: value to write to the register
 *
 * Dummy register write function.  Used for register blocks
 * that certain asics don't have (all asics).
 */
static void amdgpu_invalid_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v)
{
        DRM_ERROR("Invalid callback to write register 0x%04X with 0x%08X\n",
                  reg, v);
        BUG();
}

static void amdgpu_invalid_wreg_ext(struct amdgpu_device *adev, uint64_t reg, uint32_t v)
{
        DRM_ERROR("Invalid callback to write register 0x%llX with 0x%08X\n",
                  reg, v);
        BUG();
}

/**
 * amdgpu_invalid_rreg64 - dummy 64 bit reg read function
 *
 * @adev: amdgpu_device pointer
 * @reg: offset of register
 *
 * Dummy register read function.  Used for register blocks
 * that certain asics don't have (all asics).
 * Returns the value in the register.
 */
static uint64_t amdgpu_invalid_rreg64(struct amdgpu_device *adev, uint32_t reg)
{
        DRM_ERROR("Invalid callback to read 64 bit register 0x%04X\n", reg);
        BUG();
        return 0;
}

static uint64_t amdgpu_invalid_rreg64_ext(struct amdgpu_device *adev, uint64_t reg)
{
        DRM_ERROR("Invalid callback to read register 0x%llX\n", reg);
        BUG();
        return 0;
}

/**
 * amdgpu_invalid_wreg64 - dummy reg write function
 *
 * @adev: amdgpu_device pointer
 * @reg: offset of register
 * @v: value to write to the register
 *
 * Dummy register write function.  Used for register blocks
 * that certain asics don't have (all asics).
 */
static void amdgpu_invalid_wreg64(struct amdgpu_device *adev, uint32_t reg, uint64_t v)
{
        DRM_ERROR("Invalid callback to write 64 bit register 0x%04X with 0x%08llX\n",
                  reg, v);
        BUG();
}

static void amdgpu_invalid_wreg64_ext(struct amdgpu_device *adev, uint64_t reg, uint64_t v)
{
        DRM_ERROR("Invalid callback to write 64 bit register 0x%llX with 0x%08llX\n",
                  reg, v);
        BUG();
}

/**
 * amdgpu_block_invalid_rreg - dummy reg read function
 *
 * @adev: amdgpu_device pointer
 * @block: offset of instance
 * @reg: offset of register
 *
 * Dummy register read function.  Used for register blocks
 * that certain asics don't have (all asics).
 * Returns the value in the register.
 */
static uint32_t amdgpu_block_invalid_rreg(struct amdgpu_device *adev,
                                          uint32_t block, uint32_t reg)
{
        DRM_ERROR("Invalid callback to read register 0x%04X in block 0x%04X\n",
                  reg, block);
        BUG();
        return 0;
}

/**
 * amdgpu_block_invalid_wreg - dummy reg write function
 *
 * @adev: amdgpu_device pointer
 * @block: offset of instance
 * @reg: offset of register
 * @v: value to write to the register
 *
 * Dummy register write function.  Used for register blocks
 * that certain asics don't have (all asics).
 */
static void amdgpu_block_invalid_wreg(struct amdgpu_device *adev,
                                      uint32_t block,
                                      uint32_t reg, uint32_t v)
{
        DRM_ERROR("Invalid block callback to write register 0x%04X in block 0x%04X with 0x%08X\n",
                  reg, block, v);
        BUG();
}

/**
 * amdgpu_device_asic_init - Wrapper for atom asic_init
 *
 * @adev: amdgpu_device pointer
 *
 * Does any asic specific work and then calls atom asic init.
 */
static int amdgpu_device_asic_init(struct amdgpu_device *adev)
{
        amdgpu_asic_pre_asic_init(adev);

        if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 3) ||
            amdgpu_ip_version(adev, GC_HWIP, 0) >= IP_VERSION(11, 0, 0)) {
                amdgpu_psp_wait_for_bootloader(adev);
                return amdgpu_atomfirmware_asic_init(adev, true);
        }

        return amdgpu_atom_asic_init(adev->mode_info.atom_context);
}

/**
 * amdgpu_device_mem_scratch_init - allocate the VRAM scratch page
 *
 * @adev: amdgpu_device pointer
 *
 * Allocates a scratch page of VRAM for use by various things in the
 * driver.
 */
static int amdgpu_device_mem_scratch_init(struct amdgpu_device *adev)
{
        return amdgpu_bo_create_kernel(adev, AMDGPU_GPU_PAGE_SIZE, PAGE_SIZE,
                                       AMDGPU_GEM_DOMAIN_VRAM |
                                       AMDGPU_GEM_DOMAIN_GTT,
                                       &adev->mem_scratch.robj,
                                       &adev->mem_scratch.gpu_addr,
                                       (void **)&adev->mem_scratch.ptr);
}

/**
 * amdgpu_device_mem_scratch_fini - Free the VRAM scratch page
 *
 * @adev: amdgpu_device pointer
 *
 * Frees the VRAM scratch page.
 */
static void amdgpu_device_mem_scratch_fini(struct amdgpu_device *adev)
{
        amdgpu_bo_free_kernel(&adev->mem_scratch.robj, NULL, NULL);
}

/**
 * amdgpu_device_program_register_sequence - program an array of registers.
 *
 * @adev: amdgpu_device pointer
 * @registers: pointer to the register array
 * @array_size: size of the register array
 *
 * Programs an array of registers with AND and OR masks.
 * This is a helper for setting golden registers.
 */
void amdgpu_device_program_register_sequence(struct amdgpu_device *adev,
                                             const u32 *registers,
                                             const u32 array_size)
{
        u32 tmp, reg, and_mask, or_mask;
        int i;

        if (array_size % 3)
                return;

        for (i = 0; i < array_size; i += 3) {
                reg = registers[i + 0];
                and_mask = registers[i + 1];
                or_mask = registers[i + 2];

                if (and_mask == 0xffffffff) {
                        tmp = or_mask;
                } else {
                        tmp = RREG32(reg);
                        tmp &= ~and_mask;
                        if (adev->family >= AMDGPU_FAMILY_AI)
                                tmp |= (or_mask & and_mask);
                        else
                                tmp |= or_mask;
                }
                WREG32(reg, tmp);
        }
}
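
/*
 * The @registers array is consumed as {offset, and_mask, or_mask} triplets;
 * a sketch with a hypothetical register and masks:
 *   static const u32 golden_settings[] = {
 *           mmSOME_REG, 0x0000ffff, 0x00000001,
 *   };
 *
 *   amdgpu_device_program_register_sequence(adev, golden_settings,
 *                                           ARRAY_SIZE(golden_settings));
 */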

/**
 * amdgpu_device_pci_config_reset - reset the GPU
 *
 * @adev: amdgpu_device pointer
 *
 * Resets the GPU using the pci config reset sequence.
 * Only applicable to asics prior to vega10.
 */
void amdgpu_device_pci_config_reset(struct amdgpu_device *adev)
{
        pci_write_config_dword(adev->pdev, 0x7c, AMDGPU_ASIC_RESET_DATA);
}

/**
 * amdgpu_device_pci_reset - reset the GPU using generic PCI means
 *
 * @adev: amdgpu_device pointer
 *
 * Resets the GPU using generic pci reset interfaces (FLR, SBR, etc.).
 */
int amdgpu_device_pci_reset(struct amdgpu_device *adev)
{
        return pci_reset_function(adev->pdev);
}

/*
 * amdgpu_device_wb_*()
 * Writeback is the method by which the GPU updates special pages in memory
 * with the status of certain GPU events (fences, ring pointers, etc.).
 */

/**
 * amdgpu_device_wb_fini - Disable Writeback and free memory
 *
 * @adev: amdgpu_device pointer
 *
 * Disables Writeback and frees the Writeback memory (all asics).
 * Used at driver shutdown.
 */
static void amdgpu_device_wb_fini(struct amdgpu_device *adev)
{
        if (adev->wb.wb_obj) {
                amdgpu_bo_free_kernel(&adev->wb.wb_obj,
                                      &adev->wb.gpu_addr,
                                      (void **)&adev->wb.wb);
                adev->wb.wb_obj = NULL;
        }
}

/**
 * amdgpu_device_wb_init - Init Writeback driver info and allocate memory
 *
 * @adev: amdgpu_device pointer
 *
 * Initializes writeback and allocates writeback memory (all asics).
 * Used at driver startup.
 * Returns 0 on success or a negative error code on failure.
 */
static int amdgpu_device_wb_init(struct amdgpu_device *adev)
{
        int r;

        if (adev->wb.wb_obj == NULL) {
                /* AMDGPU_MAX_WB * sizeof(uint32_t) * 8 = AMDGPU_MAX_WB 256bit slots */
                r = amdgpu_bo_create_kernel(adev, AMDGPU_MAX_WB * sizeof(uint32_t) * 8,
                                            PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT,
                                            &adev->wb.wb_obj, &adev->wb.gpu_addr,
                                            (void **)&adev->wb.wb);
                if (r) {
                        dev_warn(adev->dev, "(%d) create WB bo failed\n", r);
                        return r;
                }

                adev->wb.num_wb = AMDGPU_MAX_WB;
                memset(&adev->wb.used, 0, sizeof(adev->wb.used));

                /* clear wb memory */
                memset((char *)adev->wb.wb, 0, AMDGPU_MAX_WB * sizeof(uint32_t) * 8);
        }

        return 0;
}

/**
 * amdgpu_device_wb_get - Allocate a wb entry
 *
 * @adev: amdgpu_device pointer
 * @wb: wb index
 *
 * Allocate a wb slot for use by the driver (all asics).
 * Returns 0 on success or -EINVAL on failure.
 */
int amdgpu_device_wb_get(struct amdgpu_device *adev, u32 *wb)
{
        unsigned long flags, offset;

        spin_lock_irqsave(&adev->wb.lock, flags);
        offset = find_first_zero_bit(adev->wb.used, adev->wb.num_wb);
        if (offset < adev->wb.num_wb) {
                __set_bit(offset, adev->wb.used);
                spin_unlock_irqrestore(&adev->wb.lock, flags);
                *wb = offset << 3; /* convert to dw offset */
                return 0;
        } else {
                spin_unlock_irqrestore(&adev->wb.lock, flags);
                return -EINVAL;
        }
}
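
/*
 * Typical pairing, as a sketch: allocate a writeback slot, let the hardware
 * update it, then return it with amdgpu_device_wb_free():
 *   u32 wb;
 *
 *   if (!amdgpu_device_wb_get(adev, &wb)) {
 *           ... use adev->wb.wb[wb] (wb is a dword offset) ...
 *           amdgpu_device_wb_free(adev, wb);
 *   }
 */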

/**
 * amdgpu_device_wb_free - Free a wb entry
 *
 * @adev: amdgpu_device pointer
 * @wb: wb index
 *
 * Free a wb slot allocated for use by the driver (all asics)
 */
void amdgpu_device_wb_free(struct amdgpu_device *adev, u32 wb)
{
        unsigned long flags;

        wb >>= 3;
        spin_lock_irqsave(&adev->wb.lock, flags);
        if (wb < adev->wb.num_wb)
                __clear_bit(wb, adev->wb.used);
        spin_unlock_irqrestore(&adev->wb.lock, flags);
}
1518
1519 /**
1520  * amdgpu_device_resize_fb_bar - try to resize FB BAR
1521  *
1522  * @adev: amdgpu_device pointer
1523  *
1524  * Try to resize FB BAR to make all VRAM CPU accessible. We try very hard not
1525  * to fail, but if any of the BARs is not accessible after the size we abort
1526  * driver loading by returning -ENODEV.
1527  */
1528 int amdgpu_device_resize_fb_bar(struct amdgpu_device *adev)
1529 {
1530         int rbar_size = pci_rebar_bytes_to_size(adev->gmc.real_vram_size);
1531         struct pci_bus *root;
1532         struct resource *res;
1533         unsigned int i;
1534         u16 cmd;
1535         int r;
1536
1537         if (!IS_ENABLED(CONFIG_PHYS_ADDR_T_64BIT))
1538                 return 0;
1539
1540         /* Bypass for VF */
1541         if (amdgpu_sriov_vf(adev))
1542                 return 0;
1543
1544         /* PCI_EXT_CAP_ID_VNDR extended capability is located at 0x100 */
1545         if (!pci_find_ext_capability(adev->pdev, PCI_EXT_CAP_ID_VNDR))
1546                 DRM_WARN("System can't access extended configuration space, please check!!\n");
1547
1548         /* skip if the bios has already enabled large BAR */
1549         if (adev->gmc.real_vram_size &&
1550             (pci_resource_len(adev->pdev, 0) >= adev->gmc.real_vram_size))
1551                 return 0;
1552
1553         /* Check if the root BUS has 64bit memory resources */
1554         root = adev->pdev->bus;
1555         while (root->parent)
1556                 root = root->parent;
1557
1558         pci_bus_for_each_resource(root, res, i) {
1559                 if (res && res->flags & (IORESOURCE_MEM | IORESOURCE_MEM_64) &&
1560                     res->start > 0x100000000ull)
1561                         break;
1562         }
1563
1564         /* Trying to resize is pointless without a root hub window above 4GB */
1565         if (!res)
1566                 return 0;
1567
1568         /* Limit the BAR size to what is available */
1569         rbar_size = min(fls(pci_rebar_get_possible_sizes(adev->pdev, 0)) - 1,
1570                         rbar_size);
1571
1572         /* Disable memory decoding while we change the BAR addresses and size */
1573         pci_read_config_word(adev->pdev, PCI_COMMAND, &cmd);
1574         pci_write_config_word(adev->pdev, PCI_COMMAND,
1575                               cmd & ~PCI_COMMAND_MEMORY);
1576
1577         /* Free the VRAM and doorbell BAR, we most likely need to move both. */
1578         amdgpu_doorbell_fini(adev);
1579         if (adev->asic_type >= CHIP_BONAIRE)
1580                 pci_release_resource(adev->pdev, 2);
1581
1582         pci_release_resource(adev->pdev, 0);
1583
1584         r = pci_resize_resource(adev->pdev, 0, rbar_size);
1585         if (r == -ENOSPC)
1586                 DRM_INFO("Not enough PCI address space for a large BAR.");
1587         else if (r && r != -ENOTSUPP)
1588                 DRM_ERROR("Problem resizing BAR0 (%d).", r);
1589
1590         pci_assign_unassigned_bus_resources(adev->pdev->bus);
1591
1592         /* When the doorbell or fb BAR isn't available we have no chance of
1593          * using the device.
1594          */
1595         r = amdgpu_doorbell_init(adev);
1596         if (r || (pci_resource_flags(adev->pdev, 0) & IORESOURCE_UNSET))
1597                 return -ENODEV;
1598
1599         pci_write_config_word(adev->pdev, PCI_COMMAND, cmd);
1600
1601         return 0;
1602 }
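
/*
 * Worked example, informational only: pci_rebar_bytes_to_size() returns the
 * PCIe Resizable BAR size encoding log2(bytes) - 20, so 256 MiB -> 8 and
 * 8 GiB -> 13. The hypothetical helper below mirrors the clamping done above
 * against the sizes the endpoint actually advertises for BAR0.
 */
static inline int amdgpu_example_pick_rbar_size(struct pci_dev *pdev,
						u64 vram_bytes)
{
	int wanted = pci_rebar_bytes_to_size(vram_bytes);
	u32 possible = pci_rebar_get_possible_sizes(pdev, 0);

	if (!possible)
		return -ENOTSUPP;

	/* largest advertised size, capped to what VRAM actually needs */
	return min(fls(possible) - 1, wanted);
}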
1603
1604 static bool amdgpu_device_read_bios(struct amdgpu_device *adev)
1605 {
1606         if (hweight32(adev->aid_mask) && (adev->flags & AMD_IS_APU))
1607                 return false;
1608
1609         return true;
1610 }
1611
1612 /*
1613  * GPU helpers function.
1614  */
1615 /**
1616  * amdgpu_device_need_post - check if the hw needs post or not
1617  *
1618  * @adev: amdgpu_device pointer
1619  *
1620  * Check if the asic needs to be posted, either because it has not been
1621  * initialized at driver startup or because a hw reset was performed (all asics).
1622  * Returns true if post is needed, false if not.
1623  */
1624 bool amdgpu_device_need_post(struct amdgpu_device *adev)
1625 {
1626         uint32_t reg;
1627
1628         if (amdgpu_sriov_vf(adev))
1629                 return false;
1630
1631         if (!amdgpu_device_read_bios(adev))
1632                 return false;
1633
1634         if (amdgpu_passthrough(adev)) {
1635                 /* For FIJI: in the whole-GPU pass-through virtualization case, after a VM
1636                  * reboot some old SMC firmware still needs the driver to do a vPost,
1637                  * otherwise the GPU hangs. SMC firmware above version 22.15 doesn't
1638                  * have this flaw, so force vPost for SMC versions below 22.15.
1639                  */
1640                 if (adev->asic_type == CHIP_FIJI) {
1641                         int err;
1642                         uint32_t fw_ver;
1643
1644                         err = request_firmware(&adev->pm.fw, "amdgpu/fiji_smc.bin", adev->dev);
1645                         /* force vPost if error occurred */
1646                         if (err)
1647                                 return true;
1648
1649                         fw_ver = *((uint32_t *)adev->pm.fw->data + 69);
1650                         release_firmware(adev->pm.fw);
1651                         if (fw_ver < 0x00160e00)
1652                                 return true;
1653                 }
1654         }
1655
1656         /* Don't post if we need to reset whole hive on init */
1657         if (adev->gmc.xgmi.pending_reset)
1658                 return false;
1659
1660         if (adev->has_hw_reset) {
1661                 adev->has_hw_reset = false;
1662                 return true;
1663         }
1664
1665         /* bios scratch used on CIK+ */
1666         if (adev->asic_type >= CHIP_BONAIRE)
1667                 return amdgpu_atombios_scratch_need_asic_init(adev);
1668
1669         /* check MEM_SIZE for older asics */
1670         reg = amdgpu_asic_get_config_memsize(adev);
1671
1672         if ((reg != 0) && (reg != 0xffffffff))
1673                 return false;
1674
1675         return true;
1676 }
1677
1678 /*
1679  * Check whether seamless boot is supported.
1680  *
1681  * So far we only support seamless boot on DCN 3.0 or later.
1682  * If users report that it works on older ASICs as well, we may
1683  * loosen this.
1684  */
1685 bool amdgpu_device_seamless_boot_supported(struct amdgpu_device *adev)
1686 {
1687         switch (amdgpu_seamless) {
1688         case -1:
1689                 break;
1690         case 1:
1691                 return true;
1692         case 0:
1693                 return false;
1694         default:
1695                 DRM_ERROR("Invalid value for amdgpu.seamless: %d\n",
1696                           amdgpu_seamless);
1697                 return false;
1698         }
1699
1700         if (!(adev->flags & AMD_IS_APU))
1701                 return false;
1702
1703         if (adev->mman.keep_stolen_vga_memory)
1704                 return false;
1705
1706         return amdgpu_ip_version(adev, DCE_HWIP, 0) >= IP_VERSION(3, 0, 0);
1707 }
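
/*
 * Example, informational only: the heuristic above can be overridden from
 * the kernel command line:
 *
 *	amdgpu.seamless=1	force seamless boot on
 *	amdgpu.seamless=0	force seamless boot off
 *	amdgpu.seamless=-1	default, use the DCN 3.0+ APU check above
 */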
1708
1709 /*
1710  * Intel hosts such as Rocket Lake, Alder Lake, Raptor Lake and Sapphire Rapids
1711  * don't support dynamic speed switching. Until we have confirmation from Intel
1712  * that a specific host supports it, it's safer to keep it disabled for all.
1713  *
1714  * https://edc.intel.com/content/www/us/en/design/products/platforms/details/raptor-lake-s/13th-generation-core-processors-datasheet-volume-1-of-2/005/pci-express-support/
1715  * https://gitlab.freedesktop.org/drm/amd/-/issues/2663
1716  */
1717 static bool amdgpu_device_pcie_dynamic_switching_supported(struct amdgpu_device *adev)
1718 {
1719 #if IS_ENABLED(CONFIG_X86)
1720         struct cpuinfo_x86 *c = &cpu_data(0);
1721
1722         /* eGPU change speeds based on USB4 fabric conditions */
1723         if (dev_is_removable(adev->dev))
1724                 return true;
1725
1726         if (c->x86_vendor == X86_VENDOR_INTEL)
1727                 return false;
1728 #endif
1729         return true;
1730 }
1731
1732 /**
1733  * amdgpu_device_should_use_aspm - check if the device should program ASPM
1734  *
1735  * @adev: amdgpu_device pointer
1736  *
1737  * Confirm whether the module parameter and pcie bridge agree that ASPM should
1738  * be set for this device.
1739  *
1740  * Returns true if it should be used or false if not.
1741  */
1742 bool amdgpu_device_should_use_aspm(struct amdgpu_device *adev)
1743 {
1744         switch (amdgpu_aspm) {
1745         case -1:
1746                 break;
1747         case 0:
1748                 return false;
1749         case 1:
1750                 return true;
1751         default:
1752                 return false;
1753         }
1754         if (adev->flags & AMD_IS_APU)
1755                 return false;
1756         if (!(adev->pm.pp_feature & PP_PCIE_DPM_MASK))
1757                 return false;
1758         return pcie_aspm_enabled(adev->pdev);
1759 }
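
/*
 * Example, informational only: amdgpu.aspm follows the same convention,
 * with -1 deferring to the checks above (dGPU only, PCIe DPM enabled in
 * pp_feature, ASPM already enabled on the link by the platform):
 *
 *	amdgpu.aspm=0	never program ASPM
 *	amdgpu.aspm=1	always program ASPM
 */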
1760
1761 /* if we get transitioned to only one device, take VGA back */
1762 /**
1763  * amdgpu_device_vga_set_decode - enable/disable vga decode
1764  *
1765  * @pdev: PCI device pointer
1766  * @state: enable/disable vga decode
1767  *
1768  * Enable/disable vga decode (all asics).
1769  * Returns VGA resource flags.
1770  */
1771 static unsigned int amdgpu_device_vga_set_decode(struct pci_dev *pdev,
1772                 bool state)
1773 {
1774         struct amdgpu_device *adev = drm_to_adev(pci_get_drvdata(pdev));
1775
1776         amdgpu_asic_set_vga_state(adev, state);
1777         if (state)
1778                 return VGA_RSRC_LEGACY_IO | VGA_RSRC_LEGACY_MEM |
1779                        VGA_RSRC_NORMAL_IO | VGA_RSRC_NORMAL_MEM;
1780         else
1781                 return VGA_RSRC_NORMAL_IO | VGA_RSRC_NORMAL_MEM;
1782 }
1783
1784 /**
1785  * amdgpu_device_check_block_size - validate the vm block size
1786  *
1787  * @adev: amdgpu_device pointer
1788  *
1789  * Validates the vm block size specified via module parameter.
1790  * The vm block size defines the number of bits in the page table versus the
1791  * page directory. A page is 4KB, so we have a 12-bit offset; a minimum of 9
1792  * bits index the page table and the remaining bits go to the page directory.
1793  */
1794 static void amdgpu_device_check_block_size(struct amdgpu_device *adev)
1795 {
1796         /* defines the number of bits in the page table versus the page
1797          * directory; a page is 4KB, so we have a 12-bit offset, a minimum of
1798          * 9 bits in the page table, and the remaining bits in the page directory
1799          */
1800         if (amdgpu_vm_block_size == -1)
1801                 return;
1802
1803         if (amdgpu_vm_block_size < 9) {
1804                 dev_warn(adev->dev, "VM page table size (%d) too small\n",
1805                          amdgpu_vm_block_size);
1806                 amdgpu_vm_block_size = -1;
1807         }
1808 }
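
/*
 * Worked example, informational only: with amdgpu_vm_block_size == 9 and
 * 4 KiB pages, a GPU virtual address splits into 12 offset bits plus 9
 * page-table bits (512 PTEs, i.e. exactly one 4 KiB page of 8-byte entries),
 * with the remaining bits indexing the page directory levels. A value below
 * 9 would leave a page table smaller than one page, hence the warning above.
 */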
1809
1810 /**
1811  * amdgpu_device_check_vm_size - validate the vm size
1812  *
1813  * @adev: amdgpu_device pointer
1814  *
1815  * Validates the vm size in GB specified via module parameter.
1816  * The VM size is the size of the GPU virtual memory space in GB.
1817  */
1818 static void amdgpu_device_check_vm_size(struct amdgpu_device *adev)
1819 {
1820         /* no need to check the default value */
1821         if (amdgpu_vm_size == -1)
1822                 return;
1823
1824         if (amdgpu_vm_size < 1) {
1825                 dev_warn(adev->dev, "VM size (%d) too small, min is 1GB\n",
1826                          amdgpu_vm_size);
1827                 amdgpu_vm_size = -1;
1828         }
1829 }
1830
1831 static void amdgpu_device_check_smu_prv_buffer_size(struct amdgpu_device *adev)
1832 {
1833         struct sysinfo si;
1834         bool is_os_64 = (sizeof(void *) == 8);
1835         uint64_t total_memory;
1836         uint64_t dram_size_seven_GB = 0x1B8000000;
1837         uint64_t dram_size_three_GB = 0xB8000000;
1838
1839         if (amdgpu_smu_memory_pool_size == 0)
1840                 return;
1841
1842         if (!is_os_64) {
1843                 DRM_WARN("Not 64-bit OS, feature not supported\n");
1844                 goto def_value;
1845         }
1846         si_meminfo(&si);
1847         total_memory = (uint64_t)si.totalram * si.mem_unit;
1848
1849         if ((amdgpu_smu_memory_pool_size == 1) ||
1850                 (amdgpu_smu_memory_pool_size == 2)) {
1851                 if (total_memory < dram_size_three_GB)
1852                         goto def_value1;
1853         } else if ((amdgpu_smu_memory_pool_size == 4) ||
1854                 (amdgpu_smu_memory_pool_size == 8)) {
1855                 if (total_memory < dram_size_seven_GB)
1856                         goto def_value1;
1857         } else {
1858                 DRM_WARN("Smu memory pool size not supported\n");
1859                 goto def_value;
1860         }
1861         adev->pm.smu_prv_buffer_size = amdgpu_smu_memory_pool_size << 28;
1862
1863         return;
1864
1865 def_value1:
1866         DRM_WARN("Not enough system memory\n");
1867 def_value:
1868         adev->pm.smu_prv_buffer_size = 0;
1869 }
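
/*
 * Worked example, informational only: amdgpu_smu_memory_pool_size is in
 * units of 256 MiB (value << 28), so the accepted values map to
 * 1 -> 256 MiB, 2 -> 512 MiB, 4 -> 1 GiB and 8 -> 2 GiB; the two larger
 * pools are only allowed when roughly 7 GiB of system RAM is present.
 */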
1870
1871 static int amdgpu_device_init_apu_flags(struct amdgpu_device *adev)
1872 {
1873         if (!(adev->flags & AMD_IS_APU) ||
1874             adev->asic_type < CHIP_RAVEN)
1875                 return 0;
1876
1877         switch (adev->asic_type) {
1878         case CHIP_RAVEN:
1879                 if (adev->pdev->device == 0x15dd)
1880                         adev->apu_flags |= AMD_APU_IS_RAVEN;
1881                 if (adev->pdev->device == 0x15d8)
1882                         adev->apu_flags |= AMD_APU_IS_PICASSO;
1883                 break;
1884         case CHIP_RENOIR:
1885                 if ((adev->pdev->device == 0x1636) ||
1886                     (adev->pdev->device == 0x164c))
1887                         adev->apu_flags |= AMD_APU_IS_RENOIR;
1888                 else
1889                         adev->apu_flags |= AMD_APU_IS_GREEN_SARDINE;
1890                 break;
1891         case CHIP_VANGOGH:
1892                 adev->apu_flags |= AMD_APU_IS_VANGOGH;
1893                 break;
1894         case CHIP_YELLOW_CARP:
1895                 break;
1896         case CHIP_CYAN_SKILLFISH:
1897                 if ((adev->pdev->device == 0x13FE) ||
1898                     (adev->pdev->device == 0x143F))
1899                         adev->apu_flags |= AMD_APU_IS_CYAN_SKILLFISH2;
1900                 break;
1901         default:
1902                 break;
1903         }
1904
1905         return 0;
1906 }
1907
1908 /**
1909  * amdgpu_device_check_arguments - validate module params
1910  *
1911  * @adev: amdgpu_device pointer
1912  *
1913  * Validates certain module parameters and updates
1914  * the associated values used by the driver (all asics).
1915  */
1916 static int amdgpu_device_check_arguments(struct amdgpu_device *adev)
1917 {
1918         if (amdgpu_sched_jobs < 4) {
1919                 dev_warn(adev->dev, "sched jobs (%d) must be at least 4\n",
1920                          amdgpu_sched_jobs);
1921                 amdgpu_sched_jobs = 4;
1922         } else if (!is_power_of_2(amdgpu_sched_jobs)) {
1923                 dev_warn(adev->dev, "sched jobs (%d) must be a power of 2\n",
1924                          amdgpu_sched_jobs);
1925                 amdgpu_sched_jobs = roundup_pow_of_two(amdgpu_sched_jobs);
1926         }
1927
1928         if (amdgpu_gart_size != -1 && amdgpu_gart_size < 32) {
1929                 /* gart size must be greater than or equal to 32M */
1930                 dev_warn(adev->dev, "gart size (%d) too small\n",
1931                          amdgpu_gart_size);
1932                 amdgpu_gart_size = -1;
1933         }
1934
1935         if (amdgpu_gtt_size != -1 && amdgpu_gtt_size < 32) {
1936                 /* gtt size must be greater than or equal to 32M */
1937                 dev_warn(adev->dev, "gtt size (%d) too small\n",
1938                                  amdgpu_gtt_size);
1939                 amdgpu_gtt_size = -1;
1940         }
1941
1942         /* valid range is between 4 and 9 inclusive */
1943         if (amdgpu_vm_fragment_size != -1 &&
1944             (amdgpu_vm_fragment_size > 9 || amdgpu_vm_fragment_size < 4)) {
1945                 dev_warn(adev->dev, "vm fragment size valid range is between 4 and 9\n");
1946                 amdgpu_vm_fragment_size = -1;
1947         }
1948
1949         if (amdgpu_sched_hw_submission < 2) {
1950                 dev_warn(adev->dev, "sched hw submission jobs (%d) must be at least 2\n",
1951                          amdgpu_sched_hw_submission);
1952                 amdgpu_sched_hw_submission = 2;
1953         } else if (!is_power_of_2(amdgpu_sched_hw_submission)) {
1954                 dev_warn(adev->dev, "sched hw submission jobs (%d) must be a power of 2\n",
1955                          amdgpu_sched_hw_submission);
1956                 amdgpu_sched_hw_submission = roundup_pow_of_two(amdgpu_sched_hw_submission);
1957         }
1958
1959         if (amdgpu_reset_method < -1 || amdgpu_reset_method > 4) {
1960                 dev_warn(adev->dev, "invalid option for reset method, reverting to default\n");
1961                 amdgpu_reset_method = -1;
1962         }
1963
1964         amdgpu_device_check_smu_prv_buffer_size(adev);
1965
1966         amdgpu_device_check_vm_size(adev);
1967
1968         amdgpu_device_check_block_size(adev);
1969
1970         adev->firmware.load_type = amdgpu_ucode_get_load_type(adev, amdgpu_fw_load_type);
1971
1972         return 0;
1973 }
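
/*
 * Example, informational only: out-of-range parameters are corrected rather
 * than rejected, e.g. booting with amdgpu.sched_jobs=5 results in
 * roundup_pow_of_two(5) == 8, while amdgpu.sched_jobs=1 is raised to the
 * minimum of 4.
 */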
1974
1975 /**
1976  * amdgpu_switcheroo_set_state - set switcheroo state
1977  *
1978  * @pdev: pci dev pointer
1979  * @state: vga_switcheroo state
1980  *
1981  * Callback for the switcheroo driver.  Suspends or resumes
1982  * the asics before or after it is powered up using ACPI methods.
1983  */
1984 static void amdgpu_switcheroo_set_state(struct pci_dev *pdev,
1985                                         enum vga_switcheroo_state state)
1986 {
1987         struct drm_device *dev = pci_get_drvdata(pdev);
1988         int r;
1989
1990         if (amdgpu_device_supports_px(dev) && state == VGA_SWITCHEROO_OFF)
1991                 return;
1992
1993         if (state == VGA_SWITCHEROO_ON) {
1994                 pr_info("switched on\n");
1995                 /* don't suspend or resume card normally */
1996                 dev->switch_power_state = DRM_SWITCH_POWER_CHANGING;
1997
1998                 pci_set_power_state(pdev, PCI_D0);
1999                 amdgpu_device_load_pci_state(pdev);
2000                 r = pci_enable_device(pdev);
2001                 if (r)
2002                         DRM_WARN("pci_enable_device failed (%d)\n", r);
2003                 amdgpu_device_resume(dev, true);
2004
2005                 dev->switch_power_state = DRM_SWITCH_POWER_ON;
2006         } else {
2007                 pr_info("switched off\n");
2008                 dev->switch_power_state = DRM_SWITCH_POWER_CHANGING;
2009                 amdgpu_device_prepare(dev);
2010                 amdgpu_device_suspend(dev, true);
2011                 amdgpu_device_cache_pci_state(pdev);
2012                 /* Shut down the device */
2013                 pci_disable_device(pdev);
2014                 pci_set_power_state(pdev, PCI_D3cold);
2015                 dev->switch_power_state = DRM_SWITCH_POWER_OFF;
2016         }
2017 }
2018
2019 /**
2020  * amdgpu_switcheroo_can_switch - see if switcheroo state can change
2021  *
2022  * @pdev: pci dev pointer
2023  *
2024  * Callback for the switcheroo driver.  Check if the switcheroo
2025  * state can be changed.
2026  * Returns true if the state can be changed, false if not.
2027  */
2028 static bool amdgpu_switcheroo_can_switch(struct pci_dev *pdev)
2029 {
2030         struct drm_device *dev = pci_get_drvdata(pdev);
2031
2032         /*
2033          * FIXME: open_count is protected by drm_global_mutex but that would lead to
2034          * locking inversion with the driver load path. And the access here is
2035          * completely racy anyway. So don't bother with locking for now.
2036          */
2037         return atomic_read(&dev->open_count) == 0;
2038 }
2039
2040 static const struct vga_switcheroo_client_ops amdgpu_switcheroo_ops = {
2041         .set_gpu_state = amdgpu_switcheroo_set_state,
2042         .reprobe = NULL,
2043         .can_switch = amdgpu_switcheroo_can_switch,
2044 };
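
/*
 * Sketch, informational only: these ops are registered with the
 * vga_switcheroo framework during device init, roughly as below, where
 * "px" reflects whether the device supports ATPX power control:
 *
 *	vga_switcheroo_register_client(adev->pdev, &amdgpu_switcheroo_ops, px);
 */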
2045
2046 /**
2047  * amdgpu_device_ip_set_clockgating_state - set the CG state
2048  *
2049  * @dev: amdgpu_device pointer
2050  * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
2051  * @state: clockgating state (gate or ungate)
2052  *
2053  * Sets the requested clockgating state for all instances of
2054  * the hardware IP specified.
2055  * Returns the error code from the last instance.
2056  */
2057 int amdgpu_device_ip_set_clockgating_state(void *dev,
2058                                            enum amd_ip_block_type block_type,
2059                                            enum amd_clockgating_state state)
2060 {
2061         struct amdgpu_device *adev = dev;
2062         int i, r = 0;
2063
2064         for (i = 0; i < adev->num_ip_blocks; i++) {
2065                 if (!adev->ip_blocks[i].status.valid)
2066                         continue;
2067                 if (adev->ip_blocks[i].version->type != block_type)
2068                         continue;
2069                 if (!adev->ip_blocks[i].version->funcs->set_clockgating_state)
2070                         continue;
2071                 r = adev->ip_blocks[i].version->funcs->set_clockgating_state(
2072                         (void *)adev, state);
2073                 if (r)
2074                         DRM_ERROR("set_clockgating_state of IP block <%s> failed %d\n",
2075                                   adev->ip_blocks[i].version->funcs->name, r);
2076         }
2077         return r;
2078 }
2079
2080 /**
2081  * amdgpu_device_ip_set_powergating_state - set the PG state
2082  *
2083  * @dev: amdgpu_device pointer
2084  * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
2085  * @state: powergating state (gate or ungate)
2086  *
2087  * Sets the requested powergating state for all instances of
2088  * the hardware IP specified.
2089  * Returns the error code from the last instance.
2090  */
2091 int amdgpu_device_ip_set_powergating_state(void *dev,
2092                                            enum amd_ip_block_type block_type,
2093                                            enum amd_powergating_state state)
2094 {
2095         struct amdgpu_device *adev = dev;
2096         int i, r = 0;
2097
2098         for (i = 0; i < adev->num_ip_blocks; i++) {
2099                 if (!adev->ip_blocks[i].status.valid)
2100                         continue;
2101                 if (adev->ip_blocks[i].version->type != block_type)
2102                         continue;
2103                 if (!adev->ip_blocks[i].version->funcs->set_powergating_state)
2104                         continue;
2105                 r = adev->ip_blocks[i].version->funcs->set_powergating_state(
2106                         (void *)adev, state);
2107                 if (r)
2108                         DRM_ERROR("set_powergating_state of IP block <%s> failed %d\n",
2109                                   adev->ip_blocks[i].version->funcs->name, r);
2110         }
2111         return r;
2112 }
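
/*
 * Usage sketch, informational only: callers pass the amdgpu_device as the
 * opaque handle together with an IP block type and a target state, e.g.
 *
 *	amdgpu_device_ip_set_clockgating_state(adev, AMD_IP_BLOCK_TYPE_GFX,
 *					       AMD_CG_STATE_GATE);
 *	amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_UVD,
 *					       AMD_PG_STATE_UNGATE);
 */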
2113
2114 /**
2115  * amdgpu_device_ip_get_clockgating_state - get the CG state
2116  *
2117  * @adev: amdgpu_device pointer
2118  * @flags: clockgating feature flags
2119  *
2120  * Walks the list of IPs on the device and updates the clockgating
2121  * flags for each IP.
2122  * Updates @flags with the feature flags for each hardware IP where
2123  * clockgating is enabled.
2124  */
2125 void amdgpu_device_ip_get_clockgating_state(struct amdgpu_device *adev,
2126                                             u64 *flags)
2127 {
2128         int i;
2129
2130         for (i = 0; i < adev->num_ip_blocks; i++) {
2131                 if (!adev->ip_blocks[i].status.valid)
2132                         continue;
2133                 if (adev->ip_blocks[i].version->funcs->get_clockgating_state)
2134                         adev->ip_blocks[i].version->funcs->get_clockgating_state((void *)adev, flags);
2135         }
2136 }
2137
2138 /**
2139  * amdgpu_device_ip_wait_for_idle - wait for idle
2140  *
2141  * @adev: amdgpu_device pointer
2142  * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
2143  *
2144  * Waits for the requested hardware IP to be idle.
2145  * Returns 0 for success or a negative error code on failure.
2146  */
2147 int amdgpu_device_ip_wait_for_idle(struct amdgpu_device *adev,
2148                                    enum amd_ip_block_type block_type)
2149 {
2150         int i, r;
2151
2152         for (i = 0; i < adev->num_ip_blocks; i++) {
2153                 if (!adev->ip_blocks[i].status.valid)
2154                         continue;
2155                 if (adev->ip_blocks[i].version->type == block_type) {
2156                         r = adev->ip_blocks[i].version->funcs->wait_for_idle((void *)adev);
2157                         if (r)
2158                                 return r;
2159                         break;
2160                 }
2161         }
2162         return 0;
2163
2164 }
2165
2166 /**
2167  * amdgpu_device_ip_is_idle - is the hardware IP idle
2168  *
2169  * @adev: amdgpu_device pointer
2170  * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
2171  *
2172  * Check if the hardware IP is idle or not.
2173  * Returns true if the IP is idle, false if not.
2174  */
2175 bool amdgpu_device_ip_is_idle(struct amdgpu_device *adev,
2176                               enum amd_ip_block_type block_type)
2177 {
2178         int i;
2179
2180         for (i = 0; i < adev->num_ip_blocks; i++) {
2181                 if (!adev->ip_blocks[i].status.valid)
2182                         continue;
2183                 if (adev->ip_blocks[i].version->type == block_type)
2184                         return adev->ip_blocks[i].version->funcs->is_idle((void *)adev);
2185         }
2186         return true;
2187
2188 }
2189
2190 /**
2191  * amdgpu_device_ip_get_ip_block - get a hw IP pointer
2192  *
2193  * @adev: amdgpu_device pointer
2194  * @type: Type of hardware IP (SMU, GFX, UVD, etc.)
2195  *
2196  * Returns a pointer to the hardware IP block structure
2197  * if it exists for the asic, otherwise NULL.
2198  */
2199 struct amdgpu_ip_block *
2200 amdgpu_device_ip_get_ip_block(struct amdgpu_device *adev,
2201                               enum amd_ip_block_type type)
2202 {
2203         int i;
2204
2205         for (i = 0; i < adev->num_ip_blocks; i++)
2206                 if (adev->ip_blocks[i].version->type == type)
2207                         return &adev->ip_blocks[i];
2208
2209         return NULL;
2210 }
2211
2212 /**
2213  * amdgpu_device_ip_block_version_cmp
2214  *
2215  * @adev: amdgpu_device pointer
2216  * @type: enum amd_ip_block_type
2217  * @major: major version
2218  * @minor: minor version
2219  *
2220  * Returns 0 if the installed version is equal to or greater than the
2221  * requested one, 1 if it is smaller or the ip_block doesn't exist.
2222  */
2223 int amdgpu_device_ip_block_version_cmp(struct amdgpu_device *adev,
2224                                        enum amd_ip_block_type type,
2225                                        u32 major, u32 minor)
2226 {
2227         struct amdgpu_ip_block *ip_block = amdgpu_device_ip_get_ip_block(adev, type);
2228
2229         if (ip_block && ((ip_block->version->major > major) ||
2230                         ((ip_block->version->major == major) &&
2231                         (ip_block->version->minor >= minor))))
2232                 return 0;
2233
2234         return 1;
2235 }
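
/*
 * Usage sketch, informational only: note the inverted return convention,
 * where 0 means "at least this version", so a typical caller looks like:
 *
 *	if (amdgpu_device_ip_block_version_cmp(adev, AMD_IP_BLOCK_TYPE_GFX,
 *					       8, 1) == 0) {
 *		... GFX IP is v8.1 or newer ...
 *	}
 */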
2236
2237 /**
2238  * amdgpu_device_ip_block_add
2239  *
2240  * @adev: amdgpu_device pointer
2241  * @ip_block_version: pointer to the IP to add
2242  *
2243  * Adds the IP block driver information to the collection of IPs
2244  * on the asic.
2245  */
2246 int amdgpu_device_ip_block_add(struct amdgpu_device *adev,
2247                                const struct amdgpu_ip_block_version *ip_block_version)
2248 {
2249         if (!ip_block_version)
2250                 return -EINVAL;
2251
2252         switch (ip_block_version->type) {
2253         case AMD_IP_BLOCK_TYPE_VCN:
2254                 if (adev->harvest_ip_mask & AMD_HARVEST_IP_VCN_MASK)
2255                         return 0;
2256                 break;
2257         case AMD_IP_BLOCK_TYPE_JPEG:
2258                 if (adev->harvest_ip_mask & AMD_HARVEST_IP_JPEG_MASK)
2259                         return 0;
2260                 break;
2261         default:
2262                 break;
2263         }
2264
2265         DRM_INFO("add ip block number %d <%s>\n", adev->num_ip_blocks,
2266                   ip_block_version->funcs->name);
2267
2268         adev->ip_blocks[adev->num_ip_blocks++].version = ip_block_version;
2269
2270         return 0;
2271 }
2272
2273 /**
2274  * amdgpu_device_enable_virtual_display - enable virtual display feature
2275  *
2276  * @adev: amdgpu_device pointer
2277  *
2278  * Enables the virtual display feature if the user has enabled it via
2279  * the module parameter virtual_display.  This feature provides virtual
2280  * display hardware on headless boards or in virtualized environments.
2281  * This function parses and validates the configuration string specified by
2282  * the user and configures the virtual display configuration (number of
2283  * virtual connectors, crtcs, etc.) specified.
2284  */
2285 static void amdgpu_device_enable_virtual_display(struct amdgpu_device *adev)
2286 {
2287         adev->enable_virtual_display = false;
2288
2289         if (amdgpu_virtual_display) {
2290                 const char *pci_address_name = pci_name(adev->pdev);
2291                 char *pciaddstr, *pciaddstr_tmp, *pciaddname_tmp, *pciaddname;
2292
2293                 pciaddstr = kstrdup(amdgpu_virtual_display, GFP_KERNEL);
2294                 pciaddstr_tmp = pciaddstr;
2295                 while ((pciaddname_tmp = strsep(&pciaddstr_tmp, ";"))) {
2296                         pciaddname = strsep(&pciaddname_tmp, ",");
2297                         if (!strcmp("all", pciaddname)
2298                             || !strcmp(pci_address_name, pciaddname)) {
2299                                 long num_crtc;
2300                                 int res = -1;
2301
2302                                 adev->enable_virtual_display = true;
2303
2304                                 if (pciaddname_tmp)
2305                                         res = kstrtol(pciaddname_tmp, 10,
2306                                                       &num_crtc);
2307
2308                                 if (!res) {
2309                                         if (num_crtc < 1)
2310                                                 num_crtc = 1;
2311                                         if (num_crtc > 6)
2312                                                 num_crtc = 6;
2313                                         adev->mode_info.num_crtc = num_crtc;
2314                                 } else {
2315                                         adev->mode_info.num_crtc = 1;
2316                                 }
2317                                 break;
2318                         }
2319                 }
2320
2321                 DRM_INFO("virtual display string:%s, %s:virtual_display:%d, num_crtc:%d\n",
2322                          amdgpu_virtual_display, pci_address_name,
2323                          adev->enable_virtual_display, adev->mode_info.num_crtc);
2324
2325                 kfree(pciaddstr);
2326         }
2327 }
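
/*
 * Example, informational only: per the parsing above, the parameter is a
 * semicolon-separated list of "<pci address>[,<num_crtc>]" entries, with
 * num_crtc clamped to 1..6 and defaulting to 1 when omitted:
 *
 *	amdgpu.virtual_display=0000:03:00.0,2
 *	amdgpu.virtual_display=all,4
 */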
2328
2329 void amdgpu_device_set_sriov_virtual_display(struct amdgpu_device *adev)
2330 {
2331         if (amdgpu_sriov_vf(adev) && !adev->enable_virtual_display) {
2332                 adev->mode_info.num_crtc = 1;
2333                 adev->enable_virtual_display = true;
2334                 DRM_INFO("virtual_display:%d, num_crtc:%d\n",
2335                          adev->enable_virtual_display, adev->mode_info.num_crtc);
2336         }
2337 }
2338
2339 /**
2340  * amdgpu_device_parse_gpu_info_fw - parse gpu info firmware
2341  *
2342  * @adev: amdgpu_device pointer
2343  *
2344  * Parses the asic configuration parameters specified in the gpu info
2345  * firmware and makes them available to the driver for use in configuring
2346  * the asic.
2347  * Returns 0 on success, -EINVAL on failure.
2348  */
2349 static int amdgpu_device_parse_gpu_info_fw(struct amdgpu_device *adev)
2350 {
2351         const char *chip_name;
2352         char fw_name[40];
2353         int err;
2354         const struct gpu_info_firmware_header_v1_0 *hdr;
2355
2356         adev->firmware.gpu_info_fw = NULL;
2357
2358         if (adev->mman.discovery_bin)
2359                 return 0;
2360
2361         switch (adev->asic_type) {
2362         default:
2363                 return 0;
2364         case CHIP_VEGA10:
2365                 chip_name = "vega10";
2366                 break;
2367         case CHIP_VEGA12:
2368                 chip_name = "vega12";
2369                 break;
2370         case CHIP_RAVEN:
2371                 if (adev->apu_flags & AMD_APU_IS_RAVEN2)
2372                         chip_name = "raven2";
2373                 else if (adev->apu_flags & AMD_APU_IS_PICASSO)
2374                         chip_name = "picasso";
2375                 else
2376                         chip_name = "raven";
2377                 break;
2378         case CHIP_ARCTURUS:
2379                 chip_name = "arcturus";
2380                 break;
2381         case CHIP_NAVI12:
2382                 chip_name = "navi12";
2383                 break;
2384         }
2385
2386         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_gpu_info.bin", chip_name);
2387         err = amdgpu_ucode_request(adev, &adev->firmware.gpu_info_fw, fw_name);
2388         if (err) {
2389                 dev_err(adev->dev,
2390                         "Failed to get gpu_info firmware \"%s\"\n",
2391                         fw_name);
2392                 goto out;
2393         }
2394
2395         hdr = (const struct gpu_info_firmware_header_v1_0 *)adev->firmware.gpu_info_fw->data;
2396         amdgpu_ucode_print_gpu_info_hdr(&hdr->header);
2397
2398         switch (hdr->version_major) {
2399         case 1:
2400         {
2401                 const struct gpu_info_firmware_v1_0 *gpu_info_fw =
2402                         (const struct gpu_info_firmware_v1_0 *)(adev->firmware.gpu_info_fw->data +
2403                                                                 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
2404
2405                 /*
2406                  * Should be dropped when DAL no longer needs it.
2407                  */
2408                 if (adev->asic_type == CHIP_NAVI12)
2409                         goto parse_soc_bounding_box;
2410
2411                 adev->gfx.config.max_shader_engines = le32_to_cpu(gpu_info_fw->gc_num_se);
2412                 adev->gfx.config.max_cu_per_sh = le32_to_cpu(gpu_info_fw->gc_num_cu_per_sh);
2413                 adev->gfx.config.max_sh_per_se = le32_to_cpu(gpu_info_fw->gc_num_sh_per_se);
2414                 adev->gfx.config.max_backends_per_se = le32_to_cpu(gpu_info_fw->gc_num_rb_per_se);
2415                 adev->gfx.config.max_texture_channel_caches =
2416                         le32_to_cpu(gpu_info_fw->gc_num_tccs);
2417                 adev->gfx.config.max_gprs = le32_to_cpu(gpu_info_fw->gc_num_gprs);
2418                 adev->gfx.config.max_gs_threads = le32_to_cpu(gpu_info_fw->gc_num_max_gs_thds);
2419                 adev->gfx.config.gs_vgt_table_depth = le32_to_cpu(gpu_info_fw->gc_gs_table_depth);
2420                 adev->gfx.config.gs_prim_buffer_depth = le32_to_cpu(gpu_info_fw->gc_gsprim_buff_depth);
2421                 adev->gfx.config.double_offchip_lds_buf =
2422                         le32_to_cpu(gpu_info_fw->gc_double_offchip_lds_buffer);
2423                 adev->gfx.cu_info.wave_front_size = le32_to_cpu(gpu_info_fw->gc_wave_size);
2424                 adev->gfx.cu_info.max_waves_per_simd =
2425                         le32_to_cpu(gpu_info_fw->gc_max_waves_per_simd);
2426                 adev->gfx.cu_info.max_scratch_slots_per_cu =
2427                         le32_to_cpu(gpu_info_fw->gc_max_scratch_slots_per_cu);
2428                 adev->gfx.cu_info.lds_size = le32_to_cpu(gpu_info_fw->gc_lds_size);
2429                 if (hdr->version_minor >= 1) {
2430                         const struct gpu_info_firmware_v1_1 *gpu_info_fw =
2431                                 (const struct gpu_info_firmware_v1_1 *)(adev->firmware.gpu_info_fw->data +
2432                                                                         le32_to_cpu(hdr->header.ucode_array_offset_bytes));
2433                         adev->gfx.config.num_sc_per_sh =
2434                                 le32_to_cpu(gpu_info_fw->num_sc_per_sh);
2435                         adev->gfx.config.num_packer_per_sc =
2436                                 le32_to_cpu(gpu_info_fw->num_packer_per_sc);
2437                 }
2438
2439 parse_soc_bounding_box:
2440                 /*
2441                  * soc bounding box info is not integrated into the discovery table,
2442                  * so we always need to parse it from the gpu info firmware when needed.
2443                  */
2444                 if (hdr->version_minor == 2) {
2445                         const struct gpu_info_firmware_v1_2 *gpu_info_fw =
2446                                 (const struct gpu_info_firmware_v1_2 *)(adev->firmware.gpu_info_fw->data +
2447                                                                         le32_to_cpu(hdr->header.ucode_array_offset_bytes));
2448                         adev->dm.soc_bounding_box = &gpu_info_fw->soc_bounding_box;
2449                 }
2450                 break;
2451         }
2452         default:
2453                 dev_err(adev->dev,
2454                         "Unsupported gpu_info table %d\n", hdr->header.ucode_version);
2455                 err = -EINVAL;
2456                 goto out;
2457         }
2458 out:
2459         return err;
2460 }
2461
2462 /**
2463  * amdgpu_device_ip_early_init - run early init for hardware IPs
2464  *
2465  * @adev: amdgpu_device pointer
2466  *
2467  * Early initialization pass for hardware IPs.  The hardware IPs that make
2468  * up each asic are discovered and each IP's early_init callback is run.  This
2469  * is the first stage in initializing the asic.
2470  * Returns 0 on success, negative error code on failure.
2471  */
2472 static int amdgpu_device_ip_early_init(struct amdgpu_device *adev)
2473 {
2474         struct pci_dev *parent;
2475         int i, r;
2476         bool total;
2477
2478         amdgpu_device_enable_virtual_display(adev);
2479
2480         if (amdgpu_sriov_vf(adev)) {
2481                 r = amdgpu_virt_request_full_gpu(adev, true);
2482                 if (r)
2483                         return r;
2484         }
2485
2486         switch (adev->asic_type) {
2487 #ifdef CONFIG_DRM_AMDGPU_SI
2488         case CHIP_VERDE:
2489         case CHIP_TAHITI:
2490         case CHIP_PITCAIRN:
2491         case CHIP_OLAND:
2492         case CHIP_HAINAN:
2493                 adev->family = AMDGPU_FAMILY_SI;
2494                 r = si_set_ip_blocks(adev);
2495                 if (r)
2496                         return r;
2497                 break;
2498 #endif
2499 #ifdef CONFIG_DRM_AMDGPU_CIK
2500         case CHIP_BONAIRE:
2501         case CHIP_HAWAII:
2502         case CHIP_KAVERI:
2503         case CHIP_KABINI:
2504         case CHIP_MULLINS:
2505                 if (adev->flags & AMD_IS_APU)
2506                         adev->family = AMDGPU_FAMILY_KV;
2507                 else
2508                         adev->family = AMDGPU_FAMILY_CI;
2509
2510                 r = cik_set_ip_blocks(adev);
2511                 if (r)
2512                         return r;
2513                 break;
2514 #endif
2515         case CHIP_TOPAZ:
2516         case CHIP_TONGA:
2517         case CHIP_FIJI:
2518         case CHIP_POLARIS10:
2519         case CHIP_POLARIS11:
2520         case CHIP_POLARIS12:
2521         case CHIP_VEGAM:
2522         case CHIP_CARRIZO:
2523         case CHIP_STONEY:
2524                 if (adev->flags & AMD_IS_APU)
2525                         adev->family = AMDGPU_FAMILY_CZ;
2526                 else
2527                         adev->family = AMDGPU_FAMILY_VI;
2528
2529                 r = vi_set_ip_blocks(adev);
2530                 if (r)
2531                         return r;
2532                 break;
2533         default:
2534                 r = amdgpu_discovery_set_ip_blocks(adev);
2535                 if (r)
2536                         return r;
2537                 break;
2538         }
2539
2540         if (amdgpu_has_atpx() &&
2541             (amdgpu_is_atpx_hybrid() ||
2542              amdgpu_has_atpx_dgpu_power_cntl()) &&
2543             ((adev->flags & AMD_IS_APU) == 0) &&
2544             !dev_is_removable(&adev->pdev->dev))
2545                 adev->flags |= AMD_IS_PX;
2546
2547         if (!(adev->flags & AMD_IS_APU)) {
2548                 parent = pcie_find_root_port(adev->pdev);
2549                 adev->has_pr3 = parent ? pci_pr3_present(parent) : false;
2550         }
2551
2552
2553         adev->pm.pp_feature = amdgpu_pp_feature_mask;
2554         if (amdgpu_sriov_vf(adev) || sched_policy == KFD_SCHED_POLICY_NO_HWS)
2555                 adev->pm.pp_feature &= ~PP_GFXOFF_MASK;
2556         if (amdgpu_sriov_vf(adev) && adev->asic_type == CHIP_SIENNA_CICHLID)
2557                 adev->pm.pp_feature &= ~PP_OVERDRIVE_MASK;
2558         if (!amdgpu_device_pcie_dynamic_switching_supported(adev))
2559                 adev->pm.pp_feature &= ~PP_PCIE_DPM_MASK;
2560
2561         total = true;
2562         for (i = 0; i < adev->num_ip_blocks; i++) {
2563                 if ((amdgpu_ip_block_mask & (1 << i)) == 0) {
2564                         DRM_WARN("disabled ip block: %d <%s>\n",
2565                                   i, adev->ip_blocks[i].version->funcs->name);
2566                         adev->ip_blocks[i].status.valid = false;
2567                 } else {
2568                         if (adev->ip_blocks[i].version->funcs->early_init) {
2569                                 r = adev->ip_blocks[i].version->funcs->early_init((void *)adev);
2570                                 if (r == -ENOENT) {
2571                                         adev->ip_blocks[i].status.valid = false;
2572                                 } else if (r) {
2573                                         DRM_ERROR("early_init of IP block <%s> failed %d\n",
2574                                                   adev->ip_blocks[i].version->funcs->name, r);
2575                                         total = false;
2576                                 } else {
2577                                         adev->ip_blocks[i].status.valid = true;
2578                                 }
2579                         } else {
2580                                 adev->ip_blocks[i].status.valid = true;
2581                         }
2582                 }
2583                 /* get the vbios after the asic_funcs are set up */
2584                 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON) {
2585                         r = amdgpu_device_parse_gpu_info_fw(adev);
2586                         if (r)
2587                                 return r;
2588
2589                         /* Read BIOS */
2590                         if (amdgpu_device_read_bios(adev)) {
2591                                 if (!amdgpu_get_bios(adev))
2592                                         return -EINVAL;
2593
2594                                 r = amdgpu_atombios_init(adev);
2595                                 if (r) {
2596                                         dev_err(adev->dev, "amdgpu_atombios_init failed\n");
2597                                         amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_INIT_FAIL, 0, 0);
2598                                         return r;
2599                                 }
2600                         }
2601
2602                         /* get pf2vf msg info at its earliest time */
2603                         if (amdgpu_sriov_vf(adev))
2604                                 amdgpu_virt_init_data_exchange(adev);
2605
2606                 }
2607         }
2608         if (!total)
2609                 return -ENODEV;
2610
2611         amdgpu_amdkfd_device_probe(adev);
2612         adev->cg_flags &= amdgpu_cg_mask;
2613         adev->pg_flags &= amdgpu_pg_mask;
2614
2615         return 0;
2616 }
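
/*
 * Example, informational only: the ip_block_mask test in the loop above lets
 * individual IPs be disabled from the command line for debugging; bit i
 * corresponds to adev->ip_blocks[i], so e.g.
 *
 *	amdgpu.ip_block_mask=0xfffffffd
 *
 * marks IP block 1 invalid and skips all of its init callbacks.
 */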
2617
2618 static int amdgpu_device_ip_hw_init_phase1(struct amdgpu_device *adev)
2619 {
2620         int i, r;
2621
2622         for (i = 0; i < adev->num_ip_blocks; i++) {
2623                 if (!adev->ip_blocks[i].status.sw)
2624                         continue;
2625                 if (adev->ip_blocks[i].status.hw)
2626                         continue;
2627                 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
2628                     (amdgpu_sriov_vf(adev) && (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP)) ||
2629                     adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH) {
2630                         r = adev->ip_blocks[i].version->funcs->hw_init(adev);
2631                         if (r) {
2632                                 DRM_ERROR("hw_init of IP block <%s> failed %d\n",
2633                                           adev->ip_blocks[i].version->funcs->name, r);
2634                                 return r;
2635                         }
2636                         adev->ip_blocks[i].status.hw = true;
2637                 }
2638         }
2639
2640         return 0;
2641 }
2642
2643 static int amdgpu_device_ip_hw_init_phase2(struct amdgpu_device *adev)
2644 {
2645         int i, r;
2646
2647         for (i = 0; i < adev->num_ip_blocks; i++) {
2648                 if (!adev->ip_blocks[i].status.sw)
2649                         continue;
2650                 if (adev->ip_blocks[i].status.hw)
2651                         continue;
2652                 r = adev->ip_blocks[i].version->funcs->hw_init(adev);
2653                 if (r) {
2654                         DRM_ERROR("hw_init of IP block <%s> failed %d\n",
2655                                   adev->ip_blocks[i].version->funcs->name, r);
2656                         return r;
2657                 }
2658                 adev->ip_blocks[i].status.hw = true;
2659         }
2660
2661         return 0;
2662 }
2663
2664 static int amdgpu_device_fw_loading(struct amdgpu_device *adev)
2665 {
2666         int r = 0;
2667         int i;
2668         uint32_t smu_version;
2669
2670         if (adev->asic_type >= CHIP_VEGA10) {
2671                 for (i = 0; i < adev->num_ip_blocks; i++) {
2672                         if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_PSP)
2673                                 continue;
2674
2675                         if (!adev->ip_blocks[i].status.sw)
2676                                 continue;
2677
2678                         /* no need to do the fw loading again if already done */
2679                         if (adev->ip_blocks[i].status.hw == true)
2680                                 break;
2681
2682                         if (amdgpu_in_reset(adev) || adev->in_suspend) {
2683                                 r = adev->ip_blocks[i].version->funcs->resume(adev);
2684                                 if (r) {
2685                                         DRM_ERROR("resume of IP block <%s> failed %d\n",
2686                                                           adev->ip_blocks[i].version->funcs->name, r);
2687                                         return r;
2688                                 }
2689                         } else {
2690                                 r = adev->ip_blocks[i].version->funcs->hw_init(adev);
2691                                 if (r) {
2692                                         DRM_ERROR("hw_init of IP block <%s> failed %d\n",
2693                                                           adev->ip_blocks[i].version->funcs->name, r);
2694                                         return r;
2695                                 }
2696                         }
2697
2698                         adev->ip_blocks[i].status.hw = true;
2699                         break;
2700                 }
2701         }
2702
2703         if (!amdgpu_sriov_vf(adev) || adev->asic_type == CHIP_TONGA)
2704                 r = amdgpu_pm_load_smu_firmware(adev, &smu_version);
2705
2706         return r;
2707 }
2708
2709 static int amdgpu_device_init_schedulers(struct amdgpu_device *adev)
2710 {
2711         long timeout;
2712         int r, i;
2713
2714         for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
2715                 struct amdgpu_ring *ring = adev->rings[i];
2716
2717                 /* No need to set up the GPU scheduler for rings that don't need it */
2718                 if (!ring || ring->no_scheduler)
2719                         continue;
2720
2721                 switch (ring->funcs->type) {
2722                 case AMDGPU_RING_TYPE_GFX:
2723                         timeout = adev->gfx_timeout;
2724                         break;
2725                 case AMDGPU_RING_TYPE_COMPUTE:
2726                         timeout = adev->compute_timeout;
2727                         break;
2728                 case AMDGPU_RING_TYPE_SDMA:
2729                         timeout = adev->sdma_timeout;
2730                         break;
2731                 default:
2732                         timeout = adev->video_timeout;
2733                         break;
2734                 }
2735
2736                 r = drm_sched_init(&ring->sched, &amdgpu_sched_ops, NULL,
2737                                    DRM_SCHED_PRIORITY_COUNT,
2738                                    ring->num_hw_submission, 0,
2739                                    timeout, adev->reset_domain->wq,
2740                                    ring->sched_score, ring->name,
2741                                    adev->dev);
2742                 if (r) {
2743                         DRM_ERROR("Failed to create scheduler on ring %s.\n",
2744                                   ring->name);
2745                         return r;
2746                 }
2747                 r = amdgpu_uvd_entity_init(adev, ring);
2748                 if (r) {
2749                         DRM_ERROR("Failed to create UVD scheduling entity on ring %s.\n",
2750                                   ring->name);
2751                         return r;
2752                 }
2753                 r = amdgpu_vce_entity_init(adev, ring);
2754                 if (r) {
2755                         DRM_ERROR("Failed to create VCE scheduling entity on ring %s.\n",
2756                                   ring->name);
2757                         return r;
2758                 }
2759         }
2760
2761         amdgpu_xcp_update_partition_sched_list(adev);
2762
2763         return 0;
2764 }
2765
2766
2767 /**
2768  * amdgpu_device_ip_init - run init for hardware IPs
2769  *
2770  * @adev: amdgpu_device pointer
2771  *
2772  * Main initialization pass for hardware IPs.  The list of all the hardware
2773  * IPs that make up the asic is walked and the sw_init and hw_init callbacks
2774  * are run.  sw_init initializes the software state associated with each IP
2775  * and hw_init initializes the hardware associated with each IP.
2776  * Returns 0 on success, negative error code on failure.
2777  */
2778 static int amdgpu_device_ip_init(struct amdgpu_device *adev)
2779 {
2780         int i, r;
2781
2782         r = amdgpu_ras_init(adev);
2783         if (r)
2784                 return r;
2785
2786         for (i = 0; i < adev->num_ip_blocks; i++) {
2787                 if (!adev->ip_blocks[i].status.valid)
2788                         continue;
2789                 r = adev->ip_blocks[i].version->funcs->sw_init((void *)adev);
2790                 if (r) {
2791                         DRM_ERROR("sw_init of IP block <%s> failed %d\n",
2792                                   adev->ip_blocks[i].version->funcs->name, r);
2793                         goto init_failed;
2794                 }
2795                 adev->ip_blocks[i].status.sw = true;
2796
2797                 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON) {
2798                         /* need to do common hw init early so everything is set up for gmc */
2799                         r = adev->ip_blocks[i].version->funcs->hw_init((void *)adev);
2800                         if (r) {
2801                                 DRM_ERROR("hw_init %d failed %d\n", i, r);
2802                                 goto init_failed;
2803                         }
2804                         adev->ip_blocks[i].status.hw = true;
2805                 } else if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) {
2806                         /* need to do gmc hw init early so we can allocate gpu mem */
2807                         /* Try to reserve bad pages early */
2808                         if (amdgpu_sriov_vf(adev))
2809                                 amdgpu_virt_exchange_data(adev);
2810
2811                         r = amdgpu_device_mem_scratch_init(adev);
2812                         if (r) {
2813                                 DRM_ERROR("amdgpu_mem_scratch_init failed %d\n", r);
2814                                 goto init_failed;
2815                         }
2816                         r = adev->ip_blocks[i].version->funcs->hw_init((void *)adev);
2817                         if (r) {
2818                                 DRM_ERROR("hw_init %d failed %d\n", i, r);
2819                                 goto init_failed;
2820                         }
2821                         r = amdgpu_device_wb_init(adev);
2822                         if (r) {
2823                                 DRM_ERROR("amdgpu_device_wb_init failed %d\n", r);
2824                                 goto init_failed;
2825                         }
2826                         adev->ip_blocks[i].status.hw = true;
2827
2828                         /* right after GMC hw init, we create CSA */
2829                         if (adev->gfx.mcbp) {
2830                                 r = amdgpu_allocate_static_csa(adev, &adev->virt.csa_obj,
2831                                                                AMDGPU_GEM_DOMAIN_VRAM |
2832                                                                AMDGPU_GEM_DOMAIN_GTT,
2833                                                                AMDGPU_CSA_SIZE);
2834                                 if (r) {
2835                                         DRM_ERROR("allocate CSA failed %d\n", r);
2836                                         goto init_failed;
2837                                 }
2838                         }
2839
2840                         r = amdgpu_seq64_init(adev);
2841                         if (r) {
2842                                 DRM_ERROR("allocate seq64 failed %d\n", r);
2843                                 goto init_failed;
2844                         }
2845                 }
2846         }
2847
2848         if (amdgpu_sriov_vf(adev))
2849                 amdgpu_virt_init_data_exchange(adev);
2850
2851         r = amdgpu_ib_pool_init(adev);
2852         if (r) {
2853                 dev_err(adev->dev, "IB initialization failed (%d).\n", r);
2854                 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_IB_INIT_FAIL, 0, r);
2855                 goto init_failed;
2856         }
2857
2858         r = amdgpu_ucode_create_bo(adev); /* create ucode bo when sw_init complete*/
2859         if (r)
2860                 goto init_failed;
2861
2862         r = amdgpu_device_ip_hw_init_phase1(adev);
2863         if (r)
2864                 goto init_failed;
2865
2866         r = amdgpu_device_fw_loading(adev);
2867         if (r)
2868                 goto init_failed;
2869
2870         r = amdgpu_device_ip_hw_init_phase2(adev);
2871         if (r)
2872                 goto init_failed;
2873
2874         /*
2875          * Retired pages will be loaded from eeprom and reserved here.
2876          * This should be called after amdgpu_device_ip_hw_init_phase2, since
2877          * for some ASICs the RAS EEPROM code relies on the SMU being fully
2878          * functional for I2C communication, which is only true at this point.
2879          *
2880          * amdgpu_ras_recovery_init may fail, but the upper layers only care
2881          * about a failure caused by a bad gpu situation and stop the amdgpu
2882          * init process accordingly. For other failure cases it will still
2883          * release all the resources and print an error message, rather than
2884          * returning a negative value to the upper level.
2885          *
2886          * Note: theoretically, this should be called before all vram allocations
2887          * to protect retired pages from being abused.
2888          */
2889         r = amdgpu_ras_recovery_init(adev);
2890         if (r)
2891                 goto init_failed;
2892
2893         /*
2894          * In case of XGMI, grab an extra reference on the reset domain for this device
2895          */
2896         if (adev->gmc.xgmi.num_physical_nodes > 1) {
2897                 if (amdgpu_xgmi_add_device(adev) == 0) {
2898                         if (!amdgpu_sriov_vf(adev)) {
2899                                 struct amdgpu_hive_info *hive = amdgpu_get_xgmi_hive(adev);
2900
2901                                 if (WARN_ON(!hive)) {
2902                                         r = -ENOENT;
2903                                         goto init_failed;
2904                                 }
2905
2906                                 if (!hive->reset_domain ||
2907                                     !amdgpu_reset_get_reset_domain(hive->reset_domain)) {
2908                                         r = -ENOENT;
2909                                         amdgpu_put_xgmi_hive(hive);
2910                                         goto init_failed;
2911                                 }
2912
2913                                 /* Drop the early temporary reset domain we created for the device */
2914                                 amdgpu_reset_put_reset_domain(adev->reset_domain);
2915                                 adev->reset_domain = hive->reset_domain;
2916                                 amdgpu_put_xgmi_hive(hive);
2917                         }
2918                 }
2919         }
2920
2921         r = amdgpu_device_init_schedulers(adev);
2922         if (r)
2923                 goto init_failed;
2924
2925         if (adev->mman.buffer_funcs_ring->sched.ready)
2926                 amdgpu_ttm_set_buffer_funcs_status(adev, true);
2927
2928         /* Don't init kfd if the whole hive needs to be reset during init */
2929         if (!adev->gmc.xgmi.pending_reset) {
2930                 kgd2kfd_init_zone_device(adev);
2931                 amdgpu_amdkfd_device_init(adev);
2932         }
2933
2934         amdgpu_fru_get_product_info(adev);
2935
2936 init_failed:
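        /* Note: this label is also reached on the success path, with r == 0. */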
2937
2938         return r;
2939 }
2940
2941 /**
2942  * amdgpu_device_fill_reset_magic - cache the reset magic from the gart pointer
2943  *
2944  * @adev: amdgpu_device pointer
2945  *
2946  * Saves the bytes at the gart pointer in VRAM as the reset magic value.  The
2947  * driver calls this before a GPU reset.  If the value is still present after
2948  * the reset, VRAM has not been lost; some GPU resets destroy VRAM contents.
2949  */
2950 static void amdgpu_device_fill_reset_magic(struct amdgpu_device *adev)
2951 {
2952         memcpy(adev->reset_magic, adev->gart.ptr, AMDGPU_RESET_MAGIC_NUM);
2953 }
2954
2955 /**
2956  * amdgpu_device_check_vram_lost - check if vram is valid
2957  *
2958  * @adev: amdgpu_device pointer
2959  *
2960  * Checks the reset magic value at the gart pointer in VRAM against the
2961  * cached copy.  The driver calls this after a GPU reset to see whether
2962  * the contents of VRAM were lost.
2963  * Returns true if vram is lost, false if not.
2964  */
2965 static bool amdgpu_device_check_vram_lost(struct amdgpu_device *adev)
2966 {
2967         if (memcmp(adev->gart.ptr, adev->reset_magic,
2968                         AMDGPU_RESET_MAGIC_NUM))
2969                 return true;
2970
2971         if (!amdgpu_in_reset(adev))
2972                 return false;
2973
2974         /*
2975          * For all ASICs with baco/mode1 reset, the VRAM is
2976          * always assumed to be lost.
2977          */
2978         switch (amdgpu_asic_reset_method(adev)) {
2979         case AMD_RESET_METHOD_BACO:
2980         case AMD_RESET_METHOD_MODE1:
2981                 return true;
2982         default:
2983                 return false;
2984         }
2985 }
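
/*
 * Illustrative sketch, not driver code: the two helpers above are meant
 * to pair up around a reset, roughly as follows (the caller and ordering
 * are simplified; in practice the check runs from the reset path, e.g.
 * amdgpu_do_asic_reset()):
 *
 *	amdgpu_device_fill_reset_magic(adev);	// snapshot magic bytes
 *	amdgpu_asic_reset(adev);		// VRAM may or may not survive
 *	if (amdgpu_device_check_vram_lost(adev))
 *		// re-post the ASIC and restore VRAM-backed buffers
 */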
2986
2987 /**
2988  * amdgpu_device_set_cg_state - set clockgating for amdgpu device
2989  *
2990  * @adev: amdgpu_device pointer
2991  * @state: clockgating state (gate or ungate)
2992  *
2993  * The list of all the hardware IPs that make up the asic is walked and
2994  * the set_clockgating_state callbacks are run.  The late init pass
2995  * enables clockgating for the hardware IPs; the fini or suspend pass
2996  * disables it.
2997  * Returns 0 on success, negative error code on failure.
2998  */
3000 int amdgpu_device_set_cg_state(struct amdgpu_device *adev,
3001                                enum amd_clockgating_state state)
3002 {
3003         int i, j, r;
3004
3005         if (amdgpu_emu_mode == 1)
3006                 return 0;
3007
3008         for (j = 0; j < adev->num_ip_blocks; j++) {
3009                 i = state == AMD_CG_STATE_GATE ? j : adev->num_ip_blocks - j - 1;
3010                 if (!adev->ip_blocks[i].status.late_initialized)
3011                         continue;
3012                 /* skip CG for GFX, SDMA on S0ix */
3013                 if (adev->in_s0ix &&
3014                     (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GFX ||
3015                      adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SDMA))
3016                         continue;
3017                 /* skip CG for VCE/UVD, it's handled specially */
3018                 if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_UVD &&
3019                     adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCE &&
3020                     adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCN &&
3021                     adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_JPEG &&
3022                     adev->ip_blocks[i].version->funcs->set_clockgating_state) {
3023                         /* enable clockgating to save power */
3024                         r = adev->ip_blocks[i].version->funcs->set_clockgating_state((void *)adev,
3025                                                                                      state);
3026                         if (r) {
3027                                 DRM_ERROR("set_clockgating_state of IP block <%s> failed %d\n",
3028                                           adev->ip_blocks[i].version->funcs->name, r);
3029                                 return r;
3030                         }
3031                 }
3032         }
3033
3034         return 0;
3035 }
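
/*
 * The index computation above walks the IP list front-to-back when
 * gating and back-to-front when ungating.  Illustration: with blocks
 * [COMMON, GMC, GFX], gating visits COMMON -> GMC -> GFX, while
 * ungating visits GFX -> GMC -> COMMON, undoing the gating in reverse
 * dependency order.  amdgpu_device_set_pg_state() below follows the
 * same pattern for powergating.
 */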
3036
3037 int amdgpu_device_set_pg_state(struct amdgpu_device *adev,
3038                                enum amd_powergating_state state)
3039 {
3040         int i, j, r;
3041
3042         if (amdgpu_emu_mode == 1)
3043                 return 0;
3044
3045         for (j = 0; j < adev->num_ip_blocks; j++) {
3046                 i = state == AMD_PG_STATE_GATE ? j : adev->num_ip_blocks - j - 1;
3047                 if (!adev->ip_blocks[i].status.late_initialized)
3048                         continue;
3049                 /* skip PG for GFX, SDMA on S0ix */
3050                 if (adev->in_s0ix &&
3051                     (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GFX ||
3052                      adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SDMA))
3053                         continue;
3054                 /* skip PG for VCE/UVD, it's handled specially */
3055                 if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_UVD &&
3056                     adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCE &&
3057                     adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCN &&
3058                     adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_JPEG &&
3059                     adev->ip_blocks[i].version->funcs->set_powergating_state) {
3060                         /* enable powergating to save power */
3061                         r = adev->ip_blocks[i].version->funcs->set_powergating_state((void *)adev,
3062                                                                                         state);
3063                         if (r) {
3064                                 DRM_ERROR("set_powergating_state of IP block <%s> failed %d\n",
3065                                           adev->ip_blocks[i].version->funcs->name, r);
3066                                 return r;
3067                         }
3068                 }
3069         }
3070         return 0;
3071 }
3072
3073 static int amdgpu_device_enable_mgpu_fan_boost(void)
3074 {
3075         struct amdgpu_gpu_instance *gpu_ins;
3076         struct amdgpu_device *adev;
3077         int i, ret = 0;
3078
3079         mutex_lock(&mgpu_info.mutex);
3080
3081         /*
3082          * MGPU fan boost feature should be enabled
3083          * only when there are two or more dGPUs in
3084          * the system
3085          */
3086         if (mgpu_info.num_dgpu < 2)
3087                 goto out;
3088
3089         for (i = 0; i < mgpu_info.num_dgpu; i++) {
3090                 gpu_ins = &(mgpu_info.gpu_ins[i]);
3091                 adev = gpu_ins->adev;
3092                 if (!(adev->flags & AMD_IS_APU) &&
3093                     !gpu_ins->mgpu_fan_enabled) {
3094                         ret = amdgpu_dpm_enable_mgpu_fan_boost(adev);
3095                         if (ret)
3096                                 break;
3097
3098                         gpu_ins->mgpu_fan_enabled = 1;
3099                 }
3100         }
3101
3102 out:
3103         mutex_unlock(&mgpu_info.mutex);
3104
3105         return ret;
3106 }
3107
3108 /**
3109  * amdgpu_device_ip_late_init - run late init for hardware IPs
3110  *
3111  * @adev: amdgpu_device pointer
3112  *
3113  * Late initialization pass for hardware IPs.  The list of all the hardware
3114  * IPs that make up the asic is walked and the late_init callbacks are run.
3115  * late_init covers any special initialization that an IP requires
3116  * after all of them have been initialized or something that needs to happen
3117  * late in the init process.
3118  * Returns 0 on success, negative error code on failure.
3119  */
3120 static int amdgpu_device_ip_late_init(struct amdgpu_device *adev)
3121 {
3122         struct amdgpu_gpu_instance *gpu_instance;
3123         int i = 0, r;
3124
3125         for (i = 0; i < adev->num_ip_blocks; i++) {
3126                 if (!adev->ip_blocks[i].status.hw)
3127                         continue;
3128                 if (adev->ip_blocks[i].version->funcs->late_init) {
3129                         r = adev->ip_blocks[i].version->funcs->late_init((void *)adev);
3130                         if (r) {
3131                                 DRM_ERROR("late_init of IP block <%s> failed %d\n",
3132                                           adev->ip_blocks[i].version->funcs->name, r);
3133                                 return r;
3134                         }
3135                 }
3136                 adev->ip_blocks[i].status.late_initialized = true;
3137         }
3138
3139         r = amdgpu_ras_late_init(adev);
3140         if (r) {
3141                 DRM_ERROR("amdgpu_ras_late_init failed %d\n", r);
3142                 return r;
3143         }
3144
3145         amdgpu_ras_set_error_query_ready(adev, true);
3146
3147         amdgpu_device_set_cg_state(adev, AMD_CG_STATE_GATE);
3148         amdgpu_device_set_pg_state(adev, AMD_PG_STATE_GATE);
3149
3150         amdgpu_device_fill_reset_magic(adev);
3151
3152         r = amdgpu_device_enable_mgpu_fan_boost();
3153         if (r)
3154                 DRM_ERROR("enable mgpu fan boost failed (%d).\n", r);
3155
3156         /* For passthrough configurations on arcturus and aldebaran, enable special SBR handling */
3157         if (amdgpu_passthrough(adev) &&
3158             ((adev->asic_type == CHIP_ARCTURUS && adev->gmc.xgmi.num_physical_nodes > 1) ||
3159              adev->asic_type == CHIP_ALDEBARAN))
3160                 amdgpu_dpm_handle_passthrough_sbr(adev, true);
3161
3162         if (adev->gmc.xgmi.num_physical_nodes > 1) {
3163                 mutex_lock(&mgpu_info.mutex);
3164
3165                 /*
3166                  * Reset the device p-state to low, as it was booted with high.
3167                  *
3168                  * This should be performed only after all devices from the
3169                  * same hive have been initialized.
3170                  *
3171                  * However, the number of devices in the hive is not known in
3172                  * advance; it is counted up one by one as each device initializes.
3173                  *
3174                  * So we wait until all XGMI-interlinked devices are initialized.
3175                  * This may add some delay, as those devices may come from
3176                  * different hives.  But that should be OK.
3177                  */
3178                 if (mgpu_info.num_dgpu == adev->gmc.xgmi.num_physical_nodes) {
3179                         for (i = 0; i < mgpu_info.num_gpu; i++) {
3180                                 gpu_instance = &(mgpu_info.gpu_ins[i]);
3181                                 if (gpu_instance->adev->flags & AMD_IS_APU)
3182                                         continue;
3183
3184                                 r = amdgpu_xgmi_set_pstate(gpu_instance->adev,
3185                                                 AMDGPU_XGMI_PSTATE_MIN);
3186                                 if (r) {
3187                                         DRM_ERROR("pstate setting failed (%d).\n", r);
3188                                         break;
3189                                 }
3190                         }
3191                 }
3192
3193                 mutex_unlock(&mgpu_info.mutex);
3194         }
3195
3196         return 0;
3197 }
3198
3199 /**
3200  * amdgpu_device_smu_fini_early - smu hw_fini wrapper
3201  *
3202  * @adev: amdgpu_device pointer
3203  *
3204  * For ASICs (GC IP v9.0.0 and older) that need to disable the SMC first.
3205  */
3206 static void amdgpu_device_smu_fini_early(struct amdgpu_device *adev)
3207 {
3208         int i, r;
3209
3210         if (amdgpu_ip_version(adev, GC_HWIP, 0) > IP_VERSION(9, 0, 0))
3211                 return;
3212
3213         for (i = 0; i < adev->num_ip_blocks; i++) {
3214                 if (!adev->ip_blocks[i].status.hw)
3215                         continue;
3216                 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) {
3217                         r = adev->ip_blocks[i].version->funcs->hw_fini((void *)adev);
3218                         /* XXX handle errors */
3219                         if (r) {
3220                                 DRM_DEBUG("hw_fini of IP block <%s> failed %d\n",
3221                                           adev->ip_blocks[i].version->funcs->name, r);
3222                         }
3223                         adev->ip_blocks[i].status.hw = false;
3224                         break;
3225                 }
3226         }
3227 }
3228
3229 static int amdgpu_device_ip_fini_early(struct amdgpu_device *adev)
3230 {
3231         int i, r;
3232
3233         for (i = 0; i < adev->num_ip_blocks; i++) {
3234                 if (!adev->ip_blocks[i].version->funcs->early_fini)
3235                         continue;
3236
3237                 r = adev->ip_blocks[i].version->funcs->early_fini((void *)adev);
3238                 if (r) {
3239                         DRM_DEBUG("early_fini of IP block <%s> failed %d\n",
3240                                   adev->ip_blocks[i].version->funcs->name, r);
3241                 }
3242         }
3243
3244         amdgpu_device_set_pg_state(adev, AMD_PG_STATE_UNGATE);
3245         amdgpu_device_set_cg_state(adev, AMD_CG_STATE_UNGATE);
3246
3247         amdgpu_amdkfd_suspend(adev, false);
3248
3249         /* Workaround for ASICs that need to disable the SMC first */
3250         amdgpu_device_smu_fini_early(adev);
3251
3252         for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
3253                 if (!adev->ip_blocks[i].status.hw)
3254                         continue;
3255
3256                 r = adev->ip_blocks[i].version->funcs->hw_fini((void *)adev);
3257                 /* XXX handle errors */
3258                 if (r) {
3259                         DRM_DEBUG("hw_fini of IP block <%s> failed %d\n",
3260                                   adev->ip_blocks[i].version->funcs->name, r);
3261                 }
3262
3263                 adev->ip_blocks[i].status.hw = false;
3264         }
3265
3266         if (amdgpu_sriov_vf(adev)) {
3267                 if (amdgpu_virt_release_full_gpu(adev, false))
3268                         DRM_ERROR("failed to release exclusive mode on fini\n");
3269         }
3270
3271         return 0;
3272 }
3273
3274 /**
3275  * amdgpu_device_ip_fini - run fini for hardware IPs
3276  *
3277  * @adev: amdgpu_device pointer
3278  *
3279  * Main teardown pass for hardware IPs.  The list of all the hardware
3280  * IPs that make up the asic is walked and the hw_fini and sw_fini callbacks
3281  * are run.  hw_fini tears down the hardware associated with each IP
3282  * and sw_fini tears down any software state associated with each IP.
3283  * Returns 0 on success, negative error code on failure.
3284  */
3285 static int amdgpu_device_ip_fini(struct amdgpu_device *adev)
3286 {
3287         int i, r;
3288
3289         if (amdgpu_sriov_vf(adev) && adev->virt.ras_init_done)
3290                 amdgpu_virt_release_ras_err_handler_data(adev);
3291
3292         if (adev->gmc.xgmi.num_physical_nodes > 1)
3293                 amdgpu_xgmi_remove_device(adev);
3294
3295         amdgpu_amdkfd_device_fini_sw(adev);
3296
3297         for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
3298                 if (!adev->ip_blocks[i].status.sw)
3299                         continue;
3300
3301                 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) {
3302                         amdgpu_ucode_free_bo(adev);
3303                         amdgpu_free_static_csa(&adev->virt.csa_obj);
3304                         amdgpu_device_wb_fini(adev);
3305                         amdgpu_device_mem_scratch_fini(adev);
3306                         amdgpu_ib_pool_fini(adev);
3307                         amdgpu_seq64_fini(adev);
3308                 }
3309
3310                 r = adev->ip_blocks[i].version->funcs->sw_fini((void *)adev);
3311                 /* XXX handle errors */
3312                 if (r) {
3313                         DRM_DEBUG("sw_fini of IP block <%s> failed %d\n",
3314                                   adev->ip_blocks[i].version->funcs->name, r);
3315                 }
3316                 adev->ip_blocks[i].status.sw = false;
3317                 adev->ip_blocks[i].status.valid = false;
3318         }
3319
3320         for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
3321                 if (!adev->ip_blocks[i].status.late_initialized)
3322                         continue;
3323                 if (adev->ip_blocks[i].version->funcs->late_fini)
3324                         adev->ip_blocks[i].version->funcs->late_fini((void *)adev);
3325                 adev->ip_blocks[i].status.late_initialized = false;
3326         }
3327
3328         amdgpu_ras_fini(adev);
3329
3330         return 0;
3331 }
3332
3333 /**
3334  * amdgpu_device_delayed_init_work_handler - work handler for IB tests
3335  *
3336  * @work: work_struct.
3337  */
3338 static void amdgpu_device_delayed_init_work_handler(struct work_struct *work)
3339 {
3340         struct amdgpu_device *adev =
3341                 container_of(work, struct amdgpu_device, delayed_init_work.work);
3342         int r;
3343
3344         r = amdgpu_ib_ring_tests(adev);
3345         if (r)
3346                 DRM_ERROR("ib ring test failed (%d).\n", r);
3347 }
3348
3349 static void amdgpu_device_delay_enable_gfx_off(struct work_struct *work)
3350 {
3351         struct amdgpu_device *adev =
3352                 container_of(work, struct amdgpu_device, gfx.gfx_off_delay_work.work);
3353
3354         WARN_ON_ONCE(adev->gfx.gfx_off_state);
3355         WARN_ON_ONCE(adev->gfx.gfx_off_req_count);
3356
3357         if (!amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_GFX, true))
3358                 adev->gfx.gfx_off_state = true;
3359 }
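
/*
 * A sketch of the surrounding GFXOFF flow, for orientation (assuming the
 * usual request-counting scheme): amdgpu_gfx_off_ctrl(adev, true)
 * decrements gfx_off_req_count and, once it reaches zero, schedules
 * gfx_off_delay_work; only when the delay expires with the count still
 * zero does the handler above ask the SMU to actually powergate GFX.
 */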
3360
3361 /**
3362  * amdgpu_device_ip_suspend_phase1 - run suspend for hardware IPs (phase 1)
3363  *
3364  * @adev: amdgpu_device pointer
3365  *
3366  * First phase of the main suspend sequence.  Clockgating is disabled
3367  * and the hardware IP list is walked, but the suspend callbacks are run
3368  * for the display (DCE) hardware only; all other IPs are handled in
3369  * phase 2.  suspend puts each IP into a state suitable for suspend.
3370  * Returns 0 on success, negative error code on failure.
3371  */
3372 static int amdgpu_device_ip_suspend_phase1(struct amdgpu_device *adev)
3373 {
3374         int i, r;
3375
3376         amdgpu_device_set_pg_state(adev, AMD_PG_STATE_UNGATE);
3377         amdgpu_device_set_cg_state(adev, AMD_CG_STATE_UNGATE);
3378
3379         /*
3380          * Per PMFW team's suggestion, driver needs to handle gfxoff
3381          * and df cstate features disablement for gpu reset(e.g. Mode1Reset)
3382          * scenario. Add the missing df cstate disablement here.
3383          */
3384         if (amdgpu_dpm_set_df_cstate(adev, DF_CSTATE_DISALLOW))
3385                 dev_warn(adev->dev, "Failed to disallow df cstate");
3386
3387         for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
3388                 if (!adev->ip_blocks[i].status.valid)
3389                         continue;
3390
3391                 /* displays are handled separately */
3392                 if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_DCE)
3393                         continue;
3394
3396                 r = adev->ip_blocks[i].version->funcs->suspend(adev);
3397                 /* XXX handle errors */
3398                 if (r) {
3399                         DRM_ERROR("suspend of IP block <%s> failed %d\n",
3400                                   adev->ip_blocks[i].version->funcs->name, r);
3401                         return r;
3402                 }
3403
3404                 adev->ip_blocks[i].status.hw = false;
3405         }
3406
3407         return 0;
3408 }
3409
3410 /**
3411  * amdgpu_device_ip_suspend_phase2 - run suspend for hardware IPs (phase 2)
3412  *
3413  * @adev: amdgpu_device pointer
3414  *
3415  * Second phase of the main suspend sequence.  The list of all the
3416  * hardware IPs that make up the asic is walked and the suspend callbacks
3417  * are run for every IP except the displays, which were handled in
3418  * phase 1.  suspend puts each IP into a state suitable for suspend.
3419  * Returns 0 on success, negative error code on failure.
3420  */
3421 static int amdgpu_device_ip_suspend_phase2(struct amdgpu_device *adev)
3422 {
3423         int i, r;
3424
3425         if (adev->in_s0ix)
3426                 amdgpu_dpm_gfx_state_change(adev, sGpuChangeState_D3Entry);
3427
3428         for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
3429                 if (!adev->ip_blocks[i].status.valid)
3430                         continue;
3431                 /* displays are handled in phase1 */
3432                 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_DCE)
3433                         continue;
3434                 /* PSP lost connection when err_event_athub occurs */
3435                 if (amdgpu_ras_intr_triggered() &&
3436                     adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP) {
3437                         adev->ip_blocks[i].status.hw = false;
3438                         continue;
3439                 }
3440
3441                 /* skip unnecessary suspend if we have not initialized them yet */
3442                 if (adev->gmc.xgmi.pending_reset &&
3443                     !(adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
3444                       adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC ||
3445                       adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
3446                       adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH)) {
3447                         adev->ip_blocks[i].status.hw = false;
3448                         continue;
3449                 }
3450
3451                 /* Skip suspend of gfx/mes and psp for S0ix.
3452                  * gfx is in the gfxoff state, so on resume it will exit gfxoff
3453                  * just as it does at runtime.  PSP is also part of the always-on
3454                  * hardware, so there is no need to suspend it.
3455                  */
3456                 if (adev->in_s0ix &&
3457                     (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP ||
3458                      adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GFX ||
3459                      adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_MES))
3460                         continue;
3461
3462                 /* SDMA 5.x+ is part of GFX power domain so it's covered by GFXOFF */
3463                 if (adev->in_s0ix &&
3464                     (amdgpu_ip_version(adev, SDMA0_HWIP, 0) >=
3465                      IP_VERSION(5, 0, 0)) &&
3466                     (adev->ip_blocks[i].version->type ==
3467                      AMD_IP_BLOCK_TYPE_SDMA))
3468                         continue;
3469
3470                 /* During cold boot, swPSP provides the IMU and RLC FW binaries to TOS.
3471                  * These live in the TMR and are expected to be reused by PSP-TOS to
3472                  * reload from that location; RLC autoload is likewise triggered from
3473                  * there based on the PMFW -> PSP message during the re-init sequence.
3474                  * Therefore, psp suspend & resume should be skipped to avoid destroying
3475                  * the TMR and reloading the FWs again on IMU-enabled APU ASICs.
3476                  */
3477                 if (amdgpu_in_reset(adev) &&
3478                     (adev->flags & AMD_IS_APU) && adev->gfx.imu.funcs &&
3479                     adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP)
3480                         continue;
3481
3483                 r = adev->ip_blocks[i].version->funcs->suspend(adev);
3484                 /* XXX handle errors */
3485                 if (r) {
3486                         DRM_ERROR("suspend of IP block <%s> failed %d\n",
3487                                   adev->ip_blocks[i].version->funcs->name, r);
3488                 }
3489                 adev->ip_blocks[i].status.hw = false;
3490                 /* handle putting the SMC in the appropriate state */
3491                 if (!amdgpu_sriov_vf(adev)) {
3492                         if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) {
3493                                 r = amdgpu_dpm_set_mp1_state(adev, adev->mp1_state);
3494                                 if (r) {
3495                                         DRM_ERROR("SMC failed to set mp1 state %d, %d\n",
3496                                                         adev->mp1_state, r);
3497                                         return r;
3498                                 }
3499                         }
3500                 }
3501         }
3502
3503         return 0;
3504 }
3505
3506 /**
3507  * amdgpu_device_ip_suspend - run suspend for hardware IPs
3508  *
3509  * @adev: amdgpu_device pointer
3510  *
3511  * Main suspend function for hardware IPs.  The list of all the hardware
3512  * IPs that make up the asic is walked, clockgating is disabled and the
3513  * suspend callbacks are run.  suspend puts the hardware and software state
3514  * in each IP into a state suitable for suspend.
3515  * Returns 0 on success, negative error code on failure.
3516  */
3517 int amdgpu_device_ip_suspend(struct amdgpu_device *adev)
3518 {
3519         int r;
3520
3521         if (amdgpu_sriov_vf(adev)) {
3522                 amdgpu_virt_fini_data_exchange(adev);
3523                 amdgpu_virt_request_full_gpu(adev, false);
3524         }
3525
3526         amdgpu_ttm_set_buffer_funcs_status(adev, false);
3527
3528         r = amdgpu_device_ip_suspend_phase1(adev);
3529         if (r)
3530                 return r;
3531         r = amdgpu_device_ip_suspend_phase2(adev);
3532
3533         if (amdgpu_sriov_vf(adev))
3534                 amdgpu_virt_release_full_gpu(adev, false);
3535
3536         return r;
3537 }
3538
3539 static int amdgpu_device_ip_reinit_early_sriov(struct amdgpu_device *adev)
3540 {
3541         int i, r;
3542
3543         static enum amd_ip_block_type ip_order[] = {
3544                 AMD_IP_BLOCK_TYPE_COMMON,
3545                 AMD_IP_BLOCK_TYPE_GMC,
3546                 AMD_IP_BLOCK_TYPE_PSP,
3547                 AMD_IP_BLOCK_TYPE_IH,
3548         };
3549
3550         for (i = 0; i < adev->num_ip_blocks; i++) {
3551                 int j;
3552                 struct amdgpu_ip_block *block;
3553
3554                 block = &adev->ip_blocks[i];
3555                 block->status.hw = false;
3556
3557                 for (j = 0; j < ARRAY_SIZE(ip_order); j++) {
3558
3559                         if (block->version->type != ip_order[j] ||
3560                                 !block->status.valid)
3561                                 continue;
3562
3563                         r = block->version->funcs->hw_init(adev);
3564                         DRM_INFO("RE-INIT-early: %s %s\n", block->version->funcs->name, r ? "failed" : "succeeded");
3565                         if (r)
3566                                 return r;
3567                         block->status.hw = true;
3568                 }
3569         }
3570
3571         return 0;
3572 }
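
/*
 * Note that ip_order above deliberately covers only the minimal set of
 * blocks (COMMON, GMC, PSP, IH) needed to restore memory access and PSP
 * early after an SR-IOV reset; the remaining blocks are brought back by
 * amdgpu_device_ip_reinit_late_sriov() below.
 */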
3573
3574 static int amdgpu_device_ip_reinit_late_sriov(struct amdgpu_device *adev)
3575 {
3576         int i, r;
3577
3578         static enum amd_ip_block_type ip_order[] = {
3579                 AMD_IP_BLOCK_TYPE_SMC,
3580                 AMD_IP_BLOCK_TYPE_DCE,
3581                 AMD_IP_BLOCK_TYPE_GFX,
3582                 AMD_IP_BLOCK_TYPE_SDMA,
3583                 AMD_IP_BLOCK_TYPE_MES,
3584                 AMD_IP_BLOCK_TYPE_UVD,
3585                 AMD_IP_BLOCK_TYPE_VCE,
3586                 AMD_IP_BLOCK_TYPE_VCN,
3587                 AMD_IP_BLOCK_TYPE_JPEG
3588         };
3589
3590         for (i = 0; i < ARRAY_SIZE(ip_order); i++) {
3591                 int j;
3592                 struct amdgpu_ip_block *block;
3593
3594                 for (j = 0; j < adev->num_ip_blocks; j++) {
3595                         block = &adev->ip_blocks[j];
3596
3597                         if (block->version->type != ip_order[i] ||
3598                                 !block->status.valid ||
3599                                 block->status.hw)
3600                                 continue;
3601
3602                         if (block->version->type == AMD_IP_BLOCK_TYPE_SMC)
3603                                 r = block->version->funcs->resume(adev);
3604                         else
3605                                 r = block->version->funcs->hw_init(adev);
3606
3607                         DRM_INFO("RE-INIT-late: %s %s\n", block->version->funcs->name, r ? "failed" : "succeeded");
3608                         if (r)
3609                                 return r;
3610                         block->status.hw = true;
3611                 }
3612         }
3613
3614         return 0;
3615 }
3616
3617 /**
3618  * amdgpu_device_ip_resume_phase1 - run resume for hardware IPs
3619  *
3620  * @adev: amdgpu_device pointer
3621  *
3622  * First resume function for hardware IPs.  The list of all the hardware
3623  * IPs that make up the asic is walked and the resume callbacks are run for
3624  * COMMON, GMC, and IH.  resume puts the hardware into a functional state
3625  * COMMON, GMC, IH and, on SR-IOV, PSP.  resume puts the hardware into a
3626  * functional state after a suspend and updates the software state as necessary.  This
3627  * Returns 0 on success, negative error code on failure.
3628  */
3629 static int amdgpu_device_ip_resume_phase1(struct amdgpu_device *adev)
3630 {
3631         int i, r;
3632
3633         for (i = 0; i < adev->num_ip_blocks; i++) {
3634                 if (!adev->ip_blocks[i].status.valid || adev->ip_blocks[i].status.hw)
3635                         continue;
3636                 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
3637                     adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
3638                     adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH ||
3639                     (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP && amdgpu_sriov_vf(adev))) {
3640
3641                         r = adev->ip_blocks[i].version->funcs->resume(adev);
3642                         if (r) {
3643                                 DRM_ERROR("resume of IP block <%s> failed %d\n",
3644                                           adev->ip_blocks[i].version->funcs->name, r);
3645                                 return r;
3646                         }
3647                         adev->ip_blocks[i].status.hw = true;
3648                 }
3649         }
3650
3651         return 0;
3652 }
3653
3654 /**
3655  * amdgpu_device_ip_resume_phase2 - run resume for hardware IPs
3656  *
3657  * @adev: amdgpu_device pointer
3658  *
3659  * Second resume function for hardware IPs.  The list of all the hardware
3660  * IPs that make up the asic is walked and the resume callbacks are run for
3661  * all blocks except COMMON, GMC, and IH.  resume puts the hardware into a
3662  * functional state after a suspend and updates the software state as
3663  * necessary.  This function is also used for restoring the GPU after a GPU
3664  * reset.
3665  * Returns 0 on success, negative error code on failure.
3666  */
3667 static int amdgpu_device_ip_resume_phase2(struct amdgpu_device *adev)
3668 {
3669         int i, r;
3670
3671         for (i = 0; i < adev->num_ip_blocks; i++) {
3672                 if (!adev->ip_blocks[i].status.valid || adev->ip_blocks[i].status.hw)
3673                         continue;
3674                 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
3675                     adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
3676                     adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH ||
3677                     adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP)
3678                         continue;
3679                 r = adev->ip_blocks[i].version->funcs->resume(adev);
3680                 if (r) {
3681                         DRM_ERROR("resume of IP block <%s> failed %d\n",
3682                                   adev->ip_blocks[i].version->funcs->name, r);
3683                         return r;
3684                 }
3685                 adev->ip_blocks[i].status.hw = true;
3686         }
3687
3688         return 0;
3689 }
3690
3691 /**
3692  * amdgpu_device_ip_resume - run resume for hardware IPs
3693  *
3694  * @adev: amdgpu_device pointer
3695  *
3696  * Main resume function for hardware IPs.  The hardware IPs
3697  * are split into two resume functions because they are
3698  * also used in recovering from a GPU reset and some additional
3699  * steps need to be taken between them.  In this case (S3/S4) they are
3700  * run sequentially.
3701  * Returns 0 on success, negative error code on failure.
3702  */
3703 static int amdgpu_device_ip_resume(struct amdgpu_device *adev)
3704 {
3705         int r;
3706
3707         r = amdgpu_device_ip_resume_phase1(adev);
3708         if (r)
3709                 return r;
3710
3711         r = amdgpu_device_fw_loading(adev);
3712         if (r)
3713                 return r;
3714
3715         r = amdgpu_device_ip_resume_phase2(adev);
3716
3717         if (adev->mman.buffer_funcs_ring->sched.ready)
3718                 amdgpu_ttm_set_buffer_funcs_status(adev, true);
3719
3720         return r;
3721 }
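
/*
 * Illustrative summary of the resume sequence above:
 *
 *	phase 1: COMMON, GMC, IH (and PSP on SR-IOV)
 *	amdgpu_device_fw_loading(): firmware is (re)loaded once the
 *		memory controller is up, before the remaining blocks run
 *	phase 2: every other IP block
 *
 * This mirrors the two-phase split used when recovering from a GPU
 * reset.
 */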
3722
3723 /**
3724  * amdgpu_device_detect_sriov_bios - determine if the board supports SR-IOV
3725  *
3726  * @adev: amdgpu_device pointer
3727  *
3728  * Query the VBIOS data tables to determine if the board supports SR-IOV.
3729  */
3730 static void amdgpu_device_detect_sriov_bios(struct amdgpu_device *adev)
3731 {
3732         if (amdgpu_sriov_vf(adev)) {
3733                 if (adev->is_atom_fw) {
3734                         if (amdgpu_atomfirmware_gpu_virtualization_supported(adev))
3735                                 adev->virt.caps |= AMDGPU_SRIOV_CAPS_SRIOV_VBIOS;
3736                 } else {
3737                         if (amdgpu_atombios_has_gpu_virtualization_table(adev))
3738                                 adev->virt.caps |= AMDGPU_SRIOV_CAPS_SRIOV_VBIOS;
3739                 }
3740
3741                 if (!(adev->virt.caps & AMDGPU_SRIOV_CAPS_SRIOV_VBIOS))
3742                         amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_NO_VBIOS, 0, 0);
3743         }
3744 }
3745
3746 /**
3747  * amdgpu_device_asic_has_dc_support - determine if DC supports the asic
3748  *
3749  * @asic_type: AMD asic type
3750  *
3751  * Check if there is DC (new modesetting infrastructure) support for an asic.
3752  * Returns true if DC has support, false if not.
3753  */
3754 bool amdgpu_device_asic_has_dc_support(enum amd_asic_type asic_type)
3755 {
3756         switch (asic_type) {
3757 #ifdef CONFIG_DRM_AMDGPU_SI
3758         case CHIP_HAINAN:
3759 #endif
3760         case CHIP_TOPAZ:
3761                 /* chips with no display hardware */
3762                 return false;
3763 #if defined(CONFIG_DRM_AMD_DC)
3764         case CHIP_TAHITI:
3765         case CHIP_PITCAIRN:
3766         case CHIP_VERDE:
3767         case CHIP_OLAND:
3768                 /*
3769                  * We have systems in the wild with these ASICs that require
3770                  * LVDS and VGA support which is not supported with DC.
3771                  *
3772                  * Fall back to the non-DC driver here by default so as not to
3773                  * cause regressions.
3774                  */
3775 #if defined(CONFIG_DRM_AMD_DC_SI)
3776                 return amdgpu_dc > 0;
3777 #else
3778                 return false;
3779 #endif
3780         case CHIP_BONAIRE:
3781         case CHIP_KAVERI:
3782         case CHIP_KABINI:
3783         case CHIP_MULLINS:
3784                 /*
3785                  * We have systems in the wild with these ASICs that require
3786                  * VGA support which is not supported with DC.
3787                  *
3788                  * Fall back to the non-DC driver here by default so as not to
3789                  * cause regressions.
3790                  */
3791                 return amdgpu_dc > 0;
3792         default:
3793                 return amdgpu_dc != 0;
3794 #else
3795         default:
3796                 if (amdgpu_dc > 0)
3797                         DRM_INFO_ONCE("Display Core has been requested via kernel parameter but isn't supported by ASIC, ignoring\n");
3798                 return false;
3799 #endif
3800         }
3801 }
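
/*
 * amdgpu_dc above is the amdgpu.dc module parameter; assuming the usual
 * -1/0/1 convention: amdgpu.dc=1 opts the borderline ASICs above into DC,
 * amdgpu.dc=0 forces the legacy display path, and the default (-1, auto)
 * enables DC wherever it is supported.
 */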
3802
3803 /**
3804  * amdgpu_device_has_dc_support - check if dc is supported
3805  *
3806  * @adev: amdgpu_device pointer
3807  *
3808  * Returns true for supported, false for not supported
3809  */
3810 bool amdgpu_device_has_dc_support(struct amdgpu_device *adev)
3811 {
3812         if (adev->enable_virtual_display ||
3813             (adev->harvest_ip_mask & AMD_HARVEST_IP_DMU_MASK))
3814                 return false;
3815
3816         return amdgpu_device_asic_has_dc_support(adev->asic_type);
3817 }
3818
3819 static void amdgpu_device_xgmi_reset_func(struct work_struct *__work)
3820 {
3821         struct amdgpu_device *adev =
3822                 container_of(__work, struct amdgpu_device, xgmi_reset_work);
3823         struct amdgpu_hive_info *hive = amdgpu_get_xgmi_hive(adev);
3824
3825         /* It's a bug to not have a hive within this function */
3826         if (WARN_ON(!hive))
3827                 return;
3828
3829         /*
3830          * Use task barrier to synchronize all xgmi reset works across the
3831          * hive. task_barrier_enter and task_barrier_exit will block
3832          * until all the threads running the xgmi reset works reach
3833          * those points. task_barrier_full will do both blocks.
3834          */
3835         if (amdgpu_asic_reset_method(adev) == AMD_RESET_METHOD_BACO) {
3836
3837                 task_barrier_enter(&hive->tb);
3838                 adev->asic_reset_res = amdgpu_device_baco_enter(adev_to_drm(adev));
3839
3840                 if (adev->asic_reset_res)
3841                         goto fail;
3842
3843                 task_barrier_exit(&hive->tb);
3844                 adev->asic_reset_res = amdgpu_device_baco_exit(adev_to_drm(adev));
3845
3846                 if (adev->asic_reset_res)
3847                         goto fail;
3848
3849                 amdgpu_ras_reset_error_count(adev, AMDGPU_RAS_BLOCK__MMHUB);
3850         } else {
3851
3852                 task_barrier_full(&hive->tb);
3853                 adev->asic_reset_res =  amdgpu_asic_reset(adev);
3854         }
3855
3856 fail:
3857         if (adev->asic_reset_res)
3858                 DRM_WARN("ASIC reset failed with error %d for drm dev %s",
3859                          adev->asic_reset_res, adev_to_drm(adev)->unique);
3860         amdgpu_put_xgmi_hive(hive);
3861 }
3862
3863 static int amdgpu_device_get_job_timeout_settings(struct amdgpu_device *adev)
3864 {
3865         char *input = amdgpu_lockup_timeout;
3866         char *timeout_setting = NULL;
3867         int index = 0;
3868         long timeout;
3869         int ret = 0;
3870
3871         /*
3872          * By default the timeout for non-compute jobs is 10000 ms,
3873          * and 60000 ms for compute jobs.
3874          * In SR-IOV or passthrough mode, the timeout for compute
3875          * jobs is 60000 ms by default.
3876          */
3877         adev->gfx_timeout = msecs_to_jiffies(10000);
3878         adev->sdma_timeout = adev->video_timeout = adev->gfx_timeout;
3879         if (amdgpu_sriov_vf(adev))
3880                 adev->compute_timeout = amdgpu_sriov_is_pp_one_vf(adev) ?
3881                                         msecs_to_jiffies(60000) : msecs_to_jiffies(10000);
3882         else
3883                 adev->compute_timeout =  msecs_to_jiffies(60000);
3884
3885         if (strnlen(input, AMDGPU_MAX_TIMEOUT_PARAM_LENGTH)) {
3886                 while ((timeout_setting = strsep(&input, ",")) &&
3887                                 strnlen(timeout_setting, AMDGPU_MAX_TIMEOUT_PARAM_LENGTH)) {
3888                         ret = kstrtol(timeout_setting, 0, &timeout);
3889                         if (ret)
3890                                 return ret;
3891
3892                         if (timeout == 0) {
3893                                 index++;
3894                                 continue;
3895                         } else if (timeout < 0) {
3896                                 timeout = MAX_SCHEDULE_TIMEOUT;
3897                                 dev_warn(adev->dev, "lockup timeout disabled");
3898                                 add_taint(TAINT_SOFTLOCKUP, LOCKDEP_STILL_OK);
3899                         } else {
3900                                 timeout = msecs_to_jiffies(timeout);
3901                         }
3902
3903                         switch (index++) {
3904                         case 0:
3905                                 adev->gfx_timeout = timeout;
3906                                 break;
3907                         case 1:
3908                                 adev->compute_timeout = timeout;
3909                                 break;
3910                         case 2:
3911                                 adev->sdma_timeout = timeout;
3912                                 break;
3913                         case 3:
3914                                 adev->video_timeout = timeout;
3915                                 break;
3916                         default:
3917                                 break;
3918                         }
3919                 }
3920                 /*
3921                  * If only one value was specified, it should
3922                  * apply to all non-compute jobs.
3923                  */
3924                 if (index == 1) {
3925                         adev->sdma_timeout = adev->video_timeout = adev->gfx_timeout;
3926                         if (amdgpu_sriov_vf(adev) || amdgpu_passthrough(adev))
3927                                 adev->compute_timeout = adev->gfx_timeout;
3928                 }
3929         }
3930
3931         return ret;
3932 }
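
/*
 * Illustrative example of the parsing above: booting with
 *
 *	amdgpu.lockup_timeout=10000,60000,10000,10000
 *
 * sets the gfx, compute, sdma and video timeouts (in ms) in that order;
 * a single value applies to all non-compute queues, 0 keeps a queue's
 * default, and a negative value disables the timeout entirely.
 */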
3933
3934 /**
3935  * amdgpu_device_check_iommu_direct_map - check if RAM direct mapped to GPU
3936  *
3937  * @adev: amdgpu_device pointer
3938  *
3939  * RAM is direct-mapped to the GPU if the IOMMU is disabled or is in passthrough (identity) mode.
3940  */
3941 static void amdgpu_device_check_iommu_direct_map(struct amdgpu_device *adev)
3942 {
3943         struct iommu_domain *domain;
3944
3945         domain = iommu_get_domain_for_dev(adev->dev);
3946         if (!domain || domain->type == IOMMU_DOMAIN_IDENTITY)
3947                 adev->ram_is_direct_mapped = true;
3948 }
3949
3950 static const struct attribute *amdgpu_dev_attributes[] = {
3951         &dev_attr_pcie_replay_count.attr,
3952         NULL
3953 };
3954
3955 static void amdgpu_device_set_mcbp(struct amdgpu_device *adev)
3956 {
3957         if (amdgpu_mcbp == 1)
3958                 adev->gfx.mcbp = true;
3959         else if (amdgpu_mcbp == 0)
3960                 adev->gfx.mcbp = false;
3961
3962         if (amdgpu_sriov_vf(adev))
3963                 adev->gfx.mcbp = true;
3964
3965         if (adev->gfx.mcbp)
3966                 DRM_INFO("MCBP is enabled\n");
3967 }
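
/*
 * Precedence above: amdgpu.mcbp=1 forces mid-command-buffer preemption
 * on, amdgpu.mcbp=0 forces it off, and SR-IOV overrides both and always
 * enables it.
 */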
3968
3969 /**
3970  * amdgpu_device_init - initialize the driver
3971  *
3972  * @adev: amdgpu_device pointer
3973  * @flags: driver flags
3974  *
3975  * Initializes the driver info and hw (all asics).
3976  * Returns 0 for success or an error on failure.
3977  * Called at driver startup.
3978  */
3979 int amdgpu_device_init(struct amdgpu_device *adev,
3980                        uint32_t flags)
3981 {
3982         struct drm_device *ddev = adev_to_drm(adev);
3983         struct pci_dev *pdev = adev->pdev;
3984         int r, i;
3985         bool px = false;
3986         u32 max_MBps;
3987         int tmp;
3988
3989         adev->shutdown = false;
3990         adev->flags = flags;
3991
3992         if (amdgpu_force_asic_type >= 0 && amdgpu_force_asic_type < CHIP_LAST)
3993                 adev->asic_type = amdgpu_force_asic_type;
3994         else
3995                 adev->asic_type = flags & AMD_ASIC_MASK;
3996
3997         adev->usec_timeout = AMDGPU_MAX_USEC_TIMEOUT;
3998         if (amdgpu_emu_mode == 1)
3999                 adev->usec_timeout *= 10;
4000         adev->gmc.gart_size = 512 * 1024 * 1024;
4001         adev->accel_working = false;
4002         adev->num_rings = 0;
4003         RCU_INIT_POINTER(adev->gang_submit, dma_fence_get_stub());
4004         adev->mman.buffer_funcs = NULL;
4005         adev->mman.buffer_funcs_ring = NULL;
4006         adev->vm_manager.vm_pte_funcs = NULL;
4007         adev->vm_manager.vm_pte_num_scheds = 0;
4008         adev->gmc.gmc_funcs = NULL;
4009         adev->harvest_ip_mask = 0x0;
4010         adev->fence_context = dma_fence_context_alloc(AMDGPU_MAX_RINGS);
4011         bitmap_zero(adev->gfx.pipe_reserve_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
4012
4013         adev->smc_rreg = &amdgpu_invalid_rreg;
4014         adev->smc_wreg = &amdgpu_invalid_wreg;
4015         adev->pcie_rreg = &amdgpu_invalid_rreg;
4016         adev->pcie_wreg = &amdgpu_invalid_wreg;
4017         adev->pcie_rreg_ext = &amdgpu_invalid_rreg_ext;
4018         adev->pcie_wreg_ext = &amdgpu_invalid_wreg_ext;
4019         adev->pciep_rreg = &amdgpu_invalid_rreg;
4020         adev->pciep_wreg = &amdgpu_invalid_wreg;
4021         adev->pcie_rreg64 = &amdgpu_invalid_rreg64;
4022         adev->pcie_wreg64 = &amdgpu_invalid_wreg64;
4023         adev->pcie_rreg64_ext = &amdgpu_invalid_rreg64_ext;
4024         adev->pcie_wreg64_ext = &amdgpu_invalid_wreg64_ext;
4025         adev->uvd_ctx_rreg = &amdgpu_invalid_rreg;
4026         adev->uvd_ctx_wreg = &amdgpu_invalid_wreg;
4027         adev->didt_rreg = &amdgpu_invalid_rreg;
4028         adev->didt_wreg = &amdgpu_invalid_wreg;
4029         adev->gc_cac_rreg = &amdgpu_invalid_rreg;
4030         adev->gc_cac_wreg = &amdgpu_invalid_wreg;
4031         adev->audio_endpt_rreg = &amdgpu_block_invalid_rreg;
4032         adev->audio_endpt_wreg = &amdgpu_block_invalid_wreg;
4033
4034         DRM_INFO("initializing kernel modesetting (%s 0x%04X:0x%04X 0x%04X:0x%04X 0x%02X).\n",
4035                  amdgpu_asic_name[adev->asic_type], pdev->vendor, pdev->device,
4036                  pdev->subsystem_vendor, pdev->subsystem_device, pdev->revision);
4037
4038         /* mutex initialization is all done here so we
4039          * can call these functions again without locking issues
4040          */
4041         mutex_init(&adev->firmware.mutex);
4042         mutex_init(&adev->pm.mutex);
4043         mutex_init(&adev->gfx.gpu_clock_mutex);
4044         mutex_init(&adev->srbm_mutex);
4045         mutex_init(&adev->gfx.pipe_reserve_mutex);
4046         mutex_init(&adev->gfx.gfx_off_mutex);
4047         mutex_init(&adev->gfx.partition_mutex);
4048         mutex_init(&adev->grbm_idx_mutex);
4049         mutex_init(&adev->mn_lock);
4050         mutex_init(&adev->virt.vf_errors.lock);
4051         hash_init(adev->mn_hash);
4052         mutex_init(&adev->psp.mutex);
4053         mutex_init(&adev->notifier_lock);
4054         mutex_init(&adev->pm.stable_pstate_ctx_lock);
4055         mutex_init(&adev->benchmark_mutex);
4056
4057         amdgpu_device_init_apu_flags(adev);
4058
4059         r = amdgpu_device_check_arguments(adev);
4060         if (r)
4061                 return r;
4062
4063         spin_lock_init(&adev->mmio_idx_lock);
4064         spin_lock_init(&adev->smc_idx_lock);
4065         spin_lock_init(&adev->pcie_idx_lock);
4066         spin_lock_init(&adev->uvd_ctx_idx_lock);
4067         spin_lock_init(&adev->didt_idx_lock);
4068         spin_lock_init(&adev->gc_cac_idx_lock);
4069         spin_lock_init(&adev->se_cac_idx_lock);
4070         spin_lock_init(&adev->audio_endpt_idx_lock);
4071         spin_lock_init(&adev->mm_stats.lock);
4072         spin_lock_init(&adev->wb.lock);
4073
4074         INIT_LIST_HEAD(&adev->shadow_list);
4075         mutex_init(&adev->shadow_list_lock);
4076
4077         INIT_LIST_HEAD(&adev->reset_list);
4078
4079         INIT_LIST_HEAD(&adev->ras_list);
4080
4081         INIT_LIST_HEAD(&adev->pm.od_kobj_list);
4082
4083         INIT_DELAYED_WORK(&adev->delayed_init_work,
4084                           amdgpu_device_delayed_init_work_handler);
4085         INIT_DELAYED_WORK(&adev->gfx.gfx_off_delay_work,
4086                           amdgpu_device_delay_enable_gfx_off);
4087
4088         INIT_WORK(&adev->xgmi_reset_work, amdgpu_device_xgmi_reset_func);
4089
4090         adev->gfx.gfx_off_req_count = 1;
4091         adev->gfx.gfx_off_residency = 0;
4092         adev->gfx.gfx_off_entrycount = 0;
4093         adev->pm.ac_power = power_supply_is_system_supplied() > 0;
4094
4095         atomic_set(&adev->throttling_logging_enabled, 1);
4096         /*
4097          * If throttling continues, logging will be performed every minute
4098          * to avoid log flooding. "-1" is subtracted since the thermal
4099          * throttling interrupt comes every second. Thus, the total logging
4100          * interval is 59 seconds (ratelimited printk interval) + 1 (waiting
4101          * for throttling interrupt) = 60 seconds.
4102          */
4103         ratelimit_state_init(&adev->throttling_logging_rs, (60 - 1) * HZ, 1);
4104         ratelimit_set_flags(&adev->throttling_logging_rs, RATELIMIT_MSG_ON_RELEASE);
4105
4106         /* Registers mapping */
4107         /* TODO: block userspace mapping of io register */
4108         if (adev->asic_type >= CHIP_BONAIRE) {
4109                 adev->rmmio_base = pci_resource_start(adev->pdev, 5);
4110                 adev->rmmio_size = pci_resource_len(adev->pdev, 5);
4111         } else {
4112                 adev->rmmio_base = pci_resource_start(adev->pdev, 2);
4113                 adev->rmmio_size = pci_resource_len(adev->pdev, 2);
4114         }
4115
4116         for (i = 0; i < AMD_IP_BLOCK_TYPE_NUM; i++)
4117                 atomic_set(&adev->pm.pwr_state[i], POWER_STATE_UNKNOWN);
4118
4119         adev->rmmio = ioremap(adev->rmmio_base, adev->rmmio_size);
4120         if (!adev->rmmio)
4121                 return -ENOMEM;
4122
4123         DRM_INFO("register mmio base: 0x%08X\n", (uint32_t)adev->rmmio_base);
4124         DRM_INFO("register mmio size: %u\n", (unsigned int)adev->rmmio_size);
4125
4126         /*
4127          * The reset domain needs to be present early, before the XGMI hive is
4128          * discovered (if any) and initialized, so the reset sem and the
4129          * in_gpu_reset flag can be used early during init and before RREG32 is called.
4130          */
4131         adev->reset_domain = amdgpu_reset_create_reset_domain(SINGLE_DEVICE, "amdgpu-reset-dev");
4132         if (!adev->reset_domain)
4133                 return -ENOMEM;
4134
4135         /* detect hw virtualization here */
4136         amdgpu_detect_virtualization(adev);
4137
4138         amdgpu_device_get_pcie_info(adev);
4139
4140         r = amdgpu_device_get_job_timeout_settings(adev);
4141         if (r) {
4142                 dev_err(adev->dev, "invalid lockup_timeout parameter syntax\n");
4143                 return r;
4144         }
4145
4146         amdgpu_device_set_mcbp(adev);
4147
4148         /* early init functions */
4149         r = amdgpu_device_ip_early_init(adev);
4150         if (r)
4151                 return r;
4152
4153         /* Get rid of things like offb */
4154         r = drm_aperture_remove_conflicting_pci_framebuffers(adev->pdev, &amdgpu_kms_driver);
4155         if (r)
4156                 return r;
4157
4158         /* Enable TMZ based on IP_VERSION */
4159         amdgpu_gmc_tmz_set(adev);
4160
4161         if (amdgpu_sriov_vf(adev) &&
4162             amdgpu_ip_version(adev, GC_HWIP, 0) >= IP_VERSION(10, 3, 0))
4163                 /* VF MMIO access (except mailbox range) from CPU
4164                  * will be blocked during sriov runtime
4165                  */
4166                 adev->virt.caps |= AMDGPU_VF_MMIO_ACCESS_PROTECT;
4167
4168         amdgpu_gmc_noretry_set(adev);
4169         /* Need to get xgmi info early to decide the reset behavior */
4170         if (adev->gmc.xgmi.supported) {
4171                 r = adev->gfxhub.funcs->get_xgmi_info(adev);
4172                 if (r)
4173                         return r;
4174         }
4175
4176         /* enable PCIE atomic ops */
4177         if (amdgpu_sriov_vf(adev)) {
4178                 if (adev->virt.fw_reserve.p_pf2vf)
4179                         adev->have_atomics_support = ((struct amd_sriov_msg_pf2vf_info *)
4180                                                       adev->virt.fw_reserve.p_pf2vf)->pcie_atomic_ops_support_flags ==
4181                                 (PCI_EXP_DEVCAP2_ATOMIC_COMP32 | PCI_EXP_DEVCAP2_ATOMIC_COMP64);
4182         /* APUs with gfx9 and newer don't rely on PCIe atomics; their internal
4183          * path natively supports atomics, so set have_atomics_support to true.
4184          */
4185         } else if ((adev->flags & AMD_IS_APU) &&
4186                    (amdgpu_ip_version(adev, GC_HWIP, 0) >
4187                     IP_VERSION(9, 0, 0))) {
4188                 adev->have_atomics_support = true;
4189         } else {
4190                 adev->have_atomics_support =
4191                         !pci_enable_atomic_ops_to_root(adev->pdev,
4192                                           PCI_EXP_DEVCAP2_ATOMIC_COMP32 |
4193                                           PCI_EXP_DEVCAP2_ATOMIC_COMP64);
4194         }
4195
4196         if (!adev->have_atomics_support)
4197                 dev_info(adev->dev, "PCIe atomic ops are not supported\n");
4198
4199         /* doorbell bar mapping and doorbell index init */
4200         amdgpu_doorbell_init(adev);
4201
4202         if (amdgpu_emu_mode == 1) {
4203                 /* post the asic on emulation mode */
4204                 emu_soc_asic_init(adev);
4205                 goto fence_driver_init;
4206         }
4207
4208         amdgpu_reset_init(adev);
4209
4210         /* detect if we are with an SRIOV vbios */
4211         if (adev->bios)
4212                 amdgpu_device_detect_sriov_bios(adev);
4213
4214         /* check if we need to reset the asic
4215          * e.g., the driver was not cleanly unloaded previously, etc.
4216          */
4217         if (!amdgpu_sriov_vf(adev) && amdgpu_asic_need_reset_on_init(adev)) {
4218                 if (adev->gmc.xgmi.num_physical_nodes) {
4219                         dev_info(adev->dev, "Pending hive reset.\n");
4220                         adev->gmc.xgmi.pending_reset = true;
4221                         /* Only init the blocks necessary for the SMU to handle the reset */
4222                         for (i = 0; i < adev->num_ip_blocks; i++) {
4223                                 if (!adev->ip_blocks[i].status.valid)
4224                                         continue;
4225                                 if (!(adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
4226                                       adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
4227                                       adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH ||
4228                                       adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC)) {
4229                                         DRM_DEBUG("IP %s disabled for hw_init.\n",
4230                                                 adev->ip_blocks[i].version->funcs->name);
4231                                         adev->ip_blocks[i].status.hw = true;
4232                                 }
4233                         }
4234                 } else if (amdgpu_ip_version(adev, MP1_HWIP, 0) == IP_VERSION(13, 0, 10) &&
4235                            !amdgpu_device_has_display_hardware(adev)) {
4236                         r = psp_gpu_reset(adev);
4237                 } else {
4238                         tmp = amdgpu_reset_method;
4239                         /* It should do a default reset when loading or reloading the driver,
4240                          * regardless of the module parameter reset_method.
4241                          */
4242                         amdgpu_reset_method = AMD_RESET_METHOD_NONE;
4243                         r = amdgpu_asic_reset(adev);
4244                         amdgpu_reset_method = tmp;
4245                 }
4246
4247                 if (r) {
4248                         dev_err(adev->dev, "asic reset on init failed\n");
4249                         goto failed;
4250                 }
4251         }
4252
4253         /* Post card if necessary */
4254         if (amdgpu_device_need_post(adev)) {
4255                 if (!adev->bios) {
4256                         dev_err(adev->dev, "no vBIOS found\n");
4257                         r = -EINVAL;
4258                         goto failed;
4259                 }
4260                 DRM_INFO("GPU posting now...\n");
4261                 r = amdgpu_device_asic_init(adev);
4262                 if (r) {
4263                         dev_err(adev->dev, "gpu post error!\n");
4264                         goto failed;
4265                 }
4266         }
4267
4268         if (adev->bios) {
4269                 if (adev->is_atom_fw) {
4270                         /* Initialize clocks */
4271                         r = amdgpu_atomfirmware_get_clock_info(adev);
4272                         if (r) {
4273                                 dev_err(adev->dev, "amdgpu_atomfirmware_get_clock_info failed\n");
4274                                 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_GET_CLOCK_FAIL, 0, 0);
4275                                 goto failed;
4276                         }
4277                 } else {
4278                         /* Initialize clocks */
4279                         r = amdgpu_atombios_get_clock_info(adev);
4280                         if (r) {
4281                                 dev_err(adev->dev, "amdgpu_atombios_get_clock_info failed\n");
4282                                 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_GET_CLOCK_FAIL, 0, 0);
4283                                 goto failed;
4284                         }
4285                         /* init i2c buses */
4286                         if (!amdgpu_device_has_dc_support(adev))
4287                                 amdgpu_atombios_i2c_init(adev);
4288                 }
4289         }
4290
4291 fence_driver_init:
4292         /* Fence driver */
4293         r = amdgpu_fence_driver_sw_init(adev);
4294         if (r) {
4295                 dev_err(adev->dev, "amdgpu_fence_driver_sw_init failed\n");
4296                 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_FENCE_INIT_FAIL, 0, 0);
4297                 goto failed;
4298         }
4299
4300         /* init the mode config */
4301         drm_mode_config_init(adev_to_drm(adev));
4302
4303         r = amdgpu_device_ip_init(adev);
4304         if (r) {
4305                 dev_err(adev->dev, "amdgpu_device_ip_init failed\n");
4306                 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_AMDGPU_INIT_FAIL, 0, 0);
4307                 goto release_ras_con;
4308         }
4309
4310         amdgpu_fence_driver_hw_init(adev);
4311
4312         dev_info(adev->dev,
4313                 "SE %d, SH per SE %d, CU per SH %d, active_cu_number %d\n",
4314                         adev->gfx.config.max_shader_engines,
4315                         adev->gfx.config.max_sh_per_se,
4316                         adev->gfx.config.max_cu_per_sh,
4317                         adev->gfx.cu_info.number);
4318
4319         adev->accel_working = true;
4320
4321         amdgpu_vm_check_compute_bug(adev);
4322
4323         /* Initialize the buffer migration limit. */
4324         if (amdgpu_moverate >= 0)
4325                 max_MBps = amdgpu_moverate;
4326         else
4327                 max_MBps = 8; /* Allow 8 MB/s. */
4328         /* Get a log2 for easy divisions. */
4329         adev->mm_stats.log2_max_MBps = ilog2(max(1u, max_MBps));
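        /* Hedged example (not from the original source): with the default
         * max_MBps of 8, log2_max_MBps is ilog2(8) == 3, so a consumer can
         * replace a divide like bytes / (max_MBps * 1024 * 1024) with a
         * cheap shift, bytes >> (20 + log2_max_MBps), when budgeting moves.
         */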
4330
4331         /*
4332          * Register gpu instance before amdgpu_device_enable_mgpu_fan_boost.
4333          * Otherwise the mgpu fan boost feature will be skipped because the
4334          * gpu instance count would be too low.
4335          */
4336         amdgpu_register_gpu_instance(adev);
4337
4338         /* enable clockgating, etc. after ib tests, etc. since some blocks require
4339          * explicit gating rather than handling it automatically.
4340          */
4341         if (!adev->gmc.xgmi.pending_reset) {
4342                 r = amdgpu_device_ip_late_init(adev);
4343                 if (r) {
4344                         dev_err(adev->dev, "amdgpu_device_ip_late_init failed\n");
4345                         amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_AMDGPU_LATE_INIT_FAIL, 0, r);
4346                         goto release_ras_con;
4347                 }
4348                 /* must succeed. */
4349                 amdgpu_ras_resume(adev);
4350                 queue_delayed_work(system_wq, &adev->delayed_init_work,
4351                                    msecs_to_jiffies(AMDGPU_RESUME_MS));
4352         }
4353
4354         if (amdgpu_sriov_vf(adev)) {
4355                 amdgpu_virt_release_full_gpu(adev, true);
4356                 flush_delayed_work(&adev->delayed_init_work);
4357         }
4358
4359         /*
4360          * Register these sysfs interfaces after `late_init`, since some of
4361          * the operations performed in `late_init` might affect their
4362          * creation.
4363          */
4364         r = amdgpu_atombios_sysfs_init(adev);
4365         if (r)
4366                 drm_err(&adev->ddev,
4367                         "registering atombios sysfs failed (%d).\n", r);
4368
4369         r = amdgpu_pm_sysfs_init(adev);
4370         if (r)
4371                 DRM_ERROR("registering pm sysfs failed (%d).\n", r);
4372
4373         r = amdgpu_ucode_sysfs_init(adev);
4374         if (r) {
4375                 adev->ucode_sysfs_en = false;
4376                 DRM_ERROR("Creating firmware sysfs failed (%d).\n", r);
4377         } else
4378                 adev->ucode_sysfs_en = true;
4379
4380         r = sysfs_create_files(&adev->dev->kobj, amdgpu_dev_attributes);
4381         if (r)
4382                 dev_err(adev->dev, "Could not create amdgpu device attr\n");
4383
4384         r = devm_device_add_group(adev->dev, &amdgpu_board_attrs_group);
4385         if (r)
4386                 dev_err(adev->dev,
4387                         "Could not create amdgpu board attributes\n");
4388
4389         amdgpu_fru_sysfs_init(adev);
4390         amdgpu_reg_state_sysfs_init(adev);
4391
4392         if (IS_ENABLED(CONFIG_PERF_EVENTS)) {
4393                 r = amdgpu_pmu_init(adev);
4394                 if (r)
4395                         dev_err(adev->dev, "amdgpu_pmu_init failed\n");
4396         }
4396
4397         /* Have stored pci confspace at hand for restore in sudden PCI error */
4398         if (amdgpu_device_cache_pci_state(adev->pdev))
4399                 pci_restore_state(pdev);
4400
4401         /* if we have > 1 VGA cards, then disable the amdgpu VGA resources */
4402         /* this will fail for cards that aren't VGA class devices, just
4403          * ignore it
4404          */
4405         if ((adev->pdev->class >> 8) == PCI_CLASS_DISPLAY_VGA)
4406                 vga_client_register(adev->pdev, amdgpu_device_vga_set_decode);
4407
4408         px = amdgpu_device_supports_px(ddev);
4409
4410         if (px || (!dev_is_removable(&adev->pdev->dev) &&
4411                                 apple_gmux_detect(NULL, NULL)))
4412                 vga_switcheroo_register_client(adev->pdev,
4413                                                &amdgpu_switcheroo_ops, px);
4414
4415         if (px)
4416                 vga_switcheroo_init_domain_pm_ops(adev->dev, &adev->vga_pm_domain);
4417
4418         if (adev->gmc.xgmi.pending_reset)
4419                 queue_delayed_work(system_wq, &mgpu_info.delayed_reset_work,
4420                                    msecs_to_jiffies(AMDGPU_RESUME_MS));
4421
4422         amdgpu_device_check_iommu_direct_map(adev);
4423
4424         return 0;
4425
4426 release_ras_con:
4427         if (amdgpu_sriov_vf(adev))
4428                 amdgpu_virt_release_full_gpu(adev, true);
4429
4430         /* failed in exclusive mode due to timeout */
4431         if (amdgpu_sriov_vf(adev) &&
4432                 !amdgpu_sriov_runtime(adev) &&
4433                 amdgpu_virt_mmio_blocked(adev) &&
4434                 !amdgpu_virt_wait_reset(adev)) {
4435                 dev_err(adev->dev, "VF exclusive mode timeout\n");
4436                 /* Don't send request since VF is inactive. */
4437                 adev->virt.caps &= ~AMDGPU_SRIOV_CAPS_RUNTIME;
4438                 adev->virt.ops = NULL;
4439                 r = -EAGAIN;
4440         }
4441         amdgpu_release_ras_context(adev);
4442
4443 failed:
4444         amdgpu_vf_error_trans_all(adev);
4445
4446         return r;
4447 }
4448
4449 static void amdgpu_device_unmap_mmio(struct amdgpu_device *adev)
4450 {
4452         /* Clear all CPU mappings pointing to this device */
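        /* A holelen of 0 means "unmap to the end of the file"; even_cows == 1
         * also zaps private copy-on-write copies of those pages.
         */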
4453         unmap_mapping_range(adev->ddev.anon_inode->i_mapping, 0, 0, 1);
4454
4455         /* Unmap all mapped bars - Doorbell, registers and VRAM */
4456         amdgpu_doorbell_fini(adev);
4457
4458         iounmap(adev->rmmio);
4459         adev->rmmio = NULL;
4460         if (adev->mman.aper_base_kaddr)
4461                 iounmap(adev->mman.aper_base_kaddr);
4462         adev->mman.aper_base_kaddr = NULL;
4463
4464         /* Memory manager related */
4465         if (!adev->gmc.xgmi.connected_to_cpu && !adev->gmc.is_app_apu) {
4466                 arch_phys_wc_del(adev->gmc.vram_mtrr);
4467                 arch_io_free_memtype_wc(adev->gmc.aper_base, adev->gmc.aper_size);
4468         }
4469 }
4470
4471 /**
4472  * amdgpu_device_fini_hw - tear down the driver
4473  *
4474  * @adev: amdgpu_device pointer
4475  *
4476  * Tear down the driver info (all asics).
4477  * Called at driver shutdown.
4478  */
4479 void amdgpu_device_fini_hw(struct amdgpu_device *adev)
4480 {
4481         dev_info(adev->dev, "amdgpu: finishing device.\n");
4482         flush_delayed_work(&adev->delayed_init_work);
4483         adev->shutdown = true;
4484
4485         /* make sure IB test finished before entering exclusive mode
4486          * to avoid preemption on IB test
4487          */
4488         if (amdgpu_sriov_vf(adev)) {
4489                 amdgpu_virt_request_full_gpu(adev, false);
4490                 amdgpu_virt_fini_data_exchange(adev);
4491         }
4492
4493         /* disable all interrupts */
4494         amdgpu_irq_disable_all(adev);
4495         if (adev->mode_info.mode_config_initialized) {
4496                 if (!drm_drv_uses_atomic_modeset(adev_to_drm(adev)))
4497                         drm_helper_force_disable_all(adev_to_drm(adev));
4498                 else
4499                         drm_atomic_helper_shutdown(adev_to_drm(adev));
4500         }
4501         amdgpu_fence_driver_hw_fini(adev);
4502
4503         if (adev->mman.initialized)
4504                 drain_workqueue(adev->mman.bdev.wq);
4505
4506         if (adev->pm.sysfs_initialized)
4507                 amdgpu_pm_sysfs_fini(adev);
4508         if (adev->ucode_sysfs_en)
4509                 amdgpu_ucode_sysfs_fini(adev);
4510         sysfs_remove_files(&adev->dev->kobj, amdgpu_dev_attributes);
4511         amdgpu_fru_sysfs_fini(adev);
4512
4513         amdgpu_reg_state_sysfs_fini(adev);
4514
4515         /* disable ras feature must before hw fini */
4516         amdgpu_ras_pre_fini(adev);
4517
4518         amdgpu_ttm_set_buffer_funcs_status(adev, false);
4519
4520         amdgpu_device_ip_fini_early(adev);
4521
4522         amdgpu_irq_fini_hw(adev);
4523
4524         if (adev->mman.initialized)
4525                 ttm_device_clear_dma_mappings(&adev->mman.bdev);
4526
4527         amdgpu_gart_dummy_page_fini(adev);
4528
4529         if (drm_dev_is_unplugged(adev_to_drm(adev)))
4530                 amdgpu_device_unmap_mmio(adev);
4531
4532 }
4533
4534 void amdgpu_device_fini_sw(struct amdgpu_device *adev)
4535 {
4536         int idx;
4537         bool px;
4538
4539         amdgpu_fence_driver_sw_fini(adev);
4540         amdgpu_device_ip_fini(adev);
4541         amdgpu_ucode_release(&adev->firmware.gpu_info_fw);
4542         adev->accel_working = false;
4543         dma_fence_put(rcu_dereference_protected(adev->gang_submit, true));
4544
4545         amdgpu_reset_fini(adev);
4546
4547         /* free i2c buses */
4548         if (!amdgpu_device_has_dc_support(adev))
4549                 amdgpu_i2c_fini(adev);
4550
4551         if (amdgpu_emu_mode != 1)
4552                 amdgpu_atombios_fini(adev);
4553
4554         kfree(adev->bios);
4555         adev->bios = NULL;
4556
4557         kfree(adev->fru_info);
4558         adev->fru_info = NULL;
4559
4560         px = amdgpu_device_supports_px(adev_to_drm(adev));
4561
4562         if (px || (!dev_is_removable(&adev->pdev->dev) &&
4563                                 apple_gmux_detect(NULL, NULL)))
4564                 vga_switcheroo_unregister_client(adev->pdev);
4565
4566         if (px)
4567                 vga_switcheroo_fini_domain_pm_ops(adev->dev);
4568
4569         if ((adev->pdev->class >> 8) == PCI_CLASS_DISPLAY_VGA)
4570                 vga_client_unregister(adev->pdev);
4571
4572         if (drm_dev_enter(adev_to_drm(adev), &idx)) {
4574                 iounmap(adev->rmmio);
4575                 adev->rmmio = NULL;
4576                 amdgpu_doorbell_fini(adev);
4577                 drm_dev_exit(idx);
4578         }
4579
4580         if (IS_ENABLED(CONFIG_PERF_EVENTS))
4581                 amdgpu_pmu_fini(adev);
4582         if (adev->mman.discovery_bin)
4583                 amdgpu_discovery_fini(adev);
4584
4585         amdgpu_reset_put_reset_domain(adev->reset_domain);
4586         adev->reset_domain = NULL;
4587
4588         kfree(adev->pci_state);
4589
4590 }
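
/*
 * Illustrative note (typical call order assumed, not code from this file):
 * teardown is split into the two phases above, e.g.
 *
 *	amdgpu_device_fini_hw(adev);	// quiesce hardware first
 *	...				// last users drop their references
 *	amdgpu_device_fini_sw(adev);	// then free software state
 *
 * so hardware access stops early while software bookkeeping stays valid
 * until the final reference is released.
 */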
4591
4592 /**
4593  * amdgpu_device_evict_resources - evict device resources
4594  * @adev: amdgpu device object
4595  *
4596  * Evicts all ttm device resources (vram BOs, gart table) from the lru list
4597  * of the vram memory type. Mainly used for evicting device resources
4598  * at suspend time.
4599  *
4600  */
4601 static int amdgpu_device_evict_resources(struct amdgpu_device *adev)
4602 {
4603         int ret;
4604
4605         /* No need to evict vram on APUs for suspend to ram or s2idle */
4606         if ((adev->in_s3 || adev->in_s0ix) && (adev->flags & AMD_IS_APU))
4607                 return 0;
4608
4609         ret = amdgpu_ttm_evict_resources(adev, TTM_PL_VRAM);
4610         if (ret)
4611                 DRM_WARN("evicting device resources failed\n");
4612         return ret;
4613 }
4614
4615 /*
4616  * Suspend & resume.
4617  */
4618 /**
4619  * amdgpu_device_prepare - prepare for device suspend
4620  *
4621  * @dev: drm dev pointer
4622  *
4623  * Prepare to put the hw in the suspend state (all asics).
4624  * Returns 0 for success or an error on failure.
4625  * Called at driver suspend.
4626  */
4627 int amdgpu_device_prepare(struct drm_device *dev)
4628 {
4629         struct amdgpu_device *adev = drm_to_adev(dev);
4630         int i, r;
4631
4632         amdgpu_choose_low_power_state(adev);
4633
4634         if (dev->switch_power_state == DRM_SWITCH_POWER_OFF)
4635                 return 0;
4636
4637         /* Evict the majority of BOs before starting suspend sequence */
4638         r = amdgpu_device_evict_resources(adev);
4639         if (r)
4640                 goto unprepare;
4641
4642         flush_delayed_work(&adev->gfx.gfx_off_delay_work);
4643
4644         for (i = 0; i < adev->num_ip_blocks; i++) {
4645                 if (!adev->ip_blocks[i].status.valid)
4646                         continue;
4647                 if (!adev->ip_blocks[i].version->funcs->prepare_suspend)
4648                         continue;
4649                 r = adev->ip_blocks[i].version->funcs->prepare_suspend((void *)adev);
4650                 if (r)
4651                         goto unprepare;
4652         }
4653
4654         return 0;
4655
4656 unprepare:
4657         adev->in_s0ix = adev->in_s3 = false;
4658
4659         return r;
4660 }
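
/*
 * Minimal sketch of an IP block prepare_suspend callback as invoked by the
 * loop in amdgpu_device_prepare() above. The function and the helper
 * foo_flush_pending() are hypothetical; only the (void *handle) signature
 * follows the amd_ip_funcs convention used by the call site.
 *
 *	static int foo_prepare_suspend(void *handle)
 *	{
 *		struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 *
 *		return foo_flush_pending(adev);	// flush outstanding work
 *	}
 */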
4661
4662 /**
4663  * amdgpu_device_suspend - initiate device suspend
4664  *
4665  * @dev: drm dev pointer
4666  * @fbcon: notify the fbdev of suspend
4667  *
4668  * Puts the hw in the suspend state (all asics).
4669  * Returns 0 for success or an error on failure.
4670  * Called at driver suspend.
4671  */
4672 int amdgpu_device_suspend(struct drm_device *dev, bool fbcon)
4673 {
4674         struct amdgpu_device *adev = drm_to_adev(dev);
4675         int r = 0;
4676
4677         if (dev->switch_power_state == DRM_SWITCH_POWER_OFF)
4678                 return 0;
4679
4680         adev->in_suspend = true;
4681
4682         if (amdgpu_sriov_vf(adev)) {
4683                 amdgpu_virt_fini_data_exchange(adev);
4684                 r = amdgpu_virt_request_full_gpu(adev, false);
4685                 if (r)
4686                         return r;
4687         }
4688
4689         if (amdgpu_acpi_smart_shift_update(dev, AMDGPU_SS_DEV_D3))
4690                 DRM_WARN("smart shift update failed\n");
4691
4692         if (fbcon)
4693                 drm_fb_helper_set_suspend_unlocked(adev_to_drm(adev)->fb_helper, true);
4694
4695         cancel_delayed_work_sync(&adev->delayed_init_work);
4696
4697         amdgpu_ras_suspend(adev);
4698
4699         amdgpu_device_ip_suspend_phase1(adev);
4700
4701         if (!adev->in_s0ix)
4702                 amdgpu_amdkfd_suspend(adev, adev->in_runpm);
4703
4704         r = amdgpu_device_evict_resources(adev);
4705         if (r)
4706                 return r;
4707
4708         amdgpu_ttm_set_buffer_funcs_status(adev, false);
4709
4710         amdgpu_fence_driver_hw_fini(adev);
4711
4712         amdgpu_device_ip_suspend_phase2(adev);
4713
4714         if (amdgpu_sriov_vf(adev))
4715                 amdgpu_virt_release_full_gpu(adev, false);
4716
4717         r = amdgpu_dpm_notify_rlc_state(adev, false);
4718         if (r)
4719                 return r;
4720
4721         return 0;
4722 }
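
/*
 * Hedged usage sketch (simplified; assumes the typical dev_pm_ops wiring in
 * amdgpu_drv.c): suspend is normally driven from a PM callback such as
 *
 *	static int amdgpu_pmops_suspend(struct device *dev)
 *	{
 *		struct drm_device *drm_dev = dev_get_drvdata(dev);
 *
 *		return amdgpu_device_suspend(drm_dev, true);
 *	}
 *
 * with a matching callback on the resume side calling amdgpu_device_resume().
 */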
4723
4724 /**
4725  * amdgpu_device_resume - initiate device resume
4726  *
4727  * @dev: drm dev pointer
4728  * @fbcon: notify the fbdev of resume
4729  *
4730  * Bring the hw back to operating state (all asics).
4731  * Returns 0 for success or an error on failure.
4732  * Called at driver resume.
4733  */
4734 int amdgpu_device_resume(struct drm_device *dev, bool fbcon)
4735 {
4736         struct amdgpu_device *adev = drm_to_adev(dev);
4737         int r = 0;
4738
4739         if (amdgpu_sriov_vf(adev)) {
4740                 r = amdgpu_virt_request_full_gpu(adev, true);
4741                 if (r)
4742                         return r;
4743         }
4744
4745         if (dev->switch_power_state == DRM_SWITCH_POWER_OFF)
4746                 return 0;
4747
4748         if (adev->in_s0ix)
4749                 amdgpu_dpm_gfx_state_change(adev, sGpuChangeState_D0Entry);
4750
4751         /* post card */
4752         if (amdgpu_device_need_post(adev)) {
4753                 r = amdgpu_device_asic_init(adev);
4754                 if (r)
4755                         dev_err(adev->dev, "amdgpu asic init failed\n");
4756         }
4757
4758         r = amdgpu_device_ip_resume(adev);
4759
4760         if (r) {
4761                 dev_err(adev->dev, "amdgpu_device_ip_resume failed (%d).\n", r);
4762                 goto exit;
4763         }
4764         amdgpu_fence_driver_hw_init(adev);
4765
4766         if (!adev->in_s0ix) {
4767                 r = amdgpu_amdkfd_resume(adev, adev->in_runpm);
4768                 if (r)
4769                         goto exit;
4770         }
4771
4772         r = amdgpu_device_ip_late_init(adev);
4773         if (r)
4774                 goto exit;
4775
4776         queue_delayed_work(system_wq, &adev->delayed_init_work,
4777                            msecs_to_jiffies(AMDGPU_RESUME_MS));
4778 exit:
4779         if (amdgpu_sriov_vf(adev)) {
4780                 amdgpu_virt_init_data_exchange(adev);
4781                 amdgpu_virt_release_full_gpu(adev, true);
4782         }
4783
4784         if (r)
4785                 return r;
4786
4787         /* Make sure IB tests flushed */
4788         flush_delayed_work(&adev->delayed_init_work);
4789
4790         if (fbcon)
4791                 drm_fb_helper_set_suspend_unlocked(adev_to_drm(adev)->fb_helper, false);
4792
4793         amdgpu_ras_resume(adev);
4794
4795         if (adev->mode_info.num_crtc) {
4796                 /*
4797                  * Most of the connector probing functions try to acquire runtime pm
4798                  * refs to ensure that the GPU is powered on when connector polling is
4799                  * performed. Since we're calling this from a runtime PM callback,
4800                  * trying to acquire rpm refs will cause us to deadlock.
4801                  *
4802                  * Since we're guaranteed to be holding the rpm lock, it's safe to
4803                  * temporarily disable the rpm helpers so this doesn't deadlock us.
4804                  */
4805 #ifdef CONFIG_PM
4806                 dev->dev->power.disable_depth++;
4807 #endif
4808                 if (!adev->dc_enabled)
4809                         drm_helper_hpd_irq_event(dev);
4810                 else
4811                         drm_kms_helper_hotplug_event(dev);
4812 #ifdef CONFIG_PM
4813                 dev->dev->power.disable_depth--;
4814 #endif
4815         }
4816         adev->in_suspend = false;
4817
4818         if (adev->enable_mes)
4819                 amdgpu_mes_self_test(adev);
4820
4821         if (amdgpu_acpi_smart_shift_update(dev, AMDGPU_SS_DEV_D0))
4822                 DRM_WARN("smart shift update failed\n");
4823
4824         return 0;
4825 }
4826
4827 /**
4828  * amdgpu_device_ip_check_soft_reset - did soft reset succeed
4829  *
4830  * @adev: amdgpu_device pointer
4831  *
4832  * The list of all the hardware IPs that make up the asic is walked and
4833  * the check_soft_reset callbacks are run.  check_soft_reset determines
4834  * if the asic is still hung or not.
4835  * Returns true if any of the IPs are still in a hung state, false if not.
4836  */
4837 static bool amdgpu_device_ip_check_soft_reset(struct amdgpu_device *adev)
4838 {
4839         int i;
4840         bool asic_hang = false;
4841
4842         if (amdgpu_sriov_vf(adev))
4843                 return true;
4844
4845         if (amdgpu_asic_need_full_reset(adev))
4846                 return true;
4847
4848         for (i = 0; i < adev->num_ip_blocks; i++) {
4849                 if (!adev->ip_blocks[i].status.valid)
4850                         continue;
4851                 if (adev->ip_blocks[i].version->funcs->check_soft_reset)
4852                         adev->ip_blocks[i].status.hang =
4853                                 adev->ip_blocks[i].version->funcs->check_soft_reset(adev);
4854                 if (adev->ip_blocks[i].status.hang) {
4855                         dev_info(adev->dev, "IP block:%s is hung!\n", adev->ip_blocks[i].version->funcs->name);
4856                         asic_hang = true;
4857                 }
4858         }
4859         return asic_hang;
4860 }
4861
4862 /**
4863  * amdgpu_device_ip_pre_soft_reset - prepare for soft reset
4864  *
4865  * @adev: amdgpu_device pointer
4866  *
4867  * The list of all the hardware IPs that make up the asic is walked and the
4868  * pre_soft_reset callbacks are run if the block is hung.  pre_soft_reset
4869  * handles any IP specific hardware or software state changes that are
4870  * necessary for a soft reset to succeed.
4871  * Returns 0 on success, negative error code on failure.
4872  */
4873 static int amdgpu_device_ip_pre_soft_reset(struct amdgpu_device *adev)
4874 {
4875         int i, r = 0;
4876
4877         for (i = 0; i < adev->num_ip_blocks; i++) {
4878                 if (!adev->ip_blocks[i].status.valid)
4879                         continue;
4880                 if (adev->ip_blocks[i].status.hang &&
4881                     adev->ip_blocks[i].version->funcs->pre_soft_reset) {
4882                         r = adev->ip_blocks[i].version->funcs->pre_soft_reset(adev);
4883                         if (r)
4884                                 return r;
4885                 }
4886         }
4887
4888         return 0;
4889 }
4890
4891 /**
4892  * amdgpu_device_ip_need_full_reset - check if a full asic reset is needed
4893  *
4894  * @adev: amdgpu_device pointer
4895  *
4896  * Some hardware IPs cannot be soft reset.  If they are hung, a full gpu
4897  * reset is necessary to recover.
4898  * Returns true if a full asic reset is required, false if not.
4899  */
4900 static bool amdgpu_device_ip_need_full_reset(struct amdgpu_device *adev)
4901 {
4902         int i;
4903
4904         if (amdgpu_asic_need_full_reset(adev))
4905                 return true;
4906
4907         for (i = 0; i < adev->num_ip_blocks; i++) {
4908                 if (!adev->ip_blocks[i].status.valid)
4909                         continue;
4910                 if ((adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) ||
4911                     (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) ||
4912                     (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_ACP) ||
4913                     (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_DCE) ||
4914                      adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP) {
4915                         if (adev->ip_blocks[i].status.hang) {
4916                                 dev_info(adev->dev, "Some blocks need full reset!\n");
4917                                 return true;
4918                         }
4919                 }
4920         }
4921         return false;
4922 }
4923
4924 /**
4925  * amdgpu_device_ip_soft_reset - do a soft reset
4926  *
4927  * @adev: amdgpu_device pointer
4928  *
4929  * The list of all the hardware IPs that make up the asic is walked and the
4930  * soft_reset callbacks are run if the block is hung.  soft_reset handles any
4931  * IP specific hardware or software state changes that are necessary to soft
4932  * reset the IP.
4933  * Returns 0 on success, negative error code on failure.
4934  */
4935 static int amdgpu_device_ip_soft_reset(struct amdgpu_device *adev)
4936 {
4937         int i, r = 0;
4938
4939         for (i = 0; i < adev->num_ip_blocks; i++) {
4940                 if (!adev->ip_blocks[i].status.valid)
4941                         continue;
4942                 if (adev->ip_blocks[i].status.hang &&
4943                     adev->ip_blocks[i].version->funcs->soft_reset) {
4944                         r = adev->ip_blocks[i].version->funcs->soft_reset(adev);
4945                         if (r)
4946                                 return r;
4947                 }
4948         }
4949
4950         return 0;
4951 }
4952
4953 /**
4954  * amdgpu_device_ip_post_soft_reset - clean up from soft reset
4955  *
4956  * @adev: amdgpu_device pointer
4957  *
4958  * The list of all the hardware IPs that make up the asic is walked and the
4959  * post_soft_reset callbacks are run if the asic was hung.  post_soft_reset
4960  * handles any IP specific hardware or software state changes that are
4961  * necessary after the IP has been soft reset.
4962  * Returns 0 on success, negative error code on failure.
4963  */
4964 static int amdgpu_device_ip_post_soft_reset(struct amdgpu_device *adev)
4965 {
4966         int i, r = 0;
4967
4968         for (i = 0; i < adev->num_ip_blocks; i++) {
4969                 if (!adev->ip_blocks[i].status.valid)
4970                         continue;
4971                 if (adev->ip_blocks[i].status.hang &&
4972                     adev->ip_blocks[i].version->funcs->post_soft_reset)
4973                         r = adev->ip_blocks[i].version->funcs->post_soft_reset(adev);
4974                 if (r)
4975                         return r;
4976         }
4977
4978         return 0;
4979 }
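
/*
 * For reference, the four soft-reset helpers above are driven in sequence
 * by amdgpu_device_pre_asic_reset() further below, roughly:
 *
 *	if (amdgpu_device_ip_check_soft_reset(adev)) {
 *		amdgpu_device_ip_pre_soft_reset(adev);
 *		r = amdgpu_device_ip_soft_reset(adev);
 *		amdgpu_device_ip_post_soft_reset(adev);
 *		// re-check; fall back to a full reset if still hung
 *	}
 */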
4980
4981 /**
4982  * amdgpu_device_recover_vram - Recover some VRAM contents
4983  *
4984  * @adev: amdgpu_device pointer
4985  *
4986  * Restores the contents of VRAM buffers from the shadows in GTT.  Used to
4987  * restore things like GPUVM page tables after a GPU reset where
4988  * the contents of VRAM might be lost.
4989  *
4990  * Returns:
4991  * 0 on success, negative error code on failure.
4992  */
4993 static int amdgpu_device_recover_vram(struct amdgpu_device *adev)
4994 {
4995         struct dma_fence *fence = NULL, *next = NULL;
4996         struct amdgpu_bo *shadow;
4997         struct amdgpu_bo_vm *vmbo;
4998         long r = 1, tmo;
4999
5000         if (amdgpu_sriov_runtime(adev))
5001                 tmo = msecs_to_jiffies(8000);
5002         else
5003                 tmo = msecs_to_jiffies(100);
5004
5005         dev_info(adev->dev, "recover vram bo from shadow start\n");
5006         mutex_lock(&adev->shadow_list_lock);
5007         list_for_each_entry(vmbo, &adev->shadow_list, shadow_list) {
5008                 /* If vm is compute context or adev is APU, shadow will be NULL */
5009                 if (!vmbo->shadow)
5010                         continue;
5011                 shadow = vmbo->shadow;
5012
5013                 /* No need to recover an evicted BO */
5014                 if (shadow->tbo.resource->mem_type != TTM_PL_TT ||
5015                     shadow->tbo.resource->start == AMDGPU_BO_INVALID_OFFSET ||
5016                     shadow->parent->tbo.resource->mem_type != TTM_PL_VRAM)
5017                         continue;
5018
5019                 r = amdgpu_bo_restore_shadow(shadow, &next);
5020                 if (r)
5021                         break;
5022
5023                 if (fence) {
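                        /* dma_fence_wait_timeout() returns the remaining
                         * timeout in jiffies on success, 0 on timeout and a
                         * negative error code on failure, hence the checks
                         * below.
                         */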
5024                         tmo = dma_fence_wait_timeout(fence, false, tmo);
5025                         dma_fence_put(fence);
5026                         fence = next;
5027                         if (tmo == 0) {
5028                                 r = -ETIMEDOUT;
5029                                 break;
5030                         } else if (tmo < 0) {
5031                                 r = tmo;
5032                                 break;
5033                         }
5034                 } else {
5035                         fence = next;
5036                 }
5037         }
5038         mutex_unlock(&adev->shadow_list_lock);
5039
5040         if (fence)
5041                 tmo = dma_fence_wait_timeout(fence, false, tmo);
5042         dma_fence_put(fence);
5043
5044         if (r < 0 || tmo <= 0) {
5045                 dev_err(adev->dev, "recover vram bo from shadow failed, r is %ld, tmo is %ld\n", r, tmo);
5046                 return -EIO;
5047         }
5048
5049         dev_info(adev->dev, "recover vram bo from shadow done\n");
5050         return 0;
5051 }
5052
5054 /**
5055  * amdgpu_device_reset_sriov - reset ASIC for SR-IOV vf
5056  *
5057  * @adev: amdgpu_device pointer
5058  * @from_hypervisor: request from hypervisor
5059  *
5060  * Do a VF FLR and reinitialize the ASIC.
5061  * Returns 0 on success, an error code otherwise.
5062  */
5063 static int amdgpu_device_reset_sriov(struct amdgpu_device *adev,
5064                                      bool from_hypervisor)
5065 {
5066         int r;
5067         struct amdgpu_hive_info *hive = NULL;
5068         int retry_limit = 0;
5069
5070 retry:
5071         amdgpu_amdkfd_pre_reset(adev);
5072
5073         amdgpu_device_stop_pending_resets(adev);
5074
5075         if (from_hypervisor)
5076                 r = amdgpu_virt_request_full_gpu(adev, true);
5077         else
5078                 r = amdgpu_virt_reset_gpu(adev);
5079         if (r)
5080                 return r;
5081         amdgpu_ras_set_fed(adev, false);
5082         amdgpu_irq_gpu_reset_resume_helper(adev);
5083
5084         /* some SW cleanup the VF needs to do before recovery */
5085         amdgpu_virt_post_reset(adev);
5086
5087         /* Resume IP prior to SMC */
5088         r = amdgpu_device_ip_reinit_early_sriov(adev);
5089         if (r)
5090                 goto error;
5091
5092         amdgpu_virt_init_data_exchange(adev);
5093
5094         r = amdgpu_device_fw_loading(adev);
5095         if (r)
5096                 return r;
5097
5098         /* now we are okay to resume SMC/CP/SDMA */
5099         r = amdgpu_device_ip_reinit_late_sriov(adev);
5100         if (r)
5101                 goto error;
5102
5103         hive = amdgpu_get_xgmi_hive(adev);
5104         /* Update PSP FW topology after reset */
5105         if (hive && adev->gmc.xgmi.num_physical_nodes > 1)
5106                 r = amdgpu_xgmi_update_topology(hive, adev);
5107
5108         if (hive)
5109                 amdgpu_put_xgmi_hive(hive);
5110
5111         if (!r) {
5112                 r = amdgpu_ib_ring_tests(adev);
5113
5114                 amdgpu_amdkfd_post_reset(adev);
5115         }
5116
5117 error:
5118         if (!r && adev->virt.gim_feature & AMDGIM_FEATURE_GIM_FLR_VRAMLOST) {
5119                 amdgpu_inc_vram_lost(adev);
5120                 r = amdgpu_device_recover_vram(adev);
5121         }
5122         amdgpu_virt_release_full_gpu(adev, true);
5123
5124         if (AMDGPU_RETRY_SRIOV_RESET(r)) {
5125                 if (retry_limit < AMDGPU_MAX_RETRY_LIMIT) {
5126                         retry_limit++;
5127                         goto retry;
5128                 } else
5129                         DRM_ERROR("GPU reset retry is beyond the retry limit\n");
5130         }
5131
5132         return r;
5133 }
5134
5135 /**
5136  * amdgpu_device_has_job_running - check if there is any job in the pending list
5137  *
5138  * @adev: amdgpu_device pointer
5139  *
5140  * Check if there is any job in the pending list.
5141  */
5142 bool amdgpu_device_has_job_running(struct amdgpu_device *adev)
5143 {
5144         int i;
5145         struct drm_sched_job *job;
5146
5147         for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
5148                 struct amdgpu_ring *ring = adev->rings[i];
5149
5150                 if (!amdgpu_ring_sched_ready(ring))
5151                         continue;
5152
5153                 spin_lock(&ring->sched.job_list_lock);
5154                 job = list_first_entry_or_null(&ring->sched.pending_list,
5155                                                struct drm_sched_job, list);
5156                 spin_unlock(&ring->sched.job_list_lock);
5157                 if (job)
5158                         return true;
5159         }
5160         return false;
5161 }
5162
5163 /**
5164  * amdgpu_device_should_recover_gpu - check if we should try GPU recovery
5165  *
5166  * @adev: amdgpu_device pointer
5167  *
5168  * Check amdgpu_gpu_recovery and SRIOV status to see if we should try to recover
5169  * a hung GPU.
5170  */
5171 bool amdgpu_device_should_recover_gpu(struct amdgpu_device *adev)
5172 {
5174         if (amdgpu_gpu_recovery == 0)
5175                 goto disabled;
5176
5177         /* Skip soft reset check in fatal error mode */
5178         if (!amdgpu_ras_is_poison_mode_supported(adev))
5179                 return true;
5180
5181         if (amdgpu_sriov_vf(adev))
5182                 return true;
5183
5184         if (amdgpu_gpu_recovery == -1) {
5185                 switch (adev->asic_type) {
5186 #ifdef CONFIG_DRM_AMDGPU_SI
5187                 case CHIP_VERDE:
5188                 case CHIP_TAHITI:
5189                 case CHIP_PITCAIRN:
5190                 case CHIP_OLAND:
5191                 case CHIP_HAINAN:
5192 #endif
5193 #ifdef CONFIG_DRM_AMDGPU_CIK
5194                 case CHIP_KAVERI:
5195                 case CHIP_KABINI:
5196                 case CHIP_MULLINS:
5197 #endif
5198                 case CHIP_CARRIZO:
5199                 case CHIP_STONEY:
5200                 case CHIP_CYAN_SKILLFISH:
5201                         goto disabled;
5202                 default:
5203                         break;
5204                 }
5205         }
5206
5207         return true;
5208
5209 disabled:
5210         dev_info(adev->dev, "GPU recovery disabled.\n");
5211         return false;
5212 }
5213
5214 int amdgpu_device_mode1_reset(struct amdgpu_device *adev)
5215 {
5216         u32 i;
5217         int ret = 0;
5218
5219         amdgpu_atombios_scratch_regs_engine_hung(adev, true);
5220
5221         dev_info(adev->dev, "GPU mode1 reset\n");
5222
5223         /* disable BM */
5224         pci_clear_master(adev->pdev);
5225
5226         amdgpu_device_cache_pci_state(adev->pdev);
5227
5228         if (amdgpu_dpm_is_mode1_reset_supported(adev)) {
5229                 dev_info(adev->dev, "GPU smu mode1 reset\n");
5230                 ret = amdgpu_dpm_mode1_reset(adev);
5231         } else {
5232                 dev_info(adev->dev, "GPU psp mode1 reset\n");
5233                 ret = psp_gpu_reset(adev);
5234         }
5235
5236         if (ret)
5237                 goto mode1_reset_failed;
5238
5239         amdgpu_device_load_pci_state(adev->pdev);
5240         ret = amdgpu_psp_wait_for_bootloader(adev);
5241         if (ret)
5242                 goto mode1_reset_failed;
5243
5244         /* wait for asic to come out of reset */
5245         for (i = 0; i < adev->usec_timeout; i++) {
5246                 u32 memsize = adev->nbio.funcs->get_memsize(adev);
5247
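                /* MMIO reads return all ones while the asic is held in
                 * reset, so a memsize other than 0xffffffff indicates the
                 * asic is accessible again.
                 */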
5248                 if (memsize != 0xffffffff)
5249                         break;
5250                 udelay(1);
5251         }
5252
5253         if (i >= adev->usec_timeout) {
5254                 ret = -ETIMEDOUT;
5255                 goto mode1_reset_failed;
5256         }
5257
5258         amdgpu_atombios_scratch_regs_engine_hung(adev, false);
5259
5260         return 0;
5261
5262 mode1_reset_failed:
5263         dev_err(adev->dev, "GPU mode1 reset failed\n");
5264         return ret;
5265 }
5266
5267 int amdgpu_device_pre_asic_reset(struct amdgpu_device *adev,
5268                                  struct amdgpu_reset_context *reset_context)
5269 {
5270         int i, r = 0;
5271         struct amdgpu_job *job = NULL;
5272         bool need_full_reset =
5273                 test_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
5274
5275         if (reset_context->reset_req_dev == adev)
5276                 job = reset_context->job;
5277
5278         if (amdgpu_sriov_vf(adev)) {
5279                 /* stop the data exchange thread */
5280                 amdgpu_virt_fini_data_exchange(adev);
5281         }
5282
5283         amdgpu_fence_driver_isr_toggle(adev, true);
5284
5285         /* block all schedulers and reset given job's ring */
5286         for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
5287                 struct amdgpu_ring *ring = adev->rings[i];
5288
5289                 if (!amdgpu_ring_sched_ready(ring))
5290                         continue;
5291
5292                 /* Clear job fence from fence drv to avoid force_completion
5293                  * leave NULL and vm flush fence in fence drv
5294                  */
5295                 amdgpu_fence_driver_clear_job_fences(ring);
5296
5297                 /* after all hw jobs are reset, hw fence is meaningless, so force_completion */
5298                 amdgpu_fence_driver_force_completion(ring);
5299         }
5300
5301         amdgpu_fence_driver_isr_toggle(adev, false);
5302
5303         if (job && job->vm)
5304                 drm_sched_increase_karma(&job->base);
5305
5306         r = amdgpu_reset_prepare_hwcontext(adev, reset_context);
5307         /* If reset handler not implemented, continue; otherwise return */
5308         if (r == -EOPNOTSUPP)
5309                 r = 0;
5310         else
5311                 return r;
5312
5313         /* Don't suspend on bare metal if we are not going to HW reset the ASIC */
5314         if (!amdgpu_sriov_vf(adev)) {
5316                 if (!need_full_reset)
5317                         need_full_reset = amdgpu_device_ip_need_full_reset(adev);
5318
5319                 if (!need_full_reset && amdgpu_gpu_recovery &&
5320                     amdgpu_device_ip_check_soft_reset(adev)) {
5321                         amdgpu_device_ip_pre_soft_reset(adev);
5322                         r = amdgpu_device_ip_soft_reset(adev);
5323                         amdgpu_device_ip_post_soft_reset(adev);
5324                         if (r || amdgpu_device_ip_check_soft_reset(adev)) {
5325                                 dev_info(adev->dev, "soft reset failed, will fallback to full reset!\n");
5326                                 need_full_reset = true;
5327                         }
5328                 }
5329
5330                 if (need_full_reset)
5331                         r = amdgpu_device_ip_suspend(adev);
5332                 if (need_full_reset)
5333                         set_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
5334                 else
5335                         clear_bit(AMDGPU_NEED_FULL_RESET,
5336                                   &reset_context->flags);
5337         }
5338
5339         return r;
5340 }
5341
5342 static int amdgpu_reset_reg_dumps(struct amdgpu_device *adev)
5343 {
5344         int i;
5345
5346         lockdep_assert_held(&adev->reset_domain->sem);
5347
5348         for (i = 0; i < adev->reset_info.num_regs; i++) {
5349                 adev->reset_info.reset_dump_reg_value[i] =
5350                         RREG32(adev->reset_info.reset_dump_reg_list[i]);
5351
5352                 trace_amdgpu_reset_reg_dumps(adev->reset_info.reset_dump_reg_list[i],
5353                                              adev->reset_info.reset_dump_reg_value[i]);
5354         }
5355
5356         return 0;
5357 }
5358
5359 int amdgpu_do_asic_reset(struct list_head *device_list_handle,
5360                          struct amdgpu_reset_context *reset_context)
5361 {
5362         struct amdgpu_device *tmp_adev = NULL;
5363         bool need_full_reset, skip_hw_reset, vram_lost = false;
5364         int r = 0;
5365         uint32_t i;
5366
5367         /* Try reset handler method first */
5368         tmp_adev = list_first_entry(device_list_handle, struct amdgpu_device,
5369                                     reset_list);
5370
5371         if (!test_bit(AMDGPU_SKIP_COREDUMP, &reset_context->flags)) {
5372                 amdgpu_reset_reg_dumps(tmp_adev);
5373
5374                 /* Trigger ip dump before we reset the asic */
5375                 for (i = 0; i < tmp_adev->num_ip_blocks; i++)
5376                         if (tmp_adev->ip_blocks[i].version->funcs->dump_ip_state)
5377                                 tmp_adev->ip_blocks[i].version->funcs
5378                                 ->dump_ip_state((void *)tmp_adev);
5379         }
5380
5381         reset_context->reset_device_list = device_list_handle;
5382         r = amdgpu_reset_perform_reset(tmp_adev, reset_context);
5383         /* If reset handler not implemented, continue; otherwise return */
5384         if (r == -EOPNOTSUPP)
5385                 r = 0;
5386         else
5387                 return r;
5388
5389         /* Reset handler not implemented, use the default method */
5390         need_full_reset =
5391                 test_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
5392         skip_hw_reset = test_bit(AMDGPU_SKIP_HW_RESET, &reset_context->flags);
5393
5394         /*
5395          * ASIC reset has to be done on all XGMI hive nodes ASAP
5396          * to allow proper link negotiation in FW (within 1 sec)
5397          */
5398         if (!skip_hw_reset && need_full_reset) {
5399                 list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
5400                         /* For XGMI run all resets in parallel to speed up the process */
5401                         if (tmp_adev->gmc.xgmi.num_physical_nodes > 1) {
5402                                 tmp_adev->gmc.xgmi.pending_reset = false;
5403                                 if (!queue_work(system_unbound_wq, &tmp_adev->xgmi_reset_work))
5404                                         r = -EALREADY;
5405                         } else
5406                                 r = amdgpu_asic_reset(tmp_adev);
5407
5408                         if (r) {
5409                                 dev_err(tmp_adev->dev, "ASIC reset failed with error %d for drm dev %s",
5410                                          r, adev_to_drm(tmp_adev)->unique);
5411                                 goto out;
5412                         }
5413                 }
5414
5415                 /* For XGMI wait for all resets to complete before proceed */
5416                 if (!r) {
5417                         list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
5418                                 if (tmp_adev->gmc.xgmi.num_physical_nodes > 1) {
5419                                         flush_work(&tmp_adev->xgmi_reset_work);
5420                                         r = tmp_adev->asic_reset_res;
5421                                         if (r)
5422                                                 break;
5423                                 }
5424                         }
5425                 }
5426         }
5427
5428         if (!r && amdgpu_ras_intr_triggered()) {
5429                 list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
5430                         amdgpu_ras_reset_error_count(tmp_adev, AMDGPU_RAS_BLOCK__MMHUB);
5431                 }
5432
5433                 amdgpu_ras_intr_cleared();
5434         }
5435
5436         list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
5437                 if (need_full_reset) {
5438                         /* post card */
5439                         amdgpu_ras_set_fed(tmp_adev, false);
5440                         r = amdgpu_device_asic_init(tmp_adev);
5441                         if (r) {
5442                                 dev_warn(tmp_adev->dev, "asic atom init failed!");
5443                         } else {
5444                                 dev_info(tmp_adev->dev, "GPU reset succeeded, trying to resume\n");
5445
5446                                 r = amdgpu_device_ip_resume_phase1(tmp_adev);
5447                                 if (r)
5448                                         goto out;
5449
5450                                 vram_lost = amdgpu_device_check_vram_lost(tmp_adev);
5451
5452                                 if (!test_bit(AMDGPU_SKIP_COREDUMP, &reset_context->flags))
5453                                         amdgpu_coredump(tmp_adev, vram_lost, reset_context);
5454
5455                                 if (vram_lost) {
5456                                         DRM_INFO("VRAM is lost due to GPU reset!\n");
5457                                         amdgpu_inc_vram_lost(tmp_adev);
5458                                 }
5459
5460                                 r = amdgpu_device_fw_loading(tmp_adev);
5461                                 if (r)
5462                                         return r;
5463
5464                                 r = amdgpu_xcp_restore_partition_mode(
5465                                         tmp_adev->xcp_mgr);
5466                                 if (r)
5467                                         goto out;
5468
5469                                 r = amdgpu_device_ip_resume_phase2(tmp_adev);
5470                                 if (r)
5471                                         goto out;
5472
5473                                 if (tmp_adev->mman.buffer_funcs_ring->sched.ready)
5474                                         amdgpu_ttm_set_buffer_funcs_status(tmp_adev, true);
5475
5476                                 if (vram_lost)
5477                                         amdgpu_device_fill_reset_magic(tmp_adev);
5478
5479                                 /*
5480                                  * Add this ASIC as tracked, as the reset was
5481                                  * already completed successfully.
5482                                  */
5483                                 amdgpu_register_gpu_instance(tmp_adev);
5484
5485                                 if (!reset_context->hive &&
5486                                     tmp_adev->gmc.xgmi.num_physical_nodes > 1)
5487                                         amdgpu_xgmi_add_device(tmp_adev);
5488
5489                                 r = amdgpu_device_ip_late_init(tmp_adev);
5490                                 if (r)
5491                                         goto out;
5492
5493                                 drm_fb_helper_set_suspend_unlocked(adev_to_drm(tmp_adev)->fb_helper, false);
5494
5495                                 /*
5496                                  * The GPU enters a bad state once the number
5497                                  * of faulty pages flagged by ECC reaches the
5498                                  * threshold, and RAS recovery is scheduled
5499                                  * next. So add a check here to break recovery
5500                                  * if the bad page threshold is indeed exceeded,
5501                                  * and remind the user to retire this GPU or set
5502                                  * a bigger bad_page_threshold value to work
5503                                  * around this when probing the driver again.
5504                                  */
5505                                 if (!amdgpu_ras_eeprom_check_err_threshold(tmp_adev)) {
5506                                         /* must succeed. */
5507                                         amdgpu_ras_resume(tmp_adev);
5508                                 } else {
5509                                         r = -EINVAL;
5510                                         goto out;
5511                                 }
5512
5513                                 /* Update PSP FW topology after reset */
5514                                 if (reset_context->hive &&
5515                                     tmp_adev->gmc.xgmi.num_physical_nodes > 1)
5516                                         r = amdgpu_xgmi_update_topology(
5517                                                 reset_context->hive, tmp_adev);
5518                         }
5519                 }
5520
5521 out:
5522                 if (!r) {
5523                         amdgpu_irq_gpu_reset_resume_helper(tmp_adev);
5524                         r = amdgpu_ib_ring_tests(tmp_adev);
5525                         if (r) {
5526                                 dev_err(tmp_adev->dev, "ib ring test failed (%d).\n", r);
5527                                 need_full_reset = true;
5528                                 r = -EAGAIN;
5529                                 goto end;
5530                         }
5531                 }
5532
5533                 if (!r)
5534                         r = amdgpu_device_recover_vram(tmp_adev);
5535                 else
5536                         tmp_adev->asic_reset_res = r;
5537         }
5538
5539 end:
5540         if (need_full_reset)
5541                 set_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
5542         else
5543                 clear_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
5544         return r;
5545 }
5546
5547 static void amdgpu_device_set_mp1_state(struct amdgpu_device *adev)
5548 {
5550         switch (amdgpu_asic_reset_method(adev)) {
5551         case AMD_RESET_METHOD_MODE1:
5552                 adev->mp1_state = PP_MP1_STATE_SHUTDOWN;
5553                 break;
5554         case AMD_RESET_METHOD_MODE2:
5555                 adev->mp1_state = PP_MP1_STATE_RESET;
5556                 break;
5557         default:
5558                 adev->mp1_state = PP_MP1_STATE_NONE;
5559                 break;
5560         }
5561 }
5562
5563 static void amdgpu_device_unset_mp1_state(struct amdgpu_device *adev)
5564 {
5565         amdgpu_vf_error_trans_all(adev);
5566         adev->mp1_state = PP_MP1_STATE_NONE;
5567 }
5568
5569 static void amdgpu_device_resume_display_audio(struct amdgpu_device *adev)
5570 {
5571         struct pci_dev *p = NULL;
5572
5573         p = pci_get_domain_bus_and_slot(pci_domain_nr(adev->pdev->bus),
5574                         adev->pdev->bus->number, 1);
5575         if (p) {
5576                 pm_runtime_enable(&(p->dev));
5577                 pm_runtime_resume(&(p->dev));
5578         }
5579
5580         pci_dev_put(p);
5581 }
5582
5583 static int amdgpu_device_suspend_display_audio(struct amdgpu_device *adev)
5584 {
5585         enum amd_reset_method reset_method;
5586         struct pci_dev *p = NULL;
5587         u64 expires;
5588
5589         /*
5590          * For now, only BACO and mode1 reset are confirmed to suffer
5591          * the audio issue unless the audio device is properly suspended.
5592          */
5593         reset_method = amdgpu_asic_reset_method(adev);
5594         if ((reset_method != AMD_RESET_METHOD_BACO) &&
5595              (reset_method != AMD_RESET_METHOD_MODE1))
5596                 return -EINVAL;
5597
5598         p = pci_get_domain_bus_and_slot(pci_domain_nr(adev->pdev->bus),
5599                         adev->pdev->bus->number, 1);
5600         if (!p)
5601                 return -ENODEV;
5602
5603         expires = pm_runtime_autosuspend_expiration(&(p->dev));
5604         if (!expires)
5605                 /*
5606                  * If we cannot get the audio device autosuspend delay,
5607                  * use a fixed 4s interval. Since 3s is the audio
5608                  * controller's default autosuspend delay setting, the
5609                  * 4s used here is guaranteed to cover it.
5610                  */
5611                 expires = ktime_get_mono_fast_ns() + NSEC_PER_SEC * 4ULL;
5612
5613         while (!pm_runtime_status_suspended(&(p->dev))) {
5614                 if (!pm_runtime_suspend(&(p->dev)))
5615                         break;
5616
5617                 if (expires < ktime_get_mono_fast_ns()) {
5618                         dev_warn(adev->dev, "failed to suspend display audio\n");
5619                         pci_dev_put(p);
5620                         /* TODO: abort the succeeding gpu reset? */
5621                         return -ETIMEDOUT;
5622                 }
5623         }
5624
5625         pm_runtime_disable(&(p->dev));
5626
5627         pci_dev_put(p);
5628         return 0;
5629 }
5630
5631 static inline void amdgpu_device_stop_pending_resets(struct amdgpu_device *adev)
5632 {
5633         struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
5634
5635 #if defined(CONFIG_DEBUG_FS)
5636         if (!amdgpu_sriov_vf(adev))
5637                 cancel_work(&adev->reset_work);
5638 #endif
5639
5640         if (adev->kfd.dev)
5641                 cancel_work(&adev->kfd.reset_work);
5642
5643         if (amdgpu_sriov_vf(adev))
5644                 cancel_work(&adev->virt.flr_work);
5645
5646         if (con && adev->ras_enabled)
5647                 cancel_work(&con->recovery_work);
5648
5649 }
5650
5651 static int amdgpu_device_health_check(struct list_head *device_list_handle)
5652 {
5653         struct amdgpu_device *tmp_adev;
5654         int ret = 0;
5655         u32 status;
5656
5657         list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
5658                 pci_read_config_dword(tmp_adev->pdev, PCI_COMMAND, &status);
5659                 if (PCI_POSSIBLE_ERROR(status)) {
5660                         dev_err(tmp_adev->dev, "device lost from bus!");
5661                         ret = -ENODEV;
5662                 }
5663         }
5664
5665         return ret;
5666 }
5667
5668 /**
5669  * amdgpu_device_gpu_recover - reset the ASIC and recover the scheduler
5670  *
5671  * @adev: amdgpu_device pointer
5672  * @job: the job that triggered the hang, or NULL
5673  * @reset_context: amdgpu reset context pointer
5674  *
5675  * Attempt to reset the GPU if it has hung (all ASICs).
5676  * Attempt to do a soft reset or a full reset and reinitialize the ASIC.
5677  * Returns 0 for success or an error on failure.
5678  */
5680 int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
5681                               struct amdgpu_job *job,
5682                               struct amdgpu_reset_context *reset_context)
5683 {
5684         struct list_head device_list, *device_list_handle = NULL;
5685         bool job_signaled = false;
5686         struct amdgpu_hive_info *hive = NULL;
5687         struct amdgpu_device *tmp_adev = NULL;
5688         int i, r = 0;
5689         bool need_emergency_restart = false;
5690         bool audio_suspended = false;
5691
5692         /*
5693          * Special case: RAS triggered and full reset isn't supported
5694          */
5695         need_emergency_restart = amdgpu_ras_need_emergency_restart(adev);
5696
5697         /*
5698          * Flush RAM to disk so that after reboot
5699          * the user can read the log and see why the system rebooted.
5700          */
5701         if (need_emergency_restart && amdgpu_ras_get_context(adev) &&
5702                 amdgpu_ras_get_context(adev)->reboot) {
5703                 DRM_WARN("Emergency reboot.");
5704
5705                 ksys_sync_helper();
5706                 emergency_restart();
5707         }
5708
5709         dev_info(adev->dev, "GPU %s begin!\n",
5710                 need_emergency_restart ? "jobs stop" : "reset");
5711
5712         if (!amdgpu_sriov_vf(adev))
5713                 hive = amdgpu_get_xgmi_hive(adev);
5714         if (hive)
5715                 mutex_lock(&hive->hive_lock);
5716
5717         reset_context->job = job;
5718         reset_context->hive = hive;
5719         /*
5720          * Build list of devices to reset.
5721          * In case we are in XGMI hive mode, resort the device list
5722          * to put adev in the 1st position.
5723          */
5724         INIT_LIST_HEAD(&device_list);
5725         if (!amdgpu_sriov_vf(adev) && (adev->gmc.xgmi.num_physical_nodes > 1)) {
5726                 list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head) {
5727                         list_add_tail(&tmp_adev->reset_list, &device_list);
5728                         if (adev->shutdown)
5729                                 tmp_adev->shutdown = true;
5730                 }
5731                 if (!list_is_first(&adev->reset_list, &device_list))
5732                         list_rotate_to_front(&adev->reset_list, &device_list);
5733                 device_list_handle = &device_list;
5734         } else {
5735                 list_add_tail(&adev->reset_list, &device_list);
5736                 device_list_handle = &device_list;
5737         }
5738
5739         if (!amdgpu_sriov_vf(adev)) {
5740                 r = amdgpu_device_health_check(device_list_handle);
5741                 if (r)
5742                         goto end_reset;
5743         }
5744
5745         /* We need to lock the reset domain only once, both for XGMI and single device */
5746         tmp_adev = list_first_entry(device_list_handle, struct amdgpu_device,
5747                                     reset_list);
5748         amdgpu_device_lock_reset_domain(tmp_adev->reset_domain);
5749
5750         /* block all schedulers and reset given job's ring */
5751         list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
5752
5753                 amdgpu_device_set_mp1_state(tmp_adev);
5754
5755                 /*
5756                  * Try to put the audio codec into suspend state
5757                  * before the gpu reset starts.
5758                  *
5759                  * The power domain of the graphics device is shared
5760                  * with the AZ (audio) power domain. Without this,
5761                  * we may change the audio hardware behind the audio
5762                  * driver's back, which would trigger audio codec
5763                  * errors.
5764                  */
5765                 if (!amdgpu_device_suspend_display_audio(tmp_adev))
5766                         audio_suspended = true;
5767
5768                 amdgpu_ras_set_error_query_ready(tmp_adev, false);
5769
5770                 cancel_delayed_work_sync(&tmp_adev->delayed_init_work);
5771
5772                 if (!amdgpu_sriov_vf(tmp_adev))
5773                         amdgpu_amdkfd_pre_reset(tmp_adev);
5774
5775                 /*
5776                  * Mark these ASICs to be reset as untracked first,
5777                  * and add them back after the reset completes.
5778                  */
5779                 amdgpu_unregister_gpu_instance(tmp_adev);
5780
5781                 drm_fb_helper_set_suspend_unlocked(adev_to_drm(tmp_adev)->fb_helper, true);
5782
5783                 /* disable ras on ALL IPs */
5784                 if (!need_emergency_restart &&
5785                       amdgpu_device_ip_need_full_reset(tmp_adev))
5786                         amdgpu_ras_suspend(tmp_adev);
5787
5788                 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
5789                         struct amdgpu_ring *ring = tmp_adev->rings[i];
5790
5791                         if (!amdgpu_ring_sched_ready(ring))
5792                                 continue;
5793
5794                         drm_sched_stop(&ring->sched, job ? &job->base : NULL);
5795
5796                         if (need_emergency_restart)
5797                                 amdgpu_job_stop_all_jobs_on_sched(&ring->sched);
5798                 }
5799                 atomic_inc(&tmp_adev->gpu_reset_counter);
5800         }
5801
5802         if (need_emergency_restart)
5803                 goto skip_sched_resume;
5804
5805         /*
5806          * Must check guilty signal here since after this point all old
5807          * HW fences are force signaled.
5808          *
5809          * job->base holds a reference to the parent fence.
5810          */
5811         if (job && dma_fence_is_signaled(&job->hw_fence)) {
5812                 job_signaled = true;
5813                 dev_info(adev->dev, "Guilty job already signaled, skipping HW reset");
5814                 goto skip_hw_reset;
5815         }
5816
5817 retry:  /* Pre-ASIC reset for the rest of the adevs in the XGMI hive. */
5818         list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
5819                 r = amdgpu_device_pre_asic_reset(tmp_adev, reset_context);
5820                 /* TODO: Should we stop? */
5821                 if (r) {
5822                         dev_err(tmp_adev->dev, "GPU pre-asic reset failed with err %d for drm dev %s\n",
5823                                   r, adev_to_drm(tmp_adev)->unique);
5824                         tmp_adev->asic_reset_res = r;
5825                 }
5826
5827                 if (!amdgpu_sriov_vf(tmp_adev))
5828                         /*
5829                          * Drop all pending non-scheduler resets. Scheduler resets
5830                          * were already dropped during drm_sched_stop.
5831                          */
5832                         amdgpu_device_stop_pending_resets(tmp_adev);
5833         }
5834
5835         /* Actual ASIC resets if needed. */
5836         /* Host driver will handle XGMI hive reset for SRIOV */
5837         if (amdgpu_sriov_vf(adev)) {
5838                 r = amdgpu_device_reset_sriov(adev, !job);
5839                 if (r)
5840                         adev->asic_reset_res = r;
5841
5842                 /* Aldebaran and gfx_11_0_3 support RAS in SRIOV, so we need to resume RAS during reset */
5843                 if (amdgpu_ip_version(adev, GC_HWIP, 0) ==
5844                             IP_VERSION(9, 4, 2) ||
5845                     amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 3) ||
5846                     amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(11, 0, 3))
5847                         amdgpu_ras_resume(adev);
5848         } else {
5849                 r = amdgpu_do_asic_reset(device_list_handle, reset_context);
5850                 if (r == -EAGAIN)
5851                         goto retry;
5852         }
5853
5854 skip_hw_reset:
5855
5856         /* Post ASIC reset for all devs. */
5857         list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
5858
5859                 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
5860                         struct amdgpu_ring *ring = tmp_adev->rings[i];
5861
5862                         if (!amdgpu_ring_sched_ready(ring))
5863                                 continue;
5864
5865                         drm_sched_start(&ring->sched, true);
5866                 }
5867
5868                 if (!drm_drv_uses_atomic_modeset(adev_to_drm(tmp_adev)) && !job_signaled)
5869                         drm_helper_resume_force_mode(adev_to_drm(tmp_adev));
5870
5871                 if (tmp_adev->asic_reset_res)
5872                         r = tmp_adev->asic_reset_res;
5873
5874                 tmp_adev->asic_reset_res = 0;
5875
5876                 if (r) {
5877                         /* bad news, how do we tell this to userspace? */
5878                         dev_info(tmp_adev->dev, "GPU reset(%d) failed\n", atomic_read(&tmp_adev->gpu_reset_counter));
5879                         amdgpu_vf_error_put(tmp_adev, AMDGIM_ERROR_VF_GPU_RESET_FAIL, 0, r);
5880                 } else {
5881                         dev_info(tmp_adev->dev, "GPU reset(%d) succeeded!\n", atomic_read(&tmp_adev->gpu_reset_counter));
5882                         if (amdgpu_acpi_smart_shift_update(adev_to_drm(tmp_adev), AMDGPU_SS_DEV_D0))
5883                                 DRM_WARN("smart shift update failed\n");
5884                 }
5885         }
5886
5887 skip_sched_resume:
5888         list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
5889                 /* unlock kfd: SRIOV would do it separately */
5890                 if (!need_emergency_restart && !amdgpu_sriov_vf(tmp_adev))
5891                         amdgpu_amdkfd_post_reset(tmp_adev);
5892
5893                 /* kfd_post_reset will do nothing if the kfd device is not initialized;
5894                  * bring up kfd here if it was not initialized before.
5895                  */
5896                 if (!adev->kfd.init_complete)
5897                         amdgpu_amdkfd_device_init(adev);
5898
5899                 if (audio_suspended)
5900                         amdgpu_device_resume_display_audio(tmp_adev);
5901
5902                 amdgpu_device_unset_mp1_state(tmp_adev);
5903
5904                 amdgpu_ras_set_error_query_ready(tmp_adev, true);
5905         }
5906
5907         tmp_adev = list_first_entry(device_list_handle, struct amdgpu_device,
5908                                             reset_list);
5909         amdgpu_device_unlock_reset_domain(tmp_adev->reset_domain);
5910
5911 end_reset:
5912         if (hive) {
5913                 mutex_unlock(&hive->hive_lock);
5914                 amdgpu_put_xgmi_hive(hive);
5915         }
5916
5917         if (r)
5918                 dev_info(adev->dev, "GPU reset end with ret = %d\n", r);
5919
5920         atomic_set(&adev->reset_domain->reset_res, r);
5921         return r;
5922 }
5923
5924 /**
5925  * amdgpu_device_partner_bandwidth - find the bandwidth of the appropriate partner
5926  *
5927  * @adev: amdgpu_device pointer
5928  * @speed: pointer to the speed of the link
5929  * @width: pointer to the width of the link
5930  *
5931  * Evaluate the hierarchy to find the speed and bandwidth capabilities of the
5932  * first physical partner to an AMD dGPU.
5933  * This will exclude any virtual switches and links.
5934  */
5935 static void amdgpu_device_partner_bandwidth(struct amdgpu_device *adev,
5936                                             enum pci_bus_speed *speed,
5937                                             enum pcie_link_width *width)
5938 {
5939         struct pci_dev *parent = adev->pdev;
5940
5941         if (!speed || !width)
5942                 return;
5943
5944         *speed = PCI_SPEED_UNKNOWN;
5945         *width = PCIE_LNK_WIDTH_UNKNOWN;
5946
5947         while ((parent = pci_upstream_bridge(parent))) {
5948                 /* skip upstream/downstream switches internal to the dGPU */
5949                 if (parent->vendor == PCI_VENDOR_ID_ATI)
5950                         continue;
5951                 *speed = pcie_get_speed_cap(parent);
5952                 *width = pcie_get_width_cap(parent);
5953                 break;
5954         }
5955 }
5956
5957 /**
5958  * amdgpu_device_get_pcie_info - fetch pcie info about the PCIE slot
5959  *
5960  * @adev: amdgpu_device pointer
5961  *
5962  * Fetches and stores in the driver the PCIE capabilities (gen speed
5963  * and lanes) of the slot the device is in. Handles APUs and
5964  * virtualized environments where PCIE config space may not be available.
5965  */
5966 static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev)
5967 {
5968         struct pci_dev *pdev;
5969         enum pci_bus_speed speed_cap, platform_speed_cap;
5970         enum pcie_link_width platform_link_width;
5971
5972         if (amdgpu_pcie_gen_cap)
5973                 adev->pm.pcie_gen_mask = amdgpu_pcie_gen_cap;
5974
5975         if (amdgpu_pcie_lane_cap)
5976                 adev->pm.pcie_mlw_mask = amdgpu_pcie_lane_cap;
5977
5978         /* covers APUs as well */
5979         if (pci_is_root_bus(adev->pdev->bus) && !amdgpu_passthrough(adev)) {
5980                 if (adev->pm.pcie_gen_mask == 0)
5981                         adev->pm.pcie_gen_mask = AMDGPU_DEFAULT_PCIE_GEN_MASK;
5982                 if (adev->pm.pcie_mlw_mask == 0)
5983                         adev->pm.pcie_mlw_mask = AMDGPU_DEFAULT_PCIE_MLW_MASK;
5984                 return;
5985         }
5986
5987         if (adev->pm.pcie_gen_mask && adev->pm.pcie_mlw_mask)
5988                 return;
5989
5990         amdgpu_device_partner_bandwidth(adev, &platform_speed_cap,
5991                                         &platform_link_width);
5992
5993         if (adev->pm.pcie_gen_mask == 0) {
5994                 /* asic caps */
5995                 pdev = adev->pdev;
5996                 speed_cap = pcie_get_speed_cap(pdev);
5997                 if (speed_cap == PCI_SPEED_UNKNOWN) {
5998                         adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5999                                                   CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
6000                                                   CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3);
6001                 } else {
6002                         if (speed_cap == PCIE_SPEED_32_0GT)
6003                                 adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
6004                                                           CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
6005                                                           CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3 |
6006                                                           CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN4 |
6007                                                           CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN5);
6008                         else if (speed_cap == PCIE_SPEED_16_0GT)
6009                                 adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
6010                                                           CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
6011                                                           CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3 |
6012                                                           CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN4);
6013                         else if (speed_cap == PCIE_SPEED_8_0GT)
6014                                 adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
6015                                                           CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
6016                                                           CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3);
6017                         else if (speed_cap == PCIE_SPEED_5_0GT)
6018                                 adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
6019                                                           CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2);
6020                         else
6021                                 adev->pm.pcie_gen_mask |= CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1;
6022                 }
6023                 /* platform caps */
6024                 if (platform_speed_cap == PCI_SPEED_UNKNOWN) {
6025                         adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
6026                                                    CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2);
6027                 } else {
6028                         if (platform_speed_cap == PCIE_SPEED_32_0GT)
6029                                 adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
6030                                                            CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2 |
6031                                                            CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3 |
6032                                                            CAIL_PCIE_LINK_SPEED_SUPPORT_GEN4 |
6033                                                            CAIL_PCIE_LINK_SPEED_SUPPORT_GEN5);
6034                         else if (platform_speed_cap == PCIE_SPEED_16_0GT)
6035                                 adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
6036                                                            CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2 |
6037                                                            CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3 |
6038                                                            CAIL_PCIE_LINK_SPEED_SUPPORT_GEN4);
6039                         else if (platform_speed_cap == PCIE_SPEED_8_0GT)
6040                                 adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
6041                                                            CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2 |
6042                                                            CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3);
6043                         else if (platform_speed_cap == PCIE_SPEED_5_0GT)
6044                                 adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
6045                                                            CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2);
6046                         else
6047                                 adev->pm.pcie_gen_mask |= CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1;
6048
6049                 }
6050         }
6051         if (adev->pm.pcie_mlw_mask == 0) {
6052                 if (platform_link_width == PCIE_LNK_WIDTH_UNKNOWN) {
6053                         adev->pm.pcie_mlw_mask |= AMDGPU_DEFAULT_PCIE_MLW_MASK;
6054                 } else {
6055                         switch (platform_link_width) {
6056                         case PCIE_LNK_X32:
6057                                 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X32 |
6058                                                           CAIL_PCIE_LINK_WIDTH_SUPPORT_X16 |
6059                                                           CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 |
6060                                                           CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
6061                                                           CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
6062                                                           CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
6063                                                           CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
6064                                 break;
6065                         case PCIE_LNK_X16:
6066                                 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X16 |
6067                                                           CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 |
6068                                                           CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
6069                                                           CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
6070                                                           CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
6071                                                           CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
6072                                 break;
6073                         case PCIE_LNK_X12:
6074                                 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 |
6075                                                           CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
6076                                                           CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
6077                                                           CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
6078                                                           CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
6079                                 break;
6080                         case PCIE_LNK_X8:
6081                                 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
6082                                                           CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
6083                                                           CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
6084                                                           CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
6085                                 break;
6086                         case PCIE_LNK_X4:
6087                                 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
6088                                                           CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
6089                                                           CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
6090                                 break;
6091                         case PCIE_LNK_X2:
6092                                 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
6093                                                           CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
6094                                 break;
6095                         case PCIE_LNK_X1:
6096                                 adev->pm.pcie_mlw_mask = CAIL_PCIE_LINK_WIDTH_SUPPORT_X1;
6097                                 break;
6098                         default:
6099                                 break;
6100                         }
6101                 }
6102         }
6103 }
6104
6105 /**
6106  * amdgpu_device_is_peer_accessible - Check peer access through PCIe BAR
6107  *
6108  * @adev: amdgpu_device pointer
6109  * @peer_adev: amdgpu_device pointer for peer device trying to access @adev
6110  *
6111  * Return true if @peer_adev can access (DMA) @adev through the PCIe
6112  * BAR, i.e. @adev is "large BAR" and the BAR matches the DMA mask of
6113  * @peer_adev.
6114  */
6115 bool amdgpu_device_is_peer_accessible(struct amdgpu_device *adev,
6116                                       struct amdgpu_device *peer_adev)
6117 {
6118 #ifdef CONFIG_HSA_AMD_P2P
6119         uint64_t address_mask = peer_adev->dev->dma_mask ?
6120                 ~*peer_adev->dev->dma_mask : ~((1ULL << 32) - 1);
6121         resource_size_t aper_limit =
6122                 adev->gmc.aper_base + adev->gmc.aper_size - 1;
6123         bool p2p_access =
6124                 !adev->gmc.xgmi.connected_to_cpu &&
6125                 !(pci_p2pdma_distance(adev->pdev, peer_adev->dev, false) < 0);
6126
6127         return pcie_p2p && p2p_access && (adev->gmc.visible_vram_size &&
6128                 adev->gmc.real_vram_size == adev->gmc.visible_vram_size &&
6129                 !(adev->gmc.aper_base & address_mask ||
6130                   aper_limit & address_mask));
6131 #else
6132         return false;
6133 #endif
6134 }
6135
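/**
 * amdgpu_device_baco_enter - enter BACO (Bus Active, Chip Off)
 *
 * @dev: drm_device pointer
 *
 * Disable the doorbell interrupt when RAS is enabled, then put the device
 * into the BACO state via amdgpu_dpm_baco_enter().
 * Returns 0 for success, -ENOTSUPP if the device does not support BACO,
 * or an error on failure.
 */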
6136 int amdgpu_device_baco_enter(struct drm_device *dev)
6137 {
6138         struct amdgpu_device *adev = drm_to_adev(dev);
6139         struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
6140
6141         if (!amdgpu_device_supports_baco(dev))
6142                 return -ENOTSUPP;
6143
6144         if (ras && adev->ras_enabled &&
6145             adev->nbio.funcs->enable_doorbell_interrupt)
6146                 adev->nbio.funcs->enable_doorbell_interrupt(adev, false);
6147
6148         return amdgpu_dpm_baco_enter(adev);
6149 }
6150
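/**
 * amdgpu_device_baco_exit - exit BACO (Bus Active, Chip Off)
 *
 * @dev: drm_device pointer
 *
 * Bring the device out of the BACO state via amdgpu_dpm_baco_exit(),
 * re-enable the doorbell interrupt when RAS is enabled, and clear any
 * stale doorbell interrupt when running in passthrough mode.
 * Returns 0 for success or an error on failure.
 */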
6151 int amdgpu_device_baco_exit(struct drm_device *dev)
6152 {
6153         struct amdgpu_device *adev = drm_to_adev(dev);
6154         struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
6155         int ret = 0;
6156
6157         if (!amdgpu_device_supports_baco(dev))
6158                 return -ENOTSUPP;
6159
6160         ret = amdgpu_dpm_baco_exit(adev);
6161         if (ret)
6162                 return ret;
6163
6164         if (ras && adev->ras_enabled &&
6165             adev->nbio.funcs->enable_doorbell_interrupt)
6166                 adev->nbio.funcs->enable_doorbell_interrupt(adev, true);
6167
6168         if (amdgpu_passthrough(adev) &&
6169             adev->nbio.funcs->clear_doorbell_interrupt)
6170                 adev->nbio.funcs->clear_doorbell_interrupt(adev);
6171
6172         return 0;
6173 }
6174
6175 /**
6176  * amdgpu_pci_error_detected - Called when a PCI error is detected.
6177  * @pdev: PCI device struct
6178  * @state: PCI channel state
6179  *
6180  * Description: Called when a PCI error is detected.
6181  *
6182  * Return: PCI_ERS_RESULT_NEED_RESET or PCI_ERS_RESULT_DISCONNECT.
6183  */
6184 pci_ers_result_t amdgpu_pci_error_detected(struct pci_dev *pdev, pci_channel_state_t state)
6185 {
6186         struct drm_device *dev = pci_get_drvdata(pdev);
6187         struct amdgpu_device *adev = drm_to_adev(dev);
6188         int i;
6189
6190         DRM_INFO("PCI error: detected callback, state(%d)!!\n", state);
6191
6192         if (adev->gmc.xgmi.num_physical_nodes > 1) {
6193                 DRM_WARN("No support for XGMI hive yet...");
6194                 return PCI_ERS_RESULT_DISCONNECT;
6195         }
6196
6197         adev->pci_channel_state = state;
6198
6199         switch (state) {
6200         case pci_channel_io_normal:
6201                 return PCI_ERS_RESULT_CAN_RECOVER;
6202         /* Fatal error, prepare for slot reset */
6203         case pci_channel_io_frozen:
6204                 /*
6205                  * Locking adev->reset_domain->sem will prevent any external access
6206                  * to GPU during PCI error recovery
6207                  */
6208                 amdgpu_device_lock_reset_domain(adev->reset_domain);
6209                 amdgpu_device_set_mp1_state(adev);
6210
6211                 /*
6212                  * Block any work scheduling as we do for regular GPU reset
6213                  * for the duration of the recovery
6214                  */
6215                 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
6216                         struct amdgpu_ring *ring = adev->rings[i];
6217
6218                         if (!amdgpu_ring_sched_ready(ring))
6219                                 continue;
6220
6221                         drm_sched_stop(&ring->sched, NULL);
6222                 }
6223                 atomic_inc(&adev->gpu_reset_counter);
6224                 return PCI_ERS_RESULT_NEED_RESET;
6225         case pci_channel_io_perm_failure:
6226                 /* Permanent error, prepare for device removal */
6227                 return PCI_ERS_RESULT_DISCONNECT;
6228         }
6229
6230         return PCI_ERS_RESULT_NEED_RESET;
6231 }
6232
6233 /**
6234  * amdgpu_pci_mmio_enabled - Enable MMIO and dump debug registers
6235  * @pdev: pointer to PCI device
6236  */
6237 pci_ers_result_t amdgpu_pci_mmio_enabled(struct pci_dev *pdev)
6238 {
6240         DRM_INFO("PCI error: mmio enabled callback!!\n");
6241
6242         /* TODO - dump whatever for debugging purposes */
6243
6244         /* This is called only if amdgpu_pci_error_detected returns
6245          * PCI_ERS_RESULT_CAN_RECOVER. Read/write to the device still
6246          * works, so there is no need to reset the slot.
6247          */
6248
6249         return PCI_ERS_RESULT_RECOVERED;
6250 }
6251
6252 /**
6253  * amdgpu_pci_slot_reset - Called when PCI slot has been reset.
6254  * @pdev: PCI device struct
6255  *
6256  * Description: This routine is called by the pci error recovery
6257  * code after the PCI slot has been reset, just before we
6258  * should resume normal operations.
6259  */
6260 pci_ers_result_t amdgpu_pci_slot_reset(struct pci_dev *pdev)
6261 {
6262         struct drm_device *dev = pci_get_drvdata(pdev);
6263         struct amdgpu_device *adev = drm_to_adev(dev);
6264         int r, i;
6265         struct amdgpu_reset_context reset_context;
6266         u32 memsize;
6267         struct list_head device_list;
6268         struct amdgpu_hive_info *hive;
6269         int hive_ras_recovery = 0;
6270         struct amdgpu_ras *ras;
6271
6272         /* PCI error slot reset should be skipped during RAS recovery */
6273         hive = amdgpu_get_xgmi_hive(adev);
6274         if (hive) {
6275                 hive_ras_recovery = atomic_read(&hive->ras_recovery);
6276                 amdgpu_put_xgmi_hive(hive);
6277         }
6278         ras = amdgpu_ras_get_context(adev);
6279         if ((amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 3)) &&
6280                  ras && (atomic_read(&ras->in_recovery) || hive_ras_recovery))
6281                 return PCI_ERS_RESULT_RECOVERED;
6282
6283         DRM_INFO("PCI error: slot reset callback!!\n");
6284
6285         memset(&reset_context, 0, sizeof(reset_context));
6286
6287         INIT_LIST_HEAD(&device_list);
6288         list_add_tail(&adev->reset_list, &device_list);
6289
6290         /* wait for asic to come out of reset */
6291         msleep(500);
6292
6293         /* Restore PCI config space */
6294         amdgpu_device_load_pci_state(pdev);
6295
6296         /* confirm the ASIC came out of reset */
6297         for (i = 0; i < adev->usec_timeout; i++) {
6298                 memsize = amdgpu_asic_get_config_memsize(adev);
6299
6300                 if (memsize != 0xffffffff)
6301                         break;
6302                 udelay(1);
6303         }
6304         if (memsize == 0xffffffff) {
6305                 r = -ETIME;
6306                 goto out;
6307         }
6308
6309         reset_context.method = AMD_RESET_METHOD_NONE;
6310         reset_context.reset_req_dev = adev;
6311         set_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags);
6312         set_bit(AMDGPU_SKIP_HW_RESET, &reset_context.flags);
6313
6314         adev->no_hw_access = true;
6315         r = amdgpu_device_pre_asic_reset(adev, &reset_context);
6316         adev->no_hw_access = false;
6317         if (r)
6318                 goto out;
6319
6320         r = amdgpu_do_asic_reset(&device_list, &reset_context);
6321
6322 out:
6323         if (!r) {
6324                 if (amdgpu_device_cache_pci_state(adev->pdev))
6325                         pci_restore_state(adev->pdev);
6326
6327                 DRM_INFO("PCIe error recovery succeeded\n");
6328         } else {
6329                 DRM_ERROR("PCIe error recovery failed, err:%d", r);
6330                 amdgpu_device_unset_mp1_state(adev);
6331                 amdgpu_device_unlock_reset_domain(adev->reset_domain);
6332         }
6333
6334         return r ? PCI_ERS_RESULT_DISCONNECT : PCI_ERS_RESULT_RECOVERED;
6335 }
6336
6337 /**
6338  * amdgpu_pci_resume() - resume normal ops after PCI reset
6339  * @pdev: pointer to PCI device
6340  *
6341  * Called when the error recovery driver tells us that it's
6342  * OK to resume normal operation.
6343  */
6344 void amdgpu_pci_resume(struct pci_dev *pdev)
6345 {
6346         struct drm_device *dev = pci_get_drvdata(pdev);
6347         struct amdgpu_device *adev = drm_to_adev(dev);
6348         int i;
6349
6351         DRM_INFO("PCI error: resume callback!!\n");
6352
6353         /* Only continue execution for the case of pci_channel_io_frozen */
6354         if (adev->pci_channel_state != pci_channel_io_frozen)
6355                 return;
6356
6357         for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
6358                 struct amdgpu_ring *ring = adev->rings[i];
6359
6360                 if (!amdgpu_ring_sched_ready(ring))
6361                         continue;
6362
6363                 drm_sched_start(&ring->sched, true);
6364         }
6365
6366         amdgpu_device_unset_mp1_state(adev);
6367         amdgpu_device_unlock_reset_domain(adev->reset_domain);
6368 }
6369
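/**
 * amdgpu_device_cache_pci_state - cache the PCI config space of the device
 *
 * @pdev: PCI device struct
 *
 * Save the full PCI config space and keep a copy in adev->pci_state so it
 * can be restored after a GPU or slot reset.
 * Returns true for success, false on failure.
 */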
6370 bool amdgpu_device_cache_pci_state(struct pci_dev *pdev)
6371 {
6372         struct drm_device *dev = pci_get_drvdata(pdev);
6373         struct amdgpu_device *adev = drm_to_adev(dev);
6374         int r;
6375
6376         r = pci_save_state(pdev);
6377         if (!r) {
6378                 kfree(adev->pci_state);
6379
6380                 adev->pci_state = pci_store_saved_state(pdev);
6381
6382                 if (!adev->pci_state) {
6383                         DRM_ERROR("Failed to store PCI saved state");
6384                         return false;
6385                 }
6386         } else {
6387                 DRM_WARN("Failed to save PCI state, err:%d\n", r);
6388                 return false;
6389         }
6390
6391         return true;
6392 }
6393
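/**
 * amdgpu_device_load_pci_state - restore the cached PCI config space
 *
 * @pdev: PCI device struct
 *
 * Load the PCI config space previously cached by
 * amdgpu_device_cache_pci_state() back into the device.
 * Returns true for success, false on failure.
 */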
6394 bool amdgpu_device_load_pci_state(struct pci_dev *pdev)
6395 {
6396         struct drm_device *dev = pci_get_drvdata(pdev);
6397         struct amdgpu_device *adev = drm_to_adev(dev);
6398         int r;
6399
6400         if (!adev->pci_state)
6401                 return false;
6402
6403         r = pci_load_saved_state(pdev, adev->pci_state);
6404
6405         if (!r) {
6406                 pci_restore_state(pdev);
6407         } else {
6408                 DRM_WARN("Failed to load PCI state, err:%d\n", r);
6409                 return false;
6410         }
6411
6412         return true;
6413 }
6414
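/**
 * amdgpu_device_flush_hdp - flush the HDP (Host Data Path) cache
 *
 * @adev: amdgpu_device pointer
 * @ring: ring to emit the flush on, or NULL to flush via the ASIC callback
 *
 * Flush the HDP cache so that writes across PCIe become visible. This is a
 * no-op on bare-metal APUs and on devices connected to the CPU over XGMI,
 * where no HDP flush is needed.
 */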
6415 void amdgpu_device_flush_hdp(struct amdgpu_device *adev,
6416                 struct amdgpu_ring *ring)
6417 {
6418 #ifdef CONFIG_X86_64
6419         if ((adev->flags & AMD_IS_APU) && !amdgpu_passthrough(adev))
6420                 return;
6421 #endif
6422         if (adev->gmc.xgmi.connected_to_cpu)
6423                 return;
6424
6425         if (ring && ring->funcs->emit_hdp_flush)
6426                 amdgpu_ring_emit_hdp_flush(ring);
6427         else
6428                 amdgpu_asic_flush_hdp(adev, ring);
6429 }
6430
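/**
 * amdgpu_device_invalidate_hdp - invalidate the HDP (Host Data Path) cache
 *
 * @adev: amdgpu_device pointer
 * @ring: ring context for the invalidation, may be NULL
 *
 * Invalidate the HDP cache. Like amdgpu_device_flush_hdp(), this is a no-op
 * on bare-metal APUs and on devices connected to the CPU over XGMI.
 */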
6431 void amdgpu_device_invalidate_hdp(struct amdgpu_device *adev,
6432                 struct amdgpu_ring *ring)
6433 {
6434 #ifdef CONFIG_X86_64
6435         if ((adev->flags & AMD_IS_APU) && !amdgpu_passthrough(adev))
6436                 return;
6437 #endif
6438         if (adev->gmc.xgmi.connected_to_cpu)
6439                 return;
6440
6441         amdgpu_asic_invalidate_hdp(adev, ring);
6442 }
6443
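/**
 * amdgpu_in_reset - check whether the GPU is currently in reset
 *
 * @adev: amdgpu_device pointer
 *
 * Returns non-zero if a reset is in progress in the device's reset domain.
 */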
6444 int amdgpu_in_reset(struct amdgpu_device *adev)
6445 {
6446         return atomic_read(&adev->reset_domain->in_gpu_reset);
6447 }
6448
6449 /**
6450  * amdgpu_device_halt() - bring hardware to some kind of halt state
6451  *
6452  * @adev: amdgpu_device pointer
6453  *
6454  * Bring the hardware to some kind of halt state so that no one can touch it
6455  * any more. It helps to maintain the error context when an error occurs.
6456  * Compared to a simple hang, the system will stay stable at least for SSH
6457  * access. Then it should be trivial to inspect the hardware state and
6458  * see what's going on. Implemented as follows:
6459  *
6460  * 1. drm_dev_unplug() makes the device inaccessible to user space (IOCTLs, etc),
6461  *    clears all CPU mappings to the device, and disallows remappings through page faults
6462  * 2. amdgpu_irq_disable_all() disables all interrupts
6463  * 3. amdgpu_fence_driver_hw_fini() signals all HW fences
6464  * 4. set adev->no_hw_access to avoid potential crashes after step 5
6465  * 5. amdgpu_device_unmap_mmio() clears all MMIO mappings
6466  * 6. pci_disable_device() and pci_wait_for_pending_transaction()
6467  *    flush any in-flight DMA operations
6468  */
6469 void amdgpu_device_halt(struct amdgpu_device *adev)
6470 {
6471         struct pci_dev *pdev = adev->pdev;
6472         struct drm_device *ddev = adev_to_drm(adev);
6473
6474         amdgpu_xcp_dev_unplug(adev);
6475         drm_dev_unplug(ddev);
6476
6477         amdgpu_irq_disable_all(adev);
6478
6479         amdgpu_fence_driver_hw_fini(adev);
6480
6481         adev->no_hw_access = true;
6482
6483         amdgpu_device_unmap_mmio(adev);
6484
6485         pci_disable_device(pdev);
6486         pci_wait_for_pending_transaction(pdev);
6487 }
6488
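/**
 * amdgpu_device_pcie_port_rreg - read a PCIe port register
 *
 * @adev: amdgpu_device pointer
 * @reg: dword offset of the PCIe port register
 *
 * Read a register through the NBIO PCIe port index/data pair, serialized
 * against concurrent accesses by the pcie_idx_lock.
 * Returns the register value.
 */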
6489 u32 amdgpu_device_pcie_port_rreg(struct amdgpu_device *adev,
6490                                 u32 reg)
6491 {
6492         unsigned long flags, address, data;
6493         u32 r;
6494
6495         address = adev->nbio.funcs->get_pcie_port_index_offset(adev);
6496         data = adev->nbio.funcs->get_pcie_port_data_offset(adev);
6497
6498         spin_lock_irqsave(&adev->pcie_idx_lock, flags);
6499         WREG32(address, reg * 4);
6500         (void)RREG32(address);
6501         r = RREG32(data);
6502         spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
6503         return r;
6504 }
6505
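/**
 * amdgpu_device_pcie_port_wreg - write a PCIe port register
 *
 * @adev: amdgpu_device pointer
 * @reg: dword offset of the PCIe port register
 * @v: value to write
 *
 * Write a register through the NBIO PCIe port index/data pair, serialized
 * against concurrent accesses by the pcie_idx_lock. The read-backs flush
 * the posted writes before the lock is dropped.
 */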
6506 void amdgpu_device_pcie_port_wreg(struct amdgpu_device *adev,
6507                                 u32 reg, u32 v)
6508 {
6509         unsigned long flags, address, data;
6510
6511         address = adev->nbio.funcs->get_pcie_port_index_offset(adev);
6512         data = adev->nbio.funcs->get_pcie_port_data_offset(adev);
6513
6514         spin_lock_irqsave(&adev->pcie_idx_lock, flags);
6515         WREG32(address, reg * 4);
6516         (void)RREG32(address);
6517         WREG32(data, v);
6518         (void)RREG32(data);
6519         spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
6520 }
6521
6522 /**
6523  * amdgpu_device_switch_gang - switch to a new gang
6524  * @adev: amdgpu_device pointer
6525  * @gang: the gang to switch to
6526  *
6527  * Try to switch to a new gang.
6528  * Returns: NULL if we switched to the new gang or a reference to the current
6529  * gang leader.
6530  */
6531 struct dma_fence *amdgpu_device_switch_gang(struct amdgpu_device *adev,
6532                                             struct dma_fence *gang)
6533 {
6534         struct dma_fence *old = NULL;
6535
6536         do {
6537                 dma_fence_put(old);
6538                 rcu_read_lock();
6539                 old = dma_fence_get_rcu_safe(&adev->gang_submit);
6540                 rcu_read_unlock();
6541
6542                 if (old == gang)
6543                         break;
6544
6545                 if (!dma_fence_is_signaled(old))
6546                         return old;
6547
6548         } while (cmpxchg((struct dma_fence __force **)&adev->gang_submit,
6549                          old, gang) != old);
6550
6551         dma_fence_put(old);
6552         return NULL;
6553 }
6554
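/**
 * amdgpu_device_has_display_hardware - check whether the ASIC has a display block
 *
 * @adev: amdgpu_device pointer
 *
 * Returns true if the ASIC has display hardware, false for display-less
 * chips (e.g. HAINAN, TOPAZ) and for ASICs whose DCE/DMU block was not
 * discovered or has been harvested.
 */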
6555 bool amdgpu_device_has_display_hardware(struct amdgpu_device *adev)
6556 {
6557         switch (adev->asic_type) {
6558 #ifdef CONFIG_DRM_AMDGPU_SI
6559         case CHIP_HAINAN:
6560 #endif
6561         case CHIP_TOPAZ:
6562                 /* chips with no display hardware */
6563                 return false;
6564 #ifdef CONFIG_DRM_AMDGPU_SI
6565         case CHIP_TAHITI:
6566         case CHIP_PITCAIRN:
6567         case CHIP_VERDE:
6568         case CHIP_OLAND:
6569 #endif
6570 #ifdef CONFIG_DRM_AMDGPU_CIK
6571         case CHIP_BONAIRE:
6572         case CHIP_HAWAII:
6573         case CHIP_KAVERI:
6574         case CHIP_KABINI:
6575         case CHIP_MULLINS:
6576 #endif
6577         case CHIP_TONGA:
6578         case CHIP_FIJI:
6579         case CHIP_POLARIS10:
6580         case CHIP_POLARIS11:
6581         case CHIP_POLARIS12:
6582         case CHIP_VEGAM:
6583         case CHIP_CARRIZO:
6584         case CHIP_STONEY:
6585                 /* chips with display hardware */
6586                 return true;
6587         default:
6588                 /* IP discovery */
6589                 if (!amdgpu_ip_version(adev, DCE_HWIP, 0) ||
6590                     (adev->harvest_ip_mask & AMD_HARVEST_IP_DMU_MASK))
6591                         return false;
6592                 return true;
6593         }
6594 }
6595
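/**
 * amdgpu_device_wait_on_rreg - poll a register until it reaches a value
 *
 * @adev: amdgpu_device pointer
 * @inst: instance number, used only in the warning message
 * @reg_addr: register offset to poll
 * @reg_name: register name, used only in the warning message
 * @expected_value: value to wait for (after masking)
 * @mask: bits of the register to compare
 *
 * Poll @reg_addr until (value & @mask) == @expected_value. The timeout is
 * re-armed whenever the value changes, so only a stuck value times out.
 * Returns 0 for success or -ETIMEDOUT if the value is never reached.
 *
 * A hypothetical usage sketch (regFOO and the values are made up):
 *
 *   if (amdgpu_device_wait_on_rreg(adev, 0, regFOO, "FOO", 0x1, 0x1))
 *           dev_err(adev->dev, "FOO never became ready\n");
 */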
6596 uint32_t amdgpu_device_wait_on_rreg(struct amdgpu_device *adev,
6597                 uint32_t inst, uint32_t reg_addr, char reg_name[],
6598                 uint32_t expected_value, uint32_t mask)
6599 {
6600         uint32_t ret = 0;
6601         uint32_t old_ = 0;
6602         uint32_t tmp_ = RREG32(reg_addr);
6603         uint32_t loop = adev->usec_timeout;
6604
6605         while ((tmp_ & (mask)) != (expected_value)) {
6606                 if (old_ != tmp_) {
6607                         loop = adev->usec_timeout;
6608                         old_ = tmp_;
6609                 } else {
6610                         udelay(1);
6611                 }
6611                 tmp_ = RREG32(reg_addr);
6612                 loop--;
6613                 if (!loop) {
6614                         DRM_WARN("Register(%d) [%s] failed to reach value 0x%08x != 0x%08x\n",
6615                                   inst, reg_name, (uint32_t)expected_value,
6616                                   (uint32_t)(tmp_ & (mask)));
6617                         ret = -ETIMEDOUT;
6618                         break;
6619                 }
6620         }
6621         return ret;
6622 }