drm/amdgpu: add sysfs node for compute partition mode
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 3d98fc2ad36b04eb0d44fbc0e897608e5122a8c7..f432064a0535854900d02aaa979526556b505788 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -35,6 +35,7 @@
 #include <linux/devcoredump.h>
 #include <generated/utsrelease.h>
 #include <linux/pci-p2pdma.h>
+#include <linux/apple-gmux.h>
 
 #include <drm/drm_aperture.h>
 #include <drm/drm_atomic_helper.h>
@@ -601,7 +602,7 @@ u32 amdgpu_mm_rdoorbell(struct amdgpu_device *adev, u32 index)
        if (amdgpu_device_skip_hw_access(adev))
                return 0;
 
-       if (index < adev->doorbell.num_doorbells) {
+       if (index < adev->doorbell.num_kernel_doorbells) {
                return readl(adev->doorbell.ptr + index);
        } else {
                DRM_ERROR("reading beyond doorbell aperture: 0x%08x!\n", index);
@@ -624,7 +625,7 @@ void amdgpu_mm_wdoorbell(struct amdgpu_device *adev, u32 index, u32 v)
        if (amdgpu_device_skip_hw_access(adev))
                return;
 
-       if (index < adev->doorbell.num_doorbells) {
+       if (index < adev->doorbell.num_kernel_doorbells) {
                writel(v, adev->doorbell.ptr + index);
        } else {
                DRM_ERROR("writing beyond doorbell aperture: 0x%08x!\n", index);
@@ -645,7 +646,7 @@ u64 amdgpu_mm_rdoorbell64(struct amdgpu_device *adev, u32 index)
        if (amdgpu_device_skip_hw_access(adev))
                return 0;
 
-       if (index < adev->doorbell.num_doorbells) {
+       if (index < adev->doorbell.num_kernel_doorbells) {
                return atomic64_read((atomic64_t *)(adev->doorbell.ptr + index));
        } else {
                DRM_ERROR("reading beyond doorbell aperture: 0x%08x!\n", index);
@@ -668,7 +669,7 @@ void amdgpu_mm_wdoorbell64(struct amdgpu_device *adev, u32 index, u64 v)
        if (amdgpu_device_skip_hw_access(adev))
                return;
 
-       if (index < adev->doorbell.num_doorbells) {
+       if (index < adev->doorbell.num_kernel_doorbells) {
                atomic64_set((atomic64_t *)(adev->doorbell.ptr + index), v);
        } else {
                DRM_ERROR("writing beyond doorbell aperture: 0x%08x!\n", index);
@@ -679,20 +680,20 @@ void amdgpu_mm_wdoorbell64(struct amdgpu_device *adev, u32 index, u64 v)
  * amdgpu_device_indirect_rreg - read an indirect register
  *
  * @adev: amdgpu_device pointer
- * @pcie_index: mmio register offset
- * @pcie_data: mmio register offset
  * @reg_addr: indirect register address to read from
  *
  * Returns the value of indirect register @reg_addr
  */
 u32 amdgpu_device_indirect_rreg(struct amdgpu_device *adev,
-                               u32 pcie_index, u32 pcie_data,
                                u32 reg_addr)
 {
-       unsigned long flags;
-       u32 r;
+       unsigned long flags, pcie_index, pcie_data;
        void __iomem *pcie_index_offset;
        void __iomem *pcie_data_offset;
+       u32 r;
+
+       pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev);
+       pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev);
 
        spin_lock_irqsave(&adev->pcie_idx_lock, flags);
        pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
@@ -710,20 +711,20 @@ u32 amdgpu_device_indirect_rreg(struct amdgpu_device *adev,
  * amdgpu_device_indirect_rreg64 - read a 64bits indirect register
  *
  * @adev: amdgpu_device pointer
- * @pcie_index: mmio register offset
- * @pcie_data: mmio register offset
  * @reg_addr: indirect register address to read from
  *
  * Returns the value of indirect register @reg_addr
  */
 u64 amdgpu_device_indirect_rreg64(struct amdgpu_device *adev,
-                                 u32 pcie_index, u32 pcie_data,
                                  u32 reg_addr)
 {
-       unsigned long flags;
-       u64 r;
+       unsigned long flags, pcie_index, pcie_data;
        void __iomem *pcie_index_offset;
        void __iomem *pcie_data_offset;
+       u64 r;
+
+       pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev);
+       pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev);
 
        spin_lock_irqsave(&adev->pcie_idx_lock, flags);
        pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
@@ -753,13 +754,15 @@ u64 amdgpu_device_indirect_rreg64(struct amdgpu_device *adev,
  *
  */
 void amdgpu_device_indirect_wreg(struct amdgpu_device *adev,
-                                u32 pcie_index, u32 pcie_data,
                                 u32 reg_addr, u32 reg_data)
 {
-       unsigned long flags;
+       unsigned long flags, pcie_index, pcie_data;
        void __iomem *pcie_index_offset;
        void __iomem *pcie_data_offset;
 
+       pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev);
+       pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev);
+
        spin_lock_irqsave(&adev->pcie_idx_lock, flags);
        pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
        pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;
@@ -782,13 +785,15 @@ void amdgpu_device_indirect_wreg(struct amdgpu_device *adev,
  *
  */
 void amdgpu_device_indirect_wreg64(struct amdgpu_device *adev,
-                                  u32 pcie_index, u32 pcie_data,
                                   u32 reg_addr, u64 reg_data)
 {
-       unsigned long flags;
+       unsigned long flags, pcie_index, pcie_data;
        void __iomem *pcie_index_offset;
        void __iomem *pcie_data_offset;
 
+       pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev);
+       pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev);
+
        spin_lock_irqsave(&adev->pcie_idx_lock, flags);
        pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
        pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;
@@ -806,6 +811,18 @@ void amdgpu_device_indirect_wreg64(struct amdgpu_device *adev,
        spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
 }
 
+/**
+ * amdgpu_device_get_rev_id - query device rev_id
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Return device rev_id
+ */
+u32 amdgpu_device_get_rev_id(struct amdgpu_device *adev)
+{
+       return adev->nbio.funcs->get_rev_id(adev);
+}
+
 /**
  * amdgpu_invalid_rreg - dummy reg read function
  *
@@ -981,7 +998,7 @@ void amdgpu_device_program_register_sequence(struct amdgpu_device *adev,
        if (array_size % 3)
                return;
 
-       for (i = 0; i < array_size; i +=3) {
+       for (i = 0; i < array_size; i += 3) {
                reg = registers[i + 0];
                and_mask = registers[i + 1];
                or_mask = registers[i + 2];
@@ -1043,7 +1060,7 @@ static int amdgpu_device_doorbell_init(struct amdgpu_device *adev)
        if (adev->asic_type < CHIP_BONAIRE) {
                adev->doorbell.base = 0;
                adev->doorbell.size = 0;
-               adev->doorbell.num_doorbells = 0;
+               adev->doorbell.num_kernel_doorbells = 0;
                adev->doorbell.ptr = NULL;
                return 0;
        }
@@ -1058,27 +1075,27 @@ static int amdgpu_device_doorbell_init(struct amdgpu_device *adev)
        adev->doorbell.size = pci_resource_len(adev->pdev, 2);
 
        if (adev->enable_mes) {
-               adev->doorbell.num_doorbells =
+               adev->doorbell.num_kernel_doorbells =
                        adev->doorbell.size / sizeof(u32);
        } else {
-               adev->doorbell.num_doorbells =
+               adev->doorbell.num_kernel_doorbells =
                        min_t(u32, adev->doorbell.size / sizeof(u32),
                              adev->doorbell_index.max_assignment+1);
-               if (adev->doorbell.num_doorbells == 0)
+               if (adev->doorbell.num_kernel_doorbells == 0)
                        return -EINVAL;
 
                /* For Vega, reserve and map two pages on doorbell BAR since SDMA
                 * paging queue doorbell use the second page. The
                 * AMDGPU_DOORBELL64_MAX_ASSIGNMENT definition assumes all the
                 * doorbells are in the first page. So with paging queue enabled,
-                * the max num_doorbells should + 1 page (0x400 in dword)
+                * the max num_kernel_doorbells should + 1 page (0x400 in dword)
                 */
                if (adev->asic_type >= CHIP_VEGA10)
-                       adev->doorbell.num_doorbells += 0x400;
+                       adev->doorbell.num_kernel_doorbells += 0x400;
        }
 
        adev->doorbell.ptr = ioremap(adev->doorbell.base,
-                                    adev->doorbell.num_doorbells *
+                                    adev->doorbell.num_kernel_doorbells *
                                     sizeof(u32));
        if (adev->doorbell.ptr == NULL)
                return -ENOMEM;
@@ -1530,7 +1547,7 @@ static int amdgpu_device_check_arguments(struct amdgpu_device *adev)
                dev_warn(adev->dev, "sched jobs (%d) must be at least 4\n",
                         amdgpu_sched_jobs);
                amdgpu_sched_jobs = 4;
-       } else if (!is_power_of_2(amdgpu_sched_jobs)){
+       } else if (!is_power_of_2(amdgpu_sched_jobs)) {
                dev_warn(adev->dev, "sched jobs (%d) must be a power of 2\n",
                         amdgpu_sched_jobs);
                amdgpu_sched_jobs = roundup_pow_of_two(amdgpu_sched_jobs);
@@ -2167,7 +2184,6 @@ static int amdgpu_device_ip_early_init(struct amdgpu_device *adev)
                adev->has_pr3 = parent ? pci_pr3_present(parent) : false;
        }
 
-       amdgpu_amdkfd_device_probe(adev);
 
        adev->pm.pp_feature = amdgpu_pp_feature_mask;
        if (amdgpu_sriov_vf(adev) || sched_policy == KFD_SCHED_POLICY_NO_HWS)
@@ -2223,6 +2239,7 @@ static int amdgpu_device_ip_early_init(struct amdgpu_device *adev)
        if (!total)
                return -ENODEV;
 
+       amdgpu_amdkfd_device_probe(adev);
        adev->cg_flags &= amdgpu_cg_mask;
        adev->pg_flags &= amdgpu_pg_mask;
 
@@ -2348,7 +2365,7 @@ static int amdgpu_device_init_schedulers(struct amdgpu_device *adev)
                }
 
                r = drm_sched_init(&ring->sched, &amdgpu_sched_ops,
-                                  ring->num_hw_submission, amdgpu_job_hang_limit,
+                                  ring->num_hw_submission, 0,
                                   timeout, adev->reset_domain->wq,
                                   ring->sched_score, ring->name,
                                   adev->dev);
@@ -2522,8 +2539,6 @@ static int amdgpu_device_ip_init(struct amdgpu_device *adev)
        amdgpu_fru_get_product_info(adev);
 
 init_failed:
-       if (amdgpu_sriov_vf(adev))
-               amdgpu_virt_release_full_gpu(adev, true);
 
        return r;
 }
@@ -2744,8 +2759,9 @@ static int amdgpu_device_ip_late_init(struct amdgpu_device *adev)
                DRM_ERROR("enable mgpu fan boost failed (%d).\n", r);
 
        /* For passthrough configuration on arcturus and aldebaran, enable special handling SBR */
-       if (amdgpu_passthrough(adev) && ((adev->asic_type == CHIP_ARCTURUS && adev->gmc.xgmi.num_physical_nodes > 1)||
-                              adev->asic_type == CHIP_ALDEBARAN ))
+       if (amdgpu_passthrough(adev) &&
+           ((adev->asic_type == CHIP_ARCTURUS && adev->gmc.xgmi.num_physical_nodes > 1) ||
+            adev->asic_type == CHIP_ALDEBARAN))
                amdgpu_dpm_handle_passthrough_sbr(adev, true);
 
        if (adev->gmc.xgmi.num_physical_nodes > 1) {
@@ -3074,7 +3090,7 @@ static int amdgpu_device_ip_suspend_phase2(struct amdgpu_device *adev)
                }
                adev->ip_blocks[i].status.hw = false;
                /* handle putting the SMC in the appropriate state */
-               if(!amdgpu_sriov_vf(adev)){
+               if (!amdgpu_sriov_vf(adev)) {
                        if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) {
                                r = amdgpu_dpm_set_mp1_state(adev, adev->mp1_state);
                                if (r) {
@@ -3164,9 +3180,11 @@ static int amdgpu_device_ip_reinit_late_sriov(struct amdgpu_device *adev)
                AMD_IP_BLOCK_TYPE_DCE,
                AMD_IP_BLOCK_TYPE_GFX,
                AMD_IP_BLOCK_TYPE_SDMA,
+               AMD_IP_BLOCK_TYPE_MES,
                AMD_IP_BLOCK_TYPE_UVD,
                AMD_IP_BLOCK_TYPE_VCE,
-               AMD_IP_BLOCK_TYPE_VCN
+               AMD_IP_BLOCK_TYPE_VCN,
+               AMD_IP_BLOCK_TYPE_JPEG
        };
 
        for (i = 0; i < ARRAY_SIZE(ip_order); i++) {
@@ -3286,9 +3304,11 @@ static int amdgpu_device_ip_resume(struct amdgpu_device *adev)
 {
        int r;
 
-       r = amdgpu_amdkfd_resume_iommu(adev);
-       if (r)
-               return r;
+       if (!adev->in_s0ix) {
+               r = amdgpu_amdkfd_resume_iommu(adev);
+               if (r)
+                       return r;
+       }
 
        r = amdgpu_device_ip_resume_phase1(adev);
        if (r)
@@ -3559,6 +3579,7 @@ int amdgpu_device_init(struct amdgpu_device *adev,
        int r, i;
        bool px = false;
        u32 max_MBps;
+       int tmp;
 
        adev->shutdown = false;
        adev->flags = flags;
@@ -3613,6 +3634,7 @@ int amdgpu_device_init(struct amdgpu_device *adev,
        mutex_init(&adev->srbm_mutex);
        mutex_init(&adev->gfx.pipe_reserve_mutex);
        mutex_init(&adev->gfx.gfx_off_mutex);
+       mutex_init(&adev->gfx.partition_mutex);
        mutex_init(&adev->grbm_idx_mutex);
        mutex_init(&adev->mn_lock);
        mutex_init(&adev->virt.vf_errors.lock);
@@ -3737,6 +3759,12 @@ int amdgpu_device_init(struct amdgpu_device *adev,
                adev->have_atomics_support = ((struct amd_sriov_msg_pf2vf_info *)
                        adev->virt.fw_reserve.p_pf2vf)->pcie_atomic_ops_support_flags ==
                        (PCI_EXP_DEVCAP2_ATOMIC_COMP32 | PCI_EXP_DEVCAP2_ATOMIC_COMP64);
+       /* APUs with gfx9 onwards don't rely on PCIe atomics; the internal
+        * path natively supports atomics, so set have_atomics_support to true.
+        */
+       else if ((adev->flags & AMD_IS_APU) &&
+               (adev->ip_versions[GC_HWIP][0] > IP_VERSION(9, 0, 0)))
+               adev->have_atomics_support = true;
        else
                adev->have_atomics_support =
                        !pci_enable_atomic_ops_to_root(adev->pdev,
@@ -3780,7 +3808,13 @@ int amdgpu_device_init(struct amdgpu_device *adev,
                                }
                        }
                } else {
+                       tmp = amdgpu_reset_method;
+                       /* It should do a default reset when loading or reloading the driver,
+                        * regardless of the module parameter reset_method.
+                        */
+                       amdgpu_reset_method = AMD_RESET_METHOD_NONE;
                        r = amdgpu_asic_reset(adev);
+                       amdgpu_reset_method = tmp;
                        if (r) {
                                dev_err(adev->dev, "asic reset on init failed\n");
                                goto failed;
@@ -3788,8 +3822,6 @@ int amdgpu_device_init(struct amdgpu_device *adev,
                }
        }
 
-       pci_enable_pcie_error_reporting(adev->pdev);
-
        /* Post card if necessary */
        if (amdgpu_device_need_post(adev)) {
                if (!adev->bios) {
@@ -3840,18 +3872,6 @@ fence_driver_init:
 
        r = amdgpu_device_ip_init(adev);
        if (r) {
-               /* failed in exclusive mode due to timeout */
-               if (amdgpu_sriov_vf(adev) &&
-                   !amdgpu_sriov_runtime(adev) &&
-                   amdgpu_virt_mmio_blocked(adev) &&
-                   !amdgpu_virt_wait_reset(adev)) {
-                       dev_err(adev->dev, "VF exclusive mode timeout\n");
-                       /* Don't send request since VF is inactive. */
-                       adev->virt.caps &= ~AMDGPU_SRIOV_CAPS_RUNTIME;
-                       adev->virt.ops = NULL;
-                       r = -EAGAIN;
-                       goto release_ras_con;
-               }
                dev_err(adev->dev, "amdgpu_device_ip_init failed\n");
                amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_AMDGPU_INIT_FAIL, 0, 0);
                goto release_ras_con;
@@ -3879,11 +3899,8 @@ fence_driver_init:
        adev->mm_stats.log2_max_MBps = ilog2(max(1u, max_MBps));
 
        r = amdgpu_pm_sysfs_init(adev);
-       if (r) {
-               adev->pm_sysfs_en = false;
-               DRM_ERROR("registering pm debugfs failed (%d).\n", r);
-       } else
-               adev->pm_sysfs_en = true;
+       if (r)
+               DRM_ERROR("registering pm sysfs failed (%d).\n", r);
 
        r = amdgpu_ucode_sysfs_init(adev);
        if (r) {
@@ -3923,8 +3940,10 @@ fence_driver_init:
                                   msecs_to_jiffies(AMDGPU_RESUME_MS));
        }
 
-       if (amdgpu_sriov_vf(adev))
+       if (amdgpu_sriov_vf(adev)) {
+               amdgpu_virt_release_full_gpu(adev, true);
                flush_delayed_work(&adev->delayed_init_work);
+       }
 
        r = sysfs_create_files(&adev->dev->kobj, amdgpu_dev_attributes);
        if (r)
@@ -3945,12 +3964,15 @@ fence_driver_init:
        if ((adev->pdev->class >> 8) == PCI_CLASS_DISPLAY_VGA)
                vga_client_register(adev->pdev, amdgpu_device_vga_set_decode);
 
-       if (amdgpu_device_supports_px(ddev)) {
-               px = true;
+       px = amdgpu_device_supports_px(ddev);
+
+       if (px || (!pci_is_thunderbolt_attached(adev->pdev) &&
+                               apple_gmux_detect(NULL, NULL)))
                vga_switcheroo_register_client(adev->pdev,
                                               &amdgpu_switcheroo_ops, px);
+
+       if (px)
                vga_switcheroo_init_domain_pm_ops(adev->dev, &adev->vga_pm_domain);
-       }
 
        if (adev->gmc.xgmi.pending_reset)
                queue_delayed_work(system_wq, &mgpu_info.delayed_reset_work,
@@ -3961,6 +3983,20 @@ fence_driver_init:
        return 0;
 
 release_ras_con:
+       if (amdgpu_sriov_vf(adev))
+               amdgpu_virt_release_full_gpu(adev, true);
+
+       /* failed in exclusive mode due to timeout */
+       if (amdgpu_sriov_vf(adev) &&
+               !amdgpu_sriov_runtime(adev) &&
+               amdgpu_virt_mmio_blocked(adev) &&
+               !amdgpu_virt_wait_reset(adev)) {
+               dev_err(adev->dev, "VF exclusive mode timeout\n");
+               /* Don't send request since VF is inactive. */
+               adev->virt.caps &= ~AMDGPU_SRIOV_CAPS_RUNTIME;
+               adev->virt.ops = NULL;
+               r = -EAGAIN;
+       }
        amdgpu_release_ras_context(adev);
 
 failed:
@@ -4015,7 +4051,7 @@ void amdgpu_device_fini_hw(struct amdgpu_device *adev)
 
        /* disable all interrupts */
        amdgpu_irq_disable_all(adev);
-       if (adev->mode_info.mode_config_initialized){
+       if (adev->mode_info.mode_config_initialized) {
                if (!drm_drv_uses_atomic_modeset(adev_to_drm(adev)))
                        drm_helper_force_disable_all(adev_to_drm(adev));
                else
@@ -4026,7 +4062,7 @@ void amdgpu_device_fini_hw(struct amdgpu_device *adev)
        if (adev->mman.initialized)
                drain_workqueue(adev->mman.bdev.wq);
 
-       if (adev->pm_sysfs_en)
+       if (adev->pm.sysfs_initialized)
                amdgpu_pm_sysfs_fini(adev);
        if (adev->ucode_sysfs_en)
                amdgpu_ucode_sysfs_fini(adev);
@@ -4054,6 +4090,7 @@ void amdgpu_device_fini_hw(struct amdgpu_device *adev)
 void amdgpu_device_fini_sw(struct amdgpu_device *adev)
 {
        int idx;
+       bool px;
 
        amdgpu_fence_driver_sw_fini(adev);
        amdgpu_device_ip_fini(adev);
@@ -4072,10 +4109,16 @@ void amdgpu_device_fini_sw(struct amdgpu_device *adev)
 
        kfree(adev->bios);
        adev->bios = NULL;
-       if (amdgpu_device_supports_px(adev_to_drm(adev))) {
+
+       px = amdgpu_device_supports_px(adev_to_drm(adev));
+
+       if (px || (!pci_is_thunderbolt_attached(adev->pdev) &&
+                               apple_gmux_detect(NULL, NULL)))
                vga_switcheroo_unregister_client(adev->pdev);
+
+       if (px)
                vga_switcheroo_fini_domain_pm_ops(adev->dev);
-       }
+
        if ((adev->pdev->class >> 8) == PCI_CLASS_DISPLAY_VGA)
                vga_client_unregister(adev->pdev);
 
@@ -4471,7 +4514,11 @@ static int amdgpu_device_recover_vram(struct amdgpu_device *adev)
        dev_info(adev->dev, "recover vram bo from shadow start\n");
        mutex_lock(&adev->shadow_list_lock);
        list_for_each_entry(vmbo, &adev->shadow_list, shadow_list) {
-               shadow = &vmbo->bo;
+               /* If vm is compute context or adev is APU, shadow will be NULL */
+               if (!vmbo->shadow)
+                       continue;
+               shadow = vmbo->shadow;
+
                /* No need to recover an evicted BO */
                if (shadow->tbo.resource->mem_type != TTM_PL_TT ||
                    shadow->tbo.resource->start == AMDGPU_BO_INVALID_OFFSET ||
@@ -4669,42 +4716,42 @@ disabled:
 
 int amdgpu_device_mode1_reset(struct amdgpu_device *adev)
 {
-        u32 i;
-        int ret = 0;
+       u32 i;
+       int ret = 0;
 
-        amdgpu_atombios_scratch_regs_engine_hung(adev, true);
+       amdgpu_atombios_scratch_regs_engine_hung(adev, true);
 
-        dev_info(adev->dev, "GPU mode1 reset\n");
+       dev_info(adev->dev, "GPU mode1 reset\n");
 
-        /* disable BM */
-        pci_clear_master(adev->pdev);
+       /* disable BM */
+       pci_clear_master(adev->pdev);
 
-        amdgpu_device_cache_pci_state(adev->pdev);
+       amdgpu_device_cache_pci_state(adev->pdev);
 
-        if (amdgpu_dpm_is_mode1_reset_supported(adev)) {
-                dev_info(adev->dev, "GPU smu mode1 reset\n");
-                ret = amdgpu_dpm_mode1_reset(adev);
-        } else {
-                dev_info(adev->dev, "GPU psp mode1 reset\n");
-                ret = psp_gpu_reset(adev);
-        }
+       if (amdgpu_dpm_is_mode1_reset_supported(adev)) {
+               dev_info(adev->dev, "GPU smu mode1 reset\n");
+               ret = amdgpu_dpm_mode1_reset(adev);
+       } else {
+               dev_info(adev->dev, "GPU psp mode1 reset\n");
+               ret = psp_gpu_reset(adev);
+       }
 
-        if (ret)
-                dev_err(adev->dev, "GPU mode1 reset failed\n");
+       if (ret)
+               dev_err(adev->dev, "GPU mode1 reset failed\n");
 
-        amdgpu_device_load_pci_state(adev->pdev);
+       amdgpu_device_load_pci_state(adev->pdev);
 
-        /* wait for asic to come out of reset */
-        for (i = 0; i < adev->usec_timeout; i++) {
-                u32 memsize = adev->nbio.funcs->get_memsize(adev);
+       /* wait for asic to come out of reset */
+       for (i = 0; i < adev->usec_timeout; i++) {
+               u32 memsize = adev->nbio.funcs->get_memsize(adev);
 
-                if (memsize != 0xffffffff)
-                        break;
-                udelay(1);
-        }
+               if (memsize != 0xffffffff)
+                       break;
+               udelay(1);
+       }
 
-        amdgpu_atombios_scratch_regs_engine_hung(adev, false);
-        return ret;
+       amdgpu_atombios_scratch_regs_engine_hung(adev, false);
+       return ret;
 }
 
 int amdgpu_device_pre_asic_reset(struct amdgpu_device *adev,
@@ -5154,6 +5201,7 @@ static inline void amdgpu_device_stop_pending_resets(struct amdgpu_device *adev)
  *
  * @adev: amdgpu_device pointer
  * @job: which job trigger hang
+ * @reset_context: amdgpu reset context pointer
  *
  * Attempt to reset the GPU if it has hung (all asics).
  * Attempt to do soft-reset or full-reset and reinitialize Asic
@@ -5323,8 +5371,9 @@ retry:    /* Rest of adevs pre asic reset from XGMI hive. */
                if (r)
                        adev->asic_reset_res = r;
 
-               /* Aldebaran supports ras in SRIOV, so need resume ras during reset */
-               if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 2))
+               /* Aldebaran and gfx_11_0_3 support ras in SRIOV, so need resume ras during reset */
+               if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 2) ||
+                   adev->ip_versions[GC_HWIP][0] == IP_VERSION(11, 0, 3))
                        amdgpu_ras_resume(adev);
        } else {
                r = amdgpu_do_asic_reset(device_list_handle, reset_context);
@@ -5593,7 +5642,7 @@ int amdgpu_device_baco_enter(struct drm_device *dev)
        struct amdgpu_device *adev = drm_to_adev(dev);
        struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
 
-       if (!amdgpu_device_supports_baco(adev_to_drm(adev)))
+       if (!amdgpu_device_supports_baco(dev))
                return -ENOTSUPP;
 
        if (ras && adev->ras_enabled &&
@@ -5609,7 +5658,7 @@ int amdgpu_device_baco_exit(struct drm_device *dev)
        struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
        int ret = 0;
 
-       if (!amdgpu_device_supports_baco(adev_to_drm(adev)))
+       if (!amdgpu_device_supports_baco(dev))
                return -ENOTSUPP;
 
        ret = amdgpu_dpm_baco_exit(adev);