drm/amdgpu: add sysfs node for compute partition mode
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 3d98fc2ad36b04eb0d44fbc0e897608e5122a8c7..f432064a0535854900d02aaa979526556b505788 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -35,6 +35,7 @@
 #include <linux/devcoredump.h>
 #include <generated/utsrelease.h>
 #include <linux/pci-p2pdma.h>
+#include <linux/apple-gmux.h>
 
 #include <drm/drm_aperture.h>
 #include <drm/drm_atomic_helper.h>
@@ -601,7 +602,7 @@ u32 amdgpu_mm_rdoorbell(struct amdgpu_device *adev, u32 index)
        if (amdgpu_device_skip_hw_access(adev))
                return 0;
 
-       if (index < adev->doorbell.num_doorbells) {
+       if (index < adev->doorbell.num_kernel_doorbells) {
                return readl(adev->doorbell.ptr + index);
        } else {
                DRM_ERROR("reading beyond doorbell aperture: 0x%08x!\n", index);
@@ -624,7 +625,7 @@ void amdgpu_mm_wdoorbell(struct amdgpu_device *adev, u32 index, u32 v)
        if (amdgpu_device_skip_hw_access(adev))
                return;
 
-       if (index < adev->doorbell.num_doorbells) {
+       if (index < adev->doorbell.num_kernel_doorbells) {
                writel(v, adev->doorbell.ptr + index);
        } else {
                DRM_ERROR("writing beyond doorbell aperture: 0x%08x!\n", index);
@@ -645,7 +646,7 @@ u64 amdgpu_mm_rdoorbell64(struct amdgpu_device *adev, u32 index)
        if (amdgpu_device_skip_hw_access(adev))
                return 0;
 
-       if (index < adev->doorbell.num_doorbells) {
+       if (index < adev->doorbell.num_kernel_doorbells) {
                return atomic64_read((atomic64_t *)(adev->doorbell.ptr + index));
        } else {
                DRM_ERROR("reading beyond doorbell aperture: 0x%08x!\n", index);
@@ -668,7 +669,7 @@ void amdgpu_mm_wdoorbell64(struct amdgpu_device *adev, u32 index, u64 v)
        if (amdgpu_device_skip_hw_access(adev))
                return;
 
-       if (index < adev->doorbell.num_doorbells) {
+       if (index < adev->doorbell.num_kernel_doorbells) {
                atomic64_set((atomic64_t *)(adev->doorbell.ptr + index), v);
        } else {
                DRM_ERROR("writing beyond doorbell aperture: 0x%08x!\n", index);
@@ -679,20 +680,20 @@ void amdgpu_mm_wdoorbell64(struct amdgpu_device *adev, u32 index, u64 v)
  * amdgpu_device_indirect_rreg - read an indirect register
  *
  * @adev: amdgpu_device pointer
- * @pcie_index: mmio register offset
- * @pcie_data: mmio register offset
  * @reg_addr: indirect register address to read from
  *
  * Returns the value of indirect register @reg_addr
  */
 u32 amdgpu_device_indirect_rreg(struct amdgpu_device *adev,
-                               u32 pcie_index, u32 pcie_data,
                                u32 reg_addr)
 {
-       unsigned long flags;
-       u32 r;
+       unsigned long flags, pcie_index, pcie_data;
        void __iomem *pcie_index_offset;
        void __iomem *pcie_data_offset;
+       u32 r;
+
+       pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev);
+       pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev);
 
        spin_lock_irqsave(&adev->pcie_idx_lock, flags);
        pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
@@ -710,20 +711,20 @@ u32 amdgpu_device_indirect_rreg(struct amdgpu_device *adev,
  * amdgpu_device_indirect_rreg64 - read a 64bits indirect register
  *
  * @adev: amdgpu_device pointer
- * @pcie_index: mmio register offset
- * @pcie_data: mmio register offset
  * @reg_addr: indirect register address to read from
  *
  * Returns the value of indirect register @reg_addr
  */
 u64 amdgpu_device_indirect_rreg64(struct amdgpu_device *adev,
-                                 u32 pcie_index, u32 pcie_data,
                                  u32 reg_addr)
 {
-       unsigned long flags;
-       u64 r;
+       unsigned long flags, pcie_index, pcie_data;
        void __iomem *pcie_index_offset;
        void __iomem *pcie_data_offset;
+       u64 r;
+
+       pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev);
+       pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev);
 
        spin_lock_irqsave(&adev->pcie_idx_lock, flags);
        pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
@@ -753,13 +754,15 @@ u64 amdgpu_device_indirect_rreg64(struct amdgpu_device *adev,
  *
  */
 void amdgpu_device_indirect_wreg(struct amdgpu_device *adev,
-                                u32 pcie_index, u32 pcie_data,
                                 u32 reg_addr, u32 reg_data)
 {
-       unsigned long flags;
+       unsigned long flags, pcie_index, pcie_data;
        void __iomem *pcie_index_offset;
        void __iomem *pcie_data_offset;
 
+       pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev);
+       pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev);
+
        spin_lock_irqsave(&adev->pcie_idx_lock, flags);
        pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
        pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;
@@ -782,13 +785,15 @@ void amdgpu_device_indirect_wreg(struct amdgpu_device *adev,
  *
  */
 void amdgpu_device_indirect_wreg64(struct amdgpu_device *adev,
-                                  u32 pcie_index, u32 pcie_data,
                                   u32 reg_addr, u64 reg_data)
 {
-       unsigned long flags;
+       unsigned long flags, pcie_index, pcie_data;
        void __iomem *pcie_index_offset;
        void __iomem *pcie_data_offset;
 
+       pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev);
+       pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev);
+
        spin_lock_irqsave(&adev->pcie_idx_lock, flags);
        pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
        pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;
@@ -806,6 +811,18 @@ void amdgpu_device_indirect_wreg64(struct amdgpu_device *adev,
        spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
 }
 
+/**
+ * amdgpu_device_get_rev_id - query device rev_id
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Return device rev_id
+ */
+u32 amdgpu_device_get_rev_id(struct amdgpu_device *adev)
+{
+       return adev->nbio.funcs->get_rev_id(adev);
+}
+
 /**
  * amdgpu_invalid_rreg - dummy reg read function
  *
@@ -981,7 +998,7 @@ void amdgpu_device_program_register_sequence(struct amdgpu_device *adev,
        if (array_size % 3)
                return;
 
-       for (i = 0; i < array_size; i +=3) {
+       for (i = 0; i < array_size; i += 3) {
                reg = registers[i + 0];
                and_mask = registers[i + 1];
                or_mask = registers[i + 2];
@@ -1043,7 +1060,7 @@ static int amdgpu_device_doorbell_init(struct amdgpu_device *adev)
        if (adev->asic_type < CHIP_BONAIRE) {
                adev->doorbell.base = 0;
                adev->doorbell.size = 0;
-               adev->doorbell.num_doorbells = 0;
+               adev->doorbell.num_kernel_doorbells = 0;
                adev->doorbell.ptr = NULL;
                return 0;
        }
@@ -1058,27 +1075,27 @@ static int amdgpu_device_doorbell_init(struct amdgpu_device *adev)
        adev->doorbell.size = pci_resource_len(adev->pdev, 2);
 
        if (adev->enable_mes) {
-               adev->doorbell.num_doorbells =
+               adev->doorbell.num_kernel_doorbells =
                        adev->doorbell.size / sizeof(u32);
        } else {
-               adev->doorbell.num_doorbells =
+               adev->doorbell.num_kernel_doorbells =
                        min_t(u32, adev->doorbell.size / sizeof(u32),
                              adev->doorbell_index.max_assignment+1);
-               if (adev->doorbell.num_doorbells == 0)
+               if (adev->doorbell.num_kernel_doorbells == 0)
                        return -EINVAL;
 
                /* For Vega, reserve and map two pages on doorbell BAR since SDMA
                 * paging queue doorbell use the second page. The
                 * AMDGPU_DOORBELL64_MAX_ASSIGNMENT definition assumes all the
                 * doorbells are in the first page. So with paging queue enabled,
-                * the max num_doorbells should + 1 page (0x400 in dword)
+                * the max num_kernel_doorbells should + 1 page (0x400 in dword)
                 */
                if (adev->asic_type >= CHIP_VEGA10)
-                       adev->doorbell.num_doorbells += 0x400;
+                       adev->doorbell.num_kernel_doorbells += 0x400;
        }
 
        adev->doorbell.ptr = ioremap(adev->doorbell.base,
-                                    adev->doorbell.num_doorbells *
+                                    adev->doorbell.num_kernel_doorbells *
                                     sizeof(u32));
        if (adev->doorbell.ptr == NULL)
                return -ENOMEM;
@@ -1530,7 +1547,7 @@ static int amdgpu_device_check_arguments(struct amdgpu_device *adev)
                dev_warn(adev->dev, "sched jobs (%d) must be at least 4\n",
                         amdgpu_sched_jobs);
                amdgpu_sched_jobs = 4;
-       } else if (!is_power_of_2(amdgpu_sched_jobs)){
+       } else if (!is_power_of_2(amdgpu_sched_jobs)) {
                dev_warn(adev->dev, "sched jobs (%d) must be a power of 2\n",
                         amdgpu_sched_jobs);
                amdgpu_sched_jobs = roundup_pow_of_two(amdgpu_sched_jobs);
@@ -2167,7 +2184,6 @@ static int amdgpu_device_ip_early_init(struct amdgpu_device *adev)
                adev->has_pr3 = parent ? pci_pr3_present(parent) : false;
        }
 
-       amdgpu_amdkfd_device_probe(adev);
 
        adev->pm.pp_feature = amdgpu_pp_feature_mask;
        if (amdgpu_sriov_vf(adev) || sched_policy == KFD_SCHED_POLICY_NO_HWS)
@@ -2223,6 +2239,7 @@ static int amdgpu_device_ip_early_init(struct amdgpu_device *adev)
        if (!total)
                return -ENODEV;
 
+       amdgpu_amdkfd_device_probe(adev);
        adev->cg_flags &= amdgpu_cg_mask;
        adev->pg_flags &= amdgpu_pg_mask;
 
@@ -2348,7 +2365,7 @@ static int amdgpu_device_init_schedulers(struct amdgpu_device *adev)
                }
 
                r = drm_sched_init(&ring->sched, &amdgpu_sched_ops,
-                                  ring->num_hw_submission, amdgpu_job_hang_limit,
+                                  ring->num_hw_submission, 0,
                                   timeout, adev->reset_domain->wq,
                                   ring->sched_score, ring->name,
                                   adev->dev);
@@ -2522,8 +2539,6 @@ static int amdgpu_device_ip_init(struct amdgpu_device *adev)
        amdgpu_fru_get_product_info(adev);
 
 init_failed:
-       if (amdgpu_sriov_vf(adev))
-               amdgpu_virt_release_full_gpu(adev, true);
 
        return r;
 }
@@ -2744,8 +2759,9 @@ static int amdgpu_device_ip_late_init(struct amdgpu_device *adev)
                DRM_ERROR("enable mgpu fan boost failed (%d).\n", r);
 
        /* For passthrough configuration on arcturus and aldebaran, enable special handling SBR */
-       if (amdgpu_passthrough(adev) && ((adev->asic_type == CHIP_ARCTURUS && adev->gmc.xgmi.num_physical_nodes > 1)||
-                              adev->asic_type == CHIP_ALDEBARAN ))
+       if (amdgpu_passthrough(adev) &&
+           ((adev->asic_type == CHIP_ARCTURUS && adev->gmc.xgmi.num_physical_nodes > 1) ||
+            adev->asic_type == CHIP_ALDEBARAN))
                amdgpu_dpm_handle_passthrough_sbr(adev, true);
 
        if (adev->gmc.xgmi.num_physical_nodes > 1) {
@@ -3074,7 +3090,7 @@ static int amdgpu_device_ip_suspend_phase2(struct amdgpu_device *adev)
                }
                adev->ip_blocks[i].status.hw = false;
                /* handle putting the SMC in the appropriate state */
-               if(!amdgpu_sriov_vf(adev)){
+               if (!amdgpu_sriov_vf(adev)) {
                        if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) {
                                r = amdgpu_dpm_set_mp1_state(adev, adev->mp1_state);
                                if (r) {
@@ -3164,9 +3180,11 @@ static int amdgpu_device_ip_reinit_late_sriov(struct amdgpu_device *adev)
                AMD_IP_BLOCK_TYPE_DCE,
                AMD_IP_BLOCK_TYPE_GFX,
                AMD_IP_BLOCK_TYPE_SDMA,
+               AMD_IP_BLOCK_TYPE_MES,
                AMD_IP_BLOCK_TYPE_UVD,
                AMD_IP_BLOCK_TYPE_VCE,
-               AMD_IP_BLOCK_TYPE_VCN
+               AMD_IP_BLOCK_TYPE_VCN,
+               AMD_IP_BLOCK_TYPE_JPEG
        };
 
        for (i = 0; i < ARRAY_SIZE(ip_order); i++) {
@@ -3286,9 +3304,11 @@ static int amdgpu_device_ip_resume(struct amdgpu_device *adev)
 {
        int r;
 
-       r = amdgpu_amdkfd_resume_iommu(adev);
-       if (r)
-               return r;
+       if (!adev->in_s0ix) {
+               r = amdgpu_amdkfd_resume_iommu(adev);
+               if (r)
+                       return r;
+       }
 
        r = amdgpu_device_ip_resume_phase1(adev);
        if (r)
@@ -3559,6 +3579,7 @@ int amdgpu_device_init(struct amdgpu_device *adev,
        int r, i;
        bool px = false;
        u32 max_MBps;
+       int tmp;
 
        adev->shutdown = false;
        adev->flags = flags;
@@ -3613,6 +3634,7 @@ int amdgpu_device_init(struct amdgpu_device *adev,
        mutex_init(&adev->srbm_mutex);
        mutex_init(&adev->gfx.pipe_reserve_mutex);
        mutex_init(&adev->gfx.gfx_off_mutex);
+       mutex_init(&adev->gfx.partition_mutex);
        mutex_init(&adev->grbm_idx_mutex);
        mutex_init(&adev->mn_lock);
        mutex_init(&adev->virt.vf_errors.lock);
@@ -3737,6 +3759,12 @@ int amdgpu_device_init(struct amdgpu_device *adev,
                adev->have_atomics_support = ((struct amd_sriov_msg_pf2vf_info *)
                        adev->virt.fw_reserve.p_pf2vf)->pcie_atomic_ops_support_flags ==
                        (PCI_EXP_DEVCAP2_ATOMIC_COMP32 | PCI_EXP_DEVCAP2_ATOMIC_COMP64);
+       /* APUs with gfx9 onwards don't rely on PCIe atomics; the internal
+        * path natively supports atomics, so set have_atomics_support to true.
+        */
+       else if ((adev->flags & AMD_IS_APU) &&
+               (adev->ip_versions[GC_HWIP][0] > IP_VERSION(9, 0, 0)))
+               adev->have_atomics_support = true;
        else
                adev->have_atomics_support =
                        !pci_enable_atomic_ops_to_root(adev->pdev,
@@ -3780,7 +3808,13 @@ int amdgpu_device_init(struct amdgpu_device *adev,
                                }
                        }
                } else {
+                       tmp = amdgpu_reset_method;
+                       /* It should do a default reset when loading or reloading the driver,
+                        * regardless of the module parameter reset_method.
+                        */
+                       amdgpu_reset_method = AMD_RESET_METHOD_NONE;
                        r = amdgpu_asic_reset(adev);
+                       amdgpu_reset_method = tmp;
                        if (r) {
                                dev_err(adev->dev, "asic reset on init failed\n");
                                goto failed;
@@ -3788,8 +3822,6 @@ int amdgpu_device_init(struct amdgpu_device *adev,
                }
        }
 
-       pci_enable_pcie_error_reporting(adev->pdev);
-
        /* Post card if necessary */
        if (amdgpu_device_need_post(adev)) {
                if (!adev->bios) {
@@ -3840,18 +3872,6 @@ fence_driver_init:
 
        r = amdgpu_device_ip_init(adev);
        if (r) {
-               /* failed in exclusive mode due to timeout */
-               if (amdgpu_sriov_vf(adev) &&
-                   !amdgpu_sriov_runtime(adev) &&
-                   amdgpu_virt_mmio_blocked(adev) &&
-                   !amdgpu_virt_wait_reset(adev)) {
-                       dev_err(adev->dev, "VF exclusive mode timeout\n");
-                       /* Don't send request since VF is inactive. */
-                       adev->virt.caps &= ~AMDGPU_SRIOV_CAPS_RUNTIME;
-                       adev->virt.ops = NULL;
-                       r = -EAGAIN;
-                       goto release_ras_con;
-               }
                dev_err(adev->dev, "amdgpu_device_ip_init failed\n");
                amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_AMDGPU_INIT_FAIL, 0, 0);
                goto release_ras_con;
@@ -3879,11 +3899,8 @@ fence_driver_init:
        adev->mm_stats.log2_max_MBps = ilog2(max(1u, max_MBps));
 
        r = amdgpu_pm_sysfs_init(adev);
-       if (r) {
-               adev->pm_sysfs_en = false;
-               DRM_ERROR("registering pm debugfs failed (%d).\n", r);
-       } else
-               adev->pm_sysfs_en = true;
+       if (r)
+               DRM_ERROR("registering pm sysfs failed (%d).\n", r);
 
        r = amdgpu_ucode_sysfs_init(adev);
        if (r) {
@@ -3923,8 +3940,10 @@ fence_driver_init:
                                   msecs_to_jiffies(AMDGPU_RESUME_MS));
        }
 
-       if (amdgpu_sriov_vf(adev))
+       if (amdgpu_sriov_vf(adev)) {
+               amdgpu_virt_release_full_gpu(adev, true);
                flush_delayed_work(&adev->delayed_init_work);
+       }
 
        r = sysfs_create_files(&adev->dev->kobj, amdgpu_dev_attributes);
        if (r)
@@ -3945,12 +3964,15 @@ fence_driver_init:
        if ((adev->pdev->class >> 8) == PCI_CLASS_DISPLAY_VGA)
                vga_client_register(adev->pdev, amdgpu_device_vga_set_decode);
 
-       if (amdgpu_device_supports_px(ddev)) {
-               px = true;
+       px = amdgpu_device_supports_px(ddev);
+
+       if (px || (!pci_is_thunderbolt_attached(adev->pdev) &&
+                               apple_gmux_detect(NULL, NULL)))
                vga_switcheroo_register_client(adev->pdev,
                                               &amdgpu_switcheroo_ops, px);
+
+       if (px)
                vga_switcheroo_init_domain_pm_ops(adev->dev, &adev->vga_pm_domain);
-       }
 
        if (adev->gmc.xgmi.pending_reset)
                queue_delayed_work(system_wq, &mgpu_info.delayed_reset_work,
@@ -3961,6 +3983,20 @@ fence_driver_init:
        return 0;
 
 release_ras_con:
+       if (amdgpu_sriov_vf(adev))
+               amdgpu_virt_release_full_gpu(adev, true);
+
+       /* failed in exclusive mode due to timeout */
+       if (amdgpu_sriov_vf(adev) &&
+               !amdgpu_sriov_runtime(adev) &&
+               amdgpu_virt_mmio_blocked(adev) &&
+               !amdgpu_virt_wait_reset(adev)) {
+               dev_err(adev->dev, "VF exclusive mode timeout\n");
+               /* Don't send request since VF is inactive. */
+               adev->virt.caps &= ~AMDGPU_SRIOV_CAPS_RUNTIME;
+               adev->virt.ops = NULL;
+               r = -EAGAIN;
+       }
        amdgpu_release_ras_context(adev);
 
 failed:
@@ -4015,7 +4051,7 @@ void amdgpu_device_fini_hw(struct amdgpu_device *adev)
 
        /* disable all interrupts */
        amdgpu_irq_disable_all(adev);
-       if (adev->mode_info.mode_config_initialized){
+       if (adev->mode_info.mode_config_initialized) {
                if (!drm_drv_uses_atomic_modeset(adev_to_drm(adev)))
                        drm_helper_force_disable_all(adev_to_drm(adev));
                else
@@ -4026,7 +4062,7 @@ void amdgpu_device_fini_hw(struct amdgpu_device *adev)
        if (adev->mman.initialized)
                drain_workqueue(adev->mman.bdev.wq);
 
-       if (adev->pm_sysfs_en)
+       if (adev->pm.sysfs_initialized)
                amdgpu_pm_sysfs_fini(adev);
        if (adev->ucode_sysfs_en)
                amdgpu_ucode_sysfs_fini(adev);
@@ -4054,6 +4090,7 @@ void amdgpu_device_fini_hw(struct amdgpu_device *adev)
 void amdgpu_device_fini_sw(struct amdgpu_device *adev)
 {
        int idx;
+       bool px;
 
        amdgpu_fence_driver_sw_fini(adev);
        amdgpu_device_ip_fini(adev);
@@ -4072,10 +4109,16 @@ void amdgpu_device_fini_sw(struct amdgpu_device *adev)
 
        kfree(adev->bios);
        adev->bios = NULL;
-       if (amdgpu_device_supports_px(adev_to_drm(adev))) {
+
+       px = amdgpu_device_supports_px(adev_to_drm(adev));
+
+       if (px || (!pci_is_thunderbolt_attached(adev->pdev) &&
+                               apple_gmux_detect(NULL, NULL)))
                vga_switcheroo_unregister_client(adev->pdev);
+
+       if (px)
                vga_switcheroo_fini_domain_pm_ops(adev->dev);
-       }
+
        if ((adev->pdev->class >> 8) == PCI_CLASS_DISPLAY_VGA)
                vga_client_unregister(adev->pdev);
 
@@ -4471,7 +4514,11 @@ static int amdgpu_device_recover_vram(struct amdgpu_device *adev)
        dev_info(adev->dev, "recover vram bo from shadow start\n");
        mutex_lock(&adev->shadow_list_lock);
        list_for_each_entry(vmbo, &adev->shadow_list, shadow_list) {
-               shadow = &vmbo->bo;
+               /* If vm is compute context or adev is APU, shadow will be NULL */
+               if (!vmbo->shadow)
+                       continue;
+               shadow = vmbo->shadow;
+
                /* No need to recover an evicted BO */
                if (shadow->tbo.resource->mem_type != TTM_PL_TT ||
                    shadow->tbo.resource->start == AMDGPU_BO_INVALID_OFFSET ||
@@ -4669,42 +4716,42 @@ disabled:
 
 int amdgpu_device_mode1_reset(struct amdgpu_device *adev)
 {
-        u32 i;
-        int ret = 0;
+       u32 i;
+       int ret = 0;
 
-        amdgpu_atombios_scratch_regs_engine_hung(adev, true);
+       amdgpu_atombios_scratch_regs_engine_hung(adev, true);
 
-        dev_info(adev->dev, "GPU mode1 reset\n");
+       dev_info(adev->dev, "GPU mode1 reset\n");
 
-        /* disable BM */
-        pci_clear_master(adev->pdev);
+       /* disable BM */
+       pci_clear_master(adev->pdev);
 
-        amdgpu_device_cache_pci_state(adev->pdev);
+       amdgpu_device_cache_pci_state(adev->pdev);
 
-        if (amdgpu_dpm_is_mode1_reset_supported(adev)) {
-                dev_info(adev->dev, "GPU smu mode1 reset\n");
-                ret = amdgpu_dpm_mode1_reset(adev);
-        } else {
-                dev_info(adev->dev, "GPU psp mode1 reset\n");
-                ret = psp_gpu_reset(adev);
-        }
+       if (amdgpu_dpm_is_mode1_reset_supported(adev)) {
+               dev_info(adev->dev, "GPU smu mode1 reset\n");
+               ret = amdgpu_dpm_mode1_reset(adev);
+       } else {
+               dev_info(adev->dev, "GPU psp mode1 reset\n");
+               ret = psp_gpu_reset(adev);
+       }
 
-        if (ret)
-                dev_err(adev->dev, "GPU mode1 reset failed\n");
+       if (ret)
+               dev_err(adev->dev, "GPU mode1 reset failed\n");
 
-        amdgpu_device_load_pci_state(adev->pdev);
+       amdgpu_device_load_pci_state(adev->pdev);
 
-        /* wait for asic to come out of reset */
-        for (i = 0; i < adev->usec_timeout; i++) {
-                u32 memsize = adev->nbio.funcs->get_memsize(adev);
+       /* wait for asic to come out of reset */
+       for (i = 0; i < adev->usec_timeout; i++) {
+               u32 memsize = adev->nbio.funcs->get_memsize(adev);
 
-                if (memsize != 0xffffffff)
-                        break;
-                udelay(1);
-        }
+               if (memsize != 0xffffffff)
+                       break;
+               udelay(1);
+       }
 
-        amdgpu_atombios_scratch_regs_engine_hung(adev, false);
-        return ret;
+       amdgpu_atombios_scratch_regs_engine_hung(adev, false);
+       return ret;
 }
 
 int amdgpu_device_pre_asic_reset(struct amdgpu_device *adev,
@@ -5154,6 +5201,7 @@ static inline void amdgpu_device_stop_pending_resets(struct amdgpu_device *adev)
  *
  * @adev: amdgpu_device pointer
  * @job: which job trigger hang
+ * @reset_context: amdgpu reset context pointer
  *
  * Attempt to reset the GPU if it has hung (all asics).
  * Attempt to do soft-reset or full-reset and reinitialize Asic
@@ -5323,8 +5371,9 @@ retry:    /* Rest of adevs pre asic reset from XGMI hive. */
                if (r)
                        adev->asic_reset_res = r;
 
-               /* Aldebaran supports ras in SRIOV, so need resume ras during reset */
-               if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 2))
+               /* Aldebaran and gfx_11_0_3 support ras in SRIOV, so need resume ras during reset */
+               if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 2) ||
+                   adev->ip_versions[GC_HWIP][0] == IP_VERSION(11, 0, 3))
                        amdgpu_ras_resume(adev);
        } else {
                r = amdgpu_do_asic_reset(device_list_handle, reset_context);
@@ -5593,7 +5642,7 @@ int amdgpu_device_baco_enter(struct drm_device *dev)
        struct amdgpu_device *adev = drm_to_adev(dev);
        struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
 
-       if (!amdgpu_device_supports_baco(adev_to_drm(adev)))
+       if (!amdgpu_device_supports_baco(dev))
                return -ENOTSUPP;
 
        if (ras && adev->ras_enabled &&
@@ -5609,7 +5658,7 @@ int amdgpu_device_baco_exit(struct drm_device *dev)
        struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
        int ret = 0;
 
-       if (!amdgpu_device_supports_baco(adev_to_drm(adev)))
+       if (!amdgpu_device_supports_baco(dev))
                return -ENOTSUPP;
 
        ret = amdgpu_dpm_baco_exit(adev);