drm/amdgpu: Use init level for pending_reset flag
author Lijo Lazar <lijo.lazar@amd.com>
Mon, 19 Aug 2024 07:53:42 +0000 (13:23 +0530)
committer Alex Deucher <alexander.deucher@amd.com>
Thu, 26 Sep 2024 21:06:18 +0000 (17:06 -0400)
Drop pending_reset flag in gmc block. Instead use init level to
determine which type of init is preferred - in this case MINIMAL.

Signed-off-by: Lijo Lazar <lijo.lazar@amd.com>
Acked-by: Rajneesh Bhardwaj <rajneesh.bhardwaj@amd.com>
Tested-by: Rajneesh Bhardwaj <rajneesh.bhardwaj@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c
drivers/gpu/drm/amd/pm/swsmu/smu11/smu_v11_0.c

index dcab05744f56faf1c937336143bb9b0437a5618e..d162a7d6adc5dde25f94c3f3ed46c75eda73b61c 100644 (file)
@@ -1699,7 +1699,7 @@ bool amdgpu_device_need_post(struct amdgpu_device *adev)
        }
 
        /* Don't post if we need to reset whole hive on init */
-       if (adev->gmc.xgmi.pending_reset)
+       if (adev->init_lvl->level == AMDGPU_INIT_LEVEL_MINIMAL_XGMI)
                return false;
 
        if (adev->has_hw_reset) {
@@ -3015,7 +3015,7 @@ static int amdgpu_device_ip_init(struct amdgpu_device *adev)
                amdgpu_ttm_set_buffer_funcs_status(adev, true);
 
        /* Don't init kfd if whole hive need to be reset during init */
-       if (!adev->gmc.xgmi.pending_reset) {
+       if (adev->init_lvl->level != AMDGPU_INIT_LEVEL_MINIMAL_XGMI) {
                kgd2kfd_init_zone_device(adev);
                amdgpu_amdkfd_device_init(adev);
        }
@@ -3529,14 +3529,9 @@ static int amdgpu_device_ip_suspend_phase2(struct amdgpu_device *adev)
                }
 
                /* skip unnecessary suspend if we do not initialize them yet */
-               if (adev->gmc.xgmi.pending_reset &&
-                   !(adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
-                     adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC ||
-                     adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
-                     adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH)) {
-                       adev->ip_blocks[i].status.hw = false;
+               if (!amdgpu_ip_member_of_hwini(
+                           adev, adev->ip_blocks[i].version->type))
                        continue;
-               }
 
                /* skip suspend of gfx/mes and psp for S0ix
                 * gfx is in gfxoff state, so on resume it will exit gfxoff just
@@ -4351,20 +4346,8 @@ int amdgpu_device_init(struct amdgpu_device *adev,
        if (!amdgpu_sriov_vf(adev) && amdgpu_asic_need_reset_on_init(adev)) {
                if (adev->gmc.xgmi.num_physical_nodes) {
                        dev_info(adev->dev, "Pending hive reset.\n");
-                       adev->gmc.xgmi.pending_reset = true;
-                       /* Only need to init necessary block for SMU to handle the reset */
-                       for (i = 0; i < adev->num_ip_blocks; i++) {
-                               if (!adev->ip_blocks[i].status.valid)
-                                       continue;
-                               if (!(adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
-                                     adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
-                                     adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH ||
-                                     adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC)) {
-                                       DRM_DEBUG("IP %s disabled for hw_init.\n",
-                                               adev->ip_blocks[i].version->funcs->name);
-                                       adev->ip_blocks[i].status.hw = true;
-                               }
-                       }
+                       amdgpu_set_init_level(adev,
+                                             AMDGPU_INIT_LEVEL_MINIMAL_XGMI);
                } else if (amdgpu_ip_version(adev, MP1_HWIP, 0) == IP_VERSION(13, 0, 10) &&
                                   !amdgpu_device_has_display_hardware(adev)) {
                                        r = psp_gpu_reset(adev);
@@ -4472,7 +4455,7 @@ fence_driver_init:
        /* enable clockgating, etc. after ib tests, etc. since some blocks require
         * explicit gating rather than handling it automatically.
         */
-       if (!adev->gmc.xgmi.pending_reset) {
+       if (adev->init_lvl->level != AMDGPU_INIT_LEVEL_MINIMAL_XGMI) {
                r = amdgpu_device_ip_late_init(adev);
                if (r) {
                        dev_err(adev->dev, "amdgpu_device_ip_late_init failed\n");
@@ -4549,7 +4532,7 @@ fence_driver_init:
        if (px)
                vga_switcheroo_init_domain_pm_ops(adev->dev, &adev->vga_pm_domain);
 
-       if (adev->gmc.xgmi.pending_reset)
+       if (adev->init_lvl->level == AMDGPU_INIT_LEVEL_MINIMAL_XGMI)
                queue_delayed_work(system_wq, &mgpu_info.delayed_reset_work,
                                   msecs_to_jiffies(AMDGPU_RESUME_MS));
 
@@ -5443,7 +5426,6 @@ int amdgpu_do_asic_reset(struct list_head *device_list_handle,
                list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
                        /* For XGMI run all resets in parallel to speed up the process */
                        if (tmp_adev->gmc.xgmi.num_physical_nodes > 1) {
-                               tmp_adev->gmc.xgmi.pending_reset = false;
                                if (!queue_work(system_unbound_wq, &tmp_adev->xgmi_reset_work))
                                        r = -EALREADY;
                        } else
index db0763ffeff7d46a9ba772e042e9f9b55ed853fe..69fa3c87dbe18429bd15db46065b63d9fc579e25 100644 (file)
@@ -2512,7 +2512,6 @@ static void amdgpu_drv_delayed_reset_work_handler(struct work_struct *work)
        for (i = 0; i < mgpu_info.num_dgpu; i++) {
                adev = mgpu_info.gpu_ins[i].adev;
                flush_work(&adev->xgmi_reset_work);
-               adev->gmc.xgmi.pending_reset = false;
        }
 
        /* reset function will rebuild the xgmi hive info , clear it now */
index 4d951a1baefab7bcd347e1b6a14c07454d513843..33b2adffd58b1062af6c36ed7408d436c64f039d 100644 (file)
@@ -182,7 +182,6 @@ struct amdgpu_xgmi {
        bool supported;
        struct ras_common_if *ras_if;
        bool connected_to_cpu;
-       bool pending_reset;
        struct amdgpu_xgmi_ras *ras;
 };
 
index 4319bca0f6c4b00917b7ef42b26c6c7118856099..6665d9abb27b995aa08b2b62a83eb0cea5b24e45 100644 (file)
@@ -3185,7 +3185,7 @@ int amdgpu_ras_recovery_init(struct amdgpu_device *adev)
         * when the GPU is pending on XGMI reset during probe time
         * (Mostly after second bus reset), skip it now
         */
-       if (adev->gmc.xgmi.pending_reset)
+       if (adev->init_lvl->level == AMDGPU_INIT_LEVEL_MINIMAL_XGMI)
                return 0;
        ret = amdgpu_ras_eeprom_init(&con->eeprom_control);
        /*
index 7de449fae1e3ad9354b78207a4e2a8a15984eb42..74135d611cba5100226b0d7c26ce934881d07330 100644 (file)
@@ -860,7 +860,7 @@ int amdgpu_xgmi_add_device(struct amdgpu_device *adev)
        if (!adev->gmc.xgmi.supported)
                return 0;
 
-       if (!adev->gmc.xgmi.pending_reset &&
+       if ((adev->init_lvl->level != AMDGPU_INIT_LEVEL_MINIMAL_XGMI) &&
            amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_PSP)) {
                ret = psp_xgmi_initialize(&adev->psp, false, true);
                if (ret) {
@@ -907,7 +907,7 @@ int amdgpu_xgmi_add_device(struct amdgpu_device *adev)
 
        task_barrier_add_task(&hive->tb);
 
-       if (!adev->gmc.xgmi.pending_reset &&
+       if ((adev->init_lvl->level != AMDGPU_INIT_LEVEL_MINIMAL_XGMI) &&
            amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_PSP)) {
                list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head) {
                        /* update node list for other device in the hive */
@@ -985,7 +985,7 @@ int amdgpu_xgmi_add_device(struct amdgpu_device *adev)
                }
        }
 
-       if (!ret && !adev->gmc.xgmi.pending_reset)
+       if (!ret && (adev->init_lvl->level != AMDGPU_INIT_LEVEL_MINIMAL_XGMI))
                ret = amdgpu_xgmi_sysfs_add_dev_info(adev, hive);
 
 exit_unlock:
index 16fcd9dcd202e02b831635ca1f5412d36a83dc75..8981302b19c8e1b1d539ab05874f6e81f29ebb64 100644 (file)
@@ -1616,7 +1616,8 @@ int smu_v11_0_baco_set_state(struct smu_context *smu, enum smu_baco_state state)
                        break;
                default:
                        if (!ras || !adev->ras_enabled ||
-                           adev->gmc.xgmi.pending_reset) {
+                           (adev->init_lvl->level ==
+                            AMDGPU_INIT_LEVEL_MINIMAL_XGMI)) {
                                if (amdgpu_ip_version(adev, MP1_HWIP, 0) ==
                                    IP_VERSION(11, 0, 2)) {
                                        data = RREG32_SOC15(THM, 0, mmTHM_BACO_CNTL_ARCT);