drm/amdgpu: Modify sdma block to fit for the unified ras block data and ops
author yipechai <YiPeng.Chai@amd.com>
Wed, 5 Jan 2022 07:30:37 +0000 (15:30 +0800)
committer Alex Deucher <alexander.deucher@amd.com>
Fri, 14 Jan 2022 22:52:00 +0000 (17:52 -0500)
1. Modify the sdma block to fit the unified ras block data and ops.
2. Change amdgpu_sdma_ras_funcs to amdgpu_sdma_ras, and drop the _funcs suffix from the corresponding variable name.
3. Remove the const qualifier from the sdma ras variable so that the sdma ras block can be inserted into the amdgpu device ras block linked list.
4. Invoke the amdgpu_ras_register_ras_block function to register the sdma ras block into the amdgpu device ras block linked list.
5. Remove the now-redundant sdma code in amdgpu_ras.c after switching to the unified ras block.
6. Fill in the unified ras block's .name, .block, .ras_late_init and .ras_fini for all sdma versions. If the selected sdma version defines its own .ras_late_init and .ras_fini, those functions take effect; if not, they default to amdgpu_sdma_ras_late_init and amdgpu_sdma_ras_fini (see the sketch below).
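
As an illustration only (not a literal hunk from this patch), the registration and
default-fill pattern from points 4 and 6, simplified from the
sdma_v4_0_set_ras_funcs() change below:

	if (adev->sdma.ras) {
		/* add the sdma ras block to the per-device ras block list */
		amdgpu_ras_register_ras_block(adev, &adev->sdma.ras->ras_block);

		strcpy(adev->sdma.ras->ras_block.name, "sdma");
		adev->sdma.ras->ras_block.block = AMDGPU_RAS_BLOCK__SDMA;

		/* fall back to the common sdma helpers when the IP version
		 * does not provide its own init/fini callbacks
		 */
		if (!adev->sdma.ras->ras_block.ras_late_init)
			adev->sdma.ras->ras_block.ras_late_init = amdgpu_sdma_ras_late_init;
		if (!adev->sdma.ras->ras_block.ras_fini)
			adev->sdma.ras->ras_block.ras_fini = amdgpu_sdma_ras_fini;
	}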

v2: squash in warning fix (Alex)

Signed-off-by: yipechai <YiPeng.Chai@amd.com>
Reviewed-by: Hawking Zhang <Hawking.Zhang@amd.com>
Reviewed-by: John Clements <john.clements@amd.com>
Reviewed-by: Tao Zhou <tao.zhou1@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h
drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
drivers/gpu/drm/amd/amdgpu/sdma_v4_4.c
drivers/gpu/drm/amd/amdgpu/sdma_v4_4.h

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
index fba1c415a2a8331d3d84bf1effed4f35c9bcd634..7c21eab95fc88f584dfdf8753f583bc83ff8cb58 100644
@@ -967,7 +967,6 @@ int amdgpu_ras_query_error_status(struct amdgpu_device *adev,
        struct amdgpu_ras_block_object* block_obj = NULL;
        struct ras_manager *obj = amdgpu_ras_find_obj(adev, &info->head);
        struct ras_err_data err_data = {0, 0, 0, NULL};
-       int i;
 
        if (!obj)
                return -EINVAL;
@@ -979,12 +978,6 @@ int amdgpu_ras_query_error_status(struct amdgpu_device *adev,
                amdgpu_ras_get_ecc_info(adev, &err_data);
                break;
        case AMDGPU_RAS_BLOCK__SDMA:
-               if (adev->sdma.funcs->query_ras_error_count) {
-                       for (i = 0; i < adev->sdma.num_instances; i++)
-                               adev->sdma.funcs->query_ras_error_count(adev, i,
-                                                                       &err_data);
-               }
-               break;
        case AMDGPU_RAS_BLOCK__GFX:
        case AMDGPU_RAS_BLOCK__MMHUB:
                if (!block_obj || !block_obj->hw_ops)   {
@@ -1090,9 +1083,6 @@ int amdgpu_ras_reset_error_status(struct amdgpu_device *adev,
                        block_obj->hw_ops->reset_ras_error_status(adev);
                break;
        case AMDGPU_RAS_BLOCK__SDMA:
-               if (adev->sdma.funcs->reset_ras_error_count)
-                       adev->sdma.funcs->reset_ras_error_count(adev);
-               break;
        case AMDGPU_RAS_BLOCK__HDP:
                if (!block_obj || !block_obj->hw_ops)   {
                        dev_info(adev->dev, "%s doesn't config ras function \n", ras_block_str(block));
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h
index f8fb755e3aa64e42f1bff0830b61ed279abd58cf..eaee12ab651811dd04bba35723c313e526e2f560 100644
@@ -23,6 +23,7 @@
 
 #ifndef __AMDGPU_SDMA_H__
 #define __AMDGPU_SDMA_H__
+#include "amdgpu_ras.h"
 
 /* max number of IP instances */
 #define AMDGPU_MAX_SDMA_INSTANCES              8
@@ -50,13 +51,8 @@ struct amdgpu_sdma_instance {
        bool                    burst_nop;
 };
 
-struct amdgpu_sdma_ras_funcs {
-       int (*ras_late_init)(struct amdgpu_device *adev,
-                       void *ras_ih_info);
-       void (*ras_fini)(struct amdgpu_device *adev);
-       int (*query_ras_error_count)(struct amdgpu_device *adev,
-                       uint32_t instance, void *ras_error_status);
-       void (*reset_ras_error_count)(struct amdgpu_device *adev);
+struct amdgpu_sdma_ras {
+       struct amdgpu_ras_block_object ras_block;
 };
 
 struct amdgpu_sdma {
@@ -73,7 +69,7 @@ struct amdgpu_sdma {
        uint32_t                    srbm_soft_reset;
        bool                    has_page_queue;
        struct ras_common_if    *ras_if;
-       const struct amdgpu_sdma_ras_funcs      *funcs;
+       struct amdgpu_sdma_ras  *ras;
 };
 
 /*
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
index e8e4749e9c7972491325f5dae960f6495d774555..3c1483dc113e87ec0f72e9e3b5b6129586f9eba0 100644
@@ -1892,13 +1892,13 @@ static int sdma_v4_0_late_init(void *handle)
        sdma_v4_0_setup_ulv(adev);
 
        if (!amdgpu_persistent_edc_harvesting_supported(adev)) {
-               if (adev->sdma.funcs &&
-                   adev->sdma.funcs->reset_ras_error_count)
-                       adev->sdma.funcs->reset_ras_error_count(adev);
+               if (adev->sdma.ras && adev->sdma.ras->ras_block.hw_ops &&
+                   adev->sdma.ras->ras_block.hw_ops->reset_ras_error_count)
+                       adev->sdma.ras->ras_block.hw_ops->reset_ras_error_count(adev);
        }
 
-       if (adev->sdma.funcs && adev->sdma.funcs->ras_late_init)
-               return adev->sdma.funcs->ras_late_init(adev, &ih_info);
+       if (adev->sdma.ras && adev->sdma.ras->ras_block.ras_late_init)
+               return adev->sdma.ras->ras_block.ras_late_init(adev, &ih_info);
        else
                return 0;
 }
@@ -2001,8 +2001,9 @@ static int sdma_v4_0_sw_fini(void *handle)
        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
        int i;
 
-       if (adev->sdma.funcs && adev->sdma.funcs->ras_fini)
-               adev->sdma.funcs->ras_fini(adev);
+       if (adev->sdma.ras && adev->sdma.ras->ras_block.hw_ops &&
+               adev->sdma.ras->ras_block.ras_fini)
+               adev->sdma.ras->ras_block.ras_fini(adev);
 
        for (i = 0; i < adev->sdma.num_instances; i++) {
                amdgpu_ring_fini(&adev->sdma.instance[i].ring);
@@ -2740,7 +2741,7 @@ static void sdma_v4_0_get_ras_error_count(uint32_t value,
        }
 }
 
-static int sdma_v4_0_query_ras_error_count(struct amdgpu_device *adev,
+static int sdma_v4_0_query_ras_error_count_by_instance(struct amdgpu_device *adev,
                        uint32_t instance, void *ras_error_status)
 {
        struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status;
@@ -2762,6 +2763,18 @@ static int sdma_v4_0_query_ras_error_count(struct amdgpu_device *adev,
        return 0;
 };
 
+static void sdma_v4_0_query_ras_error_count(struct amdgpu_device *adev,  void *ras_error_status)
+{
+       int i = 0;
+       for (i = 0; i < adev->sdma.num_instances; i++) {
+               if (sdma_v4_0_query_ras_error_count_by_instance(adev, i, ras_error_status))
+               {
+                       dev_err(adev->dev, "Query ras error count failed in SDMA%d \n", i);
+                       return;
+               }
+       }
+}
+
 static void sdma_v4_0_reset_ras_error_count(struct amdgpu_device *adev)
 {
        int i;
@@ -2773,26 +2786,45 @@ static void sdma_v4_0_reset_ras_error_count(struct amdgpu_device *adev)
        }
 }
 
-static const struct amdgpu_sdma_ras_funcs sdma_v4_0_ras_funcs = {
-       .ras_late_init = amdgpu_sdma_ras_late_init,
-       .ras_fini = amdgpu_sdma_ras_fini,
+const struct amdgpu_ras_block_hw_ops sdma_v4_0_ras_hw_ops = {
        .query_ras_error_count = sdma_v4_0_query_ras_error_count,
        .reset_ras_error_count = sdma_v4_0_reset_ras_error_count,
 };
 
+static struct amdgpu_sdma_ras sdma_v4_0_ras = {
+       .ras_block = {
+               .hw_ops = &sdma_v4_0_ras_hw_ops,
+       },
+};
+
 static void sdma_v4_0_set_ras_funcs(struct amdgpu_device *adev)
 {
        switch (adev->ip_versions[SDMA0_HWIP][0]) {
        case IP_VERSION(4, 2, 0):
        case IP_VERSION(4, 2, 2):
-               adev->sdma.funcs = &sdma_v4_0_ras_funcs;
+               adev->sdma.ras = &sdma_v4_0_ras;
                break;
        case IP_VERSION(4, 4, 0):
-               adev->sdma.funcs = &sdma_v4_4_ras_funcs;
+               adev->sdma.ras = &sdma_v4_4_ras;
                break;
        default:
                break;
        }
+
+       if (adev->sdma.ras) {
+               amdgpu_ras_register_ras_block(adev, &adev->sdma.ras->ras_block);
+
+               strcpy(adev->sdma.ras->ras_block.name,"sdma");
+               adev->sdma.ras->ras_block.block = AMDGPU_RAS_BLOCK__SDMA;
+
+               /* If don't define special ras_late_init function, use default ras_late_init */
+               if (!adev->sdma.ras->ras_block.ras_late_init)
+                       adev->sdma.ras->ras_block.ras_late_init = amdgpu_sdma_ras_late_init;
+
+               /* If don't define special ras_fini function, use default ras_fini */
+               if (!adev->sdma.ras->ras_block.ras_fini)
+                       adev->sdma.ras->ras_block.ras_fini = amdgpu_sdma_ras_fini;
+       }
 }
 
 const struct amdgpu_ip_block_version sdma_v4_0_ip_block = {
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_4.c b/drivers/gpu/drm/amd/amdgpu/sdma_v4_4.c
index bf95007f08432cb96fce5a8b8b04381de6d97a69..5c1ba1116e5ce07ec1f1cf9f60232391407d5797 100644
@@ -188,7 +188,7 @@ static void sdma_v4_4_get_ras_error_count(struct amdgpu_device *adev,
        }
 }
 
-static int sdma_v4_4_query_ras_error_count(struct amdgpu_device *adev,
+static int sdma_v4_4_query_ras_error_count_by_instance(struct amdgpu_device *adev,
                                           uint32_t instance,
                                           void *ras_error_status)
 {
@@ -245,9 +245,26 @@ static void sdma_v4_4_reset_ras_error_count(struct amdgpu_device *adev)
        }
 }
 
-const struct amdgpu_sdma_ras_funcs sdma_v4_4_ras_funcs = {
-       .ras_late_init = amdgpu_sdma_ras_late_init,
-       .ras_fini = amdgpu_sdma_ras_fini,
+static void sdma_v4_4_query_ras_error_count(struct amdgpu_device *adev,  void *ras_error_status)
+{
+       int i = 0;
+       for (i = 0; i < adev->sdma.num_instances; i++) {
+               if (sdma_v4_4_query_ras_error_count_by_instance(adev, i, ras_error_status))
+               {
+                       dev_err(adev->dev, "Query ras error count failed in SDMA%d \n", i);
+                       return;
+               }
+       }
+
+}
+
+const struct amdgpu_ras_block_hw_ops sdma_v4_4_ras_hw_ops = {
        .query_ras_error_count = sdma_v4_4_query_ras_error_count,
        .reset_ras_error_count = sdma_v4_4_reset_ras_error_count,
 };
+
+struct amdgpu_sdma_ras sdma_v4_4_ras = {
+       .ras_block = {
+               .hw_ops = &sdma_v4_4_ras_hw_ops,
+       },
+};
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_4.h b/drivers/gpu/drm/amd/amdgpu/sdma_v4_4.h
index 74a6e5b5e949ae997203768474989475a78e53c3..a9f0c68359e0c1d7e05d36ad8dc3859d3e5a0e3d 100644
@@ -23,6 +23,6 @@
 #ifndef __SDMA_V4_4_H__
 #define __SDMA_V4_4_H__
 
-extern const struct amdgpu_sdma_ras_funcs sdma_v4_4_ras_funcs;
+extern struct amdgpu_sdma_ras sdma_v4_4_ras;
 
 #endif