drm/amdgpu: add aca sysfs support
author: Yang Wang <kevinyang.wang@amd.com>
Tue, 2 Jan 2024 02:43:23 +0000 (10:43 +0800)
committer: Alex Deucher <alexander.deucher@amd.com>
Mon, 15 Jan 2024 23:35:36 +0000 (18:35 -0500)
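Add ACA sysfs node support: each ACA handle gets a read-only "aca_<name>" attribute in the device's "ras" sysfs group that reports its "ue" and "ce" counts.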

Signed-off-by: Yang Wang <kevinyang.wang@amd.com>
Reviewed-by: Hawking Zhang <Hawking.Zhang@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
drivers/gpu/drm/amd/amdgpu/amdgpu_aca.c
drivers/gpu/drm/amd/amdgpu/amdgpu_aca.h
drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h

index f106a04363ea420793a8f62d13d55cafff65e7dc..8a3c3a49415d73f2f104c05efc6428be95c1347b 100644 (file)
@@ -572,15 +572,45 @@ static int add_aca_handle(struct amdgpu_device *adev, struct aca_handle_manager
        return 0;
 }
 
+static ssize_t aca_sysfs_read(struct device *dev,
+                             struct device_attribute *attr, char *buf)
+{
+       struct aca_handle *owner = container_of(attr, struct aca_handle, aca_attr);
+       void *priv = owner->data;
+
+       /* ACA cache clears on read: forward every query to the single RAS query entry point. */
+       return amdgpu_ras_aca_sysfs_read(dev, attr, owner, buf, priv);
+}
+
+static int add_aca_sysfs(struct amdgpu_device *adev, struct aca_handle *handle)
+{
+       struct device_attribute *aca_attr = &handle->aca_attr;
+
+       /* snprintf() NUL-terminates within the given size, so pass the whole buffer. */
+       snprintf(handle->attr_name, sizeof(handle->attr_name), "aca_%s", handle->name);
+       aca_attr->show = aca_sysfs_read;
+       aca_attr->attr.name = handle->attr_name;
+       aca_attr->attr.mode = S_IRUGO;
+       sysfs_attr_init(&aca_attr->attr);
+
+       /* Publish the read-only node as aca_<name> in the device's "ras" sysfs group. */
+       return sysfs_add_file_to_group(&adev->dev->kobj, &aca_attr->attr, "ras");
+}
+
 int amdgpu_aca_add_handle(struct amdgpu_device *adev, struct aca_handle *handle,
                           const char *name, const struct aca_info *ras_info, void *data)
 {
        struct amdgpu_aca *aca = &adev->aca;
+       int ret;
 
        if (!amdgpu_aca_is_enabled(adev))
                return 0;
 
-       return add_aca_handle(adev, &aca->mgr, handle, name, ras_info, data);
+       ret = add_aca_handle(adev, &aca->mgr, handle, name, ras_info, data);
+       if (ret)
+               return ret;
+       /* NOTE(review): if this fails the handle stays added; confirm callers unwind it. */
+       return add_aca_sysfs(adev, handle);
 }
 
 static void remove_aca(struct aca_handle *handle)
index f6c72371244e9576ae4463ab58b1c98cc5e30a99..6e9a35eda68309a58b6d7d7e5b29579e98b163d8 100644 (file)
@@ -149,6 +149,8 @@ struct aca_handle {
        struct aca_handle_manager *mgr;
        struct aca_error_cache error_cache;
        const struct aca_bank_ops *bank_ops;
+       struct device_attribute aca_attr;
+       char attr_name[64];
        const char *name;
        u32 mask;
        void *data;
index bb93e4e3969f2eb13ec8c4851618c11fcbca9fe2..bc459dff42f3181e6b63f9ffc4eb0653fba7c606 100644 (file)
@@ -1214,6 +1214,21 @@ static int amdgpu_aca_log_ras_error_data(struct amdgpu_device *adev, enum amdgpu
        return amdgpu_aca_get_error_data(adev, &obj->aca_handle, type, err_data);
 }
 
+ssize_t amdgpu_ras_aca_sysfs_read(struct device *dev, struct device_attribute *attr,
+                                 struct aca_handle *handle, char *buf, void *data)
+{
+       /* The handle is embedded in a ras_manager; recover that owner to run the query. */
+       struct ras_manager *mgr = container_of(handle, struct ras_manager, aca_handle);
+       struct ras_query_if query = { .head = mgr->head };
+
+       /* Any failure of the RAS query is reported to the reader as -EINVAL. */
+       if (amdgpu_ras_query_error_status(mgr->adev, &query))
+               return -EINVAL;
+
+       /* Output is two lines: the "ue" count, then the "ce" count. */
+       return sysfs_emit(buf, "%s: %lu\n%s: %lu\n", "ue", query.ue_count, "ce", query.ce_count);
+}
+
 static int amdgpu_ras_query_error_status_helper(struct amdgpu_device *adev,
                                                struct ras_query_if *info,
                                                struct ras_err_data *err_data,
index 1d568a25f1de464f0ad608d5311bdad00ce08baa..394394d98bc3e3e314c91a029e769221f2b8d849 100644 (file)
@@ -840,4 +840,7 @@ int amdgpu_ras_bind_aca(struct amdgpu_device *adev, enum amdgpu_ras_block blk,
                               const struct aca_info *aca_info, void *data);
 int amdgpu_ras_unbind_aca(struct amdgpu_device *adev, enum amdgpu_ras_block blk);
 
+ssize_t amdgpu_ras_aca_sysfs_read(struct device *dev, struct device_attribute *attr,
+                                 struct aca_handle *handle, char *buf, void *data);
+
 #endif