drm/amd/pm: add new data in metrics table
author Kenneth Feng <kenneth.feng@amd.com>
Fri, 5 Mar 2021 21:41:45 +0000 (16:41 -0500)
committer Alex Deucher <alexander.deucher@amd.com>
Wed, 24 Mar 2021 03:00:28 +0000 (23:00 -0400)
Export new data in the metrics table: the gfx and memory
utilization counters, as well as the temperature of each HBM instance.

v2:
change the metrics table version to v1.1

v3:
fix the coding style

v4:
rebase against latest kernel

Signed-off-by: Kenneth Feng <kenneth.feng@amd.com>
Reviewed-by: Kevin Wang <kevin1.wang@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
drivers/gpu/drm/amd/include/kgd_pp_interface.h
drivers/gpu/drm/amd/pm/inc/smu_v13_0.h
drivers/gpu/drm/amd/pm/swsmu/smu13/aldebaran_ppt.c
drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c

index 7d5b71e6501998885522dc8fcdb3676056c662e0..79e309a789f3cbdaf0c70b64a248c3804fc3367f 100644 (file)
@@ -231,6 +231,8 @@ enum pp_df_cstate {
 #define XGMI_MODE_PSTATE_D3 0
 #define XGMI_MODE_PSTATE_D0 1
 
+#define NUM_HBM_INSTANCES 4
+
 struct seq_file;
 enum amd_pp_clock_type;
 struct amd_pp_simple_clock_info;
@@ -449,6 +451,11 @@ struct gpu_metrics_v1_1 {
        uint16_t                        pcie_link_speed; // in 0.1 GT/s
 
        uint16_t                        padding;
+
+       uint32_t                        gfx_activity_acc;
+       uint32_t                        mem_activity_acc;
+
+       uint16_t                        temperature_hbm[NUM_HBM_INSTANCES];
 };
 
 /*
index ef9dad9a51ffae0651aec259a43a12a79845afd8..80208e1eefc95b82c9852e730c1d322c37bd8f6d 100644 (file)
@@ -265,8 +265,6 @@ int smu_v13_0_get_current_pcie_link_speed_level(struct smu_context *smu);
 
 int smu_v13_0_get_current_pcie_link_speed(struct smu_context *smu);
 
-void smu_v13_0_init_gpu_metrics_v1_0(struct gpu_metrics_v1_0 *gpu_metrics);
-
 int smu_v13_0_gfx_ulv_control(struct smu_context *smu,
                              bool enablement);
 
index c463af1cafa034f9c0db367f8c9d697ef33669e3..c746cf4f8ea4049306d8cfe1b12b39bbaeb835f1 100644 (file)
@@ -1296,10 +1296,10 @@ static ssize_t aldebaran_get_gpu_metrics(struct smu_context *smu,
                                         void **table)
 {
        struct smu_table_context *smu_table = &smu->smu_table;
-       struct gpu_metrics_v1_0 *gpu_metrics =
-               (struct gpu_metrics_v1_0 *)smu_table->gpu_metrics_table;
+       struct gpu_metrics_v1_1 *gpu_metrics =
+               (struct gpu_metrics_v1_1 *)smu_table->gpu_metrics_table;
        SmuMetrics_t metrics;
-       int ret = 0;
+       int i, ret = 0;
 
        ret = smu_cmn_get_metrics_table(smu,
                                        &metrics,
@@ -1307,7 +1307,7 @@ static ssize_t aldebaran_get_gpu_metrics(struct smu_context *smu,
        if (ret)
                return ret;
 
-       smu_v13_0_init_gpu_metrics_v1_0(gpu_metrics);
+       smu_cmn_init_soft_gpu_metrics(gpu_metrics, 1, 1);
 
        gpu_metrics->temperature_edge = metrics.TemperatureEdge;
        gpu_metrics->temperature_hotspot = metrics.TemperatureHotspot;
@@ -1318,12 +1318,16 @@ static ssize_t aldebaran_get_gpu_metrics(struct smu_context *smu,
 
        gpu_metrics->average_gfx_activity = metrics.AverageGfxActivity;
        gpu_metrics->average_umc_activity = metrics.AverageUclkActivity;
+       gpu_metrics->average_mm_activity = 0;
 
        gpu_metrics->average_socket_power = metrics.AverageSocketPower;
+       gpu_metrics->energy_accumulator = 0;
 
        gpu_metrics->average_gfxclk_frequency = metrics.AverageGfxclkFrequency;
        gpu_metrics->average_socclk_frequency = metrics.AverageSocclkFrequency;
        gpu_metrics->average_uclk_frequency = metrics.AverageUclkFrequency;
+       gpu_metrics->average_vclk0_frequency = 0;
+       gpu_metrics->average_dclk0_frequency = 0;
 
        gpu_metrics->current_gfxclk = metrics.CurrClock[PPCLK_GFXCLK];
        gpu_metrics->current_socclk = metrics.CurrClock[PPCLK_SOCCLK];
@@ -1333,14 +1337,24 @@ static ssize_t aldebaran_get_gpu_metrics(struct smu_context *smu,
 
        gpu_metrics->throttle_status = metrics.ThrottlerStatus;
 
+       gpu_metrics->current_fan_speed = 0;
+
        gpu_metrics->pcie_link_width =
                smu_v13_0_get_current_pcie_link_width(smu);
        gpu_metrics->pcie_link_speed =
                aldebaran_get_current_pcie_link_speed(smu);
 
+       gpu_metrics->system_clock_counter = ktime_get_boottime_ns();
+
+       gpu_metrics->gfx_activity_acc = metrics.GfxBusyAcc;
+       gpu_metrics->mem_activity_acc = metrics.DramBusyAcc;
+
+       for (i = 0; i < NUM_HBM_INSTANCES; i++)
+               gpu_metrics->temperature_hbm[i] = metrics.TemperatureAllHBM[i];
+
        *table = (void *)gpu_metrics;
 
-       return sizeof(struct gpu_metrics_v1_0);
+       return sizeof(struct gpu_metrics_v1_1);
 }
 
 static bool aldebaran_is_mode1_reset_supported(struct smu_context *smu)
index ce160f233323397503c5944418d55078260a2f0d..bd3a9c89dc440c963730601d0b3494b14e79ed60 100644 (file)
@@ -1809,14 +1809,3 @@ int smu_v13_0_get_current_pcie_link_speed(struct smu_context *smu)
        return link_speed[speed_level];
 }
 
-void smu_v13_0_init_gpu_metrics_v1_0(struct gpu_metrics_v1_0 *gpu_metrics)
-{
-       memset(gpu_metrics, 0xFF, sizeof(struct gpu_metrics_v1_0));
-
-       gpu_metrics->common_header.structure_size =
-               sizeof(struct gpu_metrics_v1_0);
-       gpu_metrics->common_header.format_revision = 1;
-       gpu_metrics->common_header.content_revision = 0;
-
-       gpu_metrics->system_clock_counter = ktime_get_boottime_ns();
-}