drm/xe/xe_pmu: Add PMU support for engine activity
authorRiana Tauro <riana.tauro@intel.com>
Mon, 24 Feb 2025 05:39:01 +0000 (11:09 +0530)
committerLucas De Marchi <lucas.demarchi@intel.com>
Mon, 24 Feb 2025 21:23:57 +0000 (13:23 -0800)
The PMU provides two counters (engine-active-ticks, engine-total-ticks)
to calculate engine activity. When querying engine activity, the
user must group these two counters using the perf_event
group mechanism to ensure both counters are sampled together.

To list the events

./perf list
  xe_0000_03_00.0/engine-active-ticks/ [Kernel PMU event]
  xe_0000_03_00.0/engine-total-ticks/ [Kernel PMU event]

The formats to be used with the above are

engine_instance - config:12-19
engine_class - config:20-27
gt - config:60-63

The events can then be read using perf tool

./perf stat -e xe_0000_03_00.0/engine-active-ticks,gt=0,
       engine_class=0,engine_instance=0/,
       xe_0000_03_00.0/engine-total-ticks,gt=0,
       engine_class=0,engine_instance=0/ -I 1000

Engine activity can then be calculated as below
engine activity % = (engine active ticks/engine total ticks) * 100

v2: validate gt
    rename total-ticks to engine-total-ticks
    add helper to get hwe (Umesh)

v3: fix checkpatch warning
    add details to documentation (Umesh)
    remove ascii formats from documentation (Lucas)

v4: remove unnecessary warn within raw_spinlock (Lucas)

Signed-off-by: Riana Tauro <riana.tauro@intel.com>
Reviewed-by: Umesh Nerlige Ramappa <umesh.nerlige.ramappa@intel.com>
Reviewed-by: Lucas De Marchi <lucas.demarchi@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20250224053903.2253539-5-riana.tauro@intel.com
Signed-off-by: Lucas De Marchi <lucas.demarchi@intel.com>
drivers/gpu/drm/xe/xe_guc.c
drivers/gpu/drm/xe/xe_pmu.c
drivers/gpu/drm/xe/xe_uc.c

index 1619c0a52db938358f2978b6e2daa0b0b27d0140..bc1ff0a4e1e7d5202850d8cac654536288549b34 100644 (file)
@@ -27,6 +27,7 @@
 #include "xe_guc_capture.h"
 #include "xe_guc_ct.h"
 #include "xe_guc_db_mgr.h"
+#include "xe_guc_engine_activity.h"
 #include "xe_guc_hwconfig.h"
 #include "xe_guc_log.h"
 #include "xe_guc_pc.h"
@@ -744,6 +745,10 @@ int xe_guc_init_post_hwconfig(struct xe_guc *guc)
        if (ret)
                return ret;
 
+       ret = xe_guc_engine_activity_init(guc);
+       if (ret)
+               return ret;
+
        ret = xe_guc_buf_cache_init(&guc->buf);
        if (ret)
                return ret;
index 3910a82328ee3522c5df0215aec0dd68dbfa3969..d2c035c1924ed9ab1f15c71705030d009b4e2c64 100644 (file)
@@ -8,15 +8,16 @@
 
 #include "xe_device.h"
 #include "xe_gt_idle.h"
+#include "xe_guc_engine_activity.h"
+#include "xe_hw_engine.h"
 #include "xe_pm.h"
 #include "xe_pmu.h"
 
 /**
  * DOC: Xe PMU (Performance Monitoring Unit)
  *
- * Expose events/counters like GT-C6 residency and GT frequency to user land via
- * the perf interface. Events are per device. The GT can be selected with an
- * extra config sub-field (bits 60-63).
+ * Expose events/counters like GT-C6 residency, GT frequency and per-class-engine
+ * activity to user land via the perf interface. Events are per device.
  *
  * All events are listed in sysfs:
  *
  *     $ ls /sys/bus/event_source/devices/xe_0000_00_02.0/events/
  *     $ ls /sys/bus/event_source/devices/xe_0000_00_02.0/format/
  *
- * The format directory has info regarding the configs that can be used.
+ * The following format parameters are available to read events,
+ * but only a few are valid with each event:
+ *
+ *     gt[60:63]               Selects gt for the event
+ *     engine_class[20:27]     Selects engine-class for event
+ *     engine_instance[12:19]  Selects the engine-instance for the event
+ *
+ * For engine specific events (engine-*), gt, engine_class and engine_instance parameters must be
+ * set as populated by DRM_XE_DEVICE_QUERY_ENGINES.
+ *
+ * For gt specific events (gt-*) the gt parameter must be passed. All other parameters must be 0.
+ *
  * The standard perf tool can be used to grep for a certain event as well.
  * Example:
  *
  *     $ perf stat -e <event_name,gt=> -I <interval>
  */
 
-#define XE_PMU_EVENT_GT_MASK           GENMASK_ULL(63, 60)
-#define XE_PMU_EVENT_ID_MASK           GENMASK_ULL(11, 0)
+#define XE_PMU_EVENT_GT_MASK                   GENMASK_ULL(63, 60)
+#define XE_PMU_EVENT_ENGINE_CLASS_MASK         GENMASK_ULL(27, 20)
+#define XE_PMU_EVENT_ENGINE_INSTANCE_MASK      GENMASK_ULL(19, 12)
+#define XE_PMU_EVENT_ID_MASK                   GENMASK_ULL(11, 0)
 
 static unsigned int config_to_event_id(u64 config)
 {
        return FIELD_GET(XE_PMU_EVENT_ID_MASK, config);
 }
 
+static unsigned int config_to_engine_class(u64 config)
+{
+       return FIELD_GET(XE_PMU_EVENT_ENGINE_CLASS_MASK, config);
+}
+
+static unsigned int config_to_engine_instance(u64 config)
+{
+       return FIELD_GET(XE_PMU_EVENT_ENGINE_INSTANCE_MASK, config);
+}
+
 static unsigned int config_to_gt_id(u64 config)
 {
        return FIELD_GET(XE_PMU_EVENT_GT_MASK, config);
 }
 
-#define XE_PMU_EVENT_GT_C6_RESIDENCY   0x01
+#define XE_PMU_EVENT_GT_C6_RESIDENCY           0x01
+#define XE_PMU_EVENT_ENGINE_ACTIVE_TICKS       0x02
+#define XE_PMU_EVENT_ENGINE_TOTAL_TICKS                0x03
 
 static struct xe_gt *event_to_gt(struct perf_event *event)
 {
@@ -58,6 +84,24 @@ static struct xe_gt *event_to_gt(struct perf_event *event)
        return xe_device_get_gt(xe, gt);
 }
 
+static struct xe_hw_engine *event_to_hwe(struct perf_event *event)
+{
+       struct xe_device *xe = container_of(event->pmu, typeof(*xe), pmu.base);
+       struct drm_xe_engine_class_instance eci;
+       u64 config = event->attr.config;
+       struct xe_hw_engine *hwe;
+
+       eci.engine_class = config_to_engine_class(config);
+       eci.engine_instance = config_to_engine_instance(config);
+       eci.gt_id = config_to_gt_id(config);
+
+       hwe = xe_hw_engine_lookup(xe, eci);
+       if (!hwe || xe_hw_engine_is_reserved(hwe))
+               return NULL;
+
+       return hwe;
+}
+
 static bool event_supported(struct xe_pmu *pmu, unsigned int gt,
                            unsigned int id)
 {
@@ -68,6 +112,35 @@ static bool event_supported(struct xe_pmu *pmu, unsigned int gt,
                pmu->supported_events & BIT_ULL(id);
 }
 
+static bool event_param_valid(struct perf_event *event)
+{
+       struct xe_device *xe = container_of(event->pmu, typeof(*xe), pmu.base);
+       unsigned int engine_class, engine_instance;
+       u64 config = event->attr.config;
+       struct xe_gt *gt;
+
+       gt = xe_device_get_gt(xe, config_to_gt_id(config));
+       if (!gt)
+               return false;
+
+       engine_class = config_to_engine_class(config);
+       engine_instance = config_to_engine_instance(config);
+
+       switch (config_to_event_id(config)) {
+       case XE_PMU_EVENT_GT_C6_RESIDENCY:
+               if (engine_class || engine_instance)
+                       return false;
+               break;
+       case XE_PMU_EVENT_ENGINE_ACTIVE_TICKS:
+       case XE_PMU_EVENT_ENGINE_TOTAL_TICKS:
+               if (!event_to_hwe(event))
+                       return false;
+               break;
+       }
+
+       return true;
+}
+
 static void xe_pmu_event_destroy(struct perf_event *event)
 {
        struct xe_device *xe = container_of(event->pmu, typeof(*xe), pmu.base);
@@ -104,6 +177,9 @@ static int xe_pmu_event_init(struct perf_event *event)
        if (has_branch_stack(event))
                return -EOPNOTSUPP;
 
+       if (!event_param_valid(event))
+               return -ENOENT;
+
        if (!event->parent) {
                drm_dev_get(&xe->drm);
                xe_pm_runtime_get(xe);
@@ -113,6 +189,20 @@ static int xe_pmu_event_init(struct perf_event *event)
        return 0;
 }
 
+static u64 read_engine_events(struct xe_gt *gt, struct perf_event *event)
+{
+       struct xe_hw_engine *hwe;
+       u64 val = 0;
+
+       hwe = event_to_hwe(event);
+       if (config_to_event_id(event->attr.config) == XE_PMU_EVENT_ENGINE_ACTIVE_TICKS)
+               val = xe_guc_engine_activity_active_ticks(&gt->uc.guc, hwe);
+       else
+               val = xe_guc_engine_activity_total_ticks(&gt->uc.guc, hwe);
+
+       return val;
+}
+
 static u64 __xe_pmu_event_read(struct perf_event *event)
 {
        struct xe_gt *gt = event_to_gt(event);
@@ -123,6 +213,9 @@ static u64 __xe_pmu_event_read(struct perf_event *event)
        switch (config_to_event_id(event->attr.config)) {
        case XE_PMU_EVENT_GT_C6_RESIDENCY:
                return xe_gt_idle_residency_msec(&gt->gtidle);
+       case XE_PMU_EVENT_ENGINE_ACTIVE_TICKS:
+       case XE_PMU_EVENT_ENGINE_TOTAL_TICKS:
+               return read_engine_events(gt, event);
        }
 
        return 0;
@@ -207,11 +300,15 @@ static void xe_pmu_event_del(struct perf_event *event, int flags)
        xe_pmu_event_stop(event, PERF_EF_UPDATE);
 }
 
-PMU_FORMAT_ATTR(gt,    "config:60-63");
-PMU_FORMAT_ATTR(event, "config:0-11");
+PMU_FORMAT_ATTR(gt,                    "config:60-63");
+PMU_FORMAT_ATTR(engine_class,          "config:20-27");
+PMU_FORMAT_ATTR(engine_instance,       "config:12-19");
+PMU_FORMAT_ATTR(event,                 "config:0-11");
 
 static struct attribute *pmu_format_attrs[] = {
        &format_attr_event.attr,
+       &format_attr_engine_class.attr,
+       &format_attr_engine_instance.attr,
        &format_attr_gt.attr,
        NULL,
 };
@@ -270,6 +367,8 @@ static ssize_t event_attr_show(struct device *dev,
        XE_EVENT_ATTR_GROUP(v_, id_, &pmu_event_ ##v_.attr.attr)
 
 XE_EVENT_ATTR_SIMPLE(gt-c6-residency, gt_c6_residency, XE_PMU_EVENT_GT_C6_RESIDENCY, "ms");
+XE_EVENT_ATTR_NOUNIT(engine-active-ticks, engine_active_ticks, XE_PMU_EVENT_ENGINE_ACTIVE_TICKS);
+XE_EVENT_ATTR_NOUNIT(engine-total-ticks, engine_total_ticks, XE_PMU_EVENT_ENGINE_TOTAL_TICKS);
 
 static struct attribute *pmu_empty_event_attrs[] = {
        /* Empty - all events are added as groups with .attr_update() */
@@ -283,15 +382,23 @@ static const struct attribute_group pmu_events_attr_group = {
 
 static const struct attribute_group *pmu_events_attr_update[] = {
        &pmu_group_gt_c6_residency,
+       &pmu_group_engine_active_ticks,
+       &pmu_group_engine_total_ticks,
        NULL,
 };
 
 static void set_supported_events(struct xe_pmu *pmu)
 {
        struct xe_device *xe = container_of(pmu, typeof(*xe), pmu);
+       struct xe_gt *gt = xe_device_get_gt(xe, 0);
 
        if (!xe->info.skip_guc_pc)
                pmu->supported_events |= BIT_ULL(XE_PMU_EVENT_GT_C6_RESIDENCY);
+
+       if (xe_guc_engine_activity_supported(&gt->uc.guc)) {
+               pmu->supported_events |= BIT_ULL(XE_PMU_EVENT_ENGINE_ACTIVE_TICKS);
+               pmu->supported_events |= BIT_ULL(XE_PMU_EVENT_ENGINE_TOTAL_TICKS);
+       }
 }
 
 /**
index d8167e818280b3eff52ecdb872faa625a6fe895f..c14bd22820441cf2bf1a533d43a3a2cd51a27f58 100644 (file)
@@ -14,6 +14,7 @@
 #include "xe_gt_sriov_vf.h"
 #include "xe_guc.h"
 #include "xe_guc_pc.h"
+#include "xe_guc_engine_activity.h"
 #include "xe_huc.h"
 #include "xe_sriov.h"
 #include "xe_uc_fw.h"
@@ -210,6 +211,8 @@ int xe_uc_init_hw(struct xe_uc *uc)
        if (ret)
                return ret;
 
+       xe_guc_engine_activity_enable_stats(&uc->guc);
+
        /* We don't fail the driver load if HuC fails to auth, but let's warn */
        ret = xe_huc_auth(&uc->huc, XE_HUC_AUTH_VIA_GUC);
        xe_gt_assert(uc_to_gt(uc), !ret);