perf: Avoid the read if the count is already updated
authorPeter Zijlstra (Intel) <peterz@infradead.org>
Tue, 21 Jan 2025 15:23:02 +0000 (07:23 -0800)
committerPeter Zijlstra <peterz@infradead.org>
Wed, 5 Feb 2025 09:29:45 +0000 (10:29 +0100)
The event may have been updated in the PMU-specific implementation,
e.g., Intel PEBS counters snapshotting. The common code should not
read and overwrite the value.

The PERF_SAMPLE_READ in the data->sample_type can be used to detect
whether the PMU-specific value is available. If yes, avoid the
pmu->read() in the common code. Add a new flag, skip_read, to track the
case.

Factor out a perf_pmu_read() to clean up the code.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Kan Liang <kan.liang@linux.intel.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lkml.kernel.org/r/20250121152303.3128733-3-kan.liang@linux.intel.com
include/linux/perf_event.h
kernel/events/core.c
kernel/events/ring_buffer.c

index 8333f132f4a96cffda6f7c8eb4b105915f9d9d97..2d07bc1193f3ae718e6952b67f4a166588ccab2c 100644 (file)
@@ -1062,7 +1062,13 @@ struct perf_output_handle {
        struct perf_buffer              *rb;
        unsigned long                   wakeup;
        unsigned long                   size;
-       u64                             aux_flags;
+       union {
+               u64                     flags;          /* perf_output*() */
+               u64                     aux_flags;      /* perf_aux_output*() */
+               struct {
+                       u64             skip_read : 1;
+               };
+       };
        union {
                void                    *addr;
                unsigned long           head;
index bcb09e011e9e112787edcb52b4523e50352115d0..0f8c5599078305b098a7d9ba5445ff1de62d444d 100644 (file)
@@ -1191,6 +1191,12 @@ static void perf_assert_pmu_disabled(struct pmu *pmu)
        WARN_ON_ONCE(*this_cpu_ptr(pmu->pmu_disable_count) == 0);
 }
 
+static inline void perf_pmu_read(struct perf_event *event)
+{
+       if (event->state == PERF_EVENT_STATE_ACTIVE)
+               event->pmu->read(event);
+}
+
 static void get_ctx(struct perf_event_context *ctx)
 {
        refcount_inc(&ctx->refcount);
@@ -3473,8 +3479,7 @@ static void __perf_event_sync_stat(struct perf_event *event,
         * we know the event must be on the current CPU, therefore we
         * don't need to use it.
         */
-       if (event->state == PERF_EVENT_STATE_ACTIVE)
-               event->pmu->read(event);
+       perf_pmu_read(event);
 
        perf_event_update_time(event);
 
@@ -4618,15 +4623,8 @@ static void __perf_event_read(void *info)
 
        pmu->read(event);
 
-       for_each_sibling_event(sub, event) {
-               if (sub->state == PERF_EVENT_STATE_ACTIVE) {
-                       /*
-                        * Use sibling's PMU rather than @event's since
-                        * sibling could be on different (eg: software) PMU.
-                        */
-                       sub->pmu->read(sub);
-               }
-       }
+       for_each_sibling_event(sub, event)
+               perf_pmu_read(sub);
 
        data->ret = pmu->commit_txn(pmu);
 
@@ -7444,9 +7442,8 @@ static void perf_output_read_group(struct perf_output_handle *handle,
        if (read_format & PERF_FORMAT_TOTAL_TIME_RUNNING)
                values[n++] = running;
 
-       if ((leader != event) &&
-           (leader->state == PERF_EVENT_STATE_ACTIVE))
-               leader->pmu->read(leader);
+       if ((leader != event) && !handle->skip_read)
+               perf_pmu_read(leader);
 
        values[n++] = perf_event_count(leader, self);
        if (read_format & PERF_FORMAT_ID)
@@ -7459,9 +7456,8 @@ static void perf_output_read_group(struct perf_output_handle *handle,
        for_each_sibling_event(sub, leader) {
                n = 0;
 
-               if ((sub != event) &&
-                   (sub->state == PERF_EVENT_STATE_ACTIVE))
-                       sub->pmu->read(sub);
+               if ((sub != event) && !handle->skip_read)
+                       perf_pmu_read(sub);
 
                values[n++] = perf_event_count(sub, self);
                if (read_format & PERF_FORMAT_ID)
@@ -7520,6 +7516,9 @@ void perf_output_sample(struct perf_output_handle *handle,
 {
        u64 sample_type = data->type;
 
+       if (data->sample_flags & PERF_SAMPLE_READ)
+               handle->skip_read = 1;
+
        perf_output_put(handle, *header);
 
        if (sample_type & PERF_SAMPLE_IDENTIFIER)
index 180509132d4b68bf11fd5e6825943cf794e7bd7b..59a52b1a1f7899d41e629bd06b8ddfba1f9868e7 100644 (file)
@@ -185,6 +185,7 @@ __perf_output_begin(struct perf_output_handle *handle,
 
        handle->rb    = rb;
        handle->event = event;
+       handle->flags = 0;
 
        have_lost = local_read(&rb->lost);
        if (unlikely(have_lost)) {