perf: Add branch stack counters
author Kan Liang <kan.liang@linux.intel.com>
Wed, 25 Oct 2023 20:16:19 +0000 (13:16 -0700)
committer Peter Zijlstra <peterz@infradead.org>
Fri, 27 Oct 2023 13:05:08 +0000 (15:05 +0200)
Currently, the additional information of a branch entry is stored in a
u64 space. With more and more information added, the space is running
out. For example, the information of occurrences of events will be added
for each branch.

Two places were suggested to append the counters.
https://lore.kernel.org/lkml/20230802215814.GH231007@hirez.programming.kicks-ass.net/
One place is right after the flags of each branch entry. It changes the
existing struct perf_branch_entry. The later ARCH specific
implementation has to be really careful to consistently pick
the right struct.
The other place is right after the entire struct perf_branch_stack.
The disadvantage is that the pointer of the extra space has to be
recorded. The common interface perf_sample_save_brstack() has to be
updated.

The latter is much more straightforward, and should be easier to
understand and maintain. It is implemented in the patch.

Add a new branch sample type, PERF_SAMPLE_BRANCH_COUNTERS, to indicate
the event which is recorded in the branch info.

The "u64 counters" may store the occurrences of several events. The
information regarding the number of events/counters and the width of
each counter should be exposed via sysfs as a reference for the perf
tool. Define the branch_counter_nr and branch_counter_width ABI here.
The support will be implemented later in the Intel-specific patch.

Suggested-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Kan Liang <kan.liang@linux.intel.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lkml.kernel.org/r/20231025201626.3000228-1-kan.liang@linux.intel.com
Documentation/ABI/testing/sysfs-bus-event_source-devices-caps
arch/powerpc/perf/core-book3s.c
arch/x86/events/amd/core.c
arch/x86/events/core.c
arch/x86/events/intel/core.c
arch/x86/events/intel/ds.c
include/linux/perf_event.h
include/uapi/linux/perf_event.h
kernel/events/core.c

index 8757dcf41c0825b80c43b04e377f514bedf980c0..a5f506f7d4819ecee67f4dcdb62f19d3b34e8473 100644 (file)
@@ -16,3 +16,9 @@ Description:
                Example output in powerpc:
                grep . /sys/bus/event_source/devices/cpu/caps/*
                /sys/bus/event_source/devices/cpu/caps/pmu_name:POWER9
+
+               The "branch_counter_nr" in the supported platform exposes the
+               maximum number of counters which can be shown in the u64 counters
+               of PERF_SAMPLE_BRANCH_COUNTERS, while the "branch_counter_width"
+               exposes the width of each counter. Both of them can be used by
+               the perf tool to parse the logged counters in each branch.
index 8c1f7def596e4a581abcef43b383174918076f26..3c14596bbfaf8175e57f1b7b20df0280efb120ea 100644 (file)
@@ -2313,7 +2313,7 @@ static void record_and_restart(struct perf_event *event, unsigned long val,
                        struct cpu_hw_events *cpuhw;
                        cpuhw = this_cpu_ptr(&cpu_hw_events);
                        power_pmu_bhrb_read(event, cpuhw);
-                       perf_sample_save_brstack(&data, event, &cpuhw->bhrb_stack);
+                       perf_sample_save_brstack(&data, event, &cpuhw->bhrb_stack, NULL);
                }
 
                if (event->attr.sample_type & PERF_SAMPLE_DATA_SRC &&
index e24976593a298a5600dda460c9e81275928343e0..4ee6390b45c903037f81c69e9c9d2ca0b0d22976 100644 (file)
@@ -940,7 +940,7 @@ static int amd_pmu_v2_handle_irq(struct pt_regs *regs)
                        continue;
 
                if (has_branch_stack(event))
-                       perf_sample_save_brstack(&data, event, &cpuc->lbr_stack);
+                       perf_sample_save_brstack(&data, event, &cpuc->lbr_stack, NULL);
 
                if (perf_event_overflow(event, &data, regs))
                        x86_pmu_stop(event, 0);
index 40ad1425ffa24bc86cf66d8614436afc934fedb4..40c9af124128d62a43630e7afca35c9c5a083a4f 100644 (file)
@@ -1702,7 +1702,7 @@ int x86_pmu_handle_irq(struct pt_regs *regs)
                perf_sample_data_init(&data, 0, event->hw.last_period);
 
                if (has_branch_stack(event))
-                       perf_sample_save_brstack(&data, event, &cpuc->lbr_stack);
+                       perf_sample_save_brstack(&data, event, &cpuc->lbr_stack, NULL);
 
                if (perf_event_overflow(event, &data, regs))
                        x86_pmu_stop(event, 0);
index a08f794a0e79ac4af4ce49bd1dbb01397d0da78c..41a164764a848ff1c39a0b5bb3a9c34962a1b1ab 100644 (file)
@@ -3047,7 +3047,7 @@ static int handle_pmi_common(struct pt_regs *regs, u64 status)
                perf_sample_data_init(&data, 0, event->hw.last_period);
 
                if (has_branch_stack(event))
-                       perf_sample_save_brstack(&data, event, &cpuc->lbr_stack);
+                       perf_sample_save_brstack(&data, event, &cpuc->lbr_stack, NULL);
 
                if (perf_event_overflow(event, &data, regs))
                        x86_pmu_stop(event, 0);
index bf97ab904d40f79a6e6b539bb8dc6c2611572d4a..cb3f329f8fa4fa5097afa2808c0a6a7e3709f67e 100644 (file)
@@ -1755,7 +1755,7 @@ static void setup_pebs_fixed_sample_data(struct perf_event *event,
                setup_pebs_time(event, data, pebs->tsc);
 
        if (has_branch_stack(event))
-               perf_sample_save_brstack(data, event, &cpuc->lbr_stack);
+               perf_sample_save_brstack(data, event, &cpuc->lbr_stack, NULL);
 }
 
 static void adaptive_pebs_save_regs(struct pt_regs *regs,
@@ -1912,7 +1912,7 @@ static void setup_pebs_adaptive_sample_data(struct perf_event *event,
 
                if (has_branch_stack(event)) {
                        intel_pmu_store_pebs_lbrs(lbr);
-                       perf_sample_save_brstack(data, event, &cpuc->lbr_stack);
+                       perf_sample_save_brstack(data, event, &cpuc->lbr_stack, NULL);
                }
        }
 
index 0367d748fae0e6f2416603de34535484bdfe3d5d..7897ef06602723eb8d53b0ecc5de42385cb99da4 100644 (file)
@@ -1139,6 +1139,10 @@ static inline bool branch_sample_priv(const struct perf_event *event)
        return event->attr.branch_sample_type & PERF_SAMPLE_BRANCH_PRIV_SAVE;
 }
 
+static inline bool branch_sample_counters(const struct perf_event *event)
+{
+       return event->attr.branch_sample_type & PERF_SAMPLE_BRANCH_COUNTERS;
+}
 
 struct perf_sample_data {
        /*
@@ -1173,6 +1177,7 @@ struct perf_sample_data {
        struct perf_callchain_entry     *callchain;
        struct perf_raw_record          *raw;
        struct perf_branch_stack        *br_stack;
+       u64                             *br_stack_cntr;
        union perf_sample_weight        weight;
        union  perf_mem_data_src        data_src;
        u64                             txn;
@@ -1250,7 +1255,8 @@ static inline void perf_sample_save_raw_data(struct perf_sample_data *data,
 
 static inline void perf_sample_save_brstack(struct perf_sample_data *data,
                                            struct perf_event *event,
-                                           struct perf_branch_stack *brs)
+                                           struct perf_branch_stack *brs,
+                                           u64 *brs_cntr)
 {
        int size = sizeof(u64); /* nr */
 
@@ -1258,7 +1264,16 @@ static inline void perf_sample_save_brstack(struct perf_sample_data *data,
                size += sizeof(u64);
        size += brs->nr * sizeof(struct perf_branch_entry);
 
+       /*
+        * The extension space for counters is appended after the
+        * struct perf_branch_stack. It is used to store the occurrences
+        * of events of each branch.
+        */
+       if (brs_cntr)
+               size += brs->nr * sizeof(u64);
+
        data->br_stack = brs;
+       data->br_stack_cntr = brs_cntr;
        data->dyn_size += size;
        data->sample_flags |= PERF_SAMPLE_BRANCH_STACK;
 }
index 39c6a250dd1b92af18e3b4a72a047d2784f89382..4461f380425b732bb9ced41b9d5502c44c08d97a 100644 (file)
@@ -204,6 +204,8 @@ enum perf_branch_sample_type_shift {
 
        PERF_SAMPLE_BRANCH_PRIV_SAVE_SHIFT      = 18, /* save privilege mode */
 
+       PERF_SAMPLE_BRANCH_COUNTERS_SHIFT       = 19, /* save occurrences of events on a branch */
+
        PERF_SAMPLE_BRANCH_MAX_SHIFT            /* non-ABI */
 };
 
@@ -235,6 +237,8 @@ enum perf_branch_sample_type {
 
        PERF_SAMPLE_BRANCH_PRIV_SAVE    = 1U << PERF_SAMPLE_BRANCH_PRIV_SAVE_SHIFT,
 
+       PERF_SAMPLE_BRANCH_COUNTERS     = 1U << PERF_SAMPLE_BRANCH_COUNTERS_SHIFT,
+
        PERF_SAMPLE_BRANCH_MAX          = 1U << PERF_SAMPLE_BRANCH_MAX_SHIFT,
 };
 
@@ -982,6 +986,12 @@ enum perf_event_type {
         *      { u64                   nr;
         *        { u64 hw_idx; } && PERF_SAMPLE_BRANCH_HW_INDEX
         *        { u64 from, to, flags } lbr[nr];
+        *        #
+        *        # The format of the counters is decided by the
+        *        # "branch_counter_nr" and "branch_counter_width",
+        *        # which are defined in the ABI.
+        *        #
+        *        { u64 counters; } cntr[nr] && PERF_SAMPLE_BRANCH_COUNTERS
         *      } && PERF_SAMPLE_BRANCH_STACK
         *
         *      { u64                   abi; # enum perf_sample_regs_abi
index 3eb26c2c6e655e3dced36ee63a5a51868f8725f9..d27ffd80ed6785ba06cb4d2fcc86a0824ae12625 100644 (file)
@@ -7341,6 +7341,14 @@ void perf_output_sample(struct perf_output_handle *handle,
                        if (branch_sample_hw_index(event))
                                perf_output_put(handle, data->br_stack->hw_idx);
                        perf_output_copy(handle, data->br_stack->entries, size);
+                       /*
+                        * Add the extension space which is appended
+                        * right after the struct perf_branch_stack.
+                        */
+                       if (data->br_stack_cntr) {
+                               size = data->br_stack->nr * sizeof(u64);
+                               perf_output_copy(handle, data->br_stack_cntr, size);
+                       }
                } else {
                        /*
                         * we always store at least the value of nr