perf/x86: Add PERF_X86_EVENT_NEEDS_BRANCH_STACK flag
author    Kan Liang <kan.liang@linux.intel.com>
          Wed, 25 Oct 2023 20:16:20 +0000 (13:16 -0700)
committer Peter Zijlstra <peterz@infradead.org>
          Fri, 27 Oct 2023 13:05:09 +0000 (15:05 +0200)
Currently, branch_sample_type != 0 is used to check whether a branch
stack setup is required. But it doesn't check the sample type, so an
unnecessary branch stack setup may be done for a counting event, e.g.,

  perf record -e "{branch-instructions,branch-misses}:S" -j any

Also, an event with only the new PERF_SAMPLE_BRANCH_COUNTERS branch
sample type may not require a branch stack setup either.
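
For reference, the pre-patch check reduces to the generic helper
below, which looks only at branch_sample_type (a minimal sketch for
illustration; the in-tree definition lives in
include/linux/perf_event.h):

  /*
   * Any set branch_sample_type bit requests a branch stack setup,
   * regardless of whether the event actually samples.
   */
  static inline bool needs_branch_stack(struct perf_event *event)
  {
          return event->attr.branch_sample_type != 0;
  }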

Add a new flag, NEEDS_BRANCH_STACK, to indicate whether the event
requires a branch stack setup. Replace the needs_branch_stack() checks
with checks of the new flag.

The counting event check is implemented here. A later patch will take
the new PERF_SAMPLE_BRANCH_COUNTERS into account.
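
The counting case is filtered out with is_sampling_event(), which
distinguishes sampling from counting by the sample period (again a
minimal sketch of the generic helper, for illustration):

  /*
   * A counting event has no sample period, so it never needs the
   * branch stack machinery set up on its behalf.
   */
  static inline bool is_sampling_event(struct perf_event *event)
  {
          return event->attr.sample_period != 0;
  }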

Signed-off-by: Kan Liang <kan.liang@linux.intel.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lkml.kernel.org/r/20231025201626.3000228-2-kan.liang@linux.intel.com
arch/x86/events/intel/core.c
arch/x86/events/perf_event_flags.h

index 41a164764a848ff1c39a0b5bb3a9c34962a1b1ab..a99449c0d77c610966a2eabf9ff1409bd4037cc3 100644
@@ -2527,9 +2527,14 @@ static void intel_pmu_assign_event(struct perf_event *event, int idx)
                perf_report_aux_output_id(event, idx);
 }
 
+static __always_inline bool intel_pmu_needs_branch_stack(struct perf_event *event)
+{
+       return event->hw.flags & PERF_X86_EVENT_NEEDS_BRANCH_STACK;
+}
+
 static void intel_pmu_del_event(struct perf_event *event)
 {
-       if (needs_branch_stack(event))
+       if (intel_pmu_needs_branch_stack(event))
                intel_pmu_lbr_del(event);
        if (event->attr.precise_ip)
                intel_pmu_pebs_del(event);
@@ -2820,7 +2825,7 @@ static void intel_pmu_add_event(struct perf_event *event)
 {
        if (event->attr.precise_ip)
                intel_pmu_pebs_add(event);
-       if (needs_branch_stack(event))
+       if (intel_pmu_needs_branch_stack(event))
                intel_pmu_lbr_add(event);
 }
 
@@ -3897,7 +3902,10 @@ static int intel_pmu_hw_config(struct perf_event *event)
                        x86_pmu.pebs_aliases(event);
        }
 
-       if (needs_branch_stack(event)) {
+       if (needs_branch_stack(event) && is_sampling_event(event))
+               event->hw.flags |= PERF_X86_EVENT_NEEDS_BRANCH_STACK;
+
+       if (intel_pmu_needs_branch_stack(event)) {
                ret = intel_pmu_setup_lbr_filter(event);
                if (ret)
                        return ret;
index 1dc19b9b4426abde8e1b005a7561e8f97b5a3995..a1685981c5206ef0fcd493a9fc4154cf5963bf5d 100644
@@ -20,3 +20,4 @@ PERF_ARCH(TOPDOWN,            0x04000) /* Count Topdown slots/metrics events */
 PERF_ARCH(PEBS_STLAT,          0x08000) /* st+stlat data address sampling */
 PERF_ARCH(AMD_BRS,             0x10000) /* AMD Branch Sampling */
 PERF_ARCH(PEBS_LAT_HYBRID,     0x20000) /* ld and st lat for hybrid */
+PERF_ARCH(NEEDS_BRANCH_STACK,  0x40000) /* require branch stack setup */