perf intel-pt: Do not default to recording all switch events
author: Adrian Hunter <adrian.hunter@intel.com>
Mon, 12 May 2025 09:39:31 +0000 (12:39 +0300)
committer: Arnaldo Carvalho de Melo <acme@redhat.com>
Mon, 12 May 2025 17:18:16 +0000 (14:18 -0300)
On systems with many CPUs, recording extra context switch events can be
excessive and unnecessary. Stop recording them by default, and add the perf
config option intel-pt.all-switch-events (default: false) so that the
previous behaviour can be enabled when wanted.

Example:

 # perf config intel-pt.all-switch-events=false
 # perf record -eintel_pt//u uname
 Linux
 [ perf record: Woken up 1 times to write data ]
 [ perf record: Captured and wrote 0.082 MB perf.data ]
 # perf script -D | grep PERF_RECORD_SWITCH | awk '{print $5}' | uniq -c
       5 PERF_RECORD_SWITCH
 # perf config intel-pt.all-switch-events=true
 # perf record -eintel_pt//u uname
 Linux
 [ perf record: Woken up 1 times to write data ]
 [ perf record: Captured and wrote 0.102 MB perf.data ]
 # perf script -D | grep PERF_RECORD_SWITCH | awk '{print $5}' | uniq -c
     180 PERF_RECORD_SWITCH_CPU_WIDE

Committer testing:

While doing a make -j28 allmodconfig:

  root@five:~# grep "model name" -m1 /proc/cpuinfo
  model name : Intel(R) Core(TM) i7-14700K
  root@five:~#
  root@five:~# perf config intel-pt.all-switch-events=false
  root@five:~# perf record -e intel_pt//u uname
  Linux
  [ perf record: Woken up 2 times to write data ]
  [ perf record: Captured and wrote 0.019 MB perf.data ]
  root@five:~# perf report --stats | grep SWITCH_CPU_WIDE
  root@five:~#
  root@five:~# perf config intel-pt.all-switch-events=true
  root@five:~# perf record -e intel_pt//u uname
  Linux
  [ perf record: Woken up 1 times to write data ]
  [ perf record: Captured and wrote 0.047 MB perf.data ]
  root@five:~# perf report --stats | grep SWITCH_CPU_WIDE
       SWITCH_CPU_WIDE events:        542  (96.4%)
  root@five:~#

Signed-off-by: Adrian Hunter <adrian.hunter@intel.com>
Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Ian Rogers <irogers@google.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Kan Liang <kan.liang@linux.intel.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Link: https://lore.kernel.org/r/20250512093932.79854-3-adrian.hunter@intel.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
tools/perf/Documentation/perf-config.txt
tools/perf/arch/x86/util/intel-pt.c

index 36ebebc875ea2d0971bdc217b19ed5afa47e9699..c6f33565966735feca1b74d79c6d3127d1d2bdcf 100644 (file)
@@ -708,6 +708,10 @@ intel-pt.*::
                the maximum is exceeded there will be a "Never-ending loop"
                error. The default is 100000.
 
+       intel-pt.all-switch-events::
+               If the user has permission to do so, always record all context
+               switch events on all CPUs. The default is false.
+
 auxtrace.*::
 
        auxtrace.dumpdir::
index 8f235d8b67b628177125b92880a97486e2d9de83..add33cb5d1da8e76a94c2ccab7268a4b5754996f 100644 (file)
@@ -19,6 +19,7 @@
 #include "../../../util/evlist.h"
 #include "../../../util/evsel.h"
 #include "../../../util/evsel_config.h"
+#include "../../../util/config.h"
 #include "../../../util/cpumap.h"
 #include "../../../util/mmap.h"
 #include <subcmd/parse-options.h>
@@ -52,6 +53,7 @@ struct intel_pt_recording {
        struct perf_pmu                 *intel_pt_pmu;
        int                             have_sched_switch;
        struct evlist           *evlist;
+       bool                            all_switch_events;
        bool                            snapshot_mode;
        bool                            snapshot_init_done;
        size_t                          snapshot_size;
@@ -794,7 +796,7 @@ static int intel_pt_recording_options(struct auxtrace_record *itr,
                        bool cpu_wide = !target__none(&opts->target) &&
                                        !target__has_task(&opts->target);
 
-                       if (!cpu_wide && perf_can_record_cpu_wide()) {
+                       if (ptr->all_switch_events && !cpu_wide && perf_can_record_cpu_wide()) {
                                struct evsel *switch_evsel;
 
                                switch_evsel = evlist__add_dummy_on_all_cpus(evlist);
@@ -1178,6 +1180,16 @@ static u64 intel_pt_reference(struct auxtrace_record *itr __maybe_unused)
        return rdtsc();
 }
 
+static int intel_pt_perf_config(const char *var, const char *value, void *data)
+{
+       struct intel_pt_recording *ptr = data;
+
+       if (!strcmp(var, "intel-pt.all-switch-events"))
+               ptr->all_switch_events = perf_config_bool(var, value);
+
+       return 0;
+}
+
 struct auxtrace_record *intel_pt_recording_init(int *err)
 {
        struct perf_pmu *intel_pt_pmu = perf_pmus__find(INTEL_PT_PMU_NAME);
@@ -1197,6 +1209,8 @@ struct auxtrace_record *intel_pt_recording_init(int *err)
                return NULL;
        }
 
+       perf_config(intel_pt_perf_config, ptr);
+
        ptr->intel_pt_pmu = intel_pt_pmu;
        ptr->itr.recording_options = intel_pt_recording_options;
        ptr->itr.info_priv_size = intel_pt_info_priv_size;