perf record: Add BPF event filter support

author Namhyung Kim <namhyung@kernel.org>

Tue, 14 Mar 2023 23:42:30 +0000 (16:42 -0700)

committer Arnaldo Carvalho de Melo <acme@redhat.com>

Wed, 15 Mar 2023 14:08:34 +0000 (11:08 -0300)
author Namhyung Kim <namhyung@kernel.org>
Tue, 14 Mar 2023 23:42:30 +0000 (16:42 -0700)
committer Arnaldo Carvalho de Melo <acme@redhat.com>
Wed, 15 Mar 2023 14:08:34 +0000 (11:08 -0300)
diff --git a/tools/perf/Documentation/perf-record.txt b/tools/perf/Documentation/perf-record.txt

index ff815c2f67e8c0475137a786931d6615108e9e3b..122f71726eaab5cdd8b0336704c0ddfa3bc722f9 100644 (file)
--- a/tools/perf/Documentation/perf-record.txt
+++ b/tools/perf/Documentation/perf-record.txt
@@ -119,9 +119,12 @@ OPTIONS
           "perf report" to view group events together.
  
  --filter=<filter>::
-        Event filter. This option should follow an event selector (-e) which
-       selects either tracepoint event(s) or a hardware trace PMU
-       (e.g. Intel PT or CoreSight).
+       Event filter.  This option should follow an event selector (-e).
+       If the event is a tracepoint, the filter string will be parsed by
+       the kernel.  If the event is a hardware trace PMU (e.g. Intel PT
+       or CoreSight), it will be processed as an address filter.  Otherwise
+       it is treated as a general BPF filter, which can be applied to any
+       kind of event.
  
         - tracepoint filters
  
@@ -176,6 +179,12 @@ OPTIONS
  
         Multiple filters can be separated with space or comma.
  
+       - bpf filters
+
+       A BPF filter can access the sample data and make a decision based on the
+       data.  Users need to set an appropriate sample type to use the BPF
+       filter.
+
  --exclude-perf::
         Don't record events issued by perf itself. This option should follow
         an event selector (-e) which selects tracepoint event(s). It adds a
diff --git a/tools/perf/util/bpf_counter.c b/tools/perf/util/bpf_counter.c

index aa78a15a6f0aaa04d7c0b36601025640f22ac1d8..1b77436e067e28e5726aaaa40d65925b0b9a58a6 100644 (file)
--- a/tools/perf/util/bpf_counter.c
+++ b/tools/perf/util/bpf_counter.c
@@ -763,8 +763,7 @@ extern struct bpf_counter_ops bperf_cgrp_ops;
  
  static inline bool bpf_counter_skip(struct evsel *evsel)
  {
-       return list_empty(&evsel->bpf_counter_list) &&
-               evsel->follower_skel == NULL;
+       return evsel->bpf_counter_ops == NULL;
  }
  
  int bpf_counter__install_pe(struct evsel *evsel, int cpu_map_idx, int fd)
diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c

index b74e12239aec50980106ddf6010129dfa5b00967..cc491a03783683984867956ce24fb6d1ecdfe9d9 100644 (file)
--- a/tools/perf/util/evlist.c
+++ b/tools/perf/util/evlist.c
@@ -31,6 +31,7 @@
  #include "util/evlist-hybrid.h"
  #include "util/pmu.h"
  #include "util/sample.h"
+#include "util/bpf-filter.h"
  #include <signal.h>
  #include <unistd.h>
  #include <sched.h>
@@ -1086,17 +1087,27 @@ int evlist__apply_filters(struct evlist *evlist, struct evsel **err_evsel)
         int err = 0;
  
         evlist__for_each_entry(evlist, evsel) {
-               if (evsel->filter == NULL)
-                       continue;
-
                 /*
                  * filters only work for tracepoint event, which doesn't have cpu limit.
                  * So evlist and evsel should always be same.
                  */
-               err = perf_evsel__apply_filter(&evsel->core, evsel->filter);
-               if (err) {
-                       *err_evsel = evsel;
-                       break;
+               if (evsel->filter) {
+                       err = perf_evsel__apply_filter(&evsel->core, evsel->filter);
+                       if (err) {
+                               *err_evsel = evsel;
+                               break;
+                       }
+               }
+
+               /*
+                * non-tracepoint events can have BPF filters.
+                */
+               if (!list_empty(&evsel->bpf_filters)) {
+                       err = perf_bpf_filter__prepare(evsel);
+                       if (err) {
+                               *err_evsel = evsel;
+                               break;
+                       }
                 }
         }
  
diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c

index a83d8cd5eb51012c3acf293dcc992c26365f43d0..dc3faf005c3bd77e76e675fb10ae47d8078a21e7 100644 (file)
--- a/tools/perf/util/evsel.c
+++ b/tools/perf/util/evsel.c
@@ -50,6 +50,7 @@
  #include "off_cpu.h"
  #include "../perf-sys.h"
  #include "util/parse-branch-options.h"
+#include "util/bpf-filter.h"
  #include <internal/xyarray.h>
  #include <internal/lib.h>
  #include <internal/threadmap.h>
@@ -1517,6 +1518,7 @@ void evsel__exit(struct evsel *evsel)
         assert(list_empty(&evsel->core.node));
         assert(evsel->evlist == NULL);
         bpf_counter__destroy(evsel);
+       perf_bpf_filter__destroy(evsel);
         evsel__free_counts(evsel);
         perf_evsel__free_fd(&evsel->core);
         perf_evsel__free_id(&evsel->core);
diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c

index 3b2e5bb3e852733fec6540559909f9844bad23a2..6c5cf524448604816247d9fd9808590466b71014 100644 (file)
--- a/tools/perf/util/parse-events.c
+++ b/tools/perf/util/parse-events.c
@@ -28,6 +28,7 @@
  #include "perf.h"
  #include "util/parse-events-hybrid.h"
  #include "util/pmu-hybrid.h"
+#include "util/bpf-filter.h"
  #include "tracepoint.h"
  #include "thread_map.h"
  
@@ -2542,11 +2543,8 @@ static int set_filter(struct evsel *evsel, const void *arg)
                 perf_pmu__scan_file(pmu, "nr_addr_filters",
                                     "%d", &nr_addr_filters);
  
-       if (!nr_addr_filters) {
-               fprintf(stderr,
-                       "This CPU does not support address filtering\n");
-               return -1;
-       }
+       if (!nr_addr_filters)
+               return perf_bpf_filter__parse(&evsel->bpf_filters, str);
  
         if (evsel__append_addr_filter(evsel, str) < 0) {
                 fprintf(stderr,
diff --git a/tools/perf/util/python.c b/tools/perf/util/python.c

index be336f1b2b68960285651dff06f86622bde857e9..0faea4c75eede79cceaa53a37d98587d771442d4 100644 (file)
--- a/tools/perf/util/python.c
+++ b/tools/perf/util/python.c
@@ -19,6 +19,7 @@
  #include "mmap.h"
  #include "stat.h"
  #include "metricgroup.h"
+#include "util/bpf-filter.h"
  #include "util/env.h"
  #include "util/pmu.h"
  #include <internal/lib.h>
@@ -135,6 +136,19 @@ int bpf_counter__disable(struct evsel *evsel __maybe_unused)
         return 0;
  }
  
+// Stubs so that util/bpf-filter.c does not have to be linked in
+#ifdef HAVE_BPF_SKEL
+int perf_bpf_filter__prepare(struct evsel *evsel __maybe_unused)
+{
+       return 0;
+}
+
+int perf_bpf_filter__destroy(struct evsel *evsel __maybe_unused)
+{
+       return 0;
+}
+#endif
+
  /*
   * Support debug printing even though util/debug.c is not linked.  That means
   * implementing 'verbose' and 'eprintf'.
author	Namhyung Kim <namhyung@kernel.org>
	Tue, 14 Mar 2023 23:42:30 +0000 (16:42 -0700)
committer	Arnaldo Carvalho de Melo <acme@redhat.com>
	Wed, 15 Mar 2023 14:08:34 +0000 (11:08 -0300)
tools/perf/Documentation/perf-record.txt		patch \| blob \| blame \| history
tools/perf/util/bpf_counter.c		patch \| blob \| blame \| history
tools/perf/util/evlist.c		patch \| blob \| blame \| history
tools/perf/util/evsel.c		patch \| blob \| blame \| history
tools/perf/util/parse-events.c		patch \| blob \| blame \| history
tools/perf/util/python.c		patch \| blob \| blame \| history