perf ftrace latency: Constify control data for BPF
author Namhyung Kim <namhyung@kernel.org>
Mon, 2 Sep 2024 20:05:12 +0000 (13:05 -0700)
committer Arnaldo Carvalho de Melo <acme@redhat.com>
Tue, 3 Sep 2024 14:47:02 +0000 (11:47 -0300)
The control knobs set before loading BPF programs should be declared as
'const volatile' so that they can be optimized by the BPF core.

Committer testing:

  root@x1:~# perf ftrace latency --use-bpf -T schedule
  ^C#   DURATION     |      COUNT | GRAPH                                          |
       0 - 1    us |          0 |                                                |
       1 - 2    us |          0 |                                                |
       2 - 4    us |          0 |                                                |
       4 - 8    us |          0 |                                                |
       8 - 16   us |          1 |                                                |
      16 - 32   us |          5 |                                                |
      32 - 64   us |          2 |                                                |
      64 - 128  us |          6 |                                                |
     128 - 256  us |          7 |                                                |
     256 - 512  us |          5 |                                                |
     512 - 1024 us |         22 | #                                              |
       1 - 2    ms |         36 | ##                                             |
       2 - 4    ms |         68 | #####                                          |
       4 - 8    ms |         22 | #                                              |
       8 - 16   ms |         91 | #######                                        |
      16 - 32   ms |         11 |                                                |
      32 - 64   ms |         26 | ##                                             |
      64 - 128  ms |        213 | #################                              |
     128 - 256  ms |         19 | #                                              |
     256 - 512  ms |         14 | #                                              |
     512 - 1024 ms |          5 |                                                |
       1 - ...   s |          8 |                                                |
  root@x1:~#

  root@x1:~# perf trace -e bpf perf ftrace latency --use-bpf -T schedule
     0.000 ( 0.015 ms): perf/2944525 bpf(cmd: 36, uattr: 0x7ffe80de7b40, size: 8)                          = -1 EOPNOTSUPP (Operation not supported)
     0.025 ( 0.102 ms): perf/2944525 bpf(cmd: PROG_LOAD, uattr: 0x7ffe80de7870, size: 148)                 = 8
     0.136 ( 0.026 ms): perf/2944525 bpf(cmd: PROG_LOAD, uattr: 0x7ffe80de7930, size: 148)                 = 8
     0.174 ( 0.026 ms): perf/2944525 bpf(cmd: PROG_LOAD, uattr: 0x7ffe80de77e0, size: 148)                 = 8
     0.205 ( 0.010 ms): perf/2944525 bpf(uattr: 0x7ffe80de7990, size: 80)                                  = 8
     0.227 ( 0.011 ms): perf/2944525 bpf(cmd: BTF_LOAD, uattr: 0x7ffe80de7810, size: 40)                   = 8
     0.244 ( 0.004 ms): perf/2944525 bpf(cmd: BTF_LOAD, uattr: 0x7ffe80de7880, size: 40)                   = 8
     0.257 ( 0.006 ms): perf/2944525 bpf(cmd: BTF_LOAD, uattr: 0x7ffe80de7660, size: 40)                   = 8
     0.265 ( 0.058 ms): perf/2944525 bpf(cmd: PROG_LOAD, uattr: 0x7ffe80de7730, size: 148)                 = 9
     0.330 ( 0.004 ms): perf/2944525 bpf(cmd: BTF_LOAD, uattr: 0x7ffe80de78e0, size: 40)                   = 8
     0.337 ( 0.003 ms): perf/2944525 bpf(cmd: BTF_LOAD, uattr: 0x7ffe80de7890, size: 40)                   = 8
     0.343 ( 0.004 ms): perf/2944525 bpf(cmd: BTF_LOAD, uattr: 0x7ffe80de7880, size: 40)                   = 8
     0.349 ( 0.003 ms): perf/2944525 bpf(cmd: BTF_LOAD, uattr: 0x7ffe80de78b0, size: 40)                   = 8
     0.355 ( 0.004 ms): perf/2944525 bpf(cmd: BTF_LOAD, uattr: 0x7ffe80de7890, size: 40)                   = 8
     0.361 ( 0.003 ms): perf/2944525 bpf(cmd: BTF_LOAD, uattr: 0x7ffe80de78b0, size: 40)                   = 8
     0.367 ( 0.003 ms): perf/2944525 bpf(cmd: BTF_LOAD, uattr: 0x7ffe80de7880, size: 40)                   = 8
     0.373 ( 0.014 ms): perf/2944525 bpf(cmd: BTF_LOAD, uattr: 0x7ffe80de7a00, size: 40)                   = 8
     0.390 ( 0.358 ms): perf/2944525 bpf(uattr: 0x7ffe80de7950, size: 80)                                  = 9
     0.763 ( 0.014 ms): perf/2944525 bpf(uattr: 0x7ffe80de7950, size: 80)                                  = 9
     0.783 ( 0.011 ms): perf/2944525 bpf(uattr: 0x7ffe80de7950, size: 80)                                  = 9
     0.798 ( 0.017 ms): perf/2944525 bpf(uattr: 0x7ffe80de7950, size: 80)                                  = 9
     0.819 ( 0.003 ms): perf/2944525 bpf(uattr: 0x7ffe80de7700, size: 80)                                  = 9
     0.824 ( 0.047 ms): perf/2944525 bpf(cmd: PROG_LOAD, uattr: 0x7ffe80de76c0, size: 148)                 = 10
     0.878 ( 0.008 ms): perf/2944525 bpf(uattr: 0x7ffe80de7950, size: 80)                                  = 9
     0.891 ( 0.014 ms): perf/2944525 bpf(cmd: MAP_UPDATE_ELEM, uattr: 0x7ffe80de79e0, size: 32)            = 0
     0.910 ( 0.103 ms): perf/2944525 bpf(cmd: PROG_LOAD, uattr: 0x7ffe80de7880, size: 148)                 = 9
     1.016 ( 0.143 ms): perf/2944525 bpf(cmd: PROG_LOAD, uattr: 0x7ffe80de7880, size: 148)                 = 10
     3.777 ( 0.068 ms): perf/2944525 bpf(cmd: PROG_LOAD, uattr: 0x7ffe80de7570, size: 148)                 = 12
     3.848 ( 0.003 ms): perf/2944525 bpf(cmd: LINK_CREATE, uattr: 0x7ffe80de7550, size: 64)                = -1 EBADF (Bad file descriptor)
     3.859 ( 0.006 ms): perf/2944525 bpf(cmd: LINK_CREATE, uattr: 0x7ffe80de77c0, size: 64)                = 12
     6.504 ( 0.010 ms): perf/2944525 bpf(cmd: LINK_CREATE, uattr: 0x7ffe80de77c0, size: 64)                = 14
^C#   DURATION     |      COUNT | GRAPH                                          |
     0 - 1    us |          0 |                                                |
     1 - 2    us |          0 |                                                |
     2 - 4    us |          1 |                                                |
     4 - 8    us |          3 |                                                |
     8 - 16   us |          3 |                                                |
    16 - 32   us |         11 |                                                |
    32 - 64   us |          9 |                                                |
    64 - 128  us |         17 |                                                |
   128 - 256  us |         30 | #                                              |
   256 - 512  us |         20 |                                                |
   512 - 1024 us |         42 | #                                              |
     1 - 2    ms |        151 | ######                                         |
     2 - 4    ms |        106 | ####                                           |
     4 - 8    ms |         18 |                                                |
     8 - 16   ms |        149 | ######                                         |
    16 - 32   ms |         30 | #                                              |
    32 - 64   ms |         17 |                                                |
    64 - 128  ms |        360 | ###############                                |
   128 - 256  ms |         52 | ##                                             |
   256 - 512  ms |         18 |                                                |
   512 - 1024 ms |         28 | #                                              |
     1 - ...   s |          5 |                                                |
  root@x1:~#

Signed-off-by: Namhyung Kim <namhyung@kernel.org>
Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Ian Rogers <irogers@google.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Kan Liang <kan.liang@linux.intel.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Song Liu <song@kernel.org>
Link: https://lore.kernel.org/r/20240902200515.2103769-3-namhyung@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
tools/perf/util/bpf_ftrace.c
tools/perf/util/bpf_skel/func_latency.bpf.c

index 7a4297d8fd2ce9250caa848475aa0bf9b68e79c8..06d1c4018407a26590e82c389376da4001a85329 100644 (file)
@@ -40,13 +40,17 @@ int perf_ftrace__latency_prepare_bpf(struct perf_ftrace *ftrace)
        if (ftrace->target.cpu_list) {
                ncpus = perf_cpu_map__nr(ftrace->evlist->core.user_requested_cpus);
                bpf_map__set_max_entries(skel->maps.cpu_filter, ncpus);
+               skel->rodata->has_cpu = 1;
        }
 
        if (target__has_task(&ftrace->target) || target__none(&ftrace->target)) {
                ntasks = perf_thread_map__nr(ftrace->evlist->core.threads);
                bpf_map__set_max_entries(skel->maps.task_filter, ntasks);
+               skel->rodata->has_task = 1;
        }
 
+       skel->rodata->use_nsec = ftrace->use_nsec;
+
        set_max_rlimit();
 
        err = func_latency_bpf__load(skel);
@@ -59,7 +63,6 @@ int perf_ftrace__latency_prepare_bpf(struct perf_ftrace *ftrace)
                u32 cpu;
                u8 val = 1;
 
-               skel->bss->has_cpu = 1;
                fd = bpf_map__fd(skel->maps.cpu_filter);
 
                for (i = 0; i < ncpus; i++) {
@@ -72,7 +75,6 @@ int perf_ftrace__latency_prepare_bpf(struct perf_ftrace *ftrace)
                u32 pid;
                u8 val = 1;
 
-               skel->bss->has_task = 1;
                fd = bpf_map__fd(skel->maps.task_filter);
 
                for (i = 0; i < ntasks; i++) {
@@ -81,8 +83,6 @@ int perf_ftrace__latency_prepare_bpf(struct perf_ftrace *ftrace)
                }
        }
 
-       skel->bss->use_nsec = ftrace->use_nsec;
-
        skel->links.func_begin = bpf_program__attach_kprobe(skel->progs.func_begin,
                                                            false, func->name);
        if (IS_ERR(skel->links.func_begin)) {
index 9d01e3af747922ca4a69b2dd5f8e430efb506fd7..f613dc9cb123480cf64493ec2b23f389415da3a9 100644 (file)
@@ -37,9 +37,10 @@ struct {
 
 
 int enabled = 0;
-int has_cpu = 0;
-int has_task = 0;
-int use_nsec = 0;
+
+const volatile int has_cpu = 0;
+const volatile int has_task = 0;
+const volatile int use_nsec = 0;
 
 SEC("kprobe/func")
 int BPF_PROG(func_begin)