perf ftrace: Fix latency stats with BPF
authorNamhyung Kim <namhyung@kernel.org>
Thu, 27 Feb 2025 19:12:21 +0000 (11:12 -0800)
committerNamhyung Kim <namhyung@kernel.org>
Thu, 13 Mar 2025 07:18:10 +0000 (00:18 -0700)
When BPF collects the stats for the latency in usec, it first divides
the time by 1000.  But that means it would have 0 if the delta is small
and won't update the total time properly.

Let's keep the stats in nsec always and adjust to usec before printing.

Before:

  $ sudo ./perf ftrace latency -ab -T mutex_lock --hide-empty -- sleep 0.1
  #   DURATION     |      COUNT | GRAPH                                          |
       0 -    1 us |        765 | #############################################  |
       1 -    2 us |         10 |                                                |
       2 -    4 us |          2 |                                                |
       4 -    8 us |          5 |                                                |

  # statistics  (in usec)
    total time:                    0    <<<--- (here)
      avg time:                    0
      max time:                    6
      min time:                    0
         count:                  782

After:

  $ sudo ./perf ftrace latency -ab -T mutex_lock --hide-empty -- sleep 0.1
  #   DURATION     |      COUNT | GRAPH                                          |
       0 -    1 us |        880 | ############################################   |
       1 -    2 us |         13 |                                                |
       2 -    4 us |          8 |                                                |
       4 -    8 us |          3 |                                                |

  # statistics  (in usec)
    total time:                  268    <<<--- (here)
      avg time:                    0
      max time:                    6
      min time:                    0
         count:                  904

Tested-by: Athira Rajeev <atrajeev@linux.ibm.com>
Cc: Gabriele Monaco <gmonaco@redhat.com>
Link: https://lore.kernel.org/r/20250227191223.1288473-1-namhyung@kernel.org
Signed-off-by: Namhyung Kim <namhyung@kernel.org>
tools/perf/util/bpf_ftrace.c
tools/perf/util/bpf_skel/func_latency.bpf.c

index 51f407a782d6c58af06f6deb09afe9de227ac2db..7324668cc83e747e9590567551fa28eabf7b34be 100644 (file)
@@ -128,7 +128,7 @@ int perf_ftrace__latency_stop_bpf(struct perf_ftrace *ftrace __maybe_unused)
        return 0;
 }
 
-int perf_ftrace__latency_read_bpf(struct perf_ftrace *ftrace __maybe_unused,
+int perf_ftrace__latency_read_bpf(struct perf_ftrace *ftrace,
                                  int buckets[], struct stats *stats)
 {
        int i, fd, err;
@@ -158,6 +158,12 @@ int perf_ftrace__latency_read_bpf(struct perf_ftrace *ftrace __maybe_unused,
                stats->n = skel->bss->count;
                stats->max = skel->bss->max;
                stats->min = skel->bss->min;
+
+               if (!ftrace->use_nsec) {
+                       stats->mean /= 1000;
+                       stats->max /= 1000;
+                       stats->min /= 1000;
+               }
        }
 
        free(hist);
index 09e70d40a0f4d8550a53d54c86eba156f388b8f4..3d3d9f427c20876e634f8d8bef2e65cfe0013b09 100644 (file)
@@ -102,6 +102,7 @@ int BPF_PROG(func_end)
        start = bpf_map_lookup_elem(&functime, &tid);
        if (start) {
                __s64 delta = bpf_ktime_get_ns() - *start;
+               __u64 val = delta;
                __u32 key = 0;
                __u64 *hist;
 
@@ -111,26 +112,24 @@ int BPF_PROG(func_end)
                        return 0;
 
                if (bucket_range != 0) {
-                       delta /= cmp_base;
+                       val = delta / cmp_base;
 
                        if (min_latency > 0) {
-                               if (delta > min_latency)
-                                       delta -= min_latency;
+                               if (val > min_latency)
+                                       val -= min_latency;
                                else
                                        goto do_lookup;
                        }
 
                        // Less than 1 unit (ms or ns), or, in the future,
                        // than the min latency desired.
-                       if (delta > 0) { // 1st entry: [ 1 unit .. bucket_range units )
-                               // clang 12 doesn't like s64 / u32 division
-                               key = (__u64)delta / bucket_range + 1;
+                       if (val > 0) { // 1st entry: [ 1 unit .. bucket_range units )
+                               key = val / bucket_range + 1;
                                if (key >= bucket_num ||
-                                       delta >= max_latency - min_latency)
+                                       val >= max_latency - min_latency)
                                        key = bucket_num - 1;
                        }
 
-                       delta += min_latency;
                        goto do_lookup;
                }
                // calculate index using delta
@@ -146,10 +145,7 @@ do_lookup:
 
                *hist += 1;
 
-               if (bucket_range == 0)
-                       delta /= cmp_base;
-
-               __sync_fetch_and_add(&total, delta);
+               __sync_fetch_and_add(&total, delta); // always in nsec
                __sync_fetch_and_add(&count, 1);
 
                if (delta > max)