tools/bpftool: add perf subcommand
author Yonghong Song <yhs@fb.com>
Thu, 24 May 2018 18:21:58 +0000 (11:21 -0700)
committer Alexei Starovoitov <ast@kernel.org>
Fri, 25 May 2018 01:18:20 +0000 (18:18 -0700)
The new command "bpftool perf [show | list]" will traverse
all processes under /proc, and if any fd is associated
with a perf event, it will print out related perf event
information. Documentation is also added.

Below is an example to show the results using bcc commands.
Running the following 4 bcc commands:
  kprobe:     trace.py '__x64_sys_write'
  kretprobe:  trace.py 'r::__x64_sys_nanosleep'
  tracepoint: trace.py 't:syscalls:sys_enter_nanosleep'
  uprobe:     trace.py 'p:/home/yhs/a.out:main'

The bpftool command line and result:

  $ bpftool perf
  pid 21711  fd 5: prog_id 5  kprobe  func __x64_sys_write  offset 0
  pid 21765  fd 5: prog_id 7  kretprobe  func __x64_sys_nanosleep  offset 0
  pid 21767  fd 5: prog_id 8  tracepoint  sys_enter_nanosleep
  pid 21800  fd 5: prog_id 9  uprobe  filename /home/yhs/a.out  offset 1159

  $ bpftool -j perf
  [{"pid":21711,"fd":5,"prog_id":5,"fd_type":"kprobe","func":"__x64_sys_write","offset":0}, \
   {"pid":21765,"fd":5,"prog_id":7,"fd_type":"kretprobe","func":"__x64_sys_nanosleep","offset":0}, \
   {"pid":21767,"fd":5,"prog_id":8,"fd_type":"tracepoint","tracepoint":"sys_enter_nanosleep"}, \
   {"pid":21800,"fd":5,"prog_id":9,"fd_type":"uprobe","filename":"/home/yhs/a.out","offset":1159}]

  $ bpftool prog
  5: kprobe  name probe___x64_sys  tag e495a0c82f2c7a8d  gpl
  loaded_at 2018-05-15T04:46:37-0700  uid 0
  xlated 200B  not jited  memlock 4096B  map_ids 4
  7: kprobe  name probe___x64_sys  tag f2fdee479a503abf  gpl
  loaded_at 2018-05-15T04:48:32-0700  uid 0
  xlated 200B  not jited  memlock 4096B  map_ids 7
  8: tracepoint  name tracepoint__sys  tag 5390badef2395fcf  gpl
  loaded_at 2018-05-15T04:48:48-0700  uid 0
  xlated 200B  not jited  memlock 4096B  map_ids 8
  9: kprobe  name probe_main_1  tag 0a87bdc2e2953b6d  gpl
  loaded_at 2018-05-15T04:49:52-0700  uid 0
  xlated 200B  not jited  memlock 4096B  map_ids 9

  $ ps ax | grep "python ./trace.py"
  21711 pts/0    T      0:03 python ./trace.py __x64_sys_write
  21765 pts/0    S+     0:00 python ./trace.py r::__x64_sys_nanosleep
  21767 pts/2    S+     0:00 python ./trace.py t:syscalls:sys_enter_nanosleep
  21800 pts/3    S+     0:00 python ./trace.py p:/home/yhs/a.out:main
  22374 pts/1    S+     0:00 grep --color=auto python ./trace.py

Reviewed-by: Jakub Kicinski <jakub.kicinski@netronome.com>
Signed-off-by: Yonghong Song <yhs@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
tools/bpf/bpftool/Documentation/bpftool-perf.rst [new file with mode: 0644]
tools/bpf/bpftool/Documentation/bpftool.rst
tools/bpf/bpftool/bash-completion/bpftool
tools/bpf/bpftool/main.c
tools/bpf/bpftool/main.h
tools/bpf/bpftool/perf.c [new file with mode: 0644]

diff --git a/tools/bpf/bpftool/Documentation/bpftool-perf.rst b/tools/bpf/bpftool/Documentation/bpftool-perf.rst
new file mode 100644 (file)
index 0000000..e3eb0ea
--- /dev/null
@@ -0,0 +1,81 @@
+================
+bpftool-perf
+================
+-------------------------------------------------------------------------------
+tool for inspection of perf related bpf prog attachments
+-------------------------------------------------------------------------------
+
+:Manual section: 8
+
+SYNOPSIS
+========
+
+       **bpftool** [*OPTIONS*] **perf** *COMMAND*
+
+       *OPTIONS* := { [{ **-j** | **--json** }] [{ **-p** | **--pretty** }] }
+
+       *COMMAND* :=
+       { **show** | **list** | **help** }
+
+PERF COMMANDS
+=============
+
+|      **bpftool** **perf { show | list }**
+|      **bpftool** **perf help**
+
+DESCRIPTION
+===========
+       **bpftool perf { show | list }**
+                 List all raw_tracepoint, tracepoint, kprobe and uprobe attachments in the system.
+
+                 Output will start with process id and file descriptor in that process,
+                 followed by bpf program id, attachment information, and attachment point.
+                 The attachment point for raw_tracepoint/tracepoint is the trace probe name.
+                 The attachment point for k[ret]probe is either symbol name and offset,
+                 or a kernel virtual address.
+                 The attachment point for u[ret]probe is the file name and the file offset.
+
+       **bpftool perf help**
+                 Print short help message.
+
+OPTIONS
+=======
+       -h, --help
+                 Print short generic help message (similar to **bpftool help**).
+
+       -v, --version
+                 Print version number (similar to **bpftool version**).
+
+       -j, --json
+                 Generate JSON output. For commands that cannot produce JSON, this
+                 option has no effect.
+
+       -p, --pretty
+                 Generate human-readable JSON output. Implies **-j**.
+
+EXAMPLES
+========
+
+| **# bpftool perf**
+
+::
+
+      pid 21711  fd 5: prog_id 5  kprobe  func __x64_sys_write  offset 0
+      pid 21765  fd 5: prog_id 7  kretprobe  func __x64_sys_nanosleep  offset 0
+      pid 21767  fd 5: prog_id 8  tracepoint  sys_enter_nanosleep
+      pid 21800  fd 5: prog_id 9  uprobe  filename /home/yhs/a.out  offset 1159
+
+|
+| **# bpftool -j perf**
+
+::
+
+    [{"pid":21711,"fd":5,"prog_id":5,"fd_type":"kprobe","func":"__x64_sys_write","offset":0}, \
+     {"pid":21765,"fd":5,"prog_id":7,"fd_type":"kretprobe","func":"__x64_sys_nanosleep","offset":0}, \
+     {"pid":21767,"fd":5,"prog_id":8,"fd_type":"tracepoint","tracepoint":"sys_enter_nanosleep"}, \
+     {"pid":21800,"fd":5,"prog_id":9,"fd_type":"uprobe","filename":"/home/yhs/a.out","offset":1159}]
+
+
+SEE ALSO
+========
+       **bpftool**\ (8), **bpftool-prog**\ (8), **bpftool-map**\ (8)
index 564cb0d9692b630e58f56509c318d2064c0258fb..b6f5d560460d5554d99df1f979faf6d5e49c7200 100644 (file)
@@ -16,7 +16,7 @@ SYNOPSIS
 
        **bpftool** **version**
 
-       *OBJECT* := { **map** | **program** | **cgroup** }
+       *OBJECT* := { **map** | **program** | **cgroup** | **perf** }
 
        *OPTIONS* := { { **-V** | **--version** } | { **-h** | **--help** }
        | { **-j** | **--json** } [{ **-p** | **--pretty** }] }
@@ -30,6 +30,8 @@ SYNOPSIS
 
        *CGROUP-COMMANDS* := { **show** | **list** | **attach** | **detach** | **help** }
 
+       *PERF-COMMANDS* := { **show** | **list** | **help** }
+
 DESCRIPTION
 ===========
        *bpftool* allows for inspection and simple modification of BPF objects
@@ -56,3 +58,4 @@ OPTIONS
 SEE ALSO
 ========
-       **bpftool-map**\ (8), **bpftool-prog**\ (8), **bpftool-cgroup**\ (8)
+       **bpftool-map**\ (8), **bpftool-prog**\ (8), **bpftool-cgroup**\ (8),
+       **bpftool-perf**\ (8)
index b301c9b315f1e5b11100abb124bc60bc2730f219..7bc198d60de2d0b0dcd606a1303212ffe0791a73 100644 (file)
@@ -448,6 +448,15 @@ _bpftool()
                     ;;
             esac
             ;;
+        perf)
+            case $command in
+                *)
+                    [[ $prev == $object ]] && \
+                        COMPREPLY=( $( compgen -W 'help \
+                            show list' -- "$cur" ) )
+                    ;;
+            esac
+            ;;
     esac
 } &&
 complete -F _bpftool bpftool
index 1ec852d21d441b7015830def2ee8fdabba564bfe..eea7f14355f3273f2885e370cbd5a47a99d0bded 100644 (file)
@@ -87,7 +87,7 @@ static int do_help(int argc, char **argv)
                "       %s batch file FILE\n"
                "       %s version\n"
                "\n"
-               "       OBJECT := { prog | map | cgroup }\n"
+               "       OBJECT := { prog | map | cgroup | perf }\n"
                "       " HELP_SPEC_OPTIONS "\n"
                "",
                bin_name, bin_name, bin_name);
@@ -216,6 +216,7 @@ static const struct cmd cmds[] = {
        { "prog",       do_prog },
        { "map",        do_map },
        { "cgroup",     do_cgroup },
+       { "perf",       do_perf },
        { "version",    do_version },
        { 0 }
 };
index 6173cd997e7a41670990dad3b637acb343a135ec..63fdb310b9a4aceddad6f5c935c4075d3f9b522e 100644 (file)
@@ -119,6 +119,7 @@ int do_prog(int argc, char **arg);
 int do_map(int argc, char **arg);
 int do_event_pipe(int argc, char **argv);
 int do_cgroup(int argc, char **arg);
+int do_perf(int argc, char **arg);
 
 int prog_parse_fd(int *argc, char ***argv);
 int map_parse_fd_and_info(int *argc, char ***argv, void *info, __u32 *info_len);
diff --git a/tools/bpf/bpftool/perf.c b/tools/bpf/bpftool/perf.c
new file mode 100644 (file)
index 0000000..ac6b1a1
--- /dev/null
@@ -0,0 +1,246 @@
+// SPDX-License-Identifier: GPL-2.0+
+// Copyright (C) 2018 Facebook
+// Author: Yonghong Song <yhs@fb.com>
+
+#define _GNU_SOURCE
+#include <ctype.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <unistd.h>
+#include <ftw.h>
+
+#include <bpf.h>
+
+#include "main.h"
+
+/* Cached probe result. 0: undecided, 1: supported, 2: not supported */
+static int perf_query_supported;
+
+/* Report whether bpf_task_fd_query() is usable, probing at most once.
+ *
+ * The probe queries a descriptor of this process that has no bpf program
+ * attached; errno ENOTSUPP (524) from that query is taken to mean that the
+ * query itself is implemented. Any other outcome is reported with p_err()
+ * plus a hint on stderr and is cached as "not supported".
+ *
+ * Return: true if the query is supported, false otherwise (including when
+ * the probe descriptor could not be opened, in which case nothing is
+ * cached and the next call probes again).
+ */
+static bool has_perf_query_support(void)
+{
+       __u64 probe_offset, probe_addr;
+       __u32 len, prog_id, fd_type;
+       char buf[256];
+       int fd;
+
+       if (perf_query_supported)
+               goto out;
+
+       /* Any descriptor without a bpf attachment works for the probe, so
+        * use "/", which can always be opened. bin_name is not guaranteed
+        * to be a path reachable from the current directory (e.g. when the
+        * tool was located through $PATH), which made the probe fail
+        * before the kernel was ever queried.
+        */
+       fd = open("/", O_RDONLY);
+       if (fd < 0) {
+               p_err("perf_query_support: %s", strerror(errno));
+               goto out;
+       }
+
+       /* the following query will fail as no bpf attachment,
+        * the expected errno is ENOTSUPP
+        */
+       errno = 0;
+       len = sizeof(buf);
+       bpf_task_fd_query(getpid(), fd, 0, buf, &len, &prog_id,
+                         &fd_type, &probe_offset, &probe_addr);
+
+       if (errno == 524 /* ENOTSUPP */) {
+               perf_query_supported = 1;
+               goto close_fd;
+       }
+
+       perf_query_supported = 2;
+       p_err("perf_query_support: %s", strerror(errno));
+       fprintf(stderr,
+               "HINT: non root or kernel doesn't support TASK_FD_QUERY\n");
+
+close_fd:
+       close(fd);
+out:
+       return perf_query_supported == 1;
+}
+
+/* Write one JSON object describing the perf event attached to (pid, fd).
+ *
+ * "pid", "fd" and "prog_id" are always emitted. The remaining fields depend
+ * on fd_type: tracepoints carry the name held in buf, k[ret]probes carry
+ * either "func" + "offset" (when buf is non-empty) or "addr", and
+ * u[ret]probes carry "filename" + "offset". An fd_type not listed below
+ * adds no further fields.
+ */
+static void print_perf_json(int pid, int fd, __u32 prog_id, __u32 fd_type,
+                           char *buf, __u64 probe_offset, __u64 probe_addr)
+{
+       jsonw_start_object(json_wtr);
+       jsonw_int_field(json_wtr, "pid", pid);
+       jsonw_int_field(json_wtr, "fd", fd);
+       jsonw_uint_field(json_wtr, "prog_id", prog_id);
+
+       switch (fd_type) {
+       case BPF_FD_TYPE_RAW_TRACEPOINT:
+       case BPF_FD_TYPE_TRACEPOINT:
+               jsonw_string_field(json_wtr, "fd_type",
+                                  fd_type == BPF_FD_TYPE_TRACEPOINT ?
+                                  "tracepoint" : "raw_tracepoint");
+               jsonw_string_field(json_wtr, "tracepoint", buf);
+               break;
+       case BPF_FD_TYPE_KPROBE:
+       case BPF_FD_TYPE_KRETPROBE:
+               jsonw_string_field(json_wtr, "fd_type",
+                                  fd_type == BPF_FD_TYPE_KRETPROBE ?
+                                  "kretprobe" : "kprobe");
+               /* no symbol name available: fall back to the raw address */
+               if (buf[0] == '\0') {
+                       jsonw_lluint_field(json_wtr, "addr", probe_addr);
+                       break;
+               }
+               jsonw_string_field(json_wtr, "func", buf);
+               jsonw_lluint_field(json_wtr, "offset", probe_offset);
+               break;
+       case BPF_FD_TYPE_UPROBE:
+       case BPF_FD_TYPE_URETPROBE:
+               jsonw_string_field(json_wtr, "fd_type",
+                                  fd_type == BPF_FD_TYPE_URETPROBE ?
+                                  "uretprobe" : "uprobe");
+               jsonw_string_field(json_wtr, "filename", buf);
+               jsonw_lluint_field(json_wtr, "offset", probe_offset);
+               break;
+       }
+
+       jsonw_end_object(json_wtr);
+}
+
+/* Print one plain-text line describing the perf event attached to (pid, fd).
+ *
+ * The line starts with pid, fd and prog_id. What follows depends on
+ * fd_type: the tracepoint name, the k[ret]probe function and offset (or the
+ * address when buf is empty), or the u[ret]probe file name and offset.
+ * For an fd_type not handled below only the prefix is printed, without a
+ * trailing newline.
+ */
+static void print_perf_plain(int pid, int fd, __u32 prog_id, __u32 fd_type,
+                            char *buf, __u64 probe_offset, __u64 probe_addr)
+{
+       printf("pid %d  fd %d: prog_id %u  ", pid, fd, prog_id);
+
+       if (fd_type == BPF_FD_TYPE_RAW_TRACEPOINT) {
+               printf("raw_tracepoint  %s\n", buf);
+       } else if (fd_type == BPF_FD_TYPE_TRACEPOINT) {
+               printf("tracepoint  %s\n", buf);
+       } else if (fd_type == BPF_FD_TYPE_KPROBE) {
+               /* empty buf: no symbol name, only an address is known */
+               if (buf[0] == '\0')
+                       printf("kprobe  addr %llu\n", probe_addr);
+               else
+                       printf("kprobe  func %s  offset %llu\n", buf,
+                              probe_offset);
+       } else if (fd_type == BPF_FD_TYPE_KRETPROBE) {
+               if (buf[0] == '\0')
+                       printf("kretprobe  addr %llu\n", probe_addr);
+               else
+                       printf("kretprobe  func %s  offset %llu\n", buf,
+                              probe_offset);
+       } else if (fd_type == BPF_FD_TYPE_UPROBE) {
+               printf("uprobe  filename %s  offset %llu\n", buf, probe_offset);
+       } else if (fd_type == BPF_FD_TYPE_URETPROBE) {
+               printf("uretprobe  filename %s  offset %llu\n", buf,
+                      probe_offset);
+       }
+}
+
+/* nftw() callback: report the perf event behind /proc/<pid>/fd/<fd>.
+ *
+ * Parses the path text that follows the "/proc" prefix. Numeric
+ * /proc/<pid> directories and their "fd" subdirectory are descended into,
+ * anything else is answered with FTW_SKIP_SUBTREE, and each
+ * /proc/<pid>/fd/<fd> entry is handed to bpf_task_fd_query(). Entries for
+ * which the query fails are skipped silently; the others are printed in
+ * JSON or plain form depending on json_output.
+ *
+ * @fpath: path of the entry being visited, always starting with "/proc"
+ * @sb, @tflag, @ftwbuf: unused
+ *
+ * Return: 0 to continue the walk, FTW_SKIP_SUBTREE to prune this entry.
+ */
+static int show_proc(const char *fpath, const struct stat *sb,
+                    int tflag, struct FTW *ftwbuf)
+{
+       __u64 probe_offset, probe_addr;
+       __u32 len, prog_id, fd_type;
+       int err, pid = 0, fd = 0;
+       const char *pch;
+       char buf[4096];
+
+       /* prefix always /proc */
+       pch = fpath + 5;
+       if (*pch == '\0')
+               return 0;
+
+       /* pid should be all numbers; isdigit() is only defined for values
+        * representable as unsigned char (or EOF), hence the cast
+        */
+       pch++;
+       while (isdigit((unsigned char)*pch)) {
+               pid = pid * 10 + *pch - '0';
+               pch++;
+       }
+       if (*pch == '\0')
+               return 0;
+       if (*pch != '/')
+               return FTW_SKIP_SUBTREE;
+
+       /* check /proc/<pid>/fd directory */
+       pch++;
+       if (strncmp(pch, "fd", 2))
+               return FTW_SKIP_SUBTREE;
+       pch += 2;
+       if (*pch == '\0')
+               return 0;
+       /* rejects siblings that merely start with "fd", e.g. "fdinfo" */
+       if (*pch != '/')
+               return FTW_SKIP_SUBTREE;
+
+       /* check /proc/<pid>/fd/<fd_num> */
+       pch++;
+       while (isdigit((unsigned char)*pch)) {
+               fd = fd * 10 + *pch - '0';
+               pch++;
+       }
+       if (*pch != '\0')
+               return FTW_SKIP_SUBTREE;
+
+       /* query (pid, fd) for potential perf events */
+       len = sizeof(buf);
+       err = bpf_task_fd_query(pid, fd, 0, buf, &len, &prog_id, &fd_type,
+                               &probe_offset, &probe_addr);
+       if (err < 0)
+               return 0;
+
+       if (json_output)
+               print_perf_json(pid, fd, prog_id, fd_type, buf, probe_offset,
+                               probe_addr);
+       else
+               print_perf_plain(pid, fd, prog_id, fd_type, buf, probe_offset,
+                                probe_addr);
+
+       return 0;
+}
+
+/* Handler for "perf show" and "perf list": walk /proc with show_proc() as
+ * the per-entry callback. In JSON mode the output is wrapped in one array.
+ *
+ * Return: 0 on success, -1 if has_perf_query_support() reports no support
+ * or nftw() fails.
+ */
+static int do_show(int argc, char **argv)
+{
+       const int walk_flags = FTW_PHYS | FTW_ACTIONRETVAL;
+       const int max_open_dirs = 16;
+       int ret = 0;
+
+       if (!has_perf_query_support())
+               return -1;
+
+       if (json_output)
+               jsonw_start_array(json_wtr);
+
+       if (nftw("/proc", show_proc, max_open_dirs, walk_flags) == -1) {
+               p_err("%s", strerror(errno));
+               ret = -1;
+       }
+
+       if (json_output)
+               jsonw_end_array(json_wtr);
+
+       return ret;
+}
+
+/* Print the usage line for this subcommand on stderr. Always returns 0.
+ *
+ * NOTE(review): argv[-2] is presumably the object name ("perf") that the
+ * dispatcher already stepped past - confirm against cmd_select().
+ */
+static int do_help(int argc, char **argv)
+{
+       const char *object = argv[-2];
+
+       fprintf(stderr, "Usage: %s %s { show | list | help }\n",
+               bin_name, object);
+       return 0;
+}
+
+/* Subcommand table handed to cmd_select() by do_perf(): "show" and "list"
+ * are aliases for the same handler; the all-zero entry ends the table.
+ */
+static const struct cmd cmds[] = {
+       { "show",       do_show },
+       { "list",       do_show },
+       { "help",       do_help },
+       { 0 }
+};
+
+/* Entry point of the "perf" object: hand the remaining arguments to
+ * cmd_select() together with this file's command table and help handler,
+ * and return whatever it returns.
+ */
+int do_perf(int argc, char **argv)
+{
+       int ret;
+
+       ret = cmd_select(cmds, argc, argv, do_help);
+       return ret;
+}