perf script: Support -F brstack,dso and brstacksym,dso
author Mark Santaniello <marksan@fb.com>
Mon, 19 Jun 2017 16:38:24 +0000 (09:38 -0700)
committer Arnaldo Carvalho de Melo <acme@redhat.com>
Tue, 20 Jun 2017 01:05:40 +0000 (22:05 -0300)
Perf script can report the dso for "addr" and "ip" fields.

This adds the same support for the "brstack" and "brstacksym" fields.
This can be helpful for AutoFDO: we can ignore LBR entries unless the
source and target addresses are both in the target module we are about to
build.

I built a small test akin to "while(1) { do_nothing(); }" where the
do_nothing function is loaded from a dso:

  $ cat burncpu.cpp
  #include <dlfcn.h>

  // Test driver: loads ./dso.so at run time and calls its do_nothing()
  // in an endless loop, so every iteration performs a call from this
  // executable into the shared object.
  // Returns -1 if the shared object cannot be opened; otherwise never returns.
  int main() {
    void* handle = dlopen("./dso.so", RTLD_LAZY);
    if (!handle) return -1;

    // Look the function up by name and cast the returned pointer to a
    // callable "void()" function pointer.
    // NOTE(review): the dlsym() result is not checked before being called.
    typedef void (*fp)();
    fp do_nothing = (fp) dlsym(handle, "do_nothing");

    // Spin forever; the process has to be stopped externally.
    while(1) {
      do_nothing();
    }
  }

  $ cat dso.cpp
  // Empty function declared with C linkage; burncpu.cpp looks it up via
  // dlsym() under the name "do_nothing".
  extern "C" void do_nothing() {}

  $ cat build.sh
  #!/bin/bash
  g++ -shared dso.cpp -o dso.so
  g++ burncpu.cpp -o burncpu -ldl

I sampled the execution with perf record -b.  Using the new perf script
functionality I can easily find cases where there was a transition from one
dso to another:

  $ perf record -a -b -- sleep 5
  [ perf record: Woken up 55 times to write data ]
  [ perf record: Captured and wrote 18.815 MB perf.data (43593 samples) ]

  $ perf script -F brstack,dso | sed 's/\/0 /\/0\n/g' | grep burncpu | grep dso.so | head -n 1
  0x7f967139b6aa(/tmp/burncpu/dso.so)/0x4006b1(/tmp/burncpu/exe)/P/-/-/0

  $ perf script -F brstacksym,dso | sed 's/\/0 /\/0\n/g' | grep burncpu | grep dso.so | head -n 1
  do_nothing+0x5(/tmp/burncpu/dso.so)/main+0x44(/tmp/burncpu/exe)/P/-/-/0

Signed-off-by: Mark Santaniello <marksan@fb.com>
Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/r/20170619163825.2012979-1-marksan@fb.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
tools/perf/builtin-script.c

index afa84debc5c4df6645636cf33c050b084747fc8e..3c21089f52736af7162d717bad51487317479cf9 100644 (file)
@@ -298,10 +298,11 @@ static int perf_evsel__check_attr(struct perf_evsel *evsel,
                       "selected.\n");
                return -EINVAL;
        }
-       if (PRINT_FIELD(DSO) && !PRINT_FIELD(IP) && !PRINT_FIELD(ADDR)) {
-               pr_err("Display of DSO requested but neither sample IP nor "
-                          "sample address\nis selected. Hence, no addresses to convert "
-                      "to DSO.\n");
+       if (PRINT_FIELD(DSO) && !PRINT_FIELD(IP) && !PRINT_FIELD(ADDR) &&
+           !PRINT_FIELD(BRSTACK) && !PRINT_FIELD(BRSTACKSYM)) {
+               pr_err("Display of DSO requested but none of sample IP, sample address, "
+                      "brstack\nor brstacksym are selected. Hence, no addresses to "
+                      "convert to DSO.\n");
                return -EINVAL;
        }
        if (PRINT_FIELD(SRCLINE) && !PRINT_FIELD(IP)) {
@@ -514,18 +515,43 @@ mispred_str(struct branch_entry *br)
        return br->flags.predicted ? 'P' : 'M';
 }
 
-static void print_sample_brstack(struct perf_sample *sample)
+static void print_sample_brstack(struct perf_sample *sample,
+                                struct thread *thread,
+                                struct perf_event_attr *attr)
 {
        struct branch_stack *br = sample->branch_stack;
-       u64 i;
+       struct addr_location alf, alt;
+       u64 i, from, to;
 
        if (!(br && br->nr))
                return;
 
        for (i = 0; i < br->nr; i++) {
-               printf(" 0x%"PRIx64"/0x%"PRIx64"/%c/%c/%c/%d ",
-                       br->entries[i].from,
-                       br->entries[i].to,
+               from = br->entries[i].from;
+               to   = br->entries[i].to;
+
+               if (PRINT_FIELD(DSO)) {
+                       memset(&alf, 0, sizeof(alf));
+                       memset(&alt, 0, sizeof(alt));
+                       thread__find_addr_map(thread, sample->cpumode, MAP__FUNCTION, from, &alf);
+                       thread__find_addr_map(thread, sample->cpumode, MAP__FUNCTION, to, &alt);
+               }
+
+               printf("0x%"PRIx64, from);
+               if (PRINT_FIELD(DSO)) {
+                       printf("(");
+                       map__fprintf_dsoname(alf.map, stdout);
+                       printf(")");
+               }
+
+               printf("/0x%"PRIx64, to);
+               if (PRINT_FIELD(DSO)) {
+                       printf("(");
+                       map__fprintf_dsoname(alt.map, stdout);
+                       printf(")");
+               }
+
+               printf("/%c/%c/%c/%d ",
                        mispred_str( br->entries + i),
                        br->entries[i].flags.in_tx? 'X' : '-',
                        br->entries[i].flags.abort? 'A' : '-',
@@ -534,7 +560,8 @@ static void print_sample_brstack(struct perf_sample *sample)
 }
 
 static void print_sample_brstacksym(struct perf_sample *sample,
-                                   struct thread *thread)
+                                   struct thread *thread,
+                                   struct perf_event_attr *attr)
 {
        struct branch_stack *br = sample->branch_stack;
        struct addr_location alf, alt;
@@ -559,8 +586,18 @@ static void print_sample_brstacksym(struct perf_sample *sample,
                        alt.sym = map__find_symbol(alt.map, alt.addr);
 
                symbol__fprintf_symname_offs(alf.sym, &alf, stdout);
+               if (PRINT_FIELD(DSO)) {
+                       printf("(");
+                       map__fprintf_dsoname(alf.map, stdout);
+                       printf(")");
+               }
                putchar('/');
                symbol__fprintf_symname_offs(alt.sym, &alt, stdout);
+               if (PRINT_FIELD(DSO)) {
+                       printf("(");
+                       map__fprintf_dsoname(alt.map, stdout);
+                       printf(")");
+               }
                printf("/%c/%c/%c/%d ",
                        mispred_str( br->entries + i),
                        br->entries[i].flags.in_tx? 'X' : '-',
@@ -1187,9 +1224,9 @@ static void process_event(struct perf_script *script,
                print_sample_iregs(sample, attr);
 
        if (PRINT_FIELD(BRSTACK))
-               print_sample_brstack(sample);
+               print_sample_brstack(sample, thread, attr);
        else if (PRINT_FIELD(BRSTACKSYM))
-               print_sample_brstacksym(sample, thread);
+               print_sample_brstacksym(sample, thread, attr);
 
        if (perf_evsel__is_bpf_output(evsel) && PRINT_FIELD(BPF_OUTPUT))
                print_sample_bpf_output(sample);