perf trace: Add --print-sample
authorArnaldo Carvalho de Melo <acme@redhat.com>
Mon, 22 Jan 2018 14:38:54 +0000 (11:38 -0300)
committerArnaldo Carvalho de Melo <acme@redhat.com>
Thu, 25 Jan 2018 09:37:29 +0000 (06:37 -0300)
To help with debugging, like the interrupted out of order issue that
will be dealt with in the next patch in this series, changing the code
to deal with:

raw_syscalls:sys_enter 411967179.269 Timer 9609/9626 [2]
raw_syscalls:sys_enter 411967179.213 file:// Content 9609/9609 [3]
   328.038 (18446744073709.496 ms): Timer/9626 futex(uaddr: 0x7fc0d4027044, op: WAIT|PRIV, utime: 0x7fc0b0ffdb50     ) ...
 raw_syscalls:sys_exit 411967179.225 file:// Content 9609/9609 [3]
   327.982 ( 0.012 ms): file:// Conten/9609 futex(uaddr: 0x7fc0d4027040, op: WAKE|PRIV, val: 1                    ) = 1

That long duration is the bug.

Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Wang Nan <wangnan0@huawei.com>
Link: https://lkml.kernel.org/n/tip-fljqiibjn7wet24jd1ed7abc@git.kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
tools/perf/Documentation/perf-trace.txt
tools/perf/builtin-trace.c

index 6909cf1e0eea293669432685009e94b9d6166355..33a88e984e660153c1f16e6dbf46c270248b7a84 100644 (file)
@@ -163,6 +163,10 @@ the thread executes on the designated CPUs. Default is to monitor all CPUs.
         Implies '--call-graph dwarf' when --call-graph not present on the
         command line, on systems where DWARF unwinding was built in.
 
+--print-sample::
+       Print the PERF_RECORD_SAMPLE PERF_SAMPLE_ info for the
+       raw_syscalls:sys_{enter,exit} tracepoints, for debugging.
+
 --proc-map-timeout::
        When processing pre-existing threads /proc/XXX/mmap, it may take a long time,
        because the file may be huge. A time out is needed in such cases.
index 7dece5e0cdbb465938d06453079aba26c92c4885..322c2b15e4071d4fd30c4930bbfefd826df0c8c2 100644 (file)
@@ -110,6 +110,7 @@ struct trace {
        bool                    summary;
        bool                    summary_only;
        bool                    show_comm;
+       bool                    print_sample;
        bool                    show_tool_stats;
        bool                    trace_syscalls;
        bool                    kernel_syscallchains;
@@ -1578,6 +1579,23 @@ static int trace__printf_interrupted_entry(struct trace *trace, struct perf_samp
        return printed;
 }
 
+static int trace__fprintf_sample(struct trace *trace, struct perf_evsel *evsel,
+                                struct perf_sample *sample, struct thread *thread)
+{
+       int printed = 0;
+
+       if (trace->print_sample) {
+               double ts = (double)sample->time / NSEC_PER_MSEC;
+
+               printed += fprintf(trace->output, "%22s %10.3f %s %d/%d [%d]\n",
+                                  perf_evsel__name(evsel), ts,
+                                  thread__comm_str(thread),
+                                  sample->pid, sample->tid, sample->cpu);
+       }
+
+       return printed;
+}
+
 static int trace__sys_enter(struct trace *trace, struct perf_evsel *evsel,
                            union perf_event *event __maybe_unused,
                            struct perf_sample *sample)
@@ -1598,6 +1616,8 @@ static int trace__sys_enter(struct trace *trace, struct perf_evsel *evsel,
        if (ttrace == NULL)
                goto out_put;
 
+       trace__fprintf_sample(trace, evsel, sample, thread);
+
        args = perf_evsel__sc_tp_ptr(evsel, args, sample);
 
        if (ttrace->entry_str == NULL) {
@@ -1688,6 +1708,8 @@ static int trace__sys_exit(struct trace *trace, struct perf_evsel *evsel,
        if (ttrace == NULL)
                goto out_put;
 
+       trace__fprintf_sample(trace, evsel, sample, thread);
+
        if (trace->summary)
                thread__update_stats(ttrace, id, sample);
 
@@ -3034,6 +3056,8 @@ int cmd_trace(int argc, const char **argv)
                     "Set the maximum stack depth when parsing the callchain, "
                     "anything beyond the specified depth will be ignored. "
                     "Default: kernel.perf_event_max_stack or " __stringify(PERF_MAX_STACK_DEPTH)),
+       OPT_BOOLEAN(0, "print-sample", &trace.print_sample,
+                       "print the PERF_RECORD_SAMPLE PERF_SAMPLE_ info, for debugging"),
        OPT_UINTEGER(0, "proc-map-timeout", &trace.opts.proc_map_timeout,
                        "per thread proc mmap processing timeout in ms"),
        OPT_UINTEGER('D', "delay", &trace.opts.initial_delay,