perf sched timehist: Show callchains for idle stat
authorNamhyung Kim <namhyung@kernel.org>
Thu, 8 Dec 2016 14:47:55 +0000 (23:47 +0900)
committerArnaldo Carvalho de Melo <acme@redhat.com>
Thu, 15 Dec 2016 19:25:45 +0000 (16:25 -0300)
When --idle-hist option is used with --summary, it now shows idle stats
with callchains like below:

  Idle stats by callchain:
  CPU  0:   902.195 msec
  Idle time (msec)    Count Callchains
  ----------------  ------- --------------------------------------------------
           370.589       69 futex_wait_queue_me <- futex_wait <- do_futex <- sys_futex <- entry_SYSCALL_64_fastpath
           178.799       17 worker_thread <- kthread <- ret_from_fork
           128.352       17 schedule_timeout <- rcu_gp_kthread <- kthread <- ret_from_fork
           125.111       19 schedule_hrtimeout_range_clock <- schedule_hrtimeout_range <- poll_schedule_timeout <- do_select <- core_sys_select
            71.599       50 schedule_hrtimeout_range_clock <- schedule_hrtimeout_range <- poll_schedule_timeout <- do_sys_poll <- sys_poll
            23.146        1 rcu_gp_kthread <- kthread <- ret_from_fork
             4.510        1 schedule_hrtimeout_range_clock <- schedule_hrtimeout_range <- ep_poll <- sys_epoll_wait <- do_syscall_64
             0.085        1 schedule_hrtimeout_range_clock <- schedule_hrtimeout_range <- poll_schedule_timeout <- do_sys_poll <- do_restart_poll
  ...

Committer notes:

Extra testing:

  # uname -a
  Linux jouet 4.8.8-300.fc25.x86_64 #1 SMP Tue Nov 15 18:10:06 UTC 2016 x86_64 x86_64 x86_64 GNU/Linux

1) Run 'perf sched record -g'

2) Run 'perf sched timehist --idle --summary'

<SNIP>
  Idle stats by callchain:
  CPU  0: 13456.840 msec
  Idle time (msec) Count Callchains
  ---------------- ----- --------------------------------------------------
          5386.637  3283 schedule_hrtimeout_range_clock <- schedule_hrtimeout_range <- poll_schedule_timeout <- do_sys_poll <- sys_poll
          2750.238  2299 futex_wait_queue_me <- futex_wait <- do_futex <- sys_futex <- do_syscall_64
          1275.672  1287 schedule_hrtimeout_range_clock <- schedule_hrtimeout_range <- ep_poll <- sys_epoll_wait <- entry_SYSCALL_64_fastpath
           936.322   452 worker_thread <- kthread <- ret_from_fork
           741.311   385 rcu_nocb_kthread <- kthread <- ret_from_fork
           729.385   248 schedule_hrtimeout_range_clock <- schedule_hrtimeout_range <- poll_schedule_timeout <- do_sys_poll <- sys_ppoll
           365.386   229 irq_thread <- kthread <- ret_from_fork
           338.934   265 futex_wait_queue_me <- futex_wait <- do_futex <- sys_futex <- entry_SYSCALL_64_fastpath
           219.488   201 schedule_timeout <- rcu_gp_kthread <- kthread <- ret_from_fork
           186.839   410 schedule_hrtimeout_range_clock <- schedule_hrtimeout_range <- ep_poll <- sys_epoll_wait <- do_syscall_64
           142.541    59 kvm_vcpu_block <- kvm_arch_vcpu_ioctl_run <- kvm_vcpu_ioctl <- do_vfs_ioctl <- sys_ioctl
            83.887    92 smpboot_thread_fn <- kthread <- ret_from_fork
            62.722    96 do_exit <- do_group_exit <- 0x2a5594 <- entry_SYSCALL_64_fastpath
            47.894    83 pipe_wait <- pipe_read <- __vfs_read <- vfs_read <- sys_read
            46.554    61 rcu_gp_kthread <- kthread <- ret_from_fork
            34.337    21 schedule_timeout <- intel_fbc_work_fn <- process_one_work <- worker_thread <- kthread
            29.521    14 schedule_hrtimeout_range_clock <- schedule_hrtimeout_range <- poll_schedule_timeout <- do_select <- core_sys_select
            20.274    10 schedule_timeout <- io_schedule_timeout <- bit_wait_io <- __wait_on_bit <- out_of_line_wait_on_bit
            15.085    55 schedule_timeout <- unix_stream_read_generic <- unix_stream_recvmsg <- sock_recvmsg <- SYSC_recvfrom
<SNIP>

Signed-off-by: Namhyung Kim <namhyung@kernel.org>
Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Acked-by: David Ahern <dsahern@gmail.com>
Cc: Andi Kleen <andi@firstfloor.org>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Minchan Kim <minchan@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/r/20161208144755.16673-7-namhyung@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
tools/perf/builtin-sched.c

index 0b14265432df5410116ff679edc5d15b0710e9e3..c1c07bfe132ca978d6dc67ce84cac883d9031810 100644 (file)
@@ -2448,6 +2448,9 @@ static int timehist_sched_change_event(struct perf_tool *tool,
                        last_tr->dt_wait = 0;
                        last_tr->dt_delay = 0;
 
+                       if (itr->cursor.nr)
+                               callchain_append(&itr->callchain, &itr->cursor, t - tprev);
+
                        itr->last_thread = NULL;
                }
        }
@@ -2557,6 +2560,60 @@ static int show_deadthread_runtime(struct thread *t, void *priv)
        return __show_thread_runtime(t, priv);
 }
 
+static size_t callchain__fprintf_folded(FILE *fp, struct callchain_node *node)
+{
+       const char *sep = " <- ";
+       struct callchain_list *chain;
+       size_t ret = 0;
+       char bf[1024];
+       bool first;
+
+       if (node == NULL)
+               return 0;
+
+       ret = callchain__fprintf_folded(fp, node->parent);
+       first = (ret == 0);
+
+       list_for_each_entry(chain, &node->val, list) {
+               if (chain->ip >= PERF_CONTEXT_MAX)
+                       continue;
+               if (chain->ms.sym && chain->ms.sym->ignore)
+                       continue;
+               ret += fprintf(fp, "%s%s", first ? "" : sep,
+                              callchain_list__sym_name(chain, bf, sizeof(bf),
+                                                       false));
+               first = false;
+       }
+
+       return ret;
+}
+
+static size_t timehist_print_idlehist_callchain(struct rb_root *root)
+{
+       size_t ret = 0;
+       FILE *fp = stdout;
+       struct callchain_node *chain;
+       struct rb_node *rb_node = rb_first(root);
+
+       printf("  %16s  %8s  %s\n", "Idle time (msec)", "Count", "Callchains");
+       printf("  %.16s  %.8s  %.50s\n", graph_dotted_line, graph_dotted_line,
+              graph_dotted_line);
+
+       while (rb_node) {
+               chain = rb_entry(rb_node, struct callchain_node, rb_node);
+               rb_node = rb_next(rb_node);
+
+               ret += fprintf(fp, "  ");
+               print_sched_time(chain->hit, 12);
+               ret += 16;  /* print_sched_time returns 2nd arg + 4 */
+               ret += fprintf(fp, " %8d  ", chain->count);
+               ret += callchain__fprintf_folded(fp, chain);
+               ret += fprintf(fp, "\n");
+       }
+
+       return ret;
+}
+
 static void timehist_print_summary(struct perf_sched *sched,
                                   struct perf_session *session)
 {
@@ -2615,6 +2672,35 @@ static void timehist_print_summary(struct perf_sched *sched,
                        printf("    CPU %2d idle entire time window\n", i);
        }
 
+       if (sched->idle_hist && symbol_conf.use_callchain) {
+               callchain_param.mode  = CHAIN_FOLDED;
+               callchain_param.value = CCVAL_PERIOD;
+
+               callchain_register_param(&callchain_param);
+
+               printf("\nIdle stats by callchain:\n");
+               for (i = 0; i < idle_max_cpu; ++i) {
+                       struct idle_thread_runtime *itr;
+
+                       t = idle_threads[i];
+                       if (!t)
+                               continue;
+
+                       itr = thread__priv(t);
+                       if (itr == NULL)
+                               continue;
+
+                       callchain_param.sort(&itr->sorted_root, &itr->callchain,
+                                            0, &callchain_param);
+
+                       printf("  CPU %2d:", i);
+                       print_sched_time(itr->tr.total_run_time, 6);
+                       printf(" msec\n");
+                       timehist_print_idlehist_callchain(&itr->sorted_root);
+                       printf("\n");
+               }
+       }
+
        printf("\n"
               "    Total number of unique tasks: %" PRIu64 "\n"
               "Total number of context switches: %" PRIu64 "\n"