perf sched timehist: Add --prio option
author Yang Jihong <yangjihong@bytedance.com>
Mon, 19 Aug 2024 03:30:16 +0000 (11:30 +0800)
committer Arnaldo Carvalho de Melo <acme@redhat.com>
Tue, 3 Sep 2024 18:45:59 +0000 (15:45 -0300)
The --prio option restricts the output to events from tasks with the given
priority(ies). By default, events are shown for tasks of all priorities, which
is consistent with the previous behavior.

Testcase:
  # perf sched record nice -n 9 perf bench sched messaging -l 10000
  # Running 'sched/messaging' benchmark:
  # 20 sender and receiver processes per group
  # 10 groups == 400 processes run

       Total time: 3.435 [sec]
  [ perf record: Woken up 270 times to write data ]
  [ perf record: Captured and wrote 618.688 MB perf.data (5729036 samples) ]

  # perf sched timehist -h

   Usage: perf sched timehist [<options>]

      -C, --cpu <cpu>       list of cpus to profile
      -D, --dump-raw-trace  dump raw trace in ASCII
      -f, --force           don't complain, do it
      -g, --call-graph      Display call chains if present (default on)
      -I, --idle-hist       Show idle events only
      -i, --input <file>    input file name
      -k, --vmlinux <file>  vmlinux pathname
      -M, --migrations      Show migration events
      -n, --next            Show next task
      -p, --pid <pid[,pid...]>
                            analyze events only for given process id(s)
      -s, --summary         Show only syscall summary with statistics
      -S, --with-summary    Show all syscalls and summary with statistics
      -t, --tid <tid[,tid...]>
                            analyze events only for given thread id(s)
      -V, --cpu-visual      Add CPU visual
      -v, --verbose         be more verbose (show symbol address, etc)
      -w, --wakeups         Show wakeup events
          --kallsyms <file>
                            kallsyms pathname
          --max-stack <n>   Maximum number of functions to display backtrace.
          --prio <prio>     analyze events only for given task priority(ies)
          --show-prio       Show task priority
          --state           Show task state when sched-out
          --symfs <directory>
                            Look for files with symbols relative to this directory
          --time <str>      Time span for analysis (start,stop)

  # perf sched timehist --prio 140
  Samples of sched_switch event do not have callchains.
  Invalid prio string

  # perf sched timehist --show-prio --prio 129
  Samples of sched_switch event do not have callchains.
             time    cpu  task name                       prio      wait time  sch delay   run time
                          [tid/pid]                                    (msec)     (msec)     (msec)
  --------------- ------  ------------------------------  --------  ---------  ---------  ---------
   2090450.765421 [0002]  sched-messaging[1229618]        129           0.000      0.000      0.029
   2090450.765445 [0007]  sched-messaging[1229616]        129           0.000      0.062      0.043
   2090450.765448 [0014]  sched-messaging[1229619]        129           0.000      0.000      0.032
   2090450.765478 [0013]  sched-messaging[1229617]        129           0.000      0.065      0.048
   2090450.765503 [0014]  sched-messaging[1229622]        129           0.000      0.000      0.017
   2090450.765550 [0002]  sched-messaging[1229624]        129           0.000      0.000      0.021
   2090450.765562 [0007]  sched-messaging[1229621]        129           0.000      0.071      0.028
   2090450.765570 [0005]  sched-messaging[1229620]        129           0.000      0.064      0.066
   2090450.765583 [0001]  sched-messaging[1229625]        129           0.000      0.001      0.031
   2090450.765595 [0013]  sched-messaging[1229623]        129           0.000      0.060      0.028
   2090450.765637 [0014]  sched-messaging[1229628]        129           0.000      0.000      0.019
   2090450.765665 [0007]  sched-messaging[1229627]        129           0.000      0.038      0.030
  <SNIP>

  # perf sched timehist --show-prio --prio 0,120-129
  Samples of sched_switch event do not have callchains.
             time    cpu  task name                       prio      wait time  sch delay   run time
                          [tid/pid]                                    (msec)     (msec)     (msec)
  --------------- ------  ------------------------------  --------  ---------  ---------  ---------
   2090450.763231 [0000]  perf[1229608]                   120           0.000      0.000      0.000
   2090450.763235 [0000]  migration/0[15]                 0             0.000      0.001      0.003
   2090450.763263 [0001]  perf[1229608]                   120           0.000      0.000      0.000
   2090450.763268 [0001]  migration/1[21]                 0             0.000      0.001      0.004
   2090450.763302 [0002]  perf[1229608]                   120           0.000      0.000      0.000
   2090450.763309 [0002]  migration/2[27]                 0             0.000      0.001      0.007
   2090450.763338 [0003]  perf[1229608]                   120           0.000      0.000      0.000
   2090450.763343 [0003]  migration/3[33]                 0             0.000      0.001      0.004
   2090450.763459 [0004]  perf[1229608]                   120           0.000      0.000      0.000
   2090450.763469 [0004]  migration/4[39]                 0             0.000      0.002      0.010
   2090450.763496 [0005]  perf[1229608]                   120           0.000      0.000      0.000
   2090450.763501 [0005]  migration/5[45]                 0             0.000      0.001      0.004
   2090450.763613 [0006]  perf[1229608]                   120           0.000      0.000      0.000
   2090450.763622 [0006]  migration/6[51]                 0             0.000      0.001      0.008
   2090450.763652 [0007]  perf[1229608]                   120           0.000      0.000      0.000
   2090450.763660 [0007]  migration/7[57]                 0             0.000      0.001      0.008
  <SNIP>
   2090450.765665 [0001]  <idle>                          120           0.031      0.031      0.081
   2090450.765665 [0007]  sched-messaging[1229627]        129           0.000      0.038      0.030
   2090450.765667 [0000]  s1-perf[8235/7168]              120           0.008      0.000      0.004
   2090450.765684 [0013]  <idle>                          120           0.028      0.028      0.088
   2090450.765685 [0001]  sched-messaging[1229630]        129           0.000      0.001      0.020
   2090450.765688 [0000]  <idle>                          120           0.004      0.004      0.020
   2090450.765689 [0002]  <idle>                          120           0.021      0.021      0.138
   2090450.765691 [0005]  sched-messaging[1229626]        129           0.000      0.085      0.029

Signed-off-by: Yang Jihong <yangjihong@bytedance.com>
Acked-by: Namhyung Kim <namhyung@kernel.org>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Ian Rogers <irogers@google.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: James Clark <james.clark@arm.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Kan Liang <kan.liang@linux.intel.com>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Link: https://lore.kernel.org/r/20240819033016.2427235-3-yangjihong@bytedance.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
tools/perf/Documentation/perf-sched.txt
tools/perf/builtin-sched.c

index 3efa5c58418d3937c38dee5275de0c7323242589..3db64954a267a39892515b0785568b0b6d2df595 100644 (file)
@@ -215,6 +215,12 @@ OPTIONS for 'perf sched timehist'
 --show-prio::
        Show task priority.
 
+--prio::
+       Only show events for given task priority(ies). Multiple priorities can be
+       provided as a comma-separated list with no spaces: 0,120. Ranges of
+       priorities are specified with -: 120-129. A combination of both can also be
+       provided: 0,120-129.
+
 OPTIONS for 'perf sched replay'
 ------------------------------
 
index ba6563dc93d092cfe7071022cb37e7a076ea795a..0a7b2b2acd5697926bb1039316892c1415af7c29 100644 (file)
@@ -51,6 +51,7 @@
 #define COMM_LEN               20
 #define SYM_LEN                        129
 #define MAX_PID                        1024000
+#define MAX_PRIO               140
 
 static const char *cpu_list;
 static DECLARE_BITMAP(cpu_bitmap, MAX_NR_CPUS);
@@ -234,6 +235,8 @@ struct perf_sched {
        struct perf_time_interval ptime;
        struct perf_time_interval hist_time;
        volatile bool   thread_funcs_exit;
+       const char      *prio_str;
+       DECLARE_BITMAP(prio_bitmap, MAX_PRIO);
 };
 
 /* per thread run time data */
@@ -2504,12 +2507,33 @@ static bool timehist_skip_sample(struct perf_sched *sched,
                                 struct perf_sample *sample)
 {
        bool rc = false;
+       int prio = -1;
+       struct thread_runtime *tr = NULL;
 
        if (thread__is_filtered(thread)) {
                rc = true;
                sched->skipped_samples++;
        }
 
+       if (sched->prio_str) {
+               /*
+                * Because priority may be changed during task execution,
+                * first read priority from prev sched_in event for current task.
+                * If prev sched_in event is not saved, then read priority from
+                * current task sched_out event.
+                */
+               tr = thread__get_runtime(thread);
+               if (tr && tr->prio != -1)
+                       prio = tr->prio;
+               else if (evsel__name_is(evsel, "sched:sched_switch"))
+                       prio = evsel__intval(evsel, sample, "prev_prio");
+
+               if (prio != -1 && !test_bit(prio, sched->prio_bitmap)) {
+                       rc = true;
+                       sched->skipped_samples++;
+               }
+       }
+
        if (sched->idle_hist) {
                if (!evsel__name_is(evsel, "sched:sched_switch"))
                        rc = true;
@@ -2723,7 +2747,7 @@ static int timehist_sched_change_event(const struct perf_tool *tool,
                goto out;
        }
 
-       if (sched->show_prio)
+       if (sched->show_prio || sched->prio_str)
                timehist_update_task_prio(evsel, sample, machine);
 
        thread = timehist_get_thread(sched, sample, machine, evsel);
@@ -3143,6 +3167,47 @@ static int timehist_check_attr(struct perf_sched *sched,
        return 0;
 }
 
+/*
+ * Parse sched->prio_str into sched->prio_bitmap.
+ *
+ * The string is a comma-separated list of items with no spaces. Each item is
+ * either a single priority ("120") or an inclusive range ("120-129"), and
+ * every value must be below MAX_PRIO. Numbers are converted by strtoul()
+ * with base 0.
+ *
+ * Returns 0 on success or when no string was given, and -1 when the string
+ * is malformed. On failure the bitmap may already hold the bits of the items
+ * parsed before the bad one.
+ */
+static int timehist_parse_prio_str(struct perf_sched *sched)
+{
+       char *p;
+       unsigned long start_prio, end_prio;
+       const char *str = sched->prio_str;
+
+       if (!str)
+               return 0;
+
+       for (;;) {
+               /*
+                * Every item must begin with a digit. Without this check an
+                * empty string, "abc", ",120" or "120,abc" would be accepted
+                * silently and leave an empty or partial bitmap behind.
+                */
+               if (!isdigit((unsigned char)*str))
+                       return -1;
+
+               p = NULL;
+               start_prio = strtoul(str, &p, 0);
+               if (start_prio >= MAX_PRIO || (*p != '\0' && *p != ',' && *p != '-'))
+                       return -1;
+
+               if (*p == '-') {
+                       str = ++p;
+
+                       /*
+                        * The range end must be a number as well, otherwise
+                        * strtoul() yields 0 and "0-" would pass as "0-0".
+                        */
+                       if (!isdigit((unsigned char)*str))
+                               return -1;
+
+                       p = NULL;
+                       end_prio = strtoul(str, &p, 0);
+                       if (end_prio >= MAX_PRIO || (*p != '\0' && *p != ','))
+                               return -1;
+
+                       if (end_prio < start_prio)
+                               return -1;
+               } else {
+                       end_prio = start_prio;
+               }
+
+               for (; start_prio <= end_prio; start_prio++)
+                       __set_bit(start_prio, sched->prio_bitmap);
+
+               /* *p is either the terminator or a ',' at this point */
+               if (*p == '\0')
+                       break;
+
+               str = p + 1;
+       }
+
+       return 0;
+}
+
 static int perf_sched__timehist(struct perf_sched *sched)
 {
        struct evsel_str_handler handlers[] = {
@@ -3204,6 +3269,11 @@ static int perf_sched__timehist(struct perf_sched *sched)
        if (timehist_check_attr(sched, evlist) != 0)
                goto out;
 
+       if (timehist_parse_prio_str(sched) != 0) {
+               pr_err("Invalid prio string\n");
+               goto out;
+       }
+
        setup_pager();
 
        /* prefer sched_waking if it is captured */
@@ -3761,6 +3831,8 @@ int cmd_sched(int argc, const char **argv)
                   "analyze events only for given thread id(s)"),
        OPT_STRING('C', "cpu", &cpu_list, "cpu", "list of cpus to profile"),
        OPT_BOOLEAN(0, "show-prio", &sched.show_prio, "Show task priority"),
+       OPT_STRING(0, "prio", &sched.prio_str, "prio",
+                  "analyze events only for given task priority(ies)"),
        OPT_PARENT(sched_options)
        };