Merge tag 'vfs-6.7.misc' of gitolite.kernel.org:pub/scm/linux/kernel/git/vfs/vfs
[linux-block.git] / tools / tracing / rtla / src / timerlat_aa.c
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * Copyright (C) 2023 Red Hat Inc, Daniel Bristot de Oliveira <bristot@kernel.org>
4  */
5
6 #include <stdlib.h>
7 #include <errno.h>
8 #include "utils.h"
9 #include "osnoise.h"
10 #include "timerlat.h"
11 #include <unistd.h>
12
/*
 * Per-CPU analysis state. The timerlat IRQ sample moves a CPU to
 * TIMERLAT_WAITING_THREAD; the timerlat thread sample moves it back to
 * TIMERLAT_WAITING_IRQ.
 */
enum timelat_state {
	TIMERLAT_INIT		= 0,
	TIMERLAT_WAITING_IRQ	= 1,
	TIMERLAT_WAITING_THREAD	= 2,
};
18
/* Size of the buffers that hold a task comm in the per-cpu data. */
#define MAX_COMM 24

/*
 * Per-cpu state and statistics used by the auto-analysis.
 *
 * Values read from trace events are kept as "unsigned long long" because
 * they are fetched with tep_get_field_val().
 */
struct timerlat_aa_data {
	/* One of enum timelat_state. */
	int curr_state;

	/* Last timerlat IRQ sample: seqnum, timer_latency, record timestamp. */
	unsigned long long tlat_irq_seqnum;
	unsigned long long tlat_irq_latency;
	unsigned long long tlat_irq_timstamp;

	/* Last timerlat thread sample: seqnum, timer_latency, record timestamp. */
	unsigned long long tlat_thread_seqnum;
	unsigned long long tlat_thread_latency;
	unsigned long long tlat_thread_timstamp;

	/*
	 * The thread that was running when the timerlat IRQ arrived.
	 *
	 * Whether it counts as blocking or as interference depends on its
	 * priority: if timerlat is the highest priority task it is
	 * blocking, otherwise it is interference.
	 */
	unsigned long long run_thread_pid;
	char run_thread_comm[MAX_COMM];
	unsigned long long thread_blocking_duration;
	/* Largest IRQ latency seen while the idle thread (pid 0) was running. */
	unsigned long long max_exit_idle_latency;

	/* The timerlat timer IRQ itself, as reported by osnoise:irq_noise. */
	unsigned long long timer_irq_start_time;
	unsigned long long timer_irq_start_delay;
	unsigned long long timer_irq_duration;
	unsigned long long timer_exit_from_idle;

	/*
	 * The last IRQ/NMI seen before the timerlat IRQ arrived.
	 *
	 * It may have influenced the timerlat IRQ latency only if
	 * now - timestamp <= latency; otherwise it is ignored.
	 */
	unsigned long long prev_irq_duration;
	unsigned long long prev_irq_timstamp;

	/* Interference accumulated between the timerlat IRQ and the thread. */
	unsigned long long thread_nmi_sum;
	unsigned long long thread_irq_sum;
	unsigned long long thread_softirq_sum;
	unsigned long long thread_thread_sum;

	/* Pre-formatted text describing each source of interference. */
	struct trace_seq *prev_irqs_seq;
	struct trace_seq *nmi_seq;
	struct trace_seq *irqs_seq;
	struct trace_seq *softirqs_seq;
	struct trace_seq *threads_seq;
	struct trace_seq *stack_seq;

	/* Task currently on the CPU, tracked from sched_switch events. */
	char current_comm[MAX_COMM];
	unsigned long long current_pid;

	/* "work" and "function" fields of the last workqueue_execute_start. */
	unsigned long long kworker;
	unsigned long long kworker_func;
};
99
/*
 * The analysis context: the system wide view shared by every handler.
 */
struct timerlat_aa_context {
	/* The per-CPU loops walk taa_data[0 .. nr_cpus - 1]. */
	int nr_cpus;
	/* When non-zero, the analysis also prints the task seen on each CPU. */
	int dump_tasks;

	/* Per-CPU data, indexed by CPU number. */
	struct timerlat_aa_data *taa_data;

	/* Needed to translate function names and to register events. */
	struct osnoise_tool *tool;
};
116
/*
 * The context is kept in a file-local pointer and is only reached through
 * the helper below.
 *
 * It could live inside the trace context instead, but then every access
 * would need container_of() plus a chain of pointers. Is that needed? Not
 * sure.
 *
 * Keep it simple for now. If it becomes necessary, store it in the tool,
 * pass the *context to timerlat_aa_get_ctx() and do the magic there.
 */
static struct timerlat_aa_context *__timerlat_aa_ctx;

/*
 * timerlat_aa_get_ctx - Get the analysis context
 */
static struct timerlat_aa_context *timerlat_aa_get_ctx(void)
{
	struct timerlat_aa_context *taa_ctx = __timerlat_aa_ctx;

	return taa_ctx;
}
132
133 /*
134  * timerlat_aa_get_data - Get the per-cpu data from the timerlat context
135  */
136 static struct timerlat_aa_data
137 *timerlat_aa_get_data(struct timerlat_aa_context *taa_ctx, int cpu)
138 {
139         return &taa_ctx->taa_data[cpu];
140 }
141
142 /*
143  * timerlat_aa_irq_latency - Handles timerlat IRQ event
144  */
145 static int timerlat_aa_irq_latency(struct timerlat_aa_data *taa_data,
146                                    struct trace_seq *s, struct tep_record *record,
147                                    struct tep_event *event)
148 {
149         /*
150          * For interference, we start now looking for things that can delay
151          * the thread.
152          */
153         taa_data->curr_state = TIMERLAT_WAITING_THREAD;
154         taa_data->tlat_irq_timstamp = record->ts;
155
156         /*
157          * Zero values.
158          */
159         taa_data->thread_nmi_sum = 0;
160         taa_data->thread_irq_sum = 0;
161         taa_data->thread_softirq_sum = 0;
162         taa_data->thread_thread_sum = 0;
163         taa_data->thread_blocking_duration = 0;
164         taa_data->timer_irq_start_time = 0;
165         taa_data->timer_irq_duration = 0;
166         taa_data->timer_exit_from_idle = 0;
167
168         /*
169          * Zero interference tasks.
170          */
171         trace_seq_reset(taa_data->nmi_seq);
172         trace_seq_reset(taa_data->irqs_seq);
173         trace_seq_reset(taa_data->softirqs_seq);
174         trace_seq_reset(taa_data->threads_seq);
175
176         /* IRQ latency values */
177         tep_get_field_val(s, event, "timer_latency", record, &taa_data->tlat_irq_latency, 1);
178         tep_get_field_val(s, event, "seqnum", record, &taa_data->tlat_irq_seqnum, 1);
179
180         /* The thread that can cause blocking */
181         tep_get_common_field_val(s, event, "common_pid", record, &taa_data->run_thread_pid, 1);
182
183         /*
184          * Get exit from idle case.
185          *
186          * If it is not idle thread:
187          */
188         if (taa_data->run_thread_pid)
189                 return 0;
190
191         /*
192          * if the latency is shorter than the known exit from idle:
193          */
194         if (taa_data->tlat_irq_latency < taa_data->max_exit_idle_latency)
195                 return 0;
196
197         /*
198          * To be safe, ignore the cases in which an IRQ/NMI could have
199          * interfered with the timerlat IRQ.
200          */
201         if (taa_data->tlat_irq_timstamp - taa_data->tlat_irq_latency
202             < taa_data->prev_irq_timstamp + taa_data->prev_irq_duration)
203                 return 0;
204
205         taa_data->max_exit_idle_latency = taa_data->tlat_irq_latency;
206
207         return 0;
208 }
209
210 /*
211  * timerlat_aa_thread_latency - Handles timerlat thread event
212  */
213 static int timerlat_aa_thread_latency(struct timerlat_aa_data *taa_data,
214                                       struct trace_seq *s, struct tep_record *record,
215                                       struct tep_event *event)
216 {
217         /*
218          * For interference, we start now looking for things that can delay
219          * the IRQ of the next cycle.
220          */
221         taa_data->curr_state = TIMERLAT_WAITING_IRQ;
222         taa_data->tlat_thread_timstamp = record->ts;
223
224         /* Thread latency values */
225         tep_get_field_val(s, event, "timer_latency", record, &taa_data->tlat_thread_latency, 1);
226         tep_get_field_val(s, event, "seqnum", record, &taa_data->tlat_thread_seqnum, 1);
227
228         return 0;
229 }
230
231 /*
232  * timerlat_aa_handler - Handle timerlat events
233  *
234  * This function is called to handle timerlat events recording statistics.
235  *
236  * Returns 0 on success, -1 otherwise.
237  */
238 static int timerlat_aa_handler(struct trace_seq *s, struct tep_record *record,
239                         struct tep_event *event, void *context)
240 {
241         struct timerlat_aa_context *taa_ctx = timerlat_aa_get_ctx();
242         struct timerlat_aa_data *taa_data = timerlat_aa_get_data(taa_ctx, record->cpu);
243         unsigned long long thread;
244
245         if (!taa_data)
246                 return -1;
247
248         tep_get_field_val(s, event, "context", record, &thread, 1);
249         if (!thread)
250                 return timerlat_aa_irq_latency(taa_data, s, record, event);
251         else
252                 return timerlat_aa_thread_latency(taa_data, s, record, event);
253 }
254
255 /*
256  * timerlat_aa_nmi_handler - Handles NMI noise
257  *
258  * It is used to collect information about interferences from NMI. It is
259  * hooked to the osnoise:nmi_noise event.
260  */
261 static int timerlat_aa_nmi_handler(struct trace_seq *s, struct tep_record *record,
262                                    struct tep_event *event, void *context)
263 {
264         struct timerlat_aa_context *taa_ctx = timerlat_aa_get_ctx();
265         struct timerlat_aa_data *taa_data = timerlat_aa_get_data(taa_ctx, record->cpu);
266         unsigned long long duration;
267         unsigned long long start;
268
269         tep_get_field_val(s, event, "duration", record, &duration, 1);
270         tep_get_field_val(s, event, "start", record, &start, 1);
271
272         if (taa_data->curr_state == TIMERLAT_WAITING_IRQ) {
273                 taa_data->prev_irq_duration = duration;
274                 taa_data->prev_irq_timstamp = start;
275
276                 trace_seq_reset(taa_data->prev_irqs_seq);
277                 trace_seq_printf(taa_data->prev_irqs_seq, "\t%24s       \t\t\t%9.2f us\n",
278                          "nmi", ns_to_usf(duration));
279                 return 0;
280         }
281
282         taa_data->thread_nmi_sum += duration;
283         trace_seq_printf(taa_data->nmi_seq, "   %24s    \t\t\t%9.2f us\n",
284                  "nmi", ns_to_usf(duration));
285
286         return 0;
287 }
288
289 /*
290  * timerlat_aa_irq_handler - Handles IRQ noise
291  *
292  * It is used to collect information about interferences from IRQ. It is
293  * hooked to the osnoise:irq_noise event.
294  *
295  * It is a little bit more complex than the other because it measures:
296  *      - The IRQs that can delay the timer IRQ before it happened.
297  *      - The Timerlat IRQ handler
298  *      - The IRQs that happened between the timerlat IRQ and the timerlat thread
299  *        (IRQ interference).
300  */
301 static int timerlat_aa_irq_handler(struct trace_seq *s, struct tep_record *record,
302                                    struct tep_event *event, void *context)
303 {
304         struct timerlat_aa_context *taa_ctx = timerlat_aa_get_ctx();
305         struct timerlat_aa_data *taa_data = timerlat_aa_get_data(taa_ctx, record->cpu);
306         unsigned long long expected_start;
307         unsigned long long duration;
308         unsigned long long vector;
309         unsigned long long start;
310         char *desc;
311         int val;
312
313         tep_get_field_val(s, event, "duration", record, &duration, 1);
314         tep_get_field_val(s, event, "start", record, &start, 1);
315         tep_get_field_val(s, event, "vector", record, &vector, 1);
316         desc = tep_get_field_raw(s, event, "desc", record, &val, 1);
317
318         /*
319          * Before the timerlat IRQ.
320          */
321         if (taa_data->curr_state == TIMERLAT_WAITING_IRQ) {
322                 taa_data->prev_irq_duration = duration;
323                 taa_data->prev_irq_timstamp = start;
324
325                 trace_seq_reset(taa_data->prev_irqs_seq);
326                 trace_seq_printf(taa_data->prev_irqs_seq, "\t%24s:%-3llu        \t\t%9.2f us\n",
327                                  desc, vector, ns_to_usf(duration));
328                 return 0;
329         }
330
331         /*
332          * The timerlat IRQ: taa_data->timer_irq_start_time is zeroed at
333          * the timerlat irq handler.
334          */
335         if (!taa_data->timer_irq_start_time) {
336                 expected_start = taa_data->tlat_irq_timstamp - taa_data->tlat_irq_latency;
337
338                 taa_data->timer_irq_start_time = start;
339                 taa_data->timer_irq_duration = duration;
340
341                 /*
342                  * We are dealing with two different clock sources: the
343                  * external clock source that timerlat uses as a reference
344                  * and the clock used by the tracer. There are also two
345                  * moments: the time reading the clock and the timer in
346                  * which the event is placed in the buffer (the trace
347                  * event timestamp). If the processor is slow or there
348                  * is some hardware noise, the difference between the
349                  * timestamp and the external clock read can be longer
350                  * than the IRQ handler delay, resulting in a negative
351                  * time. If so, set IRQ start delay as 0. In the end,
352                  * it is less relevant than the noise.
353                  */
354                 if (expected_start < taa_data->timer_irq_start_time)
355                         taa_data->timer_irq_start_delay = taa_data->timer_irq_start_time - expected_start;
356                 else
357                         taa_data->timer_irq_start_delay = 0;
358
359                 /*
360                  * not exit from idle.
361                  */
362                 if (taa_data->run_thread_pid)
363                         return 0;
364
365                 if (expected_start > taa_data->prev_irq_timstamp + taa_data->prev_irq_duration)
366                         taa_data->timer_exit_from_idle = taa_data->timer_irq_start_delay;
367
368                 return 0;
369         }
370
371         /*
372          * IRQ interference.
373          */
374         taa_data->thread_irq_sum += duration;
375         trace_seq_printf(taa_data->irqs_seq, "  %24s:%-3llu     \t      %9.2f us\n",
376                          desc, vector, ns_to_usf(duration));
377
378         return 0;
379 }
380
381 static char *softirq_name[] = { "HI", "TIMER",  "NET_TX", "NET_RX", "BLOCK",
382                                 "IRQ_POLL", "TASKLET", "SCHED", "HRTIMER", "RCU" };
383
384
385 /*
386  * timerlat_aa_softirq_handler - Handles Softirq noise
387  *
388  * It is used to collect information about interferences from Softirq. It is
389  * hooked to the osnoise:softirq_noise event.
390  *
391  * It is only printed in the non-rt kernel, as softirqs become thread on RT.
392  */
393 static int timerlat_aa_softirq_handler(struct trace_seq *s, struct tep_record *record,
394                                        struct tep_event *event, void *context)
395 {
396         struct timerlat_aa_context *taa_ctx = timerlat_aa_get_ctx();
397         struct timerlat_aa_data *taa_data = timerlat_aa_get_data(taa_ctx, record->cpu);
398         unsigned long long duration;
399         unsigned long long vector;
400         unsigned long long start;
401
402         if (taa_data->curr_state == TIMERLAT_WAITING_IRQ)
403                 return 0;
404
405         tep_get_field_val(s, event, "duration", record, &duration, 1);
406         tep_get_field_val(s, event, "start", record, &start, 1);
407         tep_get_field_val(s, event, "vector", record, &vector, 1);
408
409         taa_data->thread_softirq_sum += duration;
410
411         trace_seq_printf(taa_data->softirqs_seq, "\t%24s:%-3llu \t      %9.2f us\n",
412                          softirq_name[vector], vector, ns_to_usf(duration));
413         return 0;
414 }
415
416 /*
417  * timerlat_aa_softirq_handler - Handles thread noise
418  *
419  * It is used to collect information about interferences from threads. It is
420  * hooked to the osnoise:thread_noise event.
421  *
422  * Note: if you see thread noise, your timerlat thread was not the highest prio one.
423  */
424 static int timerlat_aa_thread_handler(struct trace_seq *s, struct tep_record *record,
425                                       struct tep_event *event, void *context)
426 {
427         struct timerlat_aa_context *taa_ctx = timerlat_aa_get_ctx();
428         struct timerlat_aa_data *taa_data = timerlat_aa_get_data(taa_ctx, record->cpu);
429         unsigned long long duration;
430         unsigned long long start;
431         unsigned long long pid;
432         const char *comm;
433         int val;
434
435         if (taa_data->curr_state == TIMERLAT_WAITING_IRQ)
436                 return 0;
437
438         tep_get_field_val(s, event, "duration", record, &duration, 1);
439         tep_get_field_val(s, event, "start", record, &start, 1);
440
441         tep_get_common_field_val(s, event, "common_pid", record, &pid, 1);
442         comm = tep_get_field_raw(s, event, "comm", record, &val, 1);
443
444         if (pid == taa_data->run_thread_pid && !taa_data->thread_blocking_duration) {
445                 taa_data->thread_blocking_duration = duration;
446
447                 if (comm)
448                         strncpy(taa_data->run_thread_comm, comm, MAX_COMM);
449                 else
450                         sprintf(taa_data->run_thread_comm, "<...>");
451
452         } else {
453                 taa_data->thread_thread_sum += duration;
454
455                 trace_seq_printf(taa_data->threads_seq, "\t%24s:%-3llu  \t\t%9.2f us\n",
456                          comm, pid, ns_to_usf(duration));
457         }
458
459         return 0;
460 }
461
462 /*
463  * timerlat_aa_stack_handler - Handles timerlat IRQ stack trace
464  *
465  * Saves and parse the stack trace generated by the timerlat IRQ.
466  */
467 static int timerlat_aa_stack_handler(struct trace_seq *s, struct tep_record *record,
468                               struct tep_event *event, void *context)
469 {
470         struct timerlat_aa_context *taa_ctx = timerlat_aa_get_ctx();
471         struct timerlat_aa_data *taa_data = timerlat_aa_get_data(taa_ctx, record->cpu);
472         unsigned long *caller;
473         const char *function;
474         int val, i;
475
476         trace_seq_reset(taa_data->stack_seq);
477
478         trace_seq_printf(taa_data->stack_seq, "    Blocking thread stack trace\n");
479         caller = tep_get_field_raw(s, event, "caller", record, &val, 1);
480         if (caller) {
481                 for (i = 0; ; i++) {
482                         function = tep_find_function(taa_ctx->tool->trace.tep, caller[i]);
483                         if (!function)
484                                 break;
485                         trace_seq_printf(taa_data->stack_seq, "\t\t-> %s\n", function);
486                 }
487         }
488         return 0;
489 }
490
491 /*
492  * timerlat_aa_sched_switch_handler - Tracks the current thread running on the CPU
493  *
494  * Handles the sched:sched_switch event to trace the current thread running on the
495  * CPU. It is used to display the threads running on the other CPUs when the trace
496  * stops.
497  */
498 static int timerlat_aa_sched_switch_handler(struct trace_seq *s, struct tep_record *record,
499                                             struct tep_event *event, void *context)
500 {
501         struct timerlat_aa_context *taa_ctx = timerlat_aa_get_ctx();
502         struct timerlat_aa_data *taa_data = timerlat_aa_get_data(taa_ctx, record->cpu);
503         const char *comm;
504         int val;
505
506         tep_get_field_val(s, event, "next_pid", record, &taa_data->current_pid, 1);
507         comm = tep_get_field_raw(s, event, "next_comm", record, &val, 1);
508
509         strncpy(taa_data->current_comm, comm, MAX_COMM);
510
511         /*
512          * If this was a kworker, clean the last kworkers that ran.
513          */
514         taa_data->kworker = 0;
515         taa_data->kworker_func = 0;
516
517         return 0;
518 }
519
520 /*
521  * timerlat_aa_kworker_start_handler - Tracks a kworker running on the CPU
522  *
523  * Handles workqueue:workqueue_execute_start event, keeping track of
524  * the job that a kworker could be doing in the CPU.
525  *
526  * We already catch problems of hardware related latencies caused by work queues
527  * running driver code that causes hardware stall. For example, with DRM drivers.
528  */
529 static int timerlat_aa_kworker_start_handler(struct trace_seq *s, struct tep_record *record,
530                                              struct tep_event *event, void *context)
531 {
532         struct timerlat_aa_context *taa_ctx = timerlat_aa_get_ctx();
533         struct timerlat_aa_data *taa_data = timerlat_aa_get_data(taa_ctx, record->cpu);
534
535         tep_get_field_val(s, event, "work", record, &taa_data->kworker, 1);
536         tep_get_field_val(s, event, "function", record, &taa_data->kworker_func, 1);
537         return 0;
538 }
539
540 /*
541  * timerlat_thread_analysis - Prints the analysis of a CPU that hit a stop tracing
542  *
543  * This is the core of the analysis.
544  */
545 static void timerlat_thread_analysis(struct timerlat_aa_data *taa_data, int cpu,
546                                      int irq_thresh, int thread_thresh)
547 {
548         long long exp_irq_ts;
549         int total;
550         int irq;
551
552         /*
553          * IRQ latency or Thread latency?
554          */
555         if (taa_data->tlat_irq_seqnum > taa_data->tlat_thread_seqnum) {
556                 irq = 1;
557                 total = taa_data->tlat_irq_latency;
558         } else {
559                 irq = 0;
560                 total = taa_data->tlat_thread_latency;
561         }
562
563         /*
564          * Expected IRQ arrival time using the trace clock as the base.
565          *
566          * TODO: Add a list of previous IRQ, and then run the list backwards.
567          */
568         exp_irq_ts = taa_data->timer_irq_start_time - taa_data->timer_irq_start_delay;
569         if (exp_irq_ts < taa_data->prev_irq_timstamp + taa_data->prev_irq_duration) {
570                 if (taa_data->prev_irq_timstamp < taa_data->timer_irq_start_time)
571                         printf("  Previous IRQ interference:    \t\t up to  %9.2f us\n",
572                                 ns_to_usf(taa_data->prev_irq_duration));
573         }
574
575         /*
576          * The delay that the IRQ suffered before starting.
577          */
578         printf("  IRQ handler delay:            %16s    %9.2f us (%.2f %%)\n",
579                 (ns_to_usf(taa_data->timer_exit_from_idle) > 10) ? "(exit from idle)" : "",
580                 ns_to_usf(taa_data->timer_irq_start_delay),
581                 ns_to_per(total, taa_data->timer_irq_start_delay));
582
583         /*
584          * Timerlat IRQ.
585          */
586         printf("  IRQ latency:  \t\t\t\t        %9.2f us\n",
587                 ns_to_usf(taa_data->tlat_irq_latency));
588
589         if (irq) {
590                 /*
591                  * If the trace stopped due to IRQ, the other events will not happen
592                  * because... the trace stopped :-).
593                  *
594                  * That is all folks, the stack trace was printed before the stop,
595                  * so it will be displayed, it is the key.
596                  */
597                 printf("  Blocking thread:\n");
598                 printf("        %24s:%-9llu\n",
599                         taa_data->run_thread_comm, taa_data->run_thread_pid);
600         } else  {
601                 /*
602                  * The duration of the IRQ handler that handled the timerlat IRQ.
603                  */
604                 printf("  Timerlat IRQ duration:        \t\t    %9.2f us (%.2f %%)\n",
605                         ns_to_usf(taa_data->timer_irq_duration),
606                         ns_to_per(total, taa_data->timer_irq_duration));
607
608                 /*
609                  * The amount of time that the current thread postponed the scheduler.
610                  *
611                  * Recalling that it is net from NMI/IRQ/Softirq interference, so there
612                  * is no need to compute values here.
613                  */
614                 printf("  Blocking thread:      \t\t\t  %9.2f us (%.2f %%)\n",
615                         ns_to_usf(taa_data->thread_blocking_duration),
616                         ns_to_per(total, taa_data->thread_blocking_duration));
617
618                 printf("        %24s:%-9llu             %9.2f us\n",
619                         taa_data->run_thread_comm, taa_data->run_thread_pid,
620                         ns_to_usf(taa_data->thread_blocking_duration));
621         }
622
623         /*
624          * Print the stack trace!
625          */
626         trace_seq_do_printf(taa_data->stack_seq);
627
628         /*
629          * NMIs can happen during the IRQ, so they are always possible.
630          */
631         if (taa_data->thread_nmi_sum)
632                 printf("  NMI interference      \t\t\t  %9.2f us (%.2f %%)\n",
633                         ns_to_usf(taa_data->thread_nmi_sum),
634                         ns_to_per(total, taa_data->thread_nmi_sum));
635
636         /*
637          * If it is an IRQ latency, the other factors can be skipped.
638          */
639         if (irq)
640                 goto print_total;
641
642         /*
643          * Prints the interference caused by IRQs to the thread latency.
644          */
645         if (taa_data->thread_irq_sum) {
646                 printf("  IRQ interference      \t\t\t  %9.2f us (%.2f %%)\n",
647                         ns_to_usf(taa_data->thread_irq_sum),
648                         ns_to_per(total, taa_data->thread_irq_sum));
649
650                 trace_seq_do_printf(taa_data->irqs_seq);
651         }
652
653         /*
654          * Prints the interference caused by Softirqs to the thread latency.
655          */
656         if (taa_data->thread_softirq_sum) {
657                 printf("  Softirq interference  \t\t\t  %9.2f us (%.2f %%)\n",
658                         ns_to_usf(taa_data->thread_softirq_sum),
659                         ns_to_per(total, taa_data->thread_softirq_sum));
660
661                 trace_seq_do_printf(taa_data->softirqs_seq);
662         }
663
664         /*
665          * Prints the interference caused by other threads to the thread latency.
666          *
667          * If this happens, your timerlat is not the highest prio. OK, migration
668          * thread can happen. But otherwise, you are not measuring the "scheduling
669          * latency" only, and here is the difference from scheduling latency and
670          * timer handling latency.
671          */
672         if (taa_data->thread_thread_sum) {
673                 printf("  Thread interference   \t\t\t  %9.2f us (%.2f %%)\n",
674                         ns_to_usf(taa_data->thread_thread_sum),
675                         ns_to_per(total, taa_data->thread_thread_sum));
676
677                 trace_seq_do_printf(taa_data->threads_seq);
678         }
679
680         /*
681          * Done.
682          */
683 print_total:
684         printf("------------------------------------------------------------------------\n");
685         printf("  %s latency:   \t\t\t  %9.2f us (100%%)\n", irq ? "IRQ" : "Thread",
686                 ns_to_usf(total));
687 }
688
689 static int timerlat_auto_analysis_collect_trace(struct timerlat_aa_context *taa_ctx)
690 {
691         struct trace_instance *trace = &taa_ctx->tool->trace;
692         int retval;
693
694         retval = tracefs_iterate_raw_events(trace->tep,
695                                             trace->inst,
696                                             NULL,
697                                             0,
698                                             collect_registered_events,
699                                             trace);
700                 if (retval < 0) {
701                         err_msg("Error iterating on events\n");
702                         return 0;
703                 }
704
705         return 1;
706 }
707
708 /**
709  * timerlat_auto_analysis - Analyze the collected data
710  */
711 void timerlat_auto_analysis(int irq_thresh, int thread_thresh)
712 {
713         struct timerlat_aa_context *taa_ctx = timerlat_aa_get_ctx();
714         unsigned long long max_exit_from_idle = 0;
715         struct timerlat_aa_data *taa_data;
716         int max_exit_from_idle_cpu;
717         struct tep_handle *tep;
718         int cpu;
719
720         timerlat_auto_analysis_collect_trace(taa_ctx);
721
722         /* bring stop tracing to the ns scale */
723         irq_thresh = irq_thresh * 1000;
724         thread_thresh = thread_thresh * 1000;
725
726         for (cpu = 0; cpu < taa_ctx->nr_cpus; cpu++) {
727                 taa_data = timerlat_aa_get_data(taa_ctx, cpu);
728
729                 if (irq_thresh && taa_data->tlat_irq_latency >= irq_thresh) {
730                         printf("## CPU %d hit stop tracing, analyzing it ##\n", cpu);
731                         timerlat_thread_analysis(taa_data, cpu, irq_thresh, thread_thresh);
732                 } else if (thread_thresh && (taa_data->tlat_thread_latency) >= thread_thresh) {
733                         printf("## CPU %d hit stop tracing, analyzing it ##\n", cpu);
734                         timerlat_thread_analysis(taa_data, cpu, irq_thresh, thread_thresh);
735                 }
736
737                 if (taa_data->max_exit_idle_latency > max_exit_from_idle) {
738                         max_exit_from_idle = taa_data->max_exit_idle_latency;
739                         max_exit_from_idle_cpu = cpu;
740                 }
741
742         }
743
744         if (max_exit_from_idle) {
745                 printf("\n");
746                 printf("Max timerlat IRQ latency from idle: %.2f us in cpu %d\n",
747                         ns_to_usf(max_exit_from_idle), max_exit_from_idle_cpu);
748         }
749         if (!taa_ctx->dump_tasks)
750                 return;
751
752         printf("\n");
753         printf("Printing CPU tasks:\n");
754         for (cpu = 0; cpu < taa_ctx->nr_cpus; cpu++) {
755                 taa_data = timerlat_aa_get_data(taa_ctx, cpu);
756                 tep = taa_ctx->tool->trace.tep;
757
758                 printf("    [%.3d] %24s:%llu", cpu, taa_data->current_comm, taa_data->current_pid);
759
760                 if (taa_data->kworker_func)
761                         printf(" kworker:%s:%s",
762                                 tep_find_function(tep, taa_data->kworker) ? : "<...>",
763                                 tep_find_function(tep, taa_data->kworker_func));
764                 printf("\n");
765         }
766
767 }
768
769 /*
770  * timerlat_aa_destroy_seqs - Destroy seq files used to store parsed data
771  */
772 static void timerlat_aa_destroy_seqs(struct timerlat_aa_context *taa_ctx)
773 {
774         struct timerlat_aa_data *taa_data;
775         int i;
776
777         if (!taa_ctx->taa_data)
778                 return;
779
780         for (i = 0; i < taa_ctx->nr_cpus; i++) {
781                 taa_data = timerlat_aa_get_data(taa_ctx, i);
782
783                 if (taa_data->prev_irqs_seq) {
784                         trace_seq_destroy(taa_data->prev_irqs_seq);
785                         free(taa_data->prev_irqs_seq);
786                 }
787
788                 if (taa_data->nmi_seq) {
789                         trace_seq_destroy(taa_data->nmi_seq);
790                         free(taa_data->nmi_seq);
791                 }
792
793                 if (taa_data->irqs_seq) {
794                         trace_seq_destroy(taa_data->irqs_seq);
795                         free(taa_data->irqs_seq);
796                 }
797
798                 if (taa_data->softirqs_seq) {
799                         trace_seq_destroy(taa_data->softirqs_seq);
800                         free(taa_data->softirqs_seq);
801                 }
802
803                 if (taa_data->threads_seq) {
804                         trace_seq_destroy(taa_data->threads_seq);
805                         free(taa_data->threads_seq);
806                 }
807
808                 if (taa_data->stack_seq) {
809                         trace_seq_destroy(taa_data->stack_seq);
810                         free(taa_data->stack_seq);
811                 }
812         }
813 }
814
815 /*
816  * timerlat_aa_init_seqs - Init seq files used to store parsed information
817  *
818  * Instead of keeping data structures to store raw data, use seq files to
819  * store parsed data.
820  *
821  * Allocates and initialize seq files.
822  *
823  * Returns 0 on success, -1 otherwise.
824  */
825 static int timerlat_aa_init_seqs(struct timerlat_aa_context *taa_ctx)
826 {
827         struct timerlat_aa_data *taa_data;
828         int i;
829
830         for (i = 0; i < taa_ctx->nr_cpus; i++) {
831
832                 taa_data = timerlat_aa_get_data(taa_ctx, i);
833
834                 taa_data->prev_irqs_seq = calloc(1, sizeof(*taa_data->prev_irqs_seq));
835                 if (!taa_data->prev_irqs_seq)
836                         goto out_err;
837
838                 trace_seq_init(taa_data->prev_irqs_seq);
839
840                 taa_data->nmi_seq = calloc(1, sizeof(*taa_data->nmi_seq));
841                 if (!taa_data->nmi_seq)
842                         goto out_err;
843
844                 trace_seq_init(taa_data->nmi_seq);
845
846                 taa_data->irqs_seq = calloc(1, sizeof(*taa_data->irqs_seq));
847                 if (!taa_data->irqs_seq)
848                         goto out_err;
849
850                 trace_seq_init(taa_data->irqs_seq);
851
852                 taa_data->softirqs_seq = calloc(1, sizeof(*taa_data->softirqs_seq));
853                 if (!taa_data->softirqs_seq)
854                         goto out_err;
855
856                 trace_seq_init(taa_data->softirqs_seq);
857
858                 taa_data->threads_seq = calloc(1, sizeof(*taa_data->threads_seq));
859                 if (!taa_data->threads_seq)
860                         goto out_err;
861
862                 trace_seq_init(taa_data->threads_seq);
863
864                 taa_data->stack_seq = calloc(1, sizeof(*taa_data->stack_seq));
865                 if (!taa_data->stack_seq)
866                         goto out_err;
867
868                 trace_seq_init(taa_data->stack_seq);
869         }
870
871         return 0;
872
873 out_err:
874         timerlat_aa_destroy_seqs(taa_ctx);
875         return -1;
876 }
877
878 /*
879  * timerlat_aa_unregister_events - Unregister events used in the auto-analysis
880  */
881 static void timerlat_aa_unregister_events(struct osnoise_tool *tool, int dump_tasks)
882 {
883
884         tep_unregister_event_handler(tool->trace.tep, -1, "ftrace", "timerlat",
885                                      timerlat_aa_handler, tool);
886
887         tracefs_event_disable(tool->trace.inst, "osnoise", NULL);
888
889         tep_unregister_event_handler(tool->trace.tep, -1, "osnoise", "nmi_noise",
890                                      timerlat_aa_nmi_handler, tool);
891
892         tep_unregister_event_handler(tool->trace.tep, -1, "osnoise", "irq_noise",
893                                      timerlat_aa_irq_handler, tool);
894
895         tep_unregister_event_handler(tool->trace.tep, -1, "osnoise", "softirq_noise",
896                                      timerlat_aa_softirq_handler, tool);
897
898         tep_unregister_event_handler(tool->trace.tep, -1, "osnoise", "thread_noise",
899                                      timerlat_aa_thread_handler, tool);
900
901         tep_unregister_event_handler(tool->trace.tep, -1, "ftrace", "kernel_stack",
902                                      timerlat_aa_stack_handler, tool);
903         if (!dump_tasks)
904                 return;
905
906         tracefs_event_disable(tool->trace.inst, "sched", "sched_switch");
907         tep_unregister_event_handler(tool->trace.tep, -1, "sched", "sched_switch",
908                                      timerlat_aa_sched_switch_handler, tool);
909
910         tracefs_event_disable(tool->trace.inst, "workqueue", "workqueue_execute_start");
911         tep_unregister_event_handler(tool->trace.tep, -1, "workqueue", "workqueue_execute_start",
912                                      timerlat_aa_kworker_start_handler, tool);
913 }
914
915 /*
916  * timerlat_aa_register_events - Register events used in the auto-analysis
917  *
918  * Returns 0 on success, -1 otherwise.
919  */
920 static int timerlat_aa_register_events(struct osnoise_tool *tool, int dump_tasks)
921 {
922         int retval;
923
924         tep_register_event_handler(tool->trace.tep, -1, "ftrace", "timerlat",
925                                 timerlat_aa_handler, tool);
926
927
928         /*
929          * register auto-analysis handlers.
930          */
931         retval = tracefs_event_enable(tool->trace.inst, "osnoise", NULL);
932         if (retval < 0 && !errno) {
933                 err_msg("Could not find osnoise events\n");
934                 goto out_err;
935         }
936
937         tep_register_event_handler(tool->trace.tep, -1, "osnoise", "nmi_noise",
938                                    timerlat_aa_nmi_handler, tool);
939
940         tep_register_event_handler(tool->trace.tep, -1, "osnoise", "irq_noise",
941                                    timerlat_aa_irq_handler, tool);
942
943         tep_register_event_handler(tool->trace.tep, -1, "osnoise", "softirq_noise",
944                                    timerlat_aa_softirq_handler, tool);
945
946         tep_register_event_handler(tool->trace.tep, -1, "osnoise", "thread_noise",
947                                    timerlat_aa_thread_handler, tool);
948
949         tep_register_event_handler(tool->trace.tep, -1, "ftrace", "kernel_stack",
950                                    timerlat_aa_stack_handler, tool);
951
952         if (!dump_tasks)
953                 return 0;
954
955         /*
956          * Dump task events.
957          */
958         retval = tracefs_event_enable(tool->trace.inst, "sched", "sched_switch");
959         if (retval < 0 && !errno) {
960                 err_msg("Could not find sched_switch\n");
961                 goto out_err;
962         }
963
964         tep_register_event_handler(tool->trace.tep, -1, "sched", "sched_switch",
965                                    timerlat_aa_sched_switch_handler, tool);
966
967         retval = tracefs_event_enable(tool->trace.inst, "workqueue", "workqueue_execute_start");
968         if (retval < 0 && !errno) {
969                 err_msg("Could not find workqueue_execute_start\n");
970                 goto out_err;
971         }
972
973         tep_register_event_handler(tool->trace.tep, -1, "workqueue", "workqueue_execute_start",
974                                    timerlat_aa_kworker_start_handler, tool);
975
976         return 0;
977
978 out_err:
979         timerlat_aa_unregister_events(tool, dump_tasks);
980         return -1;
981 }
982
983 /**
984  * timerlat_aa_destroy - Destroy timerlat auto-analysis
985  */
986 void timerlat_aa_destroy(void)
987 {
988         struct timerlat_aa_context *taa_ctx = timerlat_aa_get_ctx();
989
990         if (!taa_ctx)
991                 return;
992
993         if (!taa_ctx->taa_data)
994                 goto out_ctx;
995
996         timerlat_aa_unregister_events(taa_ctx->tool, taa_ctx->dump_tasks);
997         timerlat_aa_destroy_seqs(taa_ctx);
998         free(taa_ctx->taa_data);
999 out_ctx:
1000         free(taa_ctx);
1001 }
1002
1003 /**
1004  * timerlat_aa_init - Initialize timerlat auto-analysis
1005  *
1006  * Returns 0 on success, -1 otherwise.
1007  */
1008 int timerlat_aa_init(struct osnoise_tool *tool, int dump_tasks)
1009 {
1010         int nr_cpus = sysconf(_SC_NPROCESSORS_CONF);
1011         struct timerlat_aa_context *taa_ctx;
1012         int retval;
1013
1014         taa_ctx = calloc(1, sizeof(*taa_ctx));
1015         if (!taa_ctx)
1016                 return -1;
1017
1018         __timerlat_aa_ctx = taa_ctx;
1019
1020         taa_ctx->nr_cpus = nr_cpus;
1021         taa_ctx->tool = tool;
1022         taa_ctx->dump_tasks = dump_tasks;
1023
1024         taa_ctx->taa_data = calloc(nr_cpus, sizeof(*taa_ctx->taa_data));
1025         if (!taa_ctx->taa_data)
1026                 goto out_err;
1027
1028         retval = timerlat_aa_init_seqs(taa_ctx);
1029         if (retval)
1030                 goto out_err;
1031
1032         retval = timerlat_aa_register_events(tool, dump_tasks);
1033         if (retval)
1034                 goto out_err;
1035
1036         return 0;
1037
1038 out_err:
1039         timerlat_aa_destroy();
1040         return -1;
1041 }