riscv, bpf: Simplify sext and zext logics in branch instructions
[linux-block.git] / tools / tracing / latency / latency-collector.c
CommitLineData
e23db805
VR
1// SPDX-License-Identifier: GPL-2.0
2/*
3 * Copyright (C) 2017, 2018, 2019, 2021 BMW Car IT GmbH
4 * Author: Viktor Rosendahl (viktor.rosendahl@bmw.de)
5 */
6
7#define _GNU_SOURCE
8#define _POSIX_C_SOURCE 200809L
9
10#include <ctype.h>
11#include <stdbool.h>
12#include <stdio.h>
13#include <stdlib.h>
14#include <string.h>
15
16#include <err.h>
17#include <errno.h>
18#include <fcntl.h>
19#include <getopt.h>
20#include <sched.h>
21#include <linux/unistd.h>
22#include <signal.h>
23#include <sys/inotify.h>
24#include <unistd.h>
25#include <pthread.h>
26#include <tracefs.h>
27
28static const char *prg_name;
29static const char *prg_unknown = "unknown program name";
30
31static int fd_stdout;
32
33static int sched_policy;
34static bool sched_policy_set;
35
36static int sched_pri;
37static bool sched_pri_set;
38
39static bool trace_enable = true;
40static bool setup_ftrace = true;
41static bool use_random_sleep;
42
43#define TRACE_OPTS \
44 C(FUNC_TR, "function-trace"), \
45 C(DISP_GR, "display-graph"), \
46 C(NR, NULL)
47
48#undef C
49#define C(a, b) OPTIDX_##a
50
51enum traceopt {
52 TRACE_OPTS
53};
54
55#undef C
56#define C(a, b) b
57
58static const char *const optstr[] = {
59 TRACE_OPTS
60};
61
62enum errhandling {
63 ERR_EXIT = 0,
64 ERR_WARN,
65 ERR_CLEANUP,
66};
67
68static bool use_options[OPTIDX_NR];
69
70static char inotify_buffer[655360];
71
72#define likely(x) __builtin_expect(!!(x), 1)
73#define unlikely(x) __builtin_expect(!!(x), 0)
74#define bool2str(x) (x ? "true":"false")
75
76#define DEFAULT_NR_PRINTER_THREADS (3)
77static unsigned int nr_threads = DEFAULT_NR_PRINTER_THREADS;
78
79#define DEFAULT_TABLE_SIZE (2)
80static unsigned int table_startsize = DEFAULT_TABLE_SIZE;
81
82static int verbosity;
83
84#define verbose_sizechange() (verbosity >= 1)
85#define verbose_lostevent() (verbosity >= 2)
86#define verbose_ftrace() (verbosity >= 1)
87
88#define was_changed(ORIG, CUR) (strcmp(ORIG, CUR) != 0)
89#define needs_change(CUR, WANTED) (strcmp(CUR, WANTED) != 0)
90
91static const char *debug_tracefile;
92static const char *debug_tracefile_dflt;
93static const char *debug_maxlat;
94static const char *debug_maxlat_dflt;
95static const char * const DEBUG_NOFILE = "[file not found]";
96
97static const char * const TR_MAXLAT = "tracing_max_latency";
98static const char * const TR_THRESH = "tracing_thresh";
99static const char * const TR_CURRENT = "current_tracer";
100static const char * const TR_OPTIONS = "trace_options";
101
102static const char * const NOP_TRACER = "nop";
103
104static const char * const OPT_NO_PREFIX = "no";
105
106#define DFLT_THRESHOLD_US "0"
107static const char *threshold = DFLT_THRESHOLD_US;
108
109#define DEV_URANDOM "/dev/urandom"
110#define RT_DEFAULT_PRI (99)
111#define DEFAULT_PRI (0)
112
113#define USEC_PER_MSEC (1000L)
114#define NSEC_PER_USEC (1000L)
115#define NSEC_PER_MSEC (USEC_PER_MSEC * NSEC_PER_USEC)
116
117#define MSEC_PER_SEC (1000L)
118#define USEC_PER_SEC (USEC_PER_MSEC * MSEC_PER_SEC)
119#define NSEC_PER_SEC (NSEC_PER_MSEC * MSEC_PER_SEC)
120
121#define SLEEP_TIME_MS_DEFAULT (1000L)
122#define TRY_PRINTMUTEX_MS (1000)
123
124static long sleep_time = (USEC_PER_MSEC * SLEEP_TIME_MS_DEFAULT);
125
126static const char * const queue_full_warning =
127"Could not queue trace for printing. It is likely that events happen faster\n"
128"than what they can be printed. Probably partly because of random sleeping\n";
129
130static const char * const no_tracer_msg =
131"Could not find any tracers! Running this program as root may help!\n";
132
133static const char * const no_latency_tr_msg =
134"No latency tracers are supported by your kernel!\n";
135
136struct policy {
137 const char *name;
138 int policy;
139 int default_pri;
140};
141
142static const struct policy policies[] = {
143 { "other", SCHED_OTHER, DEFAULT_PRI },
144 { "batch", SCHED_BATCH, DEFAULT_PRI },
145 { "idle", SCHED_IDLE, DEFAULT_PRI },
146 { "rr", SCHED_RR, RT_DEFAULT_PRI },
147 { "fifo", SCHED_FIFO, RT_DEFAULT_PRI },
148 { NULL, 0, DEFAULT_PRI }
149};
150
151/*
152 * The default tracer will be the first on this list that is supported by the
153 * currently running Linux kernel.
154 */
155static const char * const relevant_tracers[] = {
156 "preemptirqsoff",
157 "preemptoff",
158 "irqsoff",
159 "wakeup",
160 "wakeup_rt",
161 "wakeup_dl",
162 NULL
163};
164
165/* This is the list of tracers for which random sleep makes sense */
166static const char * const random_tracers[] = {
167 "preemptirqsoff",
168 "preemptoff",
169 "irqsoff",
170 NULL
171};
172
173static const char *current_tracer;
174static bool force_tracer;
175
176struct ftrace_state {
177 char *tracer;
178 char *thresh;
179 bool opt[OPTIDX_NR];
180 bool opt_valid[OPTIDX_NR];
181 pthread_mutex_t mutex;
182};
183
184struct entry {
185 int ticket;
186 int ticket_completed_ref;
187};
188
189struct print_state {
190 int ticket_counter;
191 int ticket_completed;
192 pthread_mutex_t mutex;
193 pthread_cond_t cond;
194 int cnt;
195 pthread_mutex_t cnt_mutex;
196};
197
198struct short_msg {
199 char buf[160];
200 int len;
201};
202
203static struct print_state printstate;
204static struct ftrace_state save_state;
205volatile sig_atomic_t signal_flag;
206
207#define PROB_TABLE_MAX_SIZE (1000)
208
209int probabilities[PROB_TABLE_MAX_SIZE];
210
211struct sleep_table {
212 int *table;
213 int size;
214 pthread_mutex_t mutex;
215};
216
217static struct sleep_table sleeptable;
218
219#define QUEUE_SIZE (10)
220
221struct queue {
222 struct entry entries[QUEUE_SIZE];
223 int next_prod_idx;
224 int next_cons_idx;
225 pthread_mutex_t mutex;
226 pthread_cond_t cond;
227};
228
229#define MAX_THREADS (40)
230
231struct queue printqueue;
232pthread_t printthread[MAX_THREADS];
233pthread_mutex_t print_mtx;
234#define PRINT_BUFFER_SIZE (16 * 1024 * 1024)
235
236static void cleanup_exit(int status);
237static int set_trace_opt(const char *opt, bool value);
238
239static __always_inline void *malloc_or_die(size_t size)
240{
241 void *ptr = malloc(size);
242
243 if (unlikely(ptr == NULL)) {
244 warn("malloc() failed");
245 cleanup_exit(EXIT_FAILURE);
246 }
247 return ptr;
248}
249
250static __always_inline void *malloc_or_die_nocleanup(size_t size)
251{
252 void *ptr = malloc(size);
253
254 if (unlikely(ptr == NULL))
255 err(0, "malloc() failed");
256 return ptr;
257}
258
259static __always_inline void write_or_die(int fd, const char *buf, size_t count)
260{
261 ssize_t r;
262
263 do {
264 r = write(fd, buf, count);
265 if (unlikely(r < 0)) {
266 if (errno == EINTR)
267 continue;
268 warn("write() failed");
269 cleanup_exit(EXIT_FAILURE);
270 }
271 count -= r;
272 buf += r;
273 } while (count > 0);
274}
275
276static __always_inline void clock_gettime_or_die(clockid_t clk_id,
277 struct timespec *tp)
278{
279 int r = clock_gettime(clk_id, tp);
280
281 if (unlikely(r != 0))
282 err(EXIT_FAILURE, "clock_gettime() failed");
283}
284
285static __always_inline void sigemptyset_or_die(sigset_t *s)
286{
287 if (unlikely(sigemptyset(s) != 0)) {
288 warn("sigemptyset() failed");
289 cleanup_exit(EXIT_FAILURE);
290 }
291}
292
293static __always_inline void sigaddset_or_die(sigset_t *s, int signum)
294{
295 if (unlikely(sigaddset(s, signum) != 0)) {
296 warn("sigemptyset() failed");
297 cleanup_exit(EXIT_FAILURE);
298 }
299}
300
301static __always_inline void sigaction_or_die(int signum,
302 const struct sigaction *act,
303 struct sigaction *oldact)
304{
305 if (unlikely(sigaction(signum, act, oldact) != 0)) {
306 warn("sigaction() failed");
307 cleanup_exit(EXIT_FAILURE);
308 }
309}
310
311static void open_stdout(void)
312{
313 if (setvbuf(stdout, NULL, _IONBF, 0) != 0)
314 err(EXIT_FAILURE, "setvbuf() failed");
315 fd_stdout = fileno(stdout);
316 if (fd_stdout < 0)
317 err(EXIT_FAILURE, "fileno() failed");
318}
319
320/*
321 * It's not worth it to call cleanup_exit() from mutex functions because
322 * cleanup_exit() uses mutexes.
323 */
324static __always_inline void mutex_lock(pthread_mutex_t *mtx)
325{
326 errno = pthread_mutex_lock(mtx);
327 if (unlikely(errno))
328 err(EXIT_FAILURE, "pthread_mutex_lock() failed");
329}
330
331
332static __always_inline void mutex_unlock(pthread_mutex_t *mtx)
333{
334 errno = pthread_mutex_unlock(mtx);
335 if (unlikely(errno))
336 err(EXIT_FAILURE, "pthread_mutex_unlock() failed");
337}
338
339static __always_inline void cond_signal(pthread_cond_t *cond)
340{
341 errno = pthread_cond_signal(cond);
342 if (unlikely(errno))
343 err(EXIT_FAILURE, "pthread_cond_signal() failed");
344}
345
346static __always_inline void cond_wait(pthread_cond_t *restrict cond,
347 pthread_mutex_t *restrict mutex)
348{
349 errno = pthread_cond_wait(cond, mutex);
350 if (unlikely(errno))
351 err(EXIT_FAILURE, "pthread_cond_wait() failed");
352}
353
354static __always_inline void cond_broadcast(pthread_cond_t *cond)
355{
356 errno = pthread_cond_broadcast(cond);
357 if (unlikely(errno))
358 err(EXIT_FAILURE, "pthread_cond_broadcast() failed");
359}
360
361static __always_inline void
362mutex_init(pthread_mutex_t *mutex,
363 const pthread_mutexattr_t *attr)
364{
365 errno = pthread_mutex_init(mutex, attr);
366 if (errno)
367 err(EXIT_FAILURE, "pthread_mutex_init() failed");
368}
369
370static __always_inline void mutexattr_init(pthread_mutexattr_t *attr)
371{
372 errno = pthread_mutexattr_init(attr);
373 if (errno)
374 err(EXIT_FAILURE, "pthread_mutexattr_init() failed");
375}
376
377static __always_inline void mutexattr_destroy(pthread_mutexattr_t *attr)
378{
379 errno = pthread_mutexattr_destroy(attr);
380 if (errno)
381 err(EXIT_FAILURE, "pthread_mutexattr_destroy() failed");
382}
383
384static __always_inline void mutexattr_settype(pthread_mutexattr_t *attr,
385 int type)
386{
387 errno = pthread_mutexattr_settype(attr, type);
388 if (errno)
389 err(EXIT_FAILURE, "pthread_mutexattr_settype() failed");
390}
391
392static __always_inline void condattr_init(pthread_condattr_t *attr)
393{
394 errno = pthread_condattr_init(attr);
395 if (errno)
396 err(EXIT_FAILURE, "pthread_condattr_init() failed");
397}
398
399static __always_inline void condattr_destroy(pthread_condattr_t *attr)
400{
401 errno = pthread_condattr_destroy(attr);
402 if (errno)
403 err(EXIT_FAILURE, "pthread_condattr_destroy() failed");
404}
405
406static __always_inline void condattr_setclock(pthread_condattr_t *attr,
407 clockid_t clock_id)
408{
409 errno = pthread_condattr_setclock(attr, clock_id);
410 if (unlikely(errno))
411 err(EXIT_FAILURE, "pthread_condattr_setclock() failed");
412}
413
414static __always_inline void cond_init(pthread_cond_t *cond,
415 const pthread_condattr_t *attr)
416{
417 errno = pthread_cond_init(cond, attr);
418 if (errno)
419 err(EXIT_FAILURE, "pthread_cond_init() failed");
420}
421
422static __always_inline int
423cond_timedwait(pthread_cond_t *restrict cond,
424 pthread_mutex_t *restrict mutex,
425 const struct timespec *restrict abstime)
426{
427 errno = pthread_cond_timedwait(cond, mutex, abstime);
428 if (errno && errno != ETIMEDOUT)
429 err(EXIT_FAILURE, "pthread_cond_timedwait() failed");
430 return errno;
431}
432
433static void init_printstate(void)
434{
435 pthread_condattr_t cattr;
436
437 printstate.ticket_counter = 0;
438 printstate.ticket_completed = 0;
439 printstate.cnt = 0;
440
441 mutex_init(&printstate.mutex, NULL);
442
443 condattr_init(&cattr);
444 condattr_setclock(&cattr, CLOCK_MONOTONIC);
445 cond_init(&printstate.cond, &cattr);
446 condattr_destroy(&cattr);
447}
448
449static void init_print_mtx(void)
450{
451 pthread_mutexattr_t mattr;
452
453 mutexattr_init(&mattr);
454 mutexattr_settype(&mattr, PTHREAD_MUTEX_RECURSIVE);
455 mutex_init(&print_mtx, &mattr);
456 mutexattr_destroy(&mattr);
457
458}
459
460static void signal_blocking(int how)
461{
462 sigset_t s;
463
464 sigemptyset_or_die(&s);
465 sigaddset_or_die(&s, SIGHUP);
466 sigaddset_or_die(&s, SIGTERM);
467 sigaddset_or_die(&s, SIGINT);
468
469 errno = pthread_sigmask(how, &s, NULL);
470 if (unlikely(errno)) {
471 warn("pthread_sigmask() failed");
472 cleanup_exit(EXIT_FAILURE);
473 }
474}
475
476static void signal_handler(int num)
477{
478 signal_flag = num;
479}
480
481static void setup_sig_handler(void)
482{
483 struct sigaction sa;
484
485 memset(&sa, 0, sizeof(sa));
486 sa.sa_handler = signal_handler;
487
488 sigaction_or_die(SIGHUP, &sa, NULL);
489 sigaction_or_die(SIGTERM, &sa, NULL);
490 sigaction_or_die(SIGINT, &sa, NULL);
491}
492
493static void process_signal(int signal)
494{
495 char *name;
496
497 name = strsignal(signal);
498 if (name == NULL)
499 printf("Received signal %d\n", signal);
500 else
501 printf("Received signal %d (%s)\n", signal, name);
502 cleanup_exit(EXIT_SUCCESS);
503}
504
505static __always_inline void check_signals(void)
506{
507 int signal = signal_flag;
508
509 if (unlikely(signal))
510 process_signal(signal);
511}
512
513static __always_inline void get_time_in_future(struct timespec *future,
514 long time_us)
515{
516 long nsec;
517
518 clock_gettime_or_die(CLOCK_MONOTONIC, future);
519 future->tv_sec += time_us / USEC_PER_SEC;
520 nsec = future->tv_nsec + (time_us * NSEC_PER_USEC) % NSEC_PER_SEC;
521 if (nsec >= NSEC_PER_SEC) {
522 future->tv_nsec = nsec % NSEC_PER_SEC;
523 future->tv_sec += 1;
524 }
525}
526
527static __always_inline bool time_has_passed(const struct timespec *time)
528{
529 struct timespec now;
530
531 clock_gettime_or_die(CLOCK_MONOTONIC, &now);
532 if (now.tv_sec > time->tv_sec)
533 return true;
534 if (now.tv_sec < time->tv_sec)
535 return false;
536 return (now.tv_nsec >= time->tv_nsec);
537}
538
539static bool mutex_trylock_limit(pthread_mutex_t *mutex, int time_ms)
540{
541 long time_us = time_ms * USEC_PER_MSEC;
542 struct timespec limit;
543
544 get_time_in_future(&limit, time_us);
545 do {
546 errno = pthread_mutex_trylock(mutex);
547 if (errno && errno != EBUSY)
548 err(EXIT_FAILURE, "pthread_mutex_trylock() failed");
549 } while (errno && !time_has_passed(&limit));
550 return errno == 0;
551}
552
553static void restore_trace_opts(const struct ftrace_state *state,
554 const bool *cur)
555{
556 int i;
557 int r;
558
559 for (i = 0; i < OPTIDX_NR; i++)
560 if (state->opt_valid[i] && state->opt[i] != cur[i]) {
561 r = set_trace_opt(optstr[i], state->opt[i]);
562 if (r < 0)
563 warnx("Failed to restore the %s option to %s",
564 optstr[i], bool2str(state->opt[i]));
565 else if (verbose_ftrace())
566 printf("Restored the %s option in %s to %s\n",
567 optstr[i], TR_OPTIONS,
568 bool2str(state->opt[i]));
569 }
570}
571
572static char *read_file(const char *file, enum errhandling h)
573{
574 int psize;
575 char *r;
576 static const char *emsg = "Failed to read the %s file";
577
578 r = tracefs_instance_file_read(NULL, file, &psize);
579 if (!r) {
580 if (h) {
581 warn(emsg, file);
582 if (h == ERR_CLEANUP)
583 cleanup_exit(EXIT_FAILURE);
584 } else
585 errx(EXIT_FAILURE, emsg, file);
586 }
587
588 if (r && r[psize - 1] == '\n')
589 r[psize - 1] = '\0';
590 return r;
591}
592
593static void restore_file(const char *file, char **saved, const char *cur)
594{
595 if (*saved && was_changed(*saved, cur)) {
596 if (tracefs_instance_file_write(NULL, file, *saved) < 0)
597 warnx("Failed to restore %s to %s!", file, *saved);
598 else if (verbose_ftrace())
599 printf("Restored %s to %s\n", file, *saved);
600 free(*saved);
601 *saved = NULL;
602 }
603}
604
605static void restore_ftrace(void)
606{
607 mutex_lock(&save_state.mutex);
608
609 restore_file(TR_CURRENT, &save_state.tracer, current_tracer);
610 restore_file(TR_THRESH, &save_state.thresh, threshold);
611 restore_trace_opts(&save_state, use_options);
612
613 mutex_unlock(&save_state.mutex);
614}
615
616static void cleanup_exit(int status)
617{
618 char *maxlat;
619
620 if (!setup_ftrace)
621 exit(status);
622
623 /*
624 * We try the print_mtx for 1 sec in order to avoid garbled
625 * output if possible, but if it cannot be obtained we proceed anyway.
626 */
627 mutex_trylock_limit(&print_mtx, TRY_PRINTMUTEX_MS);
628
629 maxlat = read_file(TR_MAXLAT, ERR_WARN);
630 if (maxlat) {
631 printf("The maximum detected latency was: %sus\n", maxlat);
632 free(maxlat);
633 }
634
635 restore_ftrace();
636 /*
637 * We do not need to unlock the print_mtx here because we will exit at
638 * the end of this function. Unlocking print_mtx causes problems if a
639 * print thread happens to be waiting for the mutex because we have
640 * just changed the ftrace settings to the original and thus the
641 * print thread would output incorrect data from ftrace.
642 */
643 exit(status);
644}
645
646static void init_save_state(void)
647{
648 pthread_mutexattr_t mattr;
649
650 mutexattr_init(&mattr);
651 mutexattr_settype(&mattr, PTHREAD_MUTEX_RECURSIVE);
652 mutex_init(&save_state.mutex, &mattr);
653 mutexattr_destroy(&mattr);
654
655 save_state.tracer = NULL;
656 save_state.thresh = NULL;
657 save_state.opt_valid[OPTIDX_FUNC_TR] = false;
658 save_state.opt_valid[OPTIDX_DISP_GR] = false;
659}
660
661static int printstate_next_ticket(struct entry *req)
662{
663 int r;
664
665 r = ++(printstate.ticket_counter);
666 req->ticket = r;
667 req->ticket_completed_ref = printstate.ticket_completed;
668 cond_broadcast(&printstate.cond);
669 return r;
670}
671
672static __always_inline
673void printstate_mark_req_completed(const struct entry *req)
674{
675 if (req->ticket > printstate.ticket_completed)
676 printstate.ticket_completed = req->ticket;
677}
678
679static __always_inline
680bool printstate_has_new_req_arrived(const struct entry *req)
681{
682 return (printstate.ticket_counter != req->ticket);
683}
684
685static __always_inline int printstate_cnt_inc(void)
686{
687 int value;
688
689 mutex_lock(&printstate.cnt_mutex);
690 value = ++printstate.cnt;
691 mutex_unlock(&printstate.cnt_mutex);
692 return value;
693}
694
695static __always_inline int printstate_cnt_dec(void)
696{
697 int value;
698
699 mutex_lock(&printstate.cnt_mutex);
700 value = --printstate.cnt;
701 mutex_unlock(&printstate.cnt_mutex);
702 return value;
703}
704
705static __always_inline int printstate_cnt_read(void)
706{
707 int value;
708
709 mutex_lock(&printstate.cnt_mutex);
710 value = printstate.cnt;
711 mutex_unlock(&printstate.cnt_mutex);
712 return value;
713}
714
715static __always_inline
716bool prev_req_won_race(const struct entry *req)
717{
718 return (printstate.ticket_completed != req->ticket_completed_ref);
719}
720
721static void sleeptable_resize(int size, bool printout, struct short_msg *msg)
722{
723 int bytes;
724
725 if (printout) {
726 msg->len = 0;
727 if (unlikely(size > PROB_TABLE_MAX_SIZE))
728 bytes = snprintf(msg->buf, sizeof(msg->buf),
729"Cannot increase probability table to %d (maximum size reached)\n", size);
730 else
731 bytes = snprintf(msg->buf, sizeof(msg->buf),
732"Increasing probability table to %d\n", size);
733 if (bytes < 0)
734 warn("snprintf() failed");
735 else
736 msg->len = bytes;
737 }
738
739 if (unlikely(size < 0)) {
740 /* Should never happen */
741 warnx("Bad program state at %s:%d", __FILE__, __LINE__);
742 cleanup_exit(EXIT_FAILURE);
743 return;
744 }
745 sleeptable.size = size;
746 sleeptable.table = &probabilities[PROB_TABLE_MAX_SIZE - size];
747}
748
749static void init_probabilities(void)
750{
751 int i;
752 int j = 1000;
753
754 for (i = 0; i < PROB_TABLE_MAX_SIZE; i++) {
755 probabilities[i] = 1000 / j;
756 j--;
757 }
758 mutex_init(&sleeptable.mutex, NULL);
759}
760
761static int table_get_probability(const struct entry *req,
762 struct short_msg *msg)
763{
764 int diff = req->ticket - req->ticket_completed_ref;
765 int rval = 0;
766
767 msg->len = 0;
768 diff--;
769 /* Should never happen...*/
770 if (unlikely(diff < 0)) {
771 warnx("Programmer assumption error at %s:%d\n", __FILE__,
772 __LINE__);
773 cleanup_exit(EXIT_FAILURE);
774 }
775 mutex_lock(&sleeptable.mutex);
776 if (diff >= (sleeptable.size - 1)) {
777 rval = sleeptable.table[sleeptable.size - 1];
778 sleeptable_resize(sleeptable.size + 1, verbose_sizechange(),
779 msg);
780 } else {
781 rval = sleeptable.table[diff];
782 }
783 mutex_unlock(&sleeptable.mutex);
784 return rval;
785}
786
787static void init_queue(struct queue *q)
788{
789 q->next_prod_idx = 0;
790 q->next_cons_idx = 0;
791 mutex_init(&q->mutex, NULL);
792 errno = pthread_cond_init(&q->cond, NULL);
793 if (errno)
794 err(EXIT_FAILURE, "pthread_cond_init() failed");
795}
796
797static __always_inline int queue_len(const struct queue *q)
798{
799 if (q->next_prod_idx >= q->next_cons_idx)
800 return q->next_prod_idx - q->next_cons_idx;
801 else
802 return QUEUE_SIZE - q->next_cons_idx + q->next_prod_idx;
803}
804
805static __always_inline int queue_nr_free(const struct queue *q)
806{
807 int nr_free = QUEUE_SIZE - queue_len(q);
808
809 /*
810 * If there is only one slot left we will anyway lie and claim that the
811 * queue is full because adding an element will make it appear empty
812 */
813 if (nr_free == 1)
814 nr_free = 0;
815 return nr_free;
816}
817
818static __always_inline void queue_idx_inc(int *idx)
819{
820 *idx = (*idx + 1) % QUEUE_SIZE;
821}
822
823static __always_inline void queue_push_to_back(struct queue *q,
824 const struct entry *e)
825{
826 q->entries[q->next_prod_idx] = *e;
827 queue_idx_inc(&q->next_prod_idx);
828}
829
830static __always_inline struct entry queue_pop_from_front(struct queue *q)
831{
832 struct entry e = q->entries[q->next_cons_idx];
833
834 queue_idx_inc(&q->next_cons_idx);
835 return e;
836}
837
838static __always_inline void queue_cond_signal(struct queue *q)
839{
840 cond_signal(&q->cond);
841}
842
843static __always_inline void queue_cond_wait(struct queue *q)
844{
845 cond_wait(&q->cond, &q->mutex);
846}
847
848static __always_inline int queue_try_to_add_entry(struct queue *q,
849 const struct entry *e)
850{
851 int r = 0;
852
853 mutex_lock(&q->mutex);
854 if (queue_nr_free(q) > 0) {
855 queue_push_to_back(q, e);
856 cond_signal(&q->cond);
857 } else
858 r = -1;
859 mutex_unlock(&q->mutex);
860 return r;
861}
862
863static struct entry queue_wait_for_entry(struct queue *q)
864{
865 struct entry e;
866
867 mutex_lock(&q->mutex);
868 while (true) {
869 if (queue_len(&printqueue) > 0) {
870 e = queue_pop_from_front(q);
871 break;
872 }
873 queue_cond_wait(q);
874 }
875 mutex_unlock(&q->mutex);
876
877 return e;
878}
879
880static const struct policy *policy_from_name(const char *name)
881{
882 const struct policy *p = &policies[0];
883
884 while (p->name != NULL) {
885 if (!strcmp(name, p->name))
886 return p;
887 p++;
888 }
889 return NULL;
890}
891
892static const char *policy_name(int policy)
893{
894 const struct policy *p = &policies[0];
895 static const char *rval = "unknown";
896
897 while (p->name != NULL) {
898 if (p->policy == policy)
899 return p->name;
900 p++;
901 }
902 return rval;
903}
904
905static bool is_relevant_tracer(const char *name)
906{
907 unsigned int i;
908
909 for (i = 0; relevant_tracers[i]; i++)
910 if (!strcmp(name, relevant_tracers[i]))
911 return true;
912 return false;
913}
914
915static bool random_makes_sense(const char *name)
916{
917 unsigned int i;
918
919 for (i = 0; random_tracers[i]; i++)
920 if (!strcmp(name, random_tracers[i]))
921 return true;
922 return false;
923}
924
925static void show_available(void)
926{
927 char **tracers;
928 int found = 0;
929 int i;
930
931 tracers = tracefs_tracers(NULL);
932 for (i = 0; tracers && tracers[i]; i++) {
933 if (is_relevant_tracer(tracers[i]))
934 found++;
935 }
936
937 if (!tracers) {
938 warnx(no_tracer_msg);
939 return;
940 }
941
942 if (!found) {
943 warnx(no_latency_tr_msg);
944 tracefs_list_free(tracers);
945 return;
946 }
947
948 printf("The following latency tracers are available on your system:\n");
949 for (i = 0; tracers[i]; i++) {
950 if (is_relevant_tracer(tracers[i]))
951 printf("%s\n", tracers[i]);
952 }
953 tracefs_list_free(tracers);
954}
955
956static bool tracer_valid(const char *name, bool *notracer)
957{
958 char **tracers;
959 int i;
960 bool rval = false;
961
962 *notracer = false;
963 tracers = tracefs_tracers(NULL);
964 if (!tracers) {
965 *notracer = true;
966 return false;
967 }
968 for (i = 0; tracers[i]; i++)
969 if (!strcmp(tracers[i], name)) {
970 rval = true;
971 break;
972 }
973 tracefs_list_free(tracers);
974 return rval;
975}
976
977static const char *find_default_tracer(void)
978{
979 int i;
980 bool notracer;
981 bool valid;
982
983 for (i = 0; relevant_tracers[i]; i++) {
984 valid = tracer_valid(relevant_tracers[i], &notracer);
985 if (notracer)
986 errx(EXIT_FAILURE, no_tracer_msg);
987 if (valid)
988 return relevant_tracers[i];
989 }
990 return NULL;
991}
992
993static bool toss_coin(struct drand48_data *buffer, unsigned int prob)
994{
995 long r;
996
997 if (unlikely(lrand48_r(buffer, &r))) {
998 warnx("lrand48_r() failed");
999 cleanup_exit(EXIT_FAILURE);
1000 }
1001 r = r % 1000L;
1002 if (r < prob)
1003 return true;
1004 else
1005 return false;
1006}
1007
1008
1009static long go_to_sleep(const struct entry *req)
1010{
1011 struct timespec future;
1012 long delay = sleep_time;
1013
1014 get_time_in_future(&future, delay);
1015
1016 mutex_lock(&printstate.mutex);
1017 while (!printstate_has_new_req_arrived(req)) {
1018 cond_timedwait(&printstate.cond, &printstate.mutex, &future);
1019 if (time_has_passed(&future))
1020 break;
421d9d1b 1021 }
e23db805
VR
1022
1023 if (printstate_has_new_req_arrived(req))
1024 delay = -1;
1025 mutex_unlock(&printstate.mutex);
1026
1027 return delay;
1028}
1029
1030
1031static void set_priority(void)
1032{
1033 int r;
1034 pid_t pid;
1035 struct sched_param param;
1036
1037 memset(&param, 0, sizeof(param));
1038 param.sched_priority = sched_pri;
1039
1040 pid = getpid();
1041 r = sched_setscheduler(pid, sched_policy, &param);
1042
1043 if (r != 0)
1044 err(EXIT_FAILURE, "sched_setscheduler() failed");
1045}
1046
1047pid_t latency_collector_gettid(void)
1048{
1049 return (pid_t) syscall(__NR_gettid);
1050}
1051
1052static void print_priority(void)
1053{
1054 pid_t tid;
1055 int policy;
1056 int r;
1057 struct sched_param param;
1058
1059 tid = latency_collector_gettid();
1060 r = pthread_getschedparam(pthread_self(), &policy, &param);
1061 if (r != 0) {
1062 warn("pthread_getschedparam() failed");
1063 cleanup_exit(EXIT_FAILURE);
1064 }
1065 mutex_lock(&print_mtx);
1066 printf("Thread %d runs with scheduling policy %s and priority %d\n",
1067 tid, policy_name(policy), param.sched_priority);
1068 mutex_unlock(&print_mtx);
1069}
1070
1071static __always_inline
1072void __print_skipmessage(const struct short_msg *resize_msg,
1073 const struct timespec *timestamp, char *buffer,
1074 size_t bufspace, const struct entry *req, bool excuse,
1075 const char *str)
1076{
1077 ssize_t bytes = 0;
1078 char *p = &buffer[0];
1079 long us, sec;
1080 int r;
1081
1082 sec = timestamp->tv_sec;
1083 us = timestamp->tv_nsec / 1000;
1084
1085 if (resize_msg != NULL && resize_msg->len > 0) {
1086 strncpy(p, resize_msg->buf, resize_msg->len);
1087 bytes += resize_msg->len;
1088 p += resize_msg->len;
1089 bufspace -= resize_msg->len;
1090 }
1091
1092 if (excuse)
1093 r = snprintf(p, bufspace,
1094"%ld.%06ld Latency %d printout skipped due to %s\n",
1095 sec, us, req->ticket, str);
1096 else
1097 r = snprintf(p, bufspace, "%ld.%06ld Latency %d detected\n",
1098 sec, us, req->ticket);
1099
1100 if (r < 0)
1101 warn("snprintf() failed");
1102 else
1103 bytes += r;
1104
1105 /* These prints could happen concurrently */
1106 mutex_lock(&print_mtx);
1107 write_or_die(fd_stdout, buffer, bytes);
1108 mutex_unlock(&print_mtx);
1109}
1110
1111static void print_skipmessage(const struct short_msg *resize_msg,
1112 const struct timespec *timestamp, char *buffer,
1113 size_t bufspace, const struct entry *req,
1114 bool excuse)
1115{
1116 __print_skipmessage(resize_msg, timestamp, buffer, bufspace, req,
1117 excuse, "random delay");
1118}
1119
1120static void print_lostmessage(const struct timespec *timestamp, char *buffer,
1121 size_t bufspace, const struct entry *req,
1122 const char *reason)
1123{
1124 __print_skipmessage(NULL, timestamp, buffer, bufspace, req, true,
1125 reason);
1126}
1127
1128static void print_tracefile(const struct short_msg *resize_msg,
1129 const struct timespec *timestamp, char *buffer,
1130 size_t bufspace, long slept,
1131 const struct entry *req)
1132{
1133 static const int reserve = 256;
1134 char *p = &buffer[0];
1135 ssize_t bytes = 0;
1136 ssize_t bytes_tot = 0;
1137 long us, sec;
1138 long slept_ms;
1139 int trace_fd;
1140
1141 /* Save some space for the final string and final null char */
1142 bufspace = bufspace - reserve - 1;
1143
1144 if (resize_msg != NULL && resize_msg->len > 0) {
1145 bytes = resize_msg->len;
1146 strncpy(p, resize_msg->buf, bytes);
1147 bytes_tot += bytes;
1148 p += bytes;
1149 bufspace -= bytes;
1150 }
1151
1152 trace_fd = open(debug_tracefile, O_RDONLY);
1153
1154 if (trace_fd < 0) {
1155 warn("open() failed on %s", debug_tracefile);
1156 return;
1157 }
1158
1159 sec = timestamp->tv_sec;
1160 us = timestamp->tv_nsec / 1000;
1161
1162 if (slept != 0) {
1163 slept_ms = slept / 1000;
1164 bytes = snprintf(p, bufspace,
1165"%ld.%06ld Latency %d randomly sleep for %ld ms before print\n",
1166 sec, us, req->ticket, slept_ms);
1167 } else {
1168 bytes = snprintf(p, bufspace,
1169 "%ld.%06ld Latency %d immediate print\n", sec,
1170 us, req->ticket);
1171 }
1172
1173 if (bytes < 0) {
1174 warn("snprintf() failed");
1175 return;
1176 }
1177 p += bytes;
1178 bufspace -= bytes;
1179 bytes_tot += bytes;
1180
1181 bytes = snprintf(p, bufspace,
1182">>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> BEGIN <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<\n"
1183 );
1184
1185 if (bytes < 0) {
1186 warn("snprintf() failed");
1187 return;
1188 }
1189
1190 p += bytes;
1191 bufspace -= bytes;
1192 bytes_tot += bytes;
1193
1194 do {
1195 bytes = read(trace_fd, p, bufspace);
1196 if (bytes < 0) {
1197 if (errno == EINTR)
1198 continue;
1199 warn("read() failed on %s", debug_tracefile);
1200 if (unlikely(close(trace_fd) != 0))
1201 warn("close() failed on %s", debug_tracefile);
1202 return;
1203 }
1204 if (bytes == 0)
1205 break;
1206 p += bytes;
1207 bufspace -= bytes;
1208 bytes_tot += bytes;
1209 } while (true);
1210
1211 if (unlikely(close(trace_fd) != 0))
1212 warn("close() failed on %s", debug_tracefile);
1213
1214 printstate_cnt_dec();
1215 /* Add the reserve space back to the budget for the final string */
1216 bufspace += reserve;
1217
1218 bytes = snprintf(p, bufspace,
1219 ">>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> END <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<\n\n");
1220
1221 if (bytes < 0) {
1222 warn("snprintf() failed");
1223 return;
1224 }
1225
1226 bytes_tot += bytes;
1227
1228 /* These prints could happen concurrently */
1229 mutex_lock(&print_mtx);
1230 write_or_die(fd_stdout, buffer, bytes_tot);
1231 mutex_unlock(&print_mtx);
1232}
1233
1234static char *get_no_opt(const char *opt)
1235{
1236 char *no_opt;
1237 int s;
1238
1239 s = strlen(opt) + strlen(OPT_NO_PREFIX) + 1;
1240 /* We may be called from cleanup_exit() via set_trace_opt() */
1241 no_opt = malloc_or_die_nocleanup(s);
1242 strcpy(no_opt, OPT_NO_PREFIX);
1243 strcat(no_opt, opt);
1244 return no_opt;
1245}
1246
1247static char *find_next_optstr(const char *allopt, const char **next)
1248{
1249 const char *begin;
1250 const char *end;
1251 char *r;
1252 int s = 0;
1253
1254 if (allopt == NULL)
1255 return NULL;
1256
1257 for (begin = allopt; *begin != '\0'; begin++) {
1258 if (isgraph(*begin))
1259 break;
1260 }
1261
1262 if (*begin == '\0')
1263 return NULL;
1264
1265 for (end = begin; *end != '\0' && isgraph(*end); end++)
1266 s++;
1267
1268 r = malloc_or_die_nocleanup(s + 1);
1269 strncpy(r, begin, s);
1270 r[s] = '\0';
1271 *next = begin + s;
1272 return r;
1273}
1274
1275static bool get_trace_opt(const char *allopt, const char *opt, bool *found)
1276{
1277 *found = false;
1278 char *no_opt;
1279 char *str;
1280 const char *next = allopt;
1281 bool rval = false;
1282
1283 no_opt = get_no_opt(opt);
1284
1285 do {
1286 str = find_next_optstr(next, &next);
1287 if (str == NULL)
1288 break;
1289 if (!strcmp(str, opt)) {
1290 *found = true;
1291 rval = true;
1292 free(str);
1293 break;
1294 }
1295 if (!strcmp(str, no_opt)) {
1296 *found = true;
1297 rval = false;
1298 free(str);
1299 break;
1300 }
1301 free(str);
1302 } while (true);
1303 free(no_opt);
1304
1305 return rval;
1306}
1307
1308static int set_trace_opt(const char *opt, bool value)
1309{
1310 char *str;
1311 int r;
1312
1313 if (value)
1314 str = strdup(opt);
1315 else
1316 str = get_no_opt(opt);
1317
1318 r = tracefs_instance_file_write(NULL, TR_OPTIONS, str);
1319 free(str);
1320 return r;
1321}
1322
1323void save_trace_opts(struct ftrace_state *state)
1324{
1325 char *allopt;
1326 int psize;
1327 int i;
1328
1329 allopt = tracefs_instance_file_read(NULL, TR_OPTIONS, &psize);
1330 if (!allopt)
1331 errx(EXIT_FAILURE, "Failed to read the %s file\n", TR_OPTIONS);
1332
1333 for (i = 0; i < OPTIDX_NR; i++)
1334 state->opt[i] = get_trace_opt(allopt, optstr[i],
1335 &state->opt_valid[i]);
1336
1337 free(allopt);
1338}
1339
1340static void write_file(const char *file, const char *cur, const char *new,
1341 enum errhandling h)
1342{
1343 int r;
1344 static const char *emsg = "Failed to write to the %s file!";
1345
1346 /* Do nothing if we now that the current and new value are equal */
1347 if (cur && !needs_change(cur, new))
1348 return;
1349
1350 r = tracefs_instance_file_write(NULL, file, new);
1351 if (r < 0) {
1352 if (h) {
1353 warnx(emsg, file);
1354 if (h == ERR_CLEANUP)
1355 cleanup_exit(EXIT_FAILURE);
1356 } else
1357 errx(EXIT_FAILURE, emsg, file);
1358 }
1359 if (verbose_ftrace()) {
1360 mutex_lock(&print_mtx);
1361 printf("%s was set to %s\n", file, new);
1362 mutex_unlock(&print_mtx);
1363 }
1364}
1365
1366static void reset_max_latency(void)
1367{
1368 write_file(TR_MAXLAT, NULL, "0", ERR_CLEANUP);
1369}
1370
1371static void save_and_disable_tracer(void)
1372{
1373 char *orig_th;
1374 char *tracer;
1375 bool need_nop = false;
1376
1377 mutex_lock(&save_state.mutex);
1378
1379 save_trace_opts(&save_state);
1380 tracer = read_file(TR_CURRENT, ERR_EXIT);
1381 orig_th = read_file(TR_THRESH, ERR_EXIT);
1382
1383 if (needs_change(tracer, NOP_TRACER)) {
1384 mutex_lock(&print_mtx);
1385 if (force_tracer) {
1386 printf(
1387 "The %s tracer is already in use but proceeding anyway!\n",
1388 tracer);
1389 } else {
1390 printf(
1391 "The %s tracer is already in use, cowardly bailing out!\n"
1392 "This could indicate that another program or instance is tracing.\n"
1393 "Use the -F [--force] option to disregard the current tracer.\n", tracer);
1394 exit(0);
1395 }
1396 mutex_unlock(&print_mtx);
1397 need_nop = true;
1398 }
1399
1400 save_state.tracer = tracer;
1401 save_state.thresh = orig_th;
1402
1403 if (need_nop)
1404 write_file(TR_CURRENT, NULL, NOP_TRACER, ERR_EXIT);
1405
1406 mutex_unlock(&save_state.mutex);
1407}
1408
1409void set_trace_opts(struct ftrace_state *state, bool *new)
1410{
1411 int i;
1412 int r;
1413
1414 /*
1415 * We only set options if we earlier detected that the option exists in
1416 * the trace_options file and that the wanted setting is different from
1417 * the one we saw in save_and_disable_tracer()
1418 */
1419 for (i = 0; i < OPTIDX_NR; i++)
1420 if (state->opt_valid[i] &&
1421 state->opt[i] != new[i]) {
1422 r = set_trace_opt(optstr[i], new[i]);
1423 if (r < 0) {
1424 warnx("Failed to set the %s option to %s",
1425 optstr[i], bool2str(new[i]));
1426 cleanup_exit(EXIT_FAILURE);
1427 }
1428 if (verbose_ftrace()) {
1429 mutex_lock(&print_mtx);
1430 printf("%s in %s was set to %s\n", optstr[i],
1431 TR_OPTIONS, bool2str(new[i]));
1432 mutex_unlock(&print_mtx);
1433 }
1434 }
1435}
1436
1437static void enable_tracer(void)
1438{
1439 mutex_lock(&save_state.mutex);
1440 set_trace_opts(&save_state, use_options);
1441
1442 write_file(TR_THRESH, save_state.thresh, threshold, ERR_CLEANUP);
1443 write_file(TR_CURRENT, NOP_TRACER, current_tracer, ERR_CLEANUP);
1444
1445 mutex_unlock(&save_state.mutex);
1446}
1447
1448static void tracing_loop(void)
1449{
1450 int ifd = inotify_init();
1451 int wd;
1452 const ssize_t bufsize = sizeof(inotify_buffer);
1453 const ssize_t istructsize = sizeof(struct inotify_event);
1454 char *buf = &inotify_buffer[0];
1455 ssize_t nr_read;
1456 char *p;
1457 int modified;
1458 struct inotify_event *event;
1459 struct entry req;
1460 char *buffer;
1461 const size_t bufspace = PRINT_BUFFER_SIZE;
1462 struct timespec timestamp;
1463
1464 print_priority();
1465
1466 buffer = malloc_or_die(bufspace);
1467
1468 if (ifd < 0)
1469 err(EXIT_FAILURE, "inotify_init() failed!");
1470
1471
1472 if (setup_ftrace) {
1473 /*
1474 * We must disable the tracer before resetting the max_latency
1475 */
1476 save_and_disable_tracer();
1477 /*
1478 * We must reset the max_latency before the inotify_add_watch()
1479 * call.
1480 */
1481 reset_max_latency();
1482 }
1483
1484 wd = inotify_add_watch(ifd, debug_maxlat, IN_MODIFY);
1485 if (wd < 0)
1486 err(EXIT_FAILURE, "inotify_add_watch() failed!");
1487
1488 if (setup_ftrace)
1489 enable_tracer();
1490
1491 signal_blocking(SIG_UNBLOCK);
1492
1493 while (true) {
1494 modified = 0;
1495 check_signals();
1496 nr_read = read(ifd, buf, bufsize);
1497 check_signals();
1498 if (nr_read < 0) {
1499 if (errno == EINTR)
1500 continue;
1501 warn("read() failed on inotify fd!");
1502 cleanup_exit(EXIT_FAILURE);
1503 }
1504 if (nr_read == bufsize)
1505 warnx("inotify() buffer filled, skipping events");
1506 if (nr_read < istructsize) {
1507 warnx("read() returned too few bytes on inotify fd");
1508 cleanup_exit(EXIT_FAILURE);
1509 }
1510
1511 for (p = buf; p < buf + nr_read;) {
1512 event = (struct inotify_event *) p;
1513 if ((event->mask & IN_MODIFY) != 0)
1514 modified++;
1515 p += istructsize + event->len;
1516 }
1517 while (modified > 0) {
1518 check_signals();
1519 mutex_lock(&printstate.mutex);
1520 check_signals();
1521 printstate_next_ticket(&req);
1522 if (printstate_cnt_read() > 0) {
1523 printstate_mark_req_completed(&req);
1524 mutex_unlock(&printstate.mutex);
1525 if (verbose_lostevent()) {
1526 clock_gettime_or_die(CLOCK_MONOTONIC,
1527 &timestamp);
1528 print_lostmessage(&timestamp, buffer,
1529 bufspace, &req,
1530 "inotify loop");
1531 }
1532 break;
1533 }
1534 mutex_unlock(&printstate.mutex);
1535 if (queue_try_to_add_entry(&printqueue, &req) != 0) {
1536 /* These prints could happen concurrently */
1537 check_signals();
1538 mutex_lock(&print_mtx);
1539 check_signals();
1540 write_or_die(fd_stdout, queue_full_warning,
f604de20 1541 strlen(queue_full_warning));
e23db805
VR
1542 mutex_unlock(&print_mtx);
1543 }
1544 modified--;
1545 }
1546 }
1547}
1548
1549static void *do_printloop(void *arg)
1550{
1551 const size_t bufspace = PRINT_BUFFER_SIZE;
1552 char *buffer;
1553 long *rseed = (long *) arg;
1554 struct drand48_data drandbuf;
1555 long slept = 0;
1556 struct entry req;
1557 int prob = 0;
1558 struct timespec timestamp;
1559 struct short_msg resize_msg;
1560
1561 print_priority();
1562
1563 if (srand48_r(*rseed, &drandbuf) != 0) {
1564 warn("srand48_r() failed!\n");
1565 cleanup_exit(EXIT_FAILURE);
1566 }
1567
1568 buffer = malloc_or_die(bufspace);
1569
1570 while (true) {
1571 req = queue_wait_for_entry(&printqueue);
1572 clock_gettime_or_die(CLOCK_MONOTONIC, &timestamp);
1573 mutex_lock(&printstate.mutex);
1574 if (prev_req_won_race(&req)) {
1575 printstate_mark_req_completed(&req);
1576 mutex_unlock(&printstate.mutex);
1577 if (verbose_lostevent())
1578 print_lostmessage(&timestamp, buffer, bufspace,
1579 &req, "print loop");
1580 continue;
1581 }
1582 mutex_unlock(&printstate.mutex);
1583
1584 /*
1585 * Toss a coin to decide if we want to sleep before printing
1586 * out the backtrace. The reason for this is that opening
2455f0e1 1587 * /sys/kernel/tracing/trace will cause a blackout of
e23db805
VR
1588 * hundreds of ms, where no latencies will be noted by the
1589 * latency tracer. Thus by randomly sleeping we try to avoid
1590 * missing traces systematically due to this. With this option
1591 * we will sometimes get the first latency, some other times
1592 * some of the later ones, in case of closely spaced traces.
1593 */
1594 if (trace_enable && use_random_sleep) {
1595 slept = 0;
1596 prob = table_get_probability(&req, &resize_msg);
1597 if (!toss_coin(&drandbuf, prob))
1598 slept = go_to_sleep(&req);
1599 if (slept >= 0) {
1600 /* A print is ongoing */
1601 printstate_cnt_inc();
1602 /*
1603 * We will do the printout below so we have to
1604 * mark it as completed while we still have the
1605 * mutex.
1606 */
1607 mutex_lock(&printstate.mutex);
1608 printstate_mark_req_completed(&req);
1609 mutex_unlock(&printstate.mutex);
1610 }
1611 }
1612 if (trace_enable) {
1613 /*
1614 * slept < 0 means that we detected another
1615 * notification in go_to_sleep() above
1616 */
1617 if (slept >= 0)
1618 /*
1619 * N.B. printstate_cnt_dec(); will be called
1620 * inside print_tracefile()
1621 */
1622 print_tracefile(&resize_msg, &timestamp, buffer,
1623 bufspace, slept, &req);
1624 else
1625 print_skipmessage(&resize_msg, &timestamp,
1626 buffer, bufspace, &req, true);
1627 } else {
1628 print_skipmessage(&resize_msg, &timestamp, buffer,
1629 bufspace, &req, false);
1630 }
1631 }
1632 return NULL;
1633}
1634
1635static void start_printthread(void)
1636{
1637 unsigned int i;
1638 long *seed;
1639 int ufd;
1640
1641 ufd = open(DEV_URANDOM, O_RDONLY);
1642 if (nr_threads > MAX_THREADS) {
1643 warnx(
1644"Number of requested print threads was %d, max number is %d\n",
1645 nr_threads, MAX_THREADS);
1646 nr_threads = MAX_THREADS;
1647 }
1648 for (i = 0; i < nr_threads; i++) {
1649 seed = malloc_or_die(sizeof(*seed));
1650 if (ufd < 0 ||
1651 read(ufd, seed, sizeof(*seed)) != sizeof(*seed)) {
1652 printf(
c1d96fa6 1653"Warning! Using trivial random number seed, since %s not available\n",
e23db805
VR
1654 DEV_URANDOM);
1655 fflush(stdout);
1656 *seed = i;
1657 }
1658 errno = pthread_create(&printthread[i], NULL, do_printloop,
1659 seed);
1660 if (errno)
1661 err(EXIT_FAILURE, "pthread_create()");
1662 }
1663 if (ufd > 0 && close(ufd) != 0)
1664 warn("close() failed");
1665}
1666
1667static void show_usage(void)
1668{
1669 printf(
1670"Usage: %s [OPTION]...\n\n"
1671"Collect closely occurring latencies from %s\n"
1672"with any of the following tracers: preemptirqsoff, preemptoff, irqsoff, "
1673"wakeup,\nwakeup_dl, or wakeup_rt.\n\n"
1674
1675"The occurrence of a latency is detected by monitoring the file\n"
1676"%s with inotify.\n\n"
1677
1678"The following options are supported:\n\n"
1679
1680"-l, --list\t\tList the latency tracers that are supported by the\n"
1681"\t\t\tcurrently running Linux kernel. If you don't see the\n"
1682"\t\t\ttracer that you want, you will probably need to\n"
1683"\t\t\tchange your kernel config and build a new kernel.\n\n"
1684
1685"-t, --tracer TR\t\tUse the tracer TR. The default is to use the first\n"
1686"\t\t\ttracer that is supported by the kernel in the following\n"
1687"\t\t\torder of precedence:\n\n"
1688"\t\t\tpreemptirqsoff\n"
1689"\t\t\tpreemptoff\n"
1690"\t\t\tirqsoff\n"
1691"\t\t\twakeup\n"
1692"\t\t\twakeup_rt\n"
1693"\t\t\twakeup_dl\n"
1694"\n"
1695"\t\t\tIf TR is not on the list above, then a warning will be\n"
1696"\t\t\tprinted.\n\n"
1697
1698"-F, --force\t\tProceed even if another ftrace tracer is active. Without\n"
1699"\t\t\tthis option, the program will refuse to start tracing if\n"
1700"\t\t\tany other tracer than the nop tracer is active.\n\n"
1701
1702"-s, --threshold TH\tConfigure ftrace to use a threshold of TH microseconds\n"
1703"\t\t\tfor the tracer. The default is 0, which means that\n"
1704"\t\t\ttracing_max_latency will be used. tracing_max_latency is\n"
1705"\t\t\tset to 0 when the program is started and contains the\n"
1706"\t\t\tmaximum of the latencies that have been encountered.\n\n"
1707
1708"-f, --function\t\tEnable the function-trace option in trace_options. With\n"
1709"\t\t\tthis option, ftrace will trace the functions that are\n"
1710"\t\t\texecuted during a latency, without it we only get the\n"
1711"\t\t\tbeginning, end, and backtrace.\n\n"
1712
1713"-g, --graph\t\tEnable the display-graph option in trace_option. This\n"
c1d96fa6
CIK
1714"\t\t\toption causes ftrace to show the graph of how functions\n"
1715"\t\t\tare calling other functions.\n\n"
e23db805
VR
1716
1717"-c, --policy POL\tRun the program with scheduling policy POL. POL can be\n"
1718"\t\t\tother, batch, idle, rr or fifo. The default is rr. When\n"
1719"\t\t\tusing rr or fifo, remember that these policies may cause\n"
1720"\t\t\tother tasks to experience latencies.\n\n"
1721
1722"-p, --priority PRI\tRun the program with priority PRI. The acceptable range\n"
1723"\t\t\tof PRI depends on the scheduling policy.\n\n"
1724
1725"-n, --notrace\t\tIf latency is detected, do not print out the content of\n"
1726"\t\t\tthe trace file to standard output\n\n"
1727
1728"-t, --threads NRTHR\tRun NRTHR threads for printing. Default is %d.\n\n"
1729
1730"-r, --random\t\tArbitrarily sleep a certain amount of time, default\n"
1731"\t\t\t%ld ms, before reading the trace file. The\n"
1732"\t\t\tprobabilities for sleep are chosen so that the\n"
1733"\t\t\tprobability of obtaining any of a cluster of closely\n"
1734"\t\t\toccurring latencies are equal, i.e. we will randomly\n"
1735"\t\t\tchoose which one we collect from the trace file.\n\n"
1736"\t\t\tThis option is probably only useful with the irqsoff,\n"
1737"\t\t\tpreemptoff, and preemptirqsoff tracers.\n\n"
1738
1739"-a, --nrlat NRLAT\tFor the purpose of arbitrary delay, assume that there\n"
1740"\t\t\tare no more than NRLAT clustered latencies. If NRLAT\n"
1741"\t\t\tlatencies are detected during a run, this value will\n"
1742"\t\t\tautomatically be increased to NRLAT + 1 and then to\n"
1743"\t\t\tNRLAT + 2 and so on. The default is %d. This option\n"
1744"\t\t\timplies -r. We need to know this number in order to\n"
1745"\t\t\tbe able to calculate the probabilities of sleeping.\n"
1746"\t\t\tSpecifically, the probabilities of not sleeping, i.e. to\n"
1747"\t\t\tdo an immediate printout will be:\n\n"
1748"\t\t\t1/NRLAT 1/(NRLAT - 1) ... 1/3 1/2 1\n\n"
1749"\t\t\tThe probability of sleeping will be:\n\n"
1750"\t\t\t1 - P, where P is from the series above\n\n"
1751"\t\t\tThis descending probability will cause us to choose\n"
1752"\t\t\tan occurrence at random. Observe that the final\n"
1753"\t\t\tprobability is 0, it is when we reach this probability\n"
1754"\t\t\tthat we increase NRLAT automatically. As an example,\n"
1755"\t\t\twith the default value of 2, the probabilities will be:\n\n"
1756"\t\t\t1/2 0\n\n"
1757"\t\t\tThis means, when a latency is detected we will sleep\n"
1758"\t\t\twith 50%% probability. If we ever detect another latency\n"
1759"\t\t\tduring the sleep period, then the probability of sleep\n"
1760"\t\t\twill be 0%% and the table will be expanded to:\n\n"
1761"\t\t\t1/3 1/2 0\n\n"
1762
1763"-v, --verbose\t\tIncrease the verbosity. If this option is given once,\n"
1764"\t\t\tthen print a message every time that the NRLAT value\n"
1765"\t\t\tis automatically increased. It also causes a message to\n"
1766"\t\t\tbe printed when the ftrace settings are changed. If this\n"
1767"\t\t\toption is given at least twice, then also print a\n"
1768"\t\t\twarning for lost events.\n\n"
1769
1770"-u, --time TIME\t\tArbitrarily sleep for a specified time TIME ms before\n"
1771"\t\t\tprinting out the trace from the trace file. The default\n"
1772"\t\t\tis %ld ms. This option implies -r.\n\n"
1773
1774"-x, --no-ftrace\t\tDo not configure ftrace. This assume that the user\n"
1775"\t\t\tconfigures the ftrace files in sysfs such as\n"
1776"\t\t\t/sys/kernel/tracing/current_tracer or equivalent.\n\n"
1777
1778"-i, --tracefile FILE\tUse FILE as trace file. The default is\n"
1779"\t\t\t%s.\n"
1780"\t\t\tThis options implies -x\n\n"
1781
1782"-m, --max-lat FILE\tUse FILE as tracing_max_latency file. The default is\n"
1783"\t\t\t%s.\n"
1784"\t\t\tThis options implies -x\n\n"
1785,
1786prg_name, debug_tracefile_dflt, debug_maxlat_dflt, DEFAULT_NR_PRINTER_THREADS,
1787SLEEP_TIME_MS_DEFAULT, DEFAULT_TABLE_SIZE, SLEEP_TIME_MS_DEFAULT,
1788debug_tracefile_dflt, debug_maxlat_dflt);
1789}
1790
1791static void find_tracefiles(void)
1792{
1793 debug_tracefile_dflt = tracefs_get_tracing_file("trace");
1794 if (debug_tracefile_dflt == NULL) {
1795 /* This is needed in show_usage() */
1796 debug_tracefile_dflt = DEBUG_NOFILE;
1797 }
1798
1799 debug_maxlat_dflt = tracefs_get_tracing_file("tracing_max_latency");
1800 if (debug_maxlat_dflt == NULL) {
1801 /* This is needed in show_usage() */
1802 debug_maxlat_dflt = DEBUG_NOFILE;
1803 }
1804
1805 debug_tracefile = debug_tracefile_dflt;
1806 debug_maxlat = debug_maxlat_dflt;
1807}
1808
1809bool alldigits(const char *s)
1810{
1811 for (; *s != '\0'; s++)
1812 if (!isdigit(*s))
1813 return false;
1814 return true;
1815}
1816
1817void check_alldigits(const char *optarg, const char *argname)
1818{
1819 if (!alldigits(optarg))
1820 errx(EXIT_FAILURE,
1821 "The %s parameter expects a decimal argument\n", argname);
1822}
1823
1824static void scan_arguments(int argc, char *argv[])
1825{
1826 int c;
1827 int i;
1828 int option_idx = 0;
1829
1830 static struct option long_options[] = {
1831 { "list", no_argument, 0, 'l' },
1832 { "tracer", required_argument, 0, 't' },
1833 { "force", no_argument, 0, 'F' },
1834 { "threshold", required_argument, 0, 's' },
1835 { "function", no_argument, 0, 'f' },
1836 { "graph", no_argument, 0, 'g' },
1837 { "policy", required_argument, 0, 'c' },
1838 { "priority", required_argument, 0, 'p' },
1839 { "help", no_argument, 0, 'h' },
1840 { "notrace", no_argument, 0, 'n' },
1841 { "random", no_argument, 0, 'r' },
1842 { "nrlat", required_argument, 0, 'a' },
1843 { "threads", required_argument, 0, 'e' },
1844 { "time", required_argument, 0, 'u' },
1845 { "verbose", no_argument, 0, 'v' },
1846 { "no-ftrace", no_argument, 0, 'x' },
1847 { "tracefile", required_argument, 0, 'i' },
1848 { "max-lat", required_argument, 0, 'm' },
1849 { 0, 0, 0, 0 }
1850 };
1851 const struct policy *p;
1852 int max, min;
1853 int value;
1854 bool notracer, valid;
1855
1856 /*
1857 * We must do this before parsing the arguments because show_usage()
1858 * needs to display these.
1859 */
1860 find_tracefiles();
1861
1862 while (true) {
1863 c = getopt_long(argc, argv, "lt:Fs:fgc:p:hnra:e:u:vxi:m:",
1864 long_options, &option_idx);
1865 if (c == -1)
1866 break;
1867
1868 switch (c) {
1869 case 'l':
1870 show_available();
1871 exit(0);
1872 break;
1873 case 't':
1874 current_tracer = strdup(optarg);
1875 if (!is_relevant_tracer(current_tracer)) {
1876 warnx("%s is not a known latency tracer!\n",
1877 current_tracer);
1878 }
1879 valid = tracer_valid(current_tracer, &notracer);
1880 if (notracer)
1881 errx(EXIT_FAILURE, no_tracer_msg);
1882 if (!valid)
1883 errx(EXIT_FAILURE,
1884"The tracer %s is not supported by your kernel!\n", current_tracer);
1885 break;
1886 case 'F':
1887 force_tracer = true;
1888 break;
1889 case 's':
1890 check_alldigits(optarg, "-s [--threshold]");
1891 threshold = strdup(optarg);
1892 break;
1893 case 'f':
1894 use_options[OPTIDX_FUNC_TR] = true;
1895 break;
1896 case 'g':
1897 use_options[OPTIDX_DISP_GR] = true;
1898 break;
1899 case 'c':
1900 p = policy_from_name(optarg);
1901 if (p != NULL) {
1902 sched_policy = p->policy;
1903 sched_policy_set = true;
1904 if (!sched_pri_set) {
1905 sched_pri = p->default_pri;
1906 sched_pri_set = true;
1907 }
1908 } else {
1909 warnx("Unknown scheduling %s\n", optarg);
1910 show_usage();
1911 exit(0);
1912 }
1913 break;
1914 case 'p':
1915 check_alldigits(optarg, "-p [--priority]");
1916 sched_pri = atoi(optarg);
1917 sched_pri_set = true;
1918 break;
1919 case 'h':
1920 show_usage();
1921 exit(0);
1922 break;
1923 case 'n':
1924 trace_enable = false;
1925 use_random_sleep = false;
1926 break;
1927 case 'e':
1928 check_alldigits(optarg, "-e [--threads]");
1929 value = atoi(optarg);
1930 if (value > 0)
1931 nr_threads = value;
1932 else {
1933 warnx("NRTHR must be > 0\n");
1934 show_usage();
1935 exit(0);
1936 }
1937 break;
1938 case 'u':
1939 check_alldigits(optarg, "-u [--time]");
1940 value = atoi(optarg);
1941 if (value < 0) {
1942 warnx("TIME must be >= 0\n");
1943 show_usage();
421d9d1b 1944 exit(0);
e23db805
VR
1945 }
1946 trace_enable = true;
1947 use_random_sleep = true;
1948 sleep_time = value * USEC_PER_MSEC;
1949 break;
1950 case 'v':
1951 verbosity++;
1952 break;
1953 case 'r':
1954 trace_enable = true;
1955 use_random_sleep = true;
1956 break;
1957 case 'a':
1958 check_alldigits(optarg, "-a [--nrlat]");
1959 value = atoi(optarg);
1960 if (value <= 0) {
1961 warnx("NRLAT must be > 0\n");
1962 show_usage();
1963 exit(0);
1964 }
1965 trace_enable = true;
1966 use_random_sleep = true;
1967 table_startsize = value;
1968 break;
1969 case 'x':
1970 setup_ftrace = false;
1971 break;
1972 case 'i':
1973 setup_ftrace = false;
1974 debug_tracefile = strdup(optarg);
1975 break;
1976 case 'm':
1977 setup_ftrace = false;
1978 debug_maxlat = strdup(optarg);
1979 break;
1980 default:
1981 show_usage();
1982 exit(0);
1983 break;
1984 }
1985 }
1986
1987 if (setup_ftrace) {
1988 if (!current_tracer) {
1989 current_tracer = find_default_tracer();
1990 if (!current_tracer)
1991 errx(EXIT_FAILURE,
1992"No default tracer found and tracer not specified\n");
1993 }
1994
1995 if (use_random_sleep && !random_makes_sense(current_tracer)) {
1996 warnx("WARNING: The tracer is %s and random sleep has",
1997 current_tracer);
1998 fprintf(stderr,
1999"been enabled. Random sleep is intended for the following tracers:\n");
2000 for (i = 0; random_tracers[i]; i++)
2001 fprintf(stderr, "%s\n", random_tracers[i]);
2002 fprintf(stderr, "\n");
2003 }
2004 }
2005
2006 if (debug_tracefile == DEBUG_NOFILE ||
2007 debug_maxlat == DEBUG_NOFILE)
2008 errx(EXIT_FAILURE,
2009"Could not find tracing directory e.g. /sys/kernel/tracing\n");
2010
2011 if (!sched_policy_set) {
2012 sched_policy = SCHED_RR;
2013 sched_policy_set = true;
2014 if (!sched_pri_set) {
2015 sched_pri = RT_DEFAULT_PRI;
2016 sched_pri_set = true;
2017 }
2018 }
2019
2020 max = sched_get_priority_max(sched_policy);
2021 min = sched_get_priority_min(sched_policy);
2022
2023 if (sched_pri < min) {
2024 printf(
2025"ATTENTION: Increasing priority to minimum, which is %d\n", min);
2026 sched_pri = min;
2027 }
2028 if (sched_pri > max) {
2029 printf(
2030"ATTENTION: Reducing priority to maximum, which is %d\n", max);
2031 sched_pri = max;
2032 }
2033}
2034
2035static void show_params(void)
2036{
2037 printf(
2038"\n"
2039"Running with scheduling policy %s and priority %d. Using %d print threads.\n",
2040 policy_name(sched_policy), sched_pri, nr_threads);
2041 if (trace_enable) {
2042 if (use_random_sleep) {
2043 printf(
2044"%s will be printed with random delay\n"
2045"Start size of the probability table:\t\t\t%d\n"
2046"Print a message when the prob. table changes size:\t%s\n"
2047"Print a warning when an event has been lost:\t\t%s\n"
2048"Sleep time is:\t\t\t\t\t\t%ld ms\n",
2049debug_tracefile,
2050table_startsize,
2051bool2str(verbose_sizechange()),
2052bool2str(verbose_lostevent()),
2053sleep_time / USEC_PER_MSEC);
2054 } else {
2055 printf("%s will be printed immediately\n",
2056 debug_tracefile);
2057 }
2058 } else {
2059 printf("%s will not be printed\n",
2060 debug_tracefile);
2061 }
2062 if (setup_ftrace) {
2063 printf("Tracer:\t\t\t\t\t\t\t%s\n"
2064 "%s option:\t\t\t\t\t%s\n"
2065 "%s option:\t\t\t\t\t%s\n",
2066 current_tracer,
2067 optstr[OPTIDX_FUNC_TR],
2068 bool2str(use_options[OPTIDX_FUNC_TR]),
2069 optstr[OPTIDX_DISP_GR],
2070 bool2str(use_options[OPTIDX_DISP_GR]));
2071 if (!strcmp(threshold, "0"))
2072 printf("Threshold:\t\t\t\t\t\ttracing_max_latency\n");
2073 else
2074 printf("Threshold:\t\t\t\t\t\t%s\n", threshold);
2075 }
2076 printf("\n");
2077}
2078
2079int main(int argc, char *argv[])
2080{
2081 init_save_state();
2082 signal_blocking(SIG_BLOCK);
2083 setup_sig_handler();
2084 open_stdout();
2085
2086 if (argc >= 1)
2087 prg_name = argv[0];
2088 else
2089 prg_name = prg_unknown;
2090
2091 scan_arguments(argc, argv);
2092 show_params();
2093
2094 init_printstate();
2095 init_print_mtx();
2096 if (use_random_sleep) {
2097 init_probabilities();
2098 if (verbose_sizechange())
2099 printf("Initializing probability table to %d\n",
2100 table_startsize);
2101 sleeptable_resize(table_startsize, false, NULL);
2102 }
2103 set_priority();
2104 init_queue(&printqueue);
2105 start_printthread();
2106 tracing_loop();
2107 return 0;
2108}