4 * Builtin 'trace' command:
6 * Display a continuously updated trace of any workload, CPU, specific PID,
7 * system wide, etc. Default format is loosely strace-like, but any other
8 * event may be specified using --event.
10 * Copyright (C) 2012, 2013, 2014, 2015 Red Hat Inc, Arnaldo Carvalho de Melo <acme@redhat.com>
12 * Initially based on the 'trace' prototype by Thomas Gleixner:
14 * http://lwn.net/Articles/415728/ ("Announcing a new utility: 'trace'")
16 * Released under the GPL v2. (and only v2, not any later version)
19 #include <traceevent/event-parse.h>
20 #include <api/fs/tracing_path.h>
22 #include "util/color.h"
23 #include "util/debug.h"
24 #include "util/evlist.h"
25 #include <subcmd/exec-cmd.h>
26 #include "util/machine.h"
27 #include "util/session.h"
28 #include "util/thread.h"
29 #include <subcmd/parse-options.h>
30 #include "util/strlist.h"
31 #include "util/intlist.h"
32 #include "util/thread_map.h"
33 #include "util/stat.h"
34 #include "trace-event.h"
35 #include "util/parse-events.h"
36 #include "util/bpf-loader.h"
37 #include "callchain.h"
38 #include "syscalltbl.h"
40 #include <libaudit.h> /* FIXME: Still needed for audit_errno_to_name */
43 #include <linux/futex.h>
44 #include <linux/err.h>
45 #include <linux/seccomp.h>
46 #include <linux/filter.h>
47 #include <linux/audit.h>
48 #include <sys/ptrace.h>
49 #include <linux/random.h>
51 /* For older distros: */
/*
 * Fallback values for constants of the families the argument beautifiers in
 * this file print (MAP_*, MADV_*, EFD_*, O_*, SOCK_*, MSG_*, PERF_FLAG_*).
 * Where the #ifndef guard is visible, the constant is only defined when the
 * system headers did not already provide it.
 */
53 # define MAP_STACK 0x20000
57 # define MADV_HWPOISON 100
61 #ifndef MADV_MERGEABLE
62 # define MADV_MERGEABLE 12
65 #ifndef MADV_UNMERGEABLE
66 # define MADV_UNMERGEABLE 13
70 # define EFD_SEMAPHORE 1
74 # define EFD_NONBLOCK 00004000
78 # define EFD_CLOEXEC 02000000
82 # define O_CLOEXEC 02000000
90 # define SOCK_CLOEXEC 02000000
94 # define SOCK_NONBLOCK 00004000
97 #ifndef MSG_CMSG_CLOEXEC
98 # define MSG_CMSG_CLOEXEC 0x40000000
101 #ifndef PERF_FLAG_FD_NO_GROUP
102 # define PERF_FLAG_FD_NO_GROUP (1UL << 0)
105 #ifndef PERF_FLAG_FD_OUTPUT
106 # define PERF_FLAG_FD_OUTPUT (1UL << 1)
109 #ifndef PERF_FLAG_PID_CGROUP
110 # define PERF_FLAG_PID_CGROUP (1UL << 2) /* pid=cgroup id, per-cpu mode only */
113 #ifndef PERF_FLAG_FD_CLOEXEC
114 # define PERF_FLAG_FD_CLOEXEC (1UL << 3) /* O_CLOEXEC */
/*
 * Members of the tool's state structure (its opening line and several
 * members are not visible in this listing).
 * NOTE(review): the roles below are read off the member names and types
 * only - confirm against the users of each field.
 *   tool, sctbl, table    - perf tool callbacks and syscall tables
 *   sys_enter             - evsel for the syscall entry tracepoint
 *   opts, evlist, host    - record options, event list, host machine
 *   current, nr_events    - current thread and an event counter
 *   ev_qualifier, *_list  - event qualifier strings and pid/tid lists
 *   duration_filter       - floating point duration threshold
 *   bool members          - option/state flags
 */
118 struct perf_tool tool;
119 struct syscalltbl *sctbl;
122 struct syscall *table;
124 struct perf_evsel *sys_enter,
128 struct record_opts opts;
129 struct perf_evlist *evlist;
130 struct machine *host;
131 struct thread *current;
134 unsigned long nr_events;
135 struct strlist *ev_qualifier;
140 struct intlist *tid_list;
141 struct intlist *pid_list;
146 double duration_filter;
152 bool not_ev_qualifier;
156 bool multiple_threads;
160 bool show_tool_stats;
162 bool kernel_syscallchains;
/*
 * Per-field accessor callbacks: 'integer' yields the field's value as a u64,
 * 'pointer' yields an address. Both receive the field descriptor and the
 * sample; they are installed by tp_field__init_uint()/tp_field__init_ptr().
 */
172 u64 (*integer)(struct tp_field *field, struct perf_sample *sample);
173 void *(*pointer)(struct tp_field *field, struct perf_sample *sample);
/*
 * Generates tp_field__u<bits>(): the value is copied with memcpy() out of
 * sample->raw_data at field->offset. The remaining lines of the macro are
 * not visible in this listing.
 */
177 #define TP_UINT_FIELD(bits) \
178 static u64 tp_field__u##bits(struct tp_field *field, struct perf_sample *sample) \
181 memcpy(&value, sample->raw_data + field->offset, sizeof(value)); \
/*
 * Byte-swapping variant: generates tp_field__swapped_u<bits>(), which copies
 * the value out of the raw sample data the same way and returns
 * bswap_<bits>() of it. Instantiated below for 16, 32 and 64 bit fields.
 */
190 #define TP_UINT_FIELD__SWAPPED(bits) \
191 static u64 tp_field__swapped_u##bits(struct tp_field *field, struct perf_sample *sample) \
194 memcpy(&value, sample->raw_data + field->offset, sizeof(value)); \
195 return bswap_##bits(value);\
198 TP_UINT_FIELD__SWAPPED(16);
199 TP_UINT_FIELD__SWAPPED(32);
200 TP_UINT_FIELD__SWAPPED(64);
/*
 * Binds an integer tracepoint field: records the format field's offset and
 * selects the reader that matches format_field->size, using the byte-swapped
 * reader when needs_swap is set. The case labels and the return statements
 * are not visible in this listing.
 */
202 static int tp_field__init_uint(struct tp_field *field,
203 struct format_field *format_field,
206 field->offset = format_field->offset;
208 switch (format_field->size) {
/* the u8 reader is used regardless of needs_swap */
210 field->integer = tp_field__u8;
213 field->integer = needs_swap ? tp_field__swapped_u16 : tp_field__u16;
216 field->integer = needs_swap ? tp_field__swapped_u32 : tp_field__u32;
219 field->integer = needs_swap ? tp_field__swapped_u64 : tp_field__u64;
/*
 * Returns the address of the field inside the sample's raw data
 * (raw_data + field->offset); nothing is copied.
 */
228 static void *tp_field__ptr(struct tp_field *field, struct perf_sample *sample)
230 return sample->raw_data + field->offset;
/*
 * Binds a pointer-style tracepoint field: records the format field's offset
 * and installs tp_field__ptr as the accessor. The return statement is not
 * visible in this listing.
 */
233 static int tp_field__init_ptr(struct tp_field *field, struct format_field *format_field)
235 field->offset = format_field->offset;
236 field->pointer = tp_field__ptr;
/*
 * Field descriptors for 'args' and 'ret'.
 * NOTE(review): presumably members of struct syscall_tp, the structure the
 * *_sc_tp_* macros reach through evsel->priv - confirm.
 */
243 struct tp_field args, ret;
/*
 * Looks up the tracepoint field called 'name' in evsel with
 * perf_evsel__field() and, when it exists, initialises 'field' as an integer
 * reader honouring evsel->needs_swap. What is returned when the field is
 * missing is not visible in this listing.
 */
247 static int perf_evsel__init_tp_uint_field(struct perf_evsel *evsel,
248 struct tp_field *field,
251 struct format_field *format_field = perf_evsel__field(evsel, name);
253 if (format_field == NULL)
256 return tp_field__init_uint(field, format_field, evsel->needs_swap);
/*
 * Convenience wrapper: initialises member 'name' of the struct syscall_tp
 * stored in evsel->priv as an integer field, looking the tracepoint field up
 * by the same identifier (#name).
 */
259 #define perf_evsel__init_sc_tp_uint_field(evsel, name) \
260 ({ struct syscall_tp *sc = evsel->priv;\
261 perf_evsel__init_tp_uint_field(evsel, &sc->name, #name); })
/*
 * Looks up the tracepoint field called 'name' in evsel with
 * perf_evsel__field() and, when it exists, initialises 'field' as a pointer
 * accessor. What is returned when the field is missing is not visible in
 * this listing.
 */
263 static int perf_evsel__init_tp_ptr_field(struct perf_evsel *evsel,
264 struct tp_field *field,
267 struct format_field *format_field = perf_evsel__field(evsel, name);
269 if (format_field == NULL)
272 return tp_field__init_ptr(field, format_field);
/*
 * Convenience wrapper: initialises member 'name' of the struct syscall_tp
 * stored in evsel->priv as a pointer field, looking the tracepoint field up
 * by the same identifier (#name).
 */
275 #define perf_evsel__init_sc_tp_ptr_field(evsel, name) \
276 ({ struct syscall_tp *sc = evsel->priv;\
277 perf_evsel__init_tp_ptr_field(evsel, &sc->name, #name); })
/*
 * Destroys evsel through perf_evsel__delete().
 * NOTE(review): going by the name, evsel->priv is released first on a line
 * that is not visible in this listing - confirm.
 */
279 static void perf_evsel__delete_priv(struct perf_evsel *evsel)
282 perf_evsel__delete(evsel);
/*
 * Allocates the struct syscall_tp private area for evsel, binds its 'id'
 * integer field and installs 'handler' as the evsel's handler. The failure
 * paths and return values are not visible in this listing.
 */
285 static int perf_evsel__init_syscall_tp(struct perf_evsel *evsel, void *handler)
287 evsel->priv = malloc(sizeof(struct syscall_tp));
288 if (evsel->priv != NULL) {
289 if (perf_evsel__init_sc_tp_uint_field(evsel, id))
292 evsel->handler = handler;
/*
 * Creates the evsel for a syscall tracepoint. 'direction' is the event name
 * within the "raw_syscalls" subsystem; "syscalls" is tried as a fallback
 * (the condition guarding the fallback is not visible in this listing).
 * The evsel is then set up with perf_evsel__init_syscall_tp(); the error
 * path disposes of it with perf_evsel__delete_priv().
 */
303 static struct perf_evsel *perf_evsel__syscall_newtp(const char *direction, void *handler)
305 struct perf_evsel *evsel = perf_evsel__newtp("raw_syscalls", direction);
307 /* older kernels (e.g., RHEL6) use syscalls:{enter,exit} */
309 evsel = perf_evsel__newtp("syscalls", direction);
314 if (perf_evsel__init_syscall_tp(evsel, handler))
320 perf_evsel__delete_priv(evsel);
/*
 * Reads integer field 'name' of the struct syscall_tp kept in evsel->priv
 * from 'sample' by calling that field's 'integer' accessor.
 */
324 #define perf_evsel__sc_tp_uint(evsel, name, sample) \
325 ({ struct syscall_tp *fields = evsel->priv; \
326 fields->name.integer(&fields->name, sample); })
/*
 * Returns the address of field 'name' of the struct syscall_tp kept in
 * evsel->priv within 'sample' by calling that field's 'pointer' accessor.
 */
328 #define perf_evsel__sc_tp_ptr(evsel, name, sample) \
329 ({ struct syscall_tp *fields = evsel->priv; \
330 fields->name.pointer(&fields->name, sample); })
/*
 * NOTE(review): single visible member of a structure whose other lines are
 * missing from this listing; presumably struct syscall_arg - confirm.
 */
334 struct thread *thread;
/*
 * Table of strings. NOTE(review): presumably the 'entries' member of struct
 * strarray, which the strarray printer indexes with (value - offset).
 */
344 const char **entries;
/*
 * DEFINE_STRARRAY(array) defines 'struct strarray strarray__<array>' with
 * nr_entries set to ARRAY_SIZE(array). DEFINE_STRARRAY_OFFSET(array, off)
 * does the same and additionally takes 'off'. The initializer lines that
 * consume 'off' and set the entries pointer are not visible in this listing.
 */
347 #define DEFINE_STRARRAY(array) struct strarray strarray__##array = { \
348 .nr_entries = ARRAY_SIZE(array), \
352 #define DEFINE_STRARRAY_OFFSET(array, off) struct strarray strarray__##array = { \
354 .nr_entries = ARRAY_SIZE(array), \
/*
 * Prints a syscall argument as the string found in the strarray passed via
 * arg->parm. The table index is (arg->val - sa->offset); a value that falls
 * outside [0, nr_entries) is printed numerically using 'intfmt' instead.
 * Returns what scnprintf() returns.
 *
 * NOTE(review): a NULL slot inside the range (tables built with designated
 * initializers, such as tioctls, have holes) is handed to "%s" unchecked -
 * confirm whether such slots should fall back to 'intfmt' as well.
 */
358 static size_t __syscall_arg__scnprintf_strarray(char *bf, size_t size,
360 struct syscall_arg *arg)
362 struct strarray *sa = arg->parm;
363 int idx = arg->val - sa->offset;
365 if (idx < 0 || idx >= sa->nr_entries)
366 return scnprintf(bf, size, intfmt, arg->val);
368 return scnprintf(bf, size, "%s", sa->entries[idx]);
/*
 * String-table printer whose numeric fallback is decimal ("%d").
 * SCA_STRARRAY is the short name used in the syscall_fmts table.
 */
371 static size_t syscall_arg__scnprintf_strarray(char *bf, size_t size,
372 struct syscall_arg *arg)
374 return __syscall_arg__scnprintf_strarray(bf, size, "%d", arg);
377 #define SCA_STRARRAY syscall_arg__scnprintf_strarray
/*
 * x86-only string-table printer whose numeric fallback is hexadecimal
 * ("%#x"). SCA_STRHEXARRAY is the short name used in the syscall_fmts table
 * (for the ioctl 'cmd' argument).
 */
379 #if defined(__i386__) || defined(__x86_64__)
381 * FIXME: Make this available to all arches as soon as the ioctl beautifier
382 * gets rewritten to support all arches.
384 static size_t syscall_arg__scnprintf_strhexarray(char *bf, size_t size,
385 struct syscall_arg *arg)
387 return __syscall_arg__scnprintf_strarray(bf, size, "%#x", arg);
390 #define SCA_STRHEXARRAY syscall_arg__scnprintf_strhexarray
391 #endif /* defined(__i386__) || defined(__x86_64__) */
/*
 * Forward declaration of the file descriptor printer; its definition is not
 * in the visible part of this listing. SCA_FD is its short table name.
 */
393 static size_t syscall_arg__scnprintf_fd(char *bf, size_t size,
394 struct syscall_arg *arg);
396 #define SCA_FD syscall_arg__scnprintf_fd
/*
 * Printer for the directory fd argument of the *at() syscalls: prints "CWD"
 * for one special value and otherwise defers to the plain fd printer.
 * NOTE(review): the test selecting "CWD" is not visible in this listing;
 * presumably a comparison against AT_FDCWD - confirm.
 */
398 static size_t syscall_arg__scnprintf_fd_at(char *bf, size_t size,
399 struct syscall_arg *arg)
404 return scnprintf(bf, size, "CWD");
406 return syscall_arg__scnprintf_fd(bf, size, arg);
409 #define SCA_FDAT syscall_arg__scnprintf_fd_at
/*
 * Forward declaration of the printer used for close()'s fd argument; its
 * definition is not in the visible part of this listing.
 */
411 static size_t syscall_arg__scnprintf_close_fd(char *bf, size_t size,
412 struct syscall_arg *arg);
414 #define SCA_CLOSE_FD syscall_arg__scnprintf_close_fd
/* Prints the argument value in hexadecimal with a 0x prefix ("%#lx"). */
416 static size_t syscall_arg__scnprintf_hex(char *bf, size_t size,
417 struct syscall_arg *arg)
419 return scnprintf(bf, size, "%#lx", arg->val);
422 #define SCA_HEX syscall_arg__scnprintf_hex
/* Prints the argument value as a signed decimal integer ("%d"). */
424 static size_t syscall_arg__scnprintf_int(char *bf, size_t size,
425 struct syscall_arg *arg)
427 return scnprintf(bf, size, "%d", arg->val);
430 #define SCA_INT syscall_arg__scnprintf_int
/*
 * Prints a memory protection argument (used for mmap and mprotect)
 * symbolically: "NONE" when the value equals PROT_NONE, otherwise the names
 * of the PROT_* bits that are set, joined with '|'. The last scnprintf()
 * appends 'prot' in hex; its guarding condition is not visible in this
 * listing (presumably only taken for bits left unnamed).
 */
432 static size_t syscall_arg__scnprintf_mmap_prot(char *bf, size_t size,
433 struct syscall_arg *arg)
435 int printed = 0, prot = arg->val;
437 if (prot == PROT_NONE)
438 return scnprintf(bf, size, "NONE");
439 #define P_MMAP_PROT(n) \
440 if (prot & PROT_##n) { \
441 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
451 P_MMAP_PROT(GROWSDOWN);
452 P_MMAP_PROT(GROWSUP);
456 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", prot);
461 #define SCA_MMAP_PROT syscall_arg__scnprintf_mmap_prot
/*
 * Prints the mmap 'flags' argument as the names of the MAP_* bits that are
 * set, joined with '|'. MAP_UNINITIALIZED is only handled when the headers
 * define it. The last scnprintf() appends 'flags' in hex; its guarding
 * condition is not visible in this listing.
 */
463 static size_t syscall_arg__scnprintf_mmap_flags(char *bf, size_t size,
464 struct syscall_arg *arg)
466 int printed = 0, flags = arg->val;
468 #define P_MMAP_FLAG(n) \
469 if (flags & MAP_##n) { \
470 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
475 P_MMAP_FLAG(PRIVATE);
479 P_MMAP_FLAG(ANONYMOUS);
480 P_MMAP_FLAG(DENYWRITE);
481 P_MMAP_FLAG(EXECUTABLE);
484 P_MMAP_FLAG(GROWSDOWN);
486 P_MMAP_FLAG(HUGETLB);
489 P_MMAP_FLAG(NONBLOCK);
490 P_MMAP_FLAG(NORESERVE);
491 P_MMAP_FLAG(POPULATE);
493 #ifdef MAP_UNINITIALIZED
494 P_MMAP_FLAG(UNINITIALIZED);
499 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
504 #define SCA_MMAP_FLAGS syscall_arg__scnprintf_mmap_flags
/*
 * Prints the mremap 'flags' argument: each known MREMAP_* bit that is set is
 * printed by name ('|'-separated) and cleared from 'flags', so the final
 * hex output covers what is left. The condition guarding that final output
 * is not visible in this listing.
 */
506 static size_t syscall_arg__scnprintf_mremap_flags(char *bf, size_t size,
507 struct syscall_arg *arg)
509 int printed = 0, flags = arg->val;
511 #define P_MREMAP_FLAG(n) \
512 if (flags & MREMAP_##n) { \
513 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
514 flags &= ~MREMAP_##n; \
517 P_MREMAP_FLAG(MAYMOVE);
519 P_MREMAP_FLAG(FIXED);
524 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
529 #define SCA_MREMAP_FLAGS syscall_arg__scnprintf_mremap_flags
/*
 * Prints the madvise 'behavior' argument as the MADV_* name without its
 * prefix. Each P_MADV_BHV() line expands to a case label that returns the
 * name; the optional ones are only compiled when the headers define them.
 * A value without a case is printed in hex.
 */
531 static size_t syscall_arg__scnprintf_madvise_behavior(char *bf, size_t size,
532 struct syscall_arg *arg)
534 int behavior = arg->val;
537 #define P_MADV_BHV(n) case MADV_##n: return scnprintf(bf, size, #n)
540 P_MADV_BHV(SEQUENTIAL);
541 P_MADV_BHV(WILLNEED);
542 P_MADV_BHV(DONTNEED);
544 P_MADV_BHV(DONTFORK);
546 P_MADV_BHV(HWPOISON);
547 #ifdef MADV_SOFT_OFFLINE
548 P_MADV_BHV(SOFT_OFFLINE);
550 P_MADV_BHV(MERGEABLE);
551 P_MADV_BHV(UNMERGEABLE);
553 P_MADV_BHV(HUGEPAGE);
555 #ifdef MADV_NOHUGEPAGE
556 P_MADV_BHV(NOHUGEPAGE);
559 P_MADV_BHV(DONTDUMP);
568 return scnprintf(bf, size, "%#x", behavior);
571 #define SCA_MADV_BHV syscall_arg__scnprintf_madvise_behavior
/*
 * Prints the flock operation argument. A LOCK_* command is printed by name
 * only when all of its bits are present in 'op'; names are joined with '|'.
 * The condition for the early "NONE" result and the one guarding the final
 * hex output are not visible in this listing.
 */
573 static size_t syscall_arg__scnprintf_flock(char *bf, size_t size,
574 struct syscall_arg *arg)
576 int printed = 0, op = arg->val;
579 return scnprintf(bf, size, "NONE");
581 if ((op & LOCK_##cmd) == LOCK_##cmd) { \
582 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #cmd); \
597 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", op);
602 #define SCA_FLOCK syscall_arg__scnprintf_flock
/*
 * Prints the futex 'op' argument. The command (op & FUTEX_CMD_MASK) is
 * printed by name, or in hex when unknown, followed by "|PRIV" and "|CLKRT"
 * when FUTEX_PRIVATE_FLAG / FUTEX_CLOCK_REALTIME are set.
 *
 * As a side effect each command ORs SCF_* bits into arg->mask. The SCF_*
 * values are one bit per syscall argument position.
 * NOTE(review): presumably the masked arguments are the ones the command
 * ignores, so the caller can leave them out of the output - confirm.
 */
604 static size_t syscall_arg__scnprintf_futex_op(char *bf, size_t size, struct syscall_arg *arg)
606 enum syscall_futex_args {
607 SCF_UADDR = (1 << 0),
610 SCF_TIMEOUT = (1 << 3),
611 SCF_UADDR2 = (1 << 4),
615 int cmd = op & FUTEX_CMD_MASK;
619 #define P_FUTEX_OP(n) case FUTEX_##n: printed = scnprintf(bf, size, #n);
620 P_FUTEX_OP(WAIT); arg->mask |= SCF_VAL3|SCF_UADDR2; break;
621 P_FUTEX_OP(WAKE); arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
622 P_FUTEX_OP(FD); arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
623 P_FUTEX_OP(REQUEUE); arg->mask |= SCF_VAL3|SCF_TIMEOUT; break;
624 P_FUTEX_OP(CMP_REQUEUE); arg->mask |= SCF_TIMEOUT; break;
625 P_FUTEX_OP(CMP_REQUEUE_PI); arg->mask |= SCF_TIMEOUT; break;
626 P_FUTEX_OP(WAKE_OP); break;
627 P_FUTEX_OP(LOCK_PI); arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
628 P_FUTEX_OP(UNLOCK_PI); arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
629 P_FUTEX_OP(TRYLOCK_PI); arg->mask |= SCF_VAL3|SCF_UADDR2; break;
630 P_FUTEX_OP(WAIT_BITSET); arg->mask |= SCF_UADDR2; break;
631 P_FUTEX_OP(WAKE_BITSET); arg->mask |= SCF_UADDR2; break;
632 P_FUTEX_OP(WAIT_REQUEUE_PI); break;
633 default: printed = scnprintf(bf, size, "%#x", cmd); break;
636 if (op & FUTEX_PRIVATE_FLAG)
637 printed += scnprintf(bf + printed, size - printed, "|PRIV");
639 if (op & FUTEX_CLOCK_REALTIME)
640 printed += scnprintf(bf + printed, size - printed, "|CLKRT");
645 #define SCA_FUTEX_OP syscall_arg__scnprintf_futex_op
/*
 * Names for the 'cmd' argument of bpf, indexed by command value; wrapped in
 * strarray__bpf_cmd for use with SCA_STRARRAY.
 */
647 static const char *bpf_cmd[] = {
648 "MAP_CREATE", "MAP_LOOKUP_ELEM", "MAP_UPDATE_ELEM", "MAP_DELETE_ELEM",
649 "MAP_GET_NEXT_KEY", "PROG_LOAD",
651 static DEFINE_STRARRAY(bpf_cmd);
/*
 * Names for the 'op' argument of epoll_ctl. The strarray is declared with
 * offset 1, i.e. the first entry stands for op value 1.
 */
653 static const char *epoll_ctl_ops[] = { "ADD", "DEL", "MOD", };
654 static DEFINE_STRARRAY_OFFSET(epoll_ctl_ops, 1);
/* Names for the 'which' argument of getitimer, indexed by value. */
656 static const char *itimers[] = { "REAL", "VIRTUAL", "PROF", };
657 static DEFINE_STRARRAY(itimers);
/* Names for the 'option' argument of keyctl, indexed by option value. */
659 static const char *keyctl_options[] = {
660 "GET_KEYRING_ID", "JOIN_SESSION_KEYRING", "UPDATE", "REVOKE", "CHOWN",
661 "SETPERM", "DESCRIBE", "CLEAR", "LINK", "UNLINK", "SEARCH", "READ",
662 "INSTANTIATE", "NEGATE", "SET_REQKEY_KEYRING", "SET_TIMEOUT",
663 "ASSUME_AUTHORITY", "GET_SECURITY", "SESSION_TO_PARENT", "REJECT",
664 "INSTANTIATE_IOV", "INVALIDATE", "GET_PERSISTENT",
666 static DEFINE_STRARRAY(keyctl_options);
/*
 * Names for the 'whence' argument of lseek, indexed by value. Further
 * entries of the table are not visible in this listing.
 */
668 static const char *whences[] = { "SET", "CUR", "END",
676 static DEFINE_STRARRAY(whences);
/*
 * Names for the 'cmd' argument of fcntl, indexed by command value. The end
 * of the table is not visible in this listing.
 */
678 static const char *fcntl_cmds[] = {
679 "DUPFD", "GETFD", "SETFD", "GETFL", "SETFL", "GETLK", "SETLK",
680 "SETLKW", "SETOWN", "GETOWN", "SETSIG", "GETSIG", "F_GETLK64",
681 "F_SETLK64", "F_SETLKW64", "F_SETOWN_EX", "F_GETOWN_EX",
684 static DEFINE_STRARRAY(fcntl_cmds);
/*
 * Names for the 'resource' argument of getrlimit and prlimit64, indexed by
 * resource number. The end of the table is not visible in this listing.
 */
686 static const char *rlimit_resources[] = {
687 "CPU", "FSIZE", "DATA", "STACK", "CORE", "RSS", "NPROC", "NOFILE",
688 "MEMLOCK", "AS", "LOCKS", "SIGPENDING", "MSGQUEUE", "NICE", "RTPRIO",
691 static DEFINE_STRARRAY(rlimit_resources);
/* Names for the 'how' argument of rt_sigprocmask, indexed by value. */
693 static const char *sighow[] = { "BLOCK", "UNBLOCK", "SETMASK", };
694 static DEFINE_STRARRAY(sighow);
/* Names for the 'clk_id' argument of clock_gettime, indexed by clock id. */
696 static const char *clockid[] = {
697 "REALTIME", "MONOTONIC", "PROCESS_CPUTIME_ID", "THREAD_CPUTIME_ID",
698 "MONOTONIC_RAW", "REALTIME_COARSE", "MONOTONIC_COARSE", "BOOTTIME",
699 "REALTIME_ALARM", "BOOTTIME_ALARM", "SGI_CYCLE", "TAI"
701 static DEFINE_STRARRAY(clockid);
/*
 * Socket address family names, indexed by the AF_* value (AF_UNSPEC == 0
 * ... AF_VSOCK == 40); wrapped in strarray__socket_families for use with
 * SCA_STRARRAY.
 *
 * The table has to stay dense and in AF_* order: AF_MPLS (28) sits between
 * AF_IB (27) and AF_CAN (29). Leaving it out shifts every later family down
 * by one, so e.g. AF_CAN sockets would be reported as "TIPC".
 */
static const char *socket_families[] = {
	"UNSPEC", "LOCAL", "INET", "AX25", "IPX", "APPLETALK", "NETROM",
	"BRIDGE", "ATMPVC", "X25", "INET6", "ROSE", "DECnet", "NETBEUI",
	"SECURITY", "KEY", "NETLINK", "PACKET", "ASH", "ECONET", "ATMSVC",
	"RDS", "SNA", "IRDA", "PPPOX", "WANPIPE", "LLC", "IB", "MPLS", "CAN",
	"TIPC", "BLUETOOTH", "IUCV", "RXRPC", "ISDN", "PHONET", "IEEE802154",
	"CAIF", "ALG", "NFC", "VSOCK",
};
/* Wraps socket_families in strarray__socket_families (no offset). */
711 static DEFINE_STRARRAY(socket_families);
/*
 * Prints a socket 'type' argument. The value is split with SOCK_TYPE_MASK
 * (0xf unless the headers define it): the masked part is printed as the
 * SOCK_* type name, or in hex when unknown; the remaining bits are treated
 * as flags, each known one appended as "|NAME" and cleared, with the final
 * scnprintf() appending 'flags' as "|0x..". The condition guarding that
 * final output is not visible in this listing.
 */
713 #ifndef SOCK_TYPE_MASK
714 #define SOCK_TYPE_MASK 0xf
717 static size_t syscall_arg__scnprintf_socket_type(char *bf, size_t size,
718 struct syscall_arg *arg)
722 flags = type & ~SOCK_TYPE_MASK;
724 type &= SOCK_TYPE_MASK;
726 * Can't use a strarray, MIPS may override for ABI reasons.
729 #define P_SK_TYPE(n) case SOCK_##n: printed = scnprintf(bf, size, #n); break;
734 P_SK_TYPE(SEQPACKET);
739 printed = scnprintf(bf, size, "%#x", type);
742 #define P_SK_FLAG(n) \
743 if (flags & SOCK_##n) { \
744 printed += scnprintf(bf + printed, size - printed, "|%s", #n); \
745 flags &= ~SOCK_##n; \
753 printed += scnprintf(bf + printed, size - printed, "|%#x", flags);
758 #define SCA_SK_TYPE syscall_arg__scnprintf_socket_type
/*
 * Fallback MSG_* values followed by the printer for the 'flags' argument of
 * recvfrom/recvmmsg/recvmsg. Set MSG_* bits are printed by name, joined with
 * '|'. The condition for the early "NONE" result and the one guarding the
 * final hex output are not visible in this listing.
 */
761 #define MSG_PROBE 0x10
763 #ifndef MSG_WAITFORONE
764 #define MSG_WAITFORONE 0x10000
766 #ifndef MSG_SENDPAGE_NOTLAST
767 #define MSG_SENDPAGE_NOTLAST 0x20000
770 #define MSG_FASTOPEN 0x20000000
773 static size_t syscall_arg__scnprintf_msg_flags(char *bf, size_t size,
774 struct syscall_arg *arg)
776 int printed = 0, flags = arg->val;
779 return scnprintf(bf, size, "NONE");
780 #define P_MSG_FLAG(n) \
781 if (flags & MSG_##n) { \
782 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
788 P_MSG_FLAG(DONTROUTE);
793 P_MSG_FLAG(DONTWAIT);
800 P_MSG_FLAG(ERRQUEUE);
801 P_MSG_FLAG(NOSIGNAL);
803 P_MSG_FLAG(WAITFORONE);
804 P_MSG_FLAG(SENDPAGE_NOTLAST);
805 P_MSG_FLAG(FASTOPEN);
806 P_MSG_FLAG(CMSG_CLOEXEC);
810 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
815 #define SCA_MSG_FLAGS syscall_arg__scnprintf_msg_flags
/*
 * Prints the access() 'mode' argument: "F" for F_OK (zero), otherwise the
 * letter of every <n>_OK bit that is set, concatenated without a separator.
 * The last scnprintf() appends 'mode' as "|0x.."; its guarding condition is
 * not visible in this listing.
 */
817 static size_t syscall_arg__scnprintf_access_mode(char *bf, size_t size,
818 struct syscall_arg *arg)
823 if (mode == F_OK) /* 0 */
824 return scnprintf(bf, size, "F");
826 if (mode & n##_OK) { \
827 printed += scnprintf(bf + printed, size - printed, "%s", #n); \
837 printed += scnprintf(bf + printed, size - printed, "|%#x", mode);
842 #define SCA_ACCMODE syscall_arg__scnprintf_access_mode
/*
 * Forward declaration of the pathname printer; its definition is not in the
 * visible part of this listing. SCA_FILENAME is its short table name.
 */
844 static size_t syscall_arg__scnprintf_filename(char *bf, size_t size,
845 struct syscall_arg *arg);
847 #define SCA_FILENAME syscall_arg__scnprintf_filename
/*
 * Prints the open()-style 'flags' argument as '|'-joined O_* names.
 * When O_CREAT is not set, the bit for the following argument (the mode) is
 * set in arg->mask. The condition for the early "RDONLY" result and the one
 * guarding the final hex output are not visible in this listing.
 */
849 static size_t syscall_arg__scnprintf_open_flags(char *bf, size_t size,
850 struct syscall_arg *arg)
852 int printed = 0, flags = arg->val;
854 if (!(flags & O_CREAT))
855 arg->mask |= 1 << (arg->idx + 1); /* Mask the mode parm */
858 return scnprintf(bf, size, "RDONLY");
860 if (flags & O_##n) { \
861 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
/* SYNC is only reported when every bit of O_SYNC is present */
885 if ((flags & O_SYNC) == O_SYNC)
886 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", "SYNC");
898 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
903 #define SCA_OPEN_FLAGS syscall_arg__scnprintf_open_flags
/*
 * Prints the perf_event_open 'flags' argument: each known PERF_FLAG_* bit
 * that is set is printed by name ('|'-separated) and cleared, so the final
 * hex output covers what is left. The list of flags and the condition
 * guarding the final output are not visible in this listing.
 */
905 static size_t syscall_arg__scnprintf_perf_flags(char *bf, size_t size,
906 struct syscall_arg *arg)
908 int printed = 0, flags = arg->val;
914 if (flags & PERF_FLAG_##n) { \
915 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
916 flags &= ~PERF_FLAG_##n; \
926 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
931 #define SCA_PERF_FLAGS syscall_arg__scnprintf_perf_flags
/*
 * Prints the eventfd2 'flags' argument as '|'-joined EFD_* names. The
 * condition for the early "NONE" result, the list of flags and the condition
 * guarding the final hex output are not visible in this listing.
 */
933 static size_t syscall_arg__scnprintf_eventfd_flags(char *bf, size_t size,
934 struct syscall_arg *arg)
936 int printed = 0, flags = arg->val;
939 return scnprintf(bf, size, "NONE");
941 if (flags & EFD_##n) { \
942 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
952 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
957 #define SCA_EFD_FLAGS syscall_arg__scnprintf_eventfd_flags
/*
 * Prints the pipe2 'flags' argument as '|'-joined O_* names. The list of
 * flags and the condition guarding the final hex output are not visible in
 * this listing.
 */
959 static size_t syscall_arg__scnprintf_pipe_flags(char *bf, size_t size,
960 struct syscall_arg *arg)
962 int printed = 0, flags = arg->val;
965 if (flags & O_##n) { \
966 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
975 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
980 #define SCA_PIPE_FLAGS syscall_arg__scnprintf_pipe_flags
/*
 * Prints a signal number argument as the signal name without the "SIG"
 * prefix (one P_SIGNUM() case per signal; the list is not visible in this
 * listing). A value without a case is printed in hex.
 */
982 static size_t syscall_arg__scnprintf_signum(char *bf, size_t size, struct syscall_arg *arg)
987 #define P_SIGNUM(n) case SIG##n: return scnprintf(bf, size, #n)
1030 return scnprintf(bf, size, "%#x", sig);
1033 #define SCA_SIGNUM syscall_arg__scnprintf_signum
/*
 * Start of the x86-only tty ioctl name table. TCGETS (0x5401) is the value
 * the table is based on; its #ifndef guard, if any, is not visible in this
 * listing.
 */
1035 #if defined(__i386__) || defined(__x86_64__)
1037 * FIXME: Make this available to all arches.
1039 #define TCGETS 0x5401
/*
 * Names of the x86 tty ioctl commands, indexed by (cmd - 0x5401), i.e.
 * relative to TCGETS, to match DEFINE_STRARRAY_OFFSET(tioctls, 0x5401).
 *
 * The designated initializers re-synchronise the table after holes in the
 * ioctl numbering, so they must be relative to 0x5401 as well:
 *   TIOCSBRK  0x5427 -> slot 0x26
 *   TIOCGEXCL 0x..40 -> slot 0x3f
 *   FIONCLEX  0x5450 -> slot 0x4f
 *   FIOQSIZE  0x5460 -> slot 0x5f
 * Using the raw low byte (0x27, 0x50, 0x60) labels every command after a
 * hole with the name of its predecessor. Slots without a name stay NULL.
 */
static const char *tioctls[] = {
	"TCGETS", "TCSETS", "TCSETSW", "TCSETSF", "TCGETA", "TCSETA", "TCSETAW",
	"TCSETAF", "TCSBRK", "TCXONC", "TCFLSH", "TIOCEXCL", "TIOCNXCL",
	"TIOCSCTTY", "TIOCGPGRP", "TIOCSPGRP", "TIOCOUTQ", "TIOCSTI",
	"TIOCGWINSZ", "TIOCSWINSZ", "TIOCMGET", "TIOCMBIS", "TIOCMBIC",
	"TIOCMSET", "TIOCGSOFTCAR", "TIOCSSOFTCAR", "FIONREAD", "TIOCLINUX",
	"TIOCCONS", "TIOCGSERIAL", "TIOCSSERIAL", "TIOCPKT", "FIONBIO",
	"TIOCNOTTY", "TIOCSETD", "TIOCGETD", "TCSBRKP", [0x26] = "TIOCSBRK",
	"TIOCCBRK", "TIOCGSID", "TCGETS2", "TCSETS2", "TCSETSW2", "TCSETSF2",
	"TIOCGRS485", "TIOCSRS485", "TIOCGPTN", "TIOCSPTLCK",
	"TIOCGDEV||TCGETX", "TCSETX", "TCSETXF", "TCSETXW", "TIOCSIG",
	"TIOCVHANGUP", "TIOCGPKT", "TIOCGPTLCK", [0x3f] = "TIOCGEXCL",
	[0x4f] = "FIONCLEX", "FIOCLEX", "FIOASYNC", "TIOCSERCONFIG",
	"TIOCSERGWILD", "TIOCSERSWILD", "TIOCGLCKTRMIOS", "TIOCSLCKTRMIOS",
	"TIOCSERGSTRUCT", "TIOCSERGETLSR", "TIOCSERGETMULTI", "TIOCSERSETMULTI",
	"TIOCMIWAIT", "TIOCGICOUNT", [0x5f] = "FIOQSIZE",
};
/*
 * Wraps tioctls in strarray__tioctls with offset 0x5401, the value of the
 * first command in the table (TCGETS).
 */
1059 static DEFINE_STRARRAY_OFFSET(tioctls, 0x5401);
1060 #endif /* defined(__i386__) || defined(__x86_64__) */
/*
 * Fallback SECCOMP_SET_MODE_* values followed by the printer for the seccomp
 * 'op' argument: STRICT and FILTER are printed by name, anything else in
 * hex. Lines between the cases and the return are not visible in this
 * listing.
 */
1062 #ifndef SECCOMP_SET_MODE_STRICT
1063 #define SECCOMP_SET_MODE_STRICT 0
1065 #ifndef SECCOMP_SET_MODE_FILTER
1066 #define SECCOMP_SET_MODE_FILTER 1
1069 static size_t syscall_arg__scnprintf_seccomp_op(char *bf, size_t size, struct syscall_arg *arg)
1075 #define P_SECCOMP_SET_MODE_OP(n) case SECCOMP_SET_MODE_##n: printed = scnprintf(bf, size, #n); break
1076 P_SECCOMP_SET_MODE_OP(STRICT);
1077 P_SECCOMP_SET_MODE_OP(FILTER);
1078 #undef P_SECCOMP_SET_MODE_OP
1079 default: printed = scnprintf(bf, size, "%#x", op); break;
1085 #define SCA_SECCOMP_OP syscall_arg__scnprintf_seccomp_op
/*
 * Fallback SECCOMP_FILTER_FLAG_TSYNC value followed by the printer for the
 * seccomp 'flags' argument: each known SECCOMP_FILTER_FLAG_* bit that is set
 * is printed by name ('|'-separated) and cleared, so the final hex output
 * covers what is left. The list of flags and the condition guarding the
 * final output are not visible in this listing.
 */
1087 #ifndef SECCOMP_FILTER_FLAG_TSYNC
1088 #define SECCOMP_FILTER_FLAG_TSYNC 1
1091 static size_t syscall_arg__scnprintf_seccomp_flags(char *bf, size_t size,
1092 struct syscall_arg *arg)
1094 int printed = 0, flags = arg->val;
1097 if (flags & SECCOMP_FILTER_FLAG_##n) { \
1098 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
1099 flags &= ~SECCOMP_FILTER_FLAG_##n; \
1106 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
1111 #define SCA_SECCOMP_FLAGS syscall_arg__scnprintf_seccomp_flags
/*
 * Fallback GRND_* values followed by the printer for the getrandom 'flags'
 * argument: each known GRND_* bit that is set is printed by name
 * ('|'-separated) and cleared, so the final hex output covers what is left.
 * The list of flags and the condition guarding the final output are not
 * visible in this listing.
 */
1113 #ifndef GRND_NONBLOCK
1114 #define GRND_NONBLOCK 0x0001
1117 #define GRND_RANDOM 0x0002
1120 static size_t syscall_arg__scnprintf_getrandom_flags(char *bf, size_t size,
1121 struct syscall_arg *arg)
1123 int printed = 0, flags = arg->val;
1126 if (flags & GRND_##n) { \
1127 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
1128 flags &= ~GRND_##n; \
1136 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
1141 #define SCA_GETRANDOM_FLAGS syscall_arg__scnprintf_getrandom_flags
/*
 * Initializer fragment for a syscall_fmts entry: selects SCA_STRARRAY as the
 * printer for argument index 'arg' and passes &strarray__<array> as that
 * argument's parameter. 'name' does not appear in the expansion; it only
 * documents the argument at the point of use.
 */
1143 #define STRARRAY(arg, name, array) \
1144 .arg_scnprintf = { [arg] = SCA_STRARRAY, }, \
1145 .arg_parm = { [arg] = &strarray__##array, }
1147 #include "trace/beauty/pid.c"
1148 #include "trace/beauty/mode_t.c"
1149 #include "trace/beauty/sched_policy.c"
1150 #include "trace/beauty/waitid_options.c"
1152 static struct syscall_fmt {
1155 size_t (*arg_scnprintf[6])(char *bf, size_t size, struct syscall_arg *arg);
1161 } syscall_fmts[] = {
1162 { .name = "access", .errmsg = true,
1163 .arg_scnprintf = { [0] = SCA_FILENAME, /* filename */
1164 [1] = SCA_ACCMODE, /* mode */ }, },
1165 { .name = "arch_prctl", .errmsg = true, .alias = "prctl", },
1166 { .name = "bpf", .errmsg = true, STRARRAY(0, cmd, bpf_cmd), },
1167 { .name = "brk", .hexret = true,
1168 .arg_scnprintf = { [0] = SCA_HEX, /* brk */ }, },
1169 { .name = "chdir", .errmsg = true,
1170 .arg_scnprintf = { [0] = SCA_FILENAME, /* filename */ }, },
1171 { .name = "chmod", .errmsg = true,
1172 .arg_scnprintf = { [0] = SCA_FILENAME, /* filename */ }, },
1173 { .name = "chroot", .errmsg = true,
1174 .arg_scnprintf = { [0] = SCA_FILENAME, /* filename */ }, },
1175 { .name = "clock_gettime", .errmsg = true, STRARRAY(0, clk_id, clockid), },
1176 { .name = "clone", .errpid = true, },
1177 { .name = "close", .errmsg = true,
1178 .arg_scnprintf = { [0] = SCA_CLOSE_FD, /* fd */ }, },
1179 { .name = "connect", .errmsg = true, },
1180 { .name = "creat", .errmsg = true,
1181 .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
1182 { .name = "dup", .errmsg = true,
1183 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1184 { .name = "dup2", .errmsg = true,
1185 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1186 { .name = "dup3", .errmsg = true,
1187 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1188 { .name = "epoll_ctl", .errmsg = true, STRARRAY(1, op, epoll_ctl_ops), },
1189 { .name = "eventfd2", .errmsg = true,
1190 .arg_scnprintf = { [1] = SCA_EFD_FLAGS, /* flags */ }, },
1191 { .name = "faccessat", .errmsg = true,
1192 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
1193 [1] = SCA_FILENAME, /* filename */ }, },
1194 { .name = "fadvise64", .errmsg = true,
1195 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1196 { .name = "fallocate", .errmsg = true,
1197 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1198 { .name = "fchdir", .errmsg = true,
1199 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1200 { .name = "fchmod", .errmsg = true,
1201 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1202 { .name = "fchmodat", .errmsg = true,
1203 .arg_scnprintf = { [0] = SCA_FDAT, /* fd */
1204 [1] = SCA_FILENAME, /* filename */ }, },
1205 { .name = "fchown", .errmsg = true,
1206 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1207 { .name = "fchownat", .errmsg = true,
1208 .arg_scnprintf = { [0] = SCA_FDAT, /* fd */
1209 [1] = SCA_FILENAME, /* filename */ }, },
1210 { .name = "fcntl", .errmsg = true,
1211 .arg_scnprintf = { [0] = SCA_FD, /* fd */
1212 [1] = SCA_STRARRAY, /* cmd */ },
1213 .arg_parm = { [1] = &strarray__fcntl_cmds, /* cmd */ }, },
1214 { .name = "fdatasync", .errmsg = true,
1215 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1216 { .name = "flock", .errmsg = true,
1217 .arg_scnprintf = { [0] = SCA_FD, /* fd */
1218 [1] = SCA_FLOCK, /* cmd */ }, },
1219 { .name = "fsetxattr", .errmsg = true,
1220 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1221 { .name = "fstat", .errmsg = true, .alias = "newfstat",
1222 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1223 { .name = "fstatat", .errmsg = true, .alias = "newfstatat",
1224 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
1225 [1] = SCA_FILENAME, /* filename */ }, },
1226 { .name = "fstatfs", .errmsg = true,
1227 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1228 { .name = "fsync", .errmsg = true,
1229 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1230 { .name = "ftruncate", .errmsg = true,
1231 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1232 { .name = "futex", .errmsg = true,
1233 .arg_scnprintf = { [1] = SCA_FUTEX_OP, /* op */ }, },
1234 { .name = "futimesat", .errmsg = true,
1235 .arg_scnprintf = { [0] = SCA_FDAT, /* fd */
1236 [1] = SCA_FILENAME, /* filename */ }, },
1237 { .name = "getdents", .errmsg = true,
1238 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1239 { .name = "getdents64", .errmsg = true,
1240 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1241 { .name = "getitimer", .errmsg = true, STRARRAY(0, which, itimers), },
1242 { .name = "getpid", .errpid = true, },
1243 { .name = "getpgid", .errpid = true, },
1244 { .name = "getppid", .errpid = true, },
1245 { .name = "getrandom", .errmsg = true,
1246 .arg_scnprintf = { [2] = SCA_GETRANDOM_FLAGS, /* flags */ }, },
1247 { .name = "getrlimit", .errmsg = true, STRARRAY(0, resource, rlimit_resources), },
1248 { .name = "getxattr", .errmsg = true,
1249 .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
1250 { .name = "inotify_add_watch", .errmsg = true,
1251 .arg_scnprintf = { [1] = SCA_FILENAME, /* pathname */ }, },
1252 { .name = "ioctl", .errmsg = true,
1253 .arg_scnprintf = { [0] = SCA_FD, /* fd */
1254 #if defined(__i386__) || defined(__x86_64__)
1256 * FIXME: Make this available to all arches.
1258 [1] = SCA_STRHEXARRAY, /* cmd */
1259 [2] = SCA_HEX, /* arg */ },
1260 .arg_parm = { [1] = &strarray__tioctls, /* cmd */ }, },
1262 [2] = SCA_HEX, /* arg */ }, },
1264 { .name = "keyctl", .errmsg = true, STRARRAY(0, option, keyctl_options), },
1265 { .name = "kill", .errmsg = true,
1266 .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
1267 { .name = "lchown", .errmsg = true,
1268 .arg_scnprintf = { [0] = SCA_FILENAME, /* filename */ }, },
1269 { .name = "lgetxattr", .errmsg = true,
1270 .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
1271 { .name = "linkat", .errmsg = true,
1272 .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
1273 { .name = "listxattr", .errmsg = true,
1274 .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
1275 { .name = "llistxattr", .errmsg = true,
1276 .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
1277 { .name = "lremovexattr", .errmsg = true,
1278 .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
1279 { .name = "lseek", .errmsg = true,
1280 .arg_scnprintf = { [0] = SCA_FD, /* fd */
1281 [2] = SCA_STRARRAY, /* whence */ },
1282 .arg_parm = { [2] = &strarray__whences, /* whence */ }, },
1283 { .name = "lsetxattr", .errmsg = true,
1284 .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
1285 { .name = "lstat", .errmsg = true, .alias = "newlstat",
1286 .arg_scnprintf = { [0] = SCA_FILENAME, /* filename */ }, },
1287 { .name = "lsxattr", .errmsg = true,
1288 .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
1289 { .name = "madvise", .errmsg = true,
1290 .arg_scnprintf = { [0] = SCA_HEX, /* start */
1291 [2] = SCA_MADV_BHV, /* behavior */ }, },
1292 { .name = "mkdir", .errmsg = true,
1293 .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
1294 { .name = "mkdirat", .errmsg = true,
1295 .arg_scnprintf = { [0] = SCA_FDAT, /* fd */
1296 [1] = SCA_FILENAME, /* pathname */ }, },
1297 { .name = "mknod", .errmsg = true,
1298 .arg_scnprintf = { [0] = SCA_FILENAME, /* filename */ }, },
1299 { .name = "mknodat", .errmsg = true,
1300 .arg_scnprintf = { [0] = SCA_FDAT, /* fd */
1301 [1] = SCA_FILENAME, /* filename */ }, },
1302 { .name = "mlock", .errmsg = true,
1303 .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
1304 { .name = "mlockall", .errmsg = true,
1305 .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
1306 { .name = "mmap", .hexret = true,
1307 .arg_scnprintf = { [0] = SCA_HEX, /* addr */
1308 [2] = SCA_MMAP_PROT, /* prot */
1309 [3] = SCA_MMAP_FLAGS, /* flags */
1310 [4] = SCA_FD, /* fd */ }, },
1311 { .name = "mprotect", .errmsg = true,
1312 .arg_scnprintf = { [0] = SCA_HEX, /* start */
1313 [2] = SCA_MMAP_PROT, /* prot */ }, },
1314 { .name = "mq_unlink", .errmsg = true,
1315 .arg_scnprintf = { [0] = SCA_FILENAME, /* u_name */ }, },
1316 { .name = "mremap", .hexret = true,
1317 .arg_scnprintf = { [0] = SCA_HEX, /* addr */
1318 [3] = SCA_MREMAP_FLAGS, /* flags */
1319 [4] = SCA_HEX, /* new_addr */ }, },
1320 { .name = "munlock", .errmsg = true,
1321 .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
1322 { .name = "munmap", .errmsg = true,
1323 .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
1324 { .name = "name_to_handle_at", .errmsg = true,
1325 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
1326 { .name = "newfstatat", .errmsg = true,
1327 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
1328 [1] = SCA_FILENAME, /* filename */ }, },
1329 { .name = "open", .errmsg = true,
1330 .arg_scnprintf = { [0] = SCA_FILENAME, /* filename */
1331 [1] = SCA_OPEN_FLAGS, /* flags */ }, },
1332 { .name = "open_by_handle_at", .errmsg = true,
1333 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
1334 [2] = SCA_OPEN_FLAGS, /* flags */ }, },
1335 { .name = "openat", .errmsg = true,
1336 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
1337 [1] = SCA_FILENAME, /* filename */
1338 [2] = SCA_OPEN_FLAGS, /* flags */ }, },
1339 { .name = "perf_event_open", .errmsg = true,
1340 .arg_scnprintf = { [1] = SCA_INT, /* pid */
1341 [2] = SCA_INT, /* cpu */
1342 [3] = SCA_FD, /* group_fd */
1343 [4] = SCA_PERF_FLAGS, /* flags */ }, },
1344 { .name = "pipe2", .errmsg = true,
1345 .arg_scnprintf = { [1] = SCA_PIPE_FLAGS, /* flags */ }, },
1346 { .name = "poll", .errmsg = true, .timeout = true, },
1347 { .name = "ppoll", .errmsg = true, .timeout = true, },
1348 { .name = "pread", .errmsg = true, .alias = "pread64",
1349 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1350 { .name = "preadv", .errmsg = true, .alias = "pread",
1351 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1352 { .name = "prlimit64", .errmsg = true, STRARRAY(1, resource, rlimit_resources), },
1353 { .name = "pwrite", .errmsg = true, .alias = "pwrite64",
1354 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1355 { .name = "pwritev", .errmsg = true,
1356 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1357 { .name = "read", .errmsg = true,
1358 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1359 { .name = "readlink", .errmsg = true,
1360 .arg_scnprintf = { [0] = SCA_FILENAME, /* path */ }, },
1361 { .name = "readlinkat", .errmsg = true,
1362 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
1363 [1] = SCA_FILENAME, /* pathname */ }, },
1364 { .name = "readv", .errmsg = true,
1365 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1366 { .name = "recvfrom", .errmsg = true,
1367 .arg_scnprintf = { [0] = SCA_FD, /* fd */
1368 [3] = SCA_MSG_FLAGS, /* flags */ }, },
1369 { .name = "recvmmsg", .errmsg = true,
1370 .arg_scnprintf = { [0] = SCA_FD, /* fd */
1371 [3] = SCA_MSG_FLAGS, /* flags */ }, },
1372 { .name = "recvmsg", .errmsg = true,
1373 .arg_scnprintf = { [0] = SCA_FD, /* fd */
1374 [2] = SCA_MSG_FLAGS, /* flags */ }, },
1375 { .name = "removexattr", .errmsg = true,
1376 .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
1377 { .name = "renameat", .errmsg = true,
1378 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
1379 { .name = "rmdir", .errmsg = true,
1380 .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
1381 { .name = "rt_sigaction", .errmsg = true,
1382 .arg_scnprintf = { [0] = SCA_SIGNUM, /* sig */ }, },
1383 { .name = "rt_sigprocmask", .errmsg = true, STRARRAY(0, how, sighow), },
1384 { .name = "rt_sigqueueinfo", .errmsg = true,
1385 .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
1386 { .name = "rt_tgsigqueueinfo", .errmsg = true,
1387 .arg_scnprintf = { [2] = SCA_SIGNUM, /* sig */ }, },
1388 { .name = "sched_setscheduler", .errmsg = true,
1389 .arg_scnprintf = { [1] = SCA_SCHED_POLICY, /* policy */ }, },
1390 { .name = "seccomp", .errmsg = true,
1391 .arg_scnprintf = { [0] = SCA_SECCOMP_OP, /* op */
1392 [1] = SCA_SECCOMP_FLAGS, /* flags */ }, },
1393 { .name = "select", .errmsg = true, .timeout = true, },
1394 { .name = "sendmmsg", .errmsg = true,
1395 .arg_scnprintf = { [0] = SCA_FD, /* fd */
1396 [3] = SCA_MSG_FLAGS, /* flags */ }, },
1397 { .name = "sendmsg", .errmsg = true,
1398 .arg_scnprintf = { [0] = SCA_FD, /* fd */
1399 [2] = SCA_MSG_FLAGS, /* flags */ }, },
1400 { .name = "sendto", .errmsg = true,
1401 .arg_scnprintf = { [0] = SCA_FD, /* fd */
1402 [3] = SCA_MSG_FLAGS, /* flags */ }, },
1403 { .name = "set_tid_address", .errpid = true, },
1404 { .name = "setitimer", .errmsg = true, STRARRAY(0, which, itimers), },
1405 { .name = "setpgid", .errmsg = true, },
1406 { .name = "setrlimit", .errmsg = true, STRARRAY(0, resource, rlimit_resources), },
1407 { .name = "setxattr", .errmsg = true,
1408 .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
1409 { .name = "shutdown", .errmsg = true,
1410 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1411 { .name = "socket", .errmsg = true,
1412 .arg_scnprintf = { [0] = SCA_STRARRAY, /* family */
1413 [1] = SCA_SK_TYPE, /* type */ },
1414 .arg_parm = { [0] = &strarray__socket_families, /* family */ }, },
1415 { .name = "socketpair", .errmsg = true,
1416 .arg_scnprintf = { [0] = SCA_STRARRAY, /* family */
1417 [1] = SCA_SK_TYPE, /* type */ },
1418 .arg_parm = { [0] = &strarray__socket_families, /* family */ }, },
1419 { .name = "stat", .errmsg = true, .alias = "newstat",
1420 .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
1421 { .name = "statfs", .errmsg = true,
1422 .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
1423 { .name = "swapoff", .errmsg = true,
1424 .arg_scnprintf = { [0] = SCA_FILENAME, /* specialfile */ }, },
1425 { .name = "swapon", .errmsg = true,
1426 .arg_scnprintf = { [0] = SCA_FILENAME, /* specialfile */ }, },
1427 { .name = "symlinkat", .errmsg = true,
1428 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
1429 { .name = "tgkill", .errmsg = true,
1430 .arg_scnprintf = { [2] = SCA_SIGNUM, /* sig */ }, },
1431 { .name = "tkill", .errmsg = true,
1432 .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
1433 { .name = "truncate", .errmsg = true,
1434 .arg_scnprintf = { [0] = SCA_FILENAME, /* path */ }, },
1435 { .name = "uname", .errmsg = true, .alias = "newuname", },
1436 { .name = "unlinkat", .errmsg = true,
1437 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
1438 [1] = SCA_FILENAME, /* pathname */ }, },
1439 { .name = "utime", .errmsg = true,
1440 .arg_scnprintf = { [0] = SCA_FILENAME, /* filename */ }, },
1441 { .name = "utimensat", .errmsg = true,
1442 .arg_scnprintf = { [0] = SCA_FDAT, /* dirfd */
1443 [1] = SCA_FILENAME, /* filename */ }, },
1444 { .name = "utimes", .errmsg = true,
1445 .arg_scnprintf = { [0] = SCA_FILENAME, /* filename */ }, },
1446 { .name = "vmsplice", .errmsg = true,
1447 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1448 { .name = "wait4", .errpid = true,
1449 .arg_scnprintf = { [2] = SCA_WAITID_OPTIONS, /* options */ }, },
1450 { .name = "waitid", .errpid = true,
1451 .arg_scnprintf = { [3] = SCA_WAITID_OPTIONS, /* options */ }, },
1452 { .name = "write", .errmsg = true,
1453 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1454 { .name = "writev", .errmsg = true,
1455 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1458 static int syscall_fmt__cmp(const void *name, const void *fmtp)
1460 const struct syscall_fmt *fmt = fmtp;
1461 return strcmp(name, fmt->name);
1464 static struct syscall_fmt *syscall_fmt__find(const char *name)
1466 const int nmemb = ARRAY_SIZE(syscall_fmts);
1467 return bsearch(name, syscall_fmts, nmemb, sizeof(struct syscall_fmt), syscall_fmt__cmp);
/*
 * Members of the per-syscall descriptor. The opening line of the struct and
 * several members are not part of this listing.
 */
/* Tracepoint format for this syscall, obtained via trace_event__tp_format(). */
1471 struct event_format *tp_format;
/* Head of the tracepoint field list; walked through ->next when printing arguments. */
1473 struct format_field *args;
/* Entry found by syscall_fmt__find(), NULL when the syscall has none. */
1476 struct syscall_fmt *fmt;
/* Table of per-argument formatters, allocated in syscall__set_arg_fmts(). */
1477 size_t (**arg_scnprintf)(char *bf, size_t size, struct syscall_arg *arg);
1481 static size_t fprintf_duration(unsigned long t, FILE *fp)
1483 double duration = (double)t / NSEC_PER_MSEC;
1484 size_t printed = fprintf(fp, "(");
1486 if (duration >= 1.0)
1487 printed += color_fprintf(fp, PERF_COLOR_RED, "%6.3f ms", duration);
1488 else if (duration >= 0.01)
1489 printed += color_fprintf(fp, PERF_COLOR_YELLOW, "%6.3f ms", duration);
1491 printed += color_fprintf(fp, PERF_COLOR_NORMAL, "%6.3f ms", duration);
1492 return printed + fprintf(fp, "): ");
1496 * filename.ptr: The filename char pointer that will be vfs_getname'd
1497 * filename.entry_str_pos: Where to insert the string translated from
1498 * filename.ptr by the vfs_getname tracepoint/kprobe.
/*
 * Per-thread tracing state, kept as the thread's priv pointer (see
 * thread__trace()). Only part of the member list is present in this listing.
 */
1500 struct thread_trace {
/* Events seen for this thread; incremented on each thread__trace() call. */
1504 unsigned long nr_events;
/* NOTE(review): presumably major/minor page fault counters - their updates are not visible here, confirm. */
1505 unsigned long pfmaj, pfmin;
/* filename.entry_str_pos: offset into entry_str where the resolved filename is inserted. */
1510 short int entry_str_pos;
/* filename.namelen: longest name filename.name can currently hold (grown in trace__vfs_getname()). */
1512 unsigned int namelen;
/* Per-syscall statistics, looked up by syscall id in thread__update_stats(). */
1520 struct intlist *syscall_stats;
1523 static struct thread_trace *thread_trace__new(void)
1525 struct thread_trace *ttrace = zalloc(sizeof(struct thread_trace));
1528 ttrace->paths.max = -1;
1530 ttrace->syscall_stats = intlist__new(NULL);
/*
 * Return the thread_trace attached to @thread, creating it on first use, and
 * account one more event for the thread.
 *
 * @fp receives a red "not enough memory" warning; the lines that route
 * execution to that warning are not part of this listing.
 */
1535 static struct thread_trace *thread__trace(struct thread *thread, FILE *fp)
1537 struct thread_trace *ttrace;
/* Lazily allocate the private state the first time this thread shows up. */
1542 if (thread__priv(thread) == NULL)
1543 thread__set_priv(thread, thread_trace__new());
/* Still NULL: thread_trace__new() could not allocate. */
1545 if (thread__priv(thread) == NULL)
1548 ttrace = thread__priv(thread);
1549 ++ttrace->nr_events;
1553 color_fprintf(fp, PERF_COLOR_RED,
1554 "WARNING: not enough memory, dropping samples!\n");
/* Bit flags tested against trace->trace_pgfaults (see trace__record()). */
1558 #define TRACE_PFMAJ (1 << 0)
1559 #define TRACE_PFMIN (1 << 1)
/* Size in bytes of each thread's entry_str buffer (allocated in trace__sys_enter()). */
1561 static const size_t trace__entry_str_size = 2048;
1563 static int trace__set_fd_pathname(struct thread *thread, int fd, const char *pathname)
1565 struct thread_trace *ttrace = thread__priv(thread);
1567 if (fd > ttrace->paths.max) {
1568 char **npath = realloc(ttrace->paths.table, (fd + 1) * sizeof(char *));
1573 if (ttrace->paths.max != -1) {
1574 memset(npath + ttrace->paths.max + 1, 0,
1575 (fd - ttrace->paths.max) * sizeof(char *));
1577 memset(npath, 0, (fd + 1) * sizeof(char *));
1580 ttrace->paths.table = npath;
1581 ttrace->paths.max = fd;
1584 ttrace->paths.table[fd] = strdup(pathname);
1586 return ttrace->paths.table[fd] != NULL ? 0 : -1;
/*
 * Resolve @fd of @thread to a path by reading the matching /proc symlink and
 * store the result with trace__set_fd_pathname().
 *
 * On the success path the return value is that of trace__set_fd_pathname();
 * what the failure checks return is not part of this listing.
 */
1589 static int thread__read_fd_path(struct thread *thread, int fd)
1591 char linkname[PATH_MAX], pathname[PATH_MAX];
/* pid == tid uses /proc/<pid>/fd/<fd>, otherwise the per-task directory is used. */
1595 if (thread->pid_ == thread->tid) {
1596 scnprintf(linkname, sizeof(linkname),
1597 "/proc/%d/fd/%d", thread->pid_, fd);
1599 scnprintf(linkname, sizeof(linkname),
1600 "/proc/%d/task/%d/fd/%d", thread->pid_, thread->tid, fd);
/* Give up if the link cannot be examined or its target plus a NUL would not fit in pathname. */
1603 if (lstat(linkname, &st) < 0 || st.st_size + 1 > (off_t)sizeof(pathname))
1606 ret = readlink(linkname, pathname, sizeof(pathname));
/* Reject errors and results longer than the size lstat() reported. */
1608 if (ret < 0 || ret > st.st_size)
/* readlink() does not terminate the string, do it here. */
1611 pathname[ret] = '\0';
1612 return trace__set_fd_pathname(thread, fd, pathname);
/*
 * Return the path recorded for @fd of @thread.
 *
 * When the table has no entry for @fd, thread__read_fd_path() is used to get
 * it from /proc, and trace->stats.proc_getname counts those attempts.
 * NOTE(review): further early-exit checks sit on lines missing from this
 * listing.
 */
1615 static const char *thread__fd_path(struct thread *thread, int fd,
1616 struct trace *trace)
1618 struct thread_trace *ttrace = thread__priv(thread);
/* Nothing cached for this descriptor yet. */
1626 if ((fd > ttrace->paths.max || ttrace->paths.table[fd] == NULL)) {
1629 ++trace->stats.proc_getname;
1630 if (thread__read_fd_path(thread, fd))
1634 return ttrace->paths.table[fd];
/*
 * Format a file descriptor argument into @bf: the decimal value, then the
 * path obtained from thread__fd_path() wrapped in "<...>".
 * `printed` accumulates the characters written.
 * NOTE(review): the check guarding the "<path>" part is on a line missing
 * from this listing.
 */
1637 static size_t syscall_arg__scnprintf_fd(char *bf, size_t size,
1638 struct syscall_arg *arg)
1641 size_t printed = scnprintf(bf, size, "%d", fd);
1642 const char *path = thread__fd_path(arg->thread, fd, arg->trace);
1645 printed += scnprintf(bf + printed, size - printed, "<%s>", path);
/*
 * Format the fd argument exactly like syscall_arg__scnprintf_fd() and then
 * zfree() the path recorded for that fd, so the slot reads as empty the
 * next time thread__fd_path() looks at it.
 */
1650 static size_t syscall_arg__scnprintf_close_fd(char *bf, size_t size,
1651 struct syscall_arg *arg)
1654 size_t printed = syscall_arg__scnprintf_fd(bf, size, arg);
1655 struct thread_trace *ttrace = thread__priv(arg->thread);
/* Only touch the table for descriptors that are inside its bounds. */
1657 if (ttrace && fd >= 0 && fd <= ttrace->paths.max)
1658 zfree(&ttrace->paths.table[fd]);
/*
 * Record, for @thread, the pointer value of a filename argument and the
 * offset of @bf inside ttrace->entry_str. trace__vfs_getname() later uses
 * that offset to splice the resolved name into the entry string.
 */
1663 static void thread__set_filename_pos(struct thread *thread, const char *bf,
1666 struct thread_trace *ttrace = thread__priv(thread);
1668 ttrace->filename.ptr = ptr;
/* @bf points into entry_str, so the difference is the insertion offset. */
1669 ttrace->filename.entry_str_pos = bf - ttrace->entry_str;
1672 static size_t syscall_arg__scnprintf_filename(char *bf, size_t size,
1673 struct syscall_arg *arg)
1675 unsigned long ptr = arg->val;
1677 if (!arg->trace->vfs_getname)
1678 return scnprintf(bf, size, "%#x", ptr);
1680 thread__set_filename_pos(arg->thread, bf, ptr);
1684 static bool trace__filter_duration(struct trace *trace, double t)
1686 return t < (trace->duration_filter * NSEC_PER_MSEC);
1689 static size_t trace__fprintf_tstamp(struct trace *trace, u64 tstamp, FILE *fp)
1691 double ts = (double)(tstamp - trace->base_time) / NSEC_PER_MSEC;
1693 return fprintf(fp, "%10.3f ", ts);
/*
 * File-scope state flags.
 * NOTE(review): 'done' is presumably set by sig_handler() on a line missing
 * from this listing - confirm.
 */
1696 static bool done = false;
1697 static bool interrupted = false;
/* Signal handler: 'interrupted' ends up true only when the signal is SIGINT. */
1699 static void sig_handler(int sig)
1702 interrupted = sig == SIGINT;
/*
 * Print the common prefix of an output line to @fp: the relative timestamp,
 * "(<duration> ms): " and, when several threads are traced, the thread's
 * comm (at most 14 characters, only with show_comm) followed by its tid.
 */
1705 static size_t trace__fprintf_entry_head(struct trace *trace, struct thread *thread,
1706 u64 duration, u64 tstamp, FILE *fp)
1708 size_t printed = trace__fprintf_tstamp(trace, tstamp, fp);
1709 printed += fprintf_duration(duration, fp);
/* Identify the thread only when the output interleaves more than one. */
1711 if (trace->multiple_threads) {
1712 if (trace->show_comm)
1713 printed += fprintf(fp, "%.14s/", thread__comm_str(thread));
1714 printed += fprintf(fp, "%d ", thread->tid);
/*
 * Process a non-sample event. PERF_RECORD_LOST is reported in red on
 * trace->output and forwarded to machine__process_lost_event(); the other
 * visible path forwards the event to machine__process_event().
 */
1720 static int trace__process_event(struct trace *trace, struct machine *machine,
1721 union perf_event *event, struct perf_sample *sample)
1725 switch (event->header.type) {
1726 case PERF_RECORD_LOST:
/* Make lost events visible to the user before handing them on. */
1727 color_fprintf(trace->output, PERF_COLOR_RED,
1728 "LOST %" PRIu64 " events!\n", event->lost.lost);
1729 ret = machine__process_lost_event(machine, event, sample);
1732 ret = machine__process_event(machine, event, sample);
1739 static int trace__tool_process(struct perf_tool *tool,
1740 union perf_event *event,
1741 struct perf_sample *sample,
1742 struct machine *machine)
1744 struct trace *trace = container_of(tool, struct trace, tool);
1745 return trace__process_event(trace, machine, event, sample);
/*
 * Prepare symbol resolution for a live session: symbol__init(), creation of
 * the host machine, registration of machine__resolve_kernel_addr as the
 * trace_event resolver and synthesis of the threads in @evlist (events go
 * through trace__tool_process()).
 * The error handling between these steps is not part of this listing.
 */
1748 static int trace__symbols_init(struct trace *trace, struct perf_evlist *evlist)
1750 int err = symbol__init(NULL);
1755 trace->host = machine__new_host();
1756 if (trace->host == NULL)
1759 if (trace_event__register_resolver(trace->host, machine__resolve_kernel_addr) < 0)
1762 err = __machine__synthesize_threads(trace->host, &trace->tool, &trace->opts.target,
1763 evlist->threads, trace__tool_process, false,
1764 trace->opts.proc_map_timeout);
/*
 * Pick a formatter for every argument of @sc.
 *
 * Per argument, in order of preference: the explicit entry in
 * sc->fmt->arg_scnprintf, hex for pointer fields, SCA_PID for "pid_t" fields
 * and SCA_MODE_T for "umode_t" fields. Arguments matching none of these keep
 * the NULL that calloc() put in their slot.
 */
1771 static int syscall__set_arg_fmts(struct syscall *sc)
1773 struct format_field *field;
/* One zeroed function pointer slot per argument. */
1776 sc->arg_scnprintf = calloc(sc->nr_args, sizeof(void *));
1777 if (sc->arg_scnprintf == NULL)
1781 sc->arg_parm = sc->fmt->arg_parm;
1783 for (field = sc->args; field; field = field->next) {
1784 if (sc->fmt && sc->fmt->arg_scnprintf[idx])
1785 sc->arg_scnprintf[idx] = sc->fmt->arg_scnprintf[idx];
1786 else if (field->flags & FIELD_IS_POINTER)
1787 sc->arg_scnprintf[idx] = syscall_arg__scnprintf_hex;
1788 else if (strcmp(field->type, "pid_t") == 0)
1789 sc->arg_scnprintf[idx] = SCA_PID;
1790 else if (strcmp(field->type, "umode_t") == 0)
1791 sc->arg_scnprintf[idx] = SCA_MODE_T;
/*
 * Fill in trace->syscalls.table[@id].
 *
 * Grows the table when @id is beyond its current end, looks up the optional
 * formatting entry and the "sys_enter_<name>" tracepoint format (retrying
 * with the alias from the formatting entry), skips a leading syscall number
 * field and finally sets up the per-argument formatters.
 *
 * On the path visible here the return value is that of
 * syscall__set_arg_fmts().
 */
1798 static int trace__read_syscall_info(struct trace *trace, int id)
1802 const char *name = syscalltbl__name(trace->sctbl, id);
1807 if (id > trace->syscalls.max) {
1808 struct syscall *nsyscalls = realloc(trace->syscalls.table, (id + 1) * sizeof(*sc));
1810 if (nsyscalls == NULL)
/* Zero the new slots only; with max == -1 the table was empty, so zero all of it. */
1813 if (trace->syscalls.max != -1) {
1814 memset(nsyscalls + trace->syscalls.max + 1, 0,
1815 (id - trace->syscalls.max) * sizeof(*sc));
1817 memset(nsyscalls, 0, (id + 1) * sizeof(*sc));
1820 trace->syscalls.table = nsyscalls;
1821 trace->syscalls.max = id;
1824 sc = trace->syscalls.table + id;
1827 sc->fmt = syscall_fmt__find(sc->name);
1829 snprintf(tp_name, sizeof(tp_name), "sys_enter_%s", sc->name);
1830 sc->tp_format = trace_event__tp_format("syscalls", tp_name);
/* No tracepoint under the syscall's own name: try the alias, if one is known. */
1832 if (IS_ERR(sc->tp_format) && sc->fmt && sc->fmt->alias) {
1833 snprintf(tp_name, sizeof(tp_name), "sys_enter_%s", sc->fmt->alias);
1834 sc->tp_format = trace_event__tp_format("syscalls", tp_name);
1837 if (IS_ERR(sc->tp_format))
1840 sc->args = sc->tp_format->format.fields;
1841 sc->nr_args = sc->tp_format->format.nr_fields;
1843 * We need to check and discard the first variable '__syscall_nr'
1844 * or 'nr' that mean the syscall number. It is needless here.
1845 * So drop '__syscall_nr' or 'nr' field but does not exist on older kernels.
1847 if (sc->args && (!strcmp(sc->args->name, "__syscall_nr") || !strcmp(sc->args->name, "nr"))) {
1848 sc->args = sc->args->next;
1852 sc->is_exit = !strcmp(name, "exit_group") || !strcmp(name, "exit");
1854 return syscall__set_arg_fmts(sc);
/*
 * Translate the syscall names in trace->ev_qualifier into syscall ids,
 * stored in trace->ev_qualifier_ids.entries (one slot per name is
 * allocated up front).
 *
 * Names that syscalltbl__id() does not accept are listed on trace->output
 * after "Error:\tInvalid syscall ", followed by hints; on that path the ids
 * array is released and its count reset to 0.
 */
1857 static int trace__validate_ev_qualifier(struct trace *trace)
1860 struct str_node *pos;
1862 trace->ev_qualifier_ids.nr = strlist__nr_entries(trace->ev_qualifier);
1863 trace->ev_qualifier_ids.entries = malloc(trace->ev_qualifier_ids.nr *
1864 sizeof(trace->ev_qualifier_ids.entries[0]));
1866 if (trace->ev_qualifier_ids.entries == NULL) {
1867 fputs("Error:\tNot enough memory for allocating events qualifier ids\n",
1875 strlist__for_each(pos, trace->ev_qualifier) {
1876 const char *sc = pos->s;
1877 int id = syscalltbl__id(trace->sctbl, sc);
1881 fputs("Error:\tInvalid syscall ", trace->output);
1884 fputs(", ", trace->output);
1887 fputs(sc, trace->output);
1890 trace->ev_qualifier_ids.entries[i++] = id;
1894 fputs("\nHint:\ttry 'perf list syscalls:sys_enter_*'"
1895 "\nHint:\tand: 'man syscalls'\n", trace->output);
1896 zfree(&trace->ev_qualifier_ids.entries);
1897 trace->ev_qualifier_ids.nr = 0;
1904 * args is to be interpreted as a series of longs but we need to handle
1905 * 8-byte unaligned accesses. args points to raw_data within the event
1906 * and raw_data is guaranteed to be 8-byte unaligned because it is
1907 * preceded by raw_size which is a u32. So we need to copy args to a temp
1908 * variable to read it. Most notably this avoids extended load instructions
1909 * on unaligned addresses
/*
 * Format the arguments of @sc, read from the raw payload at @args, into @bf
 * (never more than @size bytes).
 *
 * When tracepoint field information is available (sc->args) every argument
 * is emitted as "name: value", using the argument's formatter when one is
 * set. Otherwise the values are emitted by the index based code at the end.
 * Each value is fetched with memcpy() because @args is not suitably aligned
 * for a direct unsigned long load.
 */
1912 static size_t syscall__scnprintf_args(struct syscall *sc, char *bf, size_t size,
1913 unsigned char *args, struct trace *trace,
1914 struct thread *thread)
1920 if (sc->args != NULL) {
1921 struct format_field *field;
1923 struct syscall_arg arg = {
1930 for (field = sc->args; field;
1931 field = field->next, ++arg.idx, bit <<= 1) {
1935 /* special care for unaligned accesses */
1936 p = args + sizeof(unsigned long) * arg.idx;
1937 memcpy(&val, p, sizeof(val));
1940 * Suppress this argument if its value is zero and
1941 * and we don't have a string associated in an
1945 !(sc->arg_scnprintf &&
1946 sc->arg_scnprintf[arg.idx] == SCA_STRARRAY &&
1947 sc->arg_parm[arg.idx]))
/* Separate from the previous argument only when something was already printed. */
1950 printed += scnprintf(bf + printed, size - printed,
1951 "%s%s: ", printed ? ", " : "", field->name);
1952 if (sc->arg_scnprintf && sc->arg_scnprintf[arg.idx]) {
1955 arg.parm = sc->arg_parm[arg.idx];
1956 printed += sc->arg_scnprintf[arg.idx](bf + printed,
1957 size - printed, &arg);
1959 printed += scnprintf(bf + printed, size - printed,
1967 /* special care for unaligned accesses */
1968 p = args + sizeof(unsigned long) * i;
1969 memcpy(&val, p, sizeof(val));
1970 printed += scnprintf(bf + printed, size - printed,
1972 printed ? ", " : "", i, val);
/*
 * Signature of the per-event sample handlers. A handler is stored in
 * evsel->handler and invoked from trace__process_sample().
 */
1980 typedef int (*tracepoint_handler)(struct trace *trace, struct perf_evsel *evsel,
1981 union perf_event *event,
1982 struct perf_sample *sample);
/*
 * Return the descriptor for syscall @id, loading it on first use through
 * trace__read_syscall_info().
 *
 * Diagnostics go to trace->output: "Invalid syscall ..." for ids rejected
 * up front and "Problems reading syscall ..." when the information could
 * not be read. The checks that select those paths are on lines missing from
 * this listing.
 */
1984 static struct syscall *trace__syscall_info(struct trace *trace,
1985 struct perf_evsel *evsel, int id)
1991 * XXX: Noticed on x86_64, reproduced as far back as 3.0.36, haven't tried
1992 * before that, leaving at a higher verbosity level till that is
1993 * explained. Reproduced with plain ftrace with:
1995 * echo 1 > /t/events/raw_syscalls/sys_exit/enable
1996 * grep "NR -1 " /t/trace_pipe
1998 * After generating some load on the machine.
2002 fprintf(trace->output, "Invalid syscall %d id, skipping (%s, %" PRIu64 ") ...\n",
2003 id, perf_evsel__name(evsel), ++n);
2008 if ((id > trace->syscalls.max || trace->syscalls.table[id].name == NULL) &&
2009 trace__read_syscall_info(trace, id))
2012 if ((id > trace->syscalls.max || trace->syscalls.table[id].name == NULL))
2015 return &trace->syscalls.table[id];
2019 fprintf(trace->output, "Problems reading syscall %d", id);
2020 if (id <= trace->syscalls.max && trace->syscalls.table[id].name != NULL)
2021 fprintf(trace->output, "(%s)", trace->syscalls.table[id].name);
2022 fputs(" information\n", trace->output);
/*
 * Account one finished syscall @id for this thread: find or create the
 * intlist node for @id, attach a struct stats to it on first use and feed
 * it the call duration.
 *
 * The duration is sample->time - ttrace->entry_time when an entry time is
 * known and lies before the sample. NOTE(review): the value used otherwise
 * comes from an initializer missing from this listing.
 */
2027 static void thread__update_stats(struct thread_trace *ttrace,
2028 int id, struct perf_sample *sample)
2030 struct int_node *inode;
2031 struct stats *stats;
2034 inode = intlist__findnew(ttrace->syscall_stats, id);
2038 stats = inode->priv;
/* First occurrence of this syscall in this thread: allocate its stats. */
2039 if (stats == NULL) {
2040 stats = malloc(sizeof(struct stats));
2044 inode->priv = stats;
2047 if (ttrace->entry_time && sample->time > ttrace->entry_time)
2048 duration = sample->time - ttrace->entry_time;
2050 update_stats(stats, duration);
/*
 * If the thread in trace->current has a syscall entry that was formatted
 * but not printed yet (entry_pending), print it now, terminated by ") ...",
 * and clear the pending flag. The duration shown is the time from the
 * syscall entry to @sample.
 */
2053 static int trace__printf_interrupted_entry(struct trace *trace, struct perf_sample *sample)
2055 struct thread_trace *ttrace;
2059 if (trace->current == NULL)
2062 ttrace = thread__priv(trace->current);
2064 if (!ttrace->entry_pending)
2067 duration = sample->time - ttrace->entry_time;
2069 printed = trace__fprintf_entry_head(trace, trace->current, duration, sample->time, trace->output);
2070 printed += fprintf(trace->output, "%-70s) ...\n", ttrace->entry_str);
2071 ttrace->entry_pending = false;
/*
 * Handler for syscall entry samples.
 *
 * Formats "name(args" into the thread's entry_str buffer (allocated on
 * first use with trace__entry_str_size bytes) and records the entry time.
 * On one path the line is printed right away, provided neither a duration
 * filter nor summary-only mode is active; on the other, entry_pending is
 * set so that a later handler prints it.
 * NOTE(review): the condition choosing between those two paths is on a line
 * missing from this listing.
 */
2076 static int trace__sys_enter(struct trace *trace, struct perf_evsel *evsel,
2077 union perf_event *event __maybe_unused,
2078 struct perf_sample *sample)
2083 struct thread *thread;
2084 int id = perf_evsel__sc_tp_uint(evsel, id, sample), err = -1;
2085 struct syscall *sc = trace__syscall_info(trace, evsel, id);
2086 struct thread_trace *ttrace;
2091 thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
2092 ttrace = thread__trace(thread, trace->output);
2096 args = perf_evsel__sc_tp_ptr(evsel, args, sample);
2098 if (ttrace->entry_str == NULL) {
2099 ttrace->entry_str = malloc(trace__entry_str_size);
2100 if (!ttrace->entry_str)
/* Flush a still pending entry of the current thread before starting a new line. */
2104 if (!trace->summary_only)
2105 trace__printf_interrupted_entry(trace, sample);
2107 ttrace->entry_time = sample->time;
2108 msg = ttrace->entry_str;
2109 printed += scnprintf(msg + printed, trace__entry_str_size - printed, "%s(", sc->name);
2111 printed += syscall__scnprintf_args(sc, msg + printed, trace__entry_str_size - printed,
2112 args, trace, thread);
2115 if (!trace->duration_filter && !trace->summary_only) {
2116 trace__fprintf_entry_head(trace, thread, 1, sample->time, trace->output);
2117 fprintf(trace->output, "%-70s\n", ttrace->entry_str);
2120 ttrace->entry_pending = true;
2121 /* See trace__vfs_getname & trace__sys_exit */
2122 ttrace->filename.pending_open = false;
/* Make trace->current reference this thread, dropping the reference to the previous one. */
2125 if (trace->current != thread) {
2126 thread__put(trace->current);
2127 trace->current = thread__get(thread);
2131 thread__put(thread);
/*
 * Print the callchain of @sample, if it has one, to trace->output.
 *
 * The sample is first resolved with machine__resolve(); a failure there is
 * reported through pr_err(). The result of
 * perf_evsel__fprintf_callchain() is returned on the success path.
 */
2135 static int trace__fprintf_callchain(struct trace *trace, struct perf_evsel *evsel,
2136 struct perf_sample *sample)
2138 struct addr_location al;
2139 /* TODO: user-configurable print_opts */
2140 const unsigned int print_opts = EVSEL__PRINT_SYM |
2142 EVSEL__PRINT_UNKNOWN_AS_ADDR;
2144 if (sample->callchain == NULL)
2147 if (machine__resolve(trace->host, &al, sample) < 0) {
2148 pr_err("Problem processing %s callchain, skipping...\n",
2149 perf_evsel__name(evsel));
2153 return perf_evsel__fprintf_callchain(evsel, sample, &al, 38, print_opts,
2154 scripting_max_stack, trace->output);
/*
 * Handler for syscall exit samples.
 *
 * Updates the per-thread statistics, records the fd -> path mapping for a
 * successful open whose name was captured by vfs_getname, applies the
 * duration filter and then prints the rest of the line: the pending entry
 * text (or a "... [continued]" marker) followed by ") = <result>", with the
 * result rendered according to the flags in sc->fmt.
 */
2157 static int trace__sys_exit(struct trace *trace, struct perf_evsel *evsel,
2158 union perf_event *event __maybe_unused,
2159 struct perf_sample *sample)
2163 struct thread *thread;
2164 int id = perf_evsel__sc_tp_uint(evsel, id, sample), err = -1;
2165 struct syscall *sc = trace__syscall_info(trace, evsel, id);
2166 struct thread_trace *ttrace;
2171 thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
2172 ttrace = thread__trace(thread, trace->output);
2177 thread__update_stats(ttrace, id, sample);
2179 ret = perf_evsel__sc_tp_uint(evsel, ret, sample);
/* Successful open with a name captured by vfs_getname: remember which path the new fd refers to. */
2181 if (id == trace->open_id && ret >= 0 && ttrace->filename.pending_open) {
2182 trace__set_fd_pathname(thread, ret, ttrace->filename.name);
2183 ttrace->filename.pending_open = false;
2184 ++trace->stats.vfs_getname;
2187 ttrace->exit_time = sample->time;
2189 if (ttrace->entry_time) {
2190 duration = sample->time - ttrace->entry_time;
2191 if (trace__filter_duration(trace, duration))
2193 } else if (trace->duration_filter)
2196 if (trace->summary_only)
2199 trace__fprintf_entry_head(trace, thread, duration, sample->time, trace->output);
2201 if (ttrace->entry_pending) {
2202 fprintf(trace->output, "%-70s", ttrace->entry_str);
2204 fprintf(trace->output, " ... [");
2205 color_fprintf(trace->output, PERF_COLOR_YELLOW, "continued");
2206 fprintf(trace->output, "]: %s()", sc->name);
/* No formatting entry for this syscall: print the raw return value. */
2209 if (sc->fmt == NULL) {
2211 fprintf(trace->output, ") = %ld", ret);
/* Failure of a syscall flagged errmsg/errpid: show -1 plus errno name and message. */
2212 } else if (ret < 0 && (sc->fmt->errmsg || sc->fmt->errpid)) {
2213 char bf[STRERR_BUFSIZE];
2214 const char *emsg = strerror_r(-ret, bf, sizeof(bf)),
2215 *e = audit_errno_to_name(-ret);
2217 fprintf(trace->output, ") = -1 %s %s", e, emsg);
2218 } else if (ret == 0 && sc->fmt->timeout)
2219 fprintf(trace->output, ") = 0 Timeout");
2220 else if (sc->fmt->hexret)
2221 fprintf(trace->output, ") = %#lx", ret);
/* The return value is a pid: append the comm of that thread when it is known. */
2222 else if (sc->fmt->errpid) {
2223 struct thread *child = machine__find_thread(trace->host, ret, ret);
2225 if (child != NULL) {
2226 fprintf(trace->output, ") = %ld", ret);
2227 if (child->comm_set)
2228 fprintf(trace->output, " (%s)", thread__comm_str(child));
2234 fputc('\n', trace->output);
2236 trace__fprintf_callchain(trace, evsel, sample);
2238 ttrace->entry_pending = false;
2241 thread__put(thread);
/*
 * Handler for the vfs_getname probe.
 *
 * Saves the "pathname" payload in ttrace->filename.name (growing that
 * buffer when needed) and flags pending_open for trace__sys_exit(). When a
 * filename argument position was recorded earlier (filename.ptr set by
 * thread__set_filename_pos()), the name is also spliced into entry_str at
 * that position; if it does not fit in the space left, only the tail of the
 * name is inserted.
 */
2245 static int trace__vfs_getname(struct trace *trace, struct perf_evsel *evsel,
2246 union perf_event *event __maybe_unused,
2247 struct perf_sample *sample)
2249 struct thread *thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
2250 struct thread_trace *ttrace;
2251 size_t filename_len, entry_str_len, to_move;
2252 ssize_t remaining_space;
2254 const char *filename = perf_evsel__rawptr(evsel, sample, "pathname");
2259 ttrace = thread__priv(thread);
2263 filename_len = strlen(filename);
/* Grow the saved-name buffer only when this name is longer than any seen so far. */
2265 if (ttrace->filename.namelen < filename_len) {
2266 char *f = realloc(ttrace->filename.name, filename_len + 1);
2271 ttrace->filename.namelen = filename_len;
2272 ttrace->filename.name = f;
2275 strcpy(ttrace->filename.name, filename);
2276 ttrace->filename.pending_open = true;
/* No filename argument is waiting to be filled in. */
2278 if (!ttrace->filename.ptr)
2281 entry_str_len = strlen(ttrace->entry_str);
2282 remaining_space = trace__entry_str_size - entry_str_len - 1; /* \0 */
2283 if (remaining_space <= 0)
/* Too long for the space left: skip the beginning, keep the end of the name. */
2286 if (filename_len > (size_t)remaining_space) {
2287 filename += filename_len - remaining_space;
2288 filename_len = remaining_space;
/* Open a gap at the recorded position (moving the terminator too) and copy the name in. */
2291 to_move = entry_str_len - ttrace->filename.entry_str_pos + 1; /* \0 */
2292 pos = ttrace->entry_str + ttrace->filename.entry_str_pos;
2293 memmove(pos + filename_len, pos, to_move);
2294 memcpy(pos, filename, filename_len);
2296 ttrace->filename.ptr = 0;
2297 ttrace->filename.entry_str_pos = 0;
/*
 * Handler for sched_stat_runtime samples: converts the "runtime" field to
 * milliseconds and adds it to both the thread's and the global runtime_ms
 * totals.
 *
 * A second path visible below dumps the comm, pid, runtime and vruntime
 * fields to trace->output; the check leading to it is on a line missing
 * from this listing.
 */
2302 static int trace__sched_stat_runtime(struct trace *trace, struct perf_evsel *evsel,
2303 union perf_event *event __maybe_unused,
2304 struct perf_sample *sample)
2306 u64 runtime = perf_evsel__intval(evsel, sample, "runtime");
2307 double runtime_ms = (double)runtime / NSEC_PER_MSEC;
2308 struct thread *thread = machine__findnew_thread(trace->host,
2311 struct thread_trace *ttrace = thread__trace(thread, trace->output);
2316 ttrace->runtime_ms += runtime_ms;
2317 trace->runtime_ms += runtime_ms;
2318 thread__put(thread);
2322 fprintf(trace->output, "%s: comm=%s,pid=%u,runtime=%" PRIu64 ",vruntime=%" PRIu64 ")\n",
2324 perf_evsel__strval(evsel, sample, "comm"),
2325 (pid_t)perf_evsel__intval(evsel, sample, "pid"),
2327 perf_evsel__intval(evsel, sample, "vruntime"));
2328 thread__put(thread);
/*
 * Callback for print_binary() (see bpf_output__fprintf()); @extra is the
 * FILE to write to.
 *
 * For BINARY_PRINT_CHAR_DATA the byte in @val is printed as a character
 * when isprint() accepts it and as '.' otherwise. All other operations are
 * grouped in one list of case labels; NOTE(review): what that arm does is
 * on lines missing from this listing.
 */
2332 static void bpf_output__printer(enum binary_printer_ops op,
2333 unsigned int val, void *extra)
2335 FILE *output = extra;
2336 unsigned char ch = (unsigned char)val;
2339 case BINARY_PRINT_CHAR_DATA:
2340 fprintf(output, "%c", isprint(ch) ? ch : '.');
2342 case BINARY_PRINT_DATA_BEGIN:
2343 case BINARY_PRINT_LINE_BEGIN:
2344 case BINARY_PRINT_ADDR:
2345 case BINARY_PRINT_NUM_DATA:
2346 case BINARY_PRINT_NUM_PAD:
2347 case BINARY_PRINT_SEP:
2348 case BINARY_PRINT_CHAR_PAD:
2349 case BINARY_PRINT_LINE_END:
2350 case BINARY_PRINT_DATA_END:
2356 static void bpf_output__fprintf(struct trace *trace,
2357 struct perf_sample *sample)
2359 print_binary(sample->raw_data, sample->raw_size, 8,
2360 bpf_output__printer, trace->output);
/*
 * Handler for events other than the syscall tracepoints.
 *
 * Flushes a pending interrupted syscall entry, prints the timestamp, a
 * blank duration column when syscalls are traced as well, the event name
 * and then the payload: a byte dump for BPF output events, the formatted
 * tracepoint fields when the evsel has a tp_format. The line is closed with
 * ")" and followed by the callchain, if any.
 */
2363 static int trace__event_handler(struct trace *trace, struct perf_evsel *evsel,
2364 union perf_event *event __maybe_unused,
2365 struct perf_sample *sample)
2367 trace__printf_interrupted_entry(trace, sample);
2368 trace__fprintf_tstamp(trace, sample->time, trace->output);
/* Placeholder where syscall lines show their duration. */
2370 if (trace->trace_syscalls)
2371 fprintf(trace->output, "( ): ");
2373 fprintf(trace->output, "%s:", evsel->name);
2375 if (perf_evsel__is_bpf_output(evsel)) {
2376 bpf_output__fprintf(trace, sample);
2377 } else if (evsel->tp_format) {
2378 event_format__fprintf(evsel->tp_format, sample->cpu,
2379 sample->raw_data, sample->raw_size,
2383 fprintf(trace->output, ")\n");
2385 trace__fprintf_callchain(trace, evsel, sample);
/*
 * Print the location described by @al to @f.
 *
 * "<dso long name>@" is printed when the map is known and either verbose or
 * @print_dso is set. "<symbol>+0x<offset>" is printed when the symbol is
 * known and either verbose or @print_sym is set. The remaining statements
 * print al->addr or sample->addr in hex; the conditions selecting them are
 * on lines missing from this listing.
 */
2390 static void print_location(FILE *f, struct perf_sample *sample,
2391 struct addr_location *al,
2392 bool print_dso, bool print_sym)
2395 if ((verbose || print_dso) && al->map)
2396 fprintf(f, "%s@", al->map->dso->long_name);
2398 if ((verbose || print_sym) && al->sym)
2399 fprintf(f, "%s+0x%" PRIx64, al->sym->name,
2400 al->addr - al->sym->start);
2402 fprintf(f, "0x%" PRIx64, al->addr);
2404 fprintf(f, "0x%" PRIx64, sample->addr);
/*
 * Handler for the software page fault events.
 *
 * Unless summary-only mode is active it prints one line per fault:
 * "<maj|min>fault [<code location>] => <faulting address location> (<map
 * type><level>)". The faulting address is looked up as a variable mapping
 * first, and a lookup as a function mapping is also visible below.
 * NOTE(review): the per-thread fault counting and the map_type updates are
 * on lines missing from this listing.
 */
2407 static int trace__pgfault(struct trace *trace,
2408 struct perf_evsel *evsel,
2409 union perf_event *event __maybe_unused,
2410 struct perf_sample *sample)
2412 struct thread *thread;
2413 struct addr_location al;
2414 char map_type = 'd';
2415 struct thread_trace *ttrace;
2418 thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
2419 ttrace = thread__trace(thread, trace->output);
2423 if (evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MAJ)
2428 if (trace->summary_only)
2431 thread__find_addr_location(thread, sample->cpumode, MAP__FUNCTION,
2434 trace__fprintf_entry_head(trace, thread, 0, sample->time, trace->output);
2436 fprintf(trace->output, "%sfault [",
2437 evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MAJ ?
2440 print_location(trace->output, sample, &al, false, true);
2442 fprintf(trace->output, "] => ");
2444 thread__find_addr_location(thread, sample->cpumode, MAP__VARIABLE,
2448 thread__find_addr_location(thread, sample->cpumode,
2449 MAP__FUNCTION, sample->addr, &al);
2457 print_location(trace->output, sample, &al, true, false);
2459 fprintf(trace->output, " (%c%c)\n", map_type, al.level);
2463 thread__put(thread);
/*
 * Decide whether @sample is filtered out by the pid/tid lists.
 *
 * The first test matches when the sample's pid or tid is found in the
 * respective list; the second matches when at least one list exists.
 * NOTE(review): the values returned for each test are on lines missing from
 * this listing.
 */
2467 static bool skip_sample(struct trace *trace, struct perf_sample *sample)
2469 if ((trace->pid_list && intlist__find(trace->pid_list, sample->pid)) ||
2470 (trace->tid_list && intlist__find(trace->tid_list, sample->tid)))
2473 if (trace->pid_list || trace->tid_list)
2479 static void trace__set_base_time(struct trace *trace,
2480 struct perf_evsel *evsel,
2481 struct perf_sample *sample)
2484 * BPF events were not setting PERF_SAMPLE_TIME, so be more robust
2485 * and don't use sample->time unconditionally, we may end up having
2486 * some other event in the future without PERF_SAMPLE_TIME for good
2487 * reason, i.e. we may not be interested in its timestamps, just in
2488 * it taking place, picking some piece of information when it
2489 * appears in our event stream (vfs_getname comes to mind).
2491 if (trace->base_time == 0 && !trace->full_time &&
2492 (evsel->attr.sample_type & PERF_SAMPLE_TIME))
2493 trace->base_time = sample->time;
/*
 * perf_tool sample callback: recovers the struct trace embedding @tool,
 * drops samples rejected by skip_sample(), sets the base time when needed
 * and invokes the handler stored in evsel->handler.
 */
2496 static int trace__process_sample(struct perf_tool *tool,
2497 union perf_event *event,
2498 struct perf_sample *sample,
2499 struct perf_evsel *evsel,
2500 struct machine *machine __maybe_unused)
2502 struct trace *trace = container_of(tool, struct trace, tool);
2505 tracepoint_handler handler = evsel->handler;
2507 if (skip_sample(trace, sample))
2510 trace__set_base_time(trace, evsel, sample);
2514 handler(trace, evsel, event, sample);
/*
 * Build trace->pid_list and trace->tid_list from the pid/tid target strings
 * in trace->opts.target, when those are set. A string that intlist__new()
 * cannot turn into a list is reported through pr_err().
 */
2520 static int parse_target_str(struct trace *trace)
2522 if (trace->opts.target.pid) {
2523 trace->pid_list = intlist__new(trace->opts.target.pid);
2524 if (trace->pid_list == NULL) {
2525 pr_err("Error parsing process id string\n");
2530 if (trace->opts.target.tid) {
2531 trace->tid_list = intlist__new(trace->opts.target.tid);
2532 if (trace->tid_list == NULL) {
2533 pr_err("Error parsing thread id string\n");
/*
 * Build an argument vector and hand it to cmd_record().
 *
 * The vector consists of the fixed record_args, "-e" plus the syscall
 * enter/exit tracepoints when syscalls are traced (raw_syscalls:* if that
 * tracepoint is valid, else syscalls:*), "-e major-faults" and/or
 * "-e minor-faults" according to trace->trace_pgfaults, and finally the
 * caller's @argv.
 */
2541 static int trace__record(struct trace *trace, int argc, const char **argv)
2543 unsigned int rec_argc, i, j;
2544 const char **rec_argv;
2545 const char * const record_args[] = {
2552 const char * const sc_args[] = { "-e", };
2553 unsigned int sc_args_nr = ARRAY_SIZE(sc_args);
2554 const char * const majpf_args[] = { "-e", "major-faults" };
2555 unsigned int majpf_args_nr = ARRAY_SIZE(majpf_args);
2556 const char * const minpf_args[] = { "-e", "minor-faults" };
2557 unsigned int minpf_args_nr = ARRAY_SIZE(minpf_args);
2559 /* +1 is for the event string below */
2560 rec_argc = ARRAY_SIZE(record_args) + sc_args_nr + 1 +
2561 majpf_args_nr + minpf_args_nr + argc;
/* Sized for the largest possible vector plus one extra zeroed slot. */
2562 rec_argv = calloc(rec_argc + 1, sizeof(char *));
2564 if (rec_argv == NULL)
2568 for (i = 0; i < ARRAY_SIZE(record_args); i++)
2569 rec_argv[j++] = record_args[i];
2571 if (trace->trace_syscalls) {
2572 for (i = 0; i < sc_args_nr; i++)
2573 rec_argv[j++] = sc_args[i];
2575 /* event string may be different for older kernels - e.g., RHEL6 */
2576 if (is_valid_tracepoint("raw_syscalls:sys_enter"))
2577 rec_argv[j++] = "raw_syscalls:sys_enter,raw_syscalls:sys_exit";
2578 else if (is_valid_tracepoint("syscalls:sys_enter"))
2579 rec_argv[j++] = "syscalls:sys_enter,syscalls:sys_exit";
2581 pr_err("Neither raw_syscalls nor syscalls events exist.\n");
2586 if (trace->trace_pgfaults & TRACE_PFMAJ)
2587 for (i = 0; i < majpf_args_nr; i++)
2588 rec_argv[j++] = majpf_args[i];
2590 if (trace->trace_pgfaults & TRACE_PFMIN)
2591 for (i = 0; i < minpf_args_nr; i++)
2592 rec_argv[j++] = minpf_args[i];
2594 for (i = 0; i < (unsigned int)argc; i++)
2595 rec_argv[j++] = argv[i];
/* j is the number of slots actually filled. */
2597 return cmd_record(j, rec_argv, NULL);
/* Forward declaration: trace__run() and trace__replay() call this before its definition. */
2600 static size_t trace__fprintf_thread_summary(struct trace *trace, FILE *fp);
/*
 * Try to add the probe:vfs_getname tracepoint to @evlist, handled by
 * trace__vfs_getname(). The evsel is deleted again when it has no
 * pathname field. trace__run() stores the boolean result in
 * trace->vfs_getname.
 */
2602 static bool perf_evlist__add_vfs_getname(struct perf_evlist *evlist)
2604 struct perf_evsel *evsel = perf_evsel__newtp("probe", "vfs_getname");
/* Without a pathname field the probe is of no use to the handler. */
2609 if (perf_evsel__field(evsel, "pathname") == NULL) {
2610 perf_evsel__delete(evsel);
2614 evsel->handler = trace__vfs_getname;
2615 perf_evlist__add(evlist, evsel);
/*
 * Create a software event evsel for the page fault counter selected by
 * config, attach trace__pgfault() as its handler and add it to @evlist.
 * trace__run() passes PERF_COUNT_SW_PAGE_FAULTS_MAJ or
 * PERF_COUNT_SW_PAGE_FAULTS_MIN as the config value.
 */
2619 static int perf_evlist__add_pgfault(struct perf_evlist *evlist,
2622 struct perf_evsel *evsel;
2623 struct perf_event_attr attr = {
2624 .type = PERF_TYPE_SOFTWARE,
2628 attr.config = config;
/* A sample period of 1 asks for a sample on every occurrence of the event. */
2629 attr.sample_period = 1;
2631 event_attr_init(&attr);
2633 evsel = perf_evsel__new(&attr);
2637 evsel->handler = trace__pgfault;
2638 perf_evlist__add(evlist, evsel);
/*
 * Dispatch one event read from the ring buffers in live mode.
 *
 * Anything that is not a PERF_RECORD_SAMPLE goes to trace__process_event()
 * on the host machine. Samples are mapped to their evsel via sample->id;
 * an unknown id is reported on trace->output. A tracepoint sample without
 * raw_data is reported as well, other samples reach the evsel handler.
 */
2643 static void trace__handle_event(struct trace *trace, union perf_event *event, struct perf_sample *sample)
2645 const u32 type = event->header.type;
2646 struct perf_evsel *evsel;
2648 if (type != PERF_RECORD_SAMPLE) {
2649 trace__process_event(trace, trace->host, event, sample);
2653 evsel = perf_evlist__id2evsel(trace->evlist, sample->id);
2654 if (evsel == NULL) {
2655 fprintf(trace->output, "Unknown tp ID %" PRIu64 ", skipping...\n", sample->id);
2659 trace__set_base_time(trace, evsel, sample);
/* Tracepoint handlers read the raw payload, so a sample without one cannot be decoded. */
2661 if (evsel->attr.type == PERF_TYPE_TRACEPOINT &&
2662 sample->raw_data == NULL) {
2663 fprintf(trace->output, "%s sample with no payload for tid: %d, cpu %d, raw_size=%d, skipping...\n",
2664 perf_evsel__name(evsel), sample->tid,
2665 sample->cpu, sample->raw_size);
2667 tracepoint_handler handler = evsel->handler;
2668 handler(trace, evsel, event, sample);
/*
 * Create the sys_enter and sys_exit syscall tracepoint evsels, set up the
 * args pointer field on sys_enter and the ret unsigned field on sys_exit,
 * add both to trace->evlist and remember them in trace->syscalls.events.
 *
 * When callchains were requested and kernel_syscallchains is off, kernel
 * frames are excluded from the sys_exit callchain.
 *
 * Error unwinding: out_delete_sys_exit deletes sys_exit and then falls
 * through to out_delete_sys_enter, which deletes sys_enter.
 */
2672 static int trace__add_syscall_newtp(struct trace *trace)
2675 struct perf_evlist *evlist = trace->evlist;
2676 struct perf_evsel *sys_enter, *sys_exit;
2678 sys_enter = perf_evsel__syscall_newtp("sys_enter", trace__sys_enter);
2679 if (sys_enter == NULL)
2682 if (perf_evsel__init_sc_tp_ptr_field(sys_enter, args))
2683 goto out_delete_sys_enter;
2685 sys_exit = perf_evsel__syscall_newtp("sys_exit", trace__sys_exit);
2686 if (sys_exit == NULL)
2687 goto out_delete_sys_enter;
2689 if (perf_evsel__init_sc_tp_uint_field(sys_exit, ret))
2690 goto out_delete_sys_exit;
2692 perf_evlist__add(evlist, sys_enter);
2693 perf_evlist__add(evlist, sys_exit);
2695 if (trace->opts.callgraph_set && !trace->kernel_syscallchains) {
2697 * We're interested only in the user space callchain
2698 * leading to the syscall, allow overriding that for
2699 * debugging reasons using --kernel_syscall_callchains
2701 sys_exit->attr.exclude_callchain_kernel = 1;
2704 trace->syscalls.events.sys_enter = sys_enter;
2705 trace->syscalls.events.sys_exit = sys_exit;
2711 out_delete_sys_exit:
2712 perf_evsel__delete_priv(sys_exit);
2713 out_delete_sys_enter:
2714 perf_evsel__delete_priv(sys_enter);
/*
 * Build a tracepoint filter expression on the id field from the syscall
 * ids in trace->ev_qualifier_ids and append it, joined with &&, to the
 * sys_enter and then the sys_exit evsel filters. The second argument to
 * asprintf_expr_inout_ints() is the inverse of trace->not_ev_qualifier.
 */
2718 static int trace__set_ev_qualifier_filter(struct trace *trace)
2721 char *filter = asprintf_expr_inout_ints("id", !trace->not_ev_qualifier,
2722 trace->ev_qualifier_ids.nr,
2723 trace->ev_qualifier_ids.entries);
/* sys_exit only gets the filter when appending to sys_enter returned zero. */
2728 if (!perf_evsel__append_filter(trace->syscalls.events.sys_enter, "&&", filter))
2729 err = perf_evsel__append_filter(trace->syscalls.events.sys_exit, "&&", filter);
/*
 * Live tracing mode.
 *
 * Outline of the visible steps:
 *  - add the requested events to trace->evlist: syscall tracepoints,
 *    probe:vfs_getname, major and minor page faults, sched_stat_runtime;
 *  - create the target maps, initialize symbols and configure the evlist;
 *  - install the SIGCHLD and SIGINT handlers and prepare the workload;
 *  - open the events, apply the BPF object config, set the pid filters and
 *    the syscall id filter, apply the filters and mmap the ring buffers;
 *  - loop over every mmap, parsing each event and handing it to
 *    trace__handle_event(), polling when no new events showed up;
 *  - afterwards print the thread summary and tool stats and delete the
 *    evlist.
 *
 * The labels at the end are error exits: each one formats or prints a
 * message on trace->output and then jumps to out_delete_evlist.
 *
 * @argc, @argv: workload command line; forks is true when argc > 0.
 */
2739 static int trace__run(struct trace *trace, int argc, const char **argv)
2741 struct perf_evlist *evlist = trace->evlist;
2742 struct perf_evsel *evsel;
2744 unsigned long before;
2745 const bool forks = argc > 0;
2746 bool draining = false;
2750 if (trace->trace_syscalls && trace__add_syscall_newtp(trace))
2751 goto out_error_raw_syscalls;
/* Remember whether the vfs_getname probe could be added. */
2753 if (trace->trace_syscalls)
2754 trace->vfs_getname = perf_evlist__add_vfs_getname(evlist);
2756 if ((trace->trace_pgfaults & TRACE_PFMAJ) &&
2757 perf_evlist__add_pgfault(evlist, PERF_COUNT_SW_PAGE_FAULTS_MAJ)) {
2761 if ((trace->trace_pgfaults & TRACE_PFMIN) &&
2762 perf_evlist__add_pgfault(evlist, PERF_COUNT_SW_PAGE_FAULTS_MIN))
2766 perf_evlist__add_newtp(evlist, "sched", "sched_stat_runtime",
2767 trace__sched_stat_runtime))
2768 goto out_error_sched_stat_runtime;
2770 err = perf_evlist__create_maps(evlist, &trace->opts.target);
2772 fprintf(trace->output, "Problems parsing the target to trace, check your options!\n");
2773 goto out_delete_evlist;
2776 err = trace__symbols_init(trace, evlist);
2778 fprintf(trace->output, "Problems initializing symbol libraries!\n");
2779 goto out_delete_evlist;
2782 perf_evlist__config(evlist, &trace->opts, NULL);
2784 if (trace->opts.callgraph_set && trace->syscalls.events.sys_exit) {
2785 perf_evsel__config_callchain(trace->syscalls.events.sys_exit,
2786 &trace->opts, &callchain_param);
2788 * Now we have evsels with different sample_ids, use
2789 * PERF_SAMPLE_IDENTIFIER to map from sample to evsel
2790 * from a fixed position in each ring buffer record.
2792 * As of this the changeset introducing this comment, this
2793 * isn't strictly needed, as the fields that can come before
2794 * PERF_SAMPLE_ID are all used, but we'll probably disable
2795 * some of those for things like copying the payload of
2796 * pointer syscall arguments, and for vfs_getname we don't
2797 * need PERF_SAMPLE_ADDR and PERF_SAMPLE_IP, so do this
2798 * here as a warning we need to use PERF_SAMPLE_IDENTIFIER.
2800 perf_evlist__set_sample_bit(evlist, IDENTIFIER);
2801 perf_evlist__reset_sample_bit(evlist, ID);
2804 signal(SIGCHLD, sig_handler);
2805 signal(SIGINT, sig_handler);
2808 err = perf_evlist__prepare_workload(evlist, &trace->opts.target,
2811 fprintf(trace->output, "Couldn't run the workload!\n");
2812 goto out_delete_evlist;
2816 err = perf_evlist__open(evlist);
2818 goto out_error_open;
2820 err = bpf__apply_obj_config();
2822 char errbuf[BUFSIZ];
2824 bpf__strerror_apply_obj_config(err, errbuf, sizeof(errbuf));
2825 pr_err("ERROR: Apply config to BPF failed: %s\n",
2827 goto out_error_open;
2831 * Better not use !target__has_task() here because we need to cover the
2832 * case where no threads were specified in the command line, but a
2833 * workload was, and in that case we will fill in the thread_map when
2834 * we fork the workload in perf_evlist__prepare_workload.
2836 if (trace->filter_pids.nr > 0)
2837 err = perf_evlist__set_filter_pids(evlist, trace->filter_pids.nr, trace->filter_pids.entries);
2838 else if (thread_map__pid(evlist->threads, 0) == -1)
2839 err = perf_evlist__set_filter_pid(evlist, getpid());
2844 if (trace->ev_qualifier_ids.nr > 0) {
2845 err = trace__set_ev_qualifier_filter(trace);
2849 pr_debug("event qualifier tracepoint filter: %s\n",
2850 trace->syscalls.events.sys_exit->filter);
/* evsel is an out parameter here; the out_error_apply_filters exit reads it to name the failing event. */
2853 err = perf_evlist__apply_filters(evlist, &evsel);
2855 goto out_error_apply_filters;
2857 err = perf_evlist__mmap(evlist, trace->opts.mmap_pages, false);
2859 goto out_error_mmap;
2861 if (!target__none(&trace->opts.target))
2862 perf_evlist__enable(evlist);
2865 perf_evlist__start_workload(evlist);
2867 trace->multiple_threads = thread_map__pid(evlist->threads, 0) == -1 ||
2868 evlist->threads->nr > 1 ||
2869 perf_evlist__first(evlist)->attr.inherit;
/* Snapshot of the event count, compared further down to detect a pass that saw no new events. */
2871 before = trace->nr_events;
2873 for (i = 0; i < evlist->nr_mmaps; i++) {
2874 union perf_event *event;
2876 while ((event = perf_evlist__mmap_read(evlist, i)) != NULL) {
2877 struct perf_sample sample;
2881 err = perf_evlist__parse_sample(evlist, event, &sample);
2883 fprintf(trace->output, "Can't parse sample, err = %d, skipping...\n", err);
2887 trace__handle_event(trace, event, &sample);
2889 perf_evlist__mmap_consume(evlist, i);
2894 if (done && !draining) {
2895 perf_evlist__disable(evlist);
/* The poll timeout is 100 once done is set and -1 otherwise. */
2901 if (trace->nr_events == before) {
2902 int timeout = done ? 100 : -1;
2904 if (!draining && perf_evlist__poll(evlist, timeout) > 0) {
2905 if (perf_evlist__filter_pollfd(evlist, POLLERR | POLLHUP) == 0)
2915 thread__zput(trace->current);
2917 perf_evlist__disable(evlist);
2921 trace__fprintf_thread_summary(trace, trace->output);
2923 if (trace->show_tool_stats) {
2924 fprintf(trace->output, "Stats:\n "
2925 " vfs_getname : %" PRIu64 "\n"
2926 " proc_getname: %" PRIu64 "\n",
2927 trace->stats.vfs_getname,
2928 trace->stats.proc_getname);
2933 perf_evlist__delete(evlist);
2934 trace->evlist = NULL;
2935 trace->live = false;
2938 char errbuf[BUFSIZ];
2940 out_error_sched_stat_runtime:
2941 tracing_path__strerror_open_tp(errno, errbuf, sizeof(errbuf), "sched", "sched_stat_runtime");
2944 out_error_raw_syscalls:
2945 tracing_path__strerror_open_tp(errno, errbuf, sizeof(errbuf), "raw_syscalls", "sys_(enter|exit)");
2949 perf_evlist__strerror_mmap(evlist, errno, errbuf, sizeof(errbuf));
2953 perf_evlist__strerror_open(evlist, errno, errbuf, sizeof(errbuf));
2956 fprintf(trace->output, "%s\n", errbuf);
2957 goto out_delete_evlist;
/* NOTE(review): the return value of strerror_r() is printed with %s, which assumes the GNU variant that returns char pointer -- confirm. */
2959 out_error_apply_filters:
2960 fprintf(trace->output,
2961 "Failed to set filter \"%s\" on event %s with %d (%s)\n",
2962 evsel->filter, perf_evsel__name(evsel), errno,
2963 strerror_r(errno, errbuf, sizeof(errbuf)));
2964 goto out_delete_evlist;
2967 fprintf(trace->output, "Not enough memory to run!\n");
2968 goto out_delete_evlist;
2971 fprintf(trace->output, "errno=%d,%s\n", errno, strerror(errno));
2972 goto out_delete_evlist;
/*
 * Replay mode: process a previously recorded perf data file instead of
 * tracing live.
 *
 * Installs the perf_tool callbacks (samples go to trace__process_sample(),
 * the rest to the generic perf_event__process_* helpers), opens a session
 * in read mode, initializes symbols, and wires up the handlers:
 *  - probe:vfs_getname via the handlers table;
 *  - sys_enter and sys_exit, looked up under raw_syscalls first and under
 *    syscalls as the fallback for older kernels;
 *  - software page fault events, which get trace__pgfault().
 * Then it parses the pid and tid target strings, processes all events,
 * prints the thread summary when trace->summary is set and no processing
 * error was reported, and deletes the session.
 */
2975 static int trace__replay(struct trace *trace)
2977 const struct perf_evsel_str_handler handlers[] = {
2978 { "probe:vfs_getname", trace__vfs_getname, },
2980 struct perf_data_file file = {
2982 .mode = PERF_DATA_MODE_READ,
2983 .force = trace->force,
2985 struct perf_session *session;
2986 struct perf_evsel *evsel;
2989 trace->tool.sample = trace__process_sample;
2990 trace->tool.mmap = perf_event__process_mmap;
2991 trace->tool.mmap2 = perf_event__process_mmap2;
2992 trace->tool.comm = perf_event__process_comm;
2993 trace->tool.exit = perf_event__process_exit;
2994 trace->tool.fork = perf_event__process_fork;
2995 trace->tool.attr = perf_event__process_attr;
2996 trace->tool.tracing_data = perf_event__process_tracing_data;
2997 trace->tool.build_id = perf_event__process_build_id;
/* Request ordered event delivery, with ordering that depends on timestamps. */
2999 trace->tool.ordered_events = true;
3000 trace->tool.ordering_requires_timestamps = true;
3002 /* add tid to output */
3003 trace->multiple_threads = true;
3005 session = perf_session__new(&file, false, &trace->tool);
3006 if (session == NULL)
3009 if (symbol__init(&session->header.env) < 0)
/* In replay mode the host machine is the one recorded in the session. */
3012 trace->host = &session->machines.host;
3014 err = perf_session__set_tracepoints_handlers(session, handlers);
3018 evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
3019 "raw_syscalls:sys_enter");
3020 /* older kernels have syscalls tp versus raw_syscalls */
3022 evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
3023 "syscalls:sys_enter");
3026 (perf_evsel__init_syscall_tp(evsel, trace__sys_enter) < 0 ||
3027 perf_evsel__init_sc_tp_ptr_field(evsel, args))) {
3028 pr_err("Error during initialize raw_syscalls:sys_enter event\n");
3032 evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
3033 "raw_syscalls:sys_exit");
3035 evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
3036 "syscalls:sys_exit");
3038 (perf_evsel__init_syscall_tp(evsel, trace__sys_exit) < 0 ||
3039 perf_evsel__init_sc_tp_uint_field(evsel, ret))) {
3040 pr_err("Error during initialize raw_syscalls:sys_exit event\n");
/* Every software page fault flavour (major, minor, combined) is routed to trace__pgfault(). */
3044 evlist__for_each(session->evlist, evsel) {
3045 if (evsel->attr.type == PERF_TYPE_SOFTWARE &&
3046 (evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MAJ ||
3047 evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MIN ||
3048 evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS))
3049 evsel->handler = trace__pgfault;
3052 err = parse_target_str(trace);
3058 err = perf_session__process_events(session);
3060 pr_err("Failed to process events, error %d", err);
3062 else if (trace->summary)
3063 trace__fprintf_thread_summary(trace, trace->output);
3066 perf_session__delete(session);
/* Print the heading of the per-thread summary to @fp; printed holds the fprintf() result. */
3071 static size_t trace__fprintf_threads_header(FILE *fp)
3075 printed = fprintf(fp, "\n Summary of events:\n\n");
/*
 * Print the per-syscall statistics table of one thread to @fp.
 *
 * Walks the int_nodes of ttrace->syscall_stats; the node key (inode->i)
 * indexes trace->syscalls.table for the syscall name and inode->priv holds
 * the struct stats. Each row shows the call count, the total time
 * (avg * n), min, avg and max, and the standard deviation as a percentage
 * of the average. Times are divided by NSEC_PER_MSEC to match the msec
 * column headers.
 *
 * printed accumulates the fprintf() results.
 */
3080 static size_t thread__dump_stats(struct thread_trace *ttrace,
3081 struct trace *trace, FILE *fp)
3083 struct stats *stats;
3086 struct int_node *inode = intlist__first(ttrace->syscall_stats);
3091 printed += fprintf(fp, "\n");
3093 printed += fprintf(fp, " syscall calls total min avg max stddev\n");
3094 printed += fprintf(fp, " (msec) (msec) (msec) (msec) (%%)\n");
3095 printed += fprintf(fp, " --------------- -------- --------- --------- --------- --------- ------\n");
3097 /* each int_node is a syscall */
3099 stats = inode->priv;
3101 double min = (double)(stats->min) / NSEC_PER_MSEC;
3102 double max = (double)(stats->max) / NSEC_PER_MSEC;
3103 double avg = avg_stats(stats);
3105 u64 n = (u64) stats->n;
/* The percentage is computed before avg is scaled; a zero average yields 0.0 instead of a division by zero. */
3107 pct = avg ? 100.0 * stddev_stats(stats)/avg : 0.0;
3108 avg /= NSEC_PER_MSEC;
3110 sc = &trace->syscalls.table[inode->i];
3111 printed += fprintf(fp, " %-15s", sc->name);
3112 printed += fprintf(fp, " %8" PRIu64 " %9.3f %9.3f %9.3f",
3113 n, avg * n, min, avg);
3114 printed += fprintf(fp, " %9.3f %9.2f%%\n", max, pct);
3117 inode = intlist__next(inode);
3120 printed += fprintf(fp, "\n\n");
/*
 * struct used to pass data to per-thread function
 *
 * trace__fprintf_thread_summary() fills one in and passes it as the priv
 * pointer of machine__for_each_thread(); trace__fprintf_one_thread() reads
 * its fp, printed and trace members.
 */
3126 struct summary_data {
3128 struct trace *trace;
/*
 * Per-thread callback for machine__for_each_thread(): prints the summary
 * line of one thread followed by its syscall statistics table.
 *
 * @thread: thread being summarized; its thread_trace comes from thread__priv()
 * @priv:   struct summary_data carrying the output stream, the running
 *          character count and the struct trace
 *
 * The line shows the comm, tid, event count, the share of all events in
 * percent, the major and minor fault counts and the runtime in msec.
 *
 * NOTE(review): printed is seeded from data->printed and later added back
 * into data->printed, so the earlier total appears to be counted twice --
 * confirm whether that is intended.
 */
3132 static int trace__fprintf_one_thread(struct thread *thread, void *priv)
3134 struct summary_data *data = priv;
3135 FILE *fp = data->fp;
3136 size_t printed = data->printed;
3137 struct trace *trace = data->trace;
3138 struct thread_trace *ttrace = thread__priv(thread);
/* Percentage of all traced events that belong to this thread. */
3144 ratio = (double)ttrace->nr_events / trace->nr_events * 100.0;
3146 printed += fprintf(fp, " %s (%d), ", thread__comm_str(thread), thread->tid);
3147 printed += fprintf(fp, "%lu events, ", ttrace->nr_events);
3148 printed += fprintf(fp, "%.1f%%", ratio);
3150 printed += fprintf(fp, ", %lu majfaults", ttrace->pfmaj);
3152 printed += fprintf(fp, ", %lu minfaults", ttrace->pfmin);
3153 printed += fprintf(fp, ", %.3f msec\n", ttrace->runtime_ms);
3154 printed += thread__dump_stats(ttrace, trace, fp);
3156 data->printed += printed;
/*
 * Print the summary of all threads known to trace->host on @fp: first the
 * header, then one trace__fprintf_one_thread() call per thread.
 *
 * Returns data.printed, the count accumulated by the header and the
 * per-thread callback.
 */
3161 static size_t trace__fprintf_thread_summary(struct trace *trace, FILE *fp)
3163 struct summary_data data = {
3167 data.printed = trace__fprintf_threads_header(fp);
3169 machine__for_each_thread(trace->host, trace__fprintf_one_thread, &data);
3171 return data.printed;
/*
 * Option callback for --duration: stores the parsed value in
 * trace->duration_filter. opt->value is the struct trace registered with
 * the option in cmd_trace().
 *
 * NOTE(review): atof() gives no error indication, so input that is not a
 * number ends up as a filter of 0.
 */
3174 static int trace__set_duration(const struct option *opt, const char *str,
3175 int unset __maybe_unused)
3177 struct trace *trace = opt->value;
3179 trace->duration_filter = atof(str);
/*
 * Option callback for --filter-pids: parses the CSV list in @str and fills
 * trace->filter_pids.
 *
 * The entries array has one more slot than the list has items: slot 0 is
 * the pid of this process (getpid()), followed by the pids from the list.
 * The temporary intlist is deleted once the array is filled.
 */
3183 static int trace__set_filter_pids(const struct option *opt, const char *str,
3184 int unset __maybe_unused)
3188 struct trace *trace = opt->value;
3190 * FIXME: introduce a intarray class, plain parse csv and create a
3191 * { int nr, int entries[] } struct...
3193 struct intlist *list = intlist__new(str);
/* One extra slot for our own pid, stored at index 0 below. */
3198 i = trace->filter_pids.nr = intlist__nr_entries(list) + 1;
3199 trace->filter_pids.entries = calloc(i, sizeof(pid_t));
3201 if (trace->filter_pids.entries == NULL)
3204 trace->filter_pids.entries[0] = getpid();
/* entries[i] comes from list item i - 1 because of the slot reserved at index 0. */
3206 for (i = 1; i < trace->filter_pids.nr; ++i)
3207 trace->filter_pids.entries[i] = intlist__entry(list, i - 1)->i;
3209 intlist__delete(list);
/*
 * Open @filename for writing and store the stream in trace->output.
 *
 * An existing file with a non-zero size is first renamed to the same name
 * with an .old suffix. The result of rename() is not checked.
 *
 * Returns 0 on success or -errno when fopen() fails.
 */
3215 static int trace__open_output(struct trace *trace, const char *filename)
3219 if (!stat(filename, &st) && st.st_size) {
3220 char oldname[PATH_MAX];
3222 scnprintf(oldname, sizeof(oldname), "%s.old", filename);
3224 rename(filename, oldname);
3227 trace->output = fopen(filename, "w");
3229 return trace->output == NULL ? -errno : 0;
/*
 * Option callback for --pf: ORs the TRACE_PFMAJ and TRACE_PFMIN flags into
 * the int that opt->value points at, according to @str (all, maj or min).
 * NOTE(review): the handling of any other string is not visible here.
 */
3232 static int parse_pagefaults(const struct option *opt, const char *str,
3233 int unset __maybe_unused)
3235 int *trace_pgfaults = opt->value;
3237 if (strcmp(str, "all") == 0)
3238 *trace_pgfaults |= TRACE_PFMAJ | TRACE_PFMIN;
3239 else if (strcmp(str, "maj") == 0)
3240 *trace_pgfaults |= TRACE_PFMAJ;
3241 else if (strcmp(str, "min") == 0)
3242 *trace_pgfaults |= TRACE_PFMIN;
/* Set the same @handler on every evsel currently in @evlist. */
3249 static void evlist__set_evsel_handler(struct perf_evlist *evlist, void *handler)
3251 struct perf_evsel *evsel;
3253 evlist__for_each(evlist, evsel)
3254 evsel->handler = handler;
/*
 * Entry point of the perf trace builtin.
 *
 * Sets up the defaults in a struct trace, allocates the evlist and the
 * syscall table, parses the command line, then:
 *  - hands over to trace__record() when the first remaining argument is
 *    record;
 *  - otherwise validates the option combination, opens the output file if
 *    one was given, parses the -e syscall qualifier list, validates the
 *    target and finally calls trace__replay() or trace__run().
 *
 * NOTE(review): the condition choosing between replay and live mode is
 * not visible in this excerpt; presumably it depends on input_name.
 *
 * The output stream is closed at the end only when it was opened here
 * through the -o option.
 */
3257 int cmd_trace(int argc, const char **argv, const char *prefix __maybe_unused)
3259 const char *trace_usage[] = {
3260 "perf trace [<options>] [<command>]",
3261 "perf trace [<options>] -- <command> [<options>]",
3262 "perf trace record [<options>] [<command>]",
3263 "perf trace record [<options>] -- <command> [<options>]",
3266 struct trace trace = {
3275 .user_freq = UINT_MAX,
3276 .user_interval = ULLONG_MAX,
3277 .no_buffering = true,
3278 .mmap_pages = UINT_MAX,
3279 .proc_map_timeout = 500,
3283 .trace_syscalls = true,
3284 .kernel_syscallchains = false,
3286 const char *output_name = NULL;
3287 const char *ev_qualifier_str = NULL;
3288 const struct option trace_options[] = {
3289 OPT_CALLBACK(0, "event", &trace.evlist, "event",
3290 "event selector. use 'perf list' to list available events",
3291 parse_events_option),
3292 OPT_BOOLEAN(0, "comm", &trace.show_comm,
3293 "show the thread COMM next to its id"),
3294 OPT_BOOLEAN(0, "tool_stats", &trace.show_tool_stats, "show tool stats"),
3295 OPT_STRING('e', "expr", &ev_qualifier_str, "expr", "list of syscalls to trace"),
3296 OPT_STRING('o', "output", &output_name, "file", "output file name"),
3297 OPT_STRING('i', "input", &input_name, "file", "Analyze events in file"),
3298 OPT_STRING('p', "pid", &trace.opts.target.pid, "pid",
3299 "trace events on existing process id"),
3300 OPT_STRING('t', "tid", &trace.opts.target.tid, "tid",
3301 "trace events on existing thread id"),
3302 OPT_CALLBACK(0, "filter-pids", &trace, "CSV list of pids",
3303 "pids to filter (by the kernel)", trace__set_filter_pids),
3304 OPT_BOOLEAN('a', "all-cpus", &trace.opts.target.system_wide,
3305 "system-wide collection from all CPUs"),
3306 OPT_STRING('C', "cpu", &trace.opts.target.cpu_list, "cpu",
3307 "list of cpus to monitor"),
3308 OPT_BOOLEAN(0, "no-inherit", &trace.opts.no_inherit,
3309 "child tasks do not inherit counters"),
3310 OPT_CALLBACK('m', "mmap-pages", &trace.opts.mmap_pages, "pages",
3311 "number of mmap data pages",
3312 perf_evlist__parse_mmap_pages),
3313 OPT_STRING('u', "uid", &trace.opts.target.uid_str, "user",
3315 OPT_CALLBACK(0, "duration", &trace, "float",
3316 "show only events with duration > N.M ms",
3317 trace__set_duration),
3318 OPT_BOOLEAN(0, "sched", &trace.sched, "show blocking scheduler events"),
3319 OPT_INCR('v', "verbose", &verbose, "be more verbose"),
3320 OPT_BOOLEAN('T', "time", &trace.full_time,
3321 "Show full timestamp, not time relative to first start"),
3322 OPT_BOOLEAN('s', "summary", &trace.summary_only,
3323 "Show only syscall summary with statistics"),
3324 OPT_BOOLEAN('S', "with-summary", &trace.summary,
3325 "Show all syscalls and summary with statistics"),
3326 OPT_CALLBACK_DEFAULT('F', "pf", &trace.trace_pgfaults, "all|maj|min",
3327 "Trace pagefaults", parse_pagefaults, "maj"),
3328 OPT_BOOLEAN(0, "syscalls", &trace.trace_syscalls, "Trace syscalls"),
3329 OPT_BOOLEAN('f', "force", &trace.force, "don't complain, do it"),
3330 OPT_CALLBACK(0, "call-graph", &trace.opts,
3331 "record_mode[,record_size]", record_callchain_help,
3332 &record_parse_callchain_opt),
3333 OPT_BOOLEAN(0, "kernel-syscall-graph", &trace.kernel_syscallchains,
3334 "Show the kernel callchains on the syscall exit path"),
3335 OPT_UINTEGER(0, "proc-map-timeout", &trace.opts.proc_map_timeout,
3336 "per thread proc mmap processing timeout in ms"),
3339 const char * const trace_subcommands[] = { "record", NULL };
/* Route SIGSEGV and SIGFPE to sighandler_dump_stack. */
3343 signal(SIGSEGV, sighandler_dump_stack);
3344 signal(SIGFPE, sighandler_dump_stack);
3346 trace.evlist = perf_evlist__new();
3347 trace.sctbl = syscalltbl__new();
3349 if (trace.evlist == NULL || trace.sctbl == NULL) {
3350 pr_err("Not enough memory to run!\n");
/* Parsing stops at the first non-option so the workload command line is left untouched. */
3355 argc = parse_options_subcommand(argc, argv, trace_options, trace_subcommands,
3356 trace_usage, PARSE_OPT_STOP_AT_NON_OPTION);
3358 err = bpf__setup_stdout(trace.evlist);
3360 bpf__strerror_setup_stdout(trace.evlist, err, bf, sizeof(bf));
3361 pr_err("ERROR: Setup BPF stdout failed: %s\n", bf);
/* Page fault tracing turns on address and time sampling. */
3367 if (trace.trace_pgfaults) {
3368 trace.opts.sample_address = true;
3369 trace.opts.sample_time = true;
3372 if (trace.opts.callgraph_set)
3373 symbol_conf.use_callchain = true;
/* Events already in the evlist at this point (for instance from --event) get the generic handler. */
3375 if (trace.evlist->nr_entries > 0)
3376 evlist__set_evsel_handler(trace.evlist, trace__event_handler);
/* The record subcommand: drop the subcommand name and pass the rest to trace__record(). */
3378 if ((argc >= 1) && (strcmp(argv[0], "record") == 0))
3379 return trace__record(&trace, argc-1, &argv[1]);
3381 /* summary_only implies summary option, but don't overwrite summary if set */
3382 if (trace.summary_only)
3383 trace.summary = trace.summary_only;
3385 if (!trace.trace_syscalls && !trace.trace_pgfaults &&
3386 trace.evlist->nr_entries == 0 /* Was --events used? */) {
3387 pr_err("Please specify something to trace.\n");
3391 if (!trace.trace_syscalls && ev_qualifier_str) {
3392 pr_err("The -e option can't be used with --no-syscalls.\n");
3396 if (output_name != NULL) {
3397 err = trace__open_output(&trace, output_name);
3399 perror("failed to create output file");
/* Cache the syscall table id of the open syscall. */
3404 trace.open_id = syscalltbl__id(trace.sctbl, "open");
3406 if (ev_qualifier_str != NULL) {
3407 const char *s = ev_qualifier_str;
3408 struct strlist_config slist_config = {
3409 .dirname = system_path(STRACE_GROUPS_DIR),
/* A leading exclamation mark sets not_ev_qualifier, which negates the syscall list. */
3412 trace.not_ev_qualifier = *s == '!';
3413 if (trace.not_ev_qualifier)
3415 trace.ev_qualifier = strlist__new(s, &slist_config);
3416 if (trace.ev_qualifier == NULL) {
3417 fputs("Not enough memory to parse event qualifier",
3423 err = trace__validate_ev_qualifier(&trace);
3428 err = target__validate(&trace.opts.target);
3430 target__strerror(&trace.opts.target, err, bf, sizeof(bf));
3431 fprintf(trace.output, "%s", bf);
3435 err = target__parse_uid(&trace.opts.target);
3437 target__strerror(&trace.opts.target, err, bf, sizeof(bf));
3438 fprintf(trace.output, "%s", bf);
/* No workload and no explicit target: fall back to system wide tracing. */
3442 if (!argc && target__none(&trace.opts.target))
3443 trace.opts.target.system_wide = true;
3446 err = trace__replay(&trace);
3448 err = trace__run(&trace, argc, argv);
3451 if (output_name != NULL)
3452 fclose(trace.output);