1 #include <traceevent/event-parse.h>
3 #include "util/color.h"
4 #include "util/debug.h"
5 #include "util/evlist.h"
6 #include "util/machine.h"
7 #include "util/session.h"
8 #include "util/thread.h"
9 #include "util/parse-options.h"
10 #include "util/strlist.h"
11 #include "util/intlist.h"
12 #include "util/thread_map.h"
13 #include "util/stat.h"
14 #include "trace-event.h"
15 #include "util/parse-events.h"
19 #include <sys/eventfd.h>
21 #include <linux/futex.h>
23 /* For older distros: */
25 # define MAP_STACK 0x20000
29 # define MADV_HWPOISON 100
32 #ifndef MADV_MERGEABLE
33 # define MADV_MERGEABLE 12
36 #ifndef MADV_UNMERGEABLE
37 # define MADV_UNMERGEABLE 13
41 # define EFD_SEMAPHORE 1
47 u64 (*integer)(struct tp_field *field, struct perf_sample *sample);
48 void *(*pointer)(struct tp_field *field, struct perf_sample *sample);
/*
 * Generate tp_field__u<bits>(): fetch the unsigned <bits>-wide integer stored
 * field->offset bytes into the sample's raw payload and return it widened to
 * u64.
 *
 * The bytes are copied out with memcpy() instead of dereferencing a casted
 * pointer: nothing here guarantees that raw_data + offset is suitably aligned
 * for a u<bits> load, and a casted dereference would also break the strict
 * aliasing rules.
 */
#define TP_UINT_FIELD(bits) \
static u64 tp_field__u##bits(struct tp_field *field, struct perf_sample *sample) \
{ \
	u##bits value; \
	memcpy(&value, sample->raw_data + field->offset, sizeof(value)); \
	return value; \
}
/*
 * Generate tp_field__swapped_u<bits>(): same as the non-swapped reader, but
 * the value is byte-swapped with bswap_<bits>() before being returned.
 *
 * The bytes are copied out with memcpy() instead of dereferencing a casted
 * pointer: nothing here guarantees that raw_data + offset is suitably aligned
 * for a u<bits> load, and a casted dereference would also break the strict
 * aliasing rules.
 */
#define TP_UINT_FIELD__SWAPPED(bits) \
static u64 tp_field__swapped_u##bits(struct tp_field *field, struct perf_sample *sample) \
{ \
	u##bits value; \
	memcpy(&value, sample->raw_data + field->offset, sizeof(value)); \
	return bswap_##bits(value); \
}
70 TP_UINT_FIELD__SWAPPED(16);
71 TP_UINT_FIELD__SWAPPED(32);
72 TP_UINT_FIELD__SWAPPED(64);
74 static int tp_field__init_uint(struct tp_field *field,
75 struct format_field *format_field,
78 field->offset = format_field->offset;
80 switch (format_field->size) {
82 field->integer = tp_field__u8;
85 field->integer = needs_swap ? tp_field__swapped_u16 : tp_field__u16;
88 field->integer = needs_swap ? tp_field__swapped_u32 : tp_field__u32;
91 field->integer = needs_swap ? tp_field__swapped_u64 : tp_field__u64;
100 static void *tp_field__ptr(struct tp_field *field, struct perf_sample *sample)
102 return sample->raw_data + field->offset;
105 static int tp_field__init_ptr(struct tp_field *field, struct format_field *format_field)
107 field->offset = format_field->offset;
108 field->pointer = tp_field__ptr;
115 struct tp_field args, ret;
119 static int perf_evsel__init_tp_uint_field(struct perf_evsel *evsel,
120 struct tp_field *field,
123 struct format_field *format_field = perf_evsel__field(evsel, name);
125 if (format_field == NULL)
128 return tp_field__init_uint(field, format_field, evsel->needs_swap);
131 #define perf_evsel__init_sc_tp_uint_field(evsel, name) \
132 ({ struct syscall_tp *sc = evsel->priv;\
133 perf_evsel__init_tp_uint_field(evsel, &sc->name, #name); })
/*
 * Look up the tracepoint field called @name in @evsel and set @field up as a
 * pointer reader for it.
 *
 * Returns -1 when the field does not exist, otherwise whatever
 * tp_field__init_ptr() returns.
 */
static int perf_evsel__init_tp_ptr_field(struct perf_evsel *evsel,
					 struct tp_field *field,
					 const char *name)
{
	struct format_field *found = perf_evsel__field(evsel, name);

	if (!found)
		return -1;

	return tp_field__init_ptr(field, found);
}
147 #define perf_evsel__init_sc_tp_ptr_field(evsel, name) \
148 ({ struct syscall_tp *sc = evsel->priv;\
149 perf_evsel__init_tp_ptr_field(evsel, &sc->name, #name); })
151 static void perf_evsel__delete_priv(struct perf_evsel *evsel)
154 perf_evsel__delete(evsel);
157 static int perf_evsel__init_syscall_tp(struct perf_evsel *evsel, void *handler)
159 evsel->priv = malloc(sizeof(struct syscall_tp));
160 if (evsel->priv != NULL) {
161 if (perf_evsel__init_sc_tp_uint_field(evsel, id))
164 evsel->handler = handler;
175 static struct perf_evsel *perf_evsel__syscall_newtp(const char *direction, void *handler)
177 struct perf_evsel *evsel = perf_evsel__newtp("raw_syscalls", direction);
179 /* older kernel (e.g., RHEL6) use syscalls:{enter,exit} */
181 evsel = perf_evsel__newtp("syscalls", direction);
184 if (perf_evsel__init_syscall_tp(evsel, handler))
191 perf_evsel__delete_priv(evsel);
195 #define perf_evsel__sc_tp_uint(evsel, name, sample) \
196 ({ struct syscall_tp *fields = evsel->priv; \
197 fields->name.integer(&fields->name, sample); })
199 #define perf_evsel__sc_tp_ptr(evsel, name, sample) \
200 ({ struct syscall_tp *fields = evsel->priv; \
201 fields->name.pointer(&fields->name, sample); })
203 static int perf_evlist__add_syscall_newtp(struct perf_evlist *evlist,
204 void *sys_enter_handler,
205 void *sys_exit_handler)
208 struct perf_evsel *sys_enter, *sys_exit;
210 sys_enter = perf_evsel__syscall_newtp("sys_enter", sys_enter_handler);
211 if (sys_enter == NULL)
214 if (perf_evsel__init_sc_tp_ptr_field(sys_enter, args))
215 goto out_delete_sys_enter;
217 sys_exit = perf_evsel__syscall_newtp("sys_exit", sys_exit_handler);
218 if (sys_exit == NULL)
219 goto out_delete_sys_enter;
221 if (perf_evsel__init_sc_tp_uint_field(sys_exit, ret))
222 goto out_delete_sys_exit;
224 perf_evlist__add(evlist, sys_enter);
225 perf_evlist__add(evlist, sys_exit);
232 perf_evsel__delete_priv(sys_exit);
233 out_delete_sys_enter:
234 perf_evsel__delete_priv(sys_enter);
241 struct thread *thread;
251 const char **entries;
254 #define DEFINE_STRARRAY(array) struct strarray strarray__##array = { \
255 .nr_entries = ARRAY_SIZE(array), \
259 #define DEFINE_STRARRAY_OFFSET(array, off) struct strarray strarray__##array = { \
261 .nr_entries = ARRAY_SIZE(array), \
265 static size_t __syscall_arg__scnprintf_strarray(char *bf, size_t size,
267 struct syscall_arg *arg)
269 struct strarray *sa = arg->parm;
270 int idx = arg->val - sa->offset;
272 if (idx < 0 || idx >= sa->nr_entries)
273 return scnprintf(bf, size, intfmt, arg->val);
275 return scnprintf(bf, size, "%s", sa->entries[idx]);
/*
 * strarray beautifier whose fallback for values without a name is a plain
 * "%d" rendering of the raw argument.
 */
static size_t syscall_arg__scnprintf_strarray(char *bf, size_t size,
					      struct syscall_arg *arg)
{
	size_t printed;

	printed = __syscall_arg__scnprintf_strarray(bf, size, "%d", arg);
	return printed;
}
284 #define SCA_STRARRAY syscall_arg__scnprintf_strarray
286 #if defined(__i386__) || defined(__x86_64__)
288 * FIXME: Make this available to all arches as soon as the ioctl beautifier
289 * gets rewritten to support all arches.
/*
 * strarray beautifier whose fallback for values without a name is a "%#x"
 * (prefixed hexadecimal) rendering of the raw argument.
 */
static size_t syscall_arg__scnprintf_strhexarray(char *bf, size_t size,
						 struct syscall_arg *arg)
{
	size_t printed;

	printed = __syscall_arg__scnprintf_strarray(bf, size, "%#x", arg);
	return printed;
}
297 #define SCA_STRHEXARRAY syscall_arg__scnprintf_strhexarray
298 #endif /* defined(__i386__) || defined(__x86_64__) */
300 static size_t syscall_arg__scnprintf_fd(char *bf, size_t size,
301 struct syscall_arg *arg);
303 #define SCA_FD syscall_arg__scnprintf_fd
305 static size_t syscall_arg__scnprintf_fd_at(char *bf, size_t size,
306 struct syscall_arg *arg)
311 return scnprintf(bf, size, "CWD");
313 return syscall_arg__scnprintf_fd(bf, size, arg);
316 #define SCA_FDAT syscall_arg__scnprintf_fd_at
318 static size_t syscall_arg__scnprintf_close_fd(char *bf, size_t size,
319 struct syscall_arg *arg);
321 #define SCA_CLOSE_FD syscall_arg__scnprintf_close_fd
323 static size_t syscall_arg__scnprintf_hex(char *bf, size_t size,
324 struct syscall_arg *arg)
326 return scnprintf(bf, size, "%#lx", arg->val);
329 #define SCA_HEX syscall_arg__scnprintf_hex
331 static size_t syscall_arg__scnprintf_mmap_prot(char *bf, size_t size,
332 struct syscall_arg *arg)
334 int printed = 0, prot = arg->val;
336 if (prot == PROT_NONE)
337 return scnprintf(bf, size, "NONE");
338 #define P_MMAP_PROT(n) \
339 if (prot & PROT_##n) { \
340 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
350 P_MMAP_PROT(GROWSDOWN);
351 P_MMAP_PROT(GROWSUP);
355 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", prot);
360 #define SCA_MMAP_PROT syscall_arg__scnprintf_mmap_prot
362 static size_t syscall_arg__scnprintf_mmap_flags(char *bf, size_t size,
363 struct syscall_arg *arg)
365 int printed = 0, flags = arg->val;
367 #define P_MMAP_FLAG(n) \
368 if (flags & MAP_##n) { \
369 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
374 P_MMAP_FLAG(PRIVATE);
378 P_MMAP_FLAG(ANONYMOUS);
379 P_MMAP_FLAG(DENYWRITE);
380 P_MMAP_FLAG(EXECUTABLE);
383 P_MMAP_FLAG(GROWSDOWN);
385 P_MMAP_FLAG(HUGETLB);
388 P_MMAP_FLAG(NONBLOCK);
389 P_MMAP_FLAG(NORESERVE);
390 P_MMAP_FLAG(POPULATE);
392 #ifdef MAP_UNINITIALIZED
393 P_MMAP_FLAG(UNINITIALIZED);
398 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
403 #define SCA_MMAP_FLAGS syscall_arg__scnprintf_mmap_flags
405 static size_t syscall_arg__scnprintf_madvise_behavior(char *bf, size_t size,
406 struct syscall_arg *arg)
408 int behavior = arg->val;
411 #define P_MADV_BHV(n) case MADV_##n: return scnprintf(bf, size, #n)
414 P_MADV_BHV(SEQUENTIAL);
415 P_MADV_BHV(WILLNEED);
416 P_MADV_BHV(DONTNEED);
418 P_MADV_BHV(DONTFORK);
420 P_MADV_BHV(HWPOISON);
421 #ifdef MADV_SOFT_OFFLINE
422 P_MADV_BHV(SOFT_OFFLINE);
424 P_MADV_BHV(MERGEABLE);
425 P_MADV_BHV(UNMERGEABLE);
427 P_MADV_BHV(HUGEPAGE);
429 #ifdef MADV_NOHUGEPAGE
430 P_MADV_BHV(NOHUGEPAGE);
433 P_MADV_BHV(DONTDUMP);
442 return scnprintf(bf, size, "%#x", behavior);
445 #define SCA_MADV_BHV syscall_arg__scnprintf_madvise_behavior
447 static size_t syscall_arg__scnprintf_flock(char *bf, size_t size,
448 struct syscall_arg *arg)
450 int printed = 0, op = arg->val;
453 return scnprintf(bf, size, "NONE");
455 if ((op & LOCK_##cmd) == LOCK_##cmd) { \
456 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #cmd); \
471 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", op);
476 #define SCA_FLOCK syscall_arg__scnprintf_flock
478 static size_t syscall_arg__scnprintf_futex_op(char *bf, size_t size, struct syscall_arg *arg)
480 enum syscall_futex_args {
481 SCF_UADDR = (1 << 0),
484 SCF_TIMEOUT = (1 << 3),
485 SCF_UADDR2 = (1 << 4),
489 int cmd = op & FUTEX_CMD_MASK;
493 #define P_FUTEX_OP(n) case FUTEX_##n: printed = scnprintf(bf, size, #n);
494 P_FUTEX_OP(WAIT); arg->mask |= SCF_VAL3|SCF_UADDR2; break;
495 P_FUTEX_OP(WAKE); arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
496 P_FUTEX_OP(FD); arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
497 P_FUTEX_OP(REQUEUE); arg->mask |= SCF_VAL3|SCF_TIMEOUT; break;
498 P_FUTEX_OP(CMP_REQUEUE); arg->mask |= SCF_TIMEOUT; break;
499 P_FUTEX_OP(CMP_REQUEUE_PI); arg->mask |= SCF_TIMEOUT; break;
500 P_FUTEX_OP(WAKE_OP); break;
501 P_FUTEX_OP(LOCK_PI); arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
502 P_FUTEX_OP(UNLOCK_PI); arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
503 P_FUTEX_OP(TRYLOCK_PI); arg->mask |= SCF_VAL3|SCF_UADDR2; break;
504 P_FUTEX_OP(WAIT_BITSET); arg->mask |= SCF_UADDR2; break;
505 P_FUTEX_OP(WAKE_BITSET); arg->mask |= SCF_UADDR2; break;
506 P_FUTEX_OP(WAIT_REQUEUE_PI); break;
507 default: printed = scnprintf(bf, size, "%#x", cmd); break;
510 if (op & FUTEX_PRIVATE_FLAG)
511 printed += scnprintf(bf + printed, size - printed, "|PRIV");
513 if (op & FUTEX_CLOCK_REALTIME)
514 printed += scnprintf(bf + printed, size - printed, "|CLKRT");
519 #define SCA_FUTEX_OP syscall_arg__scnprintf_futex_op
521 static const char *epoll_ctl_ops[] = { "ADD", "DEL", "MOD", };
522 static DEFINE_STRARRAY_OFFSET(epoll_ctl_ops, 1);
524 static const char *itimers[] = { "REAL", "VIRTUAL", "PROF", };
525 static DEFINE_STRARRAY(itimers);
527 static const char *whences[] = { "SET", "CUR", "END",
535 static DEFINE_STRARRAY(whences);
537 static const char *fcntl_cmds[] = {
538 "DUPFD", "GETFD", "SETFD", "GETFL", "SETFL", "GETLK", "SETLK",
539 "SETLKW", "SETOWN", "GETOWN", "SETSIG", "GETSIG", "F_GETLK64",
540 "F_SETLK64", "F_SETLKW64", "F_SETOWN_EX", "F_GETOWN_EX",
543 static DEFINE_STRARRAY(fcntl_cmds);
545 static const char *rlimit_resources[] = {
546 "CPU", "FSIZE", "DATA", "STACK", "CORE", "RSS", "NPROC", "NOFILE",
547 "MEMLOCK", "AS", "LOCKS", "SIGPENDING", "MSGQUEUE", "NICE", "RTPRIO",
550 static DEFINE_STRARRAY(rlimit_resources);
552 static const char *sighow[] = { "BLOCK", "UNBLOCK", "SETMASK", };
553 static DEFINE_STRARRAY(sighow);
555 static const char *clockid[] = {
556 "REALTIME", "MONOTONIC", "PROCESS_CPUTIME_ID", "THREAD_CPUTIME_ID",
557 "MONOTONIC_RAW", "REALTIME_COARSE", "MONOTONIC_COARSE",
559 static DEFINE_STRARRAY(clockid);
561 static const char *socket_families[] = {
562 "UNSPEC", "LOCAL", "INET", "AX25", "IPX", "APPLETALK", "NETROM",
563 "BRIDGE", "ATMPVC", "X25", "INET6", "ROSE", "DECnet", "NETBEUI",
564 "SECURITY", "KEY", "NETLINK", "PACKET", "ASH", "ECONET", "ATMSVC",
565 "RDS", "SNA", "IRDA", "PPPOX", "WANPIPE", "LLC", "IB", "CAN", "TIPC",
566 "BLUETOOTH", "IUCV", "RXRPC", "ISDN", "PHONET", "IEEE802154", "CAIF",
567 "ALG", "NFC", "VSOCK",
569 static DEFINE_STRARRAY(socket_families);
571 #ifndef SOCK_TYPE_MASK
572 #define SOCK_TYPE_MASK 0xf
575 static size_t syscall_arg__scnprintf_socket_type(char *bf, size_t size,
576 struct syscall_arg *arg)
580 flags = type & ~SOCK_TYPE_MASK;
582 type &= SOCK_TYPE_MASK;
584 * Can't use a strarray, MIPS may override for ABI reasons.
587 #define P_SK_TYPE(n) case SOCK_##n: printed = scnprintf(bf, size, #n); break;
592 P_SK_TYPE(SEQPACKET);
597 printed = scnprintf(bf, size, "%#x", type);
600 #define P_SK_FLAG(n) \
601 if (flags & SOCK_##n) { \
602 printed += scnprintf(bf + printed, size - printed, "|%s", #n); \
603 flags &= ~SOCK_##n; \
611 printed += scnprintf(bf + printed, size - printed, "|%#x", flags);
616 #define SCA_SK_TYPE syscall_arg__scnprintf_socket_type
619 #define MSG_PROBE 0x10
621 #ifndef MSG_WAITFORONE
622 #define MSG_WAITFORONE 0x10000
624 #ifndef MSG_SENDPAGE_NOTLAST
625 #define MSG_SENDPAGE_NOTLAST 0x20000
628 #define MSG_FASTOPEN 0x20000000
631 static size_t syscall_arg__scnprintf_msg_flags(char *bf, size_t size,
632 struct syscall_arg *arg)
634 int printed = 0, flags = arg->val;
637 return scnprintf(bf, size, "NONE");
638 #define P_MSG_FLAG(n) \
639 if (flags & MSG_##n) { \
640 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
646 P_MSG_FLAG(DONTROUTE);
651 P_MSG_FLAG(DONTWAIT);
658 P_MSG_FLAG(ERRQUEUE);
659 P_MSG_FLAG(NOSIGNAL);
661 P_MSG_FLAG(WAITFORONE);
662 P_MSG_FLAG(SENDPAGE_NOTLAST);
663 P_MSG_FLAG(FASTOPEN);
664 P_MSG_FLAG(CMSG_CLOEXEC);
668 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
673 #define SCA_MSG_FLAGS syscall_arg__scnprintf_msg_flags
675 static size_t syscall_arg__scnprintf_access_mode(char *bf, size_t size,
676 struct syscall_arg *arg)
681 if (mode == F_OK) /* 0 */
682 return scnprintf(bf, size, "F");
684 if (mode & n##_OK) { \
685 printed += scnprintf(bf + printed, size - printed, "%s", #n); \
695 printed += scnprintf(bf + printed, size - printed, "|%#x", mode);
700 #define SCA_ACCMODE syscall_arg__scnprintf_access_mode
702 static size_t syscall_arg__scnprintf_open_flags(char *bf, size_t size,
703 struct syscall_arg *arg)
705 int printed = 0, flags = arg->val;
707 if (!(flags & O_CREAT))
708 arg->mask |= 1 << (arg->idx + 1); /* Mask the mode parm */
711 return scnprintf(bf, size, "RDONLY");
713 if (flags & O_##n) { \
714 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
738 if ((flags & O_SYNC) == O_SYNC)
739 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", "SYNC");
751 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
756 #define SCA_OPEN_FLAGS syscall_arg__scnprintf_open_flags
758 static size_t syscall_arg__scnprintf_eventfd_flags(char *bf, size_t size,
759 struct syscall_arg *arg)
761 int printed = 0, flags = arg->val;
764 return scnprintf(bf, size, "NONE");
766 if (flags & EFD_##n) { \
767 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
777 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
782 #define SCA_EFD_FLAGS syscall_arg__scnprintf_eventfd_flags
784 static size_t syscall_arg__scnprintf_pipe_flags(char *bf, size_t size,
785 struct syscall_arg *arg)
787 int printed = 0, flags = arg->val;
790 if (flags & O_##n) { \
791 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
800 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
805 #define SCA_PIPE_FLAGS syscall_arg__scnprintf_pipe_flags
807 static size_t syscall_arg__scnprintf_signum(char *bf, size_t size, struct syscall_arg *arg)
812 #define P_SIGNUM(n) case SIG##n: return scnprintf(bf, size, #n)
847 return scnprintf(bf, size, "%#x", sig);
850 #define SCA_SIGNUM syscall_arg__scnprintf_signum
852 #if defined(__i386__) || defined(__x86_64__)
854 * FIXME: Make this available to all arches.
856 #define TCGETS 0x5401
858 static const char *tioctls[] = {
859 "TCGETS", "TCSETS", "TCSETSW", "TCSETSF", "TCGETA", "TCSETA", "TCSETAW",
860 "TCSETAF", "TCSBRK", "TCXONC", "TCFLSH", "TIOCEXCL", "TIOCNXCL",
861 "TIOCSCTTY", "TIOCGPGRP", "TIOCSPGRP", "TIOCOUTQ", "TIOCSTI",
862 "TIOCGWINSZ", "TIOCSWINSZ", "TIOCMGET", "TIOCMBIS", "TIOCMBIC",
863 "TIOCMSET", "TIOCGSOFTCAR", "TIOCSSOFTCAR", "FIONREAD", "TIOCLINUX",
864 "TIOCCONS", "TIOCGSERIAL", "TIOCSSERIAL", "TIOCPKT", "FIONBIO",
865 "TIOCNOTTY", "TIOCSETD", "TIOCGETD", "TCSBRKP", [0x27] = "TIOCSBRK",
866 "TIOCCBRK", "TIOCGSID", "TCGETS2", "TCSETS2", "TCSETSW2", "TCSETSF2",
867 "TIOCGRS485", "TIOCSRS485", "TIOCGPTN", "TIOCSPTLCK",
868 "TIOCGDEV||TCGETX", "TCSETX", "TCSETXF", "TCSETXW", "TIOCSIG",
869 "TIOCVHANGUP", "TIOCGPKT", "TIOCGPTLCK", "TIOCGEXCL",
870 [0x50] = "FIONCLEX", "FIOCLEX", "FIOASYNC", "TIOCSERCONFIG",
871 "TIOCSERGWILD", "TIOCSERSWILD", "TIOCGLCKTRMIOS", "TIOCSLCKTRMIOS",
872 "TIOCSERGSTRUCT", "TIOCSERGETLSR", "TIOCSERGETMULTI", "TIOCSERSETMULTI",
873 "TIOCMIWAIT", "TIOCGICOUNT", [0x60] = "FIOQSIZE",
876 static DEFINE_STRARRAY_OFFSET(tioctls, 0x5401);
877 #endif /* defined(__i386__) || defined(__x86_64__) */
879 #define STRARRAY(arg, name, array) \
880 .arg_scnprintf = { [arg] = SCA_STRARRAY, }, \
881 .arg_parm = { [arg] = &strarray__##array, }
883 static struct syscall_fmt {
886 size_t (*arg_scnprintf[6])(char *bf, size_t size, struct syscall_arg *arg);
892 { .name = "access", .errmsg = true,
893 .arg_scnprintf = { [1] = SCA_ACCMODE, /* mode */ }, },
894 { .name = "arch_prctl", .errmsg = true, .alias = "prctl", },
895 { .name = "brk", .hexret = true,
896 .arg_scnprintf = { [0] = SCA_HEX, /* brk */ }, },
897 { .name = "clock_gettime", .errmsg = true, STRARRAY(0, clk_id, clockid), },
898 { .name = "close", .errmsg = true,
899 .arg_scnprintf = { [0] = SCA_CLOSE_FD, /* fd */ }, },
900 { .name = "connect", .errmsg = true, },
901 { .name = "dup", .errmsg = true,
902 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
903 { .name = "dup2", .errmsg = true,
904 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
905 { .name = "dup3", .errmsg = true,
906 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
907 { .name = "epoll_ctl", .errmsg = true, STRARRAY(1, op, epoll_ctl_ops), },
908 { .name = "eventfd2", .errmsg = true,
909 .arg_scnprintf = { [1] = SCA_EFD_FLAGS, /* flags */ }, },
910 { .name = "faccessat", .errmsg = true,
911 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
912 { .name = "fadvise64", .errmsg = true,
913 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
914 { .name = "fallocate", .errmsg = true,
915 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
916 { .name = "fchdir", .errmsg = true,
917 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
918 { .name = "fchmod", .errmsg = true,
919 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
920 { .name = "fchmodat", .errmsg = true,
921 .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
922 { .name = "fchown", .errmsg = true,
923 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
924 { .name = "fchownat", .errmsg = true,
925 .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
926 { .name = "fcntl", .errmsg = true,
927 .arg_scnprintf = { [0] = SCA_FD, /* fd */
928 [1] = SCA_STRARRAY, /* cmd */ },
929 .arg_parm = { [1] = &strarray__fcntl_cmds, /* cmd */ }, },
930 { .name = "fdatasync", .errmsg = true,
931 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
932 { .name = "flock", .errmsg = true,
933 .arg_scnprintf = { [0] = SCA_FD, /* fd */
934 [1] = SCA_FLOCK, /* cmd */ }, },
935 { .name = "fsetxattr", .errmsg = true,
936 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
937 { .name = "fstat", .errmsg = true, .alias = "newfstat",
938 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
939 { .name = "fstatat", .errmsg = true, .alias = "newfstatat",
940 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
941 { .name = "fstatfs", .errmsg = true,
942 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
943 { .name = "fsync", .errmsg = true,
944 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
945 { .name = "ftruncate", .errmsg = true,
946 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
947 { .name = "futex", .errmsg = true,
948 .arg_scnprintf = { [1] = SCA_FUTEX_OP, /* op */ }, },
949 { .name = "futimesat", .errmsg = true,
950 .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
951 { .name = "getdents", .errmsg = true,
952 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
953 { .name = "getdents64", .errmsg = true,
954 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
955 { .name = "getitimer", .errmsg = true, STRARRAY(0, which, itimers), },
956 { .name = "getrlimit", .errmsg = true, STRARRAY(0, resource, rlimit_resources), },
957 { .name = "ioctl", .errmsg = true,
958 .arg_scnprintf = { [0] = SCA_FD, /* fd */
959 #if defined(__i386__) || defined(__x86_64__)
961 * FIXME: Make this available to all arches.
963 [1] = SCA_STRHEXARRAY, /* cmd */
964 [2] = SCA_HEX, /* arg */ },
965 .arg_parm = { [1] = &strarray__tioctls, /* cmd */ }, },
967 [2] = SCA_HEX, /* arg */ }, },
969 { .name = "kill", .errmsg = true,
970 .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
971 { .name = "linkat", .errmsg = true,
972 .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
973 { .name = "lseek", .errmsg = true,
974 .arg_scnprintf = { [0] = SCA_FD, /* fd */
975 [2] = SCA_STRARRAY, /* whence */ },
976 .arg_parm = { [2] = &strarray__whences, /* whence */ }, },
977 { .name = "lstat", .errmsg = true, .alias = "newlstat", },
978 { .name = "madvise", .errmsg = true,
979 .arg_scnprintf = { [0] = SCA_HEX, /* start */
980 [2] = SCA_MADV_BHV, /* behavior */ }, },
981 { .name = "mkdirat", .errmsg = true,
982 .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
983 { .name = "mknodat", .errmsg = true,
984 .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
985 { .name = "mlock", .errmsg = true,
986 .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
987 { .name = "mlockall", .errmsg = true,
988 .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
989 { .name = "mmap", .hexret = true,
990 .arg_scnprintf = { [0] = SCA_HEX, /* addr */
991 [2] = SCA_MMAP_PROT, /* prot */
992 [3] = SCA_MMAP_FLAGS, /* flags */
993 [4] = SCA_FD, /* fd */ }, },
994 { .name = "mprotect", .errmsg = true,
995 .arg_scnprintf = { [0] = SCA_HEX, /* start */
996 [2] = SCA_MMAP_PROT, /* prot */ }, },
997 { .name = "mremap", .hexret = true,
998 .arg_scnprintf = { [0] = SCA_HEX, /* addr */
999 [4] = SCA_HEX, /* new_addr */ }, },
1000 { .name = "munlock", .errmsg = true,
1001 .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
1002 { .name = "munmap", .errmsg = true,
1003 .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
1004 { .name = "name_to_handle_at", .errmsg = true,
1005 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
1006 { .name = "newfstatat", .errmsg = true,
1007 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
1008 { .name = "open", .errmsg = true,
1009 .arg_scnprintf = { [1] = SCA_OPEN_FLAGS, /* flags */ }, },
1010 { .name = "open_by_handle_at", .errmsg = true,
1011 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
1012 [2] = SCA_OPEN_FLAGS, /* flags */ }, },
1013 { .name = "openat", .errmsg = true,
1014 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
1015 [2] = SCA_OPEN_FLAGS, /* flags */ }, },
1016 { .name = "pipe2", .errmsg = true,
1017 .arg_scnprintf = { [1] = SCA_PIPE_FLAGS, /* flags */ }, },
1018 { .name = "poll", .errmsg = true, .timeout = true, },
1019 { .name = "ppoll", .errmsg = true, .timeout = true, },
1020 { .name = "pread", .errmsg = true, .alias = "pread64",
1021 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1022 { .name = "preadv", .errmsg = true, .alias = "pread",
1023 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1024 { .name = "prlimit64", .errmsg = true, STRARRAY(1, resource, rlimit_resources), },
1025 { .name = "pwrite", .errmsg = true, .alias = "pwrite64",
1026 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1027 { .name = "pwritev", .errmsg = true,
1028 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1029 { .name = "read", .errmsg = true,
1030 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1031 { .name = "readlinkat", .errmsg = true,
1032 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
1033 { .name = "readv", .errmsg = true,
1034 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1035 { .name = "recvfrom", .errmsg = true,
1036 .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
1037 { .name = "recvmmsg", .errmsg = true,
1038 .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
1039 { .name = "recvmsg", .errmsg = true,
1040 .arg_scnprintf = { [2] = SCA_MSG_FLAGS, /* flags */ }, },
1041 { .name = "renameat", .errmsg = true,
1042 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
1043 { .name = "rt_sigaction", .errmsg = true,
1044 .arg_scnprintf = { [0] = SCA_SIGNUM, /* sig */ }, },
1045 { .name = "rt_sigprocmask", .errmsg = true, STRARRAY(0, how, sighow), },
1046 { .name = "rt_sigqueueinfo", .errmsg = true,
1047 .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
1048 { .name = "rt_tgsigqueueinfo", .errmsg = true,
1049 .arg_scnprintf = { [2] = SCA_SIGNUM, /* sig */ }, },
1050 { .name = "select", .errmsg = true, .timeout = true, },
1051 { .name = "sendmmsg", .errmsg = true,
1052 .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
1053 { .name = "sendmsg", .errmsg = true,
1054 .arg_scnprintf = { [2] = SCA_MSG_FLAGS, /* flags */ }, },
1055 { .name = "sendto", .errmsg = true,
1056 .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
1057 { .name = "setitimer", .errmsg = true, STRARRAY(0, which, itimers), },
1058 { .name = "setrlimit", .errmsg = true, STRARRAY(0, resource, rlimit_resources), },
1059 { .name = "shutdown", .errmsg = true,
1060 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1061 { .name = "socket", .errmsg = true,
1062 .arg_scnprintf = { [0] = SCA_STRARRAY, /* family */
1063 [1] = SCA_SK_TYPE, /* type */ },
1064 .arg_parm = { [0] = &strarray__socket_families, /* family */ }, },
1065 { .name = "socketpair", .errmsg = true,
1066 .arg_scnprintf = { [0] = SCA_STRARRAY, /* family */
1067 [1] = SCA_SK_TYPE, /* type */ },
1068 .arg_parm = { [0] = &strarray__socket_families, /* family */ }, },
1069 { .name = "stat", .errmsg = true, .alias = "newstat", },
1070 { .name = "symlinkat", .errmsg = true,
1071 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
1072 { .name = "tgkill", .errmsg = true,
1073 .arg_scnprintf = { [2] = SCA_SIGNUM, /* sig */ }, },
1074 { .name = "tkill", .errmsg = true,
1075 .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
1076 { .name = "uname", .errmsg = true, .alias = "newuname", },
1077 { .name = "unlinkat", .errmsg = true,
1078 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
1079 { .name = "utimensat", .errmsg = true,
1080 .arg_scnprintf = { [0] = SCA_FDAT, /* dirfd */ }, },
1081 { .name = "write", .errmsg = true,
1082 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1083 { .name = "writev", .errmsg = true,
1084 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1087 static int syscall_fmt__cmp(const void *name, const void *fmtp)
1089 const struct syscall_fmt *fmt = fmtp;
1090 return strcmp(name, fmt->name);
1093 static struct syscall_fmt *syscall_fmt__find(const char *name)
1095 const int nmemb = ARRAY_SIZE(syscall_fmts);
1096 return bsearch(name, syscall_fmts, nmemb, sizeof(struct syscall_fmt), syscall_fmt__cmp);
1100 struct event_format *tp_format;
1103 struct syscall_fmt *fmt;
1104 size_t (**arg_scnprintf)(char *bf, size_t size, struct syscall_arg *arg);
1108 static size_t fprintf_duration(unsigned long t, FILE *fp)
1110 double duration = (double)t / NSEC_PER_MSEC;
1111 size_t printed = fprintf(fp, "(");
1113 if (duration >= 1.0)
1114 printed += color_fprintf(fp, PERF_COLOR_RED, "%6.3f ms", duration);
1115 else if (duration >= 0.01)
1116 printed += color_fprintf(fp, PERF_COLOR_YELLOW, "%6.3f ms", duration);
1118 printed += color_fprintf(fp, PERF_COLOR_NORMAL, "%6.3f ms", duration);
1119 return printed + fprintf(fp, "): ");
1122 struct thread_trace {
1126 unsigned long nr_events;
1134 struct intlist *syscall_stats;
1137 static struct thread_trace *thread_trace__new(void)
1139 struct thread_trace *ttrace = zalloc(sizeof(struct thread_trace));
1142 ttrace->paths.max = -1;
1144 ttrace->syscall_stats = intlist__new(NULL);
1149 static struct thread_trace *thread__trace(struct thread *thread, FILE *fp)
1151 struct thread_trace *ttrace;
1156 if (thread->priv == NULL)
1157 thread->priv = thread_trace__new();
1159 if (thread->priv == NULL)
1162 ttrace = thread->priv;
1163 ++ttrace->nr_events;
1167 color_fprintf(fp, PERF_COLOR_RED,
1168 "WARNING: not enough memory, dropping samples!\n");
1173 struct perf_tool tool;
1180 struct syscall *table;
1182 struct record_opts opts;
1183 struct machine *host;
1186 unsigned long nr_events;
1187 struct strlist *ev_qualifier;
1188 const char *last_vfs_getname;
1189 struct intlist *tid_list;
1190 struct intlist *pid_list;
1191 double duration_filter;
1197 bool not_ev_qualifier;
1201 bool multiple_threads;
1205 bool show_tool_stats;
1208 static int trace__set_fd_pathname(struct thread *thread, int fd, const char *pathname)
1210 struct thread_trace *ttrace = thread->priv;
1212 if (fd > ttrace->paths.max) {
1213 char **npath = realloc(ttrace->paths.table, (fd + 1) * sizeof(char *));
1218 if (ttrace->paths.max != -1) {
1219 memset(npath + ttrace->paths.max + 1, 0,
1220 (fd - ttrace->paths.max) * sizeof(char *));
1222 memset(npath, 0, (fd + 1) * sizeof(char *));
1225 ttrace->paths.table = npath;
1226 ttrace->paths.max = fd;
1229 ttrace->paths.table[fd] = strdup(pathname);
1231 return ttrace->paths.table[fd] != NULL ? 0 : -1;
1234 static int thread__read_fd_path(struct thread *thread, int fd)
1236 char linkname[PATH_MAX], pathname[PATH_MAX];
1240 if (thread->pid_ == thread->tid) {
1241 scnprintf(linkname, sizeof(linkname),
1242 "/proc/%d/fd/%d", thread->pid_, fd);
1244 scnprintf(linkname, sizeof(linkname),
1245 "/proc/%d/task/%d/fd/%d", thread->pid_, thread->tid, fd);
1248 if (lstat(linkname, &st) < 0 || st.st_size + 1 > (off_t)sizeof(pathname))
1251 ret = readlink(linkname, pathname, sizeof(pathname));
1253 if (ret < 0 || ret > st.st_size)
1256 pathname[ret] = '\0';
1257 return trace__set_fd_pathname(thread, fd, pathname);
1260 static const char *thread__fd_path(struct thread *thread, int fd,
1261 struct trace *trace)
1263 struct thread_trace *ttrace = thread->priv;
1271 if ((fd > ttrace->paths.max || ttrace->paths.table[fd] == NULL))
1274 ++trace->stats.proc_getname;
1275 if (thread__read_fd_path(thread, fd)) {
1279 return ttrace->paths.table[fd];
1282 static size_t syscall_arg__scnprintf_fd(char *bf, size_t size,
1283 struct syscall_arg *arg)
1286 size_t printed = scnprintf(bf, size, "%d", fd);
1287 const char *path = thread__fd_path(arg->thread, fd, arg->trace);
1290 printed += scnprintf(bf + printed, size - printed, "<%s>", path);
1295 static size_t syscall_arg__scnprintf_close_fd(char *bf, size_t size,
1296 struct syscall_arg *arg)
1299 size_t printed = syscall_arg__scnprintf_fd(bf, size, arg);
1300 struct thread_trace *ttrace = arg->thread->priv;
1302 if (ttrace && fd >= 0 && fd <= ttrace->paths.max)
1303 zfree(&ttrace->paths.table[fd]);
1308 static bool trace__filter_duration(struct trace *trace, double t)
1310 return t < (trace->duration_filter * NSEC_PER_MSEC);
1313 static size_t trace__fprintf_tstamp(struct trace *trace, u64 tstamp, FILE *fp)
1315 double ts = (double)(tstamp - trace->base_time) / NSEC_PER_MSEC;
1317 return fprintf(fp, "%10.3f ", ts);
1320 static bool done = false;
1321 static bool interrupted = false;
1323 static void sig_handler(int sig)
1326 interrupted = sig == SIGINT;
1329 static size_t trace__fprintf_entry_head(struct trace *trace, struct thread *thread,
1330 u64 duration, u64 tstamp, FILE *fp)
1332 size_t printed = trace__fprintf_tstamp(trace, tstamp, fp);
1333 printed += fprintf_duration(duration, fp);
1335 if (trace->multiple_threads) {
1336 if (trace->show_comm)
1337 printed += fprintf(fp, "%.14s/", thread__comm_str(thread));
1338 printed += fprintf(fp, "%d ", thread->tid);
1344 static int trace__process_event(struct trace *trace, struct machine *machine,
1345 union perf_event *event, struct perf_sample *sample)
1349 switch (event->header.type) {
1350 case PERF_RECORD_LOST:
1351 color_fprintf(trace->output, PERF_COLOR_RED,
1352 "LOST %" PRIu64 " events!\n", event->lost.lost);
1353 ret = machine__process_lost_event(machine, event, sample);
1355 ret = machine__process_event(machine, event, sample);
1362 static int trace__tool_process(struct perf_tool *tool,
1363 union perf_event *event,
1364 struct perf_sample *sample,
1365 struct machine *machine)
1367 struct trace *trace = container_of(tool, struct trace, tool);
1368 return trace__process_event(trace, machine, event, sample);
/*
 * Set up symbol handling for trace__run(): initialize the symbol library,
 * create the host machine (stored in trace->host) and synthesize events for
 * the threads in @evlist's thread map, delivered via trace__tool_process().
 * NOTE(review): the error-path statements after each check are not visible
 * here; confirm what is undone on failure.
 */
1371 static int trace__symbols_init(struct trace *trace, struct perf_evlist *evlist)
1373 int err = symbol__init();
1378 trace->host = machine__new_host();
1379 if (trace->host == NULL)
1382 err = __machine__synthesize_threads(trace->host, &trace->tool, &trace->opts.target,
1383 evlist->threads, trace__tool_process, false);
1390 static int syscall__set_arg_fmts(struct syscall *sc)
1392 struct format_field *field;
1395 sc->arg_scnprintf = calloc(sc->tp_format->format.nr_fields - 1, sizeof(void *));
1396 if (sc->arg_scnprintf == NULL)
1400 sc->arg_parm = sc->fmt->arg_parm;
1402 for (field = sc->tp_format->format.fields->next; field; field = field->next) {
1403 if (sc->fmt && sc->fmt->arg_scnprintf[idx])
1404 sc->arg_scnprintf[idx] = sc->fmt->arg_scnprintf[idx];
1405 else if (field->flags & FIELD_IS_POINTER)
1406 sc->arg_scnprintf[idx] = syscall_arg__scnprintf_hex;
1413 static int trace__read_syscall_info(struct trace *trace, int id)
1417 const char *name = audit_syscall_to_name(id, trace->audit.machine);
1422 if (id > trace->syscalls.max) {
1423 struct syscall *nsyscalls = realloc(trace->syscalls.table, (id + 1) * sizeof(*sc));
1425 if (nsyscalls == NULL)
1428 if (trace->syscalls.max != -1) {
1429 memset(nsyscalls + trace->syscalls.max + 1, 0,
1430 (id - trace->syscalls.max) * sizeof(*sc));
1432 memset(nsyscalls, 0, (id + 1) * sizeof(*sc));
1435 trace->syscalls.table = nsyscalls;
1436 trace->syscalls.max = id;
1439 sc = trace->syscalls.table + id;
1442 if (trace->ev_qualifier) {
1443 bool in = strlist__find(trace->ev_qualifier, name) != NULL;
1445 if (!(in ^ trace->not_ev_qualifier)) {
1446 sc->filtered = true;
1448 * No need to do read tracepoint information since this will be
1455 sc->fmt = syscall_fmt__find(sc->name);
1457 snprintf(tp_name, sizeof(tp_name), "sys_enter_%s", sc->name);
1458 sc->tp_format = trace_event__tp_format("syscalls", tp_name);
1460 if (sc->tp_format == NULL && sc->fmt && sc->fmt->alias) {
1461 snprintf(tp_name, sizeof(tp_name), "sys_enter_%s", sc->fmt->alias);
1462 sc->tp_format = trace_event__tp_format("syscalls", tp_name);
1465 if (sc->tp_format == NULL)
1468 return syscall__set_arg_fmts(sc);
/*
 * Format the arguments of one syscall into @bf (at most @size bytes) and
 * accumulate the number of characters written in 'printed'.
 *
 * With a tracepoint format available, every field after the first is
 * emitted as "name: value", comma separated, using the formatter in
 * sc->arg_scnprintf[] when one is set and "%ld" otherwise.
 * NOTE(review): the final scnprintf() taking (i, args[i]) appears to be the
 * fallback used when sc->tp_format is NULL; its loop and format string are
 * not visible here - confirm.
 */
1471 static size_t syscall__scnprintf_args(struct syscall *sc, char *bf, size_t size,
1472 unsigned long *args, struct trace *trace,
1473 struct thread *thread)
1477 if (sc->tp_format != NULL) {
1478 struct format_field *field;
1480 struct syscall_arg arg = {
/* arg.idx and 'bit' advance in lockstep with the field being visited. */
1487 for (field = sc->tp_format->format.fields->next; field;
1488 field = field->next, ++arg.idx, bit <<= 1) {
1492 * Suppress this argument if its value is zero and
1493 * and we don't have a string associated in an
1496 if (args[arg.idx] == 0 &&
1497 !(sc->arg_scnprintf &&
1498 sc->arg_scnprintf[arg.idx] == SCA_STRARRAY &&
1499 sc->arg_parm[arg.idx]))
/* The separator is only emitted once something has been printed. */
1502 printed += scnprintf(bf + printed, size - printed,
1503 "%s%s: ", printed ? ", " : "", field->name);
1504 if (sc->arg_scnprintf && sc->arg_scnprintf[arg.idx]) {
1505 arg.val = args[arg.idx];
1507 arg.parm = sc->arg_parm[arg.idx];
1508 printed += sc->arg_scnprintf[arg.idx](bf + printed,
1509 size - printed, &arg);
1511 printed += scnprintf(bf + printed, size - printed,
1512 "%ld", args[arg.idx]);
1519 printed += scnprintf(bf + printed, size - printed,
1521 printed ? ", " : "", i, args[i]);
/*
 * Signature of the per-tracepoint sample handlers stored in evsel->handler
 * and invoked with the owning trace, the evsel and the parsed sample.
 */
1529 typedef int (*tracepoint_handler)(struct trace *trace, struct perf_evsel *evsel,
1530 struct perf_sample *sample);
/*
 * Return the struct syscall entry for syscall number @id, reading its
 * description on first use through trace__read_syscall_info().
 *
 * The visible paths return &trace->syscalls.table[id] when the entry has a
 * name, and print a "Problems reading syscall ..." diagnostic to
 * trace->output otherwise. An "Invalid syscall" message with a running
 * counter is printed for ids the function refuses to look up.
 * NOTE(review): the guards, labels and return statements for the failure
 * paths are not visible here; confirm them before relying on this summary.
 */
1532 static struct syscall *trace__syscall_info(struct trace *trace,
1533 struct perf_evsel *evsel, int id)
1539 * XXX: Noticed on x86_64, reproduced as far back as 3.0.36, haven't tried
1540 * before that, leaving at a higher verbosity level till that is
1541 * explained. Reproduced with plain ftrace with:
1543 * echo 1 > /t/events/raw_syscalls/sys_exit/enable
1544 * grep "NR -1 " /t/trace_pipe
1546 * After generating some load on the machine.
1550 fprintf(trace->output, "Invalid syscall %d id, skipping (%s, %" PRIu64 ") ...\n",
1551 id, perf_evsel__name(evsel), ++n);
/* Lazily populate the entry the first time this id is seen. */
1556 if ((id > trace->syscalls.max || trace->syscalls.table[id].name == NULL) &&
1557 trace__read_syscall_info(trace, id))
/* Re-check: the read may have succeeded without giving the entry a name. */
1560 if ((id > trace->syscalls.max || trace->syscalls.table[id].name == NULL))
1563 return &trace->syscalls.table[id];
1567 fprintf(trace->output, "Problems reading syscall %d", id);
1568 if (id <= trace->syscalls.max && trace->syscalls.table[id].name != NULL)
1569 fprintf(trace->output, "(%s)", trace->syscalls.table[id].name);
1570 fputs(" information\n", trace->output);
/*
 * Account one completed syscall @id in the per-thread statistics.
 *
 * The stats object for @id lives in the priv pointer of the matching node
 * of ttrace->syscall_stats and is allocated the first time the syscall is
 * seen. The sample fed to update_stats() is the time elapsed since
 * ttrace->entry_time, computed only when an entry time was recorded and the
 * sample is later than it.
 */
1575 static void thread__update_stats(struct thread_trace *ttrace,
1576 int id, struct perf_sample *sample)
1578 struct int_node *inode;
1579 struct stats *stats;
1582 inode = intlist__findnew(ttrace->syscall_stats, id);
1586 stats = inode->priv;
1587 if (stats == NULL) {
1588 stats = malloc(sizeof(struct stats));
1592 inode->priv = stats;
/* Guard against a missing or out-of-order entry timestamp. */
1595 if (ttrace->entry_time && sample->time > ttrace->entry_time)
1596 duration = sample->time - ttrace->entry_time;
1598 update_stats(stats, duration);
/*
 * Handler for the syscall entry tracepoint.
 *
 * Reads the syscall id and argument block from the sample, formats
 * "name(args" into the thread's entry_str buffer (allocated on first use,
 * 1024 bytes) and records the entry timestamp so the exit handler can
 * compute the duration. exit and exit_group are printed right away, since
 * no matching exit event will follow; other syscalls are flagged as
 * entry_pending.
 * NOTE(review): the early-return checks on sc/ttrace are not visible here.
 */
1601 static int trace__sys_enter(struct trace *trace, struct perf_evsel *evsel,
1602 struct perf_sample *sample)
1607 struct thread *thread;
1608 int id = perf_evsel__sc_tp_uint(evsel, id, sample);
1609 struct syscall *sc = trace__syscall_info(trace, evsel, id);
1610 struct thread_trace *ttrace;
1618 thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
1619 ttrace = thread__trace(thread, trace->output);
1623 args = perf_evsel__sc_tp_ptr(evsel, args, sample);
1624 ttrace = thread->priv;
/* The formatting buffer is allocated once per thread and then reused. */
1626 if (ttrace->entry_str == NULL) {
1627 ttrace->entry_str = malloc(1024);
1628 if (!ttrace->entry_str)
1632 ttrace->entry_time = sample->time;
1633 msg = ttrace->entry_str;
1634 printed += scnprintf(msg + printed, 1024 - printed, "%s(", sc->name);
1636 printed += syscall__scnprintf_args(sc, msg + printed, 1024 - printed,
1637 args, trace, thread);
/* These syscalls do not return, so their line is emitted at entry time. */
1639 if (!strcmp(sc->name, "exit_group") || !strcmp(sc->name, "exit")) {
1640 if (!trace->duration_filter && !trace->summary_only) {
1641 trace__fprintf_entry_head(trace, thread, 1, sample->time, trace->output);
1642 fprintf(trace->output, "%-70s\n", ttrace->entry_str);
1645 ttrace->entry_pending = true;
/*
 * Handler for the syscall exit tracepoint.
 *
 * Updates the per-thread statistics, associates the pathname captured by
 * the vfs_getname probe with the fd returned by a successful open, computes
 * the syscall duration from the recorded entry time, applies the duration
 * and summary-only filters, and finally prints the line: the pending entry
 * text (or a "... [continued]: name()" marker) followed by the return value
 * formatted according to the syscall's format description.
 * NOTE(review): several guards and goto targets are not visible here.
 */
1650 static int trace__sys_exit(struct trace *trace, struct perf_evsel *evsel,
1651 struct perf_sample *sample)
1655 struct thread *thread;
1656 int id = perf_evsel__sc_tp_uint(evsel, id, sample);
1657 struct syscall *sc = trace__syscall_info(trace, evsel, id);
1658 struct thread_trace *ttrace;
1666 thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
1667 ttrace = thread__trace(thread, trace->output);
1672 thread__update_stats(ttrace, id, sample);
1674 ret = perf_evsel__sc_tp_uint(evsel, ret, sample);
/* A successful open: remember which path the returned fd refers to. */
1676 if (id == trace->audit.open_id && ret >= 0 && trace->last_vfs_getname) {
1677 trace__set_fd_pathname(thread, ret, trace->last_vfs_getname);
1678 trace->last_vfs_getname = NULL;
1679 ++trace->stats.vfs_getname;
1682 ttrace = thread->priv;
1684 ttrace->exit_time = sample->time;
/* Without an entry timestamp no duration can be computed. */
1686 if (ttrace->entry_time) {
1687 duration = sample->time - ttrace->entry_time;
1688 if (trace__filter_duration(trace, duration))
1690 } else if (trace->duration_filter)
1693 if (trace->summary_only)
1696 trace__fprintf_entry_head(trace, thread, duration, sample->time, trace->output);
1698 if (ttrace->entry_pending) {
1699 fprintf(trace->output, "%-70s", ttrace->entry_str);
1701 fprintf(trace->output, " ... [");
1702 color_fprintf(trace->output, PERF_COLOR_YELLOW, "continued");
1703 fprintf(trace->output, "]: %s()", sc->name);
/* Return value formatting depends on the syscall's format description. */
1706 if (sc->fmt == NULL) {
1708 fprintf(trace->output, ") = %d", ret);
1709 } else if (ret < 0 && sc->fmt->errmsg) {
1711 const char *emsg = strerror_r(-ret, bf, sizeof(bf)),
1712 *e = audit_errno_to_name(-ret);
1714 fprintf(trace->output, ") = -1 %s %s", e, emsg);
1715 } else if (ret == 0 && sc->fmt->timeout)
1716 fprintf(trace->output, ") = 0 Timeout");
1717 else if (sc->fmt->hexret)
1718 fprintf(trace->output, ") = %#x", ret);
1722 fputc('\n', trace->output);
1724 ttrace->entry_pending = false;
1729 static int trace__vfs_getname(struct trace *trace, struct perf_evsel *evsel,
1730 struct perf_sample *sample)
1732 trace->last_vfs_getname = perf_evsel__rawptr(evsel, sample, "pathname");
/*
 * Handler for the sched:sched_stat_runtime tracepoint.
 *
 * Converts the event's "runtime" field by dividing it by NSEC_PER_MSEC and
 * adds the result to both the thread's and the whole trace's runtime_ms
 * accumulators. The fprintf() dumps the raw event fields.
 * NOTE(review): the dump looks like a failure path taken when no
 * thread_trace is available - confirm; the guarding lines are not visible.
 */
1736 static int trace__sched_stat_runtime(struct trace *trace, struct perf_evsel *evsel,
1737 struct perf_sample *sample)
1739 u64 runtime = perf_evsel__intval(evsel, sample, "runtime");
1740 double runtime_ms = (double)runtime / NSEC_PER_MSEC;
1741 struct thread *thread = machine__findnew_thread(trace->host,
1744 struct thread_trace *ttrace = thread__trace(thread, trace->output);
1749 ttrace->runtime_ms += runtime_ms;
1750 trace->runtime_ms += runtime_ms;
1754 fprintf(trace->output, "%s: comm=%s,pid=%u,runtime=%" PRIu64 ",vruntime=%" PRIu64 ")\n",
1756 perf_evsel__strval(evsel, sample, "comm"),
1757 (pid_t)perf_evsel__intval(evsel, sample, "pid"),
1759 perf_evsel__intval(evsel, sample, "vruntime"));
1763 static bool skip_sample(struct trace *trace, struct perf_sample *sample)
1765 if ((trace->pid_list && intlist__find(trace->pid_list, sample->pid)) ||
1766 (trace->tid_list && intlist__find(trace->tid_list, sample->tid)))
1769 if (trace->pid_list || trace->tid_list)
/*
 * perf_tool sample callback (installed by trace__replay()).
 *
 * Drops samples rejected by skip_sample(), latches the first sample's time
 * as base_time unless full timestamps were requested, and dispatches the
 * sample to the tracepoint handler attached to @evsel.
 * NOTE(review): the guard around the handler call and the return value are
 * not visible here.
 */
1775 static int trace__process_sample(struct perf_tool *tool,
1776 union perf_event *event __maybe_unused,
1777 struct perf_sample *sample,
1778 struct perf_evsel *evsel,
1779 struct machine *machine __maybe_unused)
1781 struct trace *trace = container_of(tool, struct trace, tool);
1784 tracepoint_handler handler = evsel->handler;
1786 if (skip_sample(trace, sample))
/* Timestamps are printed relative to the first sample seen. */
1789 if (!trace->full_time && trace->base_time == 0)
1790 trace->base_time = sample->time;
1794 handler(trace, evsel, sample);
/*
 * Turn the textual pid and tid targets (trace->opts.target.pid / .tid) into
 * the intlists consulted by skip_sample(). Each list is only built when the
 * corresponding string is set; a list that cannot be created is reported
 * with pr_err().
 */
1800 static int parse_target_str(struct trace *trace)
1802 if (trace->opts.target.pid) {
1803 trace->pid_list = intlist__new(trace->opts.target.pid);
1804 if (trace->pid_list == NULL) {
1805 pr_err("Error parsing process id string\n");
1810 if (trace->opts.target.tid) {
1811 trace->tid_list = intlist__new(trace->opts.target.tid);
1812 if (trace->tid_list == NULL) {
1813 pr_err("Error parsing thread id string\n");
/*
 * Implement "perf trace record": build an argument vector made of the fixed
 * record_args, one event string selecting the syscall entry/exit
 * tracepoints, and the caller's remaining arguments, then hand it to
 * cmd_record().
 *
 * The event string uses the raw_syscalls tracepoints when they exist and
 * falls back to the syscalls ones; if neither exists an error is printed.
 * NOTE(review): no free() of rec_argv is visible on either the error path
 * or after cmd_record() returns - confirm whether it is leaked.
 */
1821 static int trace__record(int argc, const char **argv)
1823 unsigned int rec_argc, i, j;
1824 const char **rec_argv;
1825 const char * const record_args[] = {
1833 /* +1 is for the event string below */
1834 rec_argc = ARRAY_SIZE(record_args) + 1 + argc;
/* One extra zeroed slot keeps the vector NULL-terminated. */
1835 rec_argv = calloc(rec_argc + 1, sizeof(char *));
1837 if (rec_argv == NULL)
1840 for (i = 0; i < ARRAY_SIZE(record_args); i++)
1841 rec_argv[i] = record_args[i];
1843 /* event string may be different for older kernels - e.g., RHEL6 */
1844 if (is_valid_tracepoint("raw_syscalls:sys_enter"))
1845 rec_argv[i] = "raw_syscalls:sys_enter,raw_syscalls:sys_exit";
1846 else if (is_valid_tracepoint("syscalls:sys_enter"))
1847 rec_argv[i] = "syscalls:sys_enter,syscalls:sys_exit";
1849 pr_err("Neither raw_syscalls nor syscalls events exist.\n");
/* Append the user's own arguments after the fixed ones. */
1854 for (j = 0; j < (unsigned int)argc; j++, i++)
1855 rec_argv[i] = argv[j];
1857 return cmd_record(i, rec_argv, NULL);
1860 static size_t trace__fprintf_thread_summary(struct trace *trace, FILE *fp);
1862 static void perf_evlist__add_vfs_getname(struct perf_evlist *evlist)
1864 struct perf_evsel *evsel = perf_evsel__newtp("probe", "vfs_getname");
1868 if (perf_evsel__field(evsel, "pathname") == NULL) {
1869 perf_evsel__delete(evsel);
1873 evsel->handler = trace__vfs_getname;
1874 perf_evlist__add(evlist, evsel);
/*
 * Live tracing: set up the events, optionally start the workload given in
 * @argv, and print syscalls as their events arrive until the run is done.
 *
 * Visible phases: create the evlist and add the syscall enter/exit,
 * vfs_getname and sched_stat_runtime tracepoints; create the cpu/thread
 * maps; initialize symbols; install the signal handlers; prepare the
 * workload; open, mmap and enable the events; then loop reading every mmap
 * ring, dispatching samples to their handlers and polling when nothing new
 * arrived. Afterwards the events are disabled, the summary and tool
 * statistics are printed, and the evlist is deleted.
 * NOTE(review): many conditions, labels and gotos are not visible here
 * (e.g. what guards sched_stat_runtime, the workload calls and the
 * summary); confirm them before relying on the order above.
 */
1877 static int trace__run(struct trace *trace, int argc, const char **argv)
1879 struct perf_evlist *evlist = perf_evlist__new();
1880 struct perf_evsel *evsel;
1882 unsigned long before;
/* Any remaining argument is taken as a command to run. */
1883 const bool forks = argc > 0;
1887 if (evlist == NULL) {
1888 fprintf(trace->output, "Not enough memory to run!\n");
1892 if (perf_evlist__add_syscall_newtp(evlist, trace__sys_enter, trace__sys_exit))
1895 perf_evlist__add_vfs_getname(evlist);
1898 perf_evlist__add_newtp(evlist, "sched", "sched_stat_runtime",
1899 trace__sched_stat_runtime))
1902 err = perf_evlist__create_maps(evlist, &trace->opts.target);
1904 fprintf(trace->output, "Problems parsing the target to trace, check your options!\n");
1905 goto out_delete_evlist;
1908 err = trace__symbols_init(trace, evlist);
1910 fprintf(trace->output, "Problems initializing symbol libraries!\n");
1911 goto out_delete_evlist;
1914 perf_evlist__config(evlist, &trace->opts);
/* Both signals end the event loop through sig_handler(). */
1916 signal(SIGCHLD, sig_handler);
1917 signal(SIGINT, sig_handler);
1920 err = perf_evlist__prepare_workload(evlist, &trace->opts.target,
1923 fprintf(trace->output, "Couldn't run the workload!\n");
1924 goto out_delete_evlist;
1928 err = perf_evlist__open(evlist);
1930 goto out_error_open;
1932 err = perf_evlist__mmap(evlist, trace->opts.mmap_pages, false);
1934 fprintf(trace->output, "Couldn't mmap the events: %s\n", strerror(errno));
1935 goto out_delete_evlist;
1938 perf_evlist__enable(evlist);
1941 perf_evlist__start_workload(evlist);
/* Thread ids are printed when more than one thread can show up. */
1943 trace->multiple_threads = evlist->threads->map[0] == -1 || evlist->threads->nr > 1;
/* Snapshot of the event count, used to detect an idle pass over the rings. */
1945 before = trace->nr_events;
1947 for (i = 0; i < evlist->nr_mmaps; i++) {
1948 union perf_event *event;
1950 while ((event = perf_evlist__mmap_read(evlist, i)) != NULL) {
1951 const u32 type = event->header.type;
1952 tracepoint_handler handler;
1953 struct perf_sample sample;
1957 err = perf_evlist__parse_sample(evlist, event, &sample);
1959 fprintf(trace->output, "Can't parse sample, err = %d, skipping...\n", err);
1963 if (!trace->full_time && trace->base_time == 0)
1964 trace->base_time = sample.time;
/* Non-sample records only update machine state. */
1966 if (type != PERF_RECORD_SAMPLE) {
1967 trace__process_event(trace, trace->host, event, &sample);
1971 evsel = perf_evlist__id2evsel(evlist, sample.id);
1972 if (evsel == NULL) {
1973 fprintf(trace->output, "Unknown tp ID %" PRIu64 ", skipping...\n", sample.id);
1977 if (sample.raw_data == NULL) {
1978 fprintf(trace->output, "%s sample with no payload for tid: %d, cpu %d, raw_size=%d, skipping...\n",
1979 perf_evsel__name(evsel), sample.tid,
1980 sample.cpu, sample.raw_size);
1984 handler = evsel->handler;
1985 handler(trace, evsel, &sample);
1987 perf_evlist__mmap_consume(evlist, i);
/* Nothing new was read: wait for data, with a bounded wait once done is set. */
1994 if (trace->nr_events == before) {
1995 int timeout = done ? 100 : -1;
1997 if (poll(evlist->pollfd, evlist->nr_fds, timeout) > 0)
2004 perf_evlist__disable(evlist);
2008 trace__fprintf_thread_summary(trace, trace->output);
2010 if (trace->show_tool_stats) {
2011 fprintf(trace->output, "Stats:\n "
2012 " vfs_getname : %" PRIu64 "\n"
2013 " proc_getname: %" PRIu64 "\n",
2014 trace->stats.vfs_getname,
2015 trace->stats.proc_getname);
2020 perf_evlist__delete(evlist);
2022 trace->live = false;
/* Error reporting for failures to create or open the events. */
2025 char errbuf[BUFSIZ];
2028 perf_evlist__strerror_tp(evlist, errno, errbuf, sizeof(errbuf));
2032 perf_evlist__strerror_open(evlist, errno, errbuf, sizeof(errbuf));
2035 fprintf(trace->output, "%s\n", errbuf);
2036 goto out_delete_evlist;
/*
 * Replay mode: process the events stored in a perf data file instead of
 * tracing live.
 *
 * Installs the perf_tool callbacks (samples go to trace__process_sample(),
 * the rest to the generic perf_event__process_* helpers), requests
 * timestamp-ordered delivery, opens the session and uses its host machine.
 * The syscall enter/exit tracepoints are then located - preferring the
 * raw_syscalls names and falling back to the syscalls ones - and bound to
 * trace__sys_enter()/trace__sys_exit(). Finally the pid/tid filters are
 * parsed, the events are processed, and the summary is printed when it was
 * requested and processing did not fail.
 * NOTE(review): error checks, gotos and the return value are not visible.
 */
2040 static int trace__replay(struct trace *trace)
2042 const struct perf_evsel_str_handler handlers[] = {
2043 { "probe:vfs_getname", trace__vfs_getname, },
2045 struct perf_data_file file = {
2047 .mode = PERF_DATA_MODE_READ,
2049 struct perf_session *session;
2050 struct perf_evsel *evsel;
2053 trace->tool.sample = trace__process_sample;
2054 trace->tool.mmap = perf_event__process_mmap;
2055 trace->tool.mmap2 = perf_event__process_mmap2;
2056 trace->tool.comm = perf_event__process_comm;
2057 trace->tool.exit = perf_event__process_exit;
2058 trace->tool.fork = perf_event__process_fork;
2059 trace->tool.attr = perf_event__process_attr;
2060 trace->tool.tracing_data = perf_event__process_tracing_data;
2061 trace->tool.build_id = perf_event__process_build_id;
2063 trace->tool.ordered_samples = true;
2064 trace->tool.ordering_requires_timestamps = true;
2066 /* add tid to output */
2067 trace->multiple_threads = true;
2069 if (symbol__init() < 0)
2072 session = perf_session__new(&file, false, &trace->tool);
2073 if (session == NULL)
/* The machine state comes from the recorded session, not from this host. */
2076 trace->host = &session->machines.host;
2078 err = perf_session__set_tracepoints_handlers(session, handlers);
2082 evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
2083 "raw_syscalls:sys_enter");
2084 /* older kernels have syscalls tp versus raw_syscalls */
2086 evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
2087 "syscalls:sys_enter");
2088 if (evsel == NULL) {
2089 pr_err("Data file does not have raw_syscalls:sys_enter event\n");
2093 if (perf_evsel__init_syscall_tp(evsel, trace__sys_enter) < 0 ||
2094 perf_evsel__init_sc_tp_ptr_field(evsel, args)) {
2095 pr_err("Error during initialize raw_syscalls:sys_enter event\n");
/* Same lookup and fallback for the exit tracepoint. */
2099 evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
2100 "raw_syscalls:sys_exit");
2102 evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
2103 "syscalls:sys_exit");
2104 if (evsel == NULL) {
2105 pr_err("Data file does not have raw_syscalls:sys_exit event\n");
2109 if (perf_evsel__init_syscall_tp(evsel, trace__sys_exit) < 0 ||
2110 perf_evsel__init_sc_tp_uint_field(evsel, ret)) {
2111 pr_err("Error during initialize raw_syscalls:sys_exit event\n");
2115 err = parse_target_str(trace);
2121 err = perf_session__process_events(session, &trace->tool);
2123 pr_err("Failed to process events, error %d", err);
2125 else if (trace->summary)
2126 trace__fprintf_thread_summary(trace, trace->output);
2129 perf_session__delete(session);
/*
 * Print the heading that introduces the per-thread summary to @fp.
 * Returns fprintf()'s result.
 */
static size_t trace__fprintf_threads_header(FILE *fp)
{
	return fprintf(fp, "\n Summary of events:\n\n");
}
/*
 * Print the per-syscall statistics table of one thread to @fp.
 *
 * After a column header, the nodes of ttrace->syscall_stats are walked in
 * list order; each node's priv holds the struct stats of one syscall and
 * node->i is the syscall number, used to look up its name. min, max and avg
 * are divided by NSEC_PER_MSEC for display, and the last column is the
 * standard deviation as a percentage of the average (0 when avg is 0).
 * Accumulates the number of characters printed in 'printed'.
 */
2143 static size_t thread__dump_stats(struct thread_trace *ttrace,
2144 struct trace *trace, FILE *fp)
2146 struct stats *stats;
2149 struct int_node *inode = intlist__first(ttrace->syscall_stats);
2154 printed += fprintf(fp, "\n");
2156 printed += fprintf(fp, " syscall calls min avg max stddev\n");
2157 printed += fprintf(fp, " (msec) (msec) (msec) (%%)\n");
2158 printed += fprintf(fp, " --------------- -------- --------- --------- --------- ------\n");
2160 /* each int_node is a syscall */
2162 stats = inode->priv;
2164 double min = (double)(stats->min) / NSEC_PER_MSEC;
2165 double max = (double)(stats->max) / NSEC_PER_MSEC;
2166 double avg = avg_stats(stats);
2168 u64 n = (u64) stats->n;
/* The percentage is computed before avg itself is rescaled. */
2170 pct = avg ? 100.0 * stddev_stats(stats)/avg : 0.0;
2171 avg /= NSEC_PER_MSEC;
2173 sc = &trace->syscalls.table[inode->i];
2174 printed += fprintf(fp, " %-15s", sc->name);
2175 printed += fprintf(fp, " %8" PRIu64 " %9.3f %9.3f",
2177 printed += fprintf(fp, " %9.3f %9.2f%%\n", max, pct);
2180 inode = intlist__next(inode);
2183 printed += fprintf(fp, "\n\n");
2188 /* struct used to pass data to per-thread function */
/*
 * Handed as the opaque 'priv' argument to trace__fprintf_one_thread(),
 * which reads the output stream, the owning trace and a running count of
 * printed characters from it.
 */
2189 struct summary_data {
2191 struct trace *trace;
2195 static int trace__fprintf_one_thread(struct thread *thread, void *priv)
2197 struct summary_data *data = priv;
2198 FILE *fp = data->fp;
2199 size_t printed = data->printed;
2200 struct trace *trace = data->trace;
2201 struct thread_trace *ttrace = thread->priv;
2207 ratio = (double)ttrace->nr_events / trace->nr_events * 100.0;
2209 printed += fprintf(fp, " %s (%d), ", thread__comm_str(thread), thread->tid);
2210 printed += fprintf(fp, "%lu events, ", ttrace->nr_events);
2211 printed += fprintf(fp, "%.1f%%", ratio);
2212 printed += fprintf(fp, ", %.3f msec\n", ttrace->runtime_ms);
2213 printed += thread__dump_stats(ttrace, trace, fp);
2215 data->printed += printed;
2220 static size_t trace__fprintf_thread_summary(struct trace *trace, FILE *fp)
2222 struct summary_data data = {
2226 data.printed = trace__fprintf_threads_header(fp);
2228 machine__for_each_thread(trace->host, trace__fprintf_one_thread, &data);
2230 return data.printed;
2233 static int trace__set_duration(const struct option *opt, const char *str,
2234 int unset __maybe_unused)
2236 struct trace *trace = opt->value;
2238 trace->duration_filter = atof(str);
/*
 * Open @filename for writing and make it the trace's output stream.
 *
 * If the file already exists and is not empty it is first renamed to
 * "<filename>.old" so the previous contents are kept. Returns 0 on success
 * or the negated errno when fopen() fails.
 * NOTE(review): the result of rename() is not checked on the visible lines.
 */
2242 static int trace__open_output(struct trace *trace, const char *filename)
2246 if (!stat(filename, &st) && st.st_size) {
2247 char oldname[PATH_MAX];
2249 scnprintf(oldname, sizeof(oldname), "%s.old", filename);
2251 rename(filename, oldname);
2254 trace->output = fopen(filename, "w");
2256 return trace->output == NULL ? -errno : 0;
2259 int cmd_trace(int argc, const char **argv, const char *prefix __maybe_unused)
2261 const char * const trace_usage[] = {
2262 "perf trace [<options>] [<command>]",
2263 "perf trace [<options>] -- <command> [<options>]",
2264 "perf trace record [<options>] [<command>]",
2265 "perf trace record [<options>] -- <command> [<options>]",
2268 struct trace trace = {
2270 .machine = audit_detect_machine(),
2271 .open_id = audit_name_to_syscall("open", trace.audit.machine),
2281 .user_freq = UINT_MAX,
2282 .user_interval = ULLONG_MAX,
2283 .no_buffering = true,
2289 const char *output_name = NULL;
2290 const char *ev_qualifier_str = NULL;
2291 const struct option trace_options[] = {
2292 OPT_BOOLEAN(0, "comm", &trace.show_comm,
2293 "show the thread COMM next to its id"),
2294 OPT_BOOLEAN(0, "tool_stats", &trace.show_tool_stats, "show tool stats"),
2295 OPT_STRING('e', "expr", &ev_qualifier_str, "expr",
2296 "list of events to trace"),
2297 OPT_STRING('o', "output", &output_name, "file", "output file name"),
2298 OPT_STRING('i', "input", &input_name, "file", "Analyze events in file"),
2299 OPT_STRING('p', "pid", &trace.opts.target.pid, "pid",
2300 "trace events on existing process id"),
2301 OPT_STRING('t', "tid", &trace.opts.target.tid, "tid",
2302 "trace events on existing thread id"),
2303 OPT_BOOLEAN('a', "all-cpus", &trace.opts.target.system_wide,
2304 "system-wide collection from all CPUs"),
2305 OPT_STRING('C', "cpu", &trace.opts.target.cpu_list, "cpu",
2306 "list of cpus to monitor"),
2307 OPT_BOOLEAN(0, "no-inherit", &trace.opts.no_inherit,
2308 "child tasks do not inherit counters"),
2309 OPT_CALLBACK('m', "mmap-pages", &trace.opts.mmap_pages, "pages",
2310 "number of mmap data pages",
2311 perf_evlist__parse_mmap_pages),
2312 OPT_STRING('u', "uid", &trace.opts.target.uid_str, "user",
2314 OPT_CALLBACK(0, "duration", &trace, "float",
2315 "show only events with duration > N.M ms",
2316 trace__set_duration),
2317 OPT_BOOLEAN(0, "sched", &trace.sched, "show blocking scheduler events"),
2318 OPT_INCR('v', "verbose", &verbose, "be more verbose"),
2319 OPT_BOOLEAN('T', "time", &trace.full_time,
2320 "Show full timestamp, not time relative to first start"),
2321 OPT_BOOLEAN('s', "summary", &trace.summary_only,
2322 "Show only syscall summary with statistics"),
2323 OPT_BOOLEAN('S', "with-summary", &trace.summary,
2324 "Show all syscalls and summary with statistics"),
2330 if ((argc > 1) && (strcmp(argv[1], "record") == 0))
2331 return trace__record(argc-2, &argv[2]);
2333 argc = parse_options(argc, argv, trace_options, trace_usage, 0);
2335 /* summary_only implies summary option, but don't overwrite summary if set */
2336 if (trace.summary_only)
2337 trace.summary = trace.summary_only;
2339 if (output_name != NULL) {
2340 err = trace__open_output(&trace, output_name);
2342 perror("failed to create output file");
2347 if (ev_qualifier_str != NULL) {
2348 const char *s = ev_qualifier_str;
2350 trace.not_ev_qualifier = *s == '!';
2351 if (trace.not_ev_qualifier)
2353 trace.ev_qualifier = strlist__new(true, s);
2354 if (trace.ev_qualifier == NULL) {
2355 fputs("Not enough memory to parse event qualifier",
2362 err = target__validate(&trace.opts.target);
2364 target__strerror(&trace.opts.target, err, bf, sizeof(bf));
2365 fprintf(trace.output, "%s", bf);
2369 err = target__parse_uid(&trace.opts.target);
2371 target__strerror(&trace.opts.target, err, bf, sizeof(bf));
2372 fprintf(trace.output, "%s", bf);
2376 if (!argc && target__none(&trace.opts.target))
2377 trace.opts.target.system_wide = true;
2380 err = trace__replay(&trace);
2382 err = trace__run(&trace, argc, argv);
2385 if (output_name != NULL)
2386 fclose(trace.output);