perf trace: Beautify keyctl's option arg
[linux-2.6-block.git] / tools / perf / builtin-trace.c
1 #include <traceevent/event-parse.h>
2 #include "builtin.h"
3 #include "util/color.h"
4 #include "util/debug.h"
5 #include "util/evlist.h"
6 #include "util/exec_cmd.h"
7 #include "util/machine.h"
8 #include "util/session.h"
9 #include "util/thread.h"
10 #include "util/parse-options.h"
11 #include "util/strlist.h"
12 #include "util/intlist.h"
13 #include "util/thread_map.h"
14 #include "util/stat.h"
15 #include "trace-event.h"
16 #include "util/parse-events.h"
17
18 #include <libaudit.h>
19 #include <stdlib.h>
20 #include <sys/mman.h>
21 #include <linux/futex.h>
22
23 /* For older distros: */
24 #ifndef MAP_STACK
25 # define MAP_STACK              0x20000
26 #endif
27
28 #ifndef MADV_HWPOISON
29 # define MADV_HWPOISON          100
30 #endif
31
32 #ifndef MADV_MERGEABLE
33 # define MADV_MERGEABLE         12
34 #endif
35
36 #ifndef MADV_UNMERGEABLE
37 # define MADV_UNMERGEABLE       13
38 #endif
39
40 #ifndef EFD_SEMAPHORE
41 # define EFD_SEMAPHORE          1
42 #endif
43
44 #ifndef EFD_NONBLOCK
45 # define EFD_NONBLOCK           00004000
46 #endif
47
48 #ifndef EFD_CLOEXEC
49 # define EFD_CLOEXEC            02000000
50 #endif
51
52 #ifndef O_CLOEXEC
53 # define O_CLOEXEC              02000000
54 #endif
55
56 #ifndef SOCK_DCCP
57 # define SOCK_DCCP              6
58 #endif
59
60 #ifndef SOCK_CLOEXEC
61 # define SOCK_CLOEXEC           02000000
62 #endif
63
64 #ifndef SOCK_NONBLOCK
65 # define SOCK_NONBLOCK          00004000
66 #endif
67
68 #ifndef MSG_CMSG_CLOEXEC
69 # define MSG_CMSG_CLOEXEC       0x40000000
70 #endif
71
72 #ifndef PERF_FLAG_FD_NO_GROUP
73 # define PERF_FLAG_FD_NO_GROUP          (1UL << 0)
74 #endif
75
76 #ifndef PERF_FLAG_FD_OUTPUT
77 # define PERF_FLAG_FD_OUTPUT            (1UL << 1)
78 #endif
79
80 #ifndef PERF_FLAG_PID_CGROUP
81 # define PERF_FLAG_PID_CGROUP           (1UL << 2) /* pid=cgroup id, per-cpu mode only */
82 #endif
83
84 #ifndef PERF_FLAG_FD_CLOEXEC
85 # define PERF_FLAG_FD_CLOEXEC           (1UL << 3) /* O_CLOEXEC */
86 #endif
87
88
/*
 * Accessor for one field of a tracepoint's raw payload: where the field lives
 * and which function reads it. Exactly one of the two function pointers is
 * meaningful for a given field, since they share storage.
 */
89 struct tp_field {
        /* byte offset of the field, added to sample->raw_data by the readers */
90         int offset;
91         union {
                /* reads the field as an unsigned integer widened to u64 */
92                 u64 (*integer)(struct tp_field *field, struct perf_sample *sample);
                /* returns the address of the field inside the raw data */
93                 void *(*pointer)(struct tp_field *field, struct perf_sample *sample);
94         };
95 };
96
/*
 * TP_UINT_FIELD(bits) defines tp_field__u<bits>(), a reader that copies an
 * unsigned integer of the given width from sample->raw_data + field->offset
 * and returns it widened to u64. The value is fetched with memcpy() into a
 * local of the right width rather than by dereferencing a cast pointer.
 *
 * Instantiated below for 8, 16, 32 and 64 bit fields.
 */
97 #define TP_UINT_FIELD(bits) \
98 static u64 tp_field__u##bits(struct tp_field *field, struct perf_sample *sample) \
99 { \
100         u##bits value; \
101         memcpy(&value, sample->raw_data + field->offset, sizeof(value)); \
102         return value;  \
103 }
104
105 TP_UINT_FIELD(8);
106 TP_UINT_FIELD(16);
107 TP_UINT_FIELD(32);
108 TP_UINT_FIELD(64);
109
/*
 * TP_UINT_FIELD__SWAPPED(bits) defines tp_field__swapped_u<bits>(), which
 * reads the field the same way as tp_field__u<bits>() and then byte-swaps it
 * with bswap_<bits>(). These are the readers tp_field__init_uint() selects
 * when needs_swap is set.
 *
 * Only 16, 32 and 64 bit variants are instantiated; a single byte has nothing
 * to swap.
 */
110 #define TP_UINT_FIELD__SWAPPED(bits) \
111 static u64 tp_field__swapped_u##bits(struct tp_field *field, struct perf_sample *sample) \
112 { \
113         u##bits value; \
114         memcpy(&value, sample->raw_data + field->offset, sizeof(value)); \
115         return bswap_##bits(value);\
116 }
117
118 TP_UINT_FIELD__SWAPPED(16);
119 TP_UINT_FIELD__SWAPPED(32);
120 TP_UINT_FIELD__SWAPPED(64);
121
122 static int tp_field__init_uint(struct tp_field *field,
123                                struct format_field *format_field,
124                                bool needs_swap)
125 {
126         field->offset = format_field->offset;
127
128         switch (format_field->size) {
129         case 1:
130                 field->integer = tp_field__u8;
131                 break;
132         case 2:
133                 field->integer = needs_swap ? tp_field__swapped_u16 : tp_field__u16;
134                 break;
135         case 4:
136                 field->integer = needs_swap ? tp_field__swapped_u32 : tp_field__u32;
137                 break;
138         case 8:
139                 field->integer = needs_swap ? tp_field__swapped_u64 : tp_field__u64;
140                 break;
141         default:
142                 return -1;
143         }
144
145         return 0;
146 }
147
148 static void *tp_field__ptr(struct tp_field *field, struct perf_sample *sample)
149 {
150         return sample->raw_data + field->offset;
151 }
152
153 static int tp_field__init_ptr(struct tp_field *field, struct format_field *format_field)
154 {
155         field->offset = format_field->offset;
156         field->pointer = tp_field__ptr;
157         return 0;
158 }
159
/*
 * Per-evsel accessors for the syscall tracepoints; an instance is allocated
 * into evsel->priv by perf_evsel__init_syscall_tp().
 */
160 struct syscall_tp {
        /* reader for the tracepoint's "id" field */
161         struct tp_field id;
        /*
         * 'args' and 'ret' share storage, so an evsel uses one or the other.
         * NOTE(review): presumably 'args' for the enter event and 'ret' for
         * the exit event - confirm against the callers.
         */
162         union {
163                 struct tp_field args, ret;
164         };
165 };
166
167 static int perf_evsel__init_tp_uint_field(struct perf_evsel *evsel,
168                                           struct tp_field *field,
169                                           const char *name)
170 {
171         struct format_field *format_field = perf_evsel__field(evsel, name);
172
173         if (format_field == NULL)
174                 return -1;
175
176         return tp_field__init_uint(field, format_field, evsel->needs_swap);
177 }
178
179 #define perf_evsel__init_sc_tp_uint_field(evsel, name) \
180         ({ struct syscall_tp *sc = evsel->priv;\
181            perf_evsel__init_tp_uint_field(evsel, &sc->name, #name); })
182
/*
 * Look up the tracepoint field called @name in @evsel and set up @field to
 * hand out a pointer to it inside the raw sample data.
 *
 * Returns -1 when the event has no such field, otherwise whatever
 * tp_field__init_ptr() returns.
 */
static int perf_evsel__init_tp_ptr_field(struct perf_evsel *evsel,
					 struct tp_field *field,
					 const char *name)
{
	struct format_field *fmt = perf_evsel__field(evsel, name);

	return fmt ? tp_field__init_ptr(field, fmt) : -1;
}

/*
 * Convenience wrapper: initialise member @name of the struct syscall_tp kept
 * in evsel->priv from the tracepoint field that has the same name.
 */
#define perf_evsel__init_sc_tp_ptr_field(evsel, name) \
	({ struct syscall_tp *sc = evsel->priv;\
	   perf_evsel__init_tp_ptr_field(evsel, &sc->name, #name); })
198
199 static void perf_evsel__delete_priv(struct perf_evsel *evsel)
200 {
201         zfree(&evsel->priv);
202         perf_evsel__delete(evsel);
203 }
204
205 static int perf_evsel__init_syscall_tp(struct perf_evsel *evsel, void *handler)
206 {
207         evsel->priv = malloc(sizeof(struct syscall_tp));
208         if (evsel->priv != NULL) {
209                 if (perf_evsel__init_sc_tp_uint_field(evsel, id))
210                         goto out_delete;
211
212                 evsel->handler = handler;
213                 return 0;
214         }
215
216         return -ENOMEM;
217
218 out_delete:
219         zfree(&evsel->priv);
220         return -ENOENT;
221 }
222
/*
 * Create the evsel for the syscall tracepoint named @direction and prepare it
 * with perf_evsel__init_syscall_tp().
 *
 * Returns the new evsel, or NULL if the tracepoint could not be created or
 * initialised (in which case the evsel is deleted).
 */
static struct perf_evsel *perf_evsel__syscall_newtp(const char *direction, void *handler)
{
	struct perf_evsel *evsel = perf_evsel__newtp("raw_syscalls", direction);

	/* older kernels (e.g., RHEL6) use syscalls:{enter,exit} */
	if (evsel == NULL)
		evsel = perf_evsel__newtp("syscalls", direction);

	if (evsel == NULL)
		return NULL;

	if (perf_evsel__init_syscall_tp(evsel, handler) == 0)
		return evsel;

	perf_evsel__delete_priv(evsel);
	return NULL;
}
242
/*
 * Read member @name of the struct syscall_tp stored in evsel->priv from
 * @sample, using the integer reader installed for that member. Evaluates to
 * the u64 the reader returns.
 */
243 #define perf_evsel__sc_tp_uint(evsel, name, sample) \
244         ({ struct syscall_tp *fields = evsel->priv; \
245            fields->name.integer(&fields->name, sample); })
246
/*
 * Same as above, but through the pointer reader: evaluates to the address of
 * the member's data inside @sample.
 */
247 #define perf_evsel__sc_tp_ptr(evsel, name, sample) \
248         ({ struct syscall_tp *fields = evsel->priv; \
249            fields->name.pointer(&fields->name, sample); })
250
/*
 * Context handed to the per-argument formatters (the syscall_arg__scnprintf_*
 * functions) for one syscall argument.
 */
251 struct syscall_arg {
        /* raw value of the argument being formatted */
252         unsigned long val;
253         struct thread *thread;
254         struct trace  *trace;
        /* formatter specific data, e.g. a struct strarray for SCA_STRARRAY */
255         void          *parm;
        /* position of this argument; open_flags uses idx + 1 for the next one */
256         u8            idx;
        /*
         * Bit set that formatters OR into, one bit per argument position
         * (see "Mask the mode parm" in the open flags formatter).
         * NOTE(review): presumably the masked arguments are not printed -
         * confirm in the code that consumes the mask.
         */
257         u8            mask;
258 };
259
/*
 * Table mapping small integers to names. The formatter subtracts 'offset'
 * from the argument value and uses the result as an index into 'entries',
 * provided it is within [0, nr_entries).
 */
260 struct strarray {
261         int         offset;
262         int         nr_entries;
263         const char **entries;
264 };
265
/*
 * Define 'strarray__<array>' describing the string array @array, with the
 * first entry standing for value 0 (offset is left zero-initialised).
 */
266 #define DEFINE_STRARRAY(array) struct strarray strarray__##array = { \
267         .nr_entries = ARRAY_SIZE(array), \
268         .entries = array, \
269 }
270
/*
 * Same as DEFINE_STRARRAY, for tables whose first entry stands for the
 * value @off instead of 0.
 */
271 #define DEFINE_STRARRAY_OFFSET(array, off) struct strarray strarray__##array = { \
272         .offset     = off, \
273         .nr_entries = ARRAY_SIZE(array), \
274         .entries = array, \
275 }
276
277 static size_t __syscall_arg__scnprintf_strarray(char *bf, size_t size,
278                                                 const char *intfmt,
279                                                 struct syscall_arg *arg)
280 {
281         struct strarray *sa = arg->parm;
282         int idx = arg->val - sa->offset;
283
284         if (idx < 0 || idx >= sa->nr_entries)
285                 return scnprintf(bf, size, intfmt, arg->val);
286
287         return scnprintf(bf, size, "%s", sa->entries[idx]);
288 }
289
/*
 * String table formatter that falls back to a decimal number for values the
 * table does not cover.
 */
static size_t syscall_arg__scnprintf_strarray(char *bf, size_t size,
					      struct syscall_arg *arg)
{
	const char *fallback_fmt = "%d";

	return __syscall_arg__scnprintf_strarray(bf, size, fallback_fmt, arg);
}
295
296 #define SCA_STRARRAY syscall_arg__scnprintf_strarray
297
298 #if defined(__i386__) || defined(__x86_64__)
299 /*
300  * FIXME: Make this available to all arches as soon as the ioctl beautifier
301  *        gets rewritten to support all arches.
302  */
/*
 * String table formatter that falls back to a "0x" prefixed hexadecimal
 * number for values the table does not cover.
 */
static size_t syscall_arg__scnprintf_strhexarray(char *bf, size_t size,
						 struct syscall_arg *arg)
{
	const char *fallback_fmt = "%#x";

	return __syscall_arg__scnprintf_strarray(bf, size, fallback_fmt, arg);
}
308
309 #define SCA_STRHEXARRAY syscall_arg__scnprintf_strhexarray
310 #endif /* defined(__i386__) || defined(__x86_64__) */
311
312 static size_t syscall_arg__scnprintf_fd(char *bf, size_t size,
313                                         struct syscall_arg *arg);
314
315 #define SCA_FD syscall_arg__scnprintf_fd
316
317 static size_t syscall_arg__scnprintf_fd_at(char *bf, size_t size,
318                                            struct syscall_arg *arg)
319 {
320         int fd = arg->val;
321
322         if (fd == AT_FDCWD)
323                 return scnprintf(bf, size, "CWD");
324
325         return syscall_arg__scnprintf_fd(bf, size, arg);
326 }
327
328 #define SCA_FDAT syscall_arg__scnprintf_fd_at
329
330 static size_t syscall_arg__scnprintf_close_fd(char *bf, size_t size,
331                                               struct syscall_arg *arg);
332
333 #define SCA_CLOSE_FD syscall_arg__scnprintf_close_fd
334
335 static size_t syscall_arg__scnprintf_hex(char *bf, size_t size,
336                                          struct syscall_arg *arg)
337 {
338         return scnprintf(bf, size, "%#lx", arg->val);
339 }
340
341 #define SCA_HEX syscall_arg__scnprintf_hex
342
343 static size_t syscall_arg__scnprintf_int(char *bf, size_t size,
344                                          struct syscall_arg *arg)
345 {
346         return scnprintf(bf, size, "%d", arg->val);
347 }
348
349 #define SCA_INT syscall_arg__scnprintf_int
350
351 static size_t syscall_arg__scnprintf_mmap_prot(char *bf, size_t size,
352                                                struct syscall_arg *arg)
353 {
354         int printed = 0, prot = arg->val;
355
356         if (prot == PROT_NONE)
357                 return scnprintf(bf, size, "NONE");
358 #define P_MMAP_PROT(n) \
359         if (prot & PROT_##n) { \
360                 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
361                 prot &= ~PROT_##n; \
362         }
363
364         P_MMAP_PROT(EXEC);
365         P_MMAP_PROT(READ);
366         P_MMAP_PROT(WRITE);
367 #ifdef PROT_SEM
368         P_MMAP_PROT(SEM);
369 #endif
370         P_MMAP_PROT(GROWSDOWN);
371         P_MMAP_PROT(GROWSUP);
372 #undef P_MMAP_PROT
373
374         if (prot)
375                 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", prot);
376
377         return printed;
378 }
379
380 #define SCA_MMAP_PROT syscall_arg__scnprintf_mmap_prot
381
382 static size_t syscall_arg__scnprintf_mmap_flags(char *bf, size_t size,
383                                                 struct syscall_arg *arg)
384 {
385         int printed = 0, flags = arg->val;
386
387 #define P_MMAP_FLAG(n) \
388         if (flags & MAP_##n) { \
389                 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
390                 flags &= ~MAP_##n; \
391         }
392
393         P_MMAP_FLAG(SHARED);
394         P_MMAP_FLAG(PRIVATE);
395 #ifdef MAP_32BIT
396         P_MMAP_FLAG(32BIT);
397 #endif
398         P_MMAP_FLAG(ANONYMOUS);
399         P_MMAP_FLAG(DENYWRITE);
400         P_MMAP_FLAG(EXECUTABLE);
401         P_MMAP_FLAG(FILE);
402         P_MMAP_FLAG(FIXED);
403         P_MMAP_FLAG(GROWSDOWN);
404 #ifdef MAP_HUGETLB
405         P_MMAP_FLAG(HUGETLB);
406 #endif
407         P_MMAP_FLAG(LOCKED);
408         P_MMAP_FLAG(NONBLOCK);
409         P_MMAP_FLAG(NORESERVE);
410         P_MMAP_FLAG(POPULATE);
411         P_MMAP_FLAG(STACK);
412 #ifdef MAP_UNINITIALIZED
413         P_MMAP_FLAG(UNINITIALIZED);
414 #endif
415 #undef P_MMAP_FLAG
416
417         if (flags)
418                 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
419
420         return printed;
421 }
422
423 #define SCA_MMAP_FLAGS syscall_arg__scnprintf_mmap_flags
424
425 static size_t syscall_arg__scnprintf_mremap_flags(char *bf, size_t size,
426                                                   struct syscall_arg *arg)
427 {
428         int printed = 0, flags = arg->val;
429
430 #define P_MREMAP_FLAG(n) \
431         if (flags & MREMAP_##n) { \
432                 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
433                 flags &= ~MREMAP_##n; \
434         }
435
436         P_MREMAP_FLAG(MAYMOVE);
437 #ifdef MREMAP_FIXED
438         P_MREMAP_FLAG(FIXED);
439 #endif
440 #undef P_MREMAP_FLAG
441
442         if (flags)
443                 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
444
445         return printed;
446 }
447
448 #define SCA_MREMAP_FLAGS syscall_arg__scnprintf_mremap_flags
449
450 static size_t syscall_arg__scnprintf_madvise_behavior(char *bf, size_t size,
451                                                       struct syscall_arg *arg)
452 {
453         int behavior = arg->val;
454
455         switch (behavior) {
456 #define P_MADV_BHV(n) case MADV_##n: return scnprintf(bf, size, #n)
457         P_MADV_BHV(NORMAL);
458         P_MADV_BHV(RANDOM);
459         P_MADV_BHV(SEQUENTIAL);
460         P_MADV_BHV(WILLNEED);
461         P_MADV_BHV(DONTNEED);
462         P_MADV_BHV(REMOVE);
463         P_MADV_BHV(DONTFORK);
464         P_MADV_BHV(DOFORK);
465         P_MADV_BHV(HWPOISON);
466 #ifdef MADV_SOFT_OFFLINE
467         P_MADV_BHV(SOFT_OFFLINE);
468 #endif
469         P_MADV_BHV(MERGEABLE);
470         P_MADV_BHV(UNMERGEABLE);
471 #ifdef MADV_HUGEPAGE
472         P_MADV_BHV(HUGEPAGE);
473 #endif
474 #ifdef MADV_NOHUGEPAGE
475         P_MADV_BHV(NOHUGEPAGE);
476 #endif
477 #ifdef MADV_DONTDUMP
478         P_MADV_BHV(DONTDUMP);
479 #endif
480 #ifdef MADV_DODUMP
481         P_MADV_BHV(DODUMP);
482 #endif
483 #undef P_MADV_PHV
484         default: break;
485         }
486
487         return scnprintf(bf, size, "%#x", behavior);
488 }
489
490 #define SCA_MADV_BHV syscall_arg__scnprintf_madvise_behavior
491
492 static size_t syscall_arg__scnprintf_flock(char *bf, size_t size,
493                                            struct syscall_arg *arg)
494 {
495         int printed = 0, op = arg->val;
496
497         if (op == 0)
498                 return scnprintf(bf, size, "NONE");
499 #define P_CMD(cmd) \
500         if ((op & LOCK_##cmd) == LOCK_##cmd) { \
501                 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #cmd); \
502                 op &= ~LOCK_##cmd; \
503         }
504
505         P_CMD(SH);
506         P_CMD(EX);
507         P_CMD(NB);
508         P_CMD(UN);
509         P_CMD(MAND);
510         P_CMD(RW);
511         P_CMD(READ);
512         P_CMD(WRITE);
513 #undef P_OP
514
515         if (op)
516                 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", op);
517
518         return printed;
519 }
520
521 #define SCA_FLOCK syscall_arg__scnprintf_flock
522
523 static size_t syscall_arg__scnprintf_futex_op(char *bf, size_t size, struct syscall_arg *arg)
524 {
525         enum syscall_futex_args {
526                 SCF_UADDR   = (1 << 0),
527                 SCF_OP      = (1 << 1),
528                 SCF_VAL     = (1 << 2),
529                 SCF_TIMEOUT = (1 << 3),
530                 SCF_UADDR2  = (1 << 4),
531                 SCF_VAL3    = (1 << 5),
532         };
533         int op = arg->val;
534         int cmd = op & FUTEX_CMD_MASK;
535         size_t printed = 0;
536
537         switch (cmd) {
538 #define P_FUTEX_OP(n) case FUTEX_##n: printed = scnprintf(bf, size, #n);
539         P_FUTEX_OP(WAIT);           arg->mask |= SCF_VAL3|SCF_UADDR2;             break;
540         P_FUTEX_OP(WAKE);           arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
541         P_FUTEX_OP(FD);             arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
542         P_FUTEX_OP(REQUEUE);        arg->mask |= SCF_VAL3|SCF_TIMEOUT;            break;
543         P_FUTEX_OP(CMP_REQUEUE);    arg->mask |= SCF_TIMEOUT;                     break;
544         P_FUTEX_OP(CMP_REQUEUE_PI); arg->mask |= SCF_TIMEOUT;                     break;
545         P_FUTEX_OP(WAKE_OP);                                                      break;
546         P_FUTEX_OP(LOCK_PI);        arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
547         P_FUTEX_OP(UNLOCK_PI);      arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
548         P_FUTEX_OP(TRYLOCK_PI);     arg->mask |= SCF_VAL3|SCF_UADDR2;             break;
549         P_FUTEX_OP(WAIT_BITSET);    arg->mask |= SCF_UADDR2;                      break;
550         P_FUTEX_OP(WAKE_BITSET);    arg->mask |= SCF_UADDR2;                      break;
551         P_FUTEX_OP(WAIT_REQUEUE_PI);                                              break;
552         default: printed = scnprintf(bf, size, "%#x", cmd);                       break;
553         }
554
555         if (op & FUTEX_PRIVATE_FLAG)
556                 printed += scnprintf(bf + printed, size - printed, "|PRIV");
557
558         if (op & FUTEX_CLOCK_REALTIME)
559                 printed += scnprintf(bf + printed, size - printed, "|CLKRT");
560
561         return printed;
562 }
563
564 #define SCA_FUTEX_OP  syscall_arg__scnprintf_futex_op
565
/*
 * Name tables for DEFINE_STRARRAY: an argument value is turned into an index
 * by subtracting the table's offset, so the order of the entries matters.
 */

/* offset 1: the first entry stands for the value 1 */
566 static const char *epoll_ctl_ops[] = { "ADD", "DEL", "MOD", };
567 static DEFINE_STRARRAY_OFFSET(epoll_ctl_ops, 1);
568
569 static const char *itimers[] = { "REAL", "VIRTUAL", "PROF", };
570 static DEFINE_STRARRAY(itimers);
571
/*
 * Names for the keyctl() option argument, first entry is value 0.
 * NOTE(review): order is expected to follow the KEYCTL_* numbering - verify
 * when adding entries.
 */
572 static const char *keyctl_options[] = {
573         "GET_KEYRING_ID", "JOIN_SESSION_KEYRING", "UPDATE", "REVOKE", "CHOWN",
574         "SETPERM", "DESCRIBE", "CLEAR", "LINK", "UNLINK", "SEARCH", "READ",
575         "INSTANTIATE", "NEGATE", "SET_REQKEY_KEYRING", "SET_TIMEOUT",
576         "ASSUME_AUTHORITY", "GET_SECURITY", "SESSION_TO_PARENT", "REJECT",
577         "INSTANTIATE_IOV", "INVALIDATE", "GET_PERSISTENT",
578 };
579 static DEFINE_STRARRAY(keyctl_options);
580
/* "DATA" and "HOLE" are only present when the libc headers define them */
581 static const char *whences[] = { "SET", "CUR", "END",
582 #ifdef SEEK_DATA
583 "DATA",
584 #endif
585 #ifdef SEEK_HOLE
586 "HOLE",
587 #endif
588 };
589 static DEFINE_STRARRAY(whences);
590
/*
 * Names for the fcntl() 'cmd' argument, indexed by command number starting
 * at 0. All entries are spelled without the "F_" prefix so every command is
 * printed in the same style.
 */
static const char *fcntl_cmds[] = {
	"DUPFD", "GETFD", "SETFD", "GETFL", "SETFL", "GETLK", "SETLK",
	"SETLKW", "SETOWN", "GETOWN", "SETSIG", "GETSIG", "GETLK64",
	"SETLK64", "SETLKW64", "SETOWN_EX", "GETOWN_EX",
	"GETOWNER_UIDS",
};
597 static DEFINE_STRARRAY(fcntl_cmds);
598
/*
 * Names for the 'resource' argument of the rlimit syscalls, first entry is
 * value 0.
 * NOTE(review): order is expected to follow the RLIMIT_* numbering - verify
 * when adding entries.
 */
599 static const char *rlimit_resources[] = {
600         "CPU", "FSIZE", "DATA", "STACK", "CORE", "RSS", "NPROC", "NOFILE",
601         "MEMLOCK", "AS", "LOCKS", "SIGPENDING", "MSGQUEUE", "NICE", "RTPRIO",
602         "RTTIME",
603 };
604 static DEFINE_STRARRAY(rlimit_resources);
605
/* Names for the 'how' argument of sigprocmask style calls, starting at 0 */
606 static const char *sighow[] = { "BLOCK", "UNBLOCK", "SETMASK", };
607 static DEFINE_STRARRAY(sighow);
608
/* Clock id names, starting at 0; used for clock_gettime's clk_id argument */
609 static const char *clockid[] = {
610         "REALTIME", "MONOTONIC", "PROCESS_CPUTIME_ID", "THREAD_CPUTIME_ID",
611         "MONOTONIC_RAW", "REALTIME_COARSE", "MONOTONIC_COARSE", "BOOTTIME",
612         "REALTIME_ALARM", "BOOTTIME_ALARM", "SGI_CYCLE", "TAI"
613 };
614 static DEFINE_STRARRAY(clockid);
615
/*
 * Address family names, indexed by AF_* value starting at AF_UNSPEC (0).
 *
 * Every value up to the last entry needs a slot, otherwise all later names
 * shift: "MPLS" (28) sits between "IB" (27) and "CAN" (29).
 */
static const char *socket_families[] = {
	"UNSPEC", "LOCAL", "INET", "AX25", "IPX", "APPLETALK", "NETROM",
	"BRIDGE", "ATMPVC", "X25", "INET6", "ROSE", "DECnet", "NETBEUI",
	"SECURITY", "KEY", "NETLINK", "PACKET", "ASH", "ECONET", "ATMSVC",
	"RDS", "SNA", "IRDA", "PPPOX", "WANPIPE", "LLC", "IB", "MPLS", "CAN",
	"TIPC", "BLUETOOTH", "IUCV", "RXRPC", "ISDN", "PHONET", "IEEE802154",
	"CAIF", "ALG", "NFC", "VSOCK",
};
624 static DEFINE_STRARRAY(socket_families);
625
626 #ifndef SOCK_TYPE_MASK
627 #define SOCK_TYPE_MASK 0xf
628 #endif
629
630 static size_t syscall_arg__scnprintf_socket_type(char *bf, size_t size,
631                                                       struct syscall_arg *arg)
632 {
633         size_t printed;
634         int type = arg->val,
635             flags = type & ~SOCK_TYPE_MASK;
636
637         type &= SOCK_TYPE_MASK;
638         /*
639          * Can't use a strarray, MIPS may override for ABI reasons.
640          */
641         switch (type) {
642 #define P_SK_TYPE(n) case SOCK_##n: printed = scnprintf(bf, size, #n); break;
643         P_SK_TYPE(STREAM);
644         P_SK_TYPE(DGRAM);
645         P_SK_TYPE(RAW);
646         P_SK_TYPE(RDM);
647         P_SK_TYPE(SEQPACKET);
648         P_SK_TYPE(DCCP);
649         P_SK_TYPE(PACKET);
650 #undef P_SK_TYPE
651         default:
652                 printed = scnprintf(bf, size, "%#x", type);
653         }
654
655 #define P_SK_FLAG(n) \
656         if (flags & SOCK_##n) { \
657                 printed += scnprintf(bf + printed, size - printed, "|%s", #n); \
658                 flags &= ~SOCK_##n; \
659         }
660
661         P_SK_FLAG(CLOEXEC);
662         P_SK_FLAG(NONBLOCK);
663 #undef P_SK_FLAG
664
665         if (flags)
666                 printed += scnprintf(bf + printed, size - printed, "|%#x", flags);
667
668         return printed;
669 }
670
671 #define SCA_SK_TYPE syscall_arg__scnprintf_socket_type
672
673 #ifndef MSG_PROBE
674 #define MSG_PROBE            0x10
675 #endif
676 #ifndef MSG_WAITFORONE
677 #define MSG_WAITFORONE  0x10000
678 #endif
679 #ifndef MSG_SENDPAGE_NOTLAST
680 #define MSG_SENDPAGE_NOTLAST 0x20000
681 #endif
682 #ifndef MSG_FASTOPEN
683 #define MSG_FASTOPEN         0x20000000
684 #endif
685
686 static size_t syscall_arg__scnprintf_msg_flags(char *bf, size_t size,
687                                                struct syscall_arg *arg)
688 {
689         int printed = 0, flags = arg->val;
690
691         if (flags == 0)
692                 return scnprintf(bf, size, "NONE");
693 #define P_MSG_FLAG(n) \
694         if (flags & MSG_##n) { \
695                 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
696                 flags &= ~MSG_##n; \
697         }
698
699         P_MSG_FLAG(OOB);
700         P_MSG_FLAG(PEEK);
701         P_MSG_FLAG(DONTROUTE);
702         P_MSG_FLAG(TRYHARD);
703         P_MSG_FLAG(CTRUNC);
704         P_MSG_FLAG(PROBE);
705         P_MSG_FLAG(TRUNC);
706         P_MSG_FLAG(DONTWAIT);
707         P_MSG_FLAG(EOR);
708         P_MSG_FLAG(WAITALL);
709         P_MSG_FLAG(FIN);
710         P_MSG_FLAG(SYN);
711         P_MSG_FLAG(CONFIRM);
712         P_MSG_FLAG(RST);
713         P_MSG_FLAG(ERRQUEUE);
714         P_MSG_FLAG(NOSIGNAL);
715         P_MSG_FLAG(MORE);
716         P_MSG_FLAG(WAITFORONE);
717         P_MSG_FLAG(SENDPAGE_NOTLAST);
718         P_MSG_FLAG(FASTOPEN);
719         P_MSG_FLAG(CMSG_CLOEXEC);
720 #undef P_MSG_FLAG
721
722         if (flags)
723                 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
724
725         return printed;
726 }
727
728 #define SCA_MSG_FLAGS syscall_arg__scnprintf_msg_flags
729
730 static size_t syscall_arg__scnprintf_access_mode(char *bf, size_t size,
731                                                  struct syscall_arg *arg)
732 {
733         size_t printed = 0;
734         int mode = arg->val;
735
736         if (mode == F_OK) /* 0 */
737                 return scnprintf(bf, size, "F");
738 #define P_MODE(n) \
739         if (mode & n##_OK) { \
740                 printed += scnprintf(bf + printed, size - printed, "%s", #n); \
741                 mode &= ~n##_OK; \
742         }
743
744         P_MODE(R);
745         P_MODE(W);
746         P_MODE(X);
747 #undef P_MODE
748
749         if (mode)
750                 printed += scnprintf(bf + printed, size - printed, "|%#x", mode);
751
752         return printed;
753 }
754
755 #define SCA_ACCMODE syscall_arg__scnprintf_access_mode
756
757 static size_t syscall_arg__scnprintf_filename(char *bf, size_t size,
758                                               struct syscall_arg *arg);
759
760 #define SCA_FILENAME syscall_arg__scnprintf_filename
761
762 static size_t syscall_arg__scnprintf_open_flags(char *bf, size_t size,
763                                                struct syscall_arg *arg)
764 {
765         int printed = 0, flags = arg->val;
766
767         if (!(flags & O_CREAT))
768                 arg->mask |= 1 << (arg->idx + 1); /* Mask the mode parm */
769
770         if (flags == 0)
771                 return scnprintf(bf, size, "RDONLY");
772 #define P_FLAG(n) \
773         if (flags & O_##n) { \
774                 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
775                 flags &= ~O_##n; \
776         }
777
778         P_FLAG(APPEND);
779         P_FLAG(ASYNC);
780         P_FLAG(CLOEXEC);
781         P_FLAG(CREAT);
782         P_FLAG(DIRECT);
783         P_FLAG(DIRECTORY);
784         P_FLAG(EXCL);
785         P_FLAG(LARGEFILE);
786         P_FLAG(NOATIME);
787         P_FLAG(NOCTTY);
788 #ifdef O_NONBLOCK
789         P_FLAG(NONBLOCK);
790 #elif O_NDELAY
791         P_FLAG(NDELAY);
792 #endif
793 #ifdef O_PATH
794         P_FLAG(PATH);
795 #endif
796         P_FLAG(RDWR);
797 #ifdef O_DSYNC
798         if ((flags & O_SYNC) == O_SYNC)
799                 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", "SYNC");
800         else {
801                 P_FLAG(DSYNC);
802         }
803 #else
804         P_FLAG(SYNC);
805 #endif
806         P_FLAG(TRUNC);
807         P_FLAG(WRONLY);
808 #undef P_FLAG
809
810         if (flags)
811                 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
812
813         return printed;
814 }
815
816 #define SCA_OPEN_FLAGS syscall_arg__scnprintf_open_flags
817
818 static size_t syscall_arg__scnprintf_perf_flags(char *bf, size_t size,
819                                                 struct syscall_arg *arg)
820 {
821         int printed = 0, flags = arg->val;
822
823         if (flags == 0)
824                 return 0;
825
826 #define P_FLAG(n) \
827         if (flags & PERF_FLAG_##n) { \
828                 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
829                 flags &= ~PERF_FLAG_##n; \
830         }
831
832         P_FLAG(FD_NO_GROUP);
833         P_FLAG(FD_OUTPUT);
834         P_FLAG(PID_CGROUP);
835         P_FLAG(FD_CLOEXEC);
836 #undef P_FLAG
837
838         if (flags)
839                 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
840
841         return printed;
842 }
843
844 #define SCA_PERF_FLAGS syscall_arg__scnprintf_perf_flags
845
846 static size_t syscall_arg__scnprintf_eventfd_flags(char *bf, size_t size,
847                                                    struct syscall_arg *arg)
848 {
849         int printed = 0, flags = arg->val;
850
851         if (flags == 0)
852                 return scnprintf(bf, size, "NONE");
853 #define P_FLAG(n) \
854         if (flags & EFD_##n) { \
855                 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
856                 flags &= ~EFD_##n; \
857         }
858
859         P_FLAG(SEMAPHORE);
860         P_FLAG(CLOEXEC);
861         P_FLAG(NONBLOCK);
862 #undef P_FLAG
863
864         if (flags)
865                 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
866
867         return printed;
868 }
869
870 #define SCA_EFD_FLAGS syscall_arg__scnprintf_eventfd_flags
871
872 static size_t syscall_arg__scnprintf_pipe_flags(char *bf, size_t size,
873                                                 struct syscall_arg *arg)
874 {
875         int printed = 0, flags = arg->val;
876
877 #define P_FLAG(n) \
878         if (flags & O_##n) { \
879                 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
880                 flags &= ~O_##n; \
881         }
882
883         P_FLAG(CLOEXEC);
884         P_FLAG(NONBLOCK);
885 #undef P_FLAG
886
887         if (flags)
888                 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
889
890         return printed;
891 }
892
893 #define SCA_PIPE_FLAGS syscall_arg__scnprintf_pipe_flags
894
895 static size_t syscall_arg__scnprintf_signum(char *bf, size_t size, struct syscall_arg *arg)
896 {
897         int sig = arg->val;
898
899         switch (sig) {
900 #define P_SIGNUM(n) case SIG##n: return scnprintf(bf, size, #n)
901         P_SIGNUM(HUP);
902         P_SIGNUM(INT);
903         P_SIGNUM(QUIT);
904         P_SIGNUM(ILL);
905         P_SIGNUM(TRAP);
906         P_SIGNUM(ABRT);
907         P_SIGNUM(BUS);
908         P_SIGNUM(FPE);
909         P_SIGNUM(KILL);
910         P_SIGNUM(USR1);
911         P_SIGNUM(SEGV);
912         P_SIGNUM(USR2);
913         P_SIGNUM(PIPE);
914         P_SIGNUM(ALRM);
915         P_SIGNUM(TERM);
916         P_SIGNUM(CHLD);
917         P_SIGNUM(CONT);
918         P_SIGNUM(STOP);
919         P_SIGNUM(TSTP);
920         P_SIGNUM(TTIN);
921         P_SIGNUM(TTOU);
922         P_SIGNUM(URG);
923         P_SIGNUM(XCPU);
924         P_SIGNUM(XFSZ);
925         P_SIGNUM(VTALRM);
926         P_SIGNUM(PROF);
927         P_SIGNUM(WINCH);
928         P_SIGNUM(IO);
929         P_SIGNUM(PWR);
930         P_SIGNUM(SYS);
931 #ifdef SIGEMT
932         P_SIGNUM(EMT);
933 #endif
934 #ifdef SIGSTKFLT
935         P_SIGNUM(STKFLT);
936 #endif
937 #ifdef SIGSWI
938         P_SIGNUM(SWI);
939 #endif
940         default: break;
941         }
942
943         return scnprintf(bf, size, "%#x", sig);
944 }
945
946 #define SCA_SIGNUM syscall_arg__scnprintf_signum
947
948 #if defined(__i386__) || defined(__x86_64__)
949 /*
950  * FIXME: Make this available to all arches.
951  */
952 #define TCGETS          0x5401
953
/*
 * Names of the terminal ioctl requests, for use with a strarray whose offset
 * is 0x5401 (TCGETS): the slot for request number 'nr' is [nr - 0x5401].
 *
 * Designated initializers are written as "request - 0x5401" so that the
 * entries after a gap land on the slot the lookup will actually use. Slots
 * inside the gaps are left NULL.
 *
 * NOTE(review): some of the names between TCGETS2 and TIOCGEXCL appear to
 * belong to requests whose numbers also encode direction/size bits and would
 * therefore never fall inside this table - confirm against the ioctl headers.
 */
static const char *tioctls[] = {
	"TCGETS", "TCSETS", "TCSETSW", "TCSETSF", "TCGETA", "TCSETA", "TCSETAW",
	"TCSETAF", "TCSBRK", "TCXONC", "TCFLSH", "TIOCEXCL", "TIOCNXCL",
	"TIOCSCTTY", "TIOCGPGRP", "TIOCSPGRP", "TIOCOUTQ", "TIOCSTI",
	"TIOCGWINSZ", "TIOCSWINSZ", "TIOCMGET", "TIOCMBIS", "TIOCMBIC",
	"TIOCMSET", "TIOCGSOFTCAR", "TIOCSSOFTCAR", "FIONREAD", "TIOCLINUX",
	"TIOCCONS", "TIOCGSERIAL", "TIOCSSERIAL", "TIOCPKT", "FIONBIO",
	"TIOCNOTTY", "TIOCSETD", "TIOCGETD", "TCSBRKP",
	[0x5427 - 0x5401] = "TIOCSBRK",
	"TIOCCBRK", "TIOCGSID", "TCGETS2", "TCSETS2", "TCSETSW2", "TCSETSF2",
	"TIOCGRS485", "TIOCSRS485", "TIOCGPTN", "TIOCSPTLCK",
	"TIOCGDEV||TCGETX", "TCSETX", "TCSETXF", "TCSETXW", "TIOCSIG",
	"TIOCVHANGUP", "TIOCGPKT", "TIOCGPTLCK", "TIOCGEXCL",
	[0x5450 - 0x5401] = "FIONCLEX",
	"FIOCLEX", "FIOASYNC", "TIOCSERCONFIG",
	"TIOCSERGWILD", "TIOCSERSWILD", "TIOCGLCKTRMIOS", "TIOCSLCKTRMIOS",
	"TIOCSERGSTRUCT", "TIOCSERGETLSR", "TIOCSERGETMULTI", "TIOCSERSETMULTI",
	"TIOCMIWAIT", "TIOCGICOUNT",
	[0x5460 - 0x5401] = "FIOQSIZE",
};
971
972 static DEFINE_STRARRAY_OFFSET(tioctls, 0x5401);
973 #endif /* defined(__i386__) || defined(__x86_64__) */
974
/*
 * Initializer fragment for a syscall_fmt entry: argument number 'arg' is
 * formatted by SCA_STRARRAY using strarray__<array> as its lookup table.
 * 'name' is not expanded; call sites appear to pass the argument's name
 * there purely as documentation.
 */
#define STRARRAY(arg, name, array)				\
	.arg_scnprintf = { [arg] = SCA_STRARRAY, },		\
	.arg_parm      = { [arg] = &strarray__##array, }
978
979 static struct syscall_fmt {
980         const char *name;
981         const char *alias;
982         size_t     (*arg_scnprintf[6])(char *bf, size_t size, struct syscall_arg *arg);
983         void       *arg_parm[6];
984         bool       errmsg;
985         bool       timeout;
986         bool       hexret;
987 } syscall_fmts[] = {
988         { .name     = "access",     .errmsg = true,
989           .arg_scnprintf = { [0] = SCA_FILENAME, /* filename */
990                              [1] = SCA_ACCMODE,  /* mode */ }, },
991         { .name     = "arch_prctl", .errmsg = true, .alias = "prctl", },
992         { .name     = "brk",        .hexret = true,
993           .arg_scnprintf = { [0] = SCA_HEX, /* brk */ }, },
994         { .name     = "chdir",      .errmsg = true,
995           .arg_scnprintf = { [0] = SCA_FILENAME, /* filename */ }, },
996         { .name     = "chmod",      .errmsg = true,
997           .arg_scnprintf = { [0] = SCA_FILENAME, /* filename */ }, },
998         { .name     = "chroot",     .errmsg = true,
999           .arg_scnprintf = { [0] = SCA_FILENAME, /* filename */ }, },
1000         { .name     = "clock_gettime",  .errmsg = true, STRARRAY(0, clk_id, clockid), },
1001         { .name     = "close",      .errmsg = true,
1002           .arg_scnprintf = { [0] = SCA_CLOSE_FD, /* fd */ }, },
1003         { .name     = "connect",    .errmsg = true, },
1004         { .name     = "creat",      .errmsg = true,
1005           .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
1006         { .name     = "dup",        .errmsg = true,
1007           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1008         { .name     = "dup2",       .errmsg = true,
1009           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1010         { .name     = "dup3",       .errmsg = true,
1011           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1012         { .name     = "epoll_ctl",  .errmsg = true, STRARRAY(1, op, epoll_ctl_ops), },
1013         { .name     = "eventfd2",   .errmsg = true,
1014           .arg_scnprintf = { [1] = SCA_EFD_FLAGS, /* flags */ }, },
1015         { .name     = "faccessat",  .errmsg = true,
1016           .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
1017                              [1] = SCA_FILENAME, /* filename */ }, },
1018         { .name     = "fadvise64",  .errmsg = true,
1019           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1020         { .name     = "fallocate",  .errmsg = true,
1021           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1022         { .name     = "fchdir",     .errmsg = true,
1023           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1024         { .name     = "fchmod",     .errmsg = true,
1025           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1026         { .name     = "fchmodat",   .errmsg = true,
1027           .arg_scnprintf = { [0] = SCA_FDAT, /* fd */
1028                              [1] = SCA_FILENAME, /* filename */ }, },
1029         { .name     = "fchown",     .errmsg = true,
1030           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1031         { .name     = "fchownat",   .errmsg = true,
1032           .arg_scnprintf = { [0] = SCA_FDAT, /* fd */
1033                              [1] = SCA_FILENAME, /* filename */ }, },
1034         { .name     = "fcntl",      .errmsg = true,
1035           .arg_scnprintf = { [0] = SCA_FD, /* fd */
1036                              [1] = SCA_STRARRAY, /* cmd */ },
1037           .arg_parm      = { [1] = &strarray__fcntl_cmds, /* cmd */ }, },
1038         { .name     = "fdatasync",  .errmsg = true,
1039           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1040         { .name     = "flock",      .errmsg = true,
1041           .arg_scnprintf = { [0] = SCA_FD, /* fd */
1042                              [1] = SCA_FLOCK, /* cmd */ }, },
1043         { .name     = "fsetxattr",  .errmsg = true,
1044           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1045         { .name     = "fstat",      .errmsg = true, .alias = "newfstat",
1046           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1047         { .name     = "fstatat",    .errmsg = true, .alias = "newfstatat",
1048           .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
1049                              [1] = SCA_FILENAME, /* filename */ }, },
1050         { .name     = "fstatfs",    .errmsg = true,
1051           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1052         { .name     = "fsync",    .errmsg = true,
1053           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1054         { .name     = "ftruncate", .errmsg = true,
1055           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1056         { .name     = "futex",      .errmsg = true,
1057           .arg_scnprintf = { [1] = SCA_FUTEX_OP, /* op */ }, },
1058         { .name     = "futimesat", .errmsg = true,
1059           .arg_scnprintf = { [0] = SCA_FDAT, /* fd */
1060                              [1] = SCA_FILENAME, /* filename */ }, },
1061         { .name     = "getdents",   .errmsg = true,
1062           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1063         { .name     = "getdents64", .errmsg = true,
1064           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1065         { .name     = "getitimer",  .errmsg = true, STRARRAY(0, which, itimers), },
1066         { .name     = "getrlimit",  .errmsg = true, STRARRAY(0, resource, rlimit_resources), },
1067         { .name     = "getxattr",    .errmsg = true,
1068           .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
1069         { .name     = "inotify_add_watch",          .errmsg = true,
1070           .arg_scnprintf = { [1] = SCA_FILENAME, /* pathname */ }, },
1071         { .name     = "ioctl",      .errmsg = true,
1072           .arg_scnprintf = { [0] = SCA_FD, /* fd */
1073 #if defined(__i386__) || defined(__x86_64__)
1074 /*
1075  * FIXME: Make this available to all arches.
1076  */
1077                              [1] = SCA_STRHEXARRAY, /* cmd */
1078                              [2] = SCA_HEX, /* arg */ },
1079           .arg_parm      = { [1] = &strarray__tioctls, /* cmd */ }, },
1080 #else
1081                              [2] = SCA_HEX, /* arg */ }, },
1082 #endif
1083         { .name     = "keyctl",     .errmsg = true, STRARRAY(0, option, keyctl_options), },
1084         { .name     = "kill",       .errmsg = true,
1085           .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
1086         { .name     = "lchown",    .errmsg = true,
1087           .arg_scnprintf = { [0] = SCA_FILENAME, /* filename */ }, },
1088         { .name     = "lgetxattr",  .errmsg = true,
1089           .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
1090         { .name     = "linkat",     .errmsg = true,
1091           .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
1092         { .name     = "listxattr",  .errmsg = true,
1093           .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
1094         { .name     = "llistxattr", .errmsg = true,
1095           .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
1096         { .name     = "lremovexattr",  .errmsg = true,
1097           .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
1098         { .name     = "lseek",      .errmsg = true,
1099           .arg_scnprintf = { [0] = SCA_FD, /* fd */
1100                              [2] = SCA_STRARRAY, /* whence */ },
1101           .arg_parm      = { [2] = &strarray__whences, /* whence */ }, },
1102         { .name     = "lsetxattr",  .errmsg = true,
1103           .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
1104         { .name     = "lstat",      .errmsg = true, .alias = "newlstat",
1105           .arg_scnprintf = { [0] = SCA_FILENAME, /* filename */ }, },
1106         { .name     = "lsxattr",    .errmsg = true,
1107           .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
1108         { .name     = "madvise",    .errmsg = true,
1109           .arg_scnprintf = { [0] = SCA_HEX,      /* start */
1110                              [2] = SCA_MADV_BHV, /* behavior */ }, },
1111         { .name     = "mkdir",    .errmsg = true,
1112           .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
1113         { .name     = "mkdirat",    .errmsg = true,
1114           .arg_scnprintf = { [0] = SCA_FDAT, /* fd */
1115                              [1] = SCA_FILENAME, /* pathname */ }, },
1116         { .name     = "mknod",      .errmsg = true,
1117           .arg_scnprintf = { [0] = SCA_FILENAME, /* filename */ }, },
1118         { .name     = "mknodat",    .errmsg = true,
1119           .arg_scnprintf = { [0] = SCA_FDAT, /* fd */
1120                              [1] = SCA_FILENAME, /* filename */ }, },
1121         { .name     = "mlock",      .errmsg = true,
1122           .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
1123         { .name     = "mlockall",   .errmsg = true,
1124           .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
1125         { .name     = "mmap",       .hexret = true,
1126           .arg_scnprintf = { [0] = SCA_HEX,       /* addr */
1127                              [2] = SCA_MMAP_PROT, /* prot */
1128                              [3] = SCA_MMAP_FLAGS, /* flags */
1129                              [4] = SCA_FD,        /* fd */ }, },
1130         { .name     = "mprotect",   .errmsg = true,
1131           .arg_scnprintf = { [0] = SCA_HEX, /* start */
1132                              [2] = SCA_MMAP_PROT, /* prot */ }, },
1133         { .name     = "mq_unlink", .errmsg = true,
1134           .arg_scnprintf = { [0] = SCA_FILENAME, /* u_name */ }, },
1135         { .name     = "mremap",     .hexret = true,
1136           .arg_scnprintf = { [0] = SCA_HEX, /* addr */
1137                              [3] = SCA_MREMAP_FLAGS, /* flags */
1138                              [4] = SCA_HEX, /* new_addr */ }, },
1139         { .name     = "munlock",    .errmsg = true,
1140           .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
1141         { .name     = "munmap",     .errmsg = true,
1142           .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
1143         { .name     = "name_to_handle_at", .errmsg = true,
1144           .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
1145         { .name     = "newfstatat", .errmsg = true,
1146           .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
1147                              [1] = SCA_FILENAME, /* filename */ }, },
1148         { .name     = "open",       .errmsg = true,
1149           .arg_scnprintf = { [0] = SCA_FILENAME,   /* filename */
1150                              [1] = SCA_OPEN_FLAGS, /* flags */ }, },
1151         { .name     = "open_by_handle_at", .errmsg = true,
1152           .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
1153                              [2] = SCA_OPEN_FLAGS, /* flags */ }, },
1154         { .name     = "openat",     .errmsg = true,
1155           .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
1156                              [1] = SCA_FILENAME, /* filename */
1157                              [2] = SCA_OPEN_FLAGS, /* flags */ }, },
1158         { .name     = "perf_event_open", .errmsg = true,
1159           .arg_scnprintf = { [1] = SCA_INT, /* pid */
1160                              [2] = SCA_INT, /* cpu */
1161                              [3] = SCA_FD,  /* group_fd */
1162                              [4] = SCA_PERF_FLAGS,  /* flags */ }, },
1163         { .name     = "pipe2",      .errmsg = true,
1164           .arg_scnprintf = { [1] = SCA_PIPE_FLAGS, /* flags */ }, },
1165         { .name     = "poll",       .errmsg = true, .timeout = true, },
1166         { .name     = "ppoll",      .errmsg = true, .timeout = true, },
1167         { .name     = "pread",      .errmsg = true, .alias = "pread64",
1168           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1169         { .name     = "preadv",     .errmsg = true, .alias = "pread",
1170           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1171         { .name     = "prlimit64",  .errmsg = true, STRARRAY(1, resource, rlimit_resources), },
1172         { .name     = "pwrite",     .errmsg = true, .alias = "pwrite64",
1173           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1174         { .name     = "pwritev",    .errmsg = true,
1175           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1176         { .name     = "read",       .errmsg = true,
1177           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1178         { .name     = "readlink",   .errmsg = true,
1179           .arg_scnprintf = { [0] = SCA_FILENAME, /* path */ }, },
1180         { .name     = "readlinkat", .errmsg = true,
1181           .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
1182                              [1] = SCA_FILENAME, /* pathname */ }, },
1183         { .name     = "readv",      .errmsg = true,
1184           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1185         { .name     = "recvfrom",   .errmsg = true,
1186           .arg_scnprintf = { [0] = SCA_FD, /* fd */
1187                              [3] = SCA_MSG_FLAGS, /* flags */ }, },
1188         { .name     = "recvmmsg",   .errmsg = true,
1189           .arg_scnprintf = { [0] = SCA_FD, /* fd */
1190                              [3] = SCA_MSG_FLAGS, /* flags */ }, },
1191         { .name     = "recvmsg",    .errmsg = true,
1192           .arg_scnprintf = { [0] = SCA_FD, /* fd */
1193                              [2] = SCA_MSG_FLAGS, /* flags */ }, },
1194         { .name     = "removexattr", .errmsg = true,
1195           .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
1196         { .name     = "renameat",   .errmsg = true,
1197           .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
1198         { .name     = "rmdir",    .errmsg = true,
1199           .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
1200         { .name     = "rt_sigaction", .errmsg = true,
1201           .arg_scnprintf = { [0] = SCA_SIGNUM, /* sig */ }, },
1202         { .name     = "rt_sigprocmask",  .errmsg = true, STRARRAY(0, how, sighow), },
1203         { .name     = "rt_sigqueueinfo", .errmsg = true,
1204           .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
1205         { .name     = "rt_tgsigqueueinfo", .errmsg = true,
1206           .arg_scnprintf = { [2] = SCA_SIGNUM, /* sig */ }, },
1207         { .name     = "select",     .errmsg = true, .timeout = true, },
1208         { .name     = "sendmmsg",    .errmsg = true,
1209           .arg_scnprintf = { [0] = SCA_FD, /* fd */
1210                              [3] = SCA_MSG_FLAGS, /* flags */ }, },
1211         { .name     = "sendmsg",    .errmsg = true,
1212           .arg_scnprintf = { [0] = SCA_FD, /* fd */
1213                              [2] = SCA_MSG_FLAGS, /* flags */ }, },
1214         { .name     = "sendto",     .errmsg = true,
1215           .arg_scnprintf = { [0] = SCA_FD, /* fd */
1216                              [3] = SCA_MSG_FLAGS, /* flags */ }, },
1217         { .name     = "setitimer",  .errmsg = true, STRARRAY(0, which, itimers), },
1218         { .name     = "setrlimit",  .errmsg = true, STRARRAY(0, resource, rlimit_resources), },
1219         { .name     = "setxattr",   .errmsg = true,
1220           .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
1221         { .name     = "shutdown",   .errmsg = true,
1222           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1223         { .name     = "socket",     .errmsg = true,
1224           .arg_scnprintf = { [0] = SCA_STRARRAY, /* family */
1225                              [1] = SCA_SK_TYPE, /* type */ },
1226           .arg_parm      = { [0] = &strarray__socket_families, /* family */ }, },
1227         { .name     = "socketpair", .errmsg = true,
1228           .arg_scnprintf = { [0] = SCA_STRARRAY, /* family */
1229                              [1] = SCA_SK_TYPE, /* type */ },
1230           .arg_parm      = { [0] = &strarray__socket_families, /* family */ }, },
1231         { .name     = "stat",       .errmsg = true, .alias = "newstat",
1232           .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
1233         { .name     = "statfs",     .errmsg = true,
1234           .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
1235         { .name     = "swapoff",    .errmsg = true,
1236           .arg_scnprintf = { [0] = SCA_FILENAME, /* specialfile */ }, },
1237         { .name     = "swapon",     .errmsg = true,
1238           .arg_scnprintf = { [0] = SCA_FILENAME, /* specialfile */ }, },
1239         { .name     = "symlinkat",  .errmsg = true,
1240           .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
1241         { .name     = "tgkill",     .errmsg = true,
1242           .arg_scnprintf = { [2] = SCA_SIGNUM, /* sig */ }, },
1243         { .name     = "tkill",      .errmsg = true,
1244           .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
1245         { .name     = "truncate",   .errmsg = true,
1246           .arg_scnprintf = { [0] = SCA_FILENAME, /* path */ }, },
1247         { .name     = "uname",      .errmsg = true, .alias = "newuname", },
1248         { .name     = "unlinkat",   .errmsg = true,
1249           .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
1250                              [1] = SCA_FILENAME, /* pathname */ }, },
1251         { .name     = "utime",  .errmsg = true,
1252           .arg_scnprintf = { [0] = SCA_FILENAME, /* filename */ }, },
1253         { .name     = "utimensat",  .errmsg = true,
1254           .arg_scnprintf = { [0] = SCA_FDAT, /* dirfd */
1255                              [1] = SCA_FILENAME, /* filename */ }, },
1256         { .name     = "utimes",  .errmsg = true,
1257           .arg_scnprintf = { [0] = SCA_FILENAME, /* filename */ }, },
1258         { .name     = "vmsplice",  .errmsg = true,
1259           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1260         { .name     = "write",      .errmsg = true,
1261           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1262         { .name     = "writev",     .errmsg = true,
1263           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1264 };
1265
1266 static int syscall_fmt__cmp(const void *name, const void *fmtp)
1267 {
1268         const struct syscall_fmt *fmt = fmtp;
1269         return strcmp(name, fmt->name);
1270 }
1271
1272 static struct syscall_fmt *syscall_fmt__find(const char *name)
1273 {
1274         const int nmemb = ARRAY_SIZE(syscall_fmts);
1275         return bsearch(name, syscall_fmts, nmemb, sizeof(struct syscall_fmt), syscall_fmt__cmp);
1276 }
1277
/*
 * Per-syscall state.
 *
 * syscall__set_arg_fmts() fills in arg_scnprintf (a calloc'ed array with
 * nr_args slots, one per entry of the 'args' list) and points arg_parm at
 * the arg_parm array of 'fmt' when the syscall has a syscall_fmts[] entry.
 */
struct syscall {
	struct event_format *tp_format;
	int		    nr_args;
	struct format_field *args;	/* linked through ->next */
	const char	    *name;
	bool		    is_exit;
	struct syscall_fmt  *fmt;	/* may be NULL */
	size_t		    (**arg_scnprintf)(char *bf, size_t size, struct syscall_arg *arg);
	void		    **arg_parm;
};
1288
1289 static size_t fprintf_duration(unsigned long t, FILE *fp)
1290 {
1291         double duration = (double)t / NSEC_PER_MSEC;
1292         size_t printed = fprintf(fp, "(");
1293
1294         if (duration >= 1.0)
1295                 printed += color_fprintf(fp, PERF_COLOR_RED, "%6.3f ms", duration);
1296         else if (duration >= 0.01)
1297                 printed += color_fprintf(fp, PERF_COLOR_YELLOW, "%6.3f ms", duration);
1298         else
1299                 printed += color_fprintf(fp, PERF_COLOR_NORMAL, "%6.3f ms", duration);
1300         return printed + fprintf(fp, "): ");
1301 }
1302
1303 /**
1304  * filename.ptr: The filename char pointer that will be vfs_getname'd
1305  * filename.entry_str_pos: Where to insert the string translated from
1306  *                         filename.ptr by the vfs_getname tracepoint/kprobe.
1307  */
1308 struct thread_trace {
1309         u64               entry_time;
1310         u64               exit_time;
1311         bool              entry_pending;
1312         unsigned long     nr_events;
1313         unsigned long     pfmaj, pfmin;
1314         char              *entry_str;
1315         double            runtime_ms;
1316         struct {
1317                 unsigned long ptr;
1318                 int           entry_str_pos;
1319         } filename;
1320         struct {
1321                 int       max;
1322                 char      **table;
1323         } paths;
1324
1325         struct intlist *syscall_stats;
1326 };
1327
1328 static struct thread_trace *thread_trace__new(void)
1329 {
1330         struct thread_trace *ttrace =  zalloc(sizeof(struct thread_trace));
1331
1332         if (ttrace)
1333                 ttrace->paths.max = -1;
1334
1335         ttrace->syscall_stats = intlist__new(NULL);
1336
1337         return ttrace;
1338 }
1339
1340 static struct thread_trace *thread__trace(struct thread *thread, FILE *fp)
1341 {
1342         struct thread_trace *ttrace;
1343
1344         if (thread == NULL)
1345                 goto fail;
1346
1347         if (thread__priv(thread) == NULL)
1348                 thread__set_priv(thread, thread_trace__new());
1349
1350         if (thread__priv(thread) == NULL)
1351                 goto fail;
1352
1353         ttrace = thread__priv(thread);
1354         ++ttrace->nr_events;
1355
1356         return ttrace;
1357 fail:
1358         color_fprintf(fp, PERF_COLOR_RED,
1359                       "WARNING: not enough memory, dropping samples!\n");
1360         return NULL;
1361 }
1362
/* Bit flags; presumably the values stored in trace->trace_pgfaults. */
#define TRACE_PFMAJ		(1 << 0)
#define TRACE_PFMIN		(1 << 1)

/* Size, in bytes, used for the per-thread entry_str buffer. */
static const size_t trace__entry_str_size = 2048;
1367
1368 struct trace {
1369         struct perf_tool        tool;
1370         struct {
1371                 int             machine;
1372                 int             open_id;
1373         }                       audit;
1374         struct {
1375                 int             max;
1376                 struct syscall  *table;
1377                 struct {
1378                         struct perf_evsel *sys_enter,
1379                                           *sys_exit;
1380                 }               events;
1381         } syscalls;
1382         struct record_opts      opts;
1383         struct perf_evlist      *evlist;
1384         struct machine          *host;
1385         struct thread           *current;
1386         u64                     base_time;
1387         FILE                    *output;
1388         unsigned long           nr_events;
1389         struct strlist          *ev_qualifier;
1390         struct {
1391                 size_t          nr;
1392                 int             *entries;
1393         }                       ev_qualifier_ids;
1394         const char              *last_vfs_getname;
1395         struct intlist          *tid_list;
1396         struct intlist          *pid_list;
1397         struct {
1398                 size_t          nr;
1399                 pid_t           *entries;
1400         }                       filter_pids;
1401         double                  duration_filter;
1402         double                  runtime_ms;
1403         struct {
1404                 u64             vfs_getname,
1405                                 proc_getname;
1406         } stats;
1407         bool                    not_ev_qualifier;
1408         bool                    live;
1409         bool                    full_time;
1410         bool                    sched;
1411         bool                    multiple_threads;
1412         bool                    summary;
1413         bool                    summary_only;
1414         bool                    show_comm;
1415         bool                    show_tool_stats;
1416         bool                    trace_syscalls;
1417         bool                    force;
1418         bool                    vfs_getname;
1419         int                     trace_pgfaults;
1420 };
1421
1422 static int trace__set_fd_pathname(struct thread *thread, int fd, const char *pathname)
1423 {
1424         struct thread_trace *ttrace = thread__priv(thread);
1425
1426         if (fd > ttrace->paths.max) {
1427                 char **npath = realloc(ttrace->paths.table, (fd + 1) * sizeof(char *));
1428
1429                 if (npath == NULL)
1430                         return -1;
1431
1432                 if (ttrace->paths.max != -1) {
1433                         memset(npath + ttrace->paths.max + 1, 0,
1434                                (fd - ttrace->paths.max) * sizeof(char *));
1435                 } else {
1436                         memset(npath, 0, (fd + 1) * sizeof(char *));
1437                 }
1438
1439                 ttrace->paths.table = npath;
1440                 ttrace->paths.max   = fd;
1441         }
1442
1443         ttrace->paths.table[fd] = strdup(pathname);
1444
1445         return ttrace->paths.table[fd] != NULL ? 0 : -1;
1446 }
1447
1448 static int thread__read_fd_path(struct thread *thread, int fd)
1449 {
1450         char linkname[PATH_MAX], pathname[PATH_MAX];
1451         struct stat st;
1452         int ret;
1453
1454         if (thread->pid_ == thread->tid) {
1455                 scnprintf(linkname, sizeof(linkname),
1456                           "/proc/%d/fd/%d", thread->pid_, fd);
1457         } else {
1458                 scnprintf(linkname, sizeof(linkname),
1459                           "/proc/%d/task/%d/fd/%d", thread->pid_, thread->tid, fd);
1460         }
1461
1462         if (lstat(linkname, &st) < 0 || st.st_size + 1 > (off_t)sizeof(pathname))
1463                 return -1;
1464
1465         ret = readlink(linkname, pathname, sizeof(pathname));
1466
1467         if (ret < 0 || ret > st.st_size)
1468                 return -1;
1469
1470         pathname[ret] = '\0';
1471         return trace__set_fd_pathname(thread, fd, pathname);
1472 }
1473
1474 static const char *thread__fd_path(struct thread *thread, int fd,
1475                                    struct trace *trace)
1476 {
1477         struct thread_trace *ttrace = thread__priv(thread);
1478
1479         if (ttrace == NULL)
1480                 return NULL;
1481
1482         if (fd < 0)
1483                 return NULL;
1484
1485         if ((fd > ttrace->paths.max || ttrace->paths.table[fd] == NULL)) {
1486                 if (!trace->live)
1487                         return NULL;
1488                 ++trace->stats.proc_getname;
1489                 if (thread__read_fd_path(thread, fd))
1490                         return NULL;
1491         }
1492
1493         return ttrace->paths.table[fd];
1494 }
1495
1496 static size_t syscall_arg__scnprintf_fd(char *bf, size_t size,
1497                                         struct syscall_arg *arg)
1498 {
1499         int fd = arg->val;
1500         size_t printed = scnprintf(bf, size, "%d", fd);
1501         const char *path = thread__fd_path(arg->thread, fd, arg->trace);
1502
1503         if (path)
1504                 printed += scnprintf(bf + printed, size - printed, "<%s>", path);
1505
1506         return printed;
1507 }
1508
1509 static size_t syscall_arg__scnprintf_close_fd(char *bf, size_t size,
1510                                               struct syscall_arg *arg)
1511 {
1512         int fd = arg->val;
1513         size_t printed = syscall_arg__scnprintf_fd(bf, size, arg);
1514         struct thread_trace *ttrace = thread__priv(arg->thread);
1515
1516         if (ttrace && fd >= 0 && fd <= ttrace->paths.max)
1517                 zfree(&ttrace->paths.table[fd]);
1518
1519         return printed;
1520 }
1521
1522 static void thread__set_filename_pos(struct thread *thread, const char *bf,
1523                                      unsigned long ptr)
1524 {
1525         struct thread_trace *ttrace = thread__priv(thread);
1526
1527         ttrace->filename.ptr = ptr;
1528         ttrace->filename.entry_str_pos = bf - ttrace->entry_str;
1529 }
1530
1531 static size_t syscall_arg__scnprintf_filename(char *bf, size_t size,
1532                                               struct syscall_arg *arg)
1533 {
1534         unsigned long ptr = arg->val;
1535
1536         if (!arg->trace->vfs_getname)
1537                 return scnprintf(bf, size, "%#x", ptr);
1538
1539         thread__set_filename_pos(arg->thread, bf, ptr);
1540         return 0;
1541 }
1542
1543 static bool trace__filter_duration(struct trace *trace, double t)
1544 {
1545         return t < (trace->duration_filter * NSEC_PER_MSEC);
1546 }
1547
1548 static size_t trace__fprintf_tstamp(struct trace *trace, u64 tstamp, FILE *fp)
1549 {
1550         double ts = (double)(tstamp - trace->base_time) / NSEC_PER_MSEC;
1551
1552         return fprintf(fp, "%10.3f ", ts);
1553 }
1554
/*
 * Flags set from sig_handler().  They are written asynchronously from a
 * signal handler, so they must be volatile sig_atomic_t objects for the
 * accesses to be well defined.
 */
static volatile sig_atomic_t done = false;
static volatile sig_atomic_t interrupted = false;

/*
 * Signal handler: request termination and remember whether the signal was
 * SIGINT.
 */
static void sig_handler(int sig)
{
	done = true;
	interrupted = sig == SIGINT;
}
1563
1564 static size_t trace__fprintf_entry_head(struct trace *trace, struct thread *thread,
1565                                         u64 duration, u64 tstamp, FILE *fp)
1566 {
1567         size_t printed = trace__fprintf_tstamp(trace, tstamp, fp);
1568         printed += fprintf_duration(duration, fp);
1569
1570         if (trace->multiple_threads) {
1571                 if (trace->show_comm)
1572                         printed += fprintf(fp, "%.14s/", thread__comm_str(thread));
1573                 printed += fprintf(fp, "%d ", thread->tid);
1574         }
1575
1576         return printed;
1577 }
1578
1579 static int trace__process_event(struct trace *trace, struct machine *machine,
1580                                 union perf_event *event, struct perf_sample *sample)
1581 {
1582         int ret = 0;
1583
1584         switch (event->header.type) {
1585         case PERF_RECORD_LOST:
1586                 color_fprintf(trace->output, PERF_COLOR_RED,
1587                               "LOST %" PRIu64 " events!\n", event->lost.lost);
1588                 ret = machine__process_lost_event(machine, event, sample);
1589         default:
1590                 ret = machine__process_event(machine, event, sample);
1591                 break;
1592         }
1593
1594         return ret;
1595 }
1596
1597 static int trace__tool_process(struct perf_tool *tool,
1598                                union perf_event *event,
1599                                struct perf_sample *sample,
1600                                struct machine *machine)
1601 {
1602         struct trace *trace = container_of(tool, struct trace, tool);
1603         return trace__process_event(trace, machine, event, sample);
1604 }
1605
1606 static int trace__symbols_init(struct trace *trace, struct perf_evlist *evlist)
1607 {
1608         int err = symbol__init(NULL);
1609
1610         if (err)
1611                 return err;
1612
1613         trace->host = machine__new_host();
1614         if (trace->host == NULL)
1615                 return -ENOMEM;
1616
1617         if (trace_event__register_resolver(trace->host, machine__resolve_kernel_addr) < 0)
1618                 return -errno;
1619
1620         err = __machine__synthesize_threads(trace->host, &trace->tool, &trace->opts.target,
1621                                             evlist->threads, trace__tool_process, false,
1622                                             trace->opts.proc_map_timeout);
1623         if (err)
1624                 symbol__exit();
1625
1626         return err;
1627 }
1628
1629 static int syscall__set_arg_fmts(struct syscall *sc)
1630 {
1631         struct format_field *field;
1632         int idx = 0;
1633
1634         sc->arg_scnprintf = calloc(sc->nr_args, sizeof(void *));
1635         if (sc->arg_scnprintf == NULL)
1636                 return -1;
1637
1638         if (sc->fmt)
1639                 sc->arg_parm = sc->fmt->arg_parm;
1640
1641         for (field = sc->args; field; field = field->next) {
1642                 if (sc->fmt && sc->fmt->arg_scnprintf[idx])
1643                         sc->arg_scnprintf[idx] = sc->fmt->arg_scnprintf[idx];
1644                 else if (field->flags & FIELD_IS_POINTER)
1645                         sc->arg_scnprintf[idx] = syscall_arg__scnprintf_hex;
1646                 ++idx;
1647         }
1648
1649         return 0;
1650 }
1651
1652 static int trace__read_syscall_info(struct trace *trace, int id)
1653 {
1654         char tp_name[128];
1655         struct syscall *sc;
1656         const char *name = audit_syscall_to_name(id, trace->audit.machine);
1657
1658         if (name == NULL)
1659                 return -1;
1660
1661         if (id > trace->syscalls.max) {
1662                 struct syscall *nsyscalls = realloc(trace->syscalls.table, (id + 1) * sizeof(*sc));
1663
1664                 if (nsyscalls == NULL)
1665                         return -1;
1666
1667                 if (trace->syscalls.max != -1) {
1668                         memset(nsyscalls + trace->syscalls.max + 1, 0,
1669                                (id - trace->syscalls.max) * sizeof(*sc));
1670                 } else {
1671                         memset(nsyscalls, 0, (id + 1) * sizeof(*sc));
1672                 }
1673
1674                 trace->syscalls.table = nsyscalls;
1675                 trace->syscalls.max   = id;
1676         }
1677
1678         sc = trace->syscalls.table + id;
1679         sc->name = name;
1680
1681         sc->fmt  = syscall_fmt__find(sc->name);
1682
1683         snprintf(tp_name, sizeof(tp_name), "sys_enter_%s", sc->name);
1684         sc->tp_format = trace_event__tp_format("syscalls", tp_name);
1685
1686         if (sc->tp_format == NULL && sc->fmt && sc->fmt->alias) {
1687                 snprintf(tp_name, sizeof(tp_name), "sys_enter_%s", sc->fmt->alias);
1688                 sc->tp_format = trace_event__tp_format("syscalls", tp_name);
1689         }
1690
1691         if (sc->tp_format == NULL)
1692                 return -1;
1693
1694         sc->args = sc->tp_format->format.fields;
1695         sc->nr_args = sc->tp_format->format.nr_fields;
1696         /* drop nr field - not relevant here; does not exist on older kernels */
1697         if (sc->args && strcmp(sc->args->name, "nr") == 0) {
1698                 sc->args = sc->args->next;
1699                 --sc->nr_args;
1700         }
1701
1702         sc->is_exit = !strcmp(name, "exit_group") || !strcmp(name, "exit");
1703
1704         return syscall__set_arg_fmts(sc);
1705 }
1706
1707 static int trace__validate_ev_qualifier(struct trace *trace)
1708 {
1709         int err = 0, i;
1710         struct str_node *pos;
1711
1712         trace->ev_qualifier_ids.nr = strlist__nr_entries(trace->ev_qualifier);
1713         trace->ev_qualifier_ids.entries = malloc(trace->ev_qualifier_ids.nr *
1714                                                  sizeof(trace->ev_qualifier_ids.entries[0]));
1715
1716         if (trace->ev_qualifier_ids.entries == NULL) {
1717                 fputs("Error:\tNot enough memory for allocating events qualifier ids\n",
1718                        trace->output);
1719                 err = -EINVAL;
1720                 goto out;
1721         }
1722
1723         i = 0;
1724
1725         strlist__for_each(pos, trace->ev_qualifier) {
1726                 const char *sc = pos->s;
1727                 int id = audit_name_to_syscall(sc, trace->audit.machine);
1728
1729                 if (id < 0) {
1730                         if (err == 0) {
1731                                 fputs("Error:\tInvalid syscall ", trace->output);
1732                                 err = -EINVAL;
1733                         } else {
1734                                 fputs(", ", trace->output);
1735                         }
1736
1737                         fputs(sc, trace->output);
1738                 }
1739
1740                 trace->ev_qualifier_ids.entries[i++] = id;
1741         }
1742
1743         if (err < 0) {
1744                 fputs("\nHint:\ttry 'perf list syscalls:sys_enter_*'"
1745                       "\nHint:\tand: 'man syscalls'\n", trace->output);
1746                 zfree(&trace->ev_qualifier_ids.entries);
1747                 trace->ev_qualifier_ids.nr = 0;
1748         }
1749 out:
1750         return err;
1751 }
1752
1753 /*
1754  * args is to be interpreted as a series of longs but we need to handle
1755  * 8-byte unaligned accesses. args points to raw_data within the event
1756  * and raw_data is guaranteed to be 8-byte unaligned because it is
1757  * preceded by raw_size which is a u32. So we need to copy args to a temp
1758  * variable to read it. Most notably this avoids extended load instructions
1759  * on unaligned addresses
1760  */
1761
1762 static size_t syscall__scnprintf_args(struct syscall *sc, char *bf, size_t size,
1763                                       unsigned char *args, struct trace *trace,
1764                                       struct thread *thread)
1765 {
1766         size_t printed = 0;
1767         unsigned char *p;
1768         unsigned long val;
1769
1770         if (sc->args != NULL) {
1771                 struct format_field *field;
1772                 u8 bit = 1;
1773                 struct syscall_arg arg = {
1774                         .idx    = 0,
1775                         .mask   = 0,
1776                         .trace  = trace,
1777                         .thread = thread,
1778                 };
1779
1780                 for (field = sc->args; field;
1781                      field = field->next, ++arg.idx, bit <<= 1) {
1782                         if (arg.mask & bit)
1783                                 continue;
1784
1785                         /* special care for unaligned accesses */
1786                         p = args + sizeof(unsigned long) * arg.idx;
1787                         memcpy(&val, p, sizeof(val));
1788
1789                         /*
1790                          * Suppress this argument if its value is zero and
1791                          * and we don't have a string associated in an
1792                          * strarray for it.
1793                          */
1794                         if (val == 0 &&
1795                             !(sc->arg_scnprintf &&
1796                               sc->arg_scnprintf[arg.idx] == SCA_STRARRAY &&
1797                               sc->arg_parm[arg.idx]))
1798                                 continue;
1799
1800                         printed += scnprintf(bf + printed, size - printed,
1801                                              "%s%s: ", printed ? ", " : "", field->name);
1802                         if (sc->arg_scnprintf && sc->arg_scnprintf[arg.idx]) {
1803                                 arg.val = val;
1804                                 if (sc->arg_parm)
1805                                         arg.parm = sc->arg_parm[arg.idx];
1806                                 printed += sc->arg_scnprintf[arg.idx](bf + printed,
1807                                                                       size - printed, &arg);
1808                         } else {
1809                                 printed += scnprintf(bf + printed, size - printed,
1810                                                      "%ld", val);
1811                         }
1812                 }
1813         } else {
1814                 int i = 0;
1815
1816                 while (i < 6) {
1817                         /* special care for unaligned accesses */
1818                         p = args + sizeof(unsigned long) * i;
1819                         memcpy(&val, p, sizeof(val));
1820                         printed += scnprintf(bf + printed, size - printed,
1821                                              "%sarg%d: %ld",
1822                                              printed ? ", " : "", i, val);
1823                         ++i;
1824                 }
1825         }
1826
1827         return printed;
1828 }
1829
/*
 * Signature of the per-event callbacks stored in evsel->handler and
 * invoked for each sample of that event.
 */
typedef int (*tracepoint_handler)(struct trace *trace,
				  struct perf_evsel *evsel,
				  union perf_event *event,
				  struct perf_sample *sample);
1833
1834 static struct syscall *trace__syscall_info(struct trace *trace,
1835                                            struct perf_evsel *evsel, int id)
1836 {
1837
1838         if (id < 0) {
1839
1840                 /*
1841                  * XXX: Noticed on x86_64, reproduced as far back as 3.0.36, haven't tried
1842                  * before that, leaving at a higher verbosity level till that is
1843                  * explained. Reproduced with plain ftrace with:
1844                  *
1845                  * echo 1 > /t/events/raw_syscalls/sys_exit/enable
1846                  * grep "NR -1 " /t/trace_pipe
1847                  *
1848                  * After generating some load on the machine.
1849                  */
1850                 if (verbose > 1) {
1851                         static u64 n;
1852                         fprintf(trace->output, "Invalid syscall %d id, skipping (%s, %" PRIu64 ") ...\n",
1853                                 id, perf_evsel__name(evsel), ++n);
1854                 }
1855                 return NULL;
1856         }
1857
1858         if ((id > trace->syscalls.max || trace->syscalls.table[id].name == NULL) &&
1859             trace__read_syscall_info(trace, id))
1860                 goto out_cant_read;
1861
1862         if ((id > trace->syscalls.max || trace->syscalls.table[id].name == NULL))
1863                 goto out_cant_read;
1864
1865         return &trace->syscalls.table[id];
1866
1867 out_cant_read:
1868         if (verbose) {
1869                 fprintf(trace->output, "Problems reading syscall %d", id);
1870                 if (id <= trace->syscalls.max && trace->syscalls.table[id].name != NULL)
1871                         fprintf(trace->output, "(%s)", trace->syscalls.table[id].name);
1872                 fputs(" information\n", trace->output);
1873         }
1874         return NULL;
1875 }
1876
1877 static void thread__update_stats(struct thread_trace *ttrace,
1878                                  int id, struct perf_sample *sample)
1879 {
1880         struct int_node *inode;
1881         struct stats *stats;
1882         u64 duration = 0;
1883
1884         inode = intlist__findnew(ttrace->syscall_stats, id);
1885         if (inode == NULL)
1886                 return;
1887
1888         stats = inode->priv;
1889         if (stats == NULL) {
1890                 stats = malloc(sizeof(struct stats));
1891                 if (stats == NULL)
1892                         return;
1893                 init_stats(stats);
1894                 inode->priv = stats;
1895         }
1896
1897         if (ttrace->entry_time && sample->time > ttrace->entry_time)
1898                 duration = sample->time - ttrace->entry_time;
1899
1900         update_stats(stats, duration);
1901 }
1902
1903 static int trace__printf_interrupted_entry(struct trace *trace, struct perf_sample *sample)
1904 {
1905         struct thread_trace *ttrace;
1906         u64 duration;
1907         size_t printed;
1908
1909         if (trace->current == NULL)
1910                 return 0;
1911
1912         ttrace = thread__priv(trace->current);
1913
1914         if (!ttrace->entry_pending)
1915                 return 0;
1916
1917         duration = sample->time - ttrace->entry_time;
1918
1919         printed  = trace__fprintf_entry_head(trace, trace->current, duration, sample->time, trace->output);
1920         printed += fprintf(trace->output, "%-70s) ...\n", ttrace->entry_str);
1921         ttrace->entry_pending = false;
1922
1923         return printed;
1924 }
1925
1926 static int trace__sys_enter(struct trace *trace, struct perf_evsel *evsel,
1927                             union perf_event *event __maybe_unused,
1928                             struct perf_sample *sample)
1929 {
1930         char *msg;
1931         void *args;
1932         size_t printed = 0;
1933         struct thread *thread;
1934         int id = perf_evsel__sc_tp_uint(evsel, id, sample), err = -1;
1935         struct syscall *sc = trace__syscall_info(trace, evsel, id);
1936         struct thread_trace *ttrace;
1937
1938         if (sc == NULL)
1939                 return -1;
1940
1941         thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
1942         ttrace = thread__trace(thread, trace->output);
1943         if (ttrace == NULL)
1944                 goto out_put;
1945
1946         args = perf_evsel__sc_tp_ptr(evsel, args, sample);
1947
1948         if (ttrace->entry_str == NULL) {
1949                 ttrace->entry_str = malloc(trace__entry_str_size);
1950                 if (!ttrace->entry_str)
1951                         goto out_put;
1952         }
1953
1954         if (!trace->summary_only)
1955                 trace__printf_interrupted_entry(trace, sample);
1956
1957         ttrace->entry_time = sample->time;
1958         msg = ttrace->entry_str;
1959         printed += scnprintf(msg + printed, trace__entry_str_size - printed, "%s(", sc->name);
1960
1961         printed += syscall__scnprintf_args(sc, msg + printed, trace__entry_str_size - printed,
1962                                            args, trace, thread);
1963
1964         if (sc->is_exit) {
1965                 if (!trace->duration_filter && !trace->summary_only) {
1966                         trace__fprintf_entry_head(trace, thread, 1, sample->time, trace->output);
1967                         fprintf(trace->output, "%-70s\n", ttrace->entry_str);
1968                 }
1969         } else
1970                 ttrace->entry_pending = true;
1971
1972         if (trace->current != thread) {
1973                 thread__put(trace->current);
1974                 trace->current = thread__get(thread);
1975         }
1976         err = 0;
1977 out_put:
1978         thread__put(thread);
1979         return err;
1980 }
1981
1982 static int trace__sys_exit(struct trace *trace, struct perf_evsel *evsel,
1983                            union perf_event *event __maybe_unused,
1984                            struct perf_sample *sample)
1985 {
1986         long ret;
1987         u64 duration = 0;
1988         struct thread *thread;
1989         int id = perf_evsel__sc_tp_uint(evsel, id, sample), err = -1;
1990         struct syscall *sc = trace__syscall_info(trace, evsel, id);
1991         struct thread_trace *ttrace;
1992
1993         if (sc == NULL)
1994                 return -1;
1995
1996         thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
1997         ttrace = thread__trace(thread, trace->output);
1998         if (ttrace == NULL)
1999                 goto out_put;
2000
2001         if (trace->summary)
2002                 thread__update_stats(ttrace, id, sample);
2003
2004         ret = perf_evsel__sc_tp_uint(evsel, ret, sample);
2005
2006         if (id == trace->audit.open_id && ret >= 0 && trace->last_vfs_getname) {
2007                 trace__set_fd_pathname(thread, ret, trace->last_vfs_getname);
2008                 trace->last_vfs_getname = NULL;
2009                 ++trace->stats.vfs_getname;
2010         }
2011
2012         ttrace->exit_time = sample->time;
2013
2014         if (ttrace->entry_time) {
2015                 duration = sample->time - ttrace->entry_time;
2016                 if (trace__filter_duration(trace, duration))
2017                         goto out;
2018         } else if (trace->duration_filter)
2019                 goto out;
2020
2021         if (trace->summary_only)
2022                 goto out;
2023
2024         trace__fprintf_entry_head(trace, thread, duration, sample->time, trace->output);
2025
2026         if (ttrace->entry_pending) {
2027                 fprintf(trace->output, "%-70s", ttrace->entry_str);
2028         } else {
2029                 fprintf(trace->output, " ... [");
2030                 color_fprintf(trace->output, PERF_COLOR_YELLOW, "continued");
2031                 fprintf(trace->output, "]: %s()", sc->name);
2032         }
2033
2034         if (sc->fmt == NULL) {
2035 signed_print:
2036                 fprintf(trace->output, ") = %ld", ret);
2037         } else if (ret < 0 && sc->fmt->errmsg) {
2038                 char bf[STRERR_BUFSIZE];
2039                 const char *emsg = strerror_r(-ret, bf, sizeof(bf)),
2040                            *e = audit_errno_to_name(-ret);
2041
2042                 fprintf(trace->output, ") = -1 %s %s", e, emsg);
2043         } else if (ret == 0 && sc->fmt->timeout)
2044                 fprintf(trace->output, ") = 0 Timeout");
2045         else if (sc->fmt->hexret)
2046                 fprintf(trace->output, ") = %#lx", ret);
2047         else
2048                 goto signed_print;
2049
2050         fputc('\n', trace->output);
2051 out:
2052         ttrace->entry_pending = false;
2053         err = 0;
2054 out_put:
2055         thread__put(thread);
2056         return err;
2057 }
2058
2059 static int trace__vfs_getname(struct trace *trace, struct perf_evsel *evsel,
2060                               union perf_event *event __maybe_unused,
2061                               struct perf_sample *sample)
2062 {
2063         struct thread *thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
2064         struct thread_trace *ttrace;
2065         size_t filename_len, entry_str_len, to_move;
2066         ssize_t remaining_space;
2067         char *pos;
2068         const char *filename;
2069
2070         trace->last_vfs_getname = perf_evsel__rawptr(evsel, sample, "pathname");
2071
2072         if (!thread)
2073                 goto out;
2074
2075         ttrace = thread__priv(thread);
2076         if (!ttrace)
2077                 goto out;
2078
2079         if (!ttrace->filename.ptr)
2080                 goto out;
2081
2082         entry_str_len = strlen(ttrace->entry_str);
2083         remaining_space = trace__entry_str_size - entry_str_len - 1; /* \0 */
2084         if (remaining_space <= 0)
2085                 goto out;
2086
2087         filename = trace->last_vfs_getname;
2088         filename_len = strlen(filename);
2089         if (filename_len > (size_t)remaining_space) {
2090                 filename += filename_len - remaining_space;
2091                 filename_len = remaining_space;
2092         }
2093
2094         to_move = entry_str_len - ttrace->filename.entry_str_pos + 1; /* \0 */
2095         pos = ttrace->entry_str + ttrace->filename.entry_str_pos;
2096         memmove(pos + filename_len, pos, to_move);
2097         memcpy(pos, filename, filename_len);
2098
2099         ttrace->filename.ptr = 0;
2100         ttrace->filename.entry_str_pos = 0;
2101 out:
2102         return 0;
2103 }
2104
2105 static int trace__sched_stat_runtime(struct trace *trace, struct perf_evsel *evsel,
2106                                      union perf_event *event __maybe_unused,
2107                                      struct perf_sample *sample)
2108 {
2109         u64 runtime = perf_evsel__intval(evsel, sample, "runtime");
2110         double runtime_ms = (double)runtime / NSEC_PER_MSEC;
2111         struct thread *thread = machine__findnew_thread(trace->host,
2112                                                         sample->pid,
2113                                                         sample->tid);
2114         struct thread_trace *ttrace = thread__trace(thread, trace->output);
2115
2116         if (ttrace == NULL)
2117                 goto out_dump;
2118
2119         ttrace->runtime_ms += runtime_ms;
2120         trace->runtime_ms += runtime_ms;
2121         thread__put(thread);
2122         return 0;
2123
2124 out_dump:
2125         fprintf(trace->output, "%s: comm=%s,pid=%u,runtime=%" PRIu64 ",vruntime=%" PRIu64 ")\n",
2126                evsel->name,
2127                perf_evsel__strval(evsel, sample, "comm"),
2128                (pid_t)perf_evsel__intval(evsel, sample, "pid"),
2129                runtime,
2130                perf_evsel__intval(evsel, sample, "vruntime"));
2131         thread__put(thread);
2132         return 0;
2133 }
2134
2135 static int trace__event_handler(struct trace *trace, struct perf_evsel *evsel,
2136                                 union perf_event *event __maybe_unused,
2137                                 struct perf_sample *sample)
2138 {
2139         trace__printf_interrupted_entry(trace, sample);
2140         trace__fprintf_tstamp(trace, sample->time, trace->output);
2141
2142         if (trace->trace_syscalls)
2143                 fprintf(trace->output, "(         ): ");
2144
2145         fprintf(trace->output, "%s:", evsel->name);
2146
2147         if (evsel->tp_format) {
2148                 event_format__fprintf(evsel->tp_format, sample->cpu,
2149                                       sample->raw_data, sample->raw_size,
2150                                       trace->output);
2151         }
2152
2153         fprintf(trace->output, ")\n");
2154         return 0;
2155 }
2156
2157 static void print_location(FILE *f, struct perf_sample *sample,
2158                            struct addr_location *al,
2159                            bool print_dso, bool print_sym)
2160 {
2161
2162         if ((verbose || print_dso) && al->map)
2163                 fprintf(f, "%s@", al->map->dso->long_name);
2164
2165         if ((verbose || print_sym) && al->sym)
2166                 fprintf(f, "%s+0x%" PRIx64, al->sym->name,
2167                         al->addr - al->sym->start);
2168         else if (al->map)
2169                 fprintf(f, "0x%" PRIx64, al->addr);
2170         else
2171                 fprintf(f, "0x%" PRIx64, sample->addr);
2172 }
2173
2174 static int trace__pgfault(struct trace *trace,
2175                           struct perf_evsel *evsel,
2176                           union perf_event *event,
2177                           struct perf_sample *sample)
2178 {
2179         struct thread *thread;
2180         u8 cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK;
2181         struct addr_location al;
2182         char map_type = 'd';
2183         struct thread_trace *ttrace;
2184         int err = -1;
2185
2186         thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
2187         ttrace = thread__trace(thread, trace->output);
2188         if (ttrace == NULL)
2189                 goto out_put;
2190
2191         if (evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MAJ)
2192                 ttrace->pfmaj++;
2193         else
2194                 ttrace->pfmin++;
2195
2196         if (trace->summary_only)
2197                 goto out;
2198
2199         thread__find_addr_location(thread, cpumode, MAP__FUNCTION,
2200                               sample->ip, &al);
2201
2202         trace__fprintf_entry_head(trace, thread, 0, sample->time, trace->output);
2203
2204         fprintf(trace->output, "%sfault [",
2205                 evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MAJ ?
2206                 "maj" : "min");
2207
2208         print_location(trace->output, sample, &al, false, true);
2209
2210         fprintf(trace->output, "] => ");
2211
2212         thread__find_addr_location(thread, cpumode, MAP__VARIABLE,
2213                                    sample->addr, &al);
2214
2215         if (!al.map) {
2216                 thread__find_addr_location(thread, cpumode,
2217                                            MAP__FUNCTION, sample->addr, &al);
2218
2219                 if (al.map)
2220                         map_type = 'x';
2221                 else
2222                         map_type = '?';
2223         }
2224
2225         print_location(trace->output, sample, &al, true, false);
2226
2227         fprintf(trace->output, " (%c%c)\n", map_type, al.level);
2228 out:
2229         err = 0;
2230 out_put:
2231         thread__put(thread);
2232         return err;
2233 }
2234
2235 static bool skip_sample(struct trace *trace, struct perf_sample *sample)
2236 {
2237         if ((trace->pid_list && intlist__find(trace->pid_list, sample->pid)) ||
2238             (trace->tid_list && intlist__find(trace->tid_list, sample->tid)))
2239                 return false;
2240
2241         if (trace->pid_list || trace->tid_list)
2242                 return true;
2243
2244         return false;
2245 }
2246
2247 static int trace__process_sample(struct perf_tool *tool,
2248                                  union perf_event *event,
2249                                  struct perf_sample *sample,
2250                                  struct perf_evsel *evsel,
2251                                  struct machine *machine __maybe_unused)
2252 {
2253         struct trace *trace = container_of(tool, struct trace, tool);
2254         int err = 0;
2255
2256         tracepoint_handler handler = evsel->handler;
2257
2258         if (skip_sample(trace, sample))
2259                 return 0;
2260
2261         if (!trace->full_time && trace->base_time == 0)
2262                 trace->base_time = sample->time;
2263
2264         if (handler) {
2265                 ++trace->nr_events;
2266                 handler(trace, evsel, event, sample);
2267         }
2268
2269         return err;
2270 }
2271
2272 static int parse_target_str(struct trace *trace)
2273 {
2274         if (trace->opts.target.pid) {
2275                 trace->pid_list = intlist__new(trace->opts.target.pid);
2276                 if (trace->pid_list == NULL) {
2277                         pr_err("Error parsing process id string\n");
2278                         return -EINVAL;
2279                 }
2280         }
2281
2282         if (trace->opts.target.tid) {
2283                 trace->tid_list = intlist__new(trace->opts.target.tid);
2284                 if (trace->tid_list == NULL) {
2285                         pr_err("Error parsing thread id string\n");
2286                         return -EINVAL;
2287                 }
2288         }
2289
2290         return 0;
2291 }
2292
2293 static int trace__record(struct trace *trace, int argc, const char **argv)
2294 {
2295         unsigned int rec_argc, i, j;
2296         const char **rec_argv;
2297         const char * const record_args[] = {
2298                 "record",
2299                 "-R",
2300                 "-m", "1024",
2301                 "-c", "1",
2302         };
2303
2304         const char * const sc_args[] = { "-e", };
2305         unsigned int sc_args_nr = ARRAY_SIZE(sc_args);
2306         const char * const majpf_args[] = { "-e", "major-faults" };
2307         unsigned int majpf_args_nr = ARRAY_SIZE(majpf_args);
2308         const char * const minpf_args[] = { "-e", "minor-faults" };
2309         unsigned int minpf_args_nr = ARRAY_SIZE(minpf_args);
2310
2311         /* +1 is for the event string below */
2312         rec_argc = ARRAY_SIZE(record_args) + sc_args_nr + 1 +
2313                 majpf_args_nr + minpf_args_nr + argc;
2314         rec_argv = calloc(rec_argc + 1, sizeof(char *));
2315
2316         if (rec_argv == NULL)
2317                 return -ENOMEM;
2318
2319         j = 0;
2320         for (i = 0; i < ARRAY_SIZE(record_args); i++)
2321                 rec_argv[j++] = record_args[i];
2322
2323         if (trace->trace_syscalls) {
2324                 for (i = 0; i < sc_args_nr; i++)
2325                         rec_argv[j++] = sc_args[i];
2326
2327                 /* event string may be different for older kernels - e.g., RHEL6 */
2328                 if (is_valid_tracepoint("raw_syscalls:sys_enter"))
2329                         rec_argv[j++] = "raw_syscalls:sys_enter,raw_syscalls:sys_exit";
2330                 else if (is_valid_tracepoint("syscalls:sys_enter"))
2331                         rec_argv[j++] = "syscalls:sys_enter,syscalls:sys_exit";
2332                 else {
2333                         pr_err("Neither raw_syscalls nor syscalls events exist.\n");
2334                         return -1;
2335                 }
2336         }
2337
2338         if (trace->trace_pgfaults & TRACE_PFMAJ)
2339                 for (i = 0; i < majpf_args_nr; i++)
2340                         rec_argv[j++] = majpf_args[i];
2341
2342         if (trace->trace_pgfaults & TRACE_PFMIN)
2343                 for (i = 0; i < minpf_args_nr; i++)
2344                         rec_argv[j++] = minpf_args[i];
2345
2346         for (i = 0; i < (unsigned int)argc; i++)
2347                 rec_argv[j++] = argv[i];
2348
2349         return cmd_record(j, rec_argv, NULL);
2350 }
2351
2352 static size_t trace__fprintf_thread_summary(struct trace *trace, FILE *fp);
2353
2354 static bool perf_evlist__add_vfs_getname(struct perf_evlist *evlist)
2355 {
2356         struct perf_evsel *evsel = perf_evsel__newtp("probe", "vfs_getname");
2357         if (evsel == NULL)
2358                 return false;
2359
2360         if (perf_evsel__field(evsel, "pathname") == NULL) {
2361                 perf_evsel__delete(evsel);
2362                 return false;
2363         }
2364
2365         evsel->handler = trace__vfs_getname;
2366         perf_evlist__add(evlist, evsel);
2367         return true;
2368 }
2369
2370 static int perf_evlist__add_pgfault(struct perf_evlist *evlist,
2371                                     u64 config)
2372 {
2373         struct perf_evsel *evsel;
2374         struct perf_event_attr attr = {
2375                 .type = PERF_TYPE_SOFTWARE,
2376                 .mmap_data = 1,
2377         };
2378
2379         attr.config = config;
2380         attr.sample_period = 1;
2381
2382         event_attr_init(&attr);
2383
2384         evsel = perf_evsel__new(&attr);
2385         if (!evsel)
2386                 return -ENOMEM;
2387
2388         evsel->handler = trace__pgfault;
2389         perf_evlist__add(evlist, evsel);
2390
2391         return 0;
2392 }
2393
2394 static void trace__handle_event(struct trace *trace, union perf_event *event, struct perf_sample *sample)
2395 {
2396         const u32 type = event->header.type;
2397         struct perf_evsel *evsel;
2398
2399         if (!trace->full_time && trace->base_time == 0)
2400                 trace->base_time = sample->time;
2401
2402         if (type != PERF_RECORD_SAMPLE) {
2403                 trace__process_event(trace, trace->host, event, sample);
2404                 return;
2405         }
2406
2407         evsel = perf_evlist__id2evsel(trace->evlist, sample->id);
2408         if (evsel == NULL) {
2409                 fprintf(trace->output, "Unknown tp ID %" PRIu64 ", skipping...\n", sample->id);
2410                 return;
2411         }
2412
2413         if (evsel->attr.type == PERF_TYPE_TRACEPOINT &&
2414             sample->raw_data == NULL) {
2415                 fprintf(trace->output, "%s sample with no payload for tid: %d, cpu %d, raw_size=%d, skipping...\n",
2416                        perf_evsel__name(evsel), sample->tid,
2417                        sample->cpu, sample->raw_size);
2418         } else {
2419                 tracepoint_handler handler = evsel->handler;
2420                 handler(trace, evsel, event, sample);
2421         }
2422 }
2423
2424 static int trace__add_syscall_newtp(struct trace *trace)
2425 {
2426         int ret = -1;
2427         struct perf_evlist *evlist = trace->evlist;
2428         struct perf_evsel *sys_enter, *sys_exit;
2429
2430         sys_enter = perf_evsel__syscall_newtp("sys_enter", trace__sys_enter);
2431         if (sys_enter == NULL)
2432                 goto out;
2433
2434         if (perf_evsel__init_sc_tp_ptr_field(sys_enter, args))
2435                 goto out_delete_sys_enter;
2436
2437         sys_exit = perf_evsel__syscall_newtp("sys_exit", trace__sys_exit);
2438         if (sys_exit == NULL)
2439                 goto out_delete_sys_enter;
2440
2441         if (perf_evsel__init_sc_tp_uint_field(sys_exit, ret))
2442                 goto out_delete_sys_exit;
2443
2444         perf_evlist__add(evlist, sys_enter);
2445         perf_evlist__add(evlist, sys_exit);
2446
2447         trace->syscalls.events.sys_enter = sys_enter;
2448         trace->syscalls.events.sys_exit  = sys_exit;
2449
2450         ret = 0;
2451 out:
2452         return ret;
2453
2454 out_delete_sys_exit:
2455         perf_evsel__delete_priv(sys_exit);
2456 out_delete_sys_enter:
2457         perf_evsel__delete_priv(sys_enter);
2458         goto out;
2459 }
2460
2461 static int trace__set_ev_qualifier_filter(struct trace *trace)
2462 {
2463         int err = -1;
2464         char *filter = asprintf_expr_inout_ints("id", !trace->not_ev_qualifier,
2465                                                 trace->ev_qualifier_ids.nr,
2466                                                 trace->ev_qualifier_ids.entries);
2467
2468         if (filter == NULL)
2469                 goto out_enomem;
2470
2471         if (!perf_evsel__append_filter(trace->syscalls.events.sys_enter, "&&", filter))
2472                 err = perf_evsel__append_filter(trace->syscalls.events.sys_exit, "&&", filter);
2473
2474         free(filter);
2475 out:
2476         return err;
2477 out_enomem:
2478         errno = ENOMEM;
2479         goto out;
2480 }
2481
/*
 * trace__run - trace live.
 *
 * Adds the events selected by the trace flags to trace->evlist, optionally
 * prepares and starts the workload given by argv (when argc > 0), then reads
 * events from the evlist mmaps and hands them to trace__handle_event() until
 * the loop below stops.
 *
 * On every path that reaches out_delete_evlist the evlist is deleted,
 * trace->evlist is set to NULL and trace->live is cleared.
 *
 * Returns err: 0 when the run completed, otherwise the non-zero value left by
 * the step that failed (-1 when an event could not be added).
 */
2482 static int trace__run(struct trace *trace, int argc, const char **argv)
2483 {
2484         struct perf_evlist *evlist = trace->evlist;
2485         struct perf_evsel *evsel;
2486         int err = -1, i;
2487         unsigned long before;
2488         const bool forks = argc > 0;
2489         bool draining = false;
2490
2491         trace->live = true;
2492
             /* Add the requested events; each kind has its own error label. */
2493         if (trace->trace_syscalls && trace__add_syscall_newtp(trace))
2494                 goto out_error_raw_syscalls;
2495
2496         if (trace->trace_syscalls)
2497                 trace->vfs_getname = perf_evlist__add_vfs_getname(evlist);
2498
2499         if ((trace->trace_pgfaults & TRACE_PFMAJ) &&
2500             perf_evlist__add_pgfault(evlist, PERF_COUNT_SW_PAGE_FAULTS_MAJ)) {
2501                 goto out_error_mem;
2502         }
2503
2504         if ((trace->trace_pgfaults & TRACE_PFMIN) &&
2505             perf_evlist__add_pgfault(evlist, PERF_COUNT_SW_PAGE_FAULTS_MIN))
2506                 goto out_error_mem;
2507
2508         if (trace->sched &&
2509             perf_evlist__add_newtp(evlist, "sched", "sched_stat_runtime",
2510                                    trace__sched_stat_runtime))
2511                 goto out_error_sched_stat_runtime;
2512
2513         err = perf_evlist__create_maps(evlist, &trace->opts.target);
2514         if (err < 0) {
2515                 fprintf(trace->output, "Problems parsing the target to trace, check your options!\n");
2516                 goto out_delete_evlist;
2517         }
2518
2519         err = trace__symbols_init(trace, evlist);
2520         if (err < 0) {
2521                 fprintf(trace->output, "Problems initializing symbol libraries!\n");
2522                 goto out_delete_evlist;
2523         }
2524
2525         perf_evlist__config(evlist, &trace->opts);
2526
             /*
              * sig_handler is defined outside this function; presumably it is
              * what sets the done/interrupted flags tested below - verify.
              */
2527         signal(SIGCHLD, sig_handler);
2528         signal(SIGINT, sig_handler);
2529
             /* A command was given: prepare it now, it is started further down. */
2530         if (forks) {
2531                 err = perf_evlist__prepare_workload(evlist, &trace->opts.target,
2532                                                     argv, false, NULL);
2533                 if (err < 0) {
2534                         fprintf(trace->output, "Couldn't run the workload!\n");
2535                         goto out_delete_evlist;
2536                 }
2537         }
2538
2539         err = perf_evlist__open(evlist);
2540         if (err < 0)
2541                 goto out_error_open;
2542
2543         /*
2544          * Better not use !target__has_task() here because we need to cover the
2545          * case where no threads were specified in the command line, but a
2546          * workload was, and in that case we will fill in the thread_map when
2547          * we fork the workload in perf_evlist__prepare_workload.
2548          */
2549         if (trace->filter_pids.nr > 0)
2550                 err = perf_evlist__set_filter_pids(evlist, trace->filter_pids.nr, trace->filter_pids.entries);
2551         else if (thread_map__pid(evlist->threads, 0) == -1)
2552                 err = perf_evlist__set_filter_pid(evlist, getpid());
2553
             /*
              * NOTE(review): any failure to set the pid filter is reported
              * through out_error_mem, i.e. as "Not enough memory to run!",
              * whatever the real cause was.
              */
2554         if (err < 0)
2555                 goto out_error_mem;
2556
2557         if (trace->ev_qualifier_ids.nr > 0) {
2558                 err = trace__set_ev_qualifier_filter(trace);
2559                 if (err < 0)
2560                         goto out_errno;
2561
2562                 pr_debug("event qualifier tracepoint filter: %s\n",
2563                          trace->syscalls.events.sys_exit->filter);
2564         }
2565
             /* On failure evsel is used by the error path to name the event. */
2566         err = perf_evlist__apply_filters(evlist, &evsel);
2567         if (err < 0)
2568                 goto out_error_apply_filters;
2569
2570         err = perf_evlist__mmap(evlist, trace->opts.mmap_pages, false);
2571         if (err < 0)
2572                 goto out_error_mmap;
2573
             /* Events are enabled here only when an explicit target was given. */
2574         if (!target__none(&trace->opts.target))
2575                 perf_evlist__enable(evlist);
2576
2577         if (forks)
2578                 perf_evlist__start_workload(evlist);
2579
2580         trace->multiple_threads = thread_map__pid(evlist->threads, 0) == -1 ||
2581                                   evlist->threads->nr > 1 ||
2582                                   perf_evlist__first(evlist)->attr.inherit;
             /* Main loop: read every mmap, then poll if nothing new arrived. */
2583 again:
2584         before = trace->nr_events;
2585
2586         for (i = 0; i < evlist->nr_mmaps; i++) {
2587                 union perf_event *event;
2588
2589                 while ((event = perf_evlist__mmap_read(evlist, i)) != NULL) {
2590                         struct perf_sample sample;
2591
2592                         ++trace->nr_events;
2593
2594                         err = perf_evlist__parse_sample(evlist, event, &sample);
2595                         if (err) {
2596                                 fprintf(trace->output, "Can't parse sample, err = %d, skipping...\n", err);
2597                                 goto next_event;
2598                         }
2599
2600                         trace__handle_event(trace, event, &sample);
2601 next_event:
2602                         perf_evlist__mmap_consume(evlist, i);
2603
                             /* Interrupted: stop right away, without draining. */
2604                         if (interrupted)
2605                                 goto out_disable;
2606
                             /* Done: disable the events once, keep reading what is buffered. */
2607                         if (done && !draining) {
2608                                 perf_evlist__disable(evlist);
2609                                 draining = true;
2610                         }
2611                 }
2612         }
2613
             /*
              * Nothing new in this pass: unless already draining, poll with a
              * timeout of 100 once done is set and -1 otherwise, and go around
              * again if the poll reports something.
              */
2614         if (trace->nr_events == before) {
2615                 int timeout = done ? 100 : -1;
2616
2617                 if (!draining && perf_evlist__poll(evlist, timeout) > 0) {
2618                         if (perf_evlist__filter_pollfd(evlist, POLLERR | POLLHUP) == 0)
2619                                 draining = true;
2620
2621                         goto again;
2622                 }
2623         } else {
2624                 goto again;
2625         }
2626
2627 out_disable:
2628         thread__zput(trace->current);
2629
2630         perf_evlist__disable(evlist);
2631
             /* err holds the result of the last perf_evlist__parse_sample() here. */
2632         if (!err) {
2633                 if (trace->summary)
2634                         trace__fprintf_thread_summary(trace, trace->output);
2635
2636                 if (trace->show_tool_stats) {
2637                         fprintf(trace->output, "Stats:\n "
2638                                                " vfs_getname : %" PRIu64 "\n"
2639                                                " proc_getname: %" PRIu64 "\n",
2640                                 trace->stats.vfs_getname,
2641                                 trace->stats.proc_getname);
2642                 }
2643         }
2644
2645 out_delete_evlist:
2646         perf_evlist__delete(evlist);
2647         trace->evlist = NULL;
2648         trace->live = false;
2649         return err;
     /*
      * Everything below is reached only through goto.  The braces scope errbuf
      * to the error labels that format a message into it.
      */
2650 {
2651         char errbuf[BUFSIZ];
2652
2653 out_error_sched_stat_runtime:
2654         debugfs__strerror_open_tp(errno, errbuf, sizeof(errbuf), "sched", "sched_stat_runtime");
2655         goto out_error;
2656
2657 out_error_raw_syscalls:
2658         debugfs__strerror_open_tp(errno, errbuf, sizeof(errbuf), "raw_syscalls", "sys_(enter|exit)");
2659         goto out_error;
2660
2661 out_error_mmap:
2662         perf_evlist__strerror_mmap(evlist, errno, errbuf, sizeof(errbuf));
2663         goto out_error;
2664
2665 out_error_open:
2666         perf_evlist__strerror_open(evlist, errno, errbuf, sizeof(errbuf));
2667
2668 out_error:
2669         fprintf(trace->output, "%s\n", errbuf);
2670         goto out_delete_evlist;
2671
             /*
              * NOTE(review): passing strerror_r()'s return value to %s relies
              * on the GNU variant that returns char *; the XSI variant returns
              * int.
              */
2672 out_error_apply_filters:
2673         fprintf(trace->output,
2674                 "Failed to set filter \"%s\" on event %s with %d (%s)\n",
2675                 evsel->filter, perf_evsel__name(evsel), errno,
2676                 strerror_r(errno, errbuf, sizeof(errbuf)));
2677         goto out_delete_evlist;
2678 }
2679 out_error_mem:
2680         fprintf(trace->output, "Not enough memory to run!\n");
2681         goto out_delete_evlist;
2682
2683 out_errno:
2684         fprintf(trace->output, "errno=%d,%s\n", errno, strerror(errno));
2685         goto out_delete_evlist;
2686 }
2687
2688 static int trace__replay(struct trace *trace)
2689 {
2690         const struct perf_evsel_str_handler handlers[] = {
2691                 { "probe:vfs_getname",       trace__vfs_getname, },
2692         };
2693         struct perf_data_file file = {
2694                 .path  = input_name,
2695                 .mode  = PERF_DATA_MODE_READ,
2696                 .force = trace->force,
2697         };
2698         struct perf_session *session;
2699         struct perf_evsel *evsel;
2700         int err = -1;
2701
2702         trace->tool.sample        = trace__process_sample;
2703         trace->tool.mmap          = perf_event__process_mmap;
2704         trace->tool.mmap2         = perf_event__process_mmap2;
2705         trace->tool.comm          = perf_event__process_comm;
2706         trace->tool.exit          = perf_event__process_exit;
2707         trace->tool.fork          = perf_event__process_fork;
2708         trace->tool.attr          = perf_event__process_attr;
2709         trace->tool.tracing_data = perf_event__process_tracing_data;
2710         trace->tool.build_id      = perf_event__process_build_id;
2711
2712         trace->tool.ordered_events = true;
2713         trace->tool.ordering_requires_timestamps = true;
2714
2715         /* add tid to output */
2716         trace->multiple_threads = true;
2717
2718         session = perf_session__new(&file, false, &trace->tool);
2719         if (session == NULL)
2720                 return -1;
2721
2722         if (symbol__init(&session->header.env) < 0)
2723                 goto out;
2724
2725         trace->host = &session->machines.host;
2726
2727         err = perf_session__set_tracepoints_handlers(session, handlers);
2728         if (err)
2729                 goto out;
2730
2731         evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
2732                                                      "raw_syscalls:sys_enter");
2733         /* older kernels have syscalls tp versus raw_syscalls */
2734         if (evsel == NULL)
2735                 evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
2736                                                              "syscalls:sys_enter");
2737
2738         if (evsel &&
2739             (perf_evsel__init_syscall_tp(evsel, trace__sys_enter) < 0 ||
2740             perf_evsel__init_sc_tp_ptr_field(evsel, args))) {
2741                 pr_err("Error during initialize raw_syscalls:sys_enter event\n");
2742                 goto out;
2743         }
2744
2745         evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
2746                                                      "raw_syscalls:sys_exit");
2747         if (evsel == NULL)
2748                 evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
2749                                                              "syscalls:sys_exit");
2750         if (evsel &&
2751             (perf_evsel__init_syscall_tp(evsel, trace__sys_exit) < 0 ||
2752             perf_evsel__init_sc_tp_uint_field(evsel, ret))) {
2753                 pr_err("Error during initialize raw_syscalls:sys_exit event\n");
2754                 goto out;
2755         }
2756
2757         evlist__for_each(session->evlist, evsel) {
2758                 if (evsel->attr.type == PERF_TYPE_SOFTWARE &&
2759                     (evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MAJ ||
2760                      evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MIN ||
2761                      evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS))
2762                         evsel->handler = trace__pgfault;
2763         }
2764
2765         err = parse_target_str(trace);
2766         if (err != 0)
2767                 goto out;
2768
2769         setup_pager();
2770
2771         err = perf_session__process_events(session);
2772         if (err)
2773                 pr_err("Failed to process events, error %d", err);
2774
2775         else if (trace->summary)
2776                 trace__fprintf_thread_summary(trace, trace->output);
2777
2778 out:
2779         perf_session__delete(session);
2780
2781         return err;
2782 }
2783
/*
 * Write the heading of the per-thread summary to fp.
 *
 * Returns fprintf()'s result converted to size_t.
 */
static size_t trace__fprintf_threads_header(FILE *fp)
{
	return fprintf(fp, "\n Summary of events:\n\n");
}
2792
2793 static size_t thread__dump_stats(struct thread_trace *ttrace,
2794                                  struct trace *trace, FILE *fp)
2795 {
2796         struct stats *stats;
2797         size_t printed = 0;
2798         struct syscall *sc;
2799         struct int_node *inode = intlist__first(ttrace->syscall_stats);
2800
2801         if (inode == NULL)
2802                 return 0;
2803
2804         printed += fprintf(fp, "\n");
2805
2806         printed += fprintf(fp, "   syscall            calls    total       min       avg       max      stddev\n");
2807         printed += fprintf(fp, "                               (msec)    (msec)    (msec)    (msec)        (%%)\n");
2808         printed += fprintf(fp, "   --------------- -------- --------- --------- --------- ---------     ------\n");
2809
2810         /* each int_node is a syscall */
2811         while (inode) {
2812                 stats = inode->priv;
2813                 if (stats) {
2814                         double min = (double)(stats->min) / NSEC_PER_MSEC;
2815                         double max = (double)(stats->max) / NSEC_PER_MSEC;
2816                         double avg = avg_stats(stats);
2817                         double pct;
2818                         u64 n = (u64) stats->n;
2819
2820                         pct = avg ? 100.0 * stddev_stats(stats)/avg : 0.0;
2821                         avg /= NSEC_PER_MSEC;
2822
2823                         sc = &trace->syscalls.table[inode->i];
2824                         printed += fprintf(fp, "   %-15s", sc->name);
2825                         printed += fprintf(fp, " %8" PRIu64 " %9.3f %9.3f %9.3f",
2826                                            n, avg * n, min, avg);
2827                         printed += fprintf(fp, " %9.3f %9.2f%%\n", max, pct);
2828                 }
2829
2830                 inode = intlist__next(inode);
2831         }
2832
2833         printed += fprintf(fp, "\n\n");
2834
2835         return printed;
2836 }
2837
/*
 * Context passed to the per-thread summary callback through its opaque
 * private-data argument.
 */
struct summary_data {
	FILE *fp;		/* stream the summary is written to */
	struct trace *trace;	/* trace the threads being summarised belong to */
	size_t printed;		/* running sum of the fprintf() results */
};
2844
2845 static int trace__fprintf_one_thread(struct thread *thread, void *priv)
2846 {
2847         struct summary_data *data = priv;
2848         FILE *fp = data->fp;
2849         size_t printed = data->printed;
2850         struct trace *trace = data->trace;
2851         struct thread_trace *ttrace = thread__priv(thread);
2852         double ratio;
2853
2854         if (ttrace == NULL)
2855                 return 0;
2856
2857         ratio = (double)ttrace->nr_events / trace->nr_events * 100.0;
2858
2859         printed += fprintf(fp, " %s (%d), ", thread__comm_str(thread), thread->tid);
2860         printed += fprintf(fp, "%lu events, ", ttrace->nr_events);
2861         printed += fprintf(fp, "%.1f%%", ratio);
2862         if (ttrace->pfmaj)
2863                 printed += fprintf(fp, ", %lu majfaults", ttrace->pfmaj);
2864         if (ttrace->pfmin)
2865                 printed += fprintf(fp, ", %lu minfaults", ttrace->pfmin);
2866         printed += fprintf(fp, ", %.3f msec\n", ttrace->runtime_ms);
2867         printed += thread__dump_stats(ttrace, trace, fp);
2868
2869         data->printed += printed;
2870
2871         return 0;
2872 }
2873
2874 static size_t trace__fprintf_thread_summary(struct trace *trace, FILE *fp)
2875 {
2876         struct summary_data data = {
2877                 .fp = fp,
2878                 .trace = trace
2879         };
2880         data.printed = trace__fprintf_threads_header(fp);
2881
2882         machine__for_each_thread(trace->host, trace__fprintf_one_thread, &data);
2883
2884         return data.printed;
2885 }
2886
2887 static int trace__set_duration(const struct option *opt, const char *str,
2888                                int unset __maybe_unused)
2889 {
2890         struct trace *trace = opt->value;
2891
2892         trace->duration_filter = atof(str);
2893         return 0;
2894 }
2895
2896 static int trace__set_filter_pids(const struct option *opt, const char *str,
2897                                   int unset __maybe_unused)
2898 {
2899         int ret = -1;
2900         size_t i;
2901         struct trace *trace = opt->value;
2902         /*
2903          * FIXME: introduce a intarray class, plain parse csv and create a
2904          * { int nr, int entries[] } struct...
2905          */
2906         struct intlist *list = intlist__new(str);
2907
2908         if (list == NULL)
2909                 return -1;
2910
2911         i = trace->filter_pids.nr = intlist__nr_entries(list) + 1;
2912         trace->filter_pids.entries = calloc(i, sizeof(pid_t));
2913
2914         if (trace->filter_pids.entries == NULL)
2915                 goto out;
2916
2917         trace->filter_pids.entries[0] = getpid();
2918
2919         for (i = 1; i < trace->filter_pids.nr; ++i)
2920                 trace->filter_pids.entries[i] = intlist__entry(list, i - 1)->i;
2921
2922         intlist__delete(list);
2923         ret = 0;
2924 out:
2925         return ret;
2926 }
2927
2928 static int trace__open_output(struct trace *trace, const char *filename)
2929 {
2930         struct stat st;
2931
2932         if (!stat(filename, &st) && st.st_size) {
2933                 char oldname[PATH_MAX];
2934
2935                 scnprintf(oldname, sizeof(oldname), "%s.old", filename);
2936                 unlink(oldname);
2937                 rename(filename, oldname);
2938         }
2939
2940         trace->output = fopen(filename, "w");
2941
2942         return trace->output == NULL ? -errno : 0;
2943 }
2944
2945 static int parse_pagefaults(const struct option *opt, const char *str,
2946                             int unset __maybe_unused)
2947 {
2948         int *trace_pgfaults = opt->value;
2949
2950         if (strcmp(str, "all") == 0)
2951                 *trace_pgfaults |= TRACE_PFMAJ | TRACE_PFMIN;
2952         else if (strcmp(str, "maj") == 0)
2953                 *trace_pgfaults |= TRACE_PFMAJ;
2954         else if (strcmp(str, "min") == 0)
2955                 *trace_pgfaults |= TRACE_PFMIN;
2956         else
2957                 return -1;
2958
2959         return 0;
2960 }
2961
2962 static void evlist__set_evsel_handler(struct perf_evlist *evlist, void *handler)
2963 {
2964         struct perf_evsel *evsel;
2965
2966         evlist__for_each(evlist, evsel)
2967                 evsel->handler = handler;
2968 }
2969
/*
 * cmd_trace - entry point of 'perf trace'.
 *
 * Parses the command line into a struct trace, then either hands over to
 * trace__record() ("record" subcommand), replays an input file
 * (trace__replay(), when input_name is set) or traces live (trace__run()).
 *
 * Returns the result of whichever of those ran, -ENOMEM when the evlist
 * cannot be allocated, -1 when nothing was selected for tracing, or the
 * error from output/qualifier/target set-up.
 */
2970 int cmd_trace(int argc, const char **argv, const char *prefix __maybe_unused)
2971 {
2972         const char *trace_usage[] = {
2973                 "perf trace [<options>] [<command>]",
2974                 "perf trace [<options>] -- <command> [<options>]",
2975                 "perf trace record [<options>] [<command>]",
2976                 "perf trace record [<options>] -- <command> [<options>]",
2977                 NULL
2978         };
             /*
              * Defaults; the options parsed below write into this struct.
              *
              * NOTE(review): the .open_id initializer reads trace.audit.machine
              * while trace is still being initialized, which relies on the
              * .machine initializer having been evaluated first - confirm the
              * compiler guarantees that.
              */
2979         struct trace trace = {
2980                 .audit = {
2981                         .machine = audit_detect_machine(),
2982                         .open_id = audit_name_to_syscall("open", trace.audit.machine),
2983                 },
2984                 .syscalls = {
2985                         . max = -1,
2986                 },
2987                 .opts = {
2988                         .target = {
2989                                 .uid       = UINT_MAX,
2990                                 .uses_mmap = true,
2991                         },
2992                         .user_freq     = UINT_MAX,
2993                         .user_interval = ULLONG_MAX,
2994                         .no_buffering  = true,
2995                         .mmap_pages    = UINT_MAX,
2996                         .proc_map_timeout  = 500,
2997                 },
2998                 .output = stderr,
2999                 .show_comm = true,
3000                 .trace_syscalls = true,
3001         };
3002         const char *output_name = NULL;
3003         const char *ev_qualifier_str = NULL;
3004         const struct option trace_options[] = {
3005         OPT_CALLBACK(0, "event", &trace.evlist, "event",
3006                      "event selector. use 'perf list' to list available events",
3007                      parse_events_option),
3008         OPT_BOOLEAN(0, "comm", &trace.show_comm,
3009                     "show the thread COMM next to its id"),
3010         OPT_BOOLEAN(0, "tool_stats", &trace.show_tool_stats, "show tool stats"),
3011         OPT_STRING('e', "expr", &ev_qualifier_str, "expr", "list of syscalls to trace"),
3012         OPT_STRING('o', "output", &output_name, "file", "output file name"),
3013         OPT_STRING('i', "input", &input_name, "file", "Analyze events in file"),
3014         OPT_STRING('p', "pid", &trace.opts.target.pid, "pid",
3015                     "trace events on existing process id"),
3016         OPT_STRING('t', "tid", &trace.opts.target.tid, "tid",
3017                     "trace events on existing thread id"),
3018         OPT_CALLBACK(0, "filter-pids", &trace, "CSV list of pids",
3019                      "pids to filter (by the kernel)", trace__set_filter_pids),
3020         OPT_BOOLEAN('a', "all-cpus", &trace.opts.target.system_wide,
3021                     "system-wide collection from all CPUs"),
3022         OPT_STRING('C', "cpu", &trace.opts.target.cpu_list, "cpu",
3023                     "list of cpus to monitor"),
3024         OPT_BOOLEAN(0, "no-inherit", &trace.opts.no_inherit,
3025                     "child tasks do not inherit counters"),
3026         OPT_CALLBACK('m', "mmap-pages", &trace.opts.mmap_pages, "pages",
3027                      "number of mmap data pages",
3028                      perf_evlist__parse_mmap_pages),
3029         OPT_STRING('u', "uid", &trace.opts.target.uid_str, "user",
3030                    "user to profile"),
3031         OPT_CALLBACK(0, "duration", &trace, "float",
3032                      "show only events with duration > N.M ms",
3033                      trace__set_duration),
3034         OPT_BOOLEAN(0, "sched", &trace.sched, "show blocking scheduler events"),
3035         OPT_INCR('v', "verbose", &verbose, "be more verbose"),
3036         OPT_BOOLEAN('T', "time", &trace.full_time,
3037                     "Show full timestamp, not time relative to first start"),
3038         OPT_BOOLEAN('s', "summary", &trace.summary_only,
3039                     "Show only syscall summary with statistics"),
3040         OPT_BOOLEAN('S', "with-summary", &trace.summary,
3041                     "Show all syscalls and summary with statistics"),
3042         OPT_CALLBACK_DEFAULT('F', "pf", &trace.trace_pgfaults, "all|maj|min",
3043                      "Trace pagefaults", parse_pagefaults, "maj"),
3044         OPT_BOOLEAN(0, "syscalls", &trace.trace_syscalls, "Trace syscalls"),
3045         OPT_BOOLEAN('f', "force", &trace.force, "don't complain, do it"),
3046         OPT_UINTEGER(0, "proc-map-timeout", &trace.opts.proc_map_timeout,
3047                         "per thread proc mmap processing timeout in ms"),
3048         OPT_END()
3049         };
3050         const char * const trace_subcommands[] = { "record", NULL };
3051         int err;
3052         char bf[BUFSIZ];
3053
3054         signal(SIGSEGV, sighandler_dump_stack);
3055         signal(SIGFPE, sighandler_dump_stack);
3056
             /* Allocated before option parsing: --event stores into trace.evlist. */
3057         trace.evlist = perf_evlist__new();
3058
3059         if (trace.evlist == NULL) {
3060                 pr_err("Not enough memory to run!\n");
3061                 err = -ENOMEM;
3062                 goto out;
3063         }
3064
3065         argc = parse_options_subcommand(argc, argv, trace_options, trace_subcommands,
3066                                  trace_usage, PARSE_OPT_STOP_AT_NON_OPTION);
3067
             /* When page faults are traced, also request sample address and time. */
3068         if (trace.trace_pgfaults) {
3069                 trace.opts.sample_address = true;
3070                 trace.opts.sample_time = true;
3071         }
3072
             /* Events already on the evlist at this point all get trace__event_handler. */
3073         if (trace.evlist->nr_entries > 0)
3074                 evlist__set_evsel_handler(trace.evlist, trace__event_handler);
3075
             /*
              * "record" subcommand: hand over the remaining arguments.
              * NOTE(review): this and the "nothing to trace" case below return
              * directly, bypassing the out/out_close labels.
              */
3076         if ((argc >= 1) && (strcmp(argv[0], "record") == 0))
3077                 return trace__record(&trace, argc-1, &argv[1]);
3078
3079         /* summary_only implies summary option, but don't overwrite summary if set */
3080         if (trace.summary_only)
3081                 trace.summary = trace.summary_only;
3082
3083         if (!trace.trace_syscalls && !trace.trace_pgfaults &&
3084             trace.evlist->nr_entries == 0 /* Was --events used? */) {
3085                 pr_err("Please specify something to trace.\n");
3086                 return -1;
3087         }
3088
3089         if (output_name != NULL) {
3090                 err = trace__open_output(&trace, output_name);
3091                 if (err < 0) {
3092                         perror("failed to create output file");
3093                         goto out;
3094                 }
3095         }
3096
3097         if (ev_qualifier_str != NULL) {
3098                 const char *s = ev_qualifier_str;
3099                 struct strlist_config slist_config = {
3100                         .dirname = system_path(STRACE_GROUPS_DIR),
3101                 };
3102
                     /* A leading '!' negates the qualifier; it is not part of the list. */
3103                 trace.not_ev_qualifier = *s == '!';
3104                 if (trace.not_ev_qualifier)
3105                         ++s;
3106                 trace.ev_qualifier = strlist__new(s, &slist_config);
3107                 if (trace.ev_qualifier == NULL) {
                             /* NOTE(review): this message has no trailing newline. */
3108                         fputs("Not enough memory to parse event qualifier",
3109                               trace.output);
3110                         err = -ENOMEM;
3111                         goto out_close;
3112                 }
3113
3114                 err = trace__validate_ev_qualifier(&trace);
3115                 if (err)
3116                         goto out_close;
3117         }
3118
3119         err = target__validate(&trace.opts.target);
3120         if (err) {
3121                 target__strerror(&trace.opts.target, err, bf, sizeof(bf));
3122                 fprintf(trace.output, "%s", bf);
3123                 goto out_close;
3124         }
3125
3126         err = target__parse_uid(&trace.opts.target);
3127         if (err) {
3128                 target__strerror(&trace.opts.target, err, bf, sizeof(bf));
3129                 fprintf(trace.output, "%s", bf);
3130                 goto out_close;
3131         }
3132
             /* No command and no explicit target: trace system wide. */
3133         if (!argc && target__none(&trace.opts.target))
3134                 trace.opts.target.system_wide = true;
3135
             /* An input file selects replay; otherwise trace live. */
3136         if (input_name)
3137                 err = trace__replay(&trace);
3138         else
3139                 err = trace__run(&trace, argc, argv);
3140
             /* Only a stream opened for -o is closed; the default output is stderr. */
3141 out_close:
3142         if (output_name != NULL)
3143                 fclose(trace.output);
3144 out:
3145         return err;
3146 }