perf tools: Fix usage of max_stack sysctl
[linux-2.6-block.git] tools/perf/builtin-trace.c
1 /*
2  * builtin-trace.c
3  *
4  * Builtin 'trace' command:
5  *
6  * Display a continuously updated trace of any workload, CPU, specific PID,
7  * system wide, etc.  Default format is loosely strace like, but any other
8  * event may be specified using --event.
9  *
10  * Copyright (C) 2012, 2013, 2014, 2015 Red Hat Inc, Arnaldo Carvalho de Melo <acme@redhat.com>
11  *
12  * Initially based on the 'trace' prototype by Thomas Gleixner:
13  *
14  * http://lwn.net/Articles/415728/ ("Announcing a new utility: 'trace'")
15  *
16  * Released under the GPL v2. (and only v2, not any later version)
17  */
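/*
 * Illustrative example invocations (assumed typical usage; see the man
 * page for the authoritative option list):
 *
 *   perf trace ls                      # strace-like trace of a workload
 *   perf trace -e open,close -p 1234   # restrict to a few syscalls of a pid
 *   perf trace --event sched:sched_switch -a  # mix in other tracepoints,
 *                                              # system wide
 */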
18
19 #include <traceevent/event-parse.h>
20 #include <api/fs/tracing_path.h>
21 #include "builtin.h"
22 #include "util/color.h"
23 #include "util/debug.h"
24 #include "util/evlist.h"
25 #include <subcmd/exec-cmd.h>
26 #include "util/machine.h"
27 #include "util/session.h"
28 #include "util/thread.h"
29 #include <subcmd/parse-options.h>
30 #include "util/strlist.h"
31 #include "util/intlist.h"
32 #include "util/thread_map.h"
33 #include "util/stat.h"
34 #include "trace-event.h"
35 #include "util/parse-events.h"
36 #include "util/bpf-loader.h"
37 #include "callchain.h"
38 #include "syscalltbl.h"
39 #include "rb_resort.h"
40
41 #include <libaudit.h> /* FIXME: Still needed for audit_errno_to_name */
42 #include <stdlib.h>
43 #include <linux/err.h>
44 #include <linux/filter.h>
45 #include <linux/audit.h>
46 #include <sys/ptrace.h>
47 #include <linux/random.h>
48 #include <linux/stringify.h>
49
50 #ifndef O_CLOEXEC
51 # define O_CLOEXEC              02000000
52 #endif
53
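/*
 * State for one 'perf trace' session: the evlist and host machine being
 * monitored, the lazily filled table of known syscalls, output and filter
 * settings, and accumulated statistics.
 */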
54 struct trace {
55         struct perf_tool        tool;
56         struct syscalltbl       *sctbl;
57         struct {
58                 int             max;
59                 struct syscall  *table;
60                 struct {
61                         struct perf_evsel *sys_enter,
62                                           *sys_exit;
63                 }               events;
64         } syscalls;
65         struct record_opts      opts;
66         struct perf_evlist      *evlist;
67         struct machine          *host;
68         struct thread           *current;
69         u64                     base_time;
70         FILE                    *output;
71         unsigned long           nr_events;
72         struct strlist          *ev_qualifier;
73         struct {
74                 size_t          nr;
75                 int             *entries;
76         }                       ev_qualifier_ids;
77         struct intlist          *tid_list;
78         struct intlist          *pid_list;
79         struct {
80                 size_t          nr;
81                 pid_t           *entries;
82         }                       filter_pids;
83         double                  duration_filter;
84         double                  runtime_ms;
85         struct {
86                 u64             vfs_getname,
87                                 proc_getname;
88         } stats;
89         unsigned int            max_stack;
90         unsigned int            min_stack;
91         bool                    not_ev_qualifier;
92         bool                    live;
93         bool                    full_time;
94         bool                    sched;
95         bool                    multiple_threads;
96         bool                    summary;
97         bool                    summary_only;
98         bool                    show_comm;
99         bool                    show_tool_stats;
100         bool                    trace_syscalls;
101         bool                    kernel_syscallchains;
102         bool                    force;
103         bool                    vfs_getname;
104         int                     trace_pgfaults;
105         int                     open_id;
106 };
107
108 struct tp_field {
109         int offset;
110         union {
111                 u64 (*integer)(struct tp_field *field, struct perf_sample *sample);
112                 void *(*pointer)(struct tp_field *field, struct perf_sample *sample);
113         };
114 };
115
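/*
 * Generate tp_field__u{8,16,32,64}() accessors that fetch a fixed-size
 * integer from the raw tracepoint payload at field->offset, going through
 * memcpy() so that unaligned offsets are handled safely.
 */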
116 #define TP_UINT_FIELD(bits) \
117 static u64 tp_field__u##bits(struct tp_field *field, struct perf_sample *sample) \
118 { \
119         u##bits value; \
120         memcpy(&value, sample->raw_data + field->offset, sizeof(value)); \
121         return value;  \
122 }
123
124 TP_UINT_FIELD(8);
125 TP_UINT_FIELD(16);
126 TP_UINT_FIELD(32);
127 TP_UINT_FIELD(64);
128
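/*
 * Same as above, but byte-swapping the value, for perf.data files recorded
 * on a machine with different endianness (see evsel->needs_swap below).
 */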
129 #define TP_UINT_FIELD__SWAPPED(bits) \
130 static u64 tp_field__swapped_u##bits(struct tp_field *field, struct perf_sample *sample) \
131 { \
132         u##bits value; \
133         memcpy(&value, sample->raw_data + field->offset, sizeof(value)); \
134         return bswap_##bits(value);\
135 }
136
137 TP_UINT_FIELD__SWAPPED(16);
138 TP_UINT_FIELD__SWAPPED(32);
139 TP_UINT_FIELD__SWAPPED(64);
140
141 static int tp_field__init_uint(struct tp_field *field,
142                                struct format_field *format_field,
143                                bool needs_swap)
144 {
145         field->offset = format_field->offset;
146
147         switch (format_field->size) {
148         case 1:
149                 field->integer = tp_field__u8;
150                 break;
151         case 2:
152                 field->integer = needs_swap ? tp_field__swapped_u16 : tp_field__u16;
153                 break;
154         case 4:
155                 field->integer = needs_swap ? tp_field__swapped_u32 : tp_field__u32;
156                 break;
157         case 8:
158                 field->integer = needs_swap ? tp_field__swapped_u64 : tp_field__u64;
159                 break;
160         default:
161                 return -1;
162         }
163
164         return 0;
165 }
166
167 static void *tp_field__ptr(struct tp_field *field, struct perf_sample *sample)
168 {
169         return sample->raw_data + field->offset;
170 }
171
172 static int tp_field__init_ptr(struct tp_field *field, struct format_field *format_field)
173 {
174         field->offset = format_field->offset;
175         field->pointer = tp_field__ptr;
176         return 0;
177 }
178
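/*
 * Where to find the interesting fields in the raw_syscalls tracepoint
 * payload: the syscall id plus either the args array (sys_enter) or the
 * return value (sys_exit).
 */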
179 struct syscall_tp {
180         struct tp_field id;
181         union {
182                 struct tp_field args, ret;
183         };
184 };
185
186 static int perf_evsel__init_tp_uint_field(struct perf_evsel *evsel,
187                                           struct tp_field *field,
188                                           const char *name)
189 {
190         struct format_field *format_field = perf_evsel__field(evsel, name);
191
192         if (format_field == NULL)
193                 return -1;
194
195         return tp_field__init_uint(field, format_field, evsel->needs_swap);
196 }
197
198 #define perf_evsel__init_sc_tp_uint_field(evsel, name) \
199         ({ struct syscall_tp *sc = evsel->priv;\
200            perf_evsel__init_tp_uint_field(evsel, &sc->name, #name); })
201
202 static int perf_evsel__init_tp_ptr_field(struct perf_evsel *evsel,
203                                          struct tp_field *field,
204                                          const char *name)
205 {
206         struct format_field *format_field = perf_evsel__field(evsel, name);
207
208         if (format_field == NULL)
209                 return -1;
210
211         return tp_field__init_ptr(field, format_field);
212 }
213
214 #define perf_evsel__init_sc_tp_ptr_field(evsel, name) \
215         ({ struct syscall_tp *sc = evsel->priv;\
216            perf_evsel__init_tp_ptr_field(evsel, &sc->name, #name); })
217
218 static void perf_evsel__delete_priv(struct perf_evsel *evsel)
219 {
220         zfree(&evsel->priv);
221         perf_evsel__delete(evsel);
222 }
223
224 static int perf_evsel__init_syscall_tp(struct perf_evsel *evsel, void *handler)
225 {
226         evsel->priv = malloc(sizeof(struct syscall_tp));
227         if (evsel->priv != NULL) {
228                 if (perf_evsel__init_sc_tp_uint_field(evsel, id))
229                         goto out_delete;
230
231                 evsel->handler = handler;
232                 return 0;
233         }
234
235         return -ENOMEM;
236
237 out_delete:
238         zfree(&evsel->priv);
239         return -ENOENT;
240 }
241
242 static struct perf_evsel *perf_evsel__syscall_newtp(const char *direction, void *handler)
243 {
244         struct perf_evsel *evsel = perf_evsel__newtp("raw_syscalls", direction);
245
246         /* older kernels (e.g., RHEL6) use syscalls:{enter,exit} */
247         if (IS_ERR(evsel))
248                 evsel = perf_evsel__newtp("syscalls", direction);
249
250         if (IS_ERR(evsel))
251                 return NULL;
252
253         if (perf_evsel__init_syscall_tp(evsel, handler))
254                 goto out_delete;
255
256         return evsel;
257
258 out_delete:
259         perf_evsel__delete_priv(evsel);
260         return NULL;
261 }
262
263 #define perf_evsel__sc_tp_uint(evsel, name, sample) \
264         ({ struct syscall_tp *fields = evsel->priv; \
265            fields->name.integer(&fields->name, sample); })
266
267 #define perf_evsel__sc_tp_ptr(evsel, name, sample) \
268         ({ struct syscall_tp *fields = evsel->priv; \
269            fields->name.pointer(&fields->name, sample); })
270
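/*
 * State handed to each argument beautifier: the raw value, the thread and
 * trace it belongs to, an optional parameter (arg_parm), the argument index
 * and a mask of arguments already consumed by an earlier printer.
 */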
271 struct syscall_arg {
272         unsigned long val;
273         struct thread *thread;
274         struct trace  *trace;
275         void          *parm;
276         u8            idx;
277         u8            mask;
278 };
279
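/*
 * Maps small integer argument values to human readable strings. 'offset'
 * is subtracted from the value before indexing 'entries', for arrays whose
 * first valid value is not zero (e.g. the epoll_ctl ops below).
 */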
280 struct strarray {
281         int         offset;
282         int         nr_entries;
283         const char **entries;
284 };
285
286 #define DEFINE_STRARRAY(array) struct strarray strarray__##array = { \
287         .nr_entries = ARRAY_SIZE(array), \
288         .entries = array, \
289 }
290
291 #define DEFINE_STRARRAY_OFFSET(array, off) struct strarray strarray__##array = { \
292         .offset     = off, \
293         .nr_entries = ARRAY_SIZE(array), \
294         .entries = array, \
295 }
296
297 static size_t __syscall_arg__scnprintf_strarray(char *bf, size_t size,
298                                                 const char *intfmt,
299                                                 struct syscall_arg *arg)
300 {
301         struct strarray *sa = arg->parm;
302         int idx = arg->val - sa->offset;
303
304         if (idx < 0 || idx >= sa->nr_entries)
305                 return scnprintf(bf, size, intfmt, arg->val);
306
307         return scnprintf(bf, size, "%s", sa->entries[idx]);
308 }
309
310 static size_t syscall_arg__scnprintf_strarray(char *bf, size_t size,
311                                               struct syscall_arg *arg)
312 {
313         return __syscall_arg__scnprintf_strarray(bf, size, "%d", arg);
314 }
315
316 #define SCA_STRARRAY syscall_arg__scnprintf_strarray
317
318 #if defined(__i386__) || defined(__x86_64__)
319 /*
320  * FIXME: Make this available to all arches as soon as the ioctl beautifier
321  *        gets rewritten to support all arches.
322  */
323 static size_t syscall_arg__scnprintf_strhexarray(char *bf, size_t size,
324                                                  struct syscall_arg *arg)
325 {
326         return __syscall_arg__scnprintf_strarray(bf, size, "%#x", arg);
327 }
328
329 #define SCA_STRHEXARRAY syscall_arg__scnprintf_strhexarray
330 #endif /* defined(__i386__) || defined(__x86_64__) */
331
332 static size_t syscall_arg__scnprintf_fd(char *bf, size_t size,
333                                         struct syscall_arg *arg);
334
335 #define SCA_FD syscall_arg__scnprintf_fd
336
337 static size_t syscall_arg__scnprintf_fd_at(char *bf, size_t size,
338                                            struct syscall_arg *arg)
339 {
340         int fd = arg->val;
341
342         if (fd == AT_FDCWD)
343                 return scnprintf(bf, size, "CWD");
344
345         return syscall_arg__scnprintf_fd(bf, size, arg);
346 }
347
348 #define SCA_FDAT syscall_arg__scnprintf_fd_at
349
350 static size_t syscall_arg__scnprintf_close_fd(char *bf, size_t size,
351                                               struct syscall_arg *arg);
352
353 #define SCA_CLOSE_FD syscall_arg__scnprintf_close_fd
354
355 static size_t syscall_arg__scnprintf_hex(char *bf, size_t size,
356                                          struct syscall_arg *arg)
357 {
358         return scnprintf(bf, size, "%#lx", arg->val);
359 }
360
361 #define SCA_HEX syscall_arg__scnprintf_hex
362
363 static size_t syscall_arg__scnprintf_int(char *bf, size_t size,
364                                          struct syscall_arg *arg)
365 {
366         return scnprintf(bf, size, "%d", arg->val);
367 }
368
369 #define SCA_INT syscall_arg__scnprintf_int
370
371 static const char *bpf_cmd[] = {
372         "MAP_CREATE", "MAP_LOOKUP_ELEM", "MAP_UPDATE_ELEM", "MAP_DELETE_ELEM",
373         "MAP_GET_NEXT_KEY", "PROG_LOAD",
374 };
375 static DEFINE_STRARRAY(bpf_cmd);
376
377 static const char *epoll_ctl_ops[] = { "ADD", "DEL", "MOD", };
378 static DEFINE_STRARRAY_OFFSET(epoll_ctl_ops, 1);
379
380 static const char *itimers[] = { "REAL", "VIRTUAL", "PROF", };
381 static DEFINE_STRARRAY(itimers);
382
383 static const char *keyctl_options[] = {
384         "GET_KEYRING_ID", "JOIN_SESSION_KEYRING", "UPDATE", "REVOKE", "CHOWN",
385         "SETPERM", "DESCRIBE", "CLEAR", "LINK", "UNLINK", "SEARCH", "READ",
386         "INSTANTIATE", "NEGATE", "SET_REQKEY_KEYRING", "SET_TIMEOUT",
387         "ASSUME_AUTHORITY", "GET_SECURITY", "SESSION_TO_PARENT", "REJECT",
388         "INSTANTIATE_IOV", "INVALIDATE", "GET_PERSISTENT",
389 };
390 static DEFINE_STRARRAY(keyctl_options);
391
392 static const char *whences[] = { "SET", "CUR", "END",
393 #ifdef SEEK_DATA
394 "DATA",
395 #endif
396 #ifdef SEEK_HOLE
397 "HOLE",
398 #endif
399 };
400 static DEFINE_STRARRAY(whences);
401
402 static const char *fcntl_cmds[] = {
403         "DUPFD", "GETFD", "SETFD", "GETFL", "SETFL", "GETLK", "SETLK",
404         "SETLKW", "SETOWN", "GETOWN", "SETSIG", "GETSIG", "F_GETLK64",
405         "F_SETLK64", "F_SETLKW64", "F_SETOWN_EX", "F_GETOWN_EX",
406         "F_GETOWNER_UIDS",
407 };
408 static DEFINE_STRARRAY(fcntl_cmds);
409
410 static const char *rlimit_resources[] = {
411         "CPU", "FSIZE", "DATA", "STACK", "CORE", "RSS", "NPROC", "NOFILE",
412         "MEMLOCK", "AS", "LOCKS", "SIGPENDING", "MSGQUEUE", "NICE", "RTPRIO",
413         "RTTIME",
414 };
415 static DEFINE_STRARRAY(rlimit_resources);
416
417 static const char *sighow[] = { "BLOCK", "UNBLOCK", "SETMASK", };
418 static DEFINE_STRARRAY(sighow);
419
420 static const char *clockid[] = {
421         "REALTIME", "MONOTONIC", "PROCESS_CPUTIME_ID", "THREAD_CPUTIME_ID",
422         "MONOTONIC_RAW", "REALTIME_COARSE", "MONOTONIC_COARSE", "BOOTTIME",
423         "REALTIME_ALARM", "BOOTTIME_ALARM", "SGI_CYCLE", "TAI"
424 };
425 static DEFINE_STRARRAY(clockid);
426
427 static const char *socket_families[] = {
428         "UNSPEC", "LOCAL", "INET", "AX25", "IPX", "APPLETALK", "NETROM",
429         "BRIDGE", "ATMPVC", "X25", "INET6", "ROSE", "DECnet", "NETBEUI",
430         "SECURITY", "KEY", "NETLINK", "PACKET", "ASH", "ECONET", "ATMSVC",
431         "RDS", "SNA", "IRDA", "PPPOX", "WANPIPE", "LLC", "IB", "CAN", "TIPC",
432         "BLUETOOTH", "IUCV", "RXRPC", "ISDN", "PHONET", "IEEE802154", "CAIF",
433         "ALG", "NFC", "VSOCK",
434 };
435 static DEFINE_STRARRAY(socket_families);
436
437 static size_t syscall_arg__scnprintf_access_mode(char *bf, size_t size,
438                                                  struct syscall_arg *arg)
439 {
440         size_t printed = 0;
441         int mode = arg->val;
442
443         if (mode == F_OK) /* 0 */
444                 return scnprintf(bf, size, "F");
445 #define P_MODE(n) \
446         if (mode & n##_OK) { \
447                 printed += scnprintf(bf + printed, size - printed, "%s", #n); \
448                 mode &= ~n##_OK; \
449         }
450
451         P_MODE(R);
452         P_MODE(W);
453         P_MODE(X);
454 #undef P_MODE
455
456         if (mode)
457                 printed += scnprintf(bf + printed, size - printed, "|%#x", mode);
458
459         return printed;
460 }
461
462 #define SCA_ACCMODE syscall_arg__scnprintf_access_mode
463
464 static size_t syscall_arg__scnprintf_filename(char *bf, size_t size,
465                                               struct syscall_arg *arg);
466
467 #define SCA_FILENAME syscall_arg__scnprintf_filename
468
469 static size_t syscall_arg__scnprintf_pipe_flags(char *bf, size_t size,
470                                                 struct syscall_arg *arg)
471 {
472         int printed = 0, flags = arg->val;
473
474 #define P_FLAG(n) \
475         if (flags & O_##n) { \
476                 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
477                 flags &= ~O_##n; \
478         }
479
480         P_FLAG(CLOEXEC);
481         P_FLAG(NONBLOCK);
482 #undef P_FLAG
483
484         if (flags)
485                 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
486
487         return printed;
488 }
489
490 #define SCA_PIPE_FLAGS syscall_arg__scnprintf_pipe_flags
491
492 #if defined(__i386__) || defined(__x86_64__)
493 /*
494  * FIXME: Make this available to all arches.
495  */
496 #define TCGETS          0x5401
497
498 static const char *tioctls[] = {
499         "TCGETS", "TCSETS", "TCSETSW", "TCSETSF", "TCGETA", "TCSETA", "TCSETAW",
500         "TCSETAF", "TCSBRK", "TCXONC", "TCFLSH", "TIOCEXCL", "TIOCNXCL",
501         "TIOCSCTTY", "TIOCGPGRP", "TIOCSPGRP", "TIOCOUTQ", "TIOCSTI",
502         "TIOCGWINSZ", "TIOCSWINSZ", "TIOCMGET", "TIOCMBIS", "TIOCMBIC",
503         "TIOCMSET", "TIOCGSOFTCAR", "TIOCSSOFTCAR", "FIONREAD", "TIOCLINUX",
504         "TIOCCONS", "TIOCGSERIAL", "TIOCSSERIAL", "TIOCPKT", "FIONBIO",
505         "TIOCNOTTY", "TIOCSETD", "TIOCGETD", "TCSBRKP", [0x27] = "TIOCSBRK",
506         "TIOCCBRK", "TIOCGSID", "TCGETS2", "TCSETS2", "TCSETSW2", "TCSETSF2",
507         "TIOCGRS485", "TIOCSRS485", "TIOCGPTN", "TIOCSPTLCK",
508         "TIOCGDEV||TCGETX", "TCSETX", "TCSETXF", "TCSETXW", "TIOCSIG",
509         "TIOCVHANGUP", "TIOCGPKT", "TIOCGPTLCK", "TIOCGEXCL",
510         [0x50] = "FIONCLEX", "FIOCLEX", "FIOASYNC", "TIOCSERCONFIG",
511         "TIOCSERGWILD", "TIOCSERSWILD", "TIOCGLCKTRMIOS", "TIOCSLCKTRMIOS",
512         "TIOCSERGSTRUCT", "TIOCSERGETLSR", "TIOCSERGETMULTI", "TIOCSERSETMULTI",
513         "TIOCMIWAIT", "TIOCGICOUNT", [0x60] = "FIOQSIZE",
514 };
515
516 static DEFINE_STRARRAY_OFFSET(tioctls, 0x5401);
517 #endif /* defined(__i386__) || defined(__x86_64__) */
518
519 #ifndef GRND_NONBLOCK
520 #define GRND_NONBLOCK   0x0001
521 #endif
522 #ifndef GRND_RANDOM
523 #define GRND_RANDOM     0x0002
524 #endif
525
526 static size_t syscall_arg__scnprintf_getrandom_flags(char *bf, size_t size,
527                                                    struct syscall_arg *arg)
528 {
529         int printed = 0, flags = arg->val;
530
531 #define P_FLAG(n) \
532         if (flags & GRND_##n) { \
533                 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
534                 flags &= ~GRND_##n; \
535         }
536
537         P_FLAG(RANDOM);
538         P_FLAG(NONBLOCK);
539 #undef P_FLAG
540
541         if (flags)
542                 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
543
544         return printed;
545 }
546
547 #define SCA_GETRANDOM_FLAGS syscall_arg__scnprintf_getrandom_flags
548
549 #define STRARRAY(arg, name, array) \
550           .arg_scnprintf = { [arg] = SCA_STRARRAY, }, \
551           .arg_parm      = { [arg] = &strarray__##array, }
552
553 #include "trace/beauty/eventfd.c"
554 #include "trace/beauty/flock.c"
555 #include "trace/beauty/futex_op.c"
556 #include "trace/beauty/mmap.c"
557 #include "trace/beauty/mode_t.c"
558 #include "trace/beauty/msg_flags.c"
559 #include "trace/beauty/open_flags.c"
560 #include "trace/beauty/perf_event_open.c"
561 #include "trace/beauty/pid.c"
562 #include "trace/beauty/sched_policy.c"
563 #include "trace/beauty/seccomp.c"
564 #include "trace/beauty/signum.c"
565 #include "trace/beauty/socket_type.c"
566 #include "trace/beauty/waitid_options.c"
567
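/*
 * Per-syscall pretty-printing hints: one beautifier per argument position
 * in 'arg_scnprintf', an optional parameter for it in 'arg_parm' (e.g. a
 * strarray), plus flags describing how the return value is formatted.
 * The table below is searched with bsearch(), so keep it sorted by name.
 */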
568 static struct syscall_fmt {
569         const char *name;
570         const char *alias;
571         size_t     (*arg_scnprintf[6])(char *bf, size_t size, struct syscall_arg *arg);
572         void       *arg_parm[6];
573         bool       errmsg;
574         bool       errpid;
575         bool       timeout;
576         bool       hexret;
577 } syscall_fmts[] = {
578         { .name     = "access",     .errmsg = true,
579           .arg_scnprintf = { [0] = SCA_FILENAME, /* filename */
580                              [1] = SCA_ACCMODE,  /* mode */ }, },
581         { .name     = "arch_prctl", .errmsg = true, .alias = "prctl", },
582         { .name     = "bpf",        .errmsg = true, STRARRAY(0, cmd, bpf_cmd), },
583         { .name     = "brk",        .hexret = true,
584           .arg_scnprintf = { [0] = SCA_HEX, /* brk */ }, },
585         { .name     = "chdir",      .errmsg = true,
586           .arg_scnprintf = { [0] = SCA_FILENAME, /* filename */ }, },
587         { .name     = "chmod",      .errmsg = true,
588           .arg_scnprintf = { [0] = SCA_FILENAME, /* filename */ }, },
589         { .name     = "chroot",     .errmsg = true,
590           .arg_scnprintf = { [0] = SCA_FILENAME, /* filename */ }, },
591         { .name     = "clock_gettime",  .errmsg = true, STRARRAY(0, clk_id, clockid), },
592         { .name     = "clone",      .errpid = true, },
593         { .name     = "close",      .errmsg = true,
594           .arg_scnprintf = { [0] = SCA_CLOSE_FD, /* fd */ }, },
595         { .name     = "connect",    .errmsg = true, },
596         { .name     = "creat",      .errmsg = true,
597           .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
598         { .name     = "dup",        .errmsg = true,
599           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
600         { .name     = "dup2",       .errmsg = true,
601           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
602         { .name     = "dup3",       .errmsg = true,
603           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
604         { .name     = "epoll_ctl",  .errmsg = true, STRARRAY(1, op, epoll_ctl_ops), },
605         { .name     = "eventfd2",   .errmsg = true,
606           .arg_scnprintf = { [1] = SCA_EFD_FLAGS, /* flags */ }, },
607         { .name     = "faccessat",  .errmsg = true,
608           .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
609                              [1] = SCA_FILENAME, /* filename */ }, },
610         { .name     = "fadvise64",  .errmsg = true,
611           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
612         { .name     = "fallocate",  .errmsg = true,
613           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
614         { .name     = "fchdir",     .errmsg = true,
615           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
616         { .name     = "fchmod",     .errmsg = true,
617           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
618         { .name     = "fchmodat",   .errmsg = true,
619           .arg_scnprintf = { [0] = SCA_FDAT, /* fd */
620                              [1] = SCA_FILENAME, /* filename */ }, },
621         { .name     = "fchown",     .errmsg = true,
622           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
623         { .name     = "fchownat",   .errmsg = true,
624           .arg_scnprintf = { [0] = SCA_FDAT, /* fd */
625                              [1] = SCA_FILENAME, /* filename */ }, },
626         { .name     = "fcntl",      .errmsg = true,
627           .arg_scnprintf = { [0] = SCA_FD, /* fd */
628                              [1] = SCA_STRARRAY, /* cmd */ },
629           .arg_parm      = { [1] = &strarray__fcntl_cmds, /* cmd */ }, },
630         { .name     = "fdatasync",  .errmsg = true,
631           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
632         { .name     = "flock",      .errmsg = true,
633           .arg_scnprintf = { [0] = SCA_FD, /* fd */
634                              [1] = SCA_FLOCK, /* cmd */ }, },
635         { .name     = "fsetxattr",  .errmsg = true,
636           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
637         { .name     = "fstat",      .errmsg = true, .alias = "newfstat",
638           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
639         { .name     = "fstatat",    .errmsg = true, .alias = "newfstatat",
640           .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
641                              [1] = SCA_FILENAME, /* filename */ }, },
642         { .name     = "fstatfs",    .errmsg = true,
643           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
644         { .name     = "fsync",    .errmsg = true,
645           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
646         { .name     = "ftruncate", .errmsg = true,
647           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
648         { .name     = "futex",      .errmsg = true,
649           .arg_scnprintf = { [1] = SCA_FUTEX_OP, /* op */ }, },
650         { .name     = "futimesat", .errmsg = true,
651           .arg_scnprintf = { [0] = SCA_FDAT, /* fd */
652                              [1] = SCA_FILENAME, /* filename */ }, },
653         { .name     = "getdents",   .errmsg = true,
654           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
655         { .name     = "getdents64", .errmsg = true,
656           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
657         { .name     = "getitimer",  .errmsg = true, STRARRAY(0, which, itimers), },
658         { .name     = "getpid",     .errpid = true, },
659         { .name     = "getpgid",    .errpid = true, },
660         { .name     = "getppid",    .errpid = true, },
661         { .name     = "getrandom",  .errmsg = true,
662           .arg_scnprintf = { [2] = SCA_GETRANDOM_FLAGS, /* flags */ }, },
663         { .name     = "getrlimit",  .errmsg = true, STRARRAY(0, resource, rlimit_resources), },
664         { .name     = "getxattr",    .errmsg = true,
665           .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
666         { .name     = "inotify_add_watch",          .errmsg = true,
667           .arg_scnprintf = { [1] = SCA_FILENAME, /* pathname */ }, },
668         { .name     = "ioctl",      .errmsg = true,
669           .arg_scnprintf = { [0] = SCA_FD, /* fd */
670 #if defined(__i386__) || defined(__x86_64__)
671 /*
672  * FIXME: Make this available to all arches.
673  */
674                              [1] = SCA_STRHEXARRAY, /* cmd */
675                              [2] = SCA_HEX, /* arg */ },
676           .arg_parm      = { [1] = &strarray__tioctls, /* cmd */ }, },
677 #else
678                              [2] = SCA_HEX, /* arg */ }, },
679 #endif
680         { .name     = "keyctl",     .errmsg = true, STRARRAY(0, option, keyctl_options), },
681         { .name     = "kill",       .errmsg = true,
682           .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
683         { .name     = "lchown",    .errmsg = true,
684           .arg_scnprintf = { [0] = SCA_FILENAME, /* filename */ }, },
685         { .name     = "lgetxattr",  .errmsg = true,
686           .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
687         { .name     = "linkat",     .errmsg = true,
688           .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
689         { .name     = "listxattr",  .errmsg = true,
690           .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
691         { .name     = "llistxattr", .errmsg = true,
692           .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
693         { .name     = "lremovexattr",  .errmsg = true,
694           .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
695         { .name     = "lseek",      .errmsg = true,
696           .arg_scnprintf = { [0] = SCA_FD, /* fd */
697                              [2] = SCA_STRARRAY, /* whence */ },
698           .arg_parm      = { [2] = &strarray__whences, /* whence */ }, },
699         { .name     = "lsetxattr",  .errmsg = true,
700           .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
701         { .name     = "lstat",      .errmsg = true, .alias = "newlstat",
702           .arg_scnprintf = { [0] = SCA_FILENAME, /* filename */ }, },
703         { .name     = "lsxattr",    .errmsg = true,
704           .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
705         { .name     = "madvise",    .errmsg = true,
706           .arg_scnprintf = { [0] = SCA_HEX,      /* start */
707                              [2] = SCA_MADV_BHV, /* behavior */ }, },
708         { .name     = "mkdir",    .errmsg = true,
709           .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
710         { .name     = "mkdirat",    .errmsg = true,
711           .arg_scnprintf = { [0] = SCA_FDAT, /* fd */
712                              [1] = SCA_FILENAME, /* pathname */ }, },
713         { .name     = "mknod",      .errmsg = true,
714           .arg_scnprintf = { [0] = SCA_FILENAME, /* filename */ }, },
715         { .name     = "mknodat",    .errmsg = true,
716           .arg_scnprintf = { [0] = SCA_FDAT, /* fd */
717                              [1] = SCA_FILENAME, /* filename */ }, },
718         { .name     = "mlock",      .errmsg = true,
719           .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
720         { .name     = "mlockall",   .errmsg = true,
721           .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
722         { .name     = "mmap",       .hexret = true,
723           .arg_scnprintf = { [0] = SCA_HEX,       /* addr */
724                              [2] = SCA_MMAP_PROT, /* prot */
725                              [3] = SCA_MMAP_FLAGS, /* flags */
726                              [4] = SCA_FD,        /* fd */ }, },
727         { .name     = "mprotect",   .errmsg = true,
728           .arg_scnprintf = { [0] = SCA_HEX, /* start */
729                              [2] = SCA_MMAP_PROT, /* prot */ }, },
730         { .name     = "mq_unlink", .errmsg = true,
731           .arg_scnprintf = { [0] = SCA_FILENAME, /* u_name */ }, },
732         { .name     = "mremap",     .hexret = true,
733           .arg_scnprintf = { [0] = SCA_HEX, /* addr */
734                              [3] = SCA_MREMAP_FLAGS, /* flags */
735                              [4] = SCA_HEX, /* new_addr */ }, },
736         { .name     = "munlock",    .errmsg = true,
737           .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
738         { .name     = "munmap",     .errmsg = true,
739           .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
740         { .name     = "name_to_handle_at", .errmsg = true,
741           .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
742         { .name     = "newfstatat", .errmsg = true,
743           .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
744                              [1] = SCA_FILENAME, /* filename */ }, },
745         { .name     = "open",       .errmsg = true,
746           .arg_scnprintf = { [0] = SCA_FILENAME,   /* filename */
747                              [1] = SCA_OPEN_FLAGS, /* flags */ }, },
748         { .name     = "open_by_handle_at", .errmsg = true,
749           .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
750                              [2] = SCA_OPEN_FLAGS, /* flags */ }, },
751         { .name     = "openat",     .errmsg = true,
752           .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
753                              [1] = SCA_FILENAME, /* filename */
754                              [2] = SCA_OPEN_FLAGS, /* flags */ }, },
755         { .name     = "perf_event_open", .errmsg = true,
756           .arg_scnprintf = { [2] = SCA_INT, /* cpu */
757                              [3] = SCA_FD,  /* group_fd */
758                              [4] = SCA_PERF_FLAGS,  /* flags */ }, },
759         { .name     = "pipe2",      .errmsg = true,
760           .arg_scnprintf = { [1] = SCA_PIPE_FLAGS, /* flags */ }, },
761         { .name     = "poll",       .errmsg = true, .timeout = true, },
762         { .name     = "ppoll",      .errmsg = true, .timeout = true, },
763         { .name     = "pread",      .errmsg = true, .alias = "pread64",
764           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
765         { .name     = "preadv",     .errmsg = true, .alias = "pread",
766           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
767         { .name     = "prlimit64",  .errmsg = true, STRARRAY(1, resource, rlimit_resources), },
768         { .name     = "pwrite",     .errmsg = true, .alias = "pwrite64",
769           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
770         { .name     = "pwritev",    .errmsg = true,
771           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
772         { .name     = "read",       .errmsg = true,
773           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
774         { .name     = "readlink",   .errmsg = true,
775           .arg_scnprintf = { [0] = SCA_FILENAME, /* path */ }, },
776         { .name     = "readlinkat", .errmsg = true,
777           .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
778                              [1] = SCA_FILENAME, /* pathname */ }, },
779         { .name     = "readv",      .errmsg = true,
780           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
781         { .name     = "recvfrom",   .errmsg = true,
782           .arg_scnprintf = { [0] = SCA_FD, /* fd */
783                              [3] = SCA_MSG_FLAGS, /* flags */ }, },
784         { .name     = "recvmmsg",   .errmsg = true,
785           .arg_scnprintf = { [0] = SCA_FD, /* fd */
786                              [3] = SCA_MSG_FLAGS, /* flags */ }, },
787         { .name     = "recvmsg",    .errmsg = true,
788           .arg_scnprintf = { [0] = SCA_FD, /* fd */
789                              [2] = SCA_MSG_FLAGS, /* flags */ }, },
790         { .name     = "removexattr", .errmsg = true,
791           .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
792         { .name     = "renameat",   .errmsg = true,
793           .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
794         { .name     = "rmdir",    .errmsg = true,
795           .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
796         { .name     = "rt_sigaction", .errmsg = true,
797           .arg_scnprintf = { [0] = SCA_SIGNUM, /* sig */ }, },
798         { .name     = "rt_sigprocmask",  .errmsg = true, STRARRAY(0, how, sighow), },
799         { .name     = "rt_sigqueueinfo", .errmsg = true,
800           .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
801         { .name     = "rt_tgsigqueueinfo", .errmsg = true,
802           .arg_scnprintf = { [2] = SCA_SIGNUM, /* sig */ }, },
803         { .name     = "sched_setscheduler",   .errmsg = true,
804           .arg_scnprintf = { [1] = SCA_SCHED_POLICY, /* policy */ }, },
805         { .name     = "seccomp", .errmsg = true,
806           .arg_scnprintf = { [0] = SCA_SECCOMP_OP, /* op */
807                              [1] = SCA_SECCOMP_FLAGS, /* flags */ }, },
808         { .name     = "select",     .errmsg = true, .timeout = true, },
809         { .name     = "sendmmsg",    .errmsg = true,
810           .arg_scnprintf = { [0] = SCA_FD, /* fd */
811                              [3] = SCA_MSG_FLAGS, /* flags */ }, },
812         { .name     = "sendmsg",    .errmsg = true,
813           .arg_scnprintf = { [0] = SCA_FD, /* fd */
814                              [2] = SCA_MSG_FLAGS, /* flags */ }, },
815         { .name     = "sendto",     .errmsg = true,
816           .arg_scnprintf = { [0] = SCA_FD, /* fd */
817                              [3] = SCA_MSG_FLAGS, /* flags */ }, },
818         { .name     = "set_tid_address", .errpid = true, },
819         { .name     = "setitimer",  .errmsg = true, STRARRAY(0, which, itimers), },
820         { .name     = "setpgid",    .errmsg = true, },
821         { .name     = "setrlimit",  .errmsg = true, STRARRAY(0, resource, rlimit_resources), },
822         { .name     = "setxattr",   .errmsg = true,
823           .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
824         { .name     = "shutdown",   .errmsg = true,
825           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
826         { .name     = "socket",     .errmsg = true,
827           .arg_scnprintf = { [0] = SCA_STRARRAY, /* family */
828                              [1] = SCA_SK_TYPE, /* type */ },
829           .arg_parm      = { [0] = &strarray__socket_families, /* family */ }, },
830         { .name     = "socketpair", .errmsg = true,
831           .arg_scnprintf = { [0] = SCA_STRARRAY, /* family */
832                              [1] = SCA_SK_TYPE, /* type */ },
833           .arg_parm      = { [0] = &strarray__socket_families, /* family */ }, },
834         { .name     = "stat",       .errmsg = true, .alias = "newstat",
835           .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
836         { .name     = "statfs",     .errmsg = true,
837           .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
838         { .name     = "swapoff",    .errmsg = true,
839           .arg_scnprintf = { [0] = SCA_FILENAME, /* specialfile */ }, },
840         { .name     = "swapon",     .errmsg = true,
841           .arg_scnprintf = { [0] = SCA_FILENAME, /* specialfile */ }, },
842         { .name     = "symlinkat",  .errmsg = true,
843           .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
844         { .name     = "tgkill",     .errmsg = true,
845           .arg_scnprintf = { [2] = SCA_SIGNUM, /* sig */ }, },
846         { .name     = "tkill",      .errmsg = true,
847           .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
848         { .name     = "truncate",   .errmsg = true,
849           .arg_scnprintf = { [0] = SCA_FILENAME, /* path */ }, },
850         { .name     = "uname",      .errmsg = true, .alias = "newuname", },
851         { .name     = "unlinkat",   .errmsg = true,
852           .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
853                              [1] = SCA_FILENAME, /* pathname */ }, },
854         { .name     = "utime",  .errmsg = true,
855           .arg_scnprintf = { [0] = SCA_FILENAME, /* filename */ }, },
856         { .name     = "utimensat",  .errmsg = true,
857           .arg_scnprintf = { [0] = SCA_FDAT, /* dirfd */
858                              [1] = SCA_FILENAME, /* filename */ }, },
859         { .name     = "utimes",  .errmsg = true,
860           .arg_scnprintf = { [0] = SCA_FILENAME, /* filename */ }, },
861         { .name     = "vmsplice",  .errmsg = true,
862           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
863         { .name     = "wait4",      .errpid = true,
864           .arg_scnprintf = { [2] = SCA_WAITID_OPTIONS, /* options */ }, },
865         { .name     = "waitid",     .errpid = true,
866           .arg_scnprintf = { [3] = SCA_WAITID_OPTIONS, /* options */ }, },
867         { .name     = "write",      .errmsg = true,
868           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
869         { .name     = "writev",     .errmsg = true,
870           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
871 };
872
873 static int syscall_fmt__cmp(const void *name, const void *fmtp)
874 {
875         const struct syscall_fmt *fmt = fmtp;
876         return strcmp(name, fmt->name);
877 }
878
879 static struct syscall_fmt *syscall_fmt__find(const char *name)
880 {
881         const int nmemb = ARRAY_SIZE(syscall_fmts);
882         return bsearch(name, syscall_fmts, nmemb, sizeof(struct syscall_fmt), syscall_fmt__cmp);
883 }
884
885 struct syscall {
886         struct event_format *tp_format;
887         int                 nr_args;
888         struct format_field *args;
889         const char          *name;
890         bool                is_exit;
891         struct syscall_fmt  *fmt;
892         size_t              (**arg_scnprintf)(char *bf, size_t size, struct syscall_arg *arg);
893         void                **arg_parm;
894 };
895
896 static size_t fprintf_duration(unsigned long t, FILE *fp)
897 {
898         double duration = (double)t / NSEC_PER_MSEC;
899         size_t printed = fprintf(fp, "(");
900
901         if (duration >= 1.0)
902                 printed += color_fprintf(fp, PERF_COLOR_RED, "%6.3f ms", duration);
903         else if (duration >= 0.01)
904                 printed += color_fprintf(fp, PERF_COLOR_YELLOW, "%6.3f ms", duration);
905         else
906                 printed += color_fprintf(fp, PERF_COLOR_NORMAL, "%6.3f ms", duration);
907         return printed + fprintf(fp, "): ");
908 }
909
910 /**
911  * filename.ptr: The filename char pointer that will be vfs_getname'd
912  * filename.entry_str_pos: Where to insert the string translated from
913  *                         filename.ptr by the vfs_getname tracepoint/kprobe.
914  */
915 struct thread_trace {
916         u64               entry_time;
917         u64               exit_time;
918         bool              entry_pending;
919         unsigned long     nr_events;
920         unsigned long     pfmaj, pfmin;
921         char              *entry_str;
922         double            runtime_ms;
923         struct {
924                 unsigned long ptr;
925                 short int     entry_str_pos;
926                 bool          pending_open;
927                 unsigned int  namelen;
928                 char          *name;
929         } filename;
930         struct {
931                 int       max;
932                 char      **table;
933         } paths;
934
935         struct intlist *syscall_stats;
936 };
937
938 static struct thread_trace *thread_trace__new(void)
939 {
940         struct thread_trace *ttrace = zalloc(sizeof(struct thread_trace));
941
942         if (ttrace) {
943                 ttrace->paths.max = -1;
944                 ttrace->syscall_stats = intlist__new(NULL);
945         }
946
947         return ttrace;
948 }
949
950 static struct thread_trace *thread__trace(struct thread *thread, FILE *fp)
951 {
952         struct thread_trace *ttrace;
953
954         if (thread == NULL)
955                 goto fail;
956
957         if (thread__priv(thread) == NULL)
958                 thread__set_priv(thread, thread_trace__new());
959
960         if (thread__priv(thread) == NULL)
961                 goto fail;
962
963         ttrace = thread__priv(thread);
964         ++ttrace->nr_events;
965
966         return ttrace;
967 fail:
968         color_fprintf(fp, PERF_COLOR_RED,
969                       "WARNING: not enough memory, dropping samples!\n");
970         return NULL;
971 }
972
973 #define TRACE_PFMAJ             (1 << 0)
974 #define TRACE_PFMIN             (1 << 1)
975
976 static const size_t trace__entry_str_size = 2048;
977
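/*
 * Per-thread fd -> pathname cache, grown on demand and filled by resolving
 * the /proc/<pid>/fd/<fd> (or /proc/<pid>/task/<tid>/fd/<fd>) symlink, so
 * that file descriptor arguments can be printed as "3</etc/passwd>".
 */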
978 static int trace__set_fd_pathname(struct thread *thread, int fd, const char *pathname)
979 {
980         struct thread_trace *ttrace = thread__priv(thread);
981
982         if (fd > ttrace->paths.max) {
983                 char **npath = realloc(ttrace->paths.table, (fd + 1) * sizeof(char *));
984
985                 if (npath == NULL)
986                         return -1;
987
988                 if (ttrace->paths.max != -1) {
989                         memset(npath + ttrace->paths.max + 1, 0,
990                                (fd - ttrace->paths.max) * sizeof(char *));
991                 } else {
992                         memset(npath, 0, (fd + 1) * sizeof(char *));
993                 }
994
995                 ttrace->paths.table = npath;
996                 ttrace->paths.max   = fd;
997         }
998
999         ttrace->paths.table[fd] = strdup(pathname);
1000
1001         return ttrace->paths.table[fd] != NULL ? 0 : -1;
1002 }
1003
1004 static int thread__read_fd_path(struct thread *thread, int fd)
1005 {
1006         char linkname[PATH_MAX], pathname[PATH_MAX];
1007         struct stat st;
1008         int ret;
1009
1010         if (thread->pid_ == thread->tid) {
1011                 scnprintf(linkname, sizeof(linkname),
1012                           "/proc/%d/fd/%d", thread->pid_, fd);
1013         } else {
1014                 scnprintf(linkname, sizeof(linkname),
1015                           "/proc/%d/task/%d/fd/%d", thread->pid_, thread->tid, fd);
1016         }
1017
1018         if (lstat(linkname, &st) < 0 || st.st_size + 1 > (off_t)sizeof(pathname))
1019                 return -1;
1020
1021         ret = readlink(linkname, pathname, sizeof(pathname));
1022
1023         if (ret < 0 || ret > st.st_size)
1024                 return -1;
1025
1026         pathname[ret] = '\0';
1027         return trace__set_fd_pathname(thread, fd, pathname);
1028 }
1029
1030 static const char *thread__fd_path(struct thread *thread, int fd,
1031                                    struct trace *trace)
1032 {
1033         struct thread_trace *ttrace = thread__priv(thread);
1034
1035         if (ttrace == NULL)
1036                 return NULL;
1037
1038         if (fd < 0)
1039                 return NULL;
1040
1041         if ((fd > ttrace->paths.max || ttrace->paths.table[fd] == NULL)) {
1042                 if (!trace->live)
1043                         return NULL;
1044                 ++trace->stats.proc_getname;
1045                 if (thread__read_fd_path(thread, fd))
1046                         return NULL;
1047         }
1048
1049         return ttrace->paths.table[fd];
1050 }
1051
1052 static size_t syscall_arg__scnprintf_fd(char *bf, size_t size,
1053                                         struct syscall_arg *arg)
1054 {
1055         int fd = arg->val;
1056         size_t printed = scnprintf(bf, size, "%d", fd);
1057         const char *path = thread__fd_path(arg->thread, fd, arg->trace);
1058
1059         if (path)
1060                 printed += scnprintf(bf + printed, size - printed, "<%s>", path);
1061
1062         return printed;
1063 }
1064
1065 static size_t syscall_arg__scnprintf_close_fd(char *bf, size_t size,
1066                                               struct syscall_arg *arg)
1067 {
1068         int fd = arg->val;
1069         size_t printed = syscall_arg__scnprintf_fd(bf, size, arg);
1070         struct thread_trace *ttrace = thread__priv(arg->thread);
1071
1072         if (ttrace && fd >= 0 && fd <= ttrace->paths.max)
1073                 zfree(&ttrace->paths.table[fd]);
1074
1075         return printed;
1076 }
1077
1078 static void thread__set_filename_pos(struct thread *thread, const char *bf,
1079                                      unsigned long ptr)
1080 {
1081         struct thread_trace *ttrace = thread__priv(thread);
1082
1083         ttrace->filename.ptr = ptr;
1084         ttrace->filename.entry_str_pos = bf - ttrace->entry_str;
1085 }
1086
1087 static size_t syscall_arg__scnprintf_filename(char *bf, size_t size,
1088                                               struct syscall_arg *arg)
1089 {
1090         unsigned long ptr = arg->val;
1091
1092         if (!arg->trace->vfs_getname)
1093                 return scnprintf(bf, size, "%#lx", ptr);
1094
1095         thread__set_filename_pos(arg->thread, bf, ptr);
1096         return 0;
1097 }
1098
1099 static bool trace__filter_duration(struct trace *trace, double t)
1100 {
1101         return t < (trace->duration_filter * NSEC_PER_MSEC);
1102 }
1103
1104 static size_t trace__fprintf_tstamp(struct trace *trace, u64 tstamp, FILE *fp)
1105 {
1106         double ts = (double)(tstamp - trace->base_time) / NSEC_PER_MSEC;
1107
1108         return fprintf(fp, "%10.3f ", ts);
1109 }
1110
1111 static bool done = false;
1112 static bool interrupted = false;
1113
1114 static void sig_handler(int sig)
1115 {
1116         done = true;
1117         interrupted = sig == SIGINT;
1118 }
1119
1120 static size_t trace__fprintf_entry_head(struct trace *trace, struct thread *thread,
1121                                         u64 duration, u64 tstamp, FILE *fp)
1122 {
1123         size_t printed = trace__fprintf_tstamp(trace, tstamp, fp);
1124         printed += fprintf_duration(duration, fp);
1125
1126         if (trace->multiple_threads) {
1127                 if (trace->show_comm)
1128                         printed += fprintf(fp, "%.14s/", thread__comm_str(thread));
1129                 printed += fprintf(fp, "%d ", thread->tid);
1130         }
1131
1132         return printed;
1133 }
1134
1135 static int trace__process_event(struct trace *trace, struct machine *machine,
1136                                 union perf_event *event, struct perf_sample *sample)
1137 {
1138         int ret = 0;
1139
1140         switch (event->header.type) {
1141         case PERF_RECORD_LOST:
1142                 color_fprintf(trace->output, PERF_COLOR_RED,
1143                               "LOST %" PRIu64 " events!\n", event->lost.lost);
1144                 ret = machine__process_lost_event(machine, event, sample);
1145                 break;
1146         default:
1147                 ret = machine__process_event(machine, event, sample);
1148                 break;
1149         }
1150
1151         return ret;
1152 }
1153
1154 static int trace__tool_process(struct perf_tool *tool,
1155                                union perf_event *event,
1156                                struct perf_sample *sample,
1157                                struct machine *machine)
1158 {
1159         struct trace *trace = container_of(tool, struct trace, tool);
1160         return trace__process_event(trace, machine, event, sample);
1161 }
1162
1163 static char *trace__machine__resolve_kernel_addr(void *vmachine, unsigned long long *addrp, char **modp)
1164 {
1165         struct machine *machine = vmachine;
1166
1167         if (machine->kptr_restrict_warned)
1168                 return NULL;
1169
1170         if (symbol_conf.kptr_restrict) {
1171                 pr_warning("Kernel address maps (/proc/{kallsyms,modules}) are restricted.\n\n"
1172                            "Check /proc/sys/kernel/kptr_restrict.\n\n"
1173                            "Kernel samples will not be resolved.\n");
1174                 machine->kptr_restrict_warned = true;
1175                 return NULL;
1176         }
1177
1178         return machine__resolve_kernel_addr(vmachine, addrp, modp);
1179 }
1180
1181 static int trace__symbols_init(struct trace *trace, struct perf_evlist *evlist)
1182 {
1183         int err = symbol__init(NULL);
1184
1185         if (err)
1186                 return err;
1187
1188         trace->host = machine__new_host();
1189         if (trace->host == NULL)
1190                 return -ENOMEM;
1191
1192         if (trace_event__register_resolver(trace->host, trace__machine__resolve_kernel_addr) < 0)
1193                 return -errno;
1194
1195         err = __machine__synthesize_threads(trace->host, &trace->tool, &trace->opts.target,
1196                                             evlist->threads, trace__tool_process, false,
1197                                             trace->opts.proc_map_timeout);
1198         if (err)
1199                 symbol__exit();
1200
1201         return err;
1202 }
1203
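/*
 * Pick a beautifier for each argument: an entry in the per-syscall fmt
 * table wins, otherwise fall back on the tracepoint field type (pointers
 * printed as hex, pid_t and umode_t via their dedicated printers).
 */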
1204 static int syscall__set_arg_fmts(struct syscall *sc)
1205 {
1206         struct format_field *field;
1207         int idx = 0;
1208
1209         sc->arg_scnprintf = calloc(sc->nr_args, sizeof(void *));
1210         if (sc->arg_scnprintf == NULL)
1211                 return -1;
1212
1213         if (sc->fmt)
1214                 sc->arg_parm = sc->fmt->arg_parm;
1215
1216         for (field = sc->args; field; field = field->next) {
1217                 if (sc->fmt && sc->fmt->arg_scnprintf[idx])
1218                         sc->arg_scnprintf[idx] = sc->fmt->arg_scnprintf[idx];
1219                 else if (field->flags & FIELD_IS_POINTER)
1220                         sc->arg_scnprintf[idx] = syscall_arg__scnprintf_hex;
1221                 else if (strcmp(field->type, "pid_t") == 0)
1222                         sc->arg_scnprintf[idx] = SCA_PID;
1223                 else if (strcmp(field->type, "umode_t") == 0)
1224                         sc->arg_scnprintf[idx] = SCA_MODE_T;
1225                 ++idx;
1226         }
1227
1228         return 0;
1229 }
1230
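/*
 * Lazily fill trace->syscalls.table[id]: look up the syscall name, attach
 * its syscall_fmt (if any) and the syscalls:sys_enter_* tracepoint format
 * so that its arguments can be pretty printed later.
 */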
1231 static int trace__read_syscall_info(struct trace *trace, int id)
1232 {
1233         char tp_name[128];
1234         struct syscall *sc;
1235         const char *name = syscalltbl__name(trace->sctbl, id);
1236
1237         if (name == NULL)
1238                 return -1;
1239
1240         if (id > trace->syscalls.max) {
1241                 struct syscall *nsyscalls = realloc(trace->syscalls.table, (id + 1) * sizeof(*sc));
1242
1243                 if (nsyscalls == NULL)
1244                         return -1;
1245
1246                 if (trace->syscalls.max != -1) {
1247                         memset(nsyscalls + trace->syscalls.max + 1, 0,
1248                                (id - trace->syscalls.max) * sizeof(*sc));
1249                 } else {
1250                         memset(nsyscalls, 0, (id + 1) * sizeof(*sc));
1251                 }
1252
1253                 trace->syscalls.table = nsyscalls;
1254                 trace->syscalls.max   = id;
1255         }
1256
1257         sc = trace->syscalls.table + id;
1258         sc->name = name;
1259
1260         sc->fmt  = syscall_fmt__find(sc->name);
1261
1262         snprintf(tp_name, sizeof(tp_name), "sys_enter_%s", sc->name);
1263         sc->tp_format = trace_event__tp_format("syscalls", tp_name);
1264
1265         if (IS_ERR(sc->tp_format) && sc->fmt && sc->fmt->alias) {
1266                 snprintf(tp_name, sizeof(tp_name), "sys_enter_%s", sc->fmt->alias);
1267                 sc->tp_format = trace_event__tp_format("syscalls", tp_name);
1268         }
1269
1270         if (IS_ERR(sc->tp_format))
1271                 return -1;
1272
1273         sc->args = sc->tp_format->format.fields;
1274         sc->nr_args = sc->tp_format->format.nr_fields;
1275         /*
1276          * We need to check and discard the first field, '__syscall_nr'
1277          * or 'nr', which holds the syscall number and is not needed here,
1278          * so drop it. Note that the field does not exist on older kernels.
1279          */
1280         if (sc->args && (!strcmp(sc->args->name, "__syscall_nr") || !strcmp(sc->args->name, "nr"))) {
1281                 sc->args = sc->args->next;
1282                 --sc->nr_args;
1283         }
1284
1285         sc->is_exit = !strcmp(name, "exit_group") || !strcmp(name, "exit");
1286
1287         return syscall__set_arg_fmts(sc);
1288 }
1289
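/*
 * Translate the user supplied list of syscall names (trace->ev_qualifier)
 * into syscall ids, printing a diagnostic that lists any names that do not
 * correspond to a valid syscall on this architecture.
 */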
1290 static int trace__validate_ev_qualifier(struct trace *trace)
1291 {
1292         int err = 0, i;
1293         struct str_node *pos;
1294
1295         trace->ev_qualifier_ids.nr = strlist__nr_entries(trace->ev_qualifier);
1296         trace->ev_qualifier_ids.entries = malloc(trace->ev_qualifier_ids.nr *
1297                                                  sizeof(trace->ev_qualifier_ids.entries[0]));
1298
1299         if (trace->ev_qualifier_ids.entries == NULL) {
1300                 fputs("Error:\tNot enough memory for allocating events qualifier ids\n",
1301                        trace->output);
1302                 err = -EINVAL;
1303                 goto out;
1304         }
1305
1306         i = 0;
1307
1308         strlist__for_each(pos, trace->ev_qualifier) {
1309                 const char *sc = pos->s;
1310                 int id = syscalltbl__id(trace->sctbl, sc);
1311
1312                 if (id < 0) {
1313                         if (err == 0) {
1314                                 fputs("Error:\tInvalid syscall ", trace->output);
1315                                 err = -EINVAL;
1316                         } else {
1317                                 fputs(", ", trace->output);
1318                         }
1319
1320                         fputs(sc, trace->output);
1321                 }
1322
1323                 trace->ev_qualifier_ids.entries[i++] = id;
1324         }
1325
1326         if (err < 0) {
1327                 fputs("\nHint:\ttry 'perf list syscalls:sys_enter_*'"
1328                       "\nHint:\tand: 'man syscalls'\n", trace->output);
1329                 zfree(&trace->ev_qualifier_ids.entries);
1330                 trace->ev_qualifier_ids.nr = 0;
1331         }
1332 out:
1333         return err;
1334 }
1335
1336 /*
1337  * args is to be interpreted as a series of longs but we need to handle
1338  * 8-byte unaligned accesses. args points to raw_data within the event
1339  * and raw_data is guaranteed not to be 8-byte aligned because it is
1340  * preceded by raw_size, which is a u32. So we need to copy args to a temp
1341  * variable to read it. Most notably this avoids extended load instructions
1342  * on unaligned addresses.
1343  */
1344
1345 static size_t syscall__scnprintf_args(struct syscall *sc, char *bf, size_t size,
1346                                       unsigned char *args, struct trace *trace,
1347                                       struct thread *thread)
1348 {
1349         size_t printed = 0;
1350         unsigned char *p;
1351         unsigned long val;
1352
1353         if (sc->args != NULL) {
1354                 struct format_field *field;
1355                 u8 bit = 1;
1356                 struct syscall_arg arg = {
1357                         .idx    = 0,
1358                         .mask   = 0,
1359                         .trace  = trace,
1360                         .thread = thread,
1361                 };
1362
1363                 for (field = sc->args; field;
1364                      field = field->next, ++arg.idx, bit <<= 1) {
1365                         if (arg.mask & bit)
1366                                 continue;
1367
1368                         /* special care for unaligned accesses */
1369                         p = args + sizeof(unsigned long) * arg.idx;
1370                         memcpy(&val, p, sizeof(val));
1371
1372                         /*
1373                          * Suppress this argument if its value is zero and
1374                          * we don't have a string associated in a strarray
1375                          * for it.
1376                          */
1377                         if (val == 0 &&
1378                             !(sc->arg_scnprintf &&
1379                               sc->arg_scnprintf[arg.idx] == SCA_STRARRAY &&
1380                               sc->arg_parm[arg.idx]))
1381                                 continue;
1382
1383                         printed += scnprintf(bf + printed, size - printed,
1384                                              "%s%s: ", printed ? ", " : "", field->name);
1385                         if (sc->arg_scnprintf && sc->arg_scnprintf[arg.idx]) {
1386                                 arg.val = val;
1387                                 if (sc->arg_parm)
1388                                         arg.parm = sc->arg_parm[arg.idx];
1389                                 printed += sc->arg_scnprintf[arg.idx](bf + printed,
1390                                                                       size - printed, &arg);
1391                         } else {
1392                                 printed += scnprintf(bf + printed, size - printed,
1393                                                      "%ld", val);
1394                         }
1395                 }
1396         } else if (IS_ERR(sc->tp_format)) {
1397                 /*
1398                  * If we managed to read the tracepoint /format file, then we
1399                  * may end up not having any args, like with gettid(), so only
1400                  * print the raw args when we didn't manage to read it.
1401                  */
1402                 int i = 0;
1403
1404                 while (i < 6) {
1405                         /* special care for unaligned accesses */
1406                         p = args + sizeof(unsigned long) * i;
1407                         memcpy(&val, p, sizeof(val));
1408                         printed += scnprintf(bf + printed, size - printed,
1409                                              "%sarg%d: %ld",
1410                                              printed ? ", " : "", i, val);
1411                         ++i;
1412                 }
1413         }
1414
1415         return printed;
1416 }
1417
1418 typedef int (*tracepoint_handler)(struct trace *trace, struct perf_evsel *evsel,
1419                                   union perf_event *event,
1420                                   struct perf_sample *sample);
1421
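     /*
      * Resolve a syscall id to its entry in trace->syscalls.table, reading the
      * tracepoint format lazily on first use.  Returns NULL for invalid ids
      * (see the XXX note below about "NR -1" samples) or when the syscall
      * information can't be read, complaining only in verbose mode.
      */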
1422 static struct syscall *trace__syscall_info(struct trace *trace,
1423                                            struct perf_evsel *evsel, int id)
1424 {
1425
1426         if (id < 0) {
1427
1428                 /*
1429                  * XXX: Noticed on x86_64, reproduced as far back as 3.0.36, haven't tried
1430                  * before that, leaving at a higher verbosity level till that is
1431                  * explained. Reproduced with plain ftrace with:
1432                  *
1433                  * echo 1 > /t/events/raw_syscalls/sys_exit/enable
1434                  * grep "NR -1 " /t/trace_pipe
1435                  *
1436                  * After generating some load on the machine.
1437                  */
1438                 if (verbose > 1) {
1439                         static u64 n;
1440                         fprintf(trace->output, "Invalid syscall %d id, skipping (%s, %" PRIu64 ") ...\n",
1441                                 id, perf_evsel__name(evsel), ++n);
1442                 }
1443                 return NULL;
1444         }
1445
1446         if ((id > trace->syscalls.max || trace->syscalls.table[id].name == NULL) &&
1447             trace__read_syscall_info(trace, id))
1448                 goto out_cant_read;
1449
1450         if ((id > trace->syscalls.max || trace->syscalls.table[id].name == NULL))
1451                 goto out_cant_read;
1452
1453         return &trace->syscalls.table[id];
1454
1455 out_cant_read:
1456         if (verbose) {
1457                 fprintf(trace->output, "Problems reading syscall %d", id);
1458                 if (id <= trace->syscalls.max && trace->syscalls.table[id].name != NULL)
1459                         fprintf(trace->output, "(%s)", trace->syscalls.table[id].name);
1460                 fputs(" information\n", trace->output);
1461         }
1462         return NULL;
1463 }
1464
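     /*
      * Accumulate per-thread, per-syscall duration statistics, keyed by syscall
      * id in ttrace->syscall_stats, used when the summary is requested
      * (trace->summary) to print the table in thread__dump_stats().
      */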
1465 static void thread__update_stats(struct thread_trace *ttrace,
1466                                  int id, struct perf_sample *sample)
1467 {
1468         struct int_node *inode;
1469         struct stats *stats;
1470         u64 duration = 0;
1471
1472         inode = intlist__findnew(ttrace->syscall_stats, id);
1473         if (inode == NULL)
1474                 return;
1475
1476         stats = inode->priv;
1477         if (stats == NULL) {
1478                 stats = malloc(sizeof(struct stats));
1479                 if (stats == NULL)
1480                         return;
1481                 init_stats(stats);
1482                 inode->priv = stats;
1483         }
1484
1485         if (ttrace->entry_time && sample->time > ttrace->entry_time)
1486                 duration = sample->time - ttrace->entry_time;
1487
1488         update_stats(stats, duration);
1489 }
1490
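     /*
      * If the current thread still has a sys_enter pending when some other event
      * shows up, print the pending entry followed by "...", strace style, so the
      * interleaving of threads and events stays visible.
      */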
1491 static int trace__printf_interrupted_entry(struct trace *trace, struct perf_sample *sample)
1492 {
1493         struct thread_trace *ttrace;
1494         u64 duration;
1495         size_t printed;
1496
1497         if (trace->current == NULL)
1498                 return 0;
1499
1500         ttrace = thread__priv(trace->current);
1501
1502         if (!ttrace->entry_pending)
1503                 return 0;
1504
1505         duration = sample->time - ttrace->entry_time;
1506
1507         printed  = trace__fprintf_entry_head(trace, trace->current, duration, sample->time, trace->output);
1508         printed += fprintf(trace->output, "%-70s) ...\n", ttrace->entry_str);
1509         ttrace->entry_pending = false;
1510
1511         return printed;
1512 }
1513
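     /*
      * raw_syscalls:sys_enter handler: format "name(args" into ttrace->entry_str.
      * For syscalls that never return (exit, exit_group) the line is printed right
      * away, otherwise entry_pending is set and trace__sys_exit() completes it
      * with the return value.
      */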
1514 static int trace__sys_enter(struct trace *trace, struct perf_evsel *evsel,
1515                             union perf_event *event __maybe_unused,
1516                             struct perf_sample *sample)
1517 {
1518         char *msg;
1519         void *args;
1520         size_t printed = 0;
1521         struct thread *thread;
1522         int id = perf_evsel__sc_tp_uint(evsel, id, sample), err = -1;
1523         struct syscall *sc = trace__syscall_info(trace, evsel, id);
1524         struct thread_trace *ttrace;
1525
1526         if (sc == NULL)
1527                 return -1;
1528
1529         thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
1530         ttrace = thread__trace(thread, trace->output);
1531         if (ttrace == NULL)
1532                 goto out_put;
1533
1534         args = perf_evsel__sc_tp_ptr(evsel, args, sample);
1535
1536         if (ttrace->entry_str == NULL) {
1537                 ttrace->entry_str = malloc(trace__entry_str_size);
1538                 if (!ttrace->entry_str)
1539                         goto out_put;
1540         }
1541
1542         if (!(trace->duration_filter || trace->summary_only || trace->min_stack))
1543                 trace__printf_interrupted_entry(trace, sample);
1544
1545         ttrace->entry_time = sample->time;
1546         msg = ttrace->entry_str;
1547         printed += scnprintf(msg + printed, trace__entry_str_size - printed, "%s(", sc->name);
1548
1549         printed += syscall__scnprintf_args(sc, msg + printed, trace__entry_str_size - printed,
1550                                            args, trace, thread);
1551
1552         if (sc->is_exit) {
1553                 if (!(trace->duration_filter || trace->summary_only || trace->min_stack)) {
1554                         trace__fprintf_entry_head(trace, thread, 1, sample->time, trace->output);
1555                         fprintf(trace->output, "%-70s)\n", ttrace->entry_str);
1556                 }
1557         } else {
1558                 ttrace->entry_pending = true;
1559                 /* See trace__vfs_getname & trace__sys_exit */
1560                 ttrace->filename.pending_open = false;
1561         }
1562
1563         if (trace->current != thread) {
1564                 thread__put(trace->current);
1565                 trace->current = thread__get(thread);
1566         }
1567         err = 0;
1568 out_put:
1569         thread__put(thread);
1570         return err;
1571 }
1572
1573 static int trace__resolve_callchain(struct trace *trace, struct perf_evsel *evsel,
1574                                     struct perf_sample *sample,
1575                                     struct callchain_cursor *cursor)
1576 {
1577         struct addr_location al;
1578
1579         if (machine__resolve(trace->host, &al, sample) < 0 ||
1580             thread__resolve_callchain(al.thread, cursor, evsel, sample, NULL, NULL, trace->max_stack))
1581                 return -1;
1582
1583         return 0;
1584 }
1585
1586 static int trace__fprintf_callchain(struct trace *trace, struct perf_sample *sample)
1587 {
1588         /* TODO: user-configurable print_opts */
1589         const unsigned int print_opts = EVSEL__PRINT_SYM |
1590                                         EVSEL__PRINT_DSO |
1591                                         EVSEL__PRINT_UNKNOWN_AS_ADDR;
1592
1593         return sample__fprintf_callchain(sample, 38, print_opts, &callchain_cursor, trace->output);
1594 }
1595
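     /*
      * raw_syscalls:sys_exit handler: compute the syscall duration, apply the
      * duration filter (trace->duration_filter), then print either the pending
      * entry string or a "... [continued]" marker, followed by the return value
      * formatted according to the syscall (errno name, child pid + comm, hex or
      * plain decimal) and, when requested, the callchain leading to the syscall.
      */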
1596 static int trace__sys_exit(struct trace *trace, struct perf_evsel *evsel,
1597                            union perf_event *event __maybe_unused,
1598                            struct perf_sample *sample)
1599 {
1600         long ret;
1601         u64 duration = 0;
1602         struct thread *thread;
1603         int id = perf_evsel__sc_tp_uint(evsel, id, sample), err = -1, callchain_ret = 0;
1604         struct syscall *sc = trace__syscall_info(trace, evsel, id);
1605         struct thread_trace *ttrace;
1606
1607         if (sc == NULL)
1608                 return -1;
1609
1610         thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
1611         ttrace = thread__trace(thread, trace->output);
1612         if (ttrace == NULL)
1613                 goto out_put;
1614
1615         if (trace->summary)
1616                 thread__update_stats(ttrace, id, sample);
1617
1618         ret = perf_evsel__sc_tp_uint(evsel, ret, sample);
1619
1620         if (id == trace->open_id && ret >= 0 && ttrace->filename.pending_open) {
1621                 trace__set_fd_pathname(thread, ret, ttrace->filename.name);
1622                 ttrace->filename.pending_open = false;
1623                 ++trace->stats.vfs_getname;
1624         }
1625
1626         ttrace->exit_time = sample->time;
1627
1628         if (ttrace->entry_time) {
1629                 duration = sample->time - ttrace->entry_time;
1630                 if (trace__filter_duration(trace, duration))
1631                         goto out;
1632         } else if (trace->duration_filter)
1633                 goto out;
1634
1635         if (sample->callchain) {
1636                 callchain_ret = trace__resolve_callchain(trace, evsel, sample, &callchain_cursor);
1637                 if (callchain_ret == 0) {
1638                         if (callchain_cursor.nr < trace->min_stack)
1639                                 goto out;
1640                         callchain_ret = 1;
1641                 }
1642         }
1643
1644         if (trace->summary_only)
1645                 goto out;
1646
1647         trace__fprintf_entry_head(trace, thread, duration, sample->time, trace->output);
1648
1649         if (ttrace->entry_pending) {
1650                 fprintf(trace->output, "%-70s", ttrace->entry_str);
1651         } else {
1652                 fprintf(trace->output, " ... [");
1653                 color_fprintf(trace->output, PERF_COLOR_YELLOW, "continued");
1654                 fprintf(trace->output, "]: %s()", sc->name);
1655         }
1656
1657         if (sc->fmt == NULL) {
1658 signed_print:
1659                 fprintf(trace->output, ") = %ld", ret);
1660         } else if (ret < 0 && (sc->fmt->errmsg || sc->fmt->errpid)) {
1661                 char bf[STRERR_BUFSIZE];
1662                 const char *emsg = strerror_r(-ret, bf, sizeof(bf)),
1663                            *e = audit_errno_to_name(-ret);
1664
1665                 fprintf(trace->output, ") = -1 %s %s", e, emsg);
1666         } else if (ret == 0 && sc->fmt->timeout)
1667                 fprintf(trace->output, ") = 0 Timeout");
1668         else if (sc->fmt->hexret)
1669                 fprintf(trace->output, ") = %#lx", ret);
1670         else if (sc->fmt->errpid) {
1671                 struct thread *child = machine__find_thread(trace->host, ret, ret);
1672
1673                 if (child != NULL) {
1674                         fprintf(trace->output, ") = %ld", ret);
1675                         if (child->comm_set)
1676                                 fprintf(trace->output, " (%s)", thread__comm_str(child));
1677                         thread__put(child);
1678                 }
1679         } else
1680                 goto signed_print;
1681
1682         fputc('\n', trace->output);
1683
1684         if (callchain_ret > 0)
1685                 trace__fprintf_callchain(trace, sample);
1686         else if (callchain_ret < 0)
1687                 pr_err("Problem processing %s callchain, skipping...\n", perf_evsel__name(evsel));
1688 out:
1689         ttrace->entry_pending = false;
1690         err = 0;
1691 out_put:
1692         thread__put(thread);
1693         return err;
1694 }
1695
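     /*
      * probe:vfs_getname handler: the probe fires while the syscall that resolves
      * a pathname is still in flight, so stash the name in ttrace->filename and,
      * if an entry string is pending, splice the name into it at the position
      * recorded earlier (filename.entry_str_pos), so the pathname shows up
      * instead of just a pointer value.
      */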
1696 static int trace__vfs_getname(struct trace *trace, struct perf_evsel *evsel,
1697                               union perf_event *event __maybe_unused,
1698                               struct perf_sample *sample)
1699 {
1700         struct thread *thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
1701         struct thread_trace *ttrace;
1702         size_t filename_len, entry_str_len, to_move;
1703         ssize_t remaining_space;
1704         char *pos;
1705         const char *filename = perf_evsel__rawptr(evsel, sample, "pathname");
1706
1707         if (!thread)
1708                 goto out;
1709
1710         ttrace = thread__priv(thread);
1711         if (!ttrace)
1712                 goto out;
1713
1714         filename_len = strlen(filename);
1715
1716         if (ttrace->filename.namelen < filename_len) {
1717                 char *f = realloc(ttrace->filename.name, filename_len + 1);
1718
1719                 if (f == NULL)
1720                         goto out;
1721
1722                 ttrace->filename.namelen = filename_len;
1723                 ttrace->filename.name = f;
1724         }
1725
1726         strcpy(ttrace->filename.name, filename);
1727         ttrace->filename.pending_open = true;
1728
1729         if (!ttrace->filename.ptr)
1730                 goto out;
1731
1732         entry_str_len = strlen(ttrace->entry_str);
1733         remaining_space = trace__entry_str_size - entry_str_len - 1; /* \0 */
1734         if (remaining_space <= 0)
1735                 goto out;
1736
1737         if (filename_len > (size_t)remaining_space) {
1738                 filename += filename_len - remaining_space;
1739                 filename_len = remaining_space;
1740         }
1741
1742         to_move = entry_str_len - ttrace->filename.entry_str_pos + 1; /* \0 */
1743         pos = ttrace->entry_str + ttrace->filename.entry_str_pos;
1744         memmove(pos + filename_len, pos, to_move);
1745         memcpy(pos, filename, filename_len);
1746
1747         ttrace->filename.ptr = 0;
1748         ttrace->filename.entry_str_pos = 0;
1749 out:
1750         return 0;
1751 }
1752
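     /*
      * sched:sched_stat_runtime handler (enabled with --sched): accumulate the
      * on-CPU time per thread and for the whole session, in milliseconds, for
      * the summary output.
      */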
1753 static int trace__sched_stat_runtime(struct trace *trace, struct perf_evsel *evsel,
1754                                      union perf_event *event __maybe_unused,
1755                                      struct perf_sample *sample)
1756 {
1757         u64 runtime = perf_evsel__intval(evsel, sample, "runtime");
1758         double runtime_ms = (double)runtime / NSEC_PER_MSEC;
1759         struct thread *thread = machine__findnew_thread(trace->host,
1760                                                         sample->pid,
1761                                                         sample->tid);
1762         struct thread_trace *ttrace = thread__trace(thread, trace->output);
1763
1764         if (ttrace == NULL)
1765                 goto out_dump;
1766
1767         ttrace->runtime_ms += runtime_ms;
1768         trace->runtime_ms += runtime_ms;
1769         thread__put(thread);
1770         return 0;
1771
1772 out_dump:
1773         fprintf(trace->output, "%s: comm=%s,pid=%u,runtime=%" PRIu64 ",vruntime=%" PRIu64 ")\n",
1774                evsel->name,
1775                perf_evsel__strval(evsel, sample, "comm"),
1776                (pid_t)perf_evsel__intval(evsel, sample, "pid"),
1777                runtime,
1778                perf_evsel__intval(evsel, sample, "vruntime"));
1779         thread__put(thread);
1780         return 0;
1781 }
1782
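     /*
      * Printer callback for bpf-output events: dump the raw payload as
      * characters, using '.' for anything that isn't printable.
      */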
1783 static void bpf_output__printer(enum binary_printer_ops op,
1784                                 unsigned int val, void *extra)
1785 {
1786         FILE *output = extra;
1787         unsigned char ch = (unsigned char)val;
1788
1789         switch (op) {
1790         case BINARY_PRINT_CHAR_DATA:
1791                 fprintf(output, "%c", isprint(ch) ? ch : '.');
1792                 break;
1793         case BINARY_PRINT_DATA_BEGIN:
1794         case BINARY_PRINT_LINE_BEGIN:
1795         case BINARY_PRINT_ADDR:
1796         case BINARY_PRINT_NUM_DATA:
1797         case BINARY_PRINT_NUM_PAD:
1798         case BINARY_PRINT_SEP:
1799         case BINARY_PRINT_CHAR_PAD:
1800         case BINARY_PRINT_LINE_END:
1801         case BINARY_PRINT_DATA_END:
1802         default:
1803                 break;
1804         }
1805 }
1806
1807 static void bpf_output__fprintf(struct trace *trace,
1808                                 struct perf_sample *sample)
1809 {
1810         print_binary(sample->raw_data, sample->raw_size, 8,
1811                      bpf_output__printer, trace->output);
1812 }
1813
1814 static int trace__event_handler(struct trace *trace, struct perf_evsel *evsel,
1815                                 union perf_event *event __maybe_unused,
1816                                 struct perf_sample *sample)
1817 {
1818         int callchain_ret = 0;
1819
1820         if (sample->callchain) {
1821                 callchain_ret = trace__resolve_callchain(trace, evsel, sample, &callchain_cursor);
1822                 if (callchain_ret == 0) {
1823                         if (callchain_cursor.nr < trace->min_stack)
1824                                 goto out;
1825                         callchain_ret = 1;
1826                 }
1827         }
1828
1829         trace__printf_interrupted_entry(trace, sample);
1830         trace__fprintf_tstamp(trace, sample->time, trace->output);
1831
1832         if (trace->trace_syscalls)
1833                 fprintf(trace->output, "(         ): ");
1834
1835         fprintf(trace->output, "%s:", evsel->name);
1836
1837         if (perf_evsel__is_bpf_output(evsel)) {
1838                 bpf_output__fprintf(trace, sample);
1839         } else if (evsel->tp_format) {
1840                 event_format__fprintf(evsel->tp_format, sample->cpu,
1841                                       sample->raw_data, sample->raw_size,
1842                                       trace->output);
1843         }
1844
1845         fprintf(trace->output, ")\n");
1846
1847         if (callchain_ret > 0)
1848                 trace__fprintf_callchain(trace, sample);
1849         else if (callchain_ret < 0)
1850                 pr_err("Problem processing %s callchain, skipping...\n", perf_evsel__name(evsel));
1851 out:
1852         return 0;
1853 }
1854
1855 static void print_location(FILE *f, struct perf_sample *sample,
1856                            struct addr_location *al,
1857                            bool print_dso, bool print_sym)
1858 {
1859
1860         if ((verbose || print_dso) && al->map)
1861                 fprintf(f, "%s@", al->map->dso->long_name);
1862
1863         if ((verbose || print_sym) && al->sym)
1864                 fprintf(f, "%s+0x%" PRIx64, al->sym->name,
1865                         al->addr - al->sym->start);
1866         else if (al->map)
1867                 fprintf(f, "0x%" PRIx64, al->addr);
1868         else
1869                 fprintf(f, "0x%" PRIx64, sample->addr);
1870 }
1871
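     /*
      * Handler for the minor/major page fault software events: resolve the
      * faulting IP and the target data address to symbols/maps and print a
      * "maj/min fault [location] => target (type, level)" line, plus the
      * callchain when one was asked for.
      */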
1872 static int trace__pgfault(struct trace *trace,
1873                           struct perf_evsel *evsel,
1874                           union perf_event *event __maybe_unused,
1875                           struct perf_sample *sample)
1876 {
1877         struct thread *thread;
1878         struct addr_location al;
1879         char map_type = 'd';
1880         struct thread_trace *ttrace;
1881         int err = -1;
1882         int callchain_ret = 0;
1883
1884         thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
1885
1886         if (sample->callchain) {
1887                 callchain_ret = trace__resolve_callchain(trace, evsel, sample, &callchain_cursor);
1888                 if (callchain_ret == 0) {
1889                         if (callchain_cursor.nr < trace->min_stack)
1890                                 goto out_put;
1891                         callchain_ret = 1;
1892                 }
1893         }
1894
1895         ttrace = thread__trace(thread, trace->output);
1896         if (ttrace == NULL)
1897                 goto out_put;
1898
1899         if (evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MAJ)
1900                 ttrace->pfmaj++;
1901         else
1902                 ttrace->pfmin++;
1903
1904         if (trace->summary_only)
1905                 goto out;
1906
1907         thread__find_addr_location(thread, sample->cpumode, MAP__FUNCTION,
1908                               sample->ip, &al);
1909
1910         trace__fprintf_entry_head(trace, thread, 0, sample->time, trace->output);
1911
1912         fprintf(trace->output, "%sfault [",
1913                 evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MAJ ?
1914                 "maj" : "min");
1915
1916         print_location(trace->output, sample, &al, false, true);
1917
1918         fprintf(trace->output, "] => ");
1919
1920         thread__find_addr_location(thread, sample->cpumode, MAP__VARIABLE,
1921                                    sample->addr, &al);
1922
1923         if (!al.map) {
1924                 thread__find_addr_location(thread, sample->cpumode,
1925                                            MAP__FUNCTION, sample->addr, &al);
1926
1927                 if (al.map)
1928                         map_type = 'x';
1929                 else
1930                         map_type = '?';
1931         }
1932
1933         print_location(trace->output, sample, &al, true, false);
1934
1935         fprintf(trace->output, " (%c%c)\n", map_type, al.level);
1936
1937         if (callchain_ret > 0)
1938                 trace__fprintf_callchain(trace, sample);
1939         else if (callchain_ret < 0)
1940                 pr_err("Problem processing %s callchain, skipping...\n", perf_evsel__name(evsel));
1941 out:
1942         err = 0;
1943 out_put:
1944         thread__put(thread);
1945         return err;
1946 }
1947
1948 static bool skip_sample(struct trace *trace, struct perf_sample *sample)
1949 {
1950         if ((trace->pid_list && intlist__find(trace->pid_list, sample->pid)) ||
1951             (trace->tid_list && intlist__find(trace->tid_list, sample->tid)))
1952                 return false;
1953
1954         if (trace->pid_list || trace->tid_list)
1955                 return true;
1956
1957         return false;
1958 }
1959
1960 static void trace__set_base_time(struct trace *trace,
1961                                  struct perf_evsel *evsel,
1962                                  struct perf_sample *sample)
1963 {
1964         /*
1965          * BPF events were not setting PERF_SAMPLE_TIME, so be more robust
1966          * and don't use sample->time unconditionally, we may end up having
1967          * some other event in the future without PERF_SAMPLE_TIME for good
1968          * reason, i.e. we may not be interested in its timestamps, just in
1969          * it taking place, picking some piece of information when it
1970          * appears in our event stream (vfs_getname comes to mind).
1971          */
1972         if (trace->base_time == 0 && !trace->full_time &&
1973             (evsel->attr.sample_type & PERF_SAMPLE_TIME))
1974                 trace->base_time = sample->time;
1975 }
1976
1977 static int trace__process_sample(struct perf_tool *tool,
1978                                  union perf_event *event,
1979                                  struct perf_sample *sample,
1980                                  struct perf_evsel *evsel,
1981                                  struct machine *machine __maybe_unused)
1982 {
1983         struct trace *trace = container_of(tool, struct trace, tool);
1984         int err = 0;
1985
1986         tracepoint_handler handler = evsel->handler;
1987
1988         if (skip_sample(trace, sample))
1989                 return 0;
1990
1991         trace__set_base_time(trace, evsel, sample);
1992
1993         if (handler) {
1994                 ++trace->nr_events;
1995                 handler(trace, evsel, event, sample);
1996         }
1997
1998         return err;
1999 }
2000
2001 static int parse_target_str(struct trace *trace)
2002 {
2003         if (trace->opts.target.pid) {
2004                 trace->pid_list = intlist__new(trace->opts.target.pid);
2005                 if (trace->pid_list == NULL) {
2006                         pr_err("Error parsing process id string\n");
2007                         return -EINVAL;
2008                 }
2009         }
2010
2011         if (trace->opts.target.tid) {
2012                 trace->tid_list = intlist__new(trace->opts.target.tid);
2013                 if (trace->tid_list == NULL) {
2014                         pr_err("Error parsing thread id string\n");
2015                         return -EINVAL;
2016                 }
2017         }
2018
2019         return 0;
2020 }
2021
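     /*
      * 'perf trace record': build an argv for 'perf record' and hand it over to
      * cmd_record().  With the defaults this amounts to roughly:
      *
      *   perf record -R -m 1024 -c 1 \
      *       -e raw_syscalls:sys_enter,raw_syscalls:sys_exit <workload>
      *
      * falling back to the older syscalls:sys_enter/sys_exit tracepoints and
      * appending the page fault events when they were requested.
      */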
2022 static int trace__record(struct trace *trace, int argc, const char **argv)
2023 {
2024         unsigned int rec_argc, i, j;
2025         const char **rec_argv;
2026         const char * const record_args[] = {
2027                 "record",
2028                 "-R",
2029                 "-m", "1024",
2030                 "-c", "1",
2031         };
2032
2033         const char * const sc_args[] = { "-e", };
2034         unsigned int sc_args_nr = ARRAY_SIZE(sc_args);
2035         const char * const majpf_args[] = { "-e", "major-faults" };
2036         unsigned int majpf_args_nr = ARRAY_SIZE(majpf_args);
2037         const char * const minpf_args[] = { "-e", "minor-faults" };
2038         unsigned int minpf_args_nr = ARRAY_SIZE(minpf_args);
2039
2040         /* +1 is for the event string below */
2041         rec_argc = ARRAY_SIZE(record_args) + sc_args_nr + 1 +
2042                 majpf_args_nr + minpf_args_nr + argc;
2043         rec_argv = calloc(rec_argc + 1, sizeof(char *));
2044
2045         if (rec_argv == NULL)
2046                 return -ENOMEM;
2047
2048         j = 0;
2049         for (i = 0; i < ARRAY_SIZE(record_args); i++)
2050                 rec_argv[j++] = record_args[i];
2051
2052         if (trace->trace_syscalls) {
2053                 for (i = 0; i < sc_args_nr; i++)
2054                         rec_argv[j++] = sc_args[i];
2055
2056                 /* event string may be different for older kernels - e.g., RHEL6 */
2057                 if (is_valid_tracepoint("raw_syscalls:sys_enter"))
2058                         rec_argv[j++] = "raw_syscalls:sys_enter,raw_syscalls:sys_exit";
2059                 else if (is_valid_tracepoint("syscalls:sys_enter"))
2060                         rec_argv[j++] = "syscalls:sys_enter,syscalls:sys_exit";
2061                 else {
2062                         pr_err("Neither raw_syscalls nor syscalls events exist.\n");
                             free(rec_argv);
2063                         return -1;
2064                 }
2065         }
2066
2067         if (trace->trace_pgfaults & TRACE_PFMAJ)
2068                 for (i = 0; i < majpf_args_nr; i++)
2069                         rec_argv[j++] = majpf_args[i];
2070
2071         if (trace->trace_pgfaults & TRACE_PFMIN)
2072                 for (i = 0; i < minpf_args_nr; i++)
2073                         rec_argv[j++] = minpf_args[i];
2074
2075         for (i = 0; i < (unsigned int)argc; i++)
2076                 rec_argv[j++] = argv[i];
2077
2078         return cmd_record(j, rec_argv, NULL);
2079 }
2080
2081 static size_t trace__fprintf_thread_summary(struct trace *trace, FILE *fp);
2082
2083 static bool perf_evlist__add_vfs_getname(struct perf_evlist *evlist)
2084 {
2085         struct perf_evsel *evsel = perf_evsel__newtp("probe", "vfs_getname");
2086
2087         if (IS_ERR(evsel))
2088                 return false;
2089
2090         if (perf_evsel__field(evsel, "pathname") == NULL) {
2091                 perf_evsel__delete(evsel);
2092                 return false;
2093         }
2094
2095         evsel->handler = trace__vfs_getname;
2096         perf_evlist__add(evlist, evsel);
2097         return true;
2098 }
2099
2100 static struct perf_evsel *perf_evsel__new_pgfault(u64 config)
2101 {
2102         struct perf_evsel *evsel;
2103         struct perf_event_attr attr = {
2104                 .type = PERF_TYPE_SOFTWARE,
2105                 .mmap_data = 1,
2106         };
2107
2108         attr.config = config;
2109         attr.sample_period = 1;
2110
2111         event_attr_init(&attr);
2112
2113         evsel = perf_evsel__new(&attr);
2114         if (evsel)
2115                 evsel->handler = trace__pgfault;
2116
2117         return evsel;
2118 }
2119
2120 static void trace__handle_event(struct trace *trace, union perf_event *event, struct perf_sample *sample)
2121 {
2122         const u32 type = event->header.type;
2123         struct perf_evsel *evsel;
2124
2125         if (type != PERF_RECORD_SAMPLE) {
2126                 trace__process_event(trace, trace->host, event, sample);
2127                 return;
2128         }
2129
2130         evsel = perf_evlist__id2evsel(trace->evlist, sample->id);
2131         if (evsel == NULL) {
2132                 fprintf(trace->output, "Unknown tp ID %" PRIu64 ", skipping...\n", sample->id);
2133                 return;
2134         }
2135
2136         trace__set_base_time(trace, evsel, sample);
2137
2138         if (evsel->attr.type == PERF_TYPE_TRACEPOINT &&
2139             sample->raw_data == NULL) {
2140                 fprintf(trace->output, "%s sample with no payload for tid: %d, cpu %d, raw_size=%d, skipping...\n",
2141                        perf_evsel__name(evsel), sample->tid,
2142                        sample->cpu, sample->raw_size);
2143         } else {
2144                 tracepoint_handler handler = evsel->handler;
2145                 handler(trace, evsel, event, sample);
2146         }
2147 }
2148
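     /*
      * Create the raw_syscalls:sys_enter/sys_exit evsels, wire up their handlers
      * and the 'args'/'ret' field accessors, and add them to the evlist.  When
      * callchains are enabled, the sys_exit callchain is restricted to user
      * space unless kernel syscall callchains were explicitly requested.
      */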
2149 static int trace__add_syscall_newtp(struct trace *trace)
2150 {
2151         int ret = -1;
2152         struct perf_evlist *evlist = trace->evlist;
2153         struct perf_evsel *sys_enter, *sys_exit;
2154
2155         sys_enter = perf_evsel__syscall_newtp("sys_enter", trace__sys_enter);
2156         if (sys_enter == NULL)
2157                 goto out;
2158
2159         if (perf_evsel__init_sc_tp_ptr_field(sys_enter, args))
2160                 goto out_delete_sys_enter;
2161
2162         sys_exit = perf_evsel__syscall_newtp("sys_exit", trace__sys_exit);
2163         if (sys_exit == NULL)
2164                 goto out_delete_sys_enter;
2165
2166         if (perf_evsel__init_sc_tp_uint_field(sys_exit, ret))
2167                 goto out_delete_sys_exit;
2168
2169         perf_evlist__add(evlist, sys_enter);
2170         perf_evlist__add(evlist, sys_exit);
2171
2172         if (callchain_param.enabled && !trace->kernel_syscallchains) {
2173                 /*
2174                  * We're interested only in the user space callchain
2175                  * leading to the syscall; allow overriding that for
2176                  * debugging reasons using --kernel_syscall_callchains
2177                  */
2178                 sys_exit->attr.exclude_callchain_kernel = 1;
2179         }
2180
2181         trace->syscalls.events.sys_enter = sys_enter;
2182         trace->syscalls.events.sys_exit  = sys_exit;
2183
2184         ret = 0;
2185 out:
2186         return ret;
2187
2188 out_delete_sys_exit:
2189         perf_evsel__delete_priv(sys_exit);
2190 out_delete_sys_enter:
2191         perf_evsel__delete_priv(sys_enter);
2192         goto out;
2193 }
2194
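     /*
      * Turn the validated syscall ids into a tracepoint filter and append it to
      * both the sys_enter and sys_exit events.  For something like '-e open,close'
      * the generated expression should look roughly like "id == 2 || id == 3"
      * (ids are per-arch), or a negated "id != ... && ..." chain when the
      * qualifier list was prefixed with '!'.
      */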
2195 static int trace__set_ev_qualifier_filter(struct trace *trace)
2196 {
2197         int err = -1;
2198         char *filter = asprintf_expr_inout_ints("id", !trace->not_ev_qualifier,
2199                                                 trace->ev_qualifier_ids.nr,
2200                                                 trace->ev_qualifier_ids.entries);
2201
2202         if (filter == NULL)
2203                 goto out_enomem;
2204
2205         if (!perf_evsel__append_filter(trace->syscalls.events.sys_enter, "&&", filter))
2206                 err = perf_evsel__append_filter(trace->syscalls.events.sys_exit, "&&", filter);
2207
2208         free(filter);
2209 out:
2210         return err;
2211 out_enomem:
2212         errno = ENOMEM;
2213         goto out;
2214 }
2215
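     /*
      * Live mode: set up the requested evsels (syscalls, vfs_getname, page
      * faults, sched_stat_runtime), create the maps for the target, fork the
      * workload if one was given, then consume the ring buffers, dispatching
      * each sample to its handler until the workload finishes or the user
      * interrupts us.
      */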
2216 static int trace__run(struct trace *trace, int argc, const char **argv)
2217 {
2218         struct perf_evlist *evlist = trace->evlist;
2219         struct perf_evsel *evsel, *pgfault_maj = NULL, *pgfault_min = NULL;
2220         int err = -1, i;
2221         unsigned long before;
2222         const bool forks = argc > 0;
2223         bool draining = false;
2224
2225         trace->live = true;
2226
2227         if (trace->trace_syscalls && trace__add_syscall_newtp(trace))
2228                 goto out_error_raw_syscalls;
2229
2230         if (trace->trace_syscalls)
2231                 trace->vfs_getname = perf_evlist__add_vfs_getname(evlist);
2232
2233         if ((trace->trace_pgfaults & TRACE_PFMAJ)) {
2234                 pgfault_maj = perf_evsel__new_pgfault(PERF_COUNT_SW_PAGE_FAULTS_MAJ);
2235                 if (pgfault_maj == NULL)
2236                         goto out_error_mem;
2237                 perf_evlist__add(evlist, pgfault_maj);
2238         }
2239
2240         if ((trace->trace_pgfaults & TRACE_PFMIN)) {
2241                 pgfault_min = perf_evsel__new_pgfault(PERF_COUNT_SW_PAGE_FAULTS_MIN);
2242                 if (pgfault_min == NULL)
2243                         goto out_error_mem;
2244                 perf_evlist__add(evlist, pgfault_min);
2245         }
2246
2247         if (trace->sched &&
2248             perf_evlist__add_newtp(evlist, "sched", "sched_stat_runtime",
2249                                    trace__sched_stat_runtime))
2250                 goto out_error_sched_stat_runtime;
2251
2252         err = perf_evlist__create_maps(evlist, &trace->opts.target);
2253         if (err < 0) {
2254                 fprintf(trace->output, "Problems parsing the target to trace, check your options!\n");
2255                 goto out_delete_evlist;
2256         }
2257
2258         err = trace__symbols_init(trace, evlist);
2259         if (err < 0) {
2260                 fprintf(trace->output, "Problems initializing symbol libraries!\n");
2261                 goto out_delete_evlist;
2262         }
2263
2264         perf_evlist__config(evlist, &trace->opts, NULL);
2265
2266         if (callchain_param.enabled) {
2267                 bool use_identifier = false;
2268
2269                 if (trace->syscalls.events.sys_exit) {
2270                         perf_evsel__config_callchain(trace->syscalls.events.sys_exit,
2271                                                      &trace->opts, &callchain_param);
2272                         use_identifier = true;
2273                 }
2274
2275                 if (pgfault_maj) {
2276                         perf_evsel__config_callchain(pgfault_maj, &trace->opts, &callchain_param);
2277                         use_identifier = true;
2278                 }
2279
2280                 if (pgfault_min) {
2281                         perf_evsel__config_callchain(pgfault_min, &trace->opts, &callchain_param);
2282                         use_identifier = true;
2283                 }
2284
2285                 if (use_identifier) {
2286                        /*
2287                         * Now we have evsels with different sample_ids, use
2288                         * PERF_SAMPLE_IDENTIFIER to map from sample to evsel
2289                         * from a fixed position in each ring buffer record.
2290                         *
2291                         * As of the changeset introducing this comment, this
2292                         * isn't strictly needed, as the fields that can come before
2293                         * PERF_SAMPLE_ID are all used, but we'll probably disable
2294                         * some of those for things like copying the payload of
2295                         * pointer syscall arguments, and for vfs_getname we don't
2296                         * need PERF_SAMPLE_ADDR and PERF_SAMPLE_IP, so do this
2297                         * here as a warning we need to use PERF_SAMPLE_IDENTIFIER.
2298                         */
2299                         perf_evlist__set_sample_bit(evlist, IDENTIFIER);
2300                         perf_evlist__reset_sample_bit(evlist, ID);
2301                 }
2302         }
2303
2304         signal(SIGCHLD, sig_handler);
2305         signal(SIGINT, sig_handler);
2306
2307         if (forks) {
2308                 err = perf_evlist__prepare_workload(evlist, &trace->opts.target,
2309                                                     argv, false, NULL);
2310                 if (err < 0) {
2311                         fprintf(trace->output, "Couldn't run the workload!\n");
2312                         goto out_delete_evlist;
2313                 }
2314         }
2315
2316         err = perf_evlist__open(evlist);
2317         if (err < 0)
2318                 goto out_error_open;
2319
2320         err = bpf__apply_obj_config();
2321         if (err) {
2322                 char errbuf[BUFSIZ];
2323
2324                 bpf__strerror_apply_obj_config(err, errbuf, sizeof(errbuf));
2325                 pr_err("ERROR: Apply config to BPF failed: %s\n",
2326                          errbuf);
2327                 goto out_error_open;
2328         }
2329
2330         /*
2331          * Better not use !target__has_task() here because we need to cover the
2332          * case where no threads were specified in the command line, but a
2333          * workload was, and in that case we will fill in the thread_map when
2334          * we fork the workload in perf_evlist__prepare_workload.
2335          */
2336         if (trace->filter_pids.nr > 0)
2337                 err = perf_evlist__set_filter_pids(evlist, trace->filter_pids.nr, trace->filter_pids.entries);
2338         else if (thread_map__pid(evlist->threads, 0) == -1)
2339                 err = perf_evlist__set_filter_pid(evlist, getpid());
2340
2341         if (err < 0)
2342                 goto out_error_mem;
2343
2344         if (trace->ev_qualifier_ids.nr > 0) {
2345                 err = trace__set_ev_qualifier_filter(trace);
2346                 if (err < 0)
2347                         goto out_errno;
2348
2349                 pr_debug("event qualifier tracepoint filter: %s\n",
2350                          trace->syscalls.events.sys_exit->filter);
2351         }
2352
2353         err = perf_evlist__apply_filters(evlist, &evsel);
2354         if (err < 0)
2355                 goto out_error_apply_filters;
2356
2357         err = perf_evlist__mmap(evlist, trace->opts.mmap_pages, false);
2358         if (err < 0)
2359                 goto out_error_mmap;
2360
2361         if (!target__none(&trace->opts.target))
2362                 perf_evlist__enable(evlist);
2363
2364         if (forks)
2365                 perf_evlist__start_workload(evlist);
2366
2367         trace->multiple_threads = thread_map__pid(evlist->threads, 0) == -1 ||
2368                                   evlist->threads->nr > 1 ||
2369                                   perf_evlist__first(evlist)->attr.inherit;
2370 again:
2371         before = trace->nr_events;
2372
2373         for (i = 0; i < evlist->nr_mmaps; i++) {
2374                 union perf_event *event;
2375
2376                 while ((event = perf_evlist__mmap_read(evlist, i)) != NULL) {
2377                         struct perf_sample sample;
2378
2379                         ++trace->nr_events;
2380
2381                         err = perf_evlist__parse_sample(evlist, event, &sample);
2382                         if (err) {
2383                                 fprintf(trace->output, "Can't parse sample, err = %d, skipping...\n", err);
2384                                 goto next_event;
2385                         }
2386
2387                         trace__handle_event(trace, event, &sample);
2388 next_event:
2389                         perf_evlist__mmap_consume(evlist, i);
2390
2391                         if (interrupted)
2392                                 goto out_disable;
2393
2394                         if (done && !draining) {
2395                                 perf_evlist__disable(evlist);
2396                                 draining = true;
2397                         }
2398                 }
2399         }
2400
2401         if (trace->nr_events == before) {
2402                 int timeout = done ? 100 : -1;
2403
2404                 if (!draining && perf_evlist__poll(evlist, timeout) > 0) {
2405                         if (perf_evlist__filter_pollfd(evlist, POLLERR | POLLHUP) == 0)
2406                                 draining = true;
2407
2408                         goto again;
2409                 }
2410         } else {
2411                 goto again;
2412         }
2413
2414 out_disable:
2415         thread__zput(trace->current);
2416
2417         perf_evlist__disable(evlist);
2418
2419         if (!err) {
2420                 if (trace->summary)
2421                         trace__fprintf_thread_summary(trace, trace->output);
2422
2423                 if (trace->show_tool_stats) {
2424                         fprintf(trace->output, "Stats:\n "
2425                                                " vfs_getname : %" PRIu64 "\n"
2426                                                " proc_getname: %" PRIu64 "\n",
2427                                 trace->stats.vfs_getname,
2428                                 trace->stats.proc_getname);
2429                 }
2430         }
2431
2432 out_delete_evlist:
2433         perf_evlist__delete(evlist);
2434         trace->evlist = NULL;
2435         trace->live = false;
2436         return err;
2437 {
2438         char errbuf[BUFSIZ];
2439
2440 out_error_sched_stat_runtime:
2441         tracing_path__strerror_open_tp(errno, errbuf, sizeof(errbuf), "sched", "sched_stat_runtime");
2442         goto out_error;
2443
2444 out_error_raw_syscalls:
2445         tracing_path__strerror_open_tp(errno, errbuf, sizeof(errbuf), "raw_syscalls", "sys_(enter|exit)");
2446         goto out_error;
2447
2448 out_error_mmap:
2449         perf_evlist__strerror_mmap(evlist, errno, errbuf, sizeof(errbuf));
2450         goto out_error;
2451
2452 out_error_open:
2453         perf_evlist__strerror_open(evlist, errno, errbuf, sizeof(errbuf));
2454
2455 out_error:
2456         fprintf(trace->output, "%s\n", errbuf);
2457         goto out_delete_evlist;
2458
2459 out_error_apply_filters:
2460         fprintf(trace->output,
2461                 "Failed to set filter \"%s\" on event %s with %d (%s)\n",
2462                 evsel->filter, perf_evsel__name(evsel), errno,
2463                 strerror_r(errno, errbuf, sizeof(errbuf)));
2464         goto out_delete_evlist;
2465 }
2466 out_error_mem:
2467         fprintf(trace->output, "Not enough memory to run!\n");
2468         goto out_delete_evlist;
2469
2470 out_errno:
2471         fprintf(trace->output, "errno=%d,%s\n", errno, strerror(errno));
2472         goto out_delete_evlist;
2473 }
2474
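     /*
      * 'perf trace -i perf.data': replay a previously recorded session through
      * the same sys_enter/sys_exit/page fault handlers used in live mode, using
      * the session's ordered events so timestamps come out monotonically.
      */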
2475 static int trace__replay(struct trace *trace)
2476 {
2477         const struct perf_evsel_str_handler handlers[] = {
2478                 { "probe:vfs_getname",       trace__vfs_getname, },
2479         };
2480         struct perf_data_file file = {
2481                 .path  = input_name,
2482                 .mode  = PERF_DATA_MODE_READ,
2483                 .force = trace->force,
2484         };
2485         struct perf_session *session;
2486         struct perf_evsel *evsel;
2487         int err = -1;
2488
2489         trace->tool.sample        = trace__process_sample;
2490         trace->tool.mmap          = perf_event__process_mmap;
2491         trace->tool.mmap2         = perf_event__process_mmap2;
2492         trace->tool.comm          = perf_event__process_comm;
2493         trace->tool.exit          = perf_event__process_exit;
2494         trace->tool.fork          = perf_event__process_fork;
2495         trace->tool.attr          = perf_event__process_attr;
2496         trace->tool.tracing_data = perf_event__process_tracing_data;
2497         trace->tool.build_id      = perf_event__process_build_id;
2498
2499         trace->tool.ordered_events = true;
2500         trace->tool.ordering_requires_timestamps = true;
2501
2502         /* add tid to output */
2503         trace->multiple_threads = true;
2504
2505         session = perf_session__new(&file, false, &trace->tool);
2506         if (session == NULL)
2507                 return -1;
2508
2509         if (symbol__init(&session->header.env) < 0)
2510                 goto out;
2511
2512         trace->host = &session->machines.host;
2513
2514         err = perf_session__set_tracepoints_handlers(session, handlers);
2515         if (err)
2516                 goto out;
2517
2518         evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
2519                                                      "raw_syscalls:sys_enter");
2520         /* older kernels have syscalls tp versus raw_syscalls */
2521         if (evsel == NULL)
2522                 evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
2523                                                              "syscalls:sys_enter");
2524
2525         if (evsel &&
2526             (perf_evsel__init_syscall_tp(evsel, trace__sys_enter) < 0 ||
2527             perf_evsel__init_sc_tp_ptr_field(evsel, args))) {
2528                 pr_err("Error initializing raw_syscalls:sys_enter event\n");
2529                 goto out;
2530         }
2531
2532         evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
2533                                                      "raw_syscalls:sys_exit");
2534         if (evsel == NULL)
2535                 evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
2536                                                              "syscalls:sys_exit");
2537         if (evsel &&
2538             (perf_evsel__init_syscall_tp(evsel, trace__sys_exit) < 0 ||
2539             perf_evsel__init_sc_tp_uint_field(evsel, ret))) {
2540                 pr_err("Error initializing raw_syscalls:sys_exit event\n");
2541                 goto out;
2542         }
2543
2544         evlist__for_each(session->evlist, evsel) {
2545                 if (evsel->attr.type == PERF_TYPE_SOFTWARE &&
2546                     (evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MAJ ||
2547                      evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MIN ||
2548                      evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS))
2549                         evsel->handler = trace__pgfault;
2550         }
2551
2552         err = parse_target_str(trace);
2553         if (err != 0)
2554                 goto out;
2555
2556         setup_pager();
2557
2558         err = perf_session__process_events(session);
2559         if (err)
2560                 pr_err("Failed to process events, error %d", err);
2561
2562         else if (trace->summary)
2563                 trace__fprintf_thread_summary(trace, trace->output);
2564
2565 out:
2566         perf_session__delete(session);
2567
2568         return err;
2569 }
2570
2571 static size_t trace__fprintf_threads_header(FILE *fp)
2572 {
2573         size_t printed;
2574
2575         printed  = fprintf(fp, "\n Summary of events:\n\n");
2576
2577         return printed;
2578 }
2579
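     /*
      * Re-sort the per-thread syscall stats intlist by total time spent (msecs)
      * using the rb_resort helpers; resort_rb__for_each() below then walks the
      * entries via the syscall_stats_entry cursor.
      */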
2580 DEFINE_RESORT_RB(syscall_stats, a->msecs > b->msecs,
2581         struct stats    *stats;
2582         double          msecs;
2583         int             syscall;
2584 )
2585 {
2586         struct int_node *source = rb_entry(nd, struct int_node, rb_node);
2587         struct stats *stats = source->priv;
2588
2589         entry->syscall = source->i;
2590         entry->stats   = stats;
2591         entry->msecs   = stats ? (u64)stats->n * (avg_stats(stats) / NSEC_PER_MSEC) : 0;
2592 }
2593
2594 static size_t thread__dump_stats(struct thread_trace *ttrace,
2595                                  struct trace *trace, FILE *fp)
2596 {
2597         size_t printed = 0;
2598         struct syscall *sc;
2599         struct rb_node *nd;
2600         DECLARE_RESORT_RB_INTLIST(syscall_stats, ttrace->syscall_stats);
2601
2602         if (syscall_stats == NULL)
2603                 return 0;
2604
2605         printed += fprintf(fp, "\n");
2606
2607         printed += fprintf(fp, "   syscall            calls    total       min       avg       max      stddev\n");
2608         printed += fprintf(fp, "                               (msec)    (msec)    (msec)    (msec)        (%%)\n");
2609         printed += fprintf(fp, "   --------------- -------- --------- --------- --------- ---------     ------\n");
2610
2611         resort_rb__for_each(nd, syscall_stats) {
2612                 struct stats *stats = syscall_stats_entry->stats;
2613                 if (stats) {
2614                         double min = (double)(stats->min) / NSEC_PER_MSEC;
2615                         double max = (double)(stats->max) / NSEC_PER_MSEC;
2616                         double avg = avg_stats(stats);
2617                         double pct;
2618                         u64 n = (u64) stats->n;
2619
2620                         pct = avg ? 100.0 * stddev_stats(stats)/avg : 0.0;
2621                         avg /= NSEC_PER_MSEC;
2622
2623                         sc = &trace->syscalls.table[syscall_stats_entry->syscall];
2624                         printed += fprintf(fp, "   %-15s", sc->name);
2625                         printed += fprintf(fp, " %8" PRIu64 " %9.3f %9.3f %9.3f",
2626                                            n, syscall_stats_entry->msecs, min, avg);
2627                         printed += fprintf(fp, " %9.3f %9.2f%%\n", max, pct);
2628                 }
2629         }
2630
2631         resort_rb__delete(syscall_stats);
2632         printed += fprintf(fp, "\n\n");
2633
2634         return printed;
2635 }
2636
2637 static size_t trace__fprintf_thread(FILE *fp, struct thread *thread, struct trace *trace)
2638 {
2639         size_t printed = 0;
2640         struct thread_trace *ttrace = thread__priv(thread);
2641         double ratio;
2642
2643         if (ttrace == NULL)
2644                 return 0;
2645
2646         ratio = (double)ttrace->nr_events / trace->nr_events * 100.0;
2647
2648         printed += fprintf(fp, " %s (%d), ", thread__comm_str(thread), thread->tid);
2649         printed += fprintf(fp, "%lu events, ", ttrace->nr_events);
2650         printed += fprintf(fp, "%.1f%%", ratio);
2651         if (ttrace->pfmaj)
2652                 printed += fprintf(fp, ", %lu majfaults", ttrace->pfmaj);
2653         if (ttrace->pfmin)
2654                 printed += fprintf(fp, ", %lu minfaults", ttrace->pfmin);
2655         if (trace->sched)
2656                 printed += fprintf(fp, ", %.3f msec\n", ttrace->runtime_ms);
2657         else if (fputc('\n', fp) != EOF)
2658                 ++printed;
2659
2660         printed += thread__dump_stats(ttrace, trace, fp);
2661
2662         return printed;
2663 }
2664
2665 static unsigned long thread__nr_events(struct thread_trace *ttrace)
2666 {
2667         return ttrace ? ttrace->nr_events : 0;
2668 }
2669
2670 DEFINE_RESORT_RB(threads, (thread__nr_events(a->thread->priv) < thread__nr_events(b->thread->priv)),
2671         struct thread *thread;
2672 )
2673 {
2674         entry->thread = rb_entry(nd, struct thread, rb_node);
2675 }
2676
2677 static size_t trace__fprintf_thread_summary(struct trace *trace, FILE *fp)
2678 {
2679         DECLARE_RESORT_RB_MACHINE_THREADS(threads, trace->host);
2680         size_t printed = trace__fprintf_threads_header(fp);
2681         struct rb_node *nd;
2682
2683         if (threads == NULL) {
2684                 fprintf(fp, "%s", "Error sorting output by nr_events!\n");
2685                 return 0;
2686         }
2687
2688         resort_rb__for_each(nd, threads)
2689                 printed += trace__fprintf_thread(fp, threads_entry->thread, trace);
2690
2691         resort_rb__delete(threads);
2692
2693         return printed;
2694 }
2695
2696 static int trace__set_duration(const struct option *opt, const char *str,
2697                                int unset __maybe_unused)
2698 {
2699         struct trace *trace = opt->value;
2700
2701         trace->duration_filter = atof(str);
2702         return 0;
2703 }
2704
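     /*
      * --filter-pids: parse the CSV list of pids to be filtered out by the
      * kernel and prepend our own pid, so that perf trace doesn't end up tracing
      * the feedback loop of its own output.
      */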
2705 static int trace__set_filter_pids(const struct option *opt, const char *str,
2706                                   int unset __maybe_unused)
2707 {
2708         int ret = -1;
2709         size_t i;
2710         struct trace *trace = opt->value;
2711         /*
2712          * FIXME: introduce an intarray class, plain parse csv and create a
2713          * { int nr, int entries[] } struct...
2714          */
2715         struct intlist *list = intlist__new(str);
2716
2717         if (list == NULL)
2718                 return -1;
2719
2720         i = trace->filter_pids.nr = intlist__nr_entries(list) + 1;
2721         trace->filter_pids.entries = calloc(i, sizeof(pid_t));
2722
2723         if (trace->filter_pids.entries == NULL)
2724                 goto out;
2725
2726         trace->filter_pids.entries[0] = getpid();
2727
2728         for (i = 1; i < trace->filter_pids.nr; ++i)
2729                 trace->filter_pids.entries[i] = intlist__entry(list, i - 1)->i;
2730
2731         intlist__delete(list);
2732         ret = 0;
2733 out:
2734         return ret;
2735 }
2736
2737 static int trace__open_output(struct trace *trace, const char *filename)
2738 {
2739         struct stat st;
2740
2741         if (!stat(filename, &st) && st.st_size) {
2742                 char oldname[PATH_MAX];
2743
2744                 scnprintf(oldname, sizeof(oldname), "%s.old", filename);
2745                 unlink(oldname);
2746                 rename(filename, oldname);
2747         }
2748
2749         trace->output = fopen(filename, "w");
2750
2751         return trace->output == NULL ? -errno : 0;
2752 }
2753
2754 static int parse_pagefaults(const struct option *opt, const char *str,
2755                             int unset __maybe_unused)
2756 {
2757         int *trace_pgfaults = opt->value;
2758
2759         if (strcmp(str, "all") == 0)
2760                 *trace_pgfaults |= TRACE_PFMAJ | TRACE_PFMIN;
2761         else if (strcmp(str, "maj") == 0)
2762                 *trace_pgfaults |= TRACE_PFMAJ;
2763         else if (strcmp(str, "min") == 0)
2764                 *trace_pgfaults |= TRACE_PFMIN;
2765         else
2766                 return -1;
2767
2768         return 0;
2769 }
2770
2771 static void evlist__set_evsel_handler(struct perf_evlist *evlist, void *handler)
2772 {
2773         struct perf_evsel *evsel;
2774
2775         evlist__for_each(evlist, evsel)
2776                 evsel->handler = handler;
2777 }
2778
2779 int cmd_trace(int argc, const char **argv, const char *prefix __maybe_unused)
2780 {
2781         const char *trace_usage[] = {
2782                 "perf trace [<options>] [<command>]",
2783                 "perf trace [<options>] -- <command> [<options>]",
2784                 "perf trace record [<options>] [<command>]",
2785                 "perf trace record [<options>] -- <command> [<options>]",
2786                 NULL
2787         };
2788         struct trace trace = {
2789                 .syscalls = {
2790                         .max = -1,
2791                 },
2792                 .opts = {
2793                         .target = {
2794                                 .uid       = UINT_MAX,
2795                                 .uses_mmap = true,
2796                         },
2797                         .user_freq     = UINT_MAX,
2798                         .user_interval = ULLONG_MAX,
2799                         .no_buffering  = true,
2800                         .mmap_pages    = UINT_MAX,
2801                         .proc_map_timeout  = 500,
2802                 },
2803                 .output = stderr,
2804                 .show_comm = true,
2805                 .trace_syscalls = true,
2806                 .kernel_syscallchains = false,
2807                 .max_stack = UINT_MAX,
2808         };
2809         const char *output_name = NULL;
2810         const char *ev_qualifier_str = NULL;
2811         const struct option trace_options[] = {
2812         OPT_CALLBACK(0, "event", &trace.evlist, "event",
2813                      "event selector. use 'perf list' to list available events",
2814                      parse_events_option),
2815         OPT_BOOLEAN(0, "comm", &trace.show_comm,
2816                     "show the thread COMM next to its id"),
2817         OPT_BOOLEAN(0, "tool_stats", &trace.show_tool_stats, "show tool stats"),
2818         OPT_STRING('e', "expr", &ev_qualifier_str, "expr", "list of syscalls to trace"),
2819         OPT_STRING('o', "output", &output_name, "file", "output file name"),
2820         OPT_STRING('i', "input", &input_name, "file", "Analyze events in file"),
2821         OPT_STRING('p', "pid", &trace.opts.target.pid, "pid",
2822                     "trace events on existing process id"),
2823         OPT_STRING('t', "tid", &trace.opts.target.tid, "tid",
2824                     "trace events on existing thread id"),
2825         OPT_CALLBACK(0, "filter-pids", &trace, "CSV list of pids",
2826                      "pids to filter (by the kernel)", trace__set_filter_pids),
2827         OPT_BOOLEAN('a', "all-cpus", &trace.opts.target.system_wide,
2828                     "system-wide collection from all CPUs"),
2829         OPT_STRING('C', "cpu", &trace.opts.target.cpu_list, "cpu",
2830                     "list of cpus to monitor"),
2831         OPT_BOOLEAN(0, "no-inherit", &trace.opts.no_inherit,
2832                     "child tasks do not inherit counters"),
2833         OPT_CALLBACK('m', "mmap-pages", &trace.opts.mmap_pages, "pages",
2834                      "number of mmap data pages",
2835                      perf_evlist__parse_mmap_pages),
2836         OPT_STRING('u', "uid", &trace.opts.target.uid_str, "user",
2837                    "user to profile"),
2838         OPT_CALLBACK(0, "duration", &trace, "float",
2839                      "show only events with duration > N.M ms",
2840                      trace__set_duration),
2841         OPT_BOOLEAN(0, "sched", &trace.sched, "show blocking scheduler events"),
2842         OPT_INCR('v', "verbose", &verbose, "be more verbose"),
2843         OPT_BOOLEAN('T', "time", &trace.full_time,
2844                     "Show full timestamp, not time relative to first start"),
2845         OPT_BOOLEAN('s', "summary", &trace.summary_only,
2846                     "Show only syscall summary with statistics"),
2847         OPT_BOOLEAN('S', "with-summary", &trace.summary,
2848                     "Show all syscalls and summary with statistics"),
2849         OPT_CALLBACK_DEFAULT('F', "pf", &trace.trace_pgfaults, "all|maj|min",
2850                      "Trace pagefaults", parse_pagefaults, "maj"),
2851         OPT_BOOLEAN(0, "syscalls", &trace.trace_syscalls, "Trace syscalls"),
2852         OPT_BOOLEAN('f', "force", &trace.force, "don't complain, do it"),
2853         OPT_CALLBACK(0, "call-graph", &trace.opts,
2854                      "record_mode[,record_size]", record_callchain_help,
2855                      &record_parse_callchain_opt),
2856         OPT_BOOLEAN(0, "kernel-syscall-graph", &trace.kernel_syscallchains,
2857                     "Show the kernel callchains on the syscall exit path"),
2858         OPT_UINTEGER(0, "min-stack", &trace.min_stack,
2859                      "Set the minimum stack depth when parsing the callchain; "
2860                      "anything below the specified depth will be ignored."),
2861         OPT_UINTEGER(0, "max-stack", &trace.max_stack,
2862                      "Set the maximum stack depth when parsing the callchain; "
2863                      "anything beyond the specified depth will be ignored. "
2864                      "Default: kernel.perf_event_max_stack or " __stringify(PERF_MAX_STACK_DEPTH)),
2865         OPT_UINTEGER(0, "proc-map-timeout", &trace.opts.proc_map_timeout,
2866                         "per thread proc mmap processing timeout in ms"),
2867         OPT_END()
2868         };
2869         bool __maybe_unused max_stack_user_set = true;
2870         bool mmap_pages_user_set = true;
2871         const char * const trace_subcommands[] = { "record", NULL };
2872         int err;
2873         char bf[BUFSIZ];
2874
2875         signal(SIGSEGV, sighandler_dump_stack);
2876         signal(SIGFPE, sighandler_dump_stack);
2877
2878         trace.evlist = perf_evlist__new();
2879         trace.sctbl = syscalltbl__new();
2880
2881         if (trace.evlist == NULL || trace.sctbl == NULL) {
2882                 pr_err("Not enough memory to run!\n");
2883                 err = -ENOMEM;
2884                 goto out;
2885         }
2886
2887         argc = parse_options_subcommand(argc, argv, trace_options, trace_subcommands,
2888                                  trace_usage, PARSE_OPT_STOP_AT_NON_OPTION);
2889
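        /*
         * Set up the event that eBPF programs loaded via --event use as
         * their "stdout" output channel.
         */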
2890         err = bpf__setup_stdout(trace.evlist);
2891         if (err) {
2892                 bpf__strerror_setup_stdout(trace.evlist, err, bf, sizeof(bf));
2893                 pr_err("ERROR: Setup BPF stdout failed: %s\n", bf);
2894                 goto out;
2895         }
2896
2897         err = -1;
2898
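        /* Page fault tracing needs the faulting address and a timestamp in each sample. */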
2899         if (trace.trace_pgfaults) {
2900                 trace.opts.sample_address = true;
2901                 trace.opts.sample_time = true;
2902         }
2903
2904         if (trace.opts.mmap_pages == UINT_MAX)
2905                 mmap_pages_user_set = false;
2906
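        /*
         * --max-stack not given: default to the current
         * kernel.perf_event_max_stack sysctl for live sessions, and to the
         * compile-time PERF_MAX_STACK_DEPTH when replaying a perf.data file.
         */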
2907         if (trace.max_stack == UINT_MAX) {
2908                 trace.max_stack = input_name ? PERF_MAX_STACK_DEPTH : sysctl_perf_event_max_stack;
2909                 max_stack_user_set = false;
2910         }
2911
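        /*
         * Asking for --min-stack/--max-stack implies collecting callchains;
         * if none were configured, default to DWARF unwinding when built
         * with that support.
         */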
2912 #ifdef HAVE_DWARF_UNWIND_SUPPORT
2913         if ((trace.min_stack || max_stack_user_set) && !callchain_param.enabled)
2914                 record_opts__parse_callchain(&trace.opts, &callchain_param, "dwarf", false);
2915 #endif
2916
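        /*
         * Callchain samples are considerably bigger, so when running as root
         * without an explicit --mmap-pages, bump the ring buffer to four
         * times the perf_event_mlock_kb limit.
         */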
2917         if (callchain_param.enabled) {
2918                 if (!mmap_pages_user_set && geteuid() == 0)
2919                         trace.opts.mmap_pages = perf_event_mlock_kb_in_pages() * 4;
2920
2921                 symbol_conf.use_callchain = true;
2922         }
2923
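        /* Events added with --event are handled by the generic trace__event_handler. */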
2924         if (trace.evlist->nr_entries > 0)
2925                 evlist__set_evsel_handler(trace.evlist, trace__event_handler);
2926
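        /* "perf trace record ..." is handed off to trace__record(), minus the subcommand name. */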
2927         if ((argc >= 1) && (strcmp(argv[0], "record") == 0))
2928                 return trace__record(&trace, argc-1, &argv[1]);
2929
2930         /* summary_only implies summary option, but don't overwrite summary if set */
2931         if (trace.summary_only)
2932                 trace.summary = trace.summary_only;
2933
2934         if (!trace.trace_syscalls && !trace.trace_pgfaults &&
2935             trace.evlist->nr_entries == 0 /* Was --event used? */) {
2936                 pr_err("Please specify something to trace.\n");
2937                 return -1;
2938         }
2939
2940         if (!trace.trace_syscalls && ev_qualifier_str) {
2941                 pr_err("The -e option can't be used with --no-syscalls.\n");
2942                 goto out;
2943         }
2944
2945         if (output_name != NULL) {
2946                 err = trace__open_output(&trace, output_name);
2947                 if (err < 0) {
2948                         perror("failed to create output file");
2949                         goto out;
2950                 }
2951         }
2952
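        /*
         * Look up the syscall table id for "open" once; presumably used
         * later to tie filenames fetched via vfs_getname to open calls.
         */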
2953         trace.open_id = syscalltbl__id(trace.sctbl, "open");
2954
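        /*
         * A leading '!' in the -e expression inverts the qualifier: trace
         * every syscall except the ones listed. Names are resolved with the
         * strace groups directory as a fallback, so predefined groups can be
         * used as syscall sets.
         */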
2955         if (ev_qualifier_str != NULL) {
2956                 const char *s = ev_qualifier_str;
2957                 struct strlist_config slist_config = {
2958                         .dirname = system_path(STRACE_GROUPS_DIR),
2959                 };
2960
2961                 trace.not_ev_qualifier = *s == '!';
2962                 if (trace.not_ev_qualifier)
2963                         ++s;
2964                 trace.ev_qualifier = strlist__new(s, &slist_config);
2965                 if (trace.ev_qualifier == NULL) {
2966                 fputs("Not enough memory to parse event qualifier\n",
2967                               trace.output);
2968                         err = -ENOMEM;
2969                         goto out_close;
2970                 }
2971
2972                 err = trace__validate_ev_qualifier(&trace);
2973                 if (err)
2974                         goto out_close;
2975         }
2976
2977         err = target__validate(&trace.opts.target);
2978         if (err) {
2979                 target__strerror(&trace.opts.target, err, bf, sizeof(bf));
2980                 fprintf(trace.output, "%s", bf);
2981                 goto out_close;
2982         }
2983
2984         err = target__parse_uid(&trace.opts.target);
2985         if (err) {
2986                 target__strerror(&trace.opts.target, err, bf, sizeof(bf));
2987                 fprintf(trace.output, "%s", bf);
2988                 goto out_close;
2989         }
2990
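        /* No workload and no target given: default to system-wide tracing. */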
2991         if (!argc && target__none(&trace.opts.target))
2992                 trace.opts.target.system_wide = true;
2993
2994         if (input_name)
2995                 err = trace__replay(&trace);
2996         else
2997                 err = trace__run(&trace, argc, argv);
2998
2999 out_close:
3000         if (output_name != NULL)
3001                 fclose(trace.output);
3002 out:
3003         return err;
3004 }