Merge tag 'arm64-perf' of git://git.kernel.org/pub/scm/linux/kernel/git/arm64/linux
[linux-2.6-block.git] / tools / perf / builtin-trace.c
1 /*
2  * builtin-trace.c
3  *
4  * Builtin 'trace' command:
5  *
6  * Display a continuously updated trace of any workload, CPU, specific PID,
7  * system wide, etc.  Default format is loosely strace like, but any other
8  * event may be specified using --event.
9  *
10  * Copyright (C) 2012, 2013, 2014, 2015 Red Hat Inc, Arnaldo Carvalho de Melo <acme@redhat.com>
11  *
12  * Initially based on the 'trace' prototype by Thomas Gleixner:
13  *
14  * http://lwn.net/Articles/415728/ ("Announcing a new utility: 'trace'")
15  *
16  * Released under the GPL v2. (and only v2, not any later version)
17  */
18
19 #include <traceevent/event-parse.h>
20 #include <api/fs/tracing_path.h>
21 #include "builtin.h"
22 #include "util/color.h"
23 #include "util/debug.h"
24 #include "util/evlist.h"
25 #include <subcmd/exec-cmd.h>
26 #include "util/machine.h"
27 #include "util/session.h"
28 #include "util/thread.h"
29 #include <subcmd/parse-options.h>
30 #include "util/strlist.h"
31 #include "util/intlist.h"
32 #include "util/thread_map.h"
33 #include "util/stat.h"
34 #include "trace-event.h"
35 #include "util/parse-events.h"
36 #include "util/bpf-loader.h"
37 #include "callchain.h"
38 #include "syscalltbl.h"
39 #include "rb_resort.h"
40
41 #include <libaudit.h> /* FIXME: Still needed for audit_errno_to_name */
42 #include <stdlib.h>
43 #include <linux/err.h>
44 #include <linux/filter.h>
45 #include <linux/audit.h>
46 #include <sys/ptrace.h>
47 #include <linux/random.h>
48 #include <linux/stringify.h>
49
/* Fallback definition for build environments whose headers lack O_CLOEXEC. */
#if !defined(O_CLOEXEC)
#define O_CLOEXEC 02000000
#endif
53
54 struct trace {
55         struct perf_tool        tool;
56         struct syscalltbl       *sctbl;
57         struct {
58                 int             max;
59                 struct syscall  *table;
60                 struct {
61                         struct perf_evsel *sys_enter,
62                                           *sys_exit;
63                 }               events;
64         } syscalls;
65         struct record_opts      opts;
66         struct perf_evlist      *evlist;
67         struct machine          *host;
68         struct thread           *current;
69         u64                     base_time;
70         FILE                    *output;
71         unsigned long           nr_events;
72         struct strlist          *ev_qualifier;
73         struct {
74                 size_t          nr;
75                 int             *entries;
76         }                       ev_qualifier_ids;
77         struct intlist          *tid_list;
78         struct intlist          *pid_list;
79         struct {
80                 size_t          nr;
81                 pid_t           *entries;
82         }                       filter_pids;
83         double                  duration_filter;
84         double                  runtime_ms;
85         struct {
86                 u64             vfs_getname,
87                                 proc_getname;
88         } stats;
89         unsigned int            max_stack;
90         unsigned int            min_stack;
91         bool                    not_ev_qualifier;
92         bool                    live;
93         bool                    full_time;
94         bool                    sched;
95         bool                    multiple_threads;
96         bool                    summary;
97         bool                    summary_only;
98         bool                    show_comm;
99         bool                    show_tool_stats;
100         bool                    trace_syscalls;
101         bool                    kernel_syscallchains;
102         bool                    force;
103         bool                    vfs_getname;
104         int                     trace_pgfaults;
105         int                     open_id;
106 };
107
108 struct tp_field {
109         int offset;
110         union {
111                 u64 (*integer)(struct tp_field *field, struct perf_sample *sample);
112                 void *(*pointer)(struct tp_field *field, struct perf_sample *sample);
113         };
114 };
115
116 #define TP_UINT_FIELD(bits) \
117 static u64 tp_field__u##bits(struct tp_field *field, struct perf_sample *sample) \
118 { \
119         u##bits value; \
120         memcpy(&value, sample->raw_data + field->offset, sizeof(value)); \
121         return value;  \
122 }
123
124 TP_UINT_FIELD(8);
125 TP_UINT_FIELD(16);
126 TP_UINT_FIELD(32);
127 TP_UINT_FIELD(64);
128
129 #define TP_UINT_FIELD__SWAPPED(bits) \
130 static u64 tp_field__swapped_u##bits(struct tp_field *field, struct perf_sample *sample) \
131 { \
132         u##bits value; \
133         memcpy(&value, sample->raw_data + field->offset, sizeof(value)); \
134         return bswap_##bits(value);\
135 }
136
137 TP_UINT_FIELD__SWAPPED(16);
138 TP_UINT_FIELD__SWAPPED(32);
139 TP_UINT_FIELD__SWAPPED(64);
140
141 static int tp_field__init_uint(struct tp_field *field,
142                                struct format_field *format_field,
143                                bool needs_swap)
144 {
145         field->offset = format_field->offset;
146
147         switch (format_field->size) {
148         case 1:
149                 field->integer = tp_field__u8;
150                 break;
151         case 2:
152                 field->integer = needs_swap ? tp_field__swapped_u16 : tp_field__u16;
153                 break;
154         case 4:
155                 field->integer = needs_swap ? tp_field__swapped_u32 : tp_field__u32;
156                 break;
157         case 8:
158                 field->integer = needs_swap ? tp_field__swapped_u64 : tp_field__u64;
159                 break;
160         default:
161                 return -1;
162         }
163
164         return 0;
165 }
166
167 static void *tp_field__ptr(struct tp_field *field, struct perf_sample *sample)
168 {
169         return sample->raw_data + field->offset;
170 }
171
172 static int tp_field__init_ptr(struct tp_field *field, struct format_field *format_field)
173 {
174         field->offset = format_field->offset;
175         field->pointer = tp_field__ptr;
176         return 0;
177 }
178
179 struct syscall_tp {
180         struct tp_field id;
181         union {
182                 struct tp_field args, ret;
183         };
184 };
185
186 static int perf_evsel__init_tp_uint_field(struct perf_evsel *evsel,
187                                           struct tp_field *field,
188                                           const char *name)
189 {
190         struct format_field *format_field = perf_evsel__field(evsel, name);
191
192         if (format_field == NULL)
193                 return -1;
194
195         return tp_field__init_uint(field, format_field, evsel->needs_swap);
196 }
197
/*
 * Initialize member @name of the struct syscall_tp hanging off evsel->priv
 * as an unsigned integer accessor for the tracepoint field of the same name.
 * Evaluates to the result of perf_evsel__init_tp_uint_field().
 *
 * @evsel is parenthesized so that any pointer expression can be passed.
 */
#define perf_evsel__init_sc_tp_uint_field(evsel, name) \
	({ struct syscall_tp *sc = (evsel)->priv;\
	   perf_evsel__init_tp_uint_field((evsel), &sc->name, #name); })
201
/*
 * Look up the tracepoint field called @name in @evsel and initialize @field
 * as a pointer accessor for it.  Returns -1 if the field does not exist,
 * otherwise whatever tp_field__init_ptr() returns.
 */
static int perf_evsel__init_tp_ptr_field(struct perf_evsel *evsel,
					 struct tp_field *field,
					 const char *name)
{
	struct format_field *ff = perf_evsel__field(evsel, name);

	return ff != NULL ? tp_field__init_ptr(field, ff) : -1;
}
213
/*
 * Initialize member @name of the struct syscall_tp hanging off evsel->priv
 * as a pointer accessor for the tracepoint field of the same name.
 * Evaluates to the result of perf_evsel__init_tp_ptr_field().
 *
 * @evsel is parenthesized so that any pointer expression can be passed.
 */
#define perf_evsel__init_sc_tp_ptr_field(evsel, name) \
	({ struct syscall_tp *sc = (evsel)->priv;\
	   perf_evsel__init_tp_ptr_field((evsel), &sc->name, #name); })
217
218 static void perf_evsel__delete_priv(struct perf_evsel *evsel)
219 {
220         zfree(&evsel->priv);
221         perf_evsel__delete(evsel);
222 }
223
224 static int perf_evsel__init_syscall_tp(struct perf_evsel *evsel, void *handler)
225 {
226         evsel->priv = malloc(sizeof(struct syscall_tp));
227         if (evsel->priv != NULL) {
228                 if (perf_evsel__init_sc_tp_uint_field(evsel, id))
229                         goto out_delete;
230
231                 evsel->handler = handler;
232                 return 0;
233         }
234
235         return -ENOMEM;
236
237 out_delete:
238         zfree(&evsel->priv);
239         return -ENOENT;
240 }
241
/*
 * Create the evsel for the syscall tracepoint named @direction and prepare
 * it with perf_evsel__init_syscall_tp().
 *
 * "raw_syscalls" is tried first, then "syscalls" as a fallback.  Returns the
 * new evsel, or NULL if neither tracepoint could be created or the
 * initialization failed (the evsel is deleted in the latter case).
 */
static struct perf_evsel *perf_evsel__syscall_newtp(const char *direction, void *handler)
{
	struct perf_evsel *evsel = perf_evsel__newtp("raw_syscalls", direction);

	if (IS_ERR(evsel)) {
		/* older kernel (e.g., RHEL6) use syscalls:{enter,exit} */
		evsel = perf_evsel__newtp("syscalls", direction);
		if (IS_ERR(evsel))
			return NULL;
	}

	if (perf_evsel__init_syscall_tp(evsel, handler) != 0) {
		perf_evsel__delete_priv(evsel);
		return NULL;
	}

	return evsel;
}
262
/*
 * Read member @name of the evsel's struct syscall_tp from @sample through
 * its integer accessor.  Arguments are parenthesized so that arbitrary
 * expressions can be passed.
 */
#define perf_evsel__sc_tp_uint(evsel, name, sample) \
	({ struct syscall_tp *fields = (evsel)->priv; \
	   fields->name.integer(&fields->name, (sample)); })
266
/*
 * Obtain a pointer to member @name of the evsel's struct syscall_tp inside
 * @sample through its pointer accessor.  Arguments are parenthesized so that
 * arbitrary expressions can be passed.
 */
#define perf_evsel__sc_tp_ptr(evsel, name, sample) \
	({ struct syscall_tp *fields = (evsel)->priv; \
	   fields->name.pointer(&fields->name, (sample)); })
270
271 struct syscall_arg {
272         unsigned long val;
273         struct thread *thread;
274         struct trace  *trace;
275         void          *parm;
276         u8            idx;
277         u8            mask;
278 };
279
/*
 * Table of names for an integer argument: value 'offset + i' is named by
 * entries[i], for 0 <= i < nr_entries.
 */
struct strarray {
	int offset, nr_entries;
	const char **entries;
};
285
/*
 * DEFINE_STRARRAY(array) defines 'strarray__<array>' wrapping the string
 * table @array, with lookups starting at value 0.
 */
#define DEFINE_STRARRAY(array) struct strarray strarray__##array = { \
	.offset	    = 0, \
	.nr_entries = ARRAY_SIZE(array), \
	.entries    = array, \
}

/*
 * DEFINE_STRARRAY_OFFSET(array, off) does the same for tables whose first
 * entry names the value @off rather than 0.
 */
#define DEFINE_STRARRAY_OFFSET(array, off) struct strarray strarray__##array = { \
	.offset	    = off, \
	.nr_entries = ARRAY_SIZE(array), \
	.entries    = array, \
}
296
297 static size_t __syscall_arg__scnprintf_strarray(char *bf, size_t size,
298                                                 const char *intfmt,
299                                                 struct syscall_arg *arg)
300 {
301         struct strarray *sa = arg->parm;
302         int idx = arg->val - sa->offset;
303
304         if (idx < 0 || idx >= sa->nr_entries)
305                 return scnprintf(bf, size, intfmt, arg->val);
306
307         return scnprintf(bf, size, "%s", sa->entries[idx]);
308 }
309
/*
 * Formatter for arguments backed by a struct strarray: prints the matching
 * name, falling back to the value in decimal when there is none.
 */
static size_t syscall_arg__scnprintf_strarray(char *bf, size_t size,
					      struct syscall_arg *arg)
{
	const char *fallback_fmt = "%d";

	return __syscall_arg__scnprintf_strarray(bf, size, fallback_fmt, arg);
}
315
316 #define SCA_STRARRAY syscall_arg__scnprintf_strarray
317
318 #if defined(__i386__) || defined(__x86_64__)
319 /*
320  * FIXME: Make this available to all arches as soon as the ioctl beautifier
321  *        gets rewritten to support all arches.
322  */
/*
 * Formatter for arguments backed by a struct strarray: prints the matching
 * name, falling back to the value in hexadecimal when there is none.
 */
static size_t syscall_arg__scnprintf_strhexarray(char *bf, size_t size,
						 struct syscall_arg *arg)
{
	const char *fallback_fmt = "%#x";

	return __syscall_arg__scnprintf_strarray(bf, size, fallback_fmt, arg);
}
328
329 #define SCA_STRHEXARRAY syscall_arg__scnprintf_strhexarray
330 #endif /* defined(__i386__) || defined(__x86_64__) */
331
332 static size_t syscall_arg__scnprintf_fd(char *bf, size_t size,
333                                         struct syscall_arg *arg);
334
335 #define SCA_FD syscall_arg__scnprintf_fd
336
337 static size_t syscall_arg__scnprintf_fd_at(char *bf, size_t size,
338                                            struct syscall_arg *arg)
339 {
340         int fd = arg->val;
341
342         if (fd == AT_FDCWD)
343                 return scnprintf(bf, size, "CWD");
344
345         return syscall_arg__scnprintf_fd(bf, size, arg);
346 }
347
348 #define SCA_FDAT syscall_arg__scnprintf_fd_at
349
350 static size_t syscall_arg__scnprintf_close_fd(char *bf, size_t size,
351                                               struct syscall_arg *arg);
352
353 #define SCA_CLOSE_FD syscall_arg__scnprintf_close_fd
354
355 static size_t syscall_arg__scnprintf_hex(char *bf, size_t size,
356                                          struct syscall_arg *arg)
357 {
358         return scnprintf(bf, size, "%#lx", arg->val);
359 }
360
361 #define SCA_HEX syscall_arg__scnprintf_hex
362
363 static size_t syscall_arg__scnprintf_int(char *bf, size_t size,
364                                          struct syscall_arg *arg)
365 {
366         return scnprintf(bf, size, "%d", arg->val);
367 }
368
369 #define SCA_INT syscall_arg__scnprintf_int
370
/* Names of the bpf() commands, indexed by command value. */
static const char *bpf_cmd[] = {
	[0] = "MAP_CREATE",
	[1] = "MAP_LOOKUP_ELEM",
	[2] = "MAP_UPDATE_ELEM",
	[3] = "MAP_DELETE_ELEM",
	[4] = "MAP_GET_NEXT_KEY",
	[5] = "PROG_LOAD",
};
375 static DEFINE_STRARRAY(bpf_cmd);
376
/* Names of the epoll_ctl() ops; the strarray built from it starts at 1. */
static const char *epoll_ctl_ops[] = {
	[0] = "ADD",
	[1] = "DEL",
	[2] = "MOD",
};
378 static DEFINE_STRARRAY_OFFSET(epoll_ctl_ops, 1);
379
/* Names of the interval timers, indexed by the 'which' argument. */
static const char *itimers[] = {
	[0] = "REAL",
	[1] = "VIRTUAL",
	[2] = "PROF",
};
381 static DEFINE_STRARRAY(itimers);
382
/* Names of the keyctl() options, indexed by option value. */
static const char *keyctl_options[] = {
	[0]  = "GET_KEYRING_ID",
	[1]  = "JOIN_SESSION_KEYRING",
	[2]  = "UPDATE",
	[3]  = "REVOKE",
	[4]  = "CHOWN",
	[5]  = "SETPERM",
	[6]  = "DESCRIBE",
	[7]  = "CLEAR",
	[8]  = "LINK",
	[9]  = "UNLINK",
	[10] = "SEARCH",
	[11] = "READ",
	[12] = "INSTANTIATE",
	[13] = "NEGATE",
	[14] = "SET_REQKEY_KEYRING",
	[15] = "SET_TIMEOUT",
	[16] = "ASSUME_AUTHORITY",
	[17] = "GET_SECURITY",
	[18] = "SESSION_TO_PARENT",
	[19] = "REJECT",
	[20] = "INSTANTIATE_IOV",
	[21] = "INVALIDATE",
	[22] = "GET_PERSISTENT",
};
390 static DEFINE_STRARRAY(keyctl_options);
391
/*
 * Names of the lseek() whence values.  "DATA" and "HOLE" are only present
 * when the build environment defines SEEK_DATA / SEEK_HOLE.
 */
static const char *whences[] = {
	"SET",
	"CUR",
	"END",
#if defined(SEEK_DATA)
	"DATA",
#endif
#if defined(SEEK_HOLE)
	"HOLE",
#endif
};
400 static DEFINE_STRARRAY(whences);
401
/*
 * Names of the fcntl() commands, indexed by command value.
 *
 * All names are spelled without the "F_" prefix so that every command is
 * printed in the same form.
 */
static const char *fcntl_cmds[] = {
	"DUPFD", "GETFD", "SETFD", "GETFL", "SETFL", "GETLK", "SETLK",
	"SETLKW", "SETOWN", "GETOWN", "SETSIG", "GETSIG", "GETLK64",
	"SETLK64", "SETLKW64", "SETOWN_EX", "GETOWN_EX",
	"GETOWNER_UIDS",
};
408 static DEFINE_STRARRAY(fcntl_cmds);
409
/* Names of the rlimit resources, indexed by resource value. */
static const char *rlimit_resources[] = {
	[0]  = "CPU",
	[1]  = "FSIZE",
	[2]  = "DATA",
	[3]  = "STACK",
	[4]  = "CORE",
	[5]  = "RSS",
	[6]  = "NPROC",
	[7]  = "NOFILE",
	[8]  = "MEMLOCK",
	[9]  = "AS",
	[10] = "LOCKS",
	[11] = "SIGPENDING",
	[12] = "MSGQUEUE",
	[13] = "NICE",
	[14] = "RTPRIO",
	[15] = "RTTIME",
};
415 static DEFINE_STRARRAY(rlimit_resources);
416
/* Names of the 'how' values of the signal mask syscalls, indexed by value. */
static const char *sighow[] = {
	[0] = "BLOCK",
	[1] = "UNBLOCK",
	[2] = "SETMASK",
};
418 static DEFINE_STRARRAY(sighow);
419
/* Names of the clock ids, indexed by clock id value. */
static const char *clockid[] = {
	[0]  = "REALTIME",
	[1]  = "MONOTONIC",
	[2]  = "PROCESS_CPUTIME_ID",
	[3]  = "THREAD_CPUTIME_ID",
	[4]  = "MONOTONIC_RAW",
	[5]  = "REALTIME_COARSE",
	[6]  = "MONOTONIC_COARSE",
	[7]  = "BOOTTIME",
	[8]  = "REALTIME_ALARM",
	[9]  = "BOOTTIME_ALARM",
	[10] = "SGI_CYCLE",
	[11] = "TAI",
};
425 static DEFINE_STRARRAY(clockid);
426
/*
 * Names of the socket address families, indexed by AF_* value.
 *
 * "MPLS" (AF_MPLS, 28) sits between "IB" (27) and "CAN" (29); without it
 * every family from AF_CAN onwards would be printed with the name of its
 * successor.
 */
static const char *socket_families[] = {
	"UNSPEC", "LOCAL", "INET", "AX25", "IPX", "APPLETALK", "NETROM",
	"BRIDGE", "ATMPVC", "X25", "INET6", "ROSE", "DECnet", "NETBEUI",
	"SECURITY", "KEY", "NETLINK", "PACKET", "ASH", "ECONET", "ATMSVC",
	"RDS", "SNA", "IRDA", "PPPOX", "WANPIPE", "LLC", "IB", "MPLS", "CAN",
	"TIPC", "BLUETOOTH", "IUCV", "RXRPC", "ISDN", "PHONET", "IEEE802154",
	"CAIF", "ALG", "NFC", "VSOCK",
};
435 static DEFINE_STRARRAY(socket_families);
436
437 static size_t syscall_arg__scnprintf_access_mode(char *bf, size_t size,
438                                                  struct syscall_arg *arg)
439 {
440         size_t printed = 0;
441         int mode = arg->val;
442
443         if (mode == F_OK) /* 0 */
444                 return scnprintf(bf, size, "F");
445 #define P_MODE(n) \
446         if (mode & n##_OK) { \
447                 printed += scnprintf(bf + printed, size - printed, "%s", #n); \
448                 mode &= ~n##_OK; \
449         }
450
451         P_MODE(R);
452         P_MODE(W);
453         P_MODE(X);
454 #undef P_MODE
455
456         if (mode)
457                 printed += scnprintf(bf + printed, size - printed, "|%#x", mode);
458
459         return printed;
460 }
461
462 #define SCA_ACCMODE syscall_arg__scnprintf_access_mode
463
464 static size_t syscall_arg__scnprintf_filename(char *bf, size_t size,
465                                               struct syscall_arg *arg);
466
467 #define SCA_FILENAME syscall_arg__scnprintf_filename
468
469 static size_t syscall_arg__scnprintf_pipe_flags(char *bf, size_t size,
470                                                 struct syscall_arg *arg)
471 {
472         int printed = 0, flags = arg->val;
473
474 #define P_FLAG(n) \
475         if (flags & O_##n) { \
476                 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
477                 flags &= ~O_##n; \
478         }
479
480         P_FLAG(CLOEXEC);
481         P_FLAG(NONBLOCK);
482 #undef P_FLAG
483
484         if (flags)
485                 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
486
487         return printed;
488 }
489
490 #define SCA_PIPE_FLAGS syscall_arg__scnprintf_pipe_flags
491
492 #if defined(__i386__) || defined(__x86_64__)
493 /*
494  * FIXME: Make this available to all arches.
495  */
/* First terminal ioctl in the table; guarded in case a header defines it. */
#ifndef TCGETS
#define TCGETS          0x5401
#endif

/*
 * Names of the terminal ioctls.  The table is looked up with an offset of
 * TCGETS (0x5401), i.e. entry i names ioctl number TCGETS + i, so the
 * designators that skip over unused numbers are written relative to TCGETS.
 * Slots that are skipped stay NULL.
 */
static const char *tioctls[] = {
	"TCGETS", "TCSETS", "TCSETSW", "TCSETSF", "TCGETA", "TCSETA", "TCSETAW",
	"TCSETAF", "TCSBRK", "TCXONC", "TCFLSH", "TIOCEXCL", "TIOCNXCL",
	"TIOCSCTTY", "TIOCGPGRP", "TIOCSPGRP", "TIOCOUTQ", "TIOCSTI",
	"TIOCGWINSZ", "TIOCSWINSZ", "TIOCMGET", "TIOCMBIS", "TIOCMBIC",
	"TIOCMSET", "TIOCGSOFTCAR", "TIOCSSOFTCAR", "FIONREAD", "TIOCLINUX",
	"TIOCCONS", "TIOCGSERIAL", "TIOCSSERIAL", "TIOCPKT", "FIONBIO",
	"TIOCNOTTY", "TIOCSETD", "TIOCGETD", "TCSBRKP",
	[0x5427 - TCGETS] = "TIOCSBRK",
	"TIOCCBRK", "TIOCGSID", "TCGETS2", "TCSETS2", "TCSETSW2", "TCSETSF2",
	"TIOCGRS485", "TIOCSRS485", "TIOCGPTN", "TIOCSPTLCK",
	"TIOCGDEV||TCGETX", "TCSETX", "TCSETXF", "TCSETXW", "TIOCSIG",
	"TIOCVHANGUP", "TIOCGPKT", "TIOCGPTLCK",
	[0x5440 - TCGETS] = "TIOCGEXCL",
	[0x5450 - TCGETS] = "FIONCLEX", "FIOCLEX", "FIOASYNC", "TIOCSERCONFIG",
	"TIOCSERGWILD", "TIOCSERSWILD", "TIOCGLCKTRMIOS", "TIOCSLCKTRMIOS",
	"TIOCSERGSTRUCT", "TIOCSERGETLSR", "TIOCSERGETMULTI", "TIOCSERSETMULTI",
	"TIOCMIWAIT", "TIOCGICOUNT",
	[0x5460 - TCGETS] = "FIOQSIZE",
};
515
516 static DEFINE_STRARRAY_OFFSET(tioctls, 0x5401);
517 #endif /* defined(__i386__) || defined(__x86_64__) */
518
/* Fallbacks for headers that do not provide the getrandom() flags. */
#if !defined(GRND_NONBLOCK)
#define GRND_NONBLOCK   0x0001
#endif
#if !defined(GRND_RANDOM)
#define GRND_RANDOM     0x0002
#endif
525
526 static size_t syscall_arg__scnprintf_getrandom_flags(char *bf, size_t size,
527                                                    struct syscall_arg *arg)
528 {
529         int printed = 0, flags = arg->val;
530
531 #define P_FLAG(n) \
532         if (flags & GRND_##n) { \
533                 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
534                 flags &= ~GRND_##n; \
535         }
536
537         P_FLAG(RANDOM);
538         P_FLAG(NONBLOCK);
539 #undef P_FLAG
540
541         if (flags)
542                 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
543
544         return printed;
545 }
546
547 #define SCA_GETRANDOM_FLAGS syscall_arg__scnprintf_getrandom_flags
548
/*
 * Initializer fragment for a struct syscall_fmt entry: argument number @arg
 * is printed through SCA_STRARRAY using the table strarray__<array>.
 * @name is not used by the expansion; it only labels the argument at the
 * point of use.
 */
#define STRARRAY(arg, name, array) \
	  .arg_scnprintf = { [arg] = SCA_STRARRAY, }, \
	  .arg_parm	 = { [arg] = &strarray__##array, }
552
553 #include "trace/beauty/eventfd.c"
554 #include "trace/beauty/flock.c"
555 #include "trace/beauty/futex_op.c"
556 #include "trace/beauty/mmap.c"
557 #include "trace/beauty/mode_t.c"
558 #include "trace/beauty/msg_flags.c"
559 #include "trace/beauty/open_flags.c"
560 #include "trace/beauty/perf_event_open.c"
561 #include "trace/beauty/pid.c"
562 #include "trace/beauty/sched_policy.c"
563 #include "trace/beauty/seccomp.c"
564 #include "trace/beauty/signum.c"
565 #include "trace/beauty/socket_type.c"
566 #include "trace/beauty/waitid_options.c"
567
568 static struct syscall_fmt {
569         const char *name;
570         const char *alias;
571         size_t     (*arg_scnprintf[6])(char *bf, size_t size, struct syscall_arg *arg);
572         void       *arg_parm[6];
573         bool       errmsg;
574         bool       errpid;
575         bool       timeout;
576         bool       hexret;
577 } syscall_fmts[] = {
578         { .name     = "access",     .errmsg = true,
579           .arg_scnprintf = { [0] = SCA_FILENAME, /* filename */
580                              [1] = SCA_ACCMODE,  /* mode */ }, },
581         { .name     = "arch_prctl", .errmsg = true, .alias = "prctl", },
582         { .name     = "bpf",        .errmsg = true, STRARRAY(0, cmd, bpf_cmd), },
583         { .name     = "brk",        .hexret = true,
584           .arg_scnprintf = { [0] = SCA_HEX, /* brk */ }, },
585         { .name     = "chdir",      .errmsg = true,
586           .arg_scnprintf = { [0] = SCA_FILENAME, /* filename */ }, },
587         { .name     = "chmod",      .errmsg = true,
588           .arg_scnprintf = { [0] = SCA_FILENAME, /* filename */ }, },
589         { .name     = "chroot",     .errmsg = true,
590           .arg_scnprintf = { [0] = SCA_FILENAME, /* filename */ }, },
591         { .name     = "clock_gettime",  .errmsg = true, STRARRAY(0, clk_id, clockid), },
592         { .name     = "clone",      .errpid = true, },
593         { .name     = "close",      .errmsg = true,
594           .arg_scnprintf = { [0] = SCA_CLOSE_FD, /* fd */ }, },
595         { .name     = "connect",    .errmsg = true, },
596         { .name     = "creat",      .errmsg = true,
597           .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
598         { .name     = "dup",        .errmsg = true,
599           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
600         { .name     = "dup2",       .errmsg = true,
601           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
602         { .name     = "dup3",       .errmsg = true,
603           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
604         { .name     = "epoll_ctl",  .errmsg = true, STRARRAY(1, op, epoll_ctl_ops), },
605         { .name     = "eventfd2",   .errmsg = true,
606           .arg_scnprintf = { [1] = SCA_EFD_FLAGS, /* flags */ }, },
607         { .name     = "faccessat",  .errmsg = true,
608           .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
609                              [1] = SCA_FILENAME, /* filename */ }, },
610         { .name     = "fadvise64",  .errmsg = true,
611           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
612         { .name     = "fallocate",  .errmsg = true,
613           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
614         { .name     = "fchdir",     .errmsg = true,
615           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
616         { .name     = "fchmod",     .errmsg = true,
617           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
618         { .name     = "fchmodat",   .errmsg = true,
619           .arg_scnprintf = { [0] = SCA_FDAT, /* fd */
620                              [1] = SCA_FILENAME, /* filename */ }, },
621         { .name     = "fchown",     .errmsg = true,
622           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
623         { .name     = "fchownat",   .errmsg = true,
624           .arg_scnprintf = { [0] = SCA_FDAT, /* fd */
625                              [1] = SCA_FILENAME, /* filename */ }, },
626         { .name     = "fcntl",      .errmsg = true,
627           .arg_scnprintf = { [0] = SCA_FD, /* fd */
628                              [1] = SCA_STRARRAY, /* cmd */ },
629           .arg_parm      = { [1] = &strarray__fcntl_cmds, /* cmd */ }, },
630         { .name     = "fdatasync",  .errmsg = true,
631           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
632         { .name     = "flock",      .errmsg = true,
633           .arg_scnprintf = { [0] = SCA_FD, /* fd */
634                              [1] = SCA_FLOCK, /* cmd */ }, },
635         { .name     = "fsetxattr",  .errmsg = true,
636           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
637         { .name     = "fstat",      .errmsg = true, .alias = "newfstat",
638           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
639         { .name     = "fstatat",    .errmsg = true, .alias = "newfstatat",
640           .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
641                              [1] = SCA_FILENAME, /* filename */ }, },
642         { .name     = "fstatfs",    .errmsg = true,
643           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
644         { .name     = "fsync",    .errmsg = true,
645           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
646         { .name     = "ftruncate", .errmsg = true,
647           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
648         { .name     = "futex",      .errmsg = true,
649           .arg_scnprintf = { [1] = SCA_FUTEX_OP, /* op */ }, },
650         { .name     = "futimesat", .errmsg = true,
651           .arg_scnprintf = { [0] = SCA_FDAT, /* fd */
652                              [1] = SCA_FILENAME, /* filename */ }, },
653         { .name     = "getdents",   .errmsg = true,
654           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
655         { .name     = "getdents64", .errmsg = true,
656           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
657         { .name     = "getitimer",  .errmsg = true, STRARRAY(0, which, itimers), },
658         { .name     = "getpid",     .errpid = true, },
659         { .name     = "getpgid",    .errpid = true, },
660         { .name     = "getppid",    .errpid = true, },
661         { .name     = "getrandom",  .errmsg = true,
662           .arg_scnprintf = { [2] = SCA_GETRANDOM_FLAGS, /* flags */ }, },
663         { .name     = "getrlimit",  .errmsg = true, STRARRAY(0, resource, rlimit_resources), },
664         { .name     = "getxattr",    .errmsg = true,
665           .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
666         { .name     = "inotify_add_watch",          .errmsg = true,
667           .arg_scnprintf = { [1] = SCA_FILENAME, /* pathname */ }, },
668         { .name     = "ioctl",      .errmsg = true,
669           .arg_scnprintf = { [0] = SCA_FD, /* fd */
670 #if defined(__i386__) || defined(__x86_64__)
671 /*
672  * FIXME: Make this available to all arches.
673  */
674                              [1] = SCA_STRHEXARRAY, /* cmd */
675                              [2] = SCA_HEX, /* arg */ },
676           .arg_parm      = { [1] = &strarray__tioctls, /* cmd */ }, },
677 #else
678                              [2] = SCA_HEX, /* arg */ }, },
679 #endif
680         { .name     = "keyctl",     .errmsg = true, STRARRAY(0, option, keyctl_options), },
681         { .name     = "kill",       .errmsg = true,
682           .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
683         { .name     = "lchown",    .errmsg = true,
684           .arg_scnprintf = { [0] = SCA_FILENAME, /* filename */ }, },
685         { .name     = "lgetxattr",  .errmsg = true,
686           .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
687         { .name     = "linkat",     .errmsg = true,
688           .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
689         { .name     = "listxattr",  .errmsg = true,
690           .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
691         { .name     = "llistxattr", .errmsg = true,
692           .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
693         { .name     = "lremovexattr",  .errmsg = true,
694           .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
695         { .name     = "lseek",      .errmsg = true,
696           .arg_scnprintf = { [0] = SCA_FD, /* fd */
697                              [2] = SCA_STRARRAY, /* whence */ },
698           .arg_parm      = { [2] = &strarray__whences, /* whence */ }, },
699         { .name     = "lsetxattr",  .errmsg = true,
700           .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
701         { .name     = "lstat",      .errmsg = true, .alias = "newlstat",
702           .arg_scnprintf = { [0] = SCA_FILENAME, /* filename */ }, },
703         { .name     = "lsxattr",    .errmsg = true,
704           .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
705         { .name     = "madvise",    .errmsg = true,
706           .arg_scnprintf = { [0] = SCA_HEX,      /* start */
707                              [2] = SCA_MADV_BHV, /* behavior */ }, },
708         { .name     = "mkdir",    .errmsg = true,
709           .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
710         { .name     = "mkdirat",    .errmsg = true,
711           .arg_scnprintf = { [0] = SCA_FDAT, /* fd */
712                              [1] = SCA_FILENAME, /* pathname */ }, },
713         { .name     = "mknod",      .errmsg = true,
714           .arg_scnprintf = { [0] = SCA_FILENAME, /* filename */ }, },
715         { .name     = "mknodat",    .errmsg = true,
716           .arg_scnprintf = { [0] = SCA_FDAT, /* fd */
717                              [1] = SCA_FILENAME, /* filename */ }, },
718         { .name     = "mlock",      .errmsg = true,
719           .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
720         { .name     = "mlockall",   .errmsg = true,
721           .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
722         { .name     = "mmap",       .hexret = true,
723           .arg_scnprintf = { [0] = SCA_HEX,       /* addr */
724                              [2] = SCA_MMAP_PROT, /* prot */
725                              [3] = SCA_MMAP_FLAGS, /* flags */
726                              [4] = SCA_FD,        /* fd */ }, },
727         { .name     = "mprotect",   .errmsg = true,
728           .arg_scnprintf = { [0] = SCA_HEX, /* start */
729                              [2] = SCA_MMAP_PROT, /* prot */ }, },
730         { .name     = "mq_unlink", .errmsg = true,
731           .arg_scnprintf = { [0] = SCA_FILENAME, /* u_name */ }, },
732         { .name     = "mremap",     .hexret = true,
733           .arg_scnprintf = { [0] = SCA_HEX, /* addr */
734                              [3] = SCA_MREMAP_FLAGS, /* flags */
735                              [4] = SCA_HEX, /* new_addr */ }, },
736         { .name     = "munlock",    .errmsg = true,
737           .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
738         { .name     = "munmap",     .errmsg = true,
739           .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
740         { .name     = "name_to_handle_at", .errmsg = true,
741           .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
742         { .name     = "newfstatat", .errmsg = true,
743           .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
744                              [1] = SCA_FILENAME, /* filename */ }, },
745         { .name     = "open",       .errmsg = true,
746           .arg_scnprintf = { [0] = SCA_FILENAME,   /* filename */
747                              [1] = SCA_OPEN_FLAGS, /* flags */ }, },
748         { .name     = "open_by_handle_at", .errmsg = true,
749           .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
750                              [2] = SCA_OPEN_FLAGS, /* flags */ }, },
751         { .name     = "openat",     .errmsg = true,
752           .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
753                              [1] = SCA_FILENAME, /* filename */
754                              [2] = SCA_OPEN_FLAGS, /* flags */ }, },
755         { .name     = "perf_event_open", .errmsg = true,
756           .arg_scnprintf = { [2] = SCA_INT, /* cpu */
757                              [3] = SCA_FD,  /* group_fd */
758                              [4] = SCA_PERF_FLAGS,  /* flags */ }, },
759         { .name     = "pipe2",      .errmsg = true,
760           .arg_scnprintf = { [1] = SCA_PIPE_FLAGS, /* flags */ }, },
761         { .name     = "poll",       .errmsg = true, .timeout = true, },
762         { .name     = "ppoll",      .errmsg = true, .timeout = true, },
763         { .name     = "pread",      .errmsg = true, .alias = "pread64",
764           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
765         { .name     = "preadv",     .errmsg = true, .alias = "pread",
766           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
767         { .name     = "prlimit64",  .errmsg = true, STRARRAY(1, resource, rlimit_resources), },
768         { .name     = "pwrite",     .errmsg = true, .alias = "pwrite64",
769           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
770         { .name     = "pwritev",    .errmsg = true,
771           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
772         { .name     = "read",       .errmsg = true,
773           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
774         { .name     = "readlink",   .errmsg = true,
775           .arg_scnprintf = { [0] = SCA_FILENAME, /* path */ }, },
776         { .name     = "readlinkat", .errmsg = true,
777           .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
778                              [1] = SCA_FILENAME, /* pathname */ }, },
779         { .name     = "readv",      .errmsg = true,
780           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
781         { .name     = "recvfrom",   .errmsg = true,
782           .arg_scnprintf = { [0] = SCA_FD, /* fd */
783                              [3] = SCA_MSG_FLAGS, /* flags */ }, },
784         { .name     = "recvmmsg",   .errmsg = true,
785           .arg_scnprintf = { [0] = SCA_FD, /* fd */
786                              [3] = SCA_MSG_FLAGS, /* flags */ }, },
787         { .name     = "recvmsg",    .errmsg = true,
788           .arg_scnprintf = { [0] = SCA_FD, /* fd */
789                              [2] = SCA_MSG_FLAGS, /* flags */ }, },
790         { .name     = "removexattr", .errmsg = true,
791           .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
792         { .name     = "renameat",   .errmsg = true,
793           .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
794         { .name     = "rmdir",    .errmsg = true,
795           .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
796         { .name     = "rt_sigaction", .errmsg = true,
797           .arg_scnprintf = { [0] = SCA_SIGNUM, /* sig */ }, },
798         { .name     = "rt_sigprocmask",  .errmsg = true, STRARRAY(0, how, sighow), },
799         { .name     = "rt_sigqueueinfo", .errmsg = true,
800           .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
801         { .name     = "rt_tgsigqueueinfo", .errmsg = true,
802           .arg_scnprintf = { [2] = SCA_SIGNUM, /* sig */ }, },
803         { .name     = "sched_setscheduler",   .errmsg = true,
804           .arg_scnprintf = { [1] = SCA_SCHED_POLICY, /* policy */ }, },
805         { .name     = "seccomp", .errmsg = true,
806           .arg_scnprintf = { [0] = SCA_SECCOMP_OP, /* op */
807                              [1] = SCA_SECCOMP_FLAGS, /* flags */ }, },
808         { .name     = "select",     .errmsg = true, .timeout = true, },
809         { .name     = "sendmmsg",    .errmsg = true,
810           .arg_scnprintf = { [0] = SCA_FD, /* fd */
811                              [3] = SCA_MSG_FLAGS, /* flags */ }, },
812         { .name     = "sendmsg",    .errmsg = true,
813           .arg_scnprintf = { [0] = SCA_FD, /* fd */
814                              [2] = SCA_MSG_FLAGS, /* flags */ }, },
815         { .name     = "sendto",     .errmsg = true,
816           .arg_scnprintf = { [0] = SCA_FD, /* fd */
817                              [3] = SCA_MSG_FLAGS, /* flags */ }, },
818         { .name     = "set_tid_address", .errpid = true, },
819         { .name     = "setitimer",  .errmsg = true, STRARRAY(0, which, itimers), },
820         { .name     = "setpgid",    .errmsg = true, },
821         { .name     = "setrlimit",  .errmsg = true, STRARRAY(0, resource, rlimit_resources), },
822         { .name     = "setxattr",   .errmsg = true,
823           .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
824         { .name     = "shutdown",   .errmsg = true,
825           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
826         { .name     = "socket",     .errmsg = true,
827           .arg_scnprintf = { [0] = SCA_STRARRAY, /* family */
828                              [1] = SCA_SK_TYPE, /* type */ },
829           .arg_parm      = { [0] = &strarray__socket_families, /* family */ }, },
830         { .name     = "socketpair", .errmsg = true,
831           .arg_scnprintf = { [0] = SCA_STRARRAY, /* family */
832                              [1] = SCA_SK_TYPE, /* type */ },
833           .arg_parm      = { [0] = &strarray__socket_families, /* family */ }, },
834         { .name     = "stat",       .errmsg = true, .alias = "newstat",
835           .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
836         { .name     = "statfs",     .errmsg = true,
837           .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
838         { .name     = "swapoff",    .errmsg = true,
839           .arg_scnprintf = { [0] = SCA_FILENAME, /* specialfile */ }, },
840         { .name     = "swapon",     .errmsg = true,
841           .arg_scnprintf = { [0] = SCA_FILENAME, /* specialfile */ }, },
842         { .name     = "symlinkat",  .errmsg = true,
843           .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
844         { .name     = "tgkill",     .errmsg = true,
845           .arg_scnprintf = { [2] = SCA_SIGNUM, /* sig */ }, },
846         { .name     = "tkill",      .errmsg = true,
847           .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
848         { .name     = "truncate",   .errmsg = true,
849           .arg_scnprintf = { [0] = SCA_FILENAME, /* path */ }, },
850         { .name     = "uname",      .errmsg = true, .alias = "newuname", },
851         { .name     = "unlinkat",   .errmsg = true,
852           .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
853                              [1] = SCA_FILENAME, /* pathname */ }, },
854         { .name     = "utime",  .errmsg = true,
855           .arg_scnprintf = { [0] = SCA_FILENAME, /* filename */ }, },
856         { .name     = "utimensat",  .errmsg = true,
857           .arg_scnprintf = { [0] = SCA_FDAT, /* dirfd */
858                              [1] = SCA_FILENAME, /* filename */ }, },
859         { .name     = "utimes",  .errmsg = true,
860           .arg_scnprintf = { [0] = SCA_FILENAME, /* filename */ }, },
861         { .name     = "vmsplice",  .errmsg = true,
862           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
863         { .name     = "wait4",      .errpid = true,
864           .arg_scnprintf = { [2] = SCA_WAITID_OPTIONS, /* options */ }, },
865         { .name     = "waitid",     .errpid = true,
866           .arg_scnprintf = { [3] = SCA_WAITID_OPTIONS, /* options */ }, },
867         { .name     = "write",      .errmsg = true,
868           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
869         { .name     = "writev",     .errmsg = true,
870           .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
871 };
872
873 static int syscall_fmt__cmp(const void *name, const void *fmtp)
874 {
875         const struct syscall_fmt *fmt = fmtp;
876         return strcmp(name, fmt->name);
877 }
878
879 static struct syscall_fmt *syscall_fmt__find(const char *name)
880 {
881         const int nmemb = ARRAY_SIZE(syscall_fmts);
882         return bsearch(name, syscall_fmts, nmemb, sizeof(struct syscall_fmt), syscall_fmt__cmp);
883 }
884
/*
 * Per-syscall description, stored in trace->syscalls.table[] and indexed by
 * syscall id.  Filled in by trace__read_syscall_info().
 */
struct syscall {
        /* "syscalls:sys_enter_<name>" tracepoint format; may be an ERR_PTR */
        struct event_format *tp_format;
        /* number of argument fields, not counting the syscall number field */
        int                 nr_args;
        /* first argument field of tp_format, NULL when there is none */
        struct format_field *args;
        /* name from the syscall table; non-NULL marks the slot as looked up */
        const char          *name;
        /* true for "exit" and "exit_group" */
        bool                is_exit;
        /* matching syscall_fmts[] entry, NULL when the syscall has none */
        struct syscall_fmt  *fmt;
        /* per-argument formatters, nr_args entries, NULL entry means "%ld" */
        size_t              (**arg_scnprintf)(char *bf, size_t size, struct syscall_arg *arg);
        /* per-argument formatter parameters, taken from fmt->arg_parm */
        void                **arg_parm;
};
895
896 static size_t fprintf_duration(unsigned long t, FILE *fp)
897 {
898         double duration = (double)t / NSEC_PER_MSEC;
899         size_t printed = fprintf(fp, "(");
900
901         if (duration >= 1.0)
902                 printed += color_fprintf(fp, PERF_COLOR_RED, "%6.3f ms", duration);
903         else if (duration >= 0.01)
904                 printed += color_fprintf(fp, PERF_COLOR_YELLOW, "%6.3f ms", duration);
905         else
906                 printed += color_fprintf(fp, PERF_COLOR_NORMAL, "%6.3f ms", duration);
907         return printed + fprintf(fp, "): ");
908 }
909
/**
 * struct thread_trace - per-thread tracing state, kept as the thread's priv
 *
 * filename.ptr: The filename char pointer that will be vfs_getname'd
 * filename.entry_str_pos: Where to insert the string translated from
 *                         filename.ptr by the vfs_getname tracepoint/kprobe.
 */
struct thread_trace {
        /* sample time of the most recent syscall entry seen for this thread */
        u64               entry_time;
        u64               exit_time;
        /* entry_str holds a formatted syscall entry that was not printed yet */
        bool              entry_pending;
        /* bumped on every thread__trace() call for this thread */
        unsigned long     nr_events;
        unsigned long     pfmaj, pfmin;
        /* buffer of trace__entry_str_size bytes, allocated on first sys_enter */
        char              *entry_str;
        double            runtime_ms;
        struct {
                unsigned long ptr;
                short int     entry_str_pos;
                bool          pending_open;
                unsigned int  namelen;
                char          *name;
        } filename;
        /* fd -> pathname cache: table[0..max], max is -1 while it is empty */
        struct {
                int       max;
                char      **table;
        } paths;

        /* keyed by syscall id; each node's priv points to a struct stats */
        struct intlist *syscall_stats;
};
937
938 static struct thread_trace *thread_trace__new(void)
939 {
940         struct thread_trace *ttrace =  zalloc(sizeof(struct thread_trace));
941
942         if (ttrace)
943                 ttrace->paths.max = -1;
944
945         ttrace->syscall_stats = intlist__new(NULL);
946
947         return ttrace;
948 }
949
950 static struct thread_trace *thread__trace(struct thread *thread, FILE *fp)
951 {
952         struct thread_trace *ttrace;
953
954         if (thread == NULL)
955                 goto fail;
956
957         if (thread__priv(thread) == NULL)
958                 thread__set_priv(thread, thread_trace__new());
959
960         if (thread__priv(thread) == NULL)
961                 goto fail;
962
963         ttrace = thread__priv(thread);
964         ++ttrace->nr_events;
965
966         return ttrace;
967 fail:
968         color_fprintf(fp, PERF_COLOR_RED,
969                       "WARNING: not enough memory, dropping samples!\n");
970         return NULL;
971 }
972
/*
 * Single-bit flags.
 * NOTE(review): presumably select major/minor page fault tracing; their
 * users are outside this part of the file - confirm.
 */
#define TRACE_PFMAJ             (1 << 0)
#define TRACE_PFMIN             (1 << 1)

/* Size in bytes of the per-thread entry_str buffer (see trace__sys_enter) */
static const size_t trace__entry_str_size = 2048;
977
978 static int trace__set_fd_pathname(struct thread *thread, int fd, const char *pathname)
979 {
980         struct thread_trace *ttrace = thread__priv(thread);
981
982         if (fd > ttrace->paths.max) {
983                 char **npath = realloc(ttrace->paths.table, (fd + 1) * sizeof(char *));
984
985                 if (npath == NULL)
986                         return -1;
987
988                 if (ttrace->paths.max != -1) {
989                         memset(npath + ttrace->paths.max + 1, 0,
990                                (fd - ttrace->paths.max) * sizeof(char *));
991                 } else {
992                         memset(npath, 0, (fd + 1) * sizeof(char *));
993                 }
994
995                 ttrace->paths.table = npath;
996                 ttrace->paths.max   = fd;
997         }
998
999         ttrace->paths.table[fd] = strdup(pathname);
1000
1001         return ttrace->paths.table[fd] != NULL ? 0 : -1;
1002 }
1003
1004 static int thread__read_fd_path(struct thread *thread, int fd)
1005 {
1006         char linkname[PATH_MAX], pathname[PATH_MAX];
1007         struct stat st;
1008         int ret;
1009
1010         if (thread->pid_ == thread->tid) {
1011                 scnprintf(linkname, sizeof(linkname),
1012                           "/proc/%d/fd/%d", thread->pid_, fd);
1013         } else {
1014                 scnprintf(linkname, sizeof(linkname),
1015                           "/proc/%d/task/%d/fd/%d", thread->pid_, thread->tid, fd);
1016         }
1017
1018         if (lstat(linkname, &st) < 0 || st.st_size + 1 > (off_t)sizeof(pathname))
1019                 return -1;
1020
1021         ret = readlink(linkname, pathname, sizeof(pathname));
1022
1023         if (ret < 0 || ret > st.st_size)
1024                 return -1;
1025
1026         pathname[ret] = '\0';
1027         return trace__set_fd_pathname(thread, fd, pathname);
1028 }
1029
1030 static const char *thread__fd_path(struct thread *thread, int fd,
1031                                    struct trace *trace)
1032 {
1033         struct thread_trace *ttrace = thread__priv(thread);
1034
1035         if (ttrace == NULL)
1036                 return NULL;
1037
1038         if (fd < 0)
1039                 return NULL;
1040
1041         if ((fd > ttrace->paths.max || ttrace->paths.table[fd] == NULL)) {
1042                 if (!trace->live)
1043                         return NULL;
1044                 ++trace->stats.proc_getname;
1045                 if (thread__read_fd_path(thread, fd))
1046                         return NULL;
1047         }
1048
1049         return ttrace->paths.table[fd];
1050 }
1051
1052 static size_t syscall_arg__scnprintf_fd(char *bf, size_t size,
1053                                         struct syscall_arg *arg)
1054 {
1055         int fd = arg->val;
1056         size_t printed = scnprintf(bf, size, "%d", fd);
1057         const char *path = thread__fd_path(arg->thread, fd, arg->trace);
1058
1059         if (path)
1060                 printed += scnprintf(bf + printed, size - printed, "<%s>", path);
1061
1062         return printed;
1063 }
1064
1065 static size_t syscall_arg__scnprintf_close_fd(char *bf, size_t size,
1066                                               struct syscall_arg *arg)
1067 {
1068         int fd = arg->val;
1069         size_t printed = syscall_arg__scnprintf_fd(bf, size, arg);
1070         struct thread_trace *ttrace = thread__priv(arg->thread);
1071
1072         if (ttrace && fd >= 0 && fd <= ttrace->paths.max)
1073                 zfree(&ttrace->paths.table[fd]);
1074
1075         return printed;
1076 }
1077
1078 static void thread__set_filename_pos(struct thread *thread, const char *bf,
1079                                      unsigned long ptr)
1080 {
1081         struct thread_trace *ttrace = thread__priv(thread);
1082
1083         ttrace->filename.ptr = ptr;
1084         ttrace->filename.entry_str_pos = bf - ttrace->entry_str;
1085 }
1086
1087 static size_t syscall_arg__scnprintf_filename(char *bf, size_t size,
1088                                               struct syscall_arg *arg)
1089 {
1090         unsigned long ptr = arg->val;
1091
1092         if (!arg->trace->vfs_getname)
1093                 return scnprintf(bf, size, "%#x", ptr);
1094
1095         thread__set_filename_pos(arg->thread, bf, ptr);
1096         return 0;
1097 }
1098
1099 static bool trace__filter_duration(struct trace *trace, double t)
1100 {
1101         return t < (trace->duration_filter * NSEC_PER_MSEC);
1102 }
1103
1104 static size_t trace__fprintf_tstamp(struct trace *trace, u64 tstamp, FILE *fp)
1105 {
1106         double ts = (double)(tstamp - trace->base_time) / NSEC_PER_MSEC;
1107
1108         return fprintf(fp, "%10.3f ", ts);
1109 }
1110
/*
 * Flags written by sig_handler().  They are volatile sig_atomic_t because
 * that is the object type ISO C allows a signal handler to store to with
 * well-defined behaviour; readers keep using them as plain truth values.
 */
static volatile sig_atomic_t done = false;
static volatile sig_atomic_t interrupted = false;

/*
 * Signal handler: request termination and remember whether the signal that
 * asked for it was SIGINT.
 */
static void sig_handler(int sig)
{
        done = true;
        interrupted = sig == SIGINT;
}
1119
1120 static size_t trace__fprintf_entry_head(struct trace *trace, struct thread *thread,
1121                                         u64 duration, u64 tstamp, FILE *fp)
1122 {
1123         size_t printed = trace__fprintf_tstamp(trace, tstamp, fp);
1124         printed += fprintf_duration(duration, fp);
1125
1126         if (trace->multiple_threads) {
1127                 if (trace->show_comm)
1128                         printed += fprintf(fp, "%.14s/", thread__comm_str(thread));
1129                 printed += fprintf(fp, "%d ", thread->tid);
1130         }
1131
1132         return printed;
1133 }
1134
1135 static int trace__process_event(struct trace *trace, struct machine *machine,
1136                                 union perf_event *event, struct perf_sample *sample)
1137 {
1138         int ret = 0;
1139
1140         switch (event->header.type) {
1141         case PERF_RECORD_LOST:
1142                 color_fprintf(trace->output, PERF_COLOR_RED,
1143                               "LOST %" PRIu64 " events!\n", event->lost.lost);
1144                 ret = machine__process_lost_event(machine, event, sample);
1145                 break;
1146         default:
1147                 ret = machine__process_event(machine, event, sample);
1148                 break;
1149         }
1150
1151         return ret;
1152 }
1153
1154 static int trace__tool_process(struct perf_tool *tool,
1155                                union perf_event *event,
1156                                struct perf_sample *sample,
1157                                struct machine *machine)
1158 {
1159         struct trace *trace = container_of(tool, struct trace, tool);
1160         return trace__process_event(trace, machine, event, sample);
1161 }
1162
1163 static int trace__symbols_init(struct trace *trace, struct perf_evlist *evlist)
1164 {
1165         int err = symbol__init(NULL);
1166
1167         if (err)
1168                 return err;
1169
1170         trace->host = machine__new_host();
1171         if (trace->host == NULL)
1172                 return -ENOMEM;
1173
1174         if (trace_event__register_resolver(trace->host, machine__resolve_kernel_addr) < 0)
1175                 return -errno;
1176
1177         err = __machine__synthesize_threads(trace->host, &trace->tool, &trace->opts.target,
1178                                             evlist->threads, trace__tool_process, false,
1179                                             trace->opts.proc_map_timeout);
1180         if (err)
1181                 symbol__exit();
1182
1183         return err;
1184 }
1185
1186 static int syscall__set_arg_fmts(struct syscall *sc)
1187 {
1188         struct format_field *field;
1189         int idx = 0;
1190
1191         sc->arg_scnprintf = calloc(sc->nr_args, sizeof(void *));
1192         if (sc->arg_scnprintf == NULL)
1193                 return -1;
1194
1195         if (sc->fmt)
1196                 sc->arg_parm = sc->fmt->arg_parm;
1197
1198         for (field = sc->args; field; field = field->next) {
1199                 if (sc->fmt && sc->fmt->arg_scnprintf[idx])
1200                         sc->arg_scnprintf[idx] = sc->fmt->arg_scnprintf[idx];
1201                 else if (field->flags & FIELD_IS_POINTER)
1202                         sc->arg_scnprintf[idx] = syscall_arg__scnprintf_hex;
1203                 else if (strcmp(field->type, "pid_t") == 0)
1204                         sc->arg_scnprintf[idx] = SCA_PID;
1205                 else if (strcmp(field->type, "umode_t") == 0)
1206                         sc->arg_scnprintf[idx] = SCA_MODE_T;
1207                 ++idx;
1208         }
1209
1210         return 0;
1211 }
1212
1213 static int trace__read_syscall_info(struct trace *trace, int id)
1214 {
1215         char tp_name[128];
1216         struct syscall *sc;
1217         const char *name = syscalltbl__name(trace->sctbl, id);
1218
1219         if (name == NULL)
1220                 return -1;
1221
1222         if (id > trace->syscalls.max) {
1223                 struct syscall *nsyscalls = realloc(trace->syscalls.table, (id + 1) * sizeof(*sc));
1224
1225                 if (nsyscalls == NULL)
1226                         return -1;
1227
1228                 if (trace->syscalls.max != -1) {
1229                         memset(nsyscalls + trace->syscalls.max + 1, 0,
1230                                (id - trace->syscalls.max) * sizeof(*sc));
1231                 } else {
1232                         memset(nsyscalls, 0, (id + 1) * sizeof(*sc));
1233                 }
1234
1235                 trace->syscalls.table = nsyscalls;
1236                 trace->syscalls.max   = id;
1237         }
1238
1239         sc = trace->syscalls.table + id;
1240         sc->name = name;
1241
1242         sc->fmt  = syscall_fmt__find(sc->name);
1243
1244         snprintf(tp_name, sizeof(tp_name), "sys_enter_%s", sc->name);
1245         sc->tp_format = trace_event__tp_format("syscalls", tp_name);
1246
1247         if (IS_ERR(sc->tp_format) && sc->fmt && sc->fmt->alias) {
1248                 snprintf(tp_name, sizeof(tp_name), "sys_enter_%s", sc->fmt->alias);
1249                 sc->tp_format = trace_event__tp_format("syscalls", tp_name);
1250         }
1251
1252         if (IS_ERR(sc->tp_format))
1253                 return -1;
1254
1255         sc->args = sc->tp_format->format.fields;
1256         sc->nr_args = sc->tp_format->format.nr_fields;
1257         /*
1258          * We need to check and discard the first variable '__syscall_nr'
1259          * or 'nr' that mean the syscall number. It is needless here.
1260          * So drop '__syscall_nr' or 'nr' field but does not exist on older kernels.
1261          */
1262         if (sc->args && (!strcmp(sc->args->name, "__syscall_nr") || !strcmp(sc->args->name, "nr"))) {
1263                 sc->args = sc->args->next;
1264                 --sc->nr_args;
1265         }
1266
1267         sc->is_exit = !strcmp(name, "exit_group") || !strcmp(name, "exit");
1268
1269         return syscall__set_arg_fmts(sc);
1270 }
1271
1272 static int trace__validate_ev_qualifier(struct trace *trace)
1273 {
1274         int err = 0, i;
1275         struct str_node *pos;
1276
1277         trace->ev_qualifier_ids.nr = strlist__nr_entries(trace->ev_qualifier);
1278         trace->ev_qualifier_ids.entries = malloc(trace->ev_qualifier_ids.nr *
1279                                                  sizeof(trace->ev_qualifier_ids.entries[0]));
1280
1281         if (trace->ev_qualifier_ids.entries == NULL) {
1282                 fputs("Error:\tNot enough memory for allocating events qualifier ids\n",
1283                        trace->output);
1284                 err = -EINVAL;
1285                 goto out;
1286         }
1287
1288         i = 0;
1289
1290         strlist__for_each(pos, trace->ev_qualifier) {
1291                 const char *sc = pos->s;
1292                 int id = syscalltbl__id(trace->sctbl, sc);
1293
1294                 if (id < 0) {
1295                         if (err == 0) {
1296                                 fputs("Error:\tInvalid syscall ", trace->output);
1297                                 err = -EINVAL;
1298                         } else {
1299                                 fputs(", ", trace->output);
1300                         }
1301
1302                         fputs(sc, trace->output);
1303                 }
1304
1305                 trace->ev_qualifier_ids.entries[i++] = id;
1306         }
1307
1308         if (err < 0) {
1309                 fputs("\nHint:\ttry 'perf list syscalls:sys_enter_*'"
1310                       "\nHint:\tand: 'man syscalls'\n", trace->output);
1311                 zfree(&trace->ev_qualifier_ids.entries);
1312                 trace->ev_qualifier_ids.nr = 0;
1313         }
1314 out:
1315         return err;
1316 }
1317
1318 /*
1319  * args is to be interpreted as a series of longs but we need to handle
1320  * 8-byte unaligned accesses. args points to raw_data within the event
1321  * and raw_data is guaranteed to be 8-byte unaligned because it is
1322  * preceded by raw_size which is a u32. So we need to copy args to a temp
1323  * variable to read it. Most notably this avoids extended load instructions
1324  * on unaligned addresses
1325  */
1326
1327 static size_t syscall__scnprintf_args(struct syscall *sc, char *bf, size_t size,
1328                                       unsigned char *args, struct trace *trace,
1329                                       struct thread *thread)
1330 {
1331         size_t printed = 0;
1332         unsigned char *p;
1333         unsigned long val;
1334
1335         if (sc->args != NULL) {
1336                 struct format_field *field;
1337                 u8 bit = 1;
1338                 struct syscall_arg arg = {
1339                         .idx    = 0,
1340                         .mask   = 0,
1341                         .trace  = trace,
1342                         .thread = thread,
1343                 };
1344
1345                 for (field = sc->args; field;
1346                      field = field->next, ++arg.idx, bit <<= 1) {
1347                         if (arg.mask & bit)
1348                                 continue;
1349
1350                         /* special care for unaligned accesses */
1351                         p = args + sizeof(unsigned long) * arg.idx;
1352                         memcpy(&val, p, sizeof(val));
1353
1354                         /*
1355                          * Suppress this argument if its value is zero and
1356                          * and we don't have a string associated in an
1357                          * strarray for it.
1358                          */
1359                         if (val == 0 &&
1360                             !(sc->arg_scnprintf &&
1361                               sc->arg_scnprintf[arg.idx] == SCA_STRARRAY &&
1362                               sc->arg_parm[arg.idx]))
1363                                 continue;
1364
1365                         printed += scnprintf(bf + printed, size - printed,
1366                                              "%s%s: ", printed ? ", " : "", field->name);
1367                         if (sc->arg_scnprintf && sc->arg_scnprintf[arg.idx]) {
1368                                 arg.val = val;
1369                                 if (sc->arg_parm)
1370                                         arg.parm = sc->arg_parm[arg.idx];
1371                                 printed += sc->arg_scnprintf[arg.idx](bf + printed,
1372                                                                       size - printed, &arg);
1373                         } else {
1374                                 printed += scnprintf(bf + printed, size - printed,
1375                                                      "%ld", val);
1376                         }
1377                 }
1378         } else if (IS_ERR(sc->tp_format)) {
1379                 /*
1380                  * If we managed to read the tracepoint /format file, then we
1381                  * may end up not having any args, like with gettid(), so only
1382                  * print the raw args when we didn't manage to read it.
1383                  */
1384                 int i = 0;
1385
1386                 while (i < 6) {
1387                         /* special care for unaligned accesses */
1388                         p = args + sizeof(unsigned long) * i;
1389                         memcpy(&val, p, sizeof(val));
1390                         printed += scnprintf(bf + printed, size - printed,
1391                                              "%sarg%d: %ld",
1392                                              printed ? ", " : "", i, val);
1393                         ++i;
1394                 }
1395         }
1396
1397         return printed;
1398 }
1399
/*
 * Signature of a tracepoint sample handler; trace__sys_enter() is one
 * function of this shape.
 */
typedef int (*tracepoint_handler)(struct trace *trace, struct perf_evsel *evsel,
                                  union perf_event *event,
                                  struct perf_sample *sample);
1403
1404 static struct syscall *trace__syscall_info(struct trace *trace,
1405                                            struct perf_evsel *evsel, int id)
1406 {
1407
1408         if (id < 0) {
1409
1410                 /*
1411                  * XXX: Noticed on x86_64, reproduced as far back as 3.0.36, haven't tried
1412                  * before that, leaving at a higher verbosity level till that is
1413                  * explained. Reproduced with plain ftrace with:
1414                  *
1415                  * echo 1 > /t/events/raw_syscalls/sys_exit/enable
1416                  * grep "NR -1 " /t/trace_pipe
1417                  *
1418                  * After generating some load on the machine.
1419                  */
1420                 if (verbose > 1) {
1421                         static u64 n;
1422                         fprintf(trace->output, "Invalid syscall %d id, skipping (%s, %" PRIu64 ") ...\n",
1423                                 id, perf_evsel__name(evsel), ++n);
1424                 }
1425                 return NULL;
1426         }
1427
1428         if ((id > trace->syscalls.max || trace->syscalls.table[id].name == NULL) &&
1429             trace__read_syscall_info(trace, id))
1430                 goto out_cant_read;
1431
1432         if ((id > trace->syscalls.max || trace->syscalls.table[id].name == NULL))
1433                 goto out_cant_read;
1434
1435         return &trace->syscalls.table[id];
1436
1437 out_cant_read:
1438         if (verbose) {
1439                 fprintf(trace->output, "Problems reading syscall %d", id);
1440                 if (id <= trace->syscalls.max && trace->syscalls.table[id].name != NULL)
1441                         fprintf(trace->output, "(%s)", trace->syscalls.table[id].name);
1442                 fputs(" information\n", trace->output);
1443         }
1444         return NULL;
1445 }
1446
1447 static void thread__update_stats(struct thread_trace *ttrace,
1448                                  int id, struct perf_sample *sample)
1449 {
1450         struct int_node *inode;
1451         struct stats *stats;
1452         u64 duration = 0;
1453
1454         inode = intlist__findnew(ttrace->syscall_stats, id);
1455         if (inode == NULL)
1456                 return;
1457
1458         stats = inode->priv;
1459         if (stats == NULL) {
1460                 stats = malloc(sizeof(struct stats));
1461                 if (stats == NULL)
1462                         return;
1463                 init_stats(stats);
1464                 inode->priv = stats;
1465         }
1466
1467         if (ttrace->entry_time && sample->time > ttrace->entry_time)
1468                 duration = sample->time - ttrace->entry_time;
1469
1470         update_stats(stats, duration);
1471 }
1472
1473 static int trace__printf_interrupted_entry(struct trace *trace, struct perf_sample *sample)
1474 {
1475         struct thread_trace *ttrace;
1476         u64 duration;
1477         size_t printed;
1478
1479         if (trace->current == NULL)
1480                 return 0;
1481
1482         ttrace = thread__priv(trace->current);
1483
1484         if (!ttrace->entry_pending)
1485                 return 0;
1486
1487         duration = sample->time - ttrace->entry_time;
1488
1489         printed  = trace__fprintf_entry_head(trace, trace->current, duration, sample->time, trace->output);
1490         printed += fprintf(trace->output, "%-70s) ...\n", ttrace->entry_str);
1491         ttrace->entry_pending = false;
1492
1493         return printed;
1494 }
1495
1496 static int trace__sys_enter(struct trace *trace, struct perf_evsel *evsel,
1497                             union perf_event *event __maybe_unused,
1498                             struct perf_sample *sample)
1499 {
1500         char *msg;
1501         void *args;
1502         size_t printed = 0;
1503         struct thread *thread;
1504         int id = perf_evsel__sc_tp_uint(evsel, id, sample), err = -1;
1505         struct syscall *sc = trace__syscall_info(trace, evsel, id);
1506         struct thread_trace *ttrace;
1507
1508         if (sc == NULL)
1509                 return -1;
1510
1511         thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
1512         ttrace = thread__trace(thread, trace->output);
1513         if (ttrace == NULL)
1514                 goto out_put;
1515
1516         args = perf_evsel__sc_tp_ptr(evsel, args, sample);
1517
1518         if (ttrace->entry_str == NULL) {
1519                 ttrace->entry_str = malloc(trace__entry_str_size);
1520                 if (!ttrace->entry_str)
1521                         goto out_put;
1522         }
1523
1524         if (!(trace->duration_filter || trace->summary_only || trace->min_stack))
1525                 trace__printf_interrupted_entry(trace, sample);
1526
1527         ttrace->entry_time = sample->time;
1528         msg = ttrace->entry_str;
1529         printed += scnprintf(msg + printed, trace__entry_str_size - printed, "%s(", sc->name);
1530
1531         printed += syscall__scnprintf_args(sc, msg + printed, trace__entry_str_size - printed,
1532                                            args, trace, thread);
1533
1534         if (sc->is_exit) {
1535                 if (!(trace->duration_filter || trace->summary_only || trace->min_stack)) {
1536                         trace__fprintf_entry_head(trace, thread, 1, sample->time, trace->output);
1537                         fprintf(trace->output, "%-70s\n", ttrace->entry_str);
1538                 }
1539         } else {
1540                 ttrace->entry_pending = true;
1541                 /* See trace__vfs_getname & trace__sys_exit */
1542                 ttrace->filename.pending_open = false;
1543         }
1544
1545         if (trace->current != thread) {
1546                 thread__put(trace->current);
1547                 trace->current = thread__get(thread);
1548         }
1549         err = 0;
1550 out_put:
1551         thread__put(thread);
1552         return err;
1553 }
1554
1555 static int trace__resolve_callchain(struct trace *trace, struct perf_evsel *evsel,
1556                                     struct perf_sample *sample,
1557                                     struct callchain_cursor *cursor)
1558 {
1559         struct addr_location al;
1560
1561         if (machine__resolve(trace->host, &al, sample) < 0 ||
1562             thread__resolve_callchain(al.thread, cursor, evsel, sample, NULL, NULL, trace->max_stack))
1563                 return -1;
1564
1565         return 0;
1566 }
1567
1568 static int trace__fprintf_callchain(struct trace *trace, struct perf_sample *sample)
1569 {
1570         /* TODO: user-configurable print_opts */
1571         const unsigned int print_opts = EVSEL__PRINT_SYM |
1572                                         EVSEL__PRINT_DSO |
1573                                         EVSEL__PRINT_UNKNOWN_AS_ADDR;
1574
1575         return sample__fprintf_callchain(sample, 38, print_opts, &callchain_cursor, trace->output);
1576 }
1577
1578 static int trace__sys_exit(struct trace *trace, struct perf_evsel *evsel,
1579                            union perf_event *event __maybe_unused,
1580                            struct perf_sample *sample)
1581 {
1582         long ret;
1583         u64 duration = 0;
1584         struct thread *thread;
1585         int id = perf_evsel__sc_tp_uint(evsel, id, sample), err = -1, callchain_ret = 0;
1586         struct syscall *sc = trace__syscall_info(trace, evsel, id);
1587         struct thread_trace *ttrace;
1588
1589         if (sc == NULL)
1590                 return -1;
1591
1592         thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
1593         ttrace = thread__trace(thread, trace->output);
1594         if (ttrace == NULL)
1595                 goto out_put;
1596
1597         if (trace->summary)
1598                 thread__update_stats(ttrace, id, sample);
1599
1600         ret = perf_evsel__sc_tp_uint(evsel, ret, sample);
1601
1602         if (id == trace->open_id && ret >= 0 && ttrace->filename.pending_open) {
1603                 trace__set_fd_pathname(thread, ret, ttrace->filename.name);
1604                 ttrace->filename.pending_open = false;
1605                 ++trace->stats.vfs_getname;
1606         }
1607
1608         ttrace->exit_time = sample->time;
1609
1610         if (ttrace->entry_time) {
1611                 duration = sample->time - ttrace->entry_time;
1612                 if (trace__filter_duration(trace, duration))
1613                         goto out;
1614         } else if (trace->duration_filter)
1615                 goto out;
1616
1617         if (sample->callchain) {
1618                 callchain_ret = trace__resolve_callchain(trace, evsel, sample, &callchain_cursor);
1619                 if (callchain_ret == 0) {
1620                         if (callchain_cursor.nr < trace->min_stack)
1621                                 goto out;
1622                         callchain_ret = 1;
1623                 }
1624         }
1625
1626         if (trace->summary_only)
1627                 goto out;
1628
1629         trace__fprintf_entry_head(trace, thread, duration, sample->time, trace->output);
1630
1631         if (ttrace->entry_pending) {
1632                 fprintf(trace->output, "%-70s", ttrace->entry_str);
1633         } else {
1634                 fprintf(trace->output, " ... [");
1635                 color_fprintf(trace->output, PERF_COLOR_YELLOW, "continued");
1636                 fprintf(trace->output, "]: %s()", sc->name);
1637         }
1638
1639         if (sc->fmt == NULL) {
1640 signed_print:
1641                 fprintf(trace->output, ") = %ld", ret);
1642         } else if (ret < 0 && (sc->fmt->errmsg || sc->fmt->errpid)) {
1643                 char bf[STRERR_BUFSIZE];
1644                 const char *emsg = strerror_r(-ret, bf, sizeof(bf)),
1645                            *e = audit_errno_to_name(-ret);
1646
1647                 fprintf(trace->output, ") = -1 %s %s", e, emsg);
1648         } else if (ret == 0 && sc->fmt->timeout)
1649                 fprintf(trace->output, ") = 0 Timeout");
1650         else if (sc->fmt->hexret)
1651                 fprintf(trace->output, ") = %#lx", ret);
1652         else if (sc->fmt->errpid) {
1653                 struct thread *child = machine__find_thread(trace->host, ret, ret);
1654
1655                 if (child != NULL) {
1656                         fprintf(trace->output, ") = %ld", ret);
1657                         if (child->comm_set)
1658                                 fprintf(trace->output, " (%s)", thread__comm_str(child));
1659                         thread__put(child);
1660                 }
1661         } else
1662                 goto signed_print;
1663
1664         fputc('\n', trace->output);
1665
1666         if (callchain_ret > 0)
1667                 trace__fprintf_callchain(trace, sample);
1668         else if (callchain_ret < 0)
1669                 pr_err("Problem processing %s callchain, skipping...\n", perf_evsel__name(evsel));
1670 out:
1671         ttrace->entry_pending = false;
1672         err = 0;
1673 out_put:
1674         thread__put(thread);
1675         return err;
1676 }
1677
1678 static int trace__vfs_getname(struct trace *trace, struct perf_evsel *evsel,
1679                               union perf_event *event __maybe_unused,
1680                               struct perf_sample *sample)
1681 {
1682         struct thread *thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
1683         struct thread_trace *ttrace;
1684         size_t filename_len, entry_str_len, to_move;
1685         ssize_t remaining_space;
1686         char *pos;
1687         const char *filename = perf_evsel__rawptr(evsel, sample, "pathname");
1688
1689         if (!thread)
1690                 goto out;
1691
1692         ttrace = thread__priv(thread);
1693         if (!ttrace)
1694                 goto out;
1695
1696         filename_len = strlen(filename);
1697
1698         if (ttrace->filename.namelen < filename_len) {
1699                 char *f = realloc(ttrace->filename.name, filename_len + 1);
1700
1701                 if (f == NULL)
1702                                 goto out;
1703
1704                 ttrace->filename.namelen = filename_len;
1705                 ttrace->filename.name = f;
1706         }
1707
1708         strcpy(ttrace->filename.name, filename);
1709         ttrace->filename.pending_open = true;
1710
1711         if (!ttrace->filename.ptr)
1712                 goto out;
1713
1714         entry_str_len = strlen(ttrace->entry_str);
1715         remaining_space = trace__entry_str_size - entry_str_len - 1; /* \0 */
1716         if (remaining_space <= 0)
1717                 goto out;
1718
1719         if (filename_len > (size_t)remaining_space) {
1720                 filename += filename_len - remaining_space;
1721                 filename_len = remaining_space;
1722         }
1723
1724         to_move = entry_str_len - ttrace->filename.entry_str_pos + 1; /* \0 */
1725         pos = ttrace->entry_str + ttrace->filename.entry_str_pos;
1726         memmove(pos + filename_len, pos, to_move);
1727         memcpy(pos, filename, filename_len);
1728
1729         ttrace->filename.ptr = 0;
1730         ttrace->filename.entry_str_pos = 0;
1731 out:
1732         return 0;
1733 }
1734
1735 static int trace__sched_stat_runtime(struct trace *trace, struct perf_evsel *evsel,
1736                                      union perf_event *event __maybe_unused,
1737                                      struct perf_sample *sample)
1738 {
1739         u64 runtime = perf_evsel__intval(evsel, sample, "runtime");
1740         double runtime_ms = (double)runtime / NSEC_PER_MSEC;
1741         struct thread *thread = machine__findnew_thread(trace->host,
1742                                                         sample->pid,
1743                                                         sample->tid);
1744         struct thread_trace *ttrace = thread__trace(thread, trace->output);
1745
1746         if (ttrace == NULL)
1747                 goto out_dump;
1748
1749         ttrace->runtime_ms += runtime_ms;
1750         trace->runtime_ms += runtime_ms;
1751         thread__put(thread);
1752         return 0;
1753
1754 out_dump:
1755         fprintf(trace->output, "%s: comm=%s,pid=%u,runtime=%" PRIu64 ",vruntime=%" PRIu64 ")\n",
1756                evsel->name,
1757                perf_evsel__strval(evsel, sample, "comm"),
1758                (pid_t)perf_evsel__intval(evsel, sample, "pid"),
1759                runtime,
1760                perf_evsel__intval(evsel, sample, "vruntime"));
1761         thread__put(thread);
1762         return 0;
1763 }
1764
1765 static void bpf_output__printer(enum binary_printer_ops op,
1766                                 unsigned int val, void *extra)
1767 {
1768         FILE *output = extra;
1769         unsigned char ch = (unsigned char)val;
1770
1771         switch (op) {
1772         case BINARY_PRINT_CHAR_DATA:
1773                 fprintf(output, "%c", isprint(ch) ? ch : '.');
1774                 break;
1775         case BINARY_PRINT_DATA_BEGIN:
1776         case BINARY_PRINT_LINE_BEGIN:
1777         case BINARY_PRINT_ADDR:
1778         case BINARY_PRINT_NUM_DATA:
1779         case BINARY_PRINT_NUM_PAD:
1780         case BINARY_PRINT_SEP:
1781         case BINARY_PRINT_CHAR_PAD:
1782         case BINARY_PRINT_LINE_END:
1783         case BINARY_PRINT_DATA_END:
1784         default:
1785                 break;
1786         }
1787 }
1788
1789 static void bpf_output__fprintf(struct trace *trace,
1790                                 struct perf_sample *sample)
1791 {
1792         print_binary(sample->raw_data, sample->raw_size, 8,
1793                      bpf_output__printer, trace->output);
1794 }
1795
1796 static int trace__event_handler(struct trace *trace, struct perf_evsel *evsel,
1797                                 union perf_event *event __maybe_unused,
1798                                 struct perf_sample *sample)
1799 {
1800         int callchain_ret = 0;
1801
1802         if (sample->callchain) {
1803                 callchain_ret = trace__resolve_callchain(trace, evsel, sample, &callchain_cursor);
1804                 if (callchain_ret == 0) {
1805                         if (callchain_cursor.nr < trace->min_stack)
1806                                 goto out;
1807                         callchain_ret = 1;
1808                 }
1809         }
1810
1811         trace__printf_interrupted_entry(trace, sample);
1812         trace__fprintf_tstamp(trace, sample->time, trace->output);
1813
1814         if (trace->trace_syscalls)
1815                 fprintf(trace->output, "(         ): ");
1816
1817         fprintf(trace->output, "%s:", evsel->name);
1818
1819         if (perf_evsel__is_bpf_output(evsel)) {
1820                 bpf_output__fprintf(trace, sample);
1821         } else if (evsel->tp_format) {
1822                 event_format__fprintf(evsel->tp_format, sample->cpu,
1823                                       sample->raw_data, sample->raw_size,
1824                                       trace->output);
1825         }
1826
1827         fprintf(trace->output, ")\n");
1828
1829         if (callchain_ret > 0)
1830                 trace__fprintf_callchain(trace, sample);
1831         else if (callchain_ret < 0)
1832                 pr_err("Problem processing %s callchain, skipping...\n", perf_evsel__name(evsel));
1833 out:
1834         return 0;
1835 }
1836
1837 static void print_location(FILE *f, struct perf_sample *sample,
1838                            struct addr_location *al,
1839                            bool print_dso, bool print_sym)
1840 {
1841
1842         if ((verbose || print_dso) && al->map)
1843                 fprintf(f, "%s@", al->map->dso->long_name);
1844
1845         if ((verbose || print_sym) && al->sym)
1846                 fprintf(f, "%s+0x%" PRIx64, al->sym->name,
1847                         al->addr - al->sym->start);
1848         else if (al->map)
1849                 fprintf(f, "0x%" PRIx64, al->addr);
1850         else
1851                 fprintf(f, "0x%" PRIx64, sample->addr);
1852 }
1853
1854 static int trace__pgfault(struct trace *trace,
1855                           struct perf_evsel *evsel,
1856                           union perf_event *event __maybe_unused,
1857                           struct perf_sample *sample)
1858 {
1859         struct thread *thread;
1860         struct addr_location al;
1861         char map_type = 'd';
1862         struct thread_trace *ttrace;
1863         int err = -1;
1864         int callchain_ret = 0;
1865
1866         thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
1867
1868         if (sample->callchain) {
1869                 callchain_ret = trace__resolve_callchain(trace, evsel, sample, &callchain_cursor);
1870                 if (callchain_ret == 0) {
1871                         if (callchain_cursor.nr < trace->min_stack)
1872                                 goto out_put;
1873                         callchain_ret = 1;
1874                 }
1875         }
1876
1877         ttrace = thread__trace(thread, trace->output);
1878         if (ttrace == NULL)
1879                 goto out_put;
1880
1881         if (evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MAJ)
1882                 ttrace->pfmaj++;
1883         else
1884                 ttrace->pfmin++;
1885
1886         if (trace->summary_only)
1887                 goto out;
1888
1889         thread__find_addr_location(thread, sample->cpumode, MAP__FUNCTION,
1890                               sample->ip, &al);
1891
1892         trace__fprintf_entry_head(trace, thread, 0, sample->time, trace->output);
1893
1894         fprintf(trace->output, "%sfault [",
1895                 evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MAJ ?
1896                 "maj" : "min");
1897
1898         print_location(trace->output, sample, &al, false, true);
1899
1900         fprintf(trace->output, "] => ");
1901
1902         thread__find_addr_location(thread, sample->cpumode, MAP__VARIABLE,
1903                                    sample->addr, &al);
1904
1905         if (!al.map) {
1906                 thread__find_addr_location(thread, sample->cpumode,
1907                                            MAP__FUNCTION, sample->addr, &al);
1908
1909                 if (al.map)
1910                         map_type = 'x';
1911                 else
1912                         map_type = '?';
1913         }
1914
1915         print_location(trace->output, sample, &al, true, false);
1916
1917         fprintf(trace->output, " (%c%c)\n", map_type, al.level);
1918
1919         if (callchain_ret > 0)
1920                 trace__fprintf_callchain(trace, sample);
1921         else if (callchain_ret < 0)
1922                 pr_err("Problem processing %s callchain, skipping...\n", perf_evsel__name(evsel));
1923 out:
1924         err = 0;
1925 out_put:
1926         thread__put(thread);
1927         return err;
1928 }
1929
1930 static bool skip_sample(struct trace *trace, struct perf_sample *sample)
1931 {
1932         if ((trace->pid_list && intlist__find(trace->pid_list, sample->pid)) ||
1933             (trace->tid_list && intlist__find(trace->tid_list, sample->tid)))
1934                 return false;
1935
1936         if (trace->pid_list || trace->tid_list)
1937                 return true;
1938
1939         return false;
1940 }
1941
1942 static void trace__set_base_time(struct trace *trace,
1943                                  struct perf_evsel *evsel,
1944                                  struct perf_sample *sample)
1945 {
1946         /*
1947          * BPF events were not setting PERF_SAMPLE_TIME, so be more robust
1948          * and don't use sample->time unconditionally, we may end up having
1949          * some other event in the future without PERF_SAMPLE_TIME for good
1950          * reason, i.e. we may not be interested in its timestamps, just in
1951          * it taking place, picking some piece of information when it
1952          * appears in our event stream (vfs_getname comes to mind).
1953          */
1954         if (trace->base_time == 0 && !trace->full_time &&
1955             (evsel->attr.sample_type & PERF_SAMPLE_TIME))
1956                 trace->base_time = sample->time;
1957 }
1958
1959 static int trace__process_sample(struct perf_tool *tool,
1960                                  union perf_event *event,
1961                                  struct perf_sample *sample,
1962                                  struct perf_evsel *evsel,
1963                                  struct machine *machine __maybe_unused)
1964 {
1965         struct trace *trace = container_of(tool, struct trace, tool);
1966         int err = 0;
1967
1968         tracepoint_handler handler = evsel->handler;
1969
1970         if (skip_sample(trace, sample))
1971                 return 0;
1972
1973         trace__set_base_time(trace, evsel, sample);
1974
1975         if (handler) {
1976                 ++trace->nr_events;
1977                 handler(trace, evsel, event, sample);
1978         }
1979
1980         return err;
1981 }
1982
1983 static int parse_target_str(struct trace *trace)
1984 {
1985         if (trace->opts.target.pid) {
1986                 trace->pid_list = intlist__new(trace->opts.target.pid);
1987                 if (trace->pid_list == NULL) {
1988                         pr_err("Error parsing process id string\n");
1989                         return -EINVAL;
1990                 }
1991         }
1992
1993         if (trace->opts.target.tid) {
1994                 trace->tid_list = intlist__new(trace->opts.target.tid);
1995                 if (trace->tid_list == NULL) {
1996                         pr_err("Error parsing thread id string\n");
1997                         return -EINVAL;
1998                 }
1999         }
2000
2001         return 0;
2002 }
2003
2004 static int trace__record(struct trace *trace, int argc, const char **argv)
2005 {
2006         unsigned int rec_argc, i, j;
2007         const char **rec_argv;
2008         const char * const record_args[] = {
2009                 "record",
2010                 "-R",
2011                 "-m", "1024",
2012                 "-c", "1",
2013         };
2014
2015         const char * const sc_args[] = { "-e", };
2016         unsigned int sc_args_nr = ARRAY_SIZE(sc_args);
2017         const char * const majpf_args[] = { "-e", "major-faults" };
2018         unsigned int majpf_args_nr = ARRAY_SIZE(majpf_args);
2019         const char * const minpf_args[] = { "-e", "minor-faults" };
2020         unsigned int minpf_args_nr = ARRAY_SIZE(minpf_args);
2021
2022         /* +1 is for the event string below */
2023         rec_argc = ARRAY_SIZE(record_args) + sc_args_nr + 1 +
2024                 majpf_args_nr + minpf_args_nr + argc;
2025         rec_argv = calloc(rec_argc + 1, sizeof(char *));
2026
2027         if (rec_argv == NULL)
2028                 return -ENOMEM;
2029
2030         j = 0;
2031         for (i = 0; i < ARRAY_SIZE(record_args); i++)
2032                 rec_argv[j++] = record_args[i];
2033
2034         if (trace->trace_syscalls) {
2035                 for (i = 0; i < sc_args_nr; i++)
2036                         rec_argv[j++] = sc_args[i];
2037
2038                 /* event string may be different for older kernels - e.g., RHEL6 */
2039                 if (is_valid_tracepoint("raw_syscalls:sys_enter"))
2040                         rec_argv[j++] = "raw_syscalls:sys_enter,raw_syscalls:sys_exit";
2041                 else if (is_valid_tracepoint("syscalls:sys_enter"))
2042                         rec_argv[j++] = "syscalls:sys_enter,syscalls:sys_exit";
2043                 else {
2044                         pr_err("Neither raw_syscalls nor syscalls events exist.\n");
2045                         return -1;
2046                 }
2047         }
2048
2049         if (trace->trace_pgfaults & TRACE_PFMAJ)
2050                 for (i = 0; i < majpf_args_nr; i++)
2051                         rec_argv[j++] = majpf_args[i];
2052
2053         if (trace->trace_pgfaults & TRACE_PFMIN)
2054                 for (i = 0; i < minpf_args_nr; i++)
2055                         rec_argv[j++] = minpf_args[i];
2056
2057         for (i = 0; i < (unsigned int)argc; i++)
2058                 rec_argv[j++] = argv[i];
2059
2060         return cmd_record(j, rec_argv, NULL);
2061 }
2062
2063 static size_t trace__fprintf_thread_summary(struct trace *trace, FILE *fp);
2064
2065 static bool perf_evlist__add_vfs_getname(struct perf_evlist *evlist)
2066 {
2067         struct perf_evsel *evsel = perf_evsel__newtp("probe", "vfs_getname");
2068
2069         if (IS_ERR(evsel))
2070                 return false;
2071
2072         if (perf_evsel__field(evsel, "pathname") == NULL) {
2073                 perf_evsel__delete(evsel);
2074                 return false;
2075         }
2076
2077         evsel->handler = trace__vfs_getname;
2078         perf_evlist__add(evlist, evsel);
2079         return true;
2080 }
2081
2082 static struct perf_evsel *perf_evsel__new_pgfault(u64 config)
2083 {
2084         struct perf_evsel *evsel;
2085         struct perf_event_attr attr = {
2086                 .type = PERF_TYPE_SOFTWARE,
2087                 .mmap_data = 1,
2088         };
2089
2090         attr.config = config;
2091         attr.sample_period = 1;
2092
2093         event_attr_init(&attr);
2094
2095         evsel = perf_evsel__new(&attr);
2096         if (evsel)
2097                 evsel->handler = trace__pgfault;
2098
2099         return evsel;
2100 }
2101
2102 static void trace__handle_event(struct trace *trace, union perf_event *event, struct perf_sample *sample)
2103 {
2104         const u32 type = event->header.type;
2105         struct perf_evsel *evsel;
2106
2107         if (type != PERF_RECORD_SAMPLE) {
2108                 trace__process_event(trace, trace->host, event, sample);
2109                 return;
2110         }
2111
2112         evsel = perf_evlist__id2evsel(trace->evlist, sample->id);
2113         if (evsel == NULL) {
2114                 fprintf(trace->output, "Unknown tp ID %" PRIu64 ", skipping...\n", sample->id);
2115                 return;
2116         }
2117
2118         trace__set_base_time(trace, evsel, sample);
2119
2120         if (evsel->attr.type == PERF_TYPE_TRACEPOINT &&
2121             sample->raw_data == NULL) {
2122                 fprintf(trace->output, "%s sample with no payload for tid: %d, cpu %d, raw_size=%d, skipping...\n",
2123                        perf_evsel__name(evsel), sample->tid,
2124                        sample->cpu, sample->raw_size);
2125         } else {
2126                 tracepoint_handler handler = evsel->handler;
2127                 handler(trace, evsel, event, sample);
2128         }
2129 }
2130
2131 static int trace__add_syscall_newtp(struct trace *trace)
2132 {
2133         int ret = -1;
2134         struct perf_evlist *evlist = trace->evlist;
2135         struct perf_evsel *sys_enter, *sys_exit;
2136
2137         sys_enter = perf_evsel__syscall_newtp("sys_enter", trace__sys_enter);
2138         if (sys_enter == NULL)
2139                 goto out;
2140
2141         if (perf_evsel__init_sc_tp_ptr_field(sys_enter, args))
2142                 goto out_delete_sys_enter;
2143
2144         sys_exit = perf_evsel__syscall_newtp("sys_exit", trace__sys_exit);
2145         if (sys_exit == NULL)
2146                 goto out_delete_sys_enter;
2147
2148         if (perf_evsel__init_sc_tp_uint_field(sys_exit, ret))
2149                 goto out_delete_sys_exit;
2150
2151         perf_evlist__add(evlist, sys_enter);
2152         perf_evlist__add(evlist, sys_exit);
2153
2154         if (callchain_param.enabled && !trace->kernel_syscallchains) {
2155                 /*
2156                  * We're interested only in the user space callchain
2157                  * leading to the syscall, allow overriding that for
2158                  * debugging reasons using --kernel_syscall_callchains
2159                  */
2160                 sys_exit->attr.exclude_callchain_kernel = 1;
2161         }
2162
2163         trace->syscalls.events.sys_enter = sys_enter;
2164         trace->syscalls.events.sys_exit  = sys_exit;
2165
2166         ret = 0;
2167 out:
2168         return ret;
2169
2170 out_delete_sys_exit:
2171         perf_evsel__delete_priv(sys_exit);
2172 out_delete_sys_enter:
2173         perf_evsel__delete_priv(sys_enter);
2174         goto out;
2175 }
2176
2177 static int trace__set_ev_qualifier_filter(struct trace *trace)
2178 {
2179         int err = -1;
2180         char *filter = asprintf_expr_inout_ints("id", !trace->not_ev_qualifier,
2181                                                 trace->ev_qualifier_ids.nr,
2182                                                 trace->ev_qualifier_ids.entries);
2183
2184         if (filter == NULL)
2185                 goto out_enomem;
2186
2187         if (!perf_evsel__append_filter(trace->syscalls.events.sys_enter, "&&", filter))
2188                 err = perf_evsel__append_filter(trace->syscalls.events.sys_exit, "&&", filter);
2189
2190         free(filter);
2191 out:
2192         return err;
2193 out_enomem:
2194         errno = ENOMEM;
2195         goto out;
2196 }
2197
2198 static int trace__run(struct trace *trace, int argc, const char **argv)
2199 {
2200         struct perf_evlist *evlist = trace->evlist;
2201         struct perf_evsel *evsel, *pgfault_maj = NULL, *pgfault_min = NULL;
2202         int err = -1, i;
2203         unsigned long before;
2204         const bool forks = argc > 0;
2205         bool draining = false;
2206
2207         trace->live = true;
2208
2209         if (trace->trace_syscalls && trace__add_syscall_newtp(trace))
2210                 goto out_error_raw_syscalls;
2211
2212         if (trace->trace_syscalls)
2213                 trace->vfs_getname = perf_evlist__add_vfs_getname(evlist);
2214
2215         if ((trace->trace_pgfaults & TRACE_PFMAJ)) {
2216                 pgfault_maj = perf_evsel__new_pgfault(PERF_COUNT_SW_PAGE_FAULTS_MAJ);
2217                 if (pgfault_maj == NULL)
2218                         goto out_error_mem;
2219                 perf_evlist__add(evlist, pgfault_maj);
2220         }
2221
2222         if ((trace->trace_pgfaults & TRACE_PFMIN)) {
2223                 pgfault_min = perf_evsel__new_pgfault(PERF_COUNT_SW_PAGE_FAULTS_MIN);
2224                 if (pgfault_min == NULL)
2225                         goto out_error_mem;
2226                 perf_evlist__add(evlist, pgfault_min);
2227         }
2228
2229         if (trace->sched &&
2230             perf_evlist__add_newtp(evlist, "sched", "sched_stat_runtime",
2231                                    trace__sched_stat_runtime))
2232                 goto out_error_sched_stat_runtime;
2233
2234         err = perf_evlist__create_maps(evlist, &trace->opts.target);
2235         if (err < 0) {
2236                 fprintf(trace->output, "Problems parsing the target to trace, check your options!\n");
2237                 goto out_delete_evlist;
2238         }
2239
2240         err = trace__symbols_init(trace, evlist);
2241         if (err < 0) {
2242                 fprintf(trace->output, "Problems initializing symbol libraries!\n");
2243                 goto out_delete_evlist;
2244         }
2245
2246         perf_evlist__config(evlist, &trace->opts, NULL);
2247
2248         if (callchain_param.enabled) {
2249                 bool use_identifier = false;
2250
2251                 if (trace->syscalls.events.sys_exit) {
2252                         perf_evsel__config_callchain(trace->syscalls.events.sys_exit,
2253                                                      &trace->opts, &callchain_param);
2254                         use_identifier = true;
2255                 }
2256
2257                 if (pgfault_maj) {
2258                         perf_evsel__config_callchain(pgfault_maj, &trace->opts, &callchain_param);
2259                         use_identifier = true;
2260                 }
2261
2262                 if (pgfault_min) {
2263                         perf_evsel__config_callchain(pgfault_min, &trace->opts, &callchain_param);
2264                         use_identifier = true;
2265                 }
2266
2267                 if (use_identifier) {
2268                        /*
2269                         * Now we have evsels with different sample_ids, use
2270                         * PERF_SAMPLE_IDENTIFIER to map from sample to evsel
2271                         * from a fixed position in each ring buffer record.
2272                         *
2273                         * As of this the changeset introducing this comment, this
2274                         * isn't strictly needed, as the fields that can come before
2275                         * PERF_SAMPLE_ID are all used, but we'll probably disable
2276                         * some of those for things like copying the payload of
2277                         * pointer syscall arguments, and for vfs_getname we don't
2278                         * need PERF_SAMPLE_ADDR and PERF_SAMPLE_IP, so do this
2279                         * here as a warning we need to use PERF_SAMPLE_IDENTIFIER.
2280                         */
2281                         perf_evlist__set_sample_bit(evlist, IDENTIFIER);
2282                         perf_evlist__reset_sample_bit(evlist, ID);
2283                 }
2284         }
2285
2286         signal(SIGCHLD, sig_handler);
2287         signal(SIGINT, sig_handler);
2288
2289         if (forks) {
2290                 err = perf_evlist__prepare_workload(evlist, &trace->opts.target,
2291                                                     argv, false, NULL);
2292                 if (err < 0) {
2293                         fprintf(trace->output, "Couldn't run the workload!\n");
2294                         goto out_delete_evlist;
2295                 }
2296         }
2297
2298         err = perf_evlist__open(evlist);
2299         if (err < 0)
2300                 goto out_error_open;
2301
2302         err = bpf__apply_obj_config();
2303         if (err) {
2304                 char errbuf[BUFSIZ];
2305
2306                 bpf__strerror_apply_obj_config(err, errbuf, sizeof(errbuf));
2307                 pr_err("ERROR: Apply config to BPF failed: %s\n",
2308                          errbuf);
2309                 goto out_error_open;
2310         }
2311
2312         /*
2313          * Better not use !target__has_task() here because we need to cover the
2314          * case where no threads were specified in the command line, but a
2315          * workload was, and in that case we will fill in the thread_map when
2316          * we fork the workload in perf_evlist__prepare_workload.
2317          */
2318         if (trace->filter_pids.nr > 0)
2319                 err = perf_evlist__set_filter_pids(evlist, trace->filter_pids.nr, trace->filter_pids.entries);
2320         else if (thread_map__pid(evlist->threads, 0) == -1)
2321                 err = perf_evlist__set_filter_pid(evlist, getpid());
2322
2323         if (err < 0)
2324                 goto out_error_mem;
2325
2326         if (trace->ev_qualifier_ids.nr > 0) {
2327                 err = trace__set_ev_qualifier_filter(trace);
2328                 if (err < 0)
2329                         goto out_errno;
2330
2331                 pr_debug("event qualifier tracepoint filter: %s\n",
2332                          trace->syscalls.events.sys_exit->filter);
2333         }
2334
2335         err = perf_evlist__apply_filters(evlist, &evsel);
2336         if (err < 0)
2337                 goto out_error_apply_filters;
2338
2339         err = perf_evlist__mmap(evlist, trace->opts.mmap_pages, false);
2340         if (err < 0)
2341                 goto out_error_mmap;
2342
2343         if (!target__none(&trace->opts.target))
2344                 perf_evlist__enable(evlist);
2345
2346         if (forks)
2347                 perf_evlist__start_workload(evlist);
2348
2349         trace->multiple_threads = thread_map__pid(evlist->threads, 0) == -1 ||
2350                                   evlist->threads->nr > 1 ||
2351                                   perf_evlist__first(evlist)->attr.inherit;
2352 again:
2353         before = trace->nr_events;
2354
2355         for (i = 0; i < evlist->nr_mmaps; i++) {
2356                 union perf_event *event;
2357
2358                 while ((event = perf_evlist__mmap_read(evlist, i)) != NULL) {
2359                         struct perf_sample sample;
2360
2361                         ++trace->nr_events;
2362
2363                         err = perf_evlist__parse_sample(evlist, event, &sample);
2364                         if (err) {
2365                                 fprintf(trace->output, "Can't parse sample, err = %d, skipping...\n", err);
2366                                 goto next_event;
2367                         }
2368
2369                         trace__handle_event(trace, event, &sample);
2370 next_event:
2371                         perf_evlist__mmap_consume(evlist, i);
2372
2373                         if (interrupted)
2374                                 goto out_disable;
2375
2376                         if (done && !draining) {
2377                                 perf_evlist__disable(evlist);
2378                                 draining = true;
2379                         }
2380                 }
2381         }
2382
2383         if (trace->nr_events == before) {
2384                 int timeout = done ? 100 : -1;
2385
2386                 if (!draining && perf_evlist__poll(evlist, timeout) > 0) {
2387                         if (perf_evlist__filter_pollfd(evlist, POLLERR | POLLHUP) == 0)
2388                                 draining = true;
2389
2390                         goto again;
2391                 }
2392         } else {
2393                 goto again;
2394         }
2395
2396 out_disable:
2397         thread__zput(trace->current);
2398
2399         perf_evlist__disable(evlist);
2400
2401         if (!err) {
2402                 if (trace->summary)
2403                         trace__fprintf_thread_summary(trace, trace->output);
2404
2405                 if (trace->show_tool_stats) {
2406                         fprintf(trace->output, "Stats:\n "
2407                                                " vfs_getname : %" PRIu64 "\n"
2408                                                " proc_getname: %" PRIu64 "\n",
2409                                 trace->stats.vfs_getname,
2410                                 trace->stats.proc_getname);
2411                 }
2412         }
2413
2414 out_delete_evlist:
2415         perf_evlist__delete(evlist);
2416         trace->evlist = NULL;
2417         trace->live = false;
2418         return err;
2419 {
2420         char errbuf[BUFSIZ];
2421
2422 out_error_sched_stat_runtime:
2423         tracing_path__strerror_open_tp(errno, errbuf, sizeof(errbuf), "sched", "sched_stat_runtime");
2424         goto out_error;
2425
2426 out_error_raw_syscalls:
2427         tracing_path__strerror_open_tp(errno, errbuf, sizeof(errbuf), "raw_syscalls", "sys_(enter|exit)");
2428         goto out_error;
2429
2430 out_error_mmap:
2431         perf_evlist__strerror_mmap(evlist, errno, errbuf, sizeof(errbuf));
2432         goto out_error;
2433
2434 out_error_open:
2435         perf_evlist__strerror_open(evlist, errno, errbuf, sizeof(errbuf));
2436
2437 out_error:
2438         fprintf(trace->output, "%s\n", errbuf);
2439         goto out_delete_evlist;
2440
2441 out_error_apply_filters:
2442         fprintf(trace->output,
2443                 "Failed to set filter \"%s\" on event %s with %d (%s)\n",
2444                 evsel->filter, perf_evsel__name(evsel), errno,
2445                 strerror_r(errno, errbuf, sizeof(errbuf)));
2446         goto out_delete_evlist;
2447 }
2448 out_error_mem:
2449         fprintf(trace->output, "Not enough memory to run!\n");
2450         goto out_delete_evlist;
2451
2452 out_errno:
2453         fprintf(trace->output, "errno=%d,%s\n", errno, strerror(errno));
2454         goto out_delete_evlist;
2455 }
2456
2457 static int trace__replay(struct trace *trace)
2458 {
2459         const struct perf_evsel_str_handler handlers[] = {
2460                 { "probe:vfs_getname",       trace__vfs_getname, },
2461         };
2462         struct perf_data_file file = {
2463                 .path  = input_name,
2464                 .mode  = PERF_DATA_MODE_READ,
2465                 .force = trace->force,
2466         };
2467         struct perf_session *session;
2468         struct perf_evsel *evsel;
2469         int err = -1;
2470
2471         trace->tool.sample        = trace__process_sample;
2472         trace->tool.mmap          = perf_event__process_mmap;
2473         trace->tool.mmap2         = perf_event__process_mmap2;
2474         trace->tool.comm          = perf_event__process_comm;
2475         trace->tool.exit          = perf_event__process_exit;
2476         trace->tool.fork          = perf_event__process_fork;
2477         trace->tool.attr          = perf_event__process_attr;
2478         trace->tool.tracing_data = perf_event__process_tracing_data;
2479         trace->tool.build_id      = perf_event__process_build_id;
2480
2481         trace->tool.ordered_events = true;
2482         trace->tool.ordering_requires_timestamps = true;
2483
2484         /* add tid to output */
2485         trace->multiple_threads = true;
2486
2487         session = perf_session__new(&file, false, &trace->tool);
2488         if (session == NULL)
2489                 return -1;
2490
2491         if (symbol__init(&session->header.env) < 0)
2492                 goto out;
2493
2494         trace->host = &session->machines.host;
2495
2496         err = perf_session__set_tracepoints_handlers(session, handlers);
2497         if (err)
2498                 goto out;
2499
2500         evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
2501                                                      "raw_syscalls:sys_enter");
2502         /* older kernels have syscalls tp versus raw_syscalls */
2503         if (evsel == NULL)
2504                 evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
2505                                                              "syscalls:sys_enter");
2506
2507         if (evsel &&
2508             (perf_evsel__init_syscall_tp(evsel, trace__sys_enter) < 0 ||
2509             perf_evsel__init_sc_tp_ptr_field(evsel, args))) {
2510                 pr_err("Error during initialize raw_syscalls:sys_enter event\n");
2511                 goto out;
2512         }
2513
2514         evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
2515                                                      "raw_syscalls:sys_exit");
2516         if (evsel == NULL)
2517                 evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
2518                                                              "syscalls:sys_exit");
2519         if (evsel &&
2520             (perf_evsel__init_syscall_tp(evsel, trace__sys_exit) < 0 ||
2521             perf_evsel__init_sc_tp_uint_field(evsel, ret))) {
2522                 pr_err("Error during initialize raw_syscalls:sys_exit event\n");
2523                 goto out;
2524         }
2525
2526         evlist__for_each(session->evlist, evsel) {
2527                 if (evsel->attr.type == PERF_TYPE_SOFTWARE &&
2528                     (evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MAJ ||
2529                      evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MIN ||
2530                      evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS))
2531                         evsel->handler = trace__pgfault;
2532         }
2533
2534         err = parse_target_str(trace);
2535         if (err != 0)
2536                 goto out;
2537
2538         setup_pager();
2539
2540         err = perf_session__process_events(session);
2541         if (err)
2542                 pr_err("Failed to process events, error %d", err);
2543
2544         else if (trace->summary)
2545                 trace__fprintf_thread_summary(trace, trace->output);
2546
2547 out:
2548         perf_session__delete(session);
2549
2550         return err;
2551 }
2552
/*
 * Write the banner that precedes the per-thread summary to @fp.
 *
 * Returns whatever fprintf() returned, converted to size_t.
 */
static size_t trace__fprintf_threads_header(FILE *fp)
{
        return fprintf(fp, "\n Summary of events:\n\n");
}
2561
2562 DEFINE_RESORT_RB(syscall_stats, a->msecs > b->msecs,
2563         struct stats    *stats;
2564         double          msecs;
2565         int             syscall;
2566 )
2567 {
2568         struct int_node *source = rb_entry(nd, struct int_node, rb_node);
2569         struct stats *stats = source->priv;
2570
2571         entry->syscall = source->i;
2572         entry->stats   = stats;
2573         entry->msecs   = stats ? (u64)stats->n * (avg_stats(stats) / NSEC_PER_MSEC) : 0;
2574 }
2575
2576 static size_t thread__dump_stats(struct thread_trace *ttrace,
2577                                  struct trace *trace, FILE *fp)
2578 {
2579         size_t printed = 0;
2580         struct syscall *sc;
2581         struct rb_node *nd;
2582         DECLARE_RESORT_RB_INTLIST(syscall_stats, ttrace->syscall_stats);
2583
2584         if (syscall_stats == NULL)
2585                 return 0;
2586
2587         printed += fprintf(fp, "\n");
2588
2589         printed += fprintf(fp, "   syscall            calls    total       min       avg       max      stddev\n");
2590         printed += fprintf(fp, "                               (msec)    (msec)    (msec)    (msec)        (%%)\n");
2591         printed += fprintf(fp, "   --------------- -------- --------- --------- --------- ---------     ------\n");
2592
2593         resort_rb__for_each(nd, syscall_stats) {
2594                 struct stats *stats = syscall_stats_entry->stats;
2595                 if (stats) {
2596                         double min = (double)(stats->min) / NSEC_PER_MSEC;
2597                         double max = (double)(stats->max) / NSEC_PER_MSEC;
2598                         double avg = avg_stats(stats);
2599                         double pct;
2600                         u64 n = (u64) stats->n;
2601
2602                         pct = avg ? 100.0 * stddev_stats(stats)/avg : 0.0;
2603                         avg /= NSEC_PER_MSEC;
2604
2605                         sc = &trace->syscalls.table[syscall_stats_entry->syscall];
2606                         printed += fprintf(fp, "   %-15s", sc->name);
2607                         printed += fprintf(fp, " %8" PRIu64 " %9.3f %9.3f %9.3f",
2608                                            n, syscall_stats_entry->msecs, min, avg);
2609                         printed += fprintf(fp, " %9.3f %9.2f%%\n", max, pct);
2610                 }
2611         }
2612
2613         resort_rb__delete(syscall_stats);
2614         printed += fprintf(fp, "\n\n");
2615
2616         return printed;
2617 }
2618
2619 static size_t trace__fprintf_thread(FILE *fp, struct thread *thread, struct trace *trace)
2620 {
2621         size_t printed = 0;
2622         struct thread_trace *ttrace = thread__priv(thread);
2623         double ratio;
2624
2625         if (ttrace == NULL)
2626                 return 0;
2627
2628         ratio = (double)ttrace->nr_events / trace->nr_events * 100.0;
2629
2630         printed += fprintf(fp, " %s (%d), ", thread__comm_str(thread), thread->tid);
2631         printed += fprintf(fp, "%lu events, ", ttrace->nr_events);
2632         printed += fprintf(fp, "%.1f%%", ratio);
2633         if (ttrace->pfmaj)
2634                 printed += fprintf(fp, ", %lu majfaults", ttrace->pfmaj);
2635         if (ttrace->pfmin)
2636                 printed += fprintf(fp, ", %lu minfaults", ttrace->pfmin);
2637         if (trace->sched)
2638                 printed += fprintf(fp, ", %.3f msec\n", ttrace->runtime_ms);
2639         else if (fputc('\n', fp) != EOF)
2640                 ++printed;
2641
2642         printed += thread__dump_stats(ttrace, trace, fp);
2643
2644         return printed;
2645 }
2646
2647 static unsigned long thread__nr_events(struct thread_trace *ttrace)
2648 {
2649         return ttrace ? ttrace->nr_events : 0;
2650 }
2651
2652 DEFINE_RESORT_RB(threads, (thread__nr_events(a->thread->priv) < thread__nr_events(b->thread->priv)),
2653         struct thread *thread;
2654 )
2655 {
2656         entry->thread = rb_entry(nd, struct thread, rb_node);
2657 }
2658
2659 static size_t trace__fprintf_thread_summary(struct trace *trace, FILE *fp)
2660 {
2661         DECLARE_RESORT_RB_MACHINE_THREADS(threads, trace->host);
2662         size_t printed = trace__fprintf_threads_header(fp);
2663         struct rb_node *nd;
2664
2665         if (threads == NULL) {
2666                 fprintf(fp, "%s", "Error sorting output by nr_events!\n");
2667                 return 0;
2668         }
2669
2670         resort_rb__for_each(nd, threads)
2671                 printed += trace__fprintf_thread(fp, threads_entry->thread, trace);
2672
2673         resort_rb__delete(threads);
2674
2675         return printed;
2676 }
2677
2678 static int trace__set_duration(const struct option *opt, const char *str,
2679                                int unset __maybe_unused)
2680 {
2681         struct trace *trace = opt->value;
2682
2683         trace->duration_filter = atof(str);
2684         return 0;
2685 }
2686
2687 static int trace__set_filter_pids(const struct option *opt, const char *str,
2688                                   int unset __maybe_unused)
2689 {
2690         int ret = -1;
2691         size_t i;
2692         struct trace *trace = opt->value;
2693         /*
2694          * FIXME: introduce a intarray class, plain parse csv and create a
2695          * { int nr, int entries[] } struct...
2696          */
2697         struct intlist *list = intlist__new(str);
2698
2699         if (list == NULL)
2700                 return -1;
2701
2702         i = trace->filter_pids.nr = intlist__nr_entries(list) + 1;
2703         trace->filter_pids.entries = calloc(i, sizeof(pid_t));
2704
2705         if (trace->filter_pids.entries == NULL)
2706                 goto out;
2707
2708         trace->filter_pids.entries[0] = getpid();
2709
2710         for (i = 1; i < trace->filter_pids.nr; ++i)
2711                 trace->filter_pids.entries[i] = intlist__entry(list, i - 1)->i;
2712
2713         intlist__delete(list);
2714         ret = 0;
2715 out:
2716         return ret;
2717 }
2718
2719 static int trace__open_output(struct trace *trace, const char *filename)
2720 {
2721         struct stat st;
2722
2723         if (!stat(filename, &st) && st.st_size) {
2724                 char oldname[PATH_MAX];
2725
2726                 scnprintf(oldname, sizeof(oldname), "%s.old", filename);
2727                 unlink(oldname);
2728                 rename(filename, oldname);
2729         }
2730
2731         trace->output = fopen(filename, "w");
2732
2733         return trace->output == NULL ? -errno : 0;
2734 }
2735
2736 static int parse_pagefaults(const struct option *opt, const char *str,
2737                             int unset __maybe_unused)
2738 {
2739         int *trace_pgfaults = opt->value;
2740
2741         if (strcmp(str, "all") == 0)
2742                 *trace_pgfaults |= TRACE_PFMAJ | TRACE_PFMIN;
2743         else if (strcmp(str, "maj") == 0)
2744                 *trace_pgfaults |= TRACE_PFMAJ;
2745         else if (strcmp(str, "min") == 0)
2746                 *trace_pgfaults |= TRACE_PFMIN;
2747         else
2748                 return -1;
2749
2750         return 0;
2751 }
2752
2753 static void evlist__set_evsel_handler(struct perf_evlist *evlist, void *handler)
2754 {
2755         struct perf_evsel *evsel;
2756
2757         evlist__for_each(evlist, evsel)
2758                 evsel->handler = handler;
2759 }
2760
2761 int cmd_trace(int argc, const char **argv, const char *prefix __maybe_unused)
2762 {
2763         const char *trace_usage[] = {
2764                 "perf trace [<options>] [<command>]",
2765                 "perf trace [<options>] -- <command> [<options>]",
2766                 "perf trace record [<options>] [<command>]",
2767                 "perf trace record [<options>] -- <command> [<options>]",
2768                 NULL
2769         };
2770         struct trace trace = {
2771                 .syscalls = {
2772                         . max = -1,
2773                 },
2774                 .opts = {
2775                         .target = {
2776                                 .uid       = UINT_MAX,
2777                                 .uses_mmap = true,
2778                         },
2779                         .user_freq     = UINT_MAX,
2780                         .user_interval = ULLONG_MAX,
2781                         .no_buffering  = true,
2782                         .mmap_pages    = UINT_MAX,
2783                         .proc_map_timeout  = 500,
2784                 },
2785                 .output = stderr,
2786                 .show_comm = true,
2787                 .trace_syscalls = true,
2788                 .kernel_syscallchains = false,
2789                 .max_stack = UINT_MAX,
2790         };
2791         const char *output_name = NULL;
2792         const char *ev_qualifier_str = NULL;
2793         const struct option trace_options[] = {
2794         OPT_CALLBACK(0, "event", &trace.evlist, "event",
2795                      "event selector. use 'perf list' to list available events",
2796                      parse_events_option),
2797         OPT_BOOLEAN(0, "comm", &trace.show_comm,
2798                     "show the thread COMM next to its id"),
2799         OPT_BOOLEAN(0, "tool_stats", &trace.show_tool_stats, "show tool stats"),
2800         OPT_STRING('e', "expr", &ev_qualifier_str, "expr", "list of syscalls to trace"),
2801         OPT_STRING('o', "output", &output_name, "file", "output file name"),
2802         OPT_STRING('i', "input", &input_name, "file", "Analyze events in file"),
2803         OPT_STRING('p', "pid", &trace.opts.target.pid, "pid",
2804                     "trace events on existing process id"),
2805         OPT_STRING('t', "tid", &trace.opts.target.tid, "tid",
2806                     "trace events on existing thread id"),
2807         OPT_CALLBACK(0, "filter-pids", &trace, "CSV list of pids",
2808                      "pids to filter (by the kernel)", trace__set_filter_pids),
2809         OPT_BOOLEAN('a', "all-cpus", &trace.opts.target.system_wide,
2810                     "system-wide collection from all CPUs"),
2811         OPT_STRING('C', "cpu", &trace.opts.target.cpu_list, "cpu",
2812                     "list of cpus to monitor"),
2813         OPT_BOOLEAN(0, "no-inherit", &trace.opts.no_inherit,
2814                     "child tasks do not inherit counters"),
2815         OPT_CALLBACK('m', "mmap-pages", &trace.opts.mmap_pages, "pages",
2816                      "number of mmap data pages",
2817                      perf_evlist__parse_mmap_pages),
2818         OPT_STRING('u', "uid", &trace.opts.target.uid_str, "user",
2819                    "user to profile"),
2820         OPT_CALLBACK(0, "duration", &trace, "float",
2821                      "show only events with duration > N.M ms",
2822                      trace__set_duration),
2823         OPT_BOOLEAN(0, "sched", &trace.sched, "show blocking scheduler events"),
2824         OPT_INCR('v', "verbose", &verbose, "be more verbose"),
2825         OPT_BOOLEAN('T', "time", &trace.full_time,
2826                     "Show full timestamp, not time relative to first start"),
2827         OPT_BOOLEAN('s', "summary", &trace.summary_only,
2828                     "Show only syscall summary with statistics"),
2829         OPT_BOOLEAN('S', "with-summary", &trace.summary,
2830                     "Show all syscalls and summary with statistics"),
2831         OPT_CALLBACK_DEFAULT('F', "pf", &trace.trace_pgfaults, "all|maj|min",
2832                      "Trace pagefaults", parse_pagefaults, "maj"),
2833         OPT_BOOLEAN(0, "syscalls", &trace.trace_syscalls, "Trace syscalls"),
2834         OPT_BOOLEAN('f', "force", &trace.force, "don't complain, do it"),
2835         OPT_CALLBACK(0, "call-graph", &trace.opts,
2836                      "record_mode[,record_size]", record_callchain_help,
2837                      &record_parse_callchain_opt),
2838         OPT_BOOLEAN(0, "kernel-syscall-graph", &trace.kernel_syscallchains,
2839                     "Show the kernel callchains on the syscall exit path"),
2840         OPT_UINTEGER(0, "min-stack", &trace.min_stack,
2841                      "Set the minimum stack depth when parsing the callchain, "
2842                      "anything below the specified depth will be ignored."),
2843         OPT_UINTEGER(0, "max-stack", &trace.max_stack,
2844                      "Set the maximum stack depth when parsing the callchain, "
2845                      "anything beyond the specified depth will be ignored. "
2846                      "Default: kernel.perf_event_max_stack or " __stringify(PERF_MAX_STACK_DEPTH)),
2847         OPT_UINTEGER(0, "proc-map-timeout", &trace.opts.proc_map_timeout,
2848                         "per thread proc mmap processing timeout in ms"),
2849         OPT_END()
2850         };
2851         bool __maybe_unused max_stack_user_set = true;
2852         bool mmap_pages_user_set = true;
2853         const char * const trace_subcommands[] = { "record", NULL };
2854         int err;
2855         char bf[BUFSIZ];
2856
2857         signal(SIGSEGV, sighandler_dump_stack);
2858         signal(SIGFPE, sighandler_dump_stack);
2859
2860         trace.evlist = perf_evlist__new();
2861         trace.sctbl = syscalltbl__new();
2862
2863         if (trace.evlist == NULL || trace.sctbl == NULL) {
2864                 pr_err("Not enough memory to run!\n");
2865                 err = -ENOMEM;
2866                 goto out;
2867         }
2868
2869         argc = parse_options_subcommand(argc, argv, trace_options, trace_subcommands,
2870                                  trace_usage, PARSE_OPT_STOP_AT_NON_OPTION);
2871
2872         err = bpf__setup_stdout(trace.evlist);
2873         if (err) {
2874                 bpf__strerror_setup_stdout(trace.evlist, err, bf, sizeof(bf));
2875                 pr_err("ERROR: Setup BPF stdout failed: %s\n", bf);
2876                 goto out;
2877         }
2878
2879         err = -1;
2880
2881         if (trace.trace_pgfaults) {
2882                 trace.opts.sample_address = true;
2883                 trace.opts.sample_time = true;
2884         }
2885
2886         if (trace.opts.mmap_pages == UINT_MAX)
2887                 mmap_pages_user_set = false;
2888
2889         if (trace.max_stack == UINT_MAX) {
2890                 trace.max_stack = sysctl_perf_event_max_stack;
2891                 max_stack_user_set = false;
2892         }
2893
2894 #ifdef HAVE_DWARF_UNWIND_SUPPORT
2895         if ((trace.min_stack || max_stack_user_set) && !callchain_param.enabled)
2896                 record_opts__parse_callchain(&trace.opts, &callchain_param, "dwarf", false);
2897 #endif
2898
2899         if (callchain_param.enabled) {
2900                 if (!mmap_pages_user_set && geteuid() == 0)
2901                         trace.opts.mmap_pages = perf_event_mlock_kb_in_pages() * 4;
2902
2903                 symbol_conf.use_callchain = true;
2904         }
2905
2906         if (trace.evlist->nr_entries > 0)
2907                 evlist__set_evsel_handler(trace.evlist, trace__event_handler);
2908
2909         if ((argc >= 1) && (strcmp(argv[0], "record") == 0))
2910                 return trace__record(&trace, argc-1, &argv[1]);
2911
2912         /* summary_only implies summary option, but don't overwrite summary if set */
2913         if (trace.summary_only)
2914                 trace.summary = trace.summary_only;
2915
2916         if (!trace.trace_syscalls && !trace.trace_pgfaults &&
2917             trace.evlist->nr_entries == 0 /* Was --events used? */) {
2918                 pr_err("Please specify something to trace.\n");
2919                 return -1;
2920         }
2921
2922         if (!trace.trace_syscalls && ev_qualifier_str) {
2923                 pr_err("The -e option can't be used with --no-syscalls.\n");
2924                 goto out;
2925         }
2926
2927         if (output_name != NULL) {
2928                 err = trace__open_output(&trace, output_name);
2929                 if (err < 0) {
2930                         perror("failed to create output file");
2931                         goto out;
2932                 }
2933         }
2934
2935         trace.open_id = syscalltbl__id(trace.sctbl, "open");
2936
2937         if (ev_qualifier_str != NULL) {
2938                 const char *s = ev_qualifier_str;
2939                 struct strlist_config slist_config = {
2940                         .dirname = system_path(STRACE_GROUPS_DIR),
2941                 };
2942
2943                 trace.not_ev_qualifier = *s == '!';
2944                 if (trace.not_ev_qualifier)
2945                         ++s;
2946                 trace.ev_qualifier = strlist__new(s, &slist_config);
2947                 if (trace.ev_qualifier == NULL) {
2948                         fputs("Not enough memory to parse event qualifier",
2949                               trace.output);
2950                         err = -ENOMEM;
2951                         goto out_close;
2952                 }
2953
2954                 err = trace__validate_ev_qualifier(&trace);
2955                 if (err)
2956                         goto out_close;
2957         }
2958
2959         err = target__validate(&trace.opts.target);
2960         if (err) {
2961                 target__strerror(&trace.opts.target, err, bf, sizeof(bf));
2962                 fprintf(trace.output, "%s", bf);
2963                 goto out_close;
2964         }
2965
2966         err = target__parse_uid(&trace.opts.target);
2967         if (err) {
2968                 target__strerror(&trace.opts.target, err, bf, sizeof(bf));
2969                 fprintf(trace.output, "%s", bf);
2970                 goto out_close;
2971         }
2972
2973         if (!argc && target__none(&trace.opts.target))
2974                 trace.opts.target.system_wide = true;
2975
2976         if (input_name)
2977                 err = trace__replay(&trace);
2978         else
2979                 err = trace__run(&trace, argc, argv);
2980
2981 out_close:
2982         if (output_name != NULL)
2983                 fclose(trace.output);
2984 out:
2985         return err;
2986 }