perf hists browser: Dynamically change verbosity level
[linux-block.git] / tools / perf / builtin-trace.c
CommitLineData
a598bb5e
ACM
1/*
2 * builtin-trace.c
3 *
4 * Builtin 'trace' command:
5 *
6 * Display a continuously updated trace of any workload, CPU, specific PID,
7 * system wide, etc. Default format is loosely strace like, but any other
8 * event may be specified using --event.
9 *
10 * Copyright (C) 2012, 2013, 2014, 2015 Red Hat Inc, Arnaldo Carvalho de Melo <acme@redhat.com>
11 *
12 * Initially based on the 'trace' prototype by Thomas Gleixner:
13 *
14 * http://lwn.net/Articles/415728/ ("Announcing a new utility: 'trace'")
15 *
16 * Released under the GPL v2. (and only v2, not any later version)
17 */
18
4e319027 19#include <traceevent/event-parse.h>
988bdb31 20#include <api/fs/tracing_path.h>
514f1c67 21#include "builtin.h"
752fde44 22#include "util/color.h"
7c304ee0 23#include "util/debug.h"
514f1c67 24#include "util/evlist.h"
4b6ab94e 25#include <subcmd/exec-cmd.h>
752fde44 26#include "util/machine.h"
6810fc91 27#include "util/session.h"
752fde44 28#include "util/thread.h"
4b6ab94e 29#include <subcmd/parse-options.h>
2ae3a312 30#include "util/strlist.h"
bdc89661 31#include "util/intlist.h"
514f1c67 32#include "util/thread_map.h"
bf2575c1 33#include "util/stat.h"
97978b3e 34#include "trace-event.h"
9aca7f17 35#include "util/parse-events.h"
ba504235 36#include "util/bpf-loader.h"
566a0885 37#include "callchain.h"
fd0db102 38#include "syscalltbl.h"
96c14451 39#include "rb_resort.h"
514f1c67 40
fd0db102 41#include <libaudit.h> /* FIXME: Still needed for audit_errno_to_name */
514f1c67 42#include <stdlib.h>
8dd2a131 43#include <linux/err.h>
997bba8c
ACM
44#include <linux/filter.h>
45#include <linux/audit.h>
39878d49 46#include <linux/random.h>
c6d4a494 47#include <linux/stringify.h>
bd48c63e 48#include <linux/time64.h>
514f1c67 49
c188e7ac
ACM
/* Fallback value for build environments whose headers lack O_CLOEXEC. */
#if !defined(O_CLOEXEC)
# define O_CLOEXEC 02000000
#endif
53
d1d438a3
ACM
54struct trace {
55 struct perf_tool tool;
fd0db102 56 struct syscalltbl *sctbl;
d1d438a3
ACM
57 struct {
58 int max;
59 struct syscall *table;
60 struct {
61 struct perf_evsel *sys_enter,
62 *sys_exit;
63 } events;
64 } syscalls;
65 struct record_opts opts;
66 struct perf_evlist *evlist;
67 struct machine *host;
68 struct thread *current;
69 u64 base_time;
70 FILE *output;
71 unsigned long nr_events;
72 struct strlist *ev_qualifier;
73 struct {
74 size_t nr;
75 int *entries;
76 } ev_qualifier_ids;
77 struct intlist *tid_list;
78 struct intlist *pid_list;
79 struct {
80 size_t nr;
81 pid_t *entries;
82 } filter_pids;
83 double duration_filter;
84 double runtime_ms;
85 struct {
86 u64 vfs_getname,
87 proc_getname;
88 } stats;
c6d4a494 89 unsigned int max_stack;
5cf9c84e 90 unsigned int min_stack;
d1d438a3
ACM
91 bool not_ev_qualifier;
92 bool live;
93 bool full_time;
94 bool sched;
95 bool multiple_threads;
96 bool summary;
97 bool summary_only;
98 bool show_comm;
99 bool show_tool_stats;
100 bool trace_syscalls;
44621819 101 bool kernel_syscallchains;
d1d438a3
ACM
102 bool force;
103 bool vfs_getname;
104 int trace_pgfaults;
fd0db102 105 int open_id;
d1d438a3 106};
a1c2552d 107
77170988
ACM
108struct tp_field {
109 int offset;
110 union {
111 u64 (*integer)(struct tp_field *field, struct perf_sample *sample);
112 void *(*pointer)(struct tp_field *field, struct perf_sample *sample);
113 };
114};
115
116#define TP_UINT_FIELD(bits) \
117static u64 tp_field__u##bits(struct tp_field *field, struct perf_sample *sample) \
118{ \
55d43bca
DA
119 u##bits value; \
120 memcpy(&value, sample->raw_data + field->offset, sizeof(value)); \
121 return value; \
77170988
ACM
122}
123
124TP_UINT_FIELD(8);
125TP_UINT_FIELD(16);
126TP_UINT_FIELD(32);
127TP_UINT_FIELD(64);
128
129#define TP_UINT_FIELD__SWAPPED(bits) \
130static u64 tp_field__swapped_u##bits(struct tp_field *field, struct perf_sample *sample) \
131{ \
55d43bca
DA
132 u##bits value; \
133 memcpy(&value, sample->raw_data + field->offset, sizeof(value)); \
77170988
ACM
134 return bswap_##bits(value);\
135}
136
137TP_UINT_FIELD__SWAPPED(16);
138TP_UINT_FIELD__SWAPPED(32);
139TP_UINT_FIELD__SWAPPED(64);
140
141static int tp_field__init_uint(struct tp_field *field,
142 struct format_field *format_field,
143 bool needs_swap)
144{
145 field->offset = format_field->offset;
146
147 switch (format_field->size) {
148 case 1:
149 field->integer = tp_field__u8;
150 break;
151 case 2:
152 field->integer = needs_swap ? tp_field__swapped_u16 : tp_field__u16;
153 break;
154 case 4:
155 field->integer = needs_swap ? tp_field__swapped_u32 : tp_field__u32;
156 break;
157 case 8:
158 field->integer = needs_swap ? tp_field__swapped_u64 : tp_field__u64;
159 break;
160 default:
161 return -1;
162 }
163
164 return 0;
165}
166
167static void *tp_field__ptr(struct tp_field *field, struct perf_sample *sample)
168{
169 return sample->raw_data + field->offset;
170}
171
172static int tp_field__init_ptr(struct tp_field *field, struct format_field *format_field)
173{
174 field->offset = format_field->offset;
175 field->pointer = tp_field__ptr;
176 return 0;
177}
178
179struct syscall_tp {
180 struct tp_field id;
181 union {
182 struct tp_field args, ret;
183 };
184};
185
186static int perf_evsel__init_tp_uint_field(struct perf_evsel *evsel,
187 struct tp_field *field,
188 const char *name)
189{
190 struct format_field *format_field = perf_evsel__field(evsel, name);
191
192 if (format_field == NULL)
193 return -1;
194
195 return tp_field__init_uint(field, format_field, evsel->needs_swap);
196}
197
198#define perf_evsel__init_sc_tp_uint_field(evsel, name) \
199 ({ struct syscall_tp *sc = evsel->priv;\
200 perf_evsel__init_tp_uint_field(evsel, &sc->name, #name); })
201
/*
 * Look up the tracepoint field called @name in @evsel and set @field up as
 * a pointer accessor for it.
 *
 * Returns -1 when the field does not exist, otherwise whatever
 * tp_field__init_ptr() returns.
 */
static int perf_evsel__init_tp_ptr_field(struct perf_evsel *evsel,
					 struct tp_field *field,
					 const char *name)
{
	struct format_field *format_field = perf_evsel__field(evsel, name);

	if (!format_field)
		return -1;

	return tp_field__init_ptr(field, format_field);
}

/*
 * Convenience wrapper: initialise the 'name' member of the struct syscall_tp
 * hanging off evsel->priv from the tracepoint field of the same name.
 */
#define perf_evsel__init_sc_tp_ptr_field(evsel, name) \
	({ struct syscall_tp *sc = evsel->priv; \
	   perf_evsel__init_tp_ptr_field(evsel, &sc->name, #name); })
217
218static void perf_evsel__delete_priv(struct perf_evsel *evsel)
219{
04662523 220 zfree(&evsel->priv);
77170988
ACM
221 perf_evsel__delete(evsel);
222}
223
96695d44
NK
224static int perf_evsel__init_syscall_tp(struct perf_evsel *evsel, void *handler)
225{
226 evsel->priv = malloc(sizeof(struct syscall_tp));
227 if (evsel->priv != NULL) {
228 if (perf_evsel__init_sc_tp_uint_field(evsel, id))
229 goto out_delete;
230
231 evsel->handler = handler;
232 return 0;
233 }
234
235 return -ENOMEM;
236
237out_delete:
04662523 238 zfree(&evsel->priv);
96695d44
NK
239 return -ENOENT;
240}
241
/*
 * Create the evsel for the raw_syscalls:<direction> tracepoint, falling back
 * to syscalls:<direction>, and prepare it with perf_evsel__init_syscall_tp().
 *
 * Returns the new evsel, or NULL when neither tracepoint could be created
 * or the initialisation failed.
 */
static struct perf_evsel *perf_evsel__syscall_newtp(const char *direction, void *handler)
{
	struct perf_evsel *evsel;

	evsel = perf_evsel__newtp("raw_syscalls", direction);
	if (IS_ERR(evsel)) {
		/* older kernel (e.g., RHEL6) use syscalls:{enter,exit} */
		evsel = perf_evsel__newtp("syscalls", direction);
		if (IS_ERR(evsel))
			return NULL;
	}

	if (perf_evsel__init_syscall_tp(evsel, handler) == 0)
		return evsel;

	perf_evsel__delete_priv(evsel);
	return NULL;
}
262
/*
 * Read the 'name' member of evsel's struct syscall_tp out of @sample through
 * the integer accessor installed for it.
 */
#define perf_evsel__sc_tp_uint(evsel, name, sample) \
	({ struct syscall_tp *fields = evsel->priv; \
	   fields->name.integer(&fields->name, sample); })

/* Same as above, but through the pointer accessor. */
#define perf_evsel__sc_tp_ptr(evsel, name, sample) \
	({ struct syscall_tp *fields = evsel->priv; \
	   fields->name.pointer(&fields->name, sample); })
270
01533e97
ACM
271struct syscall_arg {
272 unsigned long val;
75b757ca
ACM
273 struct thread *thread;
274 struct trace *trace;
1f115cb7 275 void *parm;
01533e97
ACM
276 u8 idx;
277 u8 mask;
278};
279
/*
 * Table of names for an integer argument: entries[value - offset] is the
 * name, valid for indexes in [0, nr_entries).
 */
struct strarray {
	int		offset;
	int		nr_entries;
	const char	**entries;
};
285
/* Define strarray__<array> describing the string table 'array' (offset 0). */
#define DEFINE_STRARRAY(array) struct strarray strarray__##array = { \
	.entries    = array, \
	.nr_entries = ARRAY_SIZE(array), \
}

/* Same, for tables whose first entry corresponds to the value 'off'. */
#define DEFINE_STRARRAY_OFFSET(array, off) struct strarray strarray__##array = { \
	.entries    = array, \
	.nr_entries = ARRAY_SIZE(array), \
	.offset	    = off, \
}
296
975b7c2f
ACM
297static size_t __syscall_arg__scnprintf_strarray(char *bf, size_t size,
298 const char *intfmt,
299 struct syscall_arg *arg)
1f115cb7 300{
1f115cb7 301 struct strarray *sa = arg->parm;
03e3adc9 302 int idx = arg->val - sa->offset;
1f115cb7
ACM
303
304 if (idx < 0 || idx >= sa->nr_entries)
975b7c2f 305 return scnprintf(bf, size, intfmt, arg->val);
1f115cb7
ACM
306
307 return scnprintf(bf, size, "%s", sa->entries[idx]);
308}
309
975b7c2f
ACM
/* String table formatter that prints unknown values in decimal. */
static size_t syscall_arg__scnprintf_strarray(char *bf, size_t size,
					      struct syscall_arg *arg)
{
	const char *fallback_fmt = "%d";

	return __syscall_arg__scnprintf_strarray(bf, size, fallback_fmt, arg);
}

#define SCA_STRARRAY syscall_arg__scnprintf_strarray
317
844ae5b4
ACM
#if defined(__i386__) || defined(__x86_64__)
/*
 * FIXME: Make this available to all arches as soon as the ioctl beautifier
 * 	  gets rewritten to support all arches.
 */
/* String table formatter that prints unknown values in hexadecimal. */
static size_t syscall_arg__scnprintf_strhexarray(char *bf, size_t size,
						 struct syscall_arg *arg)
{
	const char *fallback_fmt = "%#x";

	return __syscall_arg__scnprintf_strarray(bf, size, fallback_fmt, arg);
}

#define SCA_STRHEXARRAY syscall_arg__scnprintf_strhexarray
#endif /* defined(__i386__) || defined(__x86_64__) */
78645cf3 331
75b757ca
ACM
332static size_t syscall_arg__scnprintf_fd(char *bf, size_t size,
333 struct syscall_arg *arg);
334
335#define SCA_FD syscall_arg__scnprintf_fd
336
48e1f91a
ACM
337#ifndef AT_FDCWD
338#define AT_FDCWD -100
339#endif
340
75b757ca
ACM
341static size_t syscall_arg__scnprintf_fd_at(char *bf, size_t size,
342 struct syscall_arg *arg)
343{
344 int fd = arg->val;
345
346 if (fd == AT_FDCWD)
347 return scnprintf(bf, size, "CWD");
348
349 return syscall_arg__scnprintf_fd(bf, size, arg);
350}
351
352#define SCA_FDAT syscall_arg__scnprintf_fd_at
353
354static size_t syscall_arg__scnprintf_close_fd(char *bf, size_t size,
355 struct syscall_arg *arg);
356
357#define SCA_CLOSE_FD syscall_arg__scnprintf_close_fd
358
6e7eeb51 359static size_t syscall_arg__scnprintf_hex(char *bf, size_t size,
01533e97 360 struct syscall_arg *arg)
13d4ff3e 361{
01533e97 362 return scnprintf(bf, size, "%#lx", arg->val);
13d4ff3e
ACM
363}
364
beccb2b5
ACM
365#define SCA_HEX syscall_arg__scnprintf_hex
366
a1c2552d
ACM
367static size_t syscall_arg__scnprintf_int(char *bf, size_t size,
368 struct syscall_arg *arg)
369{
370 return scnprintf(bf, size, "%d", arg->val);
371}
372
373#define SCA_INT syscall_arg__scnprintf_int
374
729a7841
ACM
375static const char *bpf_cmd[] = {
376 "MAP_CREATE", "MAP_LOOKUP_ELEM", "MAP_UPDATE_ELEM", "MAP_DELETE_ELEM",
377 "MAP_GET_NEXT_KEY", "PROG_LOAD",
378};
379static DEFINE_STRARRAY(bpf_cmd);
380
03e3adc9
ACM
381static const char *epoll_ctl_ops[] = { "ADD", "DEL", "MOD", };
382static DEFINE_STRARRAY_OFFSET(epoll_ctl_ops, 1);
eac032c5 383
1f115cb7
ACM
384static const char *itimers[] = { "REAL", "VIRTUAL", "PROF", };
385static DEFINE_STRARRAY(itimers);
386
b62bee1b
ACM
387static const char *keyctl_options[] = {
388 "GET_KEYRING_ID", "JOIN_SESSION_KEYRING", "UPDATE", "REVOKE", "CHOWN",
389 "SETPERM", "DESCRIBE", "CLEAR", "LINK", "UNLINK", "SEARCH", "READ",
390 "INSTANTIATE", "NEGATE", "SET_REQKEY_KEYRING", "SET_TIMEOUT",
391 "ASSUME_AUTHORITY", "GET_SECURITY", "SESSION_TO_PARENT", "REJECT",
392 "INSTANTIATE_IOV", "INVALIDATE", "GET_PERSISTENT",
393};
394static DEFINE_STRARRAY(keyctl_options);
395
efe6b882
ACM
396static const char *whences[] = { "SET", "CUR", "END",
397#ifdef SEEK_DATA
398"DATA",
399#endif
400#ifdef SEEK_HOLE
401"HOLE",
402#endif
403};
404static DEFINE_STRARRAY(whences);
f9da0b0c 405
80f587d5
ACM
406static const char *fcntl_cmds[] = {
407 "DUPFD", "GETFD", "SETFD", "GETFL", "SETFL", "GETLK", "SETLK",
408 "SETLKW", "SETOWN", "GETOWN", "SETSIG", "GETSIG", "F_GETLK64",
409 "F_SETLK64", "F_SETLKW64", "F_SETOWN_EX", "F_GETOWN_EX",
410 "F_GETOWNER_UIDS",
411};
412static DEFINE_STRARRAY(fcntl_cmds);
413
c045bf02
ACM
414static const char *rlimit_resources[] = {
415 "CPU", "FSIZE", "DATA", "STACK", "CORE", "RSS", "NPROC", "NOFILE",
416 "MEMLOCK", "AS", "LOCKS", "SIGPENDING", "MSGQUEUE", "NICE", "RTPRIO",
417 "RTTIME",
418};
419static DEFINE_STRARRAY(rlimit_resources);
420
eb5b1b14
ACM
421static const char *sighow[] = { "BLOCK", "UNBLOCK", "SETMASK", };
422static DEFINE_STRARRAY(sighow);
423
4f8c1b74
DA
424static const char *clockid[] = {
425 "REALTIME", "MONOTONIC", "PROCESS_CPUTIME_ID", "THREAD_CPUTIME_ID",
28ebb87c
ACM
426 "MONOTONIC_RAW", "REALTIME_COARSE", "MONOTONIC_COARSE", "BOOTTIME",
427 "REALTIME_ALARM", "BOOTTIME_ALARM", "SGI_CYCLE", "TAI"
4f8c1b74
DA
428};
429static DEFINE_STRARRAY(clockid);
430
e10bce81
ACM
/*
 * Socket address family names, indexed by the AF_* value.
 *
 * The table is positional, so every family number must have an entry:
 * "MPLS" (28) sits between "IB" (27) and "CAN" (29).  Leaving it out shifts
 * "CAN" and every later name to the wrong family number.
 */
static const char *socket_families[] = {
	"UNSPEC", "LOCAL", "INET", "AX25", "IPX", "APPLETALK", "NETROM",
	"BRIDGE", "ATMPVC", "X25", "INET6", "ROSE", "DECnet", "NETBEUI",
	"SECURITY", "KEY", "NETLINK", "PACKET", "ASH", "ECONET", "ATMSVC",
	"RDS", "SNA", "IRDA", "PPPOX", "WANPIPE", "LLC", "IB", "MPLS", "CAN",
	"TIPC", "BLUETOOTH", "IUCV", "RXRPC", "ISDN", "PHONET", "IEEE802154",
	"CAIF", "ALG", "NFC", "VSOCK",
};
439static DEFINE_STRARRAY(socket_families);
440
51108999
ACM
441static size_t syscall_arg__scnprintf_access_mode(char *bf, size_t size,
442 struct syscall_arg *arg)
443{
444 size_t printed = 0;
445 int mode = arg->val;
446
447 if (mode == F_OK) /* 0 */
448 return scnprintf(bf, size, "F");
449#define P_MODE(n) \
450 if (mode & n##_OK) { \
451 printed += scnprintf(bf + printed, size - printed, "%s", #n); \
452 mode &= ~n##_OK; \
453 }
454
455 P_MODE(R);
456 P_MODE(W);
457 P_MODE(X);
458#undef P_MODE
459
460 if (mode)
461 printed += scnprintf(bf + printed, size - printed, "|%#x", mode);
462
463 return printed;
464}
465
466#define SCA_ACCMODE syscall_arg__scnprintf_access_mode
467
f994592d
ACM
/* Formatter for filename arguments, only declared here. */
static size_t syscall_arg__scnprintf_filename(char *bf, size_t size,
					      struct syscall_arg *arg);

#define SCA_FILENAME syscall_arg__scnprintf_filename
472
46cce19b
ACM
473static size_t syscall_arg__scnprintf_pipe_flags(char *bf, size_t size,
474 struct syscall_arg *arg)
475{
476 int printed = 0, flags = arg->val;
477
478#define P_FLAG(n) \
479 if (flags & O_##n) { \
480 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
481 flags &= ~O_##n; \
482 }
483
484 P_FLAG(CLOEXEC);
485 P_FLAG(NONBLOCK);
486#undef P_FLAG
487
488 if (flags)
489 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
490
491 return printed;
492}
493
494#define SCA_PIPE_FLAGS syscall_arg__scnprintf_pipe_flags
495
844ae5b4
ACM
496#if defined(__i386__) || defined(__x86_64__)
497/*
498 * FIXME: Make this available to all arches.
499 */
78645cf3
ACM
#define TCGETS 0x5401

/*
 * Names of the tty ioctl commands, indexed by (cmd - TCGETS): the strarray
 * built on top of this table uses 0x5401 as its offset, so slot 0 is TCGETS.
 *
 * The designated indexes are therefore written as "command number - TCGETS".
 * Using the bare low byte of the command (0x27, 0x50, 0x60) places every
 * name after a gap one slot too far, e.g. 0x5428 would print as TIOCSBRK.
 *
 * Slots skipped by the designated initializers are NULL.
 */
static const char *tioctls[] = {
	"TCGETS", "TCSETS", "TCSETSW", "TCSETSF", "TCGETA", "TCSETA", "TCSETAW",
	"TCSETAF", "TCSBRK", "TCXONC", "TCFLSH", "TIOCEXCL", "TIOCNXCL",
	"TIOCSCTTY", "TIOCGPGRP", "TIOCSPGRP", "TIOCOUTQ", "TIOCSTI",
	"TIOCGWINSZ", "TIOCSWINSZ", "TIOCMGET", "TIOCMBIS", "TIOCMBIC",
	"TIOCMSET", "TIOCGSOFTCAR", "TIOCSSOFTCAR", "FIONREAD", "TIOCLINUX",
	"TIOCCONS", "TIOCGSERIAL", "TIOCSSERIAL", "TIOCPKT", "FIONBIO",
	"TIOCNOTTY", "TIOCSETD", "TIOCGETD", "TCSBRKP",
	[0x5427 - TCGETS] = "TIOCSBRK",
	"TIOCCBRK", "TIOCGSID", "TCGETS2", "TCSETS2", "TCSETSW2", "TCSETSF2",
	"TIOCGRS485", "TIOCSRS485", "TIOCGPTN", "TIOCSPTLCK",
	"TIOCGDEV||TCGETX", "TCSETX", "TCSETXF", "TCSETXW", "TIOCSIG",
	"TIOCVHANGUP", "TIOCGPKT", "TIOCGPTLCK",
	/* TIOCGEXCL is command number 0x40, not the one following TIOCGPTLCK. */
	[0x5440 - TCGETS] = "TIOCGEXCL",
	[0x5450 - TCGETS] = "FIONCLEX", "FIOCLEX", "FIOASYNC", "TIOCSERCONFIG",
	"TIOCSERGWILD", "TIOCSERSWILD", "TIOCGLCKTRMIOS", "TIOCSLCKTRMIOS",
	"TIOCSERGSTRUCT", "TIOCSERGETLSR", "TIOCSERGETMULTI", "TIOCSERSETMULTI",
	"TIOCMIWAIT", "TIOCGICOUNT",
	[0x5460 - TCGETS] = "FIOQSIZE",
};
519
520static DEFINE_STRARRAY_OFFSET(tioctls, 0x5401);
844ae5b4 521#endif /* defined(__i386__) || defined(__x86_64__) */
78645cf3 522
a355a61e
ACM
523#ifndef GRND_NONBLOCK
524#define GRND_NONBLOCK 0x0001
525#endif
526#ifndef GRND_RANDOM
527#define GRND_RANDOM 0x0002
528#endif
529
39878d49
ACM
530static size_t syscall_arg__scnprintf_getrandom_flags(char *bf, size_t size,
531 struct syscall_arg *arg)
532{
533 int printed = 0, flags = arg->val;
534
535#define P_FLAG(n) \
536 if (flags & GRND_##n) { \
537 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
538 flags &= ~GRND_##n; \
539 }
540
541 P_FLAG(RANDOM);
542 P_FLAG(NONBLOCK);
543#undef P_FLAG
544
545 if (flags)
546 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
547
548 return printed;
549}
550
551#define SCA_GETRANDOM_FLAGS syscall_arg__scnprintf_getrandom_flags
552
453350dd
ACM
/*
 * Initializer fragment for a struct syscall_fmt entry: format argument
 * number 'arg' through the string table strarray__<array>.  'name' is not
 * used by the expansion; it only labels the argument at the use site.
 */
#define STRARRAY(arg, name, array) \
	  .arg_scnprintf = { [arg] = SCA_STRARRAY, }, \
	  .arg_parm	 = { [arg] = &strarray__##array, }
556
ea8dc3ce 557#include "trace/beauty/eventfd.c"
8bf382ce 558#include "trace/beauty/flock.c"
d5d71e86 559#include "trace/beauty/futex_op.c"
df4cb167 560#include "trace/beauty/mmap.c"
ba2f22cf 561#include "trace/beauty/mode_t.c"
a30e6259 562#include "trace/beauty/msg_flags.c"
8f48df69 563#include "trace/beauty/open_flags.c"
62de344e 564#include "trace/beauty/perf_event_open.c"
d5d71e86 565#include "trace/beauty/pid.c"
a3bca91f 566#include "trace/beauty/sched_policy.c"
f5cd95ea 567#include "trace/beauty/seccomp.c"
12199d8e 568#include "trace/beauty/signum.c"
bbf86c43 569#include "trace/beauty/socket_type.c"
7206b900 570#include "trace/beauty/waitid_options.c"
a3bca91f 571
514f1c67
ACM
572static struct syscall_fmt {
573 const char *name;
aec1930b 574 const char *alias;
01533e97 575 size_t (*arg_scnprintf[6])(char *bf, size_t size, struct syscall_arg *arg);
1f115cb7 576 void *arg_parm[6];
514f1c67 577 bool errmsg;
11c8e39f 578 bool errpid;
514f1c67 579 bool timeout;
04b34729 580 bool hexret;
514f1c67 581} syscall_fmts[] = {
51108999 582 { .name = "access", .errmsg = true,
12f3ca4f 583 .arg_scnprintf = { [1] = SCA_ACCMODE, /* mode */ }, },
aec1930b 584 { .name = "arch_prctl", .errmsg = true, .alias = "prctl", },
729a7841 585 { .name = "bpf", .errmsg = true, STRARRAY(0, cmd, bpf_cmd), },
beccb2b5
ACM
586 { .name = "brk", .hexret = true,
587 .arg_scnprintf = { [0] = SCA_HEX, /* brk */ }, },
12f3ca4f
ACM
588 { .name = "chdir", .errmsg = true, },
589 { .name = "chmod", .errmsg = true, },
590 { .name = "chroot", .errmsg = true, },
4f8c1b74 591 { .name = "clock_gettime", .errmsg = true, STRARRAY(0, clk_id, clockid), },
11c8e39f 592 { .name = "clone", .errpid = true, },
75b757ca 593 { .name = "close", .errmsg = true,
48000a1a 594 .arg_scnprintf = { [0] = SCA_CLOSE_FD, /* fd */ }, },
a14bb860 595 { .name = "connect", .errmsg = true, },
12f3ca4f 596 { .name = "creat", .errmsg = true, },
b6565c90
ACM
597 { .name = "dup", .errmsg = true, },
598 { .name = "dup2", .errmsg = true, },
599 { .name = "dup3", .errmsg = true, },
453350dd 600 { .name = "epoll_ctl", .errmsg = true, STRARRAY(1, op, epoll_ctl_ops), },
49af9e93
ACM
601 { .name = "eventfd2", .errmsg = true,
602 .arg_scnprintf = { [1] = SCA_EFD_FLAGS, /* flags */ }, },
12f3ca4f 603 { .name = "faccessat", .errmsg = true, },
b6565c90
ACM
604 { .name = "fadvise64", .errmsg = true, },
605 { .name = "fallocate", .errmsg = true, },
606 { .name = "fchdir", .errmsg = true, },
607 { .name = "fchmod", .errmsg = true, },
75b757ca 608 { .name = "fchmodat", .errmsg = true,
12f3ca4f 609 .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
b6565c90 610 { .name = "fchown", .errmsg = true, },
75b757ca 611 { .name = "fchownat", .errmsg = true,
12f3ca4f 612 .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
75b757ca 613 { .name = "fcntl", .errmsg = true,
b6565c90 614 .arg_scnprintf = { [1] = SCA_STRARRAY, /* cmd */ },
75b757ca 615 .arg_parm = { [1] = &strarray__fcntl_cmds, /* cmd */ }, },
b6565c90 616 { .name = "fdatasync", .errmsg = true, },
5cea6ff2 617 { .name = "flock", .errmsg = true,
b6565c90
ACM
618 .arg_scnprintf = { [1] = SCA_FLOCK, /* cmd */ }, },
619 { .name = "fsetxattr", .errmsg = true, },
620 { .name = "fstat", .errmsg = true, .alias = "newfstat", },
12f3ca4f 621 { .name = "fstatat", .errmsg = true, .alias = "newfstatat", },
b6565c90
ACM
622 { .name = "fstatfs", .errmsg = true, },
623 { .name = "fsync", .errmsg = true, },
624 { .name = "ftruncate", .errmsg = true, },
f9da0b0c
ACM
625 { .name = "futex", .errmsg = true,
626 .arg_scnprintf = { [1] = SCA_FUTEX_OP, /* op */ }, },
75b757ca 627 { .name = "futimesat", .errmsg = true,
12f3ca4f 628 .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
b6565c90
ACM
629 { .name = "getdents", .errmsg = true, },
630 { .name = "getdents64", .errmsg = true, },
453350dd 631 { .name = "getitimer", .errmsg = true, STRARRAY(0, which, itimers), },
c65f1070 632 { .name = "getpid", .errpid = true, },
d1d438a3 633 { .name = "getpgid", .errpid = true, },
c65f1070 634 { .name = "getppid", .errpid = true, },
39878d49
ACM
635 { .name = "getrandom", .errmsg = true,
636 .arg_scnprintf = { [2] = SCA_GETRANDOM_FLAGS, /* flags */ }, },
453350dd 637 { .name = "getrlimit", .errmsg = true, STRARRAY(0, resource, rlimit_resources), },
12f3ca4f
ACM
638 { .name = "getxattr", .errmsg = true, },
639 { .name = "inotify_add_watch", .errmsg = true, },
beccb2b5 640 { .name = "ioctl", .errmsg = true,
b6565c90 641 .arg_scnprintf = {
844ae5b4
ACM
642#if defined(__i386__) || defined(__x86_64__)
643/*
644 * FIXME: Make this available to all arches.
645 */
78645cf3
ACM
646 [1] = SCA_STRHEXARRAY, /* cmd */
647 [2] = SCA_HEX, /* arg */ },
648 .arg_parm = { [1] = &strarray__tioctls, /* cmd */ }, },
844ae5b4
ACM
649#else
650 [2] = SCA_HEX, /* arg */ }, },
651#endif
b62bee1b 652 { .name = "keyctl", .errmsg = true, STRARRAY(0, option, keyctl_options), },
8bad5b0a
ACM
653 { .name = "kill", .errmsg = true,
654 .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
12f3ca4f
ACM
655 { .name = "lchown", .errmsg = true, },
656 { .name = "lgetxattr", .errmsg = true, },
75b757ca 657 { .name = "linkat", .errmsg = true,
48000a1a 658 .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
12f3ca4f
ACM
659 { .name = "listxattr", .errmsg = true, },
660 { .name = "llistxattr", .errmsg = true, },
661 { .name = "lremovexattr", .errmsg = true, },
75b757ca 662 { .name = "lseek", .errmsg = true,
b6565c90 663 .arg_scnprintf = { [2] = SCA_STRARRAY, /* whence */ },
75b757ca 664 .arg_parm = { [2] = &strarray__whences, /* whence */ }, },
12f3ca4f
ACM
665 { .name = "lsetxattr", .errmsg = true, },
666 { .name = "lstat", .errmsg = true, .alias = "newlstat", },
667 { .name = "lsxattr", .errmsg = true, },
9e9716d1
ACM
668 { .name = "madvise", .errmsg = true,
669 .arg_scnprintf = { [0] = SCA_HEX, /* start */
670 [2] = SCA_MADV_BHV, /* behavior */ }, },
12f3ca4f 671 { .name = "mkdir", .errmsg = true, },
75b757ca 672 { .name = "mkdirat", .errmsg = true,
12f3ca4f
ACM
673 .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
674 { .name = "mknod", .errmsg = true, },
75b757ca 675 { .name = "mknodat", .errmsg = true,
12f3ca4f 676 .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
3d903aa7
ACM
677 { .name = "mlock", .errmsg = true,
678 .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
679 { .name = "mlockall", .errmsg = true,
680 .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
beccb2b5 681 { .name = "mmap", .hexret = true,
ae685380 682 .arg_scnprintf = { [0] = SCA_HEX, /* addr */
941557e0 683 [2] = SCA_MMAP_PROT, /* prot */
b6565c90 684 [3] = SCA_MMAP_FLAGS, /* flags */ }, },
beccb2b5 685 { .name = "mprotect", .errmsg = true,
ae685380
ACM
686 .arg_scnprintf = { [0] = SCA_HEX, /* start */
687 [2] = SCA_MMAP_PROT, /* prot */ }, },
090389b6
ACM
688 { .name = "mq_unlink", .errmsg = true,
689 .arg_scnprintf = { [0] = SCA_FILENAME, /* u_name */ }, },
ae685380
ACM
690 { .name = "mremap", .hexret = true,
691 .arg_scnprintf = { [0] = SCA_HEX, /* addr */
86998dda 692 [3] = SCA_MREMAP_FLAGS, /* flags */
ae685380 693 [4] = SCA_HEX, /* new_addr */ }, },
3d903aa7
ACM
694 { .name = "munlock", .errmsg = true,
695 .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
beccb2b5
ACM
696 { .name = "munmap", .errmsg = true,
697 .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
75b757ca 698 { .name = "name_to_handle_at", .errmsg = true,
48000a1a 699 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
75b757ca 700 { .name = "newfstatat", .errmsg = true,
12f3ca4f 701 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
be65a89a 702 { .name = "open", .errmsg = true,
12f3ca4f 703 .arg_scnprintf = { [1] = SCA_OPEN_FLAGS, /* flags */ }, },
31cd3855 704 { .name = "open_by_handle_at", .errmsg = true,
75b757ca
ACM
705 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
706 [2] = SCA_OPEN_FLAGS, /* flags */ }, },
31cd3855 707 { .name = "openat", .errmsg = true,
75b757ca
ACM
708 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
709 [2] = SCA_OPEN_FLAGS, /* flags */ }, },
a1c2552d 710 { .name = "perf_event_open", .errmsg = true,
ccd9b2a7 711 .arg_scnprintf = { [2] = SCA_INT, /* cpu */
a1c2552d
ACM
712 [3] = SCA_FD, /* group_fd */
713 [4] = SCA_PERF_FLAGS, /* flags */ }, },
46cce19b
ACM
714 { .name = "pipe2", .errmsg = true,
715 .arg_scnprintf = { [1] = SCA_PIPE_FLAGS, /* flags */ }, },
aec1930b
ACM
716 { .name = "poll", .errmsg = true, .timeout = true, },
717 { .name = "ppoll", .errmsg = true, .timeout = true, },
b6565c90
ACM
718 { .name = "pread", .errmsg = true, .alias = "pread64", },
719 { .name = "preadv", .errmsg = true, .alias = "pread", },
453350dd 720 { .name = "prlimit64", .errmsg = true, STRARRAY(1, resource, rlimit_resources), },
b6565c90
ACM
721 { .name = "pwrite", .errmsg = true, .alias = "pwrite64", },
722 { .name = "pwritev", .errmsg = true, },
723 { .name = "read", .errmsg = true, },
12f3ca4f 724 { .name = "readlink", .errmsg = true, },
75b757ca 725 { .name = "readlinkat", .errmsg = true,
12f3ca4f 726 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
b6565c90 727 { .name = "readv", .errmsg = true, },
b2cc99fd 728 { .name = "recvfrom", .errmsg = true,
b6565c90 729 .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
b2cc99fd 730 { .name = "recvmmsg", .errmsg = true,
b6565c90 731 .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
b2cc99fd 732 { .name = "recvmsg", .errmsg = true,
b6565c90 733 .arg_scnprintf = { [2] = SCA_MSG_FLAGS, /* flags */ }, },
12f3ca4f 734 { .name = "removexattr", .errmsg = true, },
75b757ca 735 { .name = "renameat", .errmsg = true,
48000a1a 736 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
12f3ca4f 737 { .name = "rmdir", .errmsg = true, },
8bad5b0a
ACM
738 { .name = "rt_sigaction", .errmsg = true,
739 .arg_scnprintf = { [0] = SCA_SIGNUM, /* sig */ }, },
453350dd 740 { .name = "rt_sigprocmask", .errmsg = true, STRARRAY(0, how, sighow), },
8bad5b0a
ACM
741 { .name = "rt_sigqueueinfo", .errmsg = true,
742 .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
743 { .name = "rt_tgsigqueueinfo", .errmsg = true,
744 .arg_scnprintf = { [2] = SCA_SIGNUM, /* sig */ }, },
f0bbd602
ACM
745 { .name = "sched_getattr", .errmsg = true, },
746 { .name = "sched_setattr", .errmsg = true, },
a3bca91f
ACM
747 { .name = "sched_setscheduler", .errmsg = true,
748 .arg_scnprintf = { [1] = SCA_SCHED_POLICY, /* policy */ }, },
997bba8c
ACM
749 { .name = "seccomp", .errmsg = true,
750 .arg_scnprintf = { [0] = SCA_SECCOMP_OP, /* op */
751 [1] = SCA_SECCOMP_FLAGS, /* flags */ }, },
aec1930b 752 { .name = "select", .errmsg = true, .timeout = true, },
b2cc99fd 753 { .name = "sendmmsg", .errmsg = true,
b6565c90 754 .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
b2cc99fd 755 { .name = "sendmsg", .errmsg = true,
b6565c90 756 .arg_scnprintf = { [2] = SCA_MSG_FLAGS, /* flags */ }, },
b2cc99fd 757 { .name = "sendto", .errmsg = true,
b6565c90 758 .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
c65f1070 759 { .name = "set_tid_address", .errpid = true, },
453350dd 760 { .name = "setitimer", .errmsg = true, STRARRAY(0, which, itimers), },
d1d438a3 761 { .name = "setpgid", .errmsg = true, },
453350dd 762 { .name = "setrlimit", .errmsg = true, STRARRAY(0, resource, rlimit_resources), },
12f3ca4f 763 { .name = "setxattr", .errmsg = true, },
b6565c90 764 { .name = "shutdown", .errmsg = true, },
e10bce81 765 { .name = "socket", .errmsg = true,
a28b24b2
ACM
766 .arg_scnprintf = { [0] = SCA_STRARRAY, /* family */
767 [1] = SCA_SK_TYPE, /* type */ },
07120aa5
ACM
768 .arg_parm = { [0] = &strarray__socket_families, /* family */ }, },
769 { .name = "socketpair", .errmsg = true,
770 .arg_scnprintf = { [0] = SCA_STRARRAY, /* family */
771 [1] = SCA_SK_TYPE, /* type */ },
e10bce81 772 .arg_parm = { [0] = &strarray__socket_families, /* family */ }, },
12f3ca4f
ACM
773 { .name = "stat", .errmsg = true, .alias = "newstat", },
774 { .name = "statfs", .errmsg = true, },
34221118
ACM
775 { .name = "swapoff", .errmsg = true,
776 .arg_scnprintf = { [0] = SCA_FILENAME, /* specialfile */ }, },
777 { .name = "swapon", .errmsg = true,
778 .arg_scnprintf = { [0] = SCA_FILENAME, /* specialfile */ }, },
75b757ca 779 { .name = "symlinkat", .errmsg = true,
48000a1a 780 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
8bad5b0a
ACM
781 { .name = "tgkill", .errmsg = true,
782 .arg_scnprintf = { [2] = SCA_SIGNUM, /* sig */ }, },
783 { .name = "tkill", .errmsg = true,
784 .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
12f3ca4f 785 { .name = "truncate", .errmsg = true, },
e5959683 786 { .name = "uname", .errmsg = true, .alias = "newuname", },
75b757ca 787 { .name = "unlinkat", .errmsg = true,
12f3ca4f
ACM
788 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
789 { .name = "utime", .errmsg = true, },
75b757ca 790 { .name = "utimensat", .errmsg = true,
12f3ca4f
ACM
791 .arg_scnprintf = { [0] = SCA_FDAT, /* dirfd */ }, },
792 { .name = "utimes", .errmsg = true, },
b6565c90 793 { .name = "vmsplice", .errmsg = true, },
11c8e39f 794 { .name = "wait4", .errpid = true,
7206b900 795 .arg_scnprintf = { [2] = SCA_WAITID_OPTIONS, /* options */ }, },
11c8e39f 796 { .name = "waitid", .errpid = true,
7206b900 797 .arg_scnprintf = { [3] = SCA_WAITID_OPTIONS, /* options */ }, },
b6565c90
ACM
798 { .name = "write", .errmsg = true, },
799 { .name = "writev", .errmsg = true, },
514f1c67
ACM
800};
801
802static int syscall_fmt__cmp(const void *name, const void *fmtp)
803{
804 const struct syscall_fmt *fmt = fmtp;
805 return strcmp(name, fmt->name);
806}
807
808static struct syscall_fmt *syscall_fmt__find(const char *name)
809{
810 const int nmemb = ARRAY_SIZE(syscall_fmts);
811 return bsearch(name, syscall_fmts, nmemb, sizeof(struct syscall_fmt), syscall_fmt__cmp);
812}
813
/*
 * What is known about one syscall: its tracepoint format and argument
 * fields, its name, and the formatting hints / per-argument formatters.
 */
struct syscall {
	struct event_format *tp_format;
	int		    nr_args;
	struct format_field *args;
	const char	    *name;
	bool		    is_exit;
	struct syscall_fmt  *fmt;
	size_t		    (**arg_scnprintf)(char *bf, size_t size, struct syscall_arg *arg);
	void		    **arg_parm;
};
824
60c907ab
ACM
825static size_t fprintf_duration(unsigned long t, FILE *fp)
826{
827 double duration = (double)t / NSEC_PER_MSEC;
828 size_t printed = fprintf(fp, "(");
829
830 if (duration >= 1.0)
831 printed += color_fprintf(fp, PERF_COLOR_RED, "%6.3f ms", duration);
832 else if (duration >= 0.01)
833 printed += color_fprintf(fp, PERF_COLOR_YELLOW, "%6.3f ms", duration);
834 else
835 printed += color_fprintf(fp, PERF_COLOR_NORMAL, "%6.3f ms", duration);
c24ff998 836 return printed + fprintf(fp, "): ");
60c907ab
ACM
837}
838
f994592d
ACM
/**
 * struct thread_trace - per-thread state, stored as the thread's priv pointer
 *
 * filename.ptr: The filename char pointer that will be vfs_getname'd
 * filename.entry_str_pos: Where to insert the string translated from
 *                         filename.ptr by the vfs_getname tracepoint/kprobe.
 */
struct thread_trace {
	u64		  entry_time;	 /* sample time of the last sys_enter */
	u64		  exit_time;	 /* sample time of the last sys_exit */
	bool		  entry_pending; /* a sys_enter line is formatted but not printed yet */
	unsigned long	  nr_events;	 /* bumped on every thread__trace() call */
	unsigned long	  pfmaj, pfmin;	 /* major/minor page fault counters */
	char		  *entry_str;	 /* trace__entry_str_size byte buffer with "name(args" */
	double		  runtime_ms;	 /* accumulated by trace__sched_stat_runtime() */
	struct {
		unsigned long ptr;
		short int     entry_str_pos;
		bool	      pending_open; /* a vfs_getname name is waiting for the syscall exit */
		unsigned int  namelen;	    /* longest name ->name can hold, excluding the '\0' */
		char	      *name;	    /* last pathname seen by trace__vfs_getname() */
	} filename;
	struct {
		int	      max;	/* highest valid index in ->table, -1 when empty */
		char	      **table;	/* strdup()'ed pathnames indexed by fd, entries may be NULL */
	} paths;

	/* keyed by syscall id, each node's ->priv is a struct stats */
	struct intlist *syscall_stats;
};
866
867static struct thread_trace *thread_trace__new(void)
868{
75b757ca
ACM
869 struct thread_trace *ttrace = zalloc(sizeof(struct thread_trace));
870
871 if (ttrace)
872 ttrace->paths.max = -1;
873
bf2575c1
DA
874 ttrace->syscall_stats = intlist__new(NULL);
875
75b757ca 876 return ttrace;
752fde44
ACM
877}
878
c24ff998 879static struct thread_trace *thread__trace(struct thread *thread, FILE *fp)
752fde44 880{
efd5745e
ACM
881 struct thread_trace *ttrace;
882
752fde44
ACM
883 if (thread == NULL)
884 goto fail;
885
89dceb22
NK
886 if (thread__priv(thread) == NULL)
887 thread__set_priv(thread, thread_trace__new());
48000a1a 888
89dceb22 889 if (thread__priv(thread) == NULL)
752fde44
ACM
890 goto fail;
891
89dceb22 892 ttrace = thread__priv(thread);
efd5745e
ACM
893 ++ttrace->nr_events;
894
895 return ttrace;
752fde44 896fail:
c24ff998 897 color_fprintf(fp, PERF_COLOR_RED,
752fde44
ACM
898 "WARNING: not enough memory, dropping samples!\n");
899 return NULL;
900}
901
598d02c5
SF
/* Bit flags; presumably select which page fault kinds to trace - TODO confirm at the users */
#define TRACE_PFMAJ		(1 << 0)
#define TRACE_PFMIN		(1 << 1)

/* Size in bytes of each thread_trace->entry_str buffer */
static const size_t trace__entry_str_size = 2048;
906
97119f37 907static int trace__set_fd_pathname(struct thread *thread, int fd, const char *pathname)
75b757ca 908{
89dceb22 909 struct thread_trace *ttrace = thread__priv(thread);
75b757ca
ACM
910
911 if (fd > ttrace->paths.max) {
912 char **npath = realloc(ttrace->paths.table, (fd + 1) * sizeof(char *));
913
914 if (npath == NULL)
915 return -1;
916
917 if (ttrace->paths.max != -1) {
918 memset(npath + ttrace->paths.max + 1, 0,
919 (fd - ttrace->paths.max) * sizeof(char *));
920 } else {
921 memset(npath, 0, (fd + 1) * sizeof(char *));
922 }
923
924 ttrace->paths.table = npath;
925 ttrace->paths.max = fd;
926 }
927
928 ttrace->paths.table[fd] = strdup(pathname);
929
930 return ttrace->paths.table[fd] != NULL ? 0 : -1;
931}
932
97119f37
ACM
933static int thread__read_fd_path(struct thread *thread, int fd)
934{
935 char linkname[PATH_MAX], pathname[PATH_MAX];
936 struct stat st;
937 int ret;
938
939 if (thread->pid_ == thread->tid) {
940 scnprintf(linkname, sizeof(linkname),
941 "/proc/%d/fd/%d", thread->pid_, fd);
942 } else {
943 scnprintf(linkname, sizeof(linkname),
944 "/proc/%d/task/%d/fd/%d", thread->pid_, thread->tid, fd);
945 }
946
947 if (lstat(linkname, &st) < 0 || st.st_size + 1 > (off_t)sizeof(pathname))
948 return -1;
949
950 ret = readlink(linkname, pathname, sizeof(pathname));
951
952 if (ret < 0 || ret > st.st_size)
953 return -1;
954
955 pathname[ret] = '\0';
956 return trace__set_fd_pathname(thread, fd, pathname);
957}
958
c522739d
ACM
959static const char *thread__fd_path(struct thread *thread, int fd,
960 struct trace *trace)
75b757ca 961{
89dceb22 962 struct thread_trace *ttrace = thread__priv(thread);
75b757ca
ACM
963
964 if (ttrace == NULL)
965 return NULL;
966
967 if (fd < 0)
968 return NULL;
969
cdcd1e6b 970 if ((fd > ttrace->paths.max || ttrace->paths.table[fd] == NULL)) {
c522739d
ACM
971 if (!trace->live)
972 return NULL;
973 ++trace->stats.proc_getname;
cdcd1e6b 974 if (thread__read_fd_path(thread, fd))
c522739d
ACM
975 return NULL;
976 }
75b757ca
ACM
977
978 return ttrace->paths.table[fd];
979}
980
981static size_t syscall_arg__scnprintf_fd(char *bf, size_t size,
982 struct syscall_arg *arg)
983{
984 int fd = arg->val;
985 size_t printed = scnprintf(bf, size, "%d", fd);
c522739d 986 const char *path = thread__fd_path(arg->thread, fd, arg->trace);
75b757ca
ACM
987
988 if (path)
989 printed += scnprintf(bf + printed, size - printed, "<%s>", path);
990
991 return printed;
992}
993
994static size_t syscall_arg__scnprintf_close_fd(char *bf, size_t size,
995 struct syscall_arg *arg)
996{
997 int fd = arg->val;
998 size_t printed = syscall_arg__scnprintf_fd(bf, size, arg);
89dceb22 999 struct thread_trace *ttrace = thread__priv(arg->thread);
75b757ca 1000
04662523
ACM
1001 if (ttrace && fd >= 0 && fd <= ttrace->paths.max)
1002 zfree(&ttrace->paths.table[fd]);
75b757ca
ACM
1003
1004 return printed;
1005}
1006
f994592d
ACM
1007static void thread__set_filename_pos(struct thread *thread, const char *bf,
1008 unsigned long ptr)
1009{
1010 struct thread_trace *ttrace = thread__priv(thread);
1011
1012 ttrace->filename.ptr = ptr;
1013 ttrace->filename.entry_str_pos = bf - ttrace->entry_str;
1014}
1015
1016static size_t syscall_arg__scnprintf_filename(char *bf, size_t size,
1017 struct syscall_arg *arg)
1018{
1019 unsigned long ptr = arg->val;
1020
1021 if (!arg->trace->vfs_getname)
1022 return scnprintf(bf, size, "%#x", ptr);
1023
1024 thread__set_filename_pos(arg->thread, bf, ptr);
1025 return 0;
1026}
1027
ae9ed035
ACM
1028static bool trace__filter_duration(struct trace *trace, double t)
1029{
1030 return t < (trace->duration_filter * NSEC_PER_MSEC);
1031}
1032
752fde44
ACM
1033static size_t trace__fprintf_tstamp(struct trace *trace, u64 tstamp, FILE *fp)
1034{
1035 double ts = (double)(tstamp - trace->base_time) / NSEC_PER_MSEC;
1036
60c907ab 1037 return fprintf(fp, "%10.3f ", ts);
752fde44
ACM
1038}
1039
/*
 * Flags set from the signal handler. They are volatile sig_atomic_t because
 * that is the only object type that can portably be written from a signal
 * handler and read from the interrupted code.
 */
static volatile sig_atomic_t done = false;
static volatile sig_atomic_t interrupted = false;

/* Ask for the trace to stop and note whether that was due to SIGINT. */
static void sig_handler(int sig)
{
	done = true;
	interrupted = sig == SIGINT;
}
1048
752fde44 1049static size_t trace__fprintf_entry_head(struct trace *trace, struct thread *thread,
60c907ab 1050 u64 duration, u64 tstamp, FILE *fp)
752fde44
ACM
1051{
1052 size_t printed = trace__fprintf_tstamp(trace, tstamp, fp);
60c907ab 1053 printed += fprintf_duration(duration, fp);
752fde44 1054
50c95cbd
ACM
1055 if (trace->multiple_threads) {
1056 if (trace->show_comm)
1902efe7 1057 printed += fprintf(fp, "%.14s/", thread__comm_str(thread));
38051234 1058 printed += fprintf(fp, "%d ", thread->tid);
50c95cbd 1059 }
752fde44
ACM
1060
1061 return printed;
1062}
1063
c24ff998 1064static int trace__process_event(struct trace *trace, struct machine *machine,
162f0bef 1065 union perf_event *event, struct perf_sample *sample)
752fde44
ACM
1066{
1067 int ret = 0;
1068
1069 switch (event->header.type) {
1070 case PERF_RECORD_LOST:
c24ff998 1071 color_fprintf(trace->output, PERF_COLOR_RED,
752fde44 1072 "LOST %" PRIu64 " events!\n", event->lost.lost);
162f0bef 1073 ret = machine__process_lost_event(machine, event, sample);
3ed5ca2e 1074 break;
752fde44 1075 default:
162f0bef 1076 ret = machine__process_event(machine, event, sample);
752fde44
ACM
1077 break;
1078 }
1079
1080 return ret;
1081}
1082
c24ff998 1083static int trace__tool_process(struct perf_tool *tool,
752fde44 1084 union perf_event *event,
162f0bef 1085 struct perf_sample *sample,
752fde44
ACM
1086 struct machine *machine)
1087{
c24ff998 1088 struct trace *trace = container_of(tool, struct trace, tool);
162f0bef 1089 return trace__process_event(trace, machine, event, sample);
752fde44
ACM
1090}
1091
caf8a0d0
ACM
1092static char *trace__machine__resolve_kernel_addr(void *vmachine, unsigned long long *addrp, char **modp)
1093{
1094 struct machine *machine = vmachine;
1095
1096 if (machine->kptr_restrict_warned)
1097 return NULL;
1098
1099 if (symbol_conf.kptr_restrict) {
1100 pr_warning("Kernel address maps (/proc/{kallsyms,modules}) are restricted.\n\n"
1101 "Check /proc/sys/kernel/kptr_restrict.\n\n"
1102 "Kernel samples will not be resolved.\n");
1103 machine->kptr_restrict_warned = true;
1104 return NULL;
1105 }
1106
1107 return machine__resolve_kernel_addr(vmachine, addrp, modp);
1108}
1109
752fde44
ACM
1110static int trace__symbols_init(struct trace *trace, struct perf_evlist *evlist)
1111{
0a7e6d1b 1112 int err = symbol__init(NULL);
752fde44
ACM
1113
1114 if (err)
1115 return err;
1116
8fb598e5
DA
1117 trace->host = machine__new_host();
1118 if (trace->host == NULL)
1119 return -ENOMEM;
752fde44 1120
caf8a0d0 1121 if (trace_event__register_resolver(trace->host, trace__machine__resolve_kernel_addr) < 0)
706c3da4
ACM
1122 return -errno;
1123
a33fbd56 1124 err = __machine__synthesize_threads(trace->host, &trace->tool, &trace->opts.target,
9d9cad76
KL
1125 evlist->threads, trace__tool_process, false,
1126 trace->opts.proc_map_timeout);
752fde44
ACM
1127 if (err)
1128 symbol__exit();
1129
1130 return err;
1131}
1132
13d4ff3e
ACM
1133static int syscall__set_arg_fmts(struct syscall *sc)
1134{
1135 struct format_field *field;
b6565c90 1136 int idx = 0, len;
13d4ff3e 1137
f208bd8d 1138 sc->arg_scnprintf = calloc(sc->nr_args, sizeof(void *));
13d4ff3e
ACM
1139 if (sc->arg_scnprintf == NULL)
1140 return -1;
1141
1f115cb7
ACM
1142 if (sc->fmt)
1143 sc->arg_parm = sc->fmt->arg_parm;
1144
f208bd8d 1145 for (field = sc->args; field; field = field->next) {
beccb2b5
ACM
1146 if (sc->fmt && sc->fmt->arg_scnprintf[idx])
1147 sc->arg_scnprintf[idx] = sc->fmt->arg_scnprintf[idx];
12f3ca4f
ACM
1148 else if (strcmp(field->type, "const char *") == 0 &&
1149 (strcmp(field->name, "filename") == 0 ||
1150 strcmp(field->name, "path") == 0 ||
1151 strcmp(field->name, "pathname") == 0))
1152 sc->arg_scnprintf[idx] = SCA_FILENAME;
beccb2b5 1153 else if (field->flags & FIELD_IS_POINTER)
13d4ff3e 1154 sc->arg_scnprintf[idx] = syscall_arg__scnprintf_hex;
d1d438a3
ACM
1155 else if (strcmp(field->type, "pid_t") == 0)
1156 sc->arg_scnprintf[idx] = SCA_PID;
ba2f22cf
ACM
1157 else if (strcmp(field->type, "umode_t") == 0)
1158 sc->arg_scnprintf[idx] = SCA_MODE_T;
b6565c90
ACM
1159 else if ((strcmp(field->type, "int") == 0 ||
1160 strcmp(field->type, "unsigned int") == 0 ||
1161 strcmp(field->type, "long") == 0) &&
1162 (len = strlen(field->name)) >= 2 &&
1163 strcmp(field->name + len - 2, "fd") == 0) {
1164 /*
1165 * /sys/kernel/tracing/events/syscalls/sys_enter*
1166 * egrep 'field:.*fd;' .../format|sed -r 's/.*field:([a-z ]+) [a-z_]*fd.+/\1/g'|sort|uniq -c
1167 * 65 int
1168 * 23 unsigned int
1169 * 7 unsigned long
1170 */
1171 sc->arg_scnprintf[idx] = SCA_FD;
1172 }
13d4ff3e
ACM
1173 ++idx;
1174 }
1175
1176 return 0;
1177}
1178
514f1c67
ACM
1179static int trace__read_syscall_info(struct trace *trace, int id)
1180{
1181 char tp_name[128];
1182 struct syscall *sc;
fd0db102 1183 const char *name = syscalltbl__name(trace->sctbl, id);
3a531260
ACM
1184
1185 if (name == NULL)
1186 return -1;
514f1c67
ACM
1187
1188 if (id > trace->syscalls.max) {
1189 struct syscall *nsyscalls = realloc(trace->syscalls.table, (id + 1) * sizeof(*sc));
1190
1191 if (nsyscalls == NULL)
1192 return -1;
1193
1194 if (trace->syscalls.max != -1) {
1195 memset(nsyscalls + trace->syscalls.max + 1, 0,
1196 (id - trace->syscalls.max) * sizeof(*sc));
1197 } else {
1198 memset(nsyscalls, 0, (id + 1) * sizeof(*sc));
1199 }
1200
1201 trace->syscalls.table = nsyscalls;
1202 trace->syscalls.max = id;
1203 }
1204
1205 sc = trace->syscalls.table + id;
3a531260 1206 sc->name = name;
2ae3a312 1207
3a531260 1208 sc->fmt = syscall_fmt__find(sc->name);
514f1c67 1209
aec1930b 1210 snprintf(tp_name, sizeof(tp_name), "sys_enter_%s", sc->name);
97978b3e 1211 sc->tp_format = trace_event__tp_format("syscalls", tp_name);
aec1930b 1212
8dd2a131 1213 if (IS_ERR(sc->tp_format) && sc->fmt && sc->fmt->alias) {
aec1930b 1214 snprintf(tp_name, sizeof(tp_name), "sys_enter_%s", sc->fmt->alias);
97978b3e 1215 sc->tp_format = trace_event__tp_format("syscalls", tp_name);
aec1930b 1216 }
514f1c67 1217
8dd2a131 1218 if (IS_ERR(sc->tp_format))
13d4ff3e
ACM
1219 return -1;
1220
f208bd8d
ACM
1221 sc->args = sc->tp_format->format.fields;
1222 sc->nr_args = sc->tp_format->format.nr_fields;
c42de706
TS
1223 /*
1224 * We need to check and discard the first variable '__syscall_nr'
1225 * or 'nr' that mean the syscall number. It is needless here.
1226 * So drop '__syscall_nr' or 'nr' field but does not exist on older kernels.
1227 */
1228 if (sc->args && (!strcmp(sc->args->name, "__syscall_nr") || !strcmp(sc->args->name, "nr"))) {
f208bd8d
ACM
1229 sc->args = sc->args->next;
1230 --sc->nr_args;
1231 }
1232
5089f20e
ACM
1233 sc->is_exit = !strcmp(name, "exit_group") || !strcmp(name, "exit");
1234
13d4ff3e 1235 return syscall__set_arg_fmts(sc);
514f1c67
ACM
1236}
1237
d0cc439b
ACM
1238static int trace__validate_ev_qualifier(struct trace *trace)
1239{
8b3ce757 1240 int err = 0, i;
d0cc439b
ACM
1241 struct str_node *pos;
1242
8b3ce757
ACM
1243 trace->ev_qualifier_ids.nr = strlist__nr_entries(trace->ev_qualifier);
1244 trace->ev_qualifier_ids.entries = malloc(trace->ev_qualifier_ids.nr *
1245 sizeof(trace->ev_qualifier_ids.entries[0]));
1246
1247 if (trace->ev_qualifier_ids.entries == NULL) {
1248 fputs("Error:\tNot enough memory for allocating events qualifier ids\n",
1249 trace->output);
1250 err = -EINVAL;
1251 goto out;
1252 }
1253
1254 i = 0;
1255
602a1f4d 1256 strlist__for_each_entry(pos, trace->ev_qualifier) {
d0cc439b 1257 const char *sc = pos->s;
fd0db102 1258 int id = syscalltbl__id(trace->sctbl, sc);
d0cc439b 1259
8b3ce757 1260 if (id < 0) {
d0cc439b
ACM
1261 if (err == 0) {
1262 fputs("Error:\tInvalid syscall ", trace->output);
1263 err = -EINVAL;
1264 } else {
1265 fputs(", ", trace->output);
1266 }
1267
1268 fputs(sc, trace->output);
1269 }
8b3ce757
ACM
1270
1271 trace->ev_qualifier_ids.entries[i++] = id;
d0cc439b
ACM
1272 }
1273
1274 if (err < 0) {
1275 fputs("\nHint:\ttry 'perf list syscalls:sys_enter_*'"
1276 "\nHint:\tand: 'man syscalls'\n", trace->output);
8b3ce757
ACM
1277 zfree(&trace->ev_qualifier_ids.entries);
1278 trace->ev_qualifier_ids.nr = 0;
d0cc439b 1279 }
8b3ce757 1280out:
d0cc439b
ACM
1281 return err;
1282}
1283
55d43bca
DA
1284/*
1285 * args is to be interpreted as a series of longs but we need to handle
1286 * 8-byte unaligned accesses. args points to raw_data within the event
1287 * and raw_data is guaranteed to be 8-byte unaligned because it is
1288 * preceded by raw_size which is a u32. So we need to copy args to a temp
1289 * variable to read it. Most notably this avoids extended load instructions
1290 * on unaligned addresses
1291 */
1292
752fde44 1293static size_t syscall__scnprintf_args(struct syscall *sc, char *bf, size_t size,
55d43bca 1294 unsigned char *args, struct trace *trace,
75b757ca 1295 struct thread *thread)
514f1c67 1296{
514f1c67 1297 size_t printed = 0;
55d43bca
DA
1298 unsigned char *p;
1299 unsigned long val;
514f1c67 1300
f208bd8d 1301 if (sc->args != NULL) {
514f1c67 1302 struct format_field *field;
01533e97
ACM
1303 u8 bit = 1;
1304 struct syscall_arg arg = {
75b757ca
ACM
1305 .idx = 0,
1306 .mask = 0,
1307 .trace = trace,
1308 .thread = thread,
01533e97 1309 };
6e7eeb51 1310
f208bd8d 1311 for (field = sc->args; field;
01533e97
ACM
1312 field = field->next, ++arg.idx, bit <<= 1) {
1313 if (arg.mask & bit)
6e7eeb51 1314 continue;
55d43bca
DA
1315
1316 /* special care for unaligned accesses */
1317 p = args + sizeof(unsigned long) * arg.idx;
1318 memcpy(&val, p, sizeof(val));
1319
4aa58232
ACM
1320 /*
1321 * Suppress this argument if its value is zero and
1322 * and we don't have a string associated in an
1323 * strarray for it.
1324 */
55d43bca 1325 if (val == 0 &&
4aa58232
ACM
1326 !(sc->arg_scnprintf &&
1327 sc->arg_scnprintf[arg.idx] == SCA_STRARRAY &&
1328 sc->arg_parm[arg.idx]))
22ae5cf1
ACM
1329 continue;
1330
752fde44 1331 printed += scnprintf(bf + printed, size - printed,
13d4ff3e 1332 "%s%s: ", printed ? ", " : "", field->name);
01533e97 1333 if (sc->arg_scnprintf && sc->arg_scnprintf[arg.idx]) {
55d43bca 1334 arg.val = val;
1f115cb7
ACM
1335 if (sc->arg_parm)
1336 arg.parm = sc->arg_parm[arg.idx];
01533e97
ACM
1337 printed += sc->arg_scnprintf[arg.idx](bf + printed,
1338 size - printed, &arg);
6e7eeb51 1339 } else {
13d4ff3e 1340 printed += scnprintf(bf + printed, size - printed,
55d43bca 1341 "%ld", val);
6e7eeb51 1342 }
514f1c67 1343 }
4c4d6e51
ACM
1344 } else if (IS_ERR(sc->tp_format)) {
1345 /*
1346 * If we managed to read the tracepoint /format file, then we
1347 * may end up not having any args, like with gettid(), so only
1348 * print the raw args when we didn't manage to read it.
1349 */
01533e97
ACM
1350 int i = 0;
1351
514f1c67 1352 while (i < 6) {
55d43bca
DA
1353 /* special care for unaligned accesses */
1354 p = args + sizeof(unsigned long) * i;
1355 memcpy(&val, p, sizeof(val));
752fde44
ACM
1356 printed += scnprintf(bf + printed, size - printed,
1357 "%sarg%d: %ld",
55d43bca 1358 printed ? ", " : "", i, val);
514f1c67
ACM
1359 ++i;
1360 }
1361 }
1362
1363 return printed;
1364}
1365
/*
 * Signature shared by the per-tracepoint sample handlers in this file,
 * e.g. trace__sys_enter() and trace__sys_exit().
 */
typedef int (*tracepoint_handler)(struct trace *trace, struct perf_evsel *evsel,
				  union perf_event *event,
				  struct perf_sample *sample);
1369
1370static struct syscall *trace__syscall_info(struct trace *trace,
bf2575c1 1371 struct perf_evsel *evsel, int id)
ba3d7dee 1372{
ba3d7dee
ACM
1373
1374 if (id < 0) {
adaa18bf
ACM
1375
1376 /*
1377 * XXX: Noticed on x86_64, reproduced as far back as 3.0.36, haven't tried
1378 * before that, leaving at a higher verbosity level till that is
1379 * explained. Reproduced with plain ftrace with:
1380 *
1381 * echo 1 > /t/events/raw_syscalls/sys_exit/enable
1382 * grep "NR -1 " /t/trace_pipe
1383 *
1384 * After generating some load on the machine.
1385 */
1386 if (verbose > 1) {
1387 static u64 n;
1388 fprintf(trace->output, "Invalid syscall %d id, skipping (%s, %" PRIu64 ") ...\n",
1389 id, perf_evsel__name(evsel), ++n);
1390 }
ba3d7dee
ACM
1391 return NULL;
1392 }
1393
1394 if ((id > trace->syscalls.max || trace->syscalls.table[id].name == NULL) &&
1395 trace__read_syscall_info(trace, id))
1396 goto out_cant_read;
1397
1398 if ((id > trace->syscalls.max || trace->syscalls.table[id].name == NULL))
1399 goto out_cant_read;
1400
1401 return &trace->syscalls.table[id];
1402
1403out_cant_read:
7c304ee0
ACM
1404 if (verbose) {
1405 fprintf(trace->output, "Problems reading syscall %d", id);
1406 if (id <= trace->syscalls.max && trace->syscalls.table[id].name != NULL)
1407 fprintf(trace->output, "(%s)", trace->syscalls.table[id].name);
1408 fputs(" information\n", trace->output);
1409 }
ba3d7dee
ACM
1410 return NULL;
1411}
1412
bf2575c1
DA
1413static void thread__update_stats(struct thread_trace *ttrace,
1414 int id, struct perf_sample *sample)
1415{
1416 struct int_node *inode;
1417 struct stats *stats;
1418 u64 duration = 0;
1419
1420 inode = intlist__findnew(ttrace->syscall_stats, id);
1421 if (inode == NULL)
1422 return;
1423
1424 stats = inode->priv;
1425 if (stats == NULL) {
1426 stats = malloc(sizeof(struct stats));
1427 if (stats == NULL)
1428 return;
1429 init_stats(stats);
1430 inode->priv = stats;
1431 }
1432
1433 if (ttrace->entry_time && sample->time > ttrace->entry_time)
1434 duration = sample->time - ttrace->entry_time;
1435
1436 update_stats(stats, duration);
1437}
1438
e596663e
ACM
1439static int trace__printf_interrupted_entry(struct trace *trace, struct perf_sample *sample)
1440{
1441 struct thread_trace *ttrace;
1442 u64 duration;
1443 size_t printed;
1444
1445 if (trace->current == NULL)
1446 return 0;
1447
1448 ttrace = thread__priv(trace->current);
1449
1450 if (!ttrace->entry_pending)
1451 return 0;
1452
1453 duration = sample->time - ttrace->entry_time;
1454
1455 printed = trace__fprintf_entry_head(trace, trace->current, duration, sample->time, trace->output);
1456 printed += fprintf(trace->output, "%-70s) ...\n", ttrace->entry_str);
1457 ttrace->entry_pending = false;
1458
1459 return printed;
1460}
1461
ba3d7dee 1462static int trace__sys_enter(struct trace *trace, struct perf_evsel *evsel,
0c82adcf 1463 union perf_event *event __maybe_unused,
ba3d7dee
ACM
1464 struct perf_sample *sample)
1465{
752fde44 1466 char *msg;
ba3d7dee 1467 void *args;
752fde44 1468 size_t printed = 0;
2ae3a312 1469 struct thread *thread;
b91fc39f 1470 int id = perf_evsel__sc_tp_uint(evsel, id, sample), err = -1;
bf2575c1 1471 struct syscall *sc = trace__syscall_info(trace, evsel, id);
2ae3a312
ACM
1472 struct thread_trace *ttrace;
1473
1474 if (sc == NULL)
1475 return -1;
ba3d7dee 1476
8fb598e5 1477 thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
c24ff998 1478 ttrace = thread__trace(thread, trace->output);
2ae3a312 1479 if (ttrace == NULL)
b91fc39f 1480 goto out_put;
ba3d7dee 1481
77170988 1482 args = perf_evsel__sc_tp_ptr(evsel, args, sample);
752fde44
ACM
1483
1484 if (ttrace->entry_str == NULL) {
e4d44e83 1485 ttrace->entry_str = malloc(trace__entry_str_size);
752fde44 1486 if (!ttrace->entry_str)
b91fc39f 1487 goto out_put;
752fde44
ACM
1488 }
1489
5cf9c84e 1490 if (!(trace->duration_filter || trace->summary_only || trace->min_stack))
6ebad5c1 1491 trace__printf_interrupted_entry(trace, sample);
e596663e 1492
752fde44
ACM
1493 ttrace->entry_time = sample->time;
1494 msg = ttrace->entry_str;
e4d44e83 1495 printed += scnprintf(msg + printed, trace__entry_str_size - printed, "%s(", sc->name);
752fde44 1496
e4d44e83 1497 printed += syscall__scnprintf_args(sc, msg + printed, trace__entry_str_size - printed,
75b757ca 1498 args, trace, thread);
752fde44 1499
5089f20e 1500 if (sc->is_exit) {
5cf9c84e 1501 if (!(trace->duration_filter || trace->summary_only || trace->min_stack)) {
c24ff998 1502 trace__fprintf_entry_head(trace, thread, 1, sample->time, trace->output);
c008f78f 1503 fprintf(trace->output, "%-70s)\n", ttrace->entry_str);
ae9ed035 1504 }
7f4f8001 1505 } else {
752fde44 1506 ttrace->entry_pending = true;
7f4f8001
ACM
1507 /* See trace__vfs_getname & trace__sys_exit */
1508 ttrace->filename.pending_open = false;
1509 }
ba3d7dee 1510
f3b623b8
ACM
1511 if (trace->current != thread) {
1512 thread__put(trace->current);
1513 trace->current = thread__get(thread);
1514 }
b91fc39f
ACM
1515 err = 0;
1516out_put:
1517 thread__put(thread);
1518 return err;
ba3d7dee
ACM
1519}
1520
5cf9c84e
ACM
1521static int trace__resolve_callchain(struct trace *trace, struct perf_evsel *evsel,
1522 struct perf_sample *sample,
1523 struct callchain_cursor *cursor)
202ff968
ACM
1524{
1525 struct addr_location al;
5cf9c84e
ACM
1526
1527 if (machine__resolve(trace->host, &al, sample) < 0 ||
1528 thread__resolve_callchain(al.thread, cursor, evsel, sample, NULL, NULL, trace->max_stack))
1529 return -1;
1530
1531 return 0;
1532}
1533
1534static int trace__fprintf_callchain(struct trace *trace, struct perf_sample *sample)
1535{
202ff968 1536 /* TODO: user-configurable print_opts */
e20ab86e
ACM
1537 const unsigned int print_opts = EVSEL__PRINT_SYM |
1538 EVSEL__PRINT_DSO |
1539 EVSEL__PRINT_UNKNOWN_AS_ADDR;
202ff968 1540
d327e60c 1541 return sample__fprintf_callchain(sample, 38, print_opts, &callchain_cursor, trace->output);
202ff968
ACM
1542}
1543
ba3d7dee 1544static int trace__sys_exit(struct trace *trace, struct perf_evsel *evsel,
0c82adcf 1545 union perf_event *event __maybe_unused,
ba3d7dee
ACM
1546 struct perf_sample *sample)
1547{
2c82c3ad 1548 long ret;
60c907ab 1549 u64 duration = 0;
2ae3a312 1550 struct thread *thread;
5cf9c84e 1551 int id = perf_evsel__sc_tp_uint(evsel, id, sample), err = -1, callchain_ret = 0;
bf2575c1 1552 struct syscall *sc = trace__syscall_info(trace, evsel, id);
2ae3a312
ACM
1553 struct thread_trace *ttrace;
1554
1555 if (sc == NULL)
1556 return -1;
ba3d7dee 1557
8fb598e5 1558 thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
c24ff998 1559 ttrace = thread__trace(thread, trace->output);
2ae3a312 1560 if (ttrace == NULL)
b91fc39f 1561 goto out_put;
ba3d7dee 1562
bf2575c1
DA
1563 if (trace->summary)
1564 thread__update_stats(ttrace, id, sample);
1565
77170988 1566 ret = perf_evsel__sc_tp_uint(evsel, ret, sample);
ba3d7dee 1567
fd0db102 1568 if (id == trace->open_id && ret >= 0 && ttrace->filename.pending_open) {
7f4f8001
ACM
1569 trace__set_fd_pathname(thread, ret, ttrace->filename.name);
1570 ttrace->filename.pending_open = false;
c522739d
ACM
1571 ++trace->stats.vfs_getname;
1572 }
1573
752fde44
ACM
1574 ttrace->exit_time = sample->time;
1575
ae9ed035 1576 if (ttrace->entry_time) {
60c907ab 1577 duration = sample->time - ttrace->entry_time;
ae9ed035
ACM
1578 if (trace__filter_duration(trace, duration))
1579 goto out;
1580 } else if (trace->duration_filter)
1581 goto out;
60c907ab 1582
5cf9c84e
ACM
1583 if (sample->callchain) {
1584 callchain_ret = trace__resolve_callchain(trace, evsel, sample, &callchain_cursor);
1585 if (callchain_ret == 0) {
1586 if (callchain_cursor.nr < trace->min_stack)
1587 goto out;
1588 callchain_ret = 1;
1589 }
1590 }
1591
fd2eabaf
DA
1592 if (trace->summary_only)
1593 goto out;
1594
c24ff998 1595 trace__fprintf_entry_head(trace, thread, duration, sample->time, trace->output);
752fde44
ACM
1596
1597 if (ttrace->entry_pending) {
c24ff998 1598 fprintf(trace->output, "%-70s", ttrace->entry_str);
752fde44 1599 } else {
c24ff998
ACM
1600 fprintf(trace->output, " ... [");
1601 color_fprintf(trace->output, PERF_COLOR_YELLOW, "continued");
1602 fprintf(trace->output, "]: %s()", sc->name);
752fde44
ACM
1603 }
1604
da3c9a44
ACM
1605 if (sc->fmt == NULL) {
1606signed_print:
2c82c3ad 1607 fprintf(trace->output, ") = %ld", ret);
11c8e39f 1608 } else if (ret < 0 && (sc->fmt->errmsg || sc->fmt->errpid)) {
942a91ed 1609 char bf[STRERR_BUFSIZE];
c8b5f2c9 1610 const char *emsg = str_error_r(-ret, bf, sizeof(bf)),
ba3d7dee
ACM
1611 *e = audit_errno_to_name(-ret);
1612
c24ff998 1613 fprintf(trace->output, ") = -1 %s %s", e, emsg);
da3c9a44 1614 } else if (ret == 0 && sc->fmt->timeout)
c24ff998 1615 fprintf(trace->output, ") = 0 Timeout");
04b34729 1616 else if (sc->fmt->hexret)
2c82c3ad 1617 fprintf(trace->output, ") = %#lx", ret);
11c8e39f
ACM
1618 else if (sc->fmt->errpid) {
1619 struct thread *child = machine__find_thread(trace->host, ret, ret);
1620
1621 if (child != NULL) {
1622 fprintf(trace->output, ") = %ld", ret);
1623 if (child->comm_set)
1624 fprintf(trace->output, " (%s)", thread__comm_str(child));
1625 thread__put(child);
1626 }
1627 } else
da3c9a44 1628 goto signed_print;
ba3d7dee 1629
c24ff998 1630 fputc('\n', trace->output);
566a0885 1631
5cf9c84e
ACM
1632 if (callchain_ret > 0)
1633 trace__fprintf_callchain(trace, sample);
1634 else if (callchain_ret < 0)
1635 pr_err("Problem processing %s callchain, skipping...\n", perf_evsel__name(evsel));
ae9ed035 1636out:
752fde44 1637 ttrace->entry_pending = false;
b91fc39f
ACM
1638 err = 0;
1639out_put:
1640 thread__put(thread);
1641 return err;
ba3d7dee
ACM
1642}
1643
c522739d 1644static int trace__vfs_getname(struct trace *trace, struct perf_evsel *evsel,
0c82adcf 1645 union perf_event *event __maybe_unused,
c522739d
ACM
1646 struct perf_sample *sample)
1647{
f994592d
ACM
1648 struct thread *thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
1649 struct thread_trace *ttrace;
1650 size_t filename_len, entry_str_len, to_move;
1651 ssize_t remaining_space;
1652 char *pos;
7f4f8001 1653 const char *filename = perf_evsel__rawptr(evsel, sample, "pathname");
f994592d
ACM
1654
1655 if (!thread)
1656 goto out;
1657
1658 ttrace = thread__priv(thread);
1659 if (!ttrace)
1660 goto out;
1661
7f4f8001
ACM
1662 filename_len = strlen(filename);
1663
1664 if (ttrace->filename.namelen < filename_len) {
1665 char *f = realloc(ttrace->filename.name, filename_len + 1);
1666
1667 if (f == NULL)
1668 goto out;
1669
1670 ttrace->filename.namelen = filename_len;
1671 ttrace->filename.name = f;
1672 }
1673
1674 strcpy(ttrace->filename.name, filename);
1675 ttrace->filename.pending_open = true;
1676
f994592d
ACM
1677 if (!ttrace->filename.ptr)
1678 goto out;
1679
1680 entry_str_len = strlen(ttrace->entry_str);
1681 remaining_space = trace__entry_str_size - entry_str_len - 1; /* \0 */
1682 if (remaining_space <= 0)
1683 goto out;
1684
f994592d
ACM
1685 if (filename_len > (size_t)remaining_space) {
1686 filename += filename_len - remaining_space;
1687 filename_len = remaining_space;
1688 }
1689
1690 to_move = entry_str_len - ttrace->filename.entry_str_pos + 1; /* \0 */
1691 pos = ttrace->entry_str + ttrace->filename.entry_str_pos;
1692 memmove(pos + filename_len, pos, to_move);
1693 memcpy(pos, filename, filename_len);
1694
1695 ttrace->filename.ptr = 0;
1696 ttrace->filename.entry_str_pos = 0;
1697out:
c522739d
ACM
1698 return 0;
1699}
1700
1302d88e 1701static int trace__sched_stat_runtime(struct trace *trace, struct perf_evsel *evsel,
0c82adcf 1702 union perf_event *event __maybe_unused,
1302d88e
ACM
1703 struct perf_sample *sample)
1704{
1705 u64 runtime = perf_evsel__intval(evsel, sample, "runtime");
1706 double runtime_ms = (double)runtime / NSEC_PER_MSEC;
8fb598e5 1707 struct thread *thread = machine__findnew_thread(trace->host,
314add6b
AH
1708 sample->pid,
1709 sample->tid);
c24ff998 1710 struct thread_trace *ttrace = thread__trace(thread, trace->output);
1302d88e
ACM
1711
1712 if (ttrace == NULL)
1713 goto out_dump;
1714
1715 ttrace->runtime_ms += runtime_ms;
1716 trace->runtime_ms += runtime_ms;
b91fc39f 1717 thread__put(thread);
1302d88e
ACM
1718 return 0;
1719
1720out_dump:
c24ff998 1721 fprintf(trace->output, "%s: comm=%s,pid=%u,runtime=%" PRIu64 ",vruntime=%" PRIu64 ")\n",
1302d88e
ACM
1722 evsel->name,
1723 perf_evsel__strval(evsel, sample, "comm"),
1724 (pid_t)perf_evsel__intval(evsel, sample, "pid"),
1725 runtime,
1726 perf_evsel__intval(evsel, sample, "vruntime"));
b91fc39f 1727 thread__put(thread);
1302d88e
ACM
1728 return 0;
1729}
1730
1d6c9407
WN
1731static void bpf_output__printer(enum binary_printer_ops op,
1732 unsigned int val, void *extra)
1733{
1734 FILE *output = extra;
1735 unsigned char ch = (unsigned char)val;
1736
1737 switch (op) {
1738 case BINARY_PRINT_CHAR_DATA:
1739 fprintf(output, "%c", isprint(ch) ? ch : '.');
1740 break;
1741 case BINARY_PRINT_DATA_BEGIN:
1742 case BINARY_PRINT_LINE_BEGIN:
1743 case BINARY_PRINT_ADDR:
1744 case BINARY_PRINT_NUM_DATA:
1745 case BINARY_PRINT_NUM_PAD:
1746 case BINARY_PRINT_SEP:
1747 case BINARY_PRINT_CHAR_PAD:
1748 case BINARY_PRINT_LINE_END:
1749 case BINARY_PRINT_DATA_END:
1750 default:
1751 break;
1752 }
1753}
1754
1755static void bpf_output__fprintf(struct trace *trace,
1756 struct perf_sample *sample)
1757{
1758 print_binary(sample->raw_data, sample->raw_size, 8,
1759 bpf_output__printer, trace->output);
1760}
1761
14a052df
ACM
1762static int trace__event_handler(struct trace *trace, struct perf_evsel *evsel,
1763 union perf_event *event __maybe_unused,
1764 struct perf_sample *sample)
1765{
7ad35615
ACM
1766 int callchain_ret = 0;
1767
1768 if (sample->callchain) {
1769 callchain_ret = trace__resolve_callchain(trace, evsel, sample, &callchain_cursor);
1770 if (callchain_ret == 0) {
1771 if (callchain_cursor.nr < trace->min_stack)
1772 goto out;
1773 callchain_ret = 1;
1774 }
1775 }
1776
14a052df
ACM
1777 trace__printf_interrupted_entry(trace, sample);
1778 trace__fprintf_tstamp(trace, sample->time, trace->output);
0808921a
ACM
1779
1780 if (trace->trace_syscalls)
1781 fprintf(trace->output, "( ): ");
1782
1783 fprintf(trace->output, "%s:", evsel->name);
14a052df 1784
1d6c9407
WN
1785 if (perf_evsel__is_bpf_output(evsel)) {
1786 bpf_output__fprintf(trace, sample);
1787 } else if (evsel->tp_format) {
14a052df
ACM
1788 event_format__fprintf(evsel->tp_format, sample->cpu,
1789 sample->raw_data, sample->raw_size,
1790 trace->output);
1791 }
1792
1793 fprintf(trace->output, ")\n");
202ff968 1794
7ad35615
ACM
1795 if (callchain_ret > 0)
1796 trace__fprintf_callchain(trace, sample);
1797 else if (callchain_ret < 0)
1798 pr_err("Problem processing %s callchain, skipping...\n", perf_evsel__name(evsel));
1799out:
14a052df
ACM
1800 return 0;
1801}
1802
598d02c5
SF
1803static void print_location(FILE *f, struct perf_sample *sample,
1804 struct addr_location *al,
1805 bool print_dso, bool print_sym)
1806{
1807
1808 if ((verbose || print_dso) && al->map)
1809 fprintf(f, "%s@", al->map->dso->long_name);
1810
1811 if ((verbose || print_sym) && al->sym)
4414a3c5 1812 fprintf(f, "%s+0x%" PRIx64, al->sym->name,
598d02c5
SF
1813 al->addr - al->sym->start);
1814 else if (al->map)
4414a3c5 1815 fprintf(f, "0x%" PRIx64, al->addr);
598d02c5 1816 else
4414a3c5 1817 fprintf(f, "0x%" PRIx64, sample->addr);
598d02c5
SF
1818}
1819
1820static int trace__pgfault(struct trace *trace,
1821 struct perf_evsel *evsel,
473398a2 1822 union perf_event *event __maybe_unused,
598d02c5
SF
1823 struct perf_sample *sample)
1824{
1825 struct thread *thread;
598d02c5
SF
1826 struct addr_location al;
1827 char map_type = 'd';
a2ea67d7 1828 struct thread_trace *ttrace;
b91fc39f 1829 int err = -1;
1df54290 1830 int callchain_ret = 0;
598d02c5
SF
1831
1832 thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
1df54290
ACM
1833
1834 if (sample->callchain) {
1835 callchain_ret = trace__resolve_callchain(trace, evsel, sample, &callchain_cursor);
1836 if (callchain_ret == 0) {
1837 if (callchain_cursor.nr < trace->min_stack)
1838 goto out_put;
1839 callchain_ret = 1;
1840 }
1841 }
1842
a2ea67d7
SF
1843 ttrace = thread__trace(thread, trace->output);
1844 if (ttrace == NULL)
b91fc39f 1845 goto out_put;
a2ea67d7
SF
1846
1847 if (evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MAJ)
1848 ttrace->pfmaj++;
1849 else
1850 ttrace->pfmin++;
1851
1852 if (trace->summary_only)
b91fc39f 1853 goto out;
598d02c5 1854
473398a2 1855 thread__find_addr_location(thread, sample->cpumode, MAP__FUNCTION,
598d02c5
SF
1856 sample->ip, &al);
1857
1858 trace__fprintf_entry_head(trace, thread, 0, sample->time, trace->output);
1859
1860 fprintf(trace->output, "%sfault [",
1861 evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MAJ ?
1862 "maj" : "min");
1863
1864 print_location(trace->output, sample, &al, false, true);
1865
1866 fprintf(trace->output, "] => ");
1867
473398a2 1868 thread__find_addr_location(thread, sample->cpumode, MAP__VARIABLE,
598d02c5
SF
1869 sample->addr, &al);
1870
1871 if (!al.map) {
473398a2 1872 thread__find_addr_location(thread, sample->cpumode,
598d02c5
SF
1873 MAP__FUNCTION, sample->addr, &al);
1874
1875 if (al.map)
1876 map_type = 'x';
1877 else
1878 map_type = '?';
1879 }
1880
1881 print_location(trace->output, sample, &al, true, false);
1882
1883 fprintf(trace->output, " (%c%c)\n", map_type, al.level);
0c3a6ef4 1884
1df54290
ACM
1885 if (callchain_ret > 0)
1886 trace__fprintf_callchain(trace, sample);
1887 else if (callchain_ret < 0)
1888 pr_err("Problem processing %s callchain, skipping...\n", perf_evsel__name(evsel));
b91fc39f
ACM
1889out:
1890 err = 0;
1891out_put:
1892 thread__put(thread);
1893 return err;
598d02c5
SF
1894}
1895
bdc89661
DA
1896static bool skip_sample(struct trace *trace, struct perf_sample *sample)
1897{
1898 if ((trace->pid_list && intlist__find(trace->pid_list, sample->pid)) ||
1899 (trace->tid_list && intlist__find(trace->tid_list, sample->tid)))
1900 return false;
1901
1902 if (trace->pid_list || trace->tid_list)
1903 return true;
1904
1905 return false;
1906}
1907
e6001980 1908static void trace__set_base_time(struct trace *trace,
8a07a809 1909 struct perf_evsel *evsel,
e6001980
ACM
1910 struct perf_sample *sample)
1911{
8a07a809
ACM
1912 /*
1913 * BPF events were not setting PERF_SAMPLE_TIME, so be more robust
1914 * and don't use sample->time unconditionally, we may end up having
1915 * some other event in the future without PERF_SAMPLE_TIME for good
1916 * reason, i.e. we may not be interested in its timestamps, just in
1917 * it taking place, picking some piece of information when it
1918 * appears in our event stream (vfs_getname comes to mind).
1919 */
1920 if (trace->base_time == 0 && !trace->full_time &&
1921 (evsel->attr.sample_type & PERF_SAMPLE_TIME))
e6001980
ACM
1922 trace->base_time = sample->time;
1923}
1924
6810fc91 1925static int trace__process_sample(struct perf_tool *tool,
0c82adcf 1926 union perf_event *event,
6810fc91
DA
1927 struct perf_sample *sample,
1928 struct perf_evsel *evsel,
1929 struct machine *machine __maybe_unused)
1930{
1931 struct trace *trace = container_of(tool, struct trace, tool);
1932 int err = 0;
1933
744a9719 1934 tracepoint_handler handler = evsel->handler;
6810fc91 1935
bdc89661
DA
1936 if (skip_sample(trace, sample))
1937 return 0;
1938
e6001980 1939 trace__set_base_time(trace, evsel, sample);
6810fc91 1940
3160565f
DA
1941 if (handler) {
1942 ++trace->nr_events;
0c82adcf 1943 handler(trace, evsel, event, sample);
3160565f 1944 }
6810fc91
DA
1945
1946 return err;
1947}
1948
bdc89661
DA
1949static int parse_target_str(struct trace *trace)
1950{
1951 if (trace->opts.target.pid) {
1952 trace->pid_list = intlist__new(trace->opts.target.pid);
1953 if (trace->pid_list == NULL) {
1954 pr_err("Error parsing process id string\n");
1955 return -EINVAL;
1956 }
1957 }
1958
1959 if (trace->opts.target.tid) {
1960 trace->tid_list = intlist__new(trace->opts.target.tid);
1961 if (trace->tid_list == NULL) {
1962 pr_err("Error parsing thread id string\n");
1963 return -EINVAL;
1964 }
1965 }
1966
1967 return 0;
1968}
1969
1e28fe0a 1970static int trace__record(struct trace *trace, int argc, const char **argv)
5e2485b1
DA
1971{
1972 unsigned int rec_argc, i, j;
1973 const char **rec_argv;
1974 const char * const record_args[] = {
1975 "record",
1976 "-R",
1977 "-m", "1024",
1978 "-c", "1",
5e2485b1
DA
1979 };
1980
1e28fe0a
SF
1981 const char * const sc_args[] = { "-e", };
1982 unsigned int sc_args_nr = ARRAY_SIZE(sc_args);
1983 const char * const majpf_args[] = { "-e", "major-faults" };
1984 unsigned int majpf_args_nr = ARRAY_SIZE(majpf_args);
1985 const char * const minpf_args[] = { "-e", "minor-faults" };
1986 unsigned int minpf_args_nr = ARRAY_SIZE(minpf_args);
1987
9aca7f17 1988 /* +1 is for the event string below */
1e28fe0a
SF
1989 rec_argc = ARRAY_SIZE(record_args) + sc_args_nr + 1 +
1990 majpf_args_nr + minpf_args_nr + argc;
5e2485b1
DA
1991 rec_argv = calloc(rec_argc + 1, sizeof(char *));
1992
1993 if (rec_argv == NULL)
1994 return -ENOMEM;
1995
1e28fe0a 1996 j = 0;
5e2485b1 1997 for (i = 0; i < ARRAY_SIZE(record_args); i++)
1e28fe0a
SF
1998 rec_argv[j++] = record_args[i];
1999
e281a960
SF
2000 if (trace->trace_syscalls) {
2001 for (i = 0; i < sc_args_nr; i++)
2002 rec_argv[j++] = sc_args[i];
2003
2004 /* event string may be different for older kernels - e.g., RHEL6 */
2005 if (is_valid_tracepoint("raw_syscalls:sys_enter"))
2006 rec_argv[j++] = "raw_syscalls:sys_enter,raw_syscalls:sys_exit";
2007 else if (is_valid_tracepoint("syscalls:sys_enter"))
2008 rec_argv[j++] = "syscalls:sys_enter,syscalls:sys_exit";
2009 else {
2010 pr_err("Neither raw_syscalls nor syscalls events exist.\n");
2011 return -1;
2012 }
9aca7f17 2013 }
9aca7f17 2014
1e28fe0a
SF
2015 if (trace->trace_pgfaults & TRACE_PFMAJ)
2016 for (i = 0; i < majpf_args_nr; i++)
2017 rec_argv[j++] = majpf_args[i];
2018
2019 if (trace->trace_pgfaults & TRACE_PFMIN)
2020 for (i = 0; i < minpf_args_nr; i++)
2021 rec_argv[j++] = minpf_args[i];
2022
2023 for (i = 0; i < (unsigned int)argc; i++)
2024 rec_argv[j++] = argv[i];
5e2485b1 2025
1e28fe0a 2026 return cmd_record(j, rec_argv, NULL);
5e2485b1
DA
2027}
2028
bf2575c1
DA
2029static size_t trace__fprintf_thread_summary(struct trace *trace, FILE *fp);
2030
08c98776 2031static bool perf_evlist__add_vfs_getname(struct perf_evlist *evlist)
c522739d 2032{
ef503831 2033 struct perf_evsel *evsel = perf_evsel__newtp("probe", "vfs_getname");
8dd2a131
JO
2034
2035 if (IS_ERR(evsel))
08c98776 2036 return false;
c522739d
ACM
2037
2038 if (perf_evsel__field(evsel, "pathname") == NULL) {
2039 perf_evsel__delete(evsel);
08c98776 2040 return false;
c522739d
ACM
2041 }
2042
744a9719 2043 evsel->handler = trace__vfs_getname;
c522739d 2044 perf_evlist__add(evlist, evsel);
08c98776 2045 return true;
c522739d
ACM
2046}
2047
0ae537cb 2048static struct perf_evsel *perf_evsel__new_pgfault(u64 config)
598d02c5
SF
2049{
2050 struct perf_evsel *evsel;
2051 struct perf_event_attr attr = {
2052 .type = PERF_TYPE_SOFTWARE,
2053 .mmap_data = 1,
598d02c5
SF
2054 };
2055
2056 attr.config = config;
0524798c 2057 attr.sample_period = 1;
598d02c5
SF
2058
2059 event_attr_init(&attr);
2060
2061 evsel = perf_evsel__new(&attr);
0ae537cb
ACM
2062 if (evsel)
2063 evsel->handler = trace__pgfault;
598d02c5 2064
0ae537cb 2065 return evsel;
598d02c5
SF
2066}
2067
ddbb1b13
ACM
2068static void trace__handle_event(struct trace *trace, union perf_event *event, struct perf_sample *sample)
2069{
2070 const u32 type = event->header.type;
2071 struct perf_evsel *evsel;
2072
ddbb1b13
ACM
2073 if (type != PERF_RECORD_SAMPLE) {
2074 trace__process_event(trace, trace->host, event, sample);
2075 return;
2076 }
2077
2078 evsel = perf_evlist__id2evsel(trace->evlist, sample->id);
2079 if (evsel == NULL) {
2080 fprintf(trace->output, "Unknown tp ID %" PRIu64 ", skipping...\n", sample->id);
2081 return;
2082 }
2083
e6001980
ACM
2084 trace__set_base_time(trace, evsel, sample);
2085
ddbb1b13
ACM
2086 if (evsel->attr.type == PERF_TYPE_TRACEPOINT &&
2087 sample->raw_data == NULL) {
2088 fprintf(trace->output, "%s sample with no payload for tid: %d, cpu %d, raw_size=%d, skipping...\n",
2089 perf_evsel__name(evsel), sample->tid,
2090 sample->cpu, sample->raw_size);
2091 } else {
2092 tracepoint_handler handler = evsel->handler;
2093 handler(trace, evsel, event, sample);
2094 }
2095}
2096
c27366f0
ACM
2097static int trace__add_syscall_newtp(struct trace *trace)
2098{
2099 int ret = -1;
2100 struct perf_evlist *evlist = trace->evlist;
2101 struct perf_evsel *sys_enter, *sys_exit;
2102
2103 sys_enter = perf_evsel__syscall_newtp("sys_enter", trace__sys_enter);
2104 if (sys_enter == NULL)
2105 goto out;
2106
2107 if (perf_evsel__init_sc_tp_ptr_field(sys_enter, args))
2108 goto out_delete_sys_enter;
2109
2110 sys_exit = perf_evsel__syscall_newtp("sys_exit", trace__sys_exit);
2111 if (sys_exit == NULL)
2112 goto out_delete_sys_enter;
2113
2114 if (perf_evsel__init_sc_tp_uint_field(sys_exit, ret))
2115 goto out_delete_sys_exit;
2116
2117 perf_evlist__add(evlist, sys_enter);
2118 perf_evlist__add(evlist, sys_exit);
2119
2ddd5c04 2120 if (callchain_param.enabled && !trace->kernel_syscallchains) {
44621819
ACM
2121 /*
2122 * We're interested only in the user space callchain
2123 * leading to the syscall, allow overriding that for
2124 * debugging reasons using --kernel_syscall_callchains
2125 */
2126 sys_exit->attr.exclude_callchain_kernel = 1;
2127 }
2128
8b3ce757
ACM
2129 trace->syscalls.events.sys_enter = sys_enter;
2130 trace->syscalls.events.sys_exit = sys_exit;
c27366f0
ACM
2131
2132 ret = 0;
2133out:
2134 return ret;
2135
2136out_delete_sys_exit:
2137 perf_evsel__delete_priv(sys_exit);
2138out_delete_sys_enter:
2139 perf_evsel__delete_priv(sys_enter);
2140 goto out;
2141}
2142
19867b61
ACM
2143static int trace__set_ev_qualifier_filter(struct trace *trace)
2144{
2145 int err = -1;
b15d0a4c 2146 struct perf_evsel *sys_exit;
19867b61
ACM
2147 char *filter = asprintf_expr_inout_ints("id", !trace->not_ev_qualifier,
2148 trace->ev_qualifier_ids.nr,
2149 trace->ev_qualifier_ids.entries);
2150
2151 if (filter == NULL)
2152 goto out_enomem;
2153
3541c034
MP
2154 if (!perf_evsel__append_tp_filter(trace->syscalls.events.sys_enter,
2155 filter)) {
b15d0a4c 2156 sys_exit = trace->syscalls.events.sys_exit;
3541c034 2157 err = perf_evsel__append_tp_filter(sys_exit, filter);
b15d0a4c 2158 }
19867b61
ACM
2159
2160 free(filter);
2161out:
2162 return err;
2163out_enomem:
2164 errno = ENOMEM;
2165 goto out;
2166}
c27366f0 2167
f15eb531 2168static int trace__run(struct trace *trace, int argc, const char **argv)
514f1c67 2169{
14a052df 2170 struct perf_evlist *evlist = trace->evlist;
0ae537cb 2171 struct perf_evsel *evsel, *pgfault_maj = NULL, *pgfault_min = NULL;
efd5745e
ACM
2172 int err = -1, i;
2173 unsigned long before;
f15eb531 2174 const bool forks = argc > 0;
46fb3c21 2175 bool draining = false;
514f1c67 2176
75b757ca
ACM
2177 trace->live = true;
2178
c27366f0 2179 if (trace->trace_syscalls && trace__add_syscall_newtp(trace))
801c67b0 2180 goto out_error_raw_syscalls;
514f1c67 2181
e281a960 2182 if (trace->trace_syscalls)
08c98776 2183 trace->vfs_getname = perf_evlist__add_vfs_getname(evlist);
c522739d 2184
0ae537cb
ACM
2185 if ((trace->trace_pgfaults & TRACE_PFMAJ)) {
2186 pgfault_maj = perf_evsel__new_pgfault(PERF_COUNT_SW_PAGE_FAULTS_MAJ);
2187 if (pgfault_maj == NULL)
2188 goto out_error_mem;
2189 perf_evlist__add(evlist, pgfault_maj);
e2726d99 2190 }
598d02c5 2191
0ae537cb
ACM
2192 if ((trace->trace_pgfaults & TRACE_PFMIN)) {
2193 pgfault_min = perf_evsel__new_pgfault(PERF_COUNT_SW_PAGE_FAULTS_MIN);
2194 if (pgfault_min == NULL)
2195 goto out_error_mem;
2196 perf_evlist__add(evlist, pgfault_min);
2197 }
598d02c5 2198
1302d88e 2199 if (trace->sched &&
2cc990ba
ACM
2200 perf_evlist__add_newtp(evlist, "sched", "sched_stat_runtime",
2201 trace__sched_stat_runtime))
2202 goto out_error_sched_stat_runtime;
1302d88e 2203
514f1c67
ACM
2204 err = perf_evlist__create_maps(evlist, &trace->opts.target);
2205 if (err < 0) {
c24ff998 2206 fprintf(trace->output, "Problems parsing the target to trace, check your options!\n");
514f1c67
ACM
2207 goto out_delete_evlist;
2208 }
2209
752fde44
ACM
2210 err = trace__symbols_init(trace, evlist);
2211 if (err < 0) {
c24ff998 2212 fprintf(trace->output, "Problems initializing symbol libraries!\n");
03ad9747 2213 goto out_delete_evlist;
752fde44
ACM
2214 }
2215
fde54b78
ACM
2216 perf_evlist__config(evlist, &trace->opts, NULL);
2217
0c3a6ef4
ACM
2218 if (callchain_param.enabled) {
2219 bool use_identifier = false;
2220
2221 if (trace->syscalls.events.sys_exit) {
2222 perf_evsel__config_callchain(trace->syscalls.events.sys_exit,
2223 &trace->opts, &callchain_param);
2224 use_identifier = true;
2225 }
2226
2227 if (pgfault_maj) {
2228 perf_evsel__config_callchain(pgfault_maj, &trace->opts, &callchain_param);
2229 use_identifier = true;
2230 }
2231
2232 if (pgfault_min) {
2233 perf_evsel__config_callchain(pgfault_min, &trace->opts, &callchain_param);
2234 use_identifier = true;
2235 }
2236
2237 if (use_identifier) {
2238 /*
2239 * Now we have evsels with different sample_ids, use
2240 * PERF_SAMPLE_IDENTIFIER to map from sample to evsel
2241 * from a fixed position in each ring buffer record.
2242 *
2243 * As of this the changeset introducing this comment, this
2244 * isn't strictly needed, as the fields that can come before
2245 * PERF_SAMPLE_ID are all used, but we'll probably disable
2246 * some of those for things like copying the payload of
2247 * pointer syscall arguments, and for vfs_getname we don't
2248 * need PERF_SAMPLE_ADDR and PERF_SAMPLE_IP, so do this
2249 * here as a warning we need to use PERF_SAMPLE_IDENTIFIER.
2250 */
2251 perf_evlist__set_sample_bit(evlist, IDENTIFIER);
2252 perf_evlist__reset_sample_bit(evlist, ID);
2253 }
fde54b78 2254 }
514f1c67 2255
f15eb531
NK
2256 signal(SIGCHLD, sig_handler);
2257 signal(SIGINT, sig_handler);
2258
2259 if (forks) {
6ef73ec4 2260 err = perf_evlist__prepare_workload(evlist, &trace->opts.target,
735f7e0b 2261 argv, false, NULL);
f15eb531 2262 if (err < 0) {
c24ff998 2263 fprintf(trace->output, "Couldn't run the workload!\n");
03ad9747 2264 goto out_delete_evlist;
f15eb531
NK
2265 }
2266 }
2267
514f1c67 2268 err = perf_evlist__open(evlist);
a8f23d8f
ACM
2269 if (err < 0)
2270 goto out_error_open;
514f1c67 2271
ba504235
WN
2272 err = bpf__apply_obj_config();
2273 if (err) {
2274 char errbuf[BUFSIZ];
2275
2276 bpf__strerror_apply_obj_config(err, errbuf, sizeof(errbuf));
2277 pr_err("ERROR: Apply config to BPF failed: %s\n",
2278 errbuf);
2279 goto out_error_open;
2280 }
2281
241b057c
ACM
2282 /*
2283 * Better not use !target__has_task() here because we need to cover the
2284 * case where no threads were specified in the command line, but a
2285 * workload was, and in that case we will fill in the thread_map when
2286 * we fork the workload in perf_evlist__prepare_workload.
2287 */
f078c385
ACM
2288 if (trace->filter_pids.nr > 0)
2289 err = perf_evlist__set_filter_pids(evlist, trace->filter_pids.nr, trace->filter_pids.entries);
e13798c7 2290 else if (thread_map__pid(evlist->threads, 0) == -1)
f078c385
ACM
2291 err = perf_evlist__set_filter_pid(evlist, getpid());
2292
94ad89bc
ACM
2293 if (err < 0)
2294 goto out_error_mem;
2295
19867b61
ACM
2296 if (trace->ev_qualifier_ids.nr > 0) {
2297 err = trace__set_ev_qualifier_filter(trace);
2298 if (err < 0)
2299 goto out_errno;
19867b61 2300
2e5e5f87
ACM
2301 pr_debug("event qualifier tracepoint filter: %s\n",
2302 trace->syscalls.events.sys_exit->filter);
2303 }
19867b61 2304
94ad89bc
ACM
2305 err = perf_evlist__apply_filters(evlist, &evsel);
2306 if (err < 0)
2307 goto out_error_apply_filters;
241b057c 2308
f885037e 2309 err = perf_evlist__mmap(evlist, trace->opts.mmap_pages, false);
e09b18d4
ACM
2310 if (err < 0)
2311 goto out_error_mmap;
514f1c67 2312
cb24d01d
ACM
2313 if (!target__none(&trace->opts.target))
2314 perf_evlist__enable(evlist);
2315
f15eb531
NK
2316 if (forks)
2317 perf_evlist__start_workload(evlist);
2318
e13798c7 2319 trace->multiple_threads = thread_map__pid(evlist->threads, 0) == -1 ||
42052bea
ACM
2320 evlist->threads->nr > 1 ||
2321 perf_evlist__first(evlist)->attr.inherit;
514f1c67 2322again:
efd5745e 2323 before = trace->nr_events;
514f1c67
ACM
2324
2325 for (i = 0; i < evlist->nr_mmaps; i++) {
2326 union perf_event *event;
2327
2328 while ((event = perf_evlist__mmap_read(evlist, i)) != NULL) {
514f1c67 2329 struct perf_sample sample;
514f1c67 2330
efd5745e 2331 ++trace->nr_events;
514f1c67 2332
514f1c67
ACM
2333 err = perf_evlist__parse_sample(evlist, event, &sample);
2334 if (err) {
c24ff998 2335 fprintf(trace->output, "Can't parse sample, err = %d, skipping...\n", err);
8e50d384 2336 goto next_event;
514f1c67
ACM
2337 }
2338
ddbb1b13 2339 trace__handle_event(trace, event, &sample);
8e50d384
ZZ
2340next_event:
2341 perf_evlist__mmap_consume(evlist, i);
20c5f10e 2342
ba209f85
ACM
2343 if (interrupted)
2344 goto out_disable;
02ac5421
ACM
2345
2346 if (done && !draining) {
2347 perf_evlist__disable(evlist);
2348 draining = true;
2349 }
514f1c67
ACM
2350 }
2351 }
2352
efd5745e 2353 if (trace->nr_events == before) {
ba209f85 2354 int timeout = done ? 100 : -1;
f15eb531 2355
46fb3c21
ACM
2356 if (!draining && perf_evlist__poll(evlist, timeout) > 0) {
2357 if (perf_evlist__filter_pollfd(evlist, POLLERR | POLLHUP) == 0)
2358 draining = true;
2359
ba209f85 2360 goto again;
46fb3c21 2361 }
ba209f85
ACM
2362 } else {
2363 goto again;
f15eb531
NK
2364 }
2365
ba209f85 2366out_disable:
f3b623b8
ACM
2367 thread__zput(trace->current);
2368
ba209f85 2369 perf_evlist__disable(evlist);
514f1c67 2370
c522739d
ACM
2371 if (!err) {
2372 if (trace->summary)
2373 trace__fprintf_thread_summary(trace, trace->output);
2374
2375 if (trace->show_tool_stats) {
2376 fprintf(trace->output, "Stats:\n "
2377 " vfs_getname : %" PRIu64 "\n"
2378 " proc_getname: %" PRIu64 "\n",
2379 trace->stats.vfs_getname,
2380 trace->stats.proc_getname);
2381 }
2382 }
bf2575c1 2383
514f1c67
ACM
2384out_delete_evlist:
2385 perf_evlist__delete(evlist);
14a052df 2386 trace->evlist = NULL;
75b757ca 2387 trace->live = false;
514f1c67 2388 return err;
6ef068cb
ACM
2389{
2390 char errbuf[BUFSIZ];
a8f23d8f 2391
2cc990ba 2392out_error_sched_stat_runtime:
988bdb31 2393 tracing_path__strerror_open_tp(errno, errbuf, sizeof(errbuf), "sched", "sched_stat_runtime");
2cc990ba
ACM
2394 goto out_error;
2395
801c67b0 2396out_error_raw_syscalls:
988bdb31 2397 tracing_path__strerror_open_tp(errno, errbuf, sizeof(errbuf), "raw_syscalls", "sys_(enter|exit)");
a8f23d8f
ACM
2398 goto out_error;
2399
e09b18d4
ACM
2400out_error_mmap:
2401 perf_evlist__strerror_mmap(evlist, errno, errbuf, sizeof(errbuf));
2402 goto out_error;
2403
a8f23d8f
ACM
2404out_error_open:
2405 perf_evlist__strerror_open(evlist, errno, errbuf, sizeof(errbuf));
2406
2407out_error:
6ef068cb 2408 fprintf(trace->output, "%s\n", errbuf);
87f91868 2409 goto out_delete_evlist;
94ad89bc
ACM
2410
2411out_error_apply_filters:
2412 fprintf(trace->output,
2413 "Failed to set filter \"%s\" on event %s with %d (%s)\n",
2414 evsel->filter, perf_evsel__name(evsel), errno,
c8b5f2c9 2415 str_error_r(errno, errbuf, sizeof(errbuf)));
94ad89bc 2416 goto out_delete_evlist;
514f1c67 2417}
5ed08dae
ACM
2418out_error_mem:
2419 fprintf(trace->output, "Not enough memory to run!\n");
2420 goto out_delete_evlist;
19867b61
ACM
2421
2422out_errno:
2423 fprintf(trace->output, "errno=%d,%s\n", errno, strerror(errno));
2424 goto out_delete_evlist;
a8f23d8f 2425}
514f1c67 2426
6810fc91
DA
2427static int trace__replay(struct trace *trace)
2428{
2429 const struct perf_evsel_str_handler handlers[] = {
c522739d 2430 { "probe:vfs_getname", trace__vfs_getname, },
6810fc91 2431 };
f5fc1412
JO
2432 struct perf_data_file file = {
2433 .path = input_name,
2434 .mode = PERF_DATA_MODE_READ,
e366a6d8 2435 .force = trace->force,
f5fc1412 2436 };
6810fc91 2437 struct perf_session *session;
003824e8 2438 struct perf_evsel *evsel;
6810fc91
DA
2439 int err = -1;
2440
2441 trace->tool.sample = trace__process_sample;
2442 trace->tool.mmap = perf_event__process_mmap;
384c671e 2443 trace->tool.mmap2 = perf_event__process_mmap2;
6810fc91
DA
2444 trace->tool.comm = perf_event__process_comm;
2445 trace->tool.exit = perf_event__process_exit;
2446 trace->tool.fork = perf_event__process_fork;
2447 trace->tool.attr = perf_event__process_attr;
2448 trace->tool.tracing_data = perf_event__process_tracing_data;
2449 trace->tool.build_id = perf_event__process_build_id;
2450
0a8cb85c 2451 trace->tool.ordered_events = true;
6810fc91
DA
2452 trace->tool.ordering_requires_timestamps = true;
2453
2454 /* add tid to output */
2455 trace->multiple_threads = true;
2456
f5fc1412 2457 session = perf_session__new(&file, false, &trace->tool);
6810fc91 2458 if (session == NULL)
52e02834 2459 return -1;
6810fc91 2460
0a7e6d1b 2461 if (symbol__init(&session->header.env) < 0)
cb2ffae2
NK
2462 goto out;
2463
8fb598e5
DA
2464 trace->host = &session->machines.host;
2465
6810fc91
DA
2466 err = perf_session__set_tracepoints_handlers(session, handlers);
2467 if (err)
2468 goto out;
2469
003824e8
NK
2470 evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
2471 "raw_syscalls:sys_enter");
9aca7f17
DA
2472 /* older kernels have syscalls tp versus raw_syscalls */
2473 if (evsel == NULL)
2474 evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
2475 "syscalls:sys_enter");
003824e8 2476
e281a960
SF
2477 if (evsel &&
2478 (perf_evsel__init_syscall_tp(evsel, trace__sys_enter) < 0 ||
2479 perf_evsel__init_sc_tp_ptr_field(evsel, args))) {
003824e8
NK
2480 pr_err("Error during initialize raw_syscalls:sys_enter event\n");
2481 goto out;
2482 }
2483
2484 evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
2485 "raw_syscalls:sys_exit");
9aca7f17
DA
2486 if (evsel == NULL)
2487 evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
2488 "syscalls:sys_exit");
e281a960
SF
2489 if (evsel &&
2490 (perf_evsel__init_syscall_tp(evsel, trace__sys_exit) < 0 ||
2491 perf_evsel__init_sc_tp_uint_field(evsel, ret))) {
003824e8 2492 pr_err("Error during initialize raw_syscalls:sys_exit event\n");
6810fc91
DA
2493 goto out;
2494 }
2495
e5cadb93 2496 evlist__for_each_entry(session->evlist, evsel) {
1e28fe0a
SF
2497 if (evsel->attr.type == PERF_TYPE_SOFTWARE &&
2498 (evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MAJ ||
2499 evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MIN ||
2500 evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS))
2501 evsel->handler = trace__pgfault;
2502 }
2503
bdc89661
DA
2504 err = parse_target_str(trace);
2505 if (err != 0)
2506 goto out;
2507
6810fc91
DA
2508 setup_pager();
2509
b7b61cbe 2510 err = perf_session__process_events(session);
6810fc91
DA
2511 if (err)
2512 pr_err("Failed to process events, error %d", err);
2513
bf2575c1
DA
2514 else if (trace->summary)
2515 trace__fprintf_thread_summary(trace, trace->output);
2516
6810fc91
DA
2517out:
2518 perf_session__delete(session);
2519
2520 return err;
2521}
2522
1302d88e
ACM
/* Print the heading of the per-thread summary; returns fprintf()'s result. */
static size_t trace__fprintf_threads_header(FILE *fp)
{
	return fprintf(fp, "\n Summary of events:\n\n");
}
2531
b535d523
ACM
2532DEFINE_RESORT_RB(syscall_stats, a->msecs > b->msecs,
2533 struct stats *stats;
2534 double msecs;
2535 int syscall;
2536)
2537{
2538 struct int_node *source = rb_entry(nd, struct int_node, rb_node);
2539 struct stats *stats = source->priv;
2540
2541 entry->syscall = source->i;
2542 entry->stats = stats;
2543 entry->msecs = stats ? (u64)stats->n * (avg_stats(stats) / NSEC_PER_MSEC) : 0;
2544}
2545
bf2575c1
DA
2546static size_t thread__dump_stats(struct thread_trace *ttrace,
2547 struct trace *trace, FILE *fp)
2548{
bf2575c1
DA
2549 size_t printed = 0;
2550 struct syscall *sc;
b535d523
ACM
2551 struct rb_node *nd;
2552 DECLARE_RESORT_RB_INTLIST(syscall_stats, ttrace->syscall_stats);
bf2575c1 2553
b535d523 2554 if (syscall_stats == NULL)
bf2575c1
DA
2555 return 0;
2556
2557 printed += fprintf(fp, "\n");
2558
834fd46d
MW
2559 printed += fprintf(fp, " syscall calls total min avg max stddev\n");
2560 printed += fprintf(fp, " (msec) (msec) (msec) (msec) (%%)\n");
2561 printed += fprintf(fp, " --------------- -------- --------- --------- --------- --------- ------\n");
99ff7150 2562
98a91837 2563 resort_rb__for_each_entry(nd, syscall_stats) {
b535d523 2564 struct stats *stats = syscall_stats_entry->stats;
bf2575c1
DA
2565 if (stats) {
2566 double min = (double)(stats->min) / NSEC_PER_MSEC;
2567 double max = (double)(stats->max) / NSEC_PER_MSEC;
2568 double avg = avg_stats(stats);
2569 double pct;
2570 u64 n = (u64) stats->n;
2571
2572 pct = avg ? 100.0 * stddev_stats(stats)/avg : 0.0;
2573 avg /= NSEC_PER_MSEC;
2574
b535d523 2575 sc = &trace->syscalls.table[syscall_stats_entry->syscall];
99ff7150 2576 printed += fprintf(fp, " %-15s", sc->name);
834fd46d 2577 printed += fprintf(fp, " %8" PRIu64 " %9.3f %9.3f %9.3f",
b535d523 2578 n, syscall_stats_entry->msecs, min, avg);
27a778b5 2579 printed += fprintf(fp, " %9.3f %9.2f%%\n", max, pct);
bf2575c1 2580 }
bf2575c1
DA
2581 }
2582
b535d523 2583 resort_rb__delete(syscall_stats);
bf2575c1 2584 printed += fprintf(fp, "\n\n");
1302d88e
ACM
2585
2586 return printed;
2587}
2588
96c14451 2589static size_t trace__fprintf_thread(FILE *fp, struct thread *thread, struct trace *trace)
896cbb56 2590{
96c14451 2591 size_t printed = 0;
89dceb22 2592 struct thread_trace *ttrace = thread__priv(thread);
896cbb56
DA
2593 double ratio;
2594
2595 if (ttrace == NULL)
2596 return 0;
2597
2598 ratio = (double)ttrace->nr_events / trace->nr_events * 100.0;
2599
15e65c69 2600 printed += fprintf(fp, " %s (%d), ", thread__comm_str(thread), thread->tid);
99ff7150 2601 printed += fprintf(fp, "%lu events, ", ttrace->nr_events);
15e65c69 2602 printed += fprintf(fp, "%.1f%%", ratio);
a2ea67d7
SF
2603 if (ttrace->pfmaj)
2604 printed += fprintf(fp, ", %lu majfaults", ttrace->pfmaj);
2605 if (ttrace->pfmin)
2606 printed += fprintf(fp, ", %lu minfaults", ttrace->pfmin);
03548ebf
ACM
2607 if (trace->sched)
2608 printed += fprintf(fp, ", %.3f msec\n", ttrace->runtime_ms);
2609 else if (fputc('\n', fp) != EOF)
2610 ++printed;
2611
bf2575c1 2612 printed += thread__dump_stats(ttrace, trace, fp);
896cbb56 2613
96c14451
ACM
2614 return printed;
2615}
896cbb56 2616
96c14451
ACM
2617static unsigned long thread__nr_events(struct thread_trace *ttrace)
2618{
2619 return ttrace ? ttrace->nr_events : 0;
2620}
2621
2622DEFINE_RESORT_RB(threads, (thread__nr_events(a->thread->priv) < thread__nr_events(b->thread->priv)),
2623 struct thread *thread;
2624)
2625{
2626 entry->thread = rb_entry(nd, struct thread, rb_node);
896cbb56
DA
2627}
2628
1302d88e
ACM
2629static size_t trace__fprintf_thread_summary(struct trace *trace, FILE *fp)
2630{
96c14451
ACM
2631 DECLARE_RESORT_RB_MACHINE_THREADS(threads, trace->host);
2632 size_t printed = trace__fprintf_threads_header(fp);
2633 struct rb_node *nd;
1302d88e 2634
96c14451
ACM
2635 if (threads == NULL) {
2636 fprintf(fp, "%s", "Error sorting output by nr_events!\n");
2637 return 0;
2638 }
2639
98a91837 2640 resort_rb__for_each_entry(nd, threads)
96c14451 2641 printed += trace__fprintf_thread(fp, threads_entry->thread, trace);
896cbb56 2642
96c14451
ACM
2643 resort_rb__delete(threads);
2644
2645 return printed;
1302d88e
ACM
2646}
2647
ae9ed035
ACM
2648static int trace__set_duration(const struct option *opt, const char *str,
2649 int unset __maybe_unused)
2650{
2651 struct trace *trace = opt->value;
2652
2653 trace->duration_filter = atof(str);
2654 return 0;
2655}
2656
f078c385
ACM
2657static int trace__set_filter_pids(const struct option *opt, const char *str,
2658 int unset __maybe_unused)
2659{
2660 int ret = -1;
2661 size_t i;
2662 struct trace *trace = opt->value;
2663 /*
2664 * FIXME: introduce a intarray class, plain parse csv and create a
2665 * { int nr, int entries[] } struct...
2666 */
2667 struct intlist *list = intlist__new(str);
2668
2669 if (list == NULL)
2670 return -1;
2671
2672 i = trace->filter_pids.nr = intlist__nr_entries(list) + 1;
2673 trace->filter_pids.entries = calloc(i, sizeof(pid_t));
2674
2675 if (trace->filter_pids.entries == NULL)
2676 goto out;
2677
2678 trace->filter_pids.entries[0] = getpid();
2679
2680 for (i = 1; i < trace->filter_pids.nr; ++i)
2681 trace->filter_pids.entries[i] = intlist__entry(list, i - 1)->i;
2682
2683 intlist__delete(list);
2684 ret = 0;
2685out:
2686 return ret;
2687}
2688
c24ff998
ACM
2689static int trace__open_output(struct trace *trace, const char *filename)
2690{
2691 struct stat st;
2692
2693 if (!stat(filename, &st) && st.st_size) {
2694 char oldname[PATH_MAX];
2695
2696 scnprintf(oldname, sizeof(oldname), "%s.old", filename);
2697 unlink(oldname);
2698 rename(filename, oldname);
2699 }
2700
2701 trace->output = fopen(filename, "w");
2702
2703 return trace->output == NULL ? -errno : 0;
2704}
2705
598d02c5
SF
2706static int parse_pagefaults(const struct option *opt, const char *str,
2707 int unset __maybe_unused)
2708{
2709 int *trace_pgfaults = opt->value;
2710
2711 if (strcmp(str, "all") == 0)
2712 *trace_pgfaults |= TRACE_PFMAJ | TRACE_PFMIN;
2713 else if (strcmp(str, "maj") == 0)
2714 *trace_pgfaults |= TRACE_PFMAJ;
2715 else if (strcmp(str, "min") == 0)
2716 *trace_pgfaults |= TRACE_PFMIN;
2717 else
2718 return -1;
2719
2720 return 0;
2721}
2722
14a052df
ACM
2723static void evlist__set_evsel_handler(struct perf_evlist *evlist, void *handler)
2724{
2725 struct perf_evsel *evsel;
2726
e5cadb93 2727 evlist__for_each_entry(evlist, evsel)
14a052df
ACM
2728 evsel->handler = handler;
2729}
2730
514f1c67
ACM
2731int cmd_trace(int argc, const char **argv, const char *prefix __maybe_unused)
2732{
6fdd9cb7 2733 const char *trace_usage[] = {
f15eb531
NK
2734 "perf trace [<options>] [<command>]",
2735 "perf trace [<options>] -- <command> [<options>]",
5e2485b1
DA
2736 "perf trace record [<options>] [<command>]",
2737 "perf trace record [<options>] -- <command> [<options>]",
514f1c67
ACM
2738 NULL
2739 };
2740 struct trace trace = {
514f1c67
ACM
2741 .syscalls = {
2742 . max = -1,
2743 },
2744 .opts = {
2745 .target = {
2746 .uid = UINT_MAX,
2747 .uses_mmap = true,
2748 },
2749 .user_freq = UINT_MAX,
2750 .user_interval = ULLONG_MAX,
509051ea 2751 .no_buffering = true,
38d5447d 2752 .mmap_pages = UINT_MAX,
9d9cad76 2753 .proc_map_timeout = 500,
514f1c67 2754 },
007d66a0 2755 .output = stderr,
50c95cbd 2756 .show_comm = true,
e281a960 2757 .trace_syscalls = true,
44621819 2758 .kernel_syscallchains = false,
05614993 2759 .max_stack = UINT_MAX,
514f1c67 2760 };
c24ff998 2761 const char *output_name = NULL;
2ae3a312 2762 const char *ev_qualifier_str = NULL;
514f1c67 2763 const struct option trace_options[] = {
14a052df
ACM
2764 OPT_CALLBACK(0, "event", &trace.evlist, "event",
2765 "event selector. use 'perf list' to list available events",
2766 parse_events_option),
50c95cbd
ACM
2767 OPT_BOOLEAN(0, "comm", &trace.show_comm,
2768 "show the thread COMM next to its id"),
c522739d 2769 OPT_BOOLEAN(0, "tool_stats", &trace.show_tool_stats, "show tool stats"),
d303e85a 2770 OPT_STRING('e', "expr", &ev_qualifier_str, "expr", "list of syscalls to trace"),
c24ff998 2771 OPT_STRING('o', "output", &output_name, "file", "output file name"),
6810fc91 2772 OPT_STRING('i', "input", &input_name, "file", "Analyze events in file"),
514f1c67
ACM
2773 OPT_STRING('p', "pid", &trace.opts.target.pid, "pid",
2774 "trace events on existing process id"),
ac9be8ee 2775 OPT_STRING('t', "tid", &trace.opts.target.tid, "tid",
514f1c67 2776 "trace events on existing thread id"),
fa0e4ffe
ACM
2777 OPT_CALLBACK(0, "filter-pids", &trace, "CSV list of pids",
2778 "pids to filter (by the kernel)", trace__set_filter_pids),
ac9be8ee 2779 OPT_BOOLEAN('a', "all-cpus", &trace.opts.target.system_wide,
514f1c67 2780 "system-wide collection from all CPUs"),
ac9be8ee 2781 OPT_STRING('C', "cpu", &trace.opts.target.cpu_list, "cpu",
514f1c67 2782 "list of cpus to monitor"),
6810fc91 2783 OPT_BOOLEAN(0, "no-inherit", &trace.opts.no_inherit,
514f1c67 2784 "child tasks do not inherit counters"),
994a1f78
JO
2785 OPT_CALLBACK('m', "mmap-pages", &trace.opts.mmap_pages, "pages",
2786 "number of mmap data pages",
2787 perf_evlist__parse_mmap_pages),
ac9be8ee 2788 OPT_STRING('u', "uid", &trace.opts.target.uid_str, "user",
514f1c67 2789 "user to profile"),
ae9ed035
ACM
2790 OPT_CALLBACK(0, "duration", &trace, "float",
2791 "show only events with duration > N.M ms",
2792 trace__set_duration),
1302d88e 2793 OPT_BOOLEAN(0, "sched", &trace.sched, "show blocking scheduler events"),
7c304ee0 2794 OPT_INCR('v', "verbose", &verbose, "be more verbose"),
4bb09192
DA
2795 OPT_BOOLEAN('T', "time", &trace.full_time,
2796 "Show full timestamp, not time relative to first start"),
fd2eabaf
DA
2797 OPT_BOOLEAN('s', "summary", &trace.summary_only,
2798 "Show only syscall summary with statistics"),
2799 OPT_BOOLEAN('S', "with-summary", &trace.summary,
2800 "Show all syscalls and summary with statistics"),
598d02c5
SF
2801 OPT_CALLBACK_DEFAULT('F', "pf", &trace.trace_pgfaults, "all|maj|min",
2802 "Trace pagefaults", parse_pagefaults, "maj"),
e281a960 2803 OPT_BOOLEAN(0, "syscalls", &trace.trace_syscalls, "Trace syscalls"),
e366a6d8 2804 OPT_BOOLEAN('f', "force", &trace.force, "don't complain, do it"),
566a0885
MW
2805 OPT_CALLBACK(0, "call-graph", &trace.opts,
2806 "record_mode[,record_size]", record_callchain_help,
2807 &record_parse_callchain_opt),
44621819
ACM
2808 OPT_BOOLEAN(0, "kernel-syscall-graph", &trace.kernel_syscallchains,
2809 "Show the kernel callchains on the syscall exit path"),
5cf9c84e
ACM
2810 OPT_UINTEGER(0, "min-stack", &trace.min_stack,
2811 "Set the minimum stack depth when parsing the callchain, "
2812 "anything below the specified depth will be ignored."),
c6d4a494
ACM
2813 OPT_UINTEGER(0, "max-stack", &trace.max_stack,
2814 "Set the maximum stack depth when parsing the callchain, "
2815 "anything beyond the specified depth will be ignored. "
4cb93446 2816 "Default: kernel.perf_event_max_stack or " __stringify(PERF_MAX_STACK_DEPTH)),
9d9cad76
KL
2817 OPT_UINTEGER(0, "proc-map-timeout", &trace.opts.proc_map_timeout,
2818 "per thread proc mmap processing timeout in ms"),
514f1c67
ACM
2819 OPT_END()
2820 };
ccd62a89 2821 bool __maybe_unused max_stack_user_set = true;
f3e459d1 2822 bool mmap_pages_user_set = true;
6fdd9cb7 2823 const char * const trace_subcommands[] = { "record", NULL };
514f1c67 2824 int err;
32caf0d1 2825 char bf[BUFSIZ];
514f1c67 2826
4d08cb80
ACM
2827 signal(SIGSEGV, sighandler_dump_stack);
2828 signal(SIGFPE, sighandler_dump_stack);
2829
14a052df 2830 trace.evlist = perf_evlist__new();
fd0db102 2831 trace.sctbl = syscalltbl__new();
14a052df 2832
fd0db102 2833 if (trace.evlist == NULL || trace.sctbl == NULL) {
14a052df 2834 pr_err("Not enough memory to run!\n");
ff8f695c 2835 err = -ENOMEM;
14a052df
ACM
2836 goto out;
2837 }
2838
6fdd9cb7
YS
2839 argc = parse_options_subcommand(argc, argv, trace_options, trace_subcommands,
2840 trace_usage, PARSE_OPT_STOP_AT_NON_OPTION);
fd2eabaf 2841
d7888573
WN
2842 err = bpf__setup_stdout(trace.evlist);
2843 if (err) {
2844 bpf__strerror_setup_stdout(trace.evlist, err, bf, sizeof(bf));
2845 pr_err("ERROR: Setup BPF stdout failed: %s\n", bf);
2846 goto out;
2847 }
2848
59247e33
ACM
2849 err = -1;
2850
598d02c5
SF
2851 if (trace.trace_pgfaults) {
2852 trace.opts.sample_address = true;
2853 trace.opts.sample_time = true;
2854 }
2855
f3e459d1
ACM
2856 if (trace.opts.mmap_pages == UINT_MAX)
2857 mmap_pages_user_set = false;
2858
05614993 2859 if (trace.max_stack == UINT_MAX) {
fe176085 2860 trace.max_stack = input_name ? PERF_MAX_STACK_DEPTH : sysctl_perf_event_max_stack;
05614993
ACM
2861 max_stack_user_set = false;
2862 }
2863
2864#ifdef HAVE_DWARF_UNWIND_SUPPORT
caa36ed7 2865 if ((trace.min_stack || max_stack_user_set) && !callchain_param.enabled && trace.trace_syscalls)
05614993
ACM
2866 record_opts__parse_callchain(&trace.opts, &callchain_param, "dwarf", false);
2867#endif
2868
2ddd5c04 2869 if (callchain_param.enabled) {
f3e459d1
ACM
2870 if (!mmap_pages_user_set && geteuid() == 0)
2871 trace.opts.mmap_pages = perf_event_mlock_kb_in_pages() * 4;
2872
566a0885 2873 symbol_conf.use_callchain = true;
f3e459d1 2874 }
566a0885 2875
14a052df
ACM
2876 if (trace.evlist->nr_entries > 0)
2877 evlist__set_evsel_handler(trace.evlist, trace__event_handler);
2878
1e28fe0a
SF
2879 if ((argc >= 1) && (strcmp(argv[0], "record") == 0))
2880 return trace__record(&trace, argc-1, &argv[1]);
2881
2882 /* summary_only implies summary option, but don't overwrite summary if set */
2883 if (trace.summary_only)
2884 trace.summary = trace.summary_only;
2885
726f3234
ACM
2886 if (!trace.trace_syscalls && !trace.trace_pgfaults &&
2887 trace.evlist->nr_entries == 0 /* Was --events used? */) {
e281a960
SF
2888 pr_err("Please specify something to trace.\n");
2889 return -1;
2890 }
2891
59247e33
ACM
2892 if (!trace.trace_syscalls && ev_qualifier_str) {
2893 pr_err("The -e option can't be used with --no-syscalls.\n");
2894 goto out;
2895 }
2896
c24ff998
ACM
2897 if (output_name != NULL) {
2898 err = trace__open_output(&trace, output_name);
2899 if (err < 0) {
2900 perror("failed to create output file");
2901 goto out;
2902 }
2903 }
2904
fd0db102
ACM
2905 trace.open_id = syscalltbl__id(trace.sctbl, "open");
2906
2ae3a312 2907 if (ev_qualifier_str != NULL) {
b059efdf 2908 const char *s = ev_qualifier_str;
005438a8
ACM
2909 struct strlist_config slist_config = {
2910 .dirname = system_path(STRACE_GROUPS_DIR),
2911 };
b059efdf
ACM
2912
2913 trace.not_ev_qualifier = *s == '!';
2914 if (trace.not_ev_qualifier)
2915 ++s;
005438a8 2916 trace.ev_qualifier = strlist__new(s, &slist_config);
2ae3a312 2917 if (trace.ev_qualifier == NULL) {
c24ff998
ACM
2918 fputs("Not enough memory to parse event qualifier",
2919 trace.output);
2920 err = -ENOMEM;
2921 goto out_close;
2ae3a312 2922 }
d0cc439b
ACM
2923
2924 err = trace__validate_ev_qualifier(&trace);
2925 if (err)
2926 goto out_close;
2ae3a312
ACM
2927 }
2928
602ad878 2929 err = target__validate(&trace.opts.target);
32caf0d1 2930 if (err) {
602ad878 2931 target__strerror(&trace.opts.target, err, bf, sizeof(bf));
c24ff998
ACM
2932 fprintf(trace.output, "%s", bf);
2933 goto out_close;
32caf0d1
NK
2934 }
2935
602ad878 2936 err = target__parse_uid(&trace.opts.target);
514f1c67 2937 if (err) {
602ad878 2938 target__strerror(&trace.opts.target, err, bf, sizeof(bf));
c24ff998
ACM
2939 fprintf(trace.output, "%s", bf);
2940 goto out_close;
514f1c67
ACM
2941 }
2942
602ad878 2943 if (!argc && target__none(&trace.opts.target))
ee76120e
NK
2944 trace.opts.target.system_wide = true;
2945
6810fc91
DA
2946 if (input_name)
2947 err = trace__replay(&trace);
2948 else
2949 err = trace__run(&trace, argc, argv);
1302d88e 2950
c24ff998
ACM
2951out_close:
2952 if (output_name != NULL)
2953 fclose(trace.output);
2954out:
1302d88e 2955 return err;
514f1c67 2956}