Merge tag 'powerpc-4.8-3' of git://git.kernel.org/pub/scm/linux/kernel/git/powerpc...
[linux-2.6-block.git] / tools / perf / builtin-trace.c
CommitLineData
a598bb5e
ACM
1/*
2 * builtin-trace.c
3 *
4 * Builtin 'trace' command:
5 *
6 * Display a continuously updated trace of any workload, CPU, specific PID,
7 * system wide, etc. Default format is loosely strace like, but any other
8 * event may be specified using --event.
9 *
10 * Copyright (C) 2012, 2013, 2014, 2015 Red Hat Inc, Arnaldo Carvalho de Melo <acme@redhat.com>
11 *
12 * Initially based on the 'trace' prototype by Thomas Gleixner:
13 *
14 * http://lwn.net/Articles/415728/ ("Announcing a new utility: 'trace'")
15 *
16 * Released under the GPL v2. (and only v2, not any later version)
17 */
18
4e319027 19#include <traceevent/event-parse.h>
988bdb31 20#include <api/fs/tracing_path.h>
514f1c67 21#include "builtin.h"
752fde44 22#include "util/color.h"
7c304ee0 23#include "util/debug.h"
514f1c67 24#include "util/evlist.h"
4b6ab94e 25#include <subcmd/exec-cmd.h>
752fde44 26#include "util/machine.h"
6810fc91 27#include "util/session.h"
752fde44 28#include "util/thread.h"
4b6ab94e 29#include <subcmd/parse-options.h>
2ae3a312 30#include "util/strlist.h"
bdc89661 31#include "util/intlist.h"
514f1c67 32#include "util/thread_map.h"
bf2575c1 33#include "util/stat.h"
97978b3e 34#include "trace-event.h"
9aca7f17 35#include "util/parse-events.h"
ba504235 36#include "util/bpf-loader.h"
566a0885 37#include "callchain.h"
fd0db102 38#include "syscalltbl.h"
96c14451 39#include "rb_resort.h"
514f1c67 40
fd0db102 41#include <libaudit.h> /* FIXME: Still needed for audit_errno_to_name */
514f1c67 42#include <stdlib.h>
8dd2a131 43#include <linux/err.h>
997bba8c
ACM
44#include <linux/filter.h>
45#include <linux/audit.h>
39878d49 46#include <linux/random.h>
c6d4a494 47#include <linux/stringify.h>
514f1c67 48
c188e7ac
ACM
49#ifndef O_CLOEXEC
50# define O_CLOEXEC 02000000
51#endif
52
d1d438a3
ACM
53struct trace {
54 struct perf_tool tool;
fd0db102 55 struct syscalltbl *sctbl;
d1d438a3
ACM
56 struct {
57 int max;
58 struct syscall *table;
59 struct {
60 struct perf_evsel *sys_enter,
61 *sys_exit;
62 } events;
63 } syscalls;
64 struct record_opts opts;
65 struct perf_evlist *evlist;
66 struct machine *host;
67 struct thread *current;
68 u64 base_time;
69 FILE *output;
70 unsigned long nr_events;
71 struct strlist *ev_qualifier;
72 struct {
73 size_t nr;
74 int *entries;
75 } ev_qualifier_ids;
76 struct intlist *tid_list;
77 struct intlist *pid_list;
78 struct {
79 size_t nr;
80 pid_t *entries;
81 } filter_pids;
82 double duration_filter;
83 double runtime_ms;
84 struct {
85 u64 vfs_getname,
86 proc_getname;
87 } stats;
c6d4a494 88 unsigned int max_stack;
5cf9c84e 89 unsigned int min_stack;
d1d438a3
ACM
90 bool not_ev_qualifier;
91 bool live;
92 bool full_time;
93 bool sched;
94 bool multiple_threads;
95 bool summary;
96 bool summary_only;
97 bool show_comm;
98 bool show_tool_stats;
99 bool trace_syscalls;
44621819 100 bool kernel_syscallchains;
d1d438a3
ACM
101 bool force;
102 bool vfs_getname;
103 int trace_pgfaults;
fd0db102 104 int open_id;
d1d438a3 105};
a1c2552d 106
77170988
ACM
107struct tp_field {
108 int offset;
109 union {
110 u64 (*integer)(struct tp_field *field, struct perf_sample *sample);
111 void *(*pointer)(struct tp_field *field, struct perf_sample *sample);
112 };
113};
114
115#define TP_UINT_FIELD(bits) \
116static u64 tp_field__u##bits(struct tp_field *field, struct perf_sample *sample) \
117{ \
55d43bca
DA
118 u##bits value; \
119 memcpy(&value, sample->raw_data + field->offset, sizeof(value)); \
120 return value; \
77170988
ACM
121}
122
123TP_UINT_FIELD(8);
124TP_UINT_FIELD(16);
125TP_UINT_FIELD(32);
126TP_UINT_FIELD(64);
127
128#define TP_UINT_FIELD__SWAPPED(bits) \
129static u64 tp_field__swapped_u##bits(struct tp_field *field, struct perf_sample *sample) \
130{ \
55d43bca
DA
131 u##bits value; \
132 memcpy(&value, sample->raw_data + field->offset, sizeof(value)); \
77170988
ACM
133 return bswap_##bits(value);\
134}
135
136TP_UINT_FIELD__SWAPPED(16);
137TP_UINT_FIELD__SWAPPED(32);
138TP_UINT_FIELD__SWAPPED(64);
139
140static int tp_field__init_uint(struct tp_field *field,
141 struct format_field *format_field,
142 bool needs_swap)
143{
144 field->offset = format_field->offset;
145
146 switch (format_field->size) {
147 case 1:
148 field->integer = tp_field__u8;
149 break;
150 case 2:
151 field->integer = needs_swap ? tp_field__swapped_u16 : tp_field__u16;
152 break;
153 case 4:
154 field->integer = needs_swap ? tp_field__swapped_u32 : tp_field__u32;
155 break;
156 case 8:
157 field->integer = needs_swap ? tp_field__swapped_u64 : tp_field__u64;
158 break;
159 default:
160 return -1;
161 }
162
163 return 0;
164}
165
166static void *tp_field__ptr(struct tp_field *field, struct perf_sample *sample)
167{
168 return sample->raw_data + field->offset;
169}
170
171static int tp_field__init_ptr(struct tp_field *field, struct format_field *format_field)
172{
173 field->offset = format_field->offset;
174 field->pointer = tp_field__ptr;
175 return 0;
176}
177
178struct syscall_tp {
179 struct tp_field id;
180 union {
181 struct tp_field args, ret;
182 };
183};
184
185static int perf_evsel__init_tp_uint_field(struct perf_evsel *evsel,
186 struct tp_field *field,
187 const char *name)
188{
189 struct format_field *format_field = perf_evsel__field(evsel, name);
190
191 if (format_field == NULL)
192 return -1;
193
194 return tp_field__init_uint(field, format_field, evsel->needs_swap);
195}
196
/*
 * Initialize member @name of the struct syscall_tp hanging off evsel->priv
 * from the tracepoint field that carries the same name.
 */
#define perf_evsel__init_sc_tp_uint_field(evsel, name) \
	({ struct syscall_tp *__sc_tp = evsel->priv; \
	   perf_evsel__init_tp_uint_field(evsel, &__sc_tp->name, #name); })
200
/*
 * Look up the tracepoint field called @name in @evsel and initialize @field
 * as a pointer reader for it.
 *
 * Returns -1 when the field does not exist, otherwise whatever
 * tp_field__init_ptr() returns.
 */
static int perf_evsel__init_tp_ptr_field(struct perf_evsel *evsel,
					 struct tp_field *field,
					 const char *name)
{
	struct format_field *ff = perf_evsel__field(evsel, name);

	return ff != NULL ? tp_field__init_ptr(field, ff) : -1;
}
212
/*
 * Pointer flavour of perf_evsel__init_sc_tp_uint_field(): member @name of the
 * struct syscall_tp in evsel->priv becomes a pointer reader for the
 * tracepoint field of the same name.
 */
#define perf_evsel__init_sc_tp_ptr_field(evsel, name) \
	({ struct syscall_tp *__sc_tp = evsel->priv; \
	   perf_evsel__init_tp_ptr_field(evsel, &__sc_tp->name, #name); })
216
217static void perf_evsel__delete_priv(struct perf_evsel *evsel)
218{
04662523 219 zfree(&evsel->priv);
77170988
ACM
220 perf_evsel__delete(evsel);
221}
222
96695d44
NK
223static int perf_evsel__init_syscall_tp(struct perf_evsel *evsel, void *handler)
224{
225 evsel->priv = malloc(sizeof(struct syscall_tp));
226 if (evsel->priv != NULL) {
227 if (perf_evsel__init_sc_tp_uint_field(evsel, id))
228 goto out_delete;
229
230 evsel->handler = handler;
231 return 0;
232 }
233
234 return -ENOMEM;
235
236out_delete:
04662523 237 zfree(&evsel->priv);
96695d44
NK
238 return -ENOENT;
239}
240
/*
 * Create the evsel for the raw_syscalls:@direction tracepoint, falling back
 * to syscalls:@direction, and prepare it with perf_evsel__init_syscall_tp().
 *
 * Returns the new evsel, or NULL when neither tracepoint could be created or
 * the syscall specific setup failed (the evsel is deleted in the latter case).
 */
static struct perf_evsel *perf_evsel__syscall_newtp(const char *direction, void *handler)
{
	struct perf_evsel *evsel = perf_evsel__newtp("raw_syscalls", direction);

	/* older kernel (e.g., RHEL6) use syscalls:{enter,exit} */
	if (IS_ERR(evsel)) {
		evsel = perf_evsel__newtp("syscalls", direction);
		if (IS_ERR(evsel))
			return NULL;
	}

	if (perf_evsel__init_syscall_tp(evsel, handler) != 0) {
		perf_evsel__delete_priv(evsel);
		return NULL;
	}

	return evsel;
}
261
/*
 * Read, as an integer, member @name of the struct syscall_tp in evsel->priv
 * from @sample, through the ->integer reader installed for that member.
 */
#define perf_evsel__sc_tp_uint(evsel, name, sample) \
	({ struct syscall_tp *__sc_tp = evsel->priv; \
	   __sc_tp->name.integer(&__sc_tp->name, sample); })
265
/*
 * Get the address of member @name of the struct syscall_tp in evsel->priv
 * inside @sample, through the ->pointer reader installed for that member.
 */
#define perf_evsel__sc_tp_ptr(evsel, name, sample) \
	({ struct syscall_tp *__sc_tp = evsel->priv; \
	   __sc_tp->name.pointer(&__sc_tp->name, sample); })
269
01533e97
ACM
270struct syscall_arg {
271 unsigned long val;
75b757ca
ACM
272 struct thread *thread;
273 struct trace *trace;
1f115cb7 274 void *parm;
01533e97
ACM
275 u8 idx;
276 u8 mask;
277};
278
/*
 * Table translating small integer values to names.
 *
 * @offset:     value that corresponds to entries[0]
 * @nr_entries: number of slots in @entries
 * @entries:    the names, indexed by (value - @offset)
 */
struct strarray {
	int		offset;
	int		nr_entries;
	const char	**entries;
};
284
/*
 * Define strarray__<array>, a struct strarray wrapping the string table
 * @array.  ->offset is not mentioned and therefore zero: entries[0] names the
 * value 0.
 */
#define DEFINE_STRARRAY(array) struct strarray strarray__##array = { \
	.entries    = array, \
	.nr_entries = ARRAY_SIZE(array), \
}
289
03e3adc9
ACM
/*
 * Like DEFINE_STRARRAY(), for tables whose first entry names the value @off
 * instead of 0.
 */
#define DEFINE_STRARRAY_OFFSET(array, off) struct strarray strarray__##array = { \
	.entries    = array, \
	.nr_entries = ARRAY_SIZE(array), \
	.offset     = off, \
}
295
975b7c2f
ACM
296static size_t __syscall_arg__scnprintf_strarray(char *bf, size_t size,
297 const char *intfmt,
298 struct syscall_arg *arg)
1f115cb7 299{
1f115cb7 300 struct strarray *sa = arg->parm;
03e3adc9 301 int idx = arg->val - sa->offset;
1f115cb7
ACM
302
303 if (idx < 0 || idx >= sa->nr_entries)
975b7c2f 304 return scnprintf(bf, size, intfmt, arg->val);
1f115cb7
ACM
305
306 return scnprintf(bf, size, "%s", sa->entries[idx]);
307}
308
975b7c2f
ACM
/*
 * strarray formatter that falls back to decimal ("%d") for values the table
 * in arg->parm has no name for.
 */
static size_t syscall_arg__scnprintf_strarray(char *bf, size_t size,
					      struct syscall_arg *arg)
{
	const char *fallback_fmt = "%d";

	return __syscall_arg__scnprintf_strarray(bf, size, fallback_fmt, arg);
}
314
1f115cb7
ACM
315#define SCA_STRARRAY syscall_arg__scnprintf_strarray
316
844ae5b4
ACM
#if defined(__i386__) || defined(__x86_64__)
/*
 * FIXME: Make this available to all arches as soon as the ioctl beautifier
 * gets rewritten to support all arches.
 *
 * strarray formatter that falls back to hexadecimal ("%#x") for values the
 * table in arg->parm has no name for.
 */
static size_t syscall_arg__scnprintf_strhexarray(char *bf, size_t size,
						 struct syscall_arg *arg)
{
	const char *fallback_fmt = "%#x";

	return __syscall_arg__scnprintf_strarray(bf, size, fallback_fmt, arg);
}

#define SCA_STRHEXARRAY syscall_arg__scnprintf_strhexarray
#endif /* defined(__i386__) || defined(__x86_64__) */
78645cf3 330
75b757ca
ACM
331static size_t syscall_arg__scnprintf_fd(char *bf, size_t size,
332 struct syscall_arg *arg);
333
334#define SCA_FD syscall_arg__scnprintf_fd
335
48e1f91a
ACM
336#ifndef AT_FDCWD
337#define AT_FDCWD -100
338#endif
339
75b757ca
ACM
340static size_t syscall_arg__scnprintf_fd_at(char *bf, size_t size,
341 struct syscall_arg *arg)
342{
343 int fd = arg->val;
344
345 if (fd == AT_FDCWD)
346 return scnprintf(bf, size, "CWD");
347
348 return syscall_arg__scnprintf_fd(bf, size, arg);
349}
350
351#define SCA_FDAT syscall_arg__scnprintf_fd_at
352
353static size_t syscall_arg__scnprintf_close_fd(char *bf, size_t size,
354 struct syscall_arg *arg);
355
356#define SCA_CLOSE_FD syscall_arg__scnprintf_close_fd
357
6e7eeb51 358static size_t syscall_arg__scnprintf_hex(char *bf, size_t size,
01533e97 359 struct syscall_arg *arg)
13d4ff3e 360{
01533e97 361 return scnprintf(bf, size, "%#lx", arg->val);
13d4ff3e
ACM
362}
363
beccb2b5
ACM
364#define SCA_HEX syscall_arg__scnprintf_hex
365
a1c2552d
ACM
366static size_t syscall_arg__scnprintf_int(char *bf, size_t size,
367 struct syscall_arg *arg)
368{
369 return scnprintf(bf, size, "%d", arg->val);
370}
371
372#define SCA_INT syscall_arg__scnprintf_int
373
729a7841
ACM
/* Names for the 'cmd' argument of bpf(), indexed by the command value. */
static const char *bpf_cmd[] = {
	[0] = "MAP_CREATE",
	[1] = "MAP_LOOKUP_ELEM",
	[2] = "MAP_UPDATE_ELEM",
	[3] = "MAP_DELETE_ELEM",
	[4] = "MAP_GET_NEXT_KEY",
	[5] = "PROG_LOAD",
};
static DEFINE_STRARRAY(bpf_cmd);
379
03e3adc9
ACM
380static const char *epoll_ctl_ops[] = { "ADD", "DEL", "MOD", };
381static DEFINE_STRARRAY_OFFSET(epoll_ctl_ops, 1);
eac032c5 382
1f115cb7
ACM
/* Names for the 'which' argument of getitimer()/setitimer(). */
static const char *itimers[] = {
	[0] = "REAL",
	[1] = "VIRTUAL",
	[2] = "PROF",
};
static DEFINE_STRARRAY(itimers);
385
b62bee1b
ACM
/* Names for the 'option' argument of keyctl(), indexed by the option value. */
static const char *keyctl_options[] = {
	"GET_KEYRING_ID",
	"JOIN_SESSION_KEYRING",
	"UPDATE",
	"REVOKE",
	"CHOWN",
	"SETPERM",
	"DESCRIBE",
	"CLEAR",
	"LINK",
	"UNLINK",
	"SEARCH",
	"READ",
	"INSTANTIATE",
	"NEGATE",
	"SET_REQKEY_KEYRING",
	"SET_TIMEOUT",
	"ASSUME_AUTHORITY",
	"GET_SECURITY",
	"SESSION_TO_PARENT",
	"REJECT",
	"INSTANTIATE_IOV",
	"INVALIDATE",
	"GET_PERSISTENT",
};
static DEFINE_STRARRAY(keyctl_options);
394
efe6b882
ACM
/*
 * Names for the 'whence' argument of lseek().  "DATA" and "HOLE" are only
 * present when the build environment defines SEEK_DATA / SEEK_HOLE.
 */
static const char *whences[] = {
	"SET",
	"CUR",
	"END",
#ifdef SEEK_DATA
	"DATA",
#endif
#ifdef SEEK_HOLE
	"HOLE",
#endif
};
static DEFINE_STRARRAY(whences);
f9da0b0c 404
80f587d5
ACM
/* Names for the 'cmd' argument of fcntl(), indexed by the command value. */
static const char *fcntl_cmds[] = {
	"DUPFD",
	"GETFD",
	"SETFD",
	"GETFL",
	"SETFL",
	"GETLK",
	"SETLK",
	"SETLKW",
	"SETOWN",
	"GETOWN",
	"SETSIG",
	"GETSIG",
	"F_GETLK64",
	"F_SETLK64",
	"F_SETLKW64",
	"F_SETOWN_EX",
	"F_GETOWN_EX",
	"F_GETOWNER_UIDS",
};
static DEFINE_STRARRAY(fcntl_cmds);
412
c045bf02
ACM
/*
 * Names for the 'resource' argument of getrlimit()/setrlimit()/prlimit64(),
 * indexed by the resource value.
 */
static const char *rlimit_resources[] = {
	"CPU",
	"FSIZE",
	"DATA",
	"STACK",
	"CORE",
	"RSS",
	"NPROC",
	"NOFILE",
	"MEMLOCK",
	"AS",
	"LOCKS",
	"SIGPENDING",
	"MSGQUEUE",
	"NICE",
	"RTPRIO",
	"RTTIME",
};
static DEFINE_STRARRAY(rlimit_resources);
419
eb5b1b14
ACM
/* Names for the 'how' argument of rt_sigprocmask(). */
static const char *sighow[] = {
	[0] = "BLOCK",
	[1] = "UNBLOCK",
	[2] = "SETMASK",
};
static DEFINE_STRARRAY(sighow);
422
4f8c1b74
DA
/* Names for the 'clk_id' argument of clock_gettime(), indexed by clock id. */
static const char *clockid[] = {
	"REALTIME",
	"MONOTONIC",
	"PROCESS_CPUTIME_ID",
	"THREAD_CPUTIME_ID",
	"MONOTONIC_RAW",
	"REALTIME_COARSE",
	"MONOTONIC_COARSE",
	"BOOTTIME",
	"REALTIME_ALARM",
	"BOOTTIME_ALARM",
	"SGI_CYCLE",
	"TAI",
};
static DEFINE_STRARRAY(clockid);
429
e10bce81
ACM
/*
 * Names for the 'family' argument of socket()/socketpair(), indexed by the
 * address family number.
 *
 * The table is positional, so no family may be left out: "MPLS" (28) has to
 * sit between "IB" (27) and "CAN" (29), otherwise "CAN" and everything after
 * it is reported under the wrong number.
 */
static const char *socket_families[] = {
	"UNSPEC", "LOCAL", "INET", "AX25", "IPX", "APPLETALK", "NETROM",
	"BRIDGE", "ATMPVC", "X25", "INET6", "ROSE", "DECnet", "NETBEUI",
	"SECURITY", "KEY", "NETLINK", "PACKET", "ASH", "ECONET", "ATMSVC",
	"RDS", "SNA", "IRDA", "PPPOX", "WANPIPE", "LLC", "IB", "MPLS", "CAN",
	"TIPC", "BLUETOOTH", "IUCV", "RXRPC", "ISDN", "PHONET", "IEEE802154",
	"CAIF", "ALG", "NFC", "VSOCK",
};
438static DEFINE_STRARRAY(socket_families);
439
51108999
ACM
440static size_t syscall_arg__scnprintf_access_mode(char *bf, size_t size,
441 struct syscall_arg *arg)
442{
443 size_t printed = 0;
444 int mode = arg->val;
445
446 if (mode == F_OK) /* 0 */
447 return scnprintf(bf, size, "F");
448#define P_MODE(n) \
449 if (mode & n##_OK) { \
450 printed += scnprintf(bf + printed, size - printed, "%s", #n); \
451 mode &= ~n##_OK; \
452 }
453
454 P_MODE(R);
455 P_MODE(W);
456 P_MODE(X);
457#undef P_MODE
458
459 if (mode)
460 printed += scnprintf(bf + printed, size - printed, "|%#x", mode);
461
462 return printed;
463}
464
465#define SCA_ACCMODE syscall_arg__scnprintf_access_mode
466
f994592d
ACM
467static size_t syscall_arg__scnprintf_filename(char *bf, size_t size,
468 struct syscall_arg *arg);
469
470#define SCA_FILENAME syscall_arg__scnprintf_filename
471
46cce19b
ACM
472static size_t syscall_arg__scnprintf_pipe_flags(char *bf, size_t size,
473 struct syscall_arg *arg)
474{
475 int printed = 0, flags = arg->val;
476
477#define P_FLAG(n) \
478 if (flags & O_##n) { \
479 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
480 flags &= ~O_##n; \
481 }
482
483 P_FLAG(CLOEXEC);
484 P_FLAG(NONBLOCK);
485#undef P_FLAG
486
487 if (flags)
488 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
489
490 return printed;
491}
492
493#define SCA_PIPE_FLAGS syscall_arg__scnprintf_pipe_flags
494
844ae5b4
ACM
495#if defined(__i386__) || defined(__x86_64__)
496/*
497 * FIXME: Make this available to all arches.
498 */
78645cf3
ACM
#define TCGETS		0x5401

/*
 * Names of the terminal ioctl commands.
 *
 * The table is used with an offset of 0x5401 (TCGETS), i.e. it is indexed by
 * (cmd - TCGETS).  The designated indices that skip over unassigned command
 * numbers therefore have to be expressed relative to TCGETS as well: using
 * the low byte of the command (0x27, 0x50, 0x60) puts TIOCSBRK and everything
 * after it one slot too high.
 *
 * Slots that are skipped over are NULL.
 */
static const char *tioctls[] = {
	"TCGETS", "TCSETS", "TCSETSW", "TCSETSF", "TCGETA", "TCSETA", "TCSETAW",
	"TCSETAF", "TCSBRK", "TCXONC", "TCFLSH", "TIOCEXCL", "TIOCNXCL",
	"TIOCSCTTY", "TIOCGPGRP", "TIOCSPGRP", "TIOCOUTQ", "TIOCSTI",
	"TIOCGWINSZ", "TIOCSWINSZ", "TIOCMGET", "TIOCMBIS", "TIOCMBIC",
	"TIOCMSET", "TIOCGSOFTCAR", "TIOCSSOFTCAR", "FIONREAD", "TIOCLINUX",
	"TIOCCONS", "TIOCGSERIAL", "TIOCSSERIAL", "TIOCPKT", "FIONBIO",
	"TIOCNOTTY", "TIOCSETD", "TIOCGETD", "TCSBRKP",
	[0x5427 - TCGETS] = "TIOCSBRK",
	"TIOCCBRK", "TIOCGSID", "TCGETS2", "TCSETS2", "TCSETSW2", "TCSETSF2",
	"TIOCGRS485", "TIOCSRS485", "TIOCGPTN", "TIOCSPTLCK",
	"TIOCGDEV||TCGETX", "TCSETX", "TCSETXF", "TCSETXW", "TIOCSIG",
	"TIOCVHANGUP", "TIOCGPKT", "TIOCGPTLCK", "TIOCGEXCL",
	[0x5450 - TCGETS] = "FIONCLEX",
	"FIOCLEX", "FIOASYNC", "TIOCSERCONFIG",
	"TIOCSERGWILD", "TIOCSERSWILD", "TIOCGLCKTRMIOS", "TIOCSLCKTRMIOS",
	"TIOCSERGSTRUCT", "TIOCSERGETLSR", "TIOCSERGETMULTI", "TIOCSERSETMULTI",
	"TIOCMIWAIT", "TIOCGICOUNT",
	[0x5460 - TCGETS] = "FIOQSIZE",
};
518
519static DEFINE_STRARRAY_OFFSET(tioctls, 0x5401);
844ae5b4 520#endif /* defined(__i386__) || defined(__x86_64__) */
78645cf3 521
a355a61e
ACM
522#ifndef GRND_NONBLOCK
523#define GRND_NONBLOCK 0x0001
524#endif
525#ifndef GRND_RANDOM
526#define GRND_RANDOM 0x0002
527#endif
528
39878d49
ACM
529static size_t syscall_arg__scnprintf_getrandom_flags(char *bf, size_t size,
530 struct syscall_arg *arg)
531{
532 int printed = 0, flags = arg->val;
533
534#define P_FLAG(n) \
535 if (flags & GRND_##n) { \
536 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
537 flags &= ~GRND_##n; \
538 }
539
540 P_FLAG(RANDOM);
541 P_FLAG(NONBLOCK);
542#undef P_FLAG
543
544 if (flags)
545 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
546
547 return printed;
548}
549
550#define SCA_GETRANDOM_FLAGS syscall_arg__scnprintf_getrandom_flags
551
453350dd
ACM
/*
 * Shorthand for struct syscall_fmt initializers: argument number @arg is
 * formatted with SCA_STRARRAY using strarray__<array> as its table.  @name is
 * not used in the expansion, it only documents the argument at the use site.
 */
#define STRARRAY(arg, name, array) \
	  .arg_parm	 = { [arg] = &strarray__##array, }, \
	  .arg_scnprintf = { [arg] = SCA_STRARRAY, }
555
ea8dc3ce 556#include "trace/beauty/eventfd.c"
8bf382ce 557#include "trace/beauty/flock.c"
d5d71e86 558#include "trace/beauty/futex_op.c"
df4cb167 559#include "trace/beauty/mmap.c"
ba2f22cf 560#include "trace/beauty/mode_t.c"
a30e6259 561#include "trace/beauty/msg_flags.c"
8f48df69 562#include "trace/beauty/open_flags.c"
62de344e 563#include "trace/beauty/perf_event_open.c"
d5d71e86 564#include "trace/beauty/pid.c"
a3bca91f 565#include "trace/beauty/sched_policy.c"
f5cd95ea 566#include "trace/beauty/seccomp.c"
12199d8e 567#include "trace/beauty/signum.c"
bbf86c43 568#include "trace/beauty/socket_type.c"
7206b900 569#include "trace/beauty/waitid_options.c"
a3bca91f 570
514f1c67
ACM
571static struct syscall_fmt {
572 const char *name;
aec1930b 573 const char *alias;
01533e97 574 size_t (*arg_scnprintf[6])(char *bf, size_t size, struct syscall_arg *arg);
1f115cb7 575 void *arg_parm[6];
514f1c67 576 bool errmsg;
11c8e39f 577 bool errpid;
514f1c67 578 bool timeout;
04b34729 579 bool hexret;
514f1c67 580} syscall_fmts[] = {
51108999 581 { .name = "access", .errmsg = true,
12f3ca4f 582 .arg_scnprintf = { [1] = SCA_ACCMODE, /* mode */ }, },
aec1930b 583 { .name = "arch_prctl", .errmsg = true, .alias = "prctl", },
729a7841 584 { .name = "bpf", .errmsg = true, STRARRAY(0, cmd, bpf_cmd), },
beccb2b5
ACM
585 { .name = "brk", .hexret = true,
586 .arg_scnprintf = { [0] = SCA_HEX, /* brk */ }, },
12f3ca4f
ACM
587 { .name = "chdir", .errmsg = true, },
588 { .name = "chmod", .errmsg = true, },
589 { .name = "chroot", .errmsg = true, },
4f8c1b74 590 { .name = "clock_gettime", .errmsg = true, STRARRAY(0, clk_id, clockid), },
11c8e39f 591 { .name = "clone", .errpid = true, },
75b757ca 592 { .name = "close", .errmsg = true,
48000a1a 593 .arg_scnprintf = { [0] = SCA_CLOSE_FD, /* fd */ }, },
a14bb860 594 { .name = "connect", .errmsg = true, },
12f3ca4f 595 { .name = "creat", .errmsg = true, },
b6565c90
ACM
596 { .name = "dup", .errmsg = true, },
597 { .name = "dup2", .errmsg = true, },
598 { .name = "dup3", .errmsg = true, },
453350dd 599 { .name = "epoll_ctl", .errmsg = true, STRARRAY(1, op, epoll_ctl_ops), },
49af9e93
ACM
600 { .name = "eventfd2", .errmsg = true,
601 .arg_scnprintf = { [1] = SCA_EFD_FLAGS, /* flags */ }, },
12f3ca4f 602 { .name = "faccessat", .errmsg = true, },
b6565c90
ACM
603 { .name = "fadvise64", .errmsg = true, },
604 { .name = "fallocate", .errmsg = true, },
605 { .name = "fchdir", .errmsg = true, },
606 { .name = "fchmod", .errmsg = true, },
75b757ca 607 { .name = "fchmodat", .errmsg = true,
12f3ca4f 608 .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
b6565c90 609 { .name = "fchown", .errmsg = true, },
75b757ca 610 { .name = "fchownat", .errmsg = true,
12f3ca4f 611 .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
75b757ca 612 { .name = "fcntl", .errmsg = true,
b6565c90 613 .arg_scnprintf = { [1] = SCA_STRARRAY, /* cmd */ },
75b757ca 614 .arg_parm = { [1] = &strarray__fcntl_cmds, /* cmd */ }, },
b6565c90 615 { .name = "fdatasync", .errmsg = true, },
5cea6ff2 616 { .name = "flock", .errmsg = true,
b6565c90
ACM
617 .arg_scnprintf = { [1] = SCA_FLOCK, /* cmd */ }, },
618 { .name = "fsetxattr", .errmsg = true, },
619 { .name = "fstat", .errmsg = true, .alias = "newfstat", },
12f3ca4f 620 { .name = "fstatat", .errmsg = true, .alias = "newfstatat", },
b6565c90
ACM
621 { .name = "fstatfs", .errmsg = true, },
622 { .name = "fsync", .errmsg = true, },
623 { .name = "ftruncate", .errmsg = true, },
f9da0b0c
ACM
624 { .name = "futex", .errmsg = true,
625 .arg_scnprintf = { [1] = SCA_FUTEX_OP, /* op */ }, },
75b757ca 626 { .name = "futimesat", .errmsg = true,
12f3ca4f 627 .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
b6565c90
ACM
628 { .name = "getdents", .errmsg = true, },
629 { .name = "getdents64", .errmsg = true, },
453350dd 630 { .name = "getitimer", .errmsg = true, STRARRAY(0, which, itimers), },
c65f1070 631 { .name = "getpid", .errpid = true, },
d1d438a3 632 { .name = "getpgid", .errpid = true, },
c65f1070 633 { .name = "getppid", .errpid = true, },
39878d49
ACM
634 { .name = "getrandom", .errmsg = true,
635 .arg_scnprintf = { [2] = SCA_GETRANDOM_FLAGS, /* flags */ }, },
453350dd 636 { .name = "getrlimit", .errmsg = true, STRARRAY(0, resource, rlimit_resources), },
12f3ca4f
ACM
637 { .name = "getxattr", .errmsg = true, },
638 { .name = "inotify_add_watch", .errmsg = true, },
beccb2b5 639 { .name = "ioctl", .errmsg = true,
b6565c90 640 .arg_scnprintf = {
844ae5b4
ACM
641#if defined(__i386__) || defined(__x86_64__)
642/*
643 * FIXME: Make this available to all arches.
644 */
78645cf3
ACM
645 [1] = SCA_STRHEXARRAY, /* cmd */
646 [2] = SCA_HEX, /* arg */ },
647 .arg_parm = { [1] = &strarray__tioctls, /* cmd */ }, },
844ae5b4
ACM
648#else
649 [2] = SCA_HEX, /* arg */ }, },
650#endif
b62bee1b 651 { .name = "keyctl", .errmsg = true, STRARRAY(0, option, keyctl_options), },
8bad5b0a
ACM
652 { .name = "kill", .errmsg = true,
653 .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
12f3ca4f
ACM
654 { .name = "lchown", .errmsg = true, },
655 { .name = "lgetxattr", .errmsg = true, },
75b757ca 656 { .name = "linkat", .errmsg = true,
48000a1a 657 .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
12f3ca4f
ACM
658 { .name = "listxattr", .errmsg = true, },
659 { .name = "llistxattr", .errmsg = true, },
660 { .name = "lremovexattr", .errmsg = true, },
75b757ca 661 { .name = "lseek", .errmsg = true,
b6565c90 662 .arg_scnprintf = { [2] = SCA_STRARRAY, /* whence */ },
75b757ca 663 .arg_parm = { [2] = &strarray__whences, /* whence */ }, },
12f3ca4f
ACM
664 { .name = "lsetxattr", .errmsg = true, },
665 { .name = "lstat", .errmsg = true, .alias = "newlstat", },
666 { .name = "lsxattr", .errmsg = true, },
9e9716d1
ACM
667 { .name = "madvise", .errmsg = true,
668 .arg_scnprintf = { [0] = SCA_HEX, /* start */
669 [2] = SCA_MADV_BHV, /* behavior */ }, },
12f3ca4f 670 { .name = "mkdir", .errmsg = true, },
75b757ca 671 { .name = "mkdirat", .errmsg = true,
12f3ca4f
ACM
672 .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
673 { .name = "mknod", .errmsg = true, },
75b757ca 674 { .name = "mknodat", .errmsg = true,
12f3ca4f 675 .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
3d903aa7
ACM
676 { .name = "mlock", .errmsg = true,
677 .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
678 { .name = "mlockall", .errmsg = true,
679 .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
beccb2b5 680 { .name = "mmap", .hexret = true,
ae685380 681 .arg_scnprintf = { [0] = SCA_HEX, /* addr */
941557e0 682 [2] = SCA_MMAP_PROT, /* prot */
b6565c90 683 [3] = SCA_MMAP_FLAGS, /* flags */ }, },
beccb2b5 684 { .name = "mprotect", .errmsg = true,
ae685380
ACM
685 .arg_scnprintf = { [0] = SCA_HEX, /* start */
686 [2] = SCA_MMAP_PROT, /* prot */ }, },
090389b6
ACM
687 { .name = "mq_unlink", .errmsg = true,
688 .arg_scnprintf = { [0] = SCA_FILENAME, /* u_name */ }, },
ae685380
ACM
689 { .name = "mremap", .hexret = true,
690 .arg_scnprintf = { [0] = SCA_HEX, /* addr */
86998dda 691 [3] = SCA_MREMAP_FLAGS, /* flags */
ae685380 692 [4] = SCA_HEX, /* new_addr */ }, },
3d903aa7
ACM
693 { .name = "munlock", .errmsg = true,
694 .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
beccb2b5
ACM
695 { .name = "munmap", .errmsg = true,
696 .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
75b757ca 697 { .name = "name_to_handle_at", .errmsg = true,
48000a1a 698 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
75b757ca 699 { .name = "newfstatat", .errmsg = true,
12f3ca4f 700 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
be65a89a 701 { .name = "open", .errmsg = true,
12f3ca4f 702 .arg_scnprintf = { [1] = SCA_OPEN_FLAGS, /* flags */ }, },
31cd3855 703 { .name = "open_by_handle_at", .errmsg = true,
75b757ca
ACM
704 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
705 [2] = SCA_OPEN_FLAGS, /* flags */ }, },
31cd3855 706 { .name = "openat", .errmsg = true,
75b757ca
ACM
707 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
708 [2] = SCA_OPEN_FLAGS, /* flags */ }, },
a1c2552d 709 { .name = "perf_event_open", .errmsg = true,
ccd9b2a7 710 .arg_scnprintf = { [2] = SCA_INT, /* cpu */
a1c2552d
ACM
711 [3] = SCA_FD, /* group_fd */
712 [4] = SCA_PERF_FLAGS, /* flags */ }, },
46cce19b
ACM
713 { .name = "pipe2", .errmsg = true,
714 .arg_scnprintf = { [1] = SCA_PIPE_FLAGS, /* flags */ }, },
aec1930b
ACM
715 { .name = "poll", .errmsg = true, .timeout = true, },
716 { .name = "ppoll", .errmsg = true, .timeout = true, },
b6565c90
ACM
717 { .name = "pread", .errmsg = true, .alias = "pread64", },
718 { .name = "preadv", .errmsg = true, .alias = "pread", },
453350dd 719 { .name = "prlimit64", .errmsg = true, STRARRAY(1, resource, rlimit_resources), },
b6565c90
ACM
720 { .name = "pwrite", .errmsg = true, .alias = "pwrite64", },
721 { .name = "pwritev", .errmsg = true, },
722 { .name = "read", .errmsg = true, },
12f3ca4f 723 { .name = "readlink", .errmsg = true, },
75b757ca 724 { .name = "readlinkat", .errmsg = true,
12f3ca4f 725 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
b6565c90 726 { .name = "readv", .errmsg = true, },
b2cc99fd 727 { .name = "recvfrom", .errmsg = true,
b6565c90 728 .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
b2cc99fd 729 { .name = "recvmmsg", .errmsg = true,
b6565c90 730 .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
b2cc99fd 731 { .name = "recvmsg", .errmsg = true,
b6565c90 732 .arg_scnprintf = { [2] = SCA_MSG_FLAGS, /* flags */ }, },
12f3ca4f 733 { .name = "removexattr", .errmsg = true, },
75b757ca 734 { .name = "renameat", .errmsg = true,
48000a1a 735 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
12f3ca4f 736 { .name = "rmdir", .errmsg = true, },
8bad5b0a
ACM
737 { .name = "rt_sigaction", .errmsg = true,
738 .arg_scnprintf = { [0] = SCA_SIGNUM, /* sig */ }, },
453350dd 739 { .name = "rt_sigprocmask", .errmsg = true, STRARRAY(0, how, sighow), },
8bad5b0a
ACM
740 { .name = "rt_sigqueueinfo", .errmsg = true,
741 .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
742 { .name = "rt_tgsigqueueinfo", .errmsg = true,
743 .arg_scnprintf = { [2] = SCA_SIGNUM, /* sig */ }, },
a3bca91f
ACM
744 { .name = "sched_setscheduler", .errmsg = true,
745 .arg_scnprintf = { [1] = SCA_SCHED_POLICY, /* policy */ }, },
997bba8c
ACM
746 { .name = "seccomp", .errmsg = true,
747 .arg_scnprintf = { [0] = SCA_SECCOMP_OP, /* op */
748 [1] = SCA_SECCOMP_FLAGS, /* flags */ }, },
aec1930b 749 { .name = "select", .errmsg = true, .timeout = true, },
b2cc99fd 750 { .name = "sendmmsg", .errmsg = true,
b6565c90 751 .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
b2cc99fd 752 { .name = "sendmsg", .errmsg = true,
b6565c90 753 .arg_scnprintf = { [2] = SCA_MSG_FLAGS, /* flags */ }, },
b2cc99fd 754 { .name = "sendto", .errmsg = true,
b6565c90 755 .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
c65f1070 756 { .name = "set_tid_address", .errpid = true, },
453350dd 757 { .name = "setitimer", .errmsg = true, STRARRAY(0, which, itimers), },
d1d438a3 758 { .name = "setpgid", .errmsg = true, },
453350dd 759 { .name = "setrlimit", .errmsg = true, STRARRAY(0, resource, rlimit_resources), },
12f3ca4f 760 { .name = "setxattr", .errmsg = true, },
b6565c90 761 { .name = "shutdown", .errmsg = true, },
e10bce81 762 { .name = "socket", .errmsg = true,
a28b24b2
ACM
763 .arg_scnprintf = { [0] = SCA_STRARRAY, /* family */
764 [1] = SCA_SK_TYPE, /* type */ },
07120aa5
ACM
765 .arg_parm = { [0] = &strarray__socket_families, /* family */ }, },
766 { .name = "socketpair", .errmsg = true,
767 .arg_scnprintf = { [0] = SCA_STRARRAY, /* family */
768 [1] = SCA_SK_TYPE, /* type */ },
e10bce81 769 .arg_parm = { [0] = &strarray__socket_families, /* family */ }, },
12f3ca4f
ACM
770 { .name = "stat", .errmsg = true, .alias = "newstat", },
771 { .name = "statfs", .errmsg = true, },
34221118
ACM
772 { .name = "swapoff", .errmsg = true,
773 .arg_scnprintf = { [0] = SCA_FILENAME, /* specialfile */ }, },
774 { .name = "swapon", .errmsg = true,
775 .arg_scnprintf = { [0] = SCA_FILENAME, /* specialfile */ }, },
75b757ca 776 { .name = "symlinkat", .errmsg = true,
48000a1a 777 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
8bad5b0a
ACM
778 { .name = "tgkill", .errmsg = true,
779 .arg_scnprintf = { [2] = SCA_SIGNUM, /* sig */ }, },
780 { .name = "tkill", .errmsg = true,
781 .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
12f3ca4f 782 { .name = "truncate", .errmsg = true, },
e5959683 783 { .name = "uname", .errmsg = true, .alias = "newuname", },
75b757ca 784 { .name = "unlinkat", .errmsg = true,
12f3ca4f
ACM
785 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
786 { .name = "utime", .errmsg = true, },
75b757ca 787 { .name = "utimensat", .errmsg = true,
12f3ca4f
ACM
788 .arg_scnprintf = { [0] = SCA_FDAT, /* dirfd */ }, },
789 { .name = "utimes", .errmsg = true, },
b6565c90 790 { .name = "vmsplice", .errmsg = true, },
11c8e39f 791 { .name = "wait4", .errpid = true,
7206b900 792 .arg_scnprintf = { [2] = SCA_WAITID_OPTIONS, /* options */ }, },
11c8e39f 793 { .name = "waitid", .errpid = true,
7206b900 794 .arg_scnprintf = { [3] = SCA_WAITID_OPTIONS, /* options */ }, },
b6565c90
ACM
795 { .name = "write", .errmsg = true, },
796 { .name = "writev", .errmsg = true, },
514f1c67
ACM
797};
798
799static int syscall_fmt__cmp(const void *name, const void *fmtp)
800{
801 const struct syscall_fmt *fmt = fmtp;
802 return strcmp(name, fmt->name);
803}
804
805static struct syscall_fmt *syscall_fmt__find(const char *name)
806{
807 const int nmemb = ARRAY_SIZE(syscall_fmts);
808 return bsearch(name, syscall_fmts, nmemb, sizeof(struct syscall_fmt), syscall_fmt__cmp);
809}
810
/*
 * What is kept per syscall: its tracepoint format and argument fields, its
 * name, the matching syscall_fmts[] entry (->fmt) and the per argument
 * formatters (->arg_scnprintf) with their data (->arg_parm).
 */
struct syscall {
	struct event_format	*tp_format;
	int			nr_args;
	struct format_field	*args;
	const char		*name;
	bool			is_exit;
	struct syscall_fmt	*fmt;
	size_t			(**arg_scnprintf)(char *bf, size_t size, struct syscall_arg *arg);
	void			**arg_parm;
};
821
60c907ab
ACM
822static size_t fprintf_duration(unsigned long t, FILE *fp)
823{
824 double duration = (double)t / NSEC_PER_MSEC;
825 size_t printed = fprintf(fp, "(");
826
827 if (duration >= 1.0)
828 printed += color_fprintf(fp, PERF_COLOR_RED, "%6.3f ms", duration);
829 else if (duration >= 0.01)
830 printed += color_fprintf(fp, PERF_COLOR_YELLOW, "%6.3f ms", duration);
831 else
832 printed += color_fprintf(fp, PERF_COLOR_NORMAL, "%6.3f ms", duration);
c24ff998 833 return printed + fprintf(fp, "): ");
60c907ab
ACM
834}
835
f994592d
ACM
836/**
837 * filename.ptr: The filename char pointer that will be vfs_getname'd
838 * filename.entry_str_pos: Where to insert the string translated from
839 * filename.ptr by the vfs_getname tracepoint/kprobe.
840 */
752fde44
ACM
841struct thread_trace {
842 u64 entry_time;
843 u64 exit_time;
844 bool entry_pending;
efd5745e 845 unsigned long nr_events;
a2ea67d7 846 unsigned long pfmaj, pfmin;
752fde44 847 char *entry_str;
1302d88e 848 double runtime_ms;
f994592d
ACM
849 struct {
850 unsigned long ptr;
7f4f8001
ACM
851 short int entry_str_pos;
852 bool pending_open;
853 unsigned int namelen;
854 char *name;
f994592d 855 } filename;
75b757ca
ACM
856 struct {
857 int max;
858 char **table;
859 } paths;
bf2575c1
DA
860
861 struct intlist *syscall_stats;
752fde44
ACM
862};
863
864static struct thread_trace *thread_trace__new(void)
865{
75b757ca
ACM
866 struct thread_trace *ttrace = zalloc(sizeof(struct thread_trace));
867
868 if (ttrace)
869 ttrace->paths.max = -1;
870
bf2575c1
DA
871 ttrace->syscall_stats = intlist__new(NULL);
872
75b757ca 873 return ttrace;
752fde44
ACM
874}
875
c24ff998 876static struct thread_trace *thread__trace(struct thread *thread, FILE *fp)
752fde44 877{
efd5745e
ACM
878 struct thread_trace *ttrace;
879
752fde44
ACM
880 if (thread == NULL)
881 goto fail;
882
89dceb22
NK
883 if (thread__priv(thread) == NULL)
884 thread__set_priv(thread, thread_trace__new());
48000a1a 885
89dceb22 886 if (thread__priv(thread) == NULL)
752fde44
ACM
887 goto fail;
888
89dceb22 889 ttrace = thread__priv(thread);
efd5745e
ACM
890 ++ttrace->nr_events;
891
892 return ttrace;
752fde44 893fail:
c24ff998 894 color_fprintf(fp, PERF_COLOR_RED,
752fde44
ACM
895 "WARNING: not enough memory, dropping samples!\n");
896 return NULL;
897}
898
598d02c5
SF
/* Bit flags, presumably selecting major/minor page fault tracing (users not in view) */
#define TRACE_PFMAJ		(1 << 0)
#define TRACE_PFMIN		(1 << 1)

/* Size, in bytes, of the per thread buffer holding the formatted sys_enter */
static const size_t trace__entry_str_size = 2048;
903
97119f37 904static int trace__set_fd_pathname(struct thread *thread, int fd, const char *pathname)
75b757ca 905{
89dceb22 906 struct thread_trace *ttrace = thread__priv(thread);
75b757ca
ACM
907
908 if (fd > ttrace->paths.max) {
909 char **npath = realloc(ttrace->paths.table, (fd + 1) * sizeof(char *));
910
911 if (npath == NULL)
912 return -1;
913
914 if (ttrace->paths.max != -1) {
915 memset(npath + ttrace->paths.max + 1, 0,
916 (fd - ttrace->paths.max) * sizeof(char *));
917 } else {
918 memset(npath, 0, (fd + 1) * sizeof(char *));
919 }
920
921 ttrace->paths.table = npath;
922 ttrace->paths.max = fd;
923 }
924
925 ttrace->paths.table[fd] = strdup(pathname);
926
927 return ttrace->paths.table[fd] != NULL ? 0 : -1;
928}
929
97119f37
ACM
930static int thread__read_fd_path(struct thread *thread, int fd)
931{
932 char linkname[PATH_MAX], pathname[PATH_MAX];
933 struct stat st;
934 int ret;
935
936 if (thread->pid_ == thread->tid) {
937 scnprintf(linkname, sizeof(linkname),
938 "/proc/%d/fd/%d", thread->pid_, fd);
939 } else {
940 scnprintf(linkname, sizeof(linkname),
941 "/proc/%d/task/%d/fd/%d", thread->pid_, thread->tid, fd);
942 }
943
944 if (lstat(linkname, &st) < 0 || st.st_size + 1 > (off_t)sizeof(pathname))
945 return -1;
946
947 ret = readlink(linkname, pathname, sizeof(pathname));
948
949 if (ret < 0 || ret > st.st_size)
950 return -1;
951
952 pathname[ret] = '\0';
953 return trace__set_fd_pathname(thread, fd, pathname);
954}
955
c522739d
ACM
956static const char *thread__fd_path(struct thread *thread, int fd,
957 struct trace *trace)
75b757ca 958{
89dceb22 959 struct thread_trace *ttrace = thread__priv(thread);
75b757ca
ACM
960
961 if (ttrace == NULL)
962 return NULL;
963
964 if (fd < 0)
965 return NULL;
966
cdcd1e6b 967 if ((fd > ttrace->paths.max || ttrace->paths.table[fd] == NULL)) {
c522739d
ACM
968 if (!trace->live)
969 return NULL;
970 ++trace->stats.proc_getname;
cdcd1e6b 971 if (thread__read_fd_path(thread, fd))
c522739d
ACM
972 return NULL;
973 }
75b757ca
ACM
974
975 return ttrace->paths.table[fd];
976}
977
978static size_t syscall_arg__scnprintf_fd(char *bf, size_t size,
979 struct syscall_arg *arg)
980{
981 int fd = arg->val;
982 size_t printed = scnprintf(bf, size, "%d", fd);
c522739d 983 const char *path = thread__fd_path(arg->thread, fd, arg->trace);
75b757ca
ACM
984
985 if (path)
986 printed += scnprintf(bf + printed, size - printed, "<%s>", path);
987
988 return printed;
989}
990
991static size_t syscall_arg__scnprintf_close_fd(char *bf, size_t size,
992 struct syscall_arg *arg)
993{
994 int fd = arg->val;
995 size_t printed = syscall_arg__scnprintf_fd(bf, size, arg);
89dceb22 996 struct thread_trace *ttrace = thread__priv(arg->thread);
75b757ca 997
04662523
ACM
998 if (ttrace && fd >= 0 && fd <= ttrace->paths.max)
999 zfree(&ttrace->paths.table[fd]);
75b757ca
ACM
1000
1001 return printed;
1002}
1003
f994592d
ACM
1004static void thread__set_filename_pos(struct thread *thread, const char *bf,
1005 unsigned long ptr)
1006{
1007 struct thread_trace *ttrace = thread__priv(thread);
1008
1009 ttrace->filename.ptr = ptr;
1010 ttrace->filename.entry_str_pos = bf - ttrace->entry_str;
1011}
1012
1013static size_t syscall_arg__scnprintf_filename(char *bf, size_t size,
1014 struct syscall_arg *arg)
1015{
1016 unsigned long ptr = arg->val;
1017
1018 if (!arg->trace->vfs_getname)
1019 return scnprintf(bf, size, "%#x", ptr);
1020
1021 thread__set_filename_pos(arg->thread, bf, ptr);
1022 return 0;
1023}
1024
ae9ed035
ACM
1025static bool trace__filter_duration(struct trace *trace, double t)
1026{
1027 return t < (trace->duration_filter * NSEC_PER_MSEC);
1028}
1029
752fde44
ACM
1030static size_t trace__fprintf_tstamp(struct trace *trace, u64 tstamp, FILE *fp)
1031{
1032 double ts = (double)(tstamp - trace->base_time) / NSEC_PER_MSEC;
1033
60c907ab 1034 return fprintf(fp, "%10.3f ", ts);
752fde44
ACM
1035}
1036
/*
 * Flags written from the signal handler and read by the rest of the tool.
 * They are volatile sig_atomic_t so that accessing them from an
 * asynchronous signal handler is well defined.
 */
static volatile sig_atomic_t done = false;
static volatile sig_atomic_t interrupted = false;

/*
 * Ask for the main loop to finish, noting whether that was requested with
 * SIGINT.
 */
static void sig_handler(int sig)
{
	done = true;
	interrupted = sig == SIGINT;
}
1045
752fde44 1046static size_t trace__fprintf_entry_head(struct trace *trace, struct thread *thread,
60c907ab 1047 u64 duration, u64 tstamp, FILE *fp)
752fde44
ACM
1048{
1049 size_t printed = trace__fprintf_tstamp(trace, tstamp, fp);
60c907ab 1050 printed += fprintf_duration(duration, fp);
752fde44 1051
50c95cbd
ACM
1052 if (trace->multiple_threads) {
1053 if (trace->show_comm)
1902efe7 1054 printed += fprintf(fp, "%.14s/", thread__comm_str(thread));
38051234 1055 printed += fprintf(fp, "%d ", thread->tid);
50c95cbd 1056 }
752fde44
ACM
1057
1058 return printed;
1059}
1060
c24ff998 1061static int trace__process_event(struct trace *trace, struct machine *machine,
162f0bef 1062 union perf_event *event, struct perf_sample *sample)
752fde44
ACM
1063{
1064 int ret = 0;
1065
1066 switch (event->header.type) {
1067 case PERF_RECORD_LOST:
c24ff998 1068 color_fprintf(trace->output, PERF_COLOR_RED,
752fde44 1069 "LOST %" PRIu64 " events!\n", event->lost.lost);
162f0bef 1070 ret = machine__process_lost_event(machine, event, sample);
3ed5ca2e 1071 break;
752fde44 1072 default:
162f0bef 1073 ret = machine__process_event(machine, event, sample);
752fde44
ACM
1074 break;
1075 }
1076
1077 return ret;
1078}
1079
c24ff998 1080static int trace__tool_process(struct perf_tool *tool,
752fde44 1081 union perf_event *event,
162f0bef 1082 struct perf_sample *sample,
752fde44
ACM
1083 struct machine *machine)
1084{
c24ff998 1085 struct trace *trace = container_of(tool, struct trace, tool);
162f0bef 1086 return trace__process_event(trace, machine, event, sample);
752fde44
ACM
1087}
1088
caf8a0d0
ACM
1089static char *trace__machine__resolve_kernel_addr(void *vmachine, unsigned long long *addrp, char **modp)
1090{
1091 struct machine *machine = vmachine;
1092
1093 if (machine->kptr_restrict_warned)
1094 return NULL;
1095
1096 if (symbol_conf.kptr_restrict) {
1097 pr_warning("Kernel address maps (/proc/{kallsyms,modules}) are restricted.\n\n"
1098 "Check /proc/sys/kernel/kptr_restrict.\n\n"
1099 "Kernel samples will not be resolved.\n");
1100 machine->kptr_restrict_warned = true;
1101 return NULL;
1102 }
1103
1104 return machine__resolve_kernel_addr(vmachine, addrp, modp);
1105}
1106
752fde44
ACM
1107static int trace__symbols_init(struct trace *trace, struct perf_evlist *evlist)
1108{
0a7e6d1b 1109 int err = symbol__init(NULL);
752fde44
ACM
1110
1111 if (err)
1112 return err;
1113
8fb598e5
DA
1114 trace->host = machine__new_host();
1115 if (trace->host == NULL)
1116 return -ENOMEM;
752fde44 1117
caf8a0d0 1118 if (trace_event__register_resolver(trace->host, trace__machine__resolve_kernel_addr) < 0)
706c3da4
ACM
1119 return -errno;
1120
a33fbd56 1121 err = __machine__synthesize_threads(trace->host, &trace->tool, &trace->opts.target,
9d9cad76
KL
1122 evlist->threads, trace__tool_process, false,
1123 trace->opts.proc_map_timeout);
752fde44
ACM
1124 if (err)
1125 symbol__exit();
1126
1127 return err;
1128}
1129
13d4ff3e
ACM
1130static int syscall__set_arg_fmts(struct syscall *sc)
1131{
1132 struct format_field *field;
b6565c90 1133 int idx = 0, len;
13d4ff3e 1134
f208bd8d 1135 sc->arg_scnprintf = calloc(sc->nr_args, sizeof(void *));
13d4ff3e
ACM
1136 if (sc->arg_scnprintf == NULL)
1137 return -1;
1138
1f115cb7
ACM
1139 if (sc->fmt)
1140 sc->arg_parm = sc->fmt->arg_parm;
1141
f208bd8d 1142 for (field = sc->args; field; field = field->next) {
beccb2b5
ACM
1143 if (sc->fmt && sc->fmt->arg_scnprintf[idx])
1144 sc->arg_scnprintf[idx] = sc->fmt->arg_scnprintf[idx];
12f3ca4f
ACM
1145 else if (strcmp(field->type, "const char *") == 0 &&
1146 (strcmp(field->name, "filename") == 0 ||
1147 strcmp(field->name, "path") == 0 ||
1148 strcmp(field->name, "pathname") == 0))
1149 sc->arg_scnprintf[idx] = SCA_FILENAME;
beccb2b5 1150 else if (field->flags & FIELD_IS_POINTER)
13d4ff3e 1151 sc->arg_scnprintf[idx] = syscall_arg__scnprintf_hex;
d1d438a3
ACM
1152 else if (strcmp(field->type, "pid_t") == 0)
1153 sc->arg_scnprintf[idx] = SCA_PID;
ba2f22cf
ACM
1154 else if (strcmp(field->type, "umode_t") == 0)
1155 sc->arg_scnprintf[idx] = SCA_MODE_T;
b6565c90
ACM
1156 else if ((strcmp(field->type, "int") == 0 ||
1157 strcmp(field->type, "unsigned int") == 0 ||
1158 strcmp(field->type, "long") == 0) &&
1159 (len = strlen(field->name)) >= 2 &&
1160 strcmp(field->name + len - 2, "fd") == 0) {
1161 /*
1162 * /sys/kernel/tracing/events/syscalls/sys_enter*
1163 * egrep 'field:.*fd;' .../format|sed -r 's/.*field:([a-z ]+) [a-z_]*fd.+/\1/g'|sort|uniq -c
1164 * 65 int
1165 * 23 unsigned int
1166 * 7 unsigned long
1167 */
1168 sc->arg_scnprintf[idx] = SCA_FD;
1169 }
13d4ff3e
ACM
1170 ++idx;
1171 }
1172
1173 return 0;
1174}
1175
514f1c67
ACM
1176static int trace__read_syscall_info(struct trace *trace, int id)
1177{
1178 char tp_name[128];
1179 struct syscall *sc;
fd0db102 1180 const char *name = syscalltbl__name(trace->sctbl, id);
3a531260
ACM
1181
1182 if (name == NULL)
1183 return -1;
514f1c67
ACM
1184
1185 if (id > trace->syscalls.max) {
1186 struct syscall *nsyscalls = realloc(trace->syscalls.table, (id + 1) * sizeof(*sc));
1187
1188 if (nsyscalls == NULL)
1189 return -1;
1190
1191 if (trace->syscalls.max != -1) {
1192 memset(nsyscalls + trace->syscalls.max + 1, 0,
1193 (id - trace->syscalls.max) * sizeof(*sc));
1194 } else {
1195 memset(nsyscalls, 0, (id + 1) * sizeof(*sc));
1196 }
1197
1198 trace->syscalls.table = nsyscalls;
1199 trace->syscalls.max = id;
1200 }
1201
1202 sc = trace->syscalls.table + id;
3a531260 1203 sc->name = name;
2ae3a312 1204
3a531260 1205 sc->fmt = syscall_fmt__find(sc->name);
514f1c67 1206
aec1930b 1207 snprintf(tp_name, sizeof(tp_name), "sys_enter_%s", sc->name);
97978b3e 1208 sc->tp_format = trace_event__tp_format("syscalls", tp_name);
aec1930b 1209
8dd2a131 1210 if (IS_ERR(sc->tp_format) && sc->fmt && sc->fmt->alias) {
aec1930b 1211 snprintf(tp_name, sizeof(tp_name), "sys_enter_%s", sc->fmt->alias);
97978b3e 1212 sc->tp_format = trace_event__tp_format("syscalls", tp_name);
aec1930b 1213 }
514f1c67 1214
8dd2a131 1215 if (IS_ERR(sc->tp_format))
13d4ff3e
ACM
1216 return -1;
1217
f208bd8d
ACM
1218 sc->args = sc->tp_format->format.fields;
1219 sc->nr_args = sc->tp_format->format.nr_fields;
c42de706
TS
1220 /*
1221 * We need to check and discard the first variable '__syscall_nr'
1222 * or 'nr' that mean the syscall number. It is needless here.
1223 * So drop '__syscall_nr' or 'nr' field but does not exist on older kernels.
1224 */
1225 if (sc->args && (!strcmp(sc->args->name, "__syscall_nr") || !strcmp(sc->args->name, "nr"))) {
f208bd8d
ACM
1226 sc->args = sc->args->next;
1227 --sc->nr_args;
1228 }
1229
5089f20e
ACM
1230 sc->is_exit = !strcmp(name, "exit_group") || !strcmp(name, "exit");
1231
13d4ff3e 1232 return syscall__set_arg_fmts(sc);
514f1c67
ACM
1233}
1234
d0cc439b
ACM
1235static int trace__validate_ev_qualifier(struct trace *trace)
1236{
8b3ce757 1237 int err = 0, i;
d0cc439b
ACM
1238 struct str_node *pos;
1239
8b3ce757
ACM
1240 trace->ev_qualifier_ids.nr = strlist__nr_entries(trace->ev_qualifier);
1241 trace->ev_qualifier_ids.entries = malloc(trace->ev_qualifier_ids.nr *
1242 sizeof(trace->ev_qualifier_ids.entries[0]));
1243
1244 if (trace->ev_qualifier_ids.entries == NULL) {
1245 fputs("Error:\tNot enough memory for allocating events qualifier ids\n",
1246 trace->output);
1247 err = -EINVAL;
1248 goto out;
1249 }
1250
1251 i = 0;
1252
602a1f4d 1253 strlist__for_each_entry(pos, trace->ev_qualifier) {
d0cc439b 1254 const char *sc = pos->s;
fd0db102 1255 int id = syscalltbl__id(trace->sctbl, sc);
d0cc439b 1256
8b3ce757 1257 if (id < 0) {
d0cc439b
ACM
1258 if (err == 0) {
1259 fputs("Error:\tInvalid syscall ", trace->output);
1260 err = -EINVAL;
1261 } else {
1262 fputs(", ", trace->output);
1263 }
1264
1265 fputs(sc, trace->output);
1266 }
8b3ce757
ACM
1267
1268 trace->ev_qualifier_ids.entries[i++] = id;
d0cc439b
ACM
1269 }
1270
1271 if (err < 0) {
1272 fputs("\nHint:\ttry 'perf list syscalls:sys_enter_*'"
1273 "\nHint:\tand: 'man syscalls'\n", trace->output);
8b3ce757
ACM
1274 zfree(&trace->ev_qualifier_ids.entries);
1275 trace->ev_qualifier_ids.nr = 0;
d0cc439b 1276 }
8b3ce757 1277out:
d0cc439b
ACM
1278 return err;
1279}
1280
55d43bca
DA
1281/*
1282 * args is to be interpreted as a series of longs but we need to handle
1283 * 8-byte unaligned accesses. args points to raw_data within the event
1284 * and raw_data is guaranteed to be 8-byte unaligned because it is
1285 * preceded by raw_size which is a u32. So we need to copy args to a temp
1286 * variable to read it. Most notably this avoids extended load instructions
1287 * on unaligned addresses
1288 */
1289
752fde44 1290static size_t syscall__scnprintf_args(struct syscall *sc, char *bf, size_t size,
55d43bca 1291 unsigned char *args, struct trace *trace,
75b757ca 1292 struct thread *thread)
514f1c67 1293{
514f1c67 1294 size_t printed = 0;
55d43bca
DA
1295 unsigned char *p;
1296 unsigned long val;
514f1c67 1297
f208bd8d 1298 if (sc->args != NULL) {
514f1c67 1299 struct format_field *field;
01533e97
ACM
1300 u8 bit = 1;
1301 struct syscall_arg arg = {
75b757ca
ACM
1302 .idx = 0,
1303 .mask = 0,
1304 .trace = trace,
1305 .thread = thread,
01533e97 1306 };
6e7eeb51 1307
f208bd8d 1308 for (field = sc->args; field;
01533e97
ACM
1309 field = field->next, ++arg.idx, bit <<= 1) {
1310 if (arg.mask & bit)
6e7eeb51 1311 continue;
55d43bca
DA
1312
1313 /* special care for unaligned accesses */
1314 p = args + sizeof(unsigned long) * arg.idx;
1315 memcpy(&val, p, sizeof(val));
1316
4aa58232
ACM
1317 /*
1318 * Suppress this argument if its value is zero and
1319 * and we don't have a string associated in an
1320 * strarray for it.
1321 */
55d43bca 1322 if (val == 0 &&
4aa58232
ACM
1323 !(sc->arg_scnprintf &&
1324 sc->arg_scnprintf[arg.idx] == SCA_STRARRAY &&
1325 sc->arg_parm[arg.idx]))
22ae5cf1
ACM
1326 continue;
1327
752fde44 1328 printed += scnprintf(bf + printed, size - printed,
13d4ff3e 1329 "%s%s: ", printed ? ", " : "", field->name);
01533e97 1330 if (sc->arg_scnprintf && sc->arg_scnprintf[arg.idx]) {
55d43bca 1331 arg.val = val;
1f115cb7
ACM
1332 if (sc->arg_parm)
1333 arg.parm = sc->arg_parm[arg.idx];
01533e97
ACM
1334 printed += sc->arg_scnprintf[arg.idx](bf + printed,
1335 size - printed, &arg);
6e7eeb51 1336 } else {
13d4ff3e 1337 printed += scnprintf(bf + printed, size - printed,
55d43bca 1338 "%ld", val);
6e7eeb51 1339 }
514f1c67 1340 }
4c4d6e51
ACM
1341 } else if (IS_ERR(sc->tp_format)) {
1342 /*
1343 * If we managed to read the tracepoint /format file, then we
1344 * may end up not having any args, like with gettid(), so only
1345 * print the raw args when we didn't manage to read it.
1346 */
01533e97
ACM
1347 int i = 0;
1348
514f1c67 1349 while (i < 6) {
55d43bca
DA
1350 /* special care for unaligned accesses */
1351 p = args + sizeof(unsigned long) * i;
1352 memcpy(&val, p, sizeof(val));
752fde44
ACM
1353 printed += scnprintf(bf + printed, size - printed,
1354 "%sarg%d: %ld",
55d43bca 1355 printed ? ", " : "", i, val);
514f1c67
ACM
1356 ++i;
1357 }
1358 }
1359
1360 return printed;
1361}
1362
/*
 * Signature shared by the per tracepoint sample handlers in this file,
 * e.g. trace__sys_enter() and trace__sys_exit().
 */
typedef int (*tracepoint_handler)(struct trace *trace,
				  struct perf_evsel *evsel,
				  union perf_event *event,
				  struct perf_sample *sample);
1366
1367static struct syscall *trace__syscall_info(struct trace *trace,
bf2575c1 1368 struct perf_evsel *evsel, int id)
ba3d7dee 1369{
ba3d7dee
ACM
1370
1371 if (id < 0) {
adaa18bf
ACM
1372
1373 /*
1374 * XXX: Noticed on x86_64, reproduced as far back as 3.0.36, haven't tried
1375 * before that, leaving at a higher verbosity level till that is
1376 * explained. Reproduced with plain ftrace with:
1377 *
1378 * echo 1 > /t/events/raw_syscalls/sys_exit/enable
1379 * grep "NR -1 " /t/trace_pipe
1380 *
1381 * After generating some load on the machine.
1382 */
1383 if (verbose > 1) {
1384 static u64 n;
1385 fprintf(trace->output, "Invalid syscall %d id, skipping (%s, %" PRIu64 ") ...\n",
1386 id, perf_evsel__name(evsel), ++n);
1387 }
ba3d7dee
ACM
1388 return NULL;
1389 }
1390
1391 if ((id > trace->syscalls.max || trace->syscalls.table[id].name == NULL) &&
1392 trace__read_syscall_info(trace, id))
1393 goto out_cant_read;
1394
1395 if ((id > trace->syscalls.max || trace->syscalls.table[id].name == NULL))
1396 goto out_cant_read;
1397
1398 return &trace->syscalls.table[id];
1399
1400out_cant_read:
7c304ee0
ACM
1401 if (verbose) {
1402 fprintf(trace->output, "Problems reading syscall %d", id);
1403 if (id <= trace->syscalls.max && trace->syscalls.table[id].name != NULL)
1404 fprintf(trace->output, "(%s)", trace->syscalls.table[id].name);
1405 fputs(" information\n", trace->output);
1406 }
ba3d7dee
ACM
1407 return NULL;
1408}
1409
bf2575c1
DA
1410static void thread__update_stats(struct thread_trace *ttrace,
1411 int id, struct perf_sample *sample)
1412{
1413 struct int_node *inode;
1414 struct stats *stats;
1415 u64 duration = 0;
1416
1417 inode = intlist__findnew(ttrace->syscall_stats, id);
1418 if (inode == NULL)
1419 return;
1420
1421 stats = inode->priv;
1422 if (stats == NULL) {
1423 stats = malloc(sizeof(struct stats));
1424 if (stats == NULL)
1425 return;
1426 init_stats(stats);
1427 inode->priv = stats;
1428 }
1429
1430 if (ttrace->entry_time && sample->time > ttrace->entry_time)
1431 duration = sample->time - ttrace->entry_time;
1432
1433 update_stats(stats, duration);
1434}
1435
e596663e
ACM
1436static int trace__printf_interrupted_entry(struct trace *trace, struct perf_sample *sample)
1437{
1438 struct thread_trace *ttrace;
1439 u64 duration;
1440 size_t printed;
1441
1442 if (trace->current == NULL)
1443 return 0;
1444
1445 ttrace = thread__priv(trace->current);
1446
1447 if (!ttrace->entry_pending)
1448 return 0;
1449
1450 duration = sample->time - ttrace->entry_time;
1451
1452 printed = trace__fprintf_entry_head(trace, trace->current, duration, sample->time, trace->output);
1453 printed += fprintf(trace->output, "%-70s) ...\n", ttrace->entry_str);
1454 ttrace->entry_pending = false;
1455
1456 return printed;
1457}
1458
ba3d7dee 1459static int trace__sys_enter(struct trace *trace, struct perf_evsel *evsel,
0c82adcf 1460 union perf_event *event __maybe_unused,
ba3d7dee
ACM
1461 struct perf_sample *sample)
1462{
752fde44 1463 char *msg;
ba3d7dee 1464 void *args;
752fde44 1465 size_t printed = 0;
2ae3a312 1466 struct thread *thread;
b91fc39f 1467 int id = perf_evsel__sc_tp_uint(evsel, id, sample), err = -1;
bf2575c1 1468 struct syscall *sc = trace__syscall_info(trace, evsel, id);
2ae3a312
ACM
1469 struct thread_trace *ttrace;
1470
1471 if (sc == NULL)
1472 return -1;
ba3d7dee 1473
8fb598e5 1474 thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
c24ff998 1475 ttrace = thread__trace(thread, trace->output);
2ae3a312 1476 if (ttrace == NULL)
b91fc39f 1477 goto out_put;
ba3d7dee 1478
77170988 1479 args = perf_evsel__sc_tp_ptr(evsel, args, sample);
752fde44
ACM
1480
1481 if (ttrace->entry_str == NULL) {
e4d44e83 1482 ttrace->entry_str = malloc(trace__entry_str_size);
752fde44 1483 if (!ttrace->entry_str)
b91fc39f 1484 goto out_put;
752fde44
ACM
1485 }
1486
5cf9c84e 1487 if (!(trace->duration_filter || trace->summary_only || trace->min_stack))
6ebad5c1 1488 trace__printf_interrupted_entry(trace, sample);
e596663e 1489
752fde44
ACM
1490 ttrace->entry_time = sample->time;
1491 msg = ttrace->entry_str;
e4d44e83 1492 printed += scnprintf(msg + printed, trace__entry_str_size - printed, "%s(", sc->name);
752fde44 1493
e4d44e83 1494 printed += syscall__scnprintf_args(sc, msg + printed, trace__entry_str_size - printed,
75b757ca 1495 args, trace, thread);
752fde44 1496
5089f20e 1497 if (sc->is_exit) {
5cf9c84e 1498 if (!(trace->duration_filter || trace->summary_only || trace->min_stack)) {
c24ff998 1499 trace__fprintf_entry_head(trace, thread, 1, sample->time, trace->output);
c008f78f 1500 fprintf(trace->output, "%-70s)\n", ttrace->entry_str);
ae9ed035 1501 }
7f4f8001 1502 } else {
752fde44 1503 ttrace->entry_pending = true;
7f4f8001
ACM
1504 /* See trace__vfs_getname & trace__sys_exit */
1505 ttrace->filename.pending_open = false;
1506 }
ba3d7dee 1507
f3b623b8
ACM
1508 if (trace->current != thread) {
1509 thread__put(trace->current);
1510 trace->current = thread__get(thread);
1511 }
b91fc39f
ACM
1512 err = 0;
1513out_put:
1514 thread__put(thread);
1515 return err;
ba3d7dee
ACM
1516}
1517
5cf9c84e
ACM
1518static int trace__resolve_callchain(struct trace *trace, struct perf_evsel *evsel,
1519 struct perf_sample *sample,
1520 struct callchain_cursor *cursor)
202ff968
ACM
1521{
1522 struct addr_location al;
5cf9c84e
ACM
1523
1524 if (machine__resolve(trace->host, &al, sample) < 0 ||
1525 thread__resolve_callchain(al.thread, cursor, evsel, sample, NULL, NULL, trace->max_stack))
1526 return -1;
1527
1528 return 0;
1529}
1530
1531static int trace__fprintf_callchain(struct trace *trace, struct perf_sample *sample)
1532{
202ff968 1533 /* TODO: user-configurable print_opts */
e20ab86e
ACM
1534 const unsigned int print_opts = EVSEL__PRINT_SYM |
1535 EVSEL__PRINT_DSO |
1536 EVSEL__PRINT_UNKNOWN_AS_ADDR;
202ff968 1537
d327e60c 1538 return sample__fprintf_callchain(sample, 38, print_opts, &callchain_cursor, trace->output);
202ff968
ACM
1539}
1540
ba3d7dee 1541static int trace__sys_exit(struct trace *trace, struct perf_evsel *evsel,
0c82adcf 1542 union perf_event *event __maybe_unused,
ba3d7dee
ACM
1543 struct perf_sample *sample)
1544{
2c82c3ad 1545 long ret;
60c907ab 1546 u64 duration = 0;
2ae3a312 1547 struct thread *thread;
5cf9c84e 1548 int id = perf_evsel__sc_tp_uint(evsel, id, sample), err = -1, callchain_ret = 0;
bf2575c1 1549 struct syscall *sc = trace__syscall_info(trace, evsel, id);
2ae3a312
ACM
1550 struct thread_trace *ttrace;
1551
1552 if (sc == NULL)
1553 return -1;
ba3d7dee 1554
8fb598e5 1555 thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
c24ff998 1556 ttrace = thread__trace(thread, trace->output);
2ae3a312 1557 if (ttrace == NULL)
b91fc39f 1558 goto out_put;
ba3d7dee 1559
bf2575c1
DA
1560 if (trace->summary)
1561 thread__update_stats(ttrace, id, sample);
1562
77170988 1563 ret = perf_evsel__sc_tp_uint(evsel, ret, sample);
ba3d7dee 1564
fd0db102 1565 if (id == trace->open_id && ret >= 0 && ttrace->filename.pending_open) {
7f4f8001
ACM
1566 trace__set_fd_pathname(thread, ret, ttrace->filename.name);
1567 ttrace->filename.pending_open = false;
c522739d
ACM
1568 ++trace->stats.vfs_getname;
1569 }
1570
752fde44
ACM
1571 ttrace->exit_time = sample->time;
1572
ae9ed035 1573 if (ttrace->entry_time) {
60c907ab 1574 duration = sample->time - ttrace->entry_time;
ae9ed035
ACM
1575 if (trace__filter_duration(trace, duration))
1576 goto out;
1577 } else if (trace->duration_filter)
1578 goto out;
60c907ab 1579
5cf9c84e
ACM
1580 if (sample->callchain) {
1581 callchain_ret = trace__resolve_callchain(trace, evsel, sample, &callchain_cursor);
1582 if (callchain_ret == 0) {
1583 if (callchain_cursor.nr < trace->min_stack)
1584 goto out;
1585 callchain_ret = 1;
1586 }
1587 }
1588
fd2eabaf
DA
1589 if (trace->summary_only)
1590 goto out;
1591
c24ff998 1592 trace__fprintf_entry_head(trace, thread, duration, sample->time, trace->output);
752fde44
ACM
1593
1594 if (ttrace->entry_pending) {
c24ff998 1595 fprintf(trace->output, "%-70s", ttrace->entry_str);
752fde44 1596 } else {
c24ff998
ACM
1597 fprintf(trace->output, " ... [");
1598 color_fprintf(trace->output, PERF_COLOR_YELLOW, "continued");
1599 fprintf(trace->output, "]: %s()", sc->name);
752fde44
ACM
1600 }
1601
da3c9a44
ACM
1602 if (sc->fmt == NULL) {
1603signed_print:
2c82c3ad 1604 fprintf(trace->output, ") = %ld", ret);
11c8e39f 1605 } else if (ret < 0 && (sc->fmt->errmsg || sc->fmt->errpid)) {
942a91ed 1606 char bf[STRERR_BUFSIZE];
c8b5f2c9 1607 const char *emsg = str_error_r(-ret, bf, sizeof(bf)),
ba3d7dee
ACM
1608 *e = audit_errno_to_name(-ret);
1609
c24ff998 1610 fprintf(trace->output, ") = -1 %s %s", e, emsg);
da3c9a44 1611 } else if (ret == 0 && sc->fmt->timeout)
c24ff998 1612 fprintf(trace->output, ") = 0 Timeout");
04b34729 1613 else if (sc->fmt->hexret)
2c82c3ad 1614 fprintf(trace->output, ") = %#lx", ret);
11c8e39f
ACM
1615 else if (sc->fmt->errpid) {
1616 struct thread *child = machine__find_thread(trace->host, ret, ret);
1617
1618 if (child != NULL) {
1619 fprintf(trace->output, ") = %ld", ret);
1620 if (child->comm_set)
1621 fprintf(trace->output, " (%s)", thread__comm_str(child));
1622 thread__put(child);
1623 }
1624 } else
da3c9a44 1625 goto signed_print;
ba3d7dee 1626
c24ff998 1627 fputc('\n', trace->output);
566a0885 1628
5cf9c84e
ACM
1629 if (callchain_ret > 0)
1630 trace__fprintf_callchain(trace, sample);
1631 else if (callchain_ret < 0)
1632 pr_err("Problem processing %s callchain, skipping...\n", perf_evsel__name(evsel));
ae9ed035 1633out:
752fde44 1634 ttrace->entry_pending = false;
b91fc39f
ACM
1635 err = 0;
1636out_put:
1637 thread__put(thread);
1638 return err;
ba3d7dee
ACM
1639}
1640
c522739d 1641static int trace__vfs_getname(struct trace *trace, struct perf_evsel *evsel,
0c82adcf 1642 union perf_event *event __maybe_unused,
c522739d
ACM
1643 struct perf_sample *sample)
1644{
f994592d
ACM
1645 struct thread *thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
1646 struct thread_trace *ttrace;
1647 size_t filename_len, entry_str_len, to_move;
1648 ssize_t remaining_space;
1649 char *pos;
7f4f8001 1650 const char *filename = perf_evsel__rawptr(evsel, sample, "pathname");
f994592d
ACM
1651
1652 if (!thread)
1653 goto out;
1654
1655 ttrace = thread__priv(thread);
1656 if (!ttrace)
1657 goto out;
1658
7f4f8001
ACM
1659 filename_len = strlen(filename);
1660
1661 if (ttrace->filename.namelen < filename_len) {
1662 char *f = realloc(ttrace->filename.name, filename_len + 1);
1663
1664 if (f == NULL)
1665 goto out;
1666
1667 ttrace->filename.namelen = filename_len;
1668 ttrace->filename.name = f;
1669 }
1670
1671 strcpy(ttrace->filename.name, filename);
1672 ttrace->filename.pending_open = true;
1673
f994592d
ACM
1674 if (!ttrace->filename.ptr)
1675 goto out;
1676
1677 entry_str_len = strlen(ttrace->entry_str);
1678 remaining_space = trace__entry_str_size - entry_str_len - 1; /* \0 */
1679 if (remaining_space <= 0)
1680 goto out;
1681
f994592d
ACM
1682 if (filename_len > (size_t)remaining_space) {
1683 filename += filename_len - remaining_space;
1684 filename_len = remaining_space;
1685 }
1686
1687 to_move = entry_str_len - ttrace->filename.entry_str_pos + 1; /* \0 */
1688 pos = ttrace->entry_str + ttrace->filename.entry_str_pos;
1689 memmove(pos + filename_len, pos, to_move);
1690 memcpy(pos, filename, filename_len);
1691
1692 ttrace->filename.ptr = 0;
1693 ttrace->filename.entry_str_pos = 0;
1694out:
c522739d
ACM
1695 return 0;
1696}
1697
1302d88e 1698static int trace__sched_stat_runtime(struct trace *trace, struct perf_evsel *evsel,
0c82adcf 1699 union perf_event *event __maybe_unused,
1302d88e
ACM
1700 struct perf_sample *sample)
1701{
1702 u64 runtime = perf_evsel__intval(evsel, sample, "runtime");
1703 double runtime_ms = (double)runtime / NSEC_PER_MSEC;
8fb598e5 1704 struct thread *thread = machine__findnew_thread(trace->host,
314add6b
AH
1705 sample->pid,
1706 sample->tid);
c24ff998 1707 struct thread_trace *ttrace = thread__trace(thread, trace->output);
1302d88e
ACM
1708
1709 if (ttrace == NULL)
1710 goto out_dump;
1711
1712 ttrace->runtime_ms += runtime_ms;
1713 trace->runtime_ms += runtime_ms;
b91fc39f 1714 thread__put(thread);
1302d88e
ACM
1715 return 0;
1716
1717out_dump:
c24ff998 1718 fprintf(trace->output, "%s: comm=%s,pid=%u,runtime=%" PRIu64 ",vruntime=%" PRIu64 ")\n",
1302d88e
ACM
1719 evsel->name,
1720 perf_evsel__strval(evsel, sample, "comm"),
1721 (pid_t)perf_evsel__intval(evsel, sample, "pid"),
1722 runtime,
1723 perf_evsel__intval(evsel, sample, "vruntime"));
b91fc39f 1724 thread__put(thread);
1302d88e
ACM
1725 return 0;
1726}
1727
1d6c9407
WN
1728static void bpf_output__printer(enum binary_printer_ops op,
1729 unsigned int val, void *extra)
1730{
1731 FILE *output = extra;
1732 unsigned char ch = (unsigned char)val;
1733
1734 switch (op) {
1735 case BINARY_PRINT_CHAR_DATA:
1736 fprintf(output, "%c", isprint(ch) ? ch : '.');
1737 break;
1738 case BINARY_PRINT_DATA_BEGIN:
1739 case BINARY_PRINT_LINE_BEGIN:
1740 case BINARY_PRINT_ADDR:
1741 case BINARY_PRINT_NUM_DATA:
1742 case BINARY_PRINT_NUM_PAD:
1743 case BINARY_PRINT_SEP:
1744 case BINARY_PRINT_CHAR_PAD:
1745 case BINARY_PRINT_LINE_END:
1746 case BINARY_PRINT_DATA_END:
1747 default:
1748 break;
1749 }
1750}
1751
1752static void bpf_output__fprintf(struct trace *trace,
1753 struct perf_sample *sample)
1754{
1755 print_binary(sample->raw_data, sample->raw_size, 8,
1756 bpf_output__printer, trace->output);
1757}
1758
14a052df
ACM
1759static int trace__event_handler(struct trace *trace, struct perf_evsel *evsel,
1760 union perf_event *event __maybe_unused,
1761 struct perf_sample *sample)
1762{
7ad35615
ACM
1763 int callchain_ret = 0;
1764
1765 if (sample->callchain) {
1766 callchain_ret = trace__resolve_callchain(trace, evsel, sample, &callchain_cursor);
1767 if (callchain_ret == 0) {
1768 if (callchain_cursor.nr < trace->min_stack)
1769 goto out;
1770 callchain_ret = 1;
1771 }
1772 }
1773
14a052df
ACM
1774 trace__printf_interrupted_entry(trace, sample);
1775 trace__fprintf_tstamp(trace, sample->time, trace->output);
0808921a
ACM
1776
1777 if (trace->trace_syscalls)
1778 fprintf(trace->output, "( ): ");
1779
1780 fprintf(trace->output, "%s:", evsel->name);
14a052df 1781
1d6c9407
WN
1782 if (perf_evsel__is_bpf_output(evsel)) {
1783 bpf_output__fprintf(trace, sample);
1784 } else if (evsel->tp_format) {
14a052df
ACM
1785 event_format__fprintf(evsel->tp_format, sample->cpu,
1786 sample->raw_data, sample->raw_size,
1787 trace->output);
1788 }
1789
1790 fprintf(trace->output, ")\n");
202ff968 1791
7ad35615
ACM
1792 if (callchain_ret > 0)
1793 trace__fprintf_callchain(trace, sample);
1794 else if (callchain_ret < 0)
1795 pr_err("Problem processing %s callchain, skipping...\n", perf_evsel__name(evsel));
1796out:
14a052df
ACM
1797 return 0;
1798}
1799
598d02c5
SF
1800static void print_location(FILE *f, struct perf_sample *sample,
1801 struct addr_location *al,
1802 bool print_dso, bool print_sym)
1803{
1804
1805 if ((verbose || print_dso) && al->map)
1806 fprintf(f, "%s@", al->map->dso->long_name);
1807
1808 if ((verbose || print_sym) && al->sym)
4414a3c5 1809 fprintf(f, "%s+0x%" PRIx64, al->sym->name,
598d02c5
SF
1810 al->addr - al->sym->start);
1811 else if (al->map)
4414a3c5 1812 fprintf(f, "0x%" PRIx64, al->addr);
598d02c5 1813 else
4414a3c5 1814 fprintf(f, "0x%" PRIx64, sample->addr);
598d02c5
SF
1815}
1816
1817static int trace__pgfault(struct trace *trace,
1818 struct perf_evsel *evsel,
473398a2 1819 union perf_event *event __maybe_unused,
598d02c5
SF
1820 struct perf_sample *sample)
1821{
1822 struct thread *thread;
598d02c5
SF
1823 struct addr_location al;
1824 char map_type = 'd';
a2ea67d7 1825 struct thread_trace *ttrace;
b91fc39f 1826 int err = -1;
1df54290 1827 int callchain_ret = 0;
598d02c5
SF
1828
1829 thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
1df54290
ACM
1830
1831 if (sample->callchain) {
1832 callchain_ret = trace__resolve_callchain(trace, evsel, sample, &callchain_cursor);
1833 if (callchain_ret == 0) {
1834 if (callchain_cursor.nr < trace->min_stack)
1835 goto out_put;
1836 callchain_ret = 1;
1837 }
1838 }
1839
a2ea67d7
SF
1840 ttrace = thread__trace(thread, trace->output);
1841 if (ttrace == NULL)
b91fc39f 1842 goto out_put;
a2ea67d7
SF
1843
1844 if (evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MAJ)
1845 ttrace->pfmaj++;
1846 else
1847 ttrace->pfmin++;
1848
1849 if (trace->summary_only)
b91fc39f 1850 goto out;
598d02c5 1851
473398a2 1852 thread__find_addr_location(thread, sample->cpumode, MAP__FUNCTION,
598d02c5
SF
1853 sample->ip, &al);
1854
1855 trace__fprintf_entry_head(trace, thread, 0, sample->time, trace->output);
1856
1857 fprintf(trace->output, "%sfault [",
1858 evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MAJ ?
1859 "maj" : "min");
1860
1861 print_location(trace->output, sample, &al, false, true);
1862
1863 fprintf(trace->output, "] => ");
1864
473398a2 1865 thread__find_addr_location(thread, sample->cpumode, MAP__VARIABLE,
598d02c5
SF
1866 sample->addr, &al);
1867
1868 if (!al.map) {
473398a2 1869 thread__find_addr_location(thread, sample->cpumode,
598d02c5
SF
1870 MAP__FUNCTION, sample->addr, &al);
1871
1872 if (al.map)
1873 map_type = 'x';
1874 else
1875 map_type = '?';
1876 }
1877
1878 print_location(trace->output, sample, &al, true, false);
1879
1880 fprintf(trace->output, " (%c%c)\n", map_type, al.level);
0c3a6ef4 1881
1df54290
ACM
1882 if (callchain_ret > 0)
1883 trace__fprintf_callchain(trace, sample);
1884 else if (callchain_ret < 0)
1885 pr_err("Problem processing %s callchain, skipping...\n", perf_evsel__name(evsel));
b91fc39f
ACM
1886out:
1887 err = 0;
1888out_put:
1889 thread__put(thread);
1890 return err;
598d02c5
SF
1891}
1892
bdc89661
DA
1893static bool skip_sample(struct trace *trace, struct perf_sample *sample)
1894{
1895 if ((trace->pid_list && intlist__find(trace->pid_list, sample->pid)) ||
1896 (trace->tid_list && intlist__find(trace->tid_list, sample->tid)))
1897 return false;
1898
1899 if (trace->pid_list || trace->tid_list)
1900 return true;
1901
1902 return false;
1903}
1904
e6001980 1905static void trace__set_base_time(struct trace *trace,
8a07a809 1906 struct perf_evsel *evsel,
e6001980
ACM
1907 struct perf_sample *sample)
1908{
8a07a809
ACM
1909 /*
1910 * BPF events were not setting PERF_SAMPLE_TIME, so be more robust
1911 * and don't use sample->time unconditionally, we may end up having
1912 * some other event in the future without PERF_SAMPLE_TIME for good
1913 * reason, i.e. we may not be interested in its timestamps, just in
1914 * it taking place, picking some piece of information when it
1915 * appears in our event stream (vfs_getname comes to mind).
1916 */
1917 if (trace->base_time == 0 && !trace->full_time &&
1918 (evsel->attr.sample_type & PERF_SAMPLE_TIME))
e6001980
ACM
1919 trace->base_time = sample->time;
1920}
1921
6810fc91 1922static int trace__process_sample(struct perf_tool *tool,
0c82adcf 1923 union perf_event *event,
6810fc91
DA
1924 struct perf_sample *sample,
1925 struct perf_evsel *evsel,
1926 struct machine *machine __maybe_unused)
1927{
1928 struct trace *trace = container_of(tool, struct trace, tool);
1929 int err = 0;
1930
744a9719 1931 tracepoint_handler handler = evsel->handler;
6810fc91 1932
bdc89661
DA
1933 if (skip_sample(trace, sample))
1934 return 0;
1935
e6001980 1936 trace__set_base_time(trace, evsel, sample);
6810fc91 1937
3160565f
DA
1938 if (handler) {
1939 ++trace->nr_events;
0c82adcf 1940 handler(trace, evsel, event, sample);
3160565f 1941 }
6810fc91
DA
1942
1943 return err;
1944}
1945
bdc89661
DA
1946static int parse_target_str(struct trace *trace)
1947{
1948 if (trace->opts.target.pid) {
1949 trace->pid_list = intlist__new(trace->opts.target.pid);
1950 if (trace->pid_list == NULL) {
1951 pr_err("Error parsing process id string\n");
1952 return -EINVAL;
1953 }
1954 }
1955
1956 if (trace->opts.target.tid) {
1957 trace->tid_list = intlist__new(trace->opts.target.tid);
1958 if (trace->tid_list == NULL) {
1959 pr_err("Error parsing thread id string\n");
1960 return -EINVAL;
1961 }
1962 }
1963
1964 return 0;
1965}
1966
1e28fe0a 1967static int trace__record(struct trace *trace, int argc, const char **argv)
5e2485b1
DA
1968{
1969 unsigned int rec_argc, i, j;
1970 const char **rec_argv;
1971 const char * const record_args[] = {
1972 "record",
1973 "-R",
1974 "-m", "1024",
1975 "-c", "1",
5e2485b1
DA
1976 };
1977
1e28fe0a
SF
1978 const char * const sc_args[] = { "-e", };
1979 unsigned int sc_args_nr = ARRAY_SIZE(sc_args);
1980 const char * const majpf_args[] = { "-e", "major-faults" };
1981 unsigned int majpf_args_nr = ARRAY_SIZE(majpf_args);
1982 const char * const minpf_args[] = { "-e", "minor-faults" };
1983 unsigned int minpf_args_nr = ARRAY_SIZE(minpf_args);
1984
9aca7f17 1985 /* +1 is for the event string below */
1e28fe0a
SF
1986 rec_argc = ARRAY_SIZE(record_args) + sc_args_nr + 1 +
1987 majpf_args_nr + minpf_args_nr + argc;
5e2485b1
DA
1988 rec_argv = calloc(rec_argc + 1, sizeof(char *));
1989
1990 if (rec_argv == NULL)
1991 return -ENOMEM;
1992
1e28fe0a 1993 j = 0;
5e2485b1 1994 for (i = 0; i < ARRAY_SIZE(record_args); i++)
1e28fe0a
SF
1995 rec_argv[j++] = record_args[i];
1996
e281a960
SF
1997 if (trace->trace_syscalls) {
1998 for (i = 0; i < sc_args_nr; i++)
1999 rec_argv[j++] = sc_args[i];
2000
2001 /* event string may be different for older kernels - e.g., RHEL6 */
2002 if (is_valid_tracepoint("raw_syscalls:sys_enter"))
2003 rec_argv[j++] = "raw_syscalls:sys_enter,raw_syscalls:sys_exit";
2004 else if (is_valid_tracepoint("syscalls:sys_enter"))
2005 rec_argv[j++] = "syscalls:sys_enter,syscalls:sys_exit";
2006 else {
2007 pr_err("Neither raw_syscalls nor syscalls events exist.\n");
2008 return -1;
2009 }
9aca7f17 2010 }
9aca7f17 2011
1e28fe0a
SF
2012 if (trace->trace_pgfaults & TRACE_PFMAJ)
2013 for (i = 0; i < majpf_args_nr; i++)
2014 rec_argv[j++] = majpf_args[i];
2015
2016 if (trace->trace_pgfaults & TRACE_PFMIN)
2017 for (i = 0; i < minpf_args_nr; i++)
2018 rec_argv[j++] = minpf_args[i];
2019
2020 for (i = 0; i < (unsigned int)argc; i++)
2021 rec_argv[j++] = argv[i];
5e2485b1 2022
1e28fe0a 2023 return cmd_record(j, rec_argv, NULL);
5e2485b1
DA
2024}
2025
bf2575c1
DA
2026static size_t trace__fprintf_thread_summary(struct trace *trace, FILE *fp);
2027
08c98776 2028static bool perf_evlist__add_vfs_getname(struct perf_evlist *evlist)
c522739d 2029{
ef503831 2030 struct perf_evsel *evsel = perf_evsel__newtp("probe", "vfs_getname");
8dd2a131
JO
2031
2032 if (IS_ERR(evsel))
08c98776 2033 return false;
c522739d
ACM
2034
2035 if (perf_evsel__field(evsel, "pathname") == NULL) {
2036 perf_evsel__delete(evsel);
08c98776 2037 return false;
c522739d
ACM
2038 }
2039
744a9719 2040 evsel->handler = trace__vfs_getname;
c522739d 2041 perf_evlist__add(evlist, evsel);
08c98776 2042 return true;
c522739d
ACM
2043}
2044
0ae537cb 2045static struct perf_evsel *perf_evsel__new_pgfault(u64 config)
598d02c5
SF
2046{
2047 struct perf_evsel *evsel;
2048 struct perf_event_attr attr = {
2049 .type = PERF_TYPE_SOFTWARE,
2050 .mmap_data = 1,
598d02c5
SF
2051 };
2052
2053 attr.config = config;
0524798c 2054 attr.sample_period = 1;
598d02c5
SF
2055
2056 event_attr_init(&attr);
2057
2058 evsel = perf_evsel__new(&attr);
0ae537cb
ACM
2059 if (evsel)
2060 evsel->handler = trace__pgfault;
598d02c5 2061
0ae537cb 2062 return evsel;
598d02c5
SF
2063}
2064
ddbb1b13
ACM
2065static void trace__handle_event(struct trace *trace, union perf_event *event, struct perf_sample *sample)
2066{
2067 const u32 type = event->header.type;
2068 struct perf_evsel *evsel;
2069
ddbb1b13
ACM
2070 if (type != PERF_RECORD_SAMPLE) {
2071 trace__process_event(trace, trace->host, event, sample);
2072 return;
2073 }
2074
2075 evsel = perf_evlist__id2evsel(trace->evlist, sample->id);
2076 if (evsel == NULL) {
2077 fprintf(trace->output, "Unknown tp ID %" PRIu64 ", skipping...\n", sample->id);
2078 return;
2079 }
2080
e6001980
ACM
2081 trace__set_base_time(trace, evsel, sample);
2082
ddbb1b13
ACM
2083 if (evsel->attr.type == PERF_TYPE_TRACEPOINT &&
2084 sample->raw_data == NULL) {
2085 fprintf(trace->output, "%s sample with no payload for tid: %d, cpu %d, raw_size=%d, skipping...\n",
2086 perf_evsel__name(evsel), sample->tid,
2087 sample->cpu, sample->raw_size);
2088 } else {
2089 tracepoint_handler handler = evsel->handler;
2090 handler(trace, evsel, event, sample);
2091 }
2092}
2093
c27366f0
ACM
2094static int trace__add_syscall_newtp(struct trace *trace)
2095{
2096 int ret = -1;
2097 struct perf_evlist *evlist = trace->evlist;
2098 struct perf_evsel *sys_enter, *sys_exit;
2099
2100 sys_enter = perf_evsel__syscall_newtp("sys_enter", trace__sys_enter);
2101 if (sys_enter == NULL)
2102 goto out;
2103
2104 if (perf_evsel__init_sc_tp_ptr_field(sys_enter, args))
2105 goto out_delete_sys_enter;
2106
2107 sys_exit = perf_evsel__syscall_newtp("sys_exit", trace__sys_exit);
2108 if (sys_exit == NULL)
2109 goto out_delete_sys_enter;
2110
2111 if (perf_evsel__init_sc_tp_uint_field(sys_exit, ret))
2112 goto out_delete_sys_exit;
2113
2114 perf_evlist__add(evlist, sys_enter);
2115 perf_evlist__add(evlist, sys_exit);
2116
2ddd5c04 2117 if (callchain_param.enabled && !trace->kernel_syscallchains) {
44621819
ACM
2118 /*
2119 * We're interested only in the user space callchain
2120 * leading to the syscall, allow overriding that for
2121 * debugging reasons using --kernel_syscall_callchains
2122 */
2123 sys_exit->attr.exclude_callchain_kernel = 1;
2124 }
2125
8b3ce757
ACM
2126 trace->syscalls.events.sys_enter = sys_enter;
2127 trace->syscalls.events.sys_exit = sys_exit;
c27366f0
ACM
2128
2129 ret = 0;
2130out:
2131 return ret;
2132
2133out_delete_sys_exit:
2134 perf_evsel__delete_priv(sys_exit);
2135out_delete_sys_enter:
2136 perf_evsel__delete_priv(sys_enter);
2137 goto out;
2138}
2139
19867b61
ACM
2140static int trace__set_ev_qualifier_filter(struct trace *trace)
2141{
2142 int err = -1;
2143 char *filter = asprintf_expr_inout_ints("id", !trace->not_ev_qualifier,
2144 trace->ev_qualifier_ids.nr,
2145 trace->ev_qualifier_ids.entries);
2146
2147 if (filter == NULL)
2148 goto out_enomem;
2149
2150 if (!perf_evsel__append_filter(trace->syscalls.events.sys_enter, "&&", filter))
2151 err = perf_evsel__append_filter(trace->syscalls.events.sys_exit, "&&", filter);
2152
2153 free(filter);
2154out:
2155 return err;
2156out_enomem:
2157 errno = ENOMEM;
2158 goto out;
2159}
c27366f0 2160
f15eb531 2161static int trace__run(struct trace *trace, int argc, const char **argv)
514f1c67 2162{
14a052df 2163 struct perf_evlist *evlist = trace->evlist;
0ae537cb 2164 struct perf_evsel *evsel, *pgfault_maj = NULL, *pgfault_min = NULL;
efd5745e
ACM
2165 int err = -1, i;
2166 unsigned long before;
f15eb531 2167 const bool forks = argc > 0;
46fb3c21 2168 bool draining = false;
514f1c67 2169
75b757ca
ACM
2170 trace->live = true;
2171
c27366f0 2172 if (trace->trace_syscalls && trace__add_syscall_newtp(trace))
801c67b0 2173 goto out_error_raw_syscalls;
514f1c67 2174
e281a960 2175 if (trace->trace_syscalls)
08c98776 2176 trace->vfs_getname = perf_evlist__add_vfs_getname(evlist);
c522739d 2177
0ae537cb
ACM
2178 if ((trace->trace_pgfaults & TRACE_PFMAJ)) {
2179 pgfault_maj = perf_evsel__new_pgfault(PERF_COUNT_SW_PAGE_FAULTS_MAJ);
2180 if (pgfault_maj == NULL)
2181 goto out_error_mem;
2182 perf_evlist__add(evlist, pgfault_maj);
e2726d99 2183 }
598d02c5 2184
0ae537cb
ACM
2185 if ((trace->trace_pgfaults & TRACE_PFMIN)) {
2186 pgfault_min = perf_evsel__new_pgfault(PERF_COUNT_SW_PAGE_FAULTS_MIN);
2187 if (pgfault_min == NULL)
2188 goto out_error_mem;
2189 perf_evlist__add(evlist, pgfault_min);
2190 }
598d02c5 2191
1302d88e 2192 if (trace->sched &&
2cc990ba
ACM
2193 perf_evlist__add_newtp(evlist, "sched", "sched_stat_runtime",
2194 trace__sched_stat_runtime))
2195 goto out_error_sched_stat_runtime;
1302d88e 2196
514f1c67
ACM
2197 err = perf_evlist__create_maps(evlist, &trace->opts.target);
2198 if (err < 0) {
c24ff998 2199 fprintf(trace->output, "Problems parsing the target to trace, check your options!\n");
514f1c67
ACM
2200 goto out_delete_evlist;
2201 }
2202
752fde44
ACM
2203 err = trace__symbols_init(trace, evlist);
2204 if (err < 0) {
c24ff998 2205 fprintf(trace->output, "Problems initializing symbol libraries!\n");
03ad9747 2206 goto out_delete_evlist;
752fde44
ACM
2207 }
2208
fde54b78
ACM
2209 perf_evlist__config(evlist, &trace->opts, NULL);
2210
0c3a6ef4
ACM
2211 if (callchain_param.enabled) {
2212 bool use_identifier = false;
2213
2214 if (trace->syscalls.events.sys_exit) {
2215 perf_evsel__config_callchain(trace->syscalls.events.sys_exit,
2216 &trace->opts, &callchain_param);
2217 use_identifier = true;
2218 }
2219
2220 if (pgfault_maj) {
2221 perf_evsel__config_callchain(pgfault_maj, &trace->opts, &callchain_param);
2222 use_identifier = true;
2223 }
2224
2225 if (pgfault_min) {
2226 perf_evsel__config_callchain(pgfault_min, &trace->opts, &callchain_param);
2227 use_identifier = true;
2228 }
2229
2230 if (use_identifier) {
2231 /*
2232 * Now we have evsels with different sample_ids, use
2233 * PERF_SAMPLE_IDENTIFIER to map from sample to evsel
2234 * from a fixed position in each ring buffer record.
2235 *
2236 * As of this the changeset introducing this comment, this
2237 * isn't strictly needed, as the fields that can come before
2238 * PERF_SAMPLE_ID are all used, but we'll probably disable
2239 * some of those for things like copying the payload of
2240 * pointer syscall arguments, and for vfs_getname we don't
2241 * need PERF_SAMPLE_ADDR and PERF_SAMPLE_IP, so do this
2242 * here as a warning we need to use PERF_SAMPLE_IDENTIFIER.
2243 */
2244 perf_evlist__set_sample_bit(evlist, IDENTIFIER);
2245 perf_evlist__reset_sample_bit(evlist, ID);
2246 }
fde54b78 2247 }
514f1c67 2248
f15eb531
NK
2249 signal(SIGCHLD, sig_handler);
2250 signal(SIGINT, sig_handler);
2251
2252 if (forks) {
6ef73ec4 2253 err = perf_evlist__prepare_workload(evlist, &trace->opts.target,
735f7e0b 2254 argv, false, NULL);
f15eb531 2255 if (err < 0) {
c24ff998 2256 fprintf(trace->output, "Couldn't run the workload!\n");
03ad9747 2257 goto out_delete_evlist;
f15eb531
NK
2258 }
2259 }
2260
514f1c67 2261 err = perf_evlist__open(evlist);
a8f23d8f
ACM
2262 if (err < 0)
2263 goto out_error_open;
514f1c67 2264
ba504235
WN
2265 err = bpf__apply_obj_config();
2266 if (err) {
2267 char errbuf[BUFSIZ];
2268
2269 bpf__strerror_apply_obj_config(err, errbuf, sizeof(errbuf));
2270 pr_err("ERROR: Apply config to BPF failed: %s\n",
2271 errbuf);
2272 goto out_error_open;
2273 }
2274
241b057c
ACM
2275 /*
2276 * Better not use !target__has_task() here because we need to cover the
2277 * case where no threads were specified in the command line, but a
2278 * workload was, and in that case we will fill in the thread_map when
2279 * we fork the workload in perf_evlist__prepare_workload.
2280 */
f078c385
ACM
2281 if (trace->filter_pids.nr > 0)
2282 err = perf_evlist__set_filter_pids(evlist, trace->filter_pids.nr, trace->filter_pids.entries);
e13798c7 2283 else if (thread_map__pid(evlist->threads, 0) == -1)
f078c385
ACM
2284 err = perf_evlist__set_filter_pid(evlist, getpid());
2285
94ad89bc
ACM
2286 if (err < 0)
2287 goto out_error_mem;
2288
19867b61
ACM
2289 if (trace->ev_qualifier_ids.nr > 0) {
2290 err = trace__set_ev_qualifier_filter(trace);
2291 if (err < 0)
2292 goto out_errno;
19867b61 2293
2e5e5f87
ACM
2294 pr_debug("event qualifier tracepoint filter: %s\n",
2295 trace->syscalls.events.sys_exit->filter);
2296 }
19867b61 2297
94ad89bc
ACM
2298 err = perf_evlist__apply_filters(evlist, &evsel);
2299 if (err < 0)
2300 goto out_error_apply_filters;
241b057c 2301
f885037e 2302 err = perf_evlist__mmap(evlist, trace->opts.mmap_pages, false);
e09b18d4
ACM
2303 if (err < 0)
2304 goto out_error_mmap;
514f1c67 2305
cb24d01d
ACM
2306 if (!target__none(&trace->opts.target))
2307 perf_evlist__enable(evlist);
2308
f15eb531
NK
2309 if (forks)
2310 perf_evlist__start_workload(evlist);
2311
e13798c7 2312 trace->multiple_threads = thread_map__pid(evlist->threads, 0) == -1 ||
42052bea
ACM
2313 evlist->threads->nr > 1 ||
2314 perf_evlist__first(evlist)->attr.inherit;
514f1c67 2315again:
efd5745e 2316 before = trace->nr_events;
514f1c67
ACM
2317
2318 for (i = 0; i < evlist->nr_mmaps; i++) {
2319 union perf_event *event;
2320
2321 while ((event = perf_evlist__mmap_read(evlist, i)) != NULL) {
514f1c67 2322 struct perf_sample sample;
514f1c67 2323
efd5745e 2324 ++trace->nr_events;
514f1c67 2325
514f1c67
ACM
2326 err = perf_evlist__parse_sample(evlist, event, &sample);
2327 if (err) {
c24ff998 2328 fprintf(trace->output, "Can't parse sample, err = %d, skipping...\n", err);
8e50d384 2329 goto next_event;
514f1c67
ACM
2330 }
2331
ddbb1b13 2332 trace__handle_event(trace, event, &sample);
8e50d384
ZZ
2333next_event:
2334 perf_evlist__mmap_consume(evlist, i);
20c5f10e 2335
ba209f85
ACM
2336 if (interrupted)
2337 goto out_disable;
02ac5421
ACM
2338
2339 if (done && !draining) {
2340 perf_evlist__disable(evlist);
2341 draining = true;
2342 }
514f1c67
ACM
2343 }
2344 }
2345
efd5745e 2346 if (trace->nr_events == before) {
ba209f85 2347 int timeout = done ? 100 : -1;
f15eb531 2348
46fb3c21
ACM
2349 if (!draining && perf_evlist__poll(evlist, timeout) > 0) {
2350 if (perf_evlist__filter_pollfd(evlist, POLLERR | POLLHUP) == 0)
2351 draining = true;
2352
ba209f85 2353 goto again;
46fb3c21 2354 }
ba209f85
ACM
2355 } else {
2356 goto again;
f15eb531
NK
2357 }
2358
ba209f85 2359out_disable:
f3b623b8
ACM
2360 thread__zput(trace->current);
2361
ba209f85 2362 perf_evlist__disable(evlist);
514f1c67 2363
c522739d
ACM
2364 if (!err) {
2365 if (trace->summary)
2366 trace__fprintf_thread_summary(trace, trace->output);
2367
2368 if (trace->show_tool_stats) {
2369 fprintf(trace->output, "Stats:\n "
2370 " vfs_getname : %" PRIu64 "\n"
2371 " proc_getname: %" PRIu64 "\n",
2372 trace->stats.vfs_getname,
2373 trace->stats.proc_getname);
2374 }
2375 }
bf2575c1 2376
514f1c67
ACM
2377out_delete_evlist:
2378 perf_evlist__delete(evlist);
14a052df 2379 trace->evlist = NULL;
75b757ca 2380 trace->live = false;
514f1c67 2381 return err;
6ef068cb
ACM
2382{
2383 char errbuf[BUFSIZ];
a8f23d8f 2384
2cc990ba 2385out_error_sched_stat_runtime:
988bdb31 2386 tracing_path__strerror_open_tp(errno, errbuf, sizeof(errbuf), "sched", "sched_stat_runtime");
2cc990ba
ACM
2387 goto out_error;
2388
801c67b0 2389out_error_raw_syscalls:
988bdb31 2390 tracing_path__strerror_open_tp(errno, errbuf, sizeof(errbuf), "raw_syscalls", "sys_(enter|exit)");
a8f23d8f
ACM
2391 goto out_error;
2392
e09b18d4
ACM
2393out_error_mmap:
2394 perf_evlist__strerror_mmap(evlist, errno, errbuf, sizeof(errbuf));
2395 goto out_error;
2396
a8f23d8f
ACM
2397out_error_open:
2398 perf_evlist__strerror_open(evlist, errno, errbuf, sizeof(errbuf));
2399
2400out_error:
6ef068cb 2401 fprintf(trace->output, "%s\n", errbuf);
87f91868 2402 goto out_delete_evlist;
94ad89bc
ACM
2403
2404out_error_apply_filters:
2405 fprintf(trace->output,
2406 "Failed to set filter \"%s\" on event %s with %d (%s)\n",
2407 evsel->filter, perf_evsel__name(evsel), errno,
c8b5f2c9 2408 str_error_r(errno, errbuf, sizeof(errbuf)));
94ad89bc 2409 goto out_delete_evlist;
514f1c67 2410}
5ed08dae
ACM
2411out_error_mem:
2412 fprintf(trace->output, "Not enough memory to run!\n");
2413 goto out_delete_evlist;
19867b61
ACM
2414
2415out_errno:
2416 fprintf(trace->output, "errno=%d,%s\n", errno, strerror(errno));
2417 goto out_delete_evlist;
a8f23d8f 2418}
514f1c67 2419
6810fc91
DA
2420static int trace__replay(struct trace *trace)
2421{
2422 const struct perf_evsel_str_handler handlers[] = {
c522739d 2423 { "probe:vfs_getname", trace__vfs_getname, },
6810fc91 2424 };
f5fc1412
JO
2425 struct perf_data_file file = {
2426 .path = input_name,
2427 .mode = PERF_DATA_MODE_READ,
e366a6d8 2428 .force = trace->force,
f5fc1412 2429 };
6810fc91 2430 struct perf_session *session;
003824e8 2431 struct perf_evsel *evsel;
6810fc91
DA
2432 int err = -1;
2433
2434 trace->tool.sample = trace__process_sample;
2435 trace->tool.mmap = perf_event__process_mmap;
384c671e 2436 trace->tool.mmap2 = perf_event__process_mmap2;
6810fc91
DA
2437 trace->tool.comm = perf_event__process_comm;
2438 trace->tool.exit = perf_event__process_exit;
2439 trace->tool.fork = perf_event__process_fork;
2440 trace->tool.attr = perf_event__process_attr;
2441 trace->tool.tracing_data = perf_event__process_tracing_data;
2442 trace->tool.build_id = perf_event__process_build_id;
2443
0a8cb85c 2444 trace->tool.ordered_events = true;
6810fc91
DA
2445 trace->tool.ordering_requires_timestamps = true;
2446
2447 /* add tid to output */
2448 trace->multiple_threads = true;
2449
f5fc1412 2450 session = perf_session__new(&file, false, &trace->tool);
6810fc91 2451 if (session == NULL)
52e02834 2452 return -1;
6810fc91 2453
0a7e6d1b 2454 if (symbol__init(&session->header.env) < 0)
cb2ffae2
NK
2455 goto out;
2456
8fb598e5
DA
2457 trace->host = &session->machines.host;
2458
6810fc91
DA
2459 err = perf_session__set_tracepoints_handlers(session, handlers);
2460 if (err)
2461 goto out;
2462
003824e8
NK
2463 evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
2464 "raw_syscalls:sys_enter");
9aca7f17
DA
2465 /* older kernels have syscalls tp versus raw_syscalls */
2466 if (evsel == NULL)
2467 evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
2468 "syscalls:sys_enter");
003824e8 2469
e281a960
SF
2470 if (evsel &&
2471 (perf_evsel__init_syscall_tp(evsel, trace__sys_enter) < 0 ||
2472 perf_evsel__init_sc_tp_ptr_field(evsel, args))) {
003824e8
NK
2473 pr_err("Error during initialize raw_syscalls:sys_enter event\n");
2474 goto out;
2475 }
2476
2477 evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
2478 "raw_syscalls:sys_exit");
9aca7f17
DA
2479 if (evsel == NULL)
2480 evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
2481 "syscalls:sys_exit");
e281a960
SF
2482 if (evsel &&
2483 (perf_evsel__init_syscall_tp(evsel, trace__sys_exit) < 0 ||
2484 perf_evsel__init_sc_tp_uint_field(evsel, ret))) {
003824e8 2485 pr_err("Error during initialize raw_syscalls:sys_exit event\n");
6810fc91
DA
2486 goto out;
2487 }
2488
e5cadb93 2489 evlist__for_each_entry(session->evlist, evsel) {
1e28fe0a
SF
2490 if (evsel->attr.type == PERF_TYPE_SOFTWARE &&
2491 (evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MAJ ||
2492 evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MIN ||
2493 evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS))
2494 evsel->handler = trace__pgfault;
2495 }
2496
bdc89661
DA
2497 err = parse_target_str(trace);
2498 if (err != 0)
2499 goto out;
2500
6810fc91
DA
2501 setup_pager();
2502
b7b61cbe 2503 err = perf_session__process_events(session);
6810fc91
DA
2504 if (err)
2505 pr_err("Failed to process events, error %d", err);
2506
bf2575c1
DA
2507 else if (trace->summary)
2508 trace__fprintf_thread_summary(trace, trace->output);
2509
6810fc91
DA
2510out:
2511 perf_session__delete(session);
2512
2513 return err;
2514}
2515
1302d88e
ACM
/*
 * Print the title of the per-thread summary to @fp.
 *
 * Returns what fprintf() returned, converted to size_t.
 */
static size_t trace__fprintf_threads_header(FILE *fp)
{
	return fprintf(fp, "\n Summary of events:\n\n");
}
2524
b535d523
ACM
2525DEFINE_RESORT_RB(syscall_stats, a->msecs > b->msecs,
2526 struct stats *stats;
2527 double msecs;
2528 int syscall;
2529)
2530{
2531 struct int_node *source = rb_entry(nd, struct int_node, rb_node);
2532 struct stats *stats = source->priv;
2533
2534 entry->syscall = source->i;
2535 entry->stats = stats;
2536 entry->msecs = stats ? (u64)stats->n * (avg_stats(stats) / NSEC_PER_MSEC) : 0;
2537}
2538
bf2575c1
DA
2539static size_t thread__dump_stats(struct thread_trace *ttrace,
2540 struct trace *trace, FILE *fp)
2541{
bf2575c1
DA
2542 size_t printed = 0;
2543 struct syscall *sc;
b535d523
ACM
2544 struct rb_node *nd;
2545 DECLARE_RESORT_RB_INTLIST(syscall_stats, ttrace->syscall_stats);
bf2575c1 2546
b535d523 2547 if (syscall_stats == NULL)
bf2575c1
DA
2548 return 0;
2549
2550 printed += fprintf(fp, "\n");
2551
834fd46d
MW
2552 printed += fprintf(fp, " syscall calls total min avg max stddev\n");
2553 printed += fprintf(fp, " (msec) (msec) (msec) (msec) (%%)\n");
2554 printed += fprintf(fp, " --------------- -------- --------- --------- --------- --------- ------\n");
99ff7150 2555
98a91837 2556 resort_rb__for_each_entry(nd, syscall_stats) {
b535d523 2557 struct stats *stats = syscall_stats_entry->stats;
bf2575c1
DA
2558 if (stats) {
2559 double min = (double)(stats->min) / NSEC_PER_MSEC;
2560 double max = (double)(stats->max) / NSEC_PER_MSEC;
2561 double avg = avg_stats(stats);
2562 double pct;
2563 u64 n = (u64) stats->n;
2564
2565 pct = avg ? 100.0 * stddev_stats(stats)/avg : 0.0;
2566 avg /= NSEC_PER_MSEC;
2567
b535d523 2568 sc = &trace->syscalls.table[syscall_stats_entry->syscall];
99ff7150 2569 printed += fprintf(fp, " %-15s", sc->name);
834fd46d 2570 printed += fprintf(fp, " %8" PRIu64 " %9.3f %9.3f %9.3f",
b535d523 2571 n, syscall_stats_entry->msecs, min, avg);
27a778b5 2572 printed += fprintf(fp, " %9.3f %9.2f%%\n", max, pct);
bf2575c1 2573 }
bf2575c1
DA
2574 }
2575
b535d523 2576 resort_rb__delete(syscall_stats);
bf2575c1 2577 printed += fprintf(fp, "\n\n");
1302d88e
ACM
2578
2579 return printed;
2580}
2581
96c14451 2582static size_t trace__fprintf_thread(FILE *fp, struct thread *thread, struct trace *trace)
896cbb56 2583{
96c14451 2584 size_t printed = 0;
89dceb22 2585 struct thread_trace *ttrace = thread__priv(thread);
896cbb56
DA
2586 double ratio;
2587
2588 if (ttrace == NULL)
2589 return 0;
2590
2591 ratio = (double)ttrace->nr_events / trace->nr_events * 100.0;
2592
15e65c69 2593 printed += fprintf(fp, " %s (%d), ", thread__comm_str(thread), thread->tid);
99ff7150 2594 printed += fprintf(fp, "%lu events, ", ttrace->nr_events);
15e65c69 2595 printed += fprintf(fp, "%.1f%%", ratio);
a2ea67d7
SF
2596 if (ttrace->pfmaj)
2597 printed += fprintf(fp, ", %lu majfaults", ttrace->pfmaj);
2598 if (ttrace->pfmin)
2599 printed += fprintf(fp, ", %lu minfaults", ttrace->pfmin);
03548ebf
ACM
2600 if (trace->sched)
2601 printed += fprintf(fp, ", %.3f msec\n", ttrace->runtime_ms);
2602 else if (fputc('\n', fp) != EOF)
2603 ++printed;
2604
bf2575c1 2605 printed += thread__dump_stats(ttrace, trace, fp);
896cbb56 2606
96c14451
ACM
2607 return printed;
2608}
896cbb56 2609
96c14451
ACM
2610static unsigned long thread__nr_events(struct thread_trace *ttrace)
2611{
2612 return ttrace ? ttrace->nr_events : 0;
2613}
2614
2615DEFINE_RESORT_RB(threads, (thread__nr_events(a->thread->priv) < thread__nr_events(b->thread->priv)),
2616 struct thread *thread;
2617)
2618{
2619 entry->thread = rb_entry(nd, struct thread, rb_node);
896cbb56
DA
2620}
2621
1302d88e
ACM
2622static size_t trace__fprintf_thread_summary(struct trace *trace, FILE *fp)
2623{
96c14451
ACM
2624 DECLARE_RESORT_RB_MACHINE_THREADS(threads, trace->host);
2625 size_t printed = trace__fprintf_threads_header(fp);
2626 struct rb_node *nd;
1302d88e 2627
96c14451
ACM
2628 if (threads == NULL) {
2629 fprintf(fp, "%s", "Error sorting output by nr_events!\n");
2630 return 0;
2631 }
2632
98a91837 2633 resort_rb__for_each_entry(nd, threads)
96c14451 2634 printed += trace__fprintf_thread(fp, threads_entry->thread, trace);
896cbb56 2635
96c14451
ACM
2636 resort_rb__delete(threads);
2637
2638 return printed;
1302d88e
ACM
2639}
2640
ae9ed035
ACM
2641static int trace__set_duration(const struct option *opt, const char *str,
2642 int unset __maybe_unused)
2643{
2644 struct trace *trace = opt->value;
2645
2646 trace->duration_filter = atof(str);
2647 return 0;
2648}
2649
f078c385
ACM
2650static int trace__set_filter_pids(const struct option *opt, const char *str,
2651 int unset __maybe_unused)
2652{
2653 int ret = -1;
2654 size_t i;
2655 struct trace *trace = opt->value;
2656 /*
2657 * FIXME: introduce a intarray class, plain parse csv and create a
2658 * { int nr, int entries[] } struct...
2659 */
2660 struct intlist *list = intlist__new(str);
2661
2662 if (list == NULL)
2663 return -1;
2664
2665 i = trace->filter_pids.nr = intlist__nr_entries(list) + 1;
2666 trace->filter_pids.entries = calloc(i, sizeof(pid_t));
2667
2668 if (trace->filter_pids.entries == NULL)
2669 goto out;
2670
2671 trace->filter_pids.entries[0] = getpid();
2672
2673 for (i = 1; i < trace->filter_pids.nr; ++i)
2674 trace->filter_pids.entries[i] = intlist__entry(list, i - 1)->i;
2675
2676 intlist__delete(list);
2677 ret = 0;
2678out:
2679 return ret;
2680}
2681
c24ff998
ACM
2682static int trace__open_output(struct trace *trace, const char *filename)
2683{
2684 struct stat st;
2685
2686 if (!stat(filename, &st) && st.st_size) {
2687 char oldname[PATH_MAX];
2688
2689 scnprintf(oldname, sizeof(oldname), "%s.old", filename);
2690 unlink(oldname);
2691 rename(filename, oldname);
2692 }
2693
2694 trace->output = fopen(filename, "w");
2695
2696 return trace->output == NULL ? -errno : 0;
2697}
2698
598d02c5
SF
2699static int parse_pagefaults(const struct option *opt, const char *str,
2700 int unset __maybe_unused)
2701{
2702 int *trace_pgfaults = opt->value;
2703
2704 if (strcmp(str, "all") == 0)
2705 *trace_pgfaults |= TRACE_PFMAJ | TRACE_PFMIN;
2706 else if (strcmp(str, "maj") == 0)
2707 *trace_pgfaults |= TRACE_PFMAJ;
2708 else if (strcmp(str, "min") == 0)
2709 *trace_pgfaults |= TRACE_PFMIN;
2710 else
2711 return -1;
2712
2713 return 0;
2714}
2715
14a052df
ACM
2716static void evlist__set_evsel_handler(struct perf_evlist *evlist, void *handler)
2717{
2718 struct perf_evsel *evsel;
2719
e5cadb93 2720 evlist__for_each_entry(evlist, evsel)
14a052df
ACM
2721 evsel->handler = handler;
2722}
2723
514f1c67
ACM
2724int cmd_trace(int argc, const char **argv, const char *prefix __maybe_unused)
2725{
6fdd9cb7 2726 const char *trace_usage[] = {
f15eb531
NK
2727 "perf trace [<options>] [<command>]",
2728 "perf trace [<options>] -- <command> [<options>]",
5e2485b1
DA
2729 "perf trace record [<options>] [<command>]",
2730 "perf trace record [<options>] -- <command> [<options>]",
514f1c67
ACM
2731 NULL
2732 };
2733 struct trace trace = {
514f1c67
ACM
2734 .syscalls = {
2735 . max = -1,
2736 },
2737 .opts = {
2738 .target = {
2739 .uid = UINT_MAX,
2740 .uses_mmap = true,
2741 },
2742 .user_freq = UINT_MAX,
2743 .user_interval = ULLONG_MAX,
509051ea 2744 .no_buffering = true,
38d5447d 2745 .mmap_pages = UINT_MAX,
9d9cad76 2746 .proc_map_timeout = 500,
514f1c67 2747 },
007d66a0 2748 .output = stderr,
50c95cbd 2749 .show_comm = true,
e281a960 2750 .trace_syscalls = true,
44621819 2751 .kernel_syscallchains = false,
05614993 2752 .max_stack = UINT_MAX,
514f1c67 2753 };
c24ff998 2754 const char *output_name = NULL;
2ae3a312 2755 const char *ev_qualifier_str = NULL;
514f1c67 2756 const struct option trace_options[] = {
14a052df
ACM
2757 OPT_CALLBACK(0, "event", &trace.evlist, "event",
2758 "event selector. use 'perf list' to list available events",
2759 parse_events_option),
50c95cbd
ACM
2760 OPT_BOOLEAN(0, "comm", &trace.show_comm,
2761 "show the thread COMM next to its id"),
c522739d 2762 OPT_BOOLEAN(0, "tool_stats", &trace.show_tool_stats, "show tool stats"),
d303e85a 2763 OPT_STRING('e', "expr", &ev_qualifier_str, "expr", "list of syscalls to trace"),
c24ff998 2764 OPT_STRING('o', "output", &output_name, "file", "output file name"),
6810fc91 2765 OPT_STRING('i', "input", &input_name, "file", "Analyze events in file"),
514f1c67
ACM
2766 OPT_STRING('p', "pid", &trace.opts.target.pid, "pid",
2767 "trace events on existing process id"),
ac9be8ee 2768 OPT_STRING('t', "tid", &trace.opts.target.tid, "tid",
514f1c67 2769 "trace events on existing thread id"),
fa0e4ffe
ACM
2770 OPT_CALLBACK(0, "filter-pids", &trace, "CSV list of pids",
2771 "pids to filter (by the kernel)", trace__set_filter_pids),
ac9be8ee 2772 OPT_BOOLEAN('a', "all-cpus", &trace.opts.target.system_wide,
514f1c67 2773 "system-wide collection from all CPUs"),
ac9be8ee 2774 OPT_STRING('C', "cpu", &trace.opts.target.cpu_list, "cpu",
514f1c67 2775 "list of cpus to monitor"),
6810fc91 2776 OPT_BOOLEAN(0, "no-inherit", &trace.opts.no_inherit,
514f1c67 2777 "child tasks do not inherit counters"),
994a1f78
JO
2778 OPT_CALLBACK('m', "mmap-pages", &trace.opts.mmap_pages, "pages",
2779 "number of mmap data pages",
2780 perf_evlist__parse_mmap_pages),
ac9be8ee 2781 OPT_STRING('u', "uid", &trace.opts.target.uid_str, "user",
514f1c67 2782 "user to profile"),
ae9ed035
ACM
2783 OPT_CALLBACK(0, "duration", &trace, "float",
2784 "show only events with duration > N.M ms",
2785 trace__set_duration),
1302d88e 2786 OPT_BOOLEAN(0, "sched", &trace.sched, "show blocking scheduler events"),
7c304ee0 2787 OPT_INCR('v', "verbose", &verbose, "be more verbose"),
4bb09192
DA
2788 OPT_BOOLEAN('T', "time", &trace.full_time,
2789 "Show full timestamp, not time relative to first start"),
fd2eabaf
DA
2790 OPT_BOOLEAN('s', "summary", &trace.summary_only,
2791 "Show only syscall summary with statistics"),
2792 OPT_BOOLEAN('S', "with-summary", &trace.summary,
2793 "Show all syscalls and summary with statistics"),
598d02c5
SF
2794 OPT_CALLBACK_DEFAULT('F', "pf", &trace.trace_pgfaults, "all|maj|min",
2795 "Trace pagefaults", parse_pagefaults, "maj"),
e281a960 2796 OPT_BOOLEAN(0, "syscalls", &trace.trace_syscalls, "Trace syscalls"),
e366a6d8 2797 OPT_BOOLEAN('f', "force", &trace.force, "don't complain, do it"),
566a0885
MW
2798 OPT_CALLBACK(0, "call-graph", &trace.opts,
2799 "record_mode[,record_size]", record_callchain_help,
2800 &record_parse_callchain_opt),
44621819
ACM
2801 OPT_BOOLEAN(0, "kernel-syscall-graph", &trace.kernel_syscallchains,
2802 "Show the kernel callchains on the syscall exit path"),
5cf9c84e
ACM
2803 OPT_UINTEGER(0, "min-stack", &trace.min_stack,
2804 "Set the minimum stack depth when parsing the callchain, "
2805 "anything below the specified depth will be ignored."),
c6d4a494
ACM
2806 OPT_UINTEGER(0, "max-stack", &trace.max_stack,
2807 "Set the maximum stack depth when parsing the callchain, "
2808 "anything beyond the specified depth will be ignored. "
4cb93446 2809 "Default: kernel.perf_event_max_stack or " __stringify(PERF_MAX_STACK_DEPTH)),
9d9cad76
KL
2810 OPT_UINTEGER(0, "proc-map-timeout", &trace.opts.proc_map_timeout,
2811 "per thread proc mmap processing timeout in ms"),
514f1c67
ACM
2812 OPT_END()
2813 };
ccd62a89 2814 bool __maybe_unused max_stack_user_set = true;
f3e459d1 2815 bool mmap_pages_user_set = true;
6fdd9cb7 2816 const char * const trace_subcommands[] = { "record", NULL };
514f1c67 2817 int err;
32caf0d1 2818 char bf[BUFSIZ];
514f1c67 2819
4d08cb80
ACM
2820 signal(SIGSEGV, sighandler_dump_stack);
2821 signal(SIGFPE, sighandler_dump_stack);
2822
14a052df 2823 trace.evlist = perf_evlist__new();
fd0db102 2824 trace.sctbl = syscalltbl__new();
14a052df 2825
fd0db102 2826 if (trace.evlist == NULL || trace.sctbl == NULL) {
14a052df 2827 pr_err("Not enough memory to run!\n");
ff8f695c 2828 err = -ENOMEM;
14a052df
ACM
2829 goto out;
2830 }
2831
6fdd9cb7
YS
2832 argc = parse_options_subcommand(argc, argv, trace_options, trace_subcommands,
2833 trace_usage, PARSE_OPT_STOP_AT_NON_OPTION);
fd2eabaf 2834
d7888573
WN
2835 err = bpf__setup_stdout(trace.evlist);
2836 if (err) {
2837 bpf__strerror_setup_stdout(trace.evlist, err, bf, sizeof(bf));
2838 pr_err("ERROR: Setup BPF stdout failed: %s\n", bf);
2839 goto out;
2840 }
2841
59247e33
ACM
2842 err = -1;
2843
598d02c5
SF
2844 if (trace.trace_pgfaults) {
2845 trace.opts.sample_address = true;
2846 trace.opts.sample_time = true;
2847 }
2848
f3e459d1
ACM
2849 if (trace.opts.mmap_pages == UINT_MAX)
2850 mmap_pages_user_set = false;
2851
05614993 2852 if (trace.max_stack == UINT_MAX) {
fe176085 2853 trace.max_stack = input_name ? PERF_MAX_STACK_DEPTH : sysctl_perf_event_max_stack;
05614993
ACM
2854 max_stack_user_set = false;
2855 }
2856
2857#ifdef HAVE_DWARF_UNWIND_SUPPORT
caa36ed7 2858 if ((trace.min_stack || max_stack_user_set) && !callchain_param.enabled && trace.trace_syscalls)
05614993
ACM
2859 record_opts__parse_callchain(&trace.opts, &callchain_param, "dwarf", false);
2860#endif
2861
2ddd5c04 2862 if (callchain_param.enabled) {
f3e459d1
ACM
2863 if (!mmap_pages_user_set && geteuid() == 0)
2864 trace.opts.mmap_pages = perf_event_mlock_kb_in_pages() * 4;
2865
566a0885 2866 symbol_conf.use_callchain = true;
f3e459d1 2867 }
566a0885 2868
14a052df
ACM
2869 if (trace.evlist->nr_entries > 0)
2870 evlist__set_evsel_handler(trace.evlist, trace__event_handler);
2871
1e28fe0a
SF
2872 if ((argc >= 1) && (strcmp(argv[0], "record") == 0))
2873 return trace__record(&trace, argc-1, &argv[1]);
2874
2875 /* summary_only implies summary option, but don't overwrite summary if set */
2876 if (trace.summary_only)
2877 trace.summary = trace.summary_only;
2878
726f3234
ACM
2879 if (!trace.trace_syscalls && !trace.trace_pgfaults &&
2880 trace.evlist->nr_entries == 0 /* Was --events used? */) {
e281a960
SF
2881 pr_err("Please specify something to trace.\n");
2882 return -1;
2883 }
2884
59247e33
ACM
2885 if (!trace.trace_syscalls && ev_qualifier_str) {
2886 pr_err("The -e option can't be used with --no-syscalls.\n");
2887 goto out;
2888 }
2889
c24ff998
ACM
2890 if (output_name != NULL) {
2891 err = trace__open_output(&trace, output_name);
2892 if (err < 0) {
2893 perror("failed to create output file");
2894 goto out;
2895 }
2896 }
2897
fd0db102
ACM
2898 trace.open_id = syscalltbl__id(trace.sctbl, "open");
2899
2ae3a312 2900 if (ev_qualifier_str != NULL) {
b059efdf 2901 const char *s = ev_qualifier_str;
005438a8
ACM
2902 struct strlist_config slist_config = {
2903 .dirname = system_path(STRACE_GROUPS_DIR),
2904 };
b059efdf
ACM
2905
2906 trace.not_ev_qualifier = *s == '!';
2907 if (trace.not_ev_qualifier)
2908 ++s;
005438a8 2909 trace.ev_qualifier = strlist__new(s, &slist_config);
2ae3a312 2910 if (trace.ev_qualifier == NULL) {
c24ff998
ACM
2911 fputs("Not enough memory to parse event qualifier",
2912 trace.output);
2913 err = -ENOMEM;
2914 goto out_close;
2ae3a312 2915 }
d0cc439b
ACM
2916
2917 err = trace__validate_ev_qualifier(&trace);
2918 if (err)
2919 goto out_close;
2ae3a312
ACM
2920 }
2921
602ad878 2922 err = target__validate(&trace.opts.target);
32caf0d1 2923 if (err) {
602ad878 2924 target__strerror(&trace.opts.target, err, bf, sizeof(bf));
c24ff998
ACM
2925 fprintf(trace.output, "%s", bf);
2926 goto out_close;
32caf0d1
NK
2927 }
2928
602ad878 2929 err = target__parse_uid(&trace.opts.target);
514f1c67 2930 if (err) {
602ad878 2931 target__strerror(&trace.opts.target, err, bf, sizeof(bf));
c24ff998
ACM
2932 fprintf(trace.output, "%s", bf);
2933 goto out_close;
514f1c67
ACM
2934 }
2935
602ad878 2936 if (!argc && target__none(&trace.opts.target))
ee76120e
NK
2937 trace.opts.target.system_wide = true;
2938
6810fc91
DA
2939 if (input_name)
2940 err = trace__replay(&trace);
2941 else
2942 err = trace__run(&trace, argc, argv);
1302d88e 2943
c24ff998
ACM
2944out_close:
2945 if (output_name != NULL)
2946 fclose(trace.output);
2947out:
1302d88e 2948 return err;
514f1c67 2949}