Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/jikos/hid
[linux-2.6-block.git] / tools / perf / builtin-trace.c
CommitLineData
a598bb5e
ACM
1/*
2 * builtin-trace.c
3 *
4 * Builtin 'trace' command:
5 *
6 * Display a continuously updated trace of any workload, CPU, specific PID,
7 * system wide, etc. Default format is loosely strace like, but any other
8 * event may be specified using --event.
9 *
10 * Copyright (C) 2012, 2013, 2014, 2015 Red Hat Inc, Arnaldo Carvalho de Melo <acme@redhat.com>
11 *
12 * Initially based on the 'trace' prototype by Thomas Gleixner:
13 *
14 * http://lwn.net/Articles/415728/ ("Announcing a new utility: 'trace'")
15 *
16 * Released under the GPL v2. (and only v2, not any later version)
17 */
18
4e319027 19#include <traceevent/event-parse.h>
988bdb31 20#include <api/fs/tracing_path.h>
514f1c67 21#include "builtin.h"
752fde44 22#include "util/color.h"
7c304ee0 23#include "util/debug.h"
514f1c67 24#include "util/evlist.h"
4b6ab94e 25#include <subcmd/exec-cmd.h>
752fde44 26#include "util/machine.h"
6810fc91 27#include "util/session.h"
752fde44 28#include "util/thread.h"
4b6ab94e 29#include <subcmd/parse-options.h>
2ae3a312 30#include "util/strlist.h"
bdc89661 31#include "util/intlist.h"
514f1c67 32#include "util/thread_map.h"
bf2575c1 33#include "util/stat.h"
97978b3e 34#include "trace-event.h"
9aca7f17 35#include "util/parse-events.h"
ba504235 36#include "util/bpf-loader.h"
566a0885 37#include "callchain.h"
fd0db102 38#include "syscalltbl.h"
96c14451 39#include "rb_resort.h"
514f1c67 40
fd0db102 41#include <libaudit.h> /* FIXME: Still needed for audit_errno_to_name */
514f1c67 42#include <stdlib.h>
8dd2a131 43#include <linux/err.h>
997bba8c
ACM
44#include <linux/filter.h>
45#include <linux/audit.h>
46#include <sys/ptrace.h>
39878d49 47#include <linux/random.h>
c6d4a494 48#include <linux/stringify.h>
514f1c67 49
c188e7ac
ACM
50#ifndef O_CLOEXEC
51# define O_CLOEXEC 02000000
52#endif
53
d1d438a3
ACM
54struct trace {
55 struct perf_tool tool;
fd0db102 56 struct syscalltbl *sctbl;
d1d438a3
ACM
57 struct {
58 int max;
59 struct syscall *table;
60 struct {
61 struct perf_evsel *sys_enter,
62 *sys_exit;
63 } events;
64 } syscalls;
65 struct record_opts opts;
66 struct perf_evlist *evlist;
67 struct machine *host;
68 struct thread *current;
69 u64 base_time;
70 FILE *output;
71 unsigned long nr_events;
72 struct strlist *ev_qualifier;
73 struct {
74 size_t nr;
75 int *entries;
76 } ev_qualifier_ids;
77 struct intlist *tid_list;
78 struct intlist *pid_list;
79 struct {
80 size_t nr;
81 pid_t *entries;
82 } filter_pids;
83 double duration_filter;
84 double runtime_ms;
85 struct {
86 u64 vfs_getname,
87 proc_getname;
88 } stats;
c6d4a494 89 unsigned int max_stack;
5cf9c84e 90 unsigned int min_stack;
d1d438a3
ACM
91 bool not_ev_qualifier;
92 bool live;
93 bool full_time;
94 bool sched;
95 bool multiple_threads;
96 bool summary;
97 bool summary_only;
98 bool show_comm;
99 bool show_tool_stats;
100 bool trace_syscalls;
44621819 101 bool kernel_syscallchains;
d1d438a3
ACM
102 bool force;
103 bool vfs_getname;
104 int trace_pgfaults;
fd0db102 105 int open_id;
d1d438a3 106};
a1c2552d 107
77170988
ACM
108struct tp_field {
109 int offset;
110 union {
111 u64 (*integer)(struct tp_field *field, struct perf_sample *sample);
112 void *(*pointer)(struct tp_field *field, struct perf_sample *sample);
113 };
114};
115
116#define TP_UINT_FIELD(bits) \
117static u64 tp_field__u##bits(struct tp_field *field, struct perf_sample *sample) \
118{ \
55d43bca
DA
119 u##bits value; \
120 memcpy(&value, sample->raw_data + field->offset, sizeof(value)); \
121 return value; \
77170988
ACM
122}
123
124TP_UINT_FIELD(8);
125TP_UINT_FIELD(16);
126TP_UINT_FIELD(32);
127TP_UINT_FIELD(64);
128
129#define TP_UINT_FIELD__SWAPPED(bits) \
130static u64 tp_field__swapped_u##bits(struct tp_field *field, struct perf_sample *sample) \
131{ \
55d43bca
DA
132 u##bits value; \
133 memcpy(&value, sample->raw_data + field->offset, sizeof(value)); \
77170988
ACM
134 return bswap_##bits(value);\
135}
136
137TP_UINT_FIELD__SWAPPED(16);
138TP_UINT_FIELD__SWAPPED(32);
139TP_UINT_FIELD__SWAPPED(64);
140
141static int tp_field__init_uint(struct tp_field *field,
142 struct format_field *format_field,
143 bool needs_swap)
144{
145 field->offset = format_field->offset;
146
147 switch (format_field->size) {
148 case 1:
149 field->integer = tp_field__u8;
150 break;
151 case 2:
152 field->integer = needs_swap ? tp_field__swapped_u16 : tp_field__u16;
153 break;
154 case 4:
155 field->integer = needs_swap ? tp_field__swapped_u32 : tp_field__u32;
156 break;
157 case 8:
158 field->integer = needs_swap ? tp_field__swapped_u64 : tp_field__u64;
159 break;
160 default:
161 return -1;
162 }
163
164 return 0;
165}
166
167static void *tp_field__ptr(struct tp_field *field, struct perf_sample *sample)
168{
169 return sample->raw_data + field->offset;
170}
171
172static int tp_field__init_ptr(struct tp_field *field, struct format_field *format_field)
173{
174 field->offset = format_field->offset;
175 field->pointer = tp_field__ptr;
176 return 0;
177}
178
179struct syscall_tp {
180 struct tp_field id;
181 union {
182 struct tp_field args, ret;
183 };
184};
185
186static int perf_evsel__init_tp_uint_field(struct perf_evsel *evsel,
187 struct tp_field *field,
188 const char *name)
189{
190 struct format_field *format_field = perf_evsel__field(evsel, name);
191
192 if (format_field == NULL)
193 return -1;
194
195 return tp_field__init_uint(field, format_field, evsel->needs_swap);
196}
197
/*
 * Initialise the integer member @name of the struct syscall_tp hanging off
 * evsel->priv from the tracepoint field of the same name.
 */
#define perf_evsel__init_sc_tp_uint_field(evsel, name) \
	({ \
		struct syscall_tp *sc = evsel->priv; \
		perf_evsel__init_tp_uint_field(evsel, &sc->name, #name); \
	})
201
/*
 * Look up tracepoint field @name in @evsel and initialise @field as a
 * pointer accessor for it.
 *
 * Returns -1 when the field does not exist, otherwise the result of
 * tp_field__init_ptr().
 */
static int perf_evsel__init_tp_ptr_field(struct perf_evsel *evsel,
					 struct tp_field *field,
					 const char *name)
{
	struct format_field *fmt_field = perf_evsel__field(evsel, name);

	return fmt_field != NULL ? tp_field__init_ptr(field, fmt_field) : -1;
}
213
/*
 * Initialise the pointer member @name of the struct syscall_tp hanging off
 * evsel->priv from the tracepoint field of the same name.
 */
#define perf_evsel__init_sc_tp_ptr_field(evsel, name) \
	({ \
		struct syscall_tp *sc = evsel->priv; \
		perf_evsel__init_tp_ptr_field(evsel, &sc->name, #name); \
	})
217
218static void perf_evsel__delete_priv(struct perf_evsel *evsel)
219{
04662523 220 zfree(&evsel->priv);
77170988
ACM
221 perf_evsel__delete(evsel);
222}
223
96695d44
NK
224static int perf_evsel__init_syscall_tp(struct perf_evsel *evsel, void *handler)
225{
226 evsel->priv = malloc(sizeof(struct syscall_tp));
227 if (evsel->priv != NULL) {
228 if (perf_evsel__init_sc_tp_uint_field(evsel, id))
229 goto out_delete;
230
231 evsel->handler = handler;
232 return 0;
233 }
234
235 return -ENOMEM;
236
237out_delete:
04662523 238 zfree(&evsel->priv);
96695d44
NK
239 return -ENOENT;
240}
241
/*
 * Create the evsel for the raw_syscalls:@direction tracepoint and prepare it
 * with perf_evsel__init_syscall_tp().
 *
 * Returns the new evsel, or NULL if the tracepoint cannot be created or
 * initialised.
 */
static struct perf_evsel *perf_evsel__syscall_newtp(const char *direction, void *handler)
{
	struct perf_evsel *evsel = perf_evsel__newtp("raw_syscalls", direction);

	/* older kernel (e.g., RHEL6) use syscalls:{enter,exit} */
	if (IS_ERR(evsel))
		evsel = perf_evsel__newtp("syscalls", direction);

	if (IS_ERR(evsel))
		return NULL;

	if (perf_evsel__init_syscall_tp(evsel, handler)) {
		perf_evsel__delete_priv(evsel);
		return NULL;
	}

	return evsel;
}
262
/* Read integer member @name of evsel->priv (a struct syscall_tp) from @sample. */
#define perf_evsel__sc_tp_uint(evsel, name, sample) \
	({ \
		struct syscall_tp *fields = evsel->priv; \
		fields->name.integer(&fields->name, sample); \
	})

/* Read pointer member @name of evsel->priv (a struct syscall_tp) from @sample. */
#define perf_evsel__sc_tp_ptr(evsel, name, sample) \
	({ \
		struct syscall_tp *fields = evsel->priv; \
		fields->name.pointer(&fields->name, sample); \
	})
270
01533e97
ACM
271struct syscall_arg {
272 unsigned long val;
75b757ca
ACM
273 struct thread *thread;
274 struct trace *trace;
1f115cb7 275 void *parm;
01533e97
ACM
276 u8 idx;
277 u8 mask;
278};
279
/*
 * Table that maps small integers to names.
 *
 * @offset:     subtracted from the value before it is used as an index.
 * @nr_entries: number of slots in @entries.
 * @entries:    the names.
 */
struct strarray {
	int	    offset;
	int	    nr_entries;
	const char  **entries;
};
285
/* Define strarray__<array> describing @array, with an implicit offset of 0. */
#define DEFINE_STRARRAY(array) struct strarray strarray__##array = { \
	.nr_entries = ARRAY_SIZE(array), \
	.entries    = array, \
}

/* Same as DEFINE_STRARRAY(), for tables whose first entry stands for @off. */
#define DEFINE_STRARRAY_OFFSET(array, off) struct strarray strarray__##array = { \
	.offset	    = off, \
	.nr_entries = ARRAY_SIZE(array), \
	.entries    = array, \
}
296
975b7c2f
ACM
297static size_t __syscall_arg__scnprintf_strarray(char *bf, size_t size,
298 const char *intfmt,
299 struct syscall_arg *arg)
1f115cb7 300{
1f115cb7 301 struct strarray *sa = arg->parm;
03e3adc9 302 int idx = arg->val - sa->offset;
1f115cb7
ACM
303
304 if (idx < 0 || idx >= sa->nr_entries)
975b7c2f 305 return scnprintf(bf, size, intfmt, arg->val);
1f115cb7
ACM
306
307 return scnprintf(bf, size, "%s", sa->entries[idx]);
308}
309
975b7c2f
ACM
/* strarray beautifier that prints values without a name in decimal. */
static size_t syscall_arg__scnprintf_strarray(char *bf, size_t size,
					      struct syscall_arg *arg)
{
	const char *fallback_fmt = "%d";

	return __syscall_arg__scnprintf_strarray(bf, size, fallback_fmt, arg);
}
315
1f115cb7
ACM
316#define SCA_STRARRAY syscall_arg__scnprintf_strarray
317
844ae5b4
ACM
318#if defined(__i386__) || defined(__x86_64__)
319/*
320 * FIXME: Make this available to all arches as soon as the ioctl beautifier
321 * gets rewritten to support all arches.
322 */
78645cf3
ACM
/* strarray beautifier that prints values without a name in hexadecimal. */
static size_t syscall_arg__scnprintf_strhexarray(char *bf, size_t size,
						 struct syscall_arg *arg)
{
	const char *fallback_fmt = "%#x";

	return __syscall_arg__scnprintf_strarray(bf, size, fallback_fmt, arg);
}
328
329#define SCA_STRHEXARRAY syscall_arg__scnprintf_strhexarray
844ae5b4 330#endif /* defined(__i386__) || defined(__x86_64__) */
78645cf3 331
75b757ca
ACM
332static size_t syscall_arg__scnprintf_fd(char *bf, size_t size,
333 struct syscall_arg *arg);
334
335#define SCA_FD syscall_arg__scnprintf_fd
336
337static size_t syscall_arg__scnprintf_fd_at(char *bf, size_t size,
338 struct syscall_arg *arg)
339{
340 int fd = arg->val;
341
342 if (fd == AT_FDCWD)
343 return scnprintf(bf, size, "CWD");
344
345 return syscall_arg__scnprintf_fd(bf, size, arg);
346}
347
348#define SCA_FDAT syscall_arg__scnprintf_fd_at
349
350static size_t syscall_arg__scnprintf_close_fd(char *bf, size_t size,
351 struct syscall_arg *arg);
352
353#define SCA_CLOSE_FD syscall_arg__scnprintf_close_fd
354
6e7eeb51 355static size_t syscall_arg__scnprintf_hex(char *bf, size_t size,
01533e97 356 struct syscall_arg *arg)
13d4ff3e 357{
01533e97 358 return scnprintf(bf, size, "%#lx", arg->val);
13d4ff3e
ACM
359}
360
beccb2b5
ACM
361#define SCA_HEX syscall_arg__scnprintf_hex
362
a1c2552d
ACM
363static size_t syscall_arg__scnprintf_int(char *bf, size_t size,
364 struct syscall_arg *arg)
365{
366 return scnprintf(bf, size, "%d", arg->val);
367}
368
369#define SCA_INT syscall_arg__scnprintf_int
370
729a7841
ACM
371static const char *bpf_cmd[] = {
372 "MAP_CREATE", "MAP_LOOKUP_ELEM", "MAP_UPDATE_ELEM", "MAP_DELETE_ELEM",
373 "MAP_GET_NEXT_KEY", "PROG_LOAD",
374};
375static DEFINE_STRARRAY(bpf_cmd);
376
03e3adc9
ACM
377static const char *epoll_ctl_ops[] = { "ADD", "DEL", "MOD", };
378static DEFINE_STRARRAY_OFFSET(epoll_ctl_ops, 1);
eac032c5 379
1f115cb7
ACM
380static const char *itimers[] = { "REAL", "VIRTUAL", "PROF", };
381static DEFINE_STRARRAY(itimers);
382
b62bee1b
ACM
383static const char *keyctl_options[] = {
384 "GET_KEYRING_ID", "JOIN_SESSION_KEYRING", "UPDATE", "REVOKE", "CHOWN",
385 "SETPERM", "DESCRIBE", "CLEAR", "LINK", "UNLINK", "SEARCH", "READ",
386 "INSTANTIATE", "NEGATE", "SET_REQKEY_KEYRING", "SET_TIMEOUT",
387 "ASSUME_AUTHORITY", "GET_SECURITY", "SESSION_TO_PARENT", "REJECT",
388 "INSTANTIATE_IOV", "INVALIDATE", "GET_PERSISTENT",
389};
390static DEFINE_STRARRAY(keyctl_options);
391
efe6b882
ACM
392static const char *whences[] = { "SET", "CUR", "END",
393#ifdef SEEK_DATA
394"DATA",
395#endif
396#ifdef SEEK_HOLE
397"HOLE",
398#endif
399};
400static DEFINE_STRARRAY(whences);
f9da0b0c 401
80f587d5
ACM
402static const char *fcntl_cmds[] = {
403 "DUPFD", "GETFD", "SETFD", "GETFL", "SETFL", "GETLK", "SETLK",
404 "SETLKW", "SETOWN", "GETOWN", "SETSIG", "GETSIG", "F_GETLK64",
405 "F_SETLK64", "F_SETLKW64", "F_SETOWN_EX", "F_GETOWN_EX",
406 "F_GETOWNER_UIDS",
407};
408static DEFINE_STRARRAY(fcntl_cmds);
409
c045bf02
ACM
410static const char *rlimit_resources[] = {
411 "CPU", "FSIZE", "DATA", "STACK", "CORE", "RSS", "NPROC", "NOFILE",
412 "MEMLOCK", "AS", "LOCKS", "SIGPENDING", "MSGQUEUE", "NICE", "RTPRIO",
413 "RTTIME",
414};
415static DEFINE_STRARRAY(rlimit_resources);
416
eb5b1b14
ACM
417static const char *sighow[] = { "BLOCK", "UNBLOCK", "SETMASK", };
418static DEFINE_STRARRAY(sighow);
419
4f8c1b74
DA
420static const char *clockid[] = {
421 "REALTIME", "MONOTONIC", "PROCESS_CPUTIME_ID", "THREAD_CPUTIME_ID",
28ebb87c
ACM
422 "MONOTONIC_RAW", "REALTIME_COARSE", "MONOTONIC_COARSE", "BOOTTIME",
423 "REALTIME_ALARM", "BOOTTIME_ALARM", "SGI_CYCLE", "TAI"
4f8c1b74
DA
424};
425static DEFINE_STRARRAY(clockid);
426
e10bce81
ACM
427static const char *socket_families[] = {
428 "UNSPEC", "LOCAL", "INET", "AX25", "IPX", "APPLETALK", "NETROM",
429 "BRIDGE", "ATMPVC", "X25", "INET6", "ROSE", "DECnet", "NETBEUI",
430 "SECURITY", "KEY", "NETLINK", "PACKET", "ASH", "ECONET", "ATMSVC",
431 "RDS", "SNA", "IRDA", "PPPOX", "WANPIPE", "LLC", "IB", "CAN", "TIPC",
432 "BLUETOOTH", "IUCV", "RXRPC", "ISDN", "PHONET", "IEEE802154", "CAIF",
433 "ALG", "NFC", "VSOCK",
434};
435static DEFINE_STRARRAY(socket_families);
436
51108999
ACM
437static size_t syscall_arg__scnprintf_access_mode(char *bf, size_t size,
438 struct syscall_arg *arg)
439{
440 size_t printed = 0;
441 int mode = arg->val;
442
443 if (mode == F_OK) /* 0 */
444 return scnprintf(bf, size, "F");
445#define P_MODE(n) \
446 if (mode & n##_OK) { \
447 printed += scnprintf(bf + printed, size - printed, "%s", #n); \
448 mode &= ~n##_OK; \
449 }
450
451 P_MODE(R);
452 P_MODE(W);
453 P_MODE(X);
454#undef P_MODE
455
456 if (mode)
457 printed += scnprintf(bf + printed, size - printed, "|%#x", mode);
458
459 return printed;
460}
461
462#define SCA_ACCMODE syscall_arg__scnprintf_access_mode
463
f994592d
ACM
464static size_t syscall_arg__scnprintf_filename(char *bf, size_t size,
465 struct syscall_arg *arg);
466
467#define SCA_FILENAME syscall_arg__scnprintf_filename
468
46cce19b
ACM
469static size_t syscall_arg__scnprintf_pipe_flags(char *bf, size_t size,
470 struct syscall_arg *arg)
471{
472 int printed = 0, flags = arg->val;
473
474#define P_FLAG(n) \
475 if (flags & O_##n) { \
476 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
477 flags &= ~O_##n; \
478 }
479
480 P_FLAG(CLOEXEC);
481 P_FLAG(NONBLOCK);
482#undef P_FLAG
483
484 if (flags)
485 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
486
487 return printed;
488}
489
490#define SCA_PIPE_FLAGS syscall_arg__scnprintf_pipe_flags
491
844ae5b4
ACM
492#if defined(__i386__) || defined(__x86_64__)
493/*
494 * FIXME: Make this available to all arches.
495 */
78645cf3
ACM
496#define TCGETS 0x5401
497
498static const char *tioctls[] = {
499 "TCGETS", "TCSETS", "TCSETSW", "TCSETSF", "TCGETA", "TCSETA", "TCSETAW",
500 "TCSETAF", "TCSBRK", "TCXONC", "TCFLSH", "TIOCEXCL", "TIOCNXCL",
501 "TIOCSCTTY", "TIOCGPGRP", "TIOCSPGRP", "TIOCOUTQ", "TIOCSTI",
502 "TIOCGWINSZ", "TIOCSWINSZ", "TIOCMGET", "TIOCMBIS", "TIOCMBIC",
503 "TIOCMSET", "TIOCGSOFTCAR", "TIOCSSOFTCAR", "FIONREAD", "TIOCLINUX",
504 "TIOCCONS", "TIOCGSERIAL", "TIOCSSERIAL", "TIOCPKT", "FIONBIO",
505 "TIOCNOTTY", "TIOCSETD", "TIOCGETD", "TCSBRKP", [0x27] = "TIOCSBRK",
506 "TIOCCBRK", "TIOCGSID", "TCGETS2", "TCSETS2", "TCSETSW2", "TCSETSF2",
507 "TIOCGRS485", "TIOCSRS485", "TIOCGPTN", "TIOCSPTLCK",
508 "TIOCGDEV||TCGETX", "TCSETX", "TCSETXF", "TCSETXW", "TIOCSIG",
509 "TIOCVHANGUP", "TIOCGPKT", "TIOCGPTLCK", "TIOCGEXCL",
510 [0x50] = "FIONCLEX", "FIOCLEX", "FIOASYNC", "TIOCSERCONFIG",
511 "TIOCSERGWILD", "TIOCSERSWILD", "TIOCGLCKTRMIOS", "TIOCSLCKTRMIOS",
512 "TIOCSERGSTRUCT", "TIOCSERGETLSR", "TIOCSERGETMULTI", "TIOCSERSETMULTI",
513 "TIOCMIWAIT", "TIOCGICOUNT", [0x60] = "FIOQSIZE",
514};
515
516static DEFINE_STRARRAY_OFFSET(tioctls, 0x5401);
844ae5b4 517#endif /* defined(__i386__) || defined(__x86_64__) */
78645cf3 518
a355a61e
ACM
519#ifndef GRND_NONBLOCK
520#define GRND_NONBLOCK 0x0001
521#endif
522#ifndef GRND_RANDOM
523#define GRND_RANDOM 0x0002
524#endif
525
39878d49
ACM
526static size_t syscall_arg__scnprintf_getrandom_flags(char *bf, size_t size,
527 struct syscall_arg *arg)
528{
529 int printed = 0, flags = arg->val;
530
531#define P_FLAG(n) \
532 if (flags & GRND_##n) { \
533 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
534 flags &= ~GRND_##n; \
535 }
536
537 P_FLAG(RANDOM);
538 P_FLAG(NONBLOCK);
539#undef P_FLAG
540
541 if (flags)
542 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
543
544 return printed;
545}
546
547#define SCA_GETRANDOM_FLAGS syscall_arg__scnprintf_getrandom_flags
548
453350dd
ACM
/*
 * Shorthand for a syscall_fmt entry whose argument number @arg is beautified
 * with strarray__<array>.  @name is not used by the expansion; at the call
 * sites it carries the argument's name.
 */
#define STRARRAY(arg, name, array) \
	.arg_scnprintf = { [arg] = SCA_STRARRAY, }, \
	.arg_parm      = { [arg] = &strarray__##array, }
552
ea8dc3ce 553#include "trace/beauty/eventfd.c"
8bf382ce 554#include "trace/beauty/flock.c"
d5d71e86 555#include "trace/beauty/futex_op.c"
df4cb167 556#include "trace/beauty/mmap.c"
ba2f22cf 557#include "trace/beauty/mode_t.c"
a30e6259 558#include "trace/beauty/msg_flags.c"
8f48df69 559#include "trace/beauty/open_flags.c"
62de344e 560#include "trace/beauty/perf_event_open.c"
d5d71e86 561#include "trace/beauty/pid.c"
a3bca91f 562#include "trace/beauty/sched_policy.c"
f5cd95ea 563#include "trace/beauty/seccomp.c"
12199d8e 564#include "trace/beauty/signum.c"
bbf86c43 565#include "trace/beauty/socket_type.c"
7206b900 566#include "trace/beauty/waitid_options.c"
a3bca91f 567
514f1c67
ACM
568static struct syscall_fmt {
569 const char *name;
aec1930b 570 const char *alias;
01533e97 571 size_t (*arg_scnprintf[6])(char *bf, size_t size, struct syscall_arg *arg);
1f115cb7 572 void *arg_parm[6];
514f1c67 573 bool errmsg;
11c8e39f 574 bool errpid;
514f1c67 575 bool timeout;
04b34729 576 bool hexret;
514f1c67 577} syscall_fmts[] = {
51108999 578 { .name = "access", .errmsg = true,
12f3ca4f 579 .arg_scnprintf = { [1] = SCA_ACCMODE, /* mode */ }, },
aec1930b 580 { .name = "arch_prctl", .errmsg = true, .alias = "prctl", },
729a7841 581 { .name = "bpf", .errmsg = true, STRARRAY(0, cmd, bpf_cmd), },
beccb2b5
ACM
582 { .name = "brk", .hexret = true,
583 .arg_scnprintf = { [0] = SCA_HEX, /* brk */ }, },
12f3ca4f
ACM
584 { .name = "chdir", .errmsg = true, },
585 { .name = "chmod", .errmsg = true, },
586 { .name = "chroot", .errmsg = true, },
4f8c1b74 587 { .name = "clock_gettime", .errmsg = true, STRARRAY(0, clk_id, clockid), },
11c8e39f 588 { .name = "clone", .errpid = true, },
75b757ca 589 { .name = "close", .errmsg = true,
48000a1a 590 .arg_scnprintf = { [0] = SCA_CLOSE_FD, /* fd */ }, },
a14bb860 591 { .name = "connect", .errmsg = true, },
12f3ca4f 592 { .name = "creat", .errmsg = true, },
b6565c90
ACM
593 { .name = "dup", .errmsg = true, },
594 { .name = "dup2", .errmsg = true, },
595 { .name = "dup3", .errmsg = true, },
453350dd 596 { .name = "epoll_ctl", .errmsg = true, STRARRAY(1, op, epoll_ctl_ops), },
49af9e93
ACM
597 { .name = "eventfd2", .errmsg = true,
598 .arg_scnprintf = { [1] = SCA_EFD_FLAGS, /* flags */ }, },
12f3ca4f 599 { .name = "faccessat", .errmsg = true, },
b6565c90
ACM
600 { .name = "fadvise64", .errmsg = true, },
601 { .name = "fallocate", .errmsg = true, },
602 { .name = "fchdir", .errmsg = true, },
603 { .name = "fchmod", .errmsg = true, },
75b757ca 604 { .name = "fchmodat", .errmsg = true,
12f3ca4f 605 .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
b6565c90 606 { .name = "fchown", .errmsg = true, },
75b757ca 607 { .name = "fchownat", .errmsg = true,
12f3ca4f 608 .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
75b757ca 609 { .name = "fcntl", .errmsg = true,
b6565c90 610 .arg_scnprintf = { [1] = SCA_STRARRAY, /* cmd */ },
75b757ca 611 .arg_parm = { [1] = &strarray__fcntl_cmds, /* cmd */ }, },
b6565c90 612 { .name = "fdatasync", .errmsg = true, },
5cea6ff2 613 { .name = "flock", .errmsg = true,
b6565c90
ACM
614 .arg_scnprintf = { [1] = SCA_FLOCK, /* cmd */ }, },
615 { .name = "fsetxattr", .errmsg = true, },
616 { .name = "fstat", .errmsg = true, .alias = "newfstat", },
12f3ca4f 617 { .name = "fstatat", .errmsg = true, .alias = "newfstatat", },
b6565c90
ACM
618 { .name = "fstatfs", .errmsg = true, },
619 { .name = "fsync", .errmsg = true, },
620 { .name = "ftruncate", .errmsg = true, },
f9da0b0c
ACM
621 { .name = "futex", .errmsg = true,
622 .arg_scnprintf = { [1] = SCA_FUTEX_OP, /* op */ }, },
75b757ca 623 { .name = "futimesat", .errmsg = true,
12f3ca4f 624 .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
b6565c90
ACM
625 { .name = "getdents", .errmsg = true, },
626 { .name = "getdents64", .errmsg = true, },
453350dd 627 { .name = "getitimer", .errmsg = true, STRARRAY(0, which, itimers), },
c65f1070 628 { .name = "getpid", .errpid = true, },
d1d438a3 629 { .name = "getpgid", .errpid = true, },
c65f1070 630 { .name = "getppid", .errpid = true, },
39878d49
ACM
631 { .name = "getrandom", .errmsg = true,
632 .arg_scnprintf = { [2] = SCA_GETRANDOM_FLAGS, /* flags */ }, },
453350dd 633 { .name = "getrlimit", .errmsg = true, STRARRAY(0, resource, rlimit_resources), },
12f3ca4f
ACM
634 { .name = "getxattr", .errmsg = true, },
635 { .name = "inotify_add_watch", .errmsg = true, },
beccb2b5 636 { .name = "ioctl", .errmsg = true,
b6565c90 637 .arg_scnprintf = {
844ae5b4
ACM
638#if defined(__i386__) || defined(__x86_64__)
639/*
640 * FIXME: Make this available to all arches.
641 */
78645cf3
ACM
642 [1] = SCA_STRHEXARRAY, /* cmd */
643 [2] = SCA_HEX, /* arg */ },
644 .arg_parm = { [1] = &strarray__tioctls, /* cmd */ }, },
844ae5b4
ACM
645#else
646 [2] = SCA_HEX, /* arg */ }, },
647#endif
b62bee1b 648 { .name = "keyctl", .errmsg = true, STRARRAY(0, option, keyctl_options), },
8bad5b0a
ACM
649 { .name = "kill", .errmsg = true,
650 .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
12f3ca4f
ACM
651 { .name = "lchown", .errmsg = true, },
652 { .name = "lgetxattr", .errmsg = true, },
75b757ca 653 { .name = "linkat", .errmsg = true,
48000a1a 654 .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
12f3ca4f
ACM
655 { .name = "listxattr", .errmsg = true, },
656 { .name = "llistxattr", .errmsg = true, },
657 { .name = "lremovexattr", .errmsg = true, },
75b757ca 658 { .name = "lseek", .errmsg = true,
b6565c90 659 .arg_scnprintf = { [2] = SCA_STRARRAY, /* whence */ },
75b757ca 660 .arg_parm = { [2] = &strarray__whences, /* whence */ }, },
12f3ca4f
ACM
661 { .name = "lsetxattr", .errmsg = true, },
662 { .name = "lstat", .errmsg = true, .alias = "newlstat", },
663 { .name = "lsxattr", .errmsg = true, },
9e9716d1
ACM
664 { .name = "madvise", .errmsg = true,
665 .arg_scnprintf = { [0] = SCA_HEX, /* start */
666 [2] = SCA_MADV_BHV, /* behavior */ }, },
12f3ca4f 667 { .name = "mkdir", .errmsg = true, },
75b757ca 668 { .name = "mkdirat", .errmsg = true,
12f3ca4f
ACM
669 .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
670 { .name = "mknod", .errmsg = true, },
75b757ca 671 { .name = "mknodat", .errmsg = true,
12f3ca4f 672 .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
3d903aa7
ACM
673 { .name = "mlock", .errmsg = true,
674 .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
675 { .name = "mlockall", .errmsg = true,
676 .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
beccb2b5 677 { .name = "mmap", .hexret = true,
ae685380 678 .arg_scnprintf = { [0] = SCA_HEX, /* addr */
941557e0 679 [2] = SCA_MMAP_PROT, /* prot */
b6565c90 680 [3] = SCA_MMAP_FLAGS, /* flags */ }, },
beccb2b5 681 { .name = "mprotect", .errmsg = true,
ae685380
ACM
682 .arg_scnprintf = { [0] = SCA_HEX, /* start */
683 [2] = SCA_MMAP_PROT, /* prot */ }, },
090389b6
ACM
684 { .name = "mq_unlink", .errmsg = true,
685 .arg_scnprintf = { [0] = SCA_FILENAME, /* u_name */ }, },
ae685380
ACM
686 { .name = "mremap", .hexret = true,
687 .arg_scnprintf = { [0] = SCA_HEX, /* addr */
86998dda 688 [3] = SCA_MREMAP_FLAGS, /* flags */
ae685380 689 [4] = SCA_HEX, /* new_addr */ }, },
3d903aa7
ACM
690 { .name = "munlock", .errmsg = true,
691 .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
beccb2b5
ACM
692 { .name = "munmap", .errmsg = true,
693 .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
75b757ca 694 { .name = "name_to_handle_at", .errmsg = true,
48000a1a 695 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
75b757ca 696 { .name = "newfstatat", .errmsg = true,
12f3ca4f 697 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
be65a89a 698 { .name = "open", .errmsg = true,
12f3ca4f 699 .arg_scnprintf = { [1] = SCA_OPEN_FLAGS, /* flags */ }, },
31cd3855 700 { .name = "open_by_handle_at", .errmsg = true,
75b757ca
ACM
701 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
702 [2] = SCA_OPEN_FLAGS, /* flags */ }, },
31cd3855 703 { .name = "openat", .errmsg = true,
75b757ca
ACM
704 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
705 [2] = SCA_OPEN_FLAGS, /* flags */ }, },
a1c2552d 706 { .name = "perf_event_open", .errmsg = true,
ccd9b2a7 707 .arg_scnprintf = { [2] = SCA_INT, /* cpu */
a1c2552d
ACM
708 [3] = SCA_FD, /* group_fd */
709 [4] = SCA_PERF_FLAGS, /* flags */ }, },
46cce19b
ACM
710 { .name = "pipe2", .errmsg = true,
711 .arg_scnprintf = { [1] = SCA_PIPE_FLAGS, /* flags */ }, },
aec1930b
ACM
712 { .name = "poll", .errmsg = true, .timeout = true, },
713 { .name = "ppoll", .errmsg = true, .timeout = true, },
b6565c90
ACM
714 { .name = "pread", .errmsg = true, .alias = "pread64", },
715 { .name = "preadv", .errmsg = true, .alias = "pread", },
453350dd 716 { .name = "prlimit64", .errmsg = true, STRARRAY(1, resource, rlimit_resources), },
b6565c90
ACM
717 { .name = "pwrite", .errmsg = true, .alias = "pwrite64", },
718 { .name = "pwritev", .errmsg = true, },
719 { .name = "read", .errmsg = true, },
12f3ca4f 720 { .name = "readlink", .errmsg = true, },
75b757ca 721 { .name = "readlinkat", .errmsg = true,
12f3ca4f 722 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
b6565c90 723 { .name = "readv", .errmsg = true, },
b2cc99fd 724 { .name = "recvfrom", .errmsg = true,
b6565c90 725 .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
b2cc99fd 726 { .name = "recvmmsg", .errmsg = true,
b6565c90 727 .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
b2cc99fd 728 { .name = "recvmsg", .errmsg = true,
b6565c90 729 .arg_scnprintf = { [2] = SCA_MSG_FLAGS, /* flags */ }, },
12f3ca4f 730 { .name = "removexattr", .errmsg = true, },
75b757ca 731 { .name = "renameat", .errmsg = true,
48000a1a 732 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
12f3ca4f 733 { .name = "rmdir", .errmsg = true, },
8bad5b0a
ACM
734 { .name = "rt_sigaction", .errmsg = true,
735 .arg_scnprintf = { [0] = SCA_SIGNUM, /* sig */ }, },
453350dd 736 { .name = "rt_sigprocmask", .errmsg = true, STRARRAY(0, how, sighow), },
8bad5b0a
ACM
737 { .name = "rt_sigqueueinfo", .errmsg = true,
738 .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
739 { .name = "rt_tgsigqueueinfo", .errmsg = true,
740 .arg_scnprintf = { [2] = SCA_SIGNUM, /* sig */ }, },
a3bca91f
ACM
741 { .name = "sched_setscheduler", .errmsg = true,
742 .arg_scnprintf = { [1] = SCA_SCHED_POLICY, /* policy */ }, },
997bba8c
ACM
743 { .name = "seccomp", .errmsg = true,
744 .arg_scnprintf = { [0] = SCA_SECCOMP_OP, /* op */
745 [1] = SCA_SECCOMP_FLAGS, /* flags */ }, },
aec1930b 746 { .name = "select", .errmsg = true, .timeout = true, },
b2cc99fd 747 { .name = "sendmmsg", .errmsg = true,
b6565c90 748 .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
b2cc99fd 749 { .name = "sendmsg", .errmsg = true,
b6565c90 750 .arg_scnprintf = { [2] = SCA_MSG_FLAGS, /* flags */ }, },
b2cc99fd 751 { .name = "sendto", .errmsg = true,
b6565c90 752 .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
c65f1070 753 { .name = "set_tid_address", .errpid = true, },
453350dd 754 { .name = "setitimer", .errmsg = true, STRARRAY(0, which, itimers), },
d1d438a3 755 { .name = "setpgid", .errmsg = true, },
453350dd 756 { .name = "setrlimit", .errmsg = true, STRARRAY(0, resource, rlimit_resources), },
12f3ca4f 757 { .name = "setxattr", .errmsg = true, },
b6565c90 758 { .name = "shutdown", .errmsg = true, },
e10bce81 759 { .name = "socket", .errmsg = true,
a28b24b2
ACM
760 .arg_scnprintf = { [0] = SCA_STRARRAY, /* family */
761 [1] = SCA_SK_TYPE, /* type */ },
07120aa5
ACM
762 .arg_parm = { [0] = &strarray__socket_families, /* family */ }, },
763 { .name = "socketpair", .errmsg = true,
764 .arg_scnprintf = { [0] = SCA_STRARRAY, /* family */
765 [1] = SCA_SK_TYPE, /* type */ },
e10bce81 766 .arg_parm = { [0] = &strarray__socket_families, /* family */ }, },
12f3ca4f
ACM
767 { .name = "stat", .errmsg = true, .alias = "newstat", },
768 { .name = "statfs", .errmsg = true, },
34221118
ACM
769 { .name = "swapoff", .errmsg = true,
770 .arg_scnprintf = { [0] = SCA_FILENAME, /* specialfile */ }, },
771 { .name = "swapon", .errmsg = true,
772 .arg_scnprintf = { [0] = SCA_FILENAME, /* specialfile */ }, },
75b757ca 773 { .name = "symlinkat", .errmsg = true,
48000a1a 774 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
8bad5b0a
ACM
775 { .name = "tgkill", .errmsg = true,
776 .arg_scnprintf = { [2] = SCA_SIGNUM, /* sig */ }, },
777 { .name = "tkill", .errmsg = true,
778 .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
12f3ca4f 779 { .name = "truncate", .errmsg = true, },
e5959683 780 { .name = "uname", .errmsg = true, .alias = "newuname", },
75b757ca 781 { .name = "unlinkat", .errmsg = true,
12f3ca4f
ACM
782 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
783 { .name = "utime", .errmsg = true, },
75b757ca 784 { .name = "utimensat", .errmsg = true,
12f3ca4f
ACM
785 .arg_scnprintf = { [0] = SCA_FDAT, /* dirfd */ }, },
786 { .name = "utimes", .errmsg = true, },
b6565c90 787 { .name = "vmsplice", .errmsg = true, },
11c8e39f 788 { .name = "wait4", .errpid = true,
7206b900 789 .arg_scnprintf = { [2] = SCA_WAITID_OPTIONS, /* options */ }, },
11c8e39f 790 { .name = "waitid", .errpid = true,
7206b900 791 .arg_scnprintf = { [3] = SCA_WAITID_OPTIONS, /* options */ }, },
b6565c90
ACM
792 { .name = "write", .errmsg = true, },
793 { .name = "writev", .errmsg = true, },
514f1c67
ACM
794};
795
796static int syscall_fmt__cmp(const void *name, const void *fmtp)
797{
798 const struct syscall_fmt *fmt = fmtp;
799 return strcmp(name, fmt->name);
800}
801
802static struct syscall_fmt *syscall_fmt__find(const char *name)
803{
804 const int nmemb = ARRAY_SIZE(syscall_fmts);
805 return bsearch(name, syscall_fmts, nmemb, sizeof(struct syscall_fmt), syscall_fmt__cmp);
806}
807
/*
 * What is known about one syscall.
 *
 * NOTE(review): the code that fills this in is outside this part of the
 * file.  Judging by the types, @tp_format/@args/@nr_args describe the
 * syscall's tracepoint format and @fmt points at its syscall_fmts[] entry,
 * with @arg_scnprintf/@arg_parm being the per-argument beautifiers - confirm.
 */
struct syscall {
	struct event_format *tp_format;
	int		    nr_args;
	struct format_field *args;
	const char	    *name;
	bool		    is_exit;
	struct syscall_fmt  *fmt;
	size_t		    (**arg_scnprintf)(char *bf, size_t size, struct syscall_arg *arg);
	void		    **arg_parm;
};
818
60c907ab
ACM
819static size_t fprintf_duration(unsigned long t, FILE *fp)
820{
821 double duration = (double)t / NSEC_PER_MSEC;
822 size_t printed = fprintf(fp, "(");
823
824 if (duration >= 1.0)
825 printed += color_fprintf(fp, PERF_COLOR_RED, "%6.3f ms", duration);
826 else if (duration >= 0.01)
827 printed += color_fprintf(fp, PERF_COLOR_YELLOW, "%6.3f ms", duration);
828 else
829 printed += color_fprintf(fp, PERF_COLOR_NORMAL, "%6.3f ms", duration);
c24ff998 830 return printed + fprintf(fp, "): ");
60c907ab
ACM
831}
832
f994592d
ACM
/**
 * struct thread_trace - per-thread tracing state, hung off thread__priv()
 *
 * filename.ptr: The filename char pointer that will be vfs_getname'd
 * filename.entry_str_pos: Where to insert the string translated from
 *                         filename.ptr by the vfs_getname tracepoint/kprobe.
 */
struct thread_trace {
	/* Timestamp of the last sys_enter sample seen for this thread */
	u64 entry_time;
	/* Timestamp of the last sys_exit sample seen for this thread */
	u64 exit_time;
	/* A syscall entry was formatted into ->entry_str and not yet printed */
	bool entry_pending;
	/* Bumped every time thread__trace() is called for this thread */
	unsigned long nr_events;
	/* Major/minor page fault counters */
	unsigned long pfmaj, pfmin;
	/* Formatted "name(args" of the current syscall, trace__entry_str_size bytes */
	char *entry_str;
	/* Runtime accumulated from sched_stat_runtime samples, in milliseconds */
	double runtime_ms;
	struct {
		unsigned long ptr;
		short int entry_str_pos;
		/* A pathname was captured by vfs_getname and no sys_exit consumed it yet */
		bool pending_open;
		/* Longest string ->name can hold, not counting the terminating NUL */
		unsigned int namelen;
		/* Last pathname copied by trace__vfs_getname() */
		char *name;
	} filename;
	struct {
		/* Highest valid index in ->table, -1 while the table is empty */
		int max;
		/* fd -> strdup'ed pathname, NULL for fds with no known path */
		char **table;
	} paths;

	/* Keyed by syscall id, each node's ->priv is a struct stats */
	struct intlist *syscall_stats;
};
860
861static struct thread_trace *thread_trace__new(void)
862{
75b757ca
ACM
863 struct thread_trace *ttrace = zalloc(sizeof(struct thread_trace));
864
865 if (ttrace)
866 ttrace->paths.max = -1;
867
bf2575c1
DA
868 ttrace->syscall_stats = intlist__new(NULL);
869
75b757ca 870 return ttrace;
752fde44
ACM
871}
872
c24ff998 873static struct thread_trace *thread__trace(struct thread *thread, FILE *fp)
752fde44 874{
efd5745e
ACM
875 struct thread_trace *ttrace;
876
752fde44
ACM
877 if (thread == NULL)
878 goto fail;
879
89dceb22
NK
880 if (thread__priv(thread) == NULL)
881 thread__set_priv(thread, thread_trace__new());
48000a1a 882
89dceb22 883 if (thread__priv(thread) == NULL)
752fde44
ACM
884 goto fail;
885
89dceb22 886 ttrace = thread__priv(thread);
efd5745e
ACM
887 ++ttrace->nr_events;
888
889 return ttrace;
752fde44 890fail:
c24ff998 891 color_fprintf(fp, PERF_COLOR_RED,
752fde44
ACM
892 "WARNING: not enough memory, dropping samples!\n");
893 return NULL;
894}
895
598d02c5
SF
/* Page fault kinds, one bit each.  NOTE(review): the users of these flags are outside this part of the file. */
#define TRACE_PFMAJ (1 << 0)
#define TRACE_PFMIN (1 << 1)

/* Size, in bytes, of the per-thread entry_str buffer holding the formatted syscall entry */
static const size_t trace__entry_str_size = 2048;
900
97119f37 901static int trace__set_fd_pathname(struct thread *thread, int fd, const char *pathname)
75b757ca 902{
89dceb22 903 struct thread_trace *ttrace = thread__priv(thread);
75b757ca
ACM
904
905 if (fd > ttrace->paths.max) {
906 char **npath = realloc(ttrace->paths.table, (fd + 1) * sizeof(char *));
907
908 if (npath == NULL)
909 return -1;
910
911 if (ttrace->paths.max != -1) {
912 memset(npath + ttrace->paths.max + 1, 0,
913 (fd - ttrace->paths.max) * sizeof(char *));
914 } else {
915 memset(npath, 0, (fd + 1) * sizeof(char *));
916 }
917
918 ttrace->paths.table = npath;
919 ttrace->paths.max = fd;
920 }
921
922 ttrace->paths.table[fd] = strdup(pathname);
923
924 return ttrace->paths.table[fd] != NULL ? 0 : -1;
925}
926
97119f37
ACM
927static int thread__read_fd_path(struct thread *thread, int fd)
928{
929 char linkname[PATH_MAX], pathname[PATH_MAX];
930 struct stat st;
931 int ret;
932
933 if (thread->pid_ == thread->tid) {
934 scnprintf(linkname, sizeof(linkname),
935 "/proc/%d/fd/%d", thread->pid_, fd);
936 } else {
937 scnprintf(linkname, sizeof(linkname),
938 "/proc/%d/task/%d/fd/%d", thread->pid_, thread->tid, fd);
939 }
940
941 if (lstat(linkname, &st) < 0 || st.st_size + 1 > (off_t)sizeof(pathname))
942 return -1;
943
944 ret = readlink(linkname, pathname, sizeof(pathname));
945
946 if (ret < 0 || ret > st.st_size)
947 return -1;
948
949 pathname[ret] = '\0';
950 return trace__set_fd_pathname(thread, fd, pathname);
951}
952
c522739d
ACM
953static const char *thread__fd_path(struct thread *thread, int fd,
954 struct trace *trace)
75b757ca 955{
89dceb22 956 struct thread_trace *ttrace = thread__priv(thread);
75b757ca
ACM
957
958 if (ttrace == NULL)
959 return NULL;
960
961 if (fd < 0)
962 return NULL;
963
cdcd1e6b 964 if ((fd > ttrace->paths.max || ttrace->paths.table[fd] == NULL)) {
c522739d
ACM
965 if (!trace->live)
966 return NULL;
967 ++trace->stats.proc_getname;
cdcd1e6b 968 if (thread__read_fd_path(thread, fd))
c522739d
ACM
969 return NULL;
970 }
75b757ca
ACM
971
972 return ttrace->paths.table[fd];
973}
974
975static size_t syscall_arg__scnprintf_fd(char *bf, size_t size,
976 struct syscall_arg *arg)
977{
978 int fd = arg->val;
979 size_t printed = scnprintf(bf, size, "%d", fd);
c522739d 980 const char *path = thread__fd_path(arg->thread, fd, arg->trace);
75b757ca
ACM
981
982 if (path)
983 printed += scnprintf(bf + printed, size - printed, "<%s>", path);
984
985 return printed;
986}
987
988static size_t syscall_arg__scnprintf_close_fd(char *bf, size_t size,
989 struct syscall_arg *arg)
990{
991 int fd = arg->val;
992 size_t printed = syscall_arg__scnprintf_fd(bf, size, arg);
89dceb22 993 struct thread_trace *ttrace = thread__priv(arg->thread);
75b757ca 994
04662523
ACM
995 if (ttrace && fd >= 0 && fd <= ttrace->paths.max)
996 zfree(&ttrace->paths.table[fd]);
75b757ca
ACM
997
998 return printed;
999}
1000
f994592d
ACM
1001static void thread__set_filename_pos(struct thread *thread, const char *bf,
1002 unsigned long ptr)
1003{
1004 struct thread_trace *ttrace = thread__priv(thread);
1005
1006 ttrace->filename.ptr = ptr;
1007 ttrace->filename.entry_str_pos = bf - ttrace->entry_str;
1008}
1009
1010static size_t syscall_arg__scnprintf_filename(char *bf, size_t size,
1011 struct syscall_arg *arg)
1012{
1013 unsigned long ptr = arg->val;
1014
1015 if (!arg->trace->vfs_getname)
1016 return scnprintf(bf, size, "%#x", ptr);
1017
1018 thread__set_filename_pos(arg->thread, bf, ptr);
1019 return 0;
1020}
1021
ae9ed035
ACM
1022static bool trace__filter_duration(struct trace *trace, double t)
1023{
1024 return t < (trace->duration_filter * NSEC_PER_MSEC);
1025}
1026
752fde44
ACM
1027static size_t trace__fprintf_tstamp(struct trace *trace, u64 tstamp, FILE *fp)
1028{
1029 double ts = (double)(tstamp - trace->base_time) / NSEC_PER_MSEC;
1030
60c907ab 1031 return fprintf(fp, "%10.3f ", ts);
752fde44
ACM
1032}
1033
/*
 * Flags set from sig_handler().  volatile sig_atomic_t is the only object
 * type the C standard lets a signal handler write to and the interrupted
 * code read back reliably.
 */
static volatile sig_atomic_t done = false;
static volatile sig_atomic_t interrupted = false;

/*
 * Signal handler: ask for termination and remember whether that was due to
 * SIGINT.
 */
static void sig_handler(int sig)
{
	done = true;
	interrupted = sig == SIGINT;
}
1042
752fde44 1043static size_t trace__fprintf_entry_head(struct trace *trace, struct thread *thread,
60c907ab 1044 u64 duration, u64 tstamp, FILE *fp)
752fde44
ACM
1045{
1046 size_t printed = trace__fprintf_tstamp(trace, tstamp, fp);
60c907ab 1047 printed += fprintf_duration(duration, fp);
752fde44 1048
50c95cbd
ACM
1049 if (trace->multiple_threads) {
1050 if (trace->show_comm)
1902efe7 1051 printed += fprintf(fp, "%.14s/", thread__comm_str(thread));
38051234 1052 printed += fprintf(fp, "%d ", thread->tid);
50c95cbd 1053 }
752fde44
ACM
1054
1055 return printed;
1056}
1057
c24ff998 1058static int trace__process_event(struct trace *trace, struct machine *machine,
162f0bef 1059 union perf_event *event, struct perf_sample *sample)
752fde44
ACM
1060{
1061 int ret = 0;
1062
1063 switch (event->header.type) {
1064 case PERF_RECORD_LOST:
c24ff998 1065 color_fprintf(trace->output, PERF_COLOR_RED,
752fde44 1066 "LOST %" PRIu64 " events!\n", event->lost.lost);
162f0bef 1067 ret = machine__process_lost_event(machine, event, sample);
3ed5ca2e 1068 break;
752fde44 1069 default:
162f0bef 1070 ret = machine__process_event(machine, event, sample);
752fde44
ACM
1071 break;
1072 }
1073
1074 return ret;
1075}
1076
c24ff998 1077static int trace__tool_process(struct perf_tool *tool,
752fde44 1078 union perf_event *event,
162f0bef 1079 struct perf_sample *sample,
752fde44
ACM
1080 struct machine *machine)
1081{
c24ff998 1082 struct trace *trace = container_of(tool, struct trace, tool);
162f0bef 1083 return trace__process_event(trace, machine, event, sample);
752fde44
ACM
1084}
1085
caf8a0d0
ACM
1086static char *trace__machine__resolve_kernel_addr(void *vmachine, unsigned long long *addrp, char **modp)
1087{
1088 struct machine *machine = vmachine;
1089
1090 if (machine->kptr_restrict_warned)
1091 return NULL;
1092
1093 if (symbol_conf.kptr_restrict) {
1094 pr_warning("Kernel address maps (/proc/{kallsyms,modules}) are restricted.\n\n"
1095 "Check /proc/sys/kernel/kptr_restrict.\n\n"
1096 "Kernel samples will not be resolved.\n");
1097 machine->kptr_restrict_warned = true;
1098 return NULL;
1099 }
1100
1101 return machine__resolve_kernel_addr(vmachine, addrp, modp);
1102}
1103
752fde44
ACM
1104static int trace__symbols_init(struct trace *trace, struct perf_evlist *evlist)
1105{
0a7e6d1b 1106 int err = symbol__init(NULL);
752fde44
ACM
1107
1108 if (err)
1109 return err;
1110
8fb598e5
DA
1111 trace->host = machine__new_host();
1112 if (trace->host == NULL)
1113 return -ENOMEM;
752fde44 1114
caf8a0d0 1115 if (trace_event__register_resolver(trace->host, trace__machine__resolve_kernel_addr) < 0)
706c3da4
ACM
1116 return -errno;
1117
a33fbd56 1118 err = __machine__synthesize_threads(trace->host, &trace->tool, &trace->opts.target,
9d9cad76
KL
1119 evlist->threads, trace__tool_process, false,
1120 trace->opts.proc_map_timeout);
752fde44
ACM
1121 if (err)
1122 symbol__exit();
1123
1124 return err;
1125}
1126
13d4ff3e
ACM
1127static int syscall__set_arg_fmts(struct syscall *sc)
1128{
1129 struct format_field *field;
b6565c90 1130 int idx = 0, len;
13d4ff3e 1131
f208bd8d 1132 sc->arg_scnprintf = calloc(sc->nr_args, sizeof(void *));
13d4ff3e
ACM
1133 if (sc->arg_scnprintf == NULL)
1134 return -1;
1135
1f115cb7
ACM
1136 if (sc->fmt)
1137 sc->arg_parm = sc->fmt->arg_parm;
1138
f208bd8d 1139 for (field = sc->args; field; field = field->next) {
beccb2b5
ACM
1140 if (sc->fmt && sc->fmt->arg_scnprintf[idx])
1141 sc->arg_scnprintf[idx] = sc->fmt->arg_scnprintf[idx];
12f3ca4f
ACM
1142 else if (strcmp(field->type, "const char *") == 0 &&
1143 (strcmp(field->name, "filename") == 0 ||
1144 strcmp(field->name, "path") == 0 ||
1145 strcmp(field->name, "pathname") == 0))
1146 sc->arg_scnprintf[idx] = SCA_FILENAME;
beccb2b5 1147 else if (field->flags & FIELD_IS_POINTER)
13d4ff3e 1148 sc->arg_scnprintf[idx] = syscall_arg__scnprintf_hex;
d1d438a3
ACM
1149 else if (strcmp(field->type, "pid_t") == 0)
1150 sc->arg_scnprintf[idx] = SCA_PID;
ba2f22cf
ACM
1151 else if (strcmp(field->type, "umode_t") == 0)
1152 sc->arg_scnprintf[idx] = SCA_MODE_T;
b6565c90
ACM
1153 else if ((strcmp(field->type, "int") == 0 ||
1154 strcmp(field->type, "unsigned int") == 0 ||
1155 strcmp(field->type, "long") == 0) &&
1156 (len = strlen(field->name)) >= 2 &&
1157 strcmp(field->name + len - 2, "fd") == 0) {
1158 /*
1159 * /sys/kernel/tracing/events/syscalls/sys_enter*
1160 * egrep 'field:.*fd;' .../format|sed -r 's/.*field:([a-z ]+) [a-z_]*fd.+/\1/g'|sort|uniq -c
1161 * 65 int
1162 * 23 unsigned int
1163 * 7 unsigned long
1164 */
1165 sc->arg_scnprintf[idx] = SCA_FD;
1166 }
13d4ff3e
ACM
1167 ++idx;
1168 }
1169
1170 return 0;
1171}
1172
514f1c67
ACM
1173static int trace__read_syscall_info(struct trace *trace, int id)
1174{
1175 char tp_name[128];
1176 struct syscall *sc;
fd0db102 1177 const char *name = syscalltbl__name(trace->sctbl, id);
3a531260
ACM
1178
1179 if (name == NULL)
1180 return -1;
514f1c67
ACM
1181
1182 if (id > trace->syscalls.max) {
1183 struct syscall *nsyscalls = realloc(trace->syscalls.table, (id + 1) * sizeof(*sc));
1184
1185 if (nsyscalls == NULL)
1186 return -1;
1187
1188 if (trace->syscalls.max != -1) {
1189 memset(nsyscalls + trace->syscalls.max + 1, 0,
1190 (id - trace->syscalls.max) * sizeof(*sc));
1191 } else {
1192 memset(nsyscalls, 0, (id + 1) * sizeof(*sc));
1193 }
1194
1195 trace->syscalls.table = nsyscalls;
1196 trace->syscalls.max = id;
1197 }
1198
1199 sc = trace->syscalls.table + id;
3a531260 1200 sc->name = name;
2ae3a312 1201
3a531260 1202 sc->fmt = syscall_fmt__find(sc->name);
514f1c67 1203
aec1930b 1204 snprintf(tp_name, sizeof(tp_name), "sys_enter_%s", sc->name);
97978b3e 1205 sc->tp_format = trace_event__tp_format("syscalls", tp_name);
aec1930b 1206
8dd2a131 1207 if (IS_ERR(sc->tp_format) && sc->fmt && sc->fmt->alias) {
aec1930b 1208 snprintf(tp_name, sizeof(tp_name), "sys_enter_%s", sc->fmt->alias);
97978b3e 1209 sc->tp_format = trace_event__tp_format("syscalls", tp_name);
aec1930b 1210 }
514f1c67 1211
8dd2a131 1212 if (IS_ERR(sc->tp_format))
13d4ff3e
ACM
1213 return -1;
1214
f208bd8d
ACM
1215 sc->args = sc->tp_format->format.fields;
1216 sc->nr_args = sc->tp_format->format.nr_fields;
c42de706
TS
1217 /*
1218 * We need to check and discard the first variable '__syscall_nr'
1219 * or 'nr' that mean the syscall number. It is needless here.
1220 * So drop '__syscall_nr' or 'nr' field but does not exist on older kernels.
1221 */
1222 if (sc->args && (!strcmp(sc->args->name, "__syscall_nr") || !strcmp(sc->args->name, "nr"))) {
f208bd8d
ACM
1223 sc->args = sc->args->next;
1224 --sc->nr_args;
1225 }
1226
5089f20e
ACM
1227 sc->is_exit = !strcmp(name, "exit_group") || !strcmp(name, "exit");
1228
13d4ff3e 1229 return syscall__set_arg_fmts(sc);
514f1c67
ACM
1230}
1231
d0cc439b
ACM
1232static int trace__validate_ev_qualifier(struct trace *trace)
1233{
8b3ce757 1234 int err = 0, i;
d0cc439b
ACM
1235 struct str_node *pos;
1236
8b3ce757
ACM
1237 trace->ev_qualifier_ids.nr = strlist__nr_entries(trace->ev_qualifier);
1238 trace->ev_qualifier_ids.entries = malloc(trace->ev_qualifier_ids.nr *
1239 sizeof(trace->ev_qualifier_ids.entries[0]));
1240
1241 if (trace->ev_qualifier_ids.entries == NULL) {
1242 fputs("Error:\tNot enough memory for allocating events qualifier ids\n",
1243 trace->output);
1244 err = -EINVAL;
1245 goto out;
1246 }
1247
1248 i = 0;
1249
d0cc439b
ACM
1250 strlist__for_each(pos, trace->ev_qualifier) {
1251 const char *sc = pos->s;
fd0db102 1252 int id = syscalltbl__id(trace->sctbl, sc);
d0cc439b 1253
8b3ce757 1254 if (id < 0) {
d0cc439b
ACM
1255 if (err == 0) {
1256 fputs("Error:\tInvalid syscall ", trace->output);
1257 err = -EINVAL;
1258 } else {
1259 fputs(", ", trace->output);
1260 }
1261
1262 fputs(sc, trace->output);
1263 }
8b3ce757
ACM
1264
1265 trace->ev_qualifier_ids.entries[i++] = id;
d0cc439b
ACM
1266 }
1267
1268 if (err < 0) {
1269 fputs("\nHint:\ttry 'perf list syscalls:sys_enter_*'"
1270 "\nHint:\tand: 'man syscalls'\n", trace->output);
8b3ce757
ACM
1271 zfree(&trace->ev_qualifier_ids.entries);
1272 trace->ev_qualifier_ids.nr = 0;
d0cc439b 1273 }
8b3ce757 1274out:
d0cc439b
ACM
1275 return err;
1276}
1277
55d43bca
DA
1278/*
1279 * args is to be interpreted as a series of longs but we need to handle
1280 * 8-byte unaligned accesses. args points to raw_data within the event
1281 * and raw_data is guaranteed to be 8-byte unaligned because it is
1282 * preceded by raw_size which is a u32. So we need to copy args to a temp
1283 * variable to read it. Most notably this avoids extended load instructions
1284 * on unaligned addresses
1285 */
1286
752fde44 1287static size_t syscall__scnprintf_args(struct syscall *sc, char *bf, size_t size,
55d43bca 1288 unsigned char *args, struct trace *trace,
75b757ca 1289 struct thread *thread)
514f1c67 1290{
514f1c67 1291 size_t printed = 0;
55d43bca
DA
1292 unsigned char *p;
1293 unsigned long val;
514f1c67 1294
f208bd8d 1295 if (sc->args != NULL) {
514f1c67 1296 struct format_field *field;
01533e97
ACM
1297 u8 bit = 1;
1298 struct syscall_arg arg = {
75b757ca
ACM
1299 .idx = 0,
1300 .mask = 0,
1301 .trace = trace,
1302 .thread = thread,
01533e97 1303 };
6e7eeb51 1304
f208bd8d 1305 for (field = sc->args; field;
01533e97
ACM
1306 field = field->next, ++arg.idx, bit <<= 1) {
1307 if (arg.mask & bit)
6e7eeb51 1308 continue;
55d43bca
DA
1309
1310 /* special care for unaligned accesses */
1311 p = args + sizeof(unsigned long) * arg.idx;
1312 memcpy(&val, p, sizeof(val));
1313
4aa58232
ACM
1314 /*
1315 * Suppress this argument if its value is zero and
1316 * and we don't have a string associated in an
1317 * strarray for it.
1318 */
55d43bca 1319 if (val == 0 &&
4aa58232
ACM
1320 !(sc->arg_scnprintf &&
1321 sc->arg_scnprintf[arg.idx] == SCA_STRARRAY &&
1322 sc->arg_parm[arg.idx]))
22ae5cf1
ACM
1323 continue;
1324
752fde44 1325 printed += scnprintf(bf + printed, size - printed,
13d4ff3e 1326 "%s%s: ", printed ? ", " : "", field->name);
01533e97 1327 if (sc->arg_scnprintf && sc->arg_scnprintf[arg.idx]) {
55d43bca 1328 arg.val = val;
1f115cb7
ACM
1329 if (sc->arg_parm)
1330 arg.parm = sc->arg_parm[arg.idx];
01533e97
ACM
1331 printed += sc->arg_scnprintf[arg.idx](bf + printed,
1332 size - printed, &arg);
6e7eeb51 1333 } else {
13d4ff3e 1334 printed += scnprintf(bf + printed, size - printed,
55d43bca 1335 "%ld", val);
6e7eeb51 1336 }
514f1c67 1337 }
4c4d6e51
ACM
1338 } else if (IS_ERR(sc->tp_format)) {
1339 /*
1340 * If we managed to read the tracepoint /format file, then we
1341 * may end up not having any args, like with gettid(), so only
1342 * print the raw args when we didn't manage to read it.
1343 */
01533e97
ACM
1344 int i = 0;
1345
514f1c67 1346 while (i < 6) {
55d43bca
DA
1347 /* special care for unaligned accesses */
1348 p = args + sizeof(unsigned long) * i;
1349 memcpy(&val, p, sizeof(val));
752fde44
ACM
1350 printed += scnprintf(bf + printed, size - printed,
1351 "%sarg%d: %ld",
55d43bca 1352 printed ? ", " : "", i, val);
514f1c67
ACM
1353 ++i;
1354 }
1355 }
1356
1357 return printed;
1358}
1359
/*
 * Signature shared by the per-event handlers in this file, such as
 * trace__sys_enter(), trace__sys_exit() and trace__vfs_getname().
 */
typedef int (*tracepoint_handler)(struct trace *trace, struct perf_evsel *evsel,
				  union perf_event *event,
				  struct perf_sample *sample);
1363
1364static struct syscall *trace__syscall_info(struct trace *trace,
bf2575c1 1365 struct perf_evsel *evsel, int id)
ba3d7dee 1366{
ba3d7dee
ACM
1367
1368 if (id < 0) {
adaa18bf
ACM
1369
1370 /*
1371 * XXX: Noticed on x86_64, reproduced as far back as 3.0.36, haven't tried
1372 * before that, leaving at a higher verbosity level till that is
1373 * explained. Reproduced with plain ftrace with:
1374 *
1375 * echo 1 > /t/events/raw_syscalls/sys_exit/enable
1376 * grep "NR -1 " /t/trace_pipe
1377 *
1378 * After generating some load on the machine.
1379 */
1380 if (verbose > 1) {
1381 static u64 n;
1382 fprintf(trace->output, "Invalid syscall %d id, skipping (%s, %" PRIu64 ") ...\n",
1383 id, perf_evsel__name(evsel), ++n);
1384 }
ba3d7dee
ACM
1385 return NULL;
1386 }
1387
1388 if ((id > trace->syscalls.max || trace->syscalls.table[id].name == NULL) &&
1389 trace__read_syscall_info(trace, id))
1390 goto out_cant_read;
1391
1392 if ((id > trace->syscalls.max || trace->syscalls.table[id].name == NULL))
1393 goto out_cant_read;
1394
1395 return &trace->syscalls.table[id];
1396
1397out_cant_read:
7c304ee0
ACM
1398 if (verbose) {
1399 fprintf(trace->output, "Problems reading syscall %d", id);
1400 if (id <= trace->syscalls.max && trace->syscalls.table[id].name != NULL)
1401 fprintf(trace->output, "(%s)", trace->syscalls.table[id].name);
1402 fputs(" information\n", trace->output);
1403 }
ba3d7dee
ACM
1404 return NULL;
1405}
1406
bf2575c1
DA
1407static void thread__update_stats(struct thread_trace *ttrace,
1408 int id, struct perf_sample *sample)
1409{
1410 struct int_node *inode;
1411 struct stats *stats;
1412 u64 duration = 0;
1413
1414 inode = intlist__findnew(ttrace->syscall_stats, id);
1415 if (inode == NULL)
1416 return;
1417
1418 stats = inode->priv;
1419 if (stats == NULL) {
1420 stats = malloc(sizeof(struct stats));
1421 if (stats == NULL)
1422 return;
1423 init_stats(stats);
1424 inode->priv = stats;
1425 }
1426
1427 if (ttrace->entry_time && sample->time > ttrace->entry_time)
1428 duration = sample->time - ttrace->entry_time;
1429
1430 update_stats(stats, duration);
1431}
1432
e596663e
ACM
1433static int trace__printf_interrupted_entry(struct trace *trace, struct perf_sample *sample)
1434{
1435 struct thread_trace *ttrace;
1436 u64 duration;
1437 size_t printed;
1438
1439 if (trace->current == NULL)
1440 return 0;
1441
1442 ttrace = thread__priv(trace->current);
1443
1444 if (!ttrace->entry_pending)
1445 return 0;
1446
1447 duration = sample->time - ttrace->entry_time;
1448
1449 printed = trace__fprintf_entry_head(trace, trace->current, duration, sample->time, trace->output);
1450 printed += fprintf(trace->output, "%-70s) ...\n", ttrace->entry_str);
1451 ttrace->entry_pending = false;
1452
1453 return printed;
1454}
1455
ba3d7dee 1456static int trace__sys_enter(struct trace *trace, struct perf_evsel *evsel,
0c82adcf 1457 union perf_event *event __maybe_unused,
ba3d7dee
ACM
1458 struct perf_sample *sample)
1459{
752fde44 1460 char *msg;
ba3d7dee 1461 void *args;
752fde44 1462 size_t printed = 0;
2ae3a312 1463 struct thread *thread;
b91fc39f 1464 int id = perf_evsel__sc_tp_uint(evsel, id, sample), err = -1;
bf2575c1 1465 struct syscall *sc = trace__syscall_info(trace, evsel, id);
2ae3a312
ACM
1466 struct thread_trace *ttrace;
1467
1468 if (sc == NULL)
1469 return -1;
ba3d7dee 1470
8fb598e5 1471 thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
c24ff998 1472 ttrace = thread__trace(thread, trace->output);
2ae3a312 1473 if (ttrace == NULL)
b91fc39f 1474 goto out_put;
ba3d7dee 1475
77170988 1476 args = perf_evsel__sc_tp_ptr(evsel, args, sample);
752fde44
ACM
1477
1478 if (ttrace->entry_str == NULL) {
e4d44e83 1479 ttrace->entry_str = malloc(trace__entry_str_size);
752fde44 1480 if (!ttrace->entry_str)
b91fc39f 1481 goto out_put;
752fde44
ACM
1482 }
1483
5cf9c84e 1484 if (!(trace->duration_filter || trace->summary_only || trace->min_stack))
6ebad5c1 1485 trace__printf_interrupted_entry(trace, sample);
e596663e 1486
752fde44
ACM
1487 ttrace->entry_time = sample->time;
1488 msg = ttrace->entry_str;
e4d44e83 1489 printed += scnprintf(msg + printed, trace__entry_str_size - printed, "%s(", sc->name);
752fde44 1490
e4d44e83 1491 printed += syscall__scnprintf_args(sc, msg + printed, trace__entry_str_size - printed,
75b757ca 1492 args, trace, thread);
752fde44 1493
5089f20e 1494 if (sc->is_exit) {
5cf9c84e 1495 if (!(trace->duration_filter || trace->summary_only || trace->min_stack)) {
c24ff998 1496 trace__fprintf_entry_head(trace, thread, 1, sample->time, trace->output);
c008f78f 1497 fprintf(trace->output, "%-70s)\n", ttrace->entry_str);
ae9ed035 1498 }
7f4f8001 1499 } else {
752fde44 1500 ttrace->entry_pending = true;
7f4f8001
ACM
1501 /* See trace__vfs_getname & trace__sys_exit */
1502 ttrace->filename.pending_open = false;
1503 }
ba3d7dee 1504
f3b623b8
ACM
1505 if (trace->current != thread) {
1506 thread__put(trace->current);
1507 trace->current = thread__get(thread);
1508 }
b91fc39f
ACM
1509 err = 0;
1510out_put:
1511 thread__put(thread);
1512 return err;
ba3d7dee
ACM
1513}
1514
5cf9c84e
ACM
1515static int trace__resolve_callchain(struct trace *trace, struct perf_evsel *evsel,
1516 struct perf_sample *sample,
1517 struct callchain_cursor *cursor)
202ff968
ACM
1518{
1519 struct addr_location al;
5cf9c84e
ACM
1520
1521 if (machine__resolve(trace->host, &al, sample) < 0 ||
1522 thread__resolve_callchain(al.thread, cursor, evsel, sample, NULL, NULL, trace->max_stack))
1523 return -1;
1524
1525 return 0;
1526}
1527
1528static int trace__fprintf_callchain(struct trace *trace, struct perf_sample *sample)
1529{
202ff968 1530 /* TODO: user-configurable print_opts */
e20ab86e
ACM
1531 const unsigned int print_opts = EVSEL__PRINT_SYM |
1532 EVSEL__PRINT_DSO |
1533 EVSEL__PRINT_UNKNOWN_AS_ADDR;
202ff968 1534
d327e60c 1535 return sample__fprintf_callchain(sample, 38, print_opts, &callchain_cursor, trace->output);
202ff968
ACM
1536}
1537
ba3d7dee 1538static int trace__sys_exit(struct trace *trace, struct perf_evsel *evsel,
0c82adcf 1539 union perf_event *event __maybe_unused,
ba3d7dee
ACM
1540 struct perf_sample *sample)
1541{
2c82c3ad 1542 long ret;
60c907ab 1543 u64 duration = 0;
2ae3a312 1544 struct thread *thread;
5cf9c84e 1545 int id = perf_evsel__sc_tp_uint(evsel, id, sample), err = -1, callchain_ret = 0;
bf2575c1 1546 struct syscall *sc = trace__syscall_info(trace, evsel, id);
2ae3a312
ACM
1547 struct thread_trace *ttrace;
1548
1549 if (sc == NULL)
1550 return -1;
ba3d7dee 1551
8fb598e5 1552 thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
c24ff998 1553 ttrace = thread__trace(thread, trace->output);
2ae3a312 1554 if (ttrace == NULL)
b91fc39f 1555 goto out_put;
ba3d7dee 1556
bf2575c1
DA
1557 if (trace->summary)
1558 thread__update_stats(ttrace, id, sample);
1559
77170988 1560 ret = perf_evsel__sc_tp_uint(evsel, ret, sample);
ba3d7dee 1561
fd0db102 1562 if (id == trace->open_id && ret >= 0 && ttrace->filename.pending_open) {
7f4f8001
ACM
1563 trace__set_fd_pathname(thread, ret, ttrace->filename.name);
1564 ttrace->filename.pending_open = false;
c522739d
ACM
1565 ++trace->stats.vfs_getname;
1566 }
1567
752fde44
ACM
1568 ttrace->exit_time = sample->time;
1569
ae9ed035 1570 if (ttrace->entry_time) {
60c907ab 1571 duration = sample->time - ttrace->entry_time;
ae9ed035
ACM
1572 if (trace__filter_duration(trace, duration))
1573 goto out;
1574 } else if (trace->duration_filter)
1575 goto out;
60c907ab 1576
5cf9c84e
ACM
1577 if (sample->callchain) {
1578 callchain_ret = trace__resolve_callchain(trace, evsel, sample, &callchain_cursor);
1579 if (callchain_ret == 0) {
1580 if (callchain_cursor.nr < trace->min_stack)
1581 goto out;
1582 callchain_ret = 1;
1583 }
1584 }
1585
fd2eabaf
DA
1586 if (trace->summary_only)
1587 goto out;
1588
c24ff998 1589 trace__fprintf_entry_head(trace, thread, duration, sample->time, trace->output);
752fde44
ACM
1590
1591 if (ttrace->entry_pending) {
c24ff998 1592 fprintf(trace->output, "%-70s", ttrace->entry_str);
752fde44 1593 } else {
c24ff998
ACM
1594 fprintf(trace->output, " ... [");
1595 color_fprintf(trace->output, PERF_COLOR_YELLOW, "continued");
1596 fprintf(trace->output, "]: %s()", sc->name);
752fde44
ACM
1597 }
1598
da3c9a44
ACM
1599 if (sc->fmt == NULL) {
1600signed_print:
2c82c3ad 1601 fprintf(trace->output, ") = %ld", ret);
11c8e39f 1602 } else if (ret < 0 && (sc->fmt->errmsg || sc->fmt->errpid)) {
942a91ed 1603 char bf[STRERR_BUFSIZE];
ba3d7dee
ACM
1604 const char *emsg = strerror_r(-ret, bf, sizeof(bf)),
1605 *e = audit_errno_to_name(-ret);
1606
c24ff998 1607 fprintf(trace->output, ") = -1 %s %s", e, emsg);
da3c9a44 1608 } else if (ret == 0 && sc->fmt->timeout)
c24ff998 1609 fprintf(trace->output, ") = 0 Timeout");
04b34729 1610 else if (sc->fmt->hexret)
2c82c3ad 1611 fprintf(trace->output, ") = %#lx", ret);
11c8e39f
ACM
1612 else if (sc->fmt->errpid) {
1613 struct thread *child = machine__find_thread(trace->host, ret, ret);
1614
1615 if (child != NULL) {
1616 fprintf(trace->output, ") = %ld", ret);
1617 if (child->comm_set)
1618 fprintf(trace->output, " (%s)", thread__comm_str(child));
1619 thread__put(child);
1620 }
1621 } else
da3c9a44 1622 goto signed_print;
ba3d7dee 1623
c24ff998 1624 fputc('\n', trace->output);
566a0885 1625
5cf9c84e
ACM
1626 if (callchain_ret > 0)
1627 trace__fprintf_callchain(trace, sample);
1628 else if (callchain_ret < 0)
1629 pr_err("Problem processing %s callchain, skipping...\n", perf_evsel__name(evsel));
ae9ed035 1630out:
752fde44 1631 ttrace->entry_pending = false;
b91fc39f
ACM
1632 err = 0;
1633out_put:
1634 thread__put(thread);
1635 return err;
ba3d7dee
ACM
1636}
1637
c522739d 1638static int trace__vfs_getname(struct trace *trace, struct perf_evsel *evsel,
0c82adcf 1639 union perf_event *event __maybe_unused,
c522739d
ACM
1640 struct perf_sample *sample)
1641{
f994592d
ACM
1642 struct thread *thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
1643 struct thread_trace *ttrace;
1644 size_t filename_len, entry_str_len, to_move;
1645 ssize_t remaining_space;
1646 char *pos;
7f4f8001 1647 const char *filename = perf_evsel__rawptr(evsel, sample, "pathname");
f994592d
ACM
1648
1649 if (!thread)
1650 goto out;
1651
1652 ttrace = thread__priv(thread);
1653 if (!ttrace)
1654 goto out;
1655
7f4f8001
ACM
1656 filename_len = strlen(filename);
1657
1658 if (ttrace->filename.namelen < filename_len) {
1659 char *f = realloc(ttrace->filename.name, filename_len + 1);
1660
1661 if (f == NULL)
1662 goto out;
1663
1664 ttrace->filename.namelen = filename_len;
1665 ttrace->filename.name = f;
1666 }
1667
1668 strcpy(ttrace->filename.name, filename);
1669 ttrace->filename.pending_open = true;
1670
f994592d
ACM
1671 if (!ttrace->filename.ptr)
1672 goto out;
1673
1674 entry_str_len = strlen(ttrace->entry_str);
1675 remaining_space = trace__entry_str_size - entry_str_len - 1; /* \0 */
1676 if (remaining_space <= 0)
1677 goto out;
1678
f994592d
ACM
1679 if (filename_len > (size_t)remaining_space) {
1680 filename += filename_len - remaining_space;
1681 filename_len = remaining_space;
1682 }
1683
1684 to_move = entry_str_len - ttrace->filename.entry_str_pos + 1; /* \0 */
1685 pos = ttrace->entry_str + ttrace->filename.entry_str_pos;
1686 memmove(pos + filename_len, pos, to_move);
1687 memcpy(pos, filename, filename_len);
1688
1689 ttrace->filename.ptr = 0;
1690 ttrace->filename.entry_str_pos = 0;
1691out:
c522739d
ACM
1692 return 0;
1693}
1694
1302d88e 1695static int trace__sched_stat_runtime(struct trace *trace, struct perf_evsel *evsel,
0c82adcf 1696 union perf_event *event __maybe_unused,
1302d88e
ACM
1697 struct perf_sample *sample)
1698{
1699 u64 runtime = perf_evsel__intval(evsel, sample, "runtime");
1700 double runtime_ms = (double)runtime / NSEC_PER_MSEC;
8fb598e5 1701 struct thread *thread = machine__findnew_thread(trace->host,
314add6b
AH
1702 sample->pid,
1703 sample->tid);
c24ff998 1704 struct thread_trace *ttrace = thread__trace(thread, trace->output);
1302d88e
ACM
1705
1706 if (ttrace == NULL)
1707 goto out_dump;
1708
1709 ttrace->runtime_ms += runtime_ms;
1710 trace->runtime_ms += runtime_ms;
b91fc39f 1711 thread__put(thread);
1302d88e
ACM
1712 return 0;
1713
1714out_dump:
c24ff998 1715 fprintf(trace->output, "%s: comm=%s,pid=%u,runtime=%" PRIu64 ",vruntime=%" PRIu64 ")\n",
1302d88e
ACM
1716 evsel->name,
1717 perf_evsel__strval(evsel, sample, "comm"),
1718 (pid_t)perf_evsel__intval(evsel, sample, "pid"),
1719 runtime,
1720 perf_evsel__intval(evsel, sample, "vruntime"));
b91fc39f 1721 thread__put(thread);
1302d88e
ACM
1722 return 0;
1723}
1724
1d6c9407
WN
1725static void bpf_output__printer(enum binary_printer_ops op,
1726 unsigned int val, void *extra)
1727{
1728 FILE *output = extra;
1729 unsigned char ch = (unsigned char)val;
1730
1731 switch (op) {
1732 case BINARY_PRINT_CHAR_DATA:
1733 fprintf(output, "%c", isprint(ch) ? ch : '.');
1734 break;
1735 case BINARY_PRINT_DATA_BEGIN:
1736 case BINARY_PRINT_LINE_BEGIN:
1737 case BINARY_PRINT_ADDR:
1738 case BINARY_PRINT_NUM_DATA:
1739 case BINARY_PRINT_NUM_PAD:
1740 case BINARY_PRINT_SEP:
1741 case BINARY_PRINT_CHAR_PAD:
1742 case BINARY_PRINT_LINE_END:
1743 case BINARY_PRINT_DATA_END:
1744 default:
1745 break;
1746 }
1747}
1748
1749static void bpf_output__fprintf(struct trace *trace,
1750 struct perf_sample *sample)
1751{
1752 print_binary(sample->raw_data, sample->raw_size, 8,
1753 bpf_output__printer, trace->output);
1754}
1755
14a052df
ACM
1756static int trace__event_handler(struct trace *trace, struct perf_evsel *evsel,
1757 union perf_event *event __maybe_unused,
1758 struct perf_sample *sample)
1759{
7ad35615
ACM
1760 int callchain_ret = 0;
1761
1762 if (sample->callchain) {
1763 callchain_ret = trace__resolve_callchain(trace, evsel, sample, &callchain_cursor);
1764 if (callchain_ret == 0) {
1765 if (callchain_cursor.nr < trace->min_stack)
1766 goto out;
1767 callchain_ret = 1;
1768 }
1769 }
1770
14a052df
ACM
1771 trace__printf_interrupted_entry(trace, sample);
1772 trace__fprintf_tstamp(trace, sample->time, trace->output);
0808921a
ACM
1773
1774 if (trace->trace_syscalls)
1775 fprintf(trace->output, "( ): ");
1776
1777 fprintf(trace->output, "%s:", evsel->name);
14a052df 1778
1d6c9407
WN
1779 if (perf_evsel__is_bpf_output(evsel)) {
1780 bpf_output__fprintf(trace, sample);
1781 } else if (evsel->tp_format) {
14a052df
ACM
1782 event_format__fprintf(evsel->tp_format, sample->cpu,
1783 sample->raw_data, sample->raw_size,
1784 trace->output);
1785 }
1786
1787 fprintf(trace->output, ")\n");
202ff968 1788
7ad35615
ACM
1789 if (callchain_ret > 0)
1790 trace__fprintf_callchain(trace, sample);
1791 else if (callchain_ret < 0)
1792 pr_err("Problem processing %s callchain, skipping...\n", perf_evsel__name(evsel));
1793out:
14a052df
ACM
1794 return 0;
1795}
1796
598d02c5
SF
1797static void print_location(FILE *f, struct perf_sample *sample,
1798 struct addr_location *al,
1799 bool print_dso, bool print_sym)
1800{
1801
1802 if ((verbose || print_dso) && al->map)
1803 fprintf(f, "%s@", al->map->dso->long_name);
1804
1805 if ((verbose || print_sym) && al->sym)
4414a3c5 1806 fprintf(f, "%s+0x%" PRIx64, al->sym->name,
598d02c5
SF
1807 al->addr - al->sym->start);
1808 else if (al->map)
4414a3c5 1809 fprintf(f, "0x%" PRIx64, al->addr);
598d02c5 1810 else
4414a3c5 1811 fprintf(f, "0x%" PRIx64, sample->addr);
598d02c5
SF
1812}
1813
1814static int trace__pgfault(struct trace *trace,
1815 struct perf_evsel *evsel,
473398a2 1816 union perf_event *event __maybe_unused,
598d02c5
SF
1817 struct perf_sample *sample)
1818{
1819 struct thread *thread;
598d02c5
SF
1820 struct addr_location al;
1821 char map_type = 'd';
a2ea67d7 1822 struct thread_trace *ttrace;
b91fc39f 1823 int err = -1;
1df54290 1824 int callchain_ret = 0;
598d02c5
SF
1825
1826 thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
1df54290
ACM
1827
1828 if (sample->callchain) {
1829 callchain_ret = trace__resolve_callchain(trace, evsel, sample, &callchain_cursor);
1830 if (callchain_ret == 0) {
1831 if (callchain_cursor.nr < trace->min_stack)
1832 goto out_put;
1833 callchain_ret = 1;
1834 }
1835 }
1836
a2ea67d7
SF
1837 ttrace = thread__trace(thread, trace->output);
1838 if (ttrace == NULL)
b91fc39f 1839 goto out_put;
a2ea67d7
SF
1840
1841 if (evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MAJ)
1842 ttrace->pfmaj++;
1843 else
1844 ttrace->pfmin++;
1845
1846 if (trace->summary_only)
b91fc39f 1847 goto out;
598d02c5 1848
473398a2 1849 thread__find_addr_location(thread, sample->cpumode, MAP__FUNCTION,
598d02c5
SF
1850 sample->ip, &al);
1851
1852 trace__fprintf_entry_head(trace, thread, 0, sample->time, trace->output);
1853
1854 fprintf(trace->output, "%sfault [",
1855 evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MAJ ?
1856 "maj" : "min");
1857
1858 print_location(trace->output, sample, &al, false, true);
1859
1860 fprintf(trace->output, "] => ");
1861
473398a2 1862 thread__find_addr_location(thread, sample->cpumode, MAP__VARIABLE,
598d02c5
SF
1863 sample->addr, &al);
1864
1865 if (!al.map) {
473398a2 1866 thread__find_addr_location(thread, sample->cpumode,
598d02c5
SF
1867 MAP__FUNCTION, sample->addr, &al);
1868
1869 if (al.map)
1870 map_type = 'x';
1871 else
1872 map_type = '?';
1873 }
1874
1875 print_location(trace->output, sample, &al, true, false);
1876
1877 fprintf(trace->output, " (%c%c)\n", map_type, al.level);
0c3a6ef4 1878
1df54290
ACM
1879 if (callchain_ret > 0)
1880 trace__fprintf_callchain(trace, sample);
1881 else if (callchain_ret < 0)
1882 pr_err("Problem processing %s callchain, skipping...\n", perf_evsel__name(evsel));
b91fc39f
ACM
1883out:
1884 err = 0;
1885out_put:
1886 thread__put(thread);
1887 return err;
598d02c5
SF
1888}
1889
bdc89661
DA
1890static bool skip_sample(struct trace *trace, struct perf_sample *sample)
1891{
1892 if ((trace->pid_list && intlist__find(trace->pid_list, sample->pid)) ||
1893 (trace->tid_list && intlist__find(trace->tid_list, sample->tid)))
1894 return false;
1895
1896 if (trace->pid_list || trace->tid_list)
1897 return true;
1898
1899 return false;
1900}
1901
e6001980 1902static void trace__set_base_time(struct trace *trace,
8a07a809 1903 struct perf_evsel *evsel,
e6001980
ACM
1904 struct perf_sample *sample)
1905{
8a07a809
ACM
1906 /*
1907 * BPF events were not setting PERF_SAMPLE_TIME, so be more robust
1908 * and don't use sample->time unconditionally, we may end up having
1909 * some other event in the future without PERF_SAMPLE_TIME for good
1910 * reason, i.e. we may not be interested in its timestamps, just in
1911 * it taking place, picking some piece of information when it
1912 * appears in our event stream (vfs_getname comes to mind).
1913 */
1914 if (trace->base_time == 0 && !trace->full_time &&
1915 (evsel->attr.sample_type & PERF_SAMPLE_TIME))
e6001980
ACM
1916 trace->base_time = sample->time;
1917}
1918
6810fc91 1919static int trace__process_sample(struct perf_tool *tool,
0c82adcf 1920 union perf_event *event,
6810fc91
DA
1921 struct perf_sample *sample,
1922 struct perf_evsel *evsel,
1923 struct machine *machine __maybe_unused)
1924{
1925 struct trace *trace = container_of(tool, struct trace, tool);
1926 int err = 0;
1927
744a9719 1928 tracepoint_handler handler = evsel->handler;
6810fc91 1929
bdc89661
DA
1930 if (skip_sample(trace, sample))
1931 return 0;
1932
e6001980 1933 trace__set_base_time(trace, evsel, sample);
6810fc91 1934
3160565f
DA
1935 if (handler) {
1936 ++trace->nr_events;
0c82adcf 1937 handler(trace, evsel, event, sample);
3160565f 1938 }
6810fc91
DA
1939
1940 return err;
1941}
1942
bdc89661
DA
1943static int parse_target_str(struct trace *trace)
1944{
1945 if (trace->opts.target.pid) {
1946 trace->pid_list = intlist__new(trace->opts.target.pid);
1947 if (trace->pid_list == NULL) {
1948 pr_err("Error parsing process id string\n");
1949 return -EINVAL;
1950 }
1951 }
1952
1953 if (trace->opts.target.tid) {
1954 trace->tid_list = intlist__new(trace->opts.target.tid);
1955 if (trace->tid_list == NULL) {
1956 pr_err("Error parsing thread id string\n");
1957 return -EINVAL;
1958 }
1959 }
1960
1961 return 0;
1962}
1963
1e28fe0a 1964static int trace__record(struct trace *trace, int argc, const char **argv)
5e2485b1
DA
1965{
1966 unsigned int rec_argc, i, j;
1967 const char **rec_argv;
1968 const char * const record_args[] = {
1969 "record",
1970 "-R",
1971 "-m", "1024",
1972 "-c", "1",
5e2485b1
DA
1973 };
1974
1e28fe0a
SF
1975 const char * const sc_args[] = { "-e", };
1976 unsigned int sc_args_nr = ARRAY_SIZE(sc_args);
1977 const char * const majpf_args[] = { "-e", "major-faults" };
1978 unsigned int majpf_args_nr = ARRAY_SIZE(majpf_args);
1979 const char * const minpf_args[] = { "-e", "minor-faults" };
1980 unsigned int minpf_args_nr = ARRAY_SIZE(minpf_args);
1981
9aca7f17 1982 /* +1 is for the event string below */
1e28fe0a
SF
1983 rec_argc = ARRAY_SIZE(record_args) + sc_args_nr + 1 +
1984 majpf_args_nr + minpf_args_nr + argc;
5e2485b1
DA
1985 rec_argv = calloc(rec_argc + 1, sizeof(char *));
1986
1987 if (rec_argv == NULL)
1988 return -ENOMEM;
1989
1e28fe0a 1990 j = 0;
5e2485b1 1991 for (i = 0; i < ARRAY_SIZE(record_args); i++)
1e28fe0a
SF
1992 rec_argv[j++] = record_args[i];
1993
e281a960
SF
1994 if (trace->trace_syscalls) {
1995 for (i = 0; i < sc_args_nr; i++)
1996 rec_argv[j++] = sc_args[i];
1997
1998 /* event string may be different for older kernels - e.g., RHEL6 */
1999 if (is_valid_tracepoint("raw_syscalls:sys_enter"))
2000 rec_argv[j++] = "raw_syscalls:sys_enter,raw_syscalls:sys_exit";
2001 else if (is_valid_tracepoint("syscalls:sys_enter"))
2002 rec_argv[j++] = "syscalls:sys_enter,syscalls:sys_exit";
2003 else {
2004 pr_err("Neither raw_syscalls nor syscalls events exist.\n");
2005 return -1;
2006 }
9aca7f17 2007 }
9aca7f17 2008
1e28fe0a
SF
2009 if (trace->trace_pgfaults & TRACE_PFMAJ)
2010 for (i = 0; i < majpf_args_nr; i++)
2011 rec_argv[j++] = majpf_args[i];
2012
2013 if (trace->trace_pgfaults & TRACE_PFMIN)
2014 for (i = 0; i < minpf_args_nr; i++)
2015 rec_argv[j++] = minpf_args[i];
2016
2017 for (i = 0; i < (unsigned int)argc; i++)
2018 rec_argv[j++] = argv[i];
5e2485b1 2019
1e28fe0a 2020 return cmd_record(j, rec_argv, NULL);
5e2485b1
DA
2021}
2022
bf2575c1
DA
2023static size_t trace__fprintf_thread_summary(struct trace *trace, FILE *fp);
2024
08c98776 2025static bool perf_evlist__add_vfs_getname(struct perf_evlist *evlist)
c522739d 2026{
ef503831 2027 struct perf_evsel *evsel = perf_evsel__newtp("probe", "vfs_getname");
8dd2a131
JO
2028
2029 if (IS_ERR(evsel))
08c98776 2030 return false;
c522739d
ACM
2031
2032 if (perf_evsel__field(evsel, "pathname") == NULL) {
2033 perf_evsel__delete(evsel);
08c98776 2034 return false;
c522739d
ACM
2035 }
2036
744a9719 2037 evsel->handler = trace__vfs_getname;
c522739d 2038 perf_evlist__add(evlist, evsel);
08c98776 2039 return true;
c522739d
ACM
2040}
2041
0ae537cb 2042static struct perf_evsel *perf_evsel__new_pgfault(u64 config)
598d02c5
SF
2043{
2044 struct perf_evsel *evsel;
2045 struct perf_event_attr attr = {
2046 .type = PERF_TYPE_SOFTWARE,
2047 .mmap_data = 1,
598d02c5
SF
2048 };
2049
2050 attr.config = config;
0524798c 2051 attr.sample_period = 1;
598d02c5
SF
2052
2053 event_attr_init(&attr);
2054
2055 evsel = perf_evsel__new(&attr);
0ae537cb
ACM
2056 if (evsel)
2057 evsel->handler = trace__pgfault;
598d02c5 2058
0ae537cb 2059 return evsel;
598d02c5
SF
2060}
2061
ddbb1b13
ACM
2062static void trace__handle_event(struct trace *trace, union perf_event *event, struct perf_sample *sample)
2063{
2064 const u32 type = event->header.type;
2065 struct perf_evsel *evsel;
2066
ddbb1b13
ACM
2067 if (type != PERF_RECORD_SAMPLE) {
2068 trace__process_event(trace, trace->host, event, sample);
2069 return;
2070 }
2071
2072 evsel = perf_evlist__id2evsel(trace->evlist, sample->id);
2073 if (evsel == NULL) {
2074 fprintf(trace->output, "Unknown tp ID %" PRIu64 ", skipping...\n", sample->id);
2075 return;
2076 }
2077
e6001980
ACM
2078 trace__set_base_time(trace, evsel, sample);
2079
ddbb1b13
ACM
2080 if (evsel->attr.type == PERF_TYPE_TRACEPOINT &&
2081 sample->raw_data == NULL) {
2082 fprintf(trace->output, "%s sample with no payload for tid: %d, cpu %d, raw_size=%d, skipping...\n",
2083 perf_evsel__name(evsel), sample->tid,
2084 sample->cpu, sample->raw_size);
2085 } else {
2086 tracepoint_handler handler = evsel->handler;
2087 handler(trace, evsel, event, sample);
2088 }
2089}
2090
c27366f0
ACM
2091static int trace__add_syscall_newtp(struct trace *trace)
2092{
2093 int ret = -1;
2094 struct perf_evlist *evlist = trace->evlist;
2095 struct perf_evsel *sys_enter, *sys_exit;
2096
2097 sys_enter = perf_evsel__syscall_newtp("sys_enter", trace__sys_enter);
2098 if (sys_enter == NULL)
2099 goto out;
2100
2101 if (perf_evsel__init_sc_tp_ptr_field(sys_enter, args))
2102 goto out_delete_sys_enter;
2103
2104 sys_exit = perf_evsel__syscall_newtp("sys_exit", trace__sys_exit);
2105 if (sys_exit == NULL)
2106 goto out_delete_sys_enter;
2107
2108 if (perf_evsel__init_sc_tp_uint_field(sys_exit, ret))
2109 goto out_delete_sys_exit;
2110
2111 perf_evlist__add(evlist, sys_enter);
2112 perf_evlist__add(evlist, sys_exit);
2113
2ddd5c04 2114 if (callchain_param.enabled && !trace->kernel_syscallchains) {
44621819
ACM
2115 /*
2116 * We're interested only in the user space callchain
2117 * leading to the syscall, allow overriding that for
2118 * debugging reasons using --kernel_syscall_callchains
2119 */
2120 sys_exit->attr.exclude_callchain_kernel = 1;
2121 }
2122
8b3ce757
ACM
2123 trace->syscalls.events.sys_enter = sys_enter;
2124 trace->syscalls.events.sys_exit = sys_exit;
c27366f0
ACM
2125
2126 ret = 0;
2127out:
2128 return ret;
2129
2130out_delete_sys_exit:
2131 perf_evsel__delete_priv(sys_exit);
2132out_delete_sys_enter:
2133 perf_evsel__delete_priv(sys_enter);
2134 goto out;
2135}
2136
19867b61
ACM
2137static int trace__set_ev_qualifier_filter(struct trace *trace)
2138{
2139 int err = -1;
2140 char *filter = asprintf_expr_inout_ints("id", !trace->not_ev_qualifier,
2141 trace->ev_qualifier_ids.nr,
2142 trace->ev_qualifier_ids.entries);
2143
2144 if (filter == NULL)
2145 goto out_enomem;
2146
2147 if (!perf_evsel__append_filter(trace->syscalls.events.sys_enter, "&&", filter))
2148 err = perf_evsel__append_filter(trace->syscalls.events.sys_exit, "&&", filter);
2149
2150 free(filter);
2151out:
2152 return err;
2153out_enomem:
2154 errno = ENOMEM;
2155 goto out;
2156}
c27366f0 2157
f15eb531 2158static int trace__run(struct trace *trace, int argc, const char **argv)
514f1c67 2159{
14a052df 2160 struct perf_evlist *evlist = trace->evlist;
0ae537cb 2161 struct perf_evsel *evsel, *pgfault_maj = NULL, *pgfault_min = NULL;
efd5745e
ACM
2162 int err = -1, i;
2163 unsigned long before;
f15eb531 2164 const bool forks = argc > 0;
46fb3c21 2165 bool draining = false;
514f1c67 2166
75b757ca
ACM
2167 trace->live = true;
2168
c27366f0 2169 if (trace->trace_syscalls && trace__add_syscall_newtp(trace))
801c67b0 2170 goto out_error_raw_syscalls;
514f1c67 2171
e281a960 2172 if (trace->trace_syscalls)
08c98776 2173 trace->vfs_getname = perf_evlist__add_vfs_getname(evlist);
c522739d 2174
0ae537cb
ACM
2175 if ((trace->trace_pgfaults & TRACE_PFMAJ)) {
2176 pgfault_maj = perf_evsel__new_pgfault(PERF_COUNT_SW_PAGE_FAULTS_MAJ);
2177 if (pgfault_maj == NULL)
2178 goto out_error_mem;
2179 perf_evlist__add(evlist, pgfault_maj);
e2726d99 2180 }
598d02c5 2181
0ae537cb
ACM
2182 if ((trace->trace_pgfaults & TRACE_PFMIN)) {
2183 pgfault_min = perf_evsel__new_pgfault(PERF_COUNT_SW_PAGE_FAULTS_MIN);
2184 if (pgfault_min == NULL)
2185 goto out_error_mem;
2186 perf_evlist__add(evlist, pgfault_min);
2187 }
598d02c5 2188
1302d88e 2189 if (trace->sched &&
2cc990ba
ACM
2190 perf_evlist__add_newtp(evlist, "sched", "sched_stat_runtime",
2191 trace__sched_stat_runtime))
2192 goto out_error_sched_stat_runtime;
1302d88e 2193
514f1c67
ACM
2194 err = perf_evlist__create_maps(evlist, &trace->opts.target);
2195 if (err < 0) {
c24ff998 2196 fprintf(trace->output, "Problems parsing the target to trace, check your options!\n");
514f1c67
ACM
2197 goto out_delete_evlist;
2198 }
2199
752fde44
ACM
2200 err = trace__symbols_init(trace, evlist);
2201 if (err < 0) {
c24ff998 2202 fprintf(trace->output, "Problems initializing symbol libraries!\n");
03ad9747 2203 goto out_delete_evlist;
752fde44
ACM
2204 }
2205
fde54b78
ACM
2206 perf_evlist__config(evlist, &trace->opts, NULL);
2207
0c3a6ef4
ACM
2208 if (callchain_param.enabled) {
2209 bool use_identifier = false;
2210
2211 if (trace->syscalls.events.sys_exit) {
2212 perf_evsel__config_callchain(trace->syscalls.events.sys_exit,
2213 &trace->opts, &callchain_param);
2214 use_identifier = true;
2215 }
2216
2217 if (pgfault_maj) {
2218 perf_evsel__config_callchain(pgfault_maj, &trace->opts, &callchain_param);
2219 use_identifier = true;
2220 }
2221
2222 if (pgfault_min) {
2223 perf_evsel__config_callchain(pgfault_min, &trace->opts, &callchain_param);
2224 use_identifier = true;
2225 }
2226
2227 if (use_identifier) {
2228 /*
2229 * Now we have evsels with different sample_ids, use
2230 * PERF_SAMPLE_IDENTIFIER to map from sample to evsel
2231 * from a fixed position in each ring buffer record.
2232 *
2233 * As of this the changeset introducing this comment, this
2234 * isn't strictly needed, as the fields that can come before
2235 * PERF_SAMPLE_ID are all used, but we'll probably disable
2236 * some of those for things like copying the payload of
2237 * pointer syscall arguments, and for vfs_getname we don't
2238 * need PERF_SAMPLE_ADDR and PERF_SAMPLE_IP, so do this
2239 * here as a warning we need to use PERF_SAMPLE_IDENTIFIER.
2240 */
2241 perf_evlist__set_sample_bit(evlist, IDENTIFIER);
2242 perf_evlist__reset_sample_bit(evlist, ID);
2243 }
fde54b78 2244 }
514f1c67 2245
f15eb531
NK
2246 signal(SIGCHLD, sig_handler);
2247 signal(SIGINT, sig_handler);
2248
2249 if (forks) {
6ef73ec4 2250 err = perf_evlist__prepare_workload(evlist, &trace->opts.target,
735f7e0b 2251 argv, false, NULL);
f15eb531 2252 if (err < 0) {
c24ff998 2253 fprintf(trace->output, "Couldn't run the workload!\n");
03ad9747 2254 goto out_delete_evlist;
f15eb531
NK
2255 }
2256 }
2257
514f1c67 2258 err = perf_evlist__open(evlist);
a8f23d8f
ACM
2259 if (err < 0)
2260 goto out_error_open;
514f1c67 2261
ba504235
WN
2262 err = bpf__apply_obj_config();
2263 if (err) {
2264 char errbuf[BUFSIZ];
2265
2266 bpf__strerror_apply_obj_config(err, errbuf, sizeof(errbuf));
2267 pr_err("ERROR: Apply config to BPF failed: %s\n",
2268 errbuf);
2269 goto out_error_open;
2270 }
2271
241b057c
ACM
2272 /*
2273 * Better not use !target__has_task() here because we need to cover the
2274 * case where no threads were specified in the command line, but a
2275 * workload was, and in that case we will fill in the thread_map when
2276 * we fork the workload in perf_evlist__prepare_workload.
2277 */
f078c385
ACM
2278 if (trace->filter_pids.nr > 0)
2279 err = perf_evlist__set_filter_pids(evlist, trace->filter_pids.nr, trace->filter_pids.entries);
e13798c7 2280 else if (thread_map__pid(evlist->threads, 0) == -1)
f078c385
ACM
2281 err = perf_evlist__set_filter_pid(evlist, getpid());
2282
94ad89bc
ACM
2283 if (err < 0)
2284 goto out_error_mem;
2285
19867b61
ACM
2286 if (trace->ev_qualifier_ids.nr > 0) {
2287 err = trace__set_ev_qualifier_filter(trace);
2288 if (err < 0)
2289 goto out_errno;
19867b61 2290
2e5e5f87
ACM
2291 pr_debug("event qualifier tracepoint filter: %s\n",
2292 trace->syscalls.events.sys_exit->filter);
2293 }
19867b61 2294
94ad89bc
ACM
2295 err = perf_evlist__apply_filters(evlist, &evsel);
2296 if (err < 0)
2297 goto out_error_apply_filters;
241b057c 2298
f885037e 2299 err = perf_evlist__mmap(evlist, trace->opts.mmap_pages, false);
e09b18d4
ACM
2300 if (err < 0)
2301 goto out_error_mmap;
514f1c67 2302
cb24d01d
ACM
2303 if (!target__none(&trace->opts.target))
2304 perf_evlist__enable(evlist);
2305
f15eb531
NK
2306 if (forks)
2307 perf_evlist__start_workload(evlist);
2308
e13798c7 2309 trace->multiple_threads = thread_map__pid(evlist->threads, 0) == -1 ||
42052bea
ACM
2310 evlist->threads->nr > 1 ||
2311 perf_evlist__first(evlist)->attr.inherit;
514f1c67 2312again:
efd5745e 2313 before = trace->nr_events;
514f1c67
ACM
2314
2315 for (i = 0; i < evlist->nr_mmaps; i++) {
2316 union perf_event *event;
2317
2318 while ((event = perf_evlist__mmap_read(evlist, i)) != NULL) {
514f1c67 2319 struct perf_sample sample;
514f1c67 2320
efd5745e 2321 ++trace->nr_events;
514f1c67 2322
514f1c67
ACM
2323 err = perf_evlist__parse_sample(evlist, event, &sample);
2324 if (err) {
c24ff998 2325 fprintf(trace->output, "Can't parse sample, err = %d, skipping...\n", err);
8e50d384 2326 goto next_event;
514f1c67
ACM
2327 }
2328
ddbb1b13 2329 trace__handle_event(trace, event, &sample);
8e50d384
ZZ
2330next_event:
2331 perf_evlist__mmap_consume(evlist, i);
20c5f10e 2332
ba209f85
ACM
2333 if (interrupted)
2334 goto out_disable;
02ac5421
ACM
2335
2336 if (done && !draining) {
2337 perf_evlist__disable(evlist);
2338 draining = true;
2339 }
514f1c67
ACM
2340 }
2341 }
2342
efd5745e 2343 if (trace->nr_events == before) {
ba209f85 2344 int timeout = done ? 100 : -1;
f15eb531 2345
46fb3c21
ACM
2346 if (!draining && perf_evlist__poll(evlist, timeout) > 0) {
2347 if (perf_evlist__filter_pollfd(evlist, POLLERR | POLLHUP) == 0)
2348 draining = true;
2349
ba209f85 2350 goto again;
46fb3c21 2351 }
ba209f85
ACM
2352 } else {
2353 goto again;
f15eb531
NK
2354 }
2355
ba209f85 2356out_disable:
f3b623b8
ACM
2357 thread__zput(trace->current);
2358
ba209f85 2359 perf_evlist__disable(evlist);
514f1c67 2360
c522739d
ACM
2361 if (!err) {
2362 if (trace->summary)
2363 trace__fprintf_thread_summary(trace, trace->output);
2364
2365 if (trace->show_tool_stats) {
2366 fprintf(trace->output, "Stats:\n "
2367 " vfs_getname : %" PRIu64 "\n"
2368 " proc_getname: %" PRIu64 "\n",
2369 trace->stats.vfs_getname,
2370 trace->stats.proc_getname);
2371 }
2372 }
bf2575c1 2373
514f1c67
ACM
2374out_delete_evlist:
2375 perf_evlist__delete(evlist);
14a052df 2376 trace->evlist = NULL;
75b757ca 2377 trace->live = false;
514f1c67 2378 return err;
6ef068cb
ACM
2379{
2380 char errbuf[BUFSIZ];
a8f23d8f 2381
2cc990ba 2382out_error_sched_stat_runtime:
988bdb31 2383 tracing_path__strerror_open_tp(errno, errbuf, sizeof(errbuf), "sched", "sched_stat_runtime");
2cc990ba
ACM
2384 goto out_error;
2385
801c67b0 2386out_error_raw_syscalls:
988bdb31 2387 tracing_path__strerror_open_tp(errno, errbuf, sizeof(errbuf), "raw_syscalls", "sys_(enter|exit)");
a8f23d8f
ACM
2388 goto out_error;
2389
e09b18d4
ACM
2390out_error_mmap:
2391 perf_evlist__strerror_mmap(evlist, errno, errbuf, sizeof(errbuf));
2392 goto out_error;
2393
a8f23d8f
ACM
2394out_error_open:
2395 perf_evlist__strerror_open(evlist, errno, errbuf, sizeof(errbuf));
2396
2397out_error:
6ef068cb 2398 fprintf(trace->output, "%s\n", errbuf);
87f91868 2399 goto out_delete_evlist;
94ad89bc
ACM
2400
2401out_error_apply_filters:
2402 fprintf(trace->output,
2403 "Failed to set filter \"%s\" on event %s with %d (%s)\n",
2404 evsel->filter, perf_evsel__name(evsel), errno,
2405 strerror_r(errno, errbuf, sizeof(errbuf)));
2406 goto out_delete_evlist;
514f1c67 2407}
5ed08dae
ACM
2408out_error_mem:
2409 fprintf(trace->output, "Not enough memory to run!\n");
2410 goto out_delete_evlist;
19867b61
ACM
2411
2412out_errno:
2413 fprintf(trace->output, "errno=%d,%s\n", errno, strerror(errno));
2414 goto out_delete_evlist;
a8f23d8f 2415}
514f1c67 2416
6810fc91
DA
2417static int trace__replay(struct trace *trace)
2418{
2419 const struct perf_evsel_str_handler handlers[] = {
c522739d 2420 { "probe:vfs_getname", trace__vfs_getname, },
6810fc91 2421 };
f5fc1412
JO
2422 struct perf_data_file file = {
2423 .path = input_name,
2424 .mode = PERF_DATA_MODE_READ,
e366a6d8 2425 .force = trace->force,
f5fc1412 2426 };
6810fc91 2427 struct perf_session *session;
003824e8 2428 struct perf_evsel *evsel;
6810fc91
DA
2429 int err = -1;
2430
2431 trace->tool.sample = trace__process_sample;
2432 trace->tool.mmap = perf_event__process_mmap;
384c671e 2433 trace->tool.mmap2 = perf_event__process_mmap2;
6810fc91
DA
2434 trace->tool.comm = perf_event__process_comm;
2435 trace->tool.exit = perf_event__process_exit;
2436 trace->tool.fork = perf_event__process_fork;
2437 trace->tool.attr = perf_event__process_attr;
2438 trace->tool.tracing_data = perf_event__process_tracing_data;
2439 trace->tool.build_id = perf_event__process_build_id;
2440
0a8cb85c 2441 trace->tool.ordered_events = true;
6810fc91
DA
2442 trace->tool.ordering_requires_timestamps = true;
2443
2444 /* add tid to output */
2445 trace->multiple_threads = true;
2446
f5fc1412 2447 session = perf_session__new(&file, false, &trace->tool);
6810fc91 2448 if (session == NULL)
52e02834 2449 return -1;
6810fc91 2450
0a7e6d1b 2451 if (symbol__init(&session->header.env) < 0)
cb2ffae2
NK
2452 goto out;
2453
8fb598e5
DA
2454 trace->host = &session->machines.host;
2455
6810fc91
DA
2456 err = perf_session__set_tracepoints_handlers(session, handlers);
2457 if (err)
2458 goto out;
2459
003824e8
NK
2460 evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
2461 "raw_syscalls:sys_enter");
9aca7f17
DA
2462 /* older kernels have syscalls tp versus raw_syscalls */
2463 if (evsel == NULL)
2464 evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
2465 "syscalls:sys_enter");
003824e8 2466
e281a960
SF
2467 if (evsel &&
2468 (perf_evsel__init_syscall_tp(evsel, trace__sys_enter) < 0 ||
2469 perf_evsel__init_sc_tp_ptr_field(evsel, args))) {
003824e8
NK
2470 pr_err("Error during initialize raw_syscalls:sys_enter event\n");
2471 goto out;
2472 }
2473
2474 evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
2475 "raw_syscalls:sys_exit");
9aca7f17
DA
2476 if (evsel == NULL)
2477 evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
2478 "syscalls:sys_exit");
e281a960
SF
2479 if (evsel &&
2480 (perf_evsel__init_syscall_tp(evsel, trace__sys_exit) < 0 ||
2481 perf_evsel__init_sc_tp_uint_field(evsel, ret))) {
003824e8 2482 pr_err("Error during initialize raw_syscalls:sys_exit event\n");
6810fc91
DA
2483 goto out;
2484 }
2485
1e28fe0a
SF
2486 evlist__for_each(session->evlist, evsel) {
2487 if (evsel->attr.type == PERF_TYPE_SOFTWARE &&
2488 (evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MAJ ||
2489 evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MIN ||
2490 evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS))
2491 evsel->handler = trace__pgfault;
2492 }
2493
bdc89661
DA
2494 err = parse_target_str(trace);
2495 if (err != 0)
2496 goto out;
2497
6810fc91
DA
2498 setup_pager();
2499
b7b61cbe 2500 err = perf_session__process_events(session);
6810fc91
DA
2501 if (err)
2502 pr_err("Failed to process events, error %d", err);
2503
bf2575c1
DA
2504 else if (trace->summary)
2505 trace__fprintf_thread_summary(trace, trace->output);
2506
6810fc91
DA
2507out:
2508 perf_session__delete(session);
2509
2510 return err;
2511}
2512
1302d88e
ACM
/*
 * Print the heading of the per-thread summary to @fp.
 *
 * Returns what fprintf() returned, converted to size_t.
 */
static size_t trace__fprintf_threads_header(FILE *fp)
{
	return fprintf(fp, "\n Summary of events:\n\n");
}
2521
b535d523
ACM
2522DEFINE_RESORT_RB(syscall_stats, a->msecs > b->msecs,
2523 struct stats *stats;
2524 double msecs;
2525 int syscall;
2526)
2527{
2528 struct int_node *source = rb_entry(nd, struct int_node, rb_node);
2529 struct stats *stats = source->priv;
2530
2531 entry->syscall = source->i;
2532 entry->stats = stats;
2533 entry->msecs = stats ? (u64)stats->n * (avg_stats(stats) / NSEC_PER_MSEC) : 0;
2534}
2535
bf2575c1
DA
2536static size_t thread__dump_stats(struct thread_trace *ttrace,
2537 struct trace *trace, FILE *fp)
2538{
bf2575c1
DA
2539 size_t printed = 0;
2540 struct syscall *sc;
b535d523
ACM
2541 struct rb_node *nd;
2542 DECLARE_RESORT_RB_INTLIST(syscall_stats, ttrace->syscall_stats);
bf2575c1 2543
b535d523 2544 if (syscall_stats == NULL)
bf2575c1
DA
2545 return 0;
2546
2547 printed += fprintf(fp, "\n");
2548
834fd46d
MW
2549 printed += fprintf(fp, " syscall calls total min avg max stddev\n");
2550 printed += fprintf(fp, " (msec) (msec) (msec) (msec) (%%)\n");
2551 printed += fprintf(fp, " --------------- -------- --------- --------- --------- --------- ------\n");
99ff7150 2552
b535d523
ACM
2553 resort_rb__for_each(nd, syscall_stats) {
2554 struct stats *stats = syscall_stats_entry->stats;
bf2575c1
DA
2555 if (stats) {
2556 double min = (double)(stats->min) / NSEC_PER_MSEC;
2557 double max = (double)(stats->max) / NSEC_PER_MSEC;
2558 double avg = avg_stats(stats);
2559 double pct;
2560 u64 n = (u64) stats->n;
2561
2562 pct = avg ? 100.0 * stddev_stats(stats)/avg : 0.0;
2563 avg /= NSEC_PER_MSEC;
2564
b535d523 2565 sc = &trace->syscalls.table[syscall_stats_entry->syscall];
99ff7150 2566 printed += fprintf(fp, " %-15s", sc->name);
834fd46d 2567 printed += fprintf(fp, " %8" PRIu64 " %9.3f %9.3f %9.3f",
b535d523 2568 n, syscall_stats_entry->msecs, min, avg);
27a778b5 2569 printed += fprintf(fp, " %9.3f %9.2f%%\n", max, pct);
bf2575c1 2570 }
bf2575c1
DA
2571 }
2572
b535d523 2573 resort_rb__delete(syscall_stats);
bf2575c1 2574 printed += fprintf(fp, "\n\n");
1302d88e
ACM
2575
2576 return printed;
2577}
2578
96c14451 2579static size_t trace__fprintf_thread(FILE *fp, struct thread *thread, struct trace *trace)
896cbb56 2580{
96c14451 2581 size_t printed = 0;
89dceb22 2582 struct thread_trace *ttrace = thread__priv(thread);
896cbb56
DA
2583 double ratio;
2584
2585 if (ttrace == NULL)
2586 return 0;
2587
2588 ratio = (double)ttrace->nr_events / trace->nr_events * 100.0;
2589
15e65c69 2590 printed += fprintf(fp, " %s (%d), ", thread__comm_str(thread), thread->tid);
99ff7150 2591 printed += fprintf(fp, "%lu events, ", ttrace->nr_events);
15e65c69 2592 printed += fprintf(fp, "%.1f%%", ratio);
a2ea67d7
SF
2593 if (ttrace->pfmaj)
2594 printed += fprintf(fp, ", %lu majfaults", ttrace->pfmaj);
2595 if (ttrace->pfmin)
2596 printed += fprintf(fp, ", %lu minfaults", ttrace->pfmin);
03548ebf
ACM
2597 if (trace->sched)
2598 printed += fprintf(fp, ", %.3f msec\n", ttrace->runtime_ms);
2599 else if (fputc('\n', fp) != EOF)
2600 ++printed;
2601
bf2575c1 2602 printed += thread__dump_stats(ttrace, trace, fp);
896cbb56 2603
96c14451
ACM
2604 return printed;
2605}
896cbb56 2606
96c14451
ACM
2607static unsigned long thread__nr_events(struct thread_trace *ttrace)
2608{
2609 return ttrace ? ttrace->nr_events : 0;
2610}
2611
2612DEFINE_RESORT_RB(threads, (thread__nr_events(a->thread->priv) < thread__nr_events(b->thread->priv)),
2613 struct thread *thread;
2614)
2615{
2616 entry->thread = rb_entry(nd, struct thread, rb_node);
896cbb56
DA
2617}
2618
1302d88e
ACM
2619static size_t trace__fprintf_thread_summary(struct trace *trace, FILE *fp)
2620{
96c14451
ACM
2621 DECLARE_RESORT_RB_MACHINE_THREADS(threads, trace->host);
2622 size_t printed = trace__fprintf_threads_header(fp);
2623 struct rb_node *nd;
1302d88e 2624
96c14451
ACM
2625 if (threads == NULL) {
2626 fprintf(fp, "%s", "Error sorting output by nr_events!\n");
2627 return 0;
2628 }
2629
2630 resort_rb__for_each(nd, threads)
2631 printed += trace__fprintf_thread(fp, threads_entry->thread, trace);
896cbb56 2632
96c14451
ACM
2633 resort_rb__delete(threads);
2634
2635 return printed;
1302d88e
ACM
2636}
2637
ae9ed035
ACM
2638static int trace__set_duration(const struct option *opt, const char *str,
2639 int unset __maybe_unused)
2640{
2641 struct trace *trace = opt->value;
2642
2643 trace->duration_filter = atof(str);
2644 return 0;
2645}
2646
f078c385
ACM
2647static int trace__set_filter_pids(const struct option *opt, const char *str,
2648 int unset __maybe_unused)
2649{
2650 int ret = -1;
2651 size_t i;
2652 struct trace *trace = opt->value;
2653 /*
2654 * FIXME: introduce a intarray class, plain parse csv and create a
2655 * { int nr, int entries[] } struct...
2656 */
2657 struct intlist *list = intlist__new(str);
2658
2659 if (list == NULL)
2660 return -1;
2661
2662 i = trace->filter_pids.nr = intlist__nr_entries(list) + 1;
2663 trace->filter_pids.entries = calloc(i, sizeof(pid_t));
2664
2665 if (trace->filter_pids.entries == NULL)
2666 goto out;
2667
2668 trace->filter_pids.entries[0] = getpid();
2669
2670 for (i = 1; i < trace->filter_pids.nr; ++i)
2671 trace->filter_pids.entries[i] = intlist__entry(list, i - 1)->i;
2672
2673 intlist__delete(list);
2674 ret = 0;
2675out:
2676 return ret;
2677}
2678
c24ff998
ACM
2679static int trace__open_output(struct trace *trace, const char *filename)
2680{
2681 struct stat st;
2682
2683 if (!stat(filename, &st) && st.st_size) {
2684 char oldname[PATH_MAX];
2685
2686 scnprintf(oldname, sizeof(oldname), "%s.old", filename);
2687 unlink(oldname);
2688 rename(filename, oldname);
2689 }
2690
2691 trace->output = fopen(filename, "w");
2692
2693 return trace->output == NULL ? -errno : 0;
2694}
2695
598d02c5
SF
2696static int parse_pagefaults(const struct option *opt, const char *str,
2697 int unset __maybe_unused)
2698{
2699 int *trace_pgfaults = opt->value;
2700
2701 if (strcmp(str, "all") == 0)
2702 *trace_pgfaults |= TRACE_PFMAJ | TRACE_PFMIN;
2703 else if (strcmp(str, "maj") == 0)
2704 *trace_pgfaults |= TRACE_PFMAJ;
2705 else if (strcmp(str, "min") == 0)
2706 *trace_pgfaults |= TRACE_PFMIN;
2707 else
2708 return -1;
2709
2710 return 0;
2711}
2712
14a052df
ACM
2713static void evlist__set_evsel_handler(struct perf_evlist *evlist, void *handler)
2714{
2715 struct perf_evsel *evsel;
2716
2717 evlist__for_each(evlist, evsel)
2718 evsel->handler = handler;
2719}
2720
514f1c67
ACM
2721int cmd_trace(int argc, const char **argv, const char *prefix __maybe_unused)
2722{
6fdd9cb7 2723 const char *trace_usage[] = {
f15eb531
NK
2724 "perf trace [<options>] [<command>]",
2725 "perf trace [<options>] -- <command> [<options>]",
5e2485b1
DA
2726 "perf trace record [<options>] [<command>]",
2727 "perf trace record [<options>] -- <command> [<options>]",
514f1c67
ACM
2728 NULL
2729 };
2730 struct trace trace = {
514f1c67
ACM
2731 .syscalls = {
2732 . max = -1,
2733 },
2734 .opts = {
2735 .target = {
2736 .uid = UINT_MAX,
2737 .uses_mmap = true,
2738 },
2739 .user_freq = UINT_MAX,
2740 .user_interval = ULLONG_MAX,
509051ea 2741 .no_buffering = true,
38d5447d 2742 .mmap_pages = UINT_MAX,
9d9cad76 2743 .proc_map_timeout = 500,
514f1c67 2744 },
007d66a0 2745 .output = stderr,
50c95cbd 2746 .show_comm = true,
e281a960 2747 .trace_syscalls = true,
44621819 2748 .kernel_syscallchains = false,
05614993 2749 .max_stack = UINT_MAX,
514f1c67 2750 };
c24ff998 2751 const char *output_name = NULL;
2ae3a312 2752 const char *ev_qualifier_str = NULL;
514f1c67 2753 const struct option trace_options[] = {
14a052df
ACM
2754 OPT_CALLBACK(0, "event", &trace.evlist, "event",
2755 "event selector. use 'perf list' to list available events",
2756 parse_events_option),
50c95cbd
ACM
2757 OPT_BOOLEAN(0, "comm", &trace.show_comm,
2758 "show the thread COMM next to its id"),
c522739d 2759 OPT_BOOLEAN(0, "tool_stats", &trace.show_tool_stats, "show tool stats"),
d303e85a 2760 OPT_STRING('e', "expr", &ev_qualifier_str, "expr", "list of syscalls to trace"),
c24ff998 2761 OPT_STRING('o', "output", &output_name, "file", "output file name"),
6810fc91 2762 OPT_STRING('i', "input", &input_name, "file", "Analyze events in file"),
514f1c67
ACM
2763 OPT_STRING('p', "pid", &trace.opts.target.pid, "pid",
2764 "trace events on existing process id"),
ac9be8ee 2765 OPT_STRING('t', "tid", &trace.opts.target.tid, "tid",
514f1c67 2766 "trace events on existing thread id"),
fa0e4ffe
ACM
2767 OPT_CALLBACK(0, "filter-pids", &trace, "CSV list of pids",
2768 "pids to filter (by the kernel)", trace__set_filter_pids),
ac9be8ee 2769 OPT_BOOLEAN('a', "all-cpus", &trace.opts.target.system_wide,
514f1c67 2770 "system-wide collection from all CPUs"),
ac9be8ee 2771 OPT_STRING('C', "cpu", &trace.opts.target.cpu_list, "cpu",
514f1c67 2772 "list of cpus to monitor"),
6810fc91 2773 OPT_BOOLEAN(0, "no-inherit", &trace.opts.no_inherit,
514f1c67 2774 "child tasks do not inherit counters"),
994a1f78
JO
2775 OPT_CALLBACK('m', "mmap-pages", &trace.opts.mmap_pages, "pages",
2776 "number of mmap data pages",
2777 perf_evlist__parse_mmap_pages),
ac9be8ee 2778 OPT_STRING('u', "uid", &trace.opts.target.uid_str, "user",
514f1c67 2779 "user to profile"),
ae9ed035
ACM
2780 OPT_CALLBACK(0, "duration", &trace, "float",
2781 "show only events with duration > N.M ms",
2782 trace__set_duration),
1302d88e 2783 OPT_BOOLEAN(0, "sched", &trace.sched, "show blocking scheduler events"),
7c304ee0 2784 OPT_INCR('v', "verbose", &verbose, "be more verbose"),
4bb09192
DA
2785 OPT_BOOLEAN('T', "time", &trace.full_time,
2786 "Show full timestamp, not time relative to first start"),
fd2eabaf
DA
2787 OPT_BOOLEAN('s', "summary", &trace.summary_only,
2788 "Show only syscall summary with statistics"),
2789 OPT_BOOLEAN('S', "with-summary", &trace.summary,
2790 "Show all syscalls and summary with statistics"),
598d02c5
SF
2791 OPT_CALLBACK_DEFAULT('F', "pf", &trace.trace_pgfaults, "all|maj|min",
2792 "Trace pagefaults", parse_pagefaults, "maj"),
e281a960 2793 OPT_BOOLEAN(0, "syscalls", &trace.trace_syscalls, "Trace syscalls"),
e366a6d8 2794 OPT_BOOLEAN('f', "force", &trace.force, "don't complain, do it"),
566a0885
MW
2795 OPT_CALLBACK(0, "call-graph", &trace.opts,
2796 "record_mode[,record_size]", record_callchain_help,
2797 &record_parse_callchain_opt),
44621819
ACM
2798 OPT_BOOLEAN(0, "kernel-syscall-graph", &trace.kernel_syscallchains,
2799 "Show the kernel callchains on the syscall exit path"),
5cf9c84e
ACM
2800 OPT_UINTEGER(0, "min-stack", &trace.min_stack,
2801 "Set the minimum stack depth when parsing the callchain, "
2802 "anything below the specified depth will be ignored."),
c6d4a494
ACM
2803 OPT_UINTEGER(0, "max-stack", &trace.max_stack,
2804 "Set the maximum stack depth when parsing the callchain, "
2805 "anything beyond the specified depth will be ignored. "
4cb93446 2806 "Default: kernel.perf_event_max_stack or " __stringify(PERF_MAX_STACK_DEPTH)),
9d9cad76
KL
2807 OPT_UINTEGER(0, "proc-map-timeout", &trace.opts.proc_map_timeout,
2808 "per thread proc mmap processing timeout in ms"),
514f1c67
ACM
2809 OPT_END()
2810 };
ccd62a89 2811 bool __maybe_unused max_stack_user_set = true;
f3e459d1 2812 bool mmap_pages_user_set = true;
6fdd9cb7 2813 const char * const trace_subcommands[] = { "record", NULL };
514f1c67 2814 int err;
32caf0d1 2815 char bf[BUFSIZ];
514f1c67 2816
4d08cb80
ACM
2817 signal(SIGSEGV, sighandler_dump_stack);
2818 signal(SIGFPE, sighandler_dump_stack);
2819
14a052df 2820 trace.evlist = perf_evlist__new();
fd0db102 2821 trace.sctbl = syscalltbl__new();
14a052df 2822
fd0db102 2823 if (trace.evlist == NULL || trace.sctbl == NULL) {
14a052df 2824 pr_err("Not enough memory to run!\n");
ff8f695c 2825 err = -ENOMEM;
14a052df
ACM
2826 goto out;
2827 }
2828
6fdd9cb7
YS
2829 argc = parse_options_subcommand(argc, argv, trace_options, trace_subcommands,
2830 trace_usage, PARSE_OPT_STOP_AT_NON_OPTION);
fd2eabaf 2831
d7888573
WN
2832 err = bpf__setup_stdout(trace.evlist);
2833 if (err) {
2834 bpf__strerror_setup_stdout(trace.evlist, err, bf, sizeof(bf));
2835 pr_err("ERROR: Setup BPF stdout failed: %s\n", bf);
2836 goto out;
2837 }
2838
59247e33
ACM
2839 err = -1;
2840
598d02c5
SF
2841 if (trace.trace_pgfaults) {
2842 trace.opts.sample_address = true;
2843 trace.opts.sample_time = true;
2844 }
2845
f3e459d1
ACM
2846 if (trace.opts.mmap_pages == UINT_MAX)
2847 mmap_pages_user_set = false;
2848
05614993 2849 if (trace.max_stack == UINT_MAX) {
fe176085 2850 trace.max_stack = input_name ? PERF_MAX_STACK_DEPTH : sysctl_perf_event_max_stack;
05614993
ACM
2851 max_stack_user_set = false;
2852 }
2853
2854#ifdef HAVE_DWARF_UNWIND_SUPPORT
caa36ed7 2855 if ((trace.min_stack || max_stack_user_set) && !callchain_param.enabled && trace.trace_syscalls)
05614993
ACM
2856 record_opts__parse_callchain(&trace.opts, &callchain_param, "dwarf", false);
2857#endif
2858
2ddd5c04 2859 if (callchain_param.enabled) {
f3e459d1
ACM
2860 if (!mmap_pages_user_set && geteuid() == 0)
2861 trace.opts.mmap_pages = perf_event_mlock_kb_in_pages() * 4;
2862
566a0885 2863 symbol_conf.use_callchain = true;
f3e459d1 2864 }
566a0885 2865
14a052df
ACM
2866 if (trace.evlist->nr_entries > 0)
2867 evlist__set_evsel_handler(trace.evlist, trace__event_handler);
2868
1e28fe0a
SF
2869 if ((argc >= 1) && (strcmp(argv[0], "record") == 0))
2870 return trace__record(&trace, argc-1, &argv[1]);
2871
2872 /* summary_only implies summary option, but don't overwrite summary if set */
2873 if (trace.summary_only)
2874 trace.summary = trace.summary_only;
2875
726f3234
ACM
2876 if (!trace.trace_syscalls && !trace.trace_pgfaults &&
2877 trace.evlist->nr_entries == 0 /* Was --events used? */) {
e281a960
SF
2878 pr_err("Please specify something to trace.\n");
2879 return -1;
2880 }
2881
59247e33
ACM
2882 if (!trace.trace_syscalls && ev_qualifier_str) {
2883 pr_err("The -e option can't be used with --no-syscalls.\n");
2884 goto out;
2885 }
2886
c24ff998
ACM
2887 if (output_name != NULL) {
2888 err = trace__open_output(&trace, output_name);
2889 if (err < 0) {
2890 perror("failed to create output file");
2891 goto out;
2892 }
2893 }
2894
fd0db102
ACM
2895 trace.open_id = syscalltbl__id(trace.sctbl, "open");
2896
2ae3a312 2897 if (ev_qualifier_str != NULL) {
b059efdf 2898 const char *s = ev_qualifier_str;
005438a8
ACM
2899 struct strlist_config slist_config = {
2900 .dirname = system_path(STRACE_GROUPS_DIR),
2901 };
b059efdf
ACM
2902
2903 trace.not_ev_qualifier = *s == '!';
2904 if (trace.not_ev_qualifier)
2905 ++s;
005438a8 2906 trace.ev_qualifier = strlist__new(s, &slist_config);
2ae3a312 2907 if (trace.ev_qualifier == NULL) {
c24ff998
ACM
2908 fputs("Not enough memory to parse event qualifier",
2909 trace.output);
2910 err = -ENOMEM;
2911 goto out_close;
2ae3a312 2912 }
d0cc439b
ACM
2913
2914 err = trace__validate_ev_qualifier(&trace);
2915 if (err)
2916 goto out_close;
2ae3a312
ACM
2917 }
2918
602ad878 2919 err = target__validate(&trace.opts.target);
32caf0d1 2920 if (err) {
602ad878 2921 target__strerror(&trace.opts.target, err, bf, sizeof(bf));
c24ff998
ACM
2922 fprintf(trace.output, "%s", bf);
2923 goto out_close;
32caf0d1
NK
2924 }
2925
602ad878 2926 err = target__parse_uid(&trace.opts.target);
514f1c67 2927 if (err) {
602ad878 2928 target__strerror(&trace.opts.target, err, bf, sizeof(bf));
c24ff998
ACM
2929 fprintf(trace.output, "%s", bf);
2930 goto out_close;
514f1c67
ACM
2931 }
2932
602ad878 2933 if (!argc && target__none(&trace.opts.target))
ee76120e
NK
2934 trace.opts.target.system_wide = true;
2935
6810fc91
DA
2936 if (input_name)
2937 err = trace__replay(&trace);
2938 else
2939 err = trace__run(&trace, argc, argv);
1302d88e 2940
c24ff998
ACM
2941out_close:
2942 if (output_name != NULL)
2943 fclose(trace.output);
2944out:
1302d88e 2945 return err;
514f1c67 2946}