Merge branch 'for-next' of git://git.kernel.org/pub/scm/linux/kernel/git/shli/md
[linux-2.6-block.git] / tools / perf / builtin-trace.c
CommitLineData
a598bb5e
ACM
1/*
2 * builtin-trace.c
3 *
4 * Builtin 'trace' command:
5 *
6 * Display a continuously updated trace of any workload, CPU, specific PID,
7 * system wide, etc. Default format is loosely strace like, but any other
8 * event may be specified using --event.
9 *
10 * Copyright (C) 2012, 2013, 2014, 2015 Red Hat Inc, Arnaldo Carvalho de Melo <acme@redhat.com>
11 *
12 * Initially based on the 'trace' prototype by Thomas Gleixner:
13 *
14 * http://lwn.net/Articles/415728/ ("Announcing a new utility: 'trace'")
15 *
16 * Released under the GPL v2. (and only v2, not any later version)
17 */
18
4e319027 19#include <traceevent/event-parse.h>
988bdb31 20#include <api/fs/tracing_path.h>
514f1c67 21#include "builtin.h"
752fde44 22#include "util/color.h"
7c304ee0 23#include "util/debug.h"
092bd3cd 24#include "util/env.h"
5ab8c689 25#include "util/event.h"
514f1c67 26#include "util/evlist.h"
4b6ab94e 27#include <subcmd/exec-cmd.h>
752fde44 28#include "util/machine.h"
9a3993d4 29#include "util/path.h"
6810fc91 30#include "util/session.h"
752fde44 31#include "util/thread.h"
4b6ab94e 32#include <subcmd/parse-options.h>
2ae3a312 33#include "util/strlist.h"
bdc89661 34#include "util/intlist.h"
514f1c67 35#include "util/thread_map.h"
bf2575c1 36#include "util/stat.h"
fd5cead2 37#include "trace/beauty/beauty.h"
97978b3e 38#include "trace-event.h"
9aca7f17 39#include "util/parse-events.h"
ba504235 40#include "util/bpf-loader.h"
566a0885 41#include "callchain.h"
fea01392 42#include "print_binary.h"
a067558e 43#include "string2.h"
fd0db102 44#include "syscalltbl.h"
96c14451 45#include "rb_resort.h"
514f1c67 46
a43783ae 47#include <errno.h>
fd20e811 48#include <inttypes.h>
4208735d 49#include <poll.h>
9607ad3a 50#include <signal.h>
514f1c67 51#include <stdlib.h>
017037ff 52#include <string.h>
8dd2a131 53#include <linux/err.h>
997bba8c 54#include <linux/filter.h>
877a7a11 55#include <linux/kernel.h>
39878d49 56#include <linux/random.h>
c6d4a494 57#include <linux/stringify.h>
bd48c63e 58#include <linux/time64.h>
bafae98e 59#include <fcntl.h>
514f1c67 60
3d689ed6
ACM
61#include "sane_ctype.h"
62
c188e7ac
ACM
/* Provide O_CLOEXEC when the headers included above did not define it. */
#ifndef O_CLOEXEC
# define O_CLOEXEC		02000000
#endif

/*
 * Provide F_LINUX_SPECIFIC_BASE when the headers included above did not
 * define it; it is the offset used for the fcntl_linux_specific_cmds table.
 */
#ifndef F_LINUX_SPECIFIC_BASE
# define F_LINUX_SPECIFIC_BASE	1024
#endif
70
d1d438a3
ACM
71struct trace {
72 struct perf_tool tool;
fd0db102 73 struct syscalltbl *sctbl;
d1d438a3
ACM
74 struct {
75 int max;
76 struct syscall *table;
77 struct {
78 struct perf_evsel *sys_enter,
79 *sys_exit;
80 } events;
81 } syscalls;
82 struct record_opts opts;
83 struct perf_evlist *evlist;
84 struct machine *host;
85 struct thread *current;
86 u64 base_time;
87 FILE *output;
88 unsigned long nr_events;
89 struct strlist *ev_qualifier;
90 struct {
91 size_t nr;
92 int *entries;
93 } ev_qualifier_ids;
d1d438a3
ACM
94 struct {
95 size_t nr;
96 pid_t *entries;
97 } filter_pids;
98 double duration_filter;
99 double runtime_ms;
100 struct {
101 u64 vfs_getname,
102 proc_getname;
103 } stats;
c6d4a494 104 unsigned int max_stack;
5cf9c84e 105 unsigned int min_stack;
d1d438a3
ACM
106 bool not_ev_qualifier;
107 bool live;
108 bool full_time;
109 bool sched;
110 bool multiple_threads;
111 bool summary;
112 bool summary_only;
113 bool show_comm;
591421e1 114 bool print_sample;
d1d438a3
ACM
115 bool show_tool_stats;
116 bool trace_syscalls;
44621819 117 bool kernel_syscallchains;
d1d438a3
ACM
118 bool force;
119 bool vfs_getname;
120 int trace_pgfaults;
fd0db102 121 int open_id;
d1d438a3 122};
a1c2552d 123
77170988
ACM
124struct tp_field {
125 int offset;
126 union {
127 u64 (*integer)(struct tp_field *field, struct perf_sample *sample);
128 void *(*pointer)(struct tp_field *field, struct perf_sample *sample);
129 };
130};
131
132#define TP_UINT_FIELD(bits) \
133static u64 tp_field__u##bits(struct tp_field *field, struct perf_sample *sample) \
134{ \
55d43bca
DA
135 u##bits value; \
136 memcpy(&value, sample->raw_data + field->offset, sizeof(value)); \
137 return value; \
77170988
ACM
138}
139
140TP_UINT_FIELD(8);
141TP_UINT_FIELD(16);
142TP_UINT_FIELD(32);
143TP_UINT_FIELD(64);
144
145#define TP_UINT_FIELD__SWAPPED(bits) \
146static u64 tp_field__swapped_u##bits(struct tp_field *field, struct perf_sample *sample) \
147{ \
55d43bca
DA
148 u##bits value; \
149 memcpy(&value, sample->raw_data + field->offset, sizeof(value)); \
77170988
ACM
150 return bswap_##bits(value);\
151}
152
153TP_UINT_FIELD__SWAPPED(16);
154TP_UINT_FIELD__SWAPPED(32);
155TP_UINT_FIELD__SWAPPED(64);
156
157static int tp_field__init_uint(struct tp_field *field,
158 struct format_field *format_field,
159 bool needs_swap)
160{
161 field->offset = format_field->offset;
162
163 switch (format_field->size) {
164 case 1:
165 field->integer = tp_field__u8;
166 break;
167 case 2:
168 field->integer = needs_swap ? tp_field__swapped_u16 : tp_field__u16;
169 break;
170 case 4:
171 field->integer = needs_swap ? tp_field__swapped_u32 : tp_field__u32;
172 break;
173 case 8:
174 field->integer = needs_swap ? tp_field__swapped_u64 : tp_field__u64;
175 break;
176 default:
177 return -1;
178 }
179
180 return 0;
181}
182
183static void *tp_field__ptr(struct tp_field *field, struct perf_sample *sample)
184{
185 return sample->raw_data + field->offset;
186}
187
188static int tp_field__init_ptr(struct tp_field *field, struct format_field *format_field)
189{
190 field->offset = format_field->offset;
191 field->pointer = tp_field__ptr;
192 return 0;
193}
194
195struct syscall_tp {
196 struct tp_field id;
197 union {
198 struct tp_field args, ret;
199 };
200};
201
202static int perf_evsel__init_tp_uint_field(struct perf_evsel *evsel,
203 struct tp_field *field,
204 const char *name)
205{
206 struct format_field *format_field = perf_evsel__field(evsel, name);
207
208 if (format_field == NULL)
209 return -1;
210
211 return tp_field__init_uint(field, format_field, evsel->needs_swap);
212}
213
214#define perf_evsel__init_sc_tp_uint_field(evsel, name) \
215 ({ struct syscall_tp *sc = evsel->priv;\
216 perf_evsel__init_tp_uint_field(evsel, &sc->name, #name); })
217
/*
 * Look up the tracepoint field called @name in @evsel and initialize @field
 * as a pointer accessor for it.
 *
 * Returns 0 on success, -1 if the field does not exist.
 */
static int perf_evsel__init_tp_ptr_field(struct perf_evsel *evsel,
					 struct tp_field *field,
					 const char *name)
{
	struct format_field *format_field = perf_evsel__field(evsel, name);

	if (format_field == NULL)
		return -1;

	return tp_field__init_ptr(field, format_field);
}

/*
 * Initialize the struct syscall_tp member called 'name' (stored in
 * evsel->priv) as a pointer accessor for the tracepoint field of the same
 * name.
 */
#define perf_evsel__init_sc_tp_ptr_field(evsel, name) \
	({ struct syscall_tp *sc = (evsel)->priv;\
	   perf_evsel__init_tp_ptr_field(evsel, &sc->name, #name); })
233
234static void perf_evsel__delete_priv(struct perf_evsel *evsel)
235{
04662523 236 zfree(&evsel->priv);
77170988
ACM
237 perf_evsel__delete(evsel);
238}
239
96695d44
NK
240static int perf_evsel__init_syscall_tp(struct perf_evsel *evsel, void *handler)
241{
242 evsel->priv = malloc(sizeof(struct syscall_tp));
243 if (evsel->priv != NULL) {
244 if (perf_evsel__init_sc_tp_uint_field(evsel, id))
245 goto out_delete;
246
247 evsel->handler = handler;
248 return 0;
249 }
250
251 return -ENOMEM;
252
253out_delete:
04662523 254 zfree(&evsel->priv);
96695d44
NK
255 return -ENOENT;
256}
257
/*
 * Create the evsel for the raw_syscalls:<direction> tracepoint, falling back
 * to syscalls:<direction>, and initialize its syscall_tp private area with
 * @handler as the sample handler.
 *
 * Returns the new evsel, or NULL if neither tracepoint could be created or
 * the private area could not be initialized.
 */
static struct perf_evsel *perf_evsel__syscall_newtp(const char *direction, void *handler)
{
	struct perf_evsel *evsel = perf_evsel__newtp("raw_syscalls", direction);

	/* older kernel (e.g., RHEL6) use syscalls:{enter,exit} */
	if (IS_ERR(evsel))
		evsel = perf_evsel__newtp("syscalls", direction);

	if (IS_ERR(evsel))
		return NULL;

	if (perf_evsel__init_syscall_tp(evsel, handler)) {
		perf_evsel__delete_priv(evsel);
		return NULL;
	}

	return evsel;
}
278
/*
 * Read the integer value of the syscall_tp member 'name' of @evsel from
 * @sample, using the accessor installed at init time.
 */
#define perf_evsel__sc_tp_uint(evsel, name, sample) \
	({ struct syscall_tp *fields = (evsel)->priv; \
	   fields->name.integer(&fields->name, sample); })

/*
 * Get the pointer for the syscall_tp member 'name' of @evsel inside
 * @sample, using the accessor installed at init time.
 */
#define perf_evsel__sc_tp_ptr(evsel, name, sample) \
	({ struct syscall_tp *fields = (evsel)->priv; \
	   fields->name.pointer(&fields->name, sample); })
286
0ae79636
ACM
287size_t strarray__scnprintf(struct strarray *sa, char *bf, size_t size, const char *intfmt, int val)
288{
289 int idx = val - sa->offset;
1f115cb7 290
0ae79636
ACM
291 if (idx < 0 || idx >= sa->nr_entries)
292 return scnprintf(bf, size, intfmt, val);
1f115cb7 293
0ae79636 294 return scnprintf(bf, size, "%s", sa->entries[idx]);
03e3adc9
ACM
295}
296
975b7c2f
ACM
297static size_t __syscall_arg__scnprintf_strarray(char *bf, size_t size,
298 const char *intfmt,
299 struct syscall_arg *arg)
1f115cb7 300{
0ae79636 301 return strarray__scnprintf(arg->parm, bf, size, intfmt, arg->val);
1f115cb7
ACM
302}
303
975b7c2f
ACM
304static size_t syscall_arg__scnprintf_strarray(char *bf, size_t size,
305 struct syscall_arg *arg)
306{
307 return __syscall_arg__scnprintf_strarray(bf, size, "%d", arg);
308}
309
1f115cb7
ACM
310#define SCA_STRARRAY syscall_arg__scnprintf_strarray
311
83a51694
ACM
/* A list of strarray tables that are consulted one after the other. */
struct strarrays {
	int		nr_entries;
	struct strarray **entries;
};

/* Define strarrays__<array> wrapping the 'struct strarray *' array @array. */
#define DEFINE_STRARRAYS(array) struct strarrays strarrays__##array = { \
	.nr_entries = ARRAY_SIZE(array), \
	.entries = array, \
}
321
274e86fd
ACM
322size_t syscall_arg__scnprintf_strarrays(char *bf, size_t size,
323 struct syscall_arg *arg)
83a51694
ACM
324{
325 struct strarrays *sas = arg->parm;
326 int i;
327
328 for (i = 0; i < sas->nr_entries; ++i) {
329 struct strarray *sa = sas->entries[i];
330 int idx = arg->val - sa->offset;
331
332 if (idx >= 0 && idx < sa->nr_entries) {
333 if (sa->entries[idx] == NULL)
334 break;
335 return scnprintf(bf, size, "%s", sa->entries[idx]);
336 }
337 }
338
339 return scnprintf(bf, size, "%d", arg->val);
340}
341
48e1f91a
ACM
342#ifndef AT_FDCWD
343#define AT_FDCWD -100
344#endif
345
75b757ca
ACM
346static size_t syscall_arg__scnprintf_fd_at(char *bf, size_t size,
347 struct syscall_arg *arg)
348{
349 int fd = arg->val;
350
351 if (fd == AT_FDCWD)
352 return scnprintf(bf, size, "CWD");
353
354 return syscall_arg__scnprintf_fd(bf, size, arg);
355}
356
357#define SCA_FDAT syscall_arg__scnprintf_fd_at
358
/* Forward declaration; the definition is not in this part of the file. */
static size_t syscall_arg__scnprintf_close_fd(char *bf, size_t size,
					      struct syscall_arg *arg);

#define SCA_CLOSE_FD syscall_arg__scnprintf_close_fd
363
2c2b1623 364size_t syscall_arg__scnprintf_hex(char *bf, size_t size, struct syscall_arg *arg)
13d4ff3e 365{
01533e97 366 return scnprintf(bf, size, "%#lx", arg->val);
13d4ff3e
ACM
367}
368
2c2b1623 369size_t syscall_arg__scnprintf_int(char *bf, size_t size, struct syscall_arg *arg)
a1c2552d
ACM
370{
371 return scnprintf(bf, size, "%d", arg->val);
372}
373
5dde91ed
ACM
374size_t syscall_arg__scnprintf_long(char *bf, size_t size, struct syscall_arg *arg)
375{
376 return scnprintf(bf, size, "%ld", arg->val);
377}
378
729a7841
ACM
379static const char *bpf_cmd[] = {
380 "MAP_CREATE", "MAP_LOOKUP_ELEM", "MAP_UPDATE_ELEM", "MAP_DELETE_ELEM",
381 "MAP_GET_NEXT_KEY", "PROG_LOAD",
382};
383static DEFINE_STRARRAY(bpf_cmd);
384
03e3adc9
ACM
385static const char *epoll_ctl_ops[] = { "ADD", "DEL", "MOD", };
386static DEFINE_STRARRAY_OFFSET(epoll_ctl_ops, 1);
eac032c5 387
1f115cb7
ACM
388static const char *itimers[] = { "REAL", "VIRTUAL", "PROF", };
389static DEFINE_STRARRAY(itimers);
390
b62bee1b
ACM
391static const char *keyctl_options[] = {
392 "GET_KEYRING_ID", "JOIN_SESSION_KEYRING", "UPDATE", "REVOKE", "CHOWN",
393 "SETPERM", "DESCRIBE", "CLEAR", "LINK", "UNLINK", "SEARCH", "READ",
394 "INSTANTIATE", "NEGATE", "SET_REQKEY_KEYRING", "SET_TIMEOUT",
395 "ASSUME_AUTHORITY", "GET_SECURITY", "SESSION_TO_PARENT", "REJECT",
396 "INSTANTIATE_IOV", "INVALIDATE", "GET_PERSISTENT",
397};
398static DEFINE_STRARRAY(keyctl_options);
399
efe6b882
ACM
400static const char *whences[] = { "SET", "CUR", "END",
401#ifdef SEEK_DATA
402"DATA",
403#endif
404#ifdef SEEK_HOLE
405"HOLE",
406#endif
407};
408static DEFINE_STRARRAY(whences);
f9da0b0c 409
80f587d5
ACM
410static const char *fcntl_cmds[] = {
411 "DUPFD", "GETFD", "SETFD", "GETFL", "SETFL", "GETLK", "SETLK",
e000e5e3
ACM
412 "SETLKW", "SETOWN", "GETOWN", "SETSIG", "GETSIG", "GETLK64",
413 "SETLK64", "SETLKW64", "SETOWN_EX", "GETOWN_EX",
414 "GETOWNER_UIDS",
80f587d5
ACM
415};
416static DEFINE_STRARRAY(fcntl_cmds);
417
83a51694
ACM
418static const char *fcntl_linux_specific_cmds[] = {
419 "SETLEASE", "GETLEASE", "NOTIFY", [5] = "CANCELLK", "DUPFD_CLOEXEC",
420 "SETPIPE_SZ", "GETPIPE_SZ", "ADD_SEALS", "GET_SEALS",
64e4561d 421 "GET_RW_HINT", "SET_RW_HINT", "GET_FILE_RW_HINT", "SET_FILE_RW_HINT",
83a51694
ACM
422};
423
424static DEFINE_STRARRAY_OFFSET(fcntl_linux_specific_cmds, F_LINUX_SPECIFIC_BASE);
425
426static struct strarray *fcntl_cmds_arrays[] = {
427 &strarray__fcntl_cmds,
428 &strarray__fcntl_linux_specific_cmds,
429};
430
431static DEFINE_STRARRAYS(fcntl_cmds_arrays);
432
c045bf02
ACM
static const char *rlimit_resources[] = {
	"CPU", "FSIZE", "DATA", "STACK", "CORE", "RSS", "NPROC", "NOFILE",
	"MEMLOCK", "AS", "LOCKS", "SIGPENDING", "MSGQUEUE", "NICE", "RTPRIO",
	"RTTIME",
};
static DEFINE_STRARRAY(rlimit_resources);

static const char *sighow[] = { "BLOCK", "UNBLOCK", "SETMASK", };
static DEFINE_STRARRAY(sighow);

static const char *clockid[] = {
	"REALTIME", "MONOTONIC", "PROCESS_CPUTIME_ID", "THREAD_CPUTIME_ID",
	"MONOTONIC_RAW", "REALTIME_COARSE", "MONOTONIC_COARSE", "BOOTTIME",
	"REALTIME_ALARM", "BOOTTIME_ALARM", "SGI_CYCLE", "TAI"
};
static DEFINE_STRARRAY(clockid);

static const char *socket_families[] = {
	"UNSPEC", "LOCAL", "INET", "AX25", "IPX", "APPLETALK", "NETROM",
	"BRIDGE", "ATMPVC", "X25", "INET6", "ROSE", "DECnet", "NETBEUI",
	"SECURITY", "KEY", "NETLINK", "PACKET", "ASH", "ECONET", "ATMSVC",
	"RDS", "SNA", "IRDA", "PPPOX", "WANPIPE", "LLC", "IB", "CAN", "TIPC",
	"BLUETOOTH", "IUCV", "RXRPC", "ISDN", "PHONET", "IEEE802154", "CAIF",
	"ALG", "NFC", "VSOCK",
};
static DEFINE_STRARRAY(socket_families);
459
51108999
ACM
460static size_t syscall_arg__scnprintf_access_mode(char *bf, size_t size,
461 struct syscall_arg *arg)
462{
463 size_t printed = 0;
464 int mode = arg->val;
465
466 if (mode == F_OK) /* 0 */
467 return scnprintf(bf, size, "F");
468#define P_MODE(n) \
469 if (mode & n##_OK) { \
470 printed += scnprintf(bf + printed, size - printed, "%s", #n); \
471 mode &= ~n##_OK; \
472 }
473
474 P_MODE(R);
475 P_MODE(W);
476 P_MODE(X);
477#undef P_MODE
478
479 if (mode)
480 printed += scnprintf(bf + printed, size - printed, "|%#x", mode);
481
482 return printed;
483}
484
485#define SCA_ACCMODE syscall_arg__scnprintf_access_mode
486
f994592d
ACM
/* Forward declaration; the definition is not in this part of the file. */
static size_t syscall_arg__scnprintf_filename(char *bf, size_t size,
					      struct syscall_arg *arg);

#define SCA_FILENAME syscall_arg__scnprintf_filename
491
46cce19b
ACM
492static size_t syscall_arg__scnprintf_pipe_flags(char *bf, size_t size,
493 struct syscall_arg *arg)
494{
495 int printed = 0, flags = arg->val;
496
497#define P_FLAG(n) \
498 if (flags & O_##n) { \
499 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
500 flags &= ~O_##n; \
501 }
502
503 P_FLAG(CLOEXEC);
504 P_FLAG(NONBLOCK);
505#undef P_FLAG
506
507 if (flags)
508 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
509
510 return printed;
511}
512
513#define SCA_PIPE_FLAGS syscall_arg__scnprintf_pipe_flags
514
a355a61e
ACM
515#ifndef GRND_NONBLOCK
516#define GRND_NONBLOCK 0x0001
517#endif
518#ifndef GRND_RANDOM
519#define GRND_RANDOM 0x0002
520#endif
521
39878d49
ACM
522static size_t syscall_arg__scnprintf_getrandom_flags(char *bf, size_t size,
523 struct syscall_arg *arg)
524{
525 int printed = 0, flags = arg->val;
526
527#define P_FLAG(n) \
528 if (flags & GRND_##n) { \
529 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
530 flags &= ~GRND_##n; \
531 }
532
533 P_FLAG(RANDOM);
534 P_FLAG(NONBLOCK);
535#undef P_FLAG
536
537 if (flags)
538 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
539
540 return printed;
541}
542
543#define SCA_GETRANDOM_FLAGS syscall_arg__scnprintf_getrandom_flags
544
82d4a110
ACM
/*
 * Initializer for a struct syscall_arg_fmt that prints the argument through
 * strarray__<array>. 'name' is not used by the expansion; it only labels
 * the argument at the point of use.
 */
#define STRARRAY(name, array) \
	  { .scnprintf	= SCA_STRARRAY, \
	    .parm	= &strarray__##array, }
453350dd 548
092bd3cd 549#include "trace/beauty/arch_errno_names.c"
ea8dc3ce 550#include "trace/beauty/eventfd.c"
d5d71e86 551#include "trace/beauty/futex_op.c"
3258abe0 552#include "trace/beauty/futex_val3.c"
df4cb167 553#include "trace/beauty/mmap.c"
ba2f22cf 554#include "trace/beauty/mode_t.c"
a30e6259 555#include "trace/beauty/msg_flags.c"
8f48df69 556#include "trace/beauty/open_flags.c"
62de344e 557#include "trace/beauty/perf_event_open.c"
d5d71e86 558#include "trace/beauty/pid.c"
a3bca91f 559#include "trace/beauty/sched_policy.c"
f5cd95ea 560#include "trace/beauty/seccomp.c"
12199d8e 561#include "trace/beauty/signum.c"
bbf86c43 562#include "trace/beauty/socket_type.c"
7206b900 563#include "trace/beauty/waitid_options.c"
a3bca91f 564
82d4a110
ACM
/*
 * How to print one syscall argument: the formatter, an optional parameter
 * handed to it (e.g. a string table), an optional argument name and whether
 * a zero value should be shown.
 */
struct syscall_arg_fmt {
	size_t	   (*scnprintf)(char *bf, size_t size, struct syscall_arg *arg);
	void	   *parm;
	const char *name;
	bool	   show_zero;
};
571
514f1c67
ACM
572static struct syscall_fmt {
573 const char *name;
aec1930b 574 const char *alias;
82d4a110 575 struct syscall_arg_fmt arg[6];
332337da 576 u8 nr_args;
11c8e39f 577 bool errpid;
514f1c67 578 bool timeout;
04b34729 579 bool hexret;
514f1c67 580} syscall_fmts[] = {
1f63139c 581 { .name = "access",
82d4a110 582 .arg = { [1] = { .scnprintf = SCA_ACCMODE, /* mode */ }, }, },
1f63139c 583 { .name = "bpf",
82d4a110 584 .arg = { [0] = STRARRAY(cmd, bpf_cmd), }, },
beccb2b5 585 { .name = "brk", .hexret = true,
82d4a110 586 .arg = { [0] = { .scnprintf = SCA_HEX, /* brk */ }, }, },
1f63139c 587 { .name = "clock_gettime",
82d4a110 588 .arg = { [0] = STRARRAY(clk_id, clockid), }, },
33396a3a
ACM
589 { .name = "clone", .errpid = true, .nr_args = 5,
590 .arg = { [0] = { .name = "flags", .scnprintf = SCA_CLONE_FLAGS, },
591 [1] = { .name = "child_stack", .scnprintf = SCA_HEX, },
592 [2] = { .name = "parent_tidptr", .scnprintf = SCA_HEX, },
593 [3] = { .name = "child_tidptr", .scnprintf = SCA_HEX, },
594 [4] = { .name = "tls", .scnprintf = SCA_HEX, }, }, },
1f63139c 595 { .name = "close",
82d4a110 596 .arg = { [0] = { .scnprintf = SCA_CLOSE_FD, /* fd */ }, }, },
1f63139c 597 { .name = "epoll_ctl",
82d4a110 598 .arg = { [1] = STRARRAY(op, epoll_ctl_ops), }, },
1f63139c 599 { .name = "eventfd2",
82d4a110 600 .arg = { [1] = { .scnprintf = SCA_EFD_FLAGS, /* flags */ }, }, },
1f63139c 601 { .name = "fchmodat",
82d4a110 602 .arg = { [0] = { .scnprintf = SCA_FDAT, /* fd */ }, }, },
1f63139c 603 { .name = "fchownat",
82d4a110 604 .arg = { [0] = { .scnprintf = SCA_FDAT, /* fd */ }, }, },
1f63139c 605 { .name = "fcntl",
82d4a110 606 .arg = { [1] = { .scnprintf = SCA_FCNTL_CMD, /* cmd */
39cc355b
ACM
607 .parm = &strarrays__fcntl_cmds_arrays,
608 .show_zero = true, },
82d4a110 609 [2] = { .scnprintf = SCA_FCNTL_ARG, /* arg */ }, }, },
1f63139c 610 { .name = "flock",
82d4a110 611 .arg = { [1] = { .scnprintf = SCA_FLOCK, /* cmd */ }, }, },
1f63139c
ACM
612 { .name = "fstat", .alias = "newfstat", },
613 { .name = "fstatat", .alias = "newfstatat", },
614 { .name = "futex",
3258abe0
ACM
615 .arg = { [1] = { .scnprintf = SCA_FUTEX_OP, /* op */ },
616 [5] = { .scnprintf = SCA_FUTEX_VAL3, /* val3 */ }, }, },
1f63139c 617 { .name = "futimesat",
82d4a110 618 .arg = { [0] = { .scnprintf = SCA_FDAT, /* fd */ }, }, },
1f63139c 619 { .name = "getitimer",
82d4a110 620 .arg = { [0] = STRARRAY(which, itimers), }, },
c65f1070 621 { .name = "getpid", .errpid = true, },
d1d438a3 622 { .name = "getpgid", .errpid = true, },
c65f1070 623 { .name = "getppid", .errpid = true, },
1f63139c 624 { .name = "getrandom",
82d4a110 625 .arg = { [2] = { .scnprintf = SCA_GETRANDOM_FLAGS, /* flags */ }, }, },
1f63139c 626 { .name = "getrlimit",
82d4a110 627 .arg = { [0] = STRARRAY(resource, rlimit_resources), }, },
2d1073de 628 { .name = "gettid", .errpid = true, },
1f63139c 629 { .name = "ioctl",
82d4a110 630 .arg = {
844ae5b4
ACM
631#if defined(__i386__) || defined(__x86_64__)
632/*
633 * FIXME: Make this available to all arches.
634 */
1cc47f2d 635 [1] = { .scnprintf = SCA_IOCTL_CMD, /* cmd */ },
82d4a110 636 [2] = { .scnprintf = SCA_HEX, /* arg */ }, }, },
844ae5b4 637#else
82d4a110 638 [2] = { .scnprintf = SCA_HEX, /* arg */ }, }, },
844ae5b4 639#endif
1de3038d
ACM
640 { .name = "kcmp", .nr_args = 5,
641 .arg = { [0] = { .name = "pid1", .scnprintf = SCA_PID, },
642 [1] = { .name = "pid2", .scnprintf = SCA_PID, },
643 [2] = { .name = "type", .scnprintf = SCA_KCMP_TYPE, },
644 [3] = { .name = "idx1", .scnprintf = SCA_KCMP_IDX, },
645 [4] = { .name = "idx2", .scnprintf = SCA_KCMP_IDX, }, }, },
1f63139c 646 { .name = "keyctl",
82d4a110 647 .arg = { [0] = STRARRAY(option, keyctl_options), }, },
1f63139c 648 { .name = "kill",
82d4a110 649 .arg = { [1] = { .scnprintf = SCA_SIGNUM, /* sig */ }, }, },
1f63139c 650 { .name = "linkat",
82d4a110 651 .arg = { [0] = { .scnprintf = SCA_FDAT, /* fd */ }, }, },
1f63139c 652 { .name = "lseek",
82d4a110 653 .arg = { [2] = STRARRAY(whence, whences), }, },
1f63139c
ACM
654 { .name = "lstat", .alias = "newlstat", },
655 { .name = "madvise",
82d4a110
ACM
656 .arg = { [0] = { .scnprintf = SCA_HEX, /* start */ },
657 [2] = { .scnprintf = SCA_MADV_BHV, /* behavior */ }, }, },
1f63139c 658 { .name = "mkdirat",
82d4a110 659 .arg = { [0] = { .scnprintf = SCA_FDAT, /* fd */ }, }, },
1f63139c 660 { .name = "mknodat",
82d4a110 661 .arg = { [0] = { .scnprintf = SCA_FDAT, /* fd */ }, }, },
1f63139c 662 { .name = "mlock",
82d4a110 663 .arg = { [0] = { .scnprintf = SCA_HEX, /* addr */ }, }, },
1f63139c 664 { .name = "mlockall",
82d4a110 665 .arg = { [0] = { .scnprintf = SCA_HEX, /* addr */ }, }, },
beccb2b5 666 { .name = "mmap", .hexret = true,
54265664
JO
667/* The standard mmap maps to old_mmap on s390x */
668#if defined(__s390x__)
669 .alias = "old_mmap",
670#endif
82d4a110
ACM
671 .arg = { [0] = { .scnprintf = SCA_HEX, /* addr */ },
672 [2] = { .scnprintf = SCA_MMAP_PROT, /* prot */ },
673 [3] = { .scnprintf = SCA_MMAP_FLAGS, /* flags */ }, }, },
1f63139c 674 { .name = "mprotect",
82d4a110
ACM
675 .arg = { [0] = { .scnprintf = SCA_HEX, /* start */ },
676 [2] = { .scnprintf = SCA_MMAP_PROT, /* prot */ }, }, },
1f63139c 677 { .name = "mq_unlink",
82d4a110 678 .arg = { [0] = { .scnprintf = SCA_FILENAME, /* u_name */ }, }, },
ae685380 679 { .name = "mremap", .hexret = true,
82d4a110
ACM
680 .arg = { [0] = { .scnprintf = SCA_HEX, /* addr */ },
681 [3] = { .scnprintf = SCA_MREMAP_FLAGS, /* flags */ },
682 [4] = { .scnprintf = SCA_HEX, /* new_addr */ }, }, },
1f63139c 683 { .name = "munlock",
82d4a110 684 .arg = { [0] = { .scnprintf = SCA_HEX, /* addr */ }, }, },
1f63139c 685 { .name = "munmap",
82d4a110 686 .arg = { [0] = { .scnprintf = SCA_HEX, /* addr */ }, }, },
1f63139c 687 { .name = "name_to_handle_at",
82d4a110 688 .arg = { [0] = { .scnprintf = SCA_FDAT, /* dfd */ }, }, },
1f63139c 689 { .name = "newfstatat",
82d4a110 690 .arg = { [0] = { .scnprintf = SCA_FDAT, /* dfd */ }, }, },
1f63139c 691 { .name = "open",
82d4a110 692 .arg = { [1] = { .scnprintf = SCA_OPEN_FLAGS, /* flags */ }, }, },
1f63139c 693 { .name = "open_by_handle_at",
82d4a110
ACM
694 .arg = { [0] = { .scnprintf = SCA_FDAT, /* dfd */ },
695 [2] = { .scnprintf = SCA_OPEN_FLAGS, /* flags */ }, }, },
1f63139c 696 { .name = "openat",
82d4a110
ACM
697 .arg = { [0] = { .scnprintf = SCA_FDAT, /* dfd */ },
698 [2] = { .scnprintf = SCA_OPEN_FLAGS, /* flags */ }, }, },
1f63139c 699 { .name = "perf_event_open",
82d4a110
ACM
700 .arg = { [2] = { .scnprintf = SCA_INT, /* cpu */ },
701 [3] = { .scnprintf = SCA_FD, /* group_fd */ },
702 [4] = { .scnprintf = SCA_PERF_FLAGS, /* flags */ }, }, },
1f63139c 703 { .name = "pipe2",
82d4a110 704 .arg = { [1] = { .scnprintf = SCA_PIPE_FLAGS, /* flags */ }, }, },
83bc9c37
ACM
705 { .name = "pkey_alloc",
706 .arg = { [1] = { .scnprintf = SCA_PKEY_ALLOC_ACCESS_RIGHTS, /* access_rights */ }, }, },
707 { .name = "pkey_free",
708 .arg = { [0] = { .scnprintf = SCA_INT, /* key */ }, }, },
709 { .name = "pkey_mprotect",
710 .arg = { [0] = { .scnprintf = SCA_HEX, /* start */ },
711 [2] = { .scnprintf = SCA_MMAP_PROT, /* prot */ },
712 [3] = { .scnprintf = SCA_INT, /* pkey */ }, }, },
1f63139c
ACM
713 { .name = "poll", .timeout = true, },
714 { .name = "ppoll", .timeout = true, },
d688d037
ACM
715 { .name = "prctl", .alias = "arch_prctl",
716 .arg = { [0] = { .scnprintf = SCA_PRCTL_OPTION, /* option */ },
717 [1] = { .scnprintf = SCA_PRCTL_ARG2, /* arg2 */ },
718 [2] = { .scnprintf = SCA_PRCTL_ARG3, /* arg3 */ }, }, },
1f63139c
ACM
719 { .name = "pread", .alias = "pread64", },
720 { .name = "preadv", .alias = "pread", },
721 { .name = "prlimit64",
82d4a110 722 .arg = { [1] = STRARRAY(resource, rlimit_resources), }, },
1f63139c
ACM
723 { .name = "pwrite", .alias = "pwrite64", },
724 { .name = "readlinkat",
82d4a110 725 .arg = { [0] = { .scnprintf = SCA_FDAT, /* dfd */ }, }, },
1f63139c 726 { .name = "recvfrom",
82d4a110 727 .arg = { [3] = { .scnprintf = SCA_MSG_FLAGS, /* flags */ }, }, },
1f63139c 728 { .name = "recvmmsg",
82d4a110 729 .arg = { [3] = { .scnprintf = SCA_MSG_FLAGS, /* flags */ }, }, },
1f63139c 730 { .name = "recvmsg",
82d4a110 731 .arg = { [2] = { .scnprintf = SCA_MSG_FLAGS, /* flags */ }, }, },
1f63139c 732 { .name = "renameat",
82d4a110 733 .arg = { [0] = { .scnprintf = SCA_FDAT, /* dfd */ }, }, },
1f63139c 734 { .name = "rt_sigaction",
82d4a110 735 .arg = { [0] = { .scnprintf = SCA_SIGNUM, /* sig */ }, }, },
1f63139c 736 { .name = "rt_sigprocmask",
82d4a110 737 .arg = { [0] = STRARRAY(how, sighow), }, },
1f63139c 738 { .name = "rt_sigqueueinfo",
82d4a110 739 .arg = { [1] = { .scnprintf = SCA_SIGNUM, /* sig */ }, }, },
1f63139c 740 { .name = "rt_tgsigqueueinfo",
82d4a110 741 .arg = { [2] = { .scnprintf = SCA_SIGNUM, /* sig */ }, }, },
1f63139c 742 { .name = "sched_setscheduler",
82d4a110 743 .arg = { [1] = { .scnprintf = SCA_SCHED_POLICY, /* policy */ }, }, },
1f63139c 744 { .name = "seccomp",
82d4a110
ACM
745 .arg = { [0] = { .scnprintf = SCA_SECCOMP_OP, /* op */ },
746 [1] = { .scnprintf = SCA_SECCOMP_FLAGS, /* flags */ }, }, },
1f63139c
ACM
747 { .name = "select", .timeout = true, },
748 { .name = "sendmmsg",
82d4a110 749 .arg = { [3] = { .scnprintf = SCA_MSG_FLAGS, /* flags */ }, }, },
1f63139c 750 { .name = "sendmsg",
82d4a110 751 .arg = { [2] = { .scnprintf = SCA_MSG_FLAGS, /* flags */ }, }, },
1f63139c 752 { .name = "sendto",
82d4a110 753 .arg = { [3] = { .scnprintf = SCA_MSG_FLAGS, /* flags */ }, }, },
c65f1070 754 { .name = "set_tid_address", .errpid = true, },
1f63139c 755 { .name = "setitimer",
82d4a110 756 .arg = { [0] = STRARRAY(which, itimers), }, },
1f63139c 757 { .name = "setrlimit",
82d4a110 758 .arg = { [0] = STRARRAY(resource, rlimit_resources), }, },
1f63139c 759 { .name = "socket",
82d4a110
ACM
760 .arg = { [0] = STRARRAY(family, socket_families),
761 [1] = { .scnprintf = SCA_SK_TYPE, /* type */ }, }, },
1f63139c 762 { .name = "socketpair",
82d4a110
ACM
763 .arg = { [0] = STRARRAY(family, socket_families),
764 [1] = { .scnprintf = SCA_SK_TYPE, /* type */ }, }, },
1f63139c
ACM
765 { .name = "stat", .alias = "newstat", },
766 { .name = "statx",
82d4a110
ACM
767 .arg = { [0] = { .scnprintf = SCA_FDAT, /* fdat */ },
768 [2] = { .scnprintf = SCA_STATX_FLAGS, /* flags */ } ,
769 [3] = { .scnprintf = SCA_STATX_MASK, /* mask */ }, }, },
1f63139c 770 { .name = "swapoff",
82d4a110 771 .arg = { [0] = { .scnprintf = SCA_FILENAME, /* specialfile */ }, }, },
1f63139c 772 { .name = "swapon",
82d4a110 773 .arg = { [0] = { .scnprintf = SCA_FILENAME, /* specialfile */ }, }, },
1f63139c 774 { .name = "symlinkat",
82d4a110 775 .arg = { [0] = { .scnprintf = SCA_FDAT, /* dfd */ }, }, },
1f63139c 776 { .name = "tgkill",
82d4a110 777 .arg = { [2] = { .scnprintf = SCA_SIGNUM, /* sig */ }, }, },
1f63139c 778 { .name = "tkill",
82d4a110 779 .arg = { [1] = { .scnprintf = SCA_SIGNUM, /* sig */ }, }, },
1f63139c
ACM
780 { .name = "uname", .alias = "newuname", },
781 { .name = "unlinkat",
82d4a110 782 .arg = { [0] = { .scnprintf = SCA_FDAT, /* dfd */ }, }, },
1f63139c 783 { .name = "utimensat",
82d4a110 784 .arg = { [0] = { .scnprintf = SCA_FDAT, /* dirfd */ }, }, },
11c8e39f 785 { .name = "wait4", .errpid = true,
82d4a110 786 .arg = { [2] = { .scnprintf = SCA_WAITID_OPTIONS, /* options */ }, }, },
11c8e39f 787 { .name = "waitid", .errpid = true,
82d4a110 788 .arg = { [3] = { .scnprintf = SCA_WAITID_OPTIONS, /* options */ }, }, },
514f1c67
ACM
789};
790
791static int syscall_fmt__cmp(const void *name, const void *fmtp)
792{
793 const struct syscall_fmt *fmt = fmtp;
794 return strcmp(name, fmt->name);
795}
796
797static struct syscall_fmt *syscall_fmt__find(const char *name)
798{
799 const int nmemb = ARRAY_SIZE(syscall_fmts);
800 return bsearch(name, syscall_fmts, nmemb, sizeof(struct syscall_fmt), syscall_fmt__cmp);
801}
802
/*
 * What is known about one syscall: its tracepoint format, the argument
 * fields taken from it, its name and the optional formatting overrides
 * (per syscall and per argument).
 */
struct syscall {
	struct event_format *tp_format;
	int		    nr_args;
	struct format_field *args;
	const char	    *name;
	bool		    is_exit;
	struct syscall_fmt  *fmt;
	struct syscall_arg_fmt *arg_fmt;
};
812
fd2b2975
ACM
813/*
814 * We need to have this 'calculated' boolean because in some cases we really
815 * don't know what is the duration of a syscall, for instance, when we start
816 * a session and some threads are waiting for a syscall to finish, say 'poll',
817 * in which case all we can do is to print "( ? ) for duration and for the
818 * start timestamp.
819 */
820static size_t fprintf_duration(unsigned long t, bool calculated, FILE *fp)
60c907ab
ACM
821{
822 double duration = (double)t / NSEC_PER_MSEC;
823 size_t printed = fprintf(fp, "(");
824
fd2b2975 825 if (!calculated)
522283fe 826 printed += fprintf(fp, " ");
fd2b2975 827 else if (duration >= 1.0)
60c907ab
ACM
828 printed += color_fprintf(fp, PERF_COLOR_RED, "%6.3f ms", duration);
829 else if (duration >= 0.01)
830 printed += color_fprintf(fp, PERF_COLOR_YELLOW, "%6.3f ms", duration);
831 else
832 printed += color_fprintf(fp, PERF_COLOR_NORMAL, "%6.3f ms", duration);
c24ff998 833 return printed + fprintf(fp, "): ");
60c907ab
ACM
834}
835
f994592d
ACM
836/**
837 * filename.ptr: The filename char pointer that will be vfs_getname'd
838 * filename.entry_str_pos: Where to insert the string translated from
839 * filename.ptr by the vfs_getname tracepoint/kprobe.
84486caa
ACM
840 * ret_scnprintf: syscall args may set this to a different syscall return
841 * formatter, for instance, fcntl may return fds, file flags, etc.
f994592d 842 */
752fde44
ACM
843struct thread_trace {
844 u64 entry_time;
752fde44 845 bool entry_pending;
efd5745e 846 unsigned long nr_events;
a2ea67d7 847 unsigned long pfmaj, pfmin;
752fde44 848 char *entry_str;
1302d88e 849 double runtime_ms;
7ee57434 850 size_t (*ret_scnprintf)(char *bf, size_t size, struct syscall_arg *arg);
f994592d
ACM
851 struct {
852 unsigned long ptr;
7f4f8001
ACM
853 short int entry_str_pos;
854 bool pending_open;
855 unsigned int namelen;
856 char *name;
f994592d 857 } filename;
75b757ca
ACM
858 struct {
859 int max;
860 char **table;
861 } paths;
bf2575c1
DA
862
863 struct intlist *syscall_stats;
752fde44
ACM
864};
865
866static struct thread_trace *thread_trace__new(void)
867{
75b757ca
ACM
868 struct thread_trace *ttrace = zalloc(sizeof(struct thread_trace));
869
870 if (ttrace)
871 ttrace->paths.max = -1;
872
bf2575c1
DA
873 ttrace->syscall_stats = intlist__new(NULL);
874
75b757ca 875 return ttrace;
752fde44
ACM
876}
877
c24ff998 878static struct thread_trace *thread__trace(struct thread *thread, FILE *fp)
752fde44 879{
efd5745e
ACM
880 struct thread_trace *ttrace;
881
752fde44
ACM
882 if (thread == NULL)
883 goto fail;
884
89dceb22
NK
885 if (thread__priv(thread) == NULL)
886 thread__set_priv(thread, thread_trace__new());
48000a1a 887
89dceb22 888 if (thread__priv(thread) == NULL)
752fde44
ACM
889 goto fail;
890
89dceb22 891 ttrace = thread__priv(thread);
efd5745e
ACM
892 ++ttrace->nr_events;
893
894 return ttrace;
752fde44 895fail:
c24ff998 896 color_fprintf(fp, PERF_COLOR_RED,
752fde44
ACM
897 "WARNING: not enough memory, dropping samples!\n");
898 return NULL;
899}
900
84486caa
ACM
901
902void syscall_arg__set_ret_scnprintf(struct syscall_arg *arg,
7ee57434 903 size_t (*ret_scnprintf)(char *bf, size_t size, struct syscall_arg *arg))
84486caa
ACM
904{
905 struct thread_trace *ttrace = thread__priv(arg->thread);
906
907 ttrace->ret_scnprintf = ret_scnprintf;
908}
909
598d02c5
SF
910#define TRACE_PFMAJ (1 << 0)
911#define TRACE_PFMIN (1 << 1)
912
e4d44e83
ACM
913static const size_t trace__entry_str_size = 2048;
914
97119f37 915static int trace__set_fd_pathname(struct thread *thread, int fd, const char *pathname)
75b757ca 916{
89dceb22 917 struct thread_trace *ttrace = thread__priv(thread);
75b757ca
ACM
918
919 if (fd > ttrace->paths.max) {
920 char **npath = realloc(ttrace->paths.table, (fd + 1) * sizeof(char *));
921
922 if (npath == NULL)
923 return -1;
924
925 if (ttrace->paths.max != -1) {
926 memset(npath + ttrace->paths.max + 1, 0,
927 (fd - ttrace->paths.max) * sizeof(char *));
928 } else {
929 memset(npath, 0, (fd + 1) * sizeof(char *));
930 }
931
932 ttrace->paths.table = npath;
933 ttrace->paths.max = fd;
934 }
935
936 ttrace->paths.table[fd] = strdup(pathname);
937
938 return ttrace->paths.table[fd] != NULL ? 0 : -1;
939}
940
97119f37
ACM
941static int thread__read_fd_path(struct thread *thread, int fd)
942{
943 char linkname[PATH_MAX], pathname[PATH_MAX];
944 struct stat st;
945 int ret;
946
947 if (thread->pid_ == thread->tid) {
948 scnprintf(linkname, sizeof(linkname),
949 "/proc/%d/fd/%d", thread->pid_, fd);
950 } else {
951 scnprintf(linkname, sizeof(linkname),
952 "/proc/%d/task/%d/fd/%d", thread->pid_, thread->tid, fd);
953 }
954
955 if (lstat(linkname, &st) < 0 || st.st_size + 1 > (off_t)sizeof(pathname))
956 return -1;
957
958 ret = readlink(linkname, pathname, sizeof(pathname));
959
960 if (ret < 0 || ret > st.st_size)
961 return -1;
962
963 pathname[ret] = '\0';
964 return trace__set_fd_pathname(thread, fd, pathname);
965}
966
c522739d
ACM
967static const char *thread__fd_path(struct thread *thread, int fd,
968 struct trace *trace)
75b757ca 969{
89dceb22 970 struct thread_trace *ttrace = thread__priv(thread);
75b757ca
ACM
971
972 if (ttrace == NULL)
973 return NULL;
974
975 if (fd < 0)
976 return NULL;
977
cdcd1e6b 978 if ((fd > ttrace->paths.max || ttrace->paths.table[fd] == NULL)) {
c522739d
ACM
979 if (!trace->live)
980 return NULL;
981 ++trace->stats.proc_getname;
cdcd1e6b 982 if (thread__read_fd_path(thread, fd))
c522739d
ACM
983 return NULL;
984 }
75b757ca
ACM
985
986 return ttrace->paths.table[fd];
987}
988
fc65eb82 989size_t syscall_arg__scnprintf_fd(char *bf, size_t size, struct syscall_arg *arg)
75b757ca
ACM
990{
991 int fd = arg->val;
992 size_t printed = scnprintf(bf, size, "%d", fd);
c522739d 993 const char *path = thread__fd_path(arg->thread, fd, arg->trace);
75b757ca
ACM
994
995 if (path)
996 printed += scnprintf(bf + printed, size - printed, "<%s>", path);
997
998 return printed;
999}
1000
0a2f7540
ACM
1001size_t pid__scnprintf_fd(struct trace *trace, pid_t pid, int fd, char *bf, size_t size)
1002{
1003 size_t printed = scnprintf(bf, size, "%d", fd);
1004 struct thread *thread = machine__find_thread(trace->host, pid, pid);
1005
1006 if (thread) {
1007 const char *path = thread__fd_path(thread, fd, trace);
1008
1009 if (path)
1010 printed += scnprintf(bf + printed, size - printed, "<%s>", path);
1011
1012 thread__put(thread);
1013 }
1014
1015 return printed;
1016}
1017
75b757ca
ACM
1018static size_t syscall_arg__scnprintf_close_fd(char *bf, size_t size,
1019 struct syscall_arg *arg)
1020{
1021 int fd = arg->val;
1022 size_t printed = syscall_arg__scnprintf_fd(bf, size, arg);
89dceb22 1023 struct thread_trace *ttrace = thread__priv(arg->thread);
75b757ca 1024
04662523
ACM
1025 if (ttrace && fd >= 0 && fd <= ttrace->paths.max)
1026 zfree(&ttrace->paths.table[fd]);
75b757ca
ACM
1027
1028 return printed;
1029}
1030
f994592d
ACM
1031static void thread__set_filename_pos(struct thread *thread, const char *bf,
1032 unsigned long ptr)
1033{
1034 struct thread_trace *ttrace = thread__priv(thread);
1035
1036 ttrace->filename.ptr = ptr;
1037 ttrace->filename.entry_str_pos = bf - ttrace->entry_str;
1038}
1039
1040static size_t syscall_arg__scnprintf_filename(char *bf, size_t size,
1041 struct syscall_arg *arg)
1042{
1043 unsigned long ptr = arg->val;
1044
1045 if (!arg->trace->vfs_getname)
1046 return scnprintf(bf, size, "%#x", ptr);
1047
1048 thread__set_filename_pos(arg->thread, bf, ptr);
1049 return 0;
1050}
1051
ae9ed035
ACM
1052static bool trace__filter_duration(struct trace *trace, double t)
1053{
1054 return t < (trace->duration_filter * NSEC_PER_MSEC);
1055}
1056
fd2b2975 1057static size_t __trace__fprintf_tstamp(struct trace *trace, u64 tstamp, FILE *fp)
752fde44
ACM
1058{
1059 double ts = (double)(tstamp - trace->base_time) / NSEC_PER_MSEC;
1060
60c907ab 1061 return fprintf(fp, "%10.3f ", ts);
752fde44
ACM
1062}
1063
fd2b2975
ACM
1064/*
1065 * We're handling tstamp=0 as an undefined tstamp, i.e. like when we are
1066 * using ttrace->entry_time for a thread that receives a sys_exit without
1067 * first having received a sys_enter ("poll" issued before tracing session
1068 * starts, lost sys_enter exit due to ring buffer overflow).
1069 */
1070static size_t trace__fprintf_tstamp(struct trace *trace, u64 tstamp, FILE *fp)
1071{
1072 if (tstamp > 0)
1073 return __trace__fprintf_tstamp(trace, tstamp, fp);
1074
1075 return fprintf(fp, " ? ");
1076}
1077
/*
 * Flags written from the signal handler and read by the rest of the file.
 * They are volatile sig_atomic_t because that is the only object type a
 * signal handler may portably store to; a plain bool gives no such
 * guarantee and may be cached by the compiler in the polling code.
 */
static volatile sig_atomic_t done = false;
static volatile sig_atomic_t interrupted = false;

/*
 * Ask the tracing loop to stop, and remember whether the request came from
 * SIGINT.
 */
static void sig_handler(int sig)
{
	done = true;
	interrupted = sig == SIGINT;
}
1086
752fde44 1087static size_t trace__fprintf_entry_head(struct trace *trace, struct thread *thread,
fd2b2975 1088 u64 duration, bool duration_calculated, u64 tstamp, FILE *fp)
752fde44
ACM
1089{
1090 size_t printed = trace__fprintf_tstamp(trace, tstamp, fp);
fd2b2975 1091 printed += fprintf_duration(duration, duration_calculated, fp);
752fde44 1092
50c95cbd
ACM
1093 if (trace->multiple_threads) {
1094 if (trace->show_comm)
1902efe7 1095 printed += fprintf(fp, "%.14s/", thread__comm_str(thread));
38051234 1096 printed += fprintf(fp, "%d ", thread->tid);
50c95cbd 1097 }
752fde44
ACM
1098
1099 return printed;
1100}
1101
c24ff998 1102static int trace__process_event(struct trace *trace, struct machine *machine,
162f0bef 1103 union perf_event *event, struct perf_sample *sample)
752fde44
ACM
1104{
1105 int ret = 0;
1106
1107 switch (event->header.type) {
1108 case PERF_RECORD_LOST:
c24ff998 1109 color_fprintf(trace->output, PERF_COLOR_RED,
752fde44 1110 "LOST %" PRIu64 " events!\n", event->lost.lost);
162f0bef 1111 ret = machine__process_lost_event(machine, event, sample);
3ed5ca2e 1112 break;
752fde44 1113 default:
162f0bef 1114 ret = machine__process_event(machine, event, sample);
752fde44
ACM
1115 break;
1116 }
1117
1118 return ret;
1119}
1120
c24ff998 1121static int trace__tool_process(struct perf_tool *tool,
752fde44 1122 union perf_event *event,
162f0bef 1123 struct perf_sample *sample,
752fde44
ACM
1124 struct machine *machine)
1125{
c24ff998 1126 struct trace *trace = container_of(tool, struct trace, tool);
162f0bef 1127 return trace__process_event(trace, machine, event, sample);
752fde44
ACM
1128}
1129
caf8a0d0
ACM
1130static char *trace__machine__resolve_kernel_addr(void *vmachine, unsigned long long *addrp, char **modp)
1131{
1132 struct machine *machine = vmachine;
1133
1134 if (machine->kptr_restrict_warned)
1135 return NULL;
1136
1137 if (symbol_conf.kptr_restrict) {
1138 pr_warning("Kernel address maps (/proc/{kallsyms,modules}) are restricted.\n\n"
1139 "Check /proc/sys/kernel/kptr_restrict.\n\n"
1140 "Kernel samples will not be resolved.\n");
1141 machine->kptr_restrict_warned = true;
1142 return NULL;
1143 }
1144
1145 return machine__resolve_kernel_addr(vmachine, addrp, modp);
1146}
1147
752fde44
ACM
1148static int trace__symbols_init(struct trace *trace, struct perf_evlist *evlist)
1149{
0a7e6d1b 1150 int err = symbol__init(NULL);
752fde44
ACM
1151
1152 if (err)
1153 return err;
1154
8fb598e5
DA
1155 trace->host = machine__new_host();
1156 if (trace->host == NULL)
1157 return -ENOMEM;
752fde44 1158
cbd5c178
AV
1159 err = trace_event__register_resolver(trace->host, trace__machine__resolve_kernel_addr);
1160 if (err < 0)
1161 goto out;
706c3da4 1162
a33fbd56 1163 err = __machine__synthesize_threads(trace->host, &trace->tool, &trace->opts.target,
9d9cad76 1164 evlist->threads, trace__tool_process, false,
340b47f5 1165 trace->opts.proc_map_timeout, 1);
cbd5c178 1166out:
752fde44
ACM
1167 if (err)
1168 symbol__exit();
1169
1170 return err;
1171}
1172
33974a41
AV
1173static void trace__symbols__exit(struct trace *trace)
1174{
1175 machine__exit(trace->host);
1176 trace->host = NULL;
1177
1178 symbol__exit();
1179}
1180
5e58fcfa 1181static int syscall__alloc_arg_fmts(struct syscall *sc, int nr_args)
13d4ff3e 1182{
5e58fcfa 1183 int idx;
13d4ff3e 1184
332337da
ACM
1185 if (nr_args == 6 && sc->fmt && sc->fmt->nr_args != 0)
1186 nr_args = sc->fmt->nr_args;
1187
5e58fcfa 1188 sc->arg_fmt = calloc(nr_args, sizeof(*sc->arg_fmt));
82d4a110 1189 if (sc->arg_fmt == NULL)
13d4ff3e
ACM
1190 return -1;
1191
5e58fcfa
ACM
1192 for (idx = 0; idx < nr_args; ++idx) {
1193 if (sc->fmt)
82d4a110 1194 sc->arg_fmt[idx] = sc->fmt->arg[idx];
5e58fcfa 1195 }
82d4a110 1196
5e58fcfa
ACM
1197 sc->nr_args = nr_args;
1198 return 0;
1199}
1200
1201static int syscall__set_arg_fmts(struct syscall *sc)
1202{
1203 struct format_field *field;
1204 int idx = 0, len;
1205
1206 for (field = sc->args; field; field = field->next, ++idx) {
1207 if (sc->fmt && sc->fmt->arg[idx].scnprintf)
1208 continue;
1f115cb7 1209
82d4a110 1210 if (strcmp(field->type, "const char *") == 0 &&
12f3ca4f
ACM
1211 (strcmp(field->name, "filename") == 0 ||
1212 strcmp(field->name, "path") == 0 ||
1213 strcmp(field->name, "pathname") == 0))
82d4a110 1214 sc->arg_fmt[idx].scnprintf = SCA_FILENAME;
beccb2b5 1215 else if (field->flags & FIELD_IS_POINTER)
82d4a110 1216 sc->arg_fmt[idx].scnprintf = syscall_arg__scnprintf_hex;
d1d438a3 1217 else if (strcmp(field->type, "pid_t") == 0)
82d4a110 1218 sc->arg_fmt[idx].scnprintf = SCA_PID;
ba2f22cf 1219 else if (strcmp(field->type, "umode_t") == 0)
82d4a110 1220 sc->arg_fmt[idx].scnprintf = SCA_MODE_T;
b6565c90
ACM
1221 else if ((strcmp(field->type, "int") == 0 ||
1222 strcmp(field->type, "unsigned int") == 0 ||
1223 strcmp(field->type, "long") == 0) &&
1224 (len = strlen(field->name)) >= 2 &&
1225 strcmp(field->name + len - 2, "fd") == 0) {
1226 /*
1227 * /sys/kernel/tracing/events/syscalls/sys_enter*
1228 * egrep 'field:.*fd;' .../format|sed -r 's/.*field:([a-z ]+) [a-z_]*fd.+/\1/g'|sort|uniq -c
1229 * 65 int
1230 * 23 unsigned int
1231 * 7 unsigned long
1232 */
82d4a110 1233 sc->arg_fmt[idx].scnprintf = SCA_FD;
b6565c90 1234 }
13d4ff3e
ACM
1235 }
1236
1237 return 0;
1238}
1239
514f1c67
ACM
1240static int trace__read_syscall_info(struct trace *trace, int id)
1241{
1242 char tp_name[128];
1243 struct syscall *sc;
fd0db102 1244 const char *name = syscalltbl__name(trace->sctbl, id);
3a531260
ACM
1245
1246 if (name == NULL)
1247 return -1;
514f1c67
ACM
1248
1249 if (id > trace->syscalls.max) {
1250 struct syscall *nsyscalls = realloc(trace->syscalls.table, (id + 1) * sizeof(*sc));
1251
1252 if (nsyscalls == NULL)
1253 return -1;
1254
1255 if (trace->syscalls.max != -1) {
1256 memset(nsyscalls + trace->syscalls.max + 1, 0,
1257 (id - trace->syscalls.max) * sizeof(*sc));
1258 } else {
1259 memset(nsyscalls, 0, (id + 1) * sizeof(*sc));
1260 }
1261
1262 trace->syscalls.table = nsyscalls;
1263 trace->syscalls.max = id;
1264 }
1265
1266 sc = trace->syscalls.table + id;
3a531260 1267 sc->name = name;
2ae3a312 1268
3a531260 1269 sc->fmt = syscall_fmt__find(sc->name);
514f1c67 1270
aec1930b 1271 snprintf(tp_name, sizeof(tp_name), "sys_enter_%s", sc->name);
97978b3e 1272 sc->tp_format = trace_event__tp_format("syscalls", tp_name);
aec1930b 1273
8dd2a131 1274 if (IS_ERR(sc->tp_format) && sc->fmt && sc->fmt->alias) {
aec1930b 1275 snprintf(tp_name, sizeof(tp_name), "sys_enter_%s", sc->fmt->alias);
97978b3e 1276 sc->tp_format = trace_event__tp_format("syscalls", tp_name);
aec1930b 1277 }
514f1c67 1278
5e58fcfa
ACM
1279 if (syscall__alloc_arg_fmts(sc, IS_ERR(sc->tp_format) ? 6 : sc->tp_format->format.nr_fields))
1280 return -1;
1281
8dd2a131 1282 if (IS_ERR(sc->tp_format))
13d4ff3e
ACM
1283 return -1;
1284
f208bd8d 1285 sc->args = sc->tp_format->format.fields;
c42de706
TS
1286 /*
1287 * We need to check and discard the first variable '__syscall_nr'
1288 * or 'nr' that mean the syscall number. It is needless here.
1289 * So drop '__syscall_nr' or 'nr' field but does not exist on older kernels.
1290 */
1291 if (sc->args && (!strcmp(sc->args->name, "__syscall_nr") || !strcmp(sc->args->name, "nr"))) {
f208bd8d
ACM
1292 sc->args = sc->args->next;
1293 --sc->nr_args;
1294 }
1295
5089f20e
ACM
1296 sc->is_exit = !strcmp(name, "exit_group") || !strcmp(name, "exit");
1297
13d4ff3e 1298 return syscall__set_arg_fmts(sc);
514f1c67
ACM
1299}
1300
d0cc439b
ACM
1301static int trace__validate_ev_qualifier(struct trace *trace)
1302{
8b3ce757 1303 int err = 0, i;
27702bcf 1304 size_t nr_allocated;
d0cc439b
ACM
1305 struct str_node *pos;
1306
8b3ce757
ACM
1307 trace->ev_qualifier_ids.nr = strlist__nr_entries(trace->ev_qualifier);
1308 trace->ev_qualifier_ids.entries = malloc(trace->ev_qualifier_ids.nr *
1309 sizeof(trace->ev_qualifier_ids.entries[0]));
1310
1311 if (trace->ev_qualifier_ids.entries == NULL) {
1312 fputs("Error:\tNot enough memory for allocating events qualifier ids\n",
1313 trace->output);
1314 err = -EINVAL;
1315 goto out;
1316 }
1317
27702bcf 1318 nr_allocated = trace->ev_qualifier_ids.nr;
8b3ce757
ACM
1319 i = 0;
1320
602a1f4d 1321 strlist__for_each_entry(pos, trace->ev_qualifier) {
d0cc439b 1322 const char *sc = pos->s;
27702bcf 1323 int id = syscalltbl__id(trace->sctbl, sc), match_next = -1;
d0cc439b 1324
8b3ce757 1325 if (id < 0) {
27702bcf
ACM
1326 id = syscalltbl__strglobmatch_first(trace->sctbl, sc, &match_next);
1327 if (id >= 0)
1328 goto matches;
1329
d0cc439b
ACM
1330 if (err == 0) {
1331 fputs("Error:\tInvalid syscall ", trace->output);
1332 err = -EINVAL;
1333 } else {
1334 fputs(", ", trace->output);
1335 }
1336
1337 fputs(sc, trace->output);
1338 }
27702bcf 1339matches:
8b3ce757 1340 trace->ev_qualifier_ids.entries[i++] = id;
27702bcf
ACM
1341 if (match_next == -1)
1342 continue;
1343
1344 while (1) {
1345 id = syscalltbl__strglobmatch_next(trace->sctbl, sc, &match_next);
1346 if (id < 0)
1347 break;
1348 if (nr_allocated == trace->ev_qualifier_ids.nr) {
1349 void *entries;
1350
1351 nr_allocated += 8;
1352 entries = realloc(trace->ev_qualifier_ids.entries,
1353 nr_allocated * sizeof(trace->ev_qualifier_ids.entries[0]));
1354 if (entries == NULL) {
1355 err = -ENOMEM;
1356 fputs("\nError:\t Not enough memory for parsing\n", trace->output);
1357 goto out_free;
1358 }
1359 trace->ev_qualifier_ids.entries = entries;
1360 }
1361 trace->ev_qualifier_ids.nr++;
1362 trace->ev_qualifier_ids.entries[i++] = id;
1363 }
d0cc439b
ACM
1364 }
1365
1366 if (err < 0) {
1367 fputs("\nHint:\ttry 'perf list syscalls:sys_enter_*'"
1368 "\nHint:\tand: 'man syscalls'\n", trace->output);
27702bcf 1369out_free:
8b3ce757
ACM
1370 zfree(&trace->ev_qualifier_ids.entries);
1371 trace->ev_qualifier_ids.nr = 0;
d0cc439b 1372 }
8b3ce757 1373out:
d0cc439b
ACM
1374 return err;
1375}
1376
55d43bca
DA
1377/*
1378 * args is to be interpreted as a series of longs but we need to handle
1379 * 8-byte unaligned accesses. args points to raw_data within the event
1380 * and raw_data is guaranteed to be 8-byte unaligned because it is
1381 * preceded by raw_size which is a u32. So we need to copy args to a temp
1382 * variable to read it. Most notably this avoids extended load instructions
1383 * on unaligned addresses
1384 */
325f5091 1385unsigned long syscall_arg__val(struct syscall_arg *arg, u8 idx)
f9f83b33
ACM
1386{
1387 unsigned long val;
325f5091 1388 unsigned char *p = arg->args + sizeof(unsigned long) * idx;
f9f83b33
ACM
1389
1390 memcpy(&val, p, sizeof(val));
1391 return val;
1392}
1393
c51bdfec
ACM
1394static size_t syscall__scnprintf_name(struct syscall *sc, char *bf, size_t size,
1395 struct syscall_arg *arg)
1396{
1397 if (sc->arg_fmt && sc->arg_fmt[arg->idx].name)
1398 return scnprintf(bf, size, "%s: ", sc->arg_fmt[arg->idx].name);
1399
1400 return scnprintf(bf, size, "arg%d: ", arg->idx);
1401}
1402
d032d79e
ACM
1403static size_t syscall__scnprintf_val(struct syscall *sc, char *bf, size_t size,
1404 struct syscall_arg *arg, unsigned long val)
1405{
1406 if (sc->arg_fmt && sc->arg_fmt[arg->idx].scnprintf) {
1407 arg->val = val;
1408 if (sc->arg_fmt[arg->idx].parm)
1409 arg->parm = sc->arg_fmt[arg->idx].parm;
1410 return sc->arg_fmt[arg->idx].scnprintf(bf, size, arg);
1411 }
1412 return scnprintf(bf, size, "%ld", val);
1413}
1414
752fde44 1415static size_t syscall__scnprintf_args(struct syscall *sc, char *bf, size_t size,
55d43bca 1416 unsigned char *args, struct trace *trace,
75b757ca 1417 struct thread *thread)
514f1c67 1418{
514f1c67 1419 size_t printed = 0;
55d43bca 1420 unsigned long val;
d032d79e
ACM
1421 u8 bit = 1;
1422 struct syscall_arg arg = {
1423 .args = args,
1424 .idx = 0,
1425 .mask = 0,
1426 .trace = trace,
1427 .thread = thread,
1428 };
84486caa
ACM
1429 struct thread_trace *ttrace = thread__priv(thread);
1430
1431 /*
1432 * Things like fcntl will set this in its 'cmd' formatter to pick the
1433 * right formatter for the return value (an fd? file flags?), which is
1434 * not needed for syscalls that always return a given type, say an fd.
1435 */
1436 ttrace->ret_scnprintf = NULL;
514f1c67 1437
f208bd8d 1438 if (sc->args != NULL) {
514f1c67 1439 struct format_field *field;
6e7eeb51 1440
f208bd8d 1441 for (field = sc->args; field;
01533e97
ACM
1442 field = field->next, ++arg.idx, bit <<= 1) {
1443 if (arg.mask & bit)
6e7eeb51 1444 continue;
55d43bca 1445
f9f83b33 1446 val = syscall_arg__val(&arg, arg.idx);
55d43bca 1447
4aa58232
ACM
1448 /*
1449 * Suppress this argument if its value is zero and
1450 * and we don't have a string associated in an
1451 * strarray for it.
1452 */
55d43bca 1453 if (val == 0 &&
82d4a110 1454 !(sc->arg_fmt &&
d47737d5
ACM
1455 (sc->arg_fmt[arg.idx].show_zero ||
1456 sc->arg_fmt[arg.idx].scnprintf == SCA_STRARRAY ||
82d4a110
ACM
1457 sc->arg_fmt[arg.idx].scnprintf == SCA_STRARRAYS) &&
1458 sc->arg_fmt[arg.idx].parm))
22ae5cf1
ACM
1459 continue;
1460
752fde44 1461 printed += scnprintf(bf + printed, size - printed,
13d4ff3e 1462 "%s%s: ", printed ? ", " : "", field->name);
d032d79e 1463 printed += syscall__scnprintf_val(sc, bf + printed, size - printed, &arg, val);
514f1c67 1464 }
4c4d6e51
ACM
1465 } else if (IS_ERR(sc->tp_format)) {
1466 /*
1467 * If we managed to read the tracepoint /format file, then we
1468 * may end up not having any args, like with gettid(), so only
1469 * print the raw args when we didn't manage to read it.
1470 */
332337da 1471 while (arg.idx < sc->nr_args) {
d032d79e
ACM
1472 if (arg.mask & bit)
1473 goto next_arg;
1474 val = syscall_arg__val(&arg, arg.idx);
c51bdfec
ACM
1475 if (printed)
1476 printed += scnprintf(bf + printed, size - printed, ", ");
1477 printed += syscall__scnprintf_name(sc, bf + printed, size - printed, &arg);
d032d79e
ACM
1478 printed += syscall__scnprintf_val(sc, bf + printed, size - printed, &arg, val);
1479next_arg:
1480 ++arg.idx;
1481 bit <<= 1;
514f1c67
ACM
1482 }
1483 }
1484
1485 return printed;
1486}
1487
ba3d7dee 1488typedef int (*tracepoint_handler)(struct trace *trace, struct perf_evsel *evsel,
0c82adcf 1489 union perf_event *event,
ba3d7dee
ACM
1490 struct perf_sample *sample);
1491
1492static struct syscall *trace__syscall_info(struct trace *trace,
bf2575c1 1493 struct perf_evsel *evsel, int id)
ba3d7dee 1494{
ba3d7dee
ACM
1495
1496 if (id < 0) {
adaa18bf
ACM
1497
1498 /*
1499 * XXX: Noticed on x86_64, reproduced as far back as 3.0.36, haven't tried
1500 * before that, leaving at a higher verbosity level till that is
1501 * explained. Reproduced with plain ftrace with:
1502 *
1503 * echo 1 > /t/events/raw_syscalls/sys_exit/enable
1504 * grep "NR -1 " /t/trace_pipe
1505 *
1506 * After generating some load on the machine.
1507 */
1508 if (verbose > 1) {
1509 static u64 n;
1510 fprintf(trace->output, "Invalid syscall %d id, skipping (%s, %" PRIu64 ") ...\n",
1511 id, perf_evsel__name(evsel), ++n);
1512 }
ba3d7dee
ACM
1513 return NULL;
1514 }
1515
1516 if ((id > trace->syscalls.max || trace->syscalls.table[id].name == NULL) &&
1517 trace__read_syscall_info(trace, id))
1518 goto out_cant_read;
1519
1520 if ((id > trace->syscalls.max || trace->syscalls.table[id].name == NULL))
1521 goto out_cant_read;
1522
1523 return &trace->syscalls.table[id];
1524
1525out_cant_read:
bb963e16 1526 if (verbose > 0) {
7c304ee0
ACM
1527 fprintf(trace->output, "Problems reading syscall %d", id);
1528 if (id <= trace->syscalls.max && trace->syscalls.table[id].name != NULL)
1529 fprintf(trace->output, "(%s)", trace->syscalls.table[id].name);
1530 fputs(" information\n", trace->output);
1531 }
ba3d7dee
ACM
1532 return NULL;
1533}
1534
bf2575c1
DA
1535static void thread__update_stats(struct thread_trace *ttrace,
1536 int id, struct perf_sample *sample)
1537{
1538 struct int_node *inode;
1539 struct stats *stats;
1540 u64 duration = 0;
1541
1542 inode = intlist__findnew(ttrace->syscall_stats, id);
1543 if (inode == NULL)
1544 return;
1545
1546 stats = inode->priv;
1547 if (stats == NULL) {
1548 stats = malloc(sizeof(struct stats));
1549 if (stats == NULL)
1550 return;
1551 init_stats(stats);
1552 inode->priv = stats;
1553 }
1554
1555 if (ttrace->entry_time && sample->time > ttrace->entry_time)
1556 duration = sample->time - ttrace->entry_time;
1557
1558 update_stats(stats, duration);
1559}
1560
522283fe 1561static int trace__printf_interrupted_entry(struct trace *trace)
e596663e
ACM
1562{
1563 struct thread_trace *ttrace;
e596663e
ACM
1564 size_t printed;
1565
1566 if (trace->current == NULL)
1567 return 0;
1568
1569 ttrace = thread__priv(trace->current);
1570
1571 if (!ttrace->entry_pending)
1572 return 0;
1573
522283fe 1574 printed = trace__fprintf_entry_head(trace, trace->current, 0, false, ttrace->entry_time, trace->output);
e596663e
ACM
1575 printed += fprintf(trace->output, "%-70s) ...\n", ttrace->entry_str);
1576 ttrace->entry_pending = false;
1577
1578 return printed;
1579}
1580
591421e1
ACM
1581static int trace__fprintf_sample(struct trace *trace, struct perf_evsel *evsel,
1582 struct perf_sample *sample, struct thread *thread)
1583{
1584 int printed = 0;
1585
1586 if (trace->print_sample) {
1587 double ts = (double)sample->time / NSEC_PER_MSEC;
1588
1589 printed += fprintf(trace->output, "%22s %10.3f %s %d/%d [%d]\n",
1590 perf_evsel__name(evsel), ts,
1591 thread__comm_str(thread),
1592 sample->pid, sample->tid, sample->cpu);
1593 }
1594
1595 return printed;
1596}
1597
ba3d7dee 1598static int trace__sys_enter(struct trace *trace, struct perf_evsel *evsel,
0c82adcf 1599 union perf_event *event __maybe_unused,
ba3d7dee
ACM
1600 struct perf_sample *sample)
1601{
752fde44 1602 char *msg;
ba3d7dee 1603 void *args;
752fde44 1604 size_t printed = 0;
2ae3a312 1605 struct thread *thread;
b91fc39f 1606 int id = perf_evsel__sc_tp_uint(evsel, id, sample), err = -1;
bf2575c1 1607 struct syscall *sc = trace__syscall_info(trace, evsel, id);
2ae3a312
ACM
1608 struct thread_trace *ttrace;
1609
1610 if (sc == NULL)
1611 return -1;
ba3d7dee 1612
8fb598e5 1613 thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
c24ff998 1614 ttrace = thread__trace(thread, trace->output);
2ae3a312 1615 if (ttrace == NULL)
b91fc39f 1616 goto out_put;
ba3d7dee 1617
591421e1
ACM
1618 trace__fprintf_sample(trace, evsel, sample, thread);
1619
77170988 1620 args = perf_evsel__sc_tp_ptr(evsel, args, sample);
752fde44
ACM
1621
1622 if (ttrace->entry_str == NULL) {
e4d44e83 1623 ttrace->entry_str = malloc(trace__entry_str_size);
752fde44 1624 if (!ttrace->entry_str)
b91fc39f 1625 goto out_put;
752fde44
ACM
1626 }
1627
5cf9c84e 1628 if (!(trace->duration_filter || trace->summary_only || trace->min_stack))
522283fe 1629 trace__printf_interrupted_entry(trace);
e596663e 1630
752fde44
ACM
1631 ttrace->entry_time = sample->time;
1632 msg = ttrace->entry_str;
e4d44e83 1633 printed += scnprintf(msg + printed, trace__entry_str_size - printed, "%s(", sc->name);
752fde44 1634
e4d44e83 1635 printed += syscall__scnprintf_args(sc, msg + printed, trace__entry_str_size - printed,
75b757ca 1636 args, trace, thread);
752fde44 1637
5089f20e 1638 if (sc->is_exit) {
5cf9c84e 1639 if (!(trace->duration_filter || trace->summary_only || trace->min_stack)) {
fd2b2975 1640 trace__fprintf_entry_head(trace, thread, 0, false, ttrace->entry_time, trace->output);
c008f78f 1641 fprintf(trace->output, "%-70s)\n", ttrace->entry_str);
ae9ed035 1642 }
7f4f8001 1643 } else {
752fde44 1644 ttrace->entry_pending = true;
7f4f8001
ACM
1645 /* See trace__vfs_getname & trace__sys_exit */
1646 ttrace->filename.pending_open = false;
1647 }
ba3d7dee 1648
f3b623b8
ACM
1649 if (trace->current != thread) {
1650 thread__put(trace->current);
1651 trace->current = thread__get(thread);
1652 }
b91fc39f
ACM
1653 err = 0;
1654out_put:
1655 thread__put(thread);
1656 return err;
ba3d7dee
ACM
1657}
1658
5cf9c84e
ACM
1659static int trace__resolve_callchain(struct trace *trace, struct perf_evsel *evsel,
1660 struct perf_sample *sample,
1661 struct callchain_cursor *cursor)
202ff968
ACM
1662{
1663 struct addr_location al;
5cf9c84e
ACM
1664
1665 if (machine__resolve(trace->host, &al, sample) < 0 ||
bd3dda9a 1666 thread__resolve_callchain(al.thread, cursor, evsel, sample, NULL, NULL, evsel->attr.sample_max_stack))
5cf9c84e
ACM
1667 return -1;
1668
1669 return 0;
1670}
1671
1672static int trace__fprintf_callchain(struct trace *trace, struct perf_sample *sample)
1673{
202ff968 1674 /* TODO: user-configurable print_opts */
e20ab86e
ACM
1675 const unsigned int print_opts = EVSEL__PRINT_SYM |
1676 EVSEL__PRINT_DSO |
1677 EVSEL__PRINT_UNKNOWN_AS_ADDR;
202ff968 1678
d327e60c 1679 return sample__fprintf_callchain(sample, 38, print_opts, &callchain_cursor, trace->output);
202ff968
ACM
1680}
1681
092bd3cd
HB
/*
 * Map errno value @err to its symbolic name for the architecture recorded
 * in the environment of @evsel.
 */
static const char *errno_to_name(struct perf_evsel *evsel, int err)
{
	const char *arch_name = perf_env__arch(perf_evsel__env(evsel));

	return arch_syscalls__strerrno(arch_name, err);
}
1689
ba3d7dee 1690static int trace__sys_exit(struct trace *trace, struct perf_evsel *evsel,
0c82adcf 1691 union perf_event *event __maybe_unused,
ba3d7dee
ACM
1692 struct perf_sample *sample)
1693{
2c82c3ad 1694 long ret;
60c907ab 1695 u64 duration = 0;
fd2b2975 1696 bool duration_calculated = false;
2ae3a312 1697 struct thread *thread;
5cf9c84e 1698 int id = perf_evsel__sc_tp_uint(evsel, id, sample), err = -1, callchain_ret = 0;
bf2575c1 1699 struct syscall *sc = trace__syscall_info(trace, evsel, id);
2ae3a312
ACM
1700 struct thread_trace *ttrace;
1701
1702 if (sc == NULL)
1703 return -1;
ba3d7dee 1704
8fb598e5 1705 thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
c24ff998 1706 ttrace = thread__trace(thread, trace->output);
2ae3a312 1707 if (ttrace == NULL)
b91fc39f 1708 goto out_put;
ba3d7dee 1709
591421e1
ACM
1710 trace__fprintf_sample(trace, evsel, sample, thread);
1711
bf2575c1
DA
1712 if (trace->summary)
1713 thread__update_stats(ttrace, id, sample);
1714
77170988 1715 ret = perf_evsel__sc_tp_uint(evsel, ret, sample);
ba3d7dee 1716
fd0db102 1717 if (id == trace->open_id && ret >= 0 && ttrace->filename.pending_open) {
7f4f8001
ACM
1718 trace__set_fd_pathname(thread, ret, ttrace->filename.name);
1719 ttrace->filename.pending_open = false;
c522739d
ACM
1720 ++trace->stats.vfs_getname;
1721 }
1722
ae9ed035 1723 if (ttrace->entry_time) {
60c907ab 1724 duration = sample->time - ttrace->entry_time;
ae9ed035
ACM
1725 if (trace__filter_duration(trace, duration))
1726 goto out;
fd2b2975 1727 duration_calculated = true;
ae9ed035
ACM
1728 } else if (trace->duration_filter)
1729 goto out;
60c907ab 1730
5cf9c84e
ACM
1731 if (sample->callchain) {
1732 callchain_ret = trace__resolve_callchain(trace, evsel, sample, &callchain_cursor);
1733 if (callchain_ret == 0) {
1734 if (callchain_cursor.nr < trace->min_stack)
1735 goto out;
1736 callchain_ret = 1;
1737 }
1738 }
1739
fd2eabaf
DA
1740 if (trace->summary_only)
1741 goto out;
1742
fd2b2975 1743 trace__fprintf_entry_head(trace, thread, duration, duration_calculated, ttrace->entry_time, trace->output);
752fde44
ACM
1744
1745 if (ttrace->entry_pending) {
c24ff998 1746 fprintf(trace->output, "%-70s", ttrace->entry_str);
752fde44 1747 } else {
c24ff998
ACM
1748 fprintf(trace->output, " ... [");
1749 color_fprintf(trace->output, PERF_COLOR_YELLOW, "continued");
1750 fprintf(trace->output, "]: %s()", sc->name);
752fde44
ACM
1751 }
1752
da3c9a44 1753 if (sc->fmt == NULL) {
1f63139c
ACM
1754 if (ret < 0)
1755 goto errno_print;
da3c9a44 1756signed_print:
6f8fe61e 1757 fprintf(trace->output, ") = %ld", ret);
1f63139c
ACM
1758 } else if (ret < 0) {
1759errno_print: {
942a91ed 1760 char bf[STRERR_BUFSIZE];
c8b5f2c9 1761 const char *emsg = str_error_r(-ret, bf, sizeof(bf)),
092bd3cd 1762 *e = errno_to_name(evsel, -ret);
ba3d7dee 1763
c24ff998 1764 fprintf(trace->output, ") = -1 %s %s", e, emsg);
1f63139c 1765 }
da3c9a44 1766 } else if (ret == 0 && sc->fmt->timeout)
c24ff998 1767 fprintf(trace->output, ") = 0 Timeout");
84486caa
ACM
1768 else if (ttrace->ret_scnprintf) {
1769 char bf[1024];
7ee57434
ACM
1770 struct syscall_arg arg = {
1771 .val = ret,
1772 .thread = thread,
1773 .trace = trace,
1774 };
1775 ttrace->ret_scnprintf(bf, sizeof(bf), &arg);
84486caa
ACM
1776 ttrace->ret_scnprintf = NULL;
1777 fprintf(trace->output, ") = %s", bf);
1778 } else if (sc->fmt->hexret)
2c82c3ad 1779 fprintf(trace->output, ") = %#lx", ret);
11c8e39f
ACM
1780 else if (sc->fmt->errpid) {
1781 struct thread *child = machine__find_thread(trace->host, ret, ret);
1782
1783 if (child != NULL) {
1784 fprintf(trace->output, ") = %ld", ret);
1785 if (child->comm_set)
1786 fprintf(trace->output, " (%s)", thread__comm_str(child));
1787 thread__put(child);
1788 }
1789 } else
da3c9a44 1790 goto signed_print;
ba3d7dee 1791
c24ff998 1792 fputc('\n', trace->output);
566a0885 1793
5cf9c84e
ACM
1794 if (callchain_ret > 0)
1795 trace__fprintf_callchain(trace, sample);
1796 else if (callchain_ret < 0)
1797 pr_err("Problem processing %s callchain, skipping...\n", perf_evsel__name(evsel));
ae9ed035 1798out:
752fde44 1799 ttrace->entry_pending = false;
b91fc39f
ACM
1800 err = 0;
1801out_put:
1802 thread__put(thread);
1803 return err;
ba3d7dee
ACM
1804}
1805
c522739d 1806static int trace__vfs_getname(struct trace *trace, struct perf_evsel *evsel,
0c82adcf 1807 union perf_event *event __maybe_unused,
c522739d
ACM
1808 struct perf_sample *sample)
1809{
f994592d
ACM
1810 struct thread *thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
1811 struct thread_trace *ttrace;
1812 size_t filename_len, entry_str_len, to_move;
1813 ssize_t remaining_space;
1814 char *pos;
7f4f8001 1815 const char *filename = perf_evsel__rawptr(evsel, sample, "pathname");
f994592d
ACM
1816
1817 if (!thread)
1818 goto out;
1819
1820 ttrace = thread__priv(thread);
1821 if (!ttrace)
ef65e96e 1822 goto out_put;
f994592d 1823
7f4f8001 1824 filename_len = strlen(filename);
39f0e7a8 1825 if (filename_len == 0)
ef65e96e 1826 goto out_put;
7f4f8001
ACM
1827
1828 if (ttrace->filename.namelen < filename_len) {
1829 char *f = realloc(ttrace->filename.name, filename_len + 1);
1830
1831 if (f == NULL)
ef65e96e 1832 goto out_put;
7f4f8001
ACM
1833
1834 ttrace->filename.namelen = filename_len;
1835 ttrace->filename.name = f;
1836 }
1837
1838 strcpy(ttrace->filename.name, filename);
1839 ttrace->filename.pending_open = true;
1840
f994592d 1841 if (!ttrace->filename.ptr)
ef65e96e 1842 goto out_put;
f994592d
ACM
1843
1844 entry_str_len = strlen(ttrace->entry_str);
1845 remaining_space = trace__entry_str_size - entry_str_len - 1; /* \0 */
1846 if (remaining_space <= 0)
ef65e96e 1847 goto out_put;
f994592d 1848
f994592d
ACM
1849 if (filename_len > (size_t)remaining_space) {
1850 filename += filename_len - remaining_space;
1851 filename_len = remaining_space;
1852 }
1853
1854 to_move = entry_str_len - ttrace->filename.entry_str_pos + 1; /* \0 */
1855 pos = ttrace->entry_str + ttrace->filename.entry_str_pos;
1856 memmove(pos + filename_len, pos, to_move);
1857 memcpy(pos, filename, filename_len);
1858
1859 ttrace->filename.ptr = 0;
1860 ttrace->filename.entry_str_pos = 0;
ef65e96e
ACM
1861out_put:
1862 thread__put(thread);
f994592d 1863out:
c522739d
ACM
1864 return 0;
1865}
1866
1302d88e 1867static int trace__sched_stat_runtime(struct trace *trace, struct perf_evsel *evsel,
0c82adcf 1868 union perf_event *event __maybe_unused,
1302d88e
ACM
1869 struct perf_sample *sample)
1870{
1871 u64 runtime = perf_evsel__intval(evsel, sample, "runtime");
1872 double runtime_ms = (double)runtime / NSEC_PER_MSEC;
8fb598e5 1873 struct thread *thread = machine__findnew_thread(trace->host,
314add6b
AH
1874 sample->pid,
1875 sample->tid);
c24ff998 1876 struct thread_trace *ttrace = thread__trace(thread, trace->output);
1302d88e
ACM
1877
1878 if (ttrace == NULL)
1879 goto out_dump;
1880
1881 ttrace->runtime_ms += runtime_ms;
1882 trace->runtime_ms += runtime_ms;
ef65e96e 1883out_put:
b91fc39f 1884 thread__put(thread);
1302d88e
ACM
1885 return 0;
1886
1887out_dump:
c24ff998 1888 fprintf(trace->output, "%s: comm=%s,pid=%u,runtime=%" PRIu64 ",vruntime=%" PRIu64 ")\n",
1302d88e
ACM
1889 evsel->name,
1890 perf_evsel__strval(evsel, sample, "comm"),
1891 (pid_t)perf_evsel__intval(evsel, sample, "pid"),
1892 runtime,
1893 perf_evsel__intval(evsel, sample, "vruntime"));
ef65e96e 1894 goto out_put;
1302d88e
ACM
1895}
1896
923d0c9a
ACM
1897static int bpf_output__printer(enum binary_printer_ops op,
1898 unsigned int val, void *extra __maybe_unused, FILE *fp)
1d6c9407 1899{
1d6c9407
WN
1900 unsigned char ch = (unsigned char)val;
1901
1902 switch (op) {
1903 case BINARY_PRINT_CHAR_DATA:
923d0c9a 1904 return fprintf(fp, "%c", isprint(ch) ? ch : '.');
1d6c9407
WN
1905 case BINARY_PRINT_DATA_BEGIN:
1906 case BINARY_PRINT_LINE_BEGIN:
1907 case BINARY_PRINT_ADDR:
1908 case BINARY_PRINT_NUM_DATA:
1909 case BINARY_PRINT_NUM_PAD:
1910 case BINARY_PRINT_SEP:
1911 case BINARY_PRINT_CHAR_PAD:
1912 case BINARY_PRINT_LINE_END:
1913 case BINARY_PRINT_DATA_END:
1914 default:
1915 break;
1916 }
923d0c9a
ACM
1917
1918 return 0;
1d6c9407
WN
1919}
1920
1921static void bpf_output__fprintf(struct trace *trace,
1922 struct perf_sample *sample)
1923{
923d0c9a
ACM
1924 binary__fprintf(sample->raw_data, sample->raw_size, 8,
1925 bpf_output__printer, NULL, trace->output);
1d6c9407
WN
1926}
1927
14a052df
ACM
1928static int trace__event_handler(struct trace *trace, struct perf_evsel *evsel,
1929 union perf_event *event __maybe_unused,
1930 struct perf_sample *sample)
1931{
7ad35615
ACM
1932 int callchain_ret = 0;
1933
1934 if (sample->callchain) {
1935 callchain_ret = trace__resolve_callchain(trace, evsel, sample, &callchain_cursor);
1936 if (callchain_ret == 0) {
1937 if (callchain_cursor.nr < trace->min_stack)
1938 goto out;
1939 callchain_ret = 1;
1940 }
1941 }
1942
522283fe 1943 trace__printf_interrupted_entry(trace);
14a052df 1944 trace__fprintf_tstamp(trace, sample->time, trace->output);
0808921a
ACM
1945
1946 if (trace->trace_syscalls)
1947 fprintf(trace->output, "( ): ");
1948
1949 fprintf(trace->output, "%s:", evsel->name);
14a052df 1950
1d6c9407
WN
1951 if (perf_evsel__is_bpf_output(evsel)) {
1952 bpf_output__fprintf(trace, sample);
1953 } else if (evsel->tp_format) {
14a052df
ACM
1954 event_format__fprintf(evsel->tp_format, sample->cpu,
1955 sample->raw_data, sample->raw_size,
1956 trace->output);
1957 }
1958
1959 fprintf(trace->output, ")\n");
202ff968 1960
7ad35615
ACM
1961 if (callchain_ret > 0)
1962 trace__fprintf_callchain(trace, sample);
1963 else if (callchain_ret < 0)
1964 pr_err("Problem processing %s callchain, skipping...\n", perf_evsel__name(evsel));
1965out:
14a052df
ACM
1966 return 0;
1967}
1968
598d02c5
SF
1969static void print_location(FILE *f, struct perf_sample *sample,
1970 struct addr_location *al,
1971 bool print_dso, bool print_sym)
1972{
1973
bb963e16 1974 if ((verbose > 0 || print_dso) && al->map)
598d02c5
SF
1975 fprintf(f, "%s@", al->map->dso->long_name);
1976
bb963e16 1977 if ((verbose > 0 || print_sym) && al->sym)
4414a3c5 1978 fprintf(f, "%s+0x%" PRIx64, al->sym->name,
598d02c5
SF
1979 al->addr - al->sym->start);
1980 else if (al->map)
4414a3c5 1981 fprintf(f, "0x%" PRIx64, al->addr);
598d02c5 1982 else
4414a3c5 1983 fprintf(f, "0x%" PRIx64, sample->addr);
598d02c5
SF
1984}
1985
1986static int trace__pgfault(struct trace *trace,
1987 struct perf_evsel *evsel,
473398a2 1988 union perf_event *event __maybe_unused,
598d02c5
SF
1989 struct perf_sample *sample)
1990{
1991 struct thread *thread;
598d02c5
SF
1992 struct addr_location al;
1993 char map_type = 'd';
a2ea67d7 1994 struct thread_trace *ttrace;
b91fc39f 1995 int err = -1;
1df54290 1996 int callchain_ret = 0;
598d02c5
SF
1997
1998 thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
1df54290
ACM
1999
2000 if (sample->callchain) {
2001 callchain_ret = trace__resolve_callchain(trace, evsel, sample, &callchain_cursor);
2002 if (callchain_ret == 0) {
2003 if (callchain_cursor.nr < trace->min_stack)
2004 goto out_put;
2005 callchain_ret = 1;
2006 }
2007 }
2008
a2ea67d7
SF
2009 ttrace = thread__trace(thread, trace->output);
2010 if (ttrace == NULL)
b91fc39f 2011 goto out_put;
a2ea67d7
SF
2012
2013 if (evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MAJ)
2014 ttrace->pfmaj++;
2015 else
2016 ttrace->pfmin++;
2017
2018 if (trace->summary_only)
b91fc39f 2019 goto out;
598d02c5 2020
473398a2 2021 thread__find_addr_location(thread, sample->cpumode, MAP__FUNCTION,
598d02c5
SF
2022 sample->ip, &al);
2023
fd2b2975 2024 trace__fprintf_entry_head(trace, thread, 0, true, sample->time, trace->output);
598d02c5
SF
2025
2026 fprintf(trace->output, "%sfault [",
2027 evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MAJ ?
2028 "maj" : "min");
2029
2030 print_location(trace->output, sample, &al, false, true);
2031
2032 fprintf(trace->output, "] => ");
2033
473398a2 2034 thread__find_addr_location(thread, sample->cpumode, MAP__VARIABLE,
598d02c5
SF
2035 sample->addr, &al);
2036
2037 if (!al.map) {
473398a2 2038 thread__find_addr_location(thread, sample->cpumode,
598d02c5
SF
2039 MAP__FUNCTION, sample->addr, &al);
2040
2041 if (al.map)
2042 map_type = 'x';
2043 else
2044 map_type = '?';
2045 }
2046
2047 print_location(trace->output, sample, &al, true, false);
2048
2049 fprintf(trace->output, " (%c%c)\n", map_type, al.level);
0c3a6ef4 2050
1df54290
ACM
2051 if (callchain_ret > 0)
2052 trace__fprintf_callchain(trace, sample);
2053 else if (callchain_ret < 0)
2054 pr_err("Problem processing %s callchain, skipping...\n", perf_evsel__name(evsel));
b91fc39f
ACM
2055out:
2056 err = 0;
2057out_put:
2058 thread__put(thread);
2059 return err;
598d02c5
SF
2060}
2061
e6001980 2062static void trace__set_base_time(struct trace *trace,
8a07a809 2063 struct perf_evsel *evsel,
e6001980
ACM
2064 struct perf_sample *sample)
2065{
8a07a809
ACM
2066 /*
2067 * BPF events were not setting PERF_SAMPLE_TIME, so be more robust
2068 * and don't use sample->time unconditionally, we may end up having
2069 * some other event in the future without PERF_SAMPLE_TIME for good
2070 * reason, i.e. we may not be interested in its timestamps, just in
2071 * it taking place, picking some piece of information when it
2072 * appears in our event stream (vfs_getname comes to mind).
2073 */
2074 if (trace->base_time == 0 && !trace->full_time &&
2075 (evsel->attr.sample_type & PERF_SAMPLE_TIME))
e6001980
ACM
2076 trace->base_time = sample->time;
2077}
2078
6810fc91 2079static int trace__process_sample(struct perf_tool *tool,
0c82adcf 2080 union perf_event *event,
6810fc91
DA
2081 struct perf_sample *sample,
2082 struct perf_evsel *evsel,
2083 struct machine *machine __maybe_unused)
2084{
2085 struct trace *trace = container_of(tool, struct trace, tool);
aa07df6e 2086 struct thread *thread;
6810fc91
DA
2087 int err = 0;
2088
744a9719 2089 tracepoint_handler handler = evsel->handler;
6810fc91 2090
aa07df6e
DA
2091 thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
2092 if (thread && thread__is_filtered(thread))
ef65e96e 2093 goto out;
bdc89661 2094
e6001980 2095 trace__set_base_time(trace, evsel, sample);
6810fc91 2096
3160565f
DA
2097 if (handler) {
2098 ++trace->nr_events;
0c82adcf 2099 handler(trace, evsel, event, sample);
3160565f 2100 }
ef65e96e
ACM
2101out:
2102 thread__put(thread);
6810fc91
DA
2103 return err;
2104}
2105
1e28fe0a 2106static int trace__record(struct trace *trace, int argc, const char **argv)
5e2485b1
DA
2107{
2108 unsigned int rec_argc, i, j;
2109 const char **rec_argv;
2110 const char * const record_args[] = {
2111 "record",
2112 "-R",
2113 "-m", "1024",
2114 "-c", "1",
5e2485b1
DA
2115 };
2116
1e28fe0a
SF
2117 const char * const sc_args[] = { "-e", };
2118 unsigned int sc_args_nr = ARRAY_SIZE(sc_args);
2119 const char * const majpf_args[] = { "-e", "major-faults" };
2120 unsigned int majpf_args_nr = ARRAY_SIZE(majpf_args);
2121 const char * const minpf_args[] = { "-e", "minor-faults" };
2122 unsigned int minpf_args_nr = ARRAY_SIZE(minpf_args);
2123
9aca7f17 2124 /* +1 is for the event string below */
1e28fe0a
SF
2125 rec_argc = ARRAY_SIZE(record_args) + sc_args_nr + 1 +
2126 majpf_args_nr + minpf_args_nr + argc;
5e2485b1
DA
2127 rec_argv = calloc(rec_argc + 1, sizeof(char *));
2128
2129 if (rec_argv == NULL)
2130 return -ENOMEM;
2131
1e28fe0a 2132 j = 0;
5e2485b1 2133 for (i = 0; i < ARRAY_SIZE(record_args); i++)
1e28fe0a
SF
2134 rec_argv[j++] = record_args[i];
2135
e281a960
SF
2136 if (trace->trace_syscalls) {
2137 for (i = 0; i < sc_args_nr; i++)
2138 rec_argv[j++] = sc_args[i];
2139
2140 /* event string may be different for older kernels - e.g., RHEL6 */
2141 if (is_valid_tracepoint("raw_syscalls:sys_enter"))
2142 rec_argv[j++] = "raw_syscalls:sys_enter,raw_syscalls:sys_exit";
2143 else if (is_valid_tracepoint("syscalls:sys_enter"))
2144 rec_argv[j++] = "syscalls:sys_enter,syscalls:sys_exit";
2145 else {
2146 pr_err("Neither raw_syscalls nor syscalls events exist.\n");
c896f85a 2147 free(rec_argv);
e281a960
SF
2148 return -1;
2149 }
9aca7f17 2150 }
9aca7f17 2151
1e28fe0a
SF
2152 if (trace->trace_pgfaults & TRACE_PFMAJ)
2153 for (i = 0; i < majpf_args_nr; i++)
2154 rec_argv[j++] = majpf_args[i];
2155
2156 if (trace->trace_pgfaults & TRACE_PFMIN)
2157 for (i = 0; i < minpf_args_nr; i++)
2158 rec_argv[j++] = minpf_args[i];
2159
2160 for (i = 0; i < (unsigned int)argc; i++)
2161 rec_argv[j++] = argv[i];
5e2485b1 2162
b0ad8ea6 2163 return cmd_record(j, rec_argv);
5e2485b1
DA
2164}
2165
bf2575c1
DA
2166static size_t trace__fprintf_thread_summary(struct trace *trace, FILE *fp);
2167
08c98776 2168static bool perf_evlist__add_vfs_getname(struct perf_evlist *evlist)
c522739d 2169{
ef503831 2170 struct perf_evsel *evsel = perf_evsel__newtp("probe", "vfs_getname");
8dd2a131
JO
2171
2172 if (IS_ERR(evsel))
08c98776 2173 return false;
c522739d
ACM
2174
2175 if (perf_evsel__field(evsel, "pathname") == NULL) {
2176 perf_evsel__delete(evsel);
08c98776 2177 return false;
c522739d
ACM
2178 }
2179
744a9719 2180 evsel->handler = trace__vfs_getname;
c522739d 2181 perf_evlist__add(evlist, evsel);
08c98776 2182 return true;
c522739d
ACM
2183}
2184
0ae537cb 2185static struct perf_evsel *perf_evsel__new_pgfault(u64 config)
598d02c5
SF
2186{
2187 struct perf_evsel *evsel;
2188 struct perf_event_attr attr = {
2189 .type = PERF_TYPE_SOFTWARE,
2190 .mmap_data = 1,
598d02c5
SF
2191 };
2192
2193 attr.config = config;
0524798c 2194 attr.sample_period = 1;
598d02c5
SF
2195
2196 event_attr_init(&attr);
2197
2198 evsel = perf_evsel__new(&attr);
0ae537cb
ACM
2199 if (evsel)
2200 evsel->handler = trace__pgfault;
598d02c5 2201
0ae537cb 2202 return evsel;
598d02c5
SF
2203}
2204
ddbb1b13
ACM
2205static void trace__handle_event(struct trace *trace, union perf_event *event, struct perf_sample *sample)
2206{
2207 const u32 type = event->header.type;
2208 struct perf_evsel *evsel;
2209
ddbb1b13
ACM
2210 if (type != PERF_RECORD_SAMPLE) {
2211 trace__process_event(trace, trace->host, event, sample);
2212 return;
2213 }
2214
2215 evsel = perf_evlist__id2evsel(trace->evlist, sample->id);
2216 if (evsel == NULL) {
2217 fprintf(trace->output, "Unknown tp ID %" PRIu64 ", skipping...\n", sample->id);
2218 return;
2219 }
2220
e6001980
ACM
2221 trace__set_base_time(trace, evsel, sample);
2222
ddbb1b13
ACM
2223 if (evsel->attr.type == PERF_TYPE_TRACEPOINT &&
2224 sample->raw_data == NULL) {
2225 fprintf(trace->output, "%s sample with no payload for tid: %d, cpu %d, raw_size=%d, skipping...\n",
2226 perf_evsel__name(evsel), sample->tid,
2227 sample->cpu, sample->raw_size);
2228 } else {
2229 tracepoint_handler handler = evsel->handler;
2230 handler(trace, evsel, event, sample);
2231 }
2232}
2233
c27366f0
ACM
2234static int trace__add_syscall_newtp(struct trace *trace)
2235{
2236 int ret = -1;
2237 struct perf_evlist *evlist = trace->evlist;
2238 struct perf_evsel *sys_enter, *sys_exit;
2239
2240 sys_enter = perf_evsel__syscall_newtp("sys_enter", trace__sys_enter);
2241 if (sys_enter == NULL)
2242 goto out;
2243
2244 if (perf_evsel__init_sc_tp_ptr_field(sys_enter, args))
2245 goto out_delete_sys_enter;
2246
2247 sys_exit = perf_evsel__syscall_newtp("sys_exit", trace__sys_exit);
2248 if (sys_exit == NULL)
2249 goto out_delete_sys_enter;
2250
2251 if (perf_evsel__init_sc_tp_uint_field(sys_exit, ret))
2252 goto out_delete_sys_exit;
2253
08e26396
ACM
2254 perf_evsel__config_callchain(sys_enter, &trace->opts, &callchain_param);
2255 perf_evsel__config_callchain(sys_exit, &trace->opts, &callchain_param);
2256
c27366f0
ACM
2257 perf_evlist__add(evlist, sys_enter);
2258 perf_evlist__add(evlist, sys_exit);
2259
2ddd5c04 2260 if (callchain_param.enabled && !trace->kernel_syscallchains) {
44621819
ACM
2261 /*
2262 * We're interested only in the user space callchain
2263 * leading to the syscall, allow overriding that for
2264 * debugging reasons using --kernel_syscall_callchains
2265 */
2266 sys_exit->attr.exclude_callchain_kernel = 1;
2267 }
2268
8b3ce757
ACM
2269 trace->syscalls.events.sys_enter = sys_enter;
2270 trace->syscalls.events.sys_exit = sys_exit;
c27366f0
ACM
2271
2272 ret = 0;
2273out:
2274 return ret;
2275
2276out_delete_sys_exit:
2277 perf_evsel__delete_priv(sys_exit);
2278out_delete_sys_enter:
2279 perf_evsel__delete_priv(sys_enter);
2280 goto out;
2281}
2282
19867b61
ACM
2283static int trace__set_ev_qualifier_filter(struct trace *trace)
2284{
2285 int err = -1;
b15d0a4c 2286 struct perf_evsel *sys_exit;
19867b61
ACM
2287 char *filter = asprintf_expr_inout_ints("id", !trace->not_ev_qualifier,
2288 trace->ev_qualifier_ids.nr,
2289 trace->ev_qualifier_ids.entries);
2290
2291 if (filter == NULL)
2292 goto out_enomem;
2293
3541c034
MP
2294 if (!perf_evsel__append_tp_filter(trace->syscalls.events.sys_enter,
2295 filter)) {
b15d0a4c 2296 sys_exit = trace->syscalls.events.sys_exit;
3541c034 2297 err = perf_evsel__append_tp_filter(sys_exit, filter);
b15d0a4c 2298 }
19867b61
ACM
2299
2300 free(filter);
2301out:
2302 return err;
2303out_enomem:
2304 errno = ENOMEM;
2305 goto out;
2306}
c27366f0 2307
dd1a5037
ACM
2308static int trace__set_filter_loop_pids(struct trace *trace)
2309{
082ab9a1 2310 unsigned int nr = 1;
dd1a5037
ACM
2311 pid_t pids[32] = {
2312 getpid(),
2313 };
082ab9a1
ACM
2314 struct thread *thread = machine__find_thread(trace->host, pids[0], pids[0]);
2315
2316 while (thread && nr < ARRAY_SIZE(pids)) {
2317 struct thread *parent = machine__find_thread(trace->host, thread->ppid, thread->ppid);
2318
2319 if (parent == NULL)
2320 break;
2321
2322 if (!strcmp(thread__comm_str(parent), "sshd")) {
2323 pids[nr++] = parent->tid;
2324 break;
2325 }
2326 thread = parent;
2327 }
dd1a5037
ACM
2328
2329 return perf_evlist__set_filter_pids(trace->evlist, nr, pids);
2330}
2331
/*
 * Live tracing entry point.
 *
 * Sets up the event list according to the trace options (syscall
 * tracepoints, the vfs_getname probe, major/minor page-fault events and
 * sched_stat_runtime), creates the target maps, initialises symbols,
 * installs the SIGCHLD/SIGINT handlers, optionally prepares a workload
 * (when argc > 0), opens and mmaps the events, applies pid and tracepoint
 * filters, and then loops reading events from the ring buffers until
 * interrupted or drained.  On the way out the events are disabled, the
 * optional summary and tool statistics are printed (only when err is 0),
 * and the evlist is deleted.
 *
 * Returns the last value stored in err: 0 on a clean run, negative on setup
 * failures.  Every error label prints a message on trace->output and then
 * funnels into out_delete_evlist.
 */
f15eb531 2332static int trace__run(struct trace *trace, int argc, const char **argv)
514f1c67 2333{
14a052df 2334 struct perf_evlist *evlist = trace->evlist;
0ae537cb 2335 struct perf_evsel *evsel, *pgfault_maj = NULL, *pgfault_min = NULL;
efd5745e
ACM
2336 int err = -1, i;
2337 unsigned long before;
f15eb531 2338 const bool forks = argc > 0;
46fb3c21 2339 bool draining = false;
514f1c67 2340
75b757ca
ACM
2341 trace->live = true;
2342
c27366f0 2343 if (trace->trace_syscalls && trace__add_syscall_newtp(trace))
801c67b0 2344 goto out_error_raw_syscalls;
514f1c67 2345
e281a960 2346 if (trace->trace_syscalls)
08c98776 2347 trace->vfs_getname = perf_evlist__add_vfs_getname(evlist);
c522739d 2348
0ae537cb
ACM
2349 if ((trace->trace_pgfaults & TRACE_PFMAJ)) {
2350 pgfault_maj = perf_evsel__new_pgfault(PERF_COUNT_SW_PAGE_FAULTS_MAJ);
2351 if (pgfault_maj == NULL)
2352 goto out_error_mem;
08e26396 2353 perf_evsel__config_callchain(pgfault_maj, &trace->opts, &callchain_param);
0ae537cb 2354 perf_evlist__add(evlist, pgfault_maj);
e2726d99 2355 }
598d02c5 2356
0ae537cb
ACM
2357 if ((trace->trace_pgfaults & TRACE_PFMIN)) {
2358 pgfault_min = perf_evsel__new_pgfault(PERF_COUNT_SW_PAGE_FAULTS_MIN);
2359 if (pgfault_min == NULL)
2360 goto out_error_mem;
08e26396 2361 perf_evsel__config_callchain(pgfault_min, &trace->opts, &callchain_param);
0ae537cb
ACM
2362 perf_evlist__add(evlist, pgfault_min);
2363 }
598d02c5 2364
1302d88e 2365 if (trace->sched &&
2cc990ba
ACM
2366 perf_evlist__add_newtp(evlist, "sched", "sched_stat_runtime",
2367 trace__sched_stat_runtime))
2368 goto out_error_sched_stat_runtime;
1302d88e 2369
514f1c67
ACM
2370 err = perf_evlist__create_maps(evlist, &trace->opts.target);
2371 if (err < 0) {
c24ff998 2372 fprintf(trace->output, "Problems parsing the target to trace, check your options!\n");
514f1c67
ACM
2373 goto out_delete_evlist;
2374 }
2375
752fde44
ACM
2376 err = trace__symbols_init(trace, evlist);
2377 if (err < 0) {
c24ff998 2378 fprintf(trace->output, "Problems initializing symbol libraries!\n");
03ad9747 2379 goto out_delete_evlist;
752fde44
ACM
2380 }
2381
75d50117 2382 perf_evlist__config(evlist, &trace->opts, &callchain_param);
fde54b78 2383
f15eb531
NK
2384 signal(SIGCHLD, sig_handler);
2385 signal(SIGINT, sig_handler);
2386
2387 if (forks) {
6ef73ec4 2388 err = perf_evlist__prepare_workload(evlist, &trace->opts.target,
735f7e0b 2389 argv, false, NULL);
f15eb531 2390 if (err < 0) {
c24ff998 2391 fprintf(trace->output, "Couldn't run the workload!\n");
03ad9747 2392 goto out_delete_evlist;
f15eb531
NK
2393 }
2394 }
2395
514f1c67 2396 err = perf_evlist__open(evlist);
a8f23d8f
ACM
2397 if (err < 0)
2398 goto out_error_open;
514f1c67 2399
ba504235
WN
2400 err = bpf__apply_obj_config();
2401 if (err) {
2402 char errbuf[BUFSIZ];
2403
2404 bpf__strerror_apply_obj_config(err, errbuf, sizeof(errbuf));
2405 pr_err("ERROR: Apply config to BPF failed: %s\n",
2406 errbuf);
2407 goto out_error_open;
2408 }
2409
241b057c
ACM
2410 /*
2411 * Better not use !target__has_task() here because we need to cover the
2412 * case where no threads were specified in the command line, but a
2413 * workload was, and in that case we will fill in the thread_map when
2414 * we fork the workload in perf_evlist__prepare_workload.
2415 */
f078c385
ACM
2416 if (trace->filter_pids.nr > 0)
2417 err = perf_evlist__set_filter_pids(evlist, trace->filter_pids.nr, trace->filter_pids.entries);
e13798c7 2418 else if (thread_map__pid(evlist->threads, 0) == -1)
dd1a5037 2419 err = trace__set_filter_loop_pids(trace);
f078c385 2420
/* NOTE(review): any pid-filter failure is reported through the "not enough memory" label. */
94ad89bc
ACM
2421 if (err < 0)
2422 goto out_error_mem;
2423
19867b61
ACM
2424 if (trace->ev_qualifier_ids.nr > 0) {
2425 err = trace__set_ev_qualifier_filter(trace);
2426 if (err < 0)
2427 goto out_errno;
19867b61 2428
2e5e5f87
ACM
2429 pr_debug("event qualifier tracepoint filter: %s\n",
2430 trace->syscalls.events.sys_exit->filter);
2431 }
19867b61 2432
/* On failure evsel is left pointing at the offending event; out_error_apply_filters reads it. */
94ad89bc
ACM
2433 err = perf_evlist__apply_filters(evlist, &evsel);
2434 if (err < 0)
2435 goto out_error_apply_filters;
241b057c 2436
f74b9d3a 2437 err = perf_evlist__mmap(evlist, trace->opts.mmap_pages);
e09b18d4
ACM
2438 if (err < 0)
2439 goto out_error_mmap;
514f1c67 2440
e36b7821 2441 if (!target__none(&trace->opts.target) && !trace->opts.initial_delay)
cb24d01d
ACM
2442 perf_evlist__enable(evlist);
2443
f15eb531
NK
2444 if (forks)
2445 perf_evlist__start_workload(evlist);
2446
/* initial_delay is multiplied by 1000 for usleep(), i.e. it is handled as milliseconds. */
e36b7821
AB
2447 if (trace->opts.initial_delay) {
2448 usleep(trace->opts.initial_delay * 1000);
2449 perf_evlist__enable(evlist);
2450 }
2451
e13798c7 2452 trace->multiple_threads = thread_map__pid(evlist->threads, 0) == -1 ||
42052bea
ACM
2453 evlist->threads->nr > 1 ||
2454 perf_evlist__first(evlist)->attr.inherit;
bd3dda9a
ACM
2455
2456 /*
2457 * Now that we already used evsel->attr to ask the kernel to setup the
2458 * events, lets reuse evsel->attr.sample_max_stack as the limit in
2459 * trace__resolve_callchain(), allowing per-event max-stack settings
2460 * to override an explicitly set --max-stack global setting.
2461 */
2462 evlist__for_each_entry(evlist, evsel) {
2463 if ((evsel->attr.sample_type & PERF_SAMPLE_CALLCHAIN) &&
2464 evsel->attr.sample_max_stack == 0)
2465 evsel->attr.sample_max_stack = trace->max_stack;
2466 }
/* Main loop: one pass over every mmap; 'before' detects whether the pass saw any event. */
514f1c67 2467again:
efd5745e 2468 before = trace->nr_events;
514f1c67
ACM
2469
2470 for (i = 0; i < evlist->nr_mmaps; i++) {
2471 union perf_event *event;
2472
2473 while ((event = perf_evlist__mmap_read(evlist, i)) != NULL) {
514f1c67 2474 struct perf_sample sample;
514f1c67 2475
efd5745e 2476 ++trace->nr_events;
514f1c67 2477
514f1c67
ACM
2478 err = perf_evlist__parse_sample(evlist, event, &sample);
2479 if (err) {
c24ff998 2480 fprintf(trace->output, "Can't parse sample, err = %d, skipping...\n", err);
8e50d384 2481 goto next_event;
514f1c67
ACM
2482 }
2483
ddbb1b13 2484 trace__handle_event(trace, event, &sample);
8e50d384
ZZ
2485next_event:
2486 perf_evlist__mmap_consume(evlist, i);
20c5f10e 2487
ba209f85
ACM
2488 if (interrupted)
2489 goto out_disable;
02ac5421
ACM
2490
/* Once 'done' is set, stop producing new events but keep consuming what is already buffered. */
2491 if (done && !draining) {
2492 perf_evlist__disable(evlist);
2493 draining = true;
2494 }
514f1c67
ACM
2495 }
2496 }
2497
/* Idle pass: poll (forever, or 100 when 'done') unless draining; otherwise fall through and finish. */
efd5745e 2498 if (trace->nr_events == before) {
ba209f85 2499 int timeout = done ? 100 : -1;
f15eb531 2500
46fb3c21
ACM
2501 if (!draining && perf_evlist__poll(evlist, timeout) > 0) {
2502 if (perf_evlist__filter_pollfd(evlist, POLLERR | POLLHUP) == 0)
2503 draining = true;
2504
ba209f85 2505 goto again;
46fb3c21 2506 }
ba209f85
ACM
2507 } else {
2508 goto again;
f15eb531
NK
2509 }
2510
ba209f85 2511out_disable:
f3b623b8
ACM
2512 thread__zput(trace->current);
2513
ba209f85 2514 perf_evlist__disable(evlist);
514f1c67 2515
c522739d
ACM
2516 if (!err) {
2517 if (trace->summary)
2518 trace__fprintf_thread_summary(trace, trace->output);
2519
2520 if (trace->show_tool_stats) {
2521 fprintf(trace->output, "Stats:\n "
2522 " vfs_getname : %" PRIu64 "\n"
2523 " proc_getname: %" PRIu64 "\n",
2524 trace->stats.vfs_getname,
2525 trace->stats.proc_getname);
2526 }
2527 }
bf2575c1 2528
514f1c67 2529out_delete_evlist:
33974a41
AV
2530 trace__symbols__exit(trace);
2531
514f1c67 2532 perf_evlist__delete(evlist);
14a052df 2533 trace->evlist = NULL;
75b757ca 2534 trace->live = false;
514f1c67 2535 return err;
/* The error labels below sit inside a block only so that they can share one errbuf; they are reached by goto. */
6ef068cb
ACM
2536{
2537 char errbuf[BUFSIZ];
a8f23d8f 2538
2cc990ba 2539out_error_sched_stat_runtime:
988bdb31 2540 tracing_path__strerror_open_tp(errno, errbuf, sizeof(errbuf), "sched", "sched_stat_runtime");
2cc990ba
ACM
2541 goto out_error;
2542
801c67b0 2543out_error_raw_syscalls:
988bdb31 2544 tracing_path__strerror_open_tp(errno, errbuf, sizeof(errbuf), "raw_syscalls", "sys_(enter|exit)");
a8f23d8f
ACM
2545 goto out_error;
2546
e09b18d4
ACM
2547out_error_mmap:
2548 perf_evlist__strerror_mmap(evlist, errno, errbuf, sizeof(errbuf));
2549 goto out_error;
2550
a8f23d8f
ACM
2551out_error_open:
2552 perf_evlist__strerror_open(evlist, errno, errbuf, sizeof(errbuf));
2553
2554out_error:
6ef068cb 2555 fprintf(trace->output, "%s\n", errbuf);
87f91868 2556 goto out_delete_evlist;
94ad89bc
ACM
2557
2558out_error_apply_filters:
2559 fprintf(trace->output,
2560 "Failed to set filter \"%s\" on event %s with %d (%s)\n",
2561 evsel->filter, perf_evsel__name(evsel), errno,
c8b5f2c9 2562 str_error_r(errno, errbuf, sizeof(errbuf)));
94ad89bc 2563 goto out_delete_evlist;
514f1c67 2564}
5ed08dae
ACM
2565out_error_mem:
2566 fprintf(trace->output, "Not enough memory to run!\n");
2567 goto out_delete_evlist;
19867b61
ACM
2568
2569out_errno:
2570 fprintf(trace->output, "errno=%d,%s\n", errno, strerror(errno));
2571 goto out_delete_evlist;
a8f23d8f 2572}
514f1c67 2573
6810fc91
DA
2574static int trace__replay(struct trace *trace)
2575{
2576 const struct perf_evsel_str_handler handlers[] = {
c522739d 2577 { "probe:vfs_getname", trace__vfs_getname, },
6810fc91 2578 };
8ceb41d7 2579 struct perf_data data = {
eae8ad80
JO
2580 .file = {
2581 .path = input_name,
2582 },
2583 .mode = PERF_DATA_MODE_READ,
2584 .force = trace->force,
f5fc1412 2585 };
6810fc91 2586 struct perf_session *session;
003824e8 2587 struct perf_evsel *evsel;
6810fc91
DA
2588 int err = -1;
2589
2590 trace->tool.sample = trace__process_sample;
2591 trace->tool.mmap = perf_event__process_mmap;
384c671e 2592 trace->tool.mmap2 = perf_event__process_mmap2;
6810fc91
DA
2593 trace->tool.comm = perf_event__process_comm;
2594 trace->tool.exit = perf_event__process_exit;
2595 trace->tool.fork = perf_event__process_fork;
2596 trace->tool.attr = perf_event__process_attr;
f3b3614a 2597 trace->tool.tracing_data = perf_event__process_tracing_data;
6810fc91 2598 trace->tool.build_id = perf_event__process_build_id;
f3b3614a 2599 trace->tool.namespaces = perf_event__process_namespaces;
6810fc91 2600
0a8cb85c 2601 trace->tool.ordered_events = true;
6810fc91
DA
2602 trace->tool.ordering_requires_timestamps = true;
2603
2604 /* add tid to output */
2605 trace->multiple_threads = true;
2606
8ceb41d7 2607 session = perf_session__new(&data, false, &trace->tool);
6810fc91 2608 if (session == NULL)
52e02834 2609 return -1;
6810fc91 2610
aa07df6e
DA
2611 if (trace->opts.target.pid)
2612 symbol_conf.pid_list_str = strdup(trace->opts.target.pid);
2613
2614 if (trace->opts.target.tid)
2615 symbol_conf.tid_list_str = strdup(trace->opts.target.tid);
2616
0a7e6d1b 2617 if (symbol__init(&session->header.env) < 0)
cb2ffae2
NK
2618 goto out;
2619
8fb598e5
DA
2620 trace->host = &session->machines.host;
2621
6810fc91
DA
2622 err = perf_session__set_tracepoints_handlers(session, handlers);
2623 if (err)
2624 goto out;
2625
003824e8
NK
2626 evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
2627 "raw_syscalls:sys_enter");
9aca7f17
DA
2628 /* older kernels have syscalls tp versus raw_syscalls */
2629 if (evsel == NULL)
2630 evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
2631 "syscalls:sys_enter");
003824e8 2632
e281a960
SF
2633 if (evsel &&
2634 (perf_evsel__init_syscall_tp(evsel, trace__sys_enter) < 0 ||
2635 perf_evsel__init_sc_tp_ptr_field(evsel, args))) {
003824e8
NK
2636 pr_err("Error during initialize raw_syscalls:sys_enter event\n");
2637 goto out;
2638 }
2639
2640 evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
2641 "raw_syscalls:sys_exit");
9aca7f17
DA
2642 if (evsel == NULL)
2643 evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
2644 "syscalls:sys_exit");
e281a960
SF
2645 if (evsel &&
2646 (perf_evsel__init_syscall_tp(evsel, trace__sys_exit) < 0 ||
2647 perf_evsel__init_sc_tp_uint_field(evsel, ret))) {
003824e8 2648 pr_err("Error during initialize raw_syscalls:sys_exit event\n");
6810fc91
DA
2649 goto out;
2650 }
2651
e5cadb93 2652 evlist__for_each_entry(session->evlist, evsel) {
1e28fe0a
SF
2653 if (evsel->attr.type == PERF_TYPE_SOFTWARE &&
2654 (evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MAJ ||
2655 evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MIN ||
2656 evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS))
2657 evsel->handler = trace__pgfault;
2658 }
2659
6810fc91
DA
2660 setup_pager();
2661
b7b61cbe 2662 err = perf_session__process_events(session);
6810fc91
DA
2663 if (err)
2664 pr_err("Failed to process events, error %d", err);
2665
bf2575c1
DA
2666 else if (trace->summary)
2667 trace__fprintf_thread_summary(trace, trace->output);
2668
6810fc91
DA
2669out:
2670 perf_session__delete(session);
2671
2672 return err;
2673}
2674
1302d88e
ACM
/*
 * Print the heading of the per-thread summary to @fp.
 *
 * Returns the fprintf() result converted to size_t.
 */
static size_t trace__fprintf_threads_header(FILE *fp)
{
	return fprintf(fp, "\n Summary of events:\n\n");
}
2683
b535d523
ACM
2684DEFINE_RESORT_RB(syscall_stats, a->msecs > b->msecs,
2685 struct stats *stats;
2686 double msecs;
2687 int syscall;
2688)
2689{
2690 struct int_node *source = rb_entry(nd, struct int_node, rb_node);
2691 struct stats *stats = source->priv;
2692
2693 entry->syscall = source->i;
2694 entry->stats = stats;
2695 entry->msecs = stats ? (u64)stats->n * (avg_stats(stats) / NSEC_PER_MSEC) : 0;
2696}
2697
bf2575c1
DA
2698static size_t thread__dump_stats(struct thread_trace *ttrace,
2699 struct trace *trace, FILE *fp)
2700{
bf2575c1
DA
2701 size_t printed = 0;
2702 struct syscall *sc;
b535d523
ACM
2703 struct rb_node *nd;
2704 DECLARE_RESORT_RB_INTLIST(syscall_stats, ttrace->syscall_stats);
bf2575c1 2705
b535d523 2706 if (syscall_stats == NULL)
bf2575c1
DA
2707 return 0;
2708
2709 printed += fprintf(fp, "\n");
2710
834fd46d
MW
2711 printed += fprintf(fp, " syscall calls total min avg max stddev\n");
2712 printed += fprintf(fp, " (msec) (msec) (msec) (msec) (%%)\n");
2713 printed += fprintf(fp, " --------------- -------- --------- --------- --------- --------- ------\n");
99ff7150 2714
98a91837 2715 resort_rb__for_each_entry(nd, syscall_stats) {
b535d523 2716 struct stats *stats = syscall_stats_entry->stats;
bf2575c1
DA
2717 if (stats) {
2718 double min = (double)(stats->min) / NSEC_PER_MSEC;
2719 double max = (double)(stats->max) / NSEC_PER_MSEC;
2720 double avg = avg_stats(stats);
2721 double pct;
2722 u64 n = (u64) stats->n;
2723
2724 pct = avg ? 100.0 * stddev_stats(stats)/avg : 0.0;
2725 avg /= NSEC_PER_MSEC;
2726
b535d523 2727 sc = &trace->syscalls.table[syscall_stats_entry->syscall];
99ff7150 2728 printed += fprintf(fp, " %-15s", sc->name);
834fd46d 2729 printed += fprintf(fp, " %8" PRIu64 " %9.3f %9.3f %9.3f",
b535d523 2730 n, syscall_stats_entry->msecs, min, avg);
27a778b5 2731 printed += fprintf(fp, " %9.3f %9.2f%%\n", max, pct);
bf2575c1 2732 }
bf2575c1
DA
2733 }
2734
b535d523 2735 resort_rb__delete(syscall_stats);
bf2575c1 2736 printed += fprintf(fp, "\n\n");
1302d88e
ACM
2737
2738 return printed;
2739}
2740
96c14451 2741static size_t trace__fprintf_thread(FILE *fp, struct thread *thread, struct trace *trace)
896cbb56 2742{
96c14451 2743 size_t printed = 0;
89dceb22 2744 struct thread_trace *ttrace = thread__priv(thread);
896cbb56
DA
2745 double ratio;
2746
2747 if (ttrace == NULL)
2748 return 0;
2749
2750 ratio = (double)ttrace->nr_events / trace->nr_events * 100.0;
2751
15e65c69 2752 printed += fprintf(fp, " %s (%d), ", thread__comm_str(thread), thread->tid);
99ff7150 2753 printed += fprintf(fp, "%lu events, ", ttrace->nr_events);
15e65c69 2754 printed += fprintf(fp, "%.1f%%", ratio);
a2ea67d7
SF
2755 if (ttrace->pfmaj)
2756 printed += fprintf(fp, ", %lu majfaults", ttrace->pfmaj);
2757 if (ttrace->pfmin)
2758 printed += fprintf(fp, ", %lu minfaults", ttrace->pfmin);
03548ebf
ACM
2759 if (trace->sched)
2760 printed += fprintf(fp, ", %.3f msec\n", ttrace->runtime_ms);
2761 else if (fputc('\n', fp) != EOF)
2762 ++printed;
2763
bf2575c1 2764 printed += thread__dump_stats(ttrace, trace, fp);
896cbb56 2765
96c14451
ACM
2766 return printed;
2767}
896cbb56 2768
96c14451
ACM
2769static unsigned long thread__nr_events(struct thread_trace *ttrace)
2770{
2771 return ttrace ? ttrace->nr_events : 0;
2772}
2773
2774DEFINE_RESORT_RB(threads, (thread__nr_events(a->thread->priv) < thread__nr_events(b->thread->priv)),
2775 struct thread *thread;
2776)
2777{
2778 entry->thread = rb_entry(nd, struct thread, rb_node);
896cbb56
DA
2779}
2780
1302d88e
ACM
2781static size_t trace__fprintf_thread_summary(struct trace *trace, FILE *fp)
2782{
96c14451
ACM
2783 size_t printed = trace__fprintf_threads_header(fp);
2784 struct rb_node *nd;
91e467bc 2785 int i;
1302d88e 2786
91e467bc
KL
2787 for (i = 0; i < THREADS__TABLE_SIZE; i++) {
2788 DECLARE_RESORT_RB_MACHINE_THREADS(threads, trace->host, i);
96c14451 2789
91e467bc
KL
2790 if (threads == NULL) {
2791 fprintf(fp, "%s", "Error sorting output by nr_events!\n");
2792 return 0;
2793 }
896cbb56 2794
91e467bc
KL
2795 resort_rb__for_each_entry(nd, threads)
2796 printed += trace__fprintf_thread(fp, threads_entry->thread, trace);
96c14451 2797
91e467bc
KL
2798 resort_rb__delete(threads);
2799 }
96c14451 2800 return printed;
1302d88e
ACM
2801}
2802
ae9ed035
ACM
2803static int trace__set_duration(const struct option *opt, const char *str,
2804 int unset __maybe_unused)
2805{
2806 struct trace *trace = opt->value;
2807
2808 trace->duration_filter = atof(str);
2809 return 0;
2810}
2811
f078c385
ACM
2812static int trace__set_filter_pids(const struct option *opt, const char *str,
2813 int unset __maybe_unused)
2814{
2815 int ret = -1;
2816 size_t i;
2817 struct trace *trace = opt->value;
2818 /*
2819 * FIXME: introduce a intarray class, plain parse csv and create a
2820 * { int nr, int entries[] } struct...
2821 */
2822 struct intlist *list = intlist__new(str);
2823
2824 if (list == NULL)
2825 return -1;
2826
2827 i = trace->filter_pids.nr = intlist__nr_entries(list) + 1;
2828 trace->filter_pids.entries = calloc(i, sizeof(pid_t));
2829
2830 if (trace->filter_pids.entries == NULL)
2831 goto out;
2832
2833 trace->filter_pids.entries[0] = getpid();
2834
2835 for (i = 1; i < trace->filter_pids.nr; ++i)
2836 trace->filter_pids.entries[i] = intlist__entry(list, i - 1)->i;
2837
2838 intlist__delete(list);
2839 ret = 0;
2840out:
2841 return ret;
2842}
2843
c24ff998
ACM
2844static int trace__open_output(struct trace *trace, const char *filename)
2845{
2846 struct stat st;
2847
2848 if (!stat(filename, &st) && st.st_size) {
2849 char oldname[PATH_MAX];
2850
2851 scnprintf(oldname, sizeof(oldname), "%s.old", filename);
2852 unlink(oldname);
2853 rename(filename, oldname);
2854 }
2855
2856 trace->output = fopen(filename, "w");
2857
2858 return trace->output == NULL ? -errno : 0;
2859}
2860
598d02c5
SF
2861static int parse_pagefaults(const struct option *opt, const char *str,
2862 int unset __maybe_unused)
2863{
2864 int *trace_pgfaults = opt->value;
2865
2866 if (strcmp(str, "all") == 0)
2867 *trace_pgfaults |= TRACE_PFMAJ | TRACE_PFMIN;
2868 else if (strcmp(str, "maj") == 0)
2869 *trace_pgfaults |= TRACE_PFMAJ;
2870 else if (strcmp(str, "min") == 0)
2871 *trace_pgfaults |= TRACE_PFMIN;
2872 else
2873 return -1;
2874
2875 return 0;
2876}
2877
14a052df
ACM
2878static void evlist__set_evsel_handler(struct perf_evlist *evlist, void *handler)
2879{
2880 struct perf_evsel *evsel;
2881
e5cadb93 2882 evlist__for_each_entry(evlist, evsel)
14a052df
ACM
2883 evsel->handler = handler;
2884}
2885
017037ff
ACM
2886/*
2887 * XXX: Hackish, just splitting the combined -e+--event (syscalls
2888 * (raw_syscalls:{sys_{enter,exit}} + events (tracepoints, HW, SW, etc) to use
2889 * existing facilities unchanged (trace->ev_qualifier + parse_options()).
2890 *
2891 * It'd be better to introduce a parse_options() variant that would return a
2892 * list with the terms it didn't match to an event...
2893 */
2894static int trace__parse_events_option(const struct option *opt, const char *str,
2895 int unset __maybe_unused)
2896{
2897 struct trace *trace = (struct trace *)opt->value;
2898 const char *s = str;
2899 char *sep = NULL, *lists[2] = { NULL, NULL, };
27702bcf 2900 int len = strlen(str) + 1, err = -1, list, idx;
017037ff
ACM
2901 char *strace_groups_dir = system_path(STRACE_GROUPS_DIR);
2902 char group_name[PATH_MAX];
2903
2904 if (strace_groups_dir == NULL)
2905 return -1;
2906
2907 if (*s == '!') {
2908 ++s;
2909 trace->not_ev_qualifier = true;
2910 }
2911
2912 while (1) {
2913 if ((sep = strchr(s, ',')) != NULL)
2914 *sep = '\0';
2915
2916 list = 0;
27702bcf
ACM
2917 if (syscalltbl__id(trace->sctbl, s) >= 0 ||
2918 syscalltbl__strglobmatch_first(trace->sctbl, s, &idx) >= 0) {
017037ff
ACM
2919 list = 1;
2920 } else {
2921 path__join(group_name, sizeof(group_name), strace_groups_dir, s);
2922 if (access(group_name, R_OK) == 0)
2923 list = 1;
2924 }
2925
2926 if (lists[list]) {
2927 sprintf(lists[list] + strlen(lists[list]), ",%s", s);
2928 } else {
2929 lists[list] = malloc(len);
2930 if (lists[list] == NULL)
2931 goto out;
2932 strcpy(lists[list], s);
2933 }
2934
2935 if (!sep)
2936 break;
2937
2938 *sep = ',';
2939 s = sep + 1;
2940 }
2941
2942 if (lists[1] != NULL) {
2943 struct strlist_config slist_config = {
2944 .dirname = strace_groups_dir,
2945 };
2946
2947 trace->ev_qualifier = strlist__new(lists[1], &slist_config);
2948 if (trace->ev_qualifier == NULL) {
2949 fputs("Not enough memory to parse event qualifier", trace->output);
2950 goto out;
2951 }
2952
2953 if (trace__validate_ev_qualifier(trace))
2954 goto out;
2955 }
2956
2957 err = 0;
2958
2959 if (lists[0]) {
2960 struct option o = OPT_CALLBACK('e', "event", &trace->evlist, "event",
2961 "event selector. use 'perf list' to list available events",
2962 parse_events_option);
2963 err = parse_events_option(&o, lists[0], 0);
2964 }
2965out:
2966 if (sep)
2967 *sep = ',';
2968
2969 return err;
2970}
2971
b0ad8ea6 2972int cmd_trace(int argc, const char **argv)
514f1c67 2973{
6fdd9cb7 2974 const char *trace_usage[] = {
f15eb531
NK
2975 "perf trace [<options>] [<command>]",
2976 "perf trace [<options>] -- <command> [<options>]",
5e2485b1
DA
2977 "perf trace record [<options>] [<command>]",
2978 "perf trace record [<options>] -- <command> [<options>]",
514f1c67
ACM
2979 NULL
2980 };
2981 struct trace trace = {
514f1c67
ACM
2982 .syscalls = {
2983 . max = -1,
2984 },
2985 .opts = {
2986 .target = {
2987 .uid = UINT_MAX,
2988 .uses_mmap = true,
2989 },
2990 .user_freq = UINT_MAX,
2991 .user_interval = ULLONG_MAX,
509051ea 2992 .no_buffering = true,
38d5447d 2993 .mmap_pages = UINT_MAX,
9d9cad76 2994 .proc_map_timeout = 500,
514f1c67 2995 },
007d66a0 2996 .output = stderr,
50c95cbd 2997 .show_comm = true,
e281a960 2998 .trace_syscalls = true,
44621819 2999 .kernel_syscallchains = false,
05614993 3000 .max_stack = UINT_MAX,
514f1c67 3001 };
c24ff998 3002 const char *output_name = NULL;
514f1c67 3003 const struct option trace_options[] = {
017037ff
ACM
3004 OPT_CALLBACK('e', "event", &trace, "event",
3005 "event/syscall selector. use 'perf list' to list available events",
3006 trace__parse_events_option),
50c95cbd
ACM
3007 OPT_BOOLEAN(0, "comm", &trace.show_comm,
3008 "show the thread COMM next to its id"),
c522739d 3009 OPT_BOOLEAN(0, "tool_stats", &trace.show_tool_stats, "show tool stats"),
017037ff
ACM
3010 OPT_CALLBACK(0, "expr", &trace, "expr", "list of syscalls/events to trace",
3011 trace__parse_events_option),
c24ff998 3012 OPT_STRING('o', "output", &output_name, "file", "output file name"),
6810fc91 3013 OPT_STRING('i', "input", &input_name, "file", "Analyze events in file"),
514f1c67
ACM
3014 OPT_STRING('p', "pid", &trace.opts.target.pid, "pid",
3015 "trace events on existing process id"),
ac9be8ee 3016 OPT_STRING('t', "tid", &trace.opts.target.tid, "tid",
514f1c67 3017 "trace events on existing thread id"),
fa0e4ffe
ACM
3018 OPT_CALLBACK(0, "filter-pids", &trace, "CSV list of pids",
3019 "pids to filter (by the kernel)", trace__set_filter_pids),
ac9be8ee 3020 OPT_BOOLEAN('a', "all-cpus", &trace.opts.target.system_wide,
514f1c67 3021 "system-wide collection from all CPUs"),
ac9be8ee 3022 OPT_STRING('C', "cpu", &trace.opts.target.cpu_list, "cpu",
514f1c67 3023 "list of cpus to monitor"),
6810fc91 3024 OPT_BOOLEAN(0, "no-inherit", &trace.opts.no_inherit,
514f1c67 3025 "child tasks do not inherit counters"),
994a1f78
JO
3026 OPT_CALLBACK('m', "mmap-pages", &trace.opts.mmap_pages, "pages",
3027 "number of mmap data pages",
3028 perf_evlist__parse_mmap_pages),
ac9be8ee 3029 OPT_STRING('u', "uid", &trace.opts.target.uid_str, "user",
514f1c67 3030 "user to profile"),
ae9ed035
ACM
3031 OPT_CALLBACK(0, "duration", &trace, "float",
3032 "show only events with duration > N.M ms",
3033 trace__set_duration),
1302d88e 3034 OPT_BOOLEAN(0, "sched", &trace.sched, "show blocking scheduler events"),
7c304ee0 3035 OPT_INCR('v', "verbose", &verbose, "be more verbose"),
4bb09192
DA
3036 OPT_BOOLEAN('T', "time", &trace.full_time,
3037 "Show full timestamp, not time relative to first start"),
fd2eabaf
DA
3038 OPT_BOOLEAN('s', "summary", &trace.summary_only,
3039 "Show only syscall summary with statistics"),
3040 OPT_BOOLEAN('S', "with-summary", &trace.summary,
3041 "Show all syscalls and summary with statistics"),
598d02c5
SF
3042 OPT_CALLBACK_DEFAULT('F', "pf", &trace.trace_pgfaults, "all|maj|min",
3043 "Trace pagefaults", parse_pagefaults, "maj"),
e281a960 3044 OPT_BOOLEAN(0, "syscalls", &trace.trace_syscalls, "Trace syscalls"),
e366a6d8 3045 OPT_BOOLEAN('f', "force", &trace.force, "don't complain, do it"),
566a0885
MW
3046 OPT_CALLBACK(0, "call-graph", &trace.opts,
3047 "record_mode[,record_size]", record_callchain_help,
3048 &record_parse_callchain_opt),
44621819
ACM
3049 OPT_BOOLEAN(0, "kernel-syscall-graph", &trace.kernel_syscallchains,
3050 "Show the kernel callchains on the syscall exit path"),
5cf9c84e
ACM
3051 OPT_UINTEGER(0, "min-stack", &trace.min_stack,
3052 "Set the minimum stack depth when parsing the callchain, "
3053 "anything below the specified depth will be ignored."),
c6d4a494
ACM
3054 OPT_UINTEGER(0, "max-stack", &trace.max_stack,
3055 "Set the maximum stack depth when parsing the callchain, "
3056 "anything beyond the specified depth will be ignored. "
4cb93446 3057 "Default: kernel.perf_event_max_stack or " __stringify(PERF_MAX_STACK_DEPTH)),
591421e1
ACM
3058 OPT_BOOLEAN(0, "print-sample", &trace.print_sample,
3059 "print the PERF_RECORD_SAMPLE PERF_SAMPLE_ info, for debugging"),
9d9cad76
KL
3060 OPT_UINTEGER(0, "proc-map-timeout", &trace.opts.proc_map_timeout,
3061 "per thread proc mmap processing timeout in ms"),
e36b7821
AB
3062 OPT_UINTEGER('D', "delay", &trace.opts.initial_delay,
3063 "ms to wait before starting measurement after program "
3064 "start"),
514f1c67
ACM
3065 OPT_END()
3066 };
ccd62a89 3067 bool __maybe_unused max_stack_user_set = true;
f3e459d1 3068 bool mmap_pages_user_set = true;
6fdd9cb7 3069 const char * const trace_subcommands[] = { "record", NULL };
514f1c67 3070 int err;
32caf0d1 3071 char bf[BUFSIZ];
514f1c67 3072
4d08cb80
ACM
3073 signal(SIGSEGV, sighandler_dump_stack);
3074 signal(SIGFPE, sighandler_dump_stack);
3075
14a052df 3076 trace.evlist = perf_evlist__new();
fd0db102 3077 trace.sctbl = syscalltbl__new();
14a052df 3078
fd0db102 3079 if (trace.evlist == NULL || trace.sctbl == NULL) {
14a052df 3080 pr_err("Not enough memory to run!\n");
ff8f695c 3081 err = -ENOMEM;
14a052df
ACM
3082 goto out;
3083 }
3084
6fdd9cb7
YS
3085 argc = parse_options_subcommand(argc, argv, trace_options, trace_subcommands,
3086 trace_usage, PARSE_OPT_STOP_AT_NON_OPTION);
fd2eabaf 3087
d7888573
WN
3088 err = bpf__setup_stdout(trace.evlist);
3089 if (err) {
3090 bpf__strerror_setup_stdout(trace.evlist, err, bf, sizeof(bf));
3091 pr_err("ERROR: Setup BPF stdout failed: %s\n", bf);
3092 goto out;
3093 }
3094
59247e33
ACM
3095 err = -1;
3096
598d02c5
SF
3097 if (trace.trace_pgfaults) {
3098 trace.opts.sample_address = true;
3099 trace.opts.sample_time = true;
3100 }
3101
f3e459d1
ACM
3102 if (trace.opts.mmap_pages == UINT_MAX)
3103 mmap_pages_user_set = false;
3104
05614993 3105 if (trace.max_stack == UINT_MAX) {
fe176085 3106 trace.max_stack = input_name ? PERF_MAX_STACK_DEPTH : sysctl_perf_event_max_stack;
05614993
ACM
3107 max_stack_user_set = false;
3108 }
3109
3110#ifdef HAVE_DWARF_UNWIND_SUPPORT
75d50117 3111 if ((trace.min_stack || max_stack_user_set) && !callchain_param.enabled) {
05614993 3112 record_opts__parse_callchain(&trace.opts, &callchain_param, "dwarf", false);
75d50117 3113 }
05614993
ACM
3114#endif
3115
2ddd5c04 3116 if (callchain_param.enabled) {
f3e459d1
ACM
3117 if (!mmap_pages_user_set && geteuid() == 0)
3118 trace.opts.mmap_pages = perf_event_mlock_kb_in_pages() * 4;
3119
566a0885 3120 symbol_conf.use_callchain = true;
f3e459d1 3121 }
566a0885 3122
14a052df
ACM
3123 if (trace.evlist->nr_entries > 0)
3124 evlist__set_evsel_handler(trace.evlist, trace__event_handler);
3125
1e28fe0a
SF
3126 if ((argc >= 1) && (strcmp(argv[0], "record") == 0))
3127 return trace__record(&trace, argc-1, &argv[1]);
3128
3129 /* summary_only implies summary option, but don't overwrite summary if set */
3130 if (trace.summary_only)
3131 trace.summary = trace.summary_only;
3132
726f3234
ACM
3133 if (!trace.trace_syscalls && !trace.trace_pgfaults &&
3134 trace.evlist->nr_entries == 0 /* Was --events used? */) {
e281a960
SF
3135 pr_err("Please specify something to trace.\n");
3136 return -1;
3137 }
3138
017037ff 3139 if (!trace.trace_syscalls && trace.ev_qualifier) {
59247e33
ACM
3140 pr_err("The -e option can't be used with --no-syscalls.\n");
3141 goto out;
3142 }
3143
c24ff998
ACM
3144 if (output_name != NULL) {
3145 err = trace__open_output(&trace, output_name);
3146 if (err < 0) {
3147 perror("failed to create output file");
3148 goto out;
3149 }
3150 }
3151
fd0db102
ACM
3152 trace.open_id = syscalltbl__id(trace.sctbl, "open");
3153
602ad878 3154 err = target__validate(&trace.opts.target);
32caf0d1 3155 if (err) {
602ad878 3156 target__strerror(&trace.opts.target, err, bf, sizeof(bf));
c24ff998
ACM
3157 fprintf(trace.output, "%s", bf);
3158 goto out_close;
32caf0d1
NK
3159 }
3160
602ad878 3161 err = target__parse_uid(&trace.opts.target);
514f1c67 3162 if (err) {
602ad878 3163 target__strerror(&trace.opts.target, err, bf, sizeof(bf));
c24ff998
ACM
3164 fprintf(trace.output, "%s", bf);
3165 goto out_close;
514f1c67
ACM
3166 }
3167
602ad878 3168 if (!argc && target__none(&trace.opts.target))
ee76120e
NK
3169 trace.opts.target.system_wide = true;
3170
6810fc91
DA
3171 if (input_name)
3172 err = trace__replay(&trace);
3173 else
3174 err = trace__run(&trace, argc, argv);
1302d88e 3175
c24ff998
ACM
3176out_close:
3177 if (output_name != NULL)
3178 fclose(trace.output);
3179out:
1302d88e 3180 return err;
514f1c67 3181}