perf trace beauty fcntl: Beautify F_GETLEASE and F_SETLEASE arg/return
[linux-block.git] / tools / perf / builtin-trace.c
CommitLineData
a598bb5e
ACM
1/*
2 * builtin-trace.c
3 *
4 * Builtin 'trace' command:
5 *
6 * Display a continuously updated trace of any workload, CPU, specific PID,
7 * system wide, etc. Default format is loosely strace like, but any other
8 * event may be specified using --event.
9 *
10 * Copyright (C) 2012, 2013, 2014, 2015 Red Hat Inc, Arnaldo Carvalho de Melo <acme@redhat.com>
11 *
12 * Initially based on the 'trace' prototype by Thomas Gleixner:
13 *
14 * http://lwn.net/Articles/415728/ ("Announcing a new utility: 'trace'")
15 *
16 * Released under the GPL v2. (and only v2, not any later version)
17 */
18
4e319027 19#include <traceevent/event-parse.h>
988bdb31 20#include <api/fs/tracing_path.h>
514f1c67 21#include "builtin.h"
752fde44 22#include "util/color.h"
7c304ee0 23#include "util/debug.h"
5ab8c689 24#include "util/event.h"
514f1c67 25#include "util/evlist.h"
4b6ab94e 26#include <subcmd/exec-cmd.h>
752fde44 27#include "util/machine.h"
9a3993d4 28#include "util/path.h"
6810fc91 29#include "util/session.h"
752fde44 30#include "util/thread.h"
4b6ab94e 31#include <subcmd/parse-options.h>
2ae3a312 32#include "util/strlist.h"
bdc89661 33#include "util/intlist.h"
514f1c67 34#include "util/thread_map.h"
bf2575c1 35#include "util/stat.h"
fd5cead2 36#include "trace/beauty/beauty.h"
97978b3e 37#include "trace-event.h"
9aca7f17 38#include "util/parse-events.h"
ba504235 39#include "util/bpf-loader.h"
566a0885 40#include "callchain.h"
fea01392 41#include "print_binary.h"
a067558e 42#include "string2.h"
fd0db102 43#include "syscalltbl.h"
96c14451 44#include "rb_resort.h"
514f1c67 45
a43783ae 46#include <errno.h>
fd20e811 47#include <inttypes.h>
fd0db102 48#include <libaudit.h> /* FIXME: Still needed for audit_errno_to_name */
4208735d 49#include <poll.h>
9607ad3a 50#include <signal.h>
514f1c67 51#include <stdlib.h>
017037ff 52#include <string.h>
8dd2a131 53#include <linux/err.h>
997bba8c
ACM
54#include <linux/filter.h>
55#include <linux/audit.h>
877a7a11 56#include <linux/kernel.h>
39878d49 57#include <linux/random.h>
c6d4a494 58#include <linux/stringify.h>
bd48c63e 59#include <linux/time64.h>
514f1c67 60
3d689ed6
ACM
61#include "sane_ctype.h"
62
c188e7ac
ACM
63#ifndef O_CLOEXEC
64# define O_CLOEXEC 02000000
65#endif
66
83a51694
ACM
67#ifndef F_LINUX_SPECIFIC_BASE
68# define F_LINUX_SPECIFIC_BASE 1024
69#endif
70
d1d438a3
ACM
71struct trace {
72 struct perf_tool tool;
fd0db102 73 struct syscalltbl *sctbl;
d1d438a3
ACM
74 struct {
75 int max;
76 struct syscall *table;
77 struct {
78 struct perf_evsel *sys_enter,
79 *sys_exit;
80 } events;
81 } syscalls;
82 struct record_opts opts;
83 struct perf_evlist *evlist;
84 struct machine *host;
85 struct thread *current;
86 u64 base_time;
87 FILE *output;
88 unsigned long nr_events;
89 struct strlist *ev_qualifier;
90 struct {
91 size_t nr;
92 int *entries;
93 } ev_qualifier_ids;
d1d438a3
ACM
94 struct {
95 size_t nr;
96 pid_t *entries;
97 } filter_pids;
98 double duration_filter;
99 double runtime_ms;
100 struct {
101 u64 vfs_getname,
102 proc_getname;
103 } stats;
c6d4a494 104 unsigned int max_stack;
5cf9c84e 105 unsigned int min_stack;
d1d438a3
ACM
106 bool not_ev_qualifier;
107 bool live;
108 bool full_time;
109 bool sched;
110 bool multiple_threads;
111 bool summary;
112 bool summary_only;
113 bool show_comm;
114 bool show_tool_stats;
115 bool trace_syscalls;
44621819 116 bool kernel_syscallchains;
d1d438a3
ACM
117 bool force;
118 bool vfs_getname;
119 int trace_pgfaults;
fd0db102 120 int open_id;
d1d438a3 121};
a1c2552d 122
77170988
ACM
123struct tp_field {
124 int offset;
125 union {
126 u64 (*integer)(struct tp_field *field, struct perf_sample *sample);
127 void *(*pointer)(struct tp_field *field, struct perf_sample *sample);
128 };
129};
130
131#define TP_UINT_FIELD(bits) \
132static u64 tp_field__u##bits(struct tp_field *field, struct perf_sample *sample) \
133{ \
55d43bca
DA
134 u##bits value; \
135 memcpy(&value, sample->raw_data + field->offset, sizeof(value)); \
136 return value; \
77170988
ACM
137}
138
139TP_UINT_FIELD(8);
140TP_UINT_FIELD(16);
141TP_UINT_FIELD(32);
142TP_UINT_FIELD(64);
143
144#define TP_UINT_FIELD__SWAPPED(bits) \
145static u64 tp_field__swapped_u##bits(struct tp_field *field, struct perf_sample *sample) \
146{ \
55d43bca
DA
147 u##bits value; \
148 memcpy(&value, sample->raw_data + field->offset, sizeof(value)); \
77170988
ACM
149 return bswap_##bits(value);\
150}
151
152TP_UINT_FIELD__SWAPPED(16);
153TP_UINT_FIELD__SWAPPED(32);
154TP_UINT_FIELD__SWAPPED(64);
155
156static int tp_field__init_uint(struct tp_field *field,
157 struct format_field *format_field,
158 bool needs_swap)
159{
160 field->offset = format_field->offset;
161
162 switch (format_field->size) {
163 case 1:
164 field->integer = tp_field__u8;
165 break;
166 case 2:
167 field->integer = needs_swap ? tp_field__swapped_u16 : tp_field__u16;
168 break;
169 case 4:
170 field->integer = needs_swap ? tp_field__swapped_u32 : tp_field__u32;
171 break;
172 case 8:
173 field->integer = needs_swap ? tp_field__swapped_u64 : tp_field__u64;
174 break;
175 default:
176 return -1;
177 }
178
179 return 0;
180}
181
182static void *tp_field__ptr(struct tp_field *field, struct perf_sample *sample)
183{
184 return sample->raw_data + field->offset;
185}
186
187static int tp_field__init_ptr(struct tp_field *field, struct format_field *format_field)
188{
189 field->offset = format_field->offset;
190 field->pointer = tp_field__ptr;
191 return 0;
192}
193
194struct syscall_tp {
195 struct tp_field id;
196 union {
197 struct tp_field args, ret;
198 };
199};
200
201static int perf_evsel__init_tp_uint_field(struct perf_evsel *evsel,
202 struct tp_field *field,
203 const char *name)
204{
205 struct format_field *format_field = perf_evsel__field(evsel, name);
206
207 if (format_field == NULL)
208 return -1;
209
210 return tp_field__init_uint(field, format_field, evsel->needs_swap);
211}
212
/*
 * Initialise the integer accessor for member @name of the struct syscall_tp
 * stored in evsel->priv, using the tracepoint field of the same name.
 * Evaluates to the return value of perf_evsel__init_tp_uint_field().
 *
 * @evsel is parenthesised so that any pointer expression may be passed; note
 * that it is still evaluated twice.
 */
#define perf_evsel__init_sc_tp_uint_field(evsel, name) \
	({ struct syscall_tp *sc = (evsel)->priv;\
	   perf_evsel__init_tp_uint_field((evsel), &sc->name, #name); })
216
/*
 * Look up the tracepoint field called @name in @evsel and initialise @field
 * as a pointer accessor for it.
 *
 * Returns -1 when the field does not exist, otherwise whatever
 * tp_field__init_ptr() returns.
 */
static int perf_evsel__init_tp_ptr_field(struct perf_evsel *evsel,
					 struct tp_field *field,
					 const char *name)
{
	struct format_field *ff = perf_evsel__field(evsel, name);

	return ff != NULL ? tp_field__init_ptr(field, ff) : -1;
}
228
/*
 * Initialise the pointer accessor for member @name of the struct syscall_tp
 * stored in evsel->priv, using the tracepoint field of the same name.
 * Evaluates to the return value of perf_evsel__init_tp_ptr_field().
 *
 * @evsel is parenthesised so that any pointer expression may be passed; note
 * that it is still evaluated twice.
 */
#define perf_evsel__init_sc_tp_ptr_field(evsel, name) \
	({ struct syscall_tp *sc = (evsel)->priv;\
	   perf_evsel__init_tp_ptr_field((evsel), &sc->name, #name); })
232
233static void perf_evsel__delete_priv(struct perf_evsel *evsel)
234{
04662523 235 zfree(&evsel->priv);
77170988
ACM
236 perf_evsel__delete(evsel);
237}
238
96695d44
NK
239static int perf_evsel__init_syscall_tp(struct perf_evsel *evsel, void *handler)
240{
241 evsel->priv = malloc(sizeof(struct syscall_tp));
242 if (evsel->priv != NULL) {
243 if (perf_evsel__init_sc_tp_uint_field(evsel, id))
244 goto out_delete;
245
246 evsel->handler = handler;
247 return 0;
248 }
249
250 return -ENOMEM;
251
252out_delete:
04662523 253 zfree(&evsel->priv);
96695d44
NK
254 return -ENOENT;
255}
256
/*
 * Create the evsel for the "raw_syscalls:<direction>" tracepoint, falling back
 * to "syscalls:<direction>", and prepare it with
 * perf_evsel__init_syscall_tp().
 *
 * Returns the new evsel, or NULL if neither tracepoint could be created or
 * the initialisation failed (the evsel is deleted in the latter case).
 */
static struct perf_evsel *perf_evsel__syscall_newtp(const char *direction, void *handler)
{
	struct perf_evsel *evsel = perf_evsel__newtp("raw_syscalls", direction);

	if (IS_ERR(evsel)) {
		/* older kernel (e.g., RHEL6) use syscalls:{enter,exit} */
		evsel = perf_evsel__newtp("syscalls", direction);
		if (IS_ERR(evsel))
			return NULL;
	}

	if (perf_evsel__init_syscall_tp(evsel, handler) != 0) {
		perf_evsel__delete_priv(evsel);
		return NULL;
	}

	return evsel;
}
277
/*
 * Read member @name of the struct syscall_tp stored in evsel->priv from
 * @sample, as an integer (perf_evsel__sc_tp_uint) or as a pointer into the
 * sample's raw data (perf_evsel__sc_tp_ptr).
 *
 * Macro arguments are parenthesised so that arbitrary expressions can be
 * passed for @evsel and @sample.
 */
#define perf_evsel__sc_tp_uint(evsel, name, sample) \
	({ struct syscall_tp *fields = (evsel)->priv; \
	   fields->name.integer(&fields->name, (sample)); })

#define perf_evsel__sc_tp_ptr(evsel, name, sample) \
	({ struct syscall_tp *fields = (evsel)->priv; \
	   fields->name.pointer(&fields->name, (sample)); })
285
0ae79636
ACM
286size_t strarray__scnprintf(struct strarray *sa, char *bf, size_t size, const char *intfmt, int val)
287{
288 int idx = val - sa->offset;
1f115cb7 289
0ae79636
ACM
290 if (idx < 0 || idx >= sa->nr_entries)
291 return scnprintf(bf, size, intfmt, val);
1f115cb7 292
0ae79636 293 return scnprintf(bf, size, "%s", sa->entries[idx]);
03e3adc9
ACM
294}
295
975b7c2f
ACM
296static size_t __syscall_arg__scnprintf_strarray(char *bf, size_t size,
297 const char *intfmt,
298 struct syscall_arg *arg)
1f115cb7 299{
0ae79636 300 return strarray__scnprintf(arg->parm, bf, size, intfmt, arg->val);
1f115cb7
ACM
301}
302
975b7c2f
ACM
/* String-array formatter whose numeric fallback is decimal ("%d"). */
static size_t syscall_arg__scnprintf_strarray(char *bf, size_t size,
					      struct syscall_arg *arg)
{
	const char *fallback_fmt = "%d";

	return __syscall_arg__scnprintf_strarray(bf, size, fallback_fmt, arg);
}
308
1f115cb7
ACM
309#define SCA_STRARRAY syscall_arg__scnprintf_strarray
310
83a51694
ACM
/* A list of 'nr_entries' string arrays, consulted one after the other. */
struct strarrays {
	int		nr_entries;
	struct strarray **entries;
};

/* Define 'strarrays__<array>' wrapping the struct strarray pointer table @array. */
#define DEFINE_STRARRAYS(array) struct strarrays strarrays__##array = { \
	.nr_entries = ARRAY_SIZE(array), \
	.entries    = array, \
}
320
274e86fd
ACM
321size_t syscall_arg__scnprintf_strarrays(char *bf, size_t size,
322 struct syscall_arg *arg)
83a51694
ACM
323{
324 struct strarrays *sas = arg->parm;
325 int i;
326
327 for (i = 0; i < sas->nr_entries; ++i) {
328 struct strarray *sa = sas->entries[i];
329 int idx = arg->val - sa->offset;
330
331 if (idx >= 0 && idx < sa->nr_entries) {
332 if (sa->entries[idx] == NULL)
333 break;
334 return scnprintf(bf, size, "%s", sa->entries[idx]);
335 }
336 }
337
338 return scnprintf(bf, size, "%d", arg->val);
339}
340
844ae5b4
ACM
#if defined(__i386__) || defined(__x86_64__)
/*
 * FIXME: Make this available to all arches as soon as the ioctl beautifier
 *	  gets rewritten to support all arches.
 *
 * String-array formatter whose numeric fallback is hexadecimal ("%#x").
 */
static size_t syscall_arg__scnprintf_strhexarray(char *bf, size_t size,
						 struct syscall_arg *arg)
{
	const char *fallback_fmt = "%#x";

	return __syscall_arg__scnprintf_strarray(bf, size, fallback_fmt, arg);
}

#define SCA_STRHEXARRAY syscall_arg__scnprintf_strhexarray
#endif /* defined(__i386__) || defined(__x86_64__) */
78645cf3 354
48e1f91a
ACM
355#ifndef AT_FDCWD
356#define AT_FDCWD -100
357#endif
358
75b757ca
ACM
359static size_t syscall_arg__scnprintf_fd_at(char *bf, size_t size,
360 struct syscall_arg *arg)
361{
362 int fd = arg->val;
363
364 if (fd == AT_FDCWD)
365 return scnprintf(bf, size, "CWD");
366
367 return syscall_arg__scnprintf_fd(bf, size, arg);
368}
369
370#define SCA_FDAT syscall_arg__scnprintf_fd_at
371
372static size_t syscall_arg__scnprintf_close_fd(char *bf, size_t size,
373 struct syscall_arg *arg);
374
375#define SCA_CLOSE_FD syscall_arg__scnprintf_close_fd
376
2c2b1623 377size_t syscall_arg__scnprintf_hex(char *bf, size_t size, struct syscall_arg *arg)
13d4ff3e 378{
01533e97 379 return scnprintf(bf, size, "%#lx", arg->val);
13d4ff3e
ACM
380}
381
2c2b1623 382size_t syscall_arg__scnprintf_int(char *bf, size_t size, struct syscall_arg *arg)
a1c2552d
ACM
383{
384 return scnprintf(bf, size, "%d", arg->val);
385}
386
5dde91ed
ACM
387size_t syscall_arg__scnprintf_long(char *bf, size_t size, struct syscall_arg *arg)
388{
389 return scnprintf(bf, size, "%ld", arg->val);
390}
391
729a7841
ACM
392static const char *bpf_cmd[] = {
393 "MAP_CREATE", "MAP_LOOKUP_ELEM", "MAP_UPDATE_ELEM", "MAP_DELETE_ELEM",
394 "MAP_GET_NEXT_KEY", "PROG_LOAD",
395};
396static DEFINE_STRARRAY(bpf_cmd);
397
03e3adc9
ACM
398static const char *epoll_ctl_ops[] = { "ADD", "DEL", "MOD", };
399static DEFINE_STRARRAY_OFFSET(epoll_ctl_ops, 1);
eac032c5 400
1f115cb7
ACM
401static const char *itimers[] = { "REAL", "VIRTUAL", "PROF", };
402static DEFINE_STRARRAY(itimers);
403
b62bee1b
ACM
404static const char *keyctl_options[] = {
405 "GET_KEYRING_ID", "JOIN_SESSION_KEYRING", "UPDATE", "REVOKE", "CHOWN",
406 "SETPERM", "DESCRIBE", "CLEAR", "LINK", "UNLINK", "SEARCH", "READ",
407 "INSTANTIATE", "NEGATE", "SET_REQKEY_KEYRING", "SET_TIMEOUT",
408 "ASSUME_AUTHORITY", "GET_SECURITY", "SESSION_TO_PARENT", "REJECT",
409 "INSTANTIATE_IOV", "INVALIDATE", "GET_PERSISTENT",
410};
411static DEFINE_STRARRAY(keyctl_options);
412
efe6b882
ACM
413static const char *whences[] = { "SET", "CUR", "END",
414#ifdef SEEK_DATA
415"DATA",
416#endif
417#ifdef SEEK_HOLE
418"HOLE",
419#endif
420};
421static DEFINE_STRARRAY(whences);
f9da0b0c 422
80f587d5
ACM
423static const char *fcntl_cmds[] = {
424 "DUPFD", "GETFD", "SETFD", "GETFL", "SETFL", "GETLK", "SETLK",
e000e5e3
ACM
425 "SETLKW", "SETOWN", "GETOWN", "SETSIG", "GETSIG", "GETLK64",
426 "SETLK64", "SETLKW64", "SETOWN_EX", "GETOWN_EX",
427 "GETOWNER_UIDS",
80f587d5
ACM
428};
429static DEFINE_STRARRAY(fcntl_cmds);
430
83a51694
ACM
431static const char *fcntl_linux_specific_cmds[] = {
432 "SETLEASE", "GETLEASE", "NOTIFY", [5] = "CANCELLK", "DUPFD_CLOEXEC",
433 "SETPIPE_SZ", "GETPIPE_SZ", "ADD_SEALS", "GET_SEALS",
64e4561d 434 "GET_RW_HINT", "SET_RW_HINT", "GET_FILE_RW_HINT", "SET_FILE_RW_HINT",
83a51694
ACM
435};
436
437static DEFINE_STRARRAY_OFFSET(fcntl_linux_specific_cmds, F_LINUX_SPECIFIC_BASE);
438
439static struct strarray *fcntl_cmds_arrays[] = {
440 &strarray__fcntl_cmds,
441 &strarray__fcntl_linux_specific_cmds,
442};
443
444static DEFINE_STRARRAYS(fcntl_cmds_arrays);
445
c045bf02
ACM
446static const char *rlimit_resources[] = {
447 "CPU", "FSIZE", "DATA", "STACK", "CORE", "RSS", "NPROC", "NOFILE",
448 "MEMLOCK", "AS", "LOCKS", "SIGPENDING", "MSGQUEUE", "NICE", "RTPRIO",
449 "RTTIME",
450};
451static DEFINE_STRARRAY(rlimit_resources);
452
eb5b1b14
ACM
453static const char *sighow[] = { "BLOCK", "UNBLOCK", "SETMASK", };
454static DEFINE_STRARRAY(sighow);
455
4f8c1b74
DA
456static const char *clockid[] = {
457 "REALTIME", "MONOTONIC", "PROCESS_CPUTIME_ID", "THREAD_CPUTIME_ID",
28ebb87c
ACM
458 "MONOTONIC_RAW", "REALTIME_COARSE", "MONOTONIC_COARSE", "BOOTTIME",
459 "REALTIME_ALARM", "BOOTTIME_ALARM", "SGI_CYCLE", "TAI"
4f8c1b74
DA
460};
461static DEFINE_STRARRAY(clockid);
462
e10bce81
ACM
/*
 * Socket address family names, indexed by the Linux AF_* value (AF_UNSPEC is
 * 0, AF_VSOCK is 40, AF_SMC is 43).
 *
 * The table is positional, so no value may be skipped: "MPLS" (AF_MPLS, 28)
 * has to sit between "IB" and "CAN", otherwise every family from AF_CAN
 * upwards is printed with the name of its neighbour.
 */
static const char *socket_families[] = {
	"UNSPEC", "LOCAL", "INET", "AX25", "IPX", "APPLETALK", "NETROM",
	"BRIDGE", "ATMPVC", "X25", "INET6", "ROSE", "DECnet", "NETBEUI",
	"SECURITY", "KEY", "NETLINK", "PACKET", "ASH", "ECONET", "ATMSVC",
	"RDS", "SNA", "IRDA", "PPPOX", "WANPIPE", "LLC", "IB", "MPLS", "CAN",
	"TIPC", "BLUETOOTH", "IUCV", "RXRPC", "ISDN", "PHONET", "IEEE802154",
	"CAIF", "ALG", "NFC", "VSOCK", "KCM", "QIPCRTR", "SMC",
};
471static DEFINE_STRARRAY(socket_families);
472
51108999
ACM
473static size_t syscall_arg__scnprintf_access_mode(char *bf, size_t size,
474 struct syscall_arg *arg)
475{
476 size_t printed = 0;
477 int mode = arg->val;
478
479 if (mode == F_OK) /* 0 */
480 return scnprintf(bf, size, "F");
481#define P_MODE(n) \
482 if (mode & n##_OK) { \
483 printed += scnprintf(bf + printed, size - printed, "%s", #n); \
484 mode &= ~n##_OK; \
485 }
486
487 P_MODE(R);
488 P_MODE(W);
489 P_MODE(X);
490#undef P_MODE
491
492 if (mode)
493 printed += scnprintf(bf + printed, size - printed, "|%#x", mode);
494
495 return printed;
496}
497
498#define SCA_ACCMODE syscall_arg__scnprintf_access_mode
499
f994592d
ACM
500static size_t syscall_arg__scnprintf_filename(char *bf, size_t size,
501 struct syscall_arg *arg);
502
503#define SCA_FILENAME syscall_arg__scnprintf_filename
504
46cce19b
ACM
505static size_t syscall_arg__scnprintf_pipe_flags(char *bf, size_t size,
506 struct syscall_arg *arg)
507{
508 int printed = 0, flags = arg->val;
509
510#define P_FLAG(n) \
511 if (flags & O_##n) { \
512 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
513 flags &= ~O_##n; \
514 }
515
516 P_FLAG(CLOEXEC);
517 P_FLAG(NONBLOCK);
518#undef P_FLAG
519
520 if (flags)
521 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
522
523 return printed;
524}
525
526#define SCA_PIPE_FLAGS syscall_arg__scnprintf_pipe_flags
527
844ae5b4
ACM
528#if defined(__i386__) || defined(__x86_64__)
529/*
530 * FIXME: Make this available to all arches.
531 */
78645cf3
ACM
532#define TCGETS 0x5401
533
534static const char *tioctls[] = {
535 "TCGETS", "TCSETS", "TCSETSW", "TCSETSF", "TCGETA", "TCSETA", "TCSETAW",
536 "TCSETAF", "TCSBRK", "TCXONC", "TCFLSH", "TIOCEXCL", "TIOCNXCL",
537 "TIOCSCTTY", "TIOCGPGRP", "TIOCSPGRP", "TIOCOUTQ", "TIOCSTI",
538 "TIOCGWINSZ", "TIOCSWINSZ", "TIOCMGET", "TIOCMBIS", "TIOCMBIC",
539 "TIOCMSET", "TIOCGSOFTCAR", "TIOCSSOFTCAR", "FIONREAD", "TIOCLINUX",
540 "TIOCCONS", "TIOCGSERIAL", "TIOCSSERIAL", "TIOCPKT", "FIONBIO",
541 "TIOCNOTTY", "TIOCSETD", "TIOCGETD", "TCSBRKP", [0x27] = "TIOCSBRK",
542 "TIOCCBRK", "TIOCGSID", "TCGETS2", "TCSETS2", "TCSETSW2", "TCSETSF2",
543 "TIOCGRS485", "TIOCSRS485", "TIOCGPTN", "TIOCSPTLCK",
544 "TIOCGDEV||TCGETX", "TCSETX", "TCSETXF", "TCSETXW", "TIOCSIG",
545 "TIOCVHANGUP", "TIOCGPKT", "TIOCGPTLCK", "TIOCGEXCL",
546 [0x50] = "FIONCLEX", "FIOCLEX", "FIOASYNC", "TIOCSERCONFIG",
547 "TIOCSERGWILD", "TIOCSERSWILD", "TIOCGLCKTRMIOS", "TIOCSLCKTRMIOS",
548 "TIOCSERGSTRUCT", "TIOCSERGETLSR", "TIOCSERGETMULTI", "TIOCSERSETMULTI",
549 "TIOCMIWAIT", "TIOCGICOUNT", [0x60] = "FIOQSIZE",
550};
551
552static DEFINE_STRARRAY_OFFSET(tioctls, 0x5401);
844ae5b4 553#endif /* defined(__i386__) || defined(__x86_64__) */
78645cf3 554
a355a61e
ACM
555#ifndef GRND_NONBLOCK
556#define GRND_NONBLOCK 0x0001
557#endif
558#ifndef GRND_RANDOM
559#define GRND_RANDOM 0x0002
560#endif
561
39878d49
ACM
562static size_t syscall_arg__scnprintf_getrandom_flags(char *bf, size_t size,
563 struct syscall_arg *arg)
564{
565 int printed = 0, flags = arg->val;
566
567#define P_FLAG(n) \
568 if (flags & GRND_##n) { \
569 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
570 flags &= ~GRND_##n; \
571 }
572
573 P_FLAG(RANDOM);
574 P_FLAG(NONBLOCK);
575#undef P_FLAG
576
577 if (flags)
578 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
579
580 return printed;
581}
582
583#define SCA_GETRANDOM_FLAGS syscall_arg__scnprintf_getrandom_flags
584
453350dd
ACM
/*
 * Initializer fragment for a struct syscall_fmt entry: format argument number
 * @idx with SCA_STRARRAY using 'strarray__<array>'.  @argname is not expanded;
 * it only documents the argument at the point of use.
 */
#define STRARRAY(idx, argname, array) \
	  .arg_scnprintf = { [idx] = SCA_STRARRAY, }, \
	  .arg_parm	 = { [idx] = &strarray__##array, }
588
ea8dc3ce 589#include "trace/beauty/eventfd.c"
8bf382ce 590#include "trace/beauty/flock.c"
d5d71e86 591#include "trace/beauty/futex_op.c"
df4cb167 592#include "trace/beauty/mmap.c"
ba2f22cf 593#include "trace/beauty/mode_t.c"
a30e6259 594#include "trace/beauty/msg_flags.c"
8f48df69 595#include "trace/beauty/open_flags.c"
62de344e 596#include "trace/beauty/perf_event_open.c"
d5d71e86 597#include "trace/beauty/pid.c"
a3bca91f 598#include "trace/beauty/sched_policy.c"
f5cd95ea 599#include "trace/beauty/seccomp.c"
12199d8e 600#include "trace/beauty/signum.c"
bbf86c43 601#include "trace/beauty/socket_type.c"
7206b900 602#include "trace/beauty/waitid_options.c"
a3bca91f 603
514f1c67
ACM
604static struct syscall_fmt {
605 const char *name;
aec1930b 606 const char *alias;
01533e97 607 size_t (*arg_scnprintf[6])(char *bf, size_t size, struct syscall_arg *arg);
1f115cb7 608 void *arg_parm[6];
514f1c67 609 bool errmsg;
11c8e39f 610 bool errpid;
514f1c67 611 bool timeout;
04b34729 612 bool hexret;
514f1c67 613} syscall_fmts[] = {
51108999 614 { .name = "access", .errmsg = true,
12f3ca4f 615 .arg_scnprintf = { [1] = SCA_ACCMODE, /* mode */ }, },
aec1930b 616 { .name = "arch_prctl", .errmsg = true, .alias = "prctl", },
729a7841 617 { .name = "bpf", .errmsg = true, STRARRAY(0, cmd, bpf_cmd), },
beccb2b5
ACM
618 { .name = "brk", .hexret = true,
619 .arg_scnprintf = { [0] = SCA_HEX, /* brk */ }, },
12f3ca4f
ACM
620 { .name = "chdir", .errmsg = true, },
621 { .name = "chmod", .errmsg = true, },
622 { .name = "chroot", .errmsg = true, },
4f8c1b74 623 { .name = "clock_gettime", .errmsg = true, STRARRAY(0, clk_id, clockid), },
11c8e39f 624 { .name = "clone", .errpid = true, },
75b757ca 625 { .name = "close", .errmsg = true,
48000a1a 626 .arg_scnprintf = { [0] = SCA_CLOSE_FD, /* fd */ }, },
a14bb860 627 { .name = "connect", .errmsg = true, },
12f3ca4f 628 { .name = "creat", .errmsg = true, },
b6565c90
ACM
629 { .name = "dup", .errmsg = true, },
630 { .name = "dup2", .errmsg = true, },
631 { .name = "dup3", .errmsg = true, },
453350dd 632 { .name = "epoll_ctl", .errmsg = true, STRARRAY(1, op, epoll_ctl_ops), },
49af9e93
ACM
633 { .name = "eventfd2", .errmsg = true,
634 .arg_scnprintf = { [1] = SCA_EFD_FLAGS, /* flags */ }, },
12f3ca4f 635 { .name = "faccessat", .errmsg = true, },
b6565c90
ACM
636 { .name = "fadvise64", .errmsg = true, },
637 { .name = "fallocate", .errmsg = true, },
638 { .name = "fchdir", .errmsg = true, },
639 { .name = "fchmod", .errmsg = true, },
75b757ca 640 { .name = "fchmodat", .errmsg = true,
12f3ca4f 641 .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
b6565c90 642 { .name = "fchown", .errmsg = true, },
75b757ca 643 { .name = "fchownat", .errmsg = true,
12f3ca4f 644 .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
75b757ca 645 { .name = "fcntl", .errmsg = true,
9c47f667
ACM
646 .arg_scnprintf = { [1] = SCA_FCNTL_CMD, /* cmd */
647 [2] = SCA_FCNTL_ARG, /* arg */ },
83a51694 648 .arg_parm = { [1] = &strarrays__fcntl_cmds_arrays, /* cmd */ }, },
b6565c90 649 { .name = "fdatasync", .errmsg = true, },
5cea6ff2 650 { .name = "flock", .errmsg = true,
b6565c90
ACM
651 .arg_scnprintf = { [1] = SCA_FLOCK, /* cmd */ }, },
652 { .name = "fsetxattr", .errmsg = true, },
653 { .name = "fstat", .errmsg = true, .alias = "newfstat", },
12f3ca4f 654 { .name = "fstatat", .errmsg = true, .alias = "newfstatat", },
b6565c90
ACM
655 { .name = "fstatfs", .errmsg = true, },
656 { .name = "fsync", .errmsg = true, },
657 { .name = "ftruncate", .errmsg = true, },
f9da0b0c
ACM
658 { .name = "futex", .errmsg = true,
659 .arg_scnprintf = { [1] = SCA_FUTEX_OP, /* op */ }, },
75b757ca 660 { .name = "futimesat", .errmsg = true,
12f3ca4f 661 .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
b6565c90
ACM
662 { .name = "getdents", .errmsg = true, },
663 { .name = "getdents64", .errmsg = true, },
453350dd 664 { .name = "getitimer", .errmsg = true, STRARRAY(0, which, itimers), },
c65f1070 665 { .name = "getpid", .errpid = true, },
d1d438a3 666 { .name = "getpgid", .errpid = true, },
c65f1070 667 { .name = "getppid", .errpid = true, },
39878d49
ACM
668 { .name = "getrandom", .errmsg = true,
669 .arg_scnprintf = { [2] = SCA_GETRANDOM_FLAGS, /* flags */ }, },
453350dd 670 { .name = "getrlimit", .errmsg = true, STRARRAY(0, resource, rlimit_resources), },
12f3ca4f
ACM
671 { .name = "getxattr", .errmsg = true, },
672 { .name = "inotify_add_watch", .errmsg = true, },
beccb2b5 673 { .name = "ioctl", .errmsg = true,
b6565c90 674 .arg_scnprintf = {
844ae5b4
ACM
675#if defined(__i386__) || defined(__x86_64__)
676/*
677 * FIXME: Make this available to all arches.
678 */
78645cf3
ACM
679 [1] = SCA_STRHEXARRAY, /* cmd */
680 [2] = SCA_HEX, /* arg */ },
681 .arg_parm = { [1] = &strarray__tioctls, /* cmd */ }, },
844ae5b4
ACM
682#else
683 [2] = SCA_HEX, /* arg */ }, },
684#endif
b62bee1b 685 { .name = "keyctl", .errmsg = true, STRARRAY(0, option, keyctl_options), },
8bad5b0a
ACM
686 { .name = "kill", .errmsg = true,
687 .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
12f3ca4f
ACM
688 { .name = "lchown", .errmsg = true, },
689 { .name = "lgetxattr", .errmsg = true, },
75b757ca 690 { .name = "linkat", .errmsg = true,
48000a1a 691 .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
12f3ca4f
ACM
692 { .name = "listxattr", .errmsg = true, },
693 { .name = "llistxattr", .errmsg = true, },
694 { .name = "lremovexattr", .errmsg = true, },
75b757ca 695 { .name = "lseek", .errmsg = true,
b6565c90 696 .arg_scnprintf = { [2] = SCA_STRARRAY, /* whence */ },
75b757ca 697 .arg_parm = { [2] = &strarray__whences, /* whence */ }, },
12f3ca4f
ACM
698 { .name = "lsetxattr", .errmsg = true, },
699 { .name = "lstat", .errmsg = true, .alias = "newlstat", },
700 { .name = "lsxattr", .errmsg = true, },
9e9716d1
ACM
701 { .name = "madvise", .errmsg = true,
702 .arg_scnprintf = { [0] = SCA_HEX, /* start */
703 [2] = SCA_MADV_BHV, /* behavior */ }, },
12f3ca4f 704 { .name = "mkdir", .errmsg = true, },
75b757ca 705 { .name = "mkdirat", .errmsg = true,
12f3ca4f
ACM
706 .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
707 { .name = "mknod", .errmsg = true, },
75b757ca 708 { .name = "mknodat", .errmsg = true,
12f3ca4f 709 .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
3d903aa7
ACM
710 { .name = "mlock", .errmsg = true,
711 .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
712 { .name = "mlockall", .errmsg = true,
713 .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
beccb2b5 714 { .name = "mmap", .hexret = true,
54265664
JO
715/* The standard mmap maps to old_mmap on s390x */
716#if defined(__s390x__)
717 .alias = "old_mmap",
718#endif
ae685380 719 .arg_scnprintf = { [0] = SCA_HEX, /* addr */
941557e0 720 [2] = SCA_MMAP_PROT, /* prot */
b6565c90 721 [3] = SCA_MMAP_FLAGS, /* flags */ }, },
beccb2b5 722 { .name = "mprotect", .errmsg = true,
ae685380
ACM
723 .arg_scnprintf = { [0] = SCA_HEX, /* start */
724 [2] = SCA_MMAP_PROT, /* prot */ }, },
090389b6
ACM
725 { .name = "mq_unlink", .errmsg = true,
726 .arg_scnprintf = { [0] = SCA_FILENAME, /* u_name */ }, },
ae685380
ACM
727 { .name = "mremap", .hexret = true,
728 .arg_scnprintf = { [0] = SCA_HEX, /* addr */
86998dda 729 [3] = SCA_MREMAP_FLAGS, /* flags */
ae685380 730 [4] = SCA_HEX, /* new_addr */ }, },
3d903aa7
ACM
731 { .name = "munlock", .errmsg = true,
732 .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
beccb2b5
ACM
733 { .name = "munmap", .errmsg = true,
734 .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
75b757ca 735 { .name = "name_to_handle_at", .errmsg = true,
48000a1a 736 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
75b757ca 737 { .name = "newfstatat", .errmsg = true,
12f3ca4f 738 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
be65a89a 739 { .name = "open", .errmsg = true,
12f3ca4f 740 .arg_scnprintf = { [1] = SCA_OPEN_FLAGS, /* flags */ }, },
31cd3855 741 { .name = "open_by_handle_at", .errmsg = true,
75b757ca
ACM
742 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
743 [2] = SCA_OPEN_FLAGS, /* flags */ }, },
31cd3855 744 { .name = "openat", .errmsg = true,
75b757ca
ACM
745 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
746 [2] = SCA_OPEN_FLAGS, /* flags */ }, },
a1c2552d 747 { .name = "perf_event_open", .errmsg = true,
ccd9b2a7 748 .arg_scnprintf = { [2] = SCA_INT, /* cpu */
a1c2552d
ACM
749 [3] = SCA_FD, /* group_fd */
750 [4] = SCA_PERF_FLAGS, /* flags */ }, },
46cce19b
ACM
751 { .name = "pipe2", .errmsg = true,
752 .arg_scnprintf = { [1] = SCA_PIPE_FLAGS, /* flags */ }, },
aec1930b
ACM
753 { .name = "poll", .errmsg = true, .timeout = true, },
754 { .name = "ppoll", .errmsg = true, .timeout = true, },
b6565c90
ACM
755 { .name = "pread", .errmsg = true, .alias = "pread64", },
756 { .name = "preadv", .errmsg = true, .alias = "pread", },
453350dd 757 { .name = "prlimit64", .errmsg = true, STRARRAY(1, resource, rlimit_resources), },
b6565c90
ACM
758 { .name = "pwrite", .errmsg = true, .alias = "pwrite64", },
759 { .name = "pwritev", .errmsg = true, },
760 { .name = "read", .errmsg = true, },
12f3ca4f 761 { .name = "readlink", .errmsg = true, },
75b757ca 762 { .name = "readlinkat", .errmsg = true,
12f3ca4f 763 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
b6565c90 764 { .name = "readv", .errmsg = true, },
b2cc99fd 765 { .name = "recvfrom", .errmsg = true,
b6565c90 766 .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
b2cc99fd 767 { .name = "recvmmsg", .errmsg = true,
b6565c90 768 .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
b2cc99fd 769 { .name = "recvmsg", .errmsg = true,
b6565c90 770 .arg_scnprintf = { [2] = SCA_MSG_FLAGS, /* flags */ }, },
12f3ca4f 771 { .name = "removexattr", .errmsg = true, },
75b757ca 772 { .name = "renameat", .errmsg = true,
48000a1a 773 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
12f3ca4f 774 { .name = "rmdir", .errmsg = true, },
8bad5b0a
ACM
775 { .name = "rt_sigaction", .errmsg = true,
776 .arg_scnprintf = { [0] = SCA_SIGNUM, /* sig */ }, },
453350dd 777 { .name = "rt_sigprocmask", .errmsg = true, STRARRAY(0, how, sighow), },
8bad5b0a
ACM
778 { .name = "rt_sigqueueinfo", .errmsg = true,
779 .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
780 { .name = "rt_tgsigqueueinfo", .errmsg = true,
781 .arg_scnprintf = { [2] = SCA_SIGNUM, /* sig */ }, },
f0bbd602
ACM
782 { .name = "sched_getattr", .errmsg = true, },
783 { .name = "sched_setattr", .errmsg = true, },
a3bca91f
ACM
784 { .name = "sched_setscheduler", .errmsg = true,
785 .arg_scnprintf = { [1] = SCA_SCHED_POLICY, /* policy */ }, },
997bba8c
ACM
786 { .name = "seccomp", .errmsg = true,
787 .arg_scnprintf = { [0] = SCA_SECCOMP_OP, /* op */
788 [1] = SCA_SECCOMP_FLAGS, /* flags */ }, },
aec1930b 789 { .name = "select", .errmsg = true, .timeout = true, },
b2cc99fd 790 { .name = "sendmmsg", .errmsg = true,
b6565c90 791 .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
b2cc99fd 792 { .name = "sendmsg", .errmsg = true,
b6565c90 793 .arg_scnprintf = { [2] = SCA_MSG_FLAGS, /* flags */ }, },
b2cc99fd 794 { .name = "sendto", .errmsg = true,
b6565c90 795 .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
c65f1070 796 { .name = "set_tid_address", .errpid = true, },
453350dd 797 { .name = "setitimer", .errmsg = true, STRARRAY(0, which, itimers), },
d1d438a3 798 { .name = "setpgid", .errmsg = true, },
453350dd 799 { .name = "setrlimit", .errmsg = true, STRARRAY(0, resource, rlimit_resources), },
12f3ca4f 800 { .name = "setxattr", .errmsg = true, },
b6565c90 801 { .name = "shutdown", .errmsg = true, },
e10bce81 802 { .name = "socket", .errmsg = true,
a28b24b2
ACM
803 .arg_scnprintf = { [0] = SCA_STRARRAY, /* family */
804 [1] = SCA_SK_TYPE, /* type */ },
07120aa5
ACM
805 .arg_parm = { [0] = &strarray__socket_families, /* family */ }, },
806 { .name = "socketpair", .errmsg = true,
807 .arg_scnprintf = { [0] = SCA_STRARRAY, /* family */
808 [1] = SCA_SK_TYPE, /* type */ },
e10bce81 809 .arg_parm = { [0] = &strarray__socket_families, /* family */ }, },
12f3ca4f
ACM
810 { .name = "stat", .errmsg = true, .alias = "newstat", },
811 { .name = "statfs", .errmsg = true, },
fd5cead2
ACM
812 { .name = "statx", .errmsg = true,
813 .arg_scnprintf = { [0] = SCA_FDAT, /* flags */
814 [2] = SCA_STATX_FLAGS, /* flags */
815 [3] = SCA_STATX_MASK, /* mask */ }, },
34221118
ACM
816 { .name = "swapoff", .errmsg = true,
817 .arg_scnprintf = { [0] = SCA_FILENAME, /* specialfile */ }, },
818 { .name = "swapon", .errmsg = true,
819 .arg_scnprintf = { [0] = SCA_FILENAME, /* specialfile */ }, },
75b757ca 820 { .name = "symlinkat", .errmsg = true,
48000a1a 821 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
8bad5b0a
ACM
822 { .name = "tgkill", .errmsg = true,
823 .arg_scnprintf = { [2] = SCA_SIGNUM, /* sig */ }, },
824 { .name = "tkill", .errmsg = true,
825 .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
12f3ca4f 826 { .name = "truncate", .errmsg = true, },
e5959683 827 { .name = "uname", .errmsg = true, .alias = "newuname", },
75b757ca 828 { .name = "unlinkat", .errmsg = true,
12f3ca4f
ACM
829 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
830 { .name = "utime", .errmsg = true, },
75b757ca 831 { .name = "utimensat", .errmsg = true,
12f3ca4f
ACM
832 .arg_scnprintf = { [0] = SCA_FDAT, /* dirfd */ }, },
833 { .name = "utimes", .errmsg = true, },
b6565c90 834 { .name = "vmsplice", .errmsg = true, },
11c8e39f 835 { .name = "wait4", .errpid = true,
7206b900 836 .arg_scnprintf = { [2] = SCA_WAITID_OPTIONS, /* options */ }, },
11c8e39f 837 { .name = "waitid", .errpid = true,
7206b900 838 .arg_scnprintf = { [3] = SCA_WAITID_OPTIONS, /* options */ }, },
b6565c90
ACM
839 { .name = "write", .errmsg = true, },
840 { .name = "writev", .errmsg = true, },
514f1c67
ACM
841};
842
843static int syscall_fmt__cmp(const void *name, const void *fmtp)
844{
845 const struct syscall_fmt *fmt = fmtp;
846 return strcmp(name, fmt->name);
847}
848
849static struct syscall_fmt *syscall_fmt__find(const char *name)
850{
851 const int nmemb = ARRAY_SIZE(syscall_fmts);
852 return bsearch(name, syscall_fmts, nmemb, sizeof(struct syscall_fmt), syscall_fmt__cmp);
853}
854
/*
 * Per syscall-id state, one slot per id in trace->syscalls.table, filled in
 * by trace__read_syscall_info() and syscall__set_arg_fmts().
 */
855struct syscall {
/* Result of trace_event__tp_format("syscalls", "sys_enter_<name>"); checked with IS_ERR(). */
856 struct event_format *tp_format;
f208bd8d
ACM
/* Number of entries in 'args', after dropping a leading '__syscall_nr'/'nr' field. */
857 int nr_args;
/* Tracepoint format fields describing the syscall arguments. */
858 struct format_field *args;
/* Name as returned by syscalltbl__name(); NULL means "slot not read yet". */
514f1c67 859 const char *name;
/* Set for "exit" and "exit_group". */
5089f20e 860 bool is_exit;
/* Matching entry in syscall_fmts, as returned by syscall_fmt__find(). */
514f1c67 861 struct syscall_fmt *fmt;
/* calloc()'ed array of nr_args per-argument formatters; a NULL entry means "print as %ld". */
01533e97 862 size_t (**arg_scnprintf)(char *bf, size_t size, struct syscall_arg *arg);
/* Per-argument formatter parameters, taken from fmt->arg_parm when fmt is set. */
1f115cb7 863 void **arg_parm;
514f1c67
ACM
864};
865
fd2b2975
ACM
866/*
867 * We need to have this 'calculated' boolean because in some cases we really
868 * don't know what is the duration of a syscall, for instance, when we start
869 * a session and some threads are waiting for a syscall to finish, say 'poll',
870 * in which case all we can do is to print "( ? ) for duration and for the
871 * start timestamp.
872 */
873static size_t fprintf_duration(unsigned long t, bool calculated, FILE *fp)
60c907ab
ACM
874{
875 double duration = (double)t / NSEC_PER_MSEC;
876 size_t printed = fprintf(fp, "(");
877
fd2b2975
ACM
878 if (!calculated)
879 printed += fprintf(fp, " ? ");
880 else if (duration >= 1.0)
60c907ab
ACM
881 printed += color_fprintf(fp, PERF_COLOR_RED, "%6.3f ms", duration);
882 else if (duration >= 0.01)
883 printed += color_fprintf(fp, PERF_COLOR_YELLOW, "%6.3f ms", duration);
884 else
885 printed += color_fprintf(fp, PERF_COLOR_NORMAL, "%6.3f ms", duration);
c24ff998 886 return printed + fprintf(fp, "): ");
60c907ab
ACM
887}
888
f994592d
ACM
889/**
 * struct thread_trace - per-thread tracing state, stored as the thread's priv.
 *
 * filename.ptr: The filename char pointer that will be vfs_getname'd
 * filename.entry_str_pos: Where to insert the string translated from
 *                         filename.ptr by the vfs_getname tracepoint/kprobe.
 * ret_scnprintf: syscall args may set this to a different syscall return
 *                formatter, for instance, fcntl may return fds, file flags, etc.
 */
752fde44
ACM
896struct thread_trace {
/* sample->time of the last sys_enter seen for this thread; 0 is treated as "unknown". */
897 u64 entry_time;
/* A sys_enter line was formatted into entry_str and is waiting for its sys_exit. */
752fde44 898 bool entry_pending;
/* Incremented on every thread__trace() lookup. */
efd5745e 899 unsigned long nr_events;
/* NOTE(review): presumably major/minor page fault counters; not used in this part of the file. */
a2ea67d7 900 unsigned long pfmaj, pfmin;
/* Buffer of trace__entry_str_size bytes holding the formatted "name(args" text. */
752fde44 901 char *entry_str;
/* Accumulated by trace__sched_stat_runtime(). */
1302d88e 902 double runtime_ms;
7ee57434 903 size_t (*ret_scnprintf)(char *bf, size_t size, struct syscall_arg *arg);
f994592d
ACM
904 struct {
905 unsigned long ptr;
7f4f8001
ACM
906 short int entry_str_pos;
/* Set by trace__vfs_getname(), consumed by trace__sys_exit() for the open syscall. */
907 bool pending_open;
/* Longest name stored so far in 'name', not counting the terminating NUL. */
908 unsigned int namelen;
909 char *name;
f994592d 910 } filename;
75b757ca
ACM
/* fd -> pathname cache: 'table' has max + 1 slots, max == -1 means "empty". */
911 struct {
912 int max;
913 char **table;
914 } paths;
bf2575c1
DA
915
/* Keyed by syscall id; each node's priv is a struct stats (see thread__update_stats()). */
916 struct intlist *syscall_stats;
752fde44
ACM
917};
918
919static struct thread_trace *thread_trace__new(void)
920{
75b757ca
ACM
921 struct thread_trace *ttrace = zalloc(sizeof(struct thread_trace));
922
923 if (ttrace)
924 ttrace->paths.max = -1;
925
bf2575c1
DA
926 ttrace->syscall_stats = intlist__new(NULL);
927
75b757ca 928 return ttrace;
752fde44
ACM
929}
930
c24ff998 931static struct thread_trace *thread__trace(struct thread *thread, FILE *fp)
752fde44 932{
efd5745e
ACM
933 struct thread_trace *ttrace;
934
752fde44
ACM
935 if (thread == NULL)
936 goto fail;
937
89dceb22
NK
938 if (thread__priv(thread) == NULL)
939 thread__set_priv(thread, thread_trace__new());
48000a1a 940
89dceb22 941 if (thread__priv(thread) == NULL)
752fde44
ACM
942 goto fail;
943
89dceb22 944 ttrace = thread__priv(thread);
efd5745e
ACM
945 ++ttrace->nr_events;
946
947 return ttrace;
752fde44 948fail:
c24ff998 949 color_fprintf(fp, PERF_COLOR_RED,
752fde44
ACM
950 "WARNING: not enough memory, dropping samples!\n");
951 return NULL;
952}
953
84486caa
ACM
954
955void syscall_arg__set_ret_scnprintf(struct syscall_arg *arg,
7ee57434 956 size_t (*ret_scnprintf)(char *bf, size_t size, struct syscall_arg *arg))
84486caa
ACM
957{
958 struct thread_trace *ttrace = thread__priv(arg->thread);
959
960 ttrace->ret_scnprintf = ret_scnprintf;
961}
962
598d02c5
SF
/* NOTE(review): bit flags, presumably selecting major/minor page fault tracing; users are outside this part of the file. */
963#define TRACE_PFMAJ (1 << 0)
964#define TRACE_PFMIN (1 << 1)
965
e4d44e83
ACM
/* Size, in bytes, of the per-thread entry_str buffer that sys_enter lines are formatted into. */
966static const size_t trace__entry_str_size = 2048;
967
97119f37 968static int trace__set_fd_pathname(struct thread *thread, int fd, const char *pathname)
75b757ca 969{
89dceb22 970 struct thread_trace *ttrace = thread__priv(thread);
75b757ca
ACM
971
972 if (fd > ttrace->paths.max) {
973 char **npath = realloc(ttrace->paths.table, (fd + 1) * sizeof(char *));
974
975 if (npath == NULL)
976 return -1;
977
978 if (ttrace->paths.max != -1) {
979 memset(npath + ttrace->paths.max + 1, 0,
980 (fd - ttrace->paths.max) * sizeof(char *));
981 } else {
982 memset(npath, 0, (fd + 1) * sizeof(char *));
983 }
984
985 ttrace->paths.table = npath;
986 ttrace->paths.max = fd;
987 }
988
989 ttrace->paths.table[fd] = strdup(pathname);
990
991 return ttrace->paths.table[fd] != NULL ? 0 : -1;
992}
993
97119f37
ACM
994static int thread__read_fd_path(struct thread *thread, int fd)
995{
996 char linkname[PATH_MAX], pathname[PATH_MAX];
997 struct stat st;
998 int ret;
999
1000 if (thread->pid_ == thread->tid) {
1001 scnprintf(linkname, sizeof(linkname),
1002 "/proc/%d/fd/%d", thread->pid_, fd);
1003 } else {
1004 scnprintf(linkname, sizeof(linkname),
1005 "/proc/%d/task/%d/fd/%d", thread->pid_, thread->tid, fd);
1006 }
1007
1008 if (lstat(linkname, &st) < 0 || st.st_size + 1 > (off_t)sizeof(pathname))
1009 return -1;
1010
1011 ret = readlink(linkname, pathname, sizeof(pathname));
1012
1013 if (ret < 0 || ret > st.st_size)
1014 return -1;
1015
1016 pathname[ret] = '\0';
1017 return trace__set_fd_pathname(thread, fd, pathname);
1018}
1019
c522739d
ACM
1020static const char *thread__fd_path(struct thread *thread, int fd,
1021 struct trace *trace)
75b757ca 1022{
89dceb22 1023 struct thread_trace *ttrace = thread__priv(thread);
75b757ca
ACM
1024
1025 if (ttrace == NULL)
1026 return NULL;
1027
1028 if (fd < 0)
1029 return NULL;
1030
cdcd1e6b 1031 if ((fd > ttrace->paths.max || ttrace->paths.table[fd] == NULL)) {
c522739d
ACM
1032 if (!trace->live)
1033 return NULL;
1034 ++trace->stats.proc_getname;
cdcd1e6b 1035 if (thread__read_fd_path(thread, fd))
c522739d
ACM
1036 return NULL;
1037 }
75b757ca
ACM
1038
1039 return ttrace->paths.table[fd];
1040}
1041
fc65eb82 1042size_t syscall_arg__scnprintf_fd(char *bf, size_t size, struct syscall_arg *arg)
75b757ca
ACM
1043{
1044 int fd = arg->val;
1045 size_t printed = scnprintf(bf, size, "%d", fd);
c522739d 1046 const char *path = thread__fd_path(arg->thread, fd, arg->trace);
75b757ca
ACM
1047
1048 if (path)
1049 printed += scnprintf(bf + printed, size - printed, "<%s>", path);
1050
1051 return printed;
1052}
1053
1054static size_t syscall_arg__scnprintf_close_fd(char *bf, size_t size,
1055 struct syscall_arg *arg)
1056{
1057 int fd = arg->val;
1058 size_t printed = syscall_arg__scnprintf_fd(bf, size, arg);
89dceb22 1059 struct thread_trace *ttrace = thread__priv(arg->thread);
75b757ca 1060
04662523
ACM
1061 if (ttrace && fd >= 0 && fd <= ttrace->paths.max)
1062 zfree(&ttrace->paths.table[fd]);
75b757ca
ACM
1063
1064 return printed;
1065}
1066
f994592d
ACM
1067static void thread__set_filename_pos(struct thread *thread, const char *bf,
1068 unsigned long ptr)
1069{
1070 struct thread_trace *ttrace = thread__priv(thread);
1071
1072 ttrace->filename.ptr = ptr;
1073 ttrace->filename.entry_str_pos = bf - ttrace->entry_str;
1074}
1075
1076static size_t syscall_arg__scnprintf_filename(char *bf, size_t size,
1077 struct syscall_arg *arg)
1078{
1079 unsigned long ptr = arg->val;
1080
1081 if (!arg->trace->vfs_getname)
1082 return scnprintf(bf, size, "%#x", ptr);
1083
1084 thread__set_filename_pos(arg->thread, bf, ptr);
1085 return 0;
1086}
1087
ae9ed035
ACM
1088static bool trace__filter_duration(struct trace *trace, double t)
1089{
1090 return t < (trace->duration_filter * NSEC_PER_MSEC);
1091}
1092
fd2b2975 1093static size_t __trace__fprintf_tstamp(struct trace *trace, u64 tstamp, FILE *fp)
752fde44
ACM
1094{
1095 double ts = (double)(tstamp - trace->base_time) / NSEC_PER_MSEC;
1096
60c907ab 1097 return fprintf(fp, "%10.3f ", ts);
752fde44
ACM
1098}
1099
fd2b2975
ACM
1100/*
1101 * We're handling tstamp=0 as an undefined tstamp, i.e. like when we are
1102 * using ttrace->entry_time for a thread that receives a sys_exit without
1103 * first having received a sys_enter ("poll" issued before tracing session
1104 * starts, lost sys_enter exit due to ring buffer overflow).
1105 */
1106static size_t trace__fprintf_tstamp(struct trace *trace, u64 tstamp, FILE *fp)
1107{
1108 if (tstamp > 0)
1109 return __trace__fprintf_tstamp(trace, tstamp, fp);
1110
1111 return fprintf(fp, " ? ");
1112}
1113
/* Set by sig_handler(): a signal was received. */
static bool done = false;
/* Set by sig_handler(): the last signal received was SIGINT. */
static bool interrupted = false;

/* Signal handler: flag that a signal arrived and whether it was SIGINT. */
static void sig_handler(int sig)
{
	done = true;

	if (sig == SIGINT)
		interrupted = true;
	else
		interrupted = false;
}
1122
752fde44 1123static size_t trace__fprintf_entry_head(struct trace *trace, struct thread *thread,
fd2b2975 1124 u64 duration, bool duration_calculated, u64 tstamp, FILE *fp)
752fde44
ACM
1125{
1126 size_t printed = trace__fprintf_tstamp(trace, tstamp, fp);
fd2b2975 1127 printed += fprintf_duration(duration, duration_calculated, fp);
752fde44 1128
50c95cbd
ACM
1129 if (trace->multiple_threads) {
1130 if (trace->show_comm)
1902efe7 1131 printed += fprintf(fp, "%.14s/", thread__comm_str(thread));
38051234 1132 printed += fprintf(fp, "%d ", thread->tid);
50c95cbd 1133 }
752fde44
ACM
1134
1135 return printed;
1136}
1137
c24ff998 1138static int trace__process_event(struct trace *trace, struct machine *machine,
162f0bef 1139 union perf_event *event, struct perf_sample *sample)
752fde44
ACM
1140{
1141 int ret = 0;
1142
1143 switch (event->header.type) {
1144 case PERF_RECORD_LOST:
c24ff998 1145 color_fprintf(trace->output, PERF_COLOR_RED,
752fde44 1146 "LOST %" PRIu64 " events!\n", event->lost.lost);
162f0bef 1147 ret = machine__process_lost_event(machine, event, sample);
3ed5ca2e 1148 break;
752fde44 1149 default:
162f0bef 1150 ret = machine__process_event(machine, event, sample);
752fde44
ACM
1151 break;
1152 }
1153
1154 return ret;
1155}
1156
c24ff998 1157static int trace__tool_process(struct perf_tool *tool,
752fde44 1158 union perf_event *event,
162f0bef 1159 struct perf_sample *sample,
752fde44
ACM
1160 struct machine *machine)
1161{
c24ff998 1162 struct trace *trace = container_of(tool, struct trace, tool);
162f0bef 1163 return trace__process_event(trace, machine, event, sample);
752fde44
ACM
1164}
1165
caf8a0d0
ACM
1166static char *trace__machine__resolve_kernel_addr(void *vmachine, unsigned long long *addrp, char **modp)
1167{
1168 struct machine *machine = vmachine;
1169
1170 if (machine->kptr_restrict_warned)
1171 return NULL;
1172
1173 if (symbol_conf.kptr_restrict) {
1174 pr_warning("Kernel address maps (/proc/{kallsyms,modules}) are restricted.\n\n"
1175 "Check /proc/sys/kernel/kptr_restrict.\n\n"
1176 "Kernel samples will not be resolved.\n");
1177 machine->kptr_restrict_warned = true;
1178 return NULL;
1179 }
1180
1181 return machine__resolve_kernel_addr(vmachine, addrp, modp);
1182}
1183
752fde44
ACM
1184static int trace__symbols_init(struct trace *trace, struct perf_evlist *evlist)
1185{
0a7e6d1b 1186 int err = symbol__init(NULL);
752fde44
ACM
1187
1188 if (err)
1189 return err;
1190
8fb598e5
DA
1191 trace->host = machine__new_host();
1192 if (trace->host == NULL)
1193 return -ENOMEM;
752fde44 1194
caf8a0d0 1195 if (trace_event__register_resolver(trace->host, trace__machine__resolve_kernel_addr) < 0)
706c3da4
ACM
1196 return -errno;
1197
a33fbd56 1198 err = __machine__synthesize_threads(trace->host, &trace->tool, &trace->opts.target,
9d9cad76
KL
1199 evlist->threads, trace__tool_process, false,
1200 trace->opts.proc_map_timeout);
752fde44
ACM
1201 if (err)
1202 symbol__exit();
1203
1204 return err;
1205}
1206
13d4ff3e
ACM
1207static int syscall__set_arg_fmts(struct syscall *sc)
1208{
1209 struct format_field *field;
b6565c90 1210 int idx = 0, len;
13d4ff3e 1211
f208bd8d 1212 sc->arg_scnprintf = calloc(sc->nr_args, sizeof(void *));
13d4ff3e
ACM
1213 if (sc->arg_scnprintf == NULL)
1214 return -1;
1215
1f115cb7
ACM
1216 if (sc->fmt)
1217 sc->arg_parm = sc->fmt->arg_parm;
1218
f208bd8d 1219 for (field = sc->args; field; field = field->next) {
beccb2b5
ACM
1220 if (sc->fmt && sc->fmt->arg_scnprintf[idx])
1221 sc->arg_scnprintf[idx] = sc->fmt->arg_scnprintf[idx];
12f3ca4f
ACM
1222 else if (strcmp(field->type, "const char *") == 0 &&
1223 (strcmp(field->name, "filename") == 0 ||
1224 strcmp(field->name, "path") == 0 ||
1225 strcmp(field->name, "pathname") == 0))
1226 sc->arg_scnprintf[idx] = SCA_FILENAME;
beccb2b5 1227 else if (field->flags & FIELD_IS_POINTER)
13d4ff3e 1228 sc->arg_scnprintf[idx] = syscall_arg__scnprintf_hex;
d1d438a3
ACM
1229 else if (strcmp(field->type, "pid_t") == 0)
1230 sc->arg_scnprintf[idx] = SCA_PID;
ba2f22cf
ACM
1231 else if (strcmp(field->type, "umode_t") == 0)
1232 sc->arg_scnprintf[idx] = SCA_MODE_T;
b6565c90
ACM
1233 else if ((strcmp(field->type, "int") == 0 ||
1234 strcmp(field->type, "unsigned int") == 0 ||
1235 strcmp(field->type, "long") == 0) &&
1236 (len = strlen(field->name)) >= 2 &&
1237 strcmp(field->name + len - 2, "fd") == 0) {
1238 /*
1239 * /sys/kernel/tracing/events/syscalls/sys_enter*
1240 * egrep 'field:.*fd;' .../format|sed -r 's/.*field:([a-z ]+) [a-z_]*fd.+/\1/g'|sort|uniq -c
1241 * 65 int
1242 * 23 unsigned int
1243 * 7 unsigned long
1244 */
1245 sc->arg_scnprintf[idx] = SCA_FD;
1246 }
13d4ff3e
ACM
1247 ++idx;
1248 }
1249
1250 return 0;
1251}
1252
514f1c67
ACM
1253static int trace__read_syscall_info(struct trace *trace, int id)
1254{
1255 char tp_name[128];
1256 struct syscall *sc;
fd0db102 1257 const char *name = syscalltbl__name(trace->sctbl, id);
3a531260
ACM
1258
1259 if (name == NULL)
1260 return -1;
514f1c67
ACM
1261
1262 if (id > trace->syscalls.max) {
1263 struct syscall *nsyscalls = realloc(trace->syscalls.table, (id + 1) * sizeof(*sc));
1264
1265 if (nsyscalls == NULL)
1266 return -1;
1267
1268 if (trace->syscalls.max != -1) {
1269 memset(nsyscalls + trace->syscalls.max + 1, 0,
1270 (id - trace->syscalls.max) * sizeof(*sc));
1271 } else {
1272 memset(nsyscalls, 0, (id + 1) * sizeof(*sc));
1273 }
1274
1275 trace->syscalls.table = nsyscalls;
1276 trace->syscalls.max = id;
1277 }
1278
1279 sc = trace->syscalls.table + id;
3a531260 1280 sc->name = name;
2ae3a312 1281
3a531260 1282 sc->fmt = syscall_fmt__find(sc->name);
514f1c67 1283
aec1930b 1284 snprintf(tp_name, sizeof(tp_name), "sys_enter_%s", sc->name);
97978b3e 1285 sc->tp_format = trace_event__tp_format("syscalls", tp_name);
aec1930b 1286
8dd2a131 1287 if (IS_ERR(sc->tp_format) && sc->fmt && sc->fmt->alias) {
aec1930b 1288 snprintf(tp_name, sizeof(tp_name), "sys_enter_%s", sc->fmt->alias);
97978b3e 1289 sc->tp_format = trace_event__tp_format("syscalls", tp_name);
aec1930b 1290 }
514f1c67 1291
8dd2a131 1292 if (IS_ERR(sc->tp_format))
13d4ff3e
ACM
1293 return -1;
1294
f208bd8d
ACM
1295 sc->args = sc->tp_format->format.fields;
1296 sc->nr_args = sc->tp_format->format.nr_fields;
c42de706
TS
1297 /*
1298 * We need to check and discard the first variable '__syscall_nr'
1299 * or 'nr' that mean the syscall number. It is needless here.
1300 * So drop '__syscall_nr' or 'nr' field but does not exist on older kernels.
1301 */
1302 if (sc->args && (!strcmp(sc->args->name, "__syscall_nr") || !strcmp(sc->args->name, "nr"))) {
f208bd8d
ACM
1303 sc->args = sc->args->next;
1304 --sc->nr_args;
1305 }
1306
5089f20e
ACM
1307 sc->is_exit = !strcmp(name, "exit_group") || !strcmp(name, "exit");
1308
13d4ff3e 1309 return syscall__set_arg_fmts(sc);
514f1c67
ACM
1310}
1311
d0cc439b
ACM
1312static int trace__validate_ev_qualifier(struct trace *trace)
1313{
8b3ce757 1314 int err = 0, i;
d0cc439b
ACM
1315 struct str_node *pos;
1316
8b3ce757
ACM
1317 trace->ev_qualifier_ids.nr = strlist__nr_entries(trace->ev_qualifier);
1318 trace->ev_qualifier_ids.entries = malloc(trace->ev_qualifier_ids.nr *
1319 sizeof(trace->ev_qualifier_ids.entries[0]));
1320
1321 if (trace->ev_qualifier_ids.entries == NULL) {
1322 fputs("Error:\tNot enough memory for allocating events qualifier ids\n",
1323 trace->output);
1324 err = -EINVAL;
1325 goto out;
1326 }
1327
1328 i = 0;
1329
602a1f4d 1330 strlist__for_each_entry(pos, trace->ev_qualifier) {
d0cc439b 1331 const char *sc = pos->s;
fd0db102 1332 int id = syscalltbl__id(trace->sctbl, sc);
d0cc439b 1333
8b3ce757 1334 if (id < 0) {
d0cc439b
ACM
1335 if (err == 0) {
1336 fputs("Error:\tInvalid syscall ", trace->output);
1337 err = -EINVAL;
1338 } else {
1339 fputs(", ", trace->output);
1340 }
1341
1342 fputs(sc, trace->output);
1343 }
8b3ce757
ACM
1344
1345 trace->ev_qualifier_ids.entries[i++] = id;
d0cc439b
ACM
1346 }
1347
1348 if (err < 0) {
1349 fputs("\nHint:\ttry 'perf list syscalls:sys_enter_*'"
1350 "\nHint:\tand: 'man syscalls'\n", trace->output);
8b3ce757
ACM
1351 zfree(&trace->ev_qualifier_ids.entries);
1352 trace->ev_qualifier_ids.nr = 0;
d0cc439b 1353 }
8b3ce757 1354out:
d0cc439b
ACM
1355 return err;
1356}
1357
55d43bca
DA
1358/*
1359 * args is to be interpreted as a series of longs but we need to handle
1360 * 8-byte unaligned accesses. args points to raw_data within the event
1361 * and raw_data is guaranteed to be 8-byte unaligned because it is
1362 * preceded by raw_size which is a u32. So we need to copy args to a temp
1363 * variable to read it. Most notably this avoids extended load instructions
1364 * on unaligned addresses
1365 */
f9f83b33
ACM
1366static unsigned long __syscall_arg__val(unsigned char *args, u8 idx)
1367{
1368 unsigned long val;
1369 unsigned char *p = args + sizeof(unsigned long) * idx;
1370
1371 memcpy(&val, p, sizeof(val));
1372 return val;
1373}
1374
1375unsigned long syscall_arg__val(struct syscall_arg *arg, u8 idx)
1376{
1377 return __syscall_arg__val(arg->args, idx);
1378}
55d43bca 1379
/*
 * Format the arguments of syscall 'sc', read from the raw payload 'args',
 * into 'bf' as a ", " separated list of "name: value" pairs.
 *
 * Returns the number of characters written, as accumulated from scnprintf()
 * and the per-argument formatters.
 */
752fde44 1380static size_t syscall__scnprintf_args(struct syscall *sc, char *bf, size_t size,
55d43bca 1381 unsigned char *args, struct trace *trace,
75b757ca 1382 struct thread *thread)
514f1c67 1383{
514f1c67 1384 size_t printed = 0;
55d43bca 1385 unsigned long val;
84486caa
ACM
1386 struct thread_trace *ttrace = thread__priv(thread);
1387
1388 /*
1389 * Things like fcntl will set this in its 'cmd' formatter to pick the
1390 * right formatter for the return value (an fd? file flags?), which is
1391 * not needed for syscalls that always return a given type, say an fd.
1392 */
1393 ttrace->ret_scnprintf = NULL;
514f1c67 1394
f208bd8d 1395 if (sc->args != NULL) {
514f1c67 1396 struct format_field *field;
/* 'bit' tracks arg.idx as a bitmask position, to be tested against arg.mask. */
01533e97
ACM
1397 u8 bit = 1;
1398 struct syscall_arg arg = {
f9f83b33 1399 .args = args,
75b757ca
ACM
1400 .idx = 0,
1401 .mask = 0,
1402 .trace = trace,
1403 .thread = thread,
01533e97 1404 };
6e7eeb51 1405
f208bd8d 1406 for (field = sc->args; field;
01533e97
ACM
1407 field = field->next, ++arg.idx, bit <<= 1) {
/* Skip arguments whose bit is set in arg.mask. NOTE(review): presumably set by an earlier argument's formatter - confirm. */
1408 if (arg.mask & bit)
6e7eeb51 1409 continue;
55d43bca 1410
f9f83b33 1411 val = syscall_arg__val(&arg, arg.idx);
55d43bca 1412
4aa58232
ACM
1413 /*
1414 * Suppress this argument if its value is zero and
1415 * we don't have a string associated in an
1416 * strarray for it.
1417 */
55d43bca 1418 if (val == 0 &&
4aa58232 1419 !(sc->arg_scnprintf &&
83a51694
ACM
1420 (sc->arg_scnprintf[arg.idx] == SCA_STRARRAY ||
1421 sc->arg_scnprintf[arg.idx] == SCA_STRARRAYS) &&
4aa58232 1422 sc->arg_parm[arg.idx]))
22ae5cf1
ACM
1423 continue;
1424
752fde44 1425 printed += scnprintf(bf + printed, size - printed,
13d4ff3e 1426 "%s%s: ", printed ? ", " : "", field->name);
/* Use the argument's own formatter when there is one, else print it as a signed long. */
01533e97 1427 if (sc->arg_scnprintf && sc->arg_scnprintf[arg.idx]) {
55d43bca 1428 arg.val = val;
1f115cb7
ACM
1429 if (sc->arg_parm)
1430 arg.parm = sc->arg_parm[arg.idx];
01533e97
ACM
1431 printed += sc->arg_scnprintf[arg.idx](bf + printed,
1432 size - printed, &arg);
6e7eeb51 1433 } else {
13d4ff3e 1434 printed += scnprintf(bf + printed, size - printed,
55d43bca 1435 "%ld", val);
6e7eeb51 1436 }
514f1c67 1437 }
4c4d6e51
ACM
1438 } else if (IS_ERR(sc->tp_format)) {
1439 /*
1440 * If we managed to read the tracepoint /format file, then we
1441 * may end up not having any args, like with gettid(), so only
1442 * print the raw args when we didn't manage to read it.
1443 */
01533e97
ACM
1444 int i = 0;
1445
/* No format information: dump six raw values as arg0..arg5. */
514f1c67 1446 while (i < 6) {
f9f83b33 1447 val = __syscall_arg__val(args, i);
752fde44
ACM
1448 printed += scnprintf(bf + printed, size - printed,
1449 "%sarg%d: %ld",
55d43bca 1450 printed ? ", " : "", i, val);
514f1c67
ACM
1451 ++i;
1452 }
1453 }
1454
1455 return printed;
1456}
1457
/*
 * Signature shared by the tracepoint handlers in this file, e.g.
 * trace__sys_enter(), trace__sys_exit() and trace__vfs_getname().
 */
ba3d7dee 1458typedef int (*tracepoint_handler)(struct trace *trace, struct perf_evsel *evsel,
0c82adcf 1459 union perf_event *event,
ba3d7dee
ACM
1460 struct perf_sample *sample);
1461
1462static struct syscall *trace__syscall_info(struct trace *trace,
bf2575c1 1463 struct perf_evsel *evsel, int id)
ba3d7dee 1464{
ba3d7dee
ACM
1465
1466 if (id < 0) {
adaa18bf
ACM
1467
1468 /*
1469 * XXX: Noticed on x86_64, reproduced as far back as 3.0.36, haven't tried
1470 * before that, leaving at a higher verbosity level till that is
1471 * explained. Reproduced with plain ftrace with:
1472 *
1473 * echo 1 > /t/events/raw_syscalls/sys_exit/enable
1474 * grep "NR -1 " /t/trace_pipe
1475 *
1476 * After generating some load on the machine.
1477 */
1478 if (verbose > 1) {
1479 static u64 n;
1480 fprintf(trace->output, "Invalid syscall %d id, skipping (%s, %" PRIu64 ") ...\n",
1481 id, perf_evsel__name(evsel), ++n);
1482 }
ba3d7dee
ACM
1483 return NULL;
1484 }
1485
1486 if ((id > trace->syscalls.max || trace->syscalls.table[id].name == NULL) &&
1487 trace__read_syscall_info(trace, id))
1488 goto out_cant_read;
1489
1490 if ((id > trace->syscalls.max || trace->syscalls.table[id].name == NULL))
1491 goto out_cant_read;
1492
1493 return &trace->syscalls.table[id];
1494
1495out_cant_read:
bb963e16 1496 if (verbose > 0) {
7c304ee0
ACM
1497 fprintf(trace->output, "Problems reading syscall %d", id);
1498 if (id <= trace->syscalls.max && trace->syscalls.table[id].name != NULL)
1499 fprintf(trace->output, "(%s)", trace->syscalls.table[id].name);
1500 fputs(" information\n", trace->output);
1501 }
ba3d7dee
ACM
1502 return NULL;
1503}
1504
bf2575c1
DA
1505static void thread__update_stats(struct thread_trace *ttrace,
1506 int id, struct perf_sample *sample)
1507{
1508 struct int_node *inode;
1509 struct stats *stats;
1510 u64 duration = 0;
1511
1512 inode = intlist__findnew(ttrace->syscall_stats, id);
1513 if (inode == NULL)
1514 return;
1515
1516 stats = inode->priv;
1517 if (stats == NULL) {
1518 stats = malloc(sizeof(struct stats));
1519 if (stats == NULL)
1520 return;
1521 init_stats(stats);
1522 inode->priv = stats;
1523 }
1524
1525 if (ttrace->entry_time && sample->time > ttrace->entry_time)
1526 duration = sample->time - ttrace->entry_time;
1527
1528 update_stats(stats, duration);
1529}
1530
e596663e
ACM
1531static int trace__printf_interrupted_entry(struct trace *trace, struct perf_sample *sample)
1532{
1533 struct thread_trace *ttrace;
1534 u64 duration;
1535 size_t printed;
1536
1537 if (trace->current == NULL)
1538 return 0;
1539
1540 ttrace = thread__priv(trace->current);
1541
1542 if (!ttrace->entry_pending)
1543 return 0;
1544
1545 duration = sample->time - ttrace->entry_time;
1546
fd2b2975 1547 printed = trace__fprintf_entry_head(trace, trace->current, duration, true, ttrace->entry_time, trace->output);
e596663e
ACM
1548 printed += fprintf(trace->output, "%-70s) ...\n", ttrace->entry_str);
1549 ttrace->entry_pending = false;
1550
1551 return printed;
1552}
1553
/*
 * sys_enter handler: format "name(args" into the thread's entry_str and mark
 * it pending, so that trace__sys_exit() can print it with the return value.
 * Syscalls flagged is_exit are printed right away instead.
 *
 * Returns 0 on success, -1 when the syscall info, the thread state or the
 * entry_str buffer can't be obtained.
 */
ba3d7dee 1554static int trace__sys_enter(struct trace *trace, struct perf_evsel *evsel,
0c82adcf 1555 union perf_event *event __maybe_unused,
ba3d7dee
ACM
1556 struct perf_sample *sample)
1557{
752fde44 1558 char *msg;
ba3d7dee 1559 void *args;
752fde44 1560 size_t printed = 0;
2ae3a312 1561 struct thread *thread;
b91fc39f 1562 int id = perf_evsel__sc_tp_uint(evsel, id, sample), err = -1;
bf2575c1 1563 struct syscall *sc = trace__syscall_info(trace, evsel, id);
2ae3a312
ACM
1564 struct thread_trace *ttrace;
1565
1566 if (sc == NULL)
1567 return -1;
ba3d7dee 1568
/* The reference obtained here is dropped at out_put. */
8fb598e5 1569 thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
c24ff998 1570 ttrace = thread__trace(thread, trace->output);
2ae3a312 1571 if (ttrace == NULL)
b91fc39f 1572 goto out_put;
ba3d7dee 1573
77170988 1574 args = perf_evsel__sc_tp_ptr(evsel, args, sample);
752fde44
ACM
1575
/* entry_str is allocated on the thread's first sys_enter and reused afterwards. */
1576 if (ttrace->entry_str == NULL) {
e4d44e83 1577 ttrace->entry_str = malloc(trace__entry_str_size);
752fde44 1578 if (!ttrace->entry_str)
b91fc39f 1579 goto out_put;
752fde44
ACM
1580 }
1581
/* Flush a still pending entry of the current thread, unless output is being filtered or summarized. */
5cf9c84e 1582 if (!(trace->duration_filter || trace->summary_only || trace->min_stack))
6ebad5c1 1583 trace__printf_interrupted_entry(trace, sample);
e596663e 1584
752fde44
ACM
1585 ttrace->entry_time = sample->time;
1586 msg = ttrace->entry_str;
e4d44e83 1587 printed += scnprintf(msg + printed, trace__entry_str_size - printed, "%s(", sc->name);
752fde44 1588
e4d44e83 1589 printed += syscall__scnprintf_args(sc, msg + printed, trace__entry_str_size - printed,
75b757ca 1590 args, trace, thread);
752fde44 1591
/* Syscalls flagged is_exit are printed immediately rather than left pending for a sys_exit. */
5089f20e 1592 if (sc->is_exit) {
5cf9c84e 1593 if (!(trace->duration_filter || trace->summary_only || trace->min_stack)) {
fd2b2975 1594 trace__fprintf_entry_head(trace, thread, 0, false, ttrace->entry_time, trace->output);
c008f78f 1595 fprintf(trace->output, "%-70s)\n", ttrace->entry_str);
ae9ed035 1596 }
7f4f8001 1597 } else {
752fde44 1598 ttrace->entry_pending = true;
7f4f8001
ACM
1599 /* See trace__vfs_getname & trace__sys_exit */
1600 ttrace->filename.pending_open = false;
1601 }
ba3d7dee 1602
/* Remember this thread as trace->current, swapping the reference held on the previous one. */
f3b623b8
ACM
1603 if (trace->current != thread) {
1604 thread__put(trace->current);
1605 trace->current = thread__get(thread);
1606 }
b91fc39f
ACM
1607 err = 0;
1608out_put:
1609 thread__put(thread);
1610 return err;
ba3d7dee
ACM
1611}
1612
5cf9c84e
ACM
1613static int trace__resolve_callchain(struct trace *trace, struct perf_evsel *evsel,
1614 struct perf_sample *sample,
1615 struct callchain_cursor *cursor)
202ff968
ACM
1616{
1617 struct addr_location al;
5cf9c84e
ACM
1618
1619 if (machine__resolve(trace->host, &al, sample) < 0 ||
1620 thread__resolve_callchain(al.thread, cursor, evsel, sample, NULL, NULL, trace->max_stack))
1621 return -1;
1622
1623 return 0;
1624}
1625
1626static int trace__fprintf_callchain(struct trace *trace, struct perf_sample *sample)
1627{
202ff968 1628 /* TODO: user-configurable print_opts */
e20ab86e
ACM
1629 const unsigned int print_opts = EVSEL__PRINT_SYM |
1630 EVSEL__PRINT_DSO |
1631 EVSEL__PRINT_UNKNOWN_AS_ADDR;
202ff968 1632
d327e60c 1633 return sample__fprintf_callchain(sample, 38, print_opts, &callchain_cursor, trace->output);
202ff968
ACM
1634}
1635
/*
 * sys_exit handler: update statistics, apply the duration/min-stack/summary
 * filters and print the line for the finished syscall, i.e. the pending entry
 * string (or a "continued" marker) followed by the formatted return value and,
 * when resolved, the callchain.
 *
 * Returns 0 once the thread state was obtained, -1 otherwise.
 */
ba3d7dee 1636static int trace__sys_exit(struct trace *trace, struct perf_evsel *evsel,
0c82adcf 1637 union perf_event *event __maybe_unused,
ba3d7dee
ACM
1638 struct perf_sample *sample)
1639{
2c82c3ad 1640 long ret;
60c907ab 1641 u64 duration = 0;
fd2b2975 1642 bool duration_calculated = false;
2ae3a312 1643 struct thread *thread;
5cf9c84e 1644 int id = perf_evsel__sc_tp_uint(evsel, id, sample), err = -1, callchain_ret = 0;
bf2575c1 1645 struct syscall *sc = trace__syscall_info(trace, evsel, id);
2ae3a312
ACM
1646 struct thread_trace *ttrace;
1647
1648 if (sc == NULL)
1649 return -1;
ba3d7dee 1650
/* The reference obtained here is dropped at out_put. */
8fb598e5 1651 thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
c24ff998 1652 ttrace = thread__trace(thread, trace->output);
2ae3a312 1653 if (ttrace == NULL)
b91fc39f 1654 goto out_put;
ba3d7dee 1655
bf2575c1
DA
1656 if (trace->summary)
1657 thread__update_stats(ttrace, id, sample);
1658
77170988 1659 ret = perf_evsel__sc_tp_uint(evsel, ret, sample);
ba3d7dee 1660
/* A successful open: cache the name captured by trace__vfs_getname() for the returned fd. */
fd0db102 1661 if (id == trace->open_id && ret >= 0 && ttrace->filename.pending_open) {
7f4f8001
ACM
1662 trace__set_fd_pathname(thread, ret, ttrace->filename.name);
1663 ttrace->filename.pending_open = false;
c522739d
ACM
1664 ++trace->stats.vfs_getname;
1665 }
1666
/* Without an entry time the duration is unknown; such exits are dropped when a duration filter is set. */
ae9ed035 1667 if (ttrace->entry_time) {
60c907ab 1668 duration = sample->time - ttrace->entry_time;
ae9ed035
ACM
1669 if (trace__filter_duration(trace, duration))
1670 goto out;
fd2b2975 1671 duration_calculated = true;
ae9ed035
ACM
1672 } else if (trace->duration_filter)
1673 goto out;
60c907ab 1674
/* callchain_ret: 0 = nothing to print, 1 = resolved and deep enough, < 0 = resolution failed. */
5cf9c84e
ACM
1675 if (sample->callchain) {
1676 callchain_ret = trace__resolve_callchain(trace, evsel, sample, &callchain_cursor);
1677 if (callchain_ret == 0) {
1678 if (callchain_cursor.nr < trace->min_stack)
1679 goto out;
1680 callchain_ret = 1;
1681 }
1682 }
1683
fd2eabaf
DA
1684 if (trace->summary_only)
1685 goto out;
1686
fd2b2975 1687 trace__fprintf_entry_head(trace, thread, duration, duration_calculated, ttrace->entry_time, trace->output);
752fde44
ACM
1688
/* Either the entry text is still pending, or it was already flushed and this line continues it. */
1689 if (ttrace->entry_pending) {
c24ff998 1690 fprintf(trace->output, "%-70s", ttrace->entry_str);
752fde44 1691 } else {
c24ff998
ACM
1692 fprintf(trace->output, " ... [");
1693 color_fprintf(trace->output, PERF_COLOR_YELLOW, "continued");
1694 fprintf(trace->output, "]: %s()", sc->name);
752fde44
ACM
1695 }
1696
/* Return value: plain signed print is the fallback, reached directly or via 'goto signed_print'. */
da3c9a44
ACM
1697 if (sc->fmt == NULL) {
1698signed_print:
2c82c3ad 1699 fprintf(trace->output, ") = %ld", ret);
11c8e39f 1700 } else if (ret < 0 && (sc->fmt->errmsg || sc->fmt->errpid)) {
942a91ed 1701 char bf[STRERR_BUFSIZE];
c8b5f2c9 1702 const char *emsg = str_error_r(-ret, bf, sizeof(bf)),
ba3d7dee
ACM
1703 *e = audit_errno_to_name(-ret);
1704
c24ff998 1705 fprintf(trace->output, ") = -1 %s %s", e, emsg);
da3c9a44 1706 } else if (ret == 0 && sc->fmt->timeout)
c24ff998 1707 fprintf(trace->output, ") = 0 Timeout");
/* A formatter chosen at sys_enter time (see syscall_arg__set_ret_scnprintf()) is used once and reset. */
84486caa
ACM
1708 else if (ttrace->ret_scnprintf) {
1709 char bf[1024];
7ee57434
ACM
1710 struct syscall_arg arg = {
1711 .val = ret,
1712 .thread = thread,
1713 .trace = trace,
1714 };
1715 ttrace->ret_scnprintf(bf, sizeof(bf), &arg);
84486caa
ACM
1716 ttrace->ret_scnprintf = NULL;
1717 fprintf(trace->output, ") = %s", bf);
1718 } else if (sc->fmt->hexret)
2c82c3ad 1719 fprintf(trace->output, ") = %#lx", ret);
/* NOTE(review): when the child thread isn't found nothing is printed for the return value - confirm this is intended. */
11c8e39f
ACM
1720 else if (sc->fmt->errpid) {
1721 struct thread *child = machine__find_thread(trace->host, ret, ret);
1722
1723 if (child != NULL) {
1724 fprintf(trace->output, ") = %ld", ret);
1725 if (child->comm_set)
1726 fprintf(trace->output, " (%s)", thread__comm_str(child));
1727 thread__put(child);
1728 }
1729 } else
da3c9a44 1730 goto signed_print;
ba3d7dee 1731
c24ff998 1732 fputc('\n', trace->output);
566a0885 1733
5cf9c84e
ACM
1734 if (callchain_ret > 0)
1735 trace__fprintf_callchain(trace, sample);
1736 else if (callchain_ret < 0)
1737 pr_err("Problem processing %s callchain, skipping...\n", perf_evsel__name(evsel));
ae9ed035 1738out:
752fde44 1739 ttrace->entry_pending = false;
b91fc39f
ACM
1740 err = 0;
1741out_put:
1742 thread__put(thread);
1743 return err;
ba3d7dee
ACM
1744}
1745
/*
 * vfs_getname handler: keep a copy of the "pathname" field in
 * ttrace->filename.name (for the fd -> path cache, see trace__sys_exit()) and,
 * when a filename argument is waiting for it (filename.ptr != 0), splice the
 * name into entry_str at filename.entry_str_pos.
 *
 * Always returns 0.
 */
c522739d 1746static int trace__vfs_getname(struct trace *trace, struct perf_evsel *evsel,
0c82adcf 1747 union perf_event *event __maybe_unused,
c522739d
ACM
1748 struct perf_sample *sample)
1749{
f994592d
ACM
1750 struct thread *thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
1751 struct thread_trace *ttrace;
1752 size_t filename_len, entry_str_len, to_move;
1753 ssize_t remaining_space;
1754 char *pos;
7f4f8001 1755 const char *filename = perf_evsel__rawptr(evsel, sample, "pathname");
f994592d
ACM
1756
1757 if (!thread)
1758 goto out;
1759
1760 ttrace = thread__priv(thread);
1761 if (!ttrace)
ef65e96e 1762 goto out_put;
f994592d 1763
7f4f8001 1764 filename_len = strlen(filename);
39f0e7a8 1765 if (filename_len == 0)
ef65e96e 1766 goto out_put;
7f4f8001
ACM
1767
/* Grow the saved-name buffer only when this name is longer than any stored before. */
1768 if (ttrace->filename.namelen < filename_len) {
1769 char *f = realloc(ttrace->filename.name, filename_len + 1);
1770
1771 if (f == NULL)
ef65e96e 1772 goto out_put;
7f4f8001
ACM
1773
1774 ttrace->filename.namelen = filename_len;
1775 ttrace->filename.name = f;
1776 }
1777
1778 strcpy(ttrace->filename.name, filename);
1779 ttrace->filename.pending_open = true;
1780
/* No filename argument is waiting to be filled in: nothing to splice. */
f994592d 1781 if (!ttrace->filename.ptr)
ef65e96e 1782 goto out_put;
f994592d
ACM
1783
1784 entry_str_len = strlen(ttrace->entry_str);
1785 remaining_space = trace__entry_str_size - entry_str_len - 1; /* \0 */
1786 if (remaining_space <= 0)
ef65e96e 1787 goto out_put;
f994592d 1788
/* If the name doesn't fit, keep only its tail. */
f994592d
ACM
1789 if (filename_len > (size_t)remaining_space) {
1790 filename += filename_len - remaining_space;
1791 filename_len = remaining_space;
1792 }
1793
/* Shift the text after the insertion point (NUL included) to the right, then copy the name into the gap. */
1794 to_move = entry_str_len - ttrace->filename.entry_str_pos + 1; /* \0 */
1795 pos = ttrace->entry_str + ttrace->filename.entry_str_pos;
1796 memmove(pos + filename_len, pos, to_move);
1797 memcpy(pos, filename, filename_len);
1798
1799 ttrace->filename.ptr = 0;
1800 ttrace->filename.entry_str_pos = 0;
ef65e96e
ACM
1801out_put:
1802 thread__put(thread);
f994592d 1803out:
c522739d
ACM
1804 return 0;
1805}
1806
1302d88e 1807static int trace__sched_stat_runtime(struct trace *trace, struct perf_evsel *evsel,
0c82adcf 1808 union perf_event *event __maybe_unused,
1302d88e
ACM
1809 struct perf_sample *sample)
1810{
1811 u64 runtime = perf_evsel__intval(evsel, sample, "runtime");
1812 double runtime_ms = (double)runtime / NSEC_PER_MSEC;
8fb598e5 1813 struct thread *thread = machine__findnew_thread(trace->host,
314add6b
AH
1814 sample->pid,
1815 sample->tid);
c24ff998 1816 struct thread_trace *ttrace = thread__trace(thread, trace->output);
1302d88e
ACM
1817
1818 if (ttrace == NULL)
1819 goto out_dump;
1820
1821 ttrace->runtime_ms += runtime_ms;
1822 trace->runtime_ms += runtime_ms;
ef65e96e 1823out_put:
b91fc39f 1824 thread__put(thread);
1302d88e
ACM
1825 return 0;
1826
1827out_dump:
c24ff998 1828 fprintf(trace->output, "%s: comm=%s,pid=%u,runtime=%" PRIu64 ",vruntime=%" PRIu64 ")\n",
1302d88e
ACM
1829 evsel->name,
1830 perf_evsel__strval(evsel, sample, "comm"),
1831 (pid_t)perf_evsel__intval(evsel, sample, "pid"),
1832 runtime,
1833 perf_evsel__intval(evsel, sample, "vruntime"));
ef65e96e 1834 goto out_put;
1302d88e
ACM
1835}
1836
1d6c9407
WN
1837static void bpf_output__printer(enum binary_printer_ops op,
1838 unsigned int val, void *extra)
1839{
1840 FILE *output = extra;
1841 unsigned char ch = (unsigned char)val;
1842
1843 switch (op) {
1844 case BINARY_PRINT_CHAR_DATA:
1845 fprintf(output, "%c", isprint(ch) ? ch : '.');
1846 break;
1847 case BINARY_PRINT_DATA_BEGIN:
1848 case BINARY_PRINT_LINE_BEGIN:
1849 case BINARY_PRINT_ADDR:
1850 case BINARY_PRINT_NUM_DATA:
1851 case BINARY_PRINT_NUM_PAD:
1852 case BINARY_PRINT_SEP:
1853 case BINARY_PRINT_CHAR_PAD:
1854 case BINARY_PRINT_LINE_END:
1855 case BINARY_PRINT_DATA_END:
1856 default:
1857 break;
1858 }
1859}
1860
1861static void bpf_output__fprintf(struct trace *trace,
1862 struct perf_sample *sample)
1863{
1864 print_binary(sample->raw_data, sample->raw_size, 8,
1865 bpf_output__printer, trace->output);
1866}
1867
14a052df
ACM
1868static int trace__event_handler(struct trace *trace, struct perf_evsel *evsel,
1869 union perf_event *event __maybe_unused,
1870 struct perf_sample *sample)
1871{
7ad35615
ACM
1872 int callchain_ret = 0;
1873
1874 if (sample->callchain) {
1875 callchain_ret = trace__resolve_callchain(trace, evsel, sample, &callchain_cursor);
1876 if (callchain_ret == 0) {
1877 if (callchain_cursor.nr < trace->min_stack)
1878 goto out;
1879 callchain_ret = 1;
1880 }
1881 }
1882
14a052df
ACM
1883 trace__printf_interrupted_entry(trace, sample);
1884 trace__fprintf_tstamp(trace, sample->time, trace->output);
0808921a
ACM
1885
1886 if (trace->trace_syscalls)
1887 fprintf(trace->output, "( ): ");
1888
1889 fprintf(trace->output, "%s:", evsel->name);
14a052df 1890
1d6c9407
WN
1891 if (perf_evsel__is_bpf_output(evsel)) {
1892 bpf_output__fprintf(trace, sample);
1893 } else if (evsel->tp_format) {
14a052df
ACM
1894 event_format__fprintf(evsel->tp_format, sample->cpu,
1895 sample->raw_data, sample->raw_size,
1896 trace->output);
1897 }
1898
1899 fprintf(trace->output, ")\n");
202ff968 1900
7ad35615
ACM
1901 if (callchain_ret > 0)
1902 trace__fprintf_callchain(trace, sample);
1903 else if (callchain_ret < 0)
1904 pr_err("Problem processing %s callchain, skipping...\n", perf_evsel__name(evsel));
1905out:
14a052df
ACM
1906 return 0;
1907}
1908
598d02c5
SF
1909static void print_location(FILE *f, struct perf_sample *sample,
1910 struct addr_location *al,
1911 bool print_dso, bool print_sym)
1912{
1913
bb963e16 1914 if ((verbose > 0 || print_dso) && al->map)
598d02c5
SF
1915 fprintf(f, "%s@", al->map->dso->long_name);
1916
bb963e16 1917 if ((verbose > 0 || print_sym) && al->sym)
4414a3c5 1918 fprintf(f, "%s+0x%" PRIx64, al->sym->name,
598d02c5
SF
1919 al->addr - al->sym->start);
1920 else if (al->map)
4414a3c5 1921 fprintf(f, "0x%" PRIx64, al->addr);
598d02c5 1922 else
4414a3c5 1923 fprintf(f, "0x%" PRIx64, sample->addr);
598d02c5
SF
1924}
1925
1926static int trace__pgfault(struct trace *trace,
1927 struct perf_evsel *evsel,
473398a2 1928 union perf_event *event __maybe_unused,
598d02c5
SF
1929 struct perf_sample *sample)
1930{
1931 struct thread *thread;
598d02c5
SF
1932 struct addr_location al;
1933 char map_type = 'd';
a2ea67d7 1934 struct thread_trace *ttrace;
b91fc39f 1935 int err = -1;
1df54290 1936 int callchain_ret = 0;
598d02c5
SF
1937
1938 thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
1df54290
ACM
1939
1940 if (sample->callchain) {
1941 callchain_ret = trace__resolve_callchain(trace, evsel, sample, &callchain_cursor);
1942 if (callchain_ret == 0) {
1943 if (callchain_cursor.nr < trace->min_stack)
1944 goto out_put;
1945 callchain_ret = 1;
1946 }
1947 }
1948
a2ea67d7
SF
1949 ttrace = thread__trace(thread, trace->output);
1950 if (ttrace == NULL)
b91fc39f 1951 goto out_put;
a2ea67d7
SF
1952
1953 if (evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MAJ)
1954 ttrace->pfmaj++;
1955 else
1956 ttrace->pfmin++;
1957
1958 if (trace->summary_only)
b91fc39f 1959 goto out;
598d02c5 1960
473398a2 1961 thread__find_addr_location(thread, sample->cpumode, MAP__FUNCTION,
598d02c5
SF
1962 sample->ip, &al);
1963
fd2b2975 1964 trace__fprintf_entry_head(trace, thread, 0, true, sample->time, trace->output);
598d02c5
SF
1965
1966 fprintf(trace->output, "%sfault [",
1967 evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MAJ ?
1968 "maj" : "min");
1969
1970 print_location(trace->output, sample, &al, false, true);
1971
1972 fprintf(trace->output, "] => ");
1973
473398a2 1974 thread__find_addr_location(thread, sample->cpumode, MAP__VARIABLE,
598d02c5
SF
1975 sample->addr, &al);
1976
1977 if (!al.map) {
473398a2 1978 thread__find_addr_location(thread, sample->cpumode,
598d02c5
SF
1979 MAP__FUNCTION, sample->addr, &al);
1980
1981 if (al.map)
1982 map_type = 'x';
1983 else
1984 map_type = '?';
1985 }
1986
1987 print_location(trace->output, sample, &al, true, false);
1988
1989 fprintf(trace->output, " (%c%c)\n", map_type, al.level);
0c3a6ef4 1990
1df54290
ACM
1991 if (callchain_ret > 0)
1992 trace__fprintf_callchain(trace, sample);
1993 else if (callchain_ret < 0)
1994 pr_err("Problem processing %s callchain, skipping...\n", perf_evsel__name(evsel));
b91fc39f
ACM
1995out:
1996 err = 0;
1997out_put:
1998 thread__put(thread);
1999 return err;
598d02c5
SF
2000}
2001
e6001980 2002static void trace__set_base_time(struct trace *trace,
8a07a809 2003 struct perf_evsel *evsel,
e6001980
ACM
2004 struct perf_sample *sample)
2005{
8a07a809
ACM
2006 /*
2007 * BPF events were not setting PERF_SAMPLE_TIME, so be more robust
2008 * and don't use sample->time unconditionally, we may end up having
2009 * some other event in the future without PERF_SAMPLE_TIME for good
2010 * reason, i.e. we may not be interested in its timestamps, just in
2011 * it taking place, picking some piece of information when it
2012 * appears in our event stream (vfs_getname comes to mind).
2013 */
2014 if (trace->base_time == 0 && !trace->full_time &&
2015 (evsel->attr.sample_type & PERF_SAMPLE_TIME))
e6001980
ACM
2016 trace->base_time = sample->time;
2017}
2018
6810fc91 2019static int trace__process_sample(struct perf_tool *tool,
0c82adcf 2020 union perf_event *event,
6810fc91
DA
2021 struct perf_sample *sample,
2022 struct perf_evsel *evsel,
2023 struct machine *machine __maybe_unused)
2024{
2025 struct trace *trace = container_of(tool, struct trace, tool);
aa07df6e 2026 struct thread *thread;
6810fc91
DA
2027 int err = 0;
2028
744a9719 2029 tracepoint_handler handler = evsel->handler;
6810fc91 2030
aa07df6e
DA
2031 thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
2032 if (thread && thread__is_filtered(thread))
ef65e96e 2033 goto out;
bdc89661 2034
e6001980 2035 trace__set_base_time(trace, evsel, sample);
6810fc91 2036
3160565f
DA
2037 if (handler) {
2038 ++trace->nr_events;
0c82adcf 2039 handler(trace, evsel, event, sample);
3160565f 2040 }
ef65e96e
ACM
2041out:
2042 thread__put(thread);
6810fc91
DA
2043 return err;
2044}
2045
1e28fe0a 2046static int trace__record(struct trace *trace, int argc, const char **argv)
5e2485b1
DA
2047{
2048 unsigned int rec_argc, i, j;
2049 const char **rec_argv;
2050 const char * const record_args[] = {
2051 "record",
2052 "-R",
2053 "-m", "1024",
2054 "-c", "1",
5e2485b1
DA
2055 };
2056
1e28fe0a
SF
2057 const char * const sc_args[] = { "-e", };
2058 unsigned int sc_args_nr = ARRAY_SIZE(sc_args);
2059 const char * const majpf_args[] = { "-e", "major-faults" };
2060 unsigned int majpf_args_nr = ARRAY_SIZE(majpf_args);
2061 const char * const minpf_args[] = { "-e", "minor-faults" };
2062 unsigned int minpf_args_nr = ARRAY_SIZE(minpf_args);
2063
9aca7f17 2064 /* +1 is for the event string below */
1e28fe0a
SF
2065 rec_argc = ARRAY_SIZE(record_args) + sc_args_nr + 1 +
2066 majpf_args_nr + minpf_args_nr + argc;
5e2485b1
DA
2067 rec_argv = calloc(rec_argc + 1, sizeof(char *));
2068
2069 if (rec_argv == NULL)
2070 return -ENOMEM;
2071
1e28fe0a 2072 j = 0;
5e2485b1 2073 for (i = 0; i < ARRAY_SIZE(record_args); i++)
1e28fe0a
SF
2074 rec_argv[j++] = record_args[i];
2075
e281a960
SF
2076 if (trace->trace_syscalls) {
2077 for (i = 0; i < sc_args_nr; i++)
2078 rec_argv[j++] = sc_args[i];
2079
2080 /* event string may be different for older kernels - e.g., RHEL6 */
2081 if (is_valid_tracepoint("raw_syscalls:sys_enter"))
2082 rec_argv[j++] = "raw_syscalls:sys_enter,raw_syscalls:sys_exit";
2083 else if (is_valid_tracepoint("syscalls:sys_enter"))
2084 rec_argv[j++] = "syscalls:sys_enter,syscalls:sys_exit";
2085 else {
2086 pr_err("Neither raw_syscalls nor syscalls events exist.\n");
2087 return -1;
2088 }
9aca7f17 2089 }
9aca7f17 2090
1e28fe0a
SF
2091 if (trace->trace_pgfaults & TRACE_PFMAJ)
2092 for (i = 0; i < majpf_args_nr; i++)
2093 rec_argv[j++] = majpf_args[i];
2094
2095 if (trace->trace_pgfaults & TRACE_PFMIN)
2096 for (i = 0; i < minpf_args_nr; i++)
2097 rec_argv[j++] = minpf_args[i];
2098
2099 for (i = 0; i < (unsigned int)argc; i++)
2100 rec_argv[j++] = argv[i];
5e2485b1 2101
b0ad8ea6 2102 return cmd_record(j, rec_argv);
5e2485b1
DA
2103}
2104
bf2575c1
DA
2105static size_t trace__fprintf_thread_summary(struct trace *trace, FILE *fp);
2106
08c98776 2107static bool perf_evlist__add_vfs_getname(struct perf_evlist *evlist)
c522739d 2108{
ef503831 2109 struct perf_evsel *evsel = perf_evsel__newtp("probe", "vfs_getname");
8dd2a131
JO
2110
2111 if (IS_ERR(evsel))
08c98776 2112 return false;
c522739d
ACM
2113
2114 if (perf_evsel__field(evsel, "pathname") == NULL) {
2115 perf_evsel__delete(evsel);
08c98776 2116 return false;
c522739d
ACM
2117 }
2118
744a9719 2119 evsel->handler = trace__vfs_getname;
c522739d 2120 perf_evlist__add(evlist, evsel);
08c98776 2121 return true;
c522739d
ACM
2122}
2123
0ae537cb 2124static struct perf_evsel *perf_evsel__new_pgfault(u64 config)
598d02c5
SF
2125{
2126 struct perf_evsel *evsel;
2127 struct perf_event_attr attr = {
2128 .type = PERF_TYPE_SOFTWARE,
2129 .mmap_data = 1,
598d02c5
SF
2130 };
2131
2132 attr.config = config;
0524798c 2133 attr.sample_period = 1;
598d02c5
SF
2134
2135 event_attr_init(&attr);
2136
2137 evsel = perf_evsel__new(&attr);
0ae537cb
ACM
2138 if (evsel)
2139 evsel->handler = trace__pgfault;
598d02c5 2140
0ae537cb 2141 return evsel;
598d02c5
SF
2142}
2143
ddbb1b13
ACM
2144static void trace__handle_event(struct trace *trace, union perf_event *event, struct perf_sample *sample)
2145{
2146 const u32 type = event->header.type;
2147 struct perf_evsel *evsel;
2148
ddbb1b13
ACM
2149 if (type != PERF_RECORD_SAMPLE) {
2150 trace__process_event(trace, trace->host, event, sample);
2151 return;
2152 }
2153
2154 evsel = perf_evlist__id2evsel(trace->evlist, sample->id);
2155 if (evsel == NULL) {
2156 fprintf(trace->output, "Unknown tp ID %" PRIu64 ", skipping...\n", sample->id);
2157 return;
2158 }
2159
e6001980
ACM
2160 trace__set_base_time(trace, evsel, sample);
2161
ddbb1b13
ACM
2162 if (evsel->attr.type == PERF_TYPE_TRACEPOINT &&
2163 sample->raw_data == NULL) {
2164 fprintf(trace->output, "%s sample with no payload for tid: %d, cpu %d, raw_size=%d, skipping...\n",
2165 perf_evsel__name(evsel), sample->tid,
2166 sample->cpu, sample->raw_size);
2167 } else {
2168 tracepoint_handler handler = evsel->handler;
2169 handler(trace, evsel, event, sample);
2170 }
2171}
2172
c27366f0
ACM
2173static int trace__add_syscall_newtp(struct trace *trace)
2174{
2175 int ret = -1;
2176 struct perf_evlist *evlist = trace->evlist;
2177 struct perf_evsel *sys_enter, *sys_exit;
2178
2179 sys_enter = perf_evsel__syscall_newtp("sys_enter", trace__sys_enter);
2180 if (sys_enter == NULL)
2181 goto out;
2182
2183 if (perf_evsel__init_sc_tp_ptr_field(sys_enter, args))
2184 goto out_delete_sys_enter;
2185
2186 sys_exit = perf_evsel__syscall_newtp("sys_exit", trace__sys_exit);
2187 if (sys_exit == NULL)
2188 goto out_delete_sys_enter;
2189
2190 if (perf_evsel__init_sc_tp_uint_field(sys_exit, ret))
2191 goto out_delete_sys_exit;
2192
2193 perf_evlist__add(evlist, sys_enter);
2194 perf_evlist__add(evlist, sys_exit);
2195
2ddd5c04 2196 if (callchain_param.enabled && !trace->kernel_syscallchains) {
44621819
ACM
2197 /*
2198 * We're interested only in the user space callchain
2199 * leading to the syscall, allow overriding that for
2200 * debugging reasons using --kernel_syscall_callchains
2201 */
2202 sys_exit->attr.exclude_callchain_kernel = 1;
2203 }
2204
8b3ce757
ACM
2205 trace->syscalls.events.sys_enter = sys_enter;
2206 trace->syscalls.events.sys_exit = sys_exit;
c27366f0
ACM
2207
2208 ret = 0;
2209out:
2210 return ret;
2211
2212out_delete_sys_exit:
2213 perf_evsel__delete_priv(sys_exit);
2214out_delete_sys_enter:
2215 perf_evsel__delete_priv(sys_enter);
2216 goto out;
2217}
2218
19867b61
ACM
2219static int trace__set_ev_qualifier_filter(struct trace *trace)
2220{
2221 int err = -1;
b15d0a4c 2222 struct perf_evsel *sys_exit;
19867b61
ACM
2223 char *filter = asprintf_expr_inout_ints("id", !trace->not_ev_qualifier,
2224 trace->ev_qualifier_ids.nr,
2225 trace->ev_qualifier_ids.entries);
2226
2227 if (filter == NULL)
2228 goto out_enomem;
2229
3541c034
MP
2230 if (!perf_evsel__append_tp_filter(trace->syscalls.events.sys_enter,
2231 filter)) {
b15d0a4c 2232 sys_exit = trace->syscalls.events.sys_exit;
3541c034 2233 err = perf_evsel__append_tp_filter(sys_exit, filter);
b15d0a4c 2234 }
19867b61
ACM
2235
2236 free(filter);
2237out:
2238 return err;
2239out_enomem:
2240 errno = ENOMEM;
2241 goto out;
2242}
c27366f0 2243
/*
 * Live tracing entry point.
 *
 * Adds the requested events to trace->evlist (syscall enter/exit tracepoints,
 * probe:vfs_getname, major/minor page fault software events and
 * sched:sched_stat_runtime), creates the maps for the target, prepares the
 * workload when one was given (argc > 0), opens and mmaps the events, applies
 * the pid and syscall id filters, and then loops reading and dispatching
 * events from every mmap until interrupted or until a pass finds no new
 * events and polling is over.
 *
 * Every exit path goes through out_delete_evlist, which deletes the evlist,
 * clears trace->evlist and trace->live and returns 'err' as left by the last
 * step performed.
 */
f15eb531 2244static int trace__run(struct trace *trace, int argc, const char **argv)
514f1c67 2245{
14a052df 2246 struct perf_evlist *evlist = trace->evlist;
0ae537cb 2247 struct perf_evsel *evsel, *pgfault_maj = NULL, *pgfault_min = NULL;
efd5745e
ACM
2248 int err = -1, i;
2249 unsigned long before;
f15eb531 2250 const bool forks = argc > 0;
46fb3c21 2251 bool draining = false;
514f1c67 2252
75b757ca
ACM
2253 trace->live = true;
2254
c27366f0 2255 if (trace->trace_syscalls && trace__add_syscall_newtp(trace))
801c67b0 2256 goto out_error_raw_syscalls;
514f1c67 2257
e281a960 2258 if (trace->trace_syscalls)
08c98776 2259 trace->vfs_getname = perf_evlist__add_vfs_getname(evlist);
c522739d 2260
0ae537cb
ACM
2261 if ((trace->trace_pgfaults & TRACE_PFMAJ)) {
2262 pgfault_maj = perf_evsel__new_pgfault(PERF_COUNT_SW_PAGE_FAULTS_MAJ);
2263 if (pgfault_maj == NULL)
2264 goto out_error_mem;
2265 perf_evlist__add(evlist, pgfault_maj);
e2726d99 2266 }
598d02c5 2267
0ae537cb
ACM
2268 if ((trace->trace_pgfaults & TRACE_PFMIN)) {
2269 pgfault_min = perf_evsel__new_pgfault(PERF_COUNT_SW_PAGE_FAULTS_MIN);
2270 if (pgfault_min == NULL)
2271 goto out_error_mem;
2272 perf_evlist__add(evlist, pgfault_min);
2273 }
598d02c5 2274
1302d88e 2275 if (trace->sched &&
2cc990ba
ACM
2276 perf_evlist__add_newtp(evlist, "sched", "sched_stat_runtime",
2277 trace__sched_stat_runtime))
2278 goto out_error_sched_stat_runtime;
1302d88e 2279
514f1c67
ACM
2280 err = perf_evlist__create_maps(evlist, &trace->opts.target);
2281 if (err < 0) {
c24ff998 2282 fprintf(trace->output, "Problems parsing the target to trace, check your options!\n");
514f1c67
ACM
2283 goto out_delete_evlist;
2284 }
2285
752fde44
ACM
2286 err = trace__symbols_init(trace, evlist);
2287 if (err < 0) {
c24ff998 2288 fprintf(trace->output, "Problems initializing symbol libraries!\n");
03ad9747 2289 goto out_delete_evlist;
752fde44
ACM
2290 }
2291
fde54b78
ACM
2292 perf_evlist__config(evlist, &trace->opts, NULL);
2293
0c3a6ef4
ACM
2294 if (callchain_param.enabled) {
2295 bool use_identifier = false;
2296
2297 if (trace->syscalls.events.sys_exit) {
2298 perf_evsel__config_callchain(trace->syscalls.events.sys_exit,
2299 &trace->opts, &callchain_param);
2300 use_identifier = true;
2301 }
2302
2303 if (pgfault_maj) {
2304 perf_evsel__config_callchain(pgfault_maj, &trace->opts, &callchain_param);
2305 use_identifier = true;
2306 }
2307
2308 if (pgfault_min) {
2309 perf_evsel__config_callchain(pgfault_min, &trace->opts, &callchain_param);
2310 use_identifier = true;
2311 }
2312
2313 if (use_identifier) {
2314 /*
2315 * Now we have evsels with different sample_ids, use
2316 * PERF_SAMPLE_IDENTIFIER to map from sample to evsel
2317 * from a fixed position in each ring buffer record.
2318 *
2319 * As of this the changeset introducing this comment, this
2320 * isn't strictly needed, as the fields that can come before
2321 * PERF_SAMPLE_ID are all used, but we'll probably disable
2322 * some of those for things like copying the payload of
2323 * pointer syscall arguments, and for vfs_getname we don't
2324 * need PERF_SAMPLE_ADDR and PERF_SAMPLE_IP, so do this
2325 * here as a warning we need to use PERF_SAMPLE_IDENTIFIER.
2326 */
2327 perf_evlist__set_sample_bit(evlist, IDENTIFIER);
2328 perf_evlist__reset_sample_bit(evlist, ID);
2329 }
fde54b78 2330 }
514f1c67 2331
f15eb531
NK
2332 signal(SIGCHLD, sig_handler);
2333 signal(SIGINT, sig_handler);
2334
2335 if (forks) {
6ef73ec4 2336 err = perf_evlist__prepare_workload(evlist, &trace->opts.target,
735f7e0b 2337 argv, false, NULL);
f15eb531 2338 if (err < 0) {
c24ff998 2339 fprintf(trace->output, "Couldn't run the workload!\n");
03ad9747 2340 goto out_delete_evlist;
f15eb531
NK
2341 }
2342 }
2343
514f1c67 2344 err = perf_evlist__open(evlist);
a8f23d8f
ACM
2345 if (err < 0)
2346 goto out_error_open;
514f1c67 2347
ba504235
WN
2348 err = bpf__apply_obj_config();
2349 if (err) {
2350 char errbuf[BUFSIZ];
2351
2352 bpf__strerror_apply_obj_config(err, errbuf, sizeof(errbuf));
2353 pr_err("ERROR: Apply config to BPF failed: %s\n",
2354 errbuf);
2355 goto out_error_open;
2356 }
2357
241b057c
ACM
2358 /*
2359 * Better not use !target__has_task() here because we need to cover the
2360 * case where no threads were specified in the command line, but a
2361 * workload was, and in that case we will fill in the thread_map when
2362 * we fork the workload in perf_evlist__prepare_workload.
2363 */
f078c385
ACM
2364 if (trace->filter_pids.nr > 0)
2365 err = perf_evlist__set_filter_pids(evlist, trace->filter_pids.nr, trace->filter_pids.entries);
e13798c7 2366 else if (thread_map__pid(evlist->threads, 0) == -1)
f078c385
ACM
2367 err = perf_evlist__set_filter_pid(evlist, getpid());
2368
94ad89bc
ACM
2369 if (err < 0)
2370 goto out_error_mem;
2371
19867b61
ACM
2372 if (trace->ev_qualifier_ids.nr > 0) {
2373 err = trace__set_ev_qualifier_filter(trace);
2374 if (err < 0)
2375 goto out_errno;
19867b61 2376
2e5e5f87
ACM
2377 pr_debug("event qualifier tracepoint filter: %s\n",
2378 trace->syscalls.events.sys_exit->filter);
2379 }
19867b61 2380
94ad89bc
ACM
2381 err = perf_evlist__apply_filters(evlist, &evsel);
2382 if (err < 0)
2383 goto out_error_apply_filters;
241b057c 2384
f885037e 2385 err = perf_evlist__mmap(evlist, trace->opts.mmap_pages, false);
e09b18d4
ACM
2386 if (err < 0)
2387 goto out_error_mmap;
514f1c67 2388
e36b7821 2389 if (!target__none(&trace->opts.target) && !trace->opts.initial_delay)
cb24d01d
ACM
2390 perf_evlist__enable(evlist);
2391
f15eb531
NK
2392 if (forks)
2393 perf_evlist__start_workload(evlist);
2394
e36b7821
AB
2395 if (trace->opts.initial_delay) {
2396 usleep(trace->opts.initial_delay * 1000);
2397 perf_evlist__enable(evlist);
2398 }
2399
e13798c7 2400 trace->multiple_threads = thread_map__pid(evlist->threads, 0) == -1 ||
42052bea
ACM
2401 evlist->threads->nr > 1 ||
2402 perf_evlist__first(evlist)->attr.inherit;
/*
 * One pass over every mmap; 'before' snapshots trace->nr_events so that the
 * code after the loop can tell whether this pass consumed anything.
 */
514f1c67 2403again:
efd5745e 2404 before = trace->nr_events;
514f1c67
ACM
2405
2406 for (i = 0; i < evlist->nr_mmaps; i++) {
2407 union perf_event *event;
2408
2409 while ((event = perf_evlist__mmap_read(evlist, i)) != NULL) {
514f1c67 2410 struct perf_sample sample;
514f1c67 2411
efd5745e 2412 ++trace->nr_events;
514f1c67 2413
514f1c67
ACM
2414 err = perf_evlist__parse_sample(evlist, event, &sample);
2415 if (err) {
c24ff998 2416 fprintf(trace->output, "Can't parse sample, err = %d, skipping...\n", err);
8e50d384 2417 goto next_event;
514f1c67
ACM
2418 }
2419
ddbb1b13 2420 trace__handle_event(trace, event, &sample);
8e50d384
ZZ
2421next_event:
2422 perf_evlist__mmap_consume(evlist, i);
20c5f10e 2423
ba209f85
ACM
2424 if (interrupted)
2425 goto out_disable;
02ac5421
ACM
2426
/* Once 'done' is set, disable the events a single time and keep reading what is left. */
2427 if (done && !draining) {
2428 perf_evlist__disable(evlist);
2429 draining = true;
2430 }
514f1c67
ACM
2431 }
2432 }
2433
/*
 * Nothing read in this pass: unless already draining, poll (timeout of 100
 * once 'done' is set, -1 otherwise) and go around again if the poll reports
 * activity.  If anything was read, go around again right away.
 */
efd5745e 2434 if (trace->nr_events == before) {
ba209f85 2435 int timeout = done ? 100 : -1;
f15eb531 2436
46fb3c21
ACM
2437 if (!draining && perf_evlist__poll(evlist, timeout) > 0) {
2438 if (perf_evlist__filter_pollfd(evlist, POLLERR | POLLHUP) == 0)
2439 draining = true;
2440
ba209f85 2441 goto again;
46fb3c21 2442 }
ba209f85
ACM
2443 } else {
2444 goto again;
f15eb531
NK
2445 }
2446
ba209f85 2447out_disable:
f3b623b8
ACM
2448 thread__zput(trace->current);
2449
ba209f85 2450 perf_evlist__disable(evlist);
514f1c67 2451
c522739d
ACM
2452 if (!err) {
2453 if (trace->summary)
2454 trace__fprintf_thread_summary(trace, trace->output);
2455
2456 if (trace->show_tool_stats) {
2457 fprintf(trace->output, "Stats:\n "
2458 " vfs_getname : %" PRIu64 "\n"
2459 " proc_getname: %" PRIu64 "\n",
2460 trace->stats.vfs_getname,
2461 trace->stats.proc_getname);
2462 }
2463 }
bf2575c1 2464
/* Common exit: every path, successful or not, ends up here. */
514f1c67
ACM
2465out_delete_evlist:
2466 perf_evlist__delete(evlist);
14a052df 2467 trace->evlist = NULL;
75b757ca 2468 trace->live = false;
514f1c67 2469 return err;
/*
 * Error reporting paths, reachable only via goto.  The ones inside the
 * braces share 'errbuf'; each prints its message to trace->output and then
 * jumps back to out_delete_evlist.
 */
6ef068cb
ACM
2470{
2471 char errbuf[BUFSIZ];
a8f23d8f 2472
2cc990ba 2473out_error_sched_stat_runtime:
988bdb31 2474 tracing_path__strerror_open_tp(errno, errbuf, sizeof(errbuf), "sched", "sched_stat_runtime");
2cc990ba
ACM
2475 goto out_error;
2476
801c67b0 2477out_error_raw_syscalls:
988bdb31 2478 tracing_path__strerror_open_tp(errno, errbuf, sizeof(errbuf), "raw_syscalls", "sys_(enter|exit)");
a8f23d8f
ACM
2479 goto out_error;
2480
e09b18d4
ACM
2481out_error_mmap:
2482 perf_evlist__strerror_mmap(evlist, errno, errbuf, sizeof(errbuf));
2483 goto out_error;
2484
a8f23d8f
ACM
2485out_error_open:
2486 perf_evlist__strerror_open(evlist, errno, errbuf, sizeof(errbuf));
2487
2488out_error:
6ef068cb 2489 fprintf(trace->output, "%s\n", errbuf);
87f91868 2490 goto out_delete_evlist;
94ad89bc
ACM
2491
2492out_error_apply_filters:
2493 fprintf(trace->output,
2494 "Failed to set filter \"%s\" on event %s with %d (%s)\n",
2495 evsel->filter, perf_evsel__name(evsel), errno,
c8b5f2c9 2496 str_error_r(errno, errbuf, sizeof(errbuf)));
94ad89bc 2497 goto out_delete_evlist;
514f1c67 2498}
5ed08dae
ACM
2499out_error_mem:
2500 fprintf(trace->output, "Not enough memory to run!\n");
2501 goto out_delete_evlist;
19867b61
ACM
2502
2503out_errno:
2504 fprintf(trace->output, "errno=%d,%s\n", errno, strerror(errno));
2505 goto out_delete_evlist;
a8f23d8f 2506}
514f1c67 2507
6810fc91
DA
2508static int trace__replay(struct trace *trace)
2509{
2510 const struct perf_evsel_str_handler handlers[] = {
c522739d 2511 { "probe:vfs_getname", trace__vfs_getname, },
6810fc91 2512 };
f5fc1412
JO
2513 struct perf_data_file file = {
2514 .path = input_name,
2515 .mode = PERF_DATA_MODE_READ,
e366a6d8 2516 .force = trace->force,
f5fc1412 2517 };
6810fc91 2518 struct perf_session *session;
003824e8 2519 struct perf_evsel *evsel;
6810fc91
DA
2520 int err = -1;
2521
2522 trace->tool.sample = trace__process_sample;
2523 trace->tool.mmap = perf_event__process_mmap;
384c671e 2524 trace->tool.mmap2 = perf_event__process_mmap2;
6810fc91
DA
2525 trace->tool.comm = perf_event__process_comm;
2526 trace->tool.exit = perf_event__process_exit;
2527 trace->tool.fork = perf_event__process_fork;
2528 trace->tool.attr = perf_event__process_attr;
f3b3614a 2529 trace->tool.tracing_data = perf_event__process_tracing_data;
6810fc91 2530 trace->tool.build_id = perf_event__process_build_id;
f3b3614a 2531 trace->tool.namespaces = perf_event__process_namespaces;
6810fc91 2532
0a8cb85c 2533 trace->tool.ordered_events = true;
6810fc91
DA
2534 trace->tool.ordering_requires_timestamps = true;
2535
2536 /* add tid to output */
2537 trace->multiple_threads = true;
2538
f5fc1412 2539 session = perf_session__new(&file, false, &trace->tool);
6810fc91 2540 if (session == NULL)
52e02834 2541 return -1;
6810fc91 2542
aa07df6e
DA
2543 if (trace->opts.target.pid)
2544 symbol_conf.pid_list_str = strdup(trace->opts.target.pid);
2545
2546 if (trace->opts.target.tid)
2547 symbol_conf.tid_list_str = strdup(trace->opts.target.tid);
2548
0a7e6d1b 2549 if (symbol__init(&session->header.env) < 0)
cb2ffae2
NK
2550 goto out;
2551
8fb598e5
DA
2552 trace->host = &session->machines.host;
2553
6810fc91
DA
2554 err = perf_session__set_tracepoints_handlers(session, handlers);
2555 if (err)
2556 goto out;
2557
003824e8
NK
2558 evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
2559 "raw_syscalls:sys_enter");
9aca7f17
DA
2560 /* older kernels have syscalls tp versus raw_syscalls */
2561 if (evsel == NULL)
2562 evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
2563 "syscalls:sys_enter");
003824e8 2564
e281a960
SF
2565 if (evsel &&
2566 (perf_evsel__init_syscall_tp(evsel, trace__sys_enter) < 0 ||
2567 perf_evsel__init_sc_tp_ptr_field(evsel, args))) {
003824e8
NK
2568 pr_err("Error during initialize raw_syscalls:sys_enter event\n");
2569 goto out;
2570 }
2571
2572 evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
2573 "raw_syscalls:sys_exit");
9aca7f17
DA
2574 if (evsel == NULL)
2575 evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
2576 "syscalls:sys_exit");
e281a960
SF
2577 if (evsel &&
2578 (perf_evsel__init_syscall_tp(evsel, trace__sys_exit) < 0 ||
2579 perf_evsel__init_sc_tp_uint_field(evsel, ret))) {
003824e8 2580 pr_err("Error during initialize raw_syscalls:sys_exit event\n");
6810fc91
DA
2581 goto out;
2582 }
2583
e5cadb93 2584 evlist__for_each_entry(session->evlist, evsel) {
1e28fe0a
SF
2585 if (evsel->attr.type == PERF_TYPE_SOFTWARE &&
2586 (evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MAJ ||
2587 evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MIN ||
2588 evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS))
2589 evsel->handler = trace__pgfault;
2590 }
2591
6810fc91
DA
2592 setup_pager();
2593
b7b61cbe 2594 err = perf_session__process_events(session);
6810fc91
DA
2595 if (err)
2596 pr_err("Failed to process events, error %d", err);
2597
bf2575c1
DA
2598 else if (trace->summary)
2599 trace__fprintf_thread_summary(trace, trace->output);
2600
6810fc91
DA
2601out:
2602 perf_session__delete(session);
2603
2604 return err;
2605}
2606
1302d88e
ACM
/*
 * Prints the heading of the per thread summary to 'fp'.
 *
 * Returns the value returned by fprintf(), converted to size_t.
 */
static size_t trace__fprintf_threads_header(FILE *fp)
{
	return fprintf(fp, "\n Summary of events:\n\n");
}
2615
b535d523
ACM
2616DEFINE_RESORT_RB(syscall_stats, a->msecs > b->msecs,
2617 struct stats *stats;
2618 double msecs;
2619 int syscall;
2620)
2621{
2622 struct int_node *source = rb_entry(nd, struct int_node, rb_node);
2623 struct stats *stats = source->priv;
2624
2625 entry->syscall = source->i;
2626 entry->stats = stats;
2627 entry->msecs = stats ? (u64)stats->n * (avg_stats(stats) / NSEC_PER_MSEC) : 0;
2628}
2629
bf2575c1
DA
2630static size_t thread__dump_stats(struct thread_trace *ttrace,
2631 struct trace *trace, FILE *fp)
2632{
bf2575c1
DA
2633 size_t printed = 0;
2634 struct syscall *sc;
b535d523
ACM
2635 struct rb_node *nd;
2636 DECLARE_RESORT_RB_INTLIST(syscall_stats, ttrace->syscall_stats);
bf2575c1 2637
b535d523 2638 if (syscall_stats == NULL)
bf2575c1
DA
2639 return 0;
2640
2641 printed += fprintf(fp, "\n");
2642
834fd46d
MW
2643 printed += fprintf(fp, " syscall calls total min avg max stddev\n");
2644 printed += fprintf(fp, " (msec) (msec) (msec) (msec) (%%)\n");
2645 printed += fprintf(fp, " --------------- -------- --------- --------- --------- --------- ------\n");
99ff7150 2646
98a91837 2647 resort_rb__for_each_entry(nd, syscall_stats) {
b535d523 2648 struct stats *stats = syscall_stats_entry->stats;
bf2575c1
DA
2649 if (stats) {
2650 double min = (double)(stats->min) / NSEC_PER_MSEC;
2651 double max = (double)(stats->max) / NSEC_PER_MSEC;
2652 double avg = avg_stats(stats);
2653 double pct;
2654 u64 n = (u64) stats->n;
2655
2656 pct = avg ? 100.0 * stddev_stats(stats)/avg : 0.0;
2657 avg /= NSEC_PER_MSEC;
2658
b535d523 2659 sc = &trace->syscalls.table[syscall_stats_entry->syscall];
99ff7150 2660 printed += fprintf(fp, " %-15s", sc->name);
834fd46d 2661 printed += fprintf(fp, " %8" PRIu64 " %9.3f %9.3f %9.3f",
b535d523 2662 n, syscall_stats_entry->msecs, min, avg);
27a778b5 2663 printed += fprintf(fp, " %9.3f %9.2f%%\n", max, pct);
bf2575c1 2664 }
bf2575c1
DA
2665 }
2666
b535d523 2667 resort_rb__delete(syscall_stats);
bf2575c1 2668 printed += fprintf(fp, "\n\n");
1302d88e
ACM
2669
2670 return printed;
2671}
2672
96c14451 2673static size_t trace__fprintf_thread(FILE *fp, struct thread *thread, struct trace *trace)
896cbb56 2674{
96c14451 2675 size_t printed = 0;
89dceb22 2676 struct thread_trace *ttrace = thread__priv(thread);
896cbb56
DA
2677 double ratio;
2678
2679 if (ttrace == NULL)
2680 return 0;
2681
2682 ratio = (double)ttrace->nr_events / trace->nr_events * 100.0;
2683
15e65c69 2684 printed += fprintf(fp, " %s (%d), ", thread__comm_str(thread), thread->tid);
99ff7150 2685 printed += fprintf(fp, "%lu events, ", ttrace->nr_events);
15e65c69 2686 printed += fprintf(fp, "%.1f%%", ratio);
a2ea67d7
SF
2687 if (ttrace->pfmaj)
2688 printed += fprintf(fp, ", %lu majfaults", ttrace->pfmaj);
2689 if (ttrace->pfmin)
2690 printed += fprintf(fp, ", %lu minfaults", ttrace->pfmin);
03548ebf
ACM
2691 if (trace->sched)
2692 printed += fprintf(fp, ", %.3f msec\n", ttrace->runtime_ms);
2693 else if (fputc('\n', fp) != EOF)
2694 ++printed;
2695
bf2575c1 2696 printed += thread__dump_stats(ttrace, trace, fp);
896cbb56 2697
96c14451
ACM
2698 return printed;
2699}
896cbb56 2700
96c14451
ACM
2701static unsigned long thread__nr_events(struct thread_trace *ttrace)
2702{
2703 return ttrace ? ttrace->nr_events : 0;
2704}
2705
2706DEFINE_RESORT_RB(threads, (thread__nr_events(a->thread->priv) < thread__nr_events(b->thread->priv)),
2707 struct thread *thread;
2708)
2709{
2710 entry->thread = rb_entry(nd, struct thread, rb_node);
896cbb56
DA
2711}
2712
1302d88e
ACM
2713static size_t trace__fprintf_thread_summary(struct trace *trace, FILE *fp)
2714{
96c14451
ACM
2715 DECLARE_RESORT_RB_MACHINE_THREADS(threads, trace->host);
2716 size_t printed = trace__fprintf_threads_header(fp);
2717 struct rb_node *nd;
1302d88e 2718
96c14451
ACM
2719 if (threads == NULL) {
2720 fprintf(fp, "%s", "Error sorting output by nr_events!\n");
2721 return 0;
2722 }
2723
98a91837 2724 resort_rb__for_each_entry(nd, threads)
96c14451 2725 printed += trace__fprintf_thread(fp, threads_entry->thread, trace);
896cbb56 2726
96c14451
ACM
2727 resort_rb__delete(threads);
2728
2729 return printed;
1302d88e
ACM
2730}
2731
ae9ed035
ACM
2732static int trace__set_duration(const struct option *opt, const char *str,
2733 int unset __maybe_unused)
2734{
2735 struct trace *trace = opt->value;
2736
2737 trace->duration_filter = atof(str);
2738 return 0;
2739}
2740
f078c385
ACM
2741static int trace__set_filter_pids(const struct option *opt, const char *str,
2742 int unset __maybe_unused)
2743{
2744 int ret = -1;
2745 size_t i;
2746 struct trace *trace = opt->value;
2747 /*
2748 * FIXME: introduce a intarray class, plain parse csv and create a
2749 * { int nr, int entries[] } struct...
2750 */
2751 struct intlist *list = intlist__new(str);
2752
2753 if (list == NULL)
2754 return -1;
2755
2756 i = trace->filter_pids.nr = intlist__nr_entries(list) + 1;
2757 trace->filter_pids.entries = calloc(i, sizeof(pid_t));
2758
2759 if (trace->filter_pids.entries == NULL)
2760 goto out;
2761
2762 trace->filter_pids.entries[0] = getpid();
2763
2764 for (i = 1; i < trace->filter_pids.nr; ++i)
2765 trace->filter_pids.entries[i] = intlist__entry(list, i - 1)->i;
2766
2767 intlist__delete(list);
2768 ret = 0;
2769out:
2770 return ret;
2771}
2772
c24ff998
ACM
2773static int trace__open_output(struct trace *trace, const char *filename)
2774{
2775 struct stat st;
2776
2777 if (!stat(filename, &st) && st.st_size) {
2778 char oldname[PATH_MAX];
2779
2780 scnprintf(oldname, sizeof(oldname), "%s.old", filename);
2781 unlink(oldname);
2782 rename(filename, oldname);
2783 }
2784
2785 trace->output = fopen(filename, "w");
2786
2787 return trace->output == NULL ? -errno : 0;
2788}
2789
598d02c5
SF
2790static int parse_pagefaults(const struct option *opt, const char *str,
2791 int unset __maybe_unused)
2792{
2793 int *trace_pgfaults = opt->value;
2794
2795 if (strcmp(str, "all") == 0)
2796 *trace_pgfaults |= TRACE_PFMAJ | TRACE_PFMIN;
2797 else if (strcmp(str, "maj") == 0)
2798 *trace_pgfaults |= TRACE_PFMAJ;
2799 else if (strcmp(str, "min") == 0)
2800 *trace_pgfaults |= TRACE_PFMIN;
2801 else
2802 return -1;
2803
2804 return 0;
2805}
2806
14a052df
ACM
2807static void evlist__set_evsel_handler(struct perf_evlist *evlist, void *handler)
2808{
2809 struct perf_evsel *evsel;
2810
e5cadb93 2811 evlist__for_each_entry(evlist, evsel)
14a052df
ACM
2812 evsel->handler = handler;
2813}
2814
017037ff
ACM
2815/*
2816 * XXX: Hackish, just splitting the combined -e+--event (syscalls
2817 * (raw_syscalls:{sys_{enter,exit}} + events (tracepoints, HW, SW, etc) to use
2818 * existing facilities unchanged (trace->ev_qualifier + parse_options()).
2819 *
2820 * It'd be better to introduce a parse_options() variant that would return a
2821 * list with the terms it didn't match to an event...
2822 */
2823static int trace__parse_events_option(const struct option *opt, const char *str,
2824 int unset __maybe_unused)
2825{
2826 struct trace *trace = (struct trace *)opt->value;
2827 const char *s = str;
2828 char *sep = NULL, *lists[2] = { NULL, NULL, };
2829 int len = strlen(str), err = -1, list;
2830 char *strace_groups_dir = system_path(STRACE_GROUPS_DIR);
2831 char group_name[PATH_MAX];
2832
2833 if (strace_groups_dir == NULL)
2834 return -1;
2835
2836 if (*s == '!') {
2837 ++s;
2838 trace->not_ev_qualifier = true;
2839 }
2840
2841 while (1) {
2842 if ((sep = strchr(s, ',')) != NULL)
2843 *sep = '\0';
2844
2845 list = 0;
2846 if (syscalltbl__id(trace->sctbl, s) >= 0) {
2847 list = 1;
2848 } else {
2849 path__join(group_name, sizeof(group_name), strace_groups_dir, s);
2850 if (access(group_name, R_OK) == 0)
2851 list = 1;
2852 }
2853
2854 if (lists[list]) {
2855 sprintf(lists[list] + strlen(lists[list]), ",%s", s);
2856 } else {
2857 lists[list] = malloc(len);
2858 if (lists[list] == NULL)
2859 goto out;
2860 strcpy(lists[list], s);
2861 }
2862
2863 if (!sep)
2864 break;
2865
2866 *sep = ',';
2867 s = sep + 1;
2868 }
2869
2870 if (lists[1] != NULL) {
2871 struct strlist_config slist_config = {
2872 .dirname = strace_groups_dir,
2873 };
2874
2875 trace->ev_qualifier = strlist__new(lists[1], &slist_config);
2876 if (trace->ev_qualifier == NULL) {
2877 fputs("Not enough memory to parse event qualifier", trace->output);
2878 goto out;
2879 }
2880
2881 if (trace__validate_ev_qualifier(trace))
2882 goto out;
2883 }
2884
2885 err = 0;
2886
2887 if (lists[0]) {
2888 struct option o = OPT_CALLBACK('e', "event", &trace->evlist, "event",
2889 "event selector. use 'perf list' to list available events",
2890 parse_events_option);
2891 err = parse_events_option(&o, lists[0], 0);
2892 }
2893out:
2894 if (sep)
2895 *sep = ',';
2896
2897 return err;
2898}
2899
b0ad8ea6 2900int cmd_trace(int argc, const char **argv)
514f1c67 2901{
6fdd9cb7 2902 const char *trace_usage[] = {
f15eb531
NK
2903 "perf trace [<options>] [<command>]",
2904 "perf trace [<options>] -- <command> [<options>]",
5e2485b1
DA
2905 "perf trace record [<options>] [<command>]",
2906 "perf trace record [<options>] -- <command> [<options>]",
514f1c67
ACM
2907 NULL
2908 };
2909 struct trace trace = {
514f1c67
ACM
2910 .syscalls = {
2911 . max = -1,
2912 },
2913 .opts = {
2914 .target = {
2915 .uid = UINT_MAX,
2916 .uses_mmap = true,
2917 },
2918 .user_freq = UINT_MAX,
2919 .user_interval = ULLONG_MAX,
509051ea 2920 .no_buffering = true,
38d5447d 2921 .mmap_pages = UINT_MAX,
9d9cad76 2922 .proc_map_timeout = 500,
514f1c67 2923 },
007d66a0 2924 .output = stderr,
50c95cbd 2925 .show_comm = true,
e281a960 2926 .trace_syscalls = true,
44621819 2927 .kernel_syscallchains = false,
05614993 2928 .max_stack = UINT_MAX,
514f1c67 2929 };
c24ff998 2930 const char *output_name = NULL;
514f1c67 2931 const struct option trace_options[] = {
017037ff
ACM
2932 OPT_CALLBACK('e', "event", &trace, "event",
2933 "event/syscall selector. use 'perf list' to list available events",
2934 trace__parse_events_option),
50c95cbd
ACM
2935 OPT_BOOLEAN(0, "comm", &trace.show_comm,
2936 "show the thread COMM next to its id"),
c522739d 2937 OPT_BOOLEAN(0, "tool_stats", &trace.show_tool_stats, "show tool stats"),
017037ff
ACM
2938 OPT_CALLBACK(0, "expr", &trace, "expr", "list of syscalls/events to trace",
2939 trace__parse_events_option),
c24ff998 2940 OPT_STRING('o', "output", &output_name, "file", "output file name"),
6810fc91 2941 OPT_STRING('i', "input", &input_name, "file", "Analyze events in file"),
514f1c67
ACM
2942 OPT_STRING('p', "pid", &trace.opts.target.pid, "pid",
2943 "trace events on existing process id"),
ac9be8ee 2944 OPT_STRING('t', "tid", &trace.opts.target.tid, "tid",
514f1c67 2945 "trace events on existing thread id"),
fa0e4ffe
ACM
2946 OPT_CALLBACK(0, "filter-pids", &trace, "CSV list of pids",
2947 "pids to filter (by the kernel)", trace__set_filter_pids),
ac9be8ee 2948 OPT_BOOLEAN('a', "all-cpus", &trace.opts.target.system_wide,
514f1c67 2949 "system-wide collection from all CPUs"),
ac9be8ee 2950 OPT_STRING('C', "cpu", &trace.opts.target.cpu_list, "cpu",
514f1c67 2951 "list of cpus to monitor"),
6810fc91 2952 OPT_BOOLEAN(0, "no-inherit", &trace.opts.no_inherit,
514f1c67 2953 "child tasks do not inherit counters"),
994a1f78
JO
2954 OPT_CALLBACK('m', "mmap-pages", &trace.opts.mmap_pages, "pages",
2955 "number of mmap data pages",
2956 perf_evlist__parse_mmap_pages),
ac9be8ee 2957 OPT_STRING('u', "uid", &trace.opts.target.uid_str, "user",
514f1c67 2958 "user to profile"),
ae9ed035
ACM
2959 OPT_CALLBACK(0, "duration", &trace, "float",
2960 "show only events with duration > N.M ms",
2961 trace__set_duration),
1302d88e 2962 OPT_BOOLEAN(0, "sched", &trace.sched, "show blocking scheduler events"),
7c304ee0 2963 OPT_INCR('v', "verbose", &verbose, "be more verbose"),
4bb09192
DA
2964 OPT_BOOLEAN('T', "time", &trace.full_time,
2965 "Show full timestamp, not time relative to first start"),
fd2eabaf
DA
2966 OPT_BOOLEAN('s', "summary", &trace.summary_only,
2967 "Show only syscall summary with statistics"),
2968 OPT_BOOLEAN('S', "with-summary", &trace.summary,
2969 "Show all syscalls and summary with statistics"),
598d02c5
SF
2970 OPT_CALLBACK_DEFAULT('F', "pf", &trace.trace_pgfaults, "all|maj|min",
2971 "Trace pagefaults", parse_pagefaults, "maj"),
e281a960 2972 OPT_BOOLEAN(0, "syscalls", &trace.trace_syscalls, "Trace syscalls"),
e366a6d8 2973 OPT_BOOLEAN('f', "force", &trace.force, "don't complain, do it"),
566a0885
MW
2974 OPT_CALLBACK(0, "call-graph", &trace.opts,
2975 "record_mode[,record_size]", record_callchain_help,
2976 &record_parse_callchain_opt),
44621819
ACM
2977 OPT_BOOLEAN(0, "kernel-syscall-graph", &trace.kernel_syscallchains,
2978 "Show the kernel callchains on the syscall exit path"),
5cf9c84e
ACM
2979 OPT_UINTEGER(0, "min-stack", &trace.min_stack,
2980 "Set the minimum stack depth when parsing the callchain, "
2981 "anything below the specified depth will be ignored."),
c6d4a494
ACM
2982 OPT_UINTEGER(0, "max-stack", &trace.max_stack,
2983 "Set the maximum stack depth when parsing the callchain, "
2984 "anything beyond the specified depth will be ignored. "
4cb93446 2985 "Default: kernel.perf_event_max_stack or " __stringify(PERF_MAX_STACK_DEPTH)),
9d9cad76
KL
2986 OPT_UINTEGER(0, "proc-map-timeout", &trace.opts.proc_map_timeout,
2987 "per thread proc mmap processing timeout in ms"),
e36b7821
AB
2988 OPT_UINTEGER('D', "delay", &trace.opts.initial_delay,
2989 "ms to wait before starting measurement after program "
2990 "start"),
514f1c67
ACM
2991 OPT_END()
2992 };
ccd62a89 2993 bool __maybe_unused max_stack_user_set = true;
f3e459d1 2994 bool mmap_pages_user_set = true;
6fdd9cb7 2995 const char * const trace_subcommands[] = { "record", NULL };
514f1c67 2996 int err;
32caf0d1 2997 char bf[BUFSIZ];
514f1c67 2998
4d08cb80
ACM
2999 signal(SIGSEGV, sighandler_dump_stack);
3000 signal(SIGFPE, sighandler_dump_stack);
3001
14a052df 3002 trace.evlist = perf_evlist__new();
fd0db102 3003 trace.sctbl = syscalltbl__new();
14a052df 3004
fd0db102 3005 if (trace.evlist == NULL || trace.sctbl == NULL) {
14a052df 3006 pr_err("Not enough memory to run!\n");
ff8f695c 3007 err = -ENOMEM;
14a052df
ACM
3008 goto out;
3009 }
3010
6fdd9cb7
YS
3011 argc = parse_options_subcommand(argc, argv, trace_options, trace_subcommands,
3012 trace_usage, PARSE_OPT_STOP_AT_NON_OPTION);
fd2eabaf 3013
d7888573
WN
3014 err = bpf__setup_stdout(trace.evlist);
3015 if (err) {
3016 bpf__strerror_setup_stdout(trace.evlist, err, bf, sizeof(bf));
3017 pr_err("ERROR: Setup BPF stdout failed: %s\n", bf);
3018 goto out;
3019 }
3020
59247e33
ACM
3021 err = -1;
3022
598d02c5
SF
3023 if (trace.trace_pgfaults) {
3024 trace.opts.sample_address = true;
3025 trace.opts.sample_time = true;
3026 }
3027
f3e459d1
ACM
3028 if (trace.opts.mmap_pages == UINT_MAX)
3029 mmap_pages_user_set = false;
3030
05614993 3031 if (trace.max_stack == UINT_MAX) {
fe176085 3032 trace.max_stack = input_name ? PERF_MAX_STACK_DEPTH : sysctl_perf_event_max_stack;
05614993
ACM
3033 max_stack_user_set = false;
3034 }
3035
3036#ifdef HAVE_DWARF_UNWIND_SUPPORT
caa36ed7 3037 if ((trace.min_stack || max_stack_user_set) && !callchain_param.enabled && trace.trace_syscalls)
05614993
ACM
3038 record_opts__parse_callchain(&trace.opts, &callchain_param, "dwarf", false);
3039#endif
3040
2ddd5c04 3041 if (callchain_param.enabled) {
f3e459d1
ACM
3042 if (!mmap_pages_user_set && geteuid() == 0)
3043 trace.opts.mmap_pages = perf_event_mlock_kb_in_pages() * 4;
3044
566a0885 3045 symbol_conf.use_callchain = true;
f3e459d1 3046 }
566a0885 3047
14a052df
ACM
3048 if (trace.evlist->nr_entries > 0)
3049 evlist__set_evsel_handler(trace.evlist, trace__event_handler);
3050
1e28fe0a
SF
3051 if ((argc >= 1) && (strcmp(argv[0], "record") == 0))
3052 return trace__record(&trace, argc-1, &argv[1]);
3053
3054 /* summary_only implies summary option, but don't overwrite summary if set */
3055 if (trace.summary_only)
3056 trace.summary = trace.summary_only;
3057
726f3234
ACM
3058 if (!trace.trace_syscalls && !trace.trace_pgfaults &&
3059 trace.evlist->nr_entries == 0 /* Was --events used? */) {
e281a960
SF
3060 pr_err("Please specify something to trace.\n");
3061 return -1;
3062 }
3063
017037ff 3064 if (!trace.trace_syscalls && trace.ev_qualifier) {
59247e33
ACM
3065 pr_err("The -e option can't be used with --no-syscalls.\n");
3066 goto out;
3067 }
3068
c24ff998
ACM
3069 if (output_name != NULL) {
3070 err = trace__open_output(&trace, output_name);
3071 if (err < 0) {
3072 perror("failed to create output file");
3073 goto out;
3074 }
3075 }
3076
fd0db102
ACM
3077 trace.open_id = syscalltbl__id(trace.sctbl, "open");
3078
602ad878 3079 err = target__validate(&trace.opts.target);
32caf0d1 3080 if (err) {
602ad878 3081 target__strerror(&trace.opts.target, err, bf, sizeof(bf));
c24ff998
ACM
3082 fprintf(trace.output, "%s", bf);
3083 goto out_close;
32caf0d1
NK
3084 }
3085
602ad878 3086 err = target__parse_uid(&trace.opts.target);
514f1c67 3087 if (err) {
602ad878 3088 target__strerror(&trace.opts.target, err, bf, sizeof(bf));
c24ff998
ACM
3089 fprintf(trace.output, "%s", bf);
3090 goto out_close;
514f1c67
ACM
3091 }
3092
602ad878 3093 if (!argc && target__none(&trace.opts.target))
ee76120e
NK
3094 trace.opts.target.system_wide = true;
3095
6810fc91
DA
3096 if (input_name)
3097 err = trace__replay(&trace);
3098 else
3099 err = trace__run(&trace, argc, argv);
1302d88e 3100
c24ff998
ACM
3101out_close:
3102 if (output_name != NULL)
3103 fclose(trace.output);
3104out:
1302d88e 3105 return err;
514f1c67 3106}