perf trace beauty fcntl: Beautify the 'arg' for DUPFD
[linux-2.6-block.git] / tools / perf / builtin-trace.c
CommitLineData
a598bb5e
ACM
1/*
2 * builtin-trace.c
3 *
4 * Builtin 'trace' command:
5 *
6 * Display a continuously updated trace of any workload, CPU, specific PID,
7 * system wide, etc. Default format is loosely strace like, but any other
8 * event may be specified using --event.
9 *
10 * Copyright (C) 2012, 2013, 2014, 2015 Red Hat Inc, Arnaldo Carvalho de Melo <acme@redhat.com>
11 *
12 * Initially based on the 'trace' prototype by Thomas Gleixner:
13 *
14 * http://lwn.net/Articles/415728/ ("Announcing a new utility: 'trace'")
15 *
16 * Released under the GPL v2. (and only v2, not any later version)
17 */
18
4e319027 19#include <traceevent/event-parse.h>
988bdb31 20#include <api/fs/tracing_path.h>
514f1c67 21#include "builtin.h"
752fde44 22#include "util/color.h"
7c304ee0 23#include "util/debug.h"
5ab8c689 24#include "util/event.h"
514f1c67 25#include "util/evlist.h"
4b6ab94e 26#include <subcmd/exec-cmd.h>
752fde44 27#include "util/machine.h"
9a3993d4 28#include "util/path.h"
6810fc91 29#include "util/session.h"
752fde44 30#include "util/thread.h"
4b6ab94e 31#include <subcmd/parse-options.h>
2ae3a312 32#include "util/strlist.h"
bdc89661 33#include "util/intlist.h"
514f1c67 34#include "util/thread_map.h"
bf2575c1 35#include "util/stat.h"
fd5cead2 36#include "trace/beauty/beauty.h"
97978b3e 37#include "trace-event.h"
9aca7f17 38#include "util/parse-events.h"
ba504235 39#include "util/bpf-loader.h"
566a0885 40#include "callchain.h"
fea01392 41#include "print_binary.h"
a067558e 42#include "string2.h"
fd0db102 43#include "syscalltbl.h"
96c14451 44#include "rb_resort.h"
514f1c67 45
a43783ae 46#include <errno.h>
fd20e811 47#include <inttypes.h>
fd0db102 48#include <libaudit.h> /* FIXME: Still needed for audit_errno_to_name */
4208735d 49#include <poll.h>
9607ad3a 50#include <signal.h>
514f1c67 51#include <stdlib.h>
017037ff 52#include <string.h>
8dd2a131 53#include <linux/err.h>
997bba8c
ACM
54#include <linux/filter.h>
55#include <linux/audit.h>
877a7a11 56#include <linux/kernel.h>
39878d49 57#include <linux/random.h>
c6d4a494 58#include <linux/stringify.h>
bd48c63e 59#include <linux/time64.h>
514f1c67 60
3d689ed6
ACM
61#include "sane_ctype.h"
62
c188e7ac
ACM
/* Fallback for libc headers that do not provide O_CLOEXEC. */
#if !defined(O_CLOEXEC)
#define O_CLOEXEC 02000000
#endif
66
83a51694
ACM
/* Fallback: first fcntl command number in the Linux-specific range. */
#if !defined(F_LINUX_SPECIFIC_BASE)
#define F_LINUX_SPECIFIC_BASE 1024
#endif
70
d1d438a3
ACM
71struct trace {
72 struct perf_tool tool;
fd0db102 73 struct syscalltbl *sctbl;
d1d438a3
ACM
74 struct {
75 int max;
76 struct syscall *table;
77 struct {
78 struct perf_evsel *sys_enter,
79 *sys_exit;
80 } events;
81 } syscalls;
82 struct record_opts opts;
83 struct perf_evlist *evlist;
84 struct machine *host;
85 struct thread *current;
86 u64 base_time;
87 FILE *output;
88 unsigned long nr_events;
89 struct strlist *ev_qualifier;
90 struct {
91 size_t nr;
92 int *entries;
93 } ev_qualifier_ids;
d1d438a3
ACM
94 struct {
95 size_t nr;
96 pid_t *entries;
97 } filter_pids;
98 double duration_filter;
99 double runtime_ms;
100 struct {
101 u64 vfs_getname,
102 proc_getname;
103 } stats;
c6d4a494 104 unsigned int max_stack;
5cf9c84e 105 unsigned int min_stack;
d1d438a3
ACM
106 bool not_ev_qualifier;
107 bool live;
108 bool full_time;
109 bool sched;
110 bool multiple_threads;
111 bool summary;
112 bool summary_only;
113 bool show_comm;
114 bool show_tool_stats;
115 bool trace_syscalls;
44621819 116 bool kernel_syscallchains;
d1d438a3
ACM
117 bool force;
118 bool vfs_getname;
119 int trace_pgfaults;
fd0db102 120 int open_id;
d1d438a3 121};
a1c2552d 122
77170988
ACM
123struct tp_field {
124 int offset;
125 union {
126 u64 (*integer)(struct tp_field *field, struct perf_sample *sample);
127 void *(*pointer)(struct tp_field *field, struct perf_sample *sample);
128 };
129};
130
131#define TP_UINT_FIELD(bits) \
132static u64 tp_field__u##bits(struct tp_field *field, struct perf_sample *sample) \
133{ \
55d43bca
DA
134 u##bits value; \
135 memcpy(&value, sample->raw_data + field->offset, sizeof(value)); \
136 return value; \
77170988
ACM
137}
138
139TP_UINT_FIELD(8);
140TP_UINT_FIELD(16);
141TP_UINT_FIELD(32);
142TP_UINT_FIELD(64);
143
144#define TP_UINT_FIELD__SWAPPED(bits) \
145static u64 tp_field__swapped_u##bits(struct tp_field *field, struct perf_sample *sample) \
146{ \
55d43bca
DA
147 u##bits value; \
148 memcpy(&value, sample->raw_data + field->offset, sizeof(value)); \
77170988
ACM
149 return bswap_##bits(value);\
150}
151
152TP_UINT_FIELD__SWAPPED(16);
153TP_UINT_FIELD__SWAPPED(32);
154TP_UINT_FIELD__SWAPPED(64);
155
156static int tp_field__init_uint(struct tp_field *field,
157 struct format_field *format_field,
158 bool needs_swap)
159{
160 field->offset = format_field->offset;
161
162 switch (format_field->size) {
163 case 1:
164 field->integer = tp_field__u8;
165 break;
166 case 2:
167 field->integer = needs_swap ? tp_field__swapped_u16 : tp_field__u16;
168 break;
169 case 4:
170 field->integer = needs_swap ? tp_field__swapped_u32 : tp_field__u32;
171 break;
172 case 8:
173 field->integer = needs_swap ? tp_field__swapped_u64 : tp_field__u64;
174 break;
175 default:
176 return -1;
177 }
178
179 return 0;
180}
181
182static void *tp_field__ptr(struct tp_field *field, struct perf_sample *sample)
183{
184 return sample->raw_data + field->offset;
185}
186
187static int tp_field__init_ptr(struct tp_field *field, struct format_field *format_field)
188{
189 field->offset = format_field->offset;
190 field->pointer = tp_field__ptr;
191 return 0;
192}
193
194struct syscall_tp {
195 struct tp_field id;
196 union {
197 struct tp_field args, ret;
198 };
199};
200
201static int perf_evsel__init_tp_uint_field(struct perf_evsel *evsel,
202 struct tp_field *field,
203 const char *name)
204{
205 struct format_field *format_field = perf_evsel__field(evsel, name);
206
207 if (format_field == NULL)
208 return -1;
209
210 return tp_field__init_uint(field, format_field, evsel->needs_swap);
211}
212
213#define perf_evsel__init_sc_tp_uint_field(evsel, name) \
214 ({ struct syscall_tp *sc = evsel->priv;\
215 perf_evsel__init_tp_uint_field(evsel, &sc->name, #name); })
216
/*
 * Look up the tracepoint field called @name on @evsel and initialise @field
 * as a pointer accessor for it.
 *
 * Returns -1 when the field does not exist, otherwise the result of
 * tp_field__init_ptr().
 */
static int perf_evsel__init_tp_ptr_field(struct perf_evsel *evsel,
					 struct tp_field *field,
					 const char *name)
{
	struct format_field *fmt = perf_evsel__field(evsel, name);

	return fmt != NULL ? tp_field__init_ptr(field, fmt) : -1;
}

/*
 * Convenience wrapper: initialise member @name of the struct syscall_tp
 * hanging off evsel->priv from the tracepoint field of the same name.
 */
#define perf_evsel__init_sc_tp_ptr_field(evsel, name) \
	({ struct syscall_tp *sc = (evsel)->priv;\
	   perf_evsel__init_tp_ptr_field((evsel), &sc->name, #name); })
232
233static void perf_evsel__delete_priv(struct perf_evsel *evsel)
234{
04662523 235 zfree(&evsel->priv);
77170988
ACM
236 perf_evsel__delete(evsel);
237}
238
96695d44
NK
239static int perf_evsel__init_syscall_tp(struct perf_evsel *evsel, void *handler)
240{
241 evsel->priv = malloc(sizeof(struct syscall_tp));
242 if (evsel->priv != NULL) {
243 if (perf_evsel__init_sc_tp_uint_field(evsel, id))
244 goto out_delete;
245
246 evsel->handler = handler;
247 return 0;
248 }
249
250 return -ENOMEM;
251
252out_delete:
04662523 253 zfree(&evsel->priv);
96695d44
NK
254 return -ENOENT;
255}
256
/*
 * Create the evsel for the "raw_syscalls:<direction>" tracepoint, falling
 * back to "syscalls:<direction>", and prepare it with
 * perf_evsel__init_syscall_tp().
 *
 * Returns the new evsel, or NULL if neither tracepoint could be created or
 * the initialisation failed (the evsel is deleted in the latter case).
 */
static struct perf_evsel *perf_evsel__syscall_newtp(const char *direction, void *handler)
{
	struct perf_evsel *evsel = perf_evsel__newtp("raw_syscalls", direction);

	if (IS_ERR(evsel)) {
		/* older kernel (e.g., RHEL6) use syscalls:{enter,exit} */
		evsel = perf_evsel__newtp("syscalls", direction);
		if (IS_ERR(evsel))
			return NULL;
	}

	if (perf_evsel__init_syscall_tp(evsel, handler) != 0) {
		perf_evsel__delete_priv(evsel);
		return NULL;
	}

	return evsel;
}
277
/*
 * Read member @name of the struct syscall_tp stored in evsel->priv from
 * @sample, through that member's integer accessor.
 */
#define perf_evsel__sc_tp_uint(evsel, name, sample) \
	({ struct syscall_tp *fields = (evsel)->priv; \
	   fields->name.integer(&fields->name, (sample)); })

/* Same as above, but through the member's pointer accessor. */
#define perf_evsel__sc_tp_ptr(evsel, name, sample) \
	({ struct syscall_tp *fields = (evsel)->priv; \
	   fields->name.pointer(&fields->name, (sample)); })
285
0ae79636
ACM
286size_t strarray__scnprintf(struct strarray *sa, char *bf, size_t size, const char *intfmt, int val)
287{
288 int idx = val - sa->offset;
1f115cb7 289
0ae79636
ACM
290 if (idx < 0 || idx >= sa->nr_entries)
291 return scnprintf(bf, size, intfmt, val);
1f115cb7 292
0ae79636 293 return scnprintf(bf, size, "%s", sa->entries[idx]);
03e3adc9
ACM
294}
295
975b7c2f
ACM
296static size_t __syscall_arg__scnprintf_strarray(char *bf, size_t size,
297 const char *intfmt,
298 struct syscall_arg *arg)
1f115cb7 299{
0ae79636 300 return strarray__scnprintf(arg->parm, bf, size, intfmt, arg->val);
1f115cb7
ACM
301}
302
975b7c2f
ACM
/* String-array beautifier whose numeric fallback is plain decimal. */
static size_t syscall_arg__scnprintf_strarray(char *bf, size_t size,
					      struct syscall_arg *arg)
{
	static const char decimal[] = "%d";

	return __syscall_arg__scnprintf_strarray(bf, size, decimal, arg);
}

#define SCA_STRARRAY syscall_arg__scnprintf_strarray
310
83a51694
ACM
/* A counted list of struct strarray pointers, tried one after the other. */
struct strarrays {
	int		 nr_entries;
	struct strarray **entries;
};

/* Define strarrays__<array> wrapping the struct strarray pointer table @array. */
#define DEFINE_STRARRAYS(array) struct strarrays strarrays__##array = { \
	.entries    = array, \
	.nr_entries = ARRAY_SIZE(array), \
}
320
274e86fd
ACM
321size_t syscall_arg__scnprintf_strarrays(char *bf, size_t size,
322 struct syscall_arg *arg)
83a51694
ACM
323{
324 struct strarrays *sas = arg->parm;
325 int i;
326
327 for (i = 0; i < sas->nr_entries; ++i) {
328 struct strarray *sa = sas->entries[i];
329 int idx = arg->val - sa->offset;
330
331 if (idx >= 0 && idx < sa->nr_entries) {
332 if (sa->entries[idx] == NULL)
333 break;
334 return scnprintf(bf, size, "%s", sa->entries[idx]);
335 }
336 }
337
338 return scnprintf(bf, size, "%d", arg->val);
339}
340
844ae5b4
ACM
#if defined(__i386__) || defined(__x86_64__)
/*
 * FIXME: Make this available to all arches as soon as the ioctl beautifier
 * gets rewritten to support all arches.
 */

/* String-array beautifier whose numeric fallback is "%#x" hexadecimal. */
static size_t syscall_arg__scnprintf_strhexarray(char *bf, size_t size,
						 struct syscall_arg *arg)
{
	static const char hexfmt[] = "%#x";

	return __syscall_arg__scnprintf_strarray(bf, size, hexfmt, arg);
}

#define SCA_STRHEXARRAY syscall_arg__scnprintf_strhexarray
#endif /* defined(__i386__) || defined(__x86_64__) */
78645cf3 354
48e1f91a
ACM
/* Fallback for headers that do not provide AT_FDCWD. */
#if !defined(AT_FDCWD)
#define AT_FDCWD -100
#endif
358
75b757ca
ACM
359static size_t syscall_arg__scnprintf_fd_at(char *bf, size_t size,
360 struct syscall_arg *arg)
361{
362 int fd = arg->val;
363
364 if (fd == AT_FDCWD)
365 return scnprintf(bf, size, "CWD");
366
367 return syscall_arg__scnprintf_fd(bf, size, arg);
368}
369
370#define SCA_FDAT syscall_arg__scnprintf_fd_at
371
/* Forward declaration: the definition is not in this part of the file. */
static size_t syscall_arg__scnprintf_close_fd(char *bf, size_t size, struct syscall_arg *arg);

#define SCA_CLOSE_FD syscall_arg__scnprintf_close_fd
376
2c2b1623 377size_t syscall_arg__scnprintf_hex(char *bf, size_t size, struct syscall_arg *arg)
13d4ff3e 378{
01533e97 379 return scnprintf(bf, size, "%#lx", arg->val);
13d4ff3e
ACM
380}
381
2c2b1623 382size_t syscall_arg__scnprintf_int(char *bf, size_t size, struct syscall_arg *arg)
a1c2552d
ACM
383{
384 return scnprintf(bf, size, "%d", arg->val);
385}
386
5dde91ed
ACM
387size_t syscall_arg__scnprintf_long(char *bf, size_t size, struct syscall_arg *arg)
388{
389 return scnprintf(bf, size, "%ld", arg->val);
390}
391
729a7841
ACM
392static const char *bpf_cmd[] = {
393 "MAP_CREATE", "MAP_LOOKUP_ELEM", "MAP_UPDATE_ELEM", "MAP_DELETE_ELEM",
394 "MAP_GET_NEXT_KEY", "PROG_LOAD",
395};
396static DEFINE_STRARRAY(bpf_cmd);
397
03e3adc9
ACM
398static const char *epoll_ctl_ops[] = { "ADD", "DEL", "MOD", };
399static DEFINE_STRARRAY_OFFSET(epoll_ctl_ops, 1);
eac032c5 400
1f115cb7
ACM
401static const char *itimers[] = { "REAL", "VIRTUAL", "PROF", };
402static DEFINE_STRARRAY(itimers);
403
b62bee1b
ACM
/* Names for the 'option' argument of keyctl, indexed from 0. */
static const char *keyctl_options[] = {
	/*  0 */ "GET_KEYRING_ID", "JOIN_SESSION_KEYRING", "UPDATE", "REVOKE",
	/*  4 */ "CHOWN", "SETPERM", "DESCRIBE", "CLEAR",
	/*  8 */ "LINK", "UNLINK", "SEARCH", "READ",
	/* 12 */ "INSTANTIATE", "NEGATE", "SET_REQKEY_KEYRING", "SET_TIMEOUT",
	/* 16 */ "ASSUME_AUTHORITY", "GET_SECURITY", "SESSION_TO_PARENT", "REJECT",
	/* 20 */ "INSTANTIATE_IOV", "INVALIDATE", "GET_PERSISTENT",
};
static DEFINE_STRARRAY(keyctl_options);
412
efe6b882
ACM
/*
 * Names for the 'whence' argument of lseek, indexed from 0.  The DATA and
 * HOLE entries are only compiled in when the headers define the matching
 * SEEK_* constant.
 */
static const char *whences[] = {
	"SET",
	"CUR",
	"END",
#ifdef SEEK_DATA
	"DATA",
#endif
#ifdef SEEK_HOLE
	"HOLE",
#endif
};
static DEFINE_STRARRAY(whences);
f9da0b0c 422
80f587d5
ACM
/* Names for the generic fcntl commands, indexed from 0. */
static const char *fcntl_cmds[] = {
	/*  0 */ "DUPFD", "GETFD", "SETFD", "GETFL",
	/*  4 */ "SETFL", "GETLK", "SETLK", "SETLKW",
	/*  8 */ "SETOWN", "GETOWN", "SETSIG", "GETSIG",
	/* 12 */ "GETLK64", "SETLK64", "SETLKW64", "SETOWN_EX",
	/* 16 */ "GETOWN_EX", "GETOWNER_UIDS",
};
static DEFINE_STRARRAY(fcntl_cmds);
430
83a51694
ACM
431static const char *fcntl_linux_specific_cmds[] = {
432 "SETLEASE", "GETLEASE", "NOTIFY", [5] = "CANCELLK", "DUPFD_CLOEXEC",
433 "SETPIPE_SZ", "GETPIPE_SZ", "ADD_SEALS", "GET_SEALS",
64e4561d 434 "GET_RW_HINT", "SET_RW_HINT", "GET_FILE_RW_HINT", "SET_FILE_RW_HINT",
83a51694
ACM
435};
436
437static DEFINE_STRARRAY_OFFSET(fcntl_linux_specific_cmds, F_LINUX_SPECIFIC_BASE);
438
439static struct strarray *fcntl_cmds_arrays[] = {
440 &strarray__fcntl_cmds,
441 &strarray__fcntl_linux_specific_cmds,
442};
443
444static DEFINE_STRARRAYS(fcntl_cmds_arrays);
445
c045bf02
ACM
/* Names for the 'resource' argument of the rlimit syscalls, indexed from 0. */
static const char *rlimit_resources[] = {
	/*  0 */ "CPU", "FSIZE", "DATA", "STACK",
	/*  4 */ "CORE", "RSS", "NPROC", "NOFILE",
	/*  8 */ "MEMLOCK", "AS", "LOCKS", "SIGPENDING",
	/* 12 */ "MSGQUEUE", "NICE", "RTPRIO", "RTTIME",
};
static DEFINE_STRARRAY(rlimit_resources);

/* Names for the 'how' argument of rt_sigprocmask, indexed from 0. */
static const char *sighow[] = {
	"BLOCK",
	"UNBLOCK",
	"SETMASK",
};
static DEFINE_STRARRAY(sighow);
455
4f8c1b74
DA
/* Names for the 'clk_id' argument of clock_gettime, indexed from 0. */
static const char *clockid[] = {
	/*  0 */ "REALTIME", "MONOTONIC", "PROCESS_CPUTIME_ID",
	/*  3 */ "THREAD_CPUTIME_ID", "MONOTONIC_RAW", "REALTIME_COARSE",
	/*  6 */ "MONOTONIC_COARSE", "BOOTTIME", "REALTIME_ALARM",
	/*  9 */ "BOOTTIME_ALARM", "SGI_CYCLE", "TAI",
};
static DEFINE_STRARRAY(clockid);
462
e10bce81
ACM
/* Names for the 'family' argument of socket/socketpair, indexed from 0. */
static const char *socket_families[] = {
	/*  0 */ "UNSPEC", "LOCAL", "INET", "AX25", "IPX", "APPLETALK", "NETROM", "BRIDGE",
	/*  8 */ "ATMPVC", "X25", "INET6", "ROSE", "DECnet", "NETBEUI", "SECURITY", "KEY",
	/* 16 */ "NETLINK", "PACKET", "ASH", "ECONET", "ATMSVC", "RDS", "SNA", "IRDA",
	/* 24 */ "PPPOX", "WANPIPE", "LLC", "IB", "CAN", "TIPC", "BLUETOOTH", "IUCV",
	/* 32 */ "RXRPC", "ISDN", "PHONET", "IEEE802154", "CAIF", "ALG", "NFC", "VSOCK",
};
static DEFINE_STRARRAY(socket_families);
472
51108999
ACM
473static size_t syscall_arg__scnprintf_access_mode(char *bf, size_t size,
474 struct syscall_arg *arg)
475{
476 size_t printed = 0;
477 int mode = arg->val;
478
479 if (mode == F_OK) /* 0 */
480 return scnprintf(bf, size, "F");
481#define P_MODE(n) \
482 if (mode & n##_OK) { \
483 printed += scnprintf(bf + printed, size - printed, "%s", #n); \
484 mode &= ~n##_OK; \
485 }
486
487 P_MODE(R);
488 P_MODE(W);
489 P_MODE(X);
490#undef P_MODE
491
492 if (mode)
493 printed += scnprintf(bf + printed, size - printed, "|%#x", mode);
494
495 return printed;
496}
497
498#define SCA_ACCMODE syscall_arg__scnprintf_access_mode
499
f994592d
ACM
/* Forward declaration: the definition is not in this part of the file. */
static size_t syscall_arg__scnprintf_filename(char *bf, size_t size, struct syscall_arg *arg);

#define SCA_FILENAME syscall_arg__scnprintf_filename
504
46cce19b
ACM
505static size_t syscall_arg__scnprintf_pipe_flags(char *bf, size_t size,
506 struct syscall_arg *arg)
507{
508 int printed = 0, flags = arg->val;
509
510#define P_FLAG(n) \
511 if (flags & O_##n) { \
512 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
513 flags &= ~O_##n; \
514 }
515
516 P_FLAG(CLOEXEC);
517 P_FLAG(NONBLOCK);
518#undef P_FLAG
519
520 if (flags)
521 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
522
523 return printed;
524}
525
526#define SCA_PIPE_FLAGS syscall_arg__scnprintf_pipe_flags
527
844ae5b4
ACM
528#if defined(__i386__) || defined(__x86_64__)
529/*
530 * FIXME: Make this available to all arches.
531 */
78645cf3
ACM
532#define TCGETS 0x5401
533
534static const char *tioctls[] = {
535 "TCGETS", "TCSETS", "TCSETSW", "TCSETSF", "TCGETA", "TCSETA", "TCSETAW",
536 "TCSETAF", "TCSBRK", "TCXONC", "TCFLSH", "TIOCEXCL", "TIOCNXCL",
537 "TIOCSCTTY", "TIOCGPGRP", "TIOCSPGRP", "TIOCOUTQ", "TIOCSTI",
538 "TIOCGWINSZ", "TIOCSWINSZ", "TIOCMGET", "TIOCMBIS", "TIOCMBIC",
539 "TIOCMSET", "TIOCGSOFTCAR", "TIOCSSOFTCAR", "FIONREAD", "TIOCLINUX",
540 "TIOCCONS", "TIOCGSERIAL", "TIOCSSERIAL", "TIOCPKT", "FIONBIO",
541 "TIOCNOTTY", "TIOCSETD", "TIOCGETD", "TCSBRKP", [0x27] = "TIOCSBRK",
542 "TIOCCBRK", "TIOCGSID", "TCGETS2", "TCSETS2", "TCSETSW2", "TCSETSF2",
543 "TIOCGRS485", "TIOCSRS485", "TIOCGPTN", "TIOCSPTLCK",
544 "TIOCGDEV||TCGETX", "TCSETX", "TCSETXF", "TCSETXW", "TIOCSIG",
545 "TIOCVHANGUP", "TIOCGPKT", "TIOCGPTLCK", "TIOCGEXCL",
546 [0x50] = "FIONCLEX", "FIOCLEX", "FIOASYNC", "TIOCSERCONFIG",
547 "TIOCSERGWILD", "TIOCSERSWILD", "TIOCGLCKTRMIOS", "TIOCSLCKTRMIOS",
548 "TIOCSERGSTRUCT", "TIOCSERGETLSR", "TIOCSERGETMULTI", "TIOCSERSETMULTI",
549 "TIOCMIWAIT", "TIOCGICOUNT", [0x60] = "FIOQSIZE",
550};
551
552static DEFINE_STRARRAY_OFFSET(tioctls, 0x5401);
844ae5b4 553#endif /* defined(__i386__) || defined(__x86_64__) */
78645cf3 554
a355a61e
ACM
/* Fallbacks for headers that do not provide the getrandom flag bits. */
#if !defined(GRND_NONBLOCK)
#define GRND_NONBLOCK 0x0001
#endif
#if !defined(GRND_RANDOM)
#define GRND_RANDOM 0x0002
#endif
561
39878d49
ACM
562static size_t syscall_arg__scnprintf_getrandom_flags(char *bf, size_t size,
563 struct syscall_arg *arg)
564{
565 int printed = 0, flags = arg->val;
566
567#define P_FLAG(n) \
568 if (flags & GRND_##n) { \
569 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
570 flags &= ~GRND_##n; \
571 }
572
573 P_FLAG(RANDOM);
574 P_FLAG(NONBLOCK);
575#undef P_FLAG
576
577 if (flags)
578 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
579
580 return printed;
581}
582
583#define SCA_GETRANDOM_FLAGS syscall_arg__scnprintf_getrandom_flags
584
82d4a110
ACM
/*
 * Initializer for a struct syscall_arg_fmt that beautifies an argument via
 * strarray__<array>.  @name is not expanded; it only labels the argument at
 * the point of use.
 */
#define STRARRAY(name, array) \
	{ .parm      = &strarray__##array, \
	  .scnprintf = SCA_STRARRAY, }
453350dd 588
ea8dc3ce 589#include "trace/beauty/eventfd.c"
8bf382ce 590#include "trace/beauty/flock.c"
d5d71e86 591#include "trace/beauty/futex_op.c"
df4cb167 592#include "trace/beauty/mmap.c"
ba2f22cf 593#include "trace/beauty/mode_t.c"
a30e6259 594#include "trace/beauty/msg_flags.c"
8f48df69 595#include "trace/beauty/open_flags.c"
62de344e 596#include "trace/beauty/perf_event_open.c"
d5d71e86 597#include "trace/beauty/pid.c"
a3bca91f 598#include "trace/beauty/sched_policy.c"
f5cd95ea 599#include "trace/beauty/seccomp.c"
12199d8e 600#include "trace/beauty/signum.c"
bbf86c43 601#include "trace/beauty/socket_type.c"
7206b900 602#include "trace/beauty/waitid_options.c"
a3bca91f 603
82d4a110
ACM
/* How to pretty-print one syscall argument. */
struct syscall_arg_fmt {
	/* formatter: writes into bf (at most size bytes), returns a length */
	size_t	   (*scnprintf)(char *bf, size_t size, struct syscall_arg *arg);
	/* opaque data for the formatter, e.g. a string table */
	void	   *parm;
	/* NOTE(review): presumably forces printing of a zero value - confirm */
	bool	   show_zero;
};
609
514f1c67
ACM
610static struct syscall_fmt {
611 const char *name;
aec1930b 612 const char *alias;
82d4a110 613 struct syscall_arg_fmt arg[6];
514f1c67 614 bool errmsg;
11c8e39f 615 bool errpid;
514f1c67 616 bool timeout;
04b34729 617 bool hexret;
514f1c67 618} syscall_fmts[] = {
51108999 619 { .name = "access", .errmsg = true,
82d4a110 620 .arg = { [1] = { .scnprintf = SCA_ACCMODE, /* mode */ }, }, },
aec1930b 621 { .name = "arch_prctl", .errmsg = true, .alias = "prctl", },
82d4a110
ACM
622 { .name = "bpf", .errmsg = true,
623 .arg = { [0] = STRARRAY(cmd, bpf_cmd), }, },
beccb2b5 624 { .name = "brk", .hexret = true,
82d4a110 625 .arg = { [0] = { .scnprintf = SCA_HEX, /* brk */ }, }, },
12f3ca4f
ACM
626 { .name = "chdir", .errmsg = true, },
627 { .name = "chmod", .errmsg = true, },
628 { .name = "chroot", .errmsg = true, },
82d4a110
ACM
629 { .name = "clock_gettime", .errmsg = true,
630 .arg = { [0] = STRARRAY(clk_id, clockid), }, },
11c8e39f 631 { .name = "clone", .errpid = true, },
75b757ca 632 { .name = "close", .errmsg = true,
82d4a110 633 .arg = { [0] = { .scnprintf = SCA_CLOSE_FD, /* fd */ }, }, },
a14bb860 634 { .name = "connect", .errmsg = true, },
12f3ca4f 635 { .name = "creat", .errmsg = true, },
b6565c90
ACM
636 { .name = "dup", .errmsg = true, },
637 { .name = "dup2", .errmsg = true, },
638 { .name = "dup3", .errmsg = true, },
82d4a110
ACM
639 { .name = "epoll_ctl", .errmsg = true,
640 .arg = { [1] = STRARRAY(op, epoll_ctl_ops), }, },
49af9e93 641 { .name = "eventfd2", .errmsg = true,
82d4a110 642 .arg = { [1] = { .scnprintf = SCA_EFD_FLAGS, /* flags */ }, }, },
12f3ca4f 643 { .name = "faccessat", .errmsg = true, },
b6565c90
ACM
644 { .name = "fadvise64", .errmsg = true, },
645 { .name = "fallocate", .errmsg = true, },
646 { .name = "fchdir", .errmsg = true, },
647 { .name = "fchmod", .errmsg = true, },
75b757ca 648 { .name = "fchmodat", .errmsg = true,
82d4a110 649 .arg = { [0] = { .scnprintf = SCA_FDAT, /* fd */ }, }, },
b6565c90 650 { .name = "fchown", .errmsg = true, },
75b757ca 651 { .name = "fchownat", .errmsg = true,
82d4a110 652 .arg = { [0] = { .scnprintf = SCA_FDAT, /* fd */ }, }, },
75b757ca 653 { .name = "fcntl", .errmsg = true,
82d4a110 654 .arg = { [1] = { .scnprintf = SCA_FCNTL_CMD, /* cmd */
39cc355b
ACM
655 .parm = &strarrays__fcntl_cmds_arrays,
656 .show_zero = true, },
82d4a110 657 [2] = { .scnprintf = SCA_FCNTL_ARG, /* arg */ }, }, },
b6565c90 658 { .name = "fdatasync", .errmsg = true, },
5cea6ff2 659 { .name = "flock", .errmsg = true,
82d4a110 660 .arg = { [1] = { .scnprintf = SCA_FLOCK, /* cmd */ }, }, },
b6565c90
ACM
661 { .name = "fsetxattr", .errmsg = true, },
662 { .name = "fstat", .errmsg = true, .alias = "newfstat", },
12f3ca4f 663 { .name = "fstatat", .errmsg = true, .alias = "newfstatat", },
b6565c90
ACM
664 { .name = "fstatfs", .errmsg = true, },
665 { .name = "fsync", .errmsg = true, },
666 { .name = "ftruncate", .errmsg = true, },
f9da0b0c 667 { .name = "futex", .errmsg = true,
82d4a110 668 .arg = { [1] = { .scnprintf = SCA_FUTEX_OP, /* op */ }, }, },
75b757ca 669 { .name = "futimesat", .errmsg = true,
82d4a110 670 .arg = { [0] = { .scnprintf = SCA_FDAT, /* fd */ }, }, },
b6565c90
ACM
671 { .name = "getdents", .errmsg = true, },
672 { .name = "getdents64", .errmsg = true, },
82d4a110
ACM
673 { .name = "getitimer", .errmsg = true,
674 .arg = { [0] = STRARRAY(which, itimers), }, },
c65f1070 675 { .name = "getpid", .errpid = true, },
d1d438a3 676 { .name = "getpgid", .errpid = true, },
c65f1070 677 { .name = "getppid", .errpid = true, },
39878d49 678 { .name = "getrandom", .errmsg = true,
82d4a110
ACM
679 .arg = { [2] = { .scnprintf = SCA_GETRANDOM_FLAGS, /* flags */ }, }, },
680 { .name = "getrlimit", .errmsg = true,
681 .arg = { [0] = STRARRAY(resource, rlimit_resources), }, },
12f3ca4f 682 { .name = "getxattr", .errmsg = true, },
82d4a110 683 { .name = "inotify_add_watch", .errmsg = true, },
beccb2b5 684 { .name = "ioctl", .errmsg = true,
82d4a110 685 .arg = {
844ae5b4
ACM
686#if defined(__i386__) || defined(__x86_64__)
687/*
688 * FIXME: Make this available to all arches.
689 */
82d4a110
ACM
690 [1] = { .scnprintf = SCA_STRHEXARRAY, /* cmd */
691 .parm = &strarray__tioctls, },
692 [2] = { .scnprintf = SCA_HEX, /* arg */ }, }, },
844ae5b4 693#else
82d4a110 694 [2] = { .scnprintf = SCA_HEX, /* arg */ }, }, },
844ae5b4 695#endif
82d4a110
ACM
696 { .name = "keyctl", .errmsg = true,
697 .arg = { [0] = STRARRAY(option, keyctl_options), }, },
8bad5b0a 698 { .name = "kill", .errmsg = true,
82d4a110 699 .arg = { [1] = { .scnprintf = SCA_SIGNUM, /* sig */ }, }, },
12f3ca4f
ACM
700 { .name = "lchown", .errmsg = true, },
701 { .name = "lgetxattr", .errmsg = true, },
75b757ca 702 { .name = "linkat", .errmsg = true,
82d4a110 703 .arg = { [0] = { .scnprintf = SCA_FDAT, /* fd */ }, }, },
12f3ca4f
ACM
704 { .name = "listxattr", .errmsg = true, },
705 { .name = "llistxattr", .errmsg = true, },
706 { .name = "lremovexattr", .errmsg = true, },
75b757ca 707 { .name = "lseek", .errmsg = true,
82d4a110 708 .arg = { [2] = STRARRAY(whence, whences), }, },
12f3ca4f
ACM
709 { .name = "lsetxattr", .errmsg = true, },
710 { .name = "lstat", .errmsg = true, .alias = "newlstat", },
711 { .name = "lsxattr", .errmsg = true, },
9e9716d1 712 { .name = "madvise", .errmsg = true,
82d4a110
ACM
713 .arg = { [0] = { .scnprintf = SCA_HEX, /* start */ },
714 [2] = { .scnprintf = SCA_MADV_BHV, /* behavior */ }, }, },
12f3ca4f 715 { .name = "mkdir", .errmsg = true, },
75b757ca 716 { .name = "mkdirat", .errmsg = true,
82d4a110 717 .arg = { [0] = { .scnprintf = SCA_FDAT, /* fd */ }, }, },
12f3ca4f 718 { .name = "mknod", .errmsg = true, },
75b757ca 719 { .name = "mknodat", .errmsg = true,
82d4a110 720 .arg = { [0] = { .scnprintf = SCA_FDAT, /* fd */ }, }, },
3d903aa7 721 { .name = "mlock", .errmsg = true,
82d4a110 722 .arg = { [0] = { .scnprintf = SCA_HEX, /* addr */ }, }, },
3d903aa7 723 { .name = "mlockall", .errmsg = true,
82d4a110 724 .arg = { [0] = { .scnprintf = SCA_HEX, /* addr */ }, }, },
beccb2b5 725 { .name = "mmap", .hexret = true,
54265664
JO
726/* The standard mmap maps to old_mmap on s390x */
727#if defined(__s390x__)
728 .alias = "old_mmap",
729#endif
82d4a110
ACM
730 .arg = { [0] = { .scnprintf = SCA_HEX, /* addr */ },
731 [2] = { .scnprintf = SCA_MMAP_PROT, /* prot */ },
732 [3] = { .scnprintf = SCA_MMAP_FLAGS, /* flags */ }, }, },
beccb2b5 733 { .name = "mprotect", .errmsg = true,
82d4a110
ACM
734 .arg = { [0] = { .scnprintf = SCA_HEX, /* start */ },
735 [2] = { .scnprintf = SCA_MMAP_PROT, /* prot */ }, }, },
090389b6 736 { .name = "mq_unlink", .errmsg = true,
82d4a110 737 .arg = { [0] = { .scnprintf = SCA_FILENAME, /* u_name */ }, }, },
ae685380 738 { .name = "mremap", .hexret = true,
82d4a110
ACM
739 .arg = { [0] = { .scnprintf = SCA_HEX, /* addr */ },
740 [3] = { .scnprintf = SCA_MREMAP_FLAGS, /* flags */ },
741 [4] = { .scnprintf = SCA_HEX, /* new_addr */ }, }, },
3d903aa7 742 { .name = "munlock", .errmsg = true,
82d4a110 743 .arg = { [0] = { .scnprintf = SCA_HEX, /* addr */ }, }, },
beccb2b5 744 { .name = "munmap", .errmsg = true,
82d4a110 745 .arg = { [0] = { .scnprintf = SCA_HEX, /* addr */ }, }, },
75b757ca 746 { .name = "name_to_handle_at", .errmsg = true,
82d4a110 747 .arg = { [0] = { .scnprintf = SCA_FDAT, /* dfd */ }, }, },
75b757ca 748 { .name = "newfstatat", .errmsg = true,
82d4a110 749 .arg = { [0] = { .scnprintf = SCA_FDAT, /* dfd */ }, }, },
be65a89a 750 { .name = "open", .errmsg = true,
82d4a110 751 .arg = { [1] = { .scnprintf = SCA_OPEN_FLAGS, /* flags */ }, }, },
31cd3855 752 { .name = "open_by_handle_at", .errmsg = true,
82d4a110
ACM
753 .arg = { [0] = { .scnprintf = SCA_FDAT, /* dfd */ },
754 [2] = { .scnprintf = SCA_OPEN_FLAGS, /* flags */ }, }, },
31cd3855 755 { .name = "openat", .errmsg = true,
82d4a110
ACM
756 .arg = { [0] = { .scnprintf = SCA_FDAT, /* dfd */ },
757 [2] = { .scnprintf = SCA_OPEN_FLAGS, /* flags */ }, }, },
a1c2552d 758 { .name = "perf_event_open", .errmsg = true,
82d4a110
ACM
759 .arg = { [2] = { .scnprintf = SCA_INT, /* cpu */ },
760 [3] = { .scnprintf = SCA_FD, /* group_fd */ },
761 [4] = { .scnprintf = SCA_PERF_FLAGS, /* flags */ }, }, },
46cce19b 762 { .name = "pipe2", .errmsg = true,
82d4a110 763 .arg = { [1] = { .scnprintf = SCA_PIPE_FLAGS, /* flags */ }, }, },
aec1930b
ACM
764 { .name = "poll", .errmsg = true, .timeout = true, },
765 { .name = "ppoll", .errmsg = true, .timeout = true, },
b6565c90
ACM
766 { .name = "pread", .errmsg = true, .alias = "pread64", },
767 { .name = "preadv", .errmsg = true, .alias = "pread", },
82d4a110
ACM
768 { .name = "prlimit64", .errmsg = true,
769 .arg = { [1] = STRARRAY(resource, rlimit_resources), }, },
b6565c90
ACM
770 { .name = "pwrite", .errmsg = true, .alias = "pwrite64", },
771 { .name = "pwritev", .errmsg = true, },
772 { .name = "read", .errmsg = true, },
12f3ca4f 773 { .name = "readlink", .errmsg = true, },
75b757ca 774 { .name = "readlinkat", .errmsg = true,
82d4a110 775 .arg = { [0] = { .scnprintf = SCA_FDAT, /* dfd */ }, }, },
b6565c90 776 { .name = "readv", .errmsg = true, },
b2cc99fd 777 { .name = "recvfrom", .errmsg = true,
82d4a110 778 .arg = { [3] = { .scnprintf = SCA_MSG_FLAGS, /* flags */ }, }, },
b2cc99fd 779 { .name = "recvmmsg", .errmsg = true,
82d4a110 780 .arg = { [3] = { .scnprintf = SCA_MSG_FLAGS, /* flags */ }, }, },
b2cc99fd 781 { .name = "recvmsg", .errmsg = true,
82d4a110 782 .arg = { [2] = { .scnprintf = SCA_MSG_FLAGS, /* flags */ }, }, },
12f3ca4f 783 { .name = "removexattr", .errmsg = true, },
75b757ca 784 { .name = "renameat", .errmsg = true,
82d4a110 785 .arg = { [0] = { .scnprintf = SCA_FDAT, /* dfd */ }, }, },
12f3ca4f 786 { .name = "rmdir", .errmsg = true, },
8bad5b0a 787 { .name = "rt_sigaction", .errmsg = true,
82d4a110
ACM
788 .arg = { [0] = { .scnprintf = SCA_SIGNUM, /* sig */ }, }, },
789 { .name = "rt_sigprocmask", .errmsg = true,
790 .arg = { [0] = STRARRAY(how, sighow), }, },
8bad5b0a 791 { .name = "rt_sigqueueinfo", .errmsg = true,
82d4a110 792 .arg = { [1] = { .scnprintf = SCA_SIGNUM, /* sig */ }, }, },
8bad5b0a 793 { .name = "rt_tgsigqueueinfo", .errmsg = true,
82d4a110 794 .arg = { [2] = { .scnprintf = SCA_SIGNUM, /* sig */ }, }, },
f0bbd602
ACM
795 { .name = "sched_getattr", .errmsg = true, },
796 { .name = "sched_setattr", .errmsg = true, },
a3bca91f 797 { .name = "sched_setscheduler", .errmsg = true,
82d4a110 798 .arg = { [1] = { .scnprintf = SCA_SCHED_POLICY, /* policy */ }, }, },
997bba8c 799 { .name = "seccomp", .errmsg = true,
82d4a110
ACM
800 .arg = { [0] = { .scnprintf = SCA_SECCOMP_OP, /* op */ },
801 [1] = { .scnprintf = SCA_SECCOMP_FLAGS, /* flags */ }, }, },
aec1930b 802 { .name = "select", .errmsg = true, .timeout = true, },
b2cc99fd 803 { .name = "sendmmsg", .errmsg = true,
82d4a110 804 .arg = { [3] = { .scnprintf = SCA_MSG_FLAGS, /* flags */ }, }, },
b2cc99fd 805 { .name = "sendmsg", .errmsg = true,
82d4a110 806 .arg = { [2] = { .scnprintf = SCA_MSG_FLAGS, /* flags */ }, }, },
b2cc99fd 807 { .name = "sendto", .errmsg = true,
82d4a110 808 .arg = { [3] = { .scnprintf = SCA_MSG_FLAGS, /* flags */ }, }, },
c65f1070 809 { .name = "set_tid_address", .errpid = true, },
82d4a110
ACM
810 { .name = "setitimer", .errmsg = true,
811 .arg = { [0] = STRARRAY(which, itimers), }, },
d1d438a3 812 { .name = "setpgid", .errmsg = true, },
82d4a110
ACM
813 { .name = "setrlimit", .errmsg = true,
814 .arg = { [0] = STRARRAY(resource, rlimit_resources), }, },
12f3ca4f 815 { .name = "setxattr", .errmsg = true, },
b6565c90 816 { .name = "shutdown", .errmsg = true, },
e10bce81 817 { .name = "socket", .errmsg = true,
82d4a110
ACM
818 .arg = { [0] = STRARRAY(family, socket_families),
819 [1] = { .scnprintf = SCA_SK_TYPE, /* type */ }, }, },
07120aa5 820 { .name = "socketpair", .errmsg = true,
82d4a110
ACM
821 .arg = { [0] = STRARRAY(family, socket_families),
822 [1] = { .scnprintf = SCA_SK_TYPE, /* type */ }, }, },
12f3ca4f
ACM
823 { .name = "stat", .errmsg = true, .alias = "newstat", },
824 { .name = "statfs", .errmsg = true, },
fd5cead2 825 { .name = "statx", .errmsg = true,
82d4a110
ACM
826 .arg = { [0] = { .scnprintf = SCA_FDAT, /* fdat */ },
827 [2] = { .scnprintf = SCA_STATX_FLAGS, /* flags */ } ,
828 [3] = { .scnprintf = SCA_STATX_MASK, /* mask */ }, }, },
34221118 829 { .name = "swapoff", .errmsg = true,
82d4a110 830 .arg = { [0] = { .scnprintf = SCA_FILENAME, /* specialfile */ }, }, },
34221118 831 { .name = "swapon", .errmsg = true,
82d4a110 832 .arg = { [0] = { .scnprintf = SCA_FILENAME, /* specialfile */ }, }, },
75b757ca 833 { .name = "symlinkat", .errmsg = true,
82d4a110 834 .arg = { [0] = { .scnprintf = SCA_FDAT, /* dfd */ }, }, },
8bad5b0a 835 { .name = "tgkill", .errmsg = true,
82d4a110 836 .arg = { [2] = { .scnprintf = SCA_SIGNUM, /* sig */ }, }, },
8bad5b0a 837 { .name = "tkill", .errmsg = true,
82d4a110 838 .arg = { [1] = { .scnprintf = SCA_SIGNUM, /* sig */ }, }, },
12f3ca4f 839 { .name = "truncate", .errmsg = true, },
e5959683 840 { .name = "uname", .errmsg = true, .alias = "newuname", },
75b757ca 841 { .name = "unlinkat", .errmsg = true,
82d4a110 842 .arg = { [0] = { .scnprintf = SCA_FDAT, /* dfd */ }, }, },
12f3ca4f 843 { .name = "utime", .errmsg = true, },
75b757ca 844 { .name = "utimensat", .errmsg = true,
82d4a110 845 .arg = { [0] = { .scnprintf = SCA_FDAT, /* dirfd */ }, }, },
12f3ca4f 846 { .name = "utimes", .errmsg = true, },
b6565c90 847 { .name = "vmsplice", .errmsg = true, },
11c8e39f 848 { .name = "wait4", .errpid = true,
82d4a110 849 .arg = { [2] = { .scnprintf = SCA_WAITID_OPTIONS, /* options */ }, }, },
11c8e39f 850 { .name = "waitid", .errpid = true,
82d4a110 851 .arg = { [3] = { .scnprintf = SCA_WAITID_OPTIONS, /* options */ }, }, },
b6565c90
ACM
852 { .name = "write", .errmsg = true, },
853 { .name = "writev", .errmsg = true, },
514f1c67
ACM
854};
855
856static int syscall_fmt__cmp(const void *name, const void *fmtp)
857{
858 const struct syscall_fmt *fmt = fmtp;
859 return strcmp(name, fmt->name);
860}
861
862static struct syscall_fmt *syscall_fmt__find(const char *name)
863{
864 const int nmemb = ARRAY_SIZE(syscall_fmts);
865 return bsearch(name, syscall_fmts, nmemb, sizeof(struct syscall_fmt), syscall_fmt__cmp);
866}
867
/*
 * Per syscall id information, filled in lazily by trace__read_syscall_info().
 *
 * tp_format: format of the syscalls:sys_enter_NAME tracepoint (may be an
 *	      ERR_PTR when it could not be read).
 * nr_args:   number of arguments, not counting the syscall number field.
 * args:      first argument field in tp_format, past the syscall number.
 * name:      syscall name as returned by the syscall table.
 * is_exit:   true for "exit" and "exit_group".
 * fmt:	      optional entry in syscall_fmts with formatting hints.
 * arg_fmt:   per argument formatters, nr_args entries.
 */
struct syscall {
	struct event_format *tp_format;
	int		    nr_args;
	struct format_field *args;
	const char	    *name;
	bool		    is_exit;
	struct syscall_fmt  *fmt;
	struct syscall_arg_fmt *arg_fmt;
};
877
fd2b2975
ACM
878/*
879 * We need to have this 'calculated' boolean because in some cases we really
880 * don't know what is the duration of a syscall, for instance, when we start
881 * a session and some threads are waiting for a syscall to finish, say 'poll',
882 * in which case all we can do is to print "( ? ) for duration and for the
883 * start timestamp.
884 */
885static size_t fprintf_duration(unsigned long t, bool calculated, FILE *fp)
60c907ab
ACM
886{
887 double duration = (double)t / NSEC_PER_MSEC;
888 size_t printed = fprintf(fp, "(");
889
fd2b2975
ACM
890 if (!calculated)
891 printed += fprintf(fp, " ? ");
892 else if (duration >= 1.0)
60c907ab
ACM
893 printed += color_fprintf(fp, PERF_COLOR_RED, "%6.3f ms", duration);
894 else if (duration >= 0.01)
895 printed += color_fprintf(fp, PERF_COLOR_YELLOW, "%6.3f ms", duration);
896 else
897 printed += color_fprintf(fp, PERF_COLOR_NORMAL, "%6.3f ms", duration);
c24ff998 898 return printed + fprintf(fp, "): ");
60c907ab
ACM
899}
900
f994592d
ACM
901/**
902 * filename.ptr: The filename char pointer that will be vfs_getname'd
903 * filename.entry_str_pos: Where to insert the string translated from
904 * filename.ptr by the vfs_getname tracepoint/kprobe.
84486caa
ACM
905 * ret_scnprintf: syscall args may set this to a different syscall return
906 * formatter, for instance, fcntl may return fds, file flags, etc.
f994592d 907 */
752fde44
ACM
908struct thread_trace {
909 u64 entry_time;
752fde44 910 bool entry_pending;
efd5745e 911 unsigned long nr_events;
a2ea67d7 912 unsigned long pfmaj, pfmin;
752fde44 913 char *entry_str;
1302d88e 914 double runtime_ms;
7ee57434 915 size_t (*ret_scnprintf)(char *bf, size_t size, struct syscall_arg *arg);
f994592d
ACM
916 struct {
917 unsigned long ptr;
7f4f8001
ACM
918 short int entry_str_pos;
919 bool pending_open;
920 unsigned int namelen;
921 char *name;
f994592d 922 } filename;
75b757ca
ACM
923 struct {
924 int max;
925 char **table;
926 } paths;
bf2575c1
DA
927
928 struct intlist *syscall_stats;
752fde44
ACM
929};
930
931static struct thread_trace *thread_trace__new(void)
932{
75b757ca
ACM
933 struct thread_trace *ttrace = zalloc(sizeof(struct thread_trace));
934
935 if (ttrace)
936 ttrace->paths.max = -1;
937
bf2575c1
DA
938 ttrace->syscall_stats = intlist__new(NULL);
939
75b757ca 940 return ttrace;
752fde44
ACM
941}
942
c24ff998 943static struct thread_trace *thread__trace(struct thread *thread, FILE *fp)
752fde44 944{
efd5745e
ACM
945 struct thread_trace *ttrace;
946
752fde44
ACM
947 if (thread == NULL)
948 goto fail;
949
89dceb22
NK
950 if (thread__priv(thread) == NULL)
951 thread__set_priv(thread, thread_trace__new());
48000a1a 952
89dceb22 953 if (thread__priv(thread) == NULL)
752fde44
ACM
954 goto fail;
955
89dceb22 956 ttrace = thread__priv(thread);
efd5745e
ACM
957 ++ttrace->nr_events;
958
959 return ttrace;
752fde44 960fail:
c24ff998 961 color_fprintf(fp, PERF_COLOR_RED,
752fde44
ACM
962 "WARNING: not enough memory, dropping samples!\n");
963 return NULL;
964}
965
84486caa
ACM
966
967void syscall_arg__set_ret_scnprintf(struct syscall_arg *arg,
7ee57434 968 size_t (*ret_scnprintf)(char *bf, size_t size, struct syscall_arg *arg))
84486caa
ACM
969{
970 struct thread_trace *ttrace = thread__priv(arg->thread);
971
972 ttrace->ret_scnprintf = ret_scnprintf;
973}
974
598d02c5
SF
/*
 * Bit flags.
 * NOTE(review): presumably they select major/minor page fault tracing, the
 * users are not in this part of the file - confirm.
 */
#define TRACE_PFMAJ		(1 << 0)
#define TRACE_PFMIN		(1 << 1)

/* Size, in bytes, of the per thread entry_str buffer. */
static const size_t trace__entry_str_size = 2048;
979
97119f37 980static int trace__set_fd_pathname(struct thread *thread, int fd, const char *pathname)
75b757ca 981{
89dceb22 982 struct thread_trace *ttrace = thread__priv(thread);
75b757ca
ACM
983
984 if (fd > ttrace->paths.max) {
985 char **npath = realloc(ttrace->paths.table, (fd + 1) * sizeof(char *));
986
987 if (npath == NULL)
988 return -1;
989
990 if (ttrace->paths.max != -1) {
991 memset(npath + ttrace->paths.max + 1, 0,
992 (fd - ttrace->paths.max) * sizeof(char *));
993 } else {
994 memset(npath, 0, (fd + 1) * sizeof(char *));
995 }
996
997 ttrace->paths.table = npath;
998 ttrace->paths.max = fd;
999 }
1000
1001 ttrace->paths.table[fd] = strdup(pathname);
1002
1003 return ttrace->paths.table[fd] != NULL ? 0 : -1;
1004}
1005
97119f37
ACM
1006static int thread__read_fd_path(struct thread *thread, int fd)
1007{
1008 char linkname[PATH_MAX], pathname[PATH_MAX];
1009 struct stat st;
1010 int ret;
1011
1012 if (thread->pid_ == thread->tid) {
1013 scnprintf(linkname, sizeof(linkname),
1014 "/proc/%d/fd/%d", thread->pid_, fd);
1015 } else {
1016 scnprintf(linkname, sizeof(linkname),
1017 "/proc/%d/task/%d/fd/%d", thread->pid_, thread->tid, fd);
1018 }
1019
1020 if (lstat(linkname, &st) < 0 || st.st_size + 1 > (off_t)sizeof(pathname))
1021 return -1;
1022
1023 ret = readlink(linkname, pathname, sizeof(pathname));
1024
1025 if (ret < 0 || ret > st.st_size)
1026 return -1;
1027
1028 pathname[ret] = '\0';
1029 return trace__set_fd_pathname(thread, fd, pathname);
1030}
1031
c522739d
ACM
1032static const char *thread__fd_path(struct thread *thread, int fd,
1033 struct trace *trace)
75b757ca 1034{
89dceb22 1035 struct thread_trace *ttrace = thread__priv(thread);
75b757ca
ACM
1036
1037 if (ttrace == NULL)
1038 return NULL;
1039
1040 if (fd < 0)
1041 return NULL;
1042
cdcd1e6b 1043 if ((fd > ttrace->paths.max || ttrace->paths.table[fd] == NULL)) {
c522739d
ACM
1044 if (!trace->live)
1045 return NULL;
1046 ++trace->stats.proc_getname;
cdcd1e6b 1047 if (thread__read_fd_path(thread, fd))
c522739d
ACM
1048 return NULL;
1049 }
75b757ca
ACM
1050
1051 return ttrace->paths.table[fd];
1052}
1053
fc65eb82 1054size_t syscall_arg__scnprintf_fd(char *bf, size_t size, struct syscall_arg *arg)
75b757ca
ACM
1055{
1056 int fd = arg->val;
1057 size_t printed = scnprintf(bf, size, "%d", fd);
c522739d 1058 const char *path = thread__fd_path(arg->thread, fd, arg->trace);
75b757ca
ACM
1059
1060 if (path)
1061 printed += scnprintf(bf + printed, size - printed, "<%s>", path);
1062
1063 return printed;
1064}
1065
1066static size_t syscall_arg__scnprintf_close_fd(char *bf, size_t size,
1067 struct syscall_arg *arg)
1068{
1069 int fd = arg->val;
1070 size_t printed = syscall_arg__scnprintf_fd(bf, size, arg);
89dceb22 1071 struct thread_trace *ttrace = thread__priv(arg->thread);
75b757ca 1072
04662523
ACM
1073 if (ttrace && fd >= 0 && fd <= ttrace->paths.max)
1074 zfree(&ttrace->paths.table[fd]);
75b757ca
ACM
1075
1076 return printed;
1077}
1078
f994592d
ACM
1079static void thread__set_filename_pos(struct thread *thread, const char *bf,
1080 unsigned long ptr)
1081{
1082 struct thread_trace *ttrace = thread__priv(thread);
1083
1084 ttrace->filename.ptr = ptr;
1085 ttrace->filename.entry_str_pos = bf - ttrace->entry_str;
1086}
1087
1088static size_t syscall_arg__scnprintf_filename(char *bf, size_t size,
1089 struct syscall_arg *arg)
1090{
1091 unsigned long ptr = arg->val;
1092
1093 if (!arg->trace->vfs_getname)
1094 return scnprintf(bf, size, "%#x", ptr);
1095
1096 thread__set_filename_pos(arg->thread, bf, ptr);
1097 return 0;
1098}
1099
ae9ed035
ACM
1100static bool trace__filter_duration(struct trace *trace, double t)
1101{
1102 return t < (trace->duration_filter * NSEC_PER_MSEC);
1103}
1104
fd2b2975 1105static size_t __trace__fprintf_tstamp(struct trace *trace, u64 tstamp, FILE *fp)
752fde44
ACM
1106{
1107 double ts = (double)(tstamp - trace->base_time) / NSEC_PER_MSEC;
1108
60c907ab 1109 return fprintf(fp, "%10.3f ", ts);
752fde44
ACM
1110}
1111
fd2b2975
ACM
1112/*
1113 * We're handling tstamp=0 as an undefined tstamp, i.e. like when we are
1114 * using ttrace->entry_time for a thread that receives a sys_exit without
1115 * first having received a sys_enter ("poll" issued before tracing session
1116 * starts, lost sys_enter exit due to ring buffer overflow).
1117 */
1118static size_t trace__fprintf_tstamp(struct trace *trace, u64 tstamp, FILE *fp)
1119{
1120 if (tstamp > 0)
1121 return __trace__fprintf_tstamp(trace, tstamp, fp);
1122
1123 return fprintf(fp, " ? ");
1124}
1125
/*
 * Flags set from the signal handler and read by the rest of the tool.
 * Objects written from a signal handler must be 'volatile sig_atomic_t',
 * a plain bool gives no such guarantee.
 */
static volatile sig_atomic_t done = false;
static volatile sig_atomic_t interrupted = false;

/*
 * Ask the tool to finish, remembering whether the request came from SIGINT
 * or from some other signal.
 */
static void sig_handler(int sig)
{
	done = true;
	interrupted = sig == SIGINT;
}
1134
752fde44 1135static size_t trace__fprintf_entry_head(struct trace *trace, struct thread *thread,
fd2b2975 1136 u64 duration, bool duration_calculated, u64 tstamp, FILE *fp)
752fde44
ACM
1137{
1138 size_t printed = trace__fprintf_tstamp(trace, tstamp, fp);
fd2b2975 1139 printed += fprintf_duration(duration, duration_calculated, fp);
752fde44 1140
50c95cbd
ACM
1141 if (trace->multiple_threads) {
1142 if (trace->show_comm)
1902efe7 1143 printed += fprintf(fp, "%.14s/", thread__comm_str(thread));
38051234 1144 printed += fprintf(fp, "%d ", thread->tid);
50c95cbd 1145 }
752fde44
ACM
1146
1147 return printed;
1148}
1149
c24ff998 1150static int trace__process_event(struct trace *trace, struct machine *machine,
162f0bef 1151 union perf_event *event, struct perf_sample *sample)
752fde44
ACM
1152{
1153 int ret = 0;
1154
1155 switch (event->header.type) {
1156 case PERF_RECORD_LOST:
c24ff998 1157 color_fprintf(trace->output, PERF_COLOR_RED,
752fde44 1158 "LOST %" PRIu64 " events!\n", event->lost.lost);
162f0bef 1159 ret = machine__process_lost_event(machine, event, sample);
3ed5ca2e 1160 break;
752fde44 1161 default:
162f0bef 1162 ret = machine__process_event(machine, event, sample);
752fde44
ACM
1163 break;
1164 }
1165
1166 return ret;
1167}
1168
c24ff998 1169static int trace__tool_process(struct perf_tool *tool,
752fde44 1170 union perf_event *event,
162f0bef 1171 struct perf_sample *sample,
752fde44
ACM
1172 struct machine *machine)
1173{
c24ff998 1174 struct trace *trace = container_of(tool, struct trace, tool);
162f0bef 1175 return trace__process_event(trace, machine, event, sample);
752fde44
ACM
1176}
1177
caf8a0d0
ACM
1178static char *trace__machine__resolve_kernel_addr(void *vmachine, unsigned long long *addrp, char **modp)
1179{
1180 struct machine *machine = vmachine;
1181
1182 if (machine->kptr_restrict_warned)
1183 return NULL;
1184
1185 if (symbol_conf.kptr_restrict) {
1186 pr_warning("Kernel address maps (/proc/{kallsyms,modules}) are restricted.\n\n"
1187 "Check /proc/sys/kernel/kptr_restrict.\n\n"
1188 "Kernel samples will not be resolved.\n");
1189 machine->kptr_restrict_warned = true;
1190 return NULL;
1191 }
1192
1193 return machine__resolve_kernel_addr(vmachine, addrp, modp);
1194}
1195
752fde44
ACM
1196static int trace__symbols_init(struct trace *trace, struct perf_evlist *evlist)
1197{
0a7e6d1b 1198 int err = symbol__init(NULL);
752fde44
ACM
1199
1200 if (err)
1201 return err;
1202
8fb598e5
DA
1203 trace->host = machine__new_host();
1204 if (trace->host == NULL)
1205 return -ENOMEM;
752fde44 1206
caf8a0d0 1207 if (trace_event__register_resolver(trace->host, trace__machine__resolve_kernel_addr) < 0)
706c3da4
ACM
1208 return -errno;
1209
a33fbd56 1210 err = __machine__synthesize_threads(trace->host, &trace->tool, &trace->opts.target,
9d9cad76
KL
1211 evlist->threads, trace__tool_process, false,
1212 trace->opts.proc_map_timeout);
752fde44
ACM
1213 if (err)
1214 symbol__exit();
1215
1216 return err;
1217}
1218
13d4ff3e
ACM
1219static int syscall__set_arg_fmts(struct syscall *sc)
1220{
1221 struct format_field *field;
b6565c90 1222 int idx = 0, len;
13d4ff3e 1223
82d4a110
ACM
1224 sc->arg_fmt = calloc(sc->nr_args, sizeof(*sc->arg_fmt));
1225 if (sc->arg_fmt == NULL)
13d4ff3e
ACM
1226 return -1;
1227
82d4a110
ACM
1228 for (field = sc->args; field; field = field->next, ++idx) {
1229 if (sc->fmt) {
1230 sc->arg_fmt[idx] = sc->fmt->arg[idx];
1231
1232 if (sc->fmt->arg[idx].scnprintf)
1233 continue;
1234 }
1f115cb7 1235
82d4a110 1236 if (strcmp(field->type, "const char *") == 0 &&
12f3ca4f
ACM
1237 (strcmp(field->name, "filename") == 0 ||
1238 strcmp(field->name, "path") == 0 ||
1239 strcmp(field->name, "pathname") == 0))
82d4a110 1240 sc->arg_fmt[idx].scnprintf = SCA_FILENAME;
beccb2b5 1241 else if (field->flags & FIELD_IS_POINTER)
82d4a110 1242 sc->arg_fmt[idx].scnprintf = syscall_arg__scnprintf_hex;
d1d438a3 1243 else if (strcmp(field->type, "pid_t") == 0)
82d4a110 1244 sc->arg_fmt[idx].scnprintf = SCA_PID;
ba2f22cf 1245 else if (strcmp(field->type, "umode_t") == 0)
82d4a110 1246 sc->arg_fmt[idx].scnprintf = SCA_MODE_T;
b6565c90
ACM
1247 else if ((strcmp(field->type, "int") == 0 ||
1248 strcmp(field->type, "unsigned int") == 0 ||
1249 strcmp(field->type, "long") == 0) &&
1250 (len = strlen(field->name)) >= 2 &&
1251 strcmp(field->name + len - 2, "fd") == 0) {
1252 /*
1253 * /sys/kernel/tracing/events/syscalls/sys_enter*
1254 * egrep 'field:.*fd;' .../format|sed -r 's/.*field:([a-z ]+) [a-z_]*fd.+/\1/g'|sort|uniq -c
1255 * 65 int
1256 * 23 unsigned int
1257 * 7 unsigned long
1258 */
82d4a110 1259 sc->arg_fmt[idx].scnprintf = SCA_FD;
b6565c90 1260 }
13d4ff3e
ACM
1261 }
1262
1263 return 0;
1264}
1265
514f1c67
ACM
1266static int trace__read_syscall_info(struct trace *trace, int id)
1267{
1268 char tp_name[128];
1269 struct syscall *sc;
fd0db102 1270 const char *name = syscalltbl__name(trace->sctbl, id);
3a531260
ACM
1271
1272 if (name == NULL)
1273 return -1;
514f1c67
ACM
1274
1275 if (id > trace->syscalls.max) {
1276 struct syscall *nsyscalls = realloc(trace->syscalls.table, (id + 1) * sizeof(*sc));
1277
1278 if (nsyscalls == NULL)
1279 return -1;
1280
1281 if (trace->syscalls.max != -1) {
1282 memset(nsyscalls + trace->syscalls.max + 1, 0,
1283 (id - trace->syscalls.max) * sizeof(*sc));
1284 } else {
1285 memset(nsyscalls, 0, (id + 1) * sizeof(*sc));
1286 }
1287
1288 trace->syscalls.table = nsyscalls;
1289 trace->syscalls.max = id;
1290 }
1291
1292 sc = trace->syscalls.table + id;
3a531260 1293 sc->name = name;
2ae3a312 1294
3a531260 1295 sc->fmt = syscall_fmt__find(sc->name);
514f1c67 1296
aec1930b 1297 snprintf(tp_name, sizeof(tp_name), "sys_enter_%s", sc->name);
97978b3e 1298 sc->tp_format = trace_event__tp_format("syscalls", tp_name);
aec1930b 1299
8dd2a131 1300 if (IS_ERR(sc->tp_format) && sc->fmt && sc->fmt->alias) {
aec1930b 1301 snprintf(tp_name, sizeof(tp_name), "sys_enter_%s", sc->fmt->alias);
97978b3e 1302 sc->tp_format = trace_event__tp_format("syscalls", tp_name);
aec1930b 1303 }
514f1c67 1304
8dd2a131 1305 if (IS_ERR(sc->tp_format))
13d4ff3e
ACM
1306 return -1;
1307
f208bd8d
ACM
1308 sc->args = sc->tp_format->format.fields;
1309 sc->nr_args = sc->tp_format->format.nr_fields;
c42de706
TS
1310 /*
1311 * We need to check and discard the first variable '__syscall_nr'
1312 * or 'nr' that mean the syscall number. It is needless here.
1313 * So drop '__syscall_nr' or 'nr' field but does not exist on older kernels.
1314 */
1315 if (sc->args && (!strcmp(sc->args->name, "__syscall_nr") || !strcmp(sc->args->name, "nr"))) {
f208bd8d
ACM
1316 sc->args = sc->args->next;
1317 --sc->nr_args;
1318 }
1319
5089f20e
ACM
1320 sc->is_exit = !strcmp(name, "exit_group") || !strcmp(name, "exit");
1321
13d4ff3e 1322 return syscall__set_arg_fmts(sc);
514f1c67
ACM
1323}
1324
d0cc439b
ACM
1325static int trace__validate_ev_qualifier(struct trace *trace)
1326{
8b3ce757 1327 int err = 0, i;
d0cc439b
ACM
1328 struct str_node *pos;
1329
8b3ce757
ACM
1330 trace->ev_qualifier_ids.nr = strlist__nr_entries(trace->ev_qualifier);
1331 trace->ev_qualifier_ids.entries = malloc(trace->ev_qualifier_ids.nr *
1332 sizeof(trace->ev_qualifier_ids.entries[0]));
1333
1334 if (trace->ev_qualifier_ids.entries == NULL) {
1335 fputs("Error:\tNot enough memory for allocating events qualifier ids\n",
1336 trace->output);
1337 err = -EINVAL;
1338 goto out;
1339 }
1340
1341 i = 0;
1342
602a1f4d 1343 strlist__for_each_entry(pos, trace->ev_qualifier) {
d0cc439b 1344 const char *sc = pos->s;
fd0db102 1345 int id = syscalltbl__id(trace->sctbl, sc);
d0cc439b 1346
8b3ce757 1347 if (id < 0) {
d0cc439b
ACM
1348 if (err == 0) {
1349 fputs("Error:\tInvalid syscall ", trace->output);
1350 err = -EINVAL;
1351 } else {
1352 fputs(", ", trace->output);
1353 }
1354
1355 fputs(sc, trace->output);
1356 }
8b3ce757
ACM
1357
1358 trace->ev_qualifier_ids.entries[i++] = id;
d0cc439b
ACM
1359 }
1360
1361 if (err < 0) {
1362 fputs("\nHint:\ttry 'perf list syscalls:sys_enter_*'"
1363 "\nHint:\tand: 'man syscalls'\n", trace->output);
8b3ce757
ACM
1364 zfree(&trace->ev_qualifier_ids.entries);
1365 trace->ev_qualifier_ids.nr = 0;
d0cc439b 1366 }
8b3ce757 1367out:
d0cc439b
ACM
1368 return err;
1369}
1370
55d43bca
DA
1371/*
1372 * args is to be interpreted as a series of longs but we need to handle
1373 * 8-byte unaligned accesses. args points to raw_data within the event
1374 * and raw_data is guaranteed to be 8-byte unaligned because it is
1375 * preceded by raw_size which is a u32. So we need to copy args to a temp
1376 * variable to read it. Most notably this avoids extended load instructions
1377 * on unaligned addresses
1378 */
f9f83b33
ACM
1379static unsigned long __syscall_arg__val(unsigned char *args, u8 idx)
1380{
1381 unsigned long val;
1382 unsigned char *p = args + sizeof(unsigned long) * idx;
1383
1384 memcpy(&val, p, sizeof(val));
1385 return val;
1386}
1387
1388unsigned long syscall_arg__val(struct syscall_arg *arg, u8 idx)
1389{
1390 return __syscall_arg__val(arg->args, idx);
1391}
55d43bca 1392
752fde44 1393static size_t syscall__scnprintf_args(struct syscall *sc, char *bf, size_t size,
55d43bca 1394 unsigned char *args, struct trace *trace,
75b757ca 1395 struct thread *thread)
514f1c67 1396{
514f1c67 1397 size_t printed = 0;
55d43bca 1398 unsigned long val;
84486caa
ACM
1399 struct thread_trace *ttrace = thread__priv(thread);
1400
1401 /*
1402 * Things like fcntl will set this in its 'cmd' formatter to pick the
1403 * right formatter for the return value (an fd? file flags?), which is
1404 * not needed for syscalls that always return a given type, say an fd.
1405 */
1406 ttrace->ret_scnprintf = NULL;
514f1c67 1407
f208bd8d 1408 if (sc->args != NULL) {
514f1c67 1409 struct format_field *field;
01533e97
ACM
1410 u8 bit = 1;
1411 struct syscall_arg arg = {
f9f83b33 1412 .args = args,
75b757ca
ACM
1413 .idx = 0,
1414 .mask = 0,
1415 .trace = trace,
1416 .thread = thread,
01533e97 1417 };
6e7eeb51 1418
f208bd8d 1419 for (field = sc->args; field;
01533e97
ACM
1420 field = field->next, ++arg.idx, bit <<= 1) {
1421 if (arg.mask & bit)
6e7eeb51 1422 continue;
55d43bca 1423
f9f83b33 1424 val = syscall_arg__val(&arg, arg.idx);
55d43bca 1425
4aa58232
ACM
1426 /*
1427 * Suppress this argument if its value is zero and
1428 * and we don't have a string associated in an
1429 * strarray for it.
1430 */
55d43bca 1431 if (val == 0 &&
82d4a110 1432 !(sc->arg_fmt &&
d47737d5
ACM
1433 (sc->arg_fmt[arg.idx].show_zero ||
1434 sc->arg_fmt[arg.idx].scnprintf == SCA_STRARRAY ||
82d4a110
ACM
1435 sc->arg_fmt[arg.idx].scnprintf == SCA_STRARRAYS) &&
1436 sc->arg_fmt[arg.idx].parm))
22ae5cf1
ACM
1437 continue;
1438
752fde44 1439 printed += scnprintf(bf + printed, size - printed,
13d4ff3e 1440 "%s%s: ", printed ? ", " : "", field->name);
82d4a110 1441 if (sc->arg_fmt && sc->arg_fmt[arg.idx].scnprintf) {
55d43bca 1442 arg.val = val;
82d4a110
ACM
1443 if (sc->arg_fmt[arg.idx].parm)
1444 arg.parm = sc->arg_fmt[arg.idx].parm;
1445 printed += sc->arg_fmt[arg.idx].scnprintf(bf + printed, size - printed, &arg);
6e7eeb51 1446 } else {
13d4ff3e 1447 printed += scnprintf(bf + printed, size - printed,
55d43bca 1448 "%ld", val);
6e7eeb51 1449 }
514f1c67 1450 }
4c4d6e51
ACM
1451 } else if (IS_ERR(sc->tp_format)) {
1452 /*
1453 * If we managed to read the tracepoint /format file, then we
1454 * may end up not having any args, like with gettid(), so only
1455 * print the raw args when we didn't manage to read it.
1456 */
01533e97
ACM
1457 int i = 0;
1458
514f1c67 1459 while (i < 6) {
f9f83b33 1460 val = __syscall_arg__val(args, i);
752fde44
ACM
1461 printed += scnprintf(bf + printed, size - printed,
1462 "%sarg%d: %ld",
55d43bca 1463 printed ? ", " : "", i, val);
514f1c67
ACM
1464 ++i;
1465 }
1466 }
1467
1468 return printed;
1469}
1470
/*
 * Signature of the per tracepoint sample handlers in this file, such as
 * trace__sys_enter() and trace__sys_exit().
 */
typedef int (*tracepoint_handler)(struct trace *trace,
				  struct perf_evsel *evsel,
				  union perf_event *event,
				  struct perf_sample *sample);
1474
1475static struct syscall *trace__syscall_info(struct trace *trace,
bf2575c1 1476 struct perf_evsel *evsel, int id)
ba3d7dee 1477{
ba3d7dee
ACM
1478
1479 if (id < 0) {
adaa18bf
ACM
1480
1481 /*
1482 * XXX: Noticed on x86_64, reproduced as far back as 3.0.36, haven't tried
1483 * before that, leaving at a higher verbosity level till that is
1484 * explained. Reproduced with plain ftrace with:
1485 *
1486 * echo 1 > /t/events/raw_syscalls/sys_exit/enable
1487 * grep "NR -1 " /t/trace_pipe
1488 *
1489 * After generating some load on the machine.
1490 */
1491 if (verbose > 1) {
1492 static u64 n;
1493 fprintf(trace->output, "Invalid syscall %d id, skipping (%s, %" PRIu64 ") ...\n",
1494 id, perf_evsel__name(evsel), ++n);
1495 }
ba3d7dee
ACM
1496 return NULL;
1497 }
1498
1499 if ((id > trace->syscalls.max || trace->syscalls.table[id].name == NULL) &&
1500 trace__read_syscall_info(trace, id))
1501 goto out_cant_read;
1502
1503 if ((id > trace->syscalls.max || trace->syscalls.table[id].name == NULL))
1504 goto out_cant_read;
1505
1506 return &trace->syscalls.table[id];
1507
1508out_cant_read:
bb963e16 1509 if (verbose > 0) {
7c304ee0
ACM
1510 fprintf(trace->output, "Problems reading syscall %d", id);
1511 if (id <= trace->syscalls.max && trace->syscalls.table[id].name != NULL)
1512 fprintf(trace->output, "(%s)", trace->syscalls.table[id].name);
1513 fputs(" information\n", trace->output);
1514 }
ba3d7dee
ACM
1515 return NULL;
1516}
1517
bf2575c1
DA
1518static void thread__update_stats(struct thread_trace *ttrace,
1519 int id, struct perf_sample *sample)
1520{
1521 struct int_node *inode;
1522 struct stats *stats;
1523 u64 duration = 0;
1524
1525 inode = intlist__findnew(ttrace->syscall_stats, id);
1526 if (inode == NULL)
1527 return;
1528
1529 stats = inode->priv;
1530 if (stats == NULL) {
1531 stats = malloc(sizeof(struct stats));
1532 if (stats == NULL)
1533 return;
1534 init_stats(stats);
1535 inode->priv = stats;
1536 }
1537
1538 if (ttrace->entry_time && sample->time > ttrace->entry_time)
1539 duration = sample->time - ttrace->entry_time;
1540
1541 update_stats(stats, duration);
1542}
1543
e596663e
ACM
1544static int trace__printf_interrupted_entry(struct trace *trace, struct perf_sample *sample)
1545{
1546 struct thread_trace *ttrace;
1547 u64 duration;
1548 size_t printed;
1549
1550 if (trace->current == NULL)
1551 return 0;
1552
1553 ttrace = thread__priv(trace->current);
1554
1555 if (!ttrace->entry_pending)
1556 return 0;
1557
1558 duration = sample->time - ttrace->entry_time;
1559
fd2b2975 1560 printed = trace__fprintf_entry_head(trace, trace->current, duration, true, ttrace->entry_time, trace->output);
e596663e
ACM
1561 printed += fprintf(trace->output, "%-70s) ...\n", ttrace->entry_str);
1562 ttrace->entry_pending = false;
1563
1564 return printed;
1565}
1566
ba3d7dee 1567static int trace__sys_enter(struct trace *trace, struct perf_evsel *evsel,
0c82adcf 1568 union perf_event *event __maybe_unused,
ba3d7dee
ACM
1569 struct perf_sample *sample)
1570{
752fde44 1571 char *msg;
ba3d7dee 1572 void *args;
752fde44 1573 size_t printed = 0;
2ae3a312 1574 struct thread *thread;
b91fc39f 1575 int id = perf_evsel__sc_tp_uint(evsel, id, sample), err = -1;
bf2575c1 1576 struct syscall *sc = trace__syscall_info(trace, evsel, id);
2ae3a312
ACM
1577 struct thread_trace *ttrace;
1578
1579 if (sc == NULL)
1580 return -1;
ba3d7dee 1581
8fb598e5 1582 thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
c24ff998 1583 ttrace = thread__trace(thread, trace->output);
2ae3a312 1584 if (ttrace == NULL)
b91fc39f 1585 goto out_put;
ba3d7dee 1586
77170988 1587 args = perf_evsel__sc_tp_ptr(evsel, args, sample);
752fde44
ACM
1588
1589 if (ttrace->entry_str == NULL) {
e4d44e83 1590 ttrace->entry_str = malloc(trace__entry_str_size);
752fde44 1591 if (!ttrace->entry_str)
b91fc39f 1592 goto out_put;
752fde44
ACM
1593 }
1594
5cf9c84e 1595 if (!(trace->duration_filter || trace->summary_only || trace->min_stack))
6ebad5c1 1596 trace__printf_interrupted_entry(trace, sample);
e596663e 1597
752fde44
ACM
1598 ttrace->entry_time = sample->time;
1599 msg = ttrace->entry_str;
e4d44e83 1600 printed += scnprintf(msg + printed, trace__entry_str_size - printed, "%s(", sc->name);
752fde44 1601
e4d44e83 1602 printed += syscall__scnprintf_args(sc, msg + printed, trace__entry_str_size - printed,
75b757ca 1603 args, trace, thread);
752fde44 1604
5089f20e 1605 if (sc->is_exit) {
5cf9c84e 1606 if (!(trace->duration_filter || trace->summary_only || trace->min_stack)) {
fd2b2975 1607 trace__fprintf_entry_head(trace, thread, 0, false, ttrace->entry_time, trace->output);
c008f78f 1608 fprintf(trace->output, "%-70s)\n", ttrace->entry_str);
ae9ed035 1609 }
7f4f8001 1610 } else {
752fde44 1611 ttrace->entry_pending = true;
7f4f8001
ACM
1612 /* See trace__vfs_getname & trace__sys_exit */
1613 ttrace->filename.pending_open = false;
1614 }
ba3d7dee 1615
f3b623b8
ACM
1616 if (trace->current != thread) {
1617 thread__put(trace->current);
1618 trace->current = thread__get(thread);
1619 }
b91fc39f
ACM
1620 err = 0;
1621out_put:
1622 thread__put(thread);
1623 return err;
ba3d7dee
ACM
1624}
1625
5cf9c84e
ACM
1626static int trace__resolve_callchain(struct trace *trace, struct perf_evsel *evsel,
1627 struct perf_sample *sample,
1628 struct callchain_cursor *cursor)
202ff968
ACM
1629{
1630 struct addr_location al;
5cf9c84e
ACM
1631
1632 if (machine__resolve(trace->host, &al, sample) < 0 ||
1633 thread__resolve_callchain(al.thread, cursor, evsel, sample, NULL, NULL, trace->max_stack))
1634 return -1;
1635
1636 return 0;
1637}
1638
1639static int trace__fprintf_callchain(struct trace *trace, struct perf_sample *sample)
1640{
202ff968 1641 /* TODO: user-configurable print_opts */
e20ab86e
ACM
1642 const unsigned int print_opts = EVSEL__PRINT_SYM |
1643 EVSEL__PRINT_DSO |
1644 EVSEL__PRINT_UNKNOWN_AS_ADDR;
202ff968 1645
d327e60c 1646 return sample__fprintf_callchain(sample, 38, print_opts, &callchain_cursor, trace->output);
202ff968
ACM
1647}
1648
ba3d7dee 1649static int trace__sys_exit(struct trace *trace, struct perf_evsel *evsel,
0c82adcf 1650 union perf_event *event __maybe_unused,
ba3d7dee
ACM
1651 struct perf_sample *sample)
1652{
2c82c3ad 1653 long ret;
60c907ab 1654 u64 duration = 0;
fd2b2975 1655 bool duration_calculated = false;
2ae3a312 1656 struct thread *thread;
5cf9c84e 1657 int id = perf_evsel__sc_tp_uint(evsel, id, sample), err = -1, callchain_ret = 0;
bf2575c1 1658 struct syscall *sc = trace__syscall_info(trace, evsel, id);
2ae3a312
ACM
1659 struct thread_trace *ttrace;
1660
1661 if (sc == NULL)
1662 return -1;
ba3d7dee 1663
8fb598e5 1664 thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
c24ff998 1665 ttrace = thread__trace(thread, trace->output);
2ae3a312 1666 if (ttrace == NULL)
b91fc39f 1667 goto out_put;
ba3d7dee 1668
bf2575c1
DA
1669 if (trace->summary)
1670 thread__update_stats(ttrace, id, sample);
1671
77170988 1672 ret = perf_evsel__sc_tp_uint(evsel, ret, sample);
ba3d7dee 1673
fd0db102 1674 if (id == trace->open_id && ret >= 0 && ttrace->filename.pending_open) {
7f4f8001
ACM
1675 trace__set_fd_pathname(thread, ret, ttrace->filename.name);
1676 ttrace->filename.pending_open = false;
c522739d
ACM
1677 ++trace->stats.vfs_getname;
1678 }
1679
ae9ed035 1680 if (ttrace->entry_time) {
60c907ab 1681 duration = sample->time - ttrace->entry_time;
ae9ed035
ACM
1682 if (trace__filter_duration(trace, duration))
1683 goto out;
fd2b2975 1684 duration_calculated = true;
ae9ed035
ACM
1685 } else if (trace->duration_filter)
1686 goto out;
60c907ab 1687
5cf9c84e
ACM
1688 if (sample->callchain) {
1689 callchain_ret = trace__resolve_callchain(trace, evsel, sample, &callchain_cursor);
1690 if (callchain_ret == 0) {
1691 if (callchain_cursor.nr < trace->min_stack)
1692 goto out;
1693 callchain_ret = 1;
1694 }
1695 }
1696
fd2eabaf
DA
1697 if (trace->summary_only)
1698 goto out;
1699
fd2b2975 1700 trace__fprintf_entry_head(trace, thread, duration, duration_calculated, ttrace->entry_time, trace->output);
752fde44
ACM
1701
1702 if (ttrace->entry_pending) {
c24ff998 1703 fprintf(trace->output, "%-70s", ttrace->entry_str);
752fde44 1704 } else {
c24ff998
ACM
1705 fprintf(trace->output, " ... [");
1706 color_fprintf(trace->output, PERF_COLOR_YELLOW, "continued");
1707 fprintf(trace->output, "]: %s()", sc->name);
752fde44
ACM
1708 }
1709
da3c9a44
ACM
1710 if (sc->fmt == NULL) {
1711signed_print:
2c82c3ad 1712 fprintf(trace->output, ") = %ld", ret);
11c8e39f 1713 } else if (ret < 0 && (sc->fmt->errmsg || sc->fmt->errpid)) {
942a91ed 1714 char bf[STRERR_BUFSIZE];
c8b5f2c9 1715 const char *emsg = str_error_r(-ret, bf, sizeof(bf)),
ba3d7dee
ACM
1716 *e = audit_errno_to_name(-ret);
1717
c24ff998 1718 fprintf(trace->output, ") = -1 %s %s", e, emsg);
da3c9a44 1719 } else if (ret == 0 && sc->fmt->timeout)
c24ff998 1720 fprintf(trace->output, ") = 0 Timeout");
84486caa
ACM
1721 else if (ttrace->ret_scnprintf) {
1722 char bf[1024];
7ee57434
ACM
1723 struct syscall_arg arg = {
1724 .val = ret,
1725 .thread = thread,
1726 .trace = trace,
1727 };
1728 ttrace->ret_scnprintf(bf, sizeof(bf), &arg);
84486caa
ACM
1729 ttrace->ret_scnprintf = NULL;
1730 fprintf(trace->output, ") = %s", bf);
1731 } else if (sc->fmt->hexret)
2c82c3ad 1732 fprintf(trace->output, ") = %#lx", ret);
11c8e39f
ACM
1733 else if (sc->fmt->errpid) {
1734 struct thread *child = machine__find_thread(trace->host, ret, ret);
1735
1736 if (child != NULL) {
1737 fprintf(trace->output, ") = %ld", ret);
1738 if (child->comm_set)
1739 fprintf(trace->output, " (%s)", thread__comm_str(child));
1740 thread__put(child);
1741 }
1742 } else
da3c9a44 1743 goto signed_print;
ba3d7dee 1744
c24ff998 1745 fputc('\n', trace->output);
566a0885 1746
5cf9c84e
ACM
1747 if (callchain_ret > 0)
1748 trace__fprintf_callchain(trace, sample);
1749 else if (callchain_ret < 0)
1750 pr_err("Problem processing %s callchain, skipping...\n", perf_evsel__name(evsel));
ae9ed035 1751out:
752fde44 1752 ttrace->entry_pending = false;
b91fc39f
ACM
1753 err = 0;
1754out_put:
1755 thread__put(thread);
1756 return err;
ba3d7dee
ACM
1757}
1758
c522739d 1759static int trace__vfs_getname(struct trace *trace, struct perf_evsel *evsel,
0c82adcf 1760 union perf_event *event __maybe_unused,
c522739d
ACM
1761 struct perf_sample *sample)
1762{
f994592d
ACM
1763 struct thread *thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
1764 struct thread_trace *ttrace;
1765 size_t filename_len, entry_str_len, to_move;
1766 ssize_t remaining_space;
1767 char *pos;
7f4f8001 1768 const char *filename = perf_evsel__rawptr(evsel, sample, "pathname");
f994592d
ACM
1769
1770 if (!thread)
1771 goto out;
1772
1773 ttrace = thread__priv(thread);
1774 if (!ttrace)
ef65e96e 1775 goto out_put;
f994592d 1776
7f4f8001 1777 filename_len = strlen(filename);
39f0e7a8 1778 if (filename_len == 0)
ef65e96e 1779 goto out_put;
7f4f8001
ACM
1780
1781 if (ttrace->filename.namelen < filename_len) {
1782 char *f = realloc(ttrace->filename.name, filename_len + 1);
1783
1784 if (f == NULL)
ef65e96e 1785 goto out_put;
7f4f8001
ACM
1786
1787 ttrace->filename.namelen = filename_len;
1788 ttrace->filename.name = f;
1789 }
1790
1791 strcpy(ttrace->filename.name, filename);
1792 ttrace->filename.pending_open = true;
1793
f994592d 1794 if (!ttrace->filename.ptr)
ef65e96e 1795 goto out_put;
f994592d
ACM
1796
1797 entry_str_len = strlen(ttrace->entry_str);
1798 remaining_space = trace__entry_str_size - entry_str_len - 1; /* \0 */
1799 if (remaining_space <= 0)
ef65e96e 1800 goto out_put;
f994592d 1801
f994592d
ACM
1802 if (filename_len > (size_t)remaining_space) {
1803 filename += filename_len - remaining_space;
1804 filename_len = remaining_space;
1805 }
1806
1807 to_move = entry_str_len - ttrace->filename.entry_str_pos + 1; /* \0 */
1808 pos = ttrace->entry_str + ttrace->filename.entry_str_pos;
1809 memmove(pos + filename_len, pos, to_move);
1810 memcpy(pos, filename, filename_len);
1811
1812 ttrace->filename.ptr = 0;
1813 ttrace->filename.entry_str_pos = 0;
ef65e96e
ACM
1814out_put:
1815 thread__put(thread);
f994592d 1816out:
c522739d
ACM
1817 return 0;
1818}
1819
1302d88e 1820static int trace__sched_stat_runtime(struct trace *trace, struct perf_evsel *evsel,
0c82adcf 1821 union perf_event *event __maybe_unused,
1302d88e
ACM
1822 struct perf_sample *sample)
1823{
1824 u64 runtime = perf_evsel__intval(evsel, sample, "runtime");
1825 double runtime_ms = (double)runtime / NSEC_PER_MSEC;
8fb598e5 1826 struct thread *thread = machine__findnew_thread(trace->host,
314add6b
AH
1827 sample->pid,
1828 sample->tid);
c24ff998 1829 struct thread_trace *ttrace = thread__trace(thread, trace->output);
1302d88e
ACM
1830
1831 if (ttrace == NULL)
1832 goto out_dump;
1833
1834 ttrace->runtime_ms += runtime_ms;
1835 trace->runtime_ms += runtime_ms;
ef65e96e 1836out_put:
b91fc39f 1837 thread__put(thread);
1302d88e
ACM
1838 return 0;
1839
1840out_dump:
c24ff998 1841 fprintf(trace->output, "%s: comm=%s,pid=%u,runtime=%" PRIu64 ",vruntime=%" PRIu64 ")\n",
1302d88e
ACM
1842 evsel->name,
1843 perf_evsel__strval(evsel, sample, "comm"),
1844 (pid_t)perf_evsel__intval(evsel, sample, "pid"),
1845 runtime,
1846 perf_evsel__intval(evsel, sample, "vruntime"));
ef65e96e 1847 goto out_put;
1302d88e
ACM
1848}
1849
1d6c9407
WN
1850static void bpf_output__printer(enum binary_printer_ops op,
1851 unsigned int val, void *extra)
1852{
1853 FILE *output = extra;
1854 unsigned char ch = (unsigned char)val;
1855
1856 switch (op) {
1857 case BINARY_PRINT_CHAR_DATA:
1858 fprintf(output, "%c", isprint(ch) ? ch : '.');
1859 break;
1860 case BINARY_PRINT_DATA_BEGIN:
1861 case BINARY_PRINT_LINE_BEGIN:
1862 case BINARY_PRINT_ADDR:
1863 case BINARY_PRINT_NUM_DATA:
1864 case BINARY_PRINT_NUM_PAD:
1865 case BINARY_PRINT_SEP:
1866 case BINARY_PRINT_CHAR_PAD:
1867 case BINARY_PRINT_LINE_END:
1868 case BINARY_PRINT_DATA_END:
1869 default:
1870 break;
1871 }
1872}
1873
1874static void bpf_output__fprintf(struct trace *trace,
1875 struct perf_sample *sample)
1876{
1877 print_binary(sample->raw_data, sample->raw_size, 8,
1878 bpf_output__printer, trace->output);
1879}
1880
14a052df
ACM
1881static int trace__event_handler(struct trace *trace, struct perf_evsel *evsel,
1882 union perf_event *event __maybe_unused,
1883 struct perf_sample *sample)
1884{
7ad35615
ACM
1885 int callchain_ret = 0;
1886
1887 if (sample->callchain) {
1888 callchain_ret = trace__resolve_callchain(trace, evsel, sample, &callchain_cursor);
1889 if (callchain_ret == 0) {
1890 if (callchain_cursor.nr < trace->min_stack)
1891 goto out;
1892 callchain_ret = 1;
1893 }
1894 }
1895
14a052df
ACM
1896 trace__printf_interrupted_entry(trace, sample);
1897 trace__fprintf_tstamp(trace, sample->time, trace->output);
0808921a
ACM
1898
1899 if (trace->trace_syscalls)
1900 fprintf(trace->output, "( ): ");
1901
1902 fprintf(trace->output, "%s:", evsel->name);
14a052df 1903
1d6c9407
WN
1904 if (perf_evsel__is_bpf_output(evsel)) {
1905 bpf_output__fprintf(trace, sample);
1906 } else if (evsel->tp_format) {
14a052df
ACM
1907 event_format__fprintf(evsel->tp_format, sample->cpu,
1908 sample->raw_data, sample->raw_size,
1909 trace->output);
1910 }
1911
1912 fprintf(trace->output, ")\n");
202ff968 1913
7ad35615
ACM
1914 if (callchain_ret > 0)
1915 trace__fprintf_callchain(trace, sample);
1916 else if (callchain_ret < 0)
1917 pr_err("Problem processing %s callchain, skipping...\n", perf_evsel__name(evsel));
1918out:
14a052df
ACM
1919 return 0;
1920}
1921
598d02c5
SF
1922static void print_location(FILE *f, struct perf_sample *sample,
1923 struct addr_location *al,
1924 bool print_dso, bool print_sym)
1925{
1926
bb963e16 1927 if ((verbose > 0 || print_dso) && al->map)
598d02c5
SF
1928 fprintf(f, "%s@", al->map->dso->long_name);
1929
bb963e16 1930 if ((verbose > 0 || print_sym) && al->sym)
4414a3c5 1931 fprintf(f, "%s+0x%" PRIx64, al->sym->name,
598d02c5
SF
1932 al->addr - al->sym->start);
1933 else if (al->map)
4414a3c5 1934 fprintf(f, "0x%" PRIx64, al->addr);
598d02c5 1935 else
4414a3c5 1936 fprintf(f, "0x%" PRIx64, sample->addr);
598d02c5
SF
1937}
1938
1939static int trace__pgfault(struct trace *trace,
1940 struct perf_evsel *evsel,
473398a2 1941 union perf_event *event __maybe_unused,
598d02c5
SF
1942 struct perf_sample *sample)
1943{
1944 struct thread *thread;
598d02c5
SF
1945 struct addr_location al;
1946 char map_type = 'd';
a2ea67d7 1947 struct thread_trace *ttrace;
b91fc39f 1948 int err = -1;
1df54290 1949 int callchain_ret = 0;
598d02c5
SF
1950
1951 thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
1df54290
ACM
1952
1953 if (sample->callchain) {
1954 callchain_ret = trace__resolve_callchain(trace, evsel, sample, &callchain_cursor);
1955 if (callchain_ret == 0) {
1956 if (callchain_cursor.nr < trace->min_stack)
1957 goto out_put;
1958 callchain_ret = 1;
1959 }
1960 }
1961
a2ea67d7
SF
1962 ttrace = thread__trace(thread, trace->output);
1963 if (ttrace == NULL)
b91fc39f 1964 goto out_put;
a2ea67d7
SF
1965
1966 if (evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MAJ)
1967 ttrace->pfmaj++;
1968 else
1969 ttrace->pfmin++;
1970
1971 if (trace->summary_only)
b91fc39f 1972 goto out;
598d02c5 1973
473398a2 1974 thread__find_addr_location(thread, sample->cpumode, MAP__FUNCTION,
598d02c5
SF
1975 sample->ip, &al);
1976
fd2b2975 1977 trace__fprintf_entry_head(trace, thread, 0, true, sample->time, trace->output);
598d02c5
SF
1978
1979 fprintf(trace->output, "%sfault [",
1980 evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MAJ ?
1981 "maj" : "min");
1982
1983 print_location(trace->output, sample, &al, false, true);
1984
1985 fprintf(trace->output, "] => ");
1986
473398a2 1987 thread__find_addr_location(thread, sample->cpumode, MAP__VARIABLE,
598d02c5
SF
1988 sample->addr, &al);
1989
1990 if (!al.map) {
473398a2 1991 thread__find_addr_location(thread, sample->cpumode,
598d02c5
SF
1992 MAP__FUNCTION, sample->addr, &al);
1993
1994 if (al.map)
1995 map_type = 'x';
1996 else
1997 map_type = '?';
1998 }
1999
2000 print_location(trace->output, sample, &al, true, false);
2001
2002 fprintf(trace->output, " (%c%c)\n", map_type, al.level);
0c3a6ef4 2003
1df54290
ACM
2004 if (callchain_ret > 0)
2005 trace__fprintf_callchain(trace, sample);
2006 else if (callchain_ret < 0)
2007 pr_err("Problem processing %s callchain, skipping...\n", perf_evsel__name(evsel));
b91fc39f
ACM
2008out:
2009 err = 0;
2010out_put:
2011 thread__put(thread);
2012 return err;
598d02c5
SF
2013}
2014
e6001980 2015static void trace__set_base_time(struct trace *trace,
8a07a809 2016 struct perf_evsel *evsel,
e6001980
ACM
2017 struct perf_sample *sample)
2018{
8a07a809
ACM
2019 /*
2020 * BPF events were not setting PERF_SAMPLE_TIME, so be more robust
2021 * and don't use sample->time unconditionally, we may end up having
2022 * some other event in the future without PERF_SAMPLE_TIME for good
2023 * reason, i.e. we may not be interested in its timestamps, just in
2024 * it taking place, picking some piece of information when it
2025 * appears in our event stream (vfs_getname comes to mind).
2026 */
2027 if (trace->base_time == 0 && !trace->full_time &&
2028 (evsel->attr.sample_type & PERF_SAMPLE_TIME))
e6001980
ACM
2029 trace->base_time = sample->time;
2030}
2031
6810fc91 2032static int trace__process_sample(struct perf_tool *tool,
0c82adcf 2033 union perf_event *event,
6810fc91
DA
2034 struct perf_sample *sample,
2035 struct perf_evsel *evsel,
2036 struct machine *machine __maybe_unused)
2037{
2038 struct trace *trace = container_of(tool, struct trace, tool);
aa07df6e 2039 struct thread *thread;
6810fc91
DA
2040 int err = 0;
2041
744a9719 2042 tracepoint_handler handler = evsel->handler;
6810fc91 2043
aa07df6e
DA
2044 thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
2045 if (thread && thread__is_filtered(thread))
ef65e96e 2046 goto out;
bdc89661 2047
e6001980 2048 trace__set_base_time(trace, evsel, sample);
6810fc91 2049
3160565f
DA
2050 if (handler) {
2051 ++trace->nr_events;
0c82adcf 2052 handler(trace, evsel, event, sample);
3160565f 2053 }
ef65e96e
ACM
2054out:
2055 thread__put(thread);
6810fc91
DA
2056 return err;
2057}
2058
1e28fe0a 2059static int trace__record(struct trace *trace, int argc, const char **argv)
5e2485b1
DA
2060{
2061 unsigned int rec_argc, i, j;
2062 const char **rec_argv;
2063 const char * const record_args[] = {
2064 "record",
2065 "-R",
2066 "-m", "1024",
2067 "-c", "1",
5e2485b1
DA
2068 };
2069
1e28fe0a
SF
2070 const char * const sc_args[] = { "-e", };
2071 unsigned int sc_args_nr = ARRAY_SIZE(sc_args);
2072 const char * const majpf_args[] = { "-e", "major-faults" };
2073 unsigned int majpf_args_nr = ARRAY_SIZE(majpf_args);
2074 const char * const minpf_args[] = { "-e", "minor-faults" };
2075 unsigned int minpf_args_nr = ARRAY_SIZE(minpf_args);
2076
9aca7f17 2077 /* +1 is for the event string below */
1e28fe0a
SF
2078 rec_argc = ARRAY_SIZE(record_args) + sc_args_nr + 1 +
2079 majpf_args_nr + minpf_args_nr + argc;
5e2485b1
DA
2080 rec_argv = calloc(rec_argc + 1, sizeof(char *));
2081
2082 if (rec_argv == NULL)
2083 return -ENOMEM;
2084
1e28fe0a 2085 j = 0;
5e2485b1 2086 for (i = 0; i < ARRAY_SIZE(record_args); i++)
1e28fe0a
SF
2087 rec_argv[j++] = record_args[i];
2088
e281a960
SF
2089 if (trace->trace_syscalls) {
2090 for (i = 0; i < sc_args_nr; i++)
2091 rec_argv[j++] = sc_args[i];
2092
2093 /* event string may be different for older kernels - e.g., RHEL6 */
2094 if (is_valid_tracepoint("raw_syscalls:sys_enter"))
2095 rec_argv[j++] = "raw_syscalls:sys_enter,raw_syscalls:sys_exit";
2096 else if (is_valid_tracepoint("syscalls:sys_enter"))
2097 rec_argv[j++] = "syscalls:sys_enter,syscalls:sys_exit";
2098 else {
2099 pr_err("Neither raw_syscalls nor syscalls events exist.\n");
2100 return -1;
2101 }
9aca7f17 2102 }
9aca7f17 2103
1e28fe0a
SF
2104 if (trace->trace_pgfaults & TRACE_PFMAJ)
2105 for (i = 0; i < majpf_args_nr; i++)
2106 rec_argv[j++] = majpf_args[i];
2107
2108 if (trace->trace_pgfaults & TRACE_PFMIN)
2109 for (i = 0; i < minpf_args_nr; i++)
2110 rec_argv[j++] = minpf_args[i];
2111
2112 for (i = 0; i < (unsigned int)argc; i++)
2113 rec_argv[j++] = argv[i];
5e2485b1 2114
b0ad8ea6 2115 return cmd_record(j, rec_argv);
5e2485b1
DA
2116}
2117
bf2575c1
DA
2118static size_t trace__fprintf_thread_summary(struct trace *trace, FILE *fp);
2119
08c98776 2120static bool perf_evlist__add_vfs_getname(struct perf_evlist *evlist)
c522739d 2121{
ef503831 2122 struct perf_evsel *evsel = perf_evsel__newtp("probe", "vfs_getname");
8dd2a131
JO
2123
2124 if (IS_ERR(evsel))
08c98776 2125 return false;
c522739d
ACM
2126
2127 if (perf_evsel__field(evsel, "pathname") == NULL) {
2128 perf_evsel__delete(evsel);
08c98776 2129 return false;
c522739d
ACM
2130 }
2131
744a9719 2132 evsel->handler = trace__vfs_getname;
c522739d 2133 perf_evlist__add(evlist, evsel);
08c98776 2134 return true;
c522739d
ACM
2135}
2136
0ae537cb 2137static struct perf_evsel *perf_evsel__new_pgfault(u64 config)
598d02c5
SF
2138{
2139 struct perf_evsel *evsel;
2140 struct perf_event_attr attr = {
2141 .type = PERF_TYPE_SOFTWARE,
2142 .mmap_data = 1,
598d02c5
SF
2143 };
2144
2145 attr.config = config;
0524798c 2146 attr.sample_period = 1;
598d02c5
SF
2147
2148 event_attr_init(&attr);
2149
2150 evsel = perf_evsel__new(&attr);
0ae537cb
ACM
2151 if (evsel)
2152 evsel->handler = trace__pgfault;
598d02c5 2153
0ae537cb 2154 return evsel;
598d02c5
SF
2155}
2156
ddbb1b13
ACM
2157static void trace__handle_event(struct trace *trace, union perf_event *event, struct perf_sample *sample)
2158{
2159 const u32 type = event->header.type;
2160 struct perf_evsel *evsel;
2161
ddbb1b13
ACM
2162 if (type != PERF_RECORD_SAMPLE) {
2163 trace__process_event(trace, trace->host, event, sample);
2164 return;
2165 }
2166
2167 evsel = perf_evlist__id2evsel(trace->evlist, sample->id);
2168 if (evsel == NULL) {
2169 fprintf(trace->output, "Unknown tp ID %" PRIu64 ", skipping...\n", sample->id);
2170 return;
2171 }
2172
e6001980
ACM
2173 trace__set_base_time(trace, evsel, sample);
2174
ddbb1b13
ACM
2175 if (evsel->attr.type == PERF_TYPE_TRACEPOINT &&
2176 sample->raw_data == NULL) {
2177 fprintf(trace->output, "%s sample with no payload for tid: %d, cpu %d, raw_size=%d, skipping...\n",
2178 perf_evsel__name(evsel), sample->tid,
2179 sample->cpu, sample->raw_size);
2180 } else {
2181 tracepoint_handler handler = evsel->handler;
2182 handler(trace, evsel, event, sample);
2183 }
2184}
2185
c27366f0
ACM
2186static int trace__add_syscall_newtp(struct trace *trace)
2187{
2188 int ret = -1;
2189 struct perf_evlist *evlist = trace->evlist;
2190 struct perf_evsel *sys_enter, *sys_exit;
2191
2192 sys_enter = perf_evsel__syscall_newtp("sys_enter", trace__sys_enter);
2193 if (sys_enter == NULL)
2194 goto out;
2195
2196 if (perf_evsel__init_sc_tp_ptr_field(sys_enter, args))
2197 goto out_delete_sys_enter;
2198
2199 sys_exit = perf_evsel__syscall_newtp("sys_exit", trace__sys_exit);
2200 if (sys_exit == NULL)
2201 goto out_delete_sys_enter;
2202
2203 if (perf_evsel__init_sc_tp_uint_field(sys_exit, ret))
2204 goto out_delete_sys_exit;
2205
2206 perf_evlist__add(evlist, sys_enter);
2207 perf_evlist__add(evlist, sys_exit);
2208
2ddd5c04 2209 if (callchain_param.enabled && !trace->kernel_syscallchains) {
44621819
ACM
2210 /*
2211 * We're interested only in the user space callchain
2212 * leading to the syscall, allow overriding that for
2213 * debugging reasons using --kernel_syscall_callchains
2214 */
2215 sys_exit->attr.exclude_callchain_kernel = 1;
2216 }
2217
8b3ce757
ACM
2218 trace->syscalls.events.sys_enter = sys_enter;
2219 trace->syscalls.events.sys_exit = sys_exit;
c27366f0
ACM
2220
2221 ret = 0;
2222out:
2223 return ret;
2224
2225out_delete_sys_exit:
2226 perf_evsel__delete_priv(sys_exit);
2227out_delete_sys_enter:
2228 perf_evsel__delete_priv(sys_enter);
2229 goto out;
2230}
2231
19867b61
ACM
2232static int trace__set_ev_qualifier_filter(struct trace *trace)
2233{
2234 int err = -1;
b15d0a4c 2235 struct perf_evsel *sys_exit;
19867b61
ACM
2236 char *filter = asprintf_expr_inout_ints("id", !trace->not_ev_qualifier,
2237 trace->ev_qualifier_ids.nr,
2238 trace->ev_qualifier_ids.entries);
2239
2240 if (filter == NULL)
2241 goto out_enomem;
2242
3541c034
MP
2243 if (!perf_evsel__append_tp_filter(trace->syscalls.events.sys_enter,
2244 filter)) {
b15d0a4c 2245 sys_exit = trace->syscalls.events.sys_exit;
3541c034 2246 err = perf_evsel__append_tp_filter(sys_exit, filter);
b15d0a4c 2247 }
19867b61
ACM
2248
2249 free(filter);
2250out:
2251 return err;
2252out_enomem:
2253 errno = ENOMEM;
2254 goto out;
2255}
c27366f0 2256
f15eb531 2257static int trace__run(struct trace *trace, int argc, const char **argv)
514f1c67 2258{
14a052df 2259 struct perf_evlist *evlist = trace->evlist;
0ae537cb 2260 struct perf_evsel *evsel, *pgfault_maj = NULL, *pgfault_min = NULL;
efd5745e
ACM
2261 int err = -1, i;
2262 unsigned long before;
f15eb531 2263 const bool forks = argc > 0;
46fb3c21 2264 bool draining = false;
514f1c67 2265
75b757ca
ACM
2266 trace->live = true;
2267
c27366f0 2268 if (trace->trace_syscalls && trace__add_syscall_newtp(trace))
801c67b0 2269 goto out_error_raw_syscalls;
514f1c67 2270
e281a960 2271 if (trace->trace_syscalls)
08c98776 2272 trace->vfs_getname = perf_evlist__add_vfs_getname(evlist);
c522739d 2273
0ae537cb
ACM
2274 if ((trace->trace_pgfaults & TRACE_PFMAJ)) {
2275 pgfault_maj = perf_evsel__new_pgfault(PERF_COUNT_SW_PAGE_FAULTS_MAJ);
2276 if (pgfault_maj == NULL)
2277 goto out_error_mem;
2278 perf_evlist__add(evlist, pgfault_maj);
e2726d99 2279 }
598d02c5 2280
0ae537cb
ACM
2281 if ((trace->trace_pgfaults & TRACE_PFMIN)) {
2282 pgfault_min = perf_evsel__new_pgfault(PERF_COUNT_SW_PAGE_FAULTS_MIN);
2283 if (pgfault_min == NULL)
2284 goto out_error_mem;
2285 perf_evlist__add(evlist, pgfault_min);
2286 }
598d02c5 2287
1302d88e 2288 if (trace->sched &&
2cc990ba
ACM
2289 perf_evlist__add_newtp(evlist, "sched", "sched_stat_runtime",
2290 trace__sched_stat_runtime))
2291 goto out_error_sched_stat_runtime;
1302d88e 2292
514f1c67
ACM
2293 err = perf_evlist__create_maps(evlist, &trace->opts.target);
2294 if (err < 0) {
c24ff998 2295 fprintf(trace->output, "Problems parsing the target to trace, check your options!\n");
514f1c67
ACM
2296 goto out_delete_evlist;
2297 }
2298
752fde44
ACM
2299 err = trace__symbols_init(trace, evlist);
2300 if (err < 0) {
c24ff998 2301 fprintf(trace->output, "Problems initializing symbol libraries!\n");
03ad9747 2302 goto out_delete_evlist;
752fde44
ACM
2303 }
2304
fde54b78
ACM
2305 perf_evlist__config(evlist, &trace->opts, NULL);
2306
0c3a6ef4
ACM
2307 if (callchain_param.enabled) {
2308 bool use_identifier = false;
2309
2310 if (trace->syscalls.events.sys_exit) {
2311 perf_evsel__config_callchain(trace->syscalls.events.sys_exit,
2312 &trace->opts, &callchain_param);
2313 use_identifier = true;
2314 }
2315
2316 if (pgfault_maj) {
2317 perf_evsel__config_callchain(pgfault_maj, &trace->opts, &callchain_param);
2318 use_identifier = true;
2319 }
2320
2321 if (pgfault_min) {
2322 perf_evsel__config_callchain(pgfault_min, &trace->opts, &callchain_param);
2323 use_identifier = true;
2324 }
2325
2326 if (use_identifier) {
2327 /*
2328 * Now we have evsels with different sample_ids, use
2329 * PERF_SAMPLE_IDENTIFIER to map from sample to evsel
2330 * from a fixed position in each ring buffer record.
2331 *
2332 * As of this the changeset introducing this comment, this
2333 * isn't strictly needed, as the fields that can come before
2334 * PERF_SAMPLE_ID are all used, but we'll probably disable
2335 * some of those for things like copying the payload of
2336 * pointer syscall arguments, and for vfs_getname we don't
2337 * need PERF_SAMPLE_ADDR and PERF_SAMPLE_IP, so do this
2338 * here as a warning we need to use PERF_SAMPLE_IDENTIFIER.
2339 */
2340 perf_evlist__set_sample_bit(evlist, IDENTIFIER);
2341 perf_evlist__reset_sample_bit(evlist, ID);
2342 }
fde54b78 2343 }
514f1c67 2344
f15eb531
NK
2345 signal(SIGCHLD, sig_handler);
2346 signal(SIGINT, sig_handler);
2347
2348 if (forks) {
6ef73ec4 2349 err = perf_evlist__prepare_workload(evlist, &trace->opts.target,
735f7e0b 2350 argv, false, NULL);
f15eb531 2351 if (err < 0) {
c24ff998 2352 fprintf(trace->output, "Couldn't run the workload!\n");
03ad9747 2353 goto out_delete_evlist;
f15eb531
NK
2354 }
2355 }
2356
514f1c67 2357 err = perf_evlist__open(evlist);
a8f23d8f
ACM
2358 if (err < 0)
2359 goto out_error_open;
514f1c67 2360
ba504235
WN
2361 err = bpf__apply_obj_config();
2362 if (err) {
2363 char errbuf[BUFSIZ];
2364
2365 bpf__strerror_apply_obj_config(err, errbuf, sizeof(errbuf));
2366 pr_err("ERROR: Apply config to BPF failed: %s\n",
2367 errbuf);
2368 goto out_error_open;
2369 }
2370
241b057c
ACM
2371 /*
2372 * Better not use !target__has_task() here because we need to cover the
2373 * case where no threads were specified in the command line, but a
2374 * workload was, and in that case we will fill in the thread_map when
2375 * we fork the workload in perf_evlist__prepare_workload.
2376 */
f078c385
ACM
2377 if (trace->filter_pids.nr > 0)
2378 err = perf_evlist__set_filter_pids(evlist, trace->filter_pids.nr, trace->filter_pids.entries);
e13798c7 2379 else if (thread_map__pid(evlist->threads, 0) == -1)
f078c385
ACM
2380 err = perf_evlist__set_filter_pid(evlist, getpid());
2381
94ad89bc
ACM
2382 if (err < 0)
2383 goto out_error_mem;
2384
19867b61
ACM
2385 if (trace->ev_qualifier_ids.nr > 0) {
2386 err = trace__set_ev_qualifier_filter(trace);
2387 if (err < 0)
2388 goto out_errno;
19867b61 2389
2e5e5f87
ACM
2390 pr_debug("event qualifier tracepoint filter: %s\n",
2391 trace->syscalls.events.sys_exit->filter);
2392 }
19867b61 2393
94ad89bc
ACM
2394 err = perf_evlist__apply_filters(evlist, &evsel);
2395 if (err < 0)
2396 goto out_error_apply_filters;
241b057c 2397
f885037e 2398 err = perf_evlist__mmap(evlist, trace->opts.mmap_pages, false);
e09b18d4
ACM
2399 if (err < 0)
2400 goto out_error_mmap;
514f1c67 2401
e36b7821 2402 if (!target__none(&trace->opts.target) && !trace->opts.initial_delay)
cb24d01d
ACM
2403 perf_evlist__enable(evlist);
2404
f15eb531
NK
2405 if (forks)
2406 perf_evlist__start_workload(evlist);
2407
e36b7821
AB
2408 if (trace->opts.initial_delay) {
2409 usleep(trace->opts.initial_delay * 1000);
2410 perf_evlist__enable(evlist);
2411 }
2412
e13798c7 2413 trace->multiple_threads = thread_map__pid(evlist->threads, 0) == -1 ||
42052bea
ACM
2414 evlist->threads->nr > 1 ||
2415 perf_evlist__first(evlist)->attr.inherit;
514f1c67 2416again:
efd5745e 2417 before = trace->nr_events;
514f1c67
ACM
2418
2419 for (i = 0; i < evlist->nr_mmaps; i++) {
2420 union perf_event *event;
2421
2422 while ((event = perf_evlist__mmap_read(evlist, i)) != NULL) {
514f1c67 2423 struct perf_sample sample;
514f1c67 2424
efd5745e 2425 ++trace->nr_events;
514f1c67 2426
514f1c67
ACM
2427 err = perf_evlist__parse_sample(evlist, event, &sample);
2428 if (err) {
c24ff998 2429 fprintf(trace->output, "Can't parse sample, err = %d, skipping...\n", err);
8e50d384 2430 goto next_event;
514f1c67
ACM
2431 }
2432
ddbb1b13 2433 trace__handle_event(trace, event, &sample);
8e50d384
ZZ
2434next_event:
2435 perf_evlist__mmap_consume(evlist, i);
20c5f10e 2436
ba209f85
ACM
2437 if (interrupted)
2438 goto out_disable;
02ac5421
ACM
2439
2440 if (done && !draining) {
2441 perf_evlist__disable(evlist);
2442 draining = true;
2443 }
514f1c67
ACM
2444 }
2445 }
2446
efd5745e 2447 if (trace->nr_events == before) {
ba209f85 2448 int timeout = done ? 100 : -1;
f15eb531 2449
46fb3c21
ACM
2450 if (!draining && perf_evlist__poll(evlist, timeout) > 0) {
2451 if (perf_evlist__filter_pollfd(evlist, POLLERR | POLLHUP) == 0)
2452 draining = true;
2453
ba209f85 2454 goto again;
46fb3c21 2455 }
ba209f85
ACM
2456 } else {
2457 goto again;
f15eb531
NK
2458 }
2459
ba209f85 2460out_disable:
f3b623b8
ACM
2461 thread__zput(trace->current);
2462
ba209f85 2463 perf_evlist__disable(evlist);
514f1c67 2464
c522739d
ACM
2465 if (!err) {
2466 if (trace->summary)
2467 trace__fprintf_thread_summary(trace, trace->output);
2468
2469 if (trace->show_tool_stats) {
2470 fprintf(trace->output, "Stats:\n "
2471 " vfs_getname : %" PRIu64 "\n"
2472 " proc_getname: %" PRIu64 "\n",
2473 trace->stats.vfs_getname,
2474 trace->stats.proc_getname);
2475 }
2476 }
bf2575c1 2477
514f1c67
ACM
2478out_delete_evlist:
2479 perf_evlist__delete(evlist);
14a052df 2480 trace->evlist = NULL;
75b757ca 2481 trace->live = false;
514f1c67 2482 return err;
6ef068cb
ACM
2483{
2484 char errbuf[BUFSIZ];
a8f23d8f 2485
2cc990ba 2486out_error_sched_stat_runtime:
988bdb31 2487 tracing_path__strerror_open_tp(errno, errbuf, sizeof(errbuf), "sched", "sched_stat_runtime");
2cc990ba
ACM
2488 goto out_error;
2489
801c67b0 2490out_error_raw_syscalls:
988bdb31 2491 tracing_path__strerror_open_tp(errno, errbuf, sizeof(errbuf), "raw_syscalls", "sys_(enter|exit)");
a8f23d8f
ACM
2492 goto out_error;
2493
e09b18d4
ACM
2494out_error_mmap:
2495 perf_evlist__strerror_mmap(evlist, errno, errbuf, sizeof(errbuf));
2496 goto out_error;
2497
a8f23d8f
ACM
2498out_error_open:
2499 perf_evlist__strerror_open(evlist, errno, errbuf, sizeof(errbuf));
2500
2501out_error:
6ef068cb 2502 fprintf(trace->output, "%s\n", errbuf);
87f91868 2503 goto out_delete_evlist;
94ad89bc
ACM
2504
2505out_error_apply_filters:
2506 fprintf(trace->output,
2507 "Failed to set filter \"%s\" on event %s with %d (%s)\n",
2508 evsel->filter, perf_evsel__name(evsel), errno,
c8b5f2c9 2509 str_error_r(errno, errbuf, sizeof(errbuf)));
94ad89bc 2510 goto out_delete_evlist;
514f1c67 2511}
5ed08dae
ACM
2512out_error_mem:
2513 fprintf(trace->output, "Not enough memory to run!\n");
2514 goto out_delete_evlist;
19867b61
ACM
2515
2516out_errno:
2517 fprintf(trace->output, "errno=%d,%s\n", errno, strerror(errno));
2518 goto out_delete_evlist;
a8f23d8f 2519}
514f1c67 2520
6810fc91
DA
2521static int trace__replay(struct trace *trace)
2522{
2523 const struct perf_evsel_str_handler handlers[] = {
c522739d 2524 { "probe:vfs_getname", trace__vfs_getname, },
6810fc91 2525 };
f5fc1412
JO
2526 struct perf_data_file file = {
2527 .path = input_name,
2528 .mode = PERF_DATA_MODE_READ,
e366a6d8 2529 .force = trace->force,
f5fc1412 2530 };
6810fc91 2531 struct perf_session *session;
003824e8 2532 struct perf_evsel *evsel;
6810fc91
DA
2533 int err = -1;
2534
2535 trace->tool.sample = trace__process_sample;
2536 trace->tool.mmap = perf_event__process_mmap;
384c671e 2537 trace->tool.mmap2 = perf_event__process_mmap2;
6810fc91
DA
2538 trace->tool.comm = perf_event__process_comm;
2539 trace->tool.exit = perf_event__process_exit;
2540 trace->tool.fork = perf_event__process_fork;
2541 trace->tool.attr = perf_event__process_attr;
f3b3614a 2542 trace->tool.tracing_data = perf_event__process_tracing_data;
6810fc91 2543 trace->tool.build_id = perf_event__process_build_id;
f3b3614a 2544 trace->tool.namespaces = perf_event__process_namespaces;
6810fc91 2545
0a8cb85c 2546 trace->tool.ordered_events = true;
6810fc91
DA
2547 trace->tool.ordering_requires_timestamps = true;
2548
2549 /* add tid to output */
2550 trace->multiple_threads = true;
2551
f5fc1412 2552 session = perf_session__new(&file, false, &trace->tool);
6810fc91 2553 if (session == NULL)
52e02834 2554 return -1;
6810fc91 2555
aa07df6e
DA
2556 if (trace->opts.target.pid)
2557 symbol_conf.pid_list_str = strdup(trace->opts.target.pid);
2558
2559 if (trace->opts.target.tid)
2560 symbol_conf.tid_list_str = strdup(trace->opts.target.tid);
2561
0a7e6d1b 2562 if (symbol__init(&session->header.env) < 0)
cb2ffae2
NK
2563 goto out;
2564
8fb598e5
DA
2565 trace->host = &session->machines.host;
2566
6810fc91
DA
2567 err = perf_session__set_tracepoints_handlers(session, handlers);
2568 if (err)
2569 goto out;
2570
003824e8
NK
2571 evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
2572 "raw_syscalls:sys_enter");
9aca7f17
DA
2573 /* older kernels have syscalls tp versus raw_syscalls */
2574 if (evsel == NULL)
2575 evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
2576 "syscalls:sys_enter");
003824e8 2577
e281a960
SF
2578 if (evsel &&
2579 (perf_evsel__init_syscall_tp(evsel, trace__sys_enter) < 0 ||
2580 perf_evsel__init_sc_tp_ptr_field(evsel, args))) {
003824e8
NK
2581 pr_err("Error during initialize raw_syscalls:sys_enter event\n");
2582 goto out;
2583 }
2584
2585 evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
2586 "raw_syscalls:sys_exit");
9aca7f17
DA
2587 if (evsel == NULL)
2588 evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
2589 "syscalls:sys_exit");
e281a960
SF
2590 if (evsel &&
2591 (perf_evsel__init_syscall_tp(evsel, trace__sys_exit) < 0 ||
2592 perf_evsel__init_sc_tp_uint_field(evsel, ret))) {
003824e8 2593 pr_err("Error during initialize raw_syscalls:sys_exit event\n");
6810fc91
DA
2594 goto out;
2595 }
2596
e5cadb93 2597 evlist__for_each_entry(session->evlist, evsel) {
1e28fe0a
SF
2598 if (evsel->attr.type == PERF_TYPE_SOFTWARE &&
2599 (evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MAJ ||
2600 evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MIN ||
2601 evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS))
2602 evsel->handler = trace__pgfault;
2603 }
2604
6810fc91
DA
2605 setup_pager();
2606
b7b61cbe 2607 err = perf_session__process_events(session);
6810fc91
DA
2608 if (err)
2609 pr_err("Failed to process events, error %d", err);
2610
bf2575c1
DA
2611 else if (trace->summary)
2612 trace__fprintf_thread_summary(trace, trace->output);
2613
6810fc91
DA
2614out:
2615 perf_session__delete(session);
2616
2617 return err;
2618}
2619
1302d88e
ACM
/*
 * Print the heading of the per-thread summary.
 * Returns what fprintf() reported, converted to size_t.
 */
static size_t trace__fprintf_threads_header(FILE *fp)
{
	return fprintf(fp, "\n Summary of events:\n\n");
}
2628
b535d523
ACM
2629DEFINE_RESORT_RB(syscall_stats, a->msecs > b->msecs,
2630 struct stats *stats;
2631 double msecs;
2632 int syscall;
2633)
2634{
2635 struct int_node *source = rb_entry(nd, struct int_node, rb_node);
2636 struct stats *stats = source->priv;
2637
2638 entry->syscall = source->i;
2639 entry->stats = stats;
2640 entry->msecs = stats ? (u64)stats->n * (avg_stats(stats) / NSEC_PER_MSEC) : 0;
2641}
2642
bf2575c1
DA
2643static size_t thread__dump_stats(struct thread_trace *ttrace,
2644 struct trace *trace, FILE *fp)
2645{
bf2575c1
DA
2646 size_t printed = 0;
2647 struct syscall *sc;
b535d523
ACM
2648 struct rb_node *nd;
2649 DECLARE_RESORT_RB_INTLIST(syscall_stats, ttrace->syscall_stats);
bf2575c1 2650
b535d523 2651 if (syscall_stats == NULL)
bf2575c1
DA
2652 return 0;
2653
2654 printed += fprintf(fp, "\n");
2655
834fd46d
MW
2656 printed += fprintf(fp, " syscall calls total min avg max stddev\n");
2657 printed += fprintf(fp, " (msec) (msec) (msec) (msec) (%%)\n");
2658 printed += fprintf(fp, " --------------- -------- --------- --------- --------- --------- ------\n");
99ff7150 2659
98a91837 2660 resort_rb__for_each_entry(nd, syscall_stats) {
b535d523 2661 struct stats *stats = syscall_stats_entry->stats;
bf2575c1
DA
2662 if (stats) {
2663 double min = (double)(stats->min) / NSEC_PER_MSEC;
2664 double max = (double)(stats->max) / NSEC_PER_MSEC;
2665 double avg = avg_stats(stats);
2666 double pct;
2667 u64 n = (u64) stats->n;
2668
2669 pct = avg ? 100.0 * stddev_stats(stats)/avg : 0.0;
2670 avg /= NSEC_PER_MSEC;
2671
b535d523 2672 sc = &trace->syscalls.table[syscall_stats_entry->syscall];
99ff7150 2673 printed += fprintf(fp, " %-15s", sc->name);
834fd46d 2674 printed += fprintf(fp, " %8" PRIu64 " %9.3f %9.3f %9.3f",
b535d523 2675 n, syscall_stats_entry->msecs, min, avg);
27a778b5 2676 printed += fprintf(fp, " %9.3f %9.2f%%\n", max, pct);
bf2575c1 2677 }
bf2575c1
DA
2678 }
2679
b535d523 2680 resort_rb__delete(syscall_stats);
bf2575c1 2681 printed += fprintf(fp, "\n\n");
1302d88e
ACM
2682
2683 return printed;
2684}
2685
96c14451 2686static size_t trace__fprintf_thread(FILE *fp, struct thread *thread, struct trace *trace)
896cbb56 2687{
96c14451 2688 size_t printed = 0;
89dceb22 2689 struct thread_trace *ttrace = thread__priv(thread);
896cbb56
DA
2690 double ratio;
2691
2692 if (ttrace == NULL)
2693 return 0;
2694
2695 ratio = (double)ttrace->nr_events / trace->nr_events * 100.0;
2696
15e65c69 2697 printed += fprintf(fp, " %s (%d), ", thread__comm_str(thread), thread->tid);
99ff7150 2698 printed += fprintf(fp, "%lu events, ", ttrace->nr_events);
15e65c69 2699 printed += fprintf(fp, "%.1f%%", ratio);
a2ea67d7
SF
2700 if (ttrace->pfmaj)
2701 printed += fprintf(fp, ", %lu majfaults", ttrace->pfmaj);
2702 if (ttrace->pfmin)
2703 printed += fprintf(fp, ", %lu minfaults", ttrace->pfmin);
03548ebf
ACM
2704 if (trace->sched)
2705 printed += fprintf(fp, ", %.3f msec\n", ttrace->runtime_ms);
2706 else if (fputc('\n', fp) != EOF)
2707 ++printed;
2708
bf2575c1 2709 printed += thread__dump_stats(ttrace, trace, fp);
896cbb56 2710
96c14451
ACM
2711 return printed;
2712}
896cbb56 2713
96c14451
ACM
2714static unsigned long thread__nr_events(struct thread_trace *ttrace)
2715{
2716 return ttrace ? ttrace->nr_events : 0;
2717}
2718
/*
 * Instantiate DEFINE_RESORT_RB() for the "threads" flavour.
 *
 * The comparison expression is true when the thread behind 'a' has fewer
 * events than the one behind 'b'; thread__nr_events() makes a thread whose
 * priv pointer is NULL count as having 0 events.  Each sorted entry carries
 * a single field, the struct thread pointer, which the body below fills in
 * from the rb_node being visited.
 *
 * NOTE(review): the resulting iteration order depends on how
 * DEFINE_RESORT_RB() uses the comparison - confirm in rb_resort.h.
 */
2719DEFINE_RESORT_RB(threads, (thread__nr_events(a->thread->priv) < thread__nr_events(b->thread->priv)),
2720 struct thread *thread;
2721)
2722{
2723 entry->thread = rb_entry(nd, struct thread, rb_node);
896cbb56
DA
2724}
2725
1302d88e
ACM
2726static size_t trace__fprintf_thread_summary(struct trace *trace, FILE *fp)
2727{
96c14451
ACM
2728 DECLARE_RESORT_RB_MACHINE_THREADS(threads, trace->host);
2729 size_t printed = trace__fprintf_threads_header(fp);
2730 struct rb_node *nd;
1302d88e 2731
96c14451
ACM
2732 if (threads == NULL) {
2733 fprintf(fp, "%s", "Error sorting output by nr_events!\n");
2734 return 0;
2735 }
2736
98a91837 2737 resort_rb__for_each_entry(nd, threads)
96c14451 2738 printed += trace__fprintf_thread(fp, threads_entry->thread, trace);
896cbb56 2739
96c14451
ACM
2740 resort_rb__delete(threads);
2741
2742 return printed;
1302d88e
ACM
2743}
2744
ae9ed035
ACM
2745static int trace__set_duration(const struct option *opt, const char *str,
2746 int unset __maybe_unused)
2747{
2748 struct trace *trace = opt->value;
2749
2750 trace->duration_filter = atof(str);
2751 return 0;
2752}
2753
f078c385
ACM
2754static int trace__set_filter_pids(const struct option *opt, const char *str,
2755 int unset __maybe_unused)
2756{
2757 int ret = -1;
2758 size_t i;
2759 struct trace *trace = opt->value;
2760 /*
2761 * FIXME: introduce a intarray class, plain parse csv and create a
2762 * { int nr, int entries[] } struct...
2763 */
2764 struct intlist *list = intlist__new(str);
2765
2766 if (list == NULL)
2767 return -1;
2768
2769 i = trace->filter_pids.nr = intlist__nr_entries(list) + 1;
2770 trace->filter_pids.entries = calloc(i, sizeof(pid_t));
2771
2772 if (trace->filter_pids.entries == NULL)
2773 goto out;
2774
2775 trace->filter_pids.entries[0] = getpid();
2776
2777 for (i = 1; i < trace->filter_pids.nr; ++i)
2778 trace->filter_pids.entries[i] = intlist__entry(list, i - 1)->i;
2779
2780 intlist__delete(list);
2781 ret = 0;
2782out:
2783 return ret;
2784}
2785
c24ff998
ACM
2786static int trace__open_output(struct trace *trace, const char *filename)
2787{
2788 struct stat st;
2789
2790 if (!stat(filename, &st) && st.st_size) {
2791 char oldname[PATH_MAX];
2792
2793 scnprintf(oldname, sizeof(oldname), "%s.old", filename);
2794 unlink(oldname);
2795 rename(filename, oldname);
2796 }
2797
2798 trace->output = fopen(filename, "w");
2799
2800 return trace->output == NULL ? -errno : 0;
2801}
2802
598d02c5
SF
2803static int parse_pagefaults(const struct option *opt, const char *str,
2804 int unset __maybe_unused)
2805{
2806 int *trace_pgfaults = opt->value;
2807
2808 if (strcmp(str, "all") == 0)
2809 *trace_pgfaults |= TRACE_PFMAJ | TRACE_PFMIN;
2810 else if (strcmp(str, "maj") == 0)
2811 *trace_pgfaults |= TRACE_PFMAJ;
2812 else if (strcmp(str, "min") == 0)
2813 *trace_pgfaults |= TRACE_PFMIN;
2814 else
2815 return -1;
2816
2817 return 0;
2818}
2819
14a052df
ACM
2820static void evlist__set_evsel_handler(struct perf_evlist *evlist, void *handler)
2821{
2822 struct perf_evsel *evsel;
2823
e5cadb93 2824 evlist__for_each_entry(evlist, evsel)
14a052df
ACM
2825 evsel->handler = handler;
2826}
2827
017037ff
ACM
2828/*
2829 * XXX: Hackish, just splitting the combined -e+--event (syscalls
2830 * (raw_syscalls:{sys_{enter,exit}} + events (tracepoints, HW, SW, etc) to use
2831 * existing facilities unchanged (trace->ev_qualifier + parse_options()).
2832 *
2833 * It'd be better to introduce a parse_options() variant that would return a
2834 * list with the terms it didn't match to an event...
2835 */
2836static int trace__parse_events_option(const struct option *opt, const char *str,
2837 int unset __maybe_unused)
2838{
2839 struct trace *trace = (struct trace *)opt->value;
2840 const char *s = str;
2841 char *sep = NULL, *lists[2] = { NULL, NULL, };
2842 int len = strlen(str), err = -1, list;
2843 char *strace_groups_dir = system_path(STRACE_GROUPS_DIR);
2844 char group_name[PATH_MAX];
2845
2846 if (strace_groups_dir == NULL)
2847 return -1;
2848
2849 if (*s == '!') {
2850 ++s;
2851 trace->not_ev_qualifier = true;
2852 }
2853
2854 while (1) {
2855 if ((sep = strchr(s, ',')) != NULL)
2856 *sep = '\0';
2857
2858 list = 0;
2859 if (syscalltbl__id(trace->sctbl, s) >= 0) {
2860 list = 1;
2861 } else {
2862 path__join(group_name, sizeof(group_name), strace_groups_dir, s);
2863 if (access(group_name, R_OK) == 0)
2864 list = 1;
2865 }
2866
2867 if (lists[list]) {
2868 sprintf(lists[list] + strlen(lists[list]), ",%s", s);
2869 } else {
2870 lists[list] = malloc(len);
2871 if (lists[list] == NULL)
2872 goto out;
2873 strcpy(lists[list], s);
2874 }
2875
2876 if (!sep)
2877 break;
2878
2879 *sep = ',';
2880 s = sep + 1;
2881 }
2882
2883 if (lists[1] != NULL) {
2884 struct strlist_config slist_config = {
2885 .dirname = strace_groups_dir,
2886 };
2887
2888 trace->ev_qualifier = strlist__new(lists[1], &slist_config);
2889 if (trace->ev_qualifier == NULL) {
2890 fputs("Not enough memory to parse event qualifier", trace->output);
2891 goto out;
2892 }
2893
2894 if (trace__validate_ev_qualifier(trace))
2895 goto out;
2896 }
2897
2898 err = 0;
2899
2900 if (lists[0]) {
2901 struct option o = OPT_CALLBACK('e', "event", &trace->evlist, "event",
2902 "event selector. use 'perf list' to list available events",
2903 parse_events_option);
2904 err = parse_events_option(&o, lists[0], 0);
2905 }
2906out:
2907 if (sep)
2908 *sep = ',';
2909
2910 return err;
2911}
2912
/*
 * Entry point of the 'perf trace' builtin.
 *
 * Builds the default struct trace, parses the command line, applies the
 * option dependent defaults (page fault sampling, mmap pages, max stack,
 * callchains), validates the target and then dispatches to
 * trace__record(), trace__replay() or trace__run().
 *
 * Returns the value of the dispatched function, or the error set by the
 * setup step that failed.
 */
b0ad8ea6 2913int cmd_trace(int argc, const char **argv)
514f1c67 2914{
6fdd9cb7 2915 const char *trace_usage[] = {
f15eb531
NK
2916 "perf trace [<options>] [<command>]",
2917 "perf trace [<options>] -- <command> [<options>]",
5e2485b1
DA
2918 "perf trace record [<options>] [<command>]",
2919 "perf trace record [<options>] -- <command> [<options>]",
514f1c67
ACM
2920 NULL
2921 };
/*
 * Defaults. mmap_pages and max_stack start out as UINT_MAX, which is
 * checked after option parsing to tell whether the user set them.
 */
2922 struct trace trace = {
514f1c67
ACM
2923 .syscalls = {
2924 . max = -1,
2925 },
2926 .opts = {
2927 .target = {
2928 .uid = UINT_MAX,
2929 .uses_mmap = true,
2930 },
2931 .user_freq = UINT_MAX,
2932 .user_interval = ULLONG_MAX,
509051ea 2933 .no_buffering = true,
38d5447d 2934 .mmap_pages = UINT_MAX,
9d9cad76 2935 .proc_map_timeout = 500,
514f1c67 2936 },
007d66a0 2937 .output = stderr,
50c95cbd 2938 .show_comm = true,
e281a960 2939 .trace_syscalls = true,
44621819 2940 .kernel_syscallchains = false,
05614993 2941 .max_stack = UINT_MAX,
514f1c67 2942 };
c24ff998 2943 const char *output_name = NULL;
514f1c67 2944 const struct option trace_options[] = {
017037ff
ACM
2945 OPT_CALLBACK('e', "event", &trace, "event",
2946 "event/syscall selector. use 'perf list' to list available events",
2947 trace__parse_events_option),
50c95cbd
ACM
2948 OPT_BOOLEAN(0, "comm", &trace.show_comm,
2949 "show the thread COMM next to its id"),
c522739d 2950 OPT_BOOLEAN(0, "tool_stats", &trace.show_tool_stats, "show tool stats"),
017037ff
ACM
2951 OPT_CALLBACK(0, "expr", &trace, "expr", "list of syscalls/events to trace",
2952 trace__parse_events_option),
c24ff998 2953 OPT_STRING('o', "output", &output_name, "file", "output file name"),
6810fc91 2954 OPT_STRING('i', "input", &input_name, "file", "Analyze events in file"),
514f1c67
ACM
2955 OPT_STRING('p', "pid", &trace.opts.target.pid, "pid",
2956 "trace events on existing process id"),
ac9be8ee 2957 OPT_STRING('t', "tid", &trace.opts.target.tid, "tid",
514f1c67 2958 "trace events on existing thread id"),
fa0e4ffe
ACM
2959 OPT_CALLBACK(0, "filter-pids", &trace, "CSV list of pids",
2960 "pids to filter (by the kernel)", trace__set_filter_pids),
ac9be8ee 2961 OPT_BOOLEAN('a', "all-cpus", &trace.opts.target.system_wide,
514f1c67 2962 "system-wide collection from all CPUs"),
ac9be8ee 2963 OPT_STRING('C', "cpu", &trace.opts.target.cpu_list, "cpu",
514f1c67 2964 "list of cpus to monitor"),
6810fc91 2965 OPT_BOOLEAN(0, "no-inherit", &trace.opts.no_inherit,
514f1c67 2966 "child tasks do not inherit counters"),
994a1f78
JO
2967 OPT_CALLBACK('m', "mmap-pages", &trace.opts.mmap_pages, "pages",
2968 "number of mmap data pages",
2969 perf_evlist__parse_mmap_pages),
ac9be8ee 2970 OPT_STRING('u', "uid", &trace.opts.target.uid_str, "user",
514f1c67 2971 "user to profile"),
ae9ed035
ACM
2972 OPT_CALLBACK(0, "duration", &trace, "float",
2973 "show only events with duration > N.M ms",
2974 trace__set_duration),
1302d88e 2975 OPT_BOOLEAN(0, "sched", &trace.sched, "show blocking scheduler events"),
7c304ee0 2976 OPT_INCR('v', "verbose", &verbose, "be more verbose"),
4bb09192
DA
2977 OPT_BOOLEAN('T', "time", &trace.full_time,
2978 "Show full timestamp, not time relative to first start"),
fd2eabaf
DA
2979 OPT_BOOLEAN('s', "summary", &trace.summary_only,
2980 "Show only syscall summary with statistics"),
2981 OPT_BOOLEAN('S', "with-summary", &trace.summary,
2982 "Show all syscalls and summary with statistics"),
598d02c5
SF
2983 OPT_CALLBACK_DEFAULT('F', "pf", &trace.trace_pgfaults, "all|maj|min",
2984 "Trace pagefaults", parse_pagefaults, "maj"),
e281a960 2985 OPT_BOOLEAN(0, "syscalls", &trace.trace_syscalls, "Trace syscalls"),
e366a6d8 2986 OPT_BOOLEAN('f', "force", &trace.force, "don't complain, do it"),
566a0885
MW
2987 OPT_CALLBACK(0, "call-graph", &trace.opts,
2988 "record_mode[,record_size]", record_callchain_help,
2989 &record_parse_callchain_opt),
44621819
ACM
2990 OPT_BOOLEAN(0, "kernel-syscall-graph", &trace.kernel_syscallchains,
2991 "Show the kernel callchains on the syscall exit path"),
5cf9c84e
ACM
2992 OPT_UINTEGER(0, "min-stack", &trace.min_stack,
2993 "Set the minimum stack depth when parsing the callchain, "
2994 "anything below the specified depth will be ignored."),
c6d4a494
ACM
2995 OPT_UINTEGER(0, "max-stack", &trace.max_stack,
2996 "Set the maximum stack depth when parsing the callchain, "
2997 "anything beyond the specified depth will be ignored. "
4cb93446 2998 "Default: kernel.perf_event_max_stack or " __stringify(PERF_MAX_STACK_DEPTH)),
9d9cad76
KL
2999 OPT_UINTEGER(0, "proc-map-timeout", &trace.opts.proc_map_timeout,
3000 "per thread proc mmap processing timeout in ms"),
e36b7821
AB
3001 OPT_UINTEGER('D', "delay", &trace.opts.initial_delay,
3002 "ms to wait before starting measurement after program "
3003 "start"),
514f1c67
ACM
3004 OPT_END()
3005 };
/* Both flags are cleared below when the UINT_MAX sentinel is still in place */
ccd62a89 3006 bool __maybe_unused max_stack_user_set = true;
f3e459d1 3007 bool mmap_pages_user_set = true;
6fdd9cb7 3008 const char * const trace_subcommands[] = { "record", NULL };
514f1c67 3009 int err;
32caf0d1 3010 char bf[BUFSIZ];
514f1c67 3011
4d08cb80
ACM
3012 signal(SIGSEGV, sighandler_dump_stack);
3013 signal(SIGFPE, sighandler_dump_stack);
3014
/* Created before option parsing: the -e/--expr callbacks use both */
14a052df 3015 trace.evlist = perf_evlist__new();
fd0db102 3016 trace.sctbl = syscalltbl__new();
14a052df 3017
fd0db102 3018 if (trace.evlist == NULL || trace.sctbl == NULL) {
14a052df 3019 pr_err("Not enough memory to run!\n");
ff8f695c 3020 err = -ENOMEM;
14a052df
ACM
3021 goto out;
3022 }
3023
6fdd9cb7
YS
3024 argc = parse_options_subcommand(argc, argv, trace_options, trace_subcommands,
3025 trace_usage, PARSE_OPT_STOP_AT_NON_OPTION);
fd2eabaf 3026
d7888573
WN
3027 err = bpf__setup_stdout(trace.evlist);
3028 if (err) {
3029 bpf__strerror_setup_stdout(trace.evlist, err, bf, sizeof(bf));
3030 pr_err("ERROR: Setup BPF stdout failed: %s\n", bf);
3031 goto out;
3032 }
3033
/* Default error for the 'goto out' paths below that do not set err */
59247e33
ACM
3034 err = -1;
3035
598d02c5
SF
3036 if (trace.trace_pgfaults) {
3037 trace.opts.sample_address = true;
3038 trace.opts.sample_time = true;
3039 }
3040
/* Still the initializer's UINT_MAX: -m/--mmap-pages was not given */
f3e459d1
ACM
3041 if (trace.opts.mmap_pages == UINT_MAX)
3042 mmap_pages_user_set = false;
3043
/* Likewise for --max-stack; pick the default depending on -i/--input */
05614993 3044 if (trace.max_stack == UINT_MAX) {
fe176085 3045 trace.max_stack = input_name ? PERF_MAX_STACK_DEPTH : sysctl_perf_event_max_stack;
05614993
ACM
3046 max_stack_user_set = false;
3047 }
3048
3049#ifdef HAVE_DWARF_UNWIND_SUPPORT
/* A stack depth option without --call-graph turns on "dwarf" callchains */
caa36ed7 3050 if ((trace.min_stack || max_stack_user_set) && !callchain_param.enabled && trace.trace_syscalls)
05614993
ACM
3051 record_opts__parse_callchain(&trace.opts, &callchain_param, "dwarf", false);
3052#endif
3053
2ddd5c04 3054 if (callchain_param.enabled) {
f3e459d1
ACM
3055 if (!mmap_pages_user_set && geteuid() == 0)
3056 trace.opts.mmap_pages = perf_event_mlock_kb_in_pages() * 4;
3057
566a0885 3058 symbol_conf.use_callchain = true;
f3e459d1 3059 }
566a0885 3060
14a052df
ACM
3061 if (trace.evlist->nr_entries > 0)
3062 evlist__set_evsel_handler(trace.evlist, trace__event_handler);
3063
/* 'perf trace record ...': returns directly, bypassing the out labels */
1e28fe0a
SF
3064 if ((argc >= 1) && (strcmp(argv[0], "record") == 0))
3065 return trace__record(&trace, argc-1, &argv[1]);
3066
3067 /* summary_only implies summary option, but don't overwrite summary if set */
3068 if (trace.summary_only)
3069 trace.summary = trace.summary_only;
3070
726f3234
ACM
3071 if (!trace.trace_syscalls && !trace.trace_pgfaults &&
3072 trace.evlist->nr_entries == 0 /* Was --events used? */) {
e281a960
SF
3073 pr_err("Please specify something to trace.\n");
3074 return -1;
3075 }
3076
017037ff 3077 if (!trace.trace_syscalls && trace.ev_qualifier) {
59247e33
ACM
3078 pr_err("The -e option can't be used with --no-syscalls.\n");
3079 goto out;
3080 }
3081
/* -o given: from here on trace.output is that file instead of stderr */
c24ff998
ACM
3082 if (output_name != NULL) {
3083 err = trace__open_output(&trace, output_name);
3084 if (err < 0) {
3085 perror("failed to create output file");
3086 goto out;
3087 }
3088 }
3089
fd0db102
ACM
3090 trace.open_id = syscalltbl__id(trace.sctbl, "open");
3091
602ad878 3092 err = target__validate(&trace.opts.target);
32caf0d1 3093 if (err) {
602ad878 3094 target__strerror(&trace.opts.target, err, bf, sizeof(bf));
c24ff998
ACM
3095 fprintf(trace.output, "%s", bf);
3096 goto out_close;
32caf0d1
NK
3097 }
3098
602ad878 3099 err = target__parse_uid(&trace.opts.target);
514f1c67 3100 if (err) {
602ad878 3101 target__strerror(&trace.opts.target, err, bf, sizeof(bf));
c24ff998
ACM
3102 fprintf(trace.output, "%s", bf);
3103 goto out_close;
514f1c67
ACM
3104 }
3105
/* No workload on the command line and no target selected: go system wide */
602ad878 3106 if (!argc && target__none(&trace.opts.target))
ee76120e
NK
3107 trace.opts.target.system_wide = true;
3108
6810fc91
DA
3109 if (input_name)
3110 err = trace__replay(&trace);
3111 else
3112 err = trace__run(&trace, argc, argv);
1302d88e 3113
c24ff998
ACM
3114out_close:
/* Only close what was opened for -o; otherwise output is still stderr */
3115 if (output_name != NULL)
3116 fclose(trace.output);
3117out:
1302d88e 3118 return err;
514f1c67 3119}