perf trace: Introduce syscall__augmented_args() method
[linux-block.git] / tools / perf / builtin-trace.c
CommitLineData
a598bb5e
ACM
1/*
2 * builtin-trace.c
3 *
4 * Builtin 'trace' command:
5 *
6 * Display a continuously updated trace of any workload, CPU, specific PID,
7 * system wide, etc. Default format is loosely strace like, but any other
8 * event may be specified using --event.
9 *
10 * Copyright (C) 2012, 2013, 2014, 2015 Red Hat Inc, Arnaldo Carvalho de Melo <acme@redhat.com>
11 *
12 * Initially based on the 'trace' prototype by Thomas Gleixner:
13 *
14 * http://lwn.net/Articles/415728/ ("Announcing a new utility: 'trace'")
15 *
16 * Released under the GPL v2. (and only v2, not any later version)
17 */
18
4e319027 19#include <traceevent/event-parse.h>
988bdb31 20#include <api/fs/tracing_path.h>
514f1c67 21#include "builtin.h"
9ea42ba4 22#include "util/cgroup.h"
752fde44 23#include "util/color.h"
7c304ee0 24#include "util/debug.h"
092bd3cd 25#include "util/env.h"
5ab8c689 26#include "util/event.h"
514f1c67 27#include "util/evlist.h"
4b6ab94e 28#include <subcmd/exec-cmd.h>
752fde44 29#include "util/machine.h"
9a3993d4 30#include "util/path.h"
6810fc91 31#include "util/session.h"
752fde44 32#include "util/thread.h"
4b6ab94e 33#include <subcmd/parse-options.h>
2ae3a312 34#include "util/strlist.h"
bdc89661 35#include "util/intlist.h"
514f1c67 36#include "util/thread_map.h"
bf2575c1 37#include "util/stat.h"
fd5cead2 38#include "trace/beauty/beauty.h"
97978b3e 39#include "trace-event.h"
9aca7f17 40#include "util/parse-events.h"
ba504235 41#include "util/bpf-loader.h"
566a0885 42#include "callchain.h"
fea01392 43#include "print_binary.h"
a067558e 44#include "string2.h"
fd0db102 45#include "syscalltbl.h"
96c14451 46#include "rb_resort.h"
514f1c67 47
a43783ae 48#include <errno.h>
fd20e811 49#include <inttypes.h>
4208735d 50#include <poll.h>
9607ad3a 51#include <signal.h>
514f1c67 52#include <stdlib.h>
017037ff 53#include <string.h>
8dd2a131 54#include <linux/err.h>
997bba8c 55#include <linux/filter.h>
877a7a11 56#include <linux/kernel.h>
39878d49 57#include <linux/random.h>
c6d4a494 58#include <linux/stringify.h>
bd48c63e 59#include <linux/time64.h>
bafae98e 60#include <fcntl.h>
514f1c67 61
3d689ed6
ACM
62#include "sane_ctype.h"
63
c188e7ac
ACM
64#ifndef O_CLOEXEC
65# define O_CLOEXEC 02000000
66#endif
67
83a51694
ACM
68#ifndef F_LINUX_SPECIFIC_BASE
69# define F_LINUX_SPECIFIC_BASE 1024
70#endif
71
d1d438a3
ACM
72struct trace {
73 struct perf_tool tool;
fd0db102 74 struct syscalltbl *sctbl;
d1d438a3
ACM
75 struct {
76 int max;
77 struct syscall *table;
78 struct {
79 struct perf_evsel *sys_enter,
d3d1c4bd
ACM
80 *sys_exit,
81 *augmented;
d1d438a3
ACM
82 } events;
83 } syscalls;
84 struct record_opts opts;
85 struct perf_evlist *evlist;
86 struct machine *host;
87 struct thread *current;
9ea42ba4 88 struct cgroup *cgroup;
d1d438a3
ACM
89 u64 base_time;
90 FILE *output;
91 unsigned long nr_events;
92 struct strlist *ev_qualifier;
93 struct {
94 size_t nr;
95 int *entries;
96 } ev_qualifier_ids;
d1d438a3
ACM
97 struct {
98 size_t nr;
99 pid_t *entries;
100 } filter_pids;
101 double duration_filter;
102 double runtime_ms;
103 struct {
104 u64 vfs_getname,
105 proc_getname;
106 } stats;
c6d4a494 107 unsigned int max_stack;
5cf9c84e 108 unsigned int min_stack;
d1d438a3
ACM
109 bool not_ev_qualifier;
110 bool live;
111 bool full_time;
112 bool sched;
113 bool multiple_threads;
114 bool summary;
115 bool summary_only;
0a6545bd 116 bool failure_only;
d1d438a3 117 bool show_comm;
591421e1 118 bool print_sample;
d1d438a3
ACM
119 bool show_tool_stats;
120 bool trace_syscalls;
44621819 121 bool kernel_syscallchains;
d1d438a3
ACM
122 bool force;
123 bool vfs_getname;
124 int trace_pgfaults;
125};
a1c2552d 126
77170988
ACM
127struct tp_field {
128 int offset;
129 union {
130 u64 (*integer)(struct tp_field *field, struct perf_sample *sample);
131 void *(*pointer)(struct tp_field *field, struct perf_sample *sample);
132 };
133};
134
135#define TP_UINT_FIELD(bits) \
136static u64 tp_field__u##bits(struct tp_field *field, struct perf_sample *sample) \
137{ \
55d43bca
DA
138 u##bits value; \
139 memcpy(&value, sample->raw_data + field->offset, sizeof(value)); \
140 return value; \
77170988
ACM
141}
142
143TP_UINT_FIELD(8);
144TP_UINT_FIELD(16);
145TP_UINT_FIELD(32);
146TP_UINT_FIELD(64);
147
148#define TP_UINT_FIELD__SWAPPED(bits) \
149static u64 tp_field__swapped_u##bits(struct tp_field *field, struct perf_sample *sample) \
150{ \
55d43bca
DA
151 u##bits value; \
152 memcpy(&value, sample->raw_data + field->offset, sizeof(value)); \
77170988
ACM
153 return bswap_##bits(value);\
154}
155
156TP_UINT_FIELD__SWAPPED(16);
157TP_UINT_FIELD__SWAPPED(32);
158TP_UINT_FIELD__SWAPPED(64);
159
aa823f58 160static int __tp_field__init_uint(struct tp_field *field, int size, int offset, bool needs_swap)
77170988 161{
aa823f58 162 field->offset = offset;
77170988 163
aa823f58 164 switch (size) {
77170988
ACM
165 case 1:
166 field->integer = tp_field__u8;
167 break;
168 case 2:
169 field->integer = needs_swap ? tp_field__swapped_u16 : tp_field__u16;
170 break;
171 case 4:
172 field->integer = needs_swap ? tp_field__swapped_u32 : tp_field__u32;
173 break;
174 case 8:
175 field->integer = needs_swap ? tp_field__swapped_u64 : tp_field__u64;
176 break;
177 default:
178 return -1;
179 }
180
181 return 0;
182}
183
aa823f58
ACM
184static int tp_field__init_uint(struct tp_field *field, struct format_field *format_field, bool needs_swap)
185{
186 return __tp_field__init_uint(field, format_field->size, format_field->offset, needs_swap);
187}
188
77170988
ACM
189static void *tp_field__ptr(struct tp_field *field, struct perf_sample *sample)
190{
191 return sample->raw_data + field->offset;
192}
193
aa823f58 194static int __tp_field__init_ptr(struct tp_field *field, int offset)
77170988 195{
aa823f58 196 field->offset = offset;
77170988
ACM
197 field->pointer = tp_field__ptr;
198 return 0;
199}
200
aa823f58
ACM
201static int tp_field__init_ptr(struct tp_field *field, struct format_field *format_field)
202{
203 return __tp_field__init_ptr(field, format_field->offset);
204}
205
77170988
ACM
206struct syscall_tp {
207 struct tp_field id;
208 union {
209 struct tp_field args, ret;
210 };
211};
212
213static int perf_evsel__init_tp_uint_field(struct perf_evsel *evsel,
214 struct tp_field *field,
215 const char *name)
216{
217 struct format_field *format_field = perf_evsel__field(evsel, name);
218
219 if (format_field == NULL)
220 return -1;
221
222 return tp_field__init_uint(field, format_field, evsel->needs_swap);
223}
224
/*
 * Initialize the integer accessor for member @name of the struct syscall_tp
 * hanging off evsel->priv, using the tracepoint field with the same name.
 * @evsel is parenthesized so that any pointer expression may be passed; it
 * is evaluated twice.
 */
#define perf_evsel__init_sc_tp_uint_field(evsel, name) \
	({ struct syscall_tp *sc = (evsel)->priv;\
	   perf_evsel__init_tp_uint_field((evsel), &sc->name, #name); })
228
/*
 * Look up the tracepoint field called @name in @evsel and initialize @field
 * as a pointer accessor for it.
 *
 * Returns -1 when the field does not exist, otherwise the result of
 * tp_field__init_ptr().
 */
static int perf_evsel__init_tp_ptr_field(struct perf_evsel *evsel,
					 struct tp_field *field,
					 const char *name)
{
	struct format_field *fmt = perf_evsel__field(evsel, name);

	if (fmt != NULL)
		return tp_field__init_ptr(field, fmt);

	return -1;
}
240
/*
 * Initialize the pointer accessor for member @name of the struct syscall_tp
 * hanging off evsel->priv, using the tracepoint field with the same name.
 * @evsel is parenthesized so that any pointer expression may be passed; it
 * is evaluated twice.
 */
#define perf_evsel__init_sc_tp_ptr_field(evsel, name) \
	({ struct syscall_tp *sc = (evsel)->priv;\
	   perf_evsel__init_tp_ptr_field((evsel), &sc->name, #name); })
244
245static void perf_evsel__delete_priv(struct perf_evsel *evsel)
246{
04662523 247 zfree(&evsel->priv);
77170988
ACM
248 perf_evsel__delete(evsel);
249}
250
d32855fa
ACM
251static int perf_evsel__init_syscall_tp(struct perf_evsel *evsel)
252{
253 struct syscall_tp *sc = evsel->priv = malloc(sizeof(struct syscall_tp));
254
255 if (evsel->priv != NULL) {
256 if (perf_evsel__init_tp_uint_field(evsel, &sc->id, "__syscall_nr"))
257 goto out_delete;
258 return 0;
259 }
260
261 return -ENOMEM;
262out_delete:
263 zfree(&evsel->priv);
264 return -ENOENT;
265}
266
d3d1c4bd
ACM
267static int perf_evsel__init_augmented_syscall_tp(struct perf_evsel *evsel)
268{
269 struct syscall_tp *sc = evsel->priv = malloc(sizeof(struct syscall_tp));
270
271 if (evsel->priv != NULL) { /* field, sizeof_field, offsetof_field */
272 if (__tp_field__init_uint(&sc->id, sizeof(long), sizeof(long long), evsel->needs_swap))
273 goto out_delete;
274
275 return 0;
276 }
277
278 return -ENOMEM;
279out_delete:
280 zfree(&evsel->priv);
281 return -EINVAL;
282}
283
284static int perf_evsel__init_augmented_syscall_tp_args(struct perf_evsel *evsel)
285{
286 struct syscall_tp *sc = evsel->priv;
287
288 return __tp_field__init_ptr(&sc->args, sc->id.offset + sizeof(u64));
289}
290
63f11c80 291static int perf_evsel__init_raw_syscall_tp(struct perf_evsel *evsel, void *handler)
96695d44
NK
292{
293 evsel->priv = malloc(sizeof(struct syscall_tp));
294 if (evsel->priv != NULL) {
295 if (perf_evsel__init_sc_tp_uint_field(evsel, id))
296 goto out_delete;
297
298 evsel->handler = handler;
299 return 0;
300 }
301
302 return -ENOMEM;
303
304out_delete:
04662523 305 zfree(&evsel->priv);
96695d44
NK
306 return -ENOENT;
307}
308
/*
 * Create the evsel for the raw_syscalls:@direction tracepoint, falling back
 * to syscalls:@direction, and initialize it with @handler.
 *
 * Returns the new evsel, or NULL if neither tracepoint could be created or
 * the initialization failed (in which case the evsel is destroyed).
 */
static struct perf_evsel *perf_evsel__raw_syscall_newtp(const char *direction, void *handler)
{
	struct perf_evsel *tp = perf_evsel__newtp("raw_syscalls", direction);

	/* older kernel (e.g., RHEL6) use syscalls:{enter,exit} */
	if (IS_ERR(tp)) {
		tp = perf_evsel__newtp("syscalls", direction);
		if (IS_ERR(tp))
			return NULL;
	}

	if (perf_evsel__init_raw_syscall_tp(tp, handler) == 0)
		return tp;

	perf_evsel__delete_priv(tp);
	return NULL;
}
329
/*
 * Read the integer member @name of the evsel's struct syscall_tp from
 * @sample, through the reader installed for that member.  @evsel is
 * parenthesized so that any pointer expression may be passed.
 */
#define perf_evsel__sc_tp_uint(evsel, name, sample) \
	({ struct syscall_tp *fields = (evsel)->priv; \
	   fields->name.integer(&fields->name, sample); })
333
/*
 * Get the pointer for member @name of the evsel's struct syscall_tp inside
 * @sample, through the reader installed for that member.  @evsel is
 * parenthesized so that any pointer expression may be passed.
 */
#define perf_evsel__sc_tp_ptr(evsel, name, sample) \
	({ struct syscall_tp *fields = (evsel)->priv; \
	   fields->name.pointer(&fields->name, sample); })
337
0ae79636
ACM
338size_t strarray__scnprintf(struct strarray *sa, char *bf, size_t size, const char *intfmt, int val)
339{
340 int idx = val - sa->offset;
1f115cb7 341
bc972ada 342 if (idx < 0 || idx >= sa->nr_entries || sa->entries[idx] == NULL)
0ae79636 343 return scnprintf(bf, size, intfmt, val);
1f115cb7 344
0ae79636 345 return scnprintf(bf, size, "%s", sa->entries[idx]);
03e3adc9
ACM
346}
347
975b7c2f
ACM
348static size_t __syscall_arg__scnprintf_strarray(char *bf, size_t size,
349 const char *intfmt,
350 struct syscall_arg *arg)
1f115cb7 351{
0ae79636 352 return strarray__scnprintf(arg->parm, bf, size, intfmt, arg->val);
1f115cb7
ACM
353}
354
975b7c2f
ACM
/*
 * Format a syscall argument through the strarray in arg->parm; values the
 * table does not cover are printed with "%d".
 */
static size_t syscall_arg__scnprintf_strarray(char *bf, size_t size,
					      struct syscall_arg *arg)
{
	static const char decimal[] = "%d";

	return __syscall_arg__scnprintf_strarray(bf, size, decimal, arg);
}
360
1f115cb7
ACM
361#define SCA_STRARRAY syscall_arg__scnprintf_strarray
362
83a51694
ACM
/* A list of strarray tables that are consulted one after the other. */
struct strarrays {
	int		nr_entries;
	struct strarray **entries;
};

/* Define 'strarrays__<array>' wrapping the array of strarray pointers @array. */
#define DEFINE_STRARRAYS(array) struct strarrays strarrays__##array = { \
	.entries    = array, \
	.nr_entries = ARRAY_SIZE(array), \
}
372
274e86fd
ACM
373size_t syscall_arg__scnprintf_strarrays(char *bf, size_t size,
374 struct syscall_arg *arg)
83a51694
ACM
375{
376 struct strarrays *sas = arg->parm;
377 int i;
378
379 for (i = 0; i < sas->nr_entries; ++i) {
380 struct strarray *sa = sas->entries[i];
381 int idx = arg->val - sa->offset;
382
383 if (idx >= 0 && idx < sa->nr_entries) {
384 if (sa->entries[idx] == NULL)
385 break;
386 return scnprintf(bf, size, "%s", sa->entries[idx]);
387 }
388 }
389
390 return scnprintf(bf, size, "%d", arg->val);
391}
392
48e1f91a
ACM
393#ifndef AT_FDCWD
394#define AT_FDCWD -100
395#endif
396
75b757ca
ACM
397static size_t syscall_arg__scnprintf_fd_at(char *bf, size_t size,
398 struct syscall_arg *arg)
399{
400 int fd = arg->val;
401
402 if (fd == AT_FDCWD)
403 return scnprintf(bf, size, "CWD");
404
405 return syscall_arg__scnprintf_fd(bf, size, arg);
406}
407
408#define SCA_FDAT syscall_arg__scnprintf_fd_at
409
410static size_t syscall_arg__scnprintf_close_fd(char *bf, size_t size,
411 struct syscall_arg *arg);
412
413#define SCA_CLOSE_FD syscall_arg__scnprintf_close_fd
414
2c2b1623 415size_t syscall_arg__scnprintf_hex(char *bf, size_t size, struct syscall_arg *arg)
13d4ff3e 416{
01533e97 417 return scnprintf(bf, size, "%#lx", arg->val);
13d4ff3e
ACM
418}
419
2c2b1623 420size_t syscall_arg__scnprintf_int(char *bf, size_t size, struct syscall_arg *arg)
a1c2552d
ACM
421{
422 return scnprintf(bf, size, "%d", arg->val);
423}
424
5dde91ed
ACM
425size_t syscall_arg__scnprintf_long(char *bf, size_t size, struct syscall_arg *arg)
426{
427 return scnprintf(bf, size, "%ld", arg->val);
428}
429
729a7841
ACM
430static const char *bpf_cmd[] = {
431 "MAP_CREATE", "MAP_LOOKUP_ELEM", "MAP_UPDATE_ELEM", "MAP_DELETE_ELEM",
432 "MAP_GET_NEXT_KEY", "PROG_LOAD",
433};
434static DEFINE_STRARRAY(bpf_cmd);
435
03e3adc9
ACM
436static const char *epoll_ctl_ops[] = { "ADD", "DEL", "MOD", };
437static DEFINE_STRARRAY_OFFSET(epoll_ctl_ops, 1);
eac032c5 438
1f115cb7
ACM
439static const char *itimers[] = { "REAL", "VIRTUAL", "PROF", };
440static DEFINE_STRARRAY(itimers);
441
b62bee1b
ACM
442static const char *keyctl_options[] = {
443 "GET_KEYRING_ID", "JOIN_SESSION_KEYRING", "UPDATE", "REVOKE", "CHOWN",
444 "SETPERM", "DESCRIBE", "CLEAR", "LINK", "UNLINK", "SEARCH", "READ",
445 "INSTANTIATE", "NEGATE", "SET_REQKEY_KEYRING", "SET_TIMEOUT",
446 "ASSUME_AUTHORITY", "GET_SECURITY", "SESSION_TO_PARENT", "REJECT",
447 "INSTANTIATE_IOV", "INVALIDATE", "GET_PERSISTENT",
448};
449static DEFINE_STRARRAY(keyctl_options);
450
efe6b882
ACM
451static const char *whences[] = { "SET", "CUR", "END",
452#ifdef SEEK_DATA
453"DATA",
454#endif
455#ifdef SEEK_HOLE
456"HOLE",
457#endif
458};
459static DEFINE_STRARRAY(whences);
f9da0b0c 460
80f587d5
ACM
461static const char *fcntl_cmds[] = {
462 "DUPFD", "GETFD", "SETFD", "GETFL", "SETFL", "GETLK", "SETLK",
e000e5e3
ACM
463 "SETLKW", "SETOWN", "GETOWN", "SETSIG", "GETSIG", "GETLK64",
464 "SETLK64", "SETLKW64", "SETOWN_EX", "GETOWN_EX",
465 "GETOWNER_UIDS",
80f587d5
ACM
466};
467static DEFINE_STRARRAY(fcntl_cmds);
468
83a51694
ACM
469static const char *fcntl_linux_specific_cmds[] = {
470 "SETLEASE", "GETLEASE", "NOTIFY", [5] = "CANCELLK", "DUPFD_CLOEXEC",
471 "SETPIPE_SZ", "GETPIPE_SZ", "ADD_SEALS", "GET_SEALS",
64e4561d 472 "GET_RW_HINT", "SET_RW_HINT", "GET_FILE_RW_HINT", "SET_FILE_RW_HINT",
83a51694
ACM
473};
474
475static DEFINE_STRARRAY_OFFSET(fcntl_linux_specific_cmds, F_LINUX_SPECIFIC_BASE);
476
477static struct strarray *fcntl_cmds_arrays[] = {
478 &strarray__fcntl_cmds,
479 &strarray__fcntl_linux_specific_cmds,
480};
481
482static DEFINE_STRARRAYS(fcntl_cmds_arrays);
483
c045bf02
ACM
484static const char *rlimit_resources[] = {
485 "CPU", "FSIZE", "DATA", "STACK", "CORE", "RSS", "NPROC", "NOFILE",
486 "MEMLOCK", "AS", "LOCKS", "SIGPENDING", "MSGQUEUE", "NICE", "RTPRIO",
487 "RTTIME",
488};
489static DEFINE_STRARRAY(rlimit_resources);
490
eb5b1b14
ACM
491static const char *sighow[] = { "BLOCK", "UNBLOCK", "SETMASK", };
492static DEFINE_STRARRAY(sighow);
493
4f8c1b74
DA
494static const char *clockid[] = {
495 "REALTIME", "MONOTONIC", "PROCESS_CPUTIME_ID", "THREAD_CPUTIME_ID",
28ebb87c
ACM
496 "MONOTONIC_RAW", "REALTIME_COARSE", "MONOTONIC_COARSE", "BOOTTIME",
497 "REALTIME_ALARM", "BOOTTIME_ALARM", "SGI_CYCLE", "TAI"
4f8c1b74
DA
498};
499static DEFINE_STRARRAY(clockid);
500
51108999
ACM
501static size_t syscall_arg__scnprintf_access_mode(char *bf, size_t size,
502 struct syscall_arg *arg)
503{
504 size_t printed = 0;
505 int mode = arg->val;
506
507 if (mode == F_OK) /* 0 */
508 return scnprintf(bf, size, "F");
509#define P_MODE(n) \
510 if (mode & n##_OK) { \
511 printed += scnprintf(bf + printed, size - printed, "%s", #n); \
512 mode &= ~n##_OK; \
513 }
514
515 P_MODE(R);
516 P_MODE(W);
517 P_MODE(X);
518#undef P_MODE
519
520 if (mode)
521 printed += scnprintf(bf + printed, size - printed, "|%#x", mode);
522
523 return printed;
524}
525
526#define SCA_ACCMODE syscall_arg__scnprintf_access_mode
527
f994592d
ACM
528static size_t syscall_arg__scnprintf_filename(char *bf, size_t size,
529 struct syscall_arg *arg);
530
531#define SCA_FILENAME syscall_arg__scnprintf_filename
532
46cce19b
ACM
533static size_t syscall_arg__scnprintf_pipe_flags(char *bf, size_t size,
534 struct syscall_arg *arg)
535{
536 int printed = 0, flags = arg->val;
537
538#define P_FLAG(n) \
539 if (flags & O_##n) { \
540 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
541 flags &= ~O_##n; \
542 }
543
544 P_FLAG(CLOEXEC);
545 P_FLAG(NONBLOCK);
546#undef P_FLAG
547
548 if (flags)
549 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
550
551 return printed;
552}
553
554#define SCA_PIPE_FLAGS syscall_arg__scnprintf_pipe_flags
555
a355a61e
ACM
556#ifndef GRND_NONBLOCK
557#define GRND_NONBLOCK 0x0001
558#endif
559#ifndef GRND_RANDOM
560#define GRND_RANDOM 0x0002
561#endif
562
39878d49
ACM
563static size_t syscall_arg__scnprintf_getrandom_flags(char *bf, size_t size,
564 struct syscall_arg *arg)
565{
566 int printed = 0, flags = arg->val;
567
568#define P_FLAG(n) \
569 if (flags & GRND_##n) { \
570 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
571 flags &= ~GRND_##n; \
572 }
573
574 P_FLAG(RANDOM);
575 P_FLAG(NONBLOCK);
576#undef P_FLAG
577
578 if (flags)
579 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
580
581 return printed;
582}
583
584#define SCA_GETRANDOM_FLAGS syscall_arg__scnprintf_getrandom_flags
585
82d4a110
ACM
/*
 * Initializer for a struct syscall_arg_fmt that formats the argument via
 * 'strarray__<array>'.  @name only documents the argument at the call site,
 * it is not used in the expansion.
 */
#define STRARRAY(name, array) \
	  { .parm      = &strarray__##array, \
	    .scnprintf = SCA_STRARRAY, }
453350dd 589
092bd3cd 590#include "trace/beauty/arch_errno_names.c"
ea8dc3ce 591#include "trace/beauty/eventfd.c"
d5d71e86 592#include "trace/beauty/futex_op.c"
3258abe0 593#include "trace/beauty/futex_val3.c"
df4cb167 594#include "trace/beauty/mmap.c"
ba2f22cf 595#include "trace/beauty/mode_t.c"
a30e6259 596#include "trace/beauty/msg_flags.c"
8f48df69 597#include "trace/beauty/open_flags.c"
62de344e 598#include "trace/beauty/perf_event_open.c"
d5d71e86 599#include "trace/beauty/pid.c"
a3bca91f 600#include "trace/beauty/sched_policy.c"
f5cd95ea 601#include "trace/beauty/seccomp.c"
12199d8e 602#include "trace/beauty/signum.c"
bbf86c43 603#include "trace/beauty/socket_type.c"
7206b900 604#include "trace/beauty/waitid_options.c"
a3bca91f 605
82d4a110
ACM
/*
 * How to format one syscall argument:
 *   scnprintf: formatter callback writing into a sized buffer
 *   parm:      opaque parameter for the formatter (e.g. a strarray)
 *   name:      argument name, set explicitly by some table entries
 *   show_zero: NOTE(review): presumably forces printing of zero values;
 *              confirm against the code that consumes it
 */
struct syscall_arg_fmt {
	size_t	   (*scnprintf)(char *bf, size_t size, struct syscall_arg *arg);
	void	   *parm;
	const char *name;
	bool	   show_zero;
};
612
514f1c67
ACM
613static struct syscall_fmt {
614 const char *name;
aec1930b 615 const char *alias;
82d4a110 616 struct syscall_arg_fmt arg[6];
332337da 617 u8 nr_args;
11c8e39f 618 bool errpid;
514f1c67 619 bool timeout;
04b34729 620 bool hexret;
514f1c67 621} syscall_fmts[] = {
1f63139c 622 { .name = "access",
82d4a110 623 .arg = { [1] = { .scnprintf = SCA_ACCMODE, /* mode */ }, }, },
02ef2884
ACM
624 { .name = "bind",
625 .arg = { [1] = { .scnprintf = SCA_SOCKADDR, /* umyaddr */ }, }, },
1f63139c 626 { .name = "bpf",
82d4a110 627 .arg = { [0] = STRARRAY(cmd, bpf_cmd), }, },
beccb2b5 628 { .name = "brk", .hexret = true,
82d4a110 629 .arg = { [0] = { .scnprintf = SCA_HEX, /* brk */ }, }, },
1f63139c 630 { .name = "clock_gettime",
82d4a110 631 .arg = { [0] = STRARRAY(clk_id, clockid), }, },
33396a3a
ACM
632 { .name = "clone", .errpid = true, .nr_args = 5,
633 .arg = { [0] = { .name = "flags", .scnprintf = SCA_CLONE_FLAGS, },
634 [1] = { .name = "child_stack", .scnprintf = SCA_HEX, },
635 [2] = { .name = "parent_tidptr", .scnprintf = SCA_HEX, },
636 [3] = { .name = "child_tidptr", .scnprintf = SCA_HEX, },
637 [4] = { .name = "tls", .scnprintf = SCA_HEX, }, }, },
1f63139c 638 { .name = "close",
82d4a110 639 .arg = { [0] = { .scnprintf = SCA_CLOSE_FD, /* fd */ }, }, },
d5a7e661
ACM
640 { .name = "connect",
641 .arg = { [1] = { .scnprintf = SCA_SOCKADDR, /* servaddr */ }, }, },
1f63139c 642 { .name = "epoll_ctl",
82d4a110 643 .arg = { [1] = STRARRAY(op, epoll_ctl_ops), }, },
1f63139c 644 { .name = "eventfd2",
82d4a110 645 .arg = { [1] = { .scnprintf = SCA_EFD_FLAGS, /* flags */ }, }, },
1f63139c 646 { .name = "fchmodat",
82d4a110 647 .arg = { [0] = { .scnprintf = SCA_FDAT, /* fd */ }, }, },
1f63139c 648 { .name = "fchownat",
82d4a110 649 .arg = { [0] = { .scnprintf = SCA_FDAT, /* fd */ }, }, },
1f63139c 650 { .name = "fcntl",
82d4a110 651 .arg = { [1] = { .scnprintf = SCA_FCNTL_CMD, /* cmd */
39cc355b
ACM
652 .parm = &strarrays__fcntl_cmds_arrays,
653 .show_zero = true, },
82d4a110 654 [2] = { .scnprintf = SCA_FCNTL_ARG, /* arg */ }, }, },
1f63139c 655 { .name = "flock",
82d4a110 656 .arg = { [1] = { .scnprintf = SCA_FLOCK, /* cmd */ }, }, },
1f63139c
ACM
657 { .name = "fstat", .alias = "newfstat", },
658 { .name = "fstatat", .alias = "newfstatat", },
659 { .name = "futex",
3258abe0
ACM
660 .arg = { [1] = { .scnprintf = SCA_FUTEX_OP, /* op */ },
661 [5] = { .scnprintf = SCA_FUTEX_VAL3, /* val3 */ }, }, },
1f63139c 662 { .name = "futimesat",
82d4a110 663 .arg = { [0] = { .scnprintf = SCA_FDAT, /* fd */ }, }, },
1f63139c 664 { .name = "getitimer",
82d4a110 665 .arg = { [0] = STRARRAY(which, itimers), }, },
c65f1070 666 { .name = "getpid", .errpid = true, },
d1d438a3 667 { .name = "getpgid", .errpid = true, },
c65f1070 668 { .name = "getppid", .errpid = true, },
1f63139c 669 { .name = "getrandom",
82d4a110 670 .arg = { [2] = { .scnprintf = SCA_GETRANDOM_FLAGS, /* flags */ }, }, },
1f63139c 671 { .name = "getrlimit",
82d4a110 672 .arg = { [0] = STRARRAY(resource, rlimit_resources), }, },
2d1073de 673 { .name = "gettid", .errpid = true, },
1f63139c 674 { .name = "ioctl",
82d4a110 675 .arg = {
844ae5b4
ACM
676#if defined(__i386__) || defined(__x86_64__)
677/*
678 * FIXME: Make this available to all arches.
679 */
1cc47f2d 680 [1] = { .scnprintf = SCA_IOCTL_CMD, /* cmd */ },
82d4a110 681 [2] = { .scnprintf = SCA_HEX, /* arg */ }, }, },
844ae5b4 682#else
82d4a110 683 [2] = { .scnprintf = SCA_HEX, /* arg */ }, }, },
844ae5b4 684#endif
1de3038d
ACM
685 { .name = "kcmp", .nr_args = 5,
686 .arg = { [0] = { .name = "pid1", .scnprintf = SCA_PID, },
687 [1] = { .name = "pid2", .scnprintf = SCA_PID, },
688 [2] = { .name = "type", .scnprintf = SCA_KCMP_TYPE, },
689 [3] = { .name = "idx1", .scnprintf = SCA_KCMP_IDX, },
690 [4] = { .name = "idx2", .scnprintf = SCA_KCMP_IDX, }, }, },
1f63139c 691 { .name = "keyctl",
82d4a110 692 .arg = { [0] = STRARRAY(option, keyctl_options), }, },
1f63139c 693 { .name = "kill",
82d4a110 694 .arg = { [1] = { .scnprintf = SCA_SIGNUM, /* sig */ }, }, },
1f63139c 695 { .name = "linkat",
82d4a110 696 .arg = { [0] = { .scnprintf = SCA_FDAT, /* fd */ }, }, },
1f63139c 697 { .name = "lseek",
82d4a110 698 .arg = { [2] = STRARRAY(whence, whences), }, },
1f63139c
ACM
699 { .name = "lstat", .alias = "newlstat", },
700 { .name = "madvise",
82d4a110
ACM
701 .arg = { [0] = { .scnprintf = SCA_HEX, /* start */ },
702 [2] = { .scnprintf = SCA_MADV_BHV, /* behavior */ }, }, },
1f63139c 703 { .name = "mkdirat",
82d4a110 704 .arg = { [0] = { .scnprintf = SCA_FDAT, /* fd */ }, }, },
1f63139c 705 { .name = "mknodat",
82d4a110 706 .arg = { [0] = { .scnprintf = SCA_FDAT, /* fd */ }, }, },
1f63139c 707 { .name = "mlock",
82d4a110 708 .arg = { [0] = { .scnprintf = SCA_HEX, /* addr */ }, }, },
1f63139c 709 { .name = "mlockall",
82d4a110 710 .arg = { [0] = { .scnprintf = SCA_HEX, /* addr */ }, }, },
beccb2b5 711 { .name = "mmap", .hexret = true,
54265664
JO
712/* The standard mmap maps to old_mmap on s390x */
713#if defined(__s390x__)
714 .alias = "old_mmap",
715#endif
82d4a110
ACM
716 .arg = { [0] = { .scnprintf = SCA_HEX, /* addr */ },
717 [2] = { .scnprintf = SCA_MMAP_PROT, /* prot */ },
718 [3] = { .scnprintf = SCA_MMAP_FLAGS, /* flags */ }, }, },
1f63139c 719 { .name = "mprotect",
82d4a110
ACM
720 .arg = { [0] = { .scnprintf = SCA_HEX, /* start */ },
721 [2] = { .scnprintf = SCA_MMAP_PROT, /* prot */ }, }, },
1f63139c 722 { .name = "mq_unlink",
82d4a110 723 .arg = { [0] = { .scnprintf = SCA_FILENAME, /* u_name */ }, }, },
ae685380 724 { .name = "mremap", .hexret = true,
82d4a110
ACM
725 .arg = { [0] = { .scnprintf = SCA_HEX, /* addr */ },
726 [3] = { .scnprintf = SCA_MREMAP_FLAGS, /* flags */ },
727 [4] = { .scnprintf = SCA_HEX, /* new_addr */ }, }, },
1f63139c 728 { .name = "munlock",
82d4a110 729 .arg = { [0] = { .scnprintf = SCA_HEX, /* addr */ }, }, },
1f63139c 730 { .name = "munmap",
82d4a110 731 .arg = { [0] = { .scnprintf = SCA_HEX, /* addr */ }, }, },
1f63139c 732 { .name = "name_to_handle_at",
82d4a110 733 .arg = { [0] = { .scnprintf = SCA_FDAT, /* dfd */ }, }, },
1f63139c 734 { .name = "newfstatat",
82d4a110 735 .arg = { [0] = { .scnprintf = SCA_FDAT, /* dfd */ }, }, },
1f63139c 736 { .name = "open",
82d4a110 737 .arg = { [1] = { .scnprintf = SCA_OPEN_FLAGS, /* flags */ }, }, },
1f63139c 738 { .name = "open_by_handle_at",
82d4a110
ACM
739 .arg = { [0] = { .scnprintf = SCA_FDAT, /* dfd */ },
740 [2] = { .scnprintf = SCA_OPEN_FLAGS, /* flags */ }, }, },
1f63139c 741 { .name = "openat",
82d4a110
ACM
742 .arg = { [0] = { .scnprintf = SCA_FDAT, /* dfd */ },
743 [2] = { .scnprintf = SCA_OPEN_FLAGS, /* flags */ }, }, },
1f63139c 744 { .name = "perf_event_open",
82d4a110
ACM
745 .arg = { [2] = { .scnprintf = SCA_INT, /* cpu */ },
746 [3] = { .scnprintf = SCA_FD, /* group_fd */ },
747 [4] = { .scnprintf = SCA_PERF_FLAGS, /* flags */ }, }, },
1f63139c 748 { .name = "pipe2",
82d4a110 749 .arg = { [1] = { .scnprintf = SCA_PIPE_FLAGS, /* flags */ }, }, },
83bc9c37
ACM
750 { .name = "pkey_alloc",
751 .arg = { [1] = { .scnprintf = SCA_PKEY_ALLOC_ACCESS_RIGHTS, /* access_rights */ }, }, },
752 { .name = "pkey_free",
753 .arg = { [0] = { .scnprintf = SCA_INT, /* key */ }, }, },
754 { .name = "pkey_mprotect",
755 .arg = { [0] = { .scnprintf = SCA_HEX, /* start */ },
756 [2] = { .scnprintf = SCA_MMAP_PROT, /* prot */ },
757 [3] = { .scnprintf = SCA_INT, /* pkey */ }, }, },
1f63139c
ACM
758 { .name = "poll", .timeout = true, },
759 { .name = "ppoll", .timeout = true, },
d688d037
ACM
760 { .name = "prctl", .alias = "arch_prctl",
761 .arg = { [0] = { .scnprintf = SCA_PRCTL_OPTION, /* option */ },
762 [1] = { .scnprintf = SCA_PRCTL_ARG2, /* arg2 */ },
763 [2] = { .scnprintf = SCA_PRCTL_ARG3, /* arg3 */ }, }, },
1f63139c
ACM
764 { .name = "pread", .alias = "pread64", },
765 { .name = "preadv", .alias = "pread", },
766 { .name = "prlimit64",
82d4a110 767 .arg = { [1] = STRARRAY(resource, rlimit_resources), }, },
1f63139c
ACM
768 { .name = "pwrite", .alias = "pwrite64", },
769 { .name = "readlinkat",
82d4a110 770 .arg = { [0] = { .scnprintf = SCA_FDAT, /* dfd */ }, }, },
1f63139c 771 { .name = "recvfrom",
82d4a110 772 .arg = { [3] = { .scnprintf = SCA_MSG_FLAGS, /* flags */ }, }, },
1f63139c 773 { .name = "recvmmsg",
82d4a110 774 .arg = { [3] = { .scnprintf = SCA_MSG_FLAGS, /* flags */ }, }, },
1f63139c 775 { .name = "recvmsg",
82d4a110 776 .arg = { [2] = { .scnprintf = SCA_MSG_FLAGS, /* flags */ }, }, },
1f63139c 777 { .name = "renameat",
82d4a110 778 .arg = { [0] = { .scnprintf = SCA_FDAT, /* dfd */ }, }, },
1f63139c 779 { .name = "rt_sigaction",
82d4a110 780 .arg = { [0] = { .scnprintf = SCA_SIGNUM, /* sig */ }, }, },
1f63139c 781 { .name = "rt_sigprocmask",
82d4a110 782 .arg = { [0] = STRARRAY(how, sighow), }, },
1f63139c 783 { .name = "rt_sigqueueinfo",
82d4a110 784 .arg = { [1] = { .scnprintf = SCA_SIGNUM, /* sig */ }, }, },
1f63139c 785 { .name = "rt_tgsigqueueinfo",
82d4a110 786 .arg = { [2] = { .scnprintf = SCA_SIGNUM, /* sig */ }, }, },
1f63139c 787 { .name = "sched_setscheduler",
82d4a110 788 .arg = { [1] = { .scnprintf = SCA_SCHED_POLICY, /* policy */ }, }, },
1f63139c 789 { .name = "seccomp",
82d4a110
ACM
790 .arg = { [0] = { .scnprintf = SCA_SECCOMP_OP, /* op */ },
791 [1] = { .scnprintf = SCA_SECCOMP_FLAGS, /* flags */ }, }, },
1f63139c
ACM
792 { .name = "select", .timeout = true, },
793 { .name = "sendmmsg",
82d4a110 794 .arg = { [3] = { .scnprintf = SCA_MSG_FLAGS, /* flags */ }, }, },
1f63139c 795 { .name = "sendmsg",
82d4a110 796 .arg = { [2] = { .scnprintf = SCA_MSG_FLAGS, /* flags */ }, }, },
1f63139c 797 { .name = "sendto",
6ebb6862
ACM
798 .arg = { [3] = { .scnprintf = SCA_MSG_FLAGS, /* flags */ },
799 [4] = { .scnprintf = SCA_SOCKADDR, /* addr */ }, }, },
c65f1070 800 { .name = "set_tid_address", .errpid = true, },
1f63139c 801 { .name = "setitimer",
82d4a110 802 .arg = { [0] = STRARRAY(which, itimers), }, },
1f63139c 803 { .name = "setrlimit",
82d4a110 804 .arg = { [0] = STRARRAY(resource, rlimit_resources), }, },
1f63139c 805 { .name = "socket",
82d4a110 806 .arg = { [0] = STRARRAY(family, socket_families),
162d3edb
ACM
807 [1] = { .scnprintf = SCA_SK_TYPE, /* type */ },
808 [2] = { .scnprintf = SCA_SK_PROTO, /* protocol */ }, }, },
1f63139c 809 { .name = "socketpair",
82d4a110 810 .arg = { [0] = STRARRAY(family, socket_families),
162d3edb
ACM
811 [1] = { .scnprintf = SCA_SK_TYPE, /* type */ },
812 [2] = { .scnprintf = SCA_SK_PROTO, /* protocol */ }, }, },
1f63139c
ACM
813 { .name = "stat", .alias = "newstat", },
814 { .name = "statx",
82d4a110
ACM
815 .arg = { [0] = { .scnprintf = SCA_FDAT, /* fdat */ },
816 [2] = { .scnprintf = SCA_STATX_FLAGS, /* flags */ } ,
817 [3] = { .scnprintf = SCA_STATX_MASK, /* mask */ }, }, },
1f63139c 818 { .name = "swapoff",
82d4a110 819 .arg = { [0] = { .scnprintf = SCA_FILENAME, /* specialfile */ }, }, },
1f63139c 820 { .name = "swapon",
82d4a110 821 .arg = { [0] = { .scnprintf = SCA_FILENAME, /* specialfile */ }, }, },
1f63139c 822 { .name = "symlinkat",
82d4a110 823 .arg = { [0] = { .scnprintf = SCA_FDAT, /* dfd */ }, }, },
1f63139c 824 { .name = "tgkill",
82d4a110 825 .arg = { [2] = { .scnprintf = SCA_SIGNUM, /* sig */ }, }, },
1f63139c 826 { .name = "tkill",
82d4a110 827 .arg = { [1] = { .scnprintf = SCA_SIGNUM, /* sig */ }, }, },
3de3e8bb 828 { .name = "umount2", .alias = "umount", },
1f63139c
ACM
829 { .name = "uname", .alias = "newuname", },
830 { .name = "unlinkat",
82d4a110 831 .arg = { [0] = { .scnprintf = SCA_FDAT, /* dfd */ }, }, },
1f63139c 832 { .name = "utimensat",
82d4a110 833 .arg = { [0] = { .scnprintf = SCA_FDAT, /* dirfd */ }, }, },
11c8e39f 834 { .name = "wait4", .errpid = true,
82d4a110 835 .arg = { [2] = { .scnprintf = SCA_WAITID_OPTIONS, /* options */ }, }, },
11c8e39f 836 { .name = "waitid", .errpid = true,
82d4a110 837 .arg = { [3] = { .scnprintf = SCA_WAITID_OPTIONS, /* options */ }, }, },
514f1c67
ACM
838};
839
840static int syscall_fmt__cmp(const void *name, const void *fmtp)
841{
842 const struct syscall_fmt *fmt = fmtp;
843 return strcmp(name, fmt->name);
844}
845
846static struct syscall_fmt *syscall_fmt__find(const char *name)
847{
848 const int nmemb = ARRAY_SIZE(syscall_fmts);
849 return bsearch(name, syscall_fmts, nmemb, sizeof(struct syscall_fmt), syscall_fmt__cmp);
850}
851
6a648b53
ACM
/*
 * Everything known about one syscall.
 *
 * is_exit:   is this "exit" or "exit_group"?
 * is_open:   is this "open" or "openat"? Used to associate the fd returned
 *            in sys_exit with the pathname seen in sys_enter.
 * args_size: sum of the sizes of the syscall arguments; anything after that
 *            is augmented stuff: pathname for openat, etc.
 */
struct syscall {
	struct event_format    *tp_format;
	int			nr_args;
	int			args_size;
	bool			is_exit;
	bool			is_open;
	struct format_field    *args;
	const char	       *name;
	struct syscall_fmt     *fmt;
	struct syscall_arg_fmt *arg_fmt;
};
868
fd2b2975
ACM
869/*
870 * We need to have this 'calculated' boolean because in some cases we really
871 * don't know what is the duration of a syscall, for instance, when we start
872 * a session and some threads are waiting for a syscall to finish, say 'poll',
873 * in which case all we can do is to print "( ? ) for duration and for the
874 * start timestamp.
875 */
876static size_t fprintf_duration(unsigned long t, bool calculated, FILE *fp)
60c907ab
ACM
877{
878 double duration = (double)t / NSEC_PER_MSEC;
879 size_t printed = fprintf(fp, "(");
880
fd2b2975 881 if (!calculated)
522283fe 882 printed += fprintf(fp, " ");
fd2b2975 883 else if (duration >= 1.0)
60c907ab
ACM
884 printed += color_fprintf(fp, PERF_COLOR_RED, "%6.3f ms", duration);
885 else if (duration >= 0.01)
886 printed += color_fprintf(fp, PERF_COLOR_YELLOW, "%6.3f ms", duration);
887 else
888 printed += color_fprintf(fp, PERF_COLOR_NORMAL, "%6.3f ms", duration);
c24ff998 889 return printed + fprintf(fp, "): ");
60c907ab
ACM
890}
891
f994592d
ACM
892/**
893 * filename.ptr: The filename char pointer that will be vfs_getname'd
894 * filename.entry_str_pos: Where to insert the string translated from
895 * filename.ptr by the vfs_getname tracepoint/kprobe.
84486caa
ACM
896 * ret_scnprintf: syscall args may set this to a different syscall return
897 * formatter, for instance, fcntl may return fds, file flags, etc.
f994592d 898 */
752fde44
ACM
899struct thread_trace {
900 u64 entry_time;
752fde44 901 bool entry_pending;
efd5745e 902 unsigned long nr_events;
a2ea67d7 903 unsigned long pfmaj, pfmin;
752fde44 904 char *entry_str;
1302d88e 905 double runtime_ms;
7ee57434 906 size_t (*ret_scnprintf)(char *bf, size_t size, struct syscall_arg *arg);
f994592d
ACM
907 struct {
908 unsigned long ptr;
7f4f8001
ACM
909 short int entry_str_pos;
910 bool pending_open;
911 unsigned int namelen;
912 char *name;
f994592d 913 } filename;
75b757ca
ACM
914 struct {
915 int max;
916 char **table;
917 } paths;
bf2575c1
DA
918
919 struct intlist *syscall_stats;
752fde44
ACM
920};
921
922static struct thread_trace *thread_trace__new(void)
923{
75b757ca
ACM
924 struct thread_trace *ttrace = zalloc(sizeof(struct thread_trace));
925
926 if (ttrace)
927 ttrace->paths.max = -1;
928
bf2575c1
DA
929 ttrace->syscall_stats = intlist__new(NULL);
930
75b757ca 931 return ttrace;
752fde44
ACM
932}
933
c24ff998 934static struct thread_trace *thread__trace(struct thread *thread, FILE *fp)
752fde44 935{
efd5745e
ACM
936 struct thread_trace *ttrace;
937
752fde44
ACM
938 if (thread == NULL)
939 goto fail;
940
89dceb22
NK
941 if (thread__priv(thread) == NULL)
942 thread__set_priv(thread, thread_trace__new());
48000a1a 943
89dceb22 944 if (thread__priv(thread) == NULL)
752fde44
ACM
945 goto fail;
946
89dceb22 947 ttrace = thread__priv(thread);
efd5745e
ACM
948 ++ttrace->nr_events;
949
950 return ttrace;
752fde44 951fail:
c24ff998 952 color_fprintf(fp, PERF_COLOR_RED,
752fde44
ACM
953 "WARNING: not enough memory, dropping samples!\n");
954 return NULL;
955}
956
84486caa
ACM
957
958void syscall_arg__set_ret_scnprintf(struct syscall_arg *arg,
7ee57434 959 size_t (*ret_scnprintf)(char *bf, size_t size, struct syscall_arg *arg))
84486caa
ACM
960{
961 struct thread_trace *ttrace = thread__priv(arg->thread);
962
963 ttrace->ret_scnprintf = ret_scnprintf;
964}
965
598d02c5
SF
/*
 * Bit flags - presumably selecting tracing of major and/or minor page
 * faults, the users are outside this part of the file.
 */
#define TRACE_PFMAJ		(1 << 0)
#define TRACE_PFMIN		(1 << 1)

/* Size, in bytes, of the per thread buffer holding a formatted sys_enter. */
static const size_t trace__entry_str_size = 2048;
970
97119f37 971static int trace__set_fd_pathname(struct thread *thread, int fd, const char *pathname)
75b757ca 972{
89dceb22 973 struct thread_trace *ttrace = thread__priv(thread);
75b757ca
ACM
974
975 if (fd > ttrace->paths.max) {
976 char **npath = realloc(ttrace->paths.table, (fd + 1) * sizeof(char *));
977
978 if (npath == NULL)
979 return -1;
980
981 if (ttrace->paths.max != -1) {
982 memset(npath + ttrace->paths.max + 1, 0,
983 (fd - ttrace->paths.max) * sizeof(char *));
984 } else {
985 memset(npath, 0, (fd + 1) * sizeof(char *));
986 }
987
988 ttrace->paths.table = npath;
989 ttrace->paths.max = fd;
990 }
991
992 ttrace->paths.table[fd] = strdup(pathname);
993
994 return ttrace->paths.table[fd] != NULL ? 0 : -1;
995}
996
97119f37
ACM
997static int thread__read_fd_path(struct thread *thread, int fd)
998{
999 char linkname[PATH_MAX], pathname[PATH_MAX];
1000 struct stat st;
1001 int ret;
1002
1003 if (thread->pid_ == thread->tid) {
1004 scnprintf(linkname, sizeof(linkname),
1005 "/proc/%d/fd/%d", thread->pid_, fd);
1006 } else {
1007 scnprintf(linkname, sizeof(linkname),
1008 "/proc/%d/task/%d/fd/%d", thread->pid_, thread->tid, fd);
1009 }
1010
1011 if (lstat(linkname, &st) < 0 || st.st_size + 1 > (off_t)sizeof(pathname))
1012 return -1;
1013
1014 ret = readlink(linkname, pathname, sizeof(pathname));
1015
1016 if (ret < 0 || ret > st.st_size)
1017 return -1;
1018
1019 pathname[ret] = '\0';
1020 return trace__set_fd_pathname(thread, fd, pathname);
1021}
1022
c522739d
ACM
1023static const char *thread__fd_path(struct thread *thread, int fd,
1024 struct trace *trace)
75b757ca 1025{
89dceb22 1026 struct thread_trace *ttrace = thread__priv(thread);
75b757ca
ACM
1027
1028 if (ttrace == NULL)
1029 return NULL;
1030
1031 if (fd < 0)
1032 return NULL;
1033
cdcd1e6b 1034 if ((fd > ttrace->paths.max || ttrace->paths.table[fd] == NULL)) {
c522739d
ACM
1035 if (!trace->live)
1036 return NULL;
1037 ++trace->stats.proc_getname;
cdcd1e6b 1038 if (thread__read_fd_path(thread, fd))
c522739d
ACM
1039 return NULL;
1040 }
75b757ca
ACM
1041
1042 return ttrace->paths.table[fd];
1043}
1044
fc65eb82 1045size_t syscall_arg__scnprintf_fd(char *bf, size_t size, struct syscall_arg *arg)
75b757ca
ACM
1046{
1047 int fd = arg->val;
1048 size_t printed = scnprintf(bf, size, "%d", fd);
c522739d 1049 const char *path = thread__fd_path(arg->thread, fd, arg->trace);
75b757ca
ACM
1050
1051 if (path)
1052 printed += scnprintf(bf + printed, size - printed, "<%s>", path);
1053
1054 return printed;
1055}
1056
0a2f7540
ACM
1057size_t pid__scnprintf_fd(struct trace *trace, pid_t pid, int fd, char *bf, size_t size)
1058{
1059 size_t printed = scnprintf(bf, size, "%d", fd);
1060 struct thread *thread = machine__find_thread(trace->host, pid, pid);
1061
1062 if (thread) {
1063 const char *path = thread__fd_path(thread, fd, trace);
1064
1065 if (path)
1066 printed += scnprintf(bf + printed, size - printed, "<%s>", path);
1067
1068 thread__put(thread);
1069 }
1070
1071 return printed;
1072}
1073
75b757ca
ACM
1074static size_t syscall_arg__scnprintf_close_fd(char *bf, size_t size,
1075 struct syscall_arg *arg)
1076{
1077 int fd = arg->val;
1078 size_t printed = syscall_arg__scnprintf_fd(bf, size, arg);
89dceb22 1079 struct thread_trace *ttrace = thread__priv(arg->thread);
75b757ca 1080
04662523
ACM
1081 if (ttrace && fd >= 0 && fd <= ttrace->paths.max)
1082 zfree(&ttrace->paths.table[fd]);
75b757ca
ACM
1083
1084 return printed;
1085}
1086
f994592d
ACM
1087static void thread__set_filename_pos(struct thread *thread, const char *bf,
1088 unsigned long ptr)
1089{
1090 struct thread_trace *ttrace = thread__priv(thread);
1091
1092 ttrace->filename.ptr = ptr;
1093 ttrace->filename.entry_str_pos = bf - ttrace->entry_str;
1094}
1095
75d1e306
ACM
1096static size_t syscall_arg__scnprintf_augmented_string(struct syscall_arg *arg, char *bf, size_t size)
1097{
1098 struct augmented_arg *augmented_arg = arg->augmented.args;
1099
1100 return scnprintf(bf, size, "%.*s", augmented_arg->size, augmented_arg->value);
1101}
1102
f994592d
ACM
1103static size_t syscall_arg__scnprintf_filename(char *bf, size_t size,
1104 struct syscall_arg *arg)
1105{
1106 unsigned long ptr = arg->val;
1107
75d1e306
ACM
1108 if (arg->augmented.args)
1109 return syscall_arg__scnprintf_augmented_string(arg, bf, size);
1110
f994592d
ACM
1111 if (!arg->trace->vfs_getname)
1112 return scnprintf(bf, size, "%#x", ptr);
1113
1114 thread__set_filename_pos(arg->thread, bf, ptr);
1115 return 0;
1116}
1117
ae9ed035
ACM
1118static bool trace__filter_duration(struct trace *trace, double t)
1119{
1120 return t < (trace->duration_filter * NSEC_PER_MSEC);
1121}
1122
fd2b2975 1123static size_t __trace__fprintf_tstamp(struct trace *trace, u64 tstamp, FILE *fp)
752fde44
ACM
1124{
1125 double ts = (double)(tstamp - trace->base_time) / NSEC_PER_MSEC;
1126
60c907ab 1127 return fprintf(fp, "%10.3f ", ts);
752fde44
ACM
1128}
1129
fd2b2975
ACM
1130/*
1131 * We're handling tstamp=0 as an undefined tstamp, i.e. like when we are
1132 * using ttrace->entry_time for a thread that receives a sys_exit without
1133 * first having received a sys_enter ("poll" issued before tracing session
1134 * starts, lost sys_enter exit due to ring buffer overflow).
1135 */
1136static size_t trace__fprintf_tstamp(struct trace *trace, u64 tstamp, FILE *fp)
1137{
1138 if (tstamp > 0)
1139 return __trace__fprintf_tstamp(trace, tstamp, fp);
1140
1141 return fprintf(fp, " ? ");
1142}
1143
/*
 * Flags set from the signal handler below. They have to be
 * 'volatile sig_atomic_t': that is the only kind of (non lock-free atomic)
 * object the C standard allows a signal handler to write to.
 *
 * done:        some handled signal arrived
 * interrupted: the last signal handled was SIGINT
 */
static volatile sig_atomic_t done = false;
static volatile sig_atomic_t interrupted = false;

/* Note that a signal arrived and whether it was SIGINT. */
static void sig_handler(int sig)
{
	done = true;
	interrupted = sig == SIGINT;
}
1152
6dcbd212 1153static size_t trace__fprintf_comm_tid(struct trace *trace, struct thread *thread, FILE *fp)
752fde44 1154{
6dcbd212 1155 size_t printed = 0;
752fde44 1156
50c95cbd
ACM
1157 if (trace->multiple_threads) {
1158 if (trace->show_comm)
1902efe7 1159 printed += fprintf(fp, "%.14s/", thread__comm_str(thread));
38051234 1160 printed += fprintf(fp, "%d ", thread->tid);
50c95cbd 1161 }
752fde44
ACM
1162
1163 return printed;
1164}
1165
6dcbd212
ACM
1166static size_t trace__fprintf_entry_head(struct trace *trace, struct thread *thread,
1167 u64 duration, bool duration_calculated, u64 tstamp, FILE *fp)
1168{
1169 size_t printed = trace__fprintf_tstamp(trace, tstamp, fp);
1170 printed += fprintf_duration(duration, duration_calculated, fp);
1171 return printed + trace__fprintf_comm_tid(trace, thread, fp);
1172}
1173
c24ff998 1174static int trace__process_event(struct trace *trace, struct machine *machine,
162f0bef 1175 union perf_event *event, struct perf_sample *sample)
752fde44
ACM
1176{
1177 int ret = 0;
1178
1179 switch (event->header.type) {
1180 case PERF_RECORD_LOST:
c24ff998 1181 color_fprintf(trace->output, PERF_COLOR_RED,
752fde44 1182 "LOST %" PRIu64 " events!\n", event->lost.lost);
162f0bef 1183 ret = machine__process_lost_event(machine, event, sample);
3ed5ca2e 1184 break;
752fde44 1185 default:
162f0bef 1186 ret = machine__process_event(machine, event, sample);
752fde44
ACM
1187 break;
1188 }
1189
1190 return ret;
1191}
1192
c24ff998 1193static int trace__tool_process(struct perf_tool *tool,
752fde44 1194 union perf_event *event,
162f0bef 1195 struct perf_sample *sample,
752fde44
ACM
1196 struct machine *machine)
1197{
c24ff998 1198 struct trace *trace = container_of(tool, struct trace, tool);
162f0bef 1199 return trace__process_event(trace, machine, event, sample);
752fde44
ACM
1200}
1201
caf8a0d0
ACM
1202static char *trace__machine__resolve_kernel_addr(void *vmachine, unsigned long long *addrp, char **modp)
1203{
1204 struct machine *machine = vmachine;
1205
1206 if (machine->kptr_restrict_warned)
1207 return NULL;
1208
1209 if (symbol_conf.kptr_restrict) {
1210 pr_warning("Kernel address maps (/proc/{kallsyms,modules}) are restricted.\n\n"
1211 "Check /proc/sys/kernel/kptr_restrict.\n\n"
1212 "Kernel samples will not be resolved.\n");
1213 machine->kptr_restrict_warned = true;
1214 return NULL;
1215 }
1216
1217 return machine__resolve_kernel_addr(vmachine, addrp, modp);
1218}
1219
752fde44
ACM
1220static int trace__symbols_init(struct trace *trace, struct perf_evlist *evlist)
1221{
0a7e6d1b 1222 int err = symbol__init(NULL);
752fde44
ACM
1223
1224 if (err)
1225 return err;
1226
8fb598e5
DA
1227 trace->host = machine__new_host();
1228 if (trace->host == NULL)
1229 return -ENOMEM;
752fde44 1230
cbd5c178
AV
1231 err = trace_event__register_resolver(trace->host, trace__machine__resolve_kernel_addr);
1232 if (err < 0)
1233 goto out;
706c3da4 1234
a33fbd56 1235 err = __machine__synthesize_threads(trace->host, &trace->tool, &trace->opts.target,
9d9cad76 1236 evlist->threads, trace__tool_process, false,
340b47f5 1237 trace->opts.proc_map_timeout, 1);
cbd5c178 1238out:
752fde44
ACM
1239 if (err)
1240 symbol__exit();
1241
1242 return err;
1243}
1244
33974a41
AV
1245static void trace__symbols__exit(struct trace *trace)
1246{
1247 machine__exit(trace->host);
1248 trace->host = NULL;
1249
1250 symbol__exit();
1251}
1252
5e58fcfa 1253static int syscall__alloc_arg_fmts(struct syscall *sc, int nr_args)
13d4ff3e 1254{
5e58fcfa 1255 int idx;
13d4ff3e 1256
332337da
ACM
1257 if (nr_args == 6 && sc->fmt && sc->fmt->nr_args != 0)
1258 nr_args = sc->fmt->nr_args;
1259
5e58fcfa 1260 sc->arg_fmt = calloc(nr_args, sizeof(*sc->arg_fmt));
82d4a110 1261 if (sc->arg_fmt == NULL)
13d4ff3e
ACM
1262 return -1;
1263
5e58fcfa
ACM
1264 for (idx = 0; idx < nr_args; ++idx) {
1265 if (sc->fmt)
82d4a110 1266 sc->arg_fmt[idx] = sc->fmt->arg[idx];
5e58fcfa 1267 }
82d4a110 1268
5e58fcfa
ACM
1269 sc->nr_args = nr_args;
1270 return 0;
1271}
1272
1273static int syscall__set_arg_fmts(struct syscall *sc)
1274{
7a983a0f 1275 struct format_field *field, *last_field = NULL;
5e58fcfa
ACM
1276 int idx = 0, len;
1277
1278 for (field = sc->args; field; field = field->next, ++idx) {
7a983a0f
ACM
1279 last_field = field;
1280
5e58fcfa
ACM
1281 if (sc->fmt && sc->fmt->arg[idx].scnprintf)
1282 continue;
1f115cb7 1283
82d4a110 1284 if (strcmp(field->type, "const char *") == 0 &&
12f3ca4f
ACM
1285 (strcmp(field->name, "filename") == 0 ||
1286 strcmp(field->name, "path") == 0 ||
1287 strcmp(field->name, "pathname") == 0))
82d4a110 1288 sc->arg_fmt[idx].scnprintf = SCA_FILENAME;
beccb2b5 1289 else if (field->flags & FIELD_IS_POINTER)
82d4a110 1290 sc->arg_fmt[idx].scnprintf = syscall_arg__scnprintf_hex;
d1d438a3 1291 else if (strcmp(field->type, "pid_t") == 0)
82d4a110 1292 sc->arg_fmt[idx].scnprintf = SCA_PID;
ba2f22cf 1293 else if (strcmp(field->type, "umode_t") == 0)
82d4a110 1294 sc->arg_fmt[idx].scnprintf = SCA_MODE_T;
b6565c90
ACM
1295 else if ((strcmp(field->type, "int") == 0 ||
1296 strcmp(field->type, "unsigned int") == 0 ||
1297 strcmp(field->type, "long") == 0) &&
1298 (len = strlen(field->name)) >= 2 &&
1299 strcmp(field->name + len - 2, "fd") == 0) {
1300 /*
1301 * /sys/kernel/tracing/events/syscalls/sys_enter*
1302 * egrep 'field:.*fd;' .../format|sed -r 's/.*field:([a-z ]+) [a-z_]*fd.+/\1/g'|sort|uniq -c
1303 * 65 int
1304 * 23 unsigned int
1305 * 7 unsigned long
1306 */
82d4a110 1307 sc->arg_fmt[idx].scnprintf = SCA_FD;
b6565c90 1308 }
13d4ff3e
ACM
1309 }
1310
7a983a0f
ACM
1311 if (last_field)
1312 sc->args_size = last_field->offset + last_field->size;
1313
13d4ff3e
ACM
1314 return 0;
1315}
1316
514f1c67
ACM
1317static int trace__read_syscall_info(struct trace *trace, int id)
1318{
1319 char tp_name[128];
1320 struct syscall *sc;
fd0db102 1321 const char *name = syscalltbl__name(trace->sctbl, id);
3a531260
ACM
1322
1323 if (name == NULL)
1324 return -1;
514f1c67
ACM
1325
1326 if (id > trace->syscalls.max) {
1327 struct syscall *nsyscalls = realloc(trace->syscalls.table, (id + 1) * sizeof(*sc));
1328
1329 if (nsyscalls == NULL)
1330 return -1;
1331
1332 if (trace->syscalls.max != -1) {
1333 memset(nsyscalls + trace->syscalls.max + 1, 0,
1334 (id - trace->syscalls.max) * sizeof(*sc));
1335 } else {
1336 memset(nsyscalls, 0, (id + 1) * sizeof(*sc));
1337 }
1338
1339 trace->syscalls.table = nsyscalls;
1340 trace->syscalls.max = id;
1341 }
1342
1343 sc = trace->syscalls.table + id;
3a531260 1344 sc->name = name;
2ae3a312 1345
3a531260 1346 sc->fmt = syscall_fmt__find(sc->name);
514f1c67 1347
aec1930b 1348 snprintf(tp_name, sizeof(tp_name), "sys_enter_%s", sc->name);
97978b3e 1349 sc->tp_format = trace_event__tp_format("syscalls", tp_name);
aec1930b 1350
8dd2a131 1351 if (IS_ERR(sc->tp_format) && sc->fmt && sc->fmt->alias) {
aec1930b 1352 snprintf(tp_name, sizeof(tp_name), "sys_enter_%s", sc->fmt->alias);
97978b3e 1353 sc->tp_format = trace_event__tp_format("syscalls", tp_name);
aec1930b 1354 }
514f1c67 1355
5e58fcfa
ACM
1356 if (syscall__alloc_arg_fmts(sc, IS_ERR(sc->tp_format) ? 6 : sc->tp_format->format.nr_fields))
1357 return -1;
1358
8dd2a131 1359 if (IS_ERR(sc->tp_format))
13d4ff3e
ACM
1360 return -1;
1361
f208bd8d 1362 sc->args = sc->tp_format->format.fields;
c42de706
TS
1363 /*
1364 * We need to check and discard the first variable '__syscall_nr'
1365 * or 'nr' that mean the syscall number. It is needless here.
1366 * So drop '__syscall_nr' or 'nr' field but does not exist on older kernels.
1367 */
1368 if (sc->args && (!strcmp(sc->args->name, "__syscall_nr") || !strcmp(sc->args->name, "nr"))) {
f208bd8d
ACM
1369 sc->args = sc->args->next;
1370 --sc->nr_args;
1371 }
1372
5089f20e 1373 sc->is_exit = !strcmp(name, "exit_group") || !strcmp(name, "exit");
6a648b53 1374 sc->is_open = !strcmp(name, "open") || !strcmp(name, "openat");
5089f20e 1375
13d4ff3e 1376 return syscall__set_arg_fmts(sc);
514f1c67
ACM
1377}
1378
d0cc439b
ACM
1379static int trace__validate_ev_qualifier(struct trace *trace)
1380{
8b3ce757 1381 int err = 0, i;
27702bcf 1382 size_t nr_allocated;
d0cc439b
ACM
1383 struct str_node *pos;
1384
8b3ce757
ACM
1385 trace->ev_qualifier_ids.nr = strlist__nr_entries(trace->ev_qualifier);
1386 trace->ev_qualifier_ids.entries = malloc(trace->ev_qualifier_ids.nr *
1387 sizeof(trace->ev_qualifier_ids.entries[0]));
1388
1389 if (trace->ev_qualifier_ids.entries == NULL) {
1390 fputs("Error:\tNot enough memory for allocating events qualifier ids\n",
1391 trace->output);
1392 err = -EINVAL;
1393 goto out;
1394 }
1395
27702bcf 1396 nr_allocated = trace->ev_qualifier_ids.nr;
8b3ce757
ACM
1397 i = 0;
1398
602a1f4d 1399 strlist__for_each_entry(pos, trace->ev_qualifier) {
d0cc439b 1400 const char *sc = pos->s;
27702bcf 1401 int id = syscalltbl__id(trace->sctbl, sc), match_next = -1;
d0cc439b 1402
8b3ce757 1403 if (id < 0) {
27702bcf
ACM
1404 id = syscalltbl__strglobmatch_first(trace->sctbl, sc, &match_next);
1405 if (id >= 0)
1406 goto matches;
1407
d0cc439b
ACM
1408 if (err == 0) {
1409 fputs("Error:\tInvalid syscall ", trace->output);
1410 err = -EINVAL;
1411 } else {
1412 fputs(", ", trace->output);
1413 }
1414
1415 fputs(sc, trace->output);
1416 }
27702bcf 1417matches:
8b3ce757 1418 trace->ev_qualifier_ids.entries[i++] = id;
27702bcf
ACM
1419 if (match_next == -1)
1420 continue;
1421
1422 while (1) {
1423 id = syscalltbl__strglobmatch_next(trace->sctbl, sc, &match_next);
1424 if (id < 0)
1425 break;
1426 if (nr_allocated == trace->ev_qualifier_ids.nr) {
1427 void *entries;
1428
1429 nr_allocated += 8;
1430 entries = realloc(trace->ev_qualifier_ids.entries,
1431 nr_allocated * sizeof(trace->ev_qualifier_ids.entries[0]));
1432 if (entries == NULL) {
1433 err = -ENOMEM;
1434 fputs("\nError:\t Not enough memory for parsing\n", trace->output);
1435 goto out_free;
1436 }
1437 trace->ev_qualifier_ids.entries = entries;
1438 }
1439 trace->ev_qualifier_ids.nr++;
1440 trace->ev_qualifier_ids.entries[i++] = id;
1441 }
d0cc439b
ACM
1442 }
1443
1444 if (err < 0) {
1445 fputs("\nHint:\ttry 'perf list syscalls:sys_enter_*'"
1446 "\nHint:\tand: 'man syscalls'\n", trace->output);
27702bcf 1447out_free:
8b3ce757
ACM
1448 zfree(&trace->ev_qualifier_ids.entries);
1449 trace->ev_qualifier_ids.nr = 0;
d0cc439b 1450 }
8b3ce757 1451out:
d0cc439b
ACM
1452 return err;
1453}
1454
55d43bca
DA
1455/*
1456 * args is to be interpreted as a series of longs but we need to handle
1457 * 8-byte unaligned accesses. args points to raw_data within the event
1458 * and raw_data is guaranteed to be 8-byte unaligned because it is
1459 * preceded by raw_size which is a u32. So we need to copy args to a temp
1460 * variable to read it. Most notably this avoids extended load instructions
1461 * on unaligned addresses
1462 */
325f5091 1463unsigned long syscall_arg__val(struct syscall_arg *arg, u8 idx)
f9f83b33
ACM
1464{
1465 unsigned long val;
325f5091 1466 unsigned char *p = arg->args + sizeof(unsigned long) * idx;
f9f83b33
ACM
1467
1468 memcpy(&val, p, sizeof(val));
1469 return val;
1470}
1471
c51bdfec
ACM
1472static size_t syscall__scnprintf_name(struct syscall *sc, char *bf, size_t size,
1473 struct syscall_arg *arg)
1474{
1475 if (sc->arg_fmt && sc->arg_fmt[arg->idx].name)
1476 return scnprintf(bf, size, "%s: ", sc->arg_fmt[arg->idx].name);
1477
1478 return scnprintf(bf, size, "arg%d: ", arg->idx);
1479}
1480
d032d79e
ACM
1481static size_t syscall__scnprintf_val(struct syscall *sc, char *bf, size_t size,
1482 struct syscall_arg *arg, unsigned long val)
1483{
1484 if (sc->arg_fmt && sc->arg_fmt[arg->idx].scnprintf) {
1485 arg->val = val;
1486 if (sc->arg_fmt[arg->idx].parm)
1487 arg->parm = sc->arg_fmt[arg->idx].parm;
1488 return sc->arg_fmt[arg->idx].scnprintf(bf, size, arg);
1489 }
1490 return scnprintf(bf, size, "%ld", val);
1491}
1492
752fde44 1493static size_t syscall__scnprintf_args(struct syscall *sc, char *bf, size_t size,
7a983a0f
ACM
1494 unsigned char *args, void *augmented_args, int augmented_args_size,
1495 struct trace *trace, struct thread *thread)
514f1c67 1496{
514f1c67 1497 size_t printed = 0;
55d43bca 1498 unsigned long val;
d032d79e
ACM
1499 u8 bit = 1;
1500 struct syscall_arg arg = {
1501 .args = args,
7a983a0f
ACM
1502 .augmented = {
1503 .size = augmented_args_size,
1504 .args = augmented_args,
1505 },
d032d79e
ACM
1506 .idx = 0,
1507 .mask = 0,
1508 .trace = trace,
1509 .thread = thread,
1510 };
84486caa
ACM
1511 struct thread_trace *ttrace = thread__priv(thread);
1512
1513 /*
1514 * Things like fcntl will set this in its 'cmd' formatter to pick the
1515 * right formatter for the return value (an fd? file flags?), which is
1516 * not needed for syscalls that always return a given type, say an fd.
1517 */
1518 ttrace->ret_scnprintf = NULL;
514f1c67 1519
f208bd8d 1520 if (sc->args != NULL) {
514f1c67 1521 struct format_field *field;
6e7eeb51 1522
f208bd8d 1523 for (field = sc->args; field;
01533e97
ACM
1524 field = field->next, ++arg.idx, bit <<= 1) {
1525 if (arg.mask & bit)
6e7eeb51 1526 continue;
55d43bca 1527
f9f83b33 1528 val = syscall_arg__val(&arg, arg.idx);
55d43bca 1529
4aa58232
ACM
1530 /*
1531 * Suppress this argument if its value is zero and
1532 * and we don't have a string associated in an
1533 * strarray for it.
1534 */
55d43bca 1535 if (val == 0 &&
82d4a110 1536 !(sc->arg_fmt &&
d47737d5
ACM
1537 (sc->arg_fmt[arg.idx].show_zero ||
1538 sc->arg_fmt[arg.idx].scnprintf == SCA_STRARRAY ||
82d4a110
ACM
1539 sc->arg_fmt[arg.idx].scnprintf == SCA_STRARRAYS) &&
1540 sc->arg_fmt[arg.idx].parm))
22ae5cf1
ACM
1541 continue;
1542
752fde44 1543 printed += scnprintf(bf + printed, size - printed,
13d4ff3e 1544 "%s%s: ", printed ? ", " : "", field->name);
d032d79e 1545 printed += syscall__scnprintf_val(sc, bf + printed, size - printed, &arg, val);
514f1c67 1546 }
4c4d6e51
ACM
1547 } else if (IS_ERR(sc->tp_format)) {
1548 /*
1549 * If we managed to read the tracepoint /format file, then we
1550 * may end up not having any args, like with gettid(), so only
1551 * print the raw args when we didn't manage to read it.
1552 */
332337da 1553 while (arg.idx < sc->nr_args) {
d032d79e
ACM
1554 if (arg.mask & bit)
1555 goto next_arg;
1556 val = syscall_arg__val(&arg, arg.idx);
c51bdfec
ACM
1557 if (printed)
1558 printed += scnprintf(bf + printed, size - printed, ", ");
1559 printed += syscall__scnprintf_name(sc, bf + printed, size - printed, &arg);
d032d79e
ACM
1560 printed += syscall__scnprintf_val(sc, bf + printed, size - printed, &arg, val);
1561next_arg:
1562 ++arg.idx;
1563 bit <<= 1;
514f1c67
ACM
1564 }
1565 }
1566
1567 return printed;
1568}
1569
/*
 * Signature of the per tracepoint sample handlers, e.g. trace__sys_enter()
 * and trace__sys_exit().
 */
typedef int (*tracepoint_handler)(struct trace *trace,
				  struct perf_evsel *evsel,
				  union perf_event *event,
				  struct perf_sample *sample);
1573
1574static struct syscall *trace__syscall_info(struct trace *trace,
bf2575c1 1575 struct perf_evsel *evsel, int id)
ba3d7dee 1576{
ba3d7dee
ACM
1577
1578 if (id < 0) {
adaa18bf
ACM
1579
1580 /*
1581 * XXX: Noticed on x86_64, reproduced as far back as 3.0.36, haven't tried
1582 * before that, leaving at a higher verbosity level till that is
1583 * explained. Reproduced with plain ftrace with:
1584 *
1585 * echo 1 > /t/events/raw_syscalls/sys_exit/enable
1586 * grep "NR -1 " /t/trace_pipe
1587 *
1588 * After generating some load on the machine.
1589 */
1590 if (verbose > 1) {
1591 static u64 n;
1592 fprintf(trace->output, "Invalid syscall %d id, skipping (%s, %" PRIu64 ") ...\n",
1593 id, perf_evsel__name(evsel), ++n);
1594 }
ba3d7dee
ACM
1595 return NULL;
1596 }
1597
1598 if ((id > trace->syscalls.max || trace->syscalls.table[id].name == NULL) &&
1599 trace__read_syscall_info(trace, id))
1600 goto out_cant_read;
1601
1602 if ((id > trace->syscalls.max || trace->syscalls.table[id].name == NULL))
1603 goto out_cant_read;
1604
1605 return &trace->syscalls.table[id];
1606
1607out_cant_read:
bb963e16 1608 if (verbose > 0) {
7c304ee0
ACM
1609 fprintf(trace->output, "Problems reading syscall %d", id);
1610 if (id <= trace->syscalls.max && trace->syscalls.table[id].name != NULL)
1611 fprintf(trace->output, "(%s)", trace->syscalls.table[id].name);
1612 fputs(" information\n", trace->output);
1613 }
ba3d7dee
ACM
1614 return NULL;
1615}
1616
bf2575c1
DA
1617static void thread__update_stats(struct thread_trace *ttrace,
1618 int id, struct perf_sample *sample)
1619{
1620 struct int_node *inode;
1621 struct stats *stats;
1622 u64 duration = 0;
1623
1624 inode = intlist__findnew(ttrace->syscall_stats, id);
1625 if (inode == NULL)
1626 return;
1627
1628 stats = inode->priv;
1629 if (stats == NULL) {
1630 stats = malloc(sizeof(struct stats));
1631 if (stats == NULL)
1632 return;
1633 init_stats(stats);
1634 inode->priv = stats;
1635 }
1636
1637 if (ttrace->entry_time && sample->time > ttrace->entry_time)
1638 duration = sample->time - ttrace->entry_time;
1639
1640 update_stats(stats, duration);
1641}
1642
522283fe 1643static int trace__printf_interrupted_entry(struct trace *trace)
e596663e
ACM
1644{
1645 struct thread_trace *ttrace;
e596663e
ACM
1646 size_t printed;
1647
0a6545bd 1648 if (trace->failure_only || trace->current == NULL)
e596663e
ACM
1649 return 0;
1650
1651 ttrace = thread__priv(trace->current);
1652
1653 if (!ttrace->entry_pending)
1654 return 0;
1655
522283fe 1656 printed = trace__fprintf_entry_head(trace, trace->current, 0, false, ttrace->entry_time, trace->output);
e596663e
ACM
1657 printed += fprintf(trace->output, "%-70s) ...\n", ttrace->entry_str);
1658 ttrace->entry_pending = false;
1659
1660 return printed;
1661}
1662
591421e1
ACM
1663static int trace__fprintf_sample(struct trace *trace, struct perf_evsel *evsel,
1664 struct perf_sample *sample, struct thread *thread)
1665{
1666 int printed = 0;
1667
1668 if (trace->print_sample) {
1669 double ts = (double)sample->time / NSEC_PER_MSEC;
1670
1671 printed += fprintf(trace->output, "%22s %10.3f %s %d/%d [%d]\n",
1672 perf_evsel__name(evsel), ts,
1673 thread__comm_str(thread),
1674 sample->pid, sample->tid, sample->cpu);
1675 }
1676
1677 return printed;
1678}
1679
8a041f86
ACM
1680static void *syscall__augmented_args(struct syscall *sc, struct perf_sample *sample, int *augmented_args_size)
1681{
1682 void *augmented_args = NULL;
1683
1684 *augmented_args_size = sample->raw_size - sc->args_size;
1685 if (*augmented_args_size > 0)
1686 augmented_args = sample->raw_data + sc->args_size;
1687
1688 return augmented_args;
1689}
1690
ba3d7dee 1691static int trace__sys_enter(struct trace *trace, struct perf_evsel *evsel,
0c82adcf 1692 union perf_event *event __maybe_unused,
ba3d7dee
ACM
1693 struct perf_sample *sample)
1694{
752fde44 1695 char *msg;
ba3d7dee 1696 void *args;
752fde44 1697 size_t printed = 0;
2ae3a312 1698 struct thread *thread;
b91fc39f 1699 int id = perf_evsel__sc_tp_uint(evsel, id, sample), err = -1;
bf2575c1 1700 struct syscall *sc = trace__syscall_info(trace, evsel, id);
2ae3a312
ACM
1701 struct thread_trace *ttrace;
1702
1703 if (sc == NULL)
1704 return -1;
ba3d7dee 1705
8fb598e5 1706 thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
c24ff998 1707 ttrace = thread__trace(thread, trace->output);
2ae3a312 1708 if (ttrace == NULL)
b91fc39f 1709 goto out_put;
ba3d7dee 1710
591421e1
ACM
1711 trace__fprintf_sample(trace, evsel, sample, thread);
1712
77170988 1713 args = perf_evsel__sc_tp_ptr(evsel, args, sample);
752fde44
ACM
1714
1715 if (ttrace->entry_str == NULL) {
e4d44e83 1716 ttrace->entry_str = malloc(trace__entry_str_size);
752fde44 1717 if (!ttrace->entry_str)
b91fc39f 1718 goto out_put;
752fde44
ACM
1719 }
1720
5cf9c84e 1721 if (!(trace->duration_filter || trace->summary_only || trace->min_stack))
522283fe 1722 trace__printf_interrupted_entry(trace);
e596663e 1723
752fde44
ACM
1724 ttrace->entry_time = sample->time;
1725 msg = ttrace->entry_str;
e4d44e83 1726 printed += scnprintf(msg + printed, trace__entry_str_size - printed, "%s(", sc->name);
752fde44 1727
e4d44e83 1728 printed += syscall__scnprintf_args(sc, msg + printed, trace__entry_str_size - printed,
7a983a0f 1729 args, NULL, 0, trace, thread);
752fde44 1730
5089f20e 1731 if (sc->is_exit) {
0a6545bd 1732 if (!(trace->duration_filter || trace->summary_only || trace->failure_only || trace->min_stack)) {
fd2b2975 1733 trace__fprintf_entry_head(trace, thread, 0, false, ttrace->entry_time, trace->output);
c008f78f 1734 fprintf(trace->output, "%-70s)\n", ttrace->entry_str);
ae9ed035 1735 }
7f4f8001 1736 } else {
752fde44 1737 ttrace->entry_pending = true;
7f4f8001
ACM
1738 /* See trace__vfs_getname & trace__sys_exit */
1739 ttrace->filename.pending_open = false;
1740 }
ba3d7dee 1741
f3b623b8
ACM
1742 if (trace->current != thread) {
1743 thread__put(trace->current);
1744 trace->current = thread__get(thread);
1745 }
b91fc39f
ACM
1746 err = 0;
1747out_put:
1748 thread__put(thread);
1749 return err;
ba3d7dee
ACM
1750}
1751
a98392bb
ACM
1752static int trace__fprintf_sys_enter(struct trace *trace, struct perf_evsel *evsel,
1753 struct perf_sample *sample)
1754{
a98392bb
ACM
1755 struct thread_trace *ttrace;
1756 struct thread *thread;
f3acd886
ACM
1757 int id = perf_evsel__sc_tp_uint(evsel, id, sample), err = -1;
1758 struct syscall *sc = trace__syscall_info(trace, evsel, id);
a98392bb 1759 char msg[1024];
7a983a0f
ACM
1760 void *args, *augmented_args = NULL;
1761 int augmented_args_size;
a98392bb 1762
a98392bb
ACM
1763 if (sc == NULL)
1764 return -1;
1765
1766 thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
1767 ttrace = thread__trace(thread, trace->output);
1768 /*
1769 * We need to get ttrace just to make sure it is there when syscall__scnprintf_args()
1770 * and the rest of the beautifiers accessing it via struct syscall_arg touches it.
1771 */
1772 if (ttrace == NULL)
1773 goto out_put;
1774
f3acd886 1775 args = perf_evsel__sc_tp_ptr(evsel, args, sample);
8a041f86 1776 augmented_args = syscall__augmented_args(sc, sample, &augmented_args_size);
7a983a0f 1777 syscall__scnprintf_args(sc, msg, sizeof(msg), args, augmented_args, augmented_args_size, trace, thread);
a98392bb
ACM
1778 fprintf(trace->output, "%s", msg);
1779 err = 0;
1780out_put:
1781 thread__put(thread);
1782 return err;
1783}
1784
5cf9c84e
ACM
1785static int trace__resolve_callchain(struct trace *trace, struct perf_evsel *evsel,
1786 struct perf_sample *sample,
1787 struct callchain_cursor *cursor)
202ff968
ACM
1788{
1789 struct addr_location al;
3a9e9a47
RB
1790 int max_stack = evsel->attr.sample_max_stack ?
1791 evsel->attr.sample_max_stack :
1792 trace->max_stack;
5cf9c84e
ACM
1793
1794 if (machine__resolve(trace->host, &al, sample) < 0 ||
3a9e9a47 1795 thread__resolve_callchain(al.thread, cursor, evsel, sample, NULL, NULL, max_stack))
5cf9c84e
ACM
1796 return -1;
1797
1798 return 0;
1799}
1800
1801static int trace__fprintf_callchain(struct trace *trace, struct perf_sample *sample)
1802{
202ff968 1803 /* TODO: user-configurable print_opts */
e20ab86e
ACM
1804 const unsigned int print_opts = EVSEL__PRINT_SYM |
1805 EVSEL__PRINT_DSO |
1806 EVSEL__PRINT_UNKNOWN_AS_ADDR;
202ff968 1807
d327e60c 1808 return sample__fprintf_callchain(sample, 38, print_opts, &callchain_cursor, trace->output);
202ff968
ACM
1809}
1810
092bd3cd
HB
/*
 * Translate the errno value @err into its name, using the architecture
 * taken from the perf_env associated with @evsel.
 */
static const char *errno_to_name(struct perf_evsel *evsel, int err)
{
	const char *arch_name = perf_env__arch(perf_evsel__env(evsel));

	return arch_syscalls__strerrno(arch_name, err);
}
1818
ba3d7dee 1819static int trace__sys_exit(struct trace *trace, struct perf_evsel *evsel,
0c82adcf 1820 union perf_event *event __maybe_unused,
ba3d7dee
ACM
1821 struct perf_sample *sample)
1822{
2c82c3ad 1823 long ret;
60c907ab 1824 u64 duration = 0;
fd2b2975 1825 bool duration_calculated = false;
2ae3a312 1826 struct thread *thread;
5cf9c84e 1827 int id = perf_evsel__sc_tp_uint(evsel, id, sample), err = -1, callchain_ret = 0;
bf2575c1 1828 struct syscall *sc = trace__syscall_info(trace, evsel, id);
2ae3a312
ACM
1829 struct thread_trace *ttrace;
1830
1831 if (sc == NULL)
1832 return -1;
ba3d7dee 1833
8fb598e5 1834 thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
c24ff998 1835 ttrace = thread__trace(thread, trace->output);
2ae3a312 1836 if (ttrace == NULL)
b91fc39f 1837 goto out_put;
ba3d7dee 1838
591421e1
ACM
1839 trace__fprintf_sample(trace, evsel, sample, thread);
1840
bf2575c1
DA
1841 if (trace->summary)
1842 thread__update_stats(ttrace, id, sample);
1843
77170988 1844 ret = perf_evsel__sc_tp_uint(evsel, ret, sample);
ba3d7dee 1845
6a648b53 1846 if (sc->is_open && ret >= 0 && ttrace->filename.pending_open) {
7f4f8001
ACM
1847 trace__set_fd_pathname(thread, ret, ttrace->filename.name);
1848 ttrace->filename.pending_open = false;
c522739d
ACM
1849 ++trace->stats.vfs_getname;
1850 }
1851
ae9ed035 1852 if (ttrace->entry_time) {
60c907ab 1853 duration = sample->time - ttrace->entry_time;
ae9ed035
ACM
1854 if (trace__filter_duration(trace, duration))
1855 goto out;
fd2b2975 1856 duration_calculated = true;
ae9ed035
ACM
1857 } else if (trace->duration_filter)
1858 goto out;
60c907ab 1859
5cf9c84e
ACM
1860 if (sample->callchain) {
1861 callchain_ret = trace__resolve_callchain(trace, evsel, sample, &callchain_cursor);
1862 if (callchain_ret == 0) {
1863 if (callchain_cursor.nr < trace->min_stack)
1864 goto out;
1865 callchain_ret = 1;
1866 }
1867 }
1868
0a6545bd 1869 if (trace->summary_only || (ret >= 0 && trace->failure_only))
fd2eabaf
DA
1870 goto out;
1871
fd2b2975 1872 trace__fprintf_entry_head(trace, thread, duration, duration_calculated, ttrace->entry_time, trace->output);
752fde44
ACM
1873
1874 if (ttrace->entry_pending) {
c24ff998 1875 fprintf(trace->output, "%-70s", ttrace->entry_str);
752fde44 1876 } else {
c24ff998
ACM
1877 fprintf(trace->output, " ... [");
1878 color_fprintf(trace->output, PERF_COLOR_YELLOW, "continued");
1879 fprintf(trace->output, "]: %s()", sc->name);
752fde44
ACM
1880 }
1881
da3c9a44 1882 if (sc->fmt == NULL) {
1f63139c
ACM
1883 if (ret < 0)
1884 goto errno_print;
da3c9a44 1885signed_print:
6f8fe61e 1886 fprintf(trace->output, ") = %ld", ret);
1f63139c
ACM
1887 } else if (ret < 0) {
1888errno_print: {
942a91ed 1889 char bf[STRERR_BUFSIZE];
c8b5f2c9 1890 const char *emsg = str_error_r(-ret, bf, sizeof(bf)),
092bd3cd 1891 *e = errno_to_name(evsel, -ret);
ba3d7dee 1892
c24ff998 1893 fprintf(trace->output, ") = -1 %s %s", e, emsg);
1f63139c 1894 }
da3c9a44 1895 } else if (ret == 0 && sc->fmt->timeout)
c24ff998 1896 fprintf(trace->output, ") = 0 Timeout");
84486caa
ACM
1897 else if (ttrace->ret_scnprintf) {
1898 char bf[1024];
7ee57434
ACM
1899 struct syscall_arg arg = {
1900 .val = ret,
1901 .thread = thread,
1902 .trace = trace,
1903 };
1904 ttrace->ret_scnprintf(bf, sizeof(bf), &arg);
84486caa
ACM
1905 ttrace->ret_scnprintf = NULL;
1906 fprintf(trace->output, ") = %s", bf);
1907 } else if (sc->fmt->hexret)
2c82c3ad 1908 fprintf(trace->output, ") = %#lx", ret);
11c8e39f
ACM
1909 else if (sc->fmt->errpid) {
1910 struct thread *child = machine__find_thread(trace->host, ret, ret);
1911
1912 if (child != NULL) {
1913 fprintf(trace->output, ") = %ld", ret);
1914 if (child->comm_set)
1915 fprintf(trace->output, " (%s)", thread__comm_str(child));
1916 thread__put(child);
1917 }
1918 } else
da3c9a44 1919 goto signed_print;
ba3d7dee 1920
c24ff998 1921 fputc('\n', trace->output);
566a0885 1922
5cf9c84e
ACM
1923 if (callchain_ret > 0)
1924 trace__fprintf_callchain(trace, sample);
1925 else if (callchain_ret < 0)
1926 pr_err("Problem processing %s callchain, skipping...\n", perf_evsel__name(evsel));
ae9ed035 1927out:
752fde44 1928 ttrace->entry_pending = false;
b91fc39f
ACM
1929 err = 0;
1930out_put:
1931 thread__put(thread);
1932 return err;
ba3d7dee
ACM
1933}
1934
c522739d 1935static int trace__vfs_getname(struct trace *trace, struct perf_evsel *evsel,
0c82adcf 1936 union perf_event *event __maybe_unused,
c522739d
ACM
1937 struct perf_sample *sample)
1938{
f994592d
ACM
1939 struct thread *thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
1940 struct thread_trace *ttrace;
1941 size_t filename_len, entry_str_len, to_move;
1942 ssize_t remaining_space;
1943 char *pos;
7f4f8001 1944 const char *filename = perf_evsel__rawptr(evsel, sample, "pathname");
f994592d
ACM
1945
1946 if (!thread)
1947 goto out;
1948
1949 ttrace = thread__priv(thread);
1950 if (!ttrace)
ef65e96e 1951 goto out_put;
f994592d 1952
7f4f8001 1953 filename_len = strlen(filename);
39f0e7a8 1954 if (filename_len == 0)
ef65e96e 1955 goto out_put;
7f4f8001
ACM
1956
1957 if (ttrace->filename.namelen < filename_len) {
1958 char *f = realloc(ttrace->filename.name, filename_len + 1);
1959
1960 if (f == NULL)
ef65e96e 1961 goto out_put;
7f4f8001
ACM
1962
1963 ttrace->filename.namelen = filename_len;
1964 ttrace->filename.name = f;
1965 }
1966
1967 strcpy(ttrace->filename.name, filename);
1968 ttrace->filename.pending_open = true;
1969
f994592d 1970 if (!ttrace->filename.ptr)
ef65e96e 1971 goto out_put;
f994592d
ACM
1972
1973 entry_str_len = strlen(ttrace->entry_str);
1974 remaining_space = trace__entry_str_size - entry_str_len - 1; /* \0 */
1975 if (remaining_space <= 0)
ef65e96e 1976 goto out_put;
f994592d 1977
f994592d
ACM
1978 if (filename_len > (size_t)remaining_space) {
1979 filename += filename_len - remaining_space;
1980 filename_len = remaining_space;
1981 }
1982
1983 to_move = entry_str_len - ttrace->filename.entry_str_pos + 1; /* \0 */
1984 pos = ttrace->entry_str + ttrace->filename.entry_str_pos;
1985 memmove(pos + filename_len, pos, to_move);
1986 memcpy(pos, filename, filename_len);
1987
1988 ttrace->filename.ptr = 0;
1989 ttrace->filename.entry_str_pos = 0;
ef65e96e
ACM
1990out_put:
1991 thread__put(thread);
f994592d 1992out:
c522739d
ACM
1993 return 0;
1994}
1995
1302d88e 1996static int trace__sched_stat_runtime(struct trace *trace, struct perf_evsel *evsel,
0c82adcf 1997 union perf_event *event __maybe_unused,
1302d88e
ACM
1998 struct perf_sample *sample)
1999{
2000 u64 runtime = perf_evsel__intval(evsel, sample, "runtime");
2001 double runtime_ms = (double)runtime / NSEC_PER_MSEC;
8fb598e5 2002 struct thread *thread = machine__findnew_thread(trace->host,
314add6b
AH
2003 sample->pid,
2004 sample->tid);
c24ff998 2005 struct thread_trace *ttrace = thread__trace(thread, trace->output);
1302d88e
ACM
2006
2007 if (ttrace == NULL)
2008 goto out_dump;
2009
2010 ttrace->runtime_ms += runtime_ms;
2011 trace->runtime_ms += runtime_ms;
ef65e96e 2012out_put:
b91fc39f 2013 thread__put(thread);
1302d88e
ACM
2014 return 0;
2015
2016out_dump:
c24ff998 2017 fprintf(trace->output, "%s: comm=%s,pid=%u,runtime=%" PRIu64 ",vruntime=%" PRIu64 ")\n",
1302d88e
ACM
2018 evsel->name,
2019 perf_evsel__strval(evsel, sample, "comm"),
2020 (pid_t)perf_evsel__intval(evsel, sample, "pid"),
2021 runtime,
2022 perf_evsel__intval(evsel, sample, "vruntime"));
ef65e96e 2023 goto out_put;
1302d88e
ACM
2024}
2025
923d0c9a
ACM
2026static int bpf_output__printer(enum binary_printer_ops op,
2027 unsigned int val, void *extra __maybe_unused, FILE *fp)
1d6c9407 2028{
1d6c9407
WN
2029 unsigned char ch = (unsigned char)val;
2030
2031 switch (op) {
2032 case BINARY_PRINT_CHAR_DATA:
923d0c9a 2033 return fprintf(fp, "%c", isprint(ch) ? ch : '.');
1d6c9407
WN
2034 case BINARY_PRINT_DATA_BEGIN:
2035 case BINARY_PRINT_LINE_BEGIN:
2036 case BINARY_PRINT_ADDR:
2037 case BINARY_PRINT_NUM_DATA:
2038 case BINARY_PRINT_NUM_PAD:
2039 case BINARY_PRINT_SEP:
2040 case BINARY_PRINT_CHAR_PAD:
2041 case BINARY_PRINT_LINE_END:
2042 case BINARY_PRINT_DATA_END:
2043 default:
2044 break;
2045 }
923d0c9a
ACM
2046
2047 return 0;
1d6c9407
WN
2048}
2049
2050static void bpf_output__fprintf(struct trace *trace,
2051 struct perf_sample *sample)
2052{
923d0c9a
ACM
2053 binary__fprintf(sample->raw_data, sample->raw_size, 8,
2054 bpf_output__printer, NULL, trace->output);
1d6c9407
WN
2055}
2056
14a052df
ACM
2057static int trace__event_handler(struct trace *trace, struct perf_evsel *evsel,
2058 union perf_event *event __maybe_unused,
2059 struct perf_sample *sample)
2060{
c4191e55 2061 struct thread *thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
7ad35615
ACM
2062 int callchain_ret = 0;
2063
2064 if (sample->callchain) {
2065 callchain_ret = trace__resolve_callchain(trace, evsel, sample, &callchain_cursor);
2066 if (callchain_ret == 0) {
2067 if (callchain_cursor.nr < trace->min_stack)
2068 goto out;
2069 callchain_ret = 1;
2070 }
2071 }
2072
522283fe 2073 trace__printf_interrupted_entry(trace);
14a052df 2074 trace__fprintf_tstamp(trace, sample->time, trace->output);
0808921a
ACM
2075
2076 if (trace->trace_syscalls)
2077 fprintf(trace->output, "( ): ");
2078
c4191e55
ACM
2079 if (thread)
2080 trace__fprintf_comm_tid(trace, thread, trace->output);
2081
1cdf618f
ACM
2082 if (evsel == trace->syscalls.events.augmented) {
2083 int id = perf_evsel__sc_tp_uint(evsel, id, sample);
2084 struct syscall *sc = trace__syscall_info(trace, evsel, id);
2085
2086 if (sc) {
c4191e55
ACM
2087 fprintf(trace->output, "%s(", sc->name);
2088 trace__fprintf_sys_enter(trace, evsel, sample);
2089 fputc(')', trace->output);
2090 goto newline;
1cdf618f
ACM
2091 }
2092
2093 /*
2094 * XXX: Not having the associated syscall info or not finding/adding
2095 * the thread should never happen, but if it does...
2096 * fall thru and print it as a bpf_output event.
2097 */
2098 }
2099
0808921a 2100 fprintf(trace->output, "%s:", evsel->name);
14a052df 2101
1d6c9407 2102 if (perf_evsel__is_bpf_output(evsel)) {
1cdf618f 2103 bpf_output__fprintf(trace, sample);
1d6c9407 2104 } else if (evsel->tp_format) {
a98392bb
ACM
2105 if (strncmp(evsel->tp_format->name, "sys_enter_", 10) ||
2106 trace__fprintf_sys_enter(trace, evsel, sample)) {
2107 event_format__fprintf(evsel->tp_format, sample->cpu,
2108 sample->raw_data, sample->raw_size,
2109 trace->output);
2110 }
14a052df
ACM
2111 }
2112
1cdf618f 2113newline:
51125a29 2114 fprintf(trace->output, "\n");
202ff968 2115
7ad35615
ACM
2116 if (callchain_ret > 0)
2117 trace__fprintf_callchain(trace, sample);
2118 else if (callchain_ret < 0)
2119 pr_err("Problem processing %s callchain, skipping...\n", perf_evsel__name(evsel));
c4191e55 2120 thread__put(thread);
7ad35615 2121out:
14a052df
ACM
2122 return 0;
2123}
2124
598d02c5
SF
2125static void print_location(FILE *f, struct perf_sample *sample,
2126 struct addr_location *al,
2127 bool print_dso, bool print_sym)
2128{
2129
bb963e16 2130 if ((verbose > 0 || print_dso) && al->map)
598d02c5
SF
2131 fprintf(f, "%s@", al->map->dso->long_name);
2132
bb963e16 2133 if ((verbose > 0 || print_sym) && al->sym)
4414a3c5 2134 fprintf(f, "%s+0x%" PRIx64, al->sym->name,
598d02c5
SF
2135 al->addr - al->sym->start);
2136 else if (al->map)
4414a3c5 2137 fprintf(f, "0x%" PRIx64, al->addr);
598d02c5 2138 else
4414a3c5 2139 fprintf(f, "0x%" PRIx64, sample->addr);
598d02c5
SF
2140}
2141
2142static int trace__pgfault(struct trace *trace,
2143 struct perf_evsel *evsel,
473398a2 2144 union perf_event *event __maybe_unused,
598d02c5
SF
2145 struct perf_sample *sample)
2146{
2147 struct thread *thread;
598d02c5
SF
2148 struct addr_location al;
2149 char map_type = 'd';
a2ea67d7 2150 struct thread_trace *ttrace;
b91fc39f 2151 int err = -1;
1df54290 2152 int callchain_ret = 0;
598d02c5
SF
2153
2154 thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
1df54290
ACM
2155
2156 if (sample->callchain) {
2157 callchain_ret = trace__resolve_callchain(trace, evsel, sample, &callchain_cursor);
2158 if (callchain_ret == 0) {
2159 if (callchain_cursor.nr < trace->min_stack)
2160 goto out_put;
2161 callchain_ret = 1;
2162 }
2163 }
2164
a2ea67d7
SF
2165 ttrace = thread__trace(thread, trace->output);
2166 if (ttrace == NULL)
b91fc39f 2167 goto out_put;
a2ea67d7
SF
2168
2169 if (evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MAJ)
2170 ttrace->pfmaj++;
2171 else
2172 ttrace->pfmin++;
2173
2174 if (trace->summary_only)
b91fc39f 2175 goto out;
598d02c5 2176
4546263d 2177 thread__find_symbol(thread, sample->cpumode, sample->ip, &al);
598d02c5 2178
fd2b2975 2179 trace__fprintf_entry_head(trace, thread, 0, true, sample->time, trace->output);
598d02c5
SF
2180
2181 fprintf(trace->output, "%sfault [",
2182 evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MAJ ?
2183 "maj" : "min");
2184
2185 print_location(trace->output, sample, &al, false, true);
2186
2187 fprintf(trace->output, "] => ");
2188
117d3c24 2189 thread__find_symbol(thread, sample->cpumode, sample->addr, &al);
598d02c5
SF
2190
2191 if (!al.map) {
4546263d 2192 thread__find_symbol(thread, sample->cpumode, sample->addr, &al);
598d02c5
SF
2193
2194 if (al.map)
2195 map_type = 'x';
2196 else
2197 map_type = '?';
2198 }
2199
2200 print_location(trace->output, sample, &al, true, false);
2201
2202 fprintf(trace->output, " (%c%c)\n", map_type, al.level);
0c3a6ef4 2203
1df54290
ACM
2204 if (callchain_ret > 0)
2205 trace__fprintf_callchain(trace, sample);
2206 else if (callchain_ret < 0)
2207 pr_err("Problem processing %s callchain, skipping...\n", perf_evsel__name(evsel));
b91fc39f
ACM
2208out:
2209 err = 0;
2210out_put:
2211 thread__put(thread);
2212 return err;
598d02c5
SF
2213}
2214
e6001980 2215static void trace__set_base_time(struct trace *trace,
8a07a809 2216 struct perf_evsel *evsel,
e6001980
ACM
2217 struct perf_sample *sample)
2218{
8a07a809
ACM
2219 /*
2220 * BPF events were not setting PERF_SAMPLE_TIME, so be more robust
2221 * and don't use sample->time unconditionally, we may end up having
2222 * some other event in the future without PERF_SAMPLE_TIME for good
2223 * reason, i.e. we may not be interested in its timestamps, just in
2224 * it taking place, picking some piece of information when it
2225 * appears in our event stream (vfs_getname comes to mind).
2226 */
2227 if (trace->base_time == 0 && !trace->full_time &&
2228 (evsel->attr.sample_type & PERF_SAMPLE_TIME))
e6001980
ACM
2229 trace->base_time = sample->time;
2230}
2231
6810fc91 2232static int trace__process_sample(struct perf_tool *tool,
0c82adcf 2233 union perf_event *event,
6810fc91
DA
2234 struct perf_sample *sample,
2235 struct perf_evsel *evsel,
2236 struct machine *machine __maybe_unused)
2237{
2238 struct trace *trace = container_of(tool, struct trace, tool);
aa07df6e 2239 struct thread *thread;
6810fc91
DA
2240 int err = 0;
2241
744a9719 2242 tracepoint_handler handler = evsel->handler;
6810fc91 2243
aa07df6e
DA
2244 thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
2245 if (thread && thread__is_filtered(thread))
ef65e96e 2246 goto out;
bdc89661 2247
e6001980 2248 trace__set_base_time(trace, evsel, sample);
6810fc91 2249
3160565f
DA
2250 if (handler) {
2251 ++trace->nr_events;
0c82adcf 2252 handler(trace, evsel, event, sample);
3160565f 2253 }
ef65e96e
ACM
2254out:
2255 thread__put(thread);
6810fc91
DA
2256 return err;
2257}
2258
1e28fe0a 2259static int trace__record(struct trace *trace, int argc, const char **argv)
5e2485b1
DA
2260{
2261 unsigned int rec_argc, i, j;
2262 const char **rec_argv;
2263 const char * const record_args[] = {
2264 "record",
2265 "-R",
2266 "-m", "1024",
2267 "-c", "1",
5e2485b1
DA
2268 };
2269
1e28fe0a
SF
2270 const char * const sc_args[] = { "-e", };
2271 unsigned int sc_args_nr = ARRAY_SIZE(sc_args);
2272 const char * const majpf_args[] = { "-e", "major-faults" };
2273 unsigned int majpf_args_nr = ARRAY_SIZE(majpf_args);
2274 const char * const minpf_args[] = { "-e", "minor-faults" };
2275 unsigned int minpf_args_nr = ARRAY_SIZE(minpf_args);
2276
9aca7f17 2277 /* +1 is for the event string below */
1e28fe0a
SF
2278 rec_argc = ARRAY_SIZE(record_args) + sc_args_nr + 1 +
2279 majpf_args_nr + minpf_args_nr + argc;
5e2485b1
DA
2280 rec_argv = calloc(rec_argc + 1, sizeof(char *));
2281
2282 if (rec_argv == NULL)
2283 return -ENOMEM;
2284
1e28fe0a 2285 j = 0;
5e2485b1 2286 for (i = 0; i < ARRAY_SIZE(record_args); i++)
1e28fe0a
SF
2287 rec_argv[j++] = record_args[i];
2288
e281a960
SF
2289 if (trace->trace_syscalls) {
2290 for (i = 0; i < sc_args_nr; i++)
2291 rec_argv[j++] = sc_args[i];
2292
2293 /* event string may be different for older kernels - e.g., RHEL6 */
2294 if (is_valid_tracepoint("raw_syscalls:sys_enter"))
2295 rec_argv[j++] = "raw_syscalls:sys_enter,raw_syscalls:sys_exit";
2296 else if (is_valid_tracepoint("syscalls:sys_enter"))
2297 rec_argv[j++] = "syscalls:sys_enter,syscalls:sys_exit";
2298 else {
2299 pr_err("Neither raw_syscalls nor syscalls events exist.\n");
c896f85a 2300 free(rec_argv);
e281a960
SF
2301 return -1;
2302 }
9aca7f17 2303 }
9aca7f17 2304
1e28fe0a
SF
2305 if (trace->trace_pgfaults & TRACE_PFMAJ)
2306 for (i = 0; i < majpf_args_nr; i++)
2307 rec_argv[j++] = majpf_args[i];
2308
2309 if (trace->trace_pgfaults & TRACE_PFMIN)
2310 for (i = 0; i < minpf_args_nr; i++)
2311 rec_argv[j++] = minpf_args[i];
2312
2313 for (i = 0; i < (unsigned int)argc; i++)
2314 rec_argv[j++] = argv[i];
5e2485b1 2315
b0ad8ea6 2316 return cmd_record(j, rec_argv);
5e2485b1
DA
2317}
2318
bf2575c1
DA
2319static size_t trace__fprintf_thread_summary(struct trace *trace, FILE *fp);
2320
08c98776 2321static bool perf_evlist__add_vfs_getname(struct perf_evlist *evlist)
c522739d 2322{
ef503831 2323 struct perf_evsel *evsel = perf_evsel__newtp("probe", "vfs_getname");
8dd2a131
JO
2324
2325 if (IS_ERR(evsel))
08c98776 2326 return false;
c522739d
ACM
2327
2328 if (perf_evsel__field(evsel, "pathname") == NULL) {
2329 perf_evsel__delete(evsel);
08c98776 2330 return false;
c522739d
ACM
2331 }
2332
744a9719 2333 evsel->handler = trace__vfs_getname;
c522739d 2334 perf_evlist__add(evlist, evsel);
08c98776 2335 return true;
c522739d
ACM
2336}
2337
0ae537cb 2338static struct perf_evsel *perf_evsel__new_pgfault(u64 config)
598d02c5
SF
2339{
2340 struct perf_evsel *evsel;
2341 struct perf_event_attr attr = {
2342 .type = PERF_TYPE_SOFTWARE,
2343 .mmap_data = 1,
598d02c5
SF
2344 };
2345
2346 attr.config = config;
0524798c 2347 attr.sample_period = 1;
598d02c5
SF
2348
2349 event_attr_init(&attr);
2350
2351 evsel = perf_evsel__new(&attr);
0ae537cb
ACM
2352 if (evsel)
2353 evsel->handler = trace__pgfault;
598d02c5 2354
0ae537cb 2355 return evsel;
598d02c5
SF
2356}
2357
ddbb1b13
ACM
2358static void trace__handle_event(struct trace *trace, union perf_event *event, struct perf_sample *sample)
2359{
2360 const u32 type = event->header.type;
2361 struct perf_evsel *evsel;
2362
ddbb1b13
ACM
2363 if (type != PERF_RECORD_SAMPLE) {
2364 trace__process_event(trace, trace->host, event, sample);
2365 return;
2366 }
2367
2368 evsel = perf_evlist__id2evsel(trace->evlist, sample->id);
2369 if (evsel == NULL) {
2370 fprintf(trace->output, "Unknown tp ID %" PRIu64 ", skipping...\n", sample->id);
2371 return;
2372 }
2373
e6001980
ACM
2374 trace__set_base_time(trace, evsel, sample);
2375
ddbb1b13
ACM
2376 if (evsel->attr.type == PERF_TYPE_TRACEPOINT &&
2377 sample->raw_data == NULL) {
2378 fprintf(trace->output, "%s sample with no payload for tid: %d, cpu %d, raw_size=%d, skipping...\n",
2379 perf_evsel__name(evsel), sample->tid,
2380 sample->cpu, sample->raw_size);
2381 } else {
2382 tracepoint_handler handler = evsel->handler;
2383 handler(trace, evsel, event, sample);
2384 }
2385}
2386
c27366f0
ACM
2387static int trace__add_syscall_newtp(struct trace *trace)
2388{
2389 int ret = -1;
2390 struct perf_evlist *evlist = trace->evlist;
2391 struct perf_evsel *sys_enter, *sys_exit;
2392
63f11c80 2393 sys_enter = perf_evsel__raw_syscall_newtp("sys_enter", trace__sys_enter);
c27366f0
ACM
2394 if (sys_enter == NULL)
2395 goto out;
2396
2397 if (perf_evsel__init_sc_tp_ptr_field(sys_enter, args))
2398 goto out_delete_sys_enter;
2399
63f11c80 2400 sys_exit = perf_evsel__raw_syscall_newtp("sys_exit", trace__sys_exit);
c27366f0
ACM
2401 if (sys_exit == NULL)
2402 goto out_delete_sys_enter;
2403
2404 if (perf_evsel__init_sc_tp_uint_field(sys_exit, ret))
2405 goto out_delete_sys_exit;
2406
08e26396
ACM
2407 perf_evsel__config_callchain(sys_enter, &trace->opts, &callchain_param);
2408 perf_evsel__config_callchain(sys_exit, &trace->opts, &callchain_param);
2409
c27366f0
ACM
2410 perf_evlist__add(evlist, sys_enter);
2411 perf_evlist__add(evlist, sys_exit);
2412
2ddd5c04 2413 if (callchain_param.enabled && !trace->kernel_syscallchains) {
44621819
ACM
2414 /*
2415 * We're interested only in the user space callchain
2416 * leading to the syscall, allow overriding that for
2417 * debugging reasons using --kernel_syscall_callchains
2418 */
2419 sys_exit->attr.exclude_callchain_kernel = 1;
2420 }
2421
8b3ce757
ACM
2422 trace->syscalls.events.sys_enter = sys_enter;
2423 trace->syscalls.events.sys_exit = sys_exit;
c27366f0
ACM
2424
2425 ret = 0;
2426out:
2427 return ret;
2428
2429out_delete_sys_exit:
2430 perf_evsel__delete_priv(sys_exit);
2431out_delete_sys_enter:
2432 perf_evsel__delete_priv(sys_enter);
2433 goto out;
2434}
2435
19867b61
ACM
2436static int trace__set_ev_qualifier_filter(struct trace *trace)
2437{
2438 int err = -1;
b15d0a4c 2439 struct perf_evsel *sys_exit;
19867b61
ACM
2440 char *filter = asprintf_expr_inout_ints("id", !trace->not_ev_qualifier,
2441 trace->ev_qualifier_ids.nr,
2442 trace->ev_qualifier_ids.entries);
2443
2444 if (filter == NULL)
2445 goto out_enomem;
2446
3541c034
MP
2447 if (!perf_evsel__append_tp_filter(trace->syscalls.events.sys_enter,
2448 filter)) {
b15d0a4c 2449 sys_exit = trace->syscalls.events.sys_exit;
3541c034 2450 err = perf_evsel__append_tp_filter(sys_exit, filter);
b15d0a4c 2451 }
19867b61
ACM
2452
2453 free(filter);
2454out:
2455 return err;
2456out_enomem:
2457 errno = ENOMEM;
2458 goto out;
2459}
c27366f0 2460
dd1a5037
ACM
2461static int trace__set_filter_loop_pids(struct trace *trace)
2462{
082ab9a1 2463 unsigned int nr = 1;
dd1a5037
ACM
2464 pid_t pids[32] = {
2465 getpid(),
2466 };
082ab9a1
ACM
2467 struct thread *thread = machine__find_thread(trace->host, pids[0], pids[0]);
2468
2469 while (thread && nr < ARRAY_SIZE(pids)) {
2470 struct thread *parent = machine__find_thread(trace->host, thread->ppid, thread->ppid);
2471
2472 if (parent == NULL)
2473 break;
2474
2475 if (!strcmp(thread__comm_str(parent), "sshd")) {
2476 pids[nr++] = parent->tid;
2477 break;
2478 }
2479 thread = parent;
2480 }
dd1a5037
ACM
2481
2482 return perf_evlist__set_filter_pids(trace->evlist, nr, pids);
2483}
2484
/*
 * trace__run - live tracing: set up the events, optionally start the workload
 * given in argv and consume the ring buffers until told to stop.
 *
 * Depending on the options in 'trace' this adds the syscall tracepoints,
 * probe:vfs_getname, the major/minor page fault events and
 * sched:sched_stat_runtime to trace->evlist, then creates the maps for
 * trace->opts.target, opens the events, applies the pid and syscall id
 * filters, mmaps the ring buffers and enters the read loop at 'again:'.
 *
 * On every exit path, successful or not, the evlist is deleted, the cgroup
 * reference is put and trace->evlist is set to NULL (see out_delete_evlist).
 *
 * Returns the value left in 'err' by the last step performed; it starts as -1
 * so the early setup failures return that.
 */
f15eb531 2485static int trace__run(struct trace *trace, int argc, const char **argv)
514f1c67 2486{
14a052df 2487 struct perf_evlist *evlist = trace->evlist;
0ae537cb 2488 struct perf_evsel *evsel, *pgfault_maj = NULL, *pgfault_min = NULL;
efd5745e
ACM
2489 int err = -1, i;
2490 unsigned long before;
f15eb531 2491 const bool forks = argc > 0;
46fb3c21 2492 bool draining = false;
514f1c67 2493
75b757ca
ACM
2494 trace->live = true;
2495
/* Add the events selected by the options, each failure has its own error label below. */
c27366f0 2496 if (trace->trace_syscalls && trace__add_syscall_newtp(trace))
801c67b0 2497 goto out_error_raw_syscalls;
514f1c67 2498
e281a960 2499 if (trace->trace_syscalls)
08c98776 2500 trace->vfs_getname = perf_evlist__add_vfs_getname(evlist);
c522739d 2501
0ae537cb
ACM
2502 if ((trace->trace_pgfaults & TRACE_PFMAJ)) {
2503 pgfault_maj = perf_evsel__new_pgfault(PERF_COUNT_SW_PAGE_FAULTS_MAJ);
2504 if (pgfault_maj == NULL)
2505 goto out_error_mem;
08e26396 2506 perf_evsel__config_callchain(pgfault_maj, &trace->opts, &callchain_param);
0ae537cb 2507 perf_evlist__add(evlist, pgfault_maj);
e2726d99 2508 }
598d02c5 2509
0ae537cb
ACM
2510 if ((trace->trace_pgfaults & TRACE_PFMIN)) {
2511 pgfault_min = perf_evsel__new_pgfault(PERF_COUNT_SW_PAGE_FAULTS_MIN);
2512 if (pgfault_min == NULL)
2513 goto out_error_mem;
08e26396 2514 perf_evsel__config_callchain(pgfault_min, &trace->opts, &callchain_param);
0ae537cb
ACM
2515 perf_evlist__add(evlist, pgfault_min);
2516 }
598d02c5 2517
1302d88e 2518 if (trace->sched &&
2cc990ba
ACM
2519 perf_evlist__add_newtp(evlist, "sched", "sched_stat_runtime",
2520 trace__sched_stat_runtime))
2521 goto out_error_sched_stat_runtime;
1302d88e 2522
9ea42ba4
ACM
2523 /*
2524 * If a global cgroup was set, apply it to all the events without an
2525 * explicit cgroup. I.e.:
2526 *
2527 * trace -G A -e sched:*switch
2528 *
2529 * Will set all raw_syscalls:sys_{enter,exit}, pgfault, vfs_getname, etc
2530 * _and_ sched:sched_switch to the 'A' cgroup, while:
2531 *
2532 * trace -e sched:*switch -G A
2533 *
2534 * will only set the sched:sched_switch event to the 'A' cgroup, all the
2535 * other events (raw_syscalls:sys_{enter,exit}, etc are left "without"
2536 * a cgroup (on the root cgroup, sys wide, etc).
2537 *
2538 * Multiple cgroups:
2539 *
2540 * trace -G A -e sched:*switch -G B
2541 *
2542 * the syscall ones go to the 'A' cgroup, the sched:sched_switch goes
2543 * to the 'B' cgroup.
2544 *
2545 * evlist__set_default_cgroup() grabs a reference of the passed cgroup
2546 * only for the evsels still without a cgroup, i.e. evsel->cgroup == NULL.
2547 */
2548 if (trace->cgroup)
2549 evlist__set_default_cgroup(trace->evlist, trace->cgroup);
2550
514f1c67
ACM
2551 err = perf_evlist__create_maps(evlist, &trace->opts.target);
2552 if (err < 0) {
c24ff998 2553 fprintf(trace->output, "Problems parsing the target to trace, check your options!\n");
514f1c67
ACM
2554 goto out_delete_evlist;
2555 }
2556
752fde44
ACM
2557 err = trace__symbols_init(trace, evlist);
2558 if (err < 0) {
c24ff998 2559 fprintf(trace->output, "Problems initializing symbol libraries!\n");
03ad9747 2560 goto out_delete_evlist;
752fde44
ACM
2561 }
2562
75d50117 2563 perf_evlist__config(evlist, &trace->opts, &callchain_param);
fde54b78 2564
/* sig_handler is installed for SIGCHLD and SIGINT before the workload, if any, is prepared. */
f15eb531
NK
2565 signal(SIGCHLD, sig_handler);
2566 signal(SIGINT, sig_handler);
2567
2568 if (forks) {
6ef73ec4 2569 err = perf_evlist__prepare_workload(evlist, &trace->opts.target,
735f7e0b 2570 argv, false, NULL);
f15eb531 2571 if (err < 0) {
c24ff998 2572 fprintf(trace->output, "Couldn't run the workload!\n");
03ad9747 2573 goto out_delete_evlist;
f15eb531
NK
2574 }
2575 }
2576
514f1c67 2577 err = perf_evlist__open(evlist);
a8f23d8f
ACM
2578 if (err < 0)
2579 goto out_error_open;
514f1c67 2580
ba504235
WN
2581 err = bpf__apply_obj_config();
2582 if (err) {
2583 char errbuf[BUFSIZ];
2584
2585 bpf__strerror_apply_obj_config(err, errbuf, sizeof(errbuf));
2586 pr_err("ERROR: Apply config to BPF failed: %s\n",
2587 errbuf);
2588 goto out_error_open;
2589 }
2590
241b057c
ACM
2591 /*
2592 * Better not use !target__has_task() here because we need to cover the
2593 * case where no threads were specified in the command line, but a
2594 * workload was, and in that case we will fill in the thread_map when
2595 * we fork the workload in perf_evlist__prepare_workload.
2596 */
f078c385
ACM
2597 if (trace->filter_pids.nr > 0)
2598 err = perf_evlist__set_filter_pids(evlist, trace->filter_pids.nr, trace->filter_pids.entries);
e13798c7 2599 else if (thread_map__pid(evlist->threads, 0) == -1)
dd1a5037 2600 err = trace__set_filter_loop_pids(trace);
f078c385 2601
94ad89bc
ACM
2602 if (err < 0)
2603 goto out_error_mem;
2604
19867b61
ACM
2605 if (trace->ev_qualifier_ids.nr > 0) {
2606 err = trace__set_ev_qualifier_filter(trace);
2607 if (err < 0)
2608 goto out_errno;
19867b61 2609
2e5e5f87
ACM
2610 pr_debug("event qualifier tracepoint filter: %s\n",
2611 trace->syscalls.events.sys_exit->filter);
2612 }
19867b61 2613
94ad89bc
ACM
2614 err = perf_evlist__apply_filters(evlist, &evsel);
2615 if (err < 0)
2616 goto out_error_apply_filters;
241b057c 2617
f74b9d3a 2618 err = perf_evlist__mmap(evlist, trace->opts.mmap_pages);
e09b18d4
ACM
2619 if (err < 0)
2620 goto out_error_mmap;
514f1c67 2621
e36b7821 2622 if (!target__none(&trace->opts.target) && !trace->opts.initial_delay)
cb24d01d
ACM
2623 perf_evlist__enable(evlist);
2624
f15eb531
NK
2625 if (forks)
2626 perf_evlist__start_workload(evlist);
2627
e36b7821
AB
2628 if (trace->opts.initial_delay) {
2629 usleep(trace->opts.initial_delay * 1000);
2630 perf_evlist__enable(evlist);
2631 }
2632
e13798c7 2633 trace->multiple_threads = thread_map__pid(evlist->threads, 0) == -1 ||
42052bea
ACM
2634 evlist->threads->nr > 1 ||
2635 perf_evlist__first(evlist)->attr.inherit;
bd3dda9a
ACM
2636
2637 /*
2638 * Now that we already used evsel->attr to ask the kernel to setup the
2639 * events, lets reuse evsel->attr.sample_max_stack as the limit in
2640 * trace__resolve_callchain(), allowing per-event max-stack settings
2641 * to override an explicitly set --max-stack global setting.
2642 */
2643 evlist__for_each_entry(evlist, evsel) {
27de9b2b 2644 if (evsel__has_callchain(evsel) &&
bd3dda9a
ACM
2645 evsel->attr.sample_max_stack == 0)
2646 evsel->attr.sample_max_stack = trace->max_stack;
2647 }
/* Main loop: one pass over every mmap, 'before' is used to tell if this pass saw any event. */
514f1c67 2648again:
efd5745e 2649 before = trace->nr_events;
514f1c67
ACM
2650
2651 for (i = 0; i < evlist->nr_mmaps; i++) {
2652 union perf_event *event;
d7f55c62 2653 struct perf_mmap *md;
514f1c67 2654
d7f55c62 2655 md = &evlist->mmap[i];
b9bae2c8 2656 if (perf_mmap__read_init(md) < 0)
d7f55c62
KL
2657 continue;
2658
0019dc87 2659 while ((event = perf_mmap__read_event(md)) != NULL) {
514f1c67 2660 struct perf_sample sample;
514f1c67 2661
efd5745e 2662 ++trace->nr_events;
514f1c67 2663
514f1c67
ACM
2664 err = perf_evlist__parse_sample(evlist, event, &sample);
2665 if (err) {
c24ff998 2666 fprintf(trace->output, "Can't parse sample, err = %d, skipping...\n", err);
8e50d384 2667 goto next_event;
514f1c67
ACM
2668 }
2669
ddbb1b13 2670 trace__handle_event(trace, event, &sample);
8e50d384 2671next_event:
d6ace3df 2672 perf_mmap__consume(md);
20c5f10e 2673
ba209f85
ACM
2674 if (interrupted)
2675 goto out_disable;
02ac5421
ACM
2676
/* Once 'done' is set the events are disabled, what is still in the buffers keeps being consumed. */
2677 if (done && !draining) {
2678 perf_evlist__disable(evlist);
2679 draining = true;
2680 }
514f1c67 2681 }
d7f55c62 2682 perf_mmap__read_done(md);
514f1c67
ACM
2683 }
2684
/*
 * Nothing new in this pass: unless already draining, wait for more events,
 * with no timeout while running and a timeout of 100 (presumably
 * milliseconds, as in poll(2) - confirm) once 'done'.
 */
efd5745e 2685 if (trace->nr_events == before) {
ba209f85 2686 int timeout = done ? 100 : -1;
f15eb531 2687
46fb3c21
ACM
2688 if (!draining && perf_evlist__poll(evlist, timeout) > 0) {
2689 if (perf_evlist__filter_pollfd(evlist, POLLERR | POLLHUP) == 0)
2690 draining = true;
2691
ba209f85 2692 goto again;
46fb3c21 2693 }
ba209f85
ACM
2694 } else {
2695 goto again;
f15eb531
NK
2696 }
2697
ba209f85 2698out_disable:
f3b623b8
ACM
2699 thread__zput(trace->current);
2700
ba209f85 2701 perf_evlist__disable(evlist);
514f1c67 2702
/* 'err' here is what the last perf_evlist__mmap()/perf_evlist__parse_sample() call left in it. */
c522739d
ACM
2703 if (!err) {
2704 if (trace->summary)
2705 trace__fprintf_thread_summary(trace, trace->output);
2706
2707 if (trace->show_tool_stats) {
2708 fprintf(trace->output, "Stats:\n "
2709 " vfs_getname : %" PRIu64 "\n"
2710 " proc_getname: %" PRIu64 "\n",
2711 trace->stats.vfs_getname,
2712 trace->stats.proc_getname);
2713 }
2714 }
bf2575c1 2715
/* Common exit path, also the target of all the error labels below. */
514f1c67 2716out_delete_evlist:
33974a41
AV
2717 trace__symbols__exit(trace);
2718
514f1c67 2719 perf_evlist__delete(evlist);
9ea42ba4 2720 cgroup__put(trace->cgroup);
14a052df 2721 trace->evlist = NULL;
75b757ca 2722 trace->live = false;
514f1c67 2723 return err;
/* Only reached via goto: this block exists to scope errbuf to the error labels that format into it. */
6ef068cb
ACM
2724{
2725 char errbuf[BUFSIZ];
a8f23d8f 2726
2cc990ba 2727out_error_sched_stat_runtime:
988bdb31 2728 tracing_path__strerror_open_tp(errno, errbuf, sizeof(errbuf), "sched", "sched_stat_runtime");
2cc990ba
ACM
2729 goto out_error;
2730
801c67b0 2731out_error_raw_syscalls:
988bdb31 2732 tracing_path__strerror_open_tp(errno, errbuf, sizeof(errbuf), "raw_syscalls", "sys_(enter|exit)");
a8f23d8f
ACM
2733 goto out_error;
2734
e09b18d4
ACM
2735out_error_mmap:
2736 perf_evlist__strerror_mmap(evlist, errno, errbuf, sizeof(errbuf));
2737 goto out_error;
2738
a8f23d8f
ACM
2739out_error_open:
2740 perf_evlist__strerror_open(evlist, errno, errbuf, sizeof(errbuf));
2741
2742out_error:
6ef068cb 2743 fprintf(trace->output, "%s\n", errbuf);
87f91868 2744 goto out_delete_evlist;
94ad89bc
ACM
2745
/* 'evsel' was set by perf_evlist__apply_filters(), it is the event named in the message. */
2746out_error_apply_filters:
2747 fprintf(trace->output,
2748 "Failed to set filter \"%s\" on event %s with %d (%s)\n",
2749 evsel->filter, perf_evsel__name(evsel), errno,
c8b5f2c9 2750 str_error_r(errno, errbuf, sizeof(errbuf)));
94ad89bc 2751 goto out_delete_evlist;
514f1c67 2752}
5ed08dae
ACM
2753out_error_mem:
2754 fprintf(trace->output, "Not enough memory to run!\n");
2755 goto out_delete_evlist;
19867b61
ACM
2756
2757out_errno:
2758 fprintf(trace->output, "errno=%d,%s\n", errno, strerror(errno));
2759 goto out_delete_evlist;
a8f23d8f 2760}
514f1c67 2761
6810fc91
DA
2762static int trace__replay(struct trace *trace)
2763{
2764 const struct perf_evsel_str_handler handlers[] = {
c522739d 2765 { "probe:vfs_getname", trace__vfs_getname, },
6810fc91 2766 };
8ceb41d7 2767 struct perf_data data = {
eae8ad80
JO
2768 .file = {
2769 .path = input_name,
2770 },
2771 .mode = PERF_DATA_MODE_READ,
2772 .force = trace->force,
f5fc1412 2773 };
6810fc91 2774 struct perf_session *session;
003824e8 2775 struct perf_evsel *evsel;
6810fc91
DA
2776 int err = -1;
2777
2778 trace->tool.sample = trace__process_sample;
2779 trace->tool.mmap = perf_event__process_mmap;
384c671e 2780 trace->tool.mmap2 = perf_event__process_mmap2;
6810fc91
DA
2781 trace->tool.comm = perf_event__process_comm;
2782 trace->tool.exit = perf_event__process_exit;
2783 trace->tool.fork = perf_event__process_fork;
2784 trace->tool.attr = perf_event__process_attr;
f3b3614a 2785 trace->tool.tracing_data = perf_event__process_tracing_data;
6810fc91 2786 trace->tool.build_id = perf_event__process_build_id;
f3b3614a 2787 trace->tool.namespaces = perf_event__process_namespaces;
6810fc91 2788
0a8cb85c 2789 trace->tool.ordered_events = true;
6810fc91
DA
2790 trace->tool.ordering_requires_timestamps = true;
2791
2792 /* add tid to output */
2793 trace->multiple_threads = true;
2794
8ceb41d7 2795 session = perf_session__new(&data, false, &trace->tool);
6810fc91 2796 if (session == NULL)
52e02834 2797 return -1;
6810fc91 2798
aa07df6e
DA
2799 if (trace->opts.target.pid)
2800 symbol_conf.pid_list_str = strdup(trace->opts.target.pid);
2801
2802 if (trace->opts.target.tid)
2803 symbol_conf.tid_list_str = strdup(trace->opts.target.tid);
2804
0a7e6d1b 2805 if (symbol__init(&session->header.env) < 0)
cb2ffae2
NK
2806 goto out;
2807
8fb598e5
DA
2808 trace->host = &session->machines.host;
2809
6810fc91
DA
2810 err = perf_session__set_tracepoints_handlers(session, handlers);
2811 if (err)
2812 goto out;
2813
003824e8
NK
2814 evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
2815 "raw_syscalls:sys_enter");
9aca7f17
DA
2816 /* older kernels have syscalls tp versus raw_syscalls */
2817 if (evsel == NULL)
2818 evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
2819 "syscalls:sys_enter");
003824e8 2820
e281a960 2821 if (evsel &&
63f11c80 2822 (perf_evsel__init_raw_syscall_tp(evsel, trace__sys_enter) < 0 ||
e281a960 2823 perf_evsel__init_sc_tp_ptr_field(evsel, args))) {
003824e8
NK
2824 pr_err("Error during initialize raw_syscalls:sys_enter event\n");
2825 goto out;
2826 }
2827
2828 evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
2829 "raw_syscalls:sys_exit");
9aca7f17
DA
2830 if (evsel == NULL)
2831 evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
2832 "syscalls:sys_exit");
e281a960 2833 if (evsel &&
63f11c80 2834 (perf_evsel__init_raw_syscall_tp(evsel, trace__sys_exit) < 0 ||
e281a960 2835 perf_evsel__init_sc_tp_uint_field(evsel, ret))) {
003824e8 2836 pr_err("Error during initialize raw_syscalls:sys_exit event\n");
6810fc91
DA
2837 goto out;
2838 }
2839
e5cadb93 2840 evlist__for_each_entry(session->evlist, evsel) {
1e28fe0a
SF
2841 if (evsel->attr.type == PERF_TYPE_SOFTWARE &&
2842 (evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MAJ ||
2843 evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MIN ||
2844 evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS))
2845 evsel->handler = trace__pgfault;
2846 }
2847
6810fc91
DA
2848 setup_pager();
2849
b7b61cbe 2850 err = perf_session__process_events(session);
6810fc91
DA
2851 if (err)
2852 pr_err("Failed to process events, error %d", err);
2853
bf2575c1
DA
2854 else if (trace->summary)
2855 trace__fprintf_thread_summary(trace, trace->output);
2856
6810fc91
DA
2857out:
2858 perf_session__delete(session);
2859
2860 return err;
2861}
2862
1302d88e
ACM
/* Print the heading of the per-thread summary; returns what fprintf() reported. */
static size_t trace__fprintf_threads_header(FILE *fp)
{
	return fprintf(fp, "\n Summary of events:\n\n");
}
2871
b535d523
ACM
2872DEFINE_RESORT_RB(syscall_stats, a->msecs > b->msecs,
2873 struct stats *stats;
2874 double msecs;
2875 int syscall;
2876)
2877{
2878 struct int_node *source = rb_entry(nd, struct int_node, rb_node);
2879 struct stats *stats = source->priv;
2880
2881 entry->syscall = source->i;
2882 entry->stats = stats;
2883 entry->msecs = stats ? (u64)stats->n * (avg_stats(stats) / NSEC_PER_MSEC) : 0;
2884}
2885
bf2575c1
DA
2886static size_t thread__dump_stats(struct thread_trace *ttrace,
2887 struct trace *trace, FILE *fp)
2888{
bf2575c1
DA
2889 size_t printed = 0;
2890 struct syscall *sc;
b535d523
ACM
2891 struct rb_node *nd;
2892 DECLARE_RESORT_RB_INTLIST(syscall_stats, ttrace->syscall_stats);
bf2575c1 2893
b535d523 2894 if (syscall_stats == NULL)
bf2575c1
DA
2895 return 0;
2896
2897 printed += fprintf(fp, "\n");
2898
834fd46d
MW
2899 printed += fprintf(fp, " syscall calls total min avg max stddev\n");
2900 printed += fprintf(fp, " (msec) (msec) (msec) (msec) (%%)\n");
2901 printed += fprintf(fp, " --------------- -------- --------- --------- --------- --------- ------\n");
99ff7150 2902
98a91837 2903 resort_rb__for_each_entry(nd, syscall_stats) {
b535d523 2904 struct stats *stats = syscall_stats_entry->stats;
bf2575c1
DA
2905 if (stats) {
2906 double min = (double)(stats->min) / NSEC_PER_MSEC;
2907 double max = (double)(stats->max) / NSEC_PER_MSEC;
2908 double avg = avg_stats(stats);
2909 double pct;
2910 u64 n = (u64) stats->n;
2911
2912 pct = avg ? 100.0 * stddev_stats(stats)/avg : 0.0;
2913 avg /= NSEC_PER_MSEC;
2914
b535d523 2915 sc = &trace->syscalls.table[syscall_stats_entry->syscall];
99ff7150 2916 printed += fprintf(fp, " %-15s", sc->name);
834fd46d 2917 printed += fprintf(fp, " %8" PRIu64 " %9.3f %9.3f %9.3f",
b535d523 2918 n, syscall_stats_entry->msecs, min, avg);
27a778b5 2919 printed += fprintf(fp, " %9.3f %9.2f%%\n", max, pct);
bf2575c1 2920 }
bf2575c1
DA
2921 }
2922
b535d523 2923 resort_rb__delete(syscall_stats);
bf2575c1 2924 printed += fprintf(fp, "\n\n");
1302d88e
ACM
2925
2926 return printed;
2927}
2928
96c14451 2929static size_t trace__fprintf_thread(FILE *fp, struct thread *thread, struct trace *trace)
896cbb56 2930{
96c14451 2931 size_t printed = 0;
89dceb22 2932 struct thread_trace *ttrace = thread__priv(thread);
896cbb56
DA
2933 double ratio;
2934
2935 if (ttrace == NULL)
2936 return 0;
2937
2938 ratio = (double)ttrace->nr_events / trace->nr_events * 100.0;
2939
15e65c69 2940 printed += fprintf(fp, " %s (%d), ", thread__comm_str(thread), thread->tid);
99ff7150 2941 printed += fprintf(fp, "%lu events, ", ttrace->nr_events);
15e65c69 2942 printed += fprintf(fp, "%.1f%%", ratio);
a2ea67d7
SF
2943 if (ttrace->pfmaj)
2944 printed += fprintf(fp, ", %lu majfaults", ttrace->pfmaj);
2945 if (ttrace->pfmin)
2946 printed += fprintf(fp, ", %lu minfaults", ttrace->pfmin);
03548ebf
ACM
2947 if (trace->sched)
2948 printed += fprintf(fp, ", %.3f msec\n", ttrace->runtime_ms);
2949 else if (fputc('\n', fp) != EOF)
2950 ++printed;
2951
bf2575c1 2952 printed += thread__dump_stats(ttrace, trace, fp);
896cbb56 2953
96c14451
ACM
2954 return printed;
2955}
896cbb56 2956
96c14451
ACM
2957static unsigned long thread__nr_events(struct thread_trace *ttrace)
2958{
2959 return ttrace ? ttrace->nr_events : 0;
2960}
2961
2962DEFINE_RESORT_RB(threads, (thread__nr_events(a->thread->priv) < thread__nr_events(b->thread->priv)),
2963 struct thread *thread;
2964)
2965{
2966 entry->thread = rb_entry(nd, struct thread, rb_node);
896cbb56
DA
2967}
2968
1302d88e
ACM
2969static size_t trace__fprintf_thread_summary(struct trace *trace, FILE *fp)
2970{
96c14451
ACM
2971 size_t printed = trace__fprintf_threads_header(fp);
2972 struct rb_node *nd;
91e467bc 2973 int i;
1302d88e 2974
91e467bc
KL
2975 for (i = 0; i < THREADS__TABLE_SIZE; i++) {
2976 DECLARE_RESORT_RB_MACHINE_THREADS(threads, trace->host, i);
96c14451 2977
91e467bc
KL
2978 if (threads == NULL) {
2979 fprintf(fp, "%s", "Error sorting output by nr_events!\n");
2980 return 0;
2981 }
896cbb56 2982
91e467bc
KL
2983 resort_rb__for_each_entry(nd, threads)
2984 printed += trace__fprintf_thread(fp, threads_entry->thread, trace);
96c14451 2985
91e467bc
KL
2986 resort_rb__delete(threads);
2987 }
96c14451 2988 return printed;
1302d88e
ACM
2989}
2990
ae9ed035
ACM
2991static int trace__set_duration(const struct option *opt, const char *str,
2992 int unset __maybe_unused)
2993{
2994 struct trace *trace = opt->value;
2995
2996 trace->duration_filter = atof(str);
2997 return 0;
2998}
2999
f078c385
ACM
3000static int trace__set_filter_pids(const struct option *opt, const char *str,
3001 int unset __maybe_unused)
3002{
3003 int ret = -1;
3004 size_t i;
3005 struct trace *trace = opt->value;
3006 /*
3007 * FIXME: introduce a intarray class, plain parse csv and create a
3008 * { int nr, int entries[] } struct...
3009 */
3010 struct intlist *list = intlist__new(str);
3011
3012 if (list == NULL)
3013 return -1;
3014
3015 i = trace->filter_pids.nr = intlist__nr_entries(list) + 1;
3016 trace->filter_pids.entries = calloc(i, sizeof(pid_t));
3017
3018 if (trace->filter_pids.entries == NULL)
3019 goto out;
3020
3021 trace->filter_pids.entries[0] = getpid();
3022
3023 for (i = 1; i < trace->filter_pids.nr; ++i)
3024 trace->filter_pids.entries[i] = intlist__entry(list, i - 1)->i;
3025
3026 intlist__delete(list);
3027 ret = 0;
3028out:
3029 return ret;
3030}
3031
c24ff998
ACM
3032static int trace__open_output(struct trace *trace, const char *filename)
3033{
3034 struct stat st;
3035
3036 if (!stat(filename, &st) && st.st_size) {
3037 char oldname[PATH_MAX];
3038
3039 scnprintf(oldname, sizeof(oldname), "%s.old", filename);
3040 unlink(oldname);
3041 rename(filename, oldname);
3042 }
3043
3044 trace->output = fopen(filename, "w");
3045
3046 return trace->output == NULL ? -errno : 0;
3047}
3048
598d02c5
SF
3049static int parse_pagefaults(const struct option *opt, const char *str,
3050 int unset __maybe_unused)
3051{
3052 int *trace_pgfaults = opt->value;
3053
3054 if (strcmp(str, "all") == 0)
3055 *trace_pgfaults |= TRACE_PFMAJ | TRACE_PFMIN;
3056 else if (strcmp(str, "maj") == 0)
3057 *trace_pgfaults |= TRACE_PFMAJ;
3058 else if (strcmp(str, "min") == 0)
3059 *trace_pgfaults |= TRACE_PFMIN;
3060 else
3061 return -1;
3062
3063 return 0;
3064}
3065
14a052df
ACM
3066static void evlist__set_evsel_handler(struct perf_evlist *evlist, void *handler)
3067{
3068 struct perf_evsel *evsel;
3069
e5cadb93 3070 evlist__for_each_entry(evlist, evsel)
14a052df
ACM
3071 evsel->handler = handler;
3072}
3073
d32855fa
ACM
3074static int evlist__set_syscall_tp_fields(struct perf_evlist *evlist)
3075{
3076 struct perf_evsel *evsel;
3077
3078 evlist__for_each_entry(evlist, evsel) {
3079 if (evsel->priv || !evsel->tp_format)
3080 continue;
3081
3082 if (strcmp(evsel->tp_format->system, "syscalls"))
3083 continue;
3084
3085 if (perf_evsel__init_syscall_tp(evsel))
3086 return -1;
3087
3088 if (!strncmp(evsel->tp_format->name, "sys_enter_", 10)) {
3089 struct syscall_tp *sc = evsel->priv;
3090
3091 if (__tp_field__init_ptr(&sc->args, sc->id.offset + sizeof(u64)))
3092 return -1;
3093 } else if (!strncmp(evsel->tp_format->name, "sys_exit_", 9)) {
3094 struct syscall_tp *sc = evsel->priv;
3095
3096 if (__tp_field__init_uint(&sc->ret, sizeof(u64), sc->id.offset + sizeof(u64), evsel->needs_swap))
3097 return -1;
3098 }
3099 }
3100
3101 return 0;
3102}
3103
017037ff
ACM
3104/*
3105 * XXX: Hackish, just splitting the combined -e+--event (syscalls
3106 * (raw_syscalls:{sys_{enter,exit}} + events (tracepoints, HW, SW, etc) to use
3107 * existing facilities unchanged (trace->ev_qualifier + parse_options()).
3108 *
3109 * It'd be better to introduce a parse_options() variant that would return a
3110 * list with the terms it didn't match to an event...
3111 */
3112static int trace__parse_events_option(const struct option *opt, const char *str,
3113 int unset __maybe_unused)
3114{
3115 struct trace *trace = (struct trace *)opt->value;
3116 const char *s = str;
3117 char *sep = NULL, *lists[2] = { NULL, NULL, };
27702bcf 3118 int len = strlen(str) + 1, err = -1, list, idx;
017037ff
ACM
3119 char *strace_groups_dir = system_path(STRACE_GROUPS_DIR);
3120 char group_name[PATH_MAX];
3121
3122 if (strace_groups_dir == NULL)
3123 return -1;
3124
3125 if (*s == '!') {
3126 ++s;
3127 trace->not_ev_qualifier = true;
3128 }
3129
3130 while (1) {
3131 if ((sep = strchr(s, ',')) != NULL)
3132 *sep = '\0';
3133
3134 list = 0;
27702bcf
ACM
3135 if (syscalltbl__id(trace->sctbl, s) >= 0 ||
3136 syscalltbl__strglobmatch_first(trace->sctbl, s, &idx) >= 0) {
017037ff
ACM
3137 list = 1;
3138 } else {
3139 path__join(group_name, sizeof(group_name), strace_groups_dir, s);
3140 if (access(group_name, R_OK) == 0)
3141 list = 1;
3142 }
3143
3144 if (lists[list]) {
3145 sprintf(lists[list] + strlen(lists[list]), ",%s", s);
3146 } else {
3147 lists[list] = malloc(len);
3148 if (lists[list] == NULL)
3149 goto out;
3150 strcpy(lists[list], s);
3151 }
3152
3153 if (!sep)
3154 break;
3155
3156 *sep = ',';
3157 s = sep + 1;
3158 }
3159
3160 if (lists[1] != NULL) {
3161 struct strlist_config slist_config = {
3162 .dirname = strace_groups_dir,
3163 };
3164
3165 trace->ev_qualifier = strlist__new(lists[1], &slist_config);
3166 if (trace->ev_qualifier == NULL) {
3167 fputs("Not enough memory to parse event qualifier", trace->output);
3168 goto out;
3169 }
3170
3171 if (trace__validate_ev_qualifier(trace))
3172 goto out;
b912885a 3173 trace->trace_syscalls = true;
017037ff
ACM
3174 }
3175
3176 err = 0;
3177
3178 if (lists[0]) {
3179 struct option o = OPT_CALLBACK('e', "event", &trace->evlist, "event",
3180 "event selector. use 'perf list' to list available events",
3181 parse_events_option);
3182 err = parse_events_option(&o, lists[0], 0);
3183 }
3184out:
3185 if (sep)
3186 *sep = ',';
3187
3188 return err;
3189}
3190
9ea42ba4
ACM
3191static int trace__parse_cgroups(const struct option *opt, const char *str, int unset)
3192{
3193 struct trace *trace = opt->value;
3194
3195 if (!list_empty(&trace->evlist->entries))
3196 return parse_cgroups(opt, str, unset);
3197
3198 trace->cgroup = evlist__findnew_cgroup(trace->evlist, str);
3199
3200 return 0;
3201}
3202
/*
 * Entry point of the 'trace' builtin.
 *
 * Allocates the event list and syscall table, parses the command line, sets
 * up the optional BPF output events and callchain parameters, and then
 * dispatches to one of: trace__record() (first argument is "record"),
 * trace__replay() (an input file was given) or trace__run() (live tracing).
 *
 * Returns the value of err at exit, or the trace__record() result.
 */
b0ad8ea6 3203int cmd_trace(int argc, const char **argv)
514f1c67 3204{
6fdd9cb7 3205 const char *trace_usage[] = {
f15eb531
NK
3206 "perf trace [<options>] [<command>]",
3207 "perf trace [<options>] -- <command> [<options>]",
5e2485b1
DA
3208 "perf trace record [<options>] [<command>]",
3209 "perf trace record [<options>] -- <command> [<options>]",
514f1c67
ACM
3210 NULL
3211 };
/*
 * Defaults. The UINT_MAX values of .mmap_pages and .max_stack are compared
 * against after option parsing to tell whether the user set them.
 */
3212 struct trace trace = {
514f1c67
ACM
3213 .syscalls = {
3214 . max = -1,
3215 },
3216 .opts = {
3217 .target = {
3218 .uid = UINT_MAX,
3219 .uses_mmap = true,
3220 },
3221 .user_freq = UINT_MAX,
3222 .user_interval = ULLONG_MAX,
509051ea 3223 .no_buffering = true,
38d5447d 3224 .mmap_pages = UINT_MAX,
9d9cad76 3225 .proc_map_timeout = 500,
514f1c67 3226 },
007d66a0 3227 .output = stderr,
50c95cbd 3228 .show_comm = true,
b912885a 3229 .trace_syscalls = false,
44621819 3230 .kernel_syscallchains = false,
05614993 3231 .max_stack = UINT_MAX,
514f1c67 3232 };
c24ff998 3233 const char *output_name = NULL;
514f1c67 3234 const struct option trace_options[] = {
017037ff
ACM
3235 OPT_CALLBACK('e', "event", &trace, "event",
3236 "event/syscall selector. use 'perf list' to list available events",
3237 trace__parse_events_option),
50c95cbd
ACM
3238 OPT_BOOLEAN(0, "comm", &trace.show_comm,
3239 "show the thread COMM next to its id"),
c522739d 3240 OPT_BOOLEAN(0, "tool_stats", &trace.show_tool_stats, "show tool stats"),
017037ff
ACM
3241 OPT_CALLBACK(0, "expr", &trace, "expr", "list of syscalls/events to trace",
3242 trace__parse_events_option),
c24ff998 3243 OPT_STRING('o', "output", &output_name, "file", "output file name"),
6810fc91 3244 OPT_STRING('i', "input", &input_name, "file", "Analyze events in file"),
514f1c67
ACM
3245 OPT_STRING('p', "pid", &trace.opts.target.pid, "pid",
3246 "trace events on existing process id"),
ac9be8ee 3247 OPT_STRING('t', "tid", &trace.opts.target.tid, "tid",
514f1c67 3248 "trace events on existing thread id"),
fa0e4ffe
ACM
3249 OPT_CALLBACK(0, "filter-pids", &trace, "CSV list of pids",
3250 "pids to filter (by the kernel)", trace__set_filter_pids),
ac9be8ee 3251 OPT_BOOLEAN('a', "all-cpus", &trace.opts.target.system_wide,
514f1c67 3252 "system-wide collection from all CPUs"),
ac9be8ee 3253 OPT_STRING('C', "cpu", &trace.opts.target.cpu_list, "cpu",
514f1c67 3254 "list of cpus to monitor"),
6810fc91 3255 OPT_BOOLEAN(0, "no-inherit", &trace.opts.no_inherit,
514f1c67 3256 "child tasks do not inherit counters"),
994a1f78
JO
3257 OPT_CALLBACK('m', "mmap-pages", &trace.opts.mmap_pages, "pages",
3258 "number of mmap data pages",
3259 perf_evlist__parse_mmap_pages),
ac9be8ee 3260 OPT_STRING('u', "uid", &trace.opts.target.uid_str, "user",
514f1c67 3261 "user to profile"),
ae9ed035
ACM
3262 OPT_CALLBACK(0, "duration", &trace, "float",
3263 "show only events with duration > N.M ms",
3264 trace__set_duration),
1302d88e 3265 OPT_BOOLEAN(0, "sched", &trace.sched, "show blocking scheduler events"),
7c304ee0 3266 OPT_INCR('v', "verbose", &verbose, "be more verbose"),
4bb09192
DA
3267 OPT_BOOLEAN('T', "time", &trace.full_time,
3268 "Show full timestamp, not time relative to first start"),
0a6545bd
ACM
3269 OPT_BOOLEAN(0, "failure", &trace.failure_only,
3270 "Show only syscalls that failed"),
fd2eabaf
DA
3271 OPT_BOOLEAN('s', "summary", &trace.summary_only,
3272 "Show only syscall summary with statistics"),
3273 OPT_BOOLEAN('S', "with-summary", &trace.summary,
3274 "Show all syscalls and summary with statistics"),
598d02c5
SF
3275 OPT_CALLBACK_DEFAULT('F', "pf", &trace.trace_pgfaults, "all|maj|min",
3276 "Trace pagefaults", parse_pagefaults, "maj"),
e281a960 3277 OPT_BOOLEAN(0, "syscalls", &trace.trace_syscalls, "Trace syscalls"),
e366a6d8 3278 OPT_BOOLEAN('f', "force", &trace.force, "don't complain, do it"),
566a0885
MW
3279 OPT_CALLBACK(0, "call-graph", &trace.opts,
3280 "record_mode[,record_size]", record_callchain_help,
3281 &record_parse_callchain_opt),
44621819
ACM
3282 OPT_BOOLEAN(0, "kernel-syscall-graph", &trace.kernel_syscallchains,
3283 "Show the kernel callchains on the syscall exit path"),
5cf9c84e
ACM
3284 OPT_UINTEGER(0, "min-stack", &trace.min_stack,
3285 "Set the minimum stack depth when parsing the callchain, "
3286 "anything below the specified depth will be ignored."),
c6d4a494
ACM
3287 OPT_UINTEGER(0, "max-stack", &trace.max_stack,
3288 "Set the maximum stack depth when parsing the callchain, "
3289 "anything beyond the specified depth will be ignored. "
4cb93446 3290 "Default: kernel.perf_event_max_stack or " __stringify(PERF_MAX_STACK_DEPTH)),
591421e1
ACM
3291 OPT_BOOLEAN(0, "print-sample", &trace.print_sample,
3292 "print the PERF_RECORD_SAMPLE PERF_SAMPLE_ info, for debugging"),
9d9cad76
KL
3293 OPT_UINTEGER(0, "proc-map-timeout", &trace.opts.proc_map_timeout,
3294 "per thread proc mmap processing timeout in ms"),
9ea42ba4
ACM
3295 OPT_CALLBACK('G', "cgroup", &trace, "name", "monitor event in cgroup name only",
3296 trace__parse_cgroups),
e36b7821
AB
3297 OPT_UINTEGER('D', "delay", &trace.opts.initial_delay,
3298 "ms to wait before starting measurement after program "
3299 "start"),
514f1c67
ACM
3300 OPT_END()
3301 };
ccd62a89 3302 bool __maybe_unused max_stack_user_set = true;
f3e459d1 3303 bool mmap_pages_user_set = true;
78e890ea 3304 struct perf_evsel *evsel;
6fdd9cb7 3305 const char * const trace_subcommands[] = { "record", NULL };
78e890ea 3306 int err = -1;
32caf0d1 3307 char bf[BUFSIZ];
514f1c67 3308
4d08cb80
ACM
3309 signal(SIGSEGV, sighandler_dump_stack);
3310 signal(SIGFPE, sighandler_dump_stack);
3311
14a052df 3312 trace.evlist = perf_evlist__new();
fd0db102 3313 trace.sctbl = syscalltbl__new();
14a052df 3314
fd0db102 3315 if (trace.evlist == NULL || trace.sctbl == NULL) {
14a052df 3316 pr_err("Not enough memory to run!\n");
ff8f695c 3317 err = -ENOMEM;
14a052df
ACM
3318 goto out;
3319 }
3320
6fdd9cb7
YS
3321 argc = parse_options_subcommand(argc, argv, trace_options, trace_subcommands,
3322 trace_usage, PARSE_OPT_STOP_AT_NON_OPTION);
fd2eabaf 3323
9ea42ba4
ACM
3324 if ((nr_cgroups || trace.cgroup) && !trace.opts.target.system_wide) {
3325 usage_with_options_msg(trace_usage, trace_options,
3326 "cgroup monitoring only available in system-wide mode");
3327 }
3328
/*
 * Set up the "__augmented_syscalls__" BPF output event; when one is returned
 * it is initialized and remembered in trace.syscalls.events.augmented.
 */
78e890ea
ACM
3329 evsel = bpf__setup_output_event(trace.evlist, "__augmented_syscalls__");
3330 if (IS_ERR(evsel)) {
3331 bpf__strerror_setup_output_event(trace.evlist, PTR_ERR(evsel), bf, sizeof(bf));
e0b6d2ef
ACM
3332 pr_err("ERROR: Setup trace syscalls enter failed: %s\n", bf);
3333 goto out;
3334 }
3335
d3d1c4bd
ACM
3336 if (evsel) {
3337 if (perf_evsel__init_augmented_syscall_tp(evsel) ||
3338 perf_evsel__init_augmented_syscall_tp_args(evsel))
3339 goto out;
3340 trace.syscalls.events.augmented = evsel;
3341 }
3342
d7888573
WN
3343 err = bpf__setup_stdout(trace.evlist);
3344 if (err) {
3345 bpf__strerror_setup_stdout(trace.evlist, err, bf, sizeof(bf));
3346 pr_err("ERROR: Setup BPF stdout failed: %s\n", bf);
3347 goto out;
3348 }
3349
/* Back to the generic failure code for the remaining early exits. */
59247e33
ACM
3350 err = -1;
3351
598d02c5
SF
3352 if (trace.trace_pgfaults) {
3353 trace.opts.sample_address = true;
3354 trace.opts.sample_time = true;
3355 }
3356
/* Still holding the UINT_MAX default means the user did not set the value. */
f3e459d1
ACM
3357 if (trace.opts.mmap_pages == UINT_MAX)
3358 mmap_pages_user_set = false;
3359
05614993 3360 if (trace.max_stack == UINT_MAX) {
029c75e5 3361 trace.max_stack = input_name ? PERF_MAX_STACK_DEPTH : sysctl__max_stack();
05614993
ACM
3362 max_stack_user_set = false;
3363 }
3364
3365#ifdef HAVE_DWARF_UNWIND_SUPPORT
75d50117 3366 if ((trace.min_stack || max_stack_user_set) && !callchain_param.enabled) {
05614993 3367 record_opts__parse_callchain(&trace.opts, &callchain_param, "dwarf", false);
75d50117 3368 }
05614993
ACM
3369#endif
3370
2ddd5c04 3371 if (callchain_param.enabled) {
f3e459d1
ACM
3372 if (!mmap_pages_user_set && geteuid() == 0)
3373 trace.opts.mmap_pages = perf_event_mlock_kb_in_pages() * 4;
3374
566a0885 3375 symbol_conf.use_callchain = true;
f3e459d1 3376 }
566a0885 3377
d32855fa 3378 if (trace.evlist->nr_entries > 0) {
14a052df 3379 evlist__set_evsel_handler(trace.evlist, trace__event_handler);
d32855fa
ACM
3380 if (evlist__set_syscall_tp_fields(trace.evlist)) {
3381 perror("failed to set syscalls:* tracepoint fields");
3382 goto out;
3383 }
3384 }
14a052df 3385
/* "record" subcommand: hand the remaining arguments to trace__record(). */
1e28fe0a
SF
3386 if ((argc >= 1) && (strcmp(argv[0], "record") == 0))
3387 return trace__record(&trace, argc-1, &argv[1]);
3388
3389 /* summary_only implies summary option, but don't overwrite summary if set */
3390 if (trace.summary_only)
3391 trace.summary = trace.summary_only;
3392
/* Nothing was explicitly selected: fall back to tracing syscalls. */
726f3234
ACM
3393 if (!trace.trace_syscalls && !trace.trace_pgfaults &&
3394 trace.evlist->nr_entries == 0 /* Was --events used? */) {
b912885a 3395 trace.trace_syscalls = true;
59247e33
ACM
3396 }
3397
c24ff998
ACM
3398 if (output_name != NULL) {
3399 err = trace__open_output(&trace, output_name);
3400 if (err < 0) {
3401 perror("failed to create output file");
3402 goto out;
3403 }
3404 }
3405
602ad878 3406 err = target__validate(&trace.opts.target);
32caf0d1 3407 if (err) {
602ad878 3408 target__strerror(&trace.opts.target, err, bf, sizeof(bf));
c24ff998
ACM
3409 fprintf(trace.output, "%s", bf);
3410 goto out_close;
32caf0d1
NK
3411 }
3412
602ad878 3413 err = target__parse_uid(&trace.opts.target);
514f1c67 3414 if (err) {
602ad878 3415 target__strerror(&trace.opts.target, err, bf, sizeof(bf));
c24ff998
ACM
3416 fprintf(trace.output, "%s", bf);
3417 goto out_close;
514f1c67
ACM
3418 }
3419
/* No workload arguments left and target__none() is true: go system wide. */
602ad878 3420 if (!argc && target__none(&trace.opts.target))
ee76120e
NK
3421 trace.opts.target.system_wide = true;
3422
/* Replay the input file when one was given, otherwise trace live. */
6810fc91
DA
3423 if (input_name)
3424 err = trace__replay(&trace);
3425 else
3426 err = trace__run(&trace, argc, argv);
1302d88e 3427
/* trace.output is only closed when it was opened from the output option. */
c24ff998
ACM
3428out_close:
3429 if (output_name != NULL)
3430 fclose(trace.output);
3431out:
1302d88e 3432 return err;
514f1c67 3433}