perf annotate: Fix parsing aarch64 branch instructions after objdump update
[linux-block.git] / tools / perf / builtin-trace.c
CommitLineData
a598bb5e
ACM
1/*
2 * builtin-trace.c
3 *
4 * Builtin 'trace' command:
5 *
6 * Display a continuously updated trace of any workload, CPU, specific PID,
7 * system wide, etc. Default format is loosely strace like, but any other
8 * event may be specified using --event.
9 *
10 * Copyright (C) 2012, 2013, 2014, 2015 Red Hat Inc, Arnaldo Carvalho de Melo <acme@redhat.com>
11 *
12 * Initially based on the 'trace' prototype by Thomas Gleixner:
13 *
14 * http://lwn.net/Articles/415728/ ("Announcing a new utility: 'trace'")
15 *
16 * Released under the GPL v2. (and only v2, not any later version)
17 */
18
4e319027 19#include <traceevent/event-parse.h>
988bdb31 20#include <api/fs/tracing_path.h>
514f1c67 21#include "builtin.h"
9ea42ba4 22#include "util/cgroup.h"
752fde44 23#include "util/color.h"
7c304ee0 24#include "util/debug.h"
092bd3cd 25#include "util/env.h"
5ab8c689 26#include "util/event.h"
514f1c67 27#include "util/evlist.h"
4b6ab94e 28#include <subcmd/exec-cmd.h>
752fde44 29#include "util/machine.h"
9a3993d4 30#include "util/path.h"
6810fc91 31#include "util/session.h"
752fde44 32#include "util/thread.h"
4b6ab94e 33#include <subcmd/parse-options.h>
2ae3a312 34#include "util/strlist.h"
bdc89661 35#include "util/intlist.h"
514f1c67 36#include "util/thread_map.h"
bf2575c1 37#include "util/stat.h"
fd5cead2 38#include "trace/beauty/beauty.h"
97978b3e 39#include "trace-event.h"
9aca7f17 40#include "util/parse-events.h"
ba504235 41#include "util/bpf-loader.h"
566a0885 42#include "callchain.h"
fea01392 43#include "print_binary.h"
a067558e 44#include "string2.h"
fd0db102 45#include "syscalltbl.h"
96c14451 46#include "rb_resort.h"
514f1c67 47
a43783ae 48#include <errno.h>
fd20e811 49#include <inttypes.h>
4208735d 50#include <poll.h>
9607ad3a 51#include <signal.h>
514f1c67 52#include <stdlib.h>
017037ff 53#include <string.h>
8dd2a131 54#include <linux/err.h>
997bba8c 55#include <linux/filter.h>
877a7a11 56#include <linux/kernel.h>
39878d49 57#include <linux/random.h>
c6d4a494 58#include <linux/stringify.h>
bd48c63e 59#include <linux/time64.h>
bafae98e 60#include <fcntl.h>
514f1c67 61
3d689ed6
ACM
62#include "sane_ctype.h"
63
c188e7ac
ACM
64#ifndef O_CLOEXEC
65# define O_CLOEXEC 02000000
66#endif
67
83a51694
ACM
68#ifndef F_LINUX_SPECIFIC_BASE
69# define F_LINUX_SPECIFIC_BASE 1024
70#endif
71
d1d438a3
ACM
72struct trace {
73 struct perf_tool tool;
fd0db102 74 struct syscalltbl *sctbl;
d1d438a3
ACM
75 struct {
76 int max;
77 struct syscall *table;
78 struct {
79 struct perf_evsel *sys_enter,
d3d1c4bd
ACM
80 *sys_exit,
81 *augmented;
d1d438a3
ACM
82 } events;
83 } syscalls;
84 struct record_opts opts;
85 struct perf_evlist *evlist;
86 struct machine *host;
87 struct thread *current;
9ea42ba4 88 struct cgroup *cgroup;
d1d438a3
ACM
89 u64 base_time;
90 FILE *output;
91 unsigned long nr_events;
92 struct strlist *ev_qualifier;
93 struct {
94 size_t nr;
95 int *entries;
96 } ev_qualifier_ids;
d1d438a3
ACM
97 struct {
98 size_t nr;
99 pid_t *entries;
100 } filter_pids;
101 double duration_filter;
102 double runtime_ms;
103 struct {
104 u64 vfs_getname,
105 proc_getname;
106 } stats;
c6d4a494 107 unsigned int max_stack;
5cf9c84e 108 unsigned int min_stack;
d1d438a3
ACM
109 bool not_ev_qualifier;
110 bool live;
111 bool full_time;
112 bool sched;
113 bool multiple_threads;
114 bool summary;
115 bool summary_only;
0a6545bd 116 bool failure_only;
d1d438a3 117 bool show_comm;
591421e1 118 bool print_sample;
d1d438a3
ACM
119 bool show_tool_stats;
120 bool trace_syscalls;
44621819 121 bool kernel_syscallchains;
d1d438a3
ACM
122 bool force;
123 bool vfs_getname;
124 int trace_pgfaults;
125};
a1c2552d 126
77170988
ACM
127struct tp_field {
128 int offset;
129 union {
130 u64 (*integer)(struct tp_field *field, struct perf_sample *sample);
131 void *(*pointer)(struct tp_field *field, struct perf_sample *sample);
132 };
133};
134
135#define TP_UINT_FIELD(bits) \
136static u64 tp_field__u##bits(struct tp_field *field, struct perf_sample *sample) \
137{ \
55d43bca
DA
138 u##bits value; \
139 memcpy(&value, sample->raw_data + field->offset, sizeof(value)); \
140 return value; \
77170988
ACM
141}
142
143TP_UINT_FIELD(8);
144TP_UINT_FIELD(16);
145TP_UINT_FIELD(32);
146TP_UINT_FIELD(64);
147
148#define TP_UINT_FIELD__SWAPPED(bits) \
149static u64 tp_field__swapped_u##bits(struct tp_field *field, struct perf_sample *sample) \
150{ \
55d43bca
DA
151 u##bits value; \
152 memcpy(&value, sample->raw_data + field->offset, sizeof(value)); \
77170988
ACM
153 return bswap_##bits(value);\
154}
155
156TP_UINT_FIELD__SWAPPED(16);
157TP_UINT_FIELD__SWAPPED(32);
158TP_UINT_FIELD__SWAPPED(64);
159
aa823f58 160static int __tp_field__init_uint(struct tp_field *field, int size, int offset, bool needs_swap)
77170988 161{
aa823f58 162 field->offset = offset;
77170988 163
aa823f58 164 switch (size) {
77170988
ACM
165 case 1:
166 field->integer = tp_field__u8;
167 break;
168 case 2:
169 field->integer = needs_swap ? tp_field__swapped_u16 : tp_field__u16;
170 break;
171 case 4:
172 field->integer = needs_swap ? tp_field__swapped_u32 : tp_field__u32;
173 break;
174 case 8:
175 field->integer = needs_swap ? tp_field__swapped_u64 : tp_field__u64;
176 break;
177 default:
178 return -1;
179 }
180
181 return 0;
182}
183
aa823f58
ACM
184static int tp_field__init_uint(struct tp_field *field, struct format_field *format_field, bool needs_swap)
185{
186 return __tp_field__init_uint(field, format_field->size, format_field->offset, needs_swap);
187}
188
77170988
ACM
189static void *tp_field__ptr(struct tp_field *field, struct perf_sample *sample)
190{
191 return sample->raw_data + field->offset;
192}
193
aa823f58 194static int __tp_field__init_ptr(struct tp_field *field, int offset)
77170988 195{
aa823f58 196 field->offset = offset;
77170988
ACM
197 field->pointer = tp_field__ptr;
198 return 0;
199}
200
aa823f58
ACM
201static int tp_field__init_ptr(struct tp_field *field, struct format_field *format_field)
202{
203 return __tp_field__init_ptr(field, format_field->offset);
204}
205
77170988
ACM
206struct syscall_tp {
207 struct tp_field id;
208 union {
209 struct tp_field args, ret;
210 };
211};
212
213static int perf_evsel__init_tp_uint_field(struct perf_evsel *evsel,
214 struct tp_field *field,
215 const char *name)
216{
217 struct format_field *format_field = perf_evsel__field(evsel, name);
218
219 if (format_field == NULL)
220 return -1;
221
222 return tp_field__init_uint(field, format_field, evsel->needs_swap);
223}
224
/*
 * Initialise member 'name' of the struct syscall_tp in evsel->priv as an
 * unsigned integer reader for the tracepoint field of the same name.
 */
#define perf_evsel__init_sc_tp_uint_field(evsel, name)			\
	({								\
		struct syscall_tp *sc = evsel->priv;			\
		perf_evsel__init_tp_uint_field(evsel, &sc->name, #name);	\
	})
228
/*
 * Look up tracepoint field @name in @evsel and set up @field to return a
 * pointer to it.  Returns -1 when the field does not exist, otherwise the
 * result of tp_field__init_ptr().
 */
static int perf_evsel__init_tp_ptr_field(struct perf_evsel *evsel,
					 struct tp_field *field,
					 const char *name)
{
	struct format_field *ff = perf_evsel__field(evsel, name);

	if (ff != NULL)
		return tp_field__init_ptr(field, ff);

	return -1;
}
240
/*
 * Initialise member 'name' of the struct syscall_tp in evsel->priv as a
 * pointer reader for the tracepoint field of the same name.
 */
#define perf_evsel__init_sc_tp_ptr_field(evsel, name)			\
	({								\
		struct syscall_tp *sc = evsel->priv;			\
		perf_evsel__init_tp_ptr_field(evsel, &sc->name, #name);	\
	})
244
245static void perf_evsel__delete_priv(struct perf_evsel *evsel)
246{
04662523 247 zfree(&evsel->priv);
77170988
ACM
248 perf_evsel__delete(evsel);
249}
250
d32855fa
ACM
251static int perf_evsel__init_syscall_tp(struct perf_evsel *evsel)
252{
253 struct syscall_tp *sc = evsel->priv = malloc(sizeof(struct syscall_tp));
254
255 if (evsel->priv != NULL) {
256 if (perf_evsel__init_tp_uint_field(evsel, &sc->id, "__syscall_nr"))
257 goto out_delete;
258 return 0;
259 }
260
261 return -ENOMEM;
262out_delete:
263 zfree(&evsel->priv);
264 return -ENOENT;
265}
266
d3d1c4bd
ACM
267static int perf_evsel__init_augmented_syscall_tp(struct perf_evsel *evsel)
268{
269 struct syscall_tp *sc = evsel->priv = malloc(sizeof(struct syscall_tp));
270
271 if (evsel->priv != NULL) { /* field, sizeof_field, offsetof_field */
272 if (__tp_field__init_uint(&sc->id, sizeof(long), sizeof(long long), evsel->needs_swap))
273 goto out_delete;
274
275 return 0;
276 }
277
278 return -ENOMEM;
279out_delete:
280 zfree(&evsel->priv);
281 return -EINVAL;
282}
283
284static int perf_evsel__init_augmented_syscall_tp_args(struct perf_evsel *evsel)
285{
286 struct syscall_tp *sc = evsel->priv;
287
288 return __tp_field__init_ptr(&sc->args, sc->id.offset + sizeof(u64));
289}
290
63f11c80 291static int perf_evsel__init_raw_syscall_tp(struct perf_evsel *evsel, void *handler)
96695d44
NK
292{
293 evsel->priv = malloc(sizeof(struct syscall_tp));
294 if (evsel->priv != NULL) {
295 if (perf_evsel__init_sc_tp_uint_field(evsel, id))
296 goto out_delete;
297
298 evsel->handler = handler;
299 return 0;
300 }
301
302 return -ENOMEM;
303
304out_delete:
04662523 305 zfree(&evsel->priv);
96695d44
NK
306 return -ENOENT;
307}
308
/*
 * Create the evsel for the raw_syscalls:@direction tracepoint, falling back
 * to syscalls:@direction, and initialise it with @handler.
 *
 * Returns the new evsel, or NULL if neither tracepoint could be created or
 * the initialisation failed (the evsel is deleted in that case).
 */
static struct perf_evsel *perf_evsel__raw_syscall_newtp(const char *direction, void *handler)
{
	struct perf_evsel *evsel = perf_evsel__newtp("raw_syscalls", direction);

	if (IS_ERR(evsel)) {
		/* older kernel (e.g., RHEL6) use syscalls:{enter,exit} */
		evsel = perf_evsel__newtp("syscalls", direction);
		if (IS_ERR(evsel))
			return NULL;
	}

	if (perf_evsel__init_raw_syscall_tp(evsel, handler) == 0)
		return evsel;

	perf_evsel__delete_priv(evsel);
	return NULL;
}
329
/*
 * Read member 'name' of the struct syscall_tp in evsel->priv from 'sample',
 * through its integer accessor.
 */
#define perf_evsel__sc_tp_uint(evsel, name, sample)			\
	({								\
		struct syscall_tp *fields = evsel->priv;		\
		fields->name.integer(&fields->name, sample);		\
	})

/* Same as above, but through the pointer accessor. */
#define perf_evsel__sc_tp_ptr(evsel, name, sample)			\
	({								\
		struct syscall_tp *fields = evsel->priv;		\
		fields->name.pointer(&fields->name, sample);		\
	})
337
0ae79636
ACM
338size_t strarray__scnprintf(struct strarray *sa, char *bf, size_t size, const char *intfmt, int val)
339{
340 int idx = val - sa->offset;
1f115cb7 341
bc972ada 342 if (idx < 0 || idx >= sa->nr_entries || sa->entries[idx] == NULL)
0ae79636 343 return scnprintf(bf, size, intfmt, val);
1f115cb7 344
0ae79636 345 return scnprintf(bf, size, "%s", sa->entries[idx]);
03e3adc9
ACM
346}
347
975b7c2f
ACM
348static size_t __syscall_arg__scnprintf_strarray(char *bf, size_t size,
349 const char *intfmt,
350 struct syscall_arg *arg)
1f115cb7 351{
0ae79636 352 return strarray__scnprintf(arg->parm, bf, size, intfmt, arg->val);
1f115cb7
ACM
353}
354
975b7c2f
ACM
/* Strarray formatter that prints unnamed values as "%d". */
static size_t syscall_arg__scnprintf_strarray(char *bf, size_t size,
					      struct syscall_arg *arg)
{
	static const char fallback_fmt[] = "%d";

	return __syscall_arg__scnprintf_strarray(bf, size, fallback_fmt, arg);
}
360
1f115cb7
ACM
361#define SCA_STRARRAY syscall_arg__scnprintf_strarray
362
83a51694
ACM
/* A list of strarrays that are consulted in order for one value. */
struct strarrays {
	int		nr_entries;
	struct strarray **entries;
};

/* Define 'strarrays__<array>' wrapping the strarray pointer array 'array'. */
#define DEFINE_STRARRAYS(array)				\
	struct strarrays strarrays__##array = {		\
		.nr_entries = ARRAY_SIZE(array),	\
		.entries = array,			\
	}
372
274e86fd
ACM
373size_t syscall_arg__scnprintf_strarrays(char *bf, size_t size,
374 struct syscall_arg *arg)
83a51694
ACM
375{
376 struct strarrays *sas = arg->parm;
377 int i;
378
379 for (i = 0; i < sas->nr_entries; ++i) {
380 struct strarray *sa = sas->entries[i];
381 int idx = arg->val - sa->offset;
382
383 if (idx >= 0 && idx < sa->nr_entries) {
384 if (sa->entries[idx] == NULL)
385 break;
386 return scnprintf(bf, size, "%s", sa->entries[idx]);
387 }
388 }
389
390 return scnprintf(bf, size, "%d", arg->val);
391}
392
48e1f91a
ACM
393#ifndef AT_FDCWD
394#define AT_FDCWD -100
395#endif
396
75b757ca
ACM
397static size_t syscall_arg__scnprintf_fd_at(char *bf, size_t size,
398 struct syscall_arg *arg)
399{
400 int fd = arg->val;
401
402 if (fd == AT_FDCWD)
403 return scnprintf(bf, size, "CWD");
404
405 return syscall_arg__scnprintf_fd(bf, size, arg);
406}
407
408#define SCA_FDAT syscall_arg__scnprintf_fd_at
409
410static size_t syscall_arg__scnprintf_close_fd(char *bf, size_t size,
411 struct syscall_arg *arg);
412
413#define SCA_CLOSE_FD syscall_arg__scnprintf_close_fd
414
2c2b1623 415size_t syscall_arg__scnprintf_hex(char *bf, size_t size, struct syscall_arg *arg)
13d4ff3e 416{
01533e97 417 return scnprintf(bf, size, "%#lx", arg->val);
13d4ff3e
ACM
418}
419
2c2b1623 420size_t syscall_arg__scnprintf_int(char *bf, size_t size, struct syscall_arg *arg)
a1c2552d
ACM
421{
422 return scnprintf(bf, size, "%d", arg->val);
423}
424
5dde91ed
ACM
425size_t syscall_arg__scnprintf_long(char *bf, size_t size, struct syscall_arg *arg)
426{
427 return scnprintf(bf, size, "%ld", arg->val);
428}
429
729a7841
ACM
430static const char *bpf_cmd[] = {
431 "MAP_CREATE", "MAP_LOOKUP_ELEM", "MAP_UPDATE_ELEM", "MAP_DELETE_ELEM",
432 "MAP_GET_NEXT_KEY", "PROG_LOAD",
433};
434static DEFINE_STRARRAY(bpf_cmd);
435
03e3adc9
ACM
436static const char *epoll_ctl_ops[] = { "ADD", "DEL", "MOD", };
437static DEFINE_STRARRAY_OFFSET(epoll_ctl_ops, 1);
eac032c5 438
1f115cb7
ACM
439static const char *itimers[] = { "REAL", "VIRTUAL", "PROF", };
440static DEFINE_STRARRAY(itimers);
441
b62bee1b
ACM
442static const char *keyctl_options[] = {
443 "GET_KEYRING_ID", "JOIN_SESSION_KEYRING", "UPDATE", "REVOKE", "CHOWN",
444 "SETPERM", "DESCRIBE", "CLEAR", "LINK", "UNLINK", "SEARCH", "READ",
445 "INSTANTIATE", "NEGATE", "SET_REQKEY_KEYRING", "SET_TIMEOUT",
446 "ASSUME_AUTHORITY", "GET_SECURITY", "SESSION_TO_PARENT", "REJECT",
447 "INSTANTIATE_IOV", "INVALIDATE", "GET_PERSISTENT",
448};
449static DEFINE_STRARRAY(keyctl_options);
450
efe6b882
ACM
451static const char *whences[] = { "SET", "CUR", "END",
452#ifdef SEEK_DATA
453"DATA",
454#endif
455#ifdef SEEK_HOLE
456"HOLE",
457#endif
458};
459static DEFINE_STRARRAY(whences);
f9da0b0c 460
80f587d5
ACM
461static const char *fcntl_cmds[] = {
462 "DUPFD", "GETFD", "SETFD", "GETFL", "SETFL", "GETLK", "SETLK",
e000e5e3
ACM
463 "SETLKW", "SETOWN", "GETOWN", "SETSIG", "GETSIG", "GETLK64",
464 "SETLK64", "SETLKW64", "SETOWN_EX", "GETOWN_EX",
465 "GETOWNER_UIDS",
80f587d5
ACM
466};
467static DEFINE_STRARRAY(fcntl_cmds);
468
83a51694
ACM
469static const char *fcntl_linux_specific_cmds[] = {
470 "SETLEASE", "GETLEASE", "NOTIFY", [5] = "CANCELLK", "DUPFD_CLOEXEC",
471 "SETPIPE_SZ", "GETPIPE_SZ", "ADD_SEALS", "GET_SEALS",
64e4561d 472 "GET_RW_HINT", "SET_RW_HINT", "GET_FILE_RW_HINT", "SET_FILE_RW_HINT",
83a51694
ACM
473};
474
475static DEFINE_STRARRAY_OFFSET(fcntl_linux_specific_cmds, F_LINUX_SPECIFIC_BASE);
476
477static struct strarray *fcntl_cmds_arrays[] = {
478 &strarray__fcntl_cmds,
479 &strarray__fcntl_linux_specific_cmds,
480};
481
482static DEFINE_STRARRAYS(fcntl_cmds_arrays);
483
c045bf02
ACM
484static const char *rlimit_resources[] = {
485 "CPU", "FSIZE", "DATA", "STACK", "CORE", "RSS", "NPROC", "NOFILE",
486 "MEMLOCK", "AS", "LOCKS", "SIGPENDING", "MSGQUEUE", "NICE", "RTPRIO",
487 "RTTIME",
488};
489static DEFINE_STRARRAY(rlimit_resources);
490
eb5b1b14
ACM
491static const char *sighow[] = { "BLOCK", "UNBLOCK", "SETMASK", };
492static DEFINE_STRARRAY(sighow);
493
4f8c1b74
DA
494static const char *clockid[] = {
495 "REALTIME", "MONOTONIC", "PROCESS_CPUTIME_ID", "THREAD_CPUTIME_ID",
28ebb87c
ACM
496 "MONOTONIC_RAW", "REALTIME_COARSE", "MONOTONIC_COARSE", "BOOTTIME",
497 "REALTIME_ALARM", "BOOTTIME_ALARM", "SGI_CYCLE", "TAI"
4f8c1b74
DA
498};
499static DEFINE_STRARRAY(clockid);
500
e10bce81
ACM
501static const char *socket_families[] = {
502 "UNSPEC", "LOCAL", "INET", "AX25", "IPX", "APPLETALK", "NETROM",
503 "BRIDGE", "ATMPVC", "X25", "INET6", "ROSE", "DECnet", "NETBEUI",
504 "SECURITY", "KEY", "NETLINK", "PACKET", "ASH", "ECONET", "ATMSVC",
505 "RDS", "SNA", "IRDA", "PPPOX", "WANPIPE", "LLC", "IB", "CAN", "TIPC",
506 "BLUETOOTH", "IUCV", "RXRPC", "ISDN", "PHONET", "IEEE802154", "CAIF",
507 "ALG", "NFC", "VSOCK",
508};
509static DEFINE_STRARRAY(socket_families);
510
51108999
ACM
511static size_t syscall_arg__scnprintf_access_mode(char *bf, size_t size,
512 struct syscall_arg *arg)
513{
514 size_t printed = 0;
515 int mode = arg->val;
516
517 if (mode == F_OK) /* 0 */
518 return scnprintf(bf, size, "F");
519#define P_MODE(n) \
520 if (mode & n##_OK) { \
521 printed += scnprintf(bf + printed, size - printed, "%s", #n); \
522 mode &= ~n##_OK; \
523 }
524
525 P_MODE(R);
526 P_MODE(W);
527 P_MODE(X);
528#undef P_MODE
529
530 if (mode)
531 printed += scnprintf(bf + printed, size - printed, "|%#x", mode);
532
533 return printed;
534}
535
536#define SCA_ACCMODE syscall_arg__scnprintf_access_mode
537
f994592d
ACM
538static size_t syscall_arg__scnprintf_filename(char *bf, size_t size,
539 struct syscall_arg *arg);
540
541#define SCA_FILENAME syscall_arg__scnprintf_filename
542
46cce19b
ACM
543static size_t syscall_arg__scnprintf_pipe_flags(char *bf, size_t size,
544 struct syscall_arg *arg)
545{
546 int printed = 0, flags = arg->val;
547
548#define P_FLAG(n) \
549 if (flags & O_##n) { \
550 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
551 flags &= ~O_##n; \
552 }
553
554 P_FLAG(CLOEXEC);
555 P_FLAG(NONBLOCK);
556#undef P_FLAG
557
558 if (flags)
559 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
560
561 return printed;
562}
563
564#define SCA_PIPE_FLAGS syscall_arg__scnprintf_pipe_flags
565
a355a61e
ACM
566#ifndef GRND_NONBLOCK
567#define GRND_NONBLOCK 0x0001
568#endif
569#ifndef GRND_RANDOM
570#define GRND_RANDOM 0x0002
571#endif
572
39878d49
ACM
573static size_t syscall_arg__scnprintf_getrandom_flags(char *bf, size_t size,
574 struct syscall_arg *arg)
575{
576 int printed = 0, flags = arg->val;
577
578#define P_FLAG(n) \
579 if (flags & GRND_##n) { \
580 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
581 flags &= ~GRND_##n; \
582 }
583
584 P_FLAG(RANDOM);
585 P_FLAG(NONBLOCK);
586#undef P_FLAG
587
588 if (flags)
589 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
590
591 return printed;
592}
593
594#define SCA_GETRANDOM_FLAGS syscall_arg__scnprintf_getrandom_flags
595
82d4a110
ACM
/*
 * Initialiser for a syscall_arg_fmt that prints through 'strarray__<array>'.
 * 'name' is not used by the expansion; it only labels the argument.
 */
#define STRARRAY(name, array)			\
	  { .scnprintf	= SCA_STRARRAY,		\
	    .parm	= &strarray__##array, }
453350dd 599
092bd3cd 600#include "trace/beauty/arch_errno_names.c"
ea8dc3ce 601#include "trace/beauty/eventfd.c"
d5d71e86 602#include "trace/beauty/futex_op.c"
3258abe0 603#include "trace/beauty/futex_val3.c"
df4cb167 604#include "trace/beauty/mmap.c"
ba2f22cf 605#include "trace/beauty/mode_t.c"
a30e6259 606#include "trace/beauty/msg_flags.c"
8f48df69 607#include "trace/beauty/open_flags.c"
62de344e 608#include "trace/beauty/perf_event_open.c"
d5d71e86 609#include "trace/beauty/pid.c"
a3bca91f 610#include "trace/beauty/sched_policy.c"
f5cd95ea 611#include "trace/beauty/seccomp.c"
12199d8e 612#include "trace/beauty/signum.c"
bbf86c43 613#include "trace/beauty/socket_type.c"
7206b900 614#include "trace/beauty/waitid_options.c"
a3bca91f 615
82d4a110
ACM
/*
 * How to print one syscall argument: the formatter, an opaque parameter for
 * it (e.g. a strarray), and optionally the argument's name.
 */
struct syscall_arg_fmt {
	size_t	   (*scnprintf)(char *bf, size_t size, struct syscall_arg *arg);
	void	   *parm;
	const char *name;
	/* NOTE(review): presumably "print this arg even when it is 0" -- confirm. */
	bool	   show_zero;
};
622
514f1c67
ACM
623static struct syscall_fmt {
624 const char *name;
aec1930b 625 const char *alias;
82d4a110 626 struct syscall_arg_fmt arg[6];
332337da 627 u8 nr_args;
11c8e39f 628 bool errpid;
514f1c67 629 bool timeout;
04b34729 630 bool hexret;
514f1c67 631} syscall_fmts[] = {
1f63139c 632 { .name = "access",
82d4a110 633 .arg = { [1] = { .scnprintf = SCA_ACCMODE, /* mode */ }, }, },
1f63139c 634 { .name = "bpf",
82d4a110 635 .arg = { [0] = STRARRAY(cmd, bpf_cmd), }, },
beccb2b5 636 { .name = "brk", .hexret = true,
82d4a110 637 .arg = { [0] = { .scnprintf = SCA_HEX, /* brk */ }, }, },
1f63139c 638 { .name = "clock_gettime",
82d4a110 639 .arg = { [0] = STRARRAY(clk_id, clockid), }, },
33396a3a
ACM
640 { .name = "clone", .errpid = true, .nr_args = 5,
641 .arg = { [0] = { .name = "flags", .scnprintf = SCA_CLONE_FLAGS, },
642 [1] = { .name = "child_stack", .scnprintf = SCA_HEX, },
643 [2] = { .name = "parent_tidptr", .scnprintf = SCA_HEX, },
644 [3] = { .name = "child_tidptr", .scnprintf = SCA_HEX, },
645 [4] = { .name = "tls", .scnprintf = SCA_HEX, }, }, },
1f63139c 646 { .name = "close",
82d4a110 647 .arg = { [0] = { .scnprintf = SCA_CLOSE_FD, /* fd */ }, }, },
1f63139c 648 { .name = "epoll_ctl",
82d4a110 649 .arg = { [1] = STRARRAY(op, epoll_ctl_ops), }, },
1f63139c 650 { .name = "eventfd2",
82d4a110 651 .arg = { [1] = { .scnprintf = SCA_EFD_FLAGS, /* flags */ }, }, },
1f63139c 652 { .name = "fchmodat",
82d4a110 653 .arg = { [0] = { .scnprintf = SCA_FDAT, /* fd */ }, }, },
1f63139c 654 { .name = "fchownat",
82d4a110 655 .arg = { [0] = { .scnprintf = SCA_FDAT, /* fd */ }, }, },
1f63139c 656 { .name = "fcntl",
82d4a110 657 .arg = { [1] = { .scnprintf = SCA_FCNTL_CMD, /* cmd */
39cc355b
ACM
658 .parm = &strarrays__fcntl_cmds_arrays,
659 .show_zero = true, },
82d4a110 660 [2] = { .scnprintf = SCA_FCNTL_ARG, /* arg */ }, }, },
1f63139c 661 { .name = "flock",
82d4a110 662 .arg = { [1] = { .scnprintf = SCA_FLOCK, /* cmd */ }, }, },
1f63139c
ACM
663 { .name = "fstat", .alias = "newfstat", },
664 { .name = "fstatat", .alias = "newfstatat", },
665 { .name = "futex",
3258abe0
ACM
666 .arg = { [1] = { .scnprintf = SCA_FUTEX_OP, /* op */ },
667 [5] = { .scnprintf = SCA_FUTEX_VAL3, /* val3 */ }, }, },
1f63139c 668 { .name = "futimesat",
82d4a110 669 .arg = { [0] = { .scnprintf = SCA_FDAT, /* fd */ }, }, },
1f63139c 670 { .name = "getitimer",
82d4a110 671 .arg = { [0] = STRARRAY(which, itimers), }, },
c65f1070 672 { .name = "getpid", .errpid = true, },
d1d438a3 673 { .name = "getpgid", .errpid = true, },
c65f1070 674 { .name = "getppid", .errpid = true, },
1f63139c 675 { .name = "getrandom",
82d4a110 676 .arg = { [2] = { .scnprintf = SCA_GETRANDOM_FLAGS, /* flags */ }, }, },
1f63139c 677 { .name = "getrlimit",
82d4a110 678 .arg = { [0] = STRARRAY(resource, rlimit_resources), }, },
2d1073de 679 { .name = "gettid", .errpid = true, },
1f63139c 680 { .name = "ioctl",
82d4a110 681 .arg = {
844ae5b4
ACM
682#if defined(__i386__) || defined(__x86_64__)
683/*
684 * FIXME: Make this available to all arches.
685 */
1cc47f2d 686 [1] = { .scnprintf = SCA_IOCTL_CMD, /* cmd */ },
82d4a110 687 [2] = { .scnprintf = SCA_HEX, /* arg */ }, }, },
844ae5b4 688#else
82d4a110 689 [2] = { .scnprintf = SCA_HEX, /* arg */ }, }, },
844ae5b4 690#endif
1de3038d
ACM
691 { .name = "kcmp", .nr_args = 5,
692 .arg = { [0] = { .name = "pid1", .scnprintf = SCA_PID, },
693 [1] = { .name = "pid2", .scnprintf = SCA_PID, },
694 [2] = { .name = "type", .scnprintf = SCA_KCMP_TYPE, },
695 [3] = { .name = "idx1", .scnprintf = SCA_KCMP_IDX, },
696 [4] = { .name = "idx2", .scnprintf = SCA_KCMP_IDX, }, }, },
1f63139c 697 { .name = "keyctl",
82d4a110 698 .arg = { [0] = STRARRAY(option, keyctl_options), }, },
1f63139c 699 { .name = "kill",
82d4a110 700 .arg = { [1] = { .scnprintf = SCA_SIGNUM, /* sig */ }, }, },
1f63139c 701 { .name = "linkat",
82d4a110 702 .arg = { [0] = { .scnprintf = SCA_FDAT, /* fd */ }, }, },
1f63139c 703 { .name = "lseek",
82d4a110 704 .arg = { [2] = STRARRAY(whence, whences), }, },
1f63139c
ACM
705 { .name = "lstat", .alias = "newlstat", },
706 { .name = "madvise",
82d4a110
ACM
707 .arg = { [0] = { .scnprintf = SCA_HEX, /* start */ },
708 [2] = { .scnprintf = SCA_MADV_BHV, /* behavior */ }, }, },
1f63139c 709 { .name = "mkdirat",
82d4a110 710 .arg = { [0] = { .scnprintf = SCA_FDAT, /* fd */ }, }, },
1f63139c 711 { .name = "mknodat",
82d4a110 712 .arg = { [0] = { .scnprintf = SCA_FDAT, /* fd */ }, }, },
1f63139c 713 { .name = "mlock",
82d4a110 714 .arg = { [0] = { .scnprintf = SCA_HEX, /* addr */ }, }, },
1f63139c 715 { .name = "mlockall",
82d4a110 716 .arg = { [0] = { .scnprintf = SCA_HEX, /* addr */ }, }, },
beccb2b5 717 { .name = "mmap", .hexret = true,
54265664
JO
718/* The standard mmap maps to old_mmap on s390x */
719#if defined(__s390x__)
720 .alias = "old_mmap",
721#endif
82d4a110
ACM
722 .arg = { [0] = { .scnprintf = SCA_HEX, /* addr */ },
723 [2] = { .scnprintf = SCA_MMAP_PROT, /* prot */ },
724 [3] = { .scnprintf = SCA_MMAP_FLAGS, /* flags */ }, }, },
1f63139c 725 { .name = "mprotect",
82d4a110
ACM
726 .arg = { [0] = { .scnprintf = SCA_HEX, /* start */ },
727 [2] = { .scnprintf = SCA_MMAP_PROT, /* prot */ }, }, },
1f63139c 728 { .name = "mq_unlink",
82d4a110 729 .arg = { [0] = { .scnprintf = SCA_FILENAME, /* u_name */ }, }, },
ae685380 730 { .name = "mremap", .hexret = true,
82d4a110
ACM
731 .arg = { [0] = { .scnprintf = SCA_HEX, /* addr */ },
732 [3] = { .scnprintf = SCA_MREMAP_FLAGS, /* flags */ },
733 [4] = { .scnprintf = SCA_HEX, /* new_addr */ }, }, },
1f63139c 734 { .name = "munlock",
82d4a110 735 .arg = { [0] = { .scnprintf = SCA_HEX, /* addr */ }, }, },
1f63139c 736 { .name = "munmap",
82d4a110 737 .arg = { [0] = { .scnprintf = SCA_HEX, /* addr */ }, }, },
1f63139c 738 { .name = "name_to_handle_at",
82d4a110 739 .arg = { [0] = { .scnprintf = SCA_FDAT, /* dfd */ }, }, },
1f63139c 740 { .name = "newfstatat",
82d4a110 741 .arg = { [0] = { .scnprintf = SCA_FDAT, /* dfd */ }, }, },
1f63139c 742 { .name = "open",
82d4a110 743 .arg = { [1] = { .scnprintf = SCA_OPEN_FLAGS, /* flags */ }, }, },
1f63139c 744 { .name = "open_by_handle_at",
82d4a110
ACM
745 .arg = { [0] = { .scnprintf = SCA_FDAT, /* dfd */ },
746 [2] = { .scnprintf = SCA_OPEN_FLAGS, /* flags */ }, }, },
1f63139c 747 { .name = "openat",
82d4a110
ACM
748 .arg = { [0] = { .scnprintf = SCA_FDAT, /* dfd */ },
749 [2] = { .scnprintf = SCA_OPEN_FLAGS, /* flags */ }, }, },
1f63139c 750 { .name = "perf_event_open",
82d4a110
ACM
751 .arg = { [2] = { .scnprintf = SCA_INT, /* cpu */ },
752 [3] = { .scnprintf = SCA_FD, /* group_fd */ },
753 [4] = { .scnprintf = SCA_PERF_FLAGS, /* flags */ }, }, },
1f63139c 754 { .name = "pipe2",
82d4a110 755 .arg = { [1] = { .scnprintf = SCA_PIPE_FLAGS, /* flags */ }, }, },
83bc9c37
ACM
756 { .name = "pkey_alloc",
757 .arg = { [1] = { .scnprintf = SCA_PKEY_ALLOC_ACCESS_RIGHTS, /* access_rights */ }, }, },
758 { .name = "pkey_free",
759 .arg = { [0] = { .scnprintf = SCA_INT, /* key */ }, }, },
760 { .name = "pkey_mprotect",
761 .arg = { [0] = { .scnprintf = SCA_HEX, /* start */ },
762 [2] = { .scnprintf = SCA_MMAP_PROT, /* prot */ },
763 [3] = { .scnprintf = SCA_INT, /* pkey */ }, }, },
1f63139c
ACM
764 { .name = "poll", .timeout = true, },
765 { .name = "ppoll", .timeout = true, },
d688d037
ACM
766 { .name = "prctl", .alias = "arch_prctl",
767 .arg = { [0] = { .scnprintf = SCA_PRCTL_OPTION, /* option */ },
768 [1] = { .scnprintf = SCA_PRCTL_ARG2, /* arg2 */ },
769 [2] = { .scnprintf = SCA_PRCTL_ARG3, /* arg3 */ }, }, },
1f63139c
ACM
770 { .name = "pread", .alias = "pread64", },
771 { .name = "preadv", .alias = "pread", },
772 { .name = "prlimit64",
82d4a110 773 .arg = { [1] = STRARRAY(resource, rlimit_resources), }, },
1f63139c
ACM
774 { .name = "pwrite", .alias = "pwrite64", },
775 { .name = "readlinkat",
82d4a110 776 .arg = { [0] = { .scnprintf = SCA_FDAT, /* dfd */ }, }, },
1f63139c 777 { .name = "recvfrom",
82d4a110 778 .arg = { [3] = { .scnprintf = SCA_MSG_FLAGS, /* flags */ }, }, },
1f63139c 779 { .name = "recvmmsg",
82d4a110 780 .arg = { [3] = { .scnprintf = SCA_MSG_FLAGS, /* flags */ }, }, },
1f63139c 781 { .name = "recvmsg",
82d4a110 782 .arg = { [2] = { .scnprintf = SCA_MSG_FLAGS, /* flags */ }, }, },
1f63139c 783 { .name = "renameat",
82d4a110 784 .arg = { [0] = { .scnprintf = SCA_FDAT, /* dfd */ }, }, },
1f63139c 785 { .name = "rt_sigaction",
82d4a110 786 .arg = { [0] = { .scnprintf = SCA_SIGNUM, /* sig */ }, }, },
1f63139c 787 { .name = "rt_sigprocmask",
82d4a110 788 .arg = { [0] = STRARRAY(how, sighow), }, },
1f63139c 789 { .name = "rt_sigqueueinfo",
82d4a110 790 .arg = { [1] = { .scnprintf = SCA_SIGNUM, /* sig */ }, }, },
1f63139c 791 { .name = "rt_tgsigqueueinfo",
82d4a110 792 .arg = { [2] = { .scnprintf = SCA_SIGNUM, /* sig */ }, }, },
1f63139c 793 { .name = "sched_setscheduler",
82d4a110 794 .arg = { [1] = { .scnprintf = SCA_SCHED_POLICY, /* policy */ }, }, },
1f63139c 795 { .name = "seccomp",
82d4a110
ACM
796 .arg = { [0] = { .scnprintf = SCA_SECCOMP_OP, /* op */ },
797 [1] = { .scnprintf = SCA_SECCOMP_FLAGS, /* flags */ }, }, },
1f63139c
ACM
798 { .name = "select", .timeout = true, },
799 { .name = "sendmmsg",
82d4a110 800 .arg = { [3] = { .scnprintf = SCA_MSG_FLAGS, /* flags */ }, }, },
1f63139c 801 { .name = "sendmsg",
82d4a110 802 .arg = { [2] = { .scnprintf = SCA_MSG_FLAGS, /* flags */ }, }, },
1f63139c 803 { .name = "sendto",
82d4a110 804 .arg = { [3] = { .scnprintf = SCA_MSG_FLAGS, /* flags */ }, }, },
c65f1070 805 { .name = "set_tid_address", .errpid = true, },
1f63139c 806 { .name = "setitimer",
82d4a110 807 .arg = { [0] = STRARRAY(which, itimers), }, },
1f63139c 808 { .name = "setrlimit",
82d4a110 809 .arg = { [0] = STRARRAY(resource, rlimit_resources), }, },
1f63139c 810 { .name = "socket",
82d4a110 811 .arg = { [0] = STRARRAY(family, socket_families),
162d3edb
ACM
812 [1] = { .scnprintf = SCA_SK_TYPE, /* type */ },
813 [2] = { .scnprintf = SCA_SK_PROTO, /* protocol */ }, }, },
1f63139c 814 { .name = "socketpair",
82d4a110 815 .arg = { [0] = STRARRAY(family, socket_families),
162d3edb
ACM
816 [1] = { .scnprintf = SCA_SK_TYPE, /* type */ },
817 [2] = { .scnprintf = SCA_SK_PROTO, /* protocol */ }, }, },
1f63139c
ACM
818 { .name = "stat", .alias = "newstat", },
819 { .name = "statx",
82d4a110
ACM
820 .arg = { [0] = { .scnprintf = SCA_FDAT, /* fdat */ },
821 [2] = { .scnprintf = SCA_STATX_FLAGS, /* flags */ } ,
822 [3] = { .scnprintf = SCA_STATX_MASK, /* mask */ }, }, },
1f63139c 823 { .name = "swapoff",
82d4a110 824 .arg = { [0] = { .scnprintf = SCA_FILENAME, /* specialfile */ }, }, },
1f63139c 825 { .name = "swapon",
82d4a110 826 .arg = { [0] = { .scnprintf = SCA_FILENAME, /* specialfile */ }, }, },
1f63139c 827 { .name = "symlinkat",
82d4a110 828 .arg = { [0] = { .scnprintf = SCA_FDAT, /* dfd */ }, }, },
1f63139c 829 { .name = "tgkill",
82d4a110 830 .arg = { [2] = { .scnprintf = SCA_SIGNUM, /* sig */ }, }, },
1f63139c 831 { .name = "tkill",
82d4a110 832 .arg = { [1] = { .scnprintf = SCA_SIGNUM, /* sig */ }, }, },
1f63139c
ACM
833 { .name = "uname", .alias = "newuname", },
834 { .name = "unlinkat",
82d4a110 835 .arg = { [0] = { .scnprintf = SCA_FDAT, /* dfd */ }, }, },
1f63139c 836 { .name = "utimensat",
82d4a110 837 .arg = { [0] = { .scnprintf = SCA_FDAT, /* dirfd */ }, }, },
11c8e39f 838 { .name = "wait4", .errpid = true,
82d4a110 839 .arg = { [2] = { .scnprintf = SCA_WAITID_OPTIONS, /* options */ }, }, },
11c8e39f 840 { .name = "waitid", .errpid = true,
82d4a110 841 .arg = { [3] = { .scnprintf = SCA_WAITID_OPTIONS, /* options */ }, }, },
514f1c67
ACM
842};
843
844static int syscall_fmt__cmp(const void *name, const void *fmtp)
845{
846 const struct syscall_fmt *fmt = fmtp;
847 return strcmp(name, fmt->name);
848}
849
850static struct syscall_fmt *syscall_fmt__find(const char *name)
851{
852 const int nmemb = ARRAY_SIZE(syscall_fmts);
853 return bsearch(name, syscall_fmts, nmemb, sizeof(struct syscall_fmt), syscall_fmt__cmp);
854}
855
6a648b53
ACM
/*
 * Description of one syscall.
 *
 * is_exit: set for "exit" and "exit_group".
 * is_open: set for "open" and "openat", so that the fd returned in sys_exit
 *	    can be associated with the pathname seen in sys_enter.
 */
struct syscall {
	struct event_format    *tp_format;
	int			nr_args;
	bool			is_exit;
	bool			is_open;
	struct format_field    *args;
	const char	       *name;
	struct syscall_fmt     *fmt;
	struct syscall_arg_fmt *arg_fmt;
};
870
fd2b2975
ACM
871/*
872 * We need to have this 'calculated' boolean because in some cases we really
873 * don't know what is the duration of a syscall, for instance, when we start
874 * a session and some threads are waiting for a syscall to finish, say 'poll',
875 * in which case all we can do is to print "( ? ) for duration and for the
876 * start timestamp.
877 */
878static size_t fprintf_duration(unsigned long t, bool calculated, FILE *fp)
60c907ab
ACM
879{
880 double duration = (double)t / NSEC_PER_MSEC;
881 size_t printed = fprintf(fp, "(");
882
fd2b2975 883 if (!calculated)
522283fe 884 printed += fprintf(fp, " ");
fd2b2975 885 else if (duration >= 1.0)
60c907ab
ACM
886 printed += color_fprintf(fp, PERF_COLOR_RED, "%6.3f ms", duration);
887 else if (duration >= 0.01)
888 printed += color_fprintf(fp, PERF_COLOR_YELLOW, "%6.3f ms", duration);
889 else
890 printed += color_fprintf(fp, PERF_COLOR_NORMAL, "%6.3f ms", duration);
c24ff998 891 return printed + fprintf(fp, "): ");
60c907ab
ACM
892}
893
f994592d
ACM
894/**
895 * filename.ptr: The filename char pointer that will be vfs_getname'd
896 * filename.entry_str_pos: Where to insert the string translated from
897 * filename.ptr by the vfs_getname tracepoint/kprobe.
84486caa
ACM
898 * ret_scnprintf: syscall args may set this to a different syscall return
899 * formatter, for instance, fcntl may return fds, file flags, etc.
f994592d 900 */
752fde44
ACM
901struct thread_trace {
902 u64 entry_time;
752fde44 903 bool entry_pending;
efd5745e 904 unsigned long nr_events;
a2ea67d7 905 unsigned long pfmaj, pfmin;
752fde44 906 char *entry_str;
1302d88e 907 double runtime_ms;
7ee57434 908 size_t (*ret_scnprintf)(char *bf, size_t size, struct syscall_arg *arg);
f994592d
ACM
909 struct {
910 unsigned long ptr;
7f4f8001
ACM
911 short int entry_str_pos;
912 bool pending_open;
913 unsigned int namelen;
914 char *name;
f994592d 915 } filename;
75b757ca
ACM
916 struct {
917 int max;
918 char **table;
919 } paths;
bf2575c1
DA
920
921 struct intlist *syscall_stats;
752fde44
ACM
922};
923
924static struct thread_trace *thread_trace__new(void)
925{
75b757ca
ACM
926 struct thread_trace *ttrace = zalloc(sizeof(struct thread_trace));
927
928 if (ttrace)
929 ttrace->paths.max = -1;
930
bf2575c1
DA
931 ttrace->syscall_stats = intlist__new(NULL);
932
75b757ca 933 return ttrace;
752fde44
ACM
934}
935
c24ff998 936static struct thread_trace *thread__trace(struct thread *thread, FILE *fp)
752fde44 937{
efd5745e
ACM
938 struct thread_trace *ttrace;
939
752fde44
ACM
940 if (thread == NULL)
941 goto fail;
942
89dceb22
NK
943 if (thread__priv(thread) == NULL)
944 thread__set_priv(thread, thread_trace__new());
48000a1a 945
89dceb22 946 if (thread__priv(thread) == NULL)
752fde44
ACM
947 goto fail;
948
89dceb22 949 ttrace = thread__priv(thread);
efd5745e
ACM
950 ++ttrace->nr_events;
951
952 return ttrace;
752fde44 953fail:
c24ff998 954 color_fprintf(fp, PERF_COLOR_RED,
752fde44
ACM
955 "WARNING: not enough memory, dropping samples!\n");
956 return NULL;
957}
958
84486caa
ACM
959
960void syscall_arg__set_ret_scnprintf(struct syscall_arg *arg,
7ee57434 961 size_t (*ret_scnprintf)(char *bf, size_t size, struct syscall_arg *arg))
84486caa
ACM
962{
963 struct thread_trace *ttrace = thread__priv(arg->thread);
964
965 ttrace->ret_scnprintf = ret_scnprintf;
966}
967
598d02c5
SF
/* Bit flags; presumably select major/minor page fault tracing (users not visible here). */
#define TRACE_PFMAJ		(1 << 0)
#define TRACE_PFMIN		(1 << 1)

/* Size in bytes of the per-thread buffer holding a formatted sys_enter line. */
static const size_t trace__entry_str_size = 2048;
972
97119f37 973static int trace__set_fd_pathname(struct thread *thread, int fd, const char *pathname)
75b757ca 974{
89dceb22 975 struct thread_trace *ttrace = thread__priv(thread);
75b757ca
ACM
976
977 if (fd > ttrace->paths.max) {
978 char **npath = realloc(ttrace->paths.table, (fd + 1) * sizeof(char *));
979
980 if (npath == NULL)
981 return -1;
982
983 if (ttrace->paths.max != -1) {
984 memset(npath + ttrace->paths.max + 1, 0,
985 (fd - ttrace->paths.max) * sizeof(char *));
986 } else {
987 memset(npath, 0, (fd + 1) * sizeof(char *));
988 }
989
990 ttrace->paths.table = npath;
991 ttrace->paths.max = fd;
992 }
993
994 ttrace->paths.table[fd] = strdup(pathname);
995
996 return ttrace->paths.table[fd] != NULL ? 0 : -1;
997}
998
97119f37
ACM
999static int thread__read_fd_path(struct thread *thread, int fd)
1000{
1001 char linkname[PATH_MAX], pathname[PATH_MAX];
1002 struct stat st;
1003 int ret;
1004
1005 if (thread->pid_ == thread->tid) {
1006 scnprintf(linkname, sizeof(linkname),
1007 "/proc/%d/fd/%d", thread->pid_, fd);
1008 } else {
1009 scnprintf(linkname, sizeof(linkname),
1010 "/proc/%d/task/%d/fd/%d", thread->pid_, thread->tid, fd);
1011 }
1012
1013 if (lstat(linkname, &st) < 0 || st.st_size + 1 > (off_t)sizeof(pathname))
1014 return -1;
1015
1016 ret = readlink(linkname, pathname, sizeof(pathname));
1017
1018 if (ret < 0 || ret > st.st_size)
1019 return -1;
1020
1021 pathname[ret] = '\0';
1022 return trace__set_fd_pathname(thread, fd, pathname);
1023}
1024
c522739d
ACM
1025static const char *thread__fd_path(struct thread *thread, int fd,
1026 struct trace *trace)
75b757ca 1027{
89dceb22 1028 struct thread_trace *ttrace = thread__priv(thread);
75b757ca
ACM
1029
1030 if (ttrace == NULL)
1031 return NULL;
1032
1033 if (fd < 0)
1034 return NULL;
1035
cdcd1e6b 1036 if ((fd > ttrace->paths.max || ttrace->paths.table[fd] == NULL)) {
c522739d
ACM
1037 if (!trace->live)
1038 return NULL;
1039 ++trace->stats.proc_getname;
cdcd1e6b 1040 if (thread__read_fd_path(thread, fd))
c522739d
ACM
1041 return NULL;
1042 }
75b757ca
ACM
1043
1044 return ttrace->paths.table[fd];
1045}
1046
fc65eb82 1047size_t syscall_arg__scnprintf_fd(char *bf, size_t size, struct syscall_arg *arg)
75b757ca
ACM
1048{
1049 int fd = arg->val;
1050 size_t printed = scnprintf(bf, size, "%d", fd);
c522739d 1051 const char *path = thread__fd_path(arg->thread, fd, arg->trace);
75b757ca
ACM
1052
1053 if (path)
1054 printed += scnprintf(bf + printed, size - printed, "<%s>", path);
1055
1056 return printed;
1057}
1058
0a2f7540
ACM
1059size_t pid__scnprintf_fd(struct trace *trace, pid_t pid, int fd, char *bf, size_t size)
1060{
1061 size_t printed = scnprintf(bf, size, "%d", fd);
1062 struct thread *thread = machine__find_thread(trace->host, pid, pid);
1063
1064 if (thread) {
1065 const char *path = thread__fd_path(thread, fd, trace);
1066
1067 if (path)
1068 printed += scnprintf(bf + printed, size - printed, "<%s>", path);
1069
1070 thread__put(thread);
1071 }
1072
1073 return printed;
1074}
1075
75b757ca
ACM
1076static size_t syscall_arg__scnprintf_close_fd(char *bf, size_t size,
1077 struct syscall_arg *arg)
1078{
1079 int fd = arg->val;
1080 size_t printed = syscall_arg__scnprintf_fd(bf, size, arg);
89dceb22 1081 struct thread_trace *ttrace = thread__priv(arg->thread);
75b757ca 1082
04662523
ACM
1083 if (ttrace && fd >= 0 && fd <= ttrace->paths.max)
1084 zfree(&ttrace->paths.table[fd]);
75b757ca
ACM
1085
1086 return printed;
1087}
1088
f994592d
ACM
1089static void thread__set_filename_pos(struct thread *thread, const char *bf,
1090 unsigned long ptr)
1091{
1092 struct thread_trace *ttrace = thread__priv(thread);
1093
1094 ttrace->filename.ptr = ptr;
1095 ttrace->filename.entry_str_pos = bf - ttrace->entry_str;
1096}
1097
1098static size_t syscall_arg__scnprintf_filename(char *bf, size_t size,
1099 struct syscall_arg *arg)
1100{
1101 unsigned long ptr = arg->val;
1102
1103 if (!arg->trace->vfs_getname)
1104 return scnprintf(bf, size, "%#x", ptr);
1105
1106 thread__set_filename_pos(arg->thread, bf, ptr);
1107 return 0;
1108}
1109
ae9ed035
ACM
1110static bool trace__filter_duration(struct trace *trace, double t)
1111{
1112 return t < (trace->duration_filter * NSEC_PER_MSEC);
1113}
1114
fd2b2975 1115static size_t __trace__fprintf_tstamp(struct trace *trace, u64 tstamp, FILE *fp)
752fde44
ACM
1116{
1117 double ts = (double)(tstamp - trace->base_time) / NSEC_PER_MSEC;
1118
60c907ab 1119 return fprintf(fp, "%10.3f ", ts);
752fde44
ACM
1120}
1121
fd2b2975
ACM
1122/*
1123 * We're handling tstamp=0 as an undefined tstamp, i.e. like when we are
1124 * using ttrace->entry_time for a thread that receives a sys_exit without
1125 * first having received a sys_enter ("poll" issued before tracing session
1126 * starts, lost sys_enter exit due to ring buffer overflow).
1127 */
1128static size_t trace__fprintf_tstamp(struct trace *trace, u64 tstamp, FILE *fp)
1129{
1130 if (tstamp > 0)
1131 return __trace__fprintf_tstamp(trace, tstamp, fp);
1132
1133 return fprintf(fp, " ? ");
1134}
1135
/* Set by sig_handler(): a signal asked us to stop. */
static bool done;
/* Set by sig_handler(): the last signal handled was SIGINT. */
static bool interrupted;

/* Signal handler: request termination and note whether it was a SIGINT. */
static void sig_handler(int sig)
{
	done = true;
	interrupted = (sig == SIGINT);
}
1144
752fde44 1145static size_t trace__fprintf_entry_head(struct trace *trace, struct thread *thread,
fd2b2975 1146 u64 duration, bool duration_calculated, u64 tstamp, FILE *fp)
752fde44
ACM
1147{
1148 size_t printed = trace__fprintf_tstamp(trace, tstamp, fp);
fd2b2975 1149 printed += fprintf_duration(duration, duration_calculated, fp);
752fde44 1150
50c95cbd
ACM
1151 if (trace->multiple_threads) {
1152 if (trace->show_comm)
1902efe7 1153 printed += fprintf(fp, "%.14s/", thread__comm_str(thread));
38051234 1154 printed += fprintf(fp, "%d ", thread->tid);
50c95cbd 1155 }
752fde44
ACM
1156
1157 return printed;
1158}
1159
c24ff998 1160static int trace__process_event(struct trace *trace, struct machine *machine,
162f0bef 1161 union perf_event *event, struct perf_sample *sample)
752fde44
ACM
1162{
1163 int ret = 0;
1164
1165 switch (event->header.type) {
1166 case PERF_RECORD_LOST:
c24ff998 1167 color_fprintf(trace->output, PERF_COLOR_RED,
752fde44 1168 "LOST %" PRIu64 " events!\n", event->lost.lost);
162f0bef 1169 ret = machine__process_lost_event(machine, event, sample);
3ed5ca2e 1170 break;
752fde44 1171 default:
162f0bef 1172 ret = machine__process_event(machine, event, sample);
752fde44
ACM
1173 break;
1174 }
1175
1176 return ret;
1177}
1178
c24ff998 1179static int trace__tool_process(struct perf_tool *tool,
752fde44 1180 union perf_event *event,
162f0bef 1181 struct perf_sample *sample,
752fde44
ACM
1182 struct machine *machine)
1183{
c24ff998 1184 struct trace *trace = container_of(tool, struct trace, tool);
162f0bef 1185 return trace__process_event(trace, machine, event, sample);
752fde44
ACM
1186}
1187
caf8a0d0
ACM
1188static char *trace__machine__resolve_kernel_addr(void *vmachine, unsigned long long *addrp, char **modp)
1189{
1190 struct machine *machine = vmachine;
1191
1192 if (machine->kptr_restrict_warned)
1193 return NULL;
1194
1195 if (symbol_conf.kptr_restrict) {
1196 pr_warning("Kernel address maps (/proc/{kallsyms,modules}) are restricted.\n\n"
1197 "Check /proc/sys/kernel/kptr_restrict.\n\n"
1198 "Kernel samples will not be resolved.\n");
1199 machine->kptr_restrict_warned = true;
1200 return NULL;
1201 }
1202
1203 return machine__resolve_kernel_addr(vmachine, addrp, modp);
1204}
1205
752fde44
ACM
1206static int trace__symbols_init(struct trace *trace, struct perf_evlist *evlist)
1207{
0a7e6d1b 1208 int err = symbol__init(NULL);
752fde44
ACM
1209
1210 if (err)
1211 return err;
1212
8fb598e5
DA
1213 trace->host = machine__new_host();
1214 if (trace->host == NULL)
1215 return -ENOMEM;
752fde44 1216
cbd5c178
AV
1217 err = trace_event__register_resolver(trace->host, trace__machine__resolve_kernel_addr);
1218 if (err < 0)
1219 goto out;
706c3da4 1220
a33fbd56 1221 err = __machine__synthesize_threads(trace->host, &trace->tool, &trace->opts.target,
9d9cad76 1222 evlist->threads, trace__tool_process, false,
340b47f5 1223 trace->opts.proc_map_timeout, 1);
cbd5c178 1224out:
752fde44
ACM
1225 if (err)
1226 symbol__exit();
1227
1228 return err;
1229}
1230
33974a41
AV
1231static void trace__symbols__exit(struct trace *trace)
1232{
1233 machine__exit(trace->host);
1234 trace->host = NULL;
1235
1236 symbol__exit();
1237}
1238
5e58fcfa 1239static int syscall__alloc_arg_fmts(struct syscall *sc, int nr_args)
13d4ff3e 1240{
5e58fcfa 1241 int idx;
13d4ff3e 1242
332337da
ACM
1243 if (nr_args == 6 && sc->fmt && sc->fmt->nr_args != 0)
1244 nr_args = sc->fmt->nr_args;
1245
5e58fcfa 1246 sc->arg_fmt = calloc(nr_args, sizeof(*sc->arg_fmt));
82d4a110 1247 if (sc->arg_fmt == NULL)
13d4ff3e
ACM
1248 return -1;
1249
5e58fcfa
ACM
1250 for (idx = 0; idx < nr_args; ++idx) {
1251 if (sc->fmt)
82d4a110 1252 sc->arg_fmt[idx] = sc->fmt->arg[idx];
5e58fcfa 1253 }
82d4a110 1254
5e58fcfa
ACM
1255 sc->nr_args = nr_args;
1256 return 0;
1257}
1258
1259static int syscall__set_arg_fmts(struct syscall *sc)
1260{
1261 struct format_field *field;
1262 int idx = 0, len;
1263
1264 for (field = sc->args; field; field = field->next, ++idx) {
1265 if (sc->fmt && sc->fmt->arg[idx].scnprintf)
1266 continue;
1f115cb7 1267
82d4a110 1268 if (strcmp(field->type, "const char *") == 0 &&
12f3ca4f
ACM
1269 (strcmp(field->name, "filename") == 0 ||
1270 strcmp(field->name, "path") == 0 ||
1271 strcmp(field->name, "pathname") == 0))
82d4a110 1272 sc->arg_fmt[idx].scnprintf = SCA_FILENAME;
beccb2b5 1273 else if (field->flags & FIELD_IS_POINTER)
82d4a110 1274 sc->arg_fmt[idx].scnprintf = syscall_arg__scnprintf_hex;
d1d438a3 1275 else if (strcmp(field->type, "pid_t") == 0)
82d4a110 1276 sc->arg_fmt[idx].scnprintf = SCA_PID;
ba2f22cf 1277 else if (strcmp(field->type, "umode_t") == 0)
82d4a110 1278 sc->arg_fmt[idx].scnprintf = SCA_MODE_T;
b6565c90
ACM
1279 else if ((strcmp(field->type, "int") == 0 ||
1280 strcmp(field->type, "unsigned int") == 0 ||
1281 strcmp(field->type, "long") == 0) &&
1282 (len = strlen(field->name)) >= 2 &&
1283 strcmp(field->name + len - 2, "fd") == 0) {
1284 /*
1285 * /sys/kernel/tracing/events/syscalls/sys_enter*
1286 * egrep 'field:.*fd;' .../format|sed -r 's/.*field:([a-z ]+) [a-z_]*fd.+/\1/g'|sort|uniq -c
1287 * 65 int
1288 * 23 unsigned int
1289 * 7 unsigned long
1290 */
82d4a110 1291 sc->arg_fmt[idx].scnprintf = SCA_FD;
b6565c90 1292 }
13d4ff3e
ACM
1293 }
1294
1295 return 0;
1296}
1297
514f1c67
ACM
1298static int trace__read_syscall_info(struct trace *trace, int id)
1299{
1300 char tp_name[128];
1301 struct syscall *sc;
fd0db102 1302 const char *name = syscalltbl__name(trace->sctbl, id);
3a531260
ACM
1303
1304 if (name == NULL)
1305 return -1;
514f1c67
ACM
1306
1307 if (id > trace->syscalls.max) {
1308 struct syscall *nsyscalls = realloc(trace->syscalls.table, (id + 1) * sizeof(*sc));
1309
1310 if (nsyscalls == NULL)
1311 return -1;
1312
1313 if (trace->syscalls.max != -1) {
1314 memset(nsyscalls + trace->syscalls.max + 1, 0,
1315 (id - trace->syscalls.max) * sizeof(*sc));
1316 } else {
1317 memset(nsyscalls, 0, (id + 1) * sizeof(*sc));
1318 }
1319
1320 trace->syscalls.table = nsyscalls;
1321 trace->syscalls.max = id;
1322 }
1323
1324 sc = trace->syscalls.table + id;
3a531260 1325 sc->name = name;
2ae3a312 1326
3a531260 1327 sc->fmt = syscall_fmt__find(sc->name);
514f1c67 1328
aec1930b 1329 snprintf(tp_name, sizeof(tp_name), "sys_enter_%s", sc->name);
97978b3e 1330 sc->tp_format = trace_event__tp_format("syscalls", tp_name);
aec1930b 1331
8dd2a131 1332 if (IS_ERR(sc->tp_format) && sc->fmt && sc->fmt->alias) {
aec1930b 1333 snprintf(tp_name, sizeof(tp_name), "sys_enter_%s", sc->fmt->alias);
97978b3e 1334 sc->tp_format = trace_event__tp_format("syscalls", tp_name);
aec1930b 1335 }
514f1c67 1336
5e58fcfa
ACM
1337 if (syscall__alloc_arg_fmts(sc, IS_ERR(sc->tp_format) ? 6 : sc->tp_format->format.nr_fields))
1338 return -1;
1339
8dd2a131 1340 if (IS_ERR(sc->tp_format))
13d4ff3e
ACM
1341 return -1;
1342
f208bd8d 1343 sc->args = sc->tp_format->format.fields;
c42de706
TS
1344 /*
1345 * We need to check and discard the first variable '__syscall_nr'
1346 * or 'nr' that mean the syscall number. It is needless here.
1347 * So drop '__syscall_nr' or 'nr' field but does not exist on older kernels.
1348 */
1349 if (sc->args && (!strcmp(sc->args->name, "__syscall_nr") || !strcmp(sc->args->name, "nr"))) {
f208bd8d
ACM
1350 sc->args = sc->args->next;
1351 --sc->nr_args;
1352 }
1353
5089f20e 1354 sc->is_exit = !strcmp(name, "exit_group") || !strcmp(name, "exit");
6a648b53 1355 sc->is_open = !strcmp(name, "open") || !strcmp(name, "openat");
5089f20e 1356
13d4ff3e 1357 return syscall__set_arg_fmts(sc);
514f1c67
ACM
1358}
1359
d0cc439b
ACM
1360static int trace__validate_ev_qualifier(struct trace *trace)
1361{
8b3ce757 1362 int err = 0, i;
27702bcf 1363 size_t nr_allocated;
d0cc439b
ACM
1364 struct str_node *pos;
1365
8b3ce757
ACM
1366 trace->ev_qualifier_ids.nr = strlist__nr_entries(trace->ev_qualifier);
1367 trace->ev_qualifier_ids.entries = malloc(trace->ev_qualifier_ids.nr *
1368 sizeof(trace->ev_qualifier_ids.entries[0]));
1369
1370 if (trace->ev_qualifier_ids.entries == NULL) {
1371 fputs("Error:\tNot enough memory for allocating events qualifier ids\n",
1372 trace->output);
1373 err = -EINVAL;
1374 goto out;
1375 }
1376
27702bcf 1377 nr_allocated = trace->ev_qualifier_ids.nr;
8b3ce757
ACM
1378 i = 0;
1379
602a1f4d 1380 strlist__for_each_entry(pos, trace->ev_qualifier) {
d0cc439b 1381 const char *sc = pos->s;
27702bcf 1382 int id = syscalltbl__id(trace->sctbl, sc), match_next = -1;
d0cc439b 1383
8b3ce757 1384 if (id < 0) {
27702bcf
ACM
1385 id = syscalltbl__strglobmatch_first(trace->sctbl, sc, &match_next);
1386 if (id >= 0)
1387 goto matches;
1388
d0cc439b
ACM
1389 if (err == 0) {
1390 fputs("Error:\tInvalid syscall ", trace->output);
1391 err = -EINVAL;
1392 } else {
1393 fputs(", ", trace->output);
1394 }
1395
1396 fputs(sc, trace->output);
1397 }
27702bcf 1398matches:
8b3ce757 1399 trace->ev_qualifier_ids.entries[i++] = id;
27702bcf
ACM
1400 if (match_next == -1)
1401 continue;
1402
1403 while (1) {
1404 id = syscalltbl__strglobmatch_next(trace->sctbl, sc, &match_next);
1405 if (id < 0)
1406 break;
1407 if (nr_allocated == trace->ev_qualifier_ids.nr) {
1408 void *entries;
1409
1410 nr_allocated += 8;
1411 entries = realloc(trace->ev_qualifier_ids.entries,
1412 nr_allocated * sizeof(trace->ev_qualifier_ids.entries[0]));
1413 if (entries == NULL) {
1414 err = -ENOMEM;
1415 fputs("\nError:\t Not enough memory for parsing\n", trace->output);
1416 goto out_free;
1417 }
1418 trace->ev_qualifier_ids.entries = entries;
1419 }
1420 trace->ev_qualifier_ids.nr++;
1421 trace->ev_qualifier_ids.entries[i++] = id;
1422 }
d0cc439b
ACM
1423 }
1424
1425 if (err < 0) {
1426 fputs("\nHint:\ttry 'perf list syscalls:sys_enter_*'"
1427 "\nHint:\tand: 'man syscalls'\n", trace->output);
27702bcf 1428out_free:
8b3ce757
ACM
1429 zfree(&trace->ev_qualifier_ids.entries);
1430 trace->ev_qualifier_ids.nr = 0;
d0cc439b 1431 }
8b3ce757 1432out:
d0cc439b
ACM
1433 return err;
1434}
1435
55d43bca
DA
1436/*
1437 * args is to be interpreted as a series of longs but we need to handle
1438 * 8-byte unaligned accesses. args points to raw_data within the event
1439 * and raw_data is guaranteed to be 8-byte unaligned because it is
1440 * preceded by raw_size which is a u32. So we need to copy args to a temp
1441 * variable to read it. Most notably this avoids extended load instructions
1442 * on unaligned addresses
1443 */
325f5091 1444unsigned long syscall_arg__val(struct syscall_arg *arg, u8 idx)
f9f83b33
ACM
1445{
1446 unsigned long val;
325f5091 1447 unsigned char *p = arg->args + sizeof(unsigned long) * idx;
f9f83b33
ACM
1448
1449 memcpy(&val, p, sizeof(val));
1450 return val;
1451}
1452
c51bdfec
ACM
1453static size_t syscall__scnprintf_name(struct syscall *sc, char *bf, size_t size,
1454 struct syscall_arg *arg)
1455{
1456 if (sc->arg_fmt && sc->arg_fmt[arg->idx].name)
1457 return scnprintf(bf, size, "%s: ", sc->arg_fmt[arg->idx].name);
1458
1459 return scnprintf(bf, size, "arg%d: ", arg->idx);
1460}
1461
d032d79e
ACM
1462static size_t syscall__scnprintf_val(struct syscall *sc, char *bf, size_t size,
1463 struct syscall_arg *arg, unsigned long val)
1464{
1465 if (sc->arg_fmt && sc->arg_fmt[arg->idx].scnprintf) {
1466 arg->val = val;
1467 if (sc->arg_fmt[arg->idx].parm)
1468 arg->parm = sc->arg_fmt[arg->idx].parm;
1469 return sc->arg_fmt[arg->idx].scnprintf(bf, size, arg);
1470 }
1471 return scnprintf(bf, size, "%ld", val);
1472}
1473
752fde44 1474static size_t syscall__scnprintf_args(struct syscall *sc, char *bf, size_t size,
55d43bca 1475 unsigned char *args, struct trace *trace,
75b757ca 1476 struct thread *thread)
514f1c67 1477{
514f1c67 1478 size_t printed = 0;
55d43bca 1479 unsigned long val;
d032d79e
ACM
1480 u8 bit = 1;
1481 struct syscall_arg arg = {
1482 .args = args,
1483 .idx = 0,
1484 .mask = 0,
1485 .trace = trace,
1486 .thread = thread,
1487 };
84486caa
ACM
1488 struct thread_trace *ttrace = thread__priv(thread);
1489
1490 /*
1491 * Things like fcntl will set this in its 'cmd' formatter to pick the
1492 * right formatter for the return value (an fd? file flags?), which is
1493 * not needed for syscalls that always return a given type, say an fd.
1494 */
1495 ttrace->ret_scnprintf = NULL;
514f1c67 1496
f208bd8d 1497 if (sc->args != NULL) {
514f1c67 1498 struct format_field *field;
6e7eeb51 1499
f208bd8d 1500 for (field = sc->args; field;
01533e97
ACM
1501 field = field->next, ++arg.idx, bit <<= 1) {
1502 if (arg.mask & bit)
6e7eeb51 1503 continue;
55d43bca 1504
f9f83b33 1505 val = syscall_arg__val(&arg, arg.idx);
55d43bca 1506
4aa58232
ACM
1507 /*
1508 * Suppress this argument if its value is zero and
1509 * and we don't have a string associated in an
1510 * strarray for it.
1511 */
55d43bca 1512 if (val == 0 &&
82d4a110 1513 !(sc->arg_fmt &&
d47737d5
ACM
1514 (sc->arg_fmt[arg.idx].show_zero ||
1515 sc->arg_fmt[arg.idx].scnprintf == SCA_STRARRAY ||
82d4a110
ACM
1516 sc->arg_fmt[arg.idx].scnprintf == SCA_STRARRAYS) &&
1517 sc->arg_fmt[arg.idx].parm))
22ae5cf1
ACM
1518 continue;
1519
752fde44 1520 printed += scnprintf(bf + printed, size - printed,
13d4ff3e 1521 "%s%s: ", printed ? ", " : "", field->name);
d032d79e 1522 printed += syscall__scnprintf_val(sc, bf + printed, size - printed, &arg, val);
514f1c67 1523 }
4c4d6e51
ACM
1524 } else if (IS_ERR(sc->tp_format)) {
1525 /*
1526 * If we managed to read the tracepoint /format file, then we
1527 * may end up not having any args, like with gettid(), so only
1528 * print the raw args when we didn't manage to read it.
1529 */
332337da 1530 while (arg.idx < sc->nr_args) {
d032d79e
ACM
1531 if (arg.mask & bit)
1532 goto next_arg;
1533 val = syscall_arg__val(&arg, arg.idx);
c51bdfec
ACM
1534 if (printed)
1535 printed += scnprintf(bf + printed, size - printed, ", ");
1536 printed += syscall__scnprintf_name(sc, bf + printed, size - printed, &arg);
d032d79e
ACM
1537 printed += syscall__scnprintf_val(sc, bf + printed, size - printed, &arg, val);
1538next_arg:
1539 ++arg.idx;
1540 bit <<= 1;
514f1c67
ACM
1541 }
1542 }
1543
1544 return printed;
1545}
1546
/*
 * Signature of the per-tracepoint sample handlers, e.g. trace__sys_enter()
 * and trace__sys_exit().
 */
typedef int (*tracepoint_handler)(struct trace *trace,
				  struct perf_evsel *evsel,
				  union perf_event *event,
				  struct perf_sample *sample);
1550
1551static struct syscall *trace__syscall_info(struct trace *trace,
bf2575c1 1552 struct perf_evsel *evsel, int id)
ba3d7dee 1553{
ba3d7dee
ACM
1554
1555 if (id < 0) {
adaa18bf
ACM
1556
1557 /*
1558 * XXX: Noticed on x86_64, reproduced as far back as 3.0.36, haven't tried
1559 * before that, leaving at a higher verbosity level till that is
1560 * explained. Reproduced with plain ftrace with:
1561 *
1562 * echo 1 > /t/events/raw_syscalls/sys_exit/enable
1563 * grep "NR -1 " /t/trace_pipe
1564 *
1565 * After generating some load on the machine.
1566 */
1567 if (verbose > 1) {
1568 static u64 n;
1569 fprintf(trace->output, "Invalid syscall %d id, skipping (%s, %" PRIu64 ") ...\n",
1570 id, perf_evsel__name(evsel), ++n);
1571 }
ba3d7dee
ACM
1572 return NULL;
1573 }
1574
1575 if ((id > trace->syscalls.max || trace->syscalls.table[id].name == NULL) &&
1576 trace__read_syscall_info(trace, id))
1577 goto out_cant_read;
1578
1579 if ((id > trace->syscalls.max || trace->syscalls.table[id].name == NULL))
1580 goto out_cant_read;
1581
1582 return &trace->syscalls.table[id];
1583
1584out_cant_read:
bb963e16 1585 if (verbose > 0) {
7c304ee0
ACM
1586 fprintf(trace->output, "Problems reading syscall %d", id);
1587 if (id <= trace->syscalls.max && trace->syscalls.table[id].name != NULL)
1588 fprintf(trace->output, "(%s)", trace->syscalls.table[id].name);
1589 fputs(" information\n", trace->output);
1590 }
ba3d7dee
ACM
1591 return NULL;
1592}
1593
bf2575c1
DA
1594static void thread__update_stats(struct thread_trace *ttrace,
1595 int id, struct perf_sample *sample)
1596{
1597 struct int_node *inode;
1598 struct stats *stats;
1599 u64 duration = 0;
1600
1601 inode = intlist__findnew(ttrace->syscall_stats, id);
1602 if (inode == NULL)
1603 return;
1604
1605 stats = inode->priv;
1606 if (stats == NULL) {
1607 stats = malloc(sizeof(struct stats));
1608 if (stats == NULL)
1609 return;
1610 init_stats(stats);
1611 inode->priv = stats;
1612 }
1613
1614 if (ttrace->entry_time && sample->time > ttrace->entry_time)
1615 duration = sample->time - ttrace->entry_time;
1616
1617 update_stats(stats, duration);
1618}
1619
522283fe 1620static int trace__printf_interrupted_entry(struct trace *trace)
e596663e
ACM
1621{
1622 struct thread_trace *ttrace;
e596663e
ACM
1623 size_t printed;
1624
0a6545bd 1625 if (trace->failure_only || trace->current == NULL)
e596663e
ACM
1626 return 0;
1627
1628 ttrace = thread__priv(trace->current);
1629
1630 if (!ttrace->entry_pending)
1631 return 0;
1632
522283fe 1633 printed = trace__fprintf_entry_head(trace, trace->current, 0, false, ttrace->entry_time, trace->output);
e596663e
ACM
1634 printed += fprintf(trace->output, "%-70s) ...\n", ttrace->entry_str);
1635 ttrace->entry_pending = false;
1636
1637 return printed;
1638}
1639
591421e1
ACM
1640static int trace__fprintf_sample(struct trace *trace, struct perf_evsel *evsel,
1641 struct perf_sample *sample, struct thread *thread)
1642{
1643 int printed = 0;
1644
1645 if (trace->print_sample) {
1646 double ts = (double)sample->time / NSEC_PER_MSEC;
1647
1648 printed += fprintf(trace->output, "%22s %10.3f %s %d/%d [%d]\n",
1649 perf_evsel__name(evsel), ts,
1650 thread__comm_str(thread),
1651 sample->pid, sample->tid, sample->cpu);
1652 }
1653
1654 return printed;
1655}
1656
ba3d7dee 1657static int trace__sys_enter(struct trace *trace, struct perf_evsel *evsel,
0c82adcf 1658 union perf_event *event __maybe_unused,
ba3d7dee
ACM
1659 struct perf_sample *sample)
1660{
752fde44 1661 char *msg;
ba3d7dee 1662 void *args;
752fde44 1663 size_t printed = 0;
2ae3a312 1664 struct thread *thread;
b91fc39f 1665 int id = perf_evsel__sc_tp_uint(evsel, id, sample), err = -1;
bf2575c1 1666 struct syscall *sc = trace__syscall_info(trace, evsel, id);
2ae3a312
ACM
1667 struct thread_trace *ttrace;
1668
1669 if (sc == NULL)
1670 return -1;
ba3d7dee 1671
8fb598e5 1672 thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
c24ff998 1673 ttrace = thread__trace(thread, trace->output);
2ae3a312 1674 if (ttrace == NULL)
b91fc39f 1675 goto out_put;
ba3d7dee 1676
591421e1
ACM
1677 trace__fprintf_sample(trace, evsel, sample, thread);
1678
77170988 1679 args = perf_evsel__sc_tp_ptr(evsel, args, sample);
752fde44
ACM
1680
1681 if (ttrace->entry_str == NULL) {
e4d44e83 1682 ttrace->entry_str = malloc(trace__entry_str_size);
752fde44 1683 if (!ttrace->entry_str)
b91fc39f 1684 goto out_put;
752fde44
ACM
1685 }
1686
5cf9c84e 1687 if (!(trace->duration_filter || trace->summary_only || trace->min_stack))
522283fe 1688 trace__printf_interrupted_entry(trace);
e596663e 1689
752fde44
ACM
1690 ttrace->entry_time = sample->time;
1691 msg = ttrace->entry_str;
e4d44e83 1692 printed += scnprintf(msg + printed, trace__entry_str_size - printed, "%s(", sc->name);
752fde44 1693
e4d44e83 1694 printed += syscall__scnprintf_args(sc, msg + printed, trace__entry_str_size - printed,
75b757ca 1695 args, trace, thread);
752fde44 1696
5089f20e 1697 if (sc->is_exit) {
0a6545bd 1698 if (!(trace->duration_filter || trace->summary_only || trace->failure_only || trace->min_stack)) {
fd2b2975 1699 trace__fprintf_entry_head(trace, thread, 0, false, ttrace->entry_time, trace->output);
c008f78f 1700 fprintf(trace->output, "%-70s)\n", ttrace->entry_str);
ae9ed035 1701 }
7f4f8001 1702 } else {
752fde44 1703 ttrace->entry_pending = true;
7f4f8001
ACM
1704 /* See trace__vfs_getname & trace__sys_exit */
1705 ttrace->filename.pending_open = false;
1706 }
ba3d7dee 1707
f3b623b8
ACM
1708 if (trace->current != thread) {
1709 thread__put(trace->current);
1710 trace->current = thread__get(thread);
1711 }
b91fc39f
ACM
1712 err = 0;
1713out_put:
1714 thread__put(thread);
1715 return err;
ba3d7dee
ACM
1716}
1717
a98392bb
ACM
1718static int trace__fprintf_sys_enter(struct trace *trace, struct perf_evsel *evsel,
1719 struct perf_sample *sample)
1720{
a98392bb
ACM
1721 struct thread_trace *ttrace;
1722 struct thread *thread;
f3acd886
ACM
1723 int id = perf_evsel__sc_tp_uint(evsel, id, sample), err = -1;
1724 struct syscall *sc = trace__syscall_info(trace, evsel, id);
a98392bb 1725 char msg[1024];
a98392bb
ACM
1726 void *args;
1727
a98392bb
ACM
1728 if (sc == NULL)
1729 return -1;
1730
1731 thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
1732 ttrace = thread__trace(thread, trace->output);
1733 /*
1734 * We need to get ttrace just to make sure it is there when syscall__scnprintf_args()
1735 * and the rest of the beautifiers accessing it via struct syscall_arg touches it.
1736 */
1737 if (ttrace == NULL)
1738 goto out_put;
1739
f3acd886 1740 args = perf_evsel__sc_tp_ptr(evsel, args, sample);
a98392bb
ACM
1741 syscall__scnprintf_args(sc, msg, sizeof(msg), args, trace, thread);
1742 fprintf(trace->output, "%s", msg);
1743 err = 0;
1744out_put:
1745 thread__put(thread);
1746 return err;
1747}
1748
5cf9c84e
ACM
1749static int trace__resolve_callchain(struct trace *trace, struct perf_evsel *evsel,
1750 struct perf_sample *sample,
1751 struct callchain_cursor *cursor)
202ff968
ACM
1752{
1753 struct addr_location al;
3a9e9a47
RB
1754 int max_stack = evsel->attr.sample_max_stack ?
1755 evsel->attr.sample_max_stack :
1756 trace->max_stack;
5cf9c84e
ACM
1757
1758 if (machine__resolve(trace->host, &al, sample) < 0 ||
3a9e9a47 1759 thread__resolve_callchain(al.thread, cursor, evsel, sample, NULL, NULL, max_stack))
5cf9c84e
ACM
1760 return -1;
1761
1762 return 0;
1763}
1764
1765static int trace__fprintf_callchain(struct trace *trace, struct perf_sample *sample)
1766{
202ff968 1767 /* TODO: user-configurable print_opts */
e20ab86e
ACM
1768 const unsigned int print_opts = EVSEL__PRINT_SYM |
1769 EVSEL__PRINT_DSO |
1770 EVSEL__PRINT_UNKNOWN_AS_ADDR;
202ff968 1771
d327e60c 1772 return sample__fprintf_callchain(sample, 38, print_opts, &callchain_cursor, trace->output);
202ff968
ACM
1773}
1774
092bd3cd
HB
/*
 * Map errno value @err to its symbolic name for the architecture of the
 * environment @evsel belongs to.
 */
static const char *errno_to_name(struct perf_evsel *evsel, int err)
{
	const char *arch = perf_env__arch(perf_evsel__env(evsel));

	return arch_syscalls__strerrno(arch, err);
}
1782
ba3d7dee 1783static int trace__sys_exit(struct trace *trace, struct perf_evsel *evsel,
0c82adcf 1784 union perf_event *event __maybe_unused,
ba3d7dee
ACM
1785 struct perf_sample *sample)
1786{
2c82c3ad 1787 long ret;
60c907ab 1788 u64 duration = 0;
fd2b2975 1789 bool duration_calculated = false;
2ae3a312 1790 struct thread *thread;
5cf9c84e 1791 int id = perf_evsel__sc_tp_uint(evsel, id, sample), err = -1, callchain_ret = 0;
bf2575c1 1792 struct syscall *sc = trace__syscall_info(trace, evsel, id);
2ae3a312
ACM
1793 struct thread_trace *ttrace;
1794
1795 if (sc == NULL)
1796 return -1;
ba3d7dee 1797
8fb598e5 1798 thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
c24ff998 1799 ttrace = thread__trace(thread, trace->output);
2ae3a312 1800 if (ttrace == NULL)
b91fc39f 1801 goto out_put;
ba3d7dee 1802
591421e1
ACM
1803 trace__fprintf_sample(trace, evsel, sample, thread);
1804
bf2575c1
DA
1805 if (trace->summary)
1806 thread__update_stats(ttrace, id, sample);
1807
77170988 1808 ret = perf_evsel__sc_tp_uint(evsel, ret, sample);
ba3d7dee 1809
6a648b53 1810 if (sc->is_open && ret >= 0 && ttrace->filename.pending_open) {
7f4f8001
ACM
1811 trace__set_fd_pathname(thread, ret, ttrace->filename.name);
1812 ttrace->filename.pending_open = false;
c522739d
ACM
1813 ++trace->stats.vfs_getname;
1814 }
1815
ae9ed035 1816 if (ttrace->entry_time) {
60c907ab 1817 duration = sample->time - ttrace->entry_time;
ae9ed035
ACM
1818 if (trace__filter_duration(trace, duration))
1819 goto out;
fd2b2975 1820 duration_calculated = true;
ae9ed035
ACM
1821 } else if (trace->duration_filter)
1822 goto out;
60c907ab 1823
5cf9c84e
ACM
1824 if (sample->callchain) {
1825 callchain_ret = trace__resolve_callchain(trace, evsel, sample, &callchain_cursor);
1826 if (callchain_ret == 0) {
1827 if (callchain_cursor.nr < trace->min_stack)
1828 goto out;
1829 callchain_ret = 1;
1830 }
1831 }
1832
0a6545bd 1833 if (trace->summary_only || (ret >= 0 && trace->failure_only))
fd2eabaf
DA
1834 goto out;
1835
fd2b2975 1836 trace__fprintf_entry_head(trace, thread, duration, duration_calculated, ttrace->entry_time, trace->output);
752fde44
ACM
1837
1838 if (ttrace->entry_pending) {
c24ff998 1839 fprintf(trace->output, "%-70s", ttrace->entry_str);
752fde44 1840 } else {
c24ff998
ACM
1841 fprintf(trace->output, " ... [");
1842 color_fprintf(trace->output, PERF_COLOR_YELLOW, "continued");
1843 fprintf(trace->output, "]: %s()", sc->name);
752fde44
ACM
1844 }
1845
da3c9a44 1846 if (sc->fmt == NULL) {
1f63139c
ACM
1847 if (ret < 0)
1848 goto errno_print;
da3c9a44 1849signed_print:
6f8fe61e 1850 fprintf(trace->output, ") = %ld", ret);
1f63139c
ACM
1851 } else if (ret < 0) {
1852errno_print: {
942a91ed 1853 char bf[STRERR_BUFSIZE];
c8b5f2c9 1854 const char *emsg = str_error_r(-ret, bf, sizeof(bf)),
092bd3cd 1855 *e = errno_to_name(evsel, -ret);
ba3d7dee 1856
c24ff998 1857 fprintf(trace->output, ") = -1 %s %s", e, emsg);
1f63139c 1858 }
da3c9a44 1859 } else if (ret == 0 && sc->fmt->timeout)
c24ff998 1860 fprintf(trace->output, ") = 0 Timeout");
84486caa
ACM
1861 else if (ttrace->ret_scnprintf) {
1862 char bf[1024];
7ee57434
ACM
1863 struct syscall_arg arg = {
1864 .val = ret,
1865 .thread = thread,
1866 .trace = trace,
1867 };
1868 ttrace->ret_scnprintf(bf, sizeof(bf), &arg);
84486caa
ACM
1869 ttrace->ret_scnprintf = NULL;
1870 fprintf(trace->output, ") = %s", bf);
1871 } else if (sc->fmt->hexret)
2c82c3ad 1872 fprintf(trace->output, ") = %#lx", ret);
11c8e39f
ACM
1873 else if (sc->fmt->errpid) {
1874 struct thread *child = machine__find_thread(trace->host, ret, ret);
1875
1876 if (child != NULL) {
1877 fprintf(trace->output, ") = %ld", ret);
1878 if (child->comm_set)
1879 fprintf(trace->output, " (%s)", thread__comm_str(child));
1880 thread__put(child);
1881 }
1882 } else
da3c9a44 1883 goto signed_print;
ba3d7dee 1884
c24ff998 1885 fputc('\n', trace->output);
566a0885 1886
5cf9c84e
ACM
1887 if (callchain_ret > 0)
1888 trace__fprintf_callchain(trace, sample);
1889 else if (callchain_ret < 0)
1890 pr_err("Problem processing %s callchain, skipping...\n", perf_evsel__name(evsel));
ae9ed035 1891out:
752fde44 1892 ttrace->entry_pending = false;
b91fc39f
ACM
1893 err = 0;
1894out_put:
1895 thread__put(thread);
1896 return err;
ba3d7dee
ACM
1897}
1898
c522739d 1899static int trace__vfs_getname(struct trace *trace, struct perf_evsel *evsel,
0c82adcf 1900 union perf_event *event __maybe_unused,
c522739d
ACM
1901 struct perf_sample *sample)
1902{
f994592d
ACM
1903 struct thread *thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
1904 struct thread_trace *ttrace;
1905 size_t filename_len, entry_str_len, to_move;
1906 ssize_t remaining_space;
1907 char *pos;
7f4f8001 1908 const char *filename = perf_evsel__rawptr(evsel, sample, "pathname");
f994592d
ACM
1909
1910 if (!thread)
1911 goto out;
1912
1913 ttrace = thread__priv(thread);
1914 if (!ttrace)
ef65e96e 1915 goto out_put;
f994592d 1916
7f4f8001 1917 filename_len = strlen(filename);
39f0e7a8 1918 if (filename_len == 0)
ef65e96e 1919 goto out_put;
7f4f8001
ACM
1920
1921 if (ttrace->filename.namelen < filename_len) {
1922 char *f = realloc(ttrace->filename.name, filename_len + 1);
1923
1924 if (f == NULL)
ef65e96e 1925 goto out_put;
7f4f8001
ACM
1926
1927 ttrace->filename.namelen = filename_len;
1928 ttrace->filename.name = f;
1929 }
1930
1931 strcpy(ttrace->filename.name, filename);
1932 ttrace->filename.pending_open = true;
1933
f994592d 1934 if (!ttrace->filename.ptr)
ef65e96e 1935 goto out_put;
f994592d
ACM
1936
1937 entry_str_len = strlen(ttrace->entry_str);
1938 remaining_space = trace__entry_str_size - entry_str_len - 1; /* \0 */
1939 if (remaining_space <= 0)
ef65e96e 1940 goto out_put;
f994592d 1941
f994592d
ACM
1942 if (filename_len > (size_t)remaining_space) {
1943 filename += filename_len - remaining_space;
1944 filename_len = remaining_space;
1945 }
1946
1947 to_move = entry_str_len - ttrace->filename.entry_str_pos + 1; /* \0 */
1948 pos = ttrace->entry_str + ttrace->filename.entry_str_pos;
1949 memmove(pos + filename_len, pos, to_move);
1950 memcpy(pos, filename, filename_len);
1951
1952 ttrace->filename.ptr = 0;
1953 ttrace->filename.entry_str_pos = 0;
ef65e96e
ACM
1954out_put:
1955 thread__put(thread);
f994592d 1956out:
c522739d
ACM
1957 return 0;
1958}
1959
1302d88e 1960static int trace__sched_stat_runtime(struct trace *trace, struct perf_evsel *evsel,
0c82adcf 1961 union perf_event *event __maybe_unused,
1302d88e
ACM
1962 struct perf_sample *sample)
1963{
1964 u64 runtime = perf_evsel__intval(evsel, sample, "runtime");
1965 double runtime_ms = (double)runtime / NSEC_PER_MSEC;
8fb598e5 1966 struct thread *thread = machine__findnew_thread(trace->host,
314add6b
AH
1967 sample->pid,
1968 sample->tid);
c24ff998 1969 struct thread_trace *ttrace = thread__trace(thread, trace->output);
1302d88e
ACM
1970
1971 if (ttrace == NULL)
1972 goto out_dump;
1973
1974 ttrace->runtime_ms += runtime_ms;
1975 trace->runtime_ms += runtime_ms;
ef65e96e 1976out_put:
b91fc39f 1977 thread__put(thread);
1302d88e
ACM
1978 return 0;
1979
1980out_dump:
c24ff998 1981 fprintf(trace->output, "%s: comm=%s,pid=%u,runtime=%" PRIu64 ",vruntime=%" PRIu64 ")\n",
1302d88e
ACM
1982 evsel->name,
1983 perf_evsel__strval(evsel, sample, "comm"),
1984 (pid_t)perf_evsel__intval(evsel, sample, "pid"),
1985 runtime,
1986 perf_evsel__intval(evsel, sample, "vruntime"));
ef65e96e 1987 goto out_put;
1302d88e
ACM
1988}
1989
923d0c9a
ACM
1990static int bpf_output__printer(enum binary_printer_ops op,
1991 unsigned int val, void *extra __maybe_unused, FILE *fp)
1d6c9407 1992{
1d6c9407
WN
1993 unsigned char ch = (unsigned char)val;
1994
1995 switch (op) {
1996 case BINARY_PRINT_CHAR_DATA:
923d0c9a 1997 return fprintf(fp, "%c", isprint(ch) ? ch : '.');
1d6c9407
WN
1998 case BINARY_PRINT_DATA_BEGIN:
1999 case BINARY_PRINT_LINE_BEGIN:
2000 case BINARY_PRINT_ADDR:
2001 case BINARY_PRINT_NUM_DATA:
2002 case BINARY_PRINT_NUM_PAD:
2003 case BINARY_PRINT_SEP:
2004 case BINARY_PRINT_CHAR_PAD:
2005 case BINARY_PRINT_LINE_END:
2006 case BINARY_PRINT_DATA_END:
2007 default:
2008 break;
2009 }
923d0c9a
ACM
2010
2011 return 0;
1d6c9407
WN
2012}
2013
2014static void bpf_output__fprintf(struct trace *trace,
2015 struct perf_sample *sample)
2016{
923d0c9a
ACM
2017 binary__fprintf(sample->raw_data, sample->raw_size, 8,
2018 bpf_output__printer, NULL, trace->output);
1d6c9407
WN
2019}
2020
14a052df
ACM
2021static int trace__event_handler(struct trace *trace, struct perf_evsel *evsel,
2022 union perf_event *event __maybe_unused,
2023 struct perf_sample *sample)
2024{
7ad35615
ACM
2025 int callchain_ret = 0;
2026
2027 if (sample->callchain) {
2028 callchain_ret = trace__resolve_callchain(trace, evsel, sample, &callchain_cursor);
2029 if (callchain_ret == 0) {
2030 if (callchain_cursor.nr < trace->min_stack)
2031 goto out;
2032 callchain_ret = 1;
2033 }
2034 }
2035
522283fe 2036 trace__printf_interrupted_entry(trace);
14a052df 2037 trace__fprintf_tstamp(trace, sample->time, trace->output);
0808921a
ACM
2038
2039 if (trace->trace_syscalls)
2040 fprintf(trace->output, "( ): ");
2041
2042 fprintf(trace->output, "%s:", evsel->name);
14a052df 2043
1d6c9407 2044 if (perf_evsel__is_bpf_output(evsel)) {
88cf7084
ACM
2045 if (evsel == trace->syscalls.events.augmented)
2046 trace__fprintf_sys_enter(trace, evsel, sample);
2047 else
2048 bpf_output__fprintf(trace, sample);
1d6c9407 2049 } else if (evsel->tp_format) {
a98392bb
ACM
2050 if (strncmp(evsel->tp_format->name, "sys_enter_", 10) ||
2051 trace__fprintf_sys_enter(trace, evsel, sample)) {
2052 event_format__fprintf(evsel->tp_format, sample->cpu,
2053 sample->raw_data, sample->raw_size,
2054 trace->output);
2055 }
14a052df
ACM
2056 }
2057
51125a29 2058 fprintf(trace->output, "\n");
202ff968 2059
7ad35615
ACM
2060 if (callchain_ret > 0)
2061 trace__fprintf_callchain(trace, sample);
2062 else if (callchain_ret < 0)
2063 pr_err("Problem processing %s callchain, skipping...\n", perf_evsel__name(evsel));
2064out:
14a052df
ACM
2065 return 0;
2066}
2067
598d02c5
SF
2068static void print_location(FILE *f, struct perf_sample *sample,
2069 struct addr_location *al,
2070 bool print_dso, bool print_sym)
2071{
2072
bb963e16 2073 if ((verbose > 0 || print_dso) && al->map)
598d02c5
SF
2074 fprintf(f, "%s@", al->map->dso->long_name);
2075
bb963e16 2076 if ((verbose > 0 || print_sym) && al->sym)
4414a3c5 2077 fprintf(f, "%s+0x%" PRIx64, al->sym->name,
598d02c5
SF
2078 al->addr - al->sym->start);
2079 else if (al->map)
4414a3c5 2080 fprintf(f, "0x%" PRIx64, al->addr);
598d02c5 2081 else
4414a3c5 2082 fprintf(f, "0x%" PRIx64, sample->addr);
598d02c5
SF
2083}
2084
2085static int trace__pgfault(struct trace *trace,
2086 struct perf_evsel *evsel,
473398a2 2087 union perf_event *event __maybe_unused,
598d02c5
SF
2088 struct perf_sample *sample)
2089{
2090 struct thread *thread;
598d02c5
SF
2091 struct addr_location al;
2092 char map_type = 'd';
a2ea67d7 2093 struct thread_trace *ttrace;
b91fc39f 2094 int err = -1;
1df54290 2095 int callchain_ret = 0;
598d02c5
SF
2096
2097 thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
1df54290
ACM
2098
2099 if (sample->callchain) {
2100 callchain_ret = trace__resolve_callchain(trace, evsel, sample, &callchain_cursor);
2101 if (callchain_ret == 0) {
2102 if (callchain_cursor.nr < trace->min_stack)
2103 goto out_put;
2104 callchain_ret = 1;
2105 }
2106 }
2107
a2ea67d7
SF
2108 ttrace = thread__trace(thread, trace->output);
2109 if (ttrace == NULL)
b91fc39f 2110 goto out_put;
a2ea67d7
SF
2111
2112 if (evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MAJ)
2113 ttrace->pfmaj++;
2114 else
2115 ttrace->pfmin++;
2116
2117 if (trace->summary_only)
b91fc39f 2118 goto out;
598d02c5 2119
4546263d 2120 thread__find_symbol(thread, sample->cpumode, sample->ip, &al);
598d02c5 2121
fd2b2975 2122 trace__fprintf_entry_head(trace, thread, 0, true, sample->time, trace->output);
598d02c5
SF
2123
2124 fprintf(trace->output, "%sfault [",
2125 evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MAJ ?
2126 "maj" : "min");
2127
2128 print_location(trace->output, sample, &al, false, true);
2129
2130 fprintf(trace->output, "] => ");
2131
117d3c24 2132 thread__find_symbol(thread, sample->cpumode, sample->addr, &al);
598d02c5
SF
2133
2134 if (!al.map) {
4546263d 2135 thread__find_symbol(thread, sample->cpumode, sample->addr, &al);
598d02c5
SF
2136
2137 if (al.map)
2138 map_type = 'x';
2139 else
2140 map_type = '?';
2141 }
2142
2143 print_location(trace->output, sample, &al, true, false);
2144
2145 fprintf(trace->output, " (%c%c)\n", map_type, al.level);
0c3a6ef4 2146
1df54290
ACM
2147 if (callchain_ret > 0)
2148 trace__fprintf_callchain(trace, sample);
2149 else if (callchain_ret < 0)
2150 pr_err("Problem processing %s callchain, skipping...\n", perf_evsel__name(evsel));
b91fc39f
ACM
2151out:
2152 err = 0;
2153out_put:
2154 thread__put(thread);
2155 return err;
598d02c5
SF
2156}
2157
e6001980 2158static void trace__set_base_time(struct trace *trace,
8a07a809 2159 struct perf_evsel *evsel,
e6001980
ACM
2160 struct perf_sample *sample)
2161{
8a07a809
ACM
2162 /*
2163 * BPF events were not setting PERF_SAMPLE_TIME, so be more robust
2164 * and don't use sample->time unconditionally, we may end up having
2165 * some other event in the future without PERF_SAMPLE_TIME for good
2166 * reason, i.e. we may not be interested in its timestamps, just in
2167 * it taking place, picking some piece of information when it
2168 * appears in our event stream (vfs_getname comes to mind).
2169 */
2170 if (trace->base_time == 0 && !trace->full_time &&
2171 (evsel->attr.sample_type & PERF_SAMPLE_TIME))
e6001980
ACM
2172 trace->base_time = sample->time;
2173}
2174
6810fc91 2175static int trace__process_sample(struct perf_tool *tool,
0c82adcf 2176 union perf_event *event,
6810fc91
DA
2177 struct perf_sample *sample,
2178 struct perf_evsel *evsel,
2179 struct machine *machine __maybe_unused)
2180{
2181 struct trace *trace = container_of(tool, struct trace, tool);
aa07df6e 2182 struct thread *thread;
6810fc91
DA
2183 int err = 0;
2184
744a9719 2185 tracepoint_handler handler = evsel->handler;
6810fc91 2186
aa07df6e
DA
2187 thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
2188 if (thread && thread__is_filtered(thread))
ef65e96e 2189 goto out;
bdc89661 2190
e6001980 2191 trace__set_base_time(trace, evsel, sample);
6810fc91 2192
3160565f
DA
2193 if (handler) {
2194 ++trace->nr_events;
0c82adcf 2195 handler(trace, evsel, event, sample);
3160565f 2196 }
ef65e96e
ACM
2197out:
2198 thread__put(thread);
6810fc91
DA
2199 return err;
2200}
2201
1e28fe0a 2202static int trace__record(struct trace *trace, int argc, const char **argv)
5e2485b1
DA
2203{
2204 unsigned int rec_argc, i, j;
2205 const char **rec_argv;
2206 const char * const record_args[] = {
2207 "record",
2208 "-R",
2209 "-m", "1024",
2210 "-c", "1",
5e2485b1
DA
2211 };
2212
1e28fe0a
SF
2213 const char * const sc_args[] = { "-e", };
2214 unsigned int sc_args_nr = ARRAY_SIZE(sc_args);
2215 const char * const majpf_args[] = { "-e", "major-faults" };
2216 unsigned int majpf_args_nr = ARRAY_SIZE(majpf_args);
2217 const char * const minpf_args[] = { "-e", "minor-faults" };
2218 unsigned int minpf_args_nr = ARRAY_SIZE(minpf_args);
2219
9aca7f17 2220 /* +1 is for the event string below */
1e28fe0a
SF
2221 rec_argc = ARRAY_SIZE(record_args) + sc_args_nr + 1 +
2222 majpf_args_nr + minpf_args_nr + argc;
5e2485b1
DA
2223 rec_argv = calloc(rec_argc + 1, sizeof(char *));
2224
2225 if (rec_argv == NULL)
2226 return -ENOMEM;
2227
1e28fe0a 2228 j = 0;
5e2485b1 2229 for (i = 0; i < ARRAY_SIZE(record_args); i++)
1e28fe0a
SF
2230 rec_argv[j++] = record_args[i];
2231
e281a960
SF
2232 if (trace->trace_syscalls) {
2233 for (i = 0; i < sc_args_nr; i++)
2234 rec_argv[j++] = sc_args[i];
2235
2236 /* event string may be different for older kernels - e.g., RHEL6 */
2237 if (is_valid_tracepoint("raw_syscalls:sys_enter"))
2238 rec_argv[j++] = "raw_syscalls:sys_enter,raw_syscalls:sys_exit";
2239 else if (is_valid_tracepoint("syscalls:sys_enter"))
2240 rec_argv[j++] = "syscalls:sys_enter,syscalls:sys_exit";
2241 else {
2242 pr_err("Neither raw_syscalls nor syscalls events exist.\n");
c896f85a 2243 free(rec_argv);
e281a960
SF
2244 return -1;
2245 }
9aca7f17 2246 }
9aca7f17 2247
1e28fe0a
SF
2248 if (trace->trace_pgfaults & TRACE_PFMAJ)
2249 for (i = 0; i < majpf_args_nr; i++)
2250 rec_argv[j++] = majpf_args[i];
2251
2252 if (trace->trace_pgfaults & TRACE_PFMIN)
2253 for (i = 0; i < minpf_args_nr; i++)
2254 rec_argv[j++] = minpf_args[i];
2255
2256 for (i = 0; i < (unsigned int)argc; i++)
2257 rec_argv[j++] = argv[i];
5e2485b1 2258
b0ad8ea6 2259 return cmd_record(j, rec_argv);
5e2485b1
DA
2260}
2261
bf2575c1
DA
2262static size_t trace__fprintf_thread_summary(struct trace *trace, FILE *fp);
2263
08c98776 2264static bool perf_evlist__add_vfs_getname(struct perf_evlist *evlist)
c522739d 2265{
ef503831 2266 struct perf_evsel *evsel = perf_evsel__newtp("probe", "vfs_getname");
8dd2a131
JO
2267
2268 if (IS_ERR(evsel))
08c98776 2269 return false;
c522739d
ACM
2270
2271 if (perf_evsel__field(evsel, "pathname") == NULL) {
2272 perf_evsel__delete(evsel);
08c98776 2273 return false;
c522739d
ACM
2274 }
2275
744a9719 2276 evsel->handler = trace__vfs_getname;
c522739d 2277 perf_evlist__add(evlist, evsel);
08c98776 2278 return true;
c522739d
ACM
2279}
2280
0ae537cb 2281static struct perf_evsel *perf_evsel__new_pgfault(u64 config)
598d02c5
SF
2282{
2283 struct perf_evsel *evsel;
2284 struct perf_event_attr attr = {
2285 .type = PERF_TYPE_SOFTWARE,
2286 .mmap_data = 1,
598d02c5
SF
2287 };
2288
2289 attr.config = config;
0524798c 2290 attr.sample_period = 1;
598d02c5
SF
2291
2292 event_attr_init(&attr);
2293
2294 evsel = perf_evsel__new(&attr);
0ae537cb
ACM
2295 if (evsel)
2296 evsel->handler = trace__pgfault;
598d02c5 2297
0ae537cb 2298 return evsel;
598d02c5
SF
2299}
2300
ddbb1b13
ACM
2301static void trace__handle_event(struct trace *trace, union perf_event *event, struct perf_sample *sample)
2302{
2303 const u32 type = event->header.type;
2304 struct perf_evsel *evsel;
2305
ddbb1b13
ACM
2306 if (type != PERF_RECORD_SAMPLE) {
2307 trace__process_event(trace, trace->host, event, sample);
2308 return;
2309 }
2310
2311 evsel = perf_evlist__id2evsel(trace->evlist, sample->id);
2312 if (evsel == NULL) {
2313 fprintf(trace->output, "Unknown tp ID %" PRIu64 ", skipping...\n", sample->id);
2314 return;
2315 }
2316
e6001980
ACM
2317 trace__set_base_time(trace, evsel, sample);
2318
ddbb1b13
ACM
2319 if (evsel->attr.type == PERF_TYPE_TRACEPOINT &&
2320 sample->raw_data == NULL) {
2321 fprintf(trace->output, "%s sample with no payload for tid: %d, cpu %d, raw_size=%d, skipping...\n",
2322 perf_evsel__name(evsel), sample->tid,
2323 sample->cpu, sample->raw_size);
2324 } else {
2325 tracepoint_handler handler = evsel->handler;
2326 handler(trace, evsel, event, sample);
2327 }
2328}
2329
c27366f0
ACM
2330static int trace__add_syscall_newtp(struct trace *trace)
2331{
2332 int ret = -1;
2333 struct perf_evlist *evlist = trace->evlist;
2334 struct perf_evsel *sys_enter, *sys_exit;
2335
63f11c80 2336 sys_enter = perf_evsel__raw_syscall_newtp("sys_enter", trace__sys_enter);
c27366f0
ACM
2337 if (sys_enter == NULL)
2338 goto out;
2339
2340 if (perf_evsel__init_sc_tp_ptr_field(sys_enter, args))
2341 goto out_delete_sys_enter;
2342
63f11c80 2343 sys_exit = perf_evsel__raw_syscall_newtp("sys_exit", trace__sys_exit);
c27366f0
ACM
2344 if (sys_exit == NULL)
2345 goto out_delete_sys_enter;
2346
2347 if (perf_evsel__init_sc_tp_uint_field(sys_exit, ret))
2348 goto out_delete_sys_exit;
2349
08e26396
ACM
2350 perf_evsel__config_callchain(sys_enter, &trace->opts, &callchain_param);
2351 perf_evsel__config_callchain(sys_exit, &trace->opts, &callchain_param);
2352
c27366f0
ACM
2353 perf_evlist__add(evlist, sys_enter);
2354 perf_evlist__add(evlist, sys_exit);
2355
2ddd5c04 2356 if (callchain_param.enabled && !trace->kernel_syscallchains) {
44621819
ACM
2357 /*
2358 * We're interested only in the user space callchain
2359 * leading to the syscall, allow overriding that for
2360 * debugging reasons using --kernel_syscall_callchains
2361 */
2362 sys_exit->attr.exclude_callchain_kernel = 1;
2363 }
2364
8b3ce757
ACM
2365 trace->syscalls.events.sys_enter = sys_enter;
2366 trace->syscalls.events.sys_exit = sys_exit;
c27366f0
ACM
2367
2368 ret = 0;
2369out:
2370 return ret;
2371
2372out_delete_sys_exit:
2373 perf_evsel__delete_priv(sys_exit);
2374out_delete_sys_enter:
2375 perf_evsel__delete_priv(sys_enter);
2376 goto out;
2377}
2378
19867b61
ACM
2379static int trace__set_ev_qualifier_filter(struct trace *trace)
2380{
2381 int err = -1;
b15d0a4c 2382 struct perf_evsel *sys_exit;
19867b61
ACM
2383 char *filter = asprintf_expr_inout_ints("id", !trace->not_ev_qualifier,
2384 trace->ev_qualifier_ids.nr,
2385 trace->ev_qualifier_ids.entries);
2386
2387 if (filter == NULL)
2388 goto out_enomem;
2389
3541c034
MP
2390 if (!perf_evsel__append_tp_filter(trace->syscalls.events.sys_enter,
2391 filter)) {
b15d0a4c 2392 sys_exit = trace->syscalls.events.sys_exit;
3541c034 2393 err = perf_evsel__append_tp_filter(sys_exit, filter);
b15d0a4c 2394 }
19867b61
ACM
2395
2396 free(filter);
2397out:
2398 return err;
2399out_enomem:
2400 errno = ENOMEM;
2401 goto out;
2402}
c27366f0 2403
dd1a5037
ACM
2404static int trace__set_filter_loop_pids(struct trace *trace)
2405{
082ab9a1 2406 unsigned int nr = 1;
dd1a5037
ACM
2407 pid_t pids[32] = {
2408 getpid(),
2409 };
082ab9a1
ACM
2410 struct thread *thread = machine__find_thread(trace->host, pids[0], pids[0]);
2411
2412 while (thread && nr < ARRAY_SIZE(pids)) {
2413 struct thread *parent = machine__find_thread(trace->host, thread->ppid, thread->ppid);
2414
2415 if (parent == NULL)
2416 break;
2417
2418 if (!strcmp(thread__comm_str(parent), "sshd")) {
2419 pids[nr++] = parent->tid;
2420 break;
2421 }
2422 thread = parent;
2423 }
dd1a5037
ACM
2424
2425 return perf_evlist__set_filter_pids(trace->evlist, nr, pids);
2426}
2427
/*
 * trace__run() - live tracing: set up the events, run, print, tear down.
 *
 * Adds the requested events to trace->evlist (raw syscall tracepoints plus
 * the vfs_getname probe, major/minor page faults, sched_stat_runtime),
 * creates the target maps, optionally prepares the workload given by @argv
 * (when @argc > 0), opens, filters and mmaps the events, then loops reading
 * the ring buffers and feeding each event to trace__handle_event() until
 * the session is done or interrupted.
 *
 * On the way out it prints the thread summary and tool stats when requested
 * and err is 0, then deletes the evlist and drops the cgroup reference.
 *
 * Returns err: -1 or a negative value from the failing setup step, otherwise
 * the result of the last sample parse.
 */
f15eb531 2428static int trace__run(struct trace *trace, int argc, const char **argv)
514f1c67 2429{
14a052df 2430 struct perf_evlist *evlist = trace->evlist;
0ae537cb 2431 struct perf_evsel *evsel, *pgfault_maj = NULL, *pgfault_min = NULL;
efd5745e
ACM
2432 int err = -1, i;
2433 unsigned long before;
f15eb531 2434 const bool forks = argc > 0;
46fb3c21 2435 bool draining = false;
514f1c67 2436
75b757ca
ACM
2437 trace->live = true;
2438
c27366f0 2439 if (trace->trace_syscalls && trace__add_syscall_newtp(trace))
801c67b0 2440 goto out_error_raw_syscalls;
514f1c67 2441
e281a960 2442 if (trace->trace_syscalls)
08c98776 2443 trace->vfs_getname = perf_evlist__add_vfs_getname(evlist);
c522739d 2444
0ae537cb
ACM
2445 if ((trace->trace_pgfaults & TRACE_PFMAJ)) {
2446 pgfault_maj = perf_evsel__new_pgfault(PERF_COUNT_SW_PAGE_FAULTS_MAJ);
2447 if (pgfault_maj == NULL)
2448 goto out_error_mem;
08e26396 2449 perf_evsel__config_callchain(pgfault_maj, &trace->opts, &callchain_param);
0ae537cb 2450 perf_evlist__add(evlist, pgfault_maj);
e2726d99 2451 }
598d02c5 2452
0ae537cb
ACM
2453 if ((trace->trace_pgfaults & TRACE_PFMIN)) {
2454 pgfault_min = perf_evsel__new_pgfault(PERF_COUNT_SW_PAGE_FAULTS_MIN);
2455 if (pgfault_min == NULL)
2456 goto out_error_mem;
08e26396 2457 perf_evsel__config_callchain(pgfault_min, &trace->opts, &callchain_param);
0ae537cb
ACM
2458 perf_evlist__add(evlist, pgfault_min);
2459 }
598d02c5 2460
1302d88e 2461 if (trace->sched &&
2cc990ba
ACM
2462 perf_evlist__add_newtp(evlist, "sched", "sched_stat_runtime",
2463 trace__sched_stat_runtime))
2464 goto out_error_sched_stat_runtime;
1302d88e 2465
9ea42ba4
ACM
2466 /*
2467 * If a global cgroup was set, apply it to all the events without an
2468 * explicit cgroup. I.e.:
2469 *
2470 * trace -G A -e sched:*switch
2471 *
2472 * Will set all raw_syscalls:sys_{enter,exit}, pgfault, vfs_getname, etc
2473 * _and_ sched:sched_switch to the 'A' cgroup, while:
2474 *
2475 * trace -e sched:*switch -G A
2476 *
2477 * will only set the sched:sched_switch event to the 'A' cgroup, all the
2478 * other events (raw_syscalls:sys_{enter,exit}, etc are left "without"
2479 * a cgroup (on the root cgroup, sys wide, etc).
2480 *
2481 * Multiple cgroups:
2482 *
2483 * trace -G A -e sched:*switch -G B
2484 *
2485 * the syscall ones go to the 'A' cgroup, the sched:sched_switch goes
2486 * to the 'B' cgroup.
2487 *
2488 * evlist__set_default_cgroup() grabs a reference of the passed cgroup
2489 * only for the evsels still without a cgroup, i.e. evsel->cgroup == NULL.
2490 */
2491 if (trace->cgroup)
2492 evlist__set_default_cgroup(trace->evlist, trace->cgroup);
2493
514f1c67
ACM
2494 err = perf_evlist__create_maps(evlist, &trace->opts.target);
2495 if (err < 0) {
c24ff998 2496 fprintf(trace->output, "Problems parsing the target to trace, check your options!\n");
514f1c67
ACM
2497 goto out_delete_evlist;
2498 }
2499
752fde44
ACM
2500 err = trace__symbols_init(trace, evlist);
2501 if (err < 0) {
c24ff998 2502 fprintf(trace->output, "Problems initializing symbol libraries!\n");
03ad9747 2503 goto out_delete_evlist;
752fde44
ACM
2504 }
2505
75d50117 2506 perf_evlist__config(evlist, &trace->opts, &callchain_param);
fde54b78 2507
f15eb531
NK
2508 signal(SIGCHLD, sig_handler);
2509 signal(SIGINT, sig_handler);
2510
2511 if (forks) {
6ef73ec4 2512 err = perf_evlist__prepare_workload(evlist, &trace->opts.target,
735f7e0b 2513 argv, false, NULL);
f15eb531 2514 if (err < 0) {
c24ff998 2515 fprintf(trace->output, "Couldn't run the workload!\n");
03ad9747 2516 goto out_delete_evlist;
f15eb531
NK
2517 }
2518 }
2519
514f1c67 2520 err = perf_evlist__open(evlist);
a8f23d8f
ACM
2521 if (err < 0)
2522 goto out_error_open;
514f1c67 2523
ba504235
WN
2524 err = bpf__apply_obj_config();
2525 if (err) {
2526 char errbuf[BUFSIZ];
2527
2528 bpf__strerror_apply_obj_config(err, errbuf, sizeof(errbuf));
2529 pr_err("ERROR: Apply config to BPF failed: %s\n",
2530 errbuf);
2531 goto out_error_open;
2532 }
2533
241b057c
ACM
2534 /*
2535 * Better not use !target__has_task() here because we need to cover the
2536 * case where no threads were specified in the command line, but a
2537 * workload was, and in that case we will fill in the thread_map when
2538 * we fork the workload in perf_evlist__prepare_workload.
2539 */
f078c385
ACM
2540 if (trace->filter_pids.nr > 0)
2541 err = perf_evlist__set_filter_pids(evlist, trace->filter_pids.nr, trace->filter_pids.entries);
e13798c7 2542 else if (thread_map__pid(evlist->threads, 0) == -1)
dd1a5037 2543 err = trace__set_filter_loop_pids(trace);
f078c385 2544
94ad89bc
ACM
2545 if (err < 0)
2546 goto out_error_mem;
2547
19867b61
ACM
2548 if (trace->ev_qualifier_ids.nr > 0) {
2549 err = trace__set_ev_qualifier_filter(trace);
2550 if (err < 0)
2551 goto out_errno;
19867b61 2552
2e5e5f87
ACM
2553 pr_debug("event qualifier tracepoint filter: %s\n",
2554 trace->syscalls.events.sys_exit->filter);
2555 }
19867b61 2556
 /* On failure evsel is expected to point at the offending event, see out_error_apply_filters. */
94ad89bc
ACM
2557 err = perf_evlist__apply_filters(evlist, &evsel);
2558 if (err < 0)
2559 goto out_error_apply_filters;
241b057c 2560
f74b9d3a 2561 err = perf_evlist__mmap(evlist, trace->opts.mmap_pages);
e09b18d4
ACM
2562 if (err < 0)
2563 goto out_error_mmap;
514f1c67 2564
 /* Enable right away only for existing targets with no initial delay. */
e36b7821 2565 if (!target__none(&trace->opts.target) && !trace->opts.initial_delay)
cb24d01d
ACM
2566 perf_evlist__enable(evlist);
2567
f15eb531
NK
2568 if (forks)
2569 perf_evlist__start_workload(evlist);
2570
e36b7821
AB
2571 if (trace->opts.initial_delay) {
2572 usleep(trace->opts.initial_delay * 1000);
2573 perf_evlist__enable(evlist);
2574 }
2575
e13798c7 2576 trace->multiple_threads = thread_map__pid(evlist->threads, 0) == -1 ||
42052bea
ACM
2577 evlist->threads->nr > 1 ||
2578 perf_evlist__first(evlist)->attr.inherit;
bd3dda9a
ACM
2579
2580 /*
2581 * Now that we already used evsel->attr to ask the kernel to setup the
2582 * events, lets reuse evsel->attr.sample_max_stack as the limit in
2583 * trace__resolve_callchain(), allowing per-event max-stack settings
2584 * to override an explicitly set --max-stack global setting.
2585 */
2586 evlist__for_each_entry(evlist, evsel) {
27de9b2b 2587 if (evsel__has_callchain(evsel) &&
bd3dda9a
ACM
2588 evsel->attr.sample_max_stack == 0)
2589 evsel->attr.sample_max_stack = trace->max_stack;
2590 }
 /* Main loop: one pass over every ring buffer, repeated while events keep arriving. */
514f1c67 2591again:
efd5745e 2592 before = trace->nr_events;
514f1c67
ACM
2593
2594 for (i = 0; i < evlist->nr_mmaps; i++) {
2595 union perf_event *event;
d7f55c62 2596 struct perf_mmap *md;
514f1c67 2597
d7f55c62 2598 md = &evlist->mmap[i];
b9bae2c8 2599 if (perf_mmap__read_init(md) < 0)
d7f55c62
KL
2600 continue;
2601
0019dc87 2602 while ((event = perf_mmap__read_event(md)) != NULL) {
514f1c67 2603 struct perf_sample sample;
514f1c67 2604
efd5745e 2605 ++trace->nr_events;
514f1c67 2606
514f1c67
ACM
2607 err = perf_evlist__parse_sample(evlist, event, &sample);
2608 if (err) {
c24ff998 2609 fprintf(trace->output, "Can't parse sample, err = %d, skipping...\n", err);
8e50d384 2610 goto next_event;
514f1c67
ACM
2611 }
2612
ddbb1b13 2613 trace__handle_event(trace, event, &sample);
8e50d384 2614next_event:
d6ace3df 2615 perf_mmap__consume(md);
20c5f10e 2616
ba209f85
ACM
2617 if (interrupted)
2618 goto out_disable;
02ac5421
ACM
2619
 /* Once 'done' is set, stop producing new events but keep consuming what is buffered. */
2620 if (done && !draining) {
2621 perf_evlist__disable(evlist);
2622 draining = true;
2623 }
514f1c67 2624 }
d7f55c62 2625 perf_mmap__read_done(md);
514f1c67
ACM
2626 }
2627
 /* Nothing new in this pass: poll (100ms once 'done', otherwise no timeout) unless draining. */
efd5745e 2628 if (trace->nr_events == before) {
ba209f85 2629 int timeout = done ? 100 : -1;
f15eb531 2630
46fb3c21
ACM
2631 if (!draining && perf_evlist__poll(evlist, timeout) > 0) {
2632 if (perf_evlist__filter_pollfd(evlist, POLLERR | POLLHUP) == 0)
2633 draining = true;
2634
ba209f85 2635 goto again;
46fb3c21 2636 }
ba209f85
ACM
2637 } else {
2638 goto again;
f15eb531
NK
2639 }
2640
ba209f85 2641out_disable:
f3b623b8
ACM
2642 thread__zput(trace->current);
2643
ba209f85 2644 perf_evlist__disable(evlist);
514f1c67 2645
c522739d
ACM
2646 if (!err) {
2647 if (trace->summary)
2648 trace__fprintf_thread_summary(trace, trace->output);
2649
2650 if (trace->show_tool_stats) {
2651 fprintf(trace->output, "Stats:\n "
2652 " vfs_getname : %" PRIu64 "\n"
2653 " proc_getname: %" PRIu64 "\n",
2654 trace->stats.vfs_getname,
2655 trace->stats.proc_getname);
2656 }
2657 }
bf2575c1 2658
 /* Common teardown, also the landing point of every error path below. */
514f1c67 2659out_delete_evlist:
33974a41
AV
2660 trace__symbols__exit(trace);
2661
514f1c67 2662 perf_evlist__delete(evlist);
9ea42ba4 2663 cgroup__put(trace->cgroup);
14a052df 2664 trace->evlist = NULL;
75b757ca 2665 trace->live = false;
514f1c67 2666 return err;
 /* Error labels that format a message into errbuf live in their own scope so the buffer is only declared here. */
6ef068cb
ACM
2667{
2668 char errbuf[BUFSIZ];
a8f23d8f 2669
2cc990ba 2670out_error_sched_stat_runtime:
988bdb31 2671 tracing_path__strerror_open_tp(errno, errbuf, sizeof(errbuf), "sched", "sched_stat_runtime");
2cc990ba
ACM
2672 goto out_error;
2673
801c67b0 2674out_error_raw_syscalls:
988bdb31 2675 tracing_path__strerror_open_tp(errno, errbuf, sizeof(errbuf), "raw_syscalls", "sys_(enter|exit)");
a8f23d8f
ACM
2676 goto out_error;
2677
e09b18d4
ACM
2678out_error_mmap:
2679 perf_evlist__strerror_mmap(evlist, errno, errbuf, sizeof(errbuf));
2680 goto out_error;
2681
a8f23d8f
ACM
2682out_error_open:
2683 perf_evlist__strerror_open(evlist, errno, errbuf, sizeof(errbuf));
2684
2685out_error:
6ef068cb 2686 fprintf(trace->output, "%s\n", errbuf);
87f91868 2687 goto out_delete_evlist;
94ad89bc
ACM
2688
2689out_error_apply_filters:
2690 fprintf(trace->output,
2691 "Failed to set filter \"%s\" on event %s with %d (%s)\n",
2692 evsel->filter, perf_evsel__name(evsel), errno,
c8b5f2c9 2693 str_error_r(errno, errbuf, sizeof(errbuf)));
94ad89bc 2694 goto out_delete_evlist;
514f1c67 2695}
5ed08dae
ACM
2696out_error_mem:
2697 fprintf(trace->output, "Not enough memory to run!\n");
2698 goto out_delete_evlist;
19867b61
ACM
2699
2700out_errno:
2701 fprintf(trace->output, "errno=%d,%s\n", errno, strerror(errno));
2702 goto out_delete_evlist;
a8f23d8f 2703}
514f1c67 2704
6810fc91
DA
2705static int trace__replay(struct trace *trace)
2706{
2707 const struct perf_evsel_str_handler handlers[] = {
c522739d 2708 { "probe:vfs_getname", trace__vfs_getname, },
6810fc91 2709 };
8ceb41d7 2710 struct perf_data data = {
eae8ad80
JO
2711 .file = {
2712 .path = input_name,
2713 },
2714 .mode = PERF_DATA_MODE_READ,
2715 .force = trace->force,
f5fc1412 2716 };
6810fc91 2717 struct perf_session *session;
003824e8 2718 struct perf_evsel *evsel;
6810fc91
DA
2719 int err = -1;
2720
2721 trace->tool.sample = trace__process_sample;
2722 trace->tool.mmap = perf_event__process_mmap;
384c671e 2723 trace->tool.mmap2 = perf_event__process_mmap2;
6810fc91
DA
2724 trace->tool.comm = perf_event__process_comm;
2725 trace->tool.exit = perf_event__process_exit;
2726 trace->tool.fork = perf_event__process_fork;
2727 trace->tool.attr = perf_event__process_attr;
f3b3614a 2728 trace->tool.tracing_data = perf_event__process_tracing_data;
6810fc91 2729 trace->tool.build_id = perf_event__process_build_id;
f3b3614a 2730 trace->tool.namespaces = perf_event__process_namespaces;
6810fc91 2731
0a8cb85c 2732 trace->tool.ordered_events = true;
6810fc91
DA
2733 trace->tool.ordering_requires_timestamps = true;
2734
2735 /* add tid to output */
2736 trace->multiple_threads = true;
2737
8ceb41d7 2738 session = perf_session__new(&data, false, &trace->tool);
6810fc91 2739 if (session == NULL)
52e02834 2740 return -1;
6810fc91 2741
aa07df6e
DA
2742 if (trace->opts.target.pid)
2743 symbol_conf.pid_list_str = strdup(trace->opts.target.pid);
2744
2745 if (trace->opts.target.tid)
2746 symbol_conf.tid_list_str = strdup(trace->opts.target.tid);
2747
0a7e6d1b 2748 if (symbol__init(&session->header.env) < 0)
cb2ffae2
NK
2749 goto out;
2750
8fb598e5
DA
2751 trace->host = &session->machines.host;
2752
6810fc91
DA
2753 err = perf_session__set_tracepoints_handlers(session, handlers);
2754 if (err)
2755 goto out;
2756
003824e8
NK
2757 evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
2758 "raw_syscalls:sys_enter");
9aca7f17
DA
2759 /* older kernels have syscalls tp versus raw_syscalls */
2760 if (evsel == NULL)
2761 evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
2762 "syscalls:sys_enter");
003824e8 2763
e281a960 2764 if (evsel &&
63f11c80 2765 (perf_evsel__init_raw_syscall_tp(evsel, trace__sys_enter) < 0 ||
e281a960 2766 perf_evsel__init_sc_tp_ptr_field(evsel, args))) {
003824e8
NK
2767 pr_err("Error during initialize raw_syscalls:sys_enter event\n");
2768 goto out;
2769 }
2770
2771 evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
2772 "raw_syscalls:sys_exit");
9aca7f17
DA
2773 if (evsel == NULL)
2774 evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
2775 "syscalls:sys_exit");
e281a960 2776 if (evsel &&
63f11c80 2777 (perf_evsel__init_raw_syscall_tp(evsel, trace__sys_exit) < 0 ||
e281a960 2778 perf_evsel__init_sc_tp_uint_field(evsel, ret))) {
003824e8 2779 pr_err("Error during initialize raw_syscalls:sys_exit event\n");
6810fc91
DA
2780 goto out;
2781 }
2782
e5cadb93 2783 evlist__for_each_entry(session->evlist, evsel) {
1e28fe0a
SF
2784 if (evsel->attr.type == PERF_TYPE_SOFTWARE &&
2785 (evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MAJ ||
2786 evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MIN ||
2787 evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS))
2788 evsel->handler = trace__pgfault;
2789 }
2790
6810fc91
DA
2791 setup_pager();
2792
b7b61cbe 2793 err = perf_session__process_events(session);
6810fc91
DA
2794 if (err)
2795 pr_err("Failed to process events, error %d", err);
2796
bf2575c1
DA
2797 else if (trace->summary)
2798 trace__fprintf_thread_summary(trace, trace->output);
2799
6810fc91
DA
2800out:
2801 perf_session__delete(session);
2802
2803 return err;
2804}
2805
1302d88e
ACM
/*
 * Print the banner that precedes the per-thread summary.
 *
 * Returns what fprintf() returned, converted to size_t.
 */
static size_t trace__fprintf_threads_header(FILE *fp)
{
	return fprintf(fp, "\n Summary of events:\n\n");
}
2814
b535d523
ACM
2815DEFINE_RESORT_RB(syscall_stats, a->msecs > b->msecs,
2816 struct stats *stats;
2817 double msecs;
2818 int syscall;
2819)
2820{
2821 struct int_node *source = rb_entry(nd, struct int_node, rb_node);
2822 struct stats *stats = source->priv;
2823
2824 entry->syscall = source->i;
2825 entry->stats = stats;
2826 entry->msecs = stats ? (u64)stats->n * (avg_stats(stats) / NSEC_PER_MSEC) : 0;
2827}
2828
bf2575c1
DA
2829static size_t thread__dump_stats(struct thread_trace *ttrace,
2830 struct trace *trace, FILE *fp)
2831{
bf2575c1
DA
2832 size_t printed = 0;
2833 struct syscall *sc;
b535d523
ACM
2834 struct rb_node *nd;
2835 DECLARE_RESORT_RB_INTLIST(syscall_stats, ttrace->syscall_stats);
bf2575c1 2836
b535d523 2837 if (syscall_stats == NULL)
bf2575c1
DA
2838 return 0;
2839
2840 printed += fprintf(fp, "\n");
2841
834fd46d
MW
2842 printed += fprintf(fp, " syscall calls total min avg max stddev\n");
2843 printed += fprintf(fp, " (msec) (msec) (msec) (msec) (%%)\n");
2844 printed += fprintf(fp, " --------------- -------- --------- --------- --------- --------- ------\n");
99ff7150 2845
98a91837 2846 resort_rb__for_each_entry(nd, syscall_stats) {
b535d523 2847 struct stats *stats = syscall_stats_entry->stats;
bf2575c1
DA
2848 if (stats) {
2849 double min = (double)(stats->min) / NSEC_PER_MSEC;
2850 double max = (double)(stats->max) / NSEC_PER_MSEC;
2851 double avg = avg_stats(stats);
2852 double pct;
2853 u64 n = (u64) stats->n;
2854
2855 pct = avg ? 100.0 * stddev_stats(stats)/avg : 0.0;
2856 avg /= NSEC_PER_MSEC;
2857
b535d523 2858 sc = &trace->syscalls.table[syscall_stats_entry->syscall];
99ff7150 2859 printed += fprintf(fp, " %-15s", sc->name);
834fd46d 2860 printed += fprintf(fp, " %8" PRIu64 " %9.3f %9.3f %9.3f",
b535d523 2861 n, syscall_stats_entry->msecs, min, avg);
27a778b5 2862 printed += fprintf(fp, " %9.3f %9.2f%%\n", max, pct);
bf2575c1 2863 }
bf2575c1
DA
2864 }
2865
b535d523 2866 resort_rb__delete(syscall_stats);
bf2575c1 2867 printed += fprintf(fp, "\n\n");
1302d88e
ACM
2868
2869 return printed;
2870}
2871
96c14451 2872static size_t trace__fprintf_thread(FILE *fp, struct thread *thread, struct trace *trace)
896cbb56 2873{
96c14451 2874 size_t printed = 0;
89dceb22 2875 struct thread_trace *ttrace = thread__priv(thread);
896cbb56
DA
2876 double ratio;
2877
2878 if (ttrace == NULL)
2879 return 0;
2880
2881 ratio = (double)ttrace->nr_events / trace->nr_events * 100.0;
2882
15e65c69 2883 printed += fprintf(fp, " %s (%d), ", thread__comm_str(thread), thread->tid);
99ff7150 2884 printed += fprintf(fp, "%lu events, ", ttrace->nr_events);
15e65c69 2885 printed += fprintf(fp, "%.1f%%", ratio);
a2ea67d7
SF
2886 if (ttrace->pfmaj)
2887 printed += fprintf(fp, ", %lu majfaults", ttrace->pfmaj);
2888 if (ttrace->pfmin)
2889 printed += fprintf(fp, ", %lu minfaults", ttrace->pfmin);
03548ebf
ACM
2890 if (trace->sched)
2891 printed += fprintf(fp, ", %.3f msec\n", ttrace->runtime_ms);
2892 else if (fputc('\n', fp) != EOF)
2893 ++printed;
2894
bf2575c1 2895 printed += thread__dump_stats(ttrace, trace, fp);
896cbb56 2896
96c14451
ACM
2897 return printed;
2898}
896cbb56 2899
96c14451
ACM
2900static unsigned long thread__nr_events(struct thread_trace *ttrace)
2901{
2902 return ttrace ? ttrace->nr_events : 0;
2903}
2904
2905DEFINE_RESORT_RB(threads, (thread__nr_events(a->thread->priv) < thread__nr_events(b->thread->priv)),
2906 struct thread *thread;
2907)
2908{
2909 entry->thread = rb_entry(nd, struct thread, rb_node);
896cbb56
DA
2910}
2911
1302d88e
ACM
2912static size_t trace__fprintf_thread_summary(struct trace *trace, FILE *fp)
2913{
96c14451
ACM
2914 size_t printed = trace__fprintf_threads_header(fp);
2915 struct rb_node *nd;
91e467bc 2916 int i;
1302d88e 2917
91e467bc
KL
2918 for (i = 0; i < THREADS__TABLE_SIZE; i++) {
2919 DECLARE_RESORT_RB_MACHINE_THREADS(threads, trace->host, i);
96c14451 2920
91e467bc
KL
2921 if (threads == NULL) {
2922 fprintf(fp, "%s", "Error sorting output by nr_events!\n");
2923 return 0;
2924 }
896cbb56 2925
91e467bc
KL
2926 resort_rb__for_each_entry(nd, threads)
2927 printed += trace__fprintf_thread(fp, threads_entry->thread, trace);
96c14451 2928
91e467bc
KL
2929 resort_rb__delete(threads);
2930 }
96c14451 2931 return printed;
1302d88e
ACM
2932}
2933
ae9ed035
ACM
2934static int trace__set_duration(const struct option *opt, const char *str,
2935 int unset __maybe_unused)
2936{
2937 struct trace *trace = opt->value;
2938
2939 trace->duration_filter = atof(str);
2940 return 0;
2941}
2942
f078c385
ACM
2943static int trace__set_filter_pids(const struct option *opt, const char *str,
2944 int unset __maybe_unused)
2945{
2946 int ret = -1;
2947 size_t i;
2948 struct trace *trace = opt->value;
2949 /*
2950 * FIXME: introduce a intarray class, plain parse csv and create a
2951 * { int nr, int entries[] } struct...
2952 */
2953 struct intlist *list = intlist__new(str);
2954
2955 if (list == NULL)
2956 return -1;
2957
2958 i = trace->filter_pids.nr = intlist__nr_entries(list) + 1;
2959 trace->filter_pids.entries = calloc(i, sizeof(pid_t));
2960
2961 if (trace->filter_pids.entries == NULL)
2962 goto out;
2963
2964 trace->filter_pids.entries[0] = getpid();
2965
2966 for (i = 1; i < trace->filter_pids.nr; ++i)
2967 trace->filter_pids.entries[i] = intlist__entry(list, i - 1)->i;
2968
2969 intlist__delete(list);
2970 ret = 0;
2971out:
2972 return ret;
2973}
2974
c24ff998
ACM
2975static int trace__open_output(struct trace *trace, const char *filename)
2976{
2977 struct stat st;
2978
2979 if (!stat(filename, &st) && st.st_size) {
2980 char oldname[PATH_MAX];
2981
2982 scnprintf(oldname, sizeof(oldname), "%s.old", filename);
2983 unlink(oldname);
2984 rename(filename, oldname);
2985 }
2986
2987 trace->output = fopen(filename, "w");
2988
2989 return trace->output == NULL ? -errno : 0;
2990}
2991
598d02c5
SF
2992static int parse_pagefaults(const struct option *opt, const char *str,
2993 int unset __maybe_unused)
2994{
2995 int *trace_pgfaults = opt->value;
2996
2997 if (strcmp(str, "all") == 0)
2998 *trace_pgfaults |= TRACE_PFMAJ | TRACE_PFMIN;
2999 else if (strcmp(str, "maj") == 0)
3000 *trace_pgfaults |= TRACE_PFMAJ;
3001 else if (strcmp(str, "min") == 0)
3002 *trace_pgfaults |= TRACE_PFMIN;
3003 else
3004 return -1;
3005
3006 return 0;
3007}
3008
14a052df
ACM
3009static void evlist__set_evsel_handler(struct perf_evlist *evlist, void *handler)
3010{
3011 struct perf_evsel *evsel;
3012
e5cadb93 3013 evlist__for_each_entry(evlist, evsel)
14a052df
ACM
3014 evsel->handler = handler;
3015}
3016
d32855fa
ACM
3017static int evlist__set_syscall_tp_fields(struct perf_evlist *evlist)
3018{
3019 struct perf_evsel *evsel;
3020
3021 evlist__for_each_entry(evlist, evsel) {
3022 if (evsel->priv || !evsel->tp_format)
3023 continue;
3024
3025 if (strcmp(evsel->tp_format->system, "syscalls"))
3026 continue;
3027
3028 if (perf_evsel__init_syscall_tp(evsel))
3029 return -1;
3030
3031 if (!strncmp(evsel->tp_format->name, "sys_enter_", 10)) {
3032 struct syscall_tp *sc = evsel->priv;
3033
3034 if (__tp_field__init_ptr(&sc->args, sc->id.offset + sizeof(u64)))
3035 return -1;
3036 } else if (!strncmp(evsel->tp_format->name, "sys_exit_", 9)) {
3037 struct syscall_tp *sc = evsel->priv;
3038
3039 if (__tp_field__init_uint(&sc->ret, sizeof(u64), sc->id.offset + sizeof(u64), evsel->needs_swap))
3040 return -1;
3041 }
3042 }
3043
3044 return 0;
3045}
3046
017037ff
ACM
3047/*
3048 * XXX: Hackish, just splitting the combined -e+--event (syscalls
3049 * (raw_syscalls:{sys_{enter,exit}} + events (tracepoints, HW, SW, etc) to use
3050 * existing facilities unchanged (trace->ev_qualifier + parse_options()).
3051 *
3052 * It'd be better to introduce a parse_options() variant that would return a
3053 * list with the terms it didn't match to an event...
3054 */
3055static int trace__parse_events_option(const struct option *opt, const char *str,
3056 int unset __maybe_unused)
3057{
3058 struct trace *trace = (struct trace *)opt->value;
3059 const char *s = str;
3060 char *sep = NULL, *lists[2] = { NULL, NULL, };
27702bcf 3061 int len = strlen(str) + 1, err = -1, list, idx;
017037ff
ACM
3062 char *strace_groups_dir = system_path(STRACE_GROUPS_DIR);
3063 char group_name[PATH_MAX];
3064
3065 if (strace_groups_dir == NULL)
3066 return -1;
3067
3068 if (*s == '!') {
3069 ++s;
3070 trace->not_ev_qualifier = true;
3071 }
3072
3073 while (1) {
3074 if ((sep = strchr(s, ',')) != NULL)
3075 *sep = '\0';
3076
3077 list = 0;
27702bcf
ACM
3078 if (syscalltbl__id(trace->sctbl, s) >= 0 ||
3079 syscalltbl__strglobmatch_first(trace->sctbl, s, &idx) >= 0) {
017037ff
ACM
3080 list = 1;
3081 } else {
3082 path__join(group_name, sizeof(group_name), strace_groups_dir, s);
3083 if (access(group_name, R_OK) == 0)
3084 list = 1;
3085 }
3086
3087 if (lists[list]) {
3088 sprintf(lists[list] + strlen(lists[list]), ",%s", s);
3089 } else {
3090 lists[list] = malloc(len);
3091 if (lists[list] == NULL)
3092 goto out;
3093 strcpy(lists[list], s);
3094 }
3095
3096 if (!sep)
3097 break;
3098
3099 *sep = ',';
3100 s = sep + 1;
3101 }
3102
3103 if (lists[1] != NULL) {
3104 struct strlist_config slist_config = {
3105 .dirname = strace_groups_dir,
3106 };
3107
3108 trace->ev_qualifier = strlist__new(lists[1], &slist_config);
3109 if (trace->ev_qualifier == NULL) {
3110 fputs("Not enough memory to parse event qualifier", trace->output);
3111 goto out;
3112 }
3113
3114 if (trace__validate_ev_qualifier(trace))
3115 goto out;
b912885a 3116 trace->trace_syscalls = true;
017037ff
ACM
3117 }
3118
3119 err = 0;
3120
3121 if (lists[0]) {
3122 struct option o = OPT_CALLBACK('e', "event", &trace->evlist, "event",
3123 "event selector. use 'perf list' to list available events",
3124 parse_events_option);
3125 err = parse_events_option(&o, lists[0], 0);
3126 }
3127out:
3128 if (sep)
3129 *sep = ',';
3130
3131 return err;
3132}
3133
9ea42ba4
ACM
3134static int trace__parse_cgroups(const struct option *opt, const char *str, int unset)
3135{
3136 struct trace *trace = opt->value;
3137
3138 if (!list_empty(&trace->evlist->entries))
3139 return parse_cgroups(opt, str, unset);
3140
3141 trace->cgroup = evlist__findnew_cgroup(trace->evlist, str);
3142
3143 return 0;
3144}
3145
b0ad8ea6 3146int cmd_trace(int argc, const char **argv)
514f1c67 3147{
6fdd9cb7 3148 const char *trace_usage[] = {
f15eb531
NK
3149 "perf trace [<options>] [<command>]",
3150 "perf trace [<options>] -- <command> [<options>]",
5e2485b1
DA
3151 "perf trace record [<options>] [<command>]",
3152 "perf trace record [<options>] -- <command> [<options>]",
514f1c67
ACM
3153 NULL
3154 };
3155 struct trace trace = {
514f1c67
ACM
3156 .syscalls = {
3157 . max = -1,
3158 },
3159 .opts = {
3160 .target = {
3161 .uid = UINT_MAX,
3162 .uses_mmap = true,
3163 },
3164 .user_freq = UINT_MAX,
3165 .user_interval = ULLONG_MAX,
509051ea 3166 .no_buffering = true,
38d5447d 3167 .mmap_pages = UINT_MAX,
9d9cad76 3168 .proc_map_timeout = 500,
514f1c67 3169 },
007d66a0 3170 .output = stderr,
50c95cbd 3171 .show_comm = true,
b912885a 3172 .trace_syscalls = false,
44621819 3173 .kernel_syscallchains = false,
05614993 3174 .max_stack = UINT_MAX,
514f1c67 3175 };
c24ff998 3176 const char *output_name = NULL;
514f1c67 3177 const struct option trace_options[] = {
017037ff
ACM
3178 OPT_CALLBACK('e', "event", &trace, "event",
3179 "event/syscall selector. use 'perf list' to list available events",
3180 trace__parse_events_option),
50c95cbd
ACM
3181 OPT_BOOLEAN(0, "comm", &trace.show_comm,
3182 "show the thread COMM next to its id"),
c522739d 3183 OPT_BOOLEAN(0, "tool_stats", &trace.show_tool_stats, "show tool stats"),
017037ff
ACM
3184 OPT_CALLBACK(0, "expr", &trace, "expr", "list of syscalls/events to trace",
3185 trace__parse_events_option),
c24ff998 3186 OPT_STRING('o', "output", &output_name, "file", "output file name"),
6810fc91 3187 OPT_STRING('i', "input", &input_name, "file", "Analyze events in file"),
514f1c67
ACM
3188 OPT_STRING('p', "pid", &trace.opts.target.pid, "pid",
3189 "trace events on existing process id"),
ac9be8ee 3190 OPT_STRING('t', "tid", &trace.opts.target.tid, "tid",
514f1c67 3191 "trace events on existing thread id"),
fa0e4ffe
ACM
3192 OPT_CALLBACK(0, "filter-pids", &trace, "CSV list of pids",
3193 "pids to filter (by the kernel)", trace__set_filter_pids),
ac9be8ee 3194 OPT_BOOLEAN('a', "all-cpus", &trace.opts.target.system_wide,
514f1c67 3195 "system-wide collection from all CPUs"),
ac9be8ee 3196 OPT_STRING('C', "cpu", &trace.opts.target.cpu_list, "cpu",
514f1c67 3197 "list of cpus to monitor"),
6810fc91 3198 OPT_BOOLEAN(0, "no-inherit", &trace.opts.no_inherit,
514f1c67 3199 "child tasks do not inherit counters"),
994a1f78
JO
3200 OPT_CALLBACK('m', "mmap-pages", &trace.opts.mmap_pages, "pages",
3201 "number of mmap data pages",
3202 perf_evlist__parse_mmap_pages),
ac9be8ee 3203 OPT_STRING('u', "uid", &trace.opts.target.uid_str, "user",
514f1c67 3204 "user to profile"),
ae9ed035
ACM
3205 OPT_CALLBACK(0, "duration", &trace, "float",
3206 "show only events with duration > N.M ms",
3207 trace__set_duration),
1302d88e 3208 OPT_BOOLEAN(0, "sched", &trace.sched, "show blocking scheduler events"),
7c304ee0 3209 OPT_INCR('v', "verbose", &verbose, "be more verbose"),
4bb09192
DA
3210 OPT_BOOLEAN('T', "time", &trace.full_time,
3211 "Show full timestamp, not time relative to first start"),
0a6545bd
ACM
3212 OPT_BOOLEAN(0, "failure", &trace.failure_only,
3213 "Show only syscalls that failed"),
fd2eabaf
DA
3214 OPT_BOOLEAN('s', "summary", &trace.summary_only,
3215 "Show only syscall summary with statistics"),
3216 OPT_BOOLEAN('S', "with-summary", &trace.summary,
3217 "Show all syscalls and summary with statistics"),
598d02c5
SF
3218 OPT_CALLBACK_DEFAULT('F', "pf", &trace.trace_pgfaults, "all|maj|min",
3219 "Trace pagefaults", parse_pagefaults, "maj"),
e281a960 3220 OPT_BOOLEAN(0, "syscalls", &trace.trace_syscalls, "Trace syscalls"),
e366a6d8 3221 OPT_BOOLEAN('f', "force", &trace.force, "don't complain, do it"),
566a0885
MW
3222 OPT_CALLBACK(0, "call-graph", &trace.opts,
3223 "record_mode[,record_size]", record_callchain_help,
3224 &record_parse_callchain_opt),
44621819
ACM
3225 OPT_BOOLEAN(0, "kernel-syscall-graph", &trace.kernel_syscallchains,
3226 "Show the kernel callchains on the syscall exit path"),
5cf9c84e
ACM
3227 OPT_UINTEGER(0, "min-stack", &trace.min_stack,
3228 "Set the minimum stack depth when parsing the callchain, "
3229 "anything below the specified depth will be ignored."),
c6d4a494
ACM
3230 OPT_UINTEGER(0, "max-stack", &trace.max_stack,
3231 "Set the maximum stack depth when parsing the callchain, "
3232 "anything beyond the specified depth will be ignored. "
4cb93446 3233 "Default: kernel.perf_event_max_stack or " __stringify(PERF_MAX_STACK_DEPTH)),
591421e1
ACM
3234 OPT_BOOLEAN(0, "print-sample", &trace.print_sample,
3235 "print the PERF_RECORD_SAMPLE PERF_SAMPLE_ info, for debugging"),
9d9cad76
KL
3236 OPT_UINTEGER(0, "proc-map-timeout", &trace.opts.proc_map_timeout,
3237 "per thread proc mmap processing timeout in ms"),
9ea42ba4
ACM
3238 OPT_CALLBACK('G', "cgroup", &trace, "name", "monitor event in cgroup name only",
3239 trace__parse_cgroups),
e36b7821
AB
3240 OPT_UINTEGER('D', "delay", &trace.opts.initial_delay,
3241 "ms to wait before starting measurement after program "
3242 "start"),
514f1c67
ACM
3243 OPT_END()
3244 };
ccd62a89 3245 bool __maybe_unused max_stack_user_set = true;
f3e459d1 3246 bool mmap_pages_user_set = true;
78e890ea 3247 struct perf_evsel *evsel;
6fdd9cb7 3248 const char * const trace_subcommands[] = { "record", NULL };
78e890ea 3249 int err = -1;
32caf0d1 3250 char bf[BUFSIZ];
514f1c67 3251
4d08cb80
ACM
3252 signal(SIGSEGV, sighandler_dump_stack);
3253 signal(SIGFPE, sighandler_dump_stack);
3254
14a052df 3255 trace.evlist = perf_evlist__new();
fd0db102 3256 trace.sctbl = syscalltbl__new();
14a052df 3257
fd0db102 3258 if (trace.evlist == NULL || trace.sctbl == NULL) {
14a052df 3259 pr_err("Not enough memory to run!\n");
ff8f695c 3260 err = -ENOMEM;
14a052df
ACM
3261 goto out;
3262 }
3263
6fdd9cb7
YS
3264 argc = parse_options_subcommand(argc, argv, trace_options, trace_subcommands,
3265 trace_usage, PARSE_OPT_STOP_AT_NON_OPTION);
fd2eabaf 3266
9ea42ba4
ACM
3267 if ((nr_cgroups || trace.cgroup) && !trace.opts.target.system_wide) {
3268 usage_with_options_msg(trace_usage, trace_options,
3269 "cgroup monitoring only available in system-wide mode");
3270 }
3271
78e890ea
ACM
3272 evsel = bpf__setup_output_event(trace.evlist, "__augmented_syscalls__");
3273 if (IS_ERR(evsel)) {
3274 bpf__strerror_setup_output_event(trace.evlist, PTR_ERR(evsel), bf, sizeof(bf));
e0b6d2ef
ACM
3275 pr_err("ERROR: Setup trace syscalls enter failed: %s\n", bf);
3276 goto out;
3277 }
3278
d3d1c4bd
ACM
3279 if (evsel) {
3280 if (perf_evsel__init_augmented_syscall_tp(evsel) ||
3281 perf_evsel__init_augmented_syscall_tp_args(evsel))
3282 goto out;
3283 trace.syscalls.events.augmented = evsel;
3284 }
3285
d7888573
WN
3286 err = bpf__setup_stdout(trace.evlist);
3287 if (err) {
3288 bpf__strerror_setup_stdout(trace.evlist, err, bf, sizeof(bf));
3289 pr_err("ERROR: Setup BPF stdout failed: %s\n", bf);
3290 goto out;
3291 }
3292
59247e33
ACM
3293 err = -1;
3294
598d02c5
SF
3295 if (trace.trace_pgfaults) {
3296 trace.opts.sample_address = true;
3297 trace.opts.sample_time = true;
3298 }
3299
f3e459d1
ACM
3300 if (trace.opts.mmap_pages == UINT_MAX)
3301 mmap_pages_user_set = false;
3302
05614993 3303 if (trace.max_stack == UINT_MAX) {
029c75e5 3304 trace.max_stack = input_name ? PERF_MAX_STACK_DEPTH : sysctl__max_stack();
05614993
ACM
3305 max_stack_user_set = false;
3306 }
3307
3308#ifdef HAVE_DWARF_UNWIND_SUPPORT
75d50117 3309 if ((trace.min_stack || max_stack_user_set) && !callchain_param.enabled) {
05614993 3310 record_opts__parse_callchain(&trace.opts, &callchain_param, "dwarf", false);
75d50117 3311 }
05614993
ACM
3312#endif
3313
2ddd5c04 3314 if (callchain_param.enabled) {
f3e459d1
ACM
3315 if (!mmap_pages_user_set && geteuid() == 0)
3316 trace.opts.mmap_pages = perf_event_mlock_kb_in_pages() * 4;
3317
566a0885 3318 symbol_conf.use_callchain = true;
f3e459d1 3319 }
566a0885 3320
d32855fa 3321 if (trace.evlist->nr_entries > 0) {
14a052df 3322 evlist__set_evsel_handler(trace.evlist, trace__event_handler);
d32855fa
ACM
3323 if (evlist__set_syscall_tp_fields(trace.evlist)) {
3324 perror("failed to set syscalls:* tracepoint fields");
3325 goto out;
3326 }
3327 }
14a052df 3328
1e28fe0a
SF
3329 if ((argc >= 1) && (strcmp(argv[0], "record") == 0))
3330 return trace__record(&trace, argc-1, &argv[1]);
3331
3332 /* summary_only implies summary option, but don't overwrite summary if set */
3333 if (trace.summary_only)
3334 trace.summary = trace.summary_only;
3335
726f3234
ACM
3336 if (!trace.trace_syscalls && !trace.trace_pgfaults &&
3337 trace.evlist->nr_entries == 0 /* Was --events used? */) {
b912885a 3338 trace.trace_syscalls = true;
59247e33
ACM
3339 }
3340
c24ff998
ACM
3341 if (output_name != NULL) {
3342 err = trace__open_output(&trace, output_name);
3343 if (err < 0) {
3344 perror("failed to create output file");
3345 goto out;
3346 }
3347 }
3348
602ad878 3349 err = target__validate(&trace.opts.target);
32caf0d1 3350 if (err) {
602ad878 3351 target__strerror(&trace.opts.target, err, bf, sizeof(bf));
c24ff998
ACM
3352 fprintf(trace.output, "%s", bf);
3353 goto out_close;
32caf0d1
NK
3354 }
3355
602ad878 3356 err = target__parse_uid(&trace.opts.target);
514f1c67 3357 if (err) {
602ad878 3358 target__strerror(&trace.opts.target, err, bf, sizeof(bf));
c24ff998
ACM
3359 fprintf(trace.output, "%s", bf);
3360 goto out_close;
514f1c67
ACM
3361 }
3362
602ad878 3363 if (!argc && target__none(&trace.opts.target))
ee76120e
NK
3364 trace.opts.target.system_wide = true;
3365
6810fc91
DA
3366 if (input_name)
3367 err = trace__replay(&trace);
3368 else
3369 err = trace__run(&trace, argc, argv);
1302d88e 3370
c24ff998
ACM
3371out_close:
3372 if (output_name != NULL)
3373 fclose(trace.output);
3374out:
1302d88e 3375 return err;
514f1c67 3376}