perf trace: Extract the comm/tid printing for syscall enter
[linux-block.git] / tools / perf / builtin-trace.c
CommitLineData
a598bb5e
ACM
1/*
2 * builtin-trace.c
3 *
4 * Builtin 'trace' command:
5 *
6 * Display a continuously updated trace of any workload, CPU, specific PID,
7 * system wide, etc. Default format is loosely strace like, but any other
8 * event may be specified using --event.
9 *
10 * Copyright (C) 2012, 2013, 2014, 2015 Red Hat Inc, Arnaldo Carvalho de Melo <acme@redhat.com>
11 *
12 * Initially based on the 'trace' prototype by Thomas Gleixner:
13 *
14 * http://lwn.net/Articles/415728/ ("Announcing a new utility: 'trace'")
15 *
16 * Released under the GPL v2. (and only v2, not any later version)
17 */
18
4e319027 19#include <traceevent/event-parse.h>
988bdb31 20#include <api/fs/tracing_path.h>
514f1c67 21#include "builtin.h"
9ea42ba4 22#include "util/cgroup.h"
752fde44 23#include "util/color.h"
7c304ee0 24#include "util/debug.h"
092bd3cd 25#include "util/env.h"
5ab8c689 26#include "util/event.h"
514f1c67 27#include "util/evlist.h"
4b6ab94e 28#include <subcmd/exec-cmd.h>
752fde44 29#include "util/machine.h"
9a3993d4 30#include "util/path.h"
6810fc91 31#include "util/session.h"
752fde44 32#include "util/thread.h"
4b6ab94e 33#include <subcmd/parse-options.h>
2ae3a312 34#include "util/strlist.h"
bdc89661 35#include "util/intlist.h"
514f1c67 36#include "util/thread_map.h"
bf2575c1 37#include "util/stat.h"
fd5cead2 38#include "trace/beauty/beauty.h"
97978b3e 39#include "trace-event.h"
9aca7f17 40#include "util/parse-events.h"
ba504235 41#include "util/bpf-loader.h"
566a0885 42#include "callchain.h"
fea01392 43#include "print_binary.h"
a067558e 44#include "string2.h"
fd0db102 45#include "syscalltbl.h"
96c14451 46#include "rb_resort.h"
514f1c67 47
a43783ae 48#include <errno.h>
fd20e811 49#include <inttypes.h>
4208735d 50#include <poll.h>
9607ad3a 51#include <signal.h>
514f1c67 52#include <stdlib.h>
017037ff 53#include <string.h>
8dd2a131 54#include <linux/err.h>
997bba8c 55#include <linux/filter.h>
877a7a11 56#include <linux/kernel.h>
39878d49 57#include <linux/random.h>
c6d4a494 58#include <linux/stringify.h>
bd48c63e 59#include <linux/time64.h>
bafae98e 60#include <fcntl.h>
514f1c67 61
3d689ed6
ACM
62#include "sane_ctype.h"
63
c188e7ac
ACM
/* Fallback values for when the libc headers do not provide these constants. */
#if !defined(O_CLOEXEC)
# define O_CLOEXEC		02000000
#endif

#if !defined(F_LINUX_SPECIFIC_BASE)
# define F_LINUX_SPECIFIC_BASE	1024
#endif
71
d1d438a3
ACM
72struct trace {
73 struct perf_tool tool;
fd0db102 74 struct syscalltbl *sctbl;
d1d438a3
ACM
75 struct {
76 int max;
77 struct syscall *table;
78 struct {
79 struct perf_evsel *sys_enter,
d3d1c4bd
ACM
80 *sys_exit,
81 *augmented;
d1d438a3
ACM
82 } events;
83 } syscalls;
84 struct record_opts opts;
85 struct perf_evlist *evlist;
86 struct machine *host;
87 struct thread *current;
9ea42ba4 88 struct cgroup *cgroup;
d1d438a3
ACM
89 u64 base_time;
90 FILE *output;
91 unsigned long nr_events;
92 struct strlist *ev_qualifier;
93 struct {
94 size_t nr;
95 int *entries;
96 } ev_qualifier_ids;
d1d438a3
ACM
97 struct {
98 size_t nr;
99 pid_t *entries;
100 } filter_pids;
101 double duration_filter;
102 double runtime_ms;
103 struct {
104 u64 vfs_getname,
105 proc_getname;
106 } stats;
c6d4a494 107 unsigned int max_stack;
5cf9c84e 108 unsigned int min_stack;
d1d438a3
ACM
109 bool not_ev_qualifier;
110 bool live;
111 bool full_time;
112 bool sched;
113 bool multiple_threads;
114 bool summary;
115 bool summary_only;
0a6545bd 116 bool failure_only;
d1d438a3 117 bool show_comm;
591421e1 118 bool print_sample;
d1d438a3
ACM
119 bool show_tool_stats;
120 bool trace_syscalls;
44621819 121 bool kernel_syscallchains;
d1d438a3
ACM
122 bool force;
123 bool vfs_getname;
124 int trace_pgfaults;
125};
a1c2552d 126
77170988
ACM
127struct tp_field {
128 int offset;
129 union {
130 u64 (*integer)(struct tp_field *field, struct perf_sample *sample);
131 void *(*pointer)(struct tp_field *field, struct perf_sample *sample);
132 };
133};
134
135#define TP_UINT_FIELD(bits) \
136static u64 tp_field__u##bits(struct tp_field *field, struct perf_sample *sample) \
137{ \
55d43bca
DA
138 u##bits value; \
139 memcpy(&value, sample->raw_data + field->offset, sizeof(value)); \
140 return value; \
77170988
ACM
141}
142
143TP_UINT_FIELD(8);
144TP_UINT_FIELD(16);
145TP_UINT_FIELD(32);
146TP_UINT_FIELD(64);
147
148#define TP_UINT_FIELD__SWAPPED(bits) \
149static u64 tp_field__swapped_u##bits(struct tp_field *field, struct perf_sample *sample) \
150{ \
55d43bca
DA
151 u##bits value; \
152 memcpy(&value, sample->raw_data + field->offset, sizeof(value)); \
77170988
ACM
153 return bswap_##bits(value);\
154}
155
156TP_UINT_FIELD__SWAPPED(16);
157TP_UINT_FIELD__SWAPPED(32);
158TP_UINT_FIELD__SWAPPED(64);
159
aa823f58 160static int __tp_field__init_uint(struct tp_field *field, int size, int offset, bool needs_swap)
77170988 161{
aa823f58 162 field->offset = offset;
77170988 163
aa823f58 164 switch (size) {
77170988
ACM
165 case 1:
166 field->integer = tp_field__u8;
167 break;
168 case 2:
169 field->integer = needs_swap ? tp_field__swapped_u16 : tp_field__u16;
170 break;
171 case 4:
172 field->integer = needs_swap ? tp_field__swapped_u32 : tp_field__u32;
173 break;
174 case 8:
175 field->integer = needs_swap ? tp_field__swapped_u64 : tp_field__u64;
176 break;
177 default:
178 return -1;
179 }
180
181 return 0;
182}
183
aa823f58
ACM
184static int tp_field__init_uint(struct tp_field *field, struct format_field *format_field, bool needs_swap)
185{
186 return __tp_field__init_uint(field, format_field->size, format_field->offset, needs_swap);
187}
188
77170988
ACM
189static void *tp_field__ptr(struct tp_field *field, struct perf_sample *sample)
190{
191 return sample->raw_data + field->offset;
192}
193
aa823f58 194static int __tp_field__init_ptr(struct tp_field *field, int offset)
77170988 195{
aa823f58 196 field->offset = offset;
77170988
ACM
197 field->pointer = tp_field__ptr;
198 return 0;
199}
200
aa823f58
ACM
201static int tp_field__init_ptr(struct tp_field *field, struct format_field *format_field)
202{
203 return __tp_field__init_ptr(field, format_field->offset);
204}
205
77170988
ACM
206struct syscall_tp {
207 struct tp_field id;
208 union {
209 struct tp_field args, ret;
210 };
211};
212
213static int perf_evsel__init_tp_uint_field(struct perf_evsel *evsel,
214 struct tp_field *field,
215 const char *name)
216{
217 struct format_field *format_field = perf_evsel__field(evsel, name);
218
219 if (format_field == NULL)
220 return -1;
221
222 return tp_field__init_uint(field, format_field, evsel->needs_swap);
223}
224
/*
 * Initialize the integer accessor for member 'name' of the struct syscall_tp
 * stored in evsel->priv, using the tracepoint field of the same name.
 */
#define perf_evsel__init_sc_tp_uint_field(evsel, name) \
	({ \
		struct syscall_tp *sc = evsel->priv; \
		perf_evsel__init_tp_uint_field(evsel, &sc->name, #name); \
	})
228
/*
 * Look up the tracepoint field called @name in @evsel and initialize @field
 * as a pointer accessor for it. Returns -1 if the field does not exist.
 */
static int perf_evsel__init_tp_ptr_field(struct perf_evsel *evsel,
					 struct tp_field *field,
					 const char *name)
{
	struct format_field *fmt = perf_evsel__field(evsel, name);

	return fmt ? tp_field__init_ptr(field, fmt) : -1;
}
240
/*
 * Initialize the pointer accessor for member 'name' of the struct syscall_tp
 * stored in evsel->priv, using the tracepoint field of the same name.
 */
#define perf_evsel__init_sc_tp_ptr_field(evsel, name) \
	({ \
		struct syscall_tp *sc = evsel->priv; \
		perf_evsel__init_tp_ptr_field(evsel, &sc->name, #name); \
	})
244
245static void perf_evsel__delete_priv(struct perf_evsel *evsel)
246{
04662523 247 zfree(&evsel->priv);
77170988
ACM
248 perf_evsel__delete(evsel);
249}
250
d32855fa
ACM
251static int perf_evsel__init_syscall_tp(struct perf_evsel *evsel)
252{
253 struct syscall_tp *sc = evsel->priv = malloc(sizeof(struct syscall_tp));
254
255 if (evsel->priv != NULL) {
256 if (perf_evsel__init_tp_uint_field(evsel, &sc->id, "__syscall_nr"))
257 goto out_delete;
258 return 0;
259 }
260
261 return -ENOMEM;
262out_delete:
263 zfree(&evsel->priv);
264 return -ENOENT;
265}
266
d3d1c4bd
ACM
267static int perf_evsel__init_augmented_syscall_tp(struct perf_evsel *evsel)
268{
269 struct syscall_tp *sc = evsel->priv = malloc(sizeof(struct syscall_tp));
270
271 if (evsel->priv != NULL) { /* field, sizeof_field, offsetof_field */
272 if (__tp_field__init_uint(&sc->id, sizeof(long), sizeof(long long), evsel->needs_swap))
273 goto out_delete;
274
275 return 0;
276 }
277
278 return -ENOMEM;
279out_delete:
280 zfree(&evsel->priv);
281 return -EINVAL;
282}
283
284static int perf_evsel__init_augmented_syscall_tp_args(struct perf_evsel *evsel)
285{
286 struct syscall_tp *sc = evsel->priv;
287
288 return __tp_field__init_ptr(&sc->args, sc->id.offset + sizeof(u64));
289}
290
63f11c80 291static int perf_evsel__init_raw_syscall_tp(struct perf_evsel *evsel, void *handler)
96695d44
NK
292{
293 evsel->priv = malloc(sizeof(struct syscall_tp));
294 if (evsel->priv != NULL) {
295 if (perf_evsel__init_sc_tp_uint_field(evsel, id))
296 goto out_delete;
297
298 evsel->handler = handler;
299 return 0;
300 }
301
302 return -ENOMEM;
303
304out_delete:
04662523 305 zfree(&evsel->priv);
96695d44
NK
306 return -ENOENT;
307}
308
63f11c80 309static struct perf_evsel *perf_evsel__raw_syscall_newtp(const char *direction, void *handler)
77170988 310{
ef503831 311 struct perf_evsel *evsel = perf_evsel__newtp("raw_syscalls", direction);
77170988 312
9aca7f17 313 /* older kernel (e.g., RHEL6) use syscalls:{enter,exit} */
8dd2a131 314 if (IS_ERR(evsel))
9aca7f17
DA
315 evsel = perf_evsel__newtp("syscalls", direction);
316
8dd2a131
JO
317 if (IS_ERR(evsel))
318 return NULL;
319
63f11c80 320 if (perf_evsel__init_raw_syscall_tp(evsel, handler))
8dd2a131 321 goto out_delete;
77170988
ACM
322
323 return evsel;
324
325out_delete:
326 perf_evsel__delete_priv(evsel);
327 return NULL;
328}
329
/*
 * Read member 'name' of the struct syscall_tp stored in evsel->priv from
 * 'sample', through its integer accessor.
 */
#define perf_evsel__sc_tp_uint(evsel, name, sample) \
	({ \
		struct syscall_tp *fields = evsel->priv; \
		fields->name.integer(&fields->name, sample); \
	})

/* Same as above, but through the pointer accessor. */
#define perf_evsel__sc_tp_ptr(evsel, name, sample) \
	({ \
		struct syscall_tp *fields = evsel->priv; \
		fields->name.pointer(&fields->name, sample); \
	})
337
0ae79636
ACM
338size_t strarray__scnprintf(struct strarray *sa, char *bf, size_t size, const char *intfmt, int val)
339{
340 int idx = val - sa->offset;
1f115cb7 341
bc972ada 342 if (idx < 0 || idx >= sa->nr_entries || sa->entries[idx] == NULL)
0ae79636 343 return scnprintf(bf, size, intfmt, val);
1f115cb7 344
0ae79636 345 return scnprintf(bf, size, "%s", sa->entries[idx]);
03e3adc9
ACM
346}
347
975b7c2f
ACM
348static size_t __syscall_arg__scnprintf_strarray(char *bf, size_t size,
349 const char *intfmt,
350 struct syscall_arg *arg)
1f115cb7 351{
0ae79636 352 return strarray__scnprintf(arg->parm, bf, size, intfmt, arg->val);
1f115cb7
ACM
353}
354
975b7c2f
ACM
/* strarray formatter that prints unnamed values as a decimal integer. */
static size_t syscall_arg__scnprintf_strarray(char *bf, size_t size,
					      struct syscall_arg *arg)
{
	const char *intfmt = "%d";

	return __syscall_arg__scnprintf_strarray(bf, size, intfmt, arg);
}

#define SCA_STRARRAY syscall_arg__scnprintf_strarray
362
83a51694
ACM
/* A list of strarrays that is searched in order for a matching value. */
struct strarrays {
	int		nr_entries;
	struct strarray **entries;
};

/* Define strarrays__<array> wrapping an array of 'struct strarray *'. */
#define DEFINE_STRARRAYS(array) struct strarrays strarrays__##array = { \
	.nr_entries = ARRAY_SIZE(array), \
	.entries    = array, \
}
372
274e86fd
ACM
373size_t syscall_arg__scnprintf_strarrays(char *bf, size_t size,
374 struct syscall_arg *arg)
83a51694
ACM
375{
376 struct strarrays *sas = arg->parm;
377 int i;
378
379 for (i = 0; i < sas->nr_entries; ++i) {
380 struct strarray *sa = sas->entries[i];
381 int idx = arg->val - sa->offset;
382
383 if (idx >= 0 && idx < sa->nr_entries) {
384 if (sa->entries[idx] == NULL)
385 break;
386 return scnprintf(bf, size, "%s", sa->entries[idx]);
387 }
388 }
389
390 return scnprintf(bf, size, "%d", arg->val);
391}
392
48e1f91a
ACM
/* Fallback for when the libc headers do not provide AT_FDCWD. */
#if !defined(AT_FDCWD)
#define AT_FDCWD	-100
#endif
396
75b757ca
ACM
397static size_t syscall_arg__scnprintf_fd_at(char *bf, size_t size,
398 struct syscall_arg *arg)
399{
400 int fd = arg->val;
401
402 if (fd == AT_FDCWD)
403 return scnprintf(bf, size, "CWD");
404
405 return syscall_arg__scnprintf_fd(bf, size, arg);
406}
407
408#define SCA_FDAT syscall_arg__scnprintf_fd_at
409
/* Forward declaration; the definition is not in this part of the file. */
static size_t syscall_arg__scnprintf_close_fd(char *bf, size_t size,
					      struct syscall_arg *arg);

#define SCA_CLOSE_FD syscall_arg__scnprintf_close_fd
414
2c2b1623 415size_t syscall_arg__scnprintf_hex(char *bf, size_t size, struct syscall_arg *arg)
13d4ff3e 416{
01533e97 417 return scnprintf(bf, size, "%#lx", arg->val);
13d4ff3e
ACM
418}
419
2c2b1623 420size_t syscall_arg__scnprintf_int(char *bf, size_t size, struct syscall_arg *arg)
a1c2552d
ACM
421{
422 return scnprintf(bf, size, "%d", arg->val);
423}
424
5dde91ed
ACM
425size_t syscall_arg__scnprintf_long(char *bf, size_t size, struct syscall_arg *arg)
426{
427 return scnprintf(bf, size, "%ld", arg->val);
428}
429
729a7841
ACM
/* Names for the bpf() 'cmd' argument, indexed by value starting at 0. */
static const char *bpf_cmd[] = {
	"MAP_CREATE",
	"MAP_LOOKUP_ELEM",
	"MAP_UPDATE_ELEM",
	"MAP_DELETE_ELEM",
	"MAP_GET_NEXT_KEY",
	"PROG_LOAD",
};
static DEFINE_STRARRAY(bpf_cmd);
435
03e3adc9
ACM
436static const char *epoll_ctl_ops[] = { "ADD", "DEL", "MOD", };
437static DEFINE_STRARRAY_OFFSET(epoll_ctl_ops, 1);
eac032c5 438
1f115cb7
ACM
/* Names for the {get,set}itimer() 'which' argument. */
static const char *itimers[] = {
	"REAL",
	"VIRTUAL",
	"PROF",
};
static DEFINE_STRARRAY(itimers);
441
b62bee1b
ACM
/* Names for the keyctl() 'option' argument, indexed by value starting at 0. */
static const char *keyctl_options[] = {
	"GET_KEYRING_ID",
	"JOIN_SESSION_KEYRING",
	"UPDATE",
	"REVOKE",
	"CHOWN",
	"SETPERM",
	"DESCRIBE",
	"CLEAR",
	"LINK",
	"UNLINK",
	"SEARCH",
	"READ",
	"INSTANTIATE",
	"NEGATE",
	"SET_REQKEY_KEYRING",
	"SET_TIMEOUT",
	"ASSUME_AUTHORITY",
	"GET_SECURITY",
	"SESSION_TO_PARENT",
	"REJECT",
	"INSTANTIATE_IOV",
	"INVALIDATE",
	"GET_PERSISTENT",
};
static DEFINE_STRARRAY(keyctl_options);
450
efe6b882
ACM
/*
 * Names for the lseek() 'whence' argument. The DATA and HOLE entries are
 * only present when the headers define SEEK_DATA / SEEK_HOLE.
 */
static const char *whences[] = {
	"SET",
	"CUR",
	"END",
#ifdef SEEK_DATA
	"DATA",
#endif
#ifdef SEEK_HOLE
	"HOLE",
#endif
};
static DEFINE_STRARRAY(whences);
f9da0b0c 460
80f587d5
ACM
/* Names for the fcntl() 'cmd' argument, indexed by value starting at 0. */
static const char *fcntl_cmds[] = {
	"DUPFD",
	"GETFD",
	"SETFD",
	"GETFL",
	"SETFL",
	"GETLK",
	"SETLK",
	"SETLKW",
	"SETOWN",
	"GETOWN",
	"SETSIG",
	"GETSIG",
	"GETLK64",
	"SETLK64",
	"SETLKW64",
	"SETOWN_EX",
	"GETOWN_EX",
	"GETOWNER_UIDS",
};
static DEFINE_STRARRAY(fcntl_cmds);
468
83a51694
ACM
/*
 * Names for the fcntl() commands that start at F_LINUX_SPECIFIC_BASE.
 * Slots 3 and 4 are left NULL by the [5] designator.
 */
static const char *fcntl_linux_specific_cmds[] = {
	"SETLEASE",
	"GETLEASE",
	"NOTIFY",
	[5] = "CANCELLK",
	"DUPFD_CLOEXEC",
	"SETPIPE_SZ",
	"GETPIPE_SZ",
	"ADD_SEALS",
	"GET_SEALS",
	"GET_RW_HINT",
	"SET_RW_HINT",
	"GET_FILE_RW_HINT",
	"SET_FILE_RW_HINT",
};

static DEFINE_STRARRAY_OFFSET(fcntl_linux_specific_cmds, F_LINUX_SPECIFIC_BASE);
476
477static struct strarray *fcntl_cmds_arrays[] = {
478 &strarray__fcntl_cmds,
479 &strarray__fcntl_linux_specific_cmds,
480};
481
482static DEFINE_STRARRAYS(fcntl_cmds_arrays);
483
c045bf02
ACM
/* Names for the 'resource' argument of the rlimit syscalls. */
static const char *rlimit_resources[] = {
	"CPU",
	"FSIZE",
	"DATA",
	"STACK",
	"CORE",
	"RSS",
	"NPROC",
	"NOFILE",
	"MEMLOCK",
	"AS",
	"LOCKS",
	"SIGPENDING",
	"MSGQUEUE",
	"NICE",
	"RTPRIO",
	"RTTIME",
};
static DEFINE_STRARRAY(rlimit_resources);
490
eb5b1b14
ACM
/* Names for the rt_sigprocmask() 'how' argument. */
static const char *sighow[] = {
	"BLOCK",
	"UNBLOCK",
	"SETMASK",
};
static DEFINE_STRARRAY(sighow);
493
4f8c1b74
DA
/* Names for clock ids, indexed by value starting at 0. */
static const char *clockid[] = {
	"REALTIME",
	"MONOTONIC",
	"PROCESS_CPUTIME_ID",
	"THREAD_CPUTIME_ID",
	"MONOTONIC_RAW",
	"REALTIME_COARSE",
	"MONOTONIC_COARSE",
	"BOOTTIME",
	"REALTIME_ALARM",
	"BOOTTIME_ALARM",
	"SGI_CYCLE",
	"TAI"
};
static DEFINE_STRARRAY(clockid);
500
e10bce81
ACM
/* Names for socket address families, indexed by value starting at 0. */
static const char *socket_families[] = {
	"UNSPEC",
	"LOCAL",
	"INET",
	"AX25",
	"IPX",
	"APPLETALK",
	"NETROM",
	"BRIDGE",
	"ATMPVC",
	"X25",
	"INET6",
	"ROSE",
	"DECnet",
	"NETBEUI",
	"SECURITY",
	"KEY",
	"NETLINK",
	"PACKET",
	"ASH",
	"ECONET",
	"ATMSVC",
	"RDS",
	"SNA",
	"IRDA",
	"PPPOX",
	"WANPIPE",
	"LLC",
	"IB",
	"CAN",
	"TIPC",
	"BLUETOOTH",
	"IUCV",
	"RXRPC",
	"ISDN",
	"PHONET",
	"IEEE802154",
	"CAIF",
	"ALG",
	"NFC",
	"VSOCK",
};
static DEFINE_STRARRAY(socket_families);
510
51108999
ACM
511static size_t syscall_arg__scnprintf_access_mode(char *bf, size_t size,
512 struct syscall_arg *arg)
513{
514 size_t printed = 0;
515 int mode = arg->val;
516
517 if (mode == F_OK) /* 0 */
518 return scnprintf(bf, size, "F");
519#define P_MODE(n) \
520 if (mode & n##_OK) { \
521 printed += scnprintf(bf + printed, size - printed, "%s", #n); \
522 mode &= ~n##_OK; \
523 }
524
525 P_MODE(R);
526 P_MODE(W);
527 P_MODE(X);
528#undef P_MODE
529
530 if (mode)
531 printed += scnprintf(bf + printed, size - printed, "|%#x", mode);
532
533 return printed;
534}
535
536#define SCA_ACCMODE syscall_arg__scnprintf_access_mode
537
f994592d
ACM
/* Forward declaration; the definition is not in this part of the file. */
static size_t syscall_arg__scnprintf_filename(char *bf, size_t size,
					      struct syscall_arg *arg);

#define SCA_FILENAME syscall_arg__scnprintf_filename
542
46cce19b
ACM
543static size_t syscall_arg__scnprintf_pipe_flags(char *bf, size_t size,
544 struct syscall_arg *arg)
545{
546 int printed = 0, flags = arg->val;
547
548#define P_FLAG(n) \
549 if (flags & O_##n) { \
550 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
551 flags &= ~O_##n; \
552 }
553
554 P_FLAG(CLOEXEC);
555 P_FLAG(NONBLOCK);
556#undef P_FLAG
557
558 if (flags)
559 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
560
561 return printed;
562}
563
564#define SCA_PIPE_FLAGS syscall_arg__scnprintf_pipe_flags
565
a355a61e
ACM
/* Fallbacks for when the headers do not provide the getrandom() flags. */
#if !defined(GRND_NONBLOCK)
#define GRND_NONBLOCK	0x0001
#endif
#if !defined(GRND_RANDOM)
#define GRND_RANDOM	0x0002
#endif
572
39878d49
ACM
573static size_t syscall_arg__scnprintf_getrandom_flags(char *bf, size_t size,
574 struct syscall_arg *arg)
575{
576 int printed = 0, flags = arg->val;
577
578#define P_FLAG(n) \
579 if (flags & GRND_##n) { \
580 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
581 flags &= ~GRND_##n; \
582 }
583
584 P_FLAG(RANDOM);
585 P_FLAG(NONBLOCK);
586#undef P_FLAG
587
588 if (flags)
589 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
590
591 return printed;
592}
593
594#define SCA_GETRANDOM_FLAGS syscall_arg__scnprintf_getrandom_flags
595
82d4a110
ACM
/*
 * Initializer for a struct syscall_arg_fmt that prints through
 * strarray__<array>. The 'name' parameter is not used by the expansion.
 */
#define STRARRAY(name, array) \
	{ \
		.scnprintf = SCA_STRARRAY, \
		.parm	   = &strarray__##array, \
	}
453350dd 599
092bd3cd 600#include "trace/beauty/arch_errno_names.c"
ea8dc3ce 601#include "trace/beauty/eventfd.c"
d5d71e86 602#include "trace/beauty/futex_op.c"
3258abe0 603#include "trace/beauty/futex_val3.c"
df4cb167 604#include "trace/beauty/mmap.c"
ba2f22cf 605#include "trace/beauty/mode_t.c"
a30e6259 606#include "trace/beauty/msg_flags.c"
8f48df69 607#include "trace/beauty/open_flags.c"
62de344e 608#include "trace/beauty/perf_event_open.c"
d5d71e86 609#include "trace/beauty/pid.c"
a3bca91f 610#include "trace/beauty/sched_policy.c"
f5cd95ea 611#include "trace/beauty/seccomp.c"
12199d8e 612#include "trace/beauty/signum.c"
bbf86c43 613#include "trace/beauty/socket_type.c"
7206b900 614#include "trace/beauty/waitid_options.c"
a3bca91f 615
82d4a110
ACM
/*
 * Per-argument formatting description: the formatter callback, an opaque
 * parameter for it, and an optional argument name and show_zero flag.
 */
struct syscall_arg_fmt {
	size_t	   (*scnprintf)(char *bf, size_t size, struct syscall_arg *arg);
	void	   *parm;
	const char *name;
	bool	   show_zero;
};
622
514f1c67
ACM
623static struct syscall_fmt {
624 const char *name;
aec1930b 625 const char *alias;
82d4a110 626 struct syscall_arg_fmt arg[6];
332337da 627 u8 nr_args;
11c8e39f 628 bool errpid;
514f1c67 629 bool timeout;
04b34729 630 bool hexret;
514f1c67 631} syscall_fmts[] = {
1f63139c 632 { .name = "access",
82d4a110 633 .arg = { [1] = { .scnprintf = SCA_ACCMODE, /* mode */ }, }, },
1f63139c 634 { .name = "bpf",
82d4a110 635 .arg = { [0] = STRARRAY(cmd, bpf_cmd), }, },
beccb2b5 636 { .name = "brk", .hexret = true,
82d4a110 637 .arg = { [0] = { .scnprintf = SCA_HEX, /* brk */ }, }, },
1f63139c 638 { .name = "clock_gettime",
82d4a110 639 .arg = { [0] = STRARRAY(clk_id, clockid), }, },
33396a3a
ACM
640 { .name = "clone", .errpid = true, .nr_args = 5,
641 .arg = { [0] = { .name = "flags", .scnprintf = SCA_CLONE_FLAGS, },
642 [1] = { .name = "child_stack", .scnprintf = SCA_HEX, },
643 [2] = { .name = "parent_tidptr", .scnprintf = SCA_HEX, },
644 [3] = { .name = "child_tidptr", .scnprintf = SCA_HEX, },
645 [4] = { .name = "tls", .scnprintf = SCA_HEX, }, }, },
1f63139c 646 { .name = "close",
82d4a110 647 .arg = { [0] = { .scnprintf = SCA_CLOSE_FD, /* fd */ }, }, },
1f63139c 648 { .name = "epoll_ctl",
82d4a110 649 .arg = { [1] = STRARRAY(op, epoll_ctl_ops), }, },
1f63139c 650 { .name = "eventfd2",
82d4a110 651 .arg = { [1] = { .scnprintf = SCA_EFD_FLAGS, /* flags */ }, }, },
1f63139c 652 { .name = "fchmodat",
82d4a110 653 .arg = { [0] = { .scnprintf = SCA_FDAT, /* fd */ }, }, },
1f63139c 654 { .name = "fchownat",
82d4a110 655 .arg = { [0] = { .scnprintf = SCA_FDAT, /* fd */ }, }, },
1f63139c 656 { .name = "fcntl",
82d4a110 657 .arg = { [1] = { .scnprintf = SCA_FCNTL_CMD, /* cmd */
39cc355b
ACM
658 .parm = &strarrays__fcntl_cmds_arrays,
659 .show_zero = true, },
82d4a110 660 [2] = { .scnprintf = SCA_FCNTL_ARG, /* arg */ }, }, },
1f63139c 661 { .name = "flock",
82d4a110 662 .arg = { [1] = { .scnprintf = SCA_FLOCK, /* cmd */ }, }, },
1f63139c
ACM
663 { .name = "fstat", .alias = "newfstat", },
664 { .name = "fstatat", .alias = "newfstatat", },
665 { .name = "futex",
3258abe0
ACM
666 .arg = { [1] = { .scnprintf = SCA_FUTEX_OP, /* op */ },
667 [5] = { .scnprintf = SCA_FUTEX_VAL3, /* val3 */ }, }, },
1f63139c 668 { .name = "futimesat",
82d4a110 669 .arg = { [0] = { .scnprintf = SCA_FDAT, /* fd */ }, }, },
1f63139c 670 { .name = "getitimer",
82d4a110 671 .arg = { [0] = STRARRAY(which, itimers), }, },
c65f1070 672 { .name = "getpid", .errpid = true, },
d1d438a3 673 { .name = "getpgid", .errpid = true, },
c65f1070 674 { .name = "getppid", .errpid = true, },
1f63139c 675 { .name = "getrandom",
82d4a110 676 .arg = { [2] = { .scnprintf = SCA_GETRANDOM_FLAGS, /* flags */ }, }, },
1f63139c 677 { .name = "getrlimit",
82d4a110 678 .arg = { [0] = STRARRAY(resource, rlimit_resources), }, },
2d1073de 679 { .name = "gettid", .errpid = true, },
1f63139c 680 { .name = "ioctl",
82d4a110 681 .arg = {
844ae5b4
ACM
682#if defined(__i386__) || defined(__x86_64__)
683/*
684 * FIXME: Make this available to all arches.
685 */
1cc47f2d 686 [1] = { .scnprintf = SCA_IOCTL_CMD, /* cmd */ },
82d4a110 687 [2] = { .scnprintf = SCA_HEX, /* arg */ }, }, },
844ae5b4 688#else
82d4a110 689 [2] = { .scnprintf = SCA_HEX, /* arg */ }, }, },
844ae5b4 690#endif
1de3038d
ACM
691 { .name = "kcmp", .nr_args = 5,
692 .arg = { [0] = { .name = "pid1", .scnprintf = SCA_PID, },
693 [1] = { .name = "pid2", .scnprintf = SCA_PID, },
694 [2] = { .name = "type", .scnprintf = SCA_KCMP_TYPE, },
695 [3] = { .name = "idx1", .scnprintf = SCA_KCMP_IDX, },
696 [4] = { .name = "idx2", .scnprintf = SCA_KCMP_IDX, }, }, },
1f63139c 697 { .name = "keyctl",
82d4a110 698 .arg = { [0] = STRARRAY(option, keyctl_options), }, },
1f63139c 699 { .name = "kill",
82d4a110 700 .arg = { [1] = { .scnprintf = SCA_SIGNUM, /* sig */ }, }, },
1f63139c 701 { .name = "linkat",
82d4a110 702 .arg = { [0] = { .scnprintf = SCA_FDAT, /* fd */ }, }, },
1f63139c 703 { .name = "lseek",
82d4a110 704 .arg = { [2] = STRARRAY(whence, whences), }, },
1f63139c
ACM
705 { .name = "lstat", .alias = "newlstat", },
706 { .name = "madvise",
82d4a110
ACM
707 .arg = { [0] = { .scnprintf = SCA_HEX, /* start */ },
708 [2] = { .scnprintf = SCA_MADV_BHV, /* behavior */ }, }, },
1f63139c 709 { .name = "mkdirat",
82d4a110 710 .arg = { [0] = { .scnprintf = SCA_FDAT, /* fd */ }, }, },
1f63139c 711 { .name = "mknodat",
82d4a110 712 .arg = { [0] = { .scnprintf = SCA_FDAT, /* fd */ }, }, },
1f63139c 713 { .name = "mlock",
82d4a110 714 .arg = { [0] = { .scnprintf = SCA_HEX, /* addr */ }, }, },
1f63139c 715 { .name = "mlockall",
82d4a110 716 .arg = { [0] = { .scnprintf = SCA_HEX, /* addr */ }, }, },
beccb2b5 717 { .name = "mmap", .hexret = true,
54265664
JO
718/* The standard mmap maps to old_mmap on s390x */
719#if defined(__s390x__)
720 .alias = "old_mmap",
721#endif
82d4a110
ACM
722 .arg = { [0] = { .scnprintf = SCA_HEX, /* addr */ },
723 [2] = { .scnprintf = SCA_MMAP_PROT, /* prot */ },
724 [3] = { .scnprintf = SCA_MMAP_FLAGS, /* flags */ }, }, },
1f63139c 725 { .name = "mprotect",
82d4a110
ACM
726 .arg = { [0] = { .scnprintf = SCA_HEX, /* start */ },
727 [2] = { .scnprintf = SCA_MMAP_PROT, /* prot */ }, }, },
1f63139c 728 { .name = "mq_unlink",
82d4a110 729 .arg = { [0] = { .scnprintf = SCA_FILENAME, /* u_name */ }, }, },
ae685380 730 { .name = "mremap", .hexret = true,
82d4a110
ACM
731 .arg = { [0] = { .scnprintf = SCA_HEX, /* addr */ },
732 [3] = { .scnprintf = SCA_MREMAP_FLAGS, /* flags */ },
733 [4] = { .scnprintf = SCA_HEX, /* new_addr */ }, }, },
1f63139c 734 { .name = "munlock",
82d4a110 735 .arg = { [0] = { .scnprintf = SCA_HEX, /* addr */ }, }, },
1f63139c 736 { .name = "munmap",
82d4a110 737 .arg = { [0] = { .scnprintf = SCA_HEX, /* addr */ }, }, },
1f63139c 738 { .name = "name_to_handle_at",
82d4a110 739 .arg = { [0] = { .scnprintf = SCA_FDAT, /* dfd */ }, }, },
1f63139c 740 { .name = "newfstatat",
82d4a110 741 .arg = { [0] = { .scnprintf = SCA_FDAT, /* dfd */ }, }, },
1f63139c 742 { .name = "open",
82d4a110 743 .arg = { [1] = { .scnprintf = SCA_OPEN_FLAGS, /* flags */ }, }, },
1f63139c 744 { .name = "open_by_handle_at",
82d4a110
ACM
745 .arg = { [0] = { .scnprintf = SCA_FDAT, /* dfd */ },
746 [2] = { .scnprintf = SCA_OPEN_FLAGS, /* flags */ }, }, },
1f63139c 747 { .name = "openat",
82d4a110
ACM
748 .arg = { [0] = { .scnprintf = SCA_FDAT, /* dfd */ },
749 [2] = { .scnprintf = SCA_OPEN_FLAGS, /* flags */ }, }, },
1f63139c 750 { .name = "perf_event_open",
82d4a110
ACM
751 .arg = { [2] = { .scnprintf = SCA_INT, /* cpu */ },
752 [3] = { .scnprintf = SCA_FD, /* group_fd */ },
753 [4] = { .scnprintf = SCA_PERF_FLAGS, /* flags */ }, }, },
1f63139c 754 { .name = "pipe2",
82d4a110 755 .arg = { [1] = { .scnprintf = SCA_PIPE_FLAGS, /* flags */ }, }, },
83bc9c37
ACM
756 { .name = "pkey_alloc",
757 .arg = { [1] = { .scnprintf = SCA_PKEY_ALLOC_ACCESS_RIGHTS, /* access_rights */ }, }, },
758 { .name = "pkey_free",
759 .arg = { [0] = { .scnprintf = SCA_INT, /* key */ }, }, },
760 { .name = "pkey_mprotect",
761 .arg = { [0] = { .scnprintf = SCA_HEX, /* start */ },
762 [2] = { .scnprintf = SCA_MMAP_PROT, /* prot */ },
763 [3] = { .scnprintf = SCA_INT, /* pkey */ }, }, },
1f63139c
ACM
764 { .name = "poll", .timeout = true, },
765 { .name = "ppoll", .timeout = true, },
d688d037
ACM
766 { .name = "prctl", .alias = "arch_prctl",
767 .arg = { [0] = { .scnprintf = SCA_PRCTL_OPTION, /* option */ },
768 [1] = { .scnprintf = SCA_PRCTL_ARG2, /* arg2 */ },
769 [2] = { .scnprintf = SCA_PRCTL_ARG3, /* arg3 */ }, }, },
1f63139c
ACM
770 { .name = "pread", .alias = "pread64", },
771 { .name = "preadv", .alias = "pread", },
772 { .name = "prlimit64",
82d4a110 773 .arg = { [1] = STRARRAY(resource, rlimit_resources), }, },
1f63139c
ACM
774 { .name = "pwrite", .alias = "pwrite64", },
775 { .name = "readlinkat",
82d4a110 776 .arg = { [0] = { .scnprintf = SCA_FDAT, /* dfd */ }, }, },
1f63139c 777 { .name = "recvfrom",
82d4a110 778 .arg = { [3] = { .scnprintf = SCA_MSG_FLAGS, /* flags */ }, }, },
1f63139c 779 { .name = "recvmmsg",
82d4a110 780 .arg = { [3] = { .scnprintf = SCA_MSG_FLAGS, /* flags */ }, }, },
1f63139c 781 { .name = "recvmsg",
82d4a110 782 .arg = { [2] = { .scnprintf = SCA_MSG_FLAGS, /* flags */ }, }, },
1f63139c 783 { .name = "renameat",
82d4a110 784 .arg = { [0] = { .scnprintf = SCA_FDAT, /* dfd */ }, }, },
1f63139c 785 { .name = "rt_sigaction",
82d4a110 786 .arg = { [0] = { .scnprintf = SCA_SIGNUM, /* sig */ }, }, },
1f63139c 787 { .name = "rt_sigprocmask",
82d4a110 788 .arg = { [0] = STRARRAY(how, sighow), }, },
1f63139c 789 { .name = "rt_sigqueueinfo",
82d4a110 790 .arg = { [1] = { .scnprintf = SCA_SIGNUM, /* sig */ }, }, },
1f63139c 791 { .name = "rt_tgsigqueueinfo",
82d4a110 792 .arg = { [2] = { .scnprintf = SCA_SIGNUM, /* sig */ }, }, },
1f63139c 793 { .name = "sched_setscheduler",
82d4a110 794 .arg = { [1] = { .scnprintf = SCA_SCHED_POLICY, /* policy */ }, }, },
1f63139c 795 { .name = "seccomp",
82d4a110
ACM
796 .arg = { [0] = { .scnprintf = SCA_SECCOMP_OP, /* op */ },
797 [1] = { .scnprintf = SCA_SECCOMP_FLAGS, /* flags */ }, }, },
1f63139c
ACM
798 { .name = "select", .timeout = true, },
799 { .name = "sendmmsg",
82d4a110 800 .arg = { [3] = { .scnprintf = SCA_MSG_FLAGS, /* flags */ }, }, },
1f63139c 801 { .name = "sendmsg",
82d4a110 802 .arg = { [2] = { .scnprintf = SCA_MSG_FLAGS, /* flags */ }, }, },
1f63139c 803 { .name = "sendto",
82d4a110 804 .arg = { [3] = { .scnprintf = SCA_MSG_FLAGS, /* flags */ }, }, },
c65f1070 805 { .name = "set_tid_address", .errpid = true, },
1f63139c 806 { .name = "setitimer",
82d4a110 807 .arg = { [0] = STRARRAY(which, itimers), }, },
1f63139c 808 { .name = "setrlimit",
82d4a110 809 .arg = { [0] = STRARRAY(resource, rlimit_resources), }, },
1f63139c 810 { .name = "socket",
82d4a110 811 .arg = { [0] = STRARRAY(family, socket_families),
162d3edb
ACM
812 [1] = { .scnprintf = SCA_SK_TYPE, /* type */ },
813 [2] = { .scnprintf = SCA_SK_PROTO, /* protocol */ }, }, },
1f63139c 814 { .name = "socketpair",
82d4a110 815 .arg = { [0] = STRARRAY(family, socket_families),
162d3edb
ACM
816 [1] = { .scnprintf = SCA_SK_TYPE, /* type */ },
817 [2] = { .scnprintf = SCA_SK_PROTO, /* protocol */ }, }, },
1f63139c
ACM
818 { .name = "stat", .alias = "newstat", },
819 { .name = "statx",
82d4a110
ACM
820 .arg = { [0] = { .scnprintf = SCA_FDAT, /* fdat */ },
821 [2] = { .scnprintf = SCA_STATX_FLAGS, /* flags */ } ,
822 [3] = { .scnprintf = SCA_STATX_MASK, /* mask */ }, }, },
1f63139c 823 { .name = "swapoff",
82d4a110 824 .arg = { [0] = { .scnprintf = SCA_FILENAME, /* specialfile */ }, }, },
1f63139c 825 { .name = "swapon",
82d4a110 826 .arg = { [0] = { .scnprintf = SCA_FILENAME, /* specialfile */ }, }, },
1f63139c 827 { .name = "symlinkat",
82d4a110 828 .arg = { [0] = { .scnprintf = SCA_FDAT, /* dfd */ }, }, },
1f63139c 829 { .name = "tgkill",
82d4a110 830 .arg = { [2] = { .scnprintf = SCA_SIGNUM, /* sig */ }, }, },
1f63139c 831 { .name = "tkill",
82d4a110 832 .arg = { [1] = { .scnprintf = SCA_SIGNUM, /* sig */ }, }, },
1f63139c
ACM
833 { .name = "uname", .alias = "newuname", },
834 { .name = "unlinkat",
82d4a110 835 .arg = { [0] = { .scnprintf = SCA_FDAT, /* dfd */ }, }, },
1f63139c 836 { .name = "utimensat",
82d4a110 837 .arg = { [0] = { .scnprintf = SCA_FDAT, /* dirfd */ }, }, },
11c8e39f 838 { .name = "wait4", .errpid = true,
82d4a110 839 .arg = { [2] = { .scnprintf = SCA_WAITID_OPTIONS, /* options */ }, }, },
11c8e39f 840 { .name = "waitid", .errpid = true,
82d4a110 841 .arg = { [3] = { .scnprintf = SCA_WAITID_OPTIONS, /* options */ }, }, },
514f1c67
ACM
842};
843
844static int syscall_fmt__cmp(const void *name, const void *fmtp)
845{
846 const struct syscall_fmt *fmt = fmtp;
847 return strcmp(name, fmt->name);
848}
849
850static struct syscall_fmt *syscall_fmt__find(const char *name)
851{
852 const int nmemb = ARRAY_SIZE(syscall_fmts);
853 return bsearch(name, syscall_fmts, nmemb, sizeof(struct syscall_fmt), syscall_fmt__cmp);
854}
855
6a648b53
ACM
/*
 * Description of one syscall.
 *
 * is_exit:   set for "exit" and "exit_group".
 * is_open:   set for "open" and "openat", so that the fd returned in sys_exit
 *            can be associated with the pathname seen in sys_enter.
 * args_size: sum of the sizes of the syscall arguments; anything after that
 *            is augmented stuff: pathname for openat, etc.
 */
struct syscall {
	struct event_format    *tp_format;
	int			nr_args;
	int			args_size;
	bool			is_exit;
	bool			is_open;
	struct format_field    *args;
	const char	       *name;
	struct syscall_fmt     *fmt;
	struct syscall_arg_fmt *arg_fmt;
};
872
fd2b2975
ACM
873/*
874 * We need to have this 'calculated' boolean because in some cases we really
875 * don't know what is the duration of a syscall, for instance, when we start
876 * a session and some threads are waiting for a syscall to finish, say 'poll',
877 * in which case all we can do is to print "( ? ) for duration and for the
878 * start timestamp.
879 */
880static size_t fprintf_duration(unsigned long t, bool calculated, FILE *fp)
60c907ab
ACM
881{
882 double duration = (double)t / NSEC_PER_MSEC;
883 size_t printed = fprintf(fp, "(");
884
fd2b2975 885 if (!calculated)
522283fe 886 printed += fprintf(fp, " ");
fd2b2975 887 else if (duration >= 1.0)
60c907ab
ACM
888 printed += color_fprintf(fp, PERF_COLOR_RED, "%6.3f ms", duration);
889 else if (duration >= 0.01)
890 printed += color_fprintf(fp, PERF_COLOR_YELLOW, "%6.3f ms", duration);
891 else
892 printed += color_fprintf(fp, PERF_COLOR_NORMAL, "%6.3f ms", duration);
c24ff998 893 return printed + fprintf(fp, "): ");
60c907ab
ACM
894}
895
f994592d
ACM
896/**
897 * filename.ptr: The filename char pointer that will be vfs_getname'd
898 * filename.entry_str_pos: Where to insert the string translated from
899 * filename.ptr by the vfs_getname tracepoint/kprobe.
84486caa
ACM
900 * ret_scnprintf: syscall args may set this to a different syscall return
901 * formatter, for instance, fcntl may return fds, file flags, etc.
f994592d 902 */
752fde44
ACM
903struct thread_trace {
904 u64 entry_time;
752fde44 905 bool entry_pending;
efd5745e 906 unsigned long nr_events;
a2ea67d7 907 unsigned long pfmaj, pfmin;
752fde44 908 char *entry_str;
1302d88e 909 double runtime_ms;
7ee57434 910 size_t (*ret_scnprintf)(char *bf, size_t size, struct syscall_arg *arg);
f994592d
ACM
911 struct {
912 unsigned long ptr;
7f4f8001
ACM
913 short int entry_str_pos;
914 bool pending_open;
915 unsigned int namelen;
916 char *name;
f994592d 917 } filename;
75b757ca
ACM
918 struct {
919 int max;
920 char **table;
921 } paths;
bf2575c1
DA
922
923 struct intlist *syscall_stats;
752fde44
ACM
924};
925
926static struct thread_trace *thread_trace__new(void)
927{
75b757ca
ACM
928 struct thread_trace *ttrace = zalloc(sizeof(struct thread_trace));
929
930 if (ttrace)
931 ttrace->paths.max = -1;
932
bf2575c1
DA
933 ttrace->syscall_stats = intlist__new(NULL);
934
75b757ca 935 return ttrace;
752fde44
ACM
936}
937
c24ff998 938static struct thread_trace *thread__trace(struct thread *thread, FILE *fp)
752fde44 939{
efd5745e
ACM
940 struct thread_trace *ttrace;
941
752fde44
ACM
942 if (thread == NULL)
943 goto fail;
944
89dceb22
NK
945 if (thread__priv(thread) == NULL)
946 thread__set_priv(thread, thread_trace__new());
48000a1a 947
89dceb22 948 if (thread__priv(thread) == NULL)
752fde44
ACM
949 goto fail;
950
89dceb22 951 ttrace = thread__priv(thread);
efd5745e
ACM
952 ++ttrace->nr_events;
953
954 return ttrace;
752fde44 955fail:
c24ff998 956 color_fprintf(fp, PERF_COLOR_RED,
752fde44
ACM
957 "WARNING: not enough memory, dropping samples!\n");
958 return NULL;
959}
960
84486caa
ACM
961
962void syscall_arg__set_ret_scnprintf(struct syscall_arg *arg,
7ee57434 963 size_t (*ret_scnprintf)(char *bf, size_t size, struct syscall_arg *arg))
84486caa
ACM
964{
965 struct thread_trace *ttrace = thread__priv(arg->thread);
966
967 ttrace->ret_scnprintf = ret_scnprintf;
968}
969
598d02c5
SF
970#define TRACE_PFMAJ (1 << 0)
971#define TRACE_PFMIN (1 << 1)
972
e4d44e83
ACM
973static const size_t trace__entry_str_size = 2048;
974
97119f37 975static int trace__set_fd_pathname(struct thread *thread, int fd, const char *pathname)
75b757ca 976{
89dceb22 977 struct thread_trace *ttrace = thread__priv(thread);
75b757ca
ACM
978
979 if (fd > ttrace->paths.max) {
980 char **npath = realloc(ttrace->paths.table, (fd + 1) * sizeof(char *));
981
982 if (npath == NULL)
983 return -1;
984
985 if (ttrace->paths.max != -1) {
986 memset(npath + ttrace->paths.max + 1, 0,
987 (fd - ttrace->paths.max) * sizeof(char *));
988 } else {
989 memset(npath, 0, (fd + 1) * sizeof(char *));
990 }
991
992 ttrace->paths.table = npath;
993 ttrace->paths.max = fd;
994 }
995
996 ttrace->paths.table[fd] = strdup(pathname);
997
998 return ttrace->paths.table[fd] != NULL ? 0 : -1;
999}
1000
97119f37
ACM
1001static int thread__read_fd_path(struct thread *thread, int fd)
1002{
1003 char linkname[PATH_MAX], pathname[PATH_MAX];
1004 struct stat st;
1005 int ret;
1006
1007 if (thread->pid_ == thread->tid) {
1008 scnprintf(linkname, sizeof(linkname),
1009 "/proc/%d/fd/%d", thread->pid_, fd);
1010 } else {
1011 scnprintf(linkname, sizeof(linkname),
1012 "/proc/%d/task/%d/fd/%d", thread->pid_, thread->tid, fd);
1013 }
1014
1015 if (lstat(linkname, &st) < 0 || st.st_size + 1 > (off_t)sizeof(pathname))
1016 return -1;
1017
1018 ret = readlink(linkname, pathname, sizeof(pathname));
1019
1020 if (ret < 0 || ret > st.st_size)
1021 return -1;
1022
1023 pathname[ret] = '\0';
1024 return trace__set_fd_pathname(thread, fd, pathname);
1025}
1026
c522739d
ACM
1027static const char *thread__fd_path(struct thread *thread, int fd,
1028 struct trace *trace)
75b757ca 1029{
89dceb22 1030 struct thread_trace *ttrace = thread__priv(thread);
75b757ca
ACM
1031
1032 if (ttrace == NULL)
1033 return NULL;
1034
1035 if (fd < 0)
1036 return NULL;
1037
cdcd1e6b 1038 if ((fd > ttrace->paths.max || ttrace->paths.table[fd] == NULL)) {
c522739d
ACM
1039 if (!trace->live)
1040 return NULL;
1041 ++trace->stats.proc_getname;
cdcd1e6b 1042 if (thread__read_fd_path(thread, fd))
c522739d
ACM
1043 return NULL;
1044 }
75b757ca
ACM
1045
1046 return ttrace->paths.table[fd];
1047}
1048
fc65eb82 1049size_t syscall_arg__scnprintf_fd(char *bf, size_t size, struct syscall_arg *arg)
75b757ca
ACM
1050{
1051 int fd = arg->val;
1052 size_t printed = scnprintf(bf, size, "%d", fd);
c522739d 1053 const char *path = thread__fd_path(arg->thread, fd, arg->trace);
75b757ca
ACM
1054
1055 if (path)
1056 printed += scnprintf(bf + printed, size - printed, "<%s>", path);
1057
1058 return printed;
1059}
1060
0a2f7540
ACM
1061size_t pid__scnprintf_fd(struct trace *trace, pid_t pid, int fd, char *bf, size_t size)
1062{
1063 size_t printed = scnprintf(bf, size, "%d", fd);
1064 struct thread *thread = machine__find_thread(trace->host, pid, pid);
1065
1066 if (thread) {
1067 const char *path = thread__fd_path(thread, fd, trace);
1068
1069 if (path)
1070 printed += scnprintf(bf + printed, size - printed, "<%s>", path);
1071
1072 thread__put(thread);
1073 }
1074
1075 return printed;
1076}
1077
75b757ca
ACM
1078static size_t syscall_arg__scnprintf_close_fd(char *bf, size_t size,
1079 struct syscall_arg *arg)
1080{
1081 int fd = arg->val;
1082 size_t printed = syscall_arg__scnprintf_fd(bf, size, arg);
89dceb22 1083 struct thread_trace *ttrace = thread__priv(arg->thread);
75b757ca 1084
04662523
ACM
1085 if (ttrace && fd >= 0 && fd <= ttrace->paths.max)
1086 zfree(&ttrace->paths.table[fd]);
75b757ca
ACM
1087
1088 return printed;
1089}
1090
f994592d
ACM
1091static void thread__set_filename_pos(struct thread *thread, const char *bf,
1092 unsigned long ptr)
1093{
1094 struct thread_trace *ttrace = thread__priv(thread);
1095
1096 ttrace->filename.ptr = ptr;
1097 ttrace->filename.entry_str_pos = bf - ttrace->entry_str;
1098}
1099
1100static size_t syscall_arg__scnprintf_filename(char *bf, size_t size,
1101 struct syscall_arg *arg)
1102{
1103 unsigned long ptr = arg->val;
1104
1105 if (!arg->trace->vfs_getname)
1106 return scnprintf(bf, size, "%#x", ptr);
1107
1108 thread__set_filename_pos(arg->thread, bf, ptr);
1109 return 0;
1110}
1111
ae9ed035
ACM
1112static bool trace__filter_duration(struct trace *trace, double t)
1113{
1114 return t < (trace->duration_filter * NSEC_PER_MSEC);
1115}
1116
fd2b2975 1117static size_t __trace__fprintf_tstamp(struct trace *trace, u64 tstamp, FILE *fp)
752fde44
ACM
1118{
1119 double ts = (double)(tstamp - trace->base_time) / NSEC_PER_MSEC;
1120
60c907ab 1121 return fprintf(fp, "%10.3f ", ts);
752fde44
ACM
1122}
1123
fd2b2975
ACM
1124/*
1125 * We're handling tstamp=0 as an undefined tstamp, i.e. like when we are
1126 * using ttrace->entry_time for a thread that receives a sys_exit without
1127 * first having received a sys_enter ("poll" issued before tracing session
1128 * starts, lost sys_enter exit due to ring buffer overflow).
1129 */
1130static size_t trace__fprintf_tstamp(struct trace *trace, u64 tstamp, FILE *fp)
1131{
1132 if (tstamp > 0)
1133 return __trace__fprintf_tstamp(trace, tstamp, fp);
1134
1135 return fprintf(fp, " ? ");
1136}
1137
/*
 * Flags set from sig_handler() and read by the rest of the tool. Objects
 * shared between a signal handler and the interrupted code must be
 * 'volatile sig_atomic_t'.
 */
static volatile sig_atomic_t done = false;
static volatile sig_atomic_t interrupted = false;

/* Ask for termination, remembering if that was due to a SIGINT. */
static void sig_handler(int sig)
{
	done = true;
	interrupted = sig == SIGINT;
}
1146
6dcbd212 1147static size_t trace__fprintf_comm_tid(struct trace *trace, struct thread *thread, FILE *fp)
752fde44 1148{
6dcbd212 1149 size_t printed = 0;
752fde44 1150
50c95cbd
ACM
1151 if (trace->multiple_threads) {
1152 if (trace->show_comm)
1902efe7 1153 printed += fprintf(fp, "%.14s/", thread__comm_str(thread));
38051234 1154 printed += fprintf(fp, "%d ", thread->tid);
50c95cbd 1155 }
752fde44
ACM
1156
1157 return printed;
1158}
1159
6dcbd212
ACM
1160static size_t trace__fprintf_entry_head(struct trace *trace, struct thread *thread,
1161 u64 duration, bool duration_calculated, u64 tstamp, FILE *fp)
1162{
1163 size_t printed = trace__fprintf_tstamp(trace, tstamp, fp);
1164 printed += fprintf_duration(duration, duration_calculated, fp);
1165 return printed + trace__fprintf_comm_tid(trace, thread, fp);
1166}
1167
c24ff998 1168static int trace__process_event(struct trace *trace, struct machine *machine,
162f0bef 1169 union perf_event *event, struct perf_sample *sample)
752fde44
ACM
1170{
1171 int ret = 0;
1172
1173 switch (event->header.type) {
1174 case PERF_RECORD_LOST:
c24ff998 1175 color_fprintf(trace->output, PERF_COLOR_RED,
752fde44 1176 "LOST %" PRIu64 " events!\n", event->lost.lost);
162f0bef 1177 ret = machine__process_lost_event(machine, event, sample);
3ed5ca2e 1178 break;
752fde44 1179 default:
162f0bef 1180 ret = machine__process_event(machine, event, sample);
752fde44
ACM
1181 break;
1182 }
1183
1184 return ret;
1185}
1186
c24ff998 1187static int trace__tool_process(struct perf_tool *tool,
752fde44 1188 union perf_event *event,
162f0bef 1189 struct perf_sample *sample,
752fde44
ACM
1190 struct machine *machine)
1191{
c24ff998 1192 struct trace *trace = container_of(tool, struct trace, tool);
162f0bef 1193 return trace__process_event(trace, machine, event, sample);
752fde44
ACM
1194}
1195
caf8a0d0
ACM
1196static char *trace__machine__resolve_kernel_addr(void *vmachine, unsigned long long *addrp, char **modp)
1197{
1198 struct machine *machine = vmachine;
1199
1200 if (machine->kptr_restrict_warned)
1201 return NULL;
1202
1203 if (symbol_conf.kptr_restrict) {
1204 pr_warning("Kernel address maps (/proc/{kallsyms,modules}) are restricted.\n\n"
1205 "Check /proc/sys/kernel/kptr_restrict.\n\n"
1206 "Kernel samples will not be resolved.\n");
1207 machine->kptr_restrict_warned = true;
1208 return NULL;
1209 }
1210
1211 return machine__resolve_kernel_addr(vmachine, addrp, modp);
1212}
1213
752fde44
ACM
1214static int trace__symbols_init(struct trace *trace, struct perf_evlist *evlist)
1215{
0a7e6d1b 1216 int err = symbol__init(NULL);
752fde44
ACM
1217
1218 if (err)
1219 return err;
1220
8fb598e5
DA
1221 trace->host = machine__new_host();
1222 if (trace->host == NULL)
1223 return -ENOMEM;
752fde44 1224
cbd5c178
AV
1225 err = trace_event__register_resolver(trace->host, trace__machine__resolve_kernel_addr);
1226 if (err < 0)
1227 goto out;
706c3da4 1228
a33fbd56 1229 err = __machine__synthesize_threads(trace->host, &trace->tool, &trace->opts.target,
9d9cad76 1230 evlist->threads, trace__tool_process, false,
340b47f5 1231 trace->opts.proc_map_timeout, 1);
cbd5c178 1232out:
752fde44
ACM
1233 if (err)
1234 symbol__exit();
1235
1236 return err;
1237}
1238
33974a41
AV
1239static void trace__symbols__exit(struct trace *trace)
1240{
1241 machine__exit(trace->host);
1242 trace->host = NULL;
1243
1244 symbol__exit();
1245}
1246
5e58fcfa 1247static int syscall__alloc_arg_fmts(struct syscall *sc, int nr_args)
13d4ff3e 1248{
5e58fcfa 1249 int idx;
13d4ff3e 1250
332337da
ACM
1251 if (nr_args == 6 && sc->fmt && sc->fmt->nr_args != 0)
1252 nr_args = sc->fmt->nr_args;
1253
5e58fcfa 1254 sc->arg_fmt = calloc(nr_args, sizeof(*sc->arg_fmt));
82d4a110 1255 if (sc->arg_fmt == NULL)
13d4ff3e
ACM
1256 return -1;
1257
5e58fcfa
ACM
1258 for (idx = 0; idx < nr_args; ++idx) {
1259 if (sc->fmt)
82d4a110 1260 sc->arg_fmt[idx] = sc->fmt->arg[idx];
5e58fcfa 1261 }
82d4a110 1262
5e58fcfa
ACM
1263 sc->nr_args = nr_args;
1264 return 0;
1265}
1266
1267static int syscall__set_arg_fmts(struct syscall *sc)
1268{
7a983a0f 1269 struct format_field *field, *last_field = NULL;
5e58fcfa
ACM
1270 int idx = 0, len;
1271
1272 for (field = sc->args; field; field = field->next, ++idx) {
7a983a0f
ACM
1273 last_field = field;
1274
5e58fcfa
ACM
1275 if (sc->fmt && sc->fmt->arg[idx].scnprintf)
1276 continue;
1f115cb7 1277
82d4a110 1278 if (strcmp(field->type, "const char *") == 0 &&
12f3ca4f
ACM
1279 (strcmp(field->name, "filename") == 0 ||
1280 strcmp(field->name, "path") == 0 ||
1281 strcmp(field->name, "pathname") == 0))
82d4a110 1282 sc->arg_fmt[idx].scnprintf = SCA_FILENAME;
beccb2b5 1283 else if (field->flags & FIELD_IS_POINTER)
82d4a110 1284 sc->arg_fmt[idx].scnprintf = syscall_arg__scnprintf_hex;
d1d438a3 1285 else if (strcmp(field->type, "pid_t") == 0)
82d4a110 1286 sc->arg_fmt[idx].scnprintf = SCA_PID;
ba2f22cf 1287 else if (strcmp(field->type, "umode_t") == 0)
82d4a110 1288 sc->arg_fmt[idx].scnprintf = SCA_MODE_T;
b6565c90
ACM
1289 else if ((strcmp(field->type, "int") == 0 ||
1290 strcmp(field->type, "unsigned int") == 0 ||
1291 strcmp(field->type, "long") == 0) &&
1292 (len = strlen(field->name)) >= 2 &&
1293 strcmp(field->name + len - 2, "fd") == 0) {
1294 /*
1295 * /sys/kernel/tracing/events/syscalls/sys_enter*
1296 * egrep 'field:.*fd;' .../format|sed -r 's/.*field:([a-z ]+) [a-z_]*fd.+/\1/g'|sort|uniq -c
1297 * 65 int
1298 * 23 unsigned int
1299 * 7 unsigned long
1300 */
82d4a110 1301 sc->arg_fmt[idx].scnprintf = SCA_FD;
b6565c90 1302 }
13d4ff3e
ACM
1303 }
1304
7a983a0f
ACM
1305 if (last_field)
1306 sc->args_size = last_field->offset + last_field->size;
1307
13d4ff3e
ACM
1308 return 0;
1309}
1310
514f1c67
ACM
1311static int trace__read_syscall_info(struct trace *trace, int id)
1312{
1313 char tp_name[128];
1314 struct syscall *sc;
fd0db102 1315 const char *name = syscalltbl__name(trace->sctbl, id);
3a531260
ACM
1316
1317 if (name == NULL)
1318 return -1;
514f1c67
ACM
1319
1320 if (id > trace->syscalls.max) {
1321 struct syscall *nsyscalls = realloc(trace->syscalls.table, (id + 1) * sizeof(*sc));
1322
1323 if (nsyscalls == NULL)
1324 return -1;
1325
1326 if (trace->syscalls.max != -1) {
1327 memset(nsyscalls + trace->syscalls.max + 1, 0,
1328 (id - trace->syscalls.max) * sizeof(*sc));
1329 } else {
1330 memset(nsyscalls, 0, (id + 1) * sizeof(*sc));
1331 }
1332
1333 trace->syscalls.table = nsyscalls;
1334 trace->syscalls.max = id;
1335 }
1336
1337 sc = trace->syscalls.table + id;
3a531260 1338 sc->name = name;
2ae3a312 1339
3a531260 1340 sc->fmt = syscall_fmt__find(sc->name);
514f1c67 1341
aec1930b 1342 snprintf(tp_name, sizeof(tp_name), "sys_enter_%s", sc->name);
97978b3e 1343 sc->tp_format = trace_event__tp_format("syscalls", tp_name);
aec1930b 1344
8dd2a131 1345 if (IS_ERR(sc->tp_format) && sc->fmt && sc->fmt->alias) {
aec1930b 1346 snprintf(tp_name, sizeof(tp_name), "sys_enter_%s", sc->fmt->alias);
97978b3e 1347 sc->tp_format = trace_event__tp_format("syscalls", tp_name);
aec1930b 1348 }
514f1c67 1349
5e58fcfa
ACM
1350 if (syscall__alloc_arg_fmts(sc, IS_ERR(sc->tp_format) ? 6 : sc->tp_format->format.nr_fields))
1351 return -1;
1352
8dd2a131 1353 if (IS_ERR(sc->tp_format))
13d4ff3e
ACM
1354 return -1;
1355
f208bd8d 1356 sc->args = sc->tp_format->format.fields;
c42de706
TS
1357 /*
1358 * We need to check and discard the first variable '__syscall_nr'
1359 * or 'nr' that mean the syscall number. It is needless here.
1360 * So drop '__syscall_nr' or 'nr' field but does not exist on older kernels.
1361 */
1362 if (sc->args && (!strcmp(sc->args->name, "__syscall_nr") || !strcmp(sc->args->name, "nr"))) {
f208bd8d
ACM
1363 sc->args = sc->args->next;
1364 --sc->nr_args;
1365 }
1366
5089f20e 1367 sc->is_exit = !strcmp(name, "exit_group") || !strcmp(name, "exit");
6a648b53 1368 sc->is_open = !strcmp(name, "open") || !strcmp(name, "openat");
5089f20e 1369
13d4ff3e 1370 return syscall__set_arg_fmts(sc);
514f1c67
ACM
1371}
1372
d0cc439b
ACM
1373static int trace__validate_ev_qualifier(struct trace *trace)
1374{
8b3ce757 1375 int err = 0, i;
27702bcf 1376 size_t nr_allocated;
d0cc439b
ACM
1377 struct str_node *pos;
1378
8b3ce757
ACM
1379 trace->ev_qualifier_ids.nr = strlist__nr_entries(trace->ev_qualifier);
1380 trace->ev_qualifier_ids.entries = malloc(trace->ev_qualifier_ids.nr *
1381 sizeof(trace->ev_qualifier_ids.entries[0]));
1382
1383 if (trace->ev_qualifier_ids.entries == NULL) {
1384 fputs("Error:\tNot enough memory for allocating events qualifier ids\n",
1385 trace->output);
1386 err = -EINVAL;
1387 goto out;
1388 }
1389
27702bcf 1390 nr_allocated = trace->ev_qualifier_ids.nr;
8b3ce757
ACM
1391 i = 0;
1392
602a1f4d 1393 strlist__for_each_entry(pos, trace->ev_qualifier) {
d0cc439b 1394 const char *sc = pos->s;
27702bcf 1395 int id = syscalltbl__id(trace->sctbl, sc), match_next = -1;
d0cc439b 1396
8b3ce757 1397 if (id < 0) {
27702bcf
ACM
1398 id = syscalltbl__strglobmatch_first(trace->sctbl, sc, &match_next);
1399 if (id >= 0)
1400 goto matches;
1401
d0cc439b
ACM
1402 if (err == 0) {
1403 fputs("Error:\tInvalid syscall ", trace->output);
1404 err = -EINVAL;
1405 } else {
1406 fputs(", ", trace->output);
1407 }
1408
1409 fputs(sc, trace->output);
1410 }
27702bcf 1411matches:
8b3ce757 1412 trace->ev_qualifier_ids.entries[i++] = id;
27702bcf
ACM
1413 if (match_next == -1)
1414 continue;
1415
1416 while (1) {
1417 id = syscalltbl__strglobmatch_next(trace->sctbl, sc, &match_next);
1418 if (id < 0)
1419 break;
1420 if (nr_allocated == trace->ev_qualifier_ids.nr) {
1421 void *entries;
1422
1423 nr_allocated += 8;
1424 entries = realloc(trace->ev_qualifier_ids.entries,
1425 nr_allocated * sizeof(trace->ev_qualifier_ids.entries[0]));
1426 if (entries == NULL) {
1427 err = -ENOMEM;
1428 fputs("\nError:\t Not enough memory for parsing\n", trace->output);
1429 goto out_free;
1430 }
1431 trace->ev_qualifier_ids.entries = entries;
1432 }
1433 trace->ev_qualifier_ids.nr++;
1434 trace->ev_qualifier_ids.entries[i++] = id;
1435 }
d0cc439b
ACM
1436 }
1437
1438 if (err < 0) {
1439 fputs("\nHint:\ttry 'perf list syscalls:sys_enter_*'"
1440 "\nHint:\tand: 'man syscalls'\n", trace->output);
27702bcf 1441out_free:
8b3ce757
ACM
1442 zfree(&trace->ev_qualifier_ids.entries);
1443 trace->ev_qualifier_ids.nr = 0;
d0cc439b 1444 }
8b3ce757 1445out:
d0cc439b
ACM
1446 return err;
1447}
1448
55d43bca
DA
1449/*
1450 * args is to be interpreted as a series of longs but we need to handle
1451 * 8-byte unaligned accesses. args points to raw_data within the event
1452 * and raw_data is guaranteed to be 8-byte unaligned because it is
1453 * preceded by raw_size which is a u32. So we need to copy args to a temp
1454 * variable to read it. Most notably this avoids extended load instructions
1455 * on unaligned addresses
1456 */
325f5091 1457unsigned long syscall_arg__val(struct syscall_arg *arg, u8 idx)
f9f83b33
ACM
1458{
1459 unsigned long val;
325f5091 1460 unsigned char *p = arg->args + sizeof(unsigned long) * idx;
f9f83b33
ACM
1461
1462 memcpy(&val, p, sizeof(val));
1463 return val;
1464}
1465
c51bdfec
ACM
1466static size_t syscall__scnprintf_name(struct syscall *sc, char *bf, size_t size,
1467 struct syscall_arg *arg)
1468{
1469 if (sc->arg_fmt && sc->arg_fmt[arg->idx].name)
1470 return scnprintf(bf, size, "%s: ", sc->arg_fmt[arg->idx].name);
1471
1472 return scnprintf(bf, size, "arg%d: ", arg->idx);
1473}
1474
d032d79e
ACM
1475static size_t syscall__scnprintf_val(struct syscall *sc, char *bf, size_t size,
1476 struct syscall_arg *arg, unsigned long val)
1477{
1478 if (sc->arg_fmt && sc->arg_fmt[arg->idx].scnprintf) {
1479 arg->val = val;
1480 if (sc->arg_fmt[arg->idx].parm)
1481 arg->parm = sc->arg_fmt[arg->idx].parm;
1482 return sc->arg_fmt[arg->idx].scnprintf(bf, size, arg);
1483 }
1484 return scnprintf(bf, size, "%ld", val);
1485}
1486
752fde44 1487static size_t syscall__scnprintf_args(struct syscall *sc, char *bf, size_t size,
7a983a0f
ACM
1488 unsigned char *args, void *augmented_args, int augmented_args_size,
1489 struct trace *trace, struct thread *thread)
514f1c67 1490{
514f1c67 1491 size_t printed = 0;
55d43bca 1492 unsigned long val;
d032d79e
ACM
1493 u8 bit = 1;
1494 struct syscall_arg arg = {
1495 .args = args,
7a983a0f
ACM
1496 .augmented = {
1497 .size = augmented_args_size,
1498 .args = augmented_args,
1499 },
d032d79e
ACM
1500 .idx = 0,
1501 .mask = 0,
1502 .trace = trace,
1503 .thread = thread,
1504 };
84486caa
ACM
1505 struct thread_trace *ttrace = thread__priv(thread);
1506
1507 /*
1508 * Things like fcntl will set this in its 'cmd' formatter to pick the
1509 * right formatter for the return value (an fd? file flags?), which is
1510 * not needed for syscalls that always return a given type, say an fd.
1511 */
1512 ttrace->ret_scnprintf = NULL;
514f1c67 1513
f208bd8d 1514 if (sc->args != NULL) {
514f1c67 1515 struct format_field *field;
6e7eeb51 1516
f208bd8d 1517 for (field = sc->args; field;
01533e97
ACM
1518 field = field->next, ++arg.idx, bit <<= 1) {
1519 if (arg.mask & bit)
6e7eeb51 1520 continue;
55d43bca 1521
f9f83b33 1522 val = syscall_arg__val(&arg, arg.idx);
55d43bca 1523
4aa58232
ACM
1524 /*
1525 * Suppress this argument if its value is zero and
1526 * and we don't have a string associated in an
1527 * strarray for it.
1528 */
55d43bca 1529 if (val == 0 &&
82d4a110 1530 !(sc->arg_fmt &&
d47737d5
ACM
1531 (sc->arg_fmt[arg.idx].show_zero ||
1532 sc->arg_fmt[arg.idx].scnprintf == SCA_STRARRAY ||
82d4a110
ACM
1533 sc->arg_fmt[arg.idx].scnprintf == SCA_STRARRAYS) &&
1534 sc->arg_fmt[arg.idx].parm))
22ae5cf1
ACM
1535 continue;
1536
752fde44 1537 printed += scnprintf(bf + printed, size - printed,
13d4ff3e 1538 "%s%s: ", printed ? ", " : "", field->name);
d032d79e 1539 printed += syscall__scnprintf_val(sc, bf + printed, size - printed, &arg, val);
514f1c67 1540 }
4c4d6e51
ACM
1541 } else if (IS_ERR(sc->tp_format)) {
1542 /*
1543 * If we managed to read the tracepoint /format file, then we
1544 * may end up not having any args, like with gettid(), so only
1545 * print the raw args when we didn't manage to read it.
1546 */
332337da 1547 while (arg.idx < sc->nr_args) {
d032d79e
ACM
1548 if (arg.mask & bit)
1549 goto next_arg;
1550 val = syscall_arg__val(&arg, arg.idx);
c51bdfec
ACM
1551 if (printed)
1552 printed += scnprintf(bf + printed, size - printed, ", ");
1553 printed += syscall__scnprintf_name(sc, bf + printed, size - printed, &arg);
d032d79e
ACM
1554 printed += syscall__scnprintf_val(sc, bf + printed, size - printed, &arg, val);
1555next_arg:
1556 ++arg.idx;
1557 bit <<= 1;
514f1c67
ACM
1558 }
1559 }
1560
1561 return printed;
1562}
1563
ba3d7dee 1564typedef int (*tracepoint_handler)(struct trace *trace, struct perf_evsel *evsel,
0c82adcf 1565 union perf_event *event,
ba3d7dee
ACM
1566 struct perf_sample *sample);
1567
1568static struct syscall *trace__syscall_info(struct trace *trace,
bf2575c1 1569 struct perf_evsel *evsel, int id)
ba3d7dee 1570{
ba3d7dee
ACM
1571
1572 if (id < 0) {
adaa18bf
ACM
1573
1574 /*
1575 * XXX: Noticed on x86_64, reproduced as far back as 3.0.36, haven't tried
1576 * before that, leaving at a higher verbosity level till that is
1577 * explained. Reproduced with plain ftrace with:
1578 *
1579 * echo 1 > /t/events/raw_syscalls/sys_exit/enable
1580 * grep "NR -1 " /t/trace_pipe
1581 *
1582 * After generating some load on the machine.
1583 */
1584 if (verbose > 1) {
1585 static u64 n;
1586 fprintf(trace->output, "Invalid syscall %d id, skipping (%s, %" PRIu64 ") ...\n",
1587 id, perf_evsel__name(evsel), ++n);
1588 }
ba3d7dee
ACM
1589 return NULL;
1590 }
1591
1592 if ((id > trace->syscalls.max || trace->syscalls.table[id].name == NULL) &&
1593 trace__read_syscall_info(trace, id))
1594 goto out_cant_read;
1595
1596 if ((id > trace->syscalls.max || trace->syscalls.table[id].name == NULL))
1597 goto out_cant_read;
1598
1599 return &trace->syscalls.table[id];
1600
1601out_cant_read:
bb963e16 1602 if (verbose > 0) {
7c304ee0
ACM
1603 fprintf(trace->output, "Problems reading syscall %d", id);
1604 if (id <= trace->syscalls.max && trace->syscalls.table[id].name != NULL)
1605 fprintf(trace->output, "(%s)", trace->syscalls.table[id].name);
1606 fputs(" information\n", trace->output);
1607 }
ba3d7dee
ACM
1608 return NULL;
1609}
1610
bf2575c1
DA
1611static void thread__update_stats(struct thread_trace *ttrace,
1612 int id, struct perf_sample *sample)
1613{
1614 struct int_node *inode;
1615 struct stats *stats;
1616 u64 duration = 0;
1617
1618 inode = intlist__findnew(ttrace->syscall_stats, id);
1619 if (inode == NULL)
1620 return;
1621
1622 stats = inode->priv;
1623 if (stats == NULL) {
1624 stats = malloc(sizeof(struct stats));
1625 if (stats == NULL)
1626 return;
1627 init_stats(stats);
1628 inode->priv = stats;
1629 }
1630
1631 if (ttrace->entry_time && sample->time > ttrace->entry_time)
1632 duration = sample->time - ttrace->entry_time;
1633
1634 update_stats(stats, duration);
1635}
1636
522283fe 1637static int trace__printf_interrupted_entry(struct trace *trace)
e596663e
ACM
1638{
1639 struct thread_trace *ttrace;
e596663e
ACM
1640 size_t printed;
1641
0a6545bd 1642 if (trace->failure_only || trace->current == NULL)
e596663e
ACM
1643 return 0;
1644
1645 ttrace = thread__priv(trace->current);
1646
1647 if (!ttrace->entry_pending)
1648 return 0;
1649
522283fe 1650 printed = trace__fprintf_entry_head(trace, trace->current, 0, false, ttrace->entry_time, trace->output);
e596663e
ACM
1651 printed += fprintf(trace->output, "%-70s) ...\n", ttrace->entry_str);
1652 ttrace->entry_pending = false;
1653
1654 return printed;
1655}
1656
591421e1
ACM
1657static int trace__fprintf_sample(struct trace *trace, struct perf_evsel *evsel,
1658 struct perf_sample *sample, struct thread *thread)
1659{
1660 int printed = 0;
1661
1662 if (trace->print_sample) {
1663 double ts = (double)sample->time / NSEC_PER_MSEC;
1664
1665 printed += fprintf(trace->output, "%22s %10.3f %s %d/%d [%d]\n",
1666 perf_evsel__name(evsel), ts,
1667 thread__comm_str(thread),
1668 sample->pid, sample->tid, sample->cpu);
1669 }
1670
1671 return printed;
1672}
1673
ba3d7dee 1674static int trace__sys_enter(struct trace *trace, struct perf_evsel *evsel,
0c82adcf 1675 union perf_event *event __maybe_unused,
ba3d7dee
ACM
1676 struct perf_sample *sample)
1677{
752fde44 1678 char *msg;
ba3d7dee 1679 void *args;
752fde44 1680 size_t printed = 0;
2ae3a312 1681 struct thread *thread;
b91fc39f 1682 int id = perf_evsel__sc_tp_uint(evsel, id, sample), err = -1;
bf2575c1 1683 struct syscall *sc = trace__syscall_info(trace, evsel, id);
2ae3a312
ACM
1684 struct thread_trace *ttrace;
1685
1686 if (sc == NULL)
1687 return -1;
ba3d7dee 1688
8fb598e5 1689 thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
c24ff998 1690 ttrace = thread__trace(thread, trace->output);
2ae3a312 1691 if (ttrace == NULL)
b91fc39f 1692 goto out_put;
ba3d7dee 1693
591421e1
ACM
1694 trace__fprintf_sample(trace, evsel, sample, thread);
1695
77170988 1696 args = perf_evsel__sc_tp_ptr(evsel, args, sample);
752fde44
ACM
1697
1698 if (ttrace->entry_str == NULL) {
e4d44e83 1699 ttrace->entry_str = malloc(trace__entry_str_size);
752fde44 1700 if (!ttrace->entry_str)
b91fc39f 1701 goto out_put;
752fde44
ACM
1702 }
1703
5cf9c84e 1704 if (!(trace->duration_filter || trace->summary_only || trace->min_stack))
522283fe 1705 trace__printf_interrupted_entry(trace);
e596663e 1706
752fde44
ACM
1707 ttrace->entry_time = sample->time;
1708 msg = ttrace->entry_str;
e4d44e83 1709 printed += scnprintf(msg + printed, trace__entry_str_size - printed, "%s(", sc->name);
752fde44 1710
e4d44e83 1711 printed += syscall__scnprintf_args(sc, msg + printed, trace__entry_str_size - printed,
7a983a0f 1712 args, NULL, 0, trace, thread);
752fde44 1713
5089f20e 1714 if (sc->is_exit) {
0a6545bd 1715 if (!(trace->duration_filter || trace->summary_only || trace->failure_only || trace->min_stack)) {
fd2b2975 1716 trace__fprintf_entry_head(trace, thread, 0, false, ttrace->entry_time, trace->output);
c008f78f 1717 fprintf(trace->output, "%-70s)\n", ttrace->entry_str);
ae9ed035 1718 }
7f4f8001 1719 } else {
752fde44 1720 ttrace->entry_pending = true;
7f4f8001
ACM
1721 /* See trace__vfs_getname & trace__sys_exit */
1722 ttrace->filename.pending_open = false;
1723 }
ba3d7dee 1724
f3b623b8
ACM
1725 if (trace->current != thread) {
1726 thread__put(trace->current);
1727 trace->current = thread__get(thread);
1728 }
b91fc39f
ACM
1729 err = 0;
1730out_put:
1731 thread__put(thread);
1732 return err;
ba3d7dee
ACM
1733}
1734
a98392bb
ACM
1735static int trace__fprintf_sys_enter(struct trace *trace, struct perf_evsel *evsel,
1736 struct perf_sample *sample)
1737{
a98392bb
ACM
1738 struct thread_trace *ttrace;
1739 struct thread *thread;
f3acd886
ACM
1740 int id = perf_evsel__sc_tp_uint(evsel, id, sample), err = -1;
1741 struct syscall *sc = trace__syscall_info(trace, evsel, id);
a98392bb 1742 char msg[1024];
7a983a0f
ACM
1743 void *args, *augmented_args = NULL;
1744 int augmented_args_size;
a98392bb 1745
a98392bb
ACM
1746 if (sc == NULL)
1747 return -1;
1748
1749 thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
1750 ttrace = thread__trace(thread, trace->output);
1751 /*
1752 * We need to get ttrace just to make sure it is there when syscall__scnprintf_args()
1753 * and the rest of the beautifiers accessing it via struct syscall_arg touches it.
1754 */
1755 if (ttrace == NULL)
1756 goto out_put;
1757
f3acd886 1758 args = perf_evsel__sc_tp_ptr(evsel, args, sample);
7a983a0f
ACM
1759 augmented_args_size = sample->raw_size - sc->args_size;
1760 if (augmented_args_size > 0)
1761 augmented_args = sample->raw_data + sc->args_size;
1762
1763 syscall__scnprintf_args(sc, msg, sizeof(msg), args, augmented_args, augmented_args_size, trace, thread);
a98392bb
ACM
1764 fprintf(trace->output, "%s", msg);
1765 err = 0;
1766out_put:
1767 thread__put(thread);
1768 return err;
1769}
1770
5cf9c84e
ACM
1771static int trace__resolve_callchain(struct trace *trace, struct perf_evsel *evsel,
1772 struct perf_sample *sample,
1773 struct callchain_cursor *cursor)
202ff968
ACM
1774{
1775 struct addr_location al;
3a9e9a47
RB
1776 int max_stack = evsel->attr.sample_max_stack ?
1777 evsel->attr.sample_max_stack :
1778 trace->max_stack;
5cf9c84e
ACM
1779
1780 if (machine__resolve(trace->host, &al, sample) < 0 ||
3a9e9a47 1781 thread__resolve_callchain(al.thread, cursor, evsel, sample, NULL, NULL, max_stack))
5cf9c84e
ACM
1782 return -1;
1783
1784 return 0;
1785}
1786
1787static int trace__fprintf_callchain(struct trace *trace, struct perf_sample *sample)
1788{
202ff968 1789 /* TODO: user-configurable print_opts */
e20ab86e
ACM
1790 const unsigned int print_opts = EVSEL__PRINT_SYM |
1791 EVSEL__PRINT_DSO |
1792 EVSEL__PRINT_UNKNOWN_AS_ADDR;
202ff968 1793
d327e60c 1794 return sample__fprintf_callchain(sample, 38, print_opts, &callchain_cursor, trace->output);
202ff968
ACM
1795}
1796
092bd3cd
HB
/*
 * Translate errno value @err into its name for the architecture of the
 * perf environment associated with @evsel.
 */
static const char *errno_to_name(struct perf_evsel *evsel, int err)
{
	const char *arch_name = perf_env__arch(perf_evsel__env(evsel));

	return arch_syscalls__strerrno(arch_name, err);
}
1804
ba3d7dee 1805static int trace__sys_exit(struct trace *trace, struct perf_evsel *evsel,
0c82adcf 1806 union perf_event *event __maybe_unused,
ba3d7dee
ACM
1807 struct perf_sample *sample)
1808{
2c82c3ad 1809 long ret;
60c907ab 1810 u64 duration = 0;
fd2b2975 1811 bool duration_calculated = false;
2ae3a312 1812 struct thread *thread;
5cf9c84e 1813 int id = perf_evsel__sc_tp_uint(evsel, id, sample), err = -1, callchain_ret = 0;
bf2575c1 1814 struct syscall *sc = trace__syscall_info(trace, evsel, id);
2ae3a312
ACM
1815 struct thread_trace *ttrace;
1816
1817 if (sc == NULL)
1818 return -1;
ba3d7dee 1819
8fb598e5 1820 thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
c24ff998 1821 ttrace = thread__trace(thread, trace->output);
2ae3a312 1822 if (ttrace == NULL)
b91fc39f 1823 goto out_put;
ba3d7dee 1824
591421e1
ACM
1825 trace__fprintf_sample(trace, evsel, sample, thread);
1826
bf2575c1
DA
1827 if (trace->summary)
1828 thread__update_stats(ttrace, id, sample);
1829
77170988 1830 ret = perf_evsel__sc_tp_uint(evsel, ret, sample);
ba3d7dee 1831
6a648b53 1832 if (sc->is_open && ret >= 0 && ttrace->filename.pending_open) {
7f4f8001
ACM
1833 trace__set_fd_pathname(thread, ret, ttrace->filename.name);
1834 ttrace->filename.pending_open = false;
c522739d
ACM
1835 ++trace->stats.vfs_getname;
1836 }
1837
ae9ed035 1838 if (ttrace->entry_time) {
60c907ab 1839 duration = sample->time - ttrace->entry_time;
ae9ed035
ACM
1840 if (trace__filter_duration(trace, duration))
1841 goto out;
fd2b2975 1842 duration_calculated = true;
ae9ed035
ACM
1843 } else if (trace->duration_filter)
1844 goto out;
60c907ab 1845
5cf9c84e
ACM
1846 if (sample->callchain) {
1847 callchain_ret = trace__resolve_callchain(trace, evsel, sample, &callchain_cursor);
1848 if (callchain_ret == 0) {
1849 if (callchain_cursor.nr < trace->min_stack)
1850 goto out;
1851 callchain_ret = 1;
1852 }
1853 }
1854
0a6545bd 1855 if (trace->summary_only || (ret >= 0 && trace->failure_only))
fd2eabaf
DA
1856 goto out;
1857
fd2b2975 1858 trace__fprintf_entry_head(trace, thread, duration, duration_calculated, ttrace->entry_time, trace->output);
752fde44
ACM
1859
1860 if (ttrace->entry_pending) {
c24ff998 1861 fprintf(trace->output, "%-70s", ttrace->entry_str);
752fde44 1862 } else {
c24ff998
ACM
1863 fprintf(trace->output, " ... [");
1864 color_fprintf(trace->output, PERF_COLOR_YELLOW, "continued");
1865 fprintf(trace->output, "]: %s()", sc->name);
752fde44
ACM
1866 }
1867
da3c9a44 1868 if (sc->fmt == NULL) {
1f63139c
ACM
1869 if (ret < 0)
1870 goto errno_print;
da3c9a44 1871signed_print:
6f8fe61e 1872 fprintf(trace->output, ") = %ld", ret);
1f63139c
ACM
1873 } else if (ret < 0) {
1874errno_print: {
942a91ed 1875 char bf[STRERR_BUFSIZE];
c8b5f2c9 1876 const char *emsg = str_error_r(-ret, bf, sizeof(bf)),
092bd3cd 1877 *e = errno_to_name(evsel, -ret);
ba3d7dee 1878
c24ff998 1879 fprintf(trace->output, ") = -1 %s %s", e, emsg);
1f63139c 1880 }
da3c9a44 1881 } else if (ret == 0 && sc->fmt->timeout)
c24ff998 1882 fprintf(trace->output, ") = 0 Timeout");
84486caa
ACM
1883 else if (ttrace->ret_scnprintf) {
1884 char bf[1024];
7ee57434
ACM
1885 struct syscall_arg arg = {
1886 .val = ret,
1887 .thread = thread,
1888 .trace = trace,
1889 };
1890 ttrace->ret_scnprintf(bf, sizeof(bf), &arg);
84486caa
ACM
1891 ttrace->ret_scnprintf = NULL;
1892 fprintf(trace->output, ") = %s", bf);
1893 } else if (sc->fmt->hexret)
2c82c3ad 1894 fprintf(trace->output, ") = %#lx", ret);
11c8e39f
ACM
1895 else if (sc->fmt->errpid) {
1896 struct thread *child = machine__find_thread(trace->host, ret, ret);
1897
1898 if (child != NULL) {
1899 fprintf(trace->output, ") = %ld", ret);
1900 if (child->comm_set)
1901 fprintf(trace->output, " (%s)", thread__comm_str(child));
1902 thread__put(child);
1903 }
1904 } else
da3c9a44 1905 goto signed_print;
ba3d7dee 1906
c24ff998 1907 fputc('\n', trace->output);
566a0885 1908
5cf9c84e
ACM
1909 if (callchain_ret > 0)
1910 trace__fprintf_callchain(trace, sample);
1911 else if (callchain_ret < 0)
1912 pr_err("Problem processing %s callchain, skipping...\n", perf_evsel__name(evsel));
ae9ed035 1913out:
752fde44 1914 ttrace->entry_pending = false;
b91fc39f
ACM
1915 err = 0;
1916out_put:
1917 thread__put(thread);
1918 return err;
ba3d7dee
ACM
1919}
1920
c522739d 1921static int trace__vfs_getname(struct trace *trace, struct perf_evsel *evsel,
0c82adcf 1922 union perf_event *event __maybe_unused,
c522739d
ACM
1923 struct perf_sample *sample)
1924{
f994592d
ACM
1925 struct thread *thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
1926 struct thread_trace *ttrace;
1927 size_t filename_len, entry_str_len, to_move;
1928 ssize_t remaining_space;
1929 char *pos;
7f4f8001 1930 const char *filename = perf_evsel__rawptr(evsel, sample, "pathname");
f994592d
ACM
1931
1932 if (!thread)
1933 goto out;
1934
1935 ttrace = thread__priv(thread);
1936 if (!ttrace)
ef65e96e 1937 goto out_put;
f994592d 1938
7f4f8001 1939 filename_len = strlen(filename);
39f0e7a8 1940 if (filename_len == 0)
ef65e96e 1941 goto out_put;
7f4f8001
ACM
1942
1943 if (ttrace->filename.namelen < filename_len) {
1944 char *f = realloc(ttrace->filename.name, filename_len + 1);
1945
1946 if (f == NULL)
ef65e96e 1947 goto out_put;
7f4f8001
ACM
1948
1949 ttrace->filename.namelen = filename_len;
1950 ttrace->filename.name = f;
1951 }
1952
1953 strcpy(ttrace->filename.name, filename);
1954 ttrace->filename.pending_open = true;
1955
f994592d 1956 if (!ttrace->filename.ptr)
ef65e96e 1957 goto out_put;
f994592d
ACM
1958
1959 entry_str_len = strlen(ttrace->entry_str);
1960 remaining_space = trace__entry_str_size - entry_str_len - 1; /* \0 */
1961 if (remaining_space <= 0)
ef65e96e 1962 goto out_put;
f994592d 1963
f994592d
ACM
1964 if (filename_len > (size_t)remaining_space) {
1965 filename += filename_len - remaining_space;
1966 filename_len = remaining_space;
1967 }
1968
1969 to_move = entry_str_len - ttrace->filename.entry_str_pos + 1; /* \0 */
1970 pos = ttrace->entry_str + ttrace->filename.entry_str_pos;
1971 memmove(pos + filename_len, pos, to_move);
1972 memcpy(pos, filename, filename_len);
1973
1974 ttrace->filename.ptr = 0;
1975 ttrace->filename.entry_str_pos = 0;
ef65e96e
ACM
1976out_put:
1977 thread__put(thread);
f994592d 1978out:
c522739d
ACM
1979 return 0;
1980}
1981
1302d88e 1982static int trace__sched_stat_runtime(struct trace *trace, struct perf_evsel *evsel,
0c82adcf 1983 union perf_event *event __maybe_unused,
1302d88e
ACM
1984 struct perf_sample *sample)
1985{
1986 u64 runtime = perf_evsel__intval(evsel, sample, "runtime");
1987 double runtime_ms = (double)runtime / NSEC_PER_MSEC;
8fb598e5 1988 struct thread *thread = machine__findnew_thread(trace->host,
314add6b
AH
1989 sample->pid,
1990 sample->tid);
c24ff998 1991 struct thread_trace *ttrace = thread__trace(thread, trace->output);
1302d88e
ACM
1992
1993 if (ttrace == NULL)
1994 goto out_dump;
1995
1996 ttrace->runtime_ms += runtime_ms;
1997 trace->runtime_ms += runtime_ms;
ef65e96e 1998out_put:
b91fc39f 1999 thread__put(thread);
1302d88e
ACM
2000 return 0;
2001
2002out_dump:
c24ff998 2003 fprintf(trace->output, "%s: comm=%s,pid=%u,runtime=%" PRIu64 ",vruntime=%" PRIu64 ")\n",
1302d88e
ACM
2004 evsel->name,
2005 perf_evsel__strval(evsel, sample, "comm"),
2006 (pid_t)perf_evsel__intval(evsel, sample, "pid"),
2007 runtime,
2008 perf_evsel__intval(evsel, sample, "vruntime"));
ef65e96e 2009 goto out_put;
1302d88e
ACM
2010}
2011
923d0c9a
ACM
2012static int bpf_output__printer(enum binary_printer_ops op,
2013 unsigned int val, void *extra __maybe_unused, FILE *fp)
1d6c9407 2014{
1d6c9407
WN
2015 unsigned char ch = (unsigned char)val;
2016
2017 switch (op) {
2018 case BINARY_PRINT_CHAR_DATA:
923d0c9a 2019 return fprintf(fp, "%c", isprint(ch) ? ch : '.');
1d6c9407
WN
2020 case BINARY_PRINT_DATA_BEGIN:
2021 case BINARY_PRINT_LINE_BEGIN:
2022 case BINARY_PRINT_ADDR:
2023 case BINARY_PRINT_NUM_DATA:
2024 case BINARY_PRINT_NUM_PAD:
2025 case BINARY_PRINT_SEP:
2026 case BINARY_PRINT_CHAR_PAD:
2027 case BINARY_PRINT_LINE_END:
2028 case BINARY_PRINT_DATA_END:
2029 default:
2030 break;
2031 }
923d0c9a
ACM
2032
2033 return 0;
1d6c9407
WN
2034}
2035
2036static void bpf_output__fprintf(struct trace *trace,
2037 struct perf_sample *sample)
2038{
923d0c9a
ACM
2039 binary__fprintf(sample->raw_data, sample->raw_size, 8,
2040 bpf_output__printer, NULL, trace->output);
1d6c9407
WN
2041}
2042
14a052df
ACM
2043static int trace__event_handler(struct trace *trace, struct perf_evsel *evsel,
2044 union perf_event *event __maybe_unused,
2045 struct perf_sample *sample)
2046{
7ad35615
ACM
2047 int callchain_ret = 0;
2048
2049 if (sample->callchain) {
2050 callchain_ret = trace__resolve_callchain(trace, evsel, sample, &callchain_cursor);
2051 if (callchain_ret == 0) {
2052 if (callchain_cursor.nr < trace->min_stack)
2053 goto out;
2054 callchain_ret = 1;
2055 }
2056 }
2057
522283fe 2058 trace__printf_interrupted_entry(trace);
14a052df 2059 trace__fprintf_tstamp(trace, sample->time, trace->output);
0808921a
ACM
2060
2061 if (trace->trace_syscalls)
2062 fprintf(trace->output, "( ): ");
2063
1cdf618f
ACM
2064 if (evsel == trace->syscalls.events.augmented) {
2065 int id = perf_evsel__sc_tp_uint(evsel, id, sample);
2066 struct syscall *sc = trace__syscall_info(trace, evsel, id);
2067
2068 if (sc) {
2069 struct thread *thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
2070
2071 if (thread) {
2072 fprintf(trace->output, "%s(", sc->name);
2073 trace__fprintf_sys_enter(trace, evsel, sample);
2074 fputc(')', trace->output);
2075 thread__put(thread);
2076 goto newline;
2077 }
2078 }
2079
2080 /*
2081 * XXX: Not having the associated syscall info or not finding/adding
2082 * the thread should never happen, but if it does...
2083 * fall thru and print it as a bpf_output event.
2084 */
2085 }
2086
0808921a 2087 fprintf(trace->output, "%s:", evsel->name);
14a052df 2088
1d6c9407 2089 if (perf_evsel__is_bpf_output(evsel)) {
1cdf618f 2090 bpf_output__fprintf(trace, sample);
1d6c9407 2091 } else if (evsel->tp_format) {
a98392bb
ACM
2092 if (strncmp(evsel->tp_format->name, "sys_enter_", 10) ||
2093 trace__fprintf_sys_enter(trace, evsel, sample)) {
2094 event_format__fprintf(evsel->tp_format, sample->cpu,
2095 sample->raw_data, sample->raw_size,
2096 trace->output);
2097 }
14a052df
ACM
2098 }
2099
1cdf618f 2100newline:
51125a29 2101 fprintf(trace->output, "\n");
202ff968 2102
7ad35615
ACM
2103 if (callchain_ret > 0)
2104 trace__fprintf_callchain(trace, sample);
2105 else if (callchain_ret < 0)
2106 pr_err("Problem processing %s callchain, skipping...\n", perf_evsel__name(evsel));
2107out:
14a052df
ACM
2108 return 0;
2109}
2110
598d02c5
SF
2111static void print_location(FILE *f, struct perf_sample *sample,
2112 struct addr_location *al,
2113 bool print_dso, bool print_sym)
2114{
2115
bb963e16 2116 if ((verbose > 0 || print_dso) && al->map)
598d02c5
SF
2117 fprintf(f, "%s@", al->map->dso->long_name);
2118
bb963e16 2119 if ((verbose > 0 || print_sym) && al->sym)
4414a3c5 2120 fprintf(f, "%s+0x%" PRIx64, al->sym->name,
598d02c5
SF
2121 al->addr - al->sym->start);
2122 else if (al->map)
4414a3c5 2123 fprintf(f, "0x%" PRIx64, al->addr);
598d02c5 2124 else
4414a3c5 2125 fprintf(f, "0x%" PRIx64, sample->addr);
598d02c5
SF
2126}
2127
2128static int trace__pgfault(struct trace *trace,
2129 struct perf_evsel *evsel,
473398a2 2130 union perf_event *event __maybe_unused,
598d02c5
SF
2131 struct perf_sample *sample)
2132{
2133 struct thread *thread;
598d02c5
SF
2134 struct addr_location al;
2135 char map_type = 'd';
a2ea67d7 2136 struct thread_trace *ttrace;
b91fc39f 2137 int err = -1;
1df54290 2138 int callchain_ret = 0;
598d02c5
SF
2139
2140 thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
1df54290
ACM
2141
2142 if (sample->callchain) {
2143 callchain_ret = trace__resolve_callchain(trace, evsel, sample, &callchain_cursor);
2144 if (callchain_ret == 0) {
2145 if (callchain_cursor.nr < trace->min_stack)
2146 goto out_put;
2147 callchain_ret = 1;
2148 }
2149 }
2150
a2ea67d7
SF
2151 ttrace = thread__trace(thread, trace->output);
2152 if (ttrace == NULL)
b91fc39f 2153 goto out_put;
a2ea67d7
SF
2154
2155 if (evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MAJ)
2156 ttrace->pfmaj++;
2157 else
2158 ttrace->pfmin++;
2159
2160 if (trace->summary_only)
b91fc39f 2161 goto out;
598d02c5 2162
4546263d 2163 thread__find_symbol(thread, sample->cpumode, sample->ip, &al);
598d02c5 2164
fd2b2975 2165 trace__fprintf_entry_head(trace, thread, 0, true, sample->time, trace->output);
598d02c5
SF
2166
2167 fprintf(trace->output, "%sfault [",
2168 evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MAJ ?
2169 "maj" : "min");
2170
2171 print_location(trace->output, sample, &al, false, true);
2172
2173 fprintf(trace->output, "] => ");
2174
117d3c24 2175 thread__find_symbol(thread, sample->cpumode, sample->addr, &al);
598d02c5
SF
2176
2177 if (!al.map) {
4546263d 2178 thread__find_symbol(thread, sample->cpumode, sample->addr, &al);
598d02c5
SF
2179
2180 if (al.map)
2181 map_type = 'x';
2182 else
2183 map_type = '?';
2184 }
2185
2186 print_location(trace->output, sample, &al, true, false);
2187
2188 fprintf(trace->output, " (%c%c)\n", map_type, al.level);
0c3a6ef4 2189
1df54290
ACM
2190 if (callchain_ret > 0)
2191 trace__fprintf_callchain(trace, sample);
2192 else if (callchain_ret < 0)
2193 pr_err("Problem processing %s callchain, skipping...\n", perf_evsel__name(evsel));
b91fc39f
ACM
2194out:
2195 err = 0;
2196out_put:
2197 thread__put(thread);
2198 return err;
598d02c5
SF
2199}
2200
e6001980 2201static void trace__set_base_time(struct trace *trace,
8a07a809 2202 struct perf_evsel *evsel,
e6001980
ACM
2203 struct perf_sample *sample)
2204{
8a07a809
ACM
2205 /*
2206 * BPF events were not setting PERF_SAMPLE_TIME, so be more robust
2207 * and don't use sample->time unconditionally, we may end up having
2208 * some other event in the future without PERF_SAMPLE_TIME for good
2209 * reason, i.e. we may not be interested in its timestamps, just in
2210 * it taking place, picking some piece of information when it
2211 * appears in our event stream (vfs_getname comes to mind).
2212 */
2213 if (trace->base_time == 0 && !trace->full_time &&
2214 (evsel->attr.sample_type & PERF_SAMPLE_TIME))
e6001980
ACM
2215 trace->base_time = sample->time;
2216}
2217
6810fc91 2218static int trace__process_sample(struct perf_tool *tool,
0c82adcf 2219 union perf_event *event,
6810fc91
DA
2220 struct perf_sample *sample,
2221 struct perf_evsel *evsel,
2222 struct machine *machine __maybe_unused)
2223{
2224 struct trace *trace = container_of(tool, struct trace, tool);
aa07df6e 2225 struct thread *thread;
6810fc91
DA
2226 int err = 0;
2227
744a9719 2228 tracepoint_handler handler = evsel->handler;
6810fc91 2229
aa07df6e
DA
2230 thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
2231 if (thread && thread__is_filtered(thread))
ef65e96e 2232 goto out;
bdc89661 2233
e6001980 2234 trace__set_base_time(trace, evsel, sample);
6810fc91 2235
3160565f
DA
2236 if (handler) {
2237 ++trace->nr_events;
0c82adcf 2238 handler(trace, evsel, event, sample);
3160565f 2239 }
ef65e96e
ACM
2240out:
2241 thread__put(thread);
6810fc91
DA
2242 return err;
2243}
2244
1e28fe0a 2245static int trace__record(struct trace *trace, int argc, const char **argv)
5e2485b1
DA
2246{
2247 unsigned int rec_argc, i, j;
2248 const char **rec_argv;
2249 const char * const record_args[] = {
2250 "record",
2251 "-R",
2252 "-m", "1024",
2253 "-c", "1",
5e2485b1
DA
2254 };
2255
1e28fe0a
SF
2256 const char * const sc_args[] = { "-e", };
2257 unsigned int sc_args_nr = ARRAY_SIZE(sc_args);
2258 const char * const majpf_args[] = { "-e", "major-faults" };
2259 unsigned int majpf_args_nr = ARRAY_SIZE(majpf_args);
2260 const char * const minpf_args[] = { "-e", "minor-faults" };
2261 unsigned int minpf_args_nr = ARRAY_SIZE(minpf_args);
2262
9aca7f17 2263 /* +1 is for the event string below */
1e28fe0a
SF
2264 rec_argc = ARRAY_SIZE(record_args) + sc_args_nr + 1 +
2265 majpf_args_nr + minpf_args_nr + argc;
5e2485b1
DA
2266 rec_argv = calloc(rec_argc + 1, sizeof(char *));
2267
2268 if (rec_argv == NULL)
2269 return -ENOMEM;
2270
1e28fe0a 2271 j = 0;
5e2485b1 2272 for (i = 0; i < ARRAY_SIZE(record_args); i++)
1e28fe0a
SF
2273 rec_argv[j++] = record_args[i];
2274
e281a960
SF
2275 if (trace->trace_syscalls) {
2276 for (i = 0; i < sc_args_nr; i++)
2277 rec_argv[j++] = sc_args[i];
2278
2279 /* event string may be different for older kernels - e.g., RHEL6 */
2280 if (is_valid_tracepoint("raw_syscalls:sys_enter"))
2281 rec_argv[j++] = "raw_syscalls:sys_enter,raw_syscalls:sys_exit";
2282 else if (is_valid_tracepoint("syscalls:sys_enter"))
2283 rec_argv[j++] = "syscalls:sys_enter,syscalls:sys_exit";
2284 else {
2285 pr_err("Neither raw_syscalls nor syscalls events exist.\n");
c896f85a 2286 free(rec_argv);
e281a960
SF
2287 return -1;
2288 }
9aca7f17 2289 }
9aca7f17 2290
1e28fe0a
SF
2291 if (trace->trace_pgfaults & TRACE_PFMAJ)
2292 for (i = 0; i < majpf_args_nr; i++)
2293 rec_argv[j++] = majpf_args[i];
2294
2295 if (trace->trace_pgfaults & TRACE_PFMIN)
2296 for (i = 0; i < minpf_args_nr; i++)
2297 rec_argv[j++] = minpf_args[i];
2298
2299 for (i = 0; i < (unsigned int)argc; i++)
2300 rec_argv[j++] = argv[i];
5e2485b1 2301
b0ad8ea6 2302 return cmd_record(j, rec_argv);
5e2485b1
DA
2303}
2304
bf2575c1
DA
2305static size_t trace__fprintf_thread_summary(struct trace *trace, FILE *fp);
2306
08c98776 2307static bool perf_evlist__add_vfs_getname(struct perf_evlist *evlist)
c522739d 2308{
ef503831 2309 struct perf_evsel *evsel = perf_evsel__newtp("probe", "vfs_getname");
8dd2a131
JO
2310
2311 if (IS_ERR(evsel))
08c98776 2312 return false;
c522739d
ACM
2313
2314 if (perf_evsel__field(evsel, "pathname") == NULL) {
2315 perf_evsel__delete(evsel);
08c98776 2316 return false;
c522739d
ACM
2317 }
2318
744a9719 2319 evsel->handler = trace__vfs_getname;
c522739d 2320 perf_evlist__add(evlist, evsel);
08c98776 2321 return true;
c522739d
ACM
2322}
2323
0ae537cb 2324static struct perf_evsel *perf_evsel__new_pgfault(u64 config)
598d02c5
SF
2325{
2326 struct perf_evsel *evsel;
2327 struct perf_event_attr attr = {
2328 .type = PERF_TYPE_SOFTWARE,
2329 .mmap_data = 1,
598d02c5
SF
2330 };
2331
2332 attr.config = config;
0524798c 2333 attr.sample_period = 1;
598d02c5
SF
2334
2335 event_attr_init(&attr);
2336
2337 evsel = perf_evsel__new(&attr);
0ae537cb
ACM
2338 if (evsel)
2339 evsel->handler = trace__pgfault;
598d02c5 2340
0ae537cb 2341 return evsel;
598d02c5
SF
2342}
2343
ddbb1b13
ACM
2344static void trace__handle_event(struct trace *trace, union perf_event *event, struct perf_sample *sample)
2345{
2346 const u32 type = event->header.type;
2347 struct perf_evsel *evsel;
2348
ddbb1b13
ACM
2349 if (type != PERF_RECORD_SAMPLE) {
2350 trace__process_event(trace, trace->host, event, sample);
2351 return;
2352 }
2353
2354 evsel = perf_evlist__id2evsel(trace->evlist, sample->id);
2355 if (evsel == NULL) {
2356 fprintf(trace->output, "Unknown tp ID %" PRIu64 ", skipping...\n", sample->id);
2357 return;
2358 }
2359
e6001980
ACM
2360 trace__set_base_time(trace, evsel, sample);
2361
ddbb1b13
ACM
2362 if (evsel->attr.type == PERF_TYPE_TRACEPOINT &&
2363 sample->raw_data == NULL) {
2364 fprintf(trace->output, "%s sample with no payload for tid: %d, cpu %d, raw_size=%d, skipping...\n",
2365 perf_evsel__name(evsel), sample->tid,
2366 sample->cpu, sample->raw_size);
2367 } else {
2368 tracepoint_handler handler = evsel->handler;
2369 handler(trace, evsel, event, sample);
2370 }
2371}
2372
c27366f0
ACM
2373static int trace__add_syscall_newtp(struct trace *trace)
2374{
2375 int ret = -1;
2376 struct perf_evlist *evlist = trace->evlist;
2377 struct perf_evsel *sys_enter, *sys_exit;
2378
63f11c80 2379 sys_enter = perf_evsel__raw_syscall_newtp("sys_enter", trace__sys_enter);
c27366f0
ACM
2380 if (sys_enter == NULL)
2381 goto out;
2382
2383 if (perf_evsel__init_sc_tp_ptr_field(sys_enter, args))
2384 goto out_delete_sys_enter;
2385
63f11c80 2386 sys_exit = perf_evsel__raw_syscall_newtp("sys_exit", trace__sys_exit);
c27366f0
ACM
2387 if (sys_exit == NULL)
2388 goto out_delete_sys_enter;
2389
2390 if (perf_evsel__init_sc_tp_uint_field(sys_exit, ret))
2391 goto out_delete_sys_exit;
2392
08e26396
ACM
2393 perf_evsel__config_callchain(sys_enter, &trace->opts, &callchain_param);
2394 perf_evsel__config_callchain(sys_exit, &trace->opts, &callchain_param);
2395
c27366f0
ACM
2396 perf_evlist__add(evlist, sys_enter);
2397 perf_evlist__add(evlist, sys_exit);
2398
2ddd5c04 2399 if (callchain_param.enabled && !trace->kernel_syscallchains) {
44621819
ACM
2400 /*
2401 * We're interested only in the user space callchain
2402 * leading to the syscall, allow overriding that for
2403 * debugging reasons using --kernel_syscall_callchains
2404 */
2405 sys_exit->attr.exclude_callchain_kernel = 1;
2406 }
2407
8b3ce757
ACM
2408 trace->syscalls.events.sys_enter = sys_enter;
2409 trace->syscalls.events.sys_exit = sys_exit;
c27366f0
ACM
2410
2411 ret = 0;
2412out:
2413 return ret;
2414
2415out_delete_sys_exit:
2416 perf_evsel__delete_priv(sys_exit);
2417out_delete_sys_enter:
2418 perf_evsel__delete_priv(sys_enter);
2419 goto out;
2420}
2421
19867b61
ACM
2422static int trace__set_ev_qualifier_filter(struct trace *trace)
2423{
2424 int err = -1;
b15d0a4c 2425 struct perf_evsel *sys_exit;
19867b61
ACM
2426 char *filter = asprintf_expr_inout_ints("id", !trace->not_ev_qualifier,
2427 trace->ev_qualifier_ids.nr,
2428 trace->ev_qualifier_ids.entries);
2429
2430 if (filter == NULL)
2431 goto out_enomem;
2432
3541c034
MP
2433 if (!perf_evsel__append_tp_filter(trace->syscalls.events.sys_enter,
2434 filter)) {
b15d0a4c 2435 sys_exit = trace->syscalls.events.sys_exit;
3541c034 2436 err = perf_evsel__append_tp_filter(sys_exit, filter);
b15d0a4c 2437 }
19867b61
ACM
2438
2439 free(filter);
2440out:
2441 return err;
2442out_enomem:
2443 errno = ENOMEM;
2444 goto out;
2445}
c27366f0 2446
dd1a5037
ACM
2447static int trace__set_filter_loop_pids(struct trace *trace)
2448{
082ab9a1 2449 unsigned int nr = 1;
dd1a5037
ACM
2450 pid_t pids[32] = {
2451 getpid(),
2452 };
082ab9a1
ACM
2453 struct thread *thread = machine__find_thread(trace->host, pids[0], pids[0]);
2454
2455 while (thread && nr < ARRAY_SIZE(pids)) {
2456 struct thread *parent = machine__find_thread(trace->host, thread->ppid, thread->ppid);
2457
2458 if (parent == NULL)
2459 break;
2460
2461 if (!strcmp(thread__comm_str(parent), "sshd")) {
2462 pids[nr++] = parent->tid;
2463 break;
2464 }
2465 thread = parent;
2466 }
dd1a5037
ACM
2467
2468 return perf_evlist__set_filter_pids(trace->evlist, nr, pids);
2469}
2470
/*
 * Live tracing entry point.
 *
 * Adds the events that were requested (raw syscall enter/exit plus
 * vfs_getname, major/minor page faults, sched_stat_runtime), creates the
 * target maps, initialises symbol handling, configures the evlist, prepares
 * a workload from argv when argc > 0, opens the events, sets up pid and
 * event qualifier filters, mmaps the ring buffers and then reads and
 * dispatches events in a loop.
 *
 * The loop is left when 'interrupted' is set, or when a full pass over the
 * mmaps reads no event and either draining is already set or
 * perf_evlist__poll() does not report anything to read.
 *
 * On the way out the optional thread summary and tool stats are printed
 * (only when err is 0), the evlist is deleted and err is returned.  The
 * labels after the main return print a message for a specific setup
 * failure and then jump back to the common cleanup.
 */
f15eb531 2471static int trace__run(struct trace *trace, int argc, const char **argv)
514f1c67 2472{
14a052df 2473 struct perf_evlist *evlist = trace->evlist;
0ae537cb 2474 struct perf_evsel *evsel, *pgfault_maj = NULL, *pgfault_min = NULL;
efd5745e
ACM
2475 int err = -1, i;
2476 unsigned long before;
f15eb531 2477 const bool forks = argc > 0;
46fb3c21 2478 bool draining = false;
514f1c67 2479
75b757ca
ACM
2480 trace->live = true;
2481
/* Add the events selected by the trace options to the evlist */
c27366f0 2482 if (trace->trace_syscalls && trace__add_syscall_newtp(trace))
801c67b0 2483 goto out_error_raw_syscalls;
514f1c67 2484
e281a960 2485 if (trace->trace_syscalls)
08c98776 2486 trace->vfs_getname = perf_evlist__add_vfs_getname(evlist);
c522739d 2487
0ae537cb
ACM
2488 if ((trace->trace_pgfaults & TRACE_PFMAJ)) {
2489 pgfault_maj = perf_evsel__new_pgfault(PERF_COUNT_SW_PAGE_FAULTS_MAJ);
2490 if (pgfault_maj == NULL)
2491 goto out_error_mem;
08e26396 2492 perf_evsel__config_callchain(pgfault_maj, &trace->opts, &callchain_param);
0ae537cb 2493 perf_evlist__add(evlist, pgfault_maj);
e2726d99 2494 }
598d02c5 2495
0ae537cb
ACM
2496 if ((trace->trace_pgfaults & TRACE_PFMIN)) {
2497 pgfault_min = perf_evsel__new_pgfault(PERF_COUNT_SW_PAGE_FAULTS_MIN);
2498 if (pgfault_min == NULL)
2499 goto out_error_mem;
08e26396 2500 perf_evsel__config_callchain(pgfault_min, &trace->opts, &callchain_param);
0ae537cb
ACM
2501 perf_evlist__add(evlist, pgfault_min);
2502 }
598d02c5 2503
1302d88e 2504 if (trace->sched &&
2cc990ba
ACM
2505 perf_evlist__add_newtp(evlist, "sched", "sched_stat_runtime",
2506 trace__sched_stat_runtime))
2507 goto out_error_sched_stat_runtime;
1302d88e 2508
9ea42ba4
ACM
2509 /*
2510 * If a global cgroup was set, apply it to all the events without an
2511 * explicit cgroup. I.e.:
2512 *
2513 * trace -G A -e sched:*switch
2514 *
2515 * Will set all raw_syscalls:sys_{enter,exit}, pgfault, vfs_getname, etc
2516 * _and_ sched:sched_switch to the 'A' cgroup, while:
2517 *
2518 * trace -e sched:*switch -G A
2519 *
2520 * will only set the sched:sched_switch event to the 'A' cgroup, all the
2521 * other events (raw_syscalls:sys_{enter,exit}, etc are left "without"
2522 * a cgroup (on the root cgroup, sys wide, etc).
2523 *
2524 * Multiple cgroups:
2525 *
2526 * trace -G A -e sched:*switch -G B
2527 *
2528 * the syscall ones go to the 'A' cgroup, the sched:sched_switch goes
2529 * to the 'B' cgroup.
2530 *
2531 * evlist__set_default_cgroup() grabs a reference of the passed cgroup
2532 * only for the evsels still without a cgroup, i.e. evsel->cgroup == NULL.
2533 */
2534 if (trace->cgroup)
2535 evlist__set_default_cgroup(trace->evlist, trace->cgroup);
2536
514f1c67
ACM
2537 err = perf_evlist__create_maps(evlist, &trace->opts.target);
2538 if (err < 0) {
c24ff998 2539 fprintf(trace->output, "Problems parsing the target to trace, check your options!\n");
514f1c67
ACM
2540 goto out_delete_evlist;
2541 }
2542
752fde44
ACM
2543 err = trace__symbols_init(trace, evlist);
2544 if (err < 0) {
c24ff998 2545 fprintf(trace->output, "Problems initializing symbol libraries!\n");
03ad9747 2546 goto out_delete_evlist;
752fde44
ACM
2547 }
2548
75d50117 2549 perf_evlist__config(evlist, &trace->opts, &callchain_param);
fde54b78 2550
/* sig_handler is installed for both child exit and Ctrl+C */
f15eb531
NK
2551 signal(SIGCHLD, sig_handler);
2552 signal(SIGINT, sig_handler);
2553
2554 if (forks) {
6ef73ec4 2555 err = perf_evlist__prepare_workload(evlist, &trace->opts.target,
735f7e0b 2556 argv, false, NULL);
f15eb531 2557 if (err < 0) {
c24ff998 2558 fprintf(trace->output, "Couldn't run the workload!\n");
03ad9747 2559 goto out_delete_evlist;
f15eb531
NK
2560 }
2561 }
2562
514f1c67 2563 err = perf_evlist__open(evlist);
a8f23d8f
ACM
2564 if (err < 0)
2565 goto out_error_open;
514f1c67 2566
ba504235
WN
2567 err = bpf__apply_obj_config();
2568 if (err) {
2569 char errbuf[BUFSIZ];
2570
2571 bpf__strerror_apply_obj_config(err, errbuf, sizeof(errbuf));
2572 pr_err("ERROR: Apply config to BPF failed: %s\n",
2573 errbuf);
2574 goto out_error_open;
2575 }
2576
241b057c
ACM
2577 /*
2578 * Better not use !target__has_task() here because we need to cover the
2579 * case where no threads were specified in the command line, but a
2580 * workload was, and in that case we will fill in the thread_map when
2581 * we fork the workload in perf_evlist__prepare_workload.
2582 */
f078c385
ACM
2583 if (trace->filter_pids.nr > 0)
2584 err = perf_evlist__set_filter_pids(evlist, trace->filter_pids.nr, trace->filter_pids.entries);
e13798c7 2585 else if (thread_map__pid(evlist->threads, 0) == -1)
dd1a5037 2586 err = trace__set_filter_loop_pids(trace);
f078c385 2587
94ad89bc
ACM
2588 if (err < 0)
2589 goto out_error_mem;
2590
19867b61
ACM
2591 if (trace->ev_qualifier_ids.nr > 0) {
2592 err = trace__set_ev_qualifier_filter(trace);
2593 if (err < 0)
2594 goto out_errno;
19867b61 2595
2e5e5f87
ACM
2596 pr_debug("event qualifier tracepoint filter: %s\n",
2597 trace->syscalls.events.sys_exit->filter);
2598 }
19867b61 2599
/* On failure evsel is used by the out_error_apply_filters message below */
94ad89bc
ACM
2600 err = perf_evlist__apply_filters(evlist, &evsel);
2601 if (err < 0)
2602 goto out_error_apply_filters;
241b057c 2603
f74b9d3a 2604 err = perf_evlist__mmap(evlist, trace->opts.mmap_pages);
e09b18d4
ACM
2605 if (err < 0)
2606 goto out_error_mmap;
514f1c67 2607
e36b7821 2608 if (!target__none(&trace->opts.target) && !trace->opts.initial_delay)
cb24d01d
ACM
2609 perf_evlist__enable(evlist);
2610
f15eb531
NK
2611 if (forks)
2612 perf_evlist__start_workload(evlist);
2613
/* initial_delay is multiplied by 1000 for usleep(), then events are enabled */
e36b7821
AB
2614 if (trace->opts.initial_delay) {
2615 usleep(trace->opts.initial_delay * 1000);
2616 perf_evlist__enable(evlist);
2617 }
2618
e13798c7 2619 trace->multiple_threads = thread_map__pid(evlist->threads, 0) == -1 ||
42052bea
ACM
2620 evlist->threads->nr > 1 ||
2621 perf_evlist__first(evlist)->attr.inherit;
bd3dda9a
ACM
2622
2623 /*
2624 * Now that we already used evsel->attr to ask the kernel to setup the
2625 * events, lets reuse evsel->attr.sample_max_stack as the limit in
2626 * trace__resolve_callchain(), allowing per-event max-stack settings
2627 * to override an explicitly set --max-stack global setting.
2628 */
2629 evlist__for_each_entry(evlist, evsel) {
27de9b2b 2630 if (evsel__has_callchain(evsel) &&
bd3dda9a
ACM
2631 evsel->attr.sample_max_stack == 0)
2632 evsel->attr.sample_max_stack = trace->max_stack;
2633 }
/* Main loop: 'before' lets us tell whether this pass read any event */
514f1c67 2634again:
efd5745e 2635 before = trace->nr_events;
514f1c67
ACM
2636
2637 for (i = 0; i < evlist->nr_mmaps; i++) {
2638 union perf_event *event;
d7f55c62 2639 struct perf_mmap *md;
514f1c67 2640
d7f55c62 2641 md = &evlist->mmap[i];
b9bae2c8 2642 if (perf_mmap__read_init(md) < 0)
d7f55c62
KL
2643 continue;
2644
0019dc87 2645 while ((event = perf_mmap__read_event(md)) != NULL) {
514f1c67 2646 struct perf_sample sample;
514f1c67 2647
efd5745e 2648 ++trace->nr_events;
514f1c67 2649
514f1c67
ACM
2650 err = perf_evlist__parse_sample(evlist, event, &sample);
2651 if (err) {
c24ff998 2652 fprintf(trace->output, "Can't parse sample, err = %d, skipping...\n", err);
8e50d384 2653 goto next_event;
514f1c67
ACM
2654 }
2655
ddbb1b13 2656 trace__handle_event(trace, event, &sample);
8e50d384 2657next_event:
d6ace3df 2658 perf_mmap__consume(md);
20c5f10e 2659
ba209f85
ACM
2660 if (interrupted)
2661 goto out_disable;
02ac5421
ACM
2662
/* Once 'done' is seen: stop producing new events, keep reading what is queued */
2663 if (done && !draining) {
2664 perf_evlist__disable(evlist);
2665 draining = true;
2666 }
514f1c67 2667 }
d7f55c62 2668 perf_mmap__read_done(md);
514f1c67
ACM
2669 }
2670
/* Nothing read in this pass: poll (100 when done is set, -1 otherwise) unless draining */
efd5745e 2671 if (trace->nr_events == before) {
ba209f85 2672 int timeout = done ? 100 : -1;
f15eb531 2673
46fb3c21
ACM
2674 if (!draining && perf_evlist__poll(evlist, timeout) > 0) {
2675 if (perf_evlist__filter_pollfd(evlist, POLLERR | POLLHUP) == 0)
2676 draining = true;
2677
ba209f85 2678 goto again;
46fb3c21 2679 }
ba209f85
ACM
2680 } else {
2681 goto again;
f15eb531
NK
2682 }
2683
ba209f85 2684out_disable:
f3b623b8
ACM
2685 thread__zput(trace->current);
2686
ba209f85 2687 perf_evlist__disable(evlist);
514f1c67 2688
c522739d
ACM
2689 if (!err) {
2690 if (trace->summary)
2691 trace__fprintf_thread_summary(trace, trace->output);
2692
2693 if (trace->show_tool_stats) {
2694 fprintf(trace->output, "Stats:\n "
2695 " vfs_getname : %" PRIu64 "\n"
2696 " proc_getname: %" PRIu64 "\n",
2697 trace->stats.vfs_getname,
2698 trace->stats.proc_getname);
2699 }
2700 }
bf2575c1 2701
/* Common cleanup, also the target of every error label below */
514f1c67 2702out_delete_evlist:
33974a41
AV
2703 trace__symbols__exit(trace);
2704
514f1c67 2705 perf_evlist__delete(evlist);
9ea42ba4 2706 cgroup__put(trace->cgroup);
14a052df 2707 trace->evlist = NULL;
75b757ca 2708 trace->live = false;
514f1c67 2709 return err;
/* Error labels that share errbuf live in this block; it is only entered via goto */
6ef068cb
ACM
2710{
2711 char errbuf[BUFSIZ];
a8f23d8f 2712
2cc990ba 2713out_error_sched_stat_runtime:
988bdb31 2714 tracing_path__strerror_open_tp(errno, errbuf, sizeof(errbuf), "sched", "sched_stat_runtime");
2cc990ba
ACM
2715 goto out_error;
2716
801c67b0 2717out_error_raw_syscalls:
988bdb31 2718 tracing_path__strerror_open_tp(errno, errbuf, sizeof(errbuf), "raw_syscalls", "sys_(enter|exit)");
a8f23d8f
ACM
2719 goto out_error;
2720
e09b18d4
ACM
2721out_error_mmap:
2722 perf_evlist__strerror_mmap(evlist, errno, errbuf, sizeof(errbuf));
2723 goto out_error;
2724
a8f23d8f
ACM
2725out_error_open:
2726 perf_evlist__strerror_open(evlist, errno, errbuf, sizeof(errbuf));
2727
2728out_error:
6ef068cb 2729 fprintf(trace->output, "%s\n", errbuf);
87f91868 2730 goto out_delete_evlist;
94ad89bc
ACM
2731
2732out_error_apply_filters:
2733 fprintf(trace->output,
2734 "Failed to set filter \"%s\" on event %s with %d (%s)\n",
2735 evsel->filter, perf_evsel__name(evsel), errno,
c8b5f2c9 2736 str_error_r(errno, errbuf, sizeof(errbuf)));
94ad89bc 2737 goto out_delete_evlist;
514f1c67 2738}
5ed08dae
ACM
2739out_error_mem:
2740 fprintf(trace->output, "Not enough memory to run!\n");
2741 goto out_delete_evlist;
19867b61
ACM
2742
2743out_errno:
2744 fprintf(trace->output, "errno=%d,%s\n", errno, strerror(errno));
2745 goto out_delete_evlist;
a8f23d8f 2746}
514f1c67 2747
6810fc91
DA
2748static int trace__replay(struct trace *trace)
2749{
2750 const struct perf_evsel_str_handler handlers[] = {
c522739d 2751 { "probe:vfs_getname", trace__vfs_getname, },
6810fc91 2752 };
8ceb41d7 2753 struct perf_data data = {
eae8ad80
JO
2754 .file = {
2755 .path = input_name,
2756 },
2757 .mode = PERF_DATA_MODE_READ,
2758 .force = trace->force,
f5fc1412 2759 };
6810fc91 2760 struct perf_session *session;
003824e8 2761 struct perf_evsel *evsel;
6810fc91
DA
2762 int err = -1;
2763
2764 trace->tool.sample = trace__process_sample;
2765 trace->tool.mmap = perf_event__process_mmap;
384c671e 2766 trace->tool.mmap2 = perf_event__process_mmap2;
6810fc91
DA
2767 trace->tool.comm = perf_event__process_comm;
2768 trace->tool.exit = perf_event__process_exit;
2769 trace->tool.fork = perf_event__process_fork;
2770 trace->tool.attr = perf_event__process_attr;
f3b3614a 2771 trace->tool.tracing_data = perf_event__process_tracing_data;
6810fc91 2772 trace->tool.build_id = perf_event__process_build_id;
f3b3614a 2773 trace->tool.namespaces = perf_event__process_namespaces;
6810fc91 2774
0a8cb85c 2775 trace->tool.ordered_events = true;
6810fc91
DA
2776 trace->tool.ordering_requires_timestamps = true;
2777
2778 /* add tid to output */
2779 trace->multiple_threads = true;
2780
8ceb41d7 2781 session = perf_session__new(&data, false, &trace->tool);
6810fc91 2782 if (session == NULL)
52e02834 2783 return -1;
6810fc91 2784
aa07df6e
DA
2785 if (trace->opts.target.pid)
2786 symbol_conf.pid_list_str = strdup(trace->opts.target.pid);
2787
2788 if (trace->opts.target.tid)
2789 symbol_conf.tid_list_str = strdup(trace->opts.target.tid);
2790
0a7e6d1b 2791 if (symbol__init(&session->header.env) < 0)
cb2ffae2
NK
2792 goto out;
2793
8fb598e5
DA
2794 trace->host = &session->machines.host;
2795
6810fc91
DA
2796 err = perf_session__set_tracepoints_handlers(session, handlers);
2797 if (err)
2798 goto out;
2799
003824e8
NK
2800 evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
2801 "raw_syscalls:sys_enter");
9aca7f17
DA
2802 /* older kernels have syscalls tp versus raw_syscalls */
2803 if (evsel == NULL)
2804 evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
2805 "syscalls:sys_enter");
003824e8 2806
e281a960 2807 if (evsel &&
63f11c80 2808 (perf_evsel__init_raw_syscall_tp(evsel, trace__sys_enter) < 0 ||
e281a960 2809 perf_evsel__init_sc_tp_ptr_field(evsel, args))) {
003824e8
NK
2810 pr_err("Error during initialize raw_syscalls:sys_enter event\n");
2811 goto out;
2812 }
2813
2814 evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
2815 "raw_syscalls:sys_exit");
9aca7f17
DA
2816 if (evsel == NULL)
2817 evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
2818 "syscalls:sys_exit");
e281a960 2819 if (evsel &&
63f11c80 2820 (perf_evsel__init_raw_syscall_tp(evsel, trace__sys_exit) < 0 ||
e281a960 2821 perf_evsel__init_sc_tp_uint_field(evsel, ret))) {
003824e8 2822 pr_err("Error during initialize raw_syscalls:sys_exit event\n");
6810fc91
DA
2823 goto out;
2824 }
2825
e5cadb93 2826 evlist__for_each_entry(session->evlist, evsel) {
1e28fe0a
SF
2827 if (evsel->attr.type == PERF_TYPE_SOFTWARE &&
2828 (evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MAJ ||
2829 evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MIN ||
2830 evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS))
2831 evsel->handler = trace__pgfault;
2832 }
2833
6810fc91
DA
2834 setup_pager();
2835
b7b61cbe 2836 err = perf_session__process_events(session);
6810fc91
DA
2837 if (err)
2838 pr_err("Failed to process events, error %d", err);
2839
bf2575c1
DA
2840 else if (trace->summary)
2841 trace__fprintf_thread_summary(trace, trace->output);
2842
6810fc91
DA
2843out:
2844 perf_session__delete(session);
2845
2846 return err;
2847}
2848
1302d88e
ACM
/*
 * Print the banner that precedes the per-thread summary.
 *
 * Returns the value fprintf() reported, converted to size_t.
 */
static size_t trace__fprintf_threads_header(FILE *fp)
{
	return fprintf(fp, "\n Summary of events:\n\n");
}
2857
b535d523
ACM
2858DEFINE_RESORT_RB(syscall_stats, a->msecs > b->msecs,
2859 struct stats *stats;
2860 double msecs;
2861 int syscall;
2862)
2863{
2864 struct int_node *source = rb_entry(nd, struct int_node, rb_node);
2865 struct stats *stats = source->priv;
2866
2867 entry->syscall = source->i;
2868 entry->stats = stats;
2869 entry->msecs = stats ? (u64)stats->n * (avg_stats(stats) / NSEC_PER_MSEC) : 0;
2870}
2871
bf2575c1
DA
2872static size_t thread__dump_stats(struct thread_trace *ttrace,
2873 struct trace *trace, FILE *fp)
2874{
bf2575c1
DA
2875 size_t printed = 0;
2876 struct syscall *sc;
b535d523
ACM
2877 struct rb_node *nd;
2878 DECLARE_RESORT_RB_INTLIST(syscall_stats, ttrace->syscall_stats);
bf2575c1 2879
b535d523 2880 if (syscall_stats == NULL)
bf2575c1
DA
2881 return 0;
2882
2883 printed += fprintf(fp, "\n");
2884
834fd46d
MW
2885 printed += fprintf(fp, " syscall calls total min avg max stddev\n");
2886 printed += fprintf(fp, " (msec) (msec) (msec) (msec) (%%)\n");
2887 printed += fprintf(fp, " --------------- -------- --------- --------- --------- --------- ------\n");
99ff7150 2888
98a91837 2889 resort_rb__for_each_entry(nd, syscall_stats) {
b535d523 2890 struct stats *stats = syscall_stats_entry->stats;
bf2575c1
DA
2891 if (stats) {
2892 double min = (double)(stats->min) / NSEC_PER_MSEC;
2893 double max = (double)(stats->max) / NSEC_PER_MSEC;
2894 double avg = avg_stats(stats);
2895 double pct;
2896 u64 n = (u64) stats->n;
2897
2898 pct = avg ? 100.0 * stddev_stats(stats)/avg : 0.0;
2899 avg /= NSEC_PER_MSEC;
2900
b535d523 2901 sc = &trace->syscalls.table[syscall_stats_entry->syscall];
99ff7150 2902 printed += fprintf(fp, " %-15s", sc->name);
834fd46d 2903 printed += fprintf(fp, " %8" PRIu64 " %9.3f %9.3f %9.3f",
b535d523 2904 n, syscall_stats_entry->msecs, min, avg);
27a778b5 2905 printed += fprintf(fp, " %9.3f %9.2f%%\n", max, pct);
bf2575c1 2906 }
bf2575c1
DA
2907 }
2908
b535d523 2909 resort_rb__delete(syscall_stats);
bf2575c1 2910 printed += fprintf(fp, "\n\n");
1302d88e
ACM
2911
2912 return printed;
2913}
2914
96c14451 2915static size_t trace__fprintf_thread(FILE *fp, struct thread *thread, struct trace *trace)
896cbb56 2916{
96c14451 2917 size_t printed = 0;
89dceb22 2918 struct thread_trace *ttrace = thread__priv(thread);
896cbb56
DA
2919 double ratio;
2920
2921 if (ttrace == NULL)
2922 return 0;
2923
2924 ratio = (double)ttrace->nr_events / trace->nr_events * 100.0;
2925
15e65c69 2926 printed += fprintf(fp, " %s (%d), ", thread__comm_str(thread), thread->tid);
99ff7150 2927 printed += fprintf(fp, "%lu events, ", ttrace->nr_events);
15e65c69 2928 printed += fprintf(fp, "%.1f%%", ratio);
a2ea67d7
SF
2929 if (ttrace->pfmaj)
2930 printed += fprintf(fp, ", %lu majfaults", ttrace->pfmaj);
2931 if (ttrace->pfmin)
2932 printed += fprintf(fp, ", %lu minfaults", ttrace->pfmin);
03548ebf
ACM
2933 if (trace->sched)
2934 printed += fprintf(fp, ", %.3f msec\n", ttrace->runtime_ms);
2935 else if (fputc('\n', fp) != EOF)
2936 ++printed;
2937
bf2575c1 2938 printed += thread__dump_stats(ttrace, trace, fp);
896cbb56 2939
96c14451
ACM
2940 return printed;
2941}
896cbb56 2942
96c14451
ACM
2943static unsigned long thread__nr_events(struct thread_trace *ttrace)
2944{
2945 return ttrace ? ttrace->nr_events : 0;
2946}
2947
2948DEFINE_RESORT_RB(threads, (thread__nr_events(a->thread->priv) < thread__nr_events(b->thread->priv)),
2949 struct thread *thread;
2950)
2951{
2952 entry->thread = rb_entry(nd, struct thread, rb_node);
896cbb56
DA
2953}
2954
1302d88e
ACM
2955static size_t trace__fprintf_thread_summary(struct trace *trace, FILE *fp)
2956{
96c14451
ACM
2957 size_t printed = trace__fprintf_threads_header(fp);
2958 struct rb_node *nd;
91e467bc 2959 int i;
1302d88e 2960
91e467bc
KL
2961 for (i = 0; i < THREADS__TABLE_SIZE; i++) {
2962 DECLARE_RESORT_RB_MACHINE_THREADS(threads, trace->host, i);
96c14451 2963
91e467bc
KL
2964 if (threads == NULL) {
2965 fprintf(fp, "%s", "Error sorting output by nr_events!\n");
2966 return 0;
2967 }
896cbb56 2968
91e467bc
KL
2969 resort_rb__for_each_entry(nd, threads)
2970 printed += trace__fprintf_thread(fp, threads_entry->thread, trace);
96c14451 2971
91e467bc
KL
2972 resort_rb__delete(threads);
2973 }
96c14451 2974 return printed;
1302d88e
ACM
2975}
2976
ae9ed035
ACM
2977static int trace__set_duration(const struct option *opt, const char *str,
2978 int unset __maybe_unused)
2979{
2980 struct trace *trace = opt->value;
2981
2982 trace->duration_filter = atof(str);
2983 return 0;
2984}
2985
f078c385
ACM
2986static int trace__set_filter_pids(const struct option *opt, const char *str,
2987 int unset __maybe_unused)
2988{
2989 int ret = -1;
2990 size_t i;
2991 struct trace *trace = opt->value;
2992 /*
2993 * FIXME: introduce a intarray class, plain parse csv and create a
2994 * { int nr, int entries[] } struct...
2995 */
2996 struct intlist *list = intlist__new(str);
2997
2998 if (list == NULL)
2999 return -1;
3000
3001 i = trace->filter_pids.nr = intlist__nr_entries(list) + 1;
3002 trace->filter_pids.entries = calloc(i, sizeof(pid_t));
3003
3004 if (trace->filter_pids.entries == NULL)
3005 goto out;
3006
3007 trace->filter_pids.entries[0] = getpid();
3008
3009 for (i = 1; i < trace->filter_pids.nr; ++i)
3010 trace->filter_pids.entries[i] = intlist__entry(list, i - 1)->i;
3011
3012 intlist__delete(list);
3013 ret = 0;
3014out:
3015 return ret;
3016}
3017
c24ff998
ACM
3018static int trace__open_output(struct trace *trace, const char *filename)
3019{
3020 struct stat st;
3021
3022 if (!stat(filename, &st) && st.st_size) {
3023 char oldname[PATH_MAX];
3024
3025 scnprintf(oldname, sizeof(oldname), "%s.old", filename);
3026 unlink(oldname);
3027 rename(filename, oldname);
3028 }
3029
3030 trace->output = fopen(filename, "w");
3031
3032 return trace->output == NULL ? -errno : 0;
3033}
3034
598d02c5
SF
3035static int parse_pagefaults(const struct option *opt, const char *str,
3036 int unset __maybe_unused)
3037{
3038 int *trace_pgfaults = opt->value;
3039
3040 if (strcmp(str, "all") == 0)
3041 *trace_pgfaults |= TRACE_PFMAJ | TRACE_PFMIN;
3042 else if (strcmp(str, "maj") == 0)
3043 *trace_pgfaults |= TRACE_PFMAJ;
3044 else if (strcmp(str, "min") == 0)
3045 *trace_pgfaults |= TRACE_PFMIN;
3046 else
3047 return -1;
3048
3049 return 0;
3050}
3051
14a052df
ACM
3052static void evlist__set_evsel_handler(struct perf_evlist *evlist, void *handler)
3053{
3054 struct perf_evsel *evsel;
3055
e5cadb93 3056 evlist__for_each_entry(evlist, evsel)
14a052df
ACM
3057 evsel->handler = handler;
3058}
3059
d32855fa
ACM
3060static int evlist__set_syscall_tp_fields(struct perf_evlist *evlist)
3061{
3062 struct perf_evsel *evsel;
3063
3064 evlist__for_each_entry(evlist, evsel) {
3065 if (evsel->priv || !evsel->tp_format)
3066 continue;
3067
3068 if (strcmp(evsel->tp_format->system, "syscalls"))
3069 continue;
3070
3071 if (perf_evsel__init_syscall_tp(evsel))
3072 return -1;
3073
3074 if (!strncmp(evsel->tp_format->name, "sys_enter_", 10)) {
3075 struct syscall_tp *sc = evsel->priv;
3076
3077 if (__tp_field__init_ptr(&sc->args, sc->id.offset + sizeof(u64)))
3078 return -1;
3079 } else if (!strncmp(evsel->tp_format->name, "sys_exit_", 9)) {
3080 struct syscall_tp *sc = evsel->priv;
3081
3082 if (__tp_field__init_uint(&sc->ret, sizeof(u64), sc->id.offset + sizeof(u64), evsel->needs_swap))
3083 return -1;
3084 }
3085 }
3086
3087 return 0;
3088}
3089
017037ff
ACM
3090/*
3091 * XXX: Hackish, just splitting the combined -e+--event (syscalls
3092 * (raw_syscalls:{sys_{enter,exit}} + events (tracepoints, HW, SW, etc) to use
3093 * existing facilities unchanged (trace->ev_qualifier + parse_options()).
3094 *
3095 * It'd be better to introduce a parse_options() variant that would return a
3096 * list with the terms it didn't match to an event...
3097 */
3098static int trace__parse_events_option(const struct option *opt, const char *str,
3099 int unset __maybe_unused)
3100{
3101 struct trace *trace = (struct trace *)opt->value;
3102 const char *s = str;
3103 char *sep = NULL, *lists[2] = { NULL, NULL, };
27702bcf 3104 int len = strlen(str) + 1, err = -1, list, idx;
017037ff
ACM
3105 char *strace_groups_dir = system_path(STRACE_GROUPS_DIR);
3106 char group_name[PATH_MAX];
3107
3108 if (strace_groups_dir == NULL)
3109 return -1;
3110
3111 if (*s == '!') {
3112 ++s;
3113 trace->not_ev_qualifier = true;
3114 }
3115
3116 while (1) {
3117 if ((sep = strchr(s, ',')) != NULL)
3118 *sep = '\0';
3119
3120 list = 0;
27702bcf
ACM
3121 if (syscalltbl__id(trace->sctbl, s) >= 0 ||
3122 syscalltbl__strglobmatch_first(trace->sctbl, s, &idx) >= 0) {
017037ff
ACM
3123 list = 1;
3124 } else {
3125 path__join(group_name, sizeof(group_name), strace_groups_dir, s);
3126 if (access(group_name, R_OK) == 0)
3127 list = 1;
3128 }
3129
3130 if (lists[list]) {
3131 sprintf(lists[list] + strlen(lists[list]), ",%s", s);
3132 } else {
3133 lists[list] = malloc(len);
3134 if (lists[list] == NULL)
3135 goto out;
3136 strcpy(lists[list], s);
3137 }
3138
3139 if (!sep)
3140 break;
3141
3142 *sep = ',';
3143 s = sep + 1;
3144 }
3145
3146 if (lists[1] != NULL) {
3147 struct strlist_config slist_config = {
3148 .dirname = strace_groups_dir,
3149 };
3150
3151 trace->ev_qualifier = strlist__new(lists[1], &slist_config);
3152 if (trace->ev_qualifier == NULL) {
3153 fputs("Not enough memory to parse event qualifier", trace->output);
3154 goto out;
3155 }
3156
3157 if (trace__validate_ev_qualifier(trace))
3158 goto out;
b912885a 3159 trace->trace_syscalls = true;
017037ff
ACM
3160 }
3161
3162 err = 0;
3163
3164 if (lists[0]) {
3165 struct option o = OPT_CALLBACK('e', "event", &trace->evlist, "event",
3166 "event selector. use 'perf list' to list available events",
3167 parse_events_option);
3168 err = parse_events_option(&o, lists[0], 0);
3169 }
3170out:
3171 if (sep)
3172 *sep = ',';
3173
3174 return err;
3175}
3176
9ea42ba4
ACM
3177static int trace__parse_cgroups(const struct option *opt, const char *str, int unset)
3178{
3179 struct trace *trace = opt->value;
3180
3181 if (!list_empty(&trace->evlist->entries))
3182 return parse_cgroups(opt, str, unset);
3183
3184 trace->cgroup = evlist__findnew_cgroup(trace->evlist, str);
3185
3186 return 0;
3187}
3188
/*
 * Entry point of 'perf trace'.
 *
 * Parses the command line, prepares the event list and the BPF output
 * events, applies defaults, then dispatches to one of:
 *   - trace__record() when the first remaining argument is "record",
 *   - trace__replay() when an input file was given,
 *   - trace__run()    otherwise.
 *
 * Returns the value of the chosen path, or a negative value when setup
 * fails before dispatching.
 */
b0ad8ea6 3189int cmd_trace(int argc, const char **argv)
514f1c67 3190{
6fdd9cb7 3191 const char *trace_usage[] = {
f15eb531
NK
3192 "perf trace [<options>] [<command>]",
3193 "perf trace [<options>] -- <command> [<options>]",
5e2485b1
DA
3194 "perf trace record [<options>] [<command>]",
3195 "perf trace record [<options>] -- <command> [<options>]",
514f1c67
ACM
3196 NULL
3197 };
/*
 * Defaults. The UINT_MAX values of mmap_pages and max_stack are sentinels
 * meaning "not set by the user"; both are checked after option parsing.
 */
3198 struct trace trace = {
514f1c67
ACM
3199 .syscalls = {
3200 . max = -1,
3201 },
3202 .opts = {
3203 .target = {
3204 .uid = UINT_MAX,
3205 .uses_mmap = true,
3206 },
3207 .user_freq = UINT_MAX,
3208 .user_interval = ULLONG_MAX,
509051ea 3209 .no_buffering = true,
38d5447d 3210 .mmap_pages = UINT_MAX,
9d9cad76 3211 .proc_map_timeout = 500,
514f1c67 3212 },
007d66a0 3213 .output = stderr,
50c95cbd 3214 .show_comm = true,
b912885a 3215 .trace_syscalls = false,
44621819 3216 .kernel_syscallchains = false,
05614993 3217 .max_stack = UINT_MAX,
514f1c67 3218 };
/* Set by -o/--output; when NULL the output stays on stderr. */
c24ff998 3219 const char *output_name = NULL;
514f1c67 3220 const struct option trace_options[] = {
017037ff
ACM
3221 OPT_CALLBACK('e', "event", &trace, "event",
3222 "event/syscall selector. use 'perf list' to list available events",
3223 trace__parse_events_option),
50c95cbd
ACM
3224 OPT_BOOLEAN(0, "comm", &trace.show_comm,
3225 "show the thread COMM next to its id"),
c522739d 3226 OPT_BOOLEAN(0, "tool_stats", &trace.show_tool_stats, "show tool stats"),
017037ff
ACM
3227 OPT_CALLBACK(0, "expr", &trace, "expr", "list of syscalls/events to trace",
3228 trace__parse_events_option),
c24ff998 3229 OPT_STRING('o', "output", &output_name, "file", "output file name"),
6810fc91 3230 OPT_STRING('i', "input", &input_name, "file", "Analyze events in file"),
514f1c67
ACM
3231 OPT_STRING('p', "pid", &trace.opts.target.pid, "pid",
3232 "trace events on existing process id"),
ac9be8ee 3233 OPT_STRING('t', "tid", &trace.opts.target.tid, "tid",
514f1c67 3234 "trace events on existing thread id"),
fa0e4ffe
ACM
3235 OPT_CALLBACK(0, "filter-pids", &trace, "CSV list of pids",
3236 "pids to filter (by the kernel)", trace__set_filter_pids),
ac9be8ee 3237 OPT_BOOLEAN('a', "all-cpus", &trace.opts.target.system_wide,
514f1c67 3238 "system-wide collection from all CPUs"),
ac9be8ee 3239 OPT_STRING('C', "cpu", &trace.opts.target.cpu_list, "cpu",
514f1c67 3240 "list of cpus to monitor"),
6810fc91 3241 OPT_BOOLEAN(0, "no-inherit", &trace.opts.no_inherit,
514f1c67 3242 "child tasks do not inherit counters"),
994a1f78
JO
3243 OPT_CALLBACK('m', "mmap-pages", &trace.opts.mmap_pages, "pages",
3244 "number of mmap data pages",
3245 perf_evlist__parse_mmap_pages),
ac9be8ee 3246 OPT_STRING('u', "uid", &trace.opts.target.uid_str, "user",
514f1c67 3247 "user to profile"),
ae9ed035
ACM
3248 OPT_CALLBACK(0, "duration", &trace, "float",
3249 "show only events with duration > N.M ms",
3250 trace__set_duration),
1302d88e 3251 OPT_BOOLEAN(0, "sched", &trace.sched, "show blocking scheduler events"),
7c304ee0 3252 OPT_INCR('v', "verbose", &verbose, "be more verbose"),
4bb09192
DA
3253 OPT_BOOLEAN('T', "time", &trace.full_time,
3254 "Show full timestamp, not time relative to first start"),
0a6545bd
ACM
3255 OPT_BOOLEAN(0, "failure", &trace.failure_only,
3256 "Show only syscalls that failed"),
fd2eabaf
DA
3257 OPT_BOOLEAN('s', "summary", &trace.summary_only,
3258 "Show only syscall summary with statistics"),
3259 OPT_BOOLEAN('S', "with-summary", &trace.summary,
3260 "Show all syscalls and summary with statistics"),
598d02c5
SF
3261 OPT_CALLBACK_DEFAULT('F', "pf", &trace.trace_pgfaults, "all|maj|min",
3262 "Trace pagefaults", parse_pagefaults, "maj"),
e281a960 3263 OPT_BOOLEAN(0, "syscalls", &trace.trace_syscalls, "Trace syscalls"),
e366a6d8 3264 OPT_BOOLEAN('f', "force", &trace.force, "don't complain, do it"),
566a0885
MW
3265 OPT_CALLBACK(0, "call-graph", &trace.opts,
3266 "record_mode[,record_size]", record_callchain_help,
3267 &record_parse_callchain_opt),
44621819
ACM
3268 OPT_BOOLEAN(0, "kernel-syscall-graph", &trace.kernel_syscallchains,
3269 "Show the kernel callchains on the syscall exit path"),
5cf9c84e
ACM
3270 OPT_UINTEGER(0, "min-stack", &trace.min_stack,
3271 "Set the minimum stack depth when parsing the callchain, "
3272 "anything below the specified depth will be ignored."),
c6d4a494
ACM
3273 OPT_UINTEGER(0, "max-stack", &trace.max_stack,
3274 "Set the maximum stack depth when parsing the callchain, "
3275 "anything beyond the specified depth will be ignored. "
4cb93446 3276 "Default: kernel.perf_event_max_stack or " __stringify(PERF_MAX_STACK_DEPTH)),
591421e1
ACM
3277 OPT_BOOLEAN(0, "print-sample", &trace.print_sample,
3278 "print the PERF_RECORD_SAMPLE PERF_SAMPLE_ info, for debugging"),
9d9cad76
KL
3279 OPT_UINTEGER(0, "proc-map-timeout", &trace.opts.proc_map_timeout,
3280 "per thread proc mmap processing timeout in ms"),
9ea42ba4
ACM
3281 OPT_CALLBACK('G', "cgroup", &trace, "name", "monitor event in cgroup name only",
3282 trace__parse_cgroups),
e36b7821
AB
3283 OPT_UINTEGER('D', "delay", &trace.opts.initial_delay,
3284 "ms to wait before starting measurement after program "
3285 "start"),
514f1c67
ACM
3286 OPT_END()
3287 };
/*
 * Both flags start out true and are cleared below when the corresponding
 * field still holds its UINT_MAX sentinel after option parsing.
 */
ccd62a89 3288 bool __maybe_unused max_stack_user_set = true;
f3e459d1 3289 bool mmap_pages_user_set = true;
78e890ea 3290 struct perf_evsel *evsel;
6fdd9cb7 3291 const char * const trace_subcommands[] = { "record", NULL };
78e890ea 3292 int err = -1;
/* Scratch buffer for the error strings formatted below. */
32caf0d1 3293 char bf[BUFSIZ];
514f1c67 3294
/* Install sighandler_dump_stack for SIGSEGV and SIGFPE. */
4d08cb80
ACM
3295 signal(SIGSEGV, sighandler_dump_stack);
3296 signal(SIGFPE, sighandler_dump_stack);
3297
/* Both must exist before parsing: the option callbacks use them. */
14a052df 3298 trace.evlist = perf_evlist__new();
fd0db102 3299 trace.sctbl = syscalltbl__new();
14a052df 3300
fd0db102 3301 if (trace.evlist == NULL || trace.sctbl == NULL) {
14a052df 3302 pr_err("Not enough memory to run!\n");
ff8f695c 3303 err = -ENOMEM;
14a052df
ACM
3304 goto out;
3305 }
3306
6fdd9cb7
YS
3307 argc = parse_options_subcommand(argc, argv, trace_options, trace_subcommands,
3308 trace_usage, PARSE_OPT_STOP_AT_NON_OPTION);
fd2eabaf 3309
/* A cgroup was requested but -a/--all-cpus was not given. */
9ea42ba4
ACM
3310 if ((nr_cgroups || trace.cgroup) && !trace.opts.target.system_wide) {
3311 usage_with_options_msg(trace_usage, trace_options,
3312 "cgroup monitoring only available in system-wide mode");
3313 }
3314
/*
 * evsel is an ERR_PTR on failure; a NULL result is not an error and simply
 * skips the augmented syscalls setup below.
 */
78e890ea
ACM
3315 evsel = bpf__setup_output_event(trace.evlist, "__augmented_syscalls__");
3316 if (IS_ERR(evsel)) {
3317 bpf__strerror_setup_output_event(trace.evlist, PTR_ERR(evsel), bf, sizeof(bf));
e0b6d2ef
ACM
3318 pr_err("ERROR: Setup trace syscalls enter failed: %s\n", bf);
3319 goto out;
3320 }
3321
d3d1c4bd
ACM
3322 if (evsel) {
3323 if (perf_evsel__init_augmented_syscall_tp(evsel) ||
3324 perf_evsel__init_augmented_syscall_tp_args(evsel))
3325 goto out;
3326 trace.syscalls.events.augmented = evsel;
3327 }
3328
d7888573
WN
3329 err = bpf__setup_stdout(trace.evlist);
3330 if (err) {
3331 bpf__strerror_setup_stdout(trace.evlist, err, bf, sizeof(bf));
3332 pr_err("ERROR: Setup BPF stdout failed: %s\n", bf);
3333 goto out;
3334 }
3335
/*
 * err is 0 here; reset it so that the bare 'goto out' paths below report
 * failure.
 */
59247e33
ACM
3336 err = -1;
3337
/* Page fault tracing turns on address and time sampling. */
598d02c5
SF
3338 if (trace.trace_pgfaults) {
3339 trace.opts.sample_address = true;
3340 trace.opts.sample_time = true;
3341 }
3342
/* Sentinel untouched: the user gave no --mmap-pages. */
f3e459d1
ACM
3343 if (trace.opts.mmap_pages == UINT_MAX)
3344 mmap_pages_user_set = false;
3345
/*
 * Sentinel untouched: no --max-stack. Use the compile time depth when
 * replaying a file, the sysctl value otherwise.
 */
05614993 3346 if (trace.max_stack == UINT_MAX) {
029c75e5 3347 trace.max_stack = input_name ? PERF_MAX_STACK_DEPTH : sysctl__max_stack();
05614993
ACM
3348 max_stack_user_set = false;
3349 }
3350
/*
 * Asking for a min or max stack depth implies callchains: default to
 * "dwarf" when no --call-graph mode was chosen.
 */
3351#ifdef HAVE_DWARF_UNWIND_SUPPORT
75d50117 3352 if ((trace.min_stack || max_stack_user_set) && !callchain_param.enabled) {
05614993 3353 record_opts__parse_callchain(&trace.opts, &callchain_param, "dwarf", false);
75d50117 3354 }
05614993
ACM
3355#endif
3356
2ddd5c04 3357 if (callchain_param.enabled) {
/* With callchains and as root, pick a larger default mmap size. */
f3e459d1
ACM
3358 if (!mmap_pages_user_set && geteuid() == 0)
3359 trace.opts.mmap_pages = perf_event_mlock_kb_in_pages() * 4;
3360
566a0885 3361 symbol_conf.use_callchain = true;
f3e459d1 3362 }
566a0885 3363
/* Events were added while parsing options: give them all our handler. */
d32855fa 3364 if (trace.evlist->nr_entries > 0) {
14a052df 3365 evlist__set_evsel_handler(trace.evlist, trace__event_handler);
d32855fa
ACM
3366 if (evlist__set_syscall_tp_fields(trace.evlist)) {
3367 perror("failed to set syscalls:* tracepoint fields");
3368 goto out;
3369 }
3370 }
14a052df 3371
/*
 * 'perf trace record ...': hand over the remaining arguments. Note that this
 * returns directly and skips the out_close/out labels.
 */
1e28fe0a
SF
3372 if ((argc >= 1) && (strcmp(argv[0], "record") == 0))
3373 return trace__record(&trace, argc-1, &argv[1]);
3374
3375 /* summary_only implies summary option, but don't overwrite summary if set */
3376 if (trace.summary_only)
3377 trace.summary = trace.summary_only;
3378
/* Nothing was asked for explicitly: default to tracing syscalls. */
726f3234
ACM
3379 if (!trace.trace_syscalls && !trace.trace_pgfaults &&
3380 trace.evlist->nr_entries == 0 /* Was --events used? */) {
b912885a 3381 trace.trace_syscalls = true;
59247e33
ACM
3382 }
3383
/*
 * Redirect the output to the requested file. On failure jump to 'out'
 * rather than 'out_close', skipping the fclose() there.
 */
c24ff998
ACM
3384 if (output_name != NULL) {
3385 err = trace__open_output(&trace, output_name);
3386 if (err < 0) {
3387 perror("failed to create output file");
3388 goto out;
3389 }
3390 }
3391
602ad878 3392 err = target__validate(&trace.opts.target);
32caf0d1 3393 if (err) {
602ad878 3394 target__strerror(&trace.opts.target, err, bf, sizeof(bf));
c24ff998
ACM
3395 fprintf(trace.output, "%s", bf);
3396 goto out_close;
32caf0d1
NK
3397 }
3398
602ad878 3399 err = target__parse_uid(&trace.opts.target);
514f1c67 3400 if (err) {
602ad878 3401 target__strerror(&trace.opts.target, err, bf, sizeof(bf));
c24ff998
ACM
3402 fprintf(trace.output, "%s", bf);
3403 goto out_close;
514f1c67
ACM
3404 }
3405
/* No workload to run and no target given: trace system wide. */
602ad878 3406 if (!argc && target__none(&trace.opts.target))
ee76120e
NK
3407 trace.opts.target.system_wide = true;
3408
/* Replay a recorded file if one was given, otherwise trace live. */
6810fc91
DA
3409 if (input_name)
3410 err = trace__replay(&trace);
3411 else
3412 err = trace__run(&trace, argc, argv);
1302d88e 3413
c24ff998
ACM
3414out_close:
/* Only close what we opened; without -o the output is stderr. */
3415 if (output_name != NULL)
3416 fclose(trace.output);
3417out:
1302d88e 3418 return err;
514f1c67 3419}