u64 base_time;
FILE *output;
unsigned long nr_events;
+ unsigned long nr_events_printed;
+ unsigned long max_events;
struct strlist *ev_qualifier;
struct {
size_t nr;
} stats;
unsigned int max_stack;
unsigned int min_stack;
+ bool raw_augmented_syscalls;
bool not_ev_qualifier;
bool live;
bool full_time;
struct syscall_arg_fmt {
size_t (*scnprintf)(char *bf, size_t size, struct syscall_arg *arg);
+ unsigned long (*mask_val)(struct syscall_arg *arg, unsigned long val);
void *parm;
const char *name;
bool show_zero;
.arg = { [0] = { .scnprintf = SCA_HEX, /* addr */ },
[2] = { .scnprintf = SCA_MMAP_PROT, /* prot */ },
[3] = { .scnprintf = SCA_MMAP_FLAGS, /* flags */ }, }, },
+ { .name = "mount",
+ .arg = { [0] = { .scnprintf = SCA_FILENAME, /* dev_name */ },
+ [3] = { .scnprintf = SCA_MOUNT_FLAGS, /* flags */
+ .mask_val = SCAMV_MOUNT_FLAGS, /* flags */ }, }, },
{ .name = "mprotect",
.arg = { [0] = { .scnprintf = SCA_HEX, /* start */ },
[2] = { .scnprintf = SCA_MMAP_PROT, /* prot */ }, }, },
.arg = { [2] = { .scnprintf = SCA_SIGNUM, /* sig */ }, }, },
{ .name = "tkill",
.arg = { [1] = { .scnprintf = SCA_SIGNUM, /* sig */ }, }, },
- { .name = "umount2", .alias = "umount", },
+ { .name = "umount2", .alias = "umount",
+ .arg = { [0] = { .scnprintf = SCA_FILENAME, /* name */ }, }, },
{ .name = "uname", .alias = "newuname", },
{ .name = "unlinkat",
.arg = { [0] = { .scnprintf = SCA_FDAT, /* dfd */ }, }, },
return bsearch(name, syscall_fmts, nmemb, sizeof(struct syscall_fmt), syscall_fmt__cmp);
}
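+/*
+ * The syscall_fmts table is sorted by ->name, which is what the bsearch()
+ * in syscall_fmt__find() above uses, so looking up an entry by its alias
+ * has to be done with a linear search.
+ */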
+static struct syscall_fmt *syscall_fmt__find_by_alias(const char *alias)
+{
+ int i, nmemb = ARRAY_SIZE(syscall_fmts);
+
+ for (i = 0; i < nmemb; ++i) {
+ if (syscall_fmts[i].alias && strcmp(syscall_fmts[i].alias, alias) == 0)
+ return &syscall_fmts[i];
+ }
+
+ return NULL;
+}
+
/*
* is_exit: is this "exit" or "exit_group"?
* is_open: is this "open" or "openat"? To associate the fd returned in sys_exit with the pathname in sys_enter.
return scnprintf(bf, size, "arg%d: ", arg->idx);
}
+/*
+ * Mask whatever needs masking before checking if the value is in fact zero,
+ * e.g. the 'mount' syscall 'flags' argument, where a magic flag has to be
+ * ignored; see the comment in tools/perf/trace/beauty/mount_flags.c.
+ */
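+/*
+ * For instance (a sketch; the actual beautifier lives in
+ * tools/perf/trace/beauty/mount_flags.c), mount's ->mask_val strips the
+ * old MS_MGC_VAL magic from the top 16 bits of 'flags':
+ *
+ *	if ((flags & MS_MGC_MSK) == MS_MGC_VAL)
+ *		flags &= ~MS_MGC_MSK;
+ */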
+static unsigned long syscall__mask_val(struct syscall *sc, struct syscall_arg *arg, unsigned long val)
+{
+ if (sc->arg_fmt && sc->arg_fmt[arg->idx].mask_val)
+ return sc->arg_fmt[arg->idx].mask_val(arg, val);
+
+ return val;
+}
+
static size_t syscall__scnprintf_val(struct syscall *sc, char *bf, size_t size,
struct syscall_arg *arg, unsigned long val)
{
continue;
val = syscall_arg__val(&arg, arg.idx);
+ /*
+ * Some syscall args need a mask applied first; most don't,
+ * and syscall__mask_val() returns val untouched.
+ */
+ val = syscall__mask_val(sc, &arg, val);
/*
* Suppress this argument if its value is zero and
printed += fprintf(trace->output, "%-70s) ...\n", ttrace->entry_str);
ttrace->entry_pending = false;
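+ /* This interrupted entry was printed on its own, count it towards --max-events */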
+ ++trace->nr_events_printed;
+
return printed;
}
return printed;
}
-static void *syscall__augmented_args(struct syscall *sc, struct perf_sample *sample, int *augmented_args_size)
+static void *syscall__augmented_args(struct syscall *sc, struct perf_sample *sample, int *augmented_args_size, bool raw_augmented)
{
void *augmented_args = NULL;
+ /*
+ * For now, with BPF raw_augmented, we hook into raw_syscalls:sys_enter,
+ * where we get all 6 syscall args plus the tracepoint common fields
+ * (sizeof(long)) and the syscall_nr (another long). So we check if that
+ * is the case and, if so, look for the augmented args not just past
+ * sc->args_size, but past the full, fixed-size raw_syscalls:sys_enter
+ * payload.
+ *
+ * We'll revisit this later, passing sc->args_size to the BPF augmenter
+ * (now tools/perf/examples/bpf/augmented_raw_syscalls.c) so that it
+ * copies only what we need for each syscall, like what happens when we
+ * use syscalls:sys_enter_NAME, reducing the kernel/userspace traffic to
+ * just what is needed for each syscall.
+ */
+ int args_size = raw_augmented ? (8 * (int)sizeof(long)) : sc->args_size;
- *augmented_args_size = sample->raw_size - sc->args_size;
+ *augmented_args_size = sample->raw_size - args_size;
if (*augmented_args_size > 0)
- augmented_args = sample->raw_data + sc->args_size;
+ augmented_args = sample->raw_data + args_size;
return augmented_args;
}
* here and avoid using augmented syscalls when the evsel is the raw_syscalls one.
*/
if (evsel != trace->syscalls.events.sys_enter)
- augmented_args = syscall__augmented_args(sc, sample, &augmented_args_size);
+ augmented_args = syscall__augmented_args(sc, sample, &augmented_args_size, trace->raw_augmented_syscalls);
ttrace->entry_time = sample->time;
msg = ttrace->entry_str;
printed += scnprintf(msg + printed, trace__entry_str_size - printed, "%s(", sc->name);
goto out_put;
args = perf_evsel__sc_tp_ptr(evsel, args, sample);
- augmented_args = syscall__augmented_args(sc, sample, &augmented_args_size);
+ augmented_args = syscall__augmented_args(sc, sample, &augmented_args_size, trace->raw_augmented_syscalls);
syscall__scnprintf_args(sc, msg, sizeof(msg), args, augmented_args, augmented_args_size, trace, thread);
fprintf(trace->output, "%s", msg);
err = 0;
int max_stack = evsel->attr.sample_max_stack ?
evsel->attr.sample_max_stack :
trace->max_stack;
+ int err;
- if (machine__resolve(trace->host, &al, sample) < 0 ||
- thread__resolve_callchain(al.thread, cursor, evsel, sample, NULL, NULL, max_stack))
+ if (machine__resolve(trace->host, &al, sample) < 0)
return -1;
- return 0;
+ err = thread__resolve_callchain(al.thread, cursor, evsel, sample, NULL, NULL, max_stack);
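+ /* drop the thread reference obtained in machine__resolve() */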
+ addr_location__put(&al);
+ return err;
}
static int trace__fprintf_callchain(struct trace *trace, struct perf_sample *sample)
fputc('\n', trace->output);
+ /*
+ * For the sake of --max-events, an 'event' is a non-filtered
+ * sys_enter + sys_exit pair, or any other tracepoint event.
+ */
+ if (++trace->nr_events_printed == trace->max_events && trace->max_events != ULONG_MAX)
+ interrupted = true;
+
if (callchain_ret > 0)
trace__fprintf_callchain(trace, sample);
else if (callchain_ret < 0)
{
binary__fprintf(sample->raw_data, sample->raw_size, 8,
bpf_output__printer, NULL, trace->output);
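+ /* The raw dump of a bpf-output event counts towards --max-events too */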
+ ++trace->nr_events_printed;
}
static int trace__event_handler(struct trace *trace, struct perf_evsel *evsel,
union perf_event *event __maybe_unused,
struct perf_sample *sample)
{
- struct thread *thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
+ struct thread *thread;
int callchain_ret = 0;
+ /*
+ * Check if we called perf_evsel__disable(evsel), for instance because
+ * this event's max_events was hit: if so, this is a leftover entry in
+ * the ring buffer that we should discard, since the maximum number of
+ * events has already been considered/printed.
+ */
+ if (evsel->disabled)
+ return 0;
+
+ thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
if (sample->callchain) {
callchain_ret = trace__resolve_callchain(trace, evsel, sample, &callchain_cursor);
event_format__fprintf(evsel->tp_format, sample->cpu,
sample->raw_data, sample->raw_size,
trace->output);
+ ++trace->nr_events_printed;
+
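+ /*
+ * This evsel reached its --max-events quota: disable it, so that the
+ * kernel stops generating this event, and close it.
+ */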
+ if (evsel->max_events != ULONG_MAX && ++evsel->nr_events_printed == evsel->max_events) {
+ perf_evsel__disable(evsel);
+ perf_evsel__close(evsel);
+ }
}
}
trace__fprintf_callchain(trace, sample);
else if (callchain_ret < 0)
pr_err("Problem processing %s callchain, skipping...\n", perf_evsel__name(evsel));
- thread__put(thread);
out:
+ thread__put(thread);
return 0;
}
trace__fprintf_callchain(trace, sample);
else if (callchain_ret < 0)
pr_err("Problem processing %s callchain, skipping...\n", perf_evsel__name(evsel));
+
+ ++trace->nr_events_printed;
out:
err = 0;
out_put:
tracepoint_handler handler = evsel->handler;
handler(trace, evsel, event, sample);
}
+
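+ /* The --max-events total was reached, tell the main loop to stop */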
+ if (trace->nr_events_printed >= trace->max_events && trace->max_events != ULONG_MAX)
+ interrupted = true;
}
static int trace__add_syscall_newtp(struct trace *trace)
int timeout = done ? 100 : -1;
if (!draining && perf_evlist__poll(evlist, timeout) > 0) {
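+ /* fds for evsels closed on the fly (e.g. on hitting a --max-events quota) poll as POLLNVAL */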
- if (perf_evlist__filter_pollfd(evlist, POLLERR | POLLHUP) == 0)
+ if (perf_evlist__filter_pollfd(evlist, POLLERR | POLLHUP | POLLNVAL) == 0)
draining = true;
goto again;
int len = strlen(str) + 1, err = -1, list, idx;
char *strace_groups_dir = system_path(STRACE_GROUPS_DIR);
char group_name[PATH_MAX];
+ struct syscall_fmt *fmt;
if (strace_groups_dir == NULL)
return -1;
if (syscalltbl__id(trace->sctbl, s) >= 0 ||
syscalltbl__strglobmatch_first(trace->sctbl, s, &idx) >= 0) {
list = 1;
+ goto do_concat;
+ }
+
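+ /* Not a syscall name? Maybe it is an alias, e.g. "umount" for "umount2" */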
+ fmt = syscall_fmt__find_by_alias(s);
+ if (fmt != NULL) {
+ list = 1;
+ s = fmt->name;
} else {
path__join(group_name, sizeof(group_name), strace_groups_dir, s);
if (access(group_name, R_OK) == 0)
list = 1;
}
-
+do_concat:
if (lists[list]) {
sprintf(lists[list] + strlen(lists[list]), ",%s", s);
} else {
.trace_syscalls = false,
.kernel_syscallchains = false,
.max_stack = UINT_MAX,
+ .max_events = ULONG_MAX,
};
const char *output_name = NULL;
const struct option trace_options[] = {
&record_parse_callchain_opt),
OPT_BOOLEAN(0, "kernel-syscall-graph", &trace.kernel_syscallchains,
"Show the kernel callchains on the syscall exit path"),
+ OPT_ULONG(0, "max-events", &trace.max_events,
+ "Set the maximum number of events to print, exit after that is reached. "),
OPT_UINTEGER(0, "min-stack", &trace.min_stack,
"Set the minimum stack depth when parsing the callchain, "
"anything below the specified depth will be ignored."),
evsel->handler = trace__sys_enter;
evlist__for_each_entry(trace.evlist, evsel) {
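+ /*
+ * raw_syscalls:sys_exit in the evlist means the BPF raw augmenter is in
+ * use, where the sys_enter payload layout is fixed; take note, so that
+ * syscall__augmented_args() knows where the augmented args start.
+ */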
+ bool raw_syscalls_sys_exit = strcmp(perf_evsel__name(evsel), "raw_syscalls:sys_exit") == 0;
+
+ if (raw_syscalls_sys_exit) {
+ trace.raw_augmented_syscalls = true;
+ goto init_augmented_syscall_tp;
+ }
+
if (strstarts(perf_evsel__name(evsel), "syscalls:sys_exit_")) {
+init_augmented_syscall_tp:
perf_evsel__init_augmented_syscall_tp(evsel);
perf_evsel__init_augmented_syscall_tp_ret(evsel);
evsel->handler = trace__sys_exit;