perf tools: Fix attributes for '{}' defined event groups
[linux-2.6-block.git] / tools / perf / builtin-record.c
CommitLineData
abaff32a 1/*
bf9e1876
IM
2 * builtin-record.c
3 *
4 * Builtin record command: Record the profile of a workload
5 * (or a CPU, or a PID) into the perf.data output file - for
6 * later analysis via perf report.
abaff32a 7 */
b8f46c5a
XG
8#define _FILE_OFFSET_BITS 64
9
16f762a2 10#include "builtin.h"
bf9e1876
IM
11
12#include "perf.h"
13
6122e4e4 14#include "util/build-id.h"
6eda5838 15#include "util/util.h"
0e9b20b8 16#include "util/parse-options.h"
8ad8db37 17#include "util/parse-events.h"
6eda5838 18
7c6a1c65 19#include "util/header.h"
66e274f3 20#include "util/event.h"
361c99a6 21#include "util/evlist.h"
69aad6f1 22#include "util/evsel.h"
8f28827a 23#include "util/debug.h"
94c744b6 24#include "util/session.h"
45694aa7 25#include "util/tool.h"
8d06367f 26#include "util/symbol.h"
a12b51c4 27#include "util/cpumap.h"
fd78260b 28#include "util/thread_map.h"
7c6a1c65 29
97124d5e 30#include <unistd.h>
de9ac07b 31#include <sched.h>
a41794cd 32#include <sys/mman.h>
de9ac07b 33
78da39fa
BR
#ifndef HAVE_ON_EXIT
/*
 * Minimal on_exit() emulation for C libraries that do not provide it.
 *
 * Handlers are stored in a fixed-size table and dispatched from a single
 * atexit() hook.  The exit status is captured by the exit() wrapper macro
 * below, so handlers only see a meaningful status when the process leaves
 * through exit().
 */
#ifndef ATEXIT_MAX
#define ATEXIT_MAX 32
#endif
static int __on_exit_count = 0;
typedef void (*on_exit_func_t) (int, void *);
static on_exit_func_t __on_exit_funcs[ATEXIT_MAX];
static void *__on_exit_args[ATEXIT_MAX];
static int __exitcode = 0;
static void __handle_on_exit_funcs(void);
static int on_exit(on_exit_func_t function, void *arg);
#define exit(x) (exit)(__exitcode = (x))

/*
 * Register @function to be called with the exit status and @arg when the
 * process exits.
 *
 * Returns 0 on success, -ENOMEM when the table is full or the atexit()
 * hook could not be installed.
 */
static int on_exit(on_exit_func_t function, void *arg)
{
	if (__on_exit_count == ATEXIT_MAX)
		return -ENOMEM;

	/* The dispatcher is installed once, on the first registration. */
	if (__on_exit_count == 0 && atexit(__handle_on_exit_funcs) != 0)
		return -ENOMEM;

	__on_exit_funcs[__on_exit_count] = function;
	__on_exit_args[__on_exit_count++] = arg;
	return 0;
}

/*
 * atexit() hook: run the registered handlers in reverse order of
 * registration, which is the order the real on_exit()/atexit() use.
 * This matters in this file: perf_record__sig_exit() is registered first
 * and may re-raise a fatal signal, so it has to run after
 * perf_record__exit() has written the file header.
 */
static void __handle_on_exit_funcs(void)
{
	int i;

	for (i = __on_exit_count - 1; i >= 0; i--)
		__on_exit_funcs[i](__exitcode, __on_exit_args[i]);
}
#endif
65
7865e817
FW
/* How an already existing output file is treated when recording starts. */
enum write_mode_t {
	WRITE_FORCE = 0,	/* truncate the output (old data is renamed to *.old) */
	WRITE_APPEND = 1	/* keep the existing file and append to it */
};
70
d20deb64 71struct perf_record {
45694aa7 72 struct perf_tool tool;
d20deb64
ACM
73 struct perf_record_opts opts;
74 u64 bytes_written;
75 const char *output_name;
76 struct perf_evlist *evlist;
77 struct perf_session *session;
78 const char *progname;
79 int output;
80 unsigned int page_size;
81 int realtime_prio;
82 enum write_mode_t write_mode;
83 bool no_buildid;
84 bool no_buildid_cache;
85 bool force;
86 bool file_new;
87 bool append_file;
88 long samples;
89 off_t post_processing_offset;
0f82ebc4 90};
a21ca2ca 91
d20deb64 92static void advance_output(struct perf_record *rec, size_t size)
9215545e 93{
d20deb64 94 rec->bytes_written += size;
9215545e
TZ
95}
96
8d3eca20 97static int write_output(struct perf_record *rec, void *buf, size_t size)
f5970550
PZ
98{
99 while (size) {
d20deb64 100 int ret = write(rec->output, buf, size);
f5970550 101
8d3eca20
DA
102 if (ret < 0) {
103 pr_err("failed to write\n");
104 return -1;
105 }
f5970550
PZ
106
107 size -= ret;
108 buf += ret;
109
d20deb64 110 rec->bytes_written += ret;
f5970550 111 }
8d3eca20
DA
112
113 return 0;
f5970550
PZ
114}
115
45694aa7 116static int process_synthesized_event(struct perf_tool *tool,
d20deb64 117 union perf_event *event,
1d037ca1
IT
118 struct perf_sample *sample __maybe_unused,
119 struct machine *machine __maybe_unused)
234fbbf5 120{
45694aa7 121 struct perf_record *rec = container_of(tool, struct perf_record, tool);
8d3eca20
DA
122 if (write_output(rec, event, event->header.size) < 0)
123 return -1;
124
234fbbf5
ACM
125 return 0;
126}
127
8d3eca20 128static int perf_record__mmap_read(struct perf_record *rec,
d20deb64 129 struct perf_mmap *md)
de9ac07b 130{
744bd8aa 131 unsigned int head = perf_mmap__read_head(md);
de9ac07b 132 unsigned int old = md->prev;
d20deb64 133 unsigned char *data = md->base + rec->page_size;
de9ac07b
PZ
134 unsigned long size;
135 void *buf;
8d3eca20 136 int rc = 0;
de9ac07b 137
dc82009a 138 if (old == head)
8d3eca20 139 return 0;
dc82009a 140
d20deb64 141 rec->samples++;
de9ac07b
PZ
142
143 size = head - old;
144
145 if ((old & md->mask) + size != (head & md->mask)) {
146 buf = &data[old & md->mask];
147 size = md->mask + 1 - (old & md->mask);
148 old += size;
021e9f47 149
8d3eca20
DA
150 if (write_output(rec, buf, size) < 0) {
151 rc = -1;
152 goto out;
153 }
de9ac07b
PZ
154 }
155
156 buf = &data[old & md->mask];
157 size = head - old;
158 old += size;
021e9f47 159
8d3eca20
DA
160 if (write_output(rec, buf, size) < 0) {
161 rc = -1;
162 goto out;
163 }
de9ac07b
PZ
164
165 md->prev = old;
115d2d89 166 perf_mmap__write_tail(md, old);
8d3eca20
DA
167
168out:
169 return rc;
de9ac07b
PZ
170}
171
/*
 * Flags shared between sig_handler() and the main flow.  They are written
 * from a signal handler, so they use volatile sig_atomic_t, the only object
 * type the C standard guarantees can be safely stored from a handler.
 */
static volatile sig_atomic_t done = 0;		/* set once any handled signal arrives */
static volatile sig_atomic_t signr = -1;	/* last signal received, -1 if none */
static volatile sig_atomic_t child_finished = 0;	/* set when SIGCHLD was seen */

/* Record which signal arrived and ask the record loop to stop. */
static void sig_handler(int sig)
{
	if (sig == SIGCHLD)
		child_finished = 1;

	done = 1;
	signr = sig;
}
184
1d037ca1 185static void perf_record__sig_exit(int exit_status __maybe_unused, void *arg)
f7b7c26e 186{
d20deb64 187 struct perf_record *rec = arg;
33e49ea7
AK
188 int status;
189
d20deb64 190 if (rec->evlist->workload.pid > 0) {
33e49ea7 191 if (!child_finished)
d20deb64 192 kill(rec->evlist->workload.pid, SIGTERM);
33e49ea7
AK
193
194 wait(&status);
195 if (WIFSIGNALED(status))
d20deb64 196 psignal(WTERMSIG(status), rec->progname);
33e49ea7 197 }
933da83a 198
18483b81 199 if (signr == -1 || signr == SIGUSR1)
f7b7c26e
PZ
200 return;
201
202 signal(signr, SIG_DFL);
203 kill(getpid(), signr);
de9ac07b
PZ
204}
205
a91e5431
ACM
206static bool perf_evlist__equal(struct perf_evlist *evlist,
207 struct perf_evlist *other)
208{
209 struct perf_evsel *pos, *pair;
210
211 if (evlist->nr_entries != other->nr_entries)
212 return false;
213
0c21f736 214 pair = perf_evlist__first(other);
a91e5431
ACM
215
216 list_for_each_entry(pos, &evlist->entries, node) {
217 if (memcmp(&pos->attr, &pair->attr, sizeof(pos->attr) != 0))
218 return false;
0c21f736 219 pair = perf_evsel__next(pair);
a91e5431
ACM
220 }
221
222 return true;
223}
224
8d3eca20 225static int perf_record__open(struct perf_record *rec)
dd7927f4 226{
6a4bb04c 227 struct perf_evsel *pos;
d20deb64
ACM
228 struct perf_evlist *evlist = rec->evlist;
229 struct perf_session *session = rec->session;
230 struct perf_record_opts *opts = &rec->opts;
8d3eca20 231 int rc = 0;
dd7927f4 232
cac21425
JO
233 /*
234 * Set the evsel leader links before we configure attributes,
235 * since some might depend on this info.
236 */
6a4bb04c 237 if (opts->group)
63dab225 238 perf_evlist__set_leader(evlist);
6a4bb04c 239
cac21425
JO
240 perf_evlist__config_attrs(evlist, opts);
241
dd7927f4
ACM
242 list_for_each_entry(pos, &evlist->entries, node) {
243 struct perf_event_attr *attr = &pos->attr;
244 /*
245 * Check if parse_single_tracepoint_event has already asked for
246 * PERF_SAMPLE_TIME.
247 *
248 * XXX this is kludgy but short term fix for problems introduced by
249 * eac23d1c that broke 'perf script' by having different sample_types
250 * when using multiple tracepoint events when we use a perf binary
251 * that tries to use sample_id_all on an older kernel.
252 *
253 * We need to move counter creation to perf_session, support
254 * different sample_types, etc.
255 */
256 bool time_needed = attr->sample_type & PERF_SAMPLE_TIME;
d6d901c2 257
0c978128
ACM
258fallback_missing_features:
259 if (opts->exclude_guest_missing)
260 attr->exclude_guest = attr->exclude_host = 0;
dd7927f4 261retry_sample_id:
808e1226 262 attr->sample_id_all = opts->sample_id_all_missing ? 0 : 1;
dd7927f4 263try_again:
6a4bb04c 264 if (perf_evsel__open(pos, evlist->cpus, evlist->threads) < 0) {
d6d901c2
ZY
265 int err = errno;
266
c286c419 267 if (err == EPERM || err == EACCES) {
b8631e6e 268 ui__error_paranoid();
8d3eca20
DA
269 rc = -err;
270 goto out;
bea03405 271 } else if (err == ENODEV && opts->target.cpu_list) {
8d3eca20
DA
272 pr_err("No such device - did you specify"
273 " an out-of-range profile CPU?\n");
274 rc = -err;
275 goto out;
0c978128
ACM
276 } else if (err == EINVAL) {
277 if (!opts->exclude_guest_missing &&
278 (attr->exclude_guest || attr->exclude_host)) {
279 pr_debug("Old kernel, cannot exclude "
280 "guest or host samples.\n");
281 opts->exclude_guest_missing = true;
282 goto fallback_missing_features;
808e1226 283 } else if (!opts->sample_id_all_missing) {
0c978128
ACM
284 /*
285 * Old kernel, no attr->sample_id_type_all field
286 */
808e1226 287 opts->sample_id_all_missing = true;
0c978128
ACM
288 if (!opts->sample_time && !opts->raw_samples && !time_needed)
289 attr->sample_type &= ~PERF_SAMPLE_TIME;
290
291 goto retry_sample_id;
292 }
d6d901c2 293 }
3da297a6 294
d6d901c2
ZY
295 /*
296 * If it's cycles then fall back to hrtimer
297 * based cpu-clock-tick sw counter, which
028d455b
DA
298 * is always available even if no PMU support.
299 *
300 * PPC returns ENXIO until 2.6.37 (behavior changed
301 * with commit b0a873e).
d6d901c2 302 */
028d455b
DA
303 if ((err == ENOENT || err == ENXIO)
304 && attr->type == PERF_TYPE_HARDWARE
d6d901c2
ZY
305 && attr->config == PERF_COUNT_HW_CPU_CYCLES) {
306
307 if (verbose)
ca6a4258
DA
308 ui__warning("The cycles event is not supported, "
309 "trying to fall back to cpu-clock-ticks\n");
d6d901c2
ZY
310 attr->type = PERF_TYPE_SOFTWARE;
311 attr->config = PERF_COUNT_SW_CPU_CLOCK;
d1cae34d
DA
312 if (pos->name) {
313 free(pos->name);
314 pos->name = NULL;
315 }
d6d901c2
ZY
316 goto try_again;
317 }
ca6a4258
DA
318
319 if (err == ENOENT) {
3780f488 320 ui__error("The %s event is not supported.\n",
7289f83c 321 perf_evsel__name(pos));
8d3eca20
DA
322 rc = -err;
323 goto out;
2305c82f
DA
324 } else if ((err == EOPNOTSUPP) && (attr->precise_ip)) {
325 ui__error("\'precise\' request may not be supported. "
326 "Try removing 'p' modifier\n");
327 rc = -err;
328 goto out;
ca6a4258
DA
329 }
330
d6d901c2 331 printf("\n");
1863fbbb
SE
332 error("sys_perf_event_open() syscall returned with %d "
333 "(%s) for event %s. /bin/dmesg may provide "
334 "additional information.\n",
335 err, strerror(err), perf_evsel__name(pos));
bfd45118
SK
336
337#if defined(__i386__) || defined(__x86_64__)
8d3eca20
DA
338 if (attr->type == PERF_TYPE_HARDWARE &&
339 err == EOPNOTSUPP) {
340 pr_err("No hardware sampling interrupt available."
341 " No APIC? If so then you can boot the kernel"
342 " with the \"lapic\" boot parameter to"
343 " force-enable it.\n");
344 rc = -err;
345 goto out;
346 }
bfd45118
SK
347#endif
348
8d3eca20
DA
349 pr_err("No CONFIG_PERF_EVENTS=y kernel support configured?\n");
350 rc = -err;
351 goto out;
c171b552
LZ
352 }
353 }
a43d3f08 354
1491a632 355 if (perf_evlist__apply_filters(evlist)) {
0a102479
FW
356 error("failed to set filter with %d (%s)\n", errno,
357 strerror(errno));
8d3eca20
DA
358 rc = -1;
359 goto out;
0a102479
FW
360 }
361
18e60939 362 if (perf_evlist__mmap(evlist, opts->mmap_pages, false) < 0) {
8d3eca20
DA
363 if (errno == EPERM) {
364 pr_err("Permission error mapping pages.\n"
365 "Consider increasing "
366 "/proc/sys/kernel/perf_event_mlock_kb,\n"
367 "or try again with a smaller value of -m/--mmap_pages.\n"
368 "(current value: %d)\n", opts->mmap_pages);
369 rc = -errno;
0089fa98
JO
370 } else if (!is_power_of_2(opts->mmap_pages) &&
371 (opts->mmap_pages != UINT_MAX)) {
8d3eca20
DA
372 pr_err("--mmap_pages/-m value must be a power of two.");
373 rc = -EINVAL;
374 } else {
375 pr_err("failed to mmap with %d (%s)\n", errno, strerror(errno));
376 rc = -errno;
377 }
378 goto out;
18e60939 379 }
0a27d7f9 380
d20deb64 381 if (rec->file_new)
a91e5431
ACM
382 session->evlist = evlist;
383 else {
384 if (!perf_evlist__equal(session->evlist, evlist)) {
385 fprintf(stderr, "incompatible append\n");
8d3eca20
DA
386 rc = -1;
387 goto out;
a91e5431
ACM
388 }
389 }
390
7b56cce2 391 perf_session__set_id_hdr_size(session);
8d3eca20
DA
392out:
393 return rc;
16c8a109
PZ
394}
395
d20deb64 396static int process_buildids(struct perf_record *rec)
6122e4e4 397{
d20deb64 398 u64 size = lseek(rec->output, 0, SEEK_CUR);
6122e4e4 399
9f591fd7
ACM
400 if (size == 0)
401 return 0;
402
d20deb64
ACM
403 rec->session->fd = rec->output;
404 return __perf_session__process_events(rec->session, rec->post_processing_offset,
405 size - rec->post_processing_offset,
6122e4e4
ACM
406 size, &build_id__mark_dso_hit_ops);
407}
408
8d3eca20 409static void perf_record__exit(int status, void *arg)
f5970550 410{
d20deb64
ACM
411 struct perf_record *rec = arg;
412
8d3eca20
DA
413 if (status != 0)
414 return;
415
d20deb64
ACM
416 if (!rec->opts.pipe_output) {
417 rec->session->header.data_size += rec->bytes_written;
418
419 if (!rec->no_buildid)
420 process_buildids(rec);
421 perf_session__write_header(rec->session, rec->evlist,
422 rec->output, true);
423 perf_session__delete(rec->session);
424 perf_evlist__delete(rec->evlist);
d65a458b 425 symbol__exit();
c7929e47 426 }
f5970550
PZ
427}
428
8115d60c 429static void perf_event__synthesize_guest_os(struct machine *machine, void *data)
a1645ce1
ZY
430{
431 int err;
45694aa7 432 struct perf_tool *tool = data;
a1645ce1 433
23346f21 434 if (machine__is_host(machine))
a1645ce1
ZY
435 return;
436
437 /*
438 *As for guest kernel when processing subcommand record&report,
439 *we arrange module mmap prior to guest kernel mmap and trigger
440 *a preload dso because default guest module symbols are loaded
441 *from guest kallsyms instead of /lib/modules/XXX/XXX. This
442 *method is used to avoid symbol missing when the first addr is
443 *in module instead of in guest kernel.
444 */
45694aa7 445 err = perf_event__synthesize_modules(tool, process_synthesized_event,
743eb868 446 machine);
a1645ce1
ZY
447 if (err < 0)
448 pr_err("Couldn't record guest kernel [%d]'s reference"
23346f21 449 " relocation symbol.\n", machine->pid);
a1645ce1 450
a1645ce1
ZY
451 /*
452 * We use _stext for guest kernel because guest kernel's /proc/kallsyms
453 * have no _text sometimes.
454 */
45694aa7 455 err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
743eb868 456 machine, "_text");
a1645ce1 457 if (err < 0)
45694aa7 458 err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
743eb868 459 machine, "_stext");
a1645ce1
ZY
460 if (err < 0)
461 pr_err("Couldn't record guest kernel [%d]'s reference"
23346f21 462 " relocation symbol.\n", machine->pid);
a1645ce1
ZY
463}
464
98402807
FW
465static struct perf_event_header finished_round_event = {
466 .size = sizeof(struct perf_event_header),
467 .type = PERF_RECORD_FINISHED_ROUND,
468};
469
8d3eca20 470static int perf_record__mmap_read_all(struct perf_record *rec)
98402807 471{
0e2e63dd 472 int i;
8d3eca20 473 int rc = 0;
98402807 474
d20deb64 475 for (i = 0; i < rec->evlist->nr_mmaps; i++) {
8d3eca20
DA
476 if (rec->evlist->mmap[i].base) {
477 if (perf_record__mmap_read(rec, &rec->evlist->mmap[i]) != 0) {
478 rc = -1;
479 goto out;
480 }
481 }
98402807
FW
482 }
483
2eeaaa09 484 if (perf_header__has_feat(&rec->session->header, HEADER_TRACING_DATA))
8d3eca20
DA
485 rc = write_output(rec, &finished_round_event,
486 sizeof(finished_round_event));
487
488out:
489 return rc;
98402807
FW
490}
491
d20deb64 492static int __cmd_record(struct perf_record *rec, int argc, const char **argv)
16c8a109 493{
abaff32a 494 struct stat st;
abaff32a 495 int flags;
781ba9d2 496 int err, output, feat;
8b412664 497 unsigned long waking = 0;
46be604b 498 const bool forks = argc > 0;
23346f21 499 struct machine *machine;
45694aa7 500 struct perf_tool *tool = &rec->tool;
d20deb64
ACM
501 struct perf_record_opts *opts = &rec->opts;
502 struct perf_evlist *evsel_list = rec->evlist;
503 const char *output_name = rec->output_name;
504 struct perf_session *session;
de9ac07b 505
d20deb64 506 rec->progname = argv[0];
33e49ea7 507
d20deb64 508 rec->page_size = sysconf(_SC_PAGE_SIZE);
de9ac07b 509
d20deb64 510 on_exit(perf_record__sig_exit, rec);
f5970550
PZ
511 signal(SIGCHLD, sig_handler);
512 signal(SIGINT, sig_handler);
18483b81 513 signal(SIGUSR1, sig_handler);
f5970550 514
d7065adb
FBH
515 if (!output_name) {
516 if (!fstat(STDOUT_FILENO, &st) && S_ISFIFO(st.st_mode))
d20deb64 517 opts->pipe_output = true;
d7065adb 518 else
d20deb64 519 rec->output_name = output_name = "perf.data";
d7065adb
FBH
520 }
521 if (output_name) {
522 if (!strcmp(output_name, "-"))
d20deb64 523 opts->pipe_output = true;
d7065adb 524 else if (!stat(output_name, &st) && st.st_size) {
d20deb64 525 if (rec->write_mode == WRITE_FORCE) {
d7065adb
FBH
526 char oldname[PATH_MAX];
527 snprintf(oldname, sizeof(oldname), "%s.old",
528 output_name);
529 unlink(oldname);
530 rename(output_name, oldname);
531 }
d20deb64
ACM
532 } else if (rec->write_mode == WRITE_APPEND) {
533 rec->write_mode = WRITE_FORCE;
266e0e21 534 }
97124d5e
PZ
535 }
536
f887f301 537 flags = O_CREAT|O_RDWR;
d20deb64
ACM
538 if (rec->write_mode == WRITE_APPEND)
539 rec->file_new = 0;
abaff32a
IM
540 else
541 flags |= O_TRUNC;
542
d20deb64 543 if (opts->pipe_output)
529870e3
TZ
544 output = STDOUT_FILENO;
545 else
546 output = open(output_name, flags, S_IRUSR | S_IWUSR);
de9ac07b
PZ
547 if (output < 0) {
548 perror("failed to create output file");
8d3eca20 549 return -1;
de9ac07b
PZ
550 }
551
d20deb64
ACM
552 rec->output = output;
553
7865e817 554 session = perf_session__new(output_name, O_WRONLY,
d20deb64 555 rec->write_mode == WRITE_FORCE, false, NULL);
94c744b6 556 if (session == NULL) {
a9a70bbc
ACM
557 pr_err("Not enough memory for reading perf file header\n");
558 return -1;
559 }
560
d20deb64
ACM
561 rec->session = session;
562
781ba9d2
RR
563 for (feat = HEADER_FIRST_FEATURE; feat < HEADER_LAST_FEATURE; feat++)
564 perf_header__set_feat(&session->header, feat);
565
566 if (rec->no_buildid)
567 perf_header__clear_feat(&session->header, HEADER_BUILD_ID);
568
569 if (!have_tracepoints(&evsel_list->entries))
2eeaaa09 570 perf_header__clear_feat(&session->header, HEADER_TRACING_DATA);
baa2f6ce 571
330aa675
SE
572 if (!rec->opts.branch_stack)
573 perf_header__clear_feat(&session->header, HEADER_BRANCH_STACK);
574
d20deb64 575 if (!rec->file_new) {
a91e5431 576 err = perf_session__read_header(session, output);
4dc0a04b 577 if (err < 0)
39d17dac 578 goto out_delete_session;
4dc0a04b
ACM
579 }
580
d4db3f16 581 if (forks) {
d20deb64 582 err = perf_evlist__prepare_workload(evsel_list, opts, argv);
35b9d88e
ACM
583 if (err < 0) {
584 pr_err("Couldn't run the workload!\n");
585 goto out_delete_session;
856e9660 586 }
856e9660
PZ
587 }
588
8d3eca20
DA
589 if (perf_record__open(rec) != 0) {
590 err = -1;
591 goto out_delete_session;
592 }
de9ac07b 593
712a4b60 594 /*
d20deb64 595 * perf_session__delete(session) will be called at perf_record__exit()
712a4b60 596 */
d20deb64 597 on_exit(perf_record__exit, rec);
712a4b60 598
d20deb64 599 if (opts->pipe_output) {
529870e3
TZ
600 err = perf_header__write_pipe(output);
601 if (err < 0)
8d3eca20 602 goto out_delete_session;
d20deb64 603 } else if (rec->file_new) {
a91e5431
ACM
604 err = perf_session__write_header(session, evsel_list,
605 output, false);
d5eed904 606 if (err < 0)
8d3eca20 607 goto out_delete_session;
56b03f3c
ACM
608 }
609
d3665498 610 if (!rec->no_buildid
e20960c0 611 && !perf_header__has_feat(&session->header, HEADER_BUILD_ID)) {
d3665498 612 pr_err("Couldn't generate buildids. "
e20960c0 613 "Use --no-buildid to profile anyway.\n");
8d3eca20
DA
614 err = -1;
615 goto out_delete_session;
e20960c0
RR
616 }
617
d20deb64 618 rec->post_processing_offset = lseek(output, 0, SEEK_CUR);
6122e4e4 619
743eb868
ACM
620 machine = perf_session__find_host_machine(session);
621 if (!machine) {
622 pr_err("Couldn't find native kernel information.\n");
8d3eca20
DA
623 err = -1;
624 goto out_delete_session;
743eb868
ACM
625 }
626
d20deb64 627 if (opts->pipe_output) {
45694aa7 628 err = perf_event__synthesize_attrs(tool, session,
d20deb64 629 process_synthesized_event);
2c46dbb5
TZ
630 if (err < 0) {
631 pr_err("Couldn't synthesize attrs.\n");
8d3eca20 632 goto out_delete_session;
2c46dbb5 633 }
cd19a035 634
45694aa7 635 err = perf_event__synthesize_event_types(tool, process_synthesized_event,
743eb868 636 machine);
cd19a035
TZ
637 if (err < 0) {
638 pr_err("Couldn't synthesize event_types.\n");
8d3eca20 639 goto out_delete_session;
cd19a035 640 }
9215545e 641
361c99a6 642 if (have_tracepoints(&evsel_list->entries)) {
63e0c771
TZ
643 /*
644 * FIXME err <= 0 here actually means that
645 * there were no tracepoints so its not really
646 * an error, just that we don't need to
647 * synthesize anything. We really have to
648 * return this more properly and also
649 * propagate errors that now are calling die()
650 */
45694aa7 651 err = perf_event__synthesize_tracing_data(tool, output, evsel_list,
743eb868 652 process_synthesized_event);
63e0c771
TZ
653 if (err <= 0) {
654 pr_err("Couldn't record tracing data.\n");
8d3eca20 655 goto out_delete_session;
63e0c771 656 }
d20deb64 657 advance_output(rec, err);
63e0c771 658 }
2c46dbb5
TZ
659 }
660
45694aa7 661 err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
743eb868 662 machine, "_text");
70162138 663 if (err < 0)
45694aa7 664 err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
743eb868 665 machine, "_stext");
c1a3a4b9
ACM
666 if (err < 0)
667 pr_err("Couldn't record kernel reference relocation symbol\n"
668 "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
669 "Check /proc/kallsyms permission or run as root.\n");
b7cece76 670
45694aa7 671 err = perf_event__synthesize_modules(tool, process_synthesized_event,
743eb868 672 machine);
c1a3a4b9
ACM
673 if (err < 0)
674 pr_err("Couldn't record kernel module information.\n"
675 "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
676 "Check /proc/modules permission or run as root.\n");
677
a1645ce1 678 if (perf_guest)
45694aa7 679 perf_session__process_machines(session, tool,
8115d60c 680 perf_event__synthesize_guest_os);
7c6a1c65 681
bea03405 682 if (!opts->target.system_wide)
8d3eca20 683 err = perf_event__synthesize_thread_map(tool, evsel_list->threads,
7c940c18 684 process_synthesized_event,
743eb868 685 machine);
234fbbf5 686 else
8d3eca20 687 err = perf_event__synthesize_threads(tool, process_synthesized_event,
743eb868 688 machine);
7c6a1c65 689
8d3eca20
DA
690 if (err != 0)
691 goto out_delete_session;
692
d20deb64 693 if (rec->realtime_prio) {
de9ac07b
PZ
694 struct sched_param param;
695
d20deb64 696 param.sched_priority = rec->realtime_prio;
de9ac07b 697 if (sched_setscheduler(0, SCHED_FIFO, &param)) {
6beba7ad 698 pr_err("Could not set realtime priority.\n");
8d3eca20
DA
699 err = -1;
700 goto out_delete_session;
de9ac07b
PZ
701 }
702 }
703
764e16a3
DA
704 perf_evlist__enable(evsel_list);
705
856e9660
PZ
706 /*
707 * Let the child rip
708 */
d4db3f16 709 if (forks)
35b9d88e 710 perf_evlist__start_workload(evsel_list);
856e9660 711
649c48a9 712 for (;;) {
d20deb64 713 int hits = rec->samples;
de9ac07b 714
8d3eca20
DA
715 if (perf_record__mmap_read_all(rec) < 0) {
716 err = -1;
717 goto out_delete_session;
718 }
de9ac07b 719
d20deb64 720 if (hits == rec->samples) {
649c48a9
PZ
721 if (done)
722 break;
5c581041 723 err = poll(evsel_list->pollfd, evsel_list->nr_fds, -1);
8b412664
PZ
724 waking++;
725 }
726
4152ab37
ACM
727 if (done)
728 perf_evlist__disable(evsel_list);
de9ac07b
PZ
729 }
730
18483b81 731 if (quiet || signr == SIGUSR1)
b44308f5
ACM
732 return 0;
733
8b412664
PZ
734 fprintf(stderr, "[ perf record: Woken up %ld times to write data ]\n", waking);
735
021e9f47
IM
736 /*
737 * Approximate RIP event size: 24 bytes.
738 */
739 fprintf(stderr,
9486aa38 740 "[ perf record: Captured and wrote %.3f MB %s (~%" PRIu64 " samples) ]\n",
d20deb64 741 (double)rec->bytes_written / 1024.0 / 1024.0,
021e9f47 742 output_name,
d20deb64 743 rec->bytes_written / 24);
addc2785 744
de9ac07b 745 return 0;
39d17dac
ACM
746
747out_delete_session:
748 perf_session__delete(session);
749 return err;
de9ac07b 750}
0e9b20b8 751
bdfebd84
RAV
752#define BRANCH_OPT(n, m) \
753 { .name = n, .mode = (m) }
754
755#define BRANCH_END { .name = NULL }
756
757struct branch_mode {
758 const char *name;
759 int mode;
760};
761
762static const struct branch_mode branch_modes[] = {
763 BRANCH_OPT("u", PERF_SAMPLE_BRANCH_USER),
764 BRANCH_OPT("k", PERF_SAMPLE_BRANCH_KERNEL),
765 BRANCH_OPT("hv", PERF_SAMPLE_BRANCH_HV),
766 BRANCH_OPT("any", PERF_SAMPLE_BRANCH_ANY),
767 BRANCH_OPT("any_call", PERF_SAMPLE_BRANCH_ANY_CALL),
768 BRANCH_OPT("any_ret", PERF_SAMPLE_BRANCH_ANY_RETURN),
769 BRANCH_OPT("ind_call", PERF_SAMPLE_BRANCH_IND_CALL),
770 BRANCH_END
771};
772
773static int
a5aabdac 774parse_branch_stack(const struct option *opt, const char *str, int unset)
bdfebd84
RAV
775{
776#define ONLY_PLM \
777 (PERF_SAMPLE_BRANCH_USER |\
778 PERF_SAMPLE_BRANCH_KERNEL |\
779 PERF_SAMPLE_BRANCH_HV)
780
781 uint64_t *mode = (uint64_t *)opt->value;
782 const struct branch_mode *br;
a5aabdac 783 char *s, *os = NULL, *p;
bdfebd84
RAV
784 int ret = -1;
785
a5aabdac
SE
786 if (unset)
787 return 0;
bdfebd84 788
a5aabdac
SE
789 /*
790 * cannot set it twice, -b + --branch-filter for instance
791 */
792 if (*mode)
bdfebd84
RAV
793 return -1;
794
a5aabdac
SE
795 /* str may be NULL in case no arg is passed to -b */
796 if (str) {
797 /* because str is read-only */
798 s = os = strdup(str);
799 if (!s)
800 return -1;
801
802 for (;;) {
803 p = strchr(s, ',');
804 if (p)
805 *p = '\0';
806
807 for (br = branch_modes; br->name; br++) {
808 if (!strcasecmp(s, br->name))
809 break;
810 }
811 if (!br->name) {
812 ui__warning("unknown branch filter %s,"
813 " check man page\n", s);
814 goto error;
815 }
bdfebd84 816
a5aabdac 817 *mode |= br->mode;
bdfebd84 818
a5aabdac
SE
819 if (!p)
820 break;
bdfebd84 821
a5aabdac
SE
822 s = p + 1;
823 }
bdfebd84
RAV
824 }
825 ret = 0;
826
a5aabdac 827 /* default to any branch */
bdfebd84 828 if ((*mode & ~ONLY_PLM) == 0) {
a5aabdac 829 *mode = PERF_SAMPLE_BRANCH_ANY;
bdfebd84
RAV
830 }
831error:
832 free(os);
833 return ret;
834}
835
95485b1c 836#ifdef LIBUNWIND_SUPPORT
26d33022
JO
837static int get_stack_size(char *str, unsigned long *_size)
838{
839 char *endptr;
840 unsigned long size;
841 unsigned long max_size = round_down(USHRT_MAX, sizeof(u64));
842
843 size = strtoul(str, &endptr, 0);
844
845 do {
846 if (*endptr)
847 break;
848
849 size = round_up(size, sizeof(u64));
850 if (!size || size > max_size)
851 break;
852
853 *_size = size;
854 return 0;
855
856 } while (0);
857
858 pr_err("callchain: Incorrect stack dump size (max %ld): %s\n",
859 max_size, str);
860 return -1;
861}
95485b1c 862#endif /* LIBUNWIND_SUPPORT */
26d33022
JO
863
864static int
1d037ca1 865parse_callchain_opt(const struct option *opt __maybe_unused, const char *arg,
26d33022
JO
866 int unset)
867{
868 struct perf_record *rec = (struct perf_record *)opt->value;
869 char *tok, *name, *saveptr = NULL;
870 char *buf;
871 int ret = -1;
872
873 /* --no-call-graph */
874 if (unset)
875 return 0;
876
877 /* We specified default option if none is provided. */
878 BUG_ON(!arg);
879
880 /* We need buffer that we know we can write to. */
881 buf = malloc(strlen(arg) + 1);
882 if (!buf)
883 return -ENOMEM;
884
885 strcpy(buf, arg);
886
887 tok = strtok_r((char *)buf, ",", &saveptr);
888 name = tok ? : (char *)buf;
889
890 do {
891 /* Framepointer style */
892 if (!strncmp(name, "fp", sizeof("fp"))) {
893 if (!strtok_r(NULL, ",", &saveptr)) {
894 rec->opts.call_graph = CALLCHAIN_FP;
895 ret = 0;
896 } else
897 pr_err("callchain: No more arguments "
898 "needed for -g fp\n");
899 break;
900
95485b1c 901#ifdef LIBUNWIND_SUPPORT
26d33022
JO
902 /* Dwarf style */
903 } else if (!strncmp(name, "dwarf", sizeof("dwarf"))) {
61eaa3be
ACM
904 const unsigned long default_stack_dump_size = 8192;
905
26d33022
JO
906 ret = 0;
907 rec->opts.call_graph = CALLCHAIN_DWARF;
908 rec->opts.stack_dump_size = default_stack_dump_size;
909
910 tok = strtok_r(NULL, ",", &saveptr);
911 if (tok) {
912 unsigned long size = 0;
913
914 ret = get_stack_size(tok, &size);
915 rec->opts.stack_dump_size = size;
916 }
917
918 if (!ret)
919 pr_debug("callchain: stack dump size %d\n",
920 rec->opts.stack_dump_size);
95485b1c 921#endif /* LIBUNWIND_SUPPORT */
26d33022
JO
922 } else {
923 pr_err("callchain: Unknown -g option "
924 "value: %s\n", arg);
925 break;
926 }
927
928 } while (0);
929
930 free(buf);
931
932 if (!ret)
933 pr_debug("callchain: type %d\n", rec->opts.call_graph);
934
935 return ret;
936}
937
/* NULL-terminated list of usage lines for 'perf record'. */
static const char * const record_usage[] = {
	"perf record [<options>] [<command>]",
	"perf record [<options>] -- <command> [<options>]",
	NULL,
};
943
d20deb64
ACM
944/*
945 * XXX Ideally would be local to cmd_record() and passed to a perf_record__new
946 * because we need to have access to it in perf_record__exit, that is called
947 * after cmd_record() exits, but since record_options need to be accessible to
948 * builtin-script, leave it here.
949 *
950 * At least we don't touch it in all the other functions here directly.
951 *
952 * Just say no to tons of global variables, sigh.
953 */
954static struct perf_record record = {
955 .opts = {
d20deb64
ACM
956 .mmap_pages = UINT_MAX,
957 .user_freq = UINT_MAX,
958 .user_interval = ULLONG_MAX,
447a6013 959 .freq = 4000,
d1cb9fce
NK
960 .target = {
961 .uses_mmap = true,
962 },
d20deb64
ACM
963 },
964 .write_mode = WRITE_FORCE,
965 .file_new = true,
966};
7865e817 967
61eaa3be
ACM
#define CALLCHAIN_HELP "do call-graph (stack chain/backtrace) recording: "

/* The modes listed in the help text depend on libunwind support. */
#ifdef LIBUNWIND_SUPPORT
#define CALLCHAIN_MODES "[fp] dwarf"
#else
#define CALLCHAIN_MODES "[fp]"
#endif

static const char callchain_help[] = CALLCHAIN_HELP CALLCHAIN_MODES;
975
d20deb64
ACM
976/*
977 * XXX Will stay a global variable till we fix builtin-script.c to stop messing
978 * with it and switch to use the library functions in perf_evlist that came
979 * from builtin-record.c, i.e. use perf_record_opts,
980 * perf_evlist__prepare_workload, etc instead of fork+exec'in 'perf record',
981 * using pipes, etc.
982 */
bca647aa 983const struct option record_options[] = {
d20deb64 984 OPT_CALLBACK('e', "event", &record.evlist, "event",
86847b62 985 "event selector. use 'perf list' to list available events",
f120f9d5 986 parse_events_option),
d20deb64 987 OPT_CALLBACK(0, "filter", &record.evlist, "filter",
c171b552 988 "event filter", parse_filter),
bea03405 989 OPT_STRING('p', "pid", &record.opts.target.pid, "pid",
d6d901c2 990 "record events on existing process id"),
bea03405 991 OPT_STRING('t', "tid", &record.opts.target.tid, "tid",
d6d901c2 992 "record events on existing thread id"),
d20deb64 993 OPT_INTEGER('r', "realtime", &record.realtime_prio,
0e9b20b8 994 "collect data with this RT SCHED_FIFO priority"),
d20deb64 995 OPT_BOOLEAN('D', "no-delay", &record.opts.no_delay,
acac03fa 996 "collect data without buffering"),
d20deb64 997 OPT_BOOLEAN('R', "raw-samples", &record.opts.raw_samples,
daac07b2 998 "collect raw sample records from all opened counters"),
bea03405 999 OPT_BOOLEAN('a', "all-cpus", &record.opts.target.system_wide,
0e9b20b8 1000 "system-wide collection from all CPUs"),
d20deb64 1001 OPT_BOOLEAN('A', "append", &record.append_file,
abaff32a 1002 "append to the output file to do incremental profiling"),
bea03405 1003 OPT_STRING('C', "cpu", &record.opts.target.cpu_list, "cpu",
c45c6ea2 1004 "list of cpus to monitor"),
d20deb64 1005 OPT_BOOLEAN('f', "force", &record.force,
7865e817 1006 "overwrite existing data file (deprecated)"),
d20deb64
ACM
1007 OPT_U64('c', "count", &record.opts.user_interval, "event period to sample"),
1008 OPT_STRING('o', "output", &record.output_name, "file",
abaff32a 1009 "output file name"),
d20deb64 1010 OPT_BOOLEAN('i', "no-inherit", &record.opts.no_inherit,
2e6cdf99 1011 "child tasks do not inherit counters"),
d20deb64
ACM
1012 OPT_UINTEGER('F', "freq", &record.opts.user_freq, "profile at this frequency"),
1013 OPT_UINTEGER('m', "mmap-pages", &record.opts.mmap_pages,
01c2d99b 1014 "number of mmap data pages"),
d20deb64 1015 OPT_BOOLEAN(0, "group", &record.opts.group,
43bece79 1016 "put the counters into a counter group"),
26d33022
JO
1017 OPT_CALLBACK_DEFAULT('g', "call-graph", &record, "mode[,dump_size]",
1018 callchain_help, &parse_callchain_opt,
1019 "fp"),
c0555642 1020 OPT_INCR('v', "verbose", &verbose,
3da297a6 1021 "be more verbose (show counter open errors, etc)"),
b44308f5 1022 OPT_BOOLEAN('q', "quiet", &quiet, "don't print any message"),
d20deb64 1023 OPT_BOOLEAN('s', "stat", &record.opts.inherit_stat,
649c48a9 1024 "per thread counts"),
d20deb64 1025 OPT_BOOLEAN('d', "data", &record.opts.sample_address,
4bba828d 1026 "Sample addresses"),
d20deb64 1027 OPT_BOOLEAN('T', "timestamp", &record.opts.sample_time, "Sample timestamps"),
3e76ac78 1028 OPT_BOOLEAN('P', "period", &record.opts.period, "Sample period"),
d20deb64 1029 OPT_BOOLEAN('n', "no-samples", &record.opts.no_samples,
649c48a9 1030 "don't sample"),
d20deb64 1031 OPT_BOOLEAN('N', "no-buildid-cache", &record.no_buildid_cache,
a1ac1d3c 1032 "do not update the buildid cache"),
d20deb64 1033 OPT_BOOLEAN('B', "no-buildid", &record.no_buildid,
baa2f6ce 1034 "do not collect buildids in perf.data"),
d20deb64 1035 OPT_CALLBACK('G', "cgroup", &record.evlist, "name",
023695d9
SE
1036 "monitor event in cgroup name only",
1037 parse_cgroups),
bea03405
NK
1038 OPT_STRING('u', "uid", &record.opts.target.uid_str, "user",
1039 "user to profile"),
a5aabdac
SE
1040
1041 OPT_CALLBACK_NOOPT('b', "branch-any", &record.opts.branch_stack,
1042 "branch any", "sample any taken branches",
1043 parse_branch_stack),
1044
1045 OPT_CALLBACK('j', "branch-filter", &record.opts.branch_stack,
1046 "branch filter mask", "branch stack filter modes",
bdfebd84 1047 parse_branch_stack),
0e9b20b8
IM
1048 OPT_END()
1049};
1050
1d037ca1 1051int cmd_record(int argc, const char **argv, const char *prefix __maybe_unused)
0e9b20b8 1052{
69aad6f1
ACM
1053 int err = -ENOMEM;
1054 struct perf_evsel *pos;
d20deb64
ACM
1055 struct perf_evlist *evsel_list;
1056 struct perf_record *rec = &record;
16ad2ffb 1057 char errbuf[BUFSIZ];
0e9b20b8 1058
7e2ed097 1059 evsel_list = perf_evlist__new(NULL, NULL);
361c99a6
ACM
1060 if (evsel_list == NULL)
1061 return -ENOMEM;
1062
d20deb64
ACM
1063 rec->evlist = evsel_list;
1064
bca647aa 1065 argc = parse_options(argc, argv, record_options, record_usage,
655000e7 1066 PARSE_OPT_STOP_AT_NON_OPTION);
d67356e7 1067 if (!argc && perf_target__none(&rec->opts.target))
bca647aa 1068 usage_with_options(record_usage, record_options);
0e9b20b8 1069
d20deb64 1070 if (rec->force && rec->append_file) {
3780f488
NK
1071 ui__error("Can't overwrite and append at the same time."
1072 " You need to choose between -f and -A");
bca647aa 1073 usage_with_options(record_usage, record_options);
d20deb64
ACM
1074 } else if (rec->append_file) {
1075 rec->write_mode = WRITE_APPEND;
7865e817 1076 } else {
d20deb64 1077 rec->write_mode = WRITE_FORCE;
7865e817
FW
1078 }
1079
bea03405 1080 if (nr_cgroups && !rec->opts.target.system_wide) {
3780f488
NK
1081 ui__error("cgroup monitoring only available in"
1082 " system-wide mode\n");
023695d9
SE
1083 usage_with_options(record_usage, record_options);
1084 }
1085
655000e7 1086 symbol__init();
baa2f6ce 1087
ec80fde7 1088 if (symbol_conf.kptr_restrict)
646aaea6
ACM
1089 pr_warning(
1090"WARNING: Kernel address maps (/proc/{kallsyms,modules}) are restricted,\n"
1091"check /proc/sys/kernel/kptr_restrict.\n\n"
1092"Samples in kernel functions may not be resolved if a suitable vmlinux\n"
1093"file is not found in the buildid cache or in the vmlinux path.\n\n"
1094"Samples in kernel modules won't be resolved at all.\n\n"
1095"If some relocation was applied (e.g. kexec) symbols may be misresolved\n"
1096"even with a suitable vmlinux or kallsyms file.\n\n");
ec80fde7 1097
d20deb64 1098 if (rec->no_buildid_cache || rec->no_buildid)
a1ac1d3c 1099 disable_buildid_cache();
655000e7 1100
361c99a6
ACM
1101 if (evsel_list->nr_entries == 0 &&
1102 perf_evlist__add_default(evsel_list) < 0) {
69aad6f1
ACM
1103 pr_err("Not enough memory for event selector list\n");
1104 goto out_symbol_exit;
bbd36e5e 1105 }
0e9b20b8 1106
16ad2ffb
NK
1107 err = perf_target__validate(&rec->opts.target);
1108 if (err) {
1109 perf_target__strerror(&rec->opts.target, err, errbuf, BUFSIZ);
1110 ui__warning("%s", errbuf);
1111 }
1112
1113 err = perf_target__parse_uid(&rec->opts.target);
1114 if (err) {
1115 int saved_errno = errno;
4bd0f2d2 1116
16ad2ffb 1117 perf_target__strerror(&rec->opts.target, err, errbuf, BUFSIZ);
3780f488 1118 ui__error("%s", errbuf);
16ad2ffb
NK
1119
1120 err = -saved_errno;
0d37aa34 1121 goto out_free_fd;
16ad2ffb 1122 }
0d37aa34 1123
16ad2ffb 1124 err = -ENOMEM;
b809ac10 1125 if (perf_evlist__create_maps(evsel_list, &rec->opts.target) < 0)
dd7927f4 1126 usage_with_options(record_usage, record_options);
69aad6f1 1127
361c99a6 1128 list_for_each_entry(pos, &evsel_list->entries, node) {
7289f83c 1129 if (perf_header__push_event(pos->attr.config, perf_evsel__name(pos)))
ad7f4e3f 1130 goto out_free_fd;
d6d901c2 1131 }
5c581041 1132
d20deb64
ACM
1133 if (rec->opts.user_interval != ULLONG_MAX)
1134 rec->opts.default_interval = rec->opts.user_interval;
1135 if (rec->opts.user_freq != UINT_MAX)
1136 rec->opts.freq = rec->opts.user_freq;
f9212819 1137
7e4ff9e3
MG
1138 /*
1139 * User specified count overrides default frequency.
1140 */
d20deb64
ACM
1141 if (rec->opts.default_interval)
1142 rec->opts.freq = 0;
1143 else if (rec->opts.freq) {
1144 rec->opts.default_interval = rec->opts.freq;
7e4ff9e3 1145 } else {
3780f488 1146 ui__error("frequency and count are zero, aborting\n");
39d17dac 1147 err = -EINVAL;
5c581041 1148 goto out_free_fd;
7e4ff9e3
MG
1149 }
1150
d20deb64 1151 err = __cmd_record(&record, argc, argv);
39d17dac 1152out_free_fd:
7e2ed097 1153 perf_evlist__delete_maps(evsel_list);
d65a458b
ACM
1154out_symbol_exit:
1155 symbol__exit();
39d17dac 1156 return err;
0e9b20b8 1157}