perf tools: Add support to update event modifier
[linux-2.6-block.git] / tools / perf / builtin-record.c
CommitLineData
abaff32a 1/*
bf9e1876
IM
2 * builtin-record.c
3 *
4 * Builtin record command: Record the profile of a workload
5 * (or a CPU, or a PID) into the perf.data output file - for
6 * later analysis via perf report.
abaff32a 7 */
b8f46c5a
XG
8#define _FILE_OFFSET_BITS 64
9
16f762a2 10#include "builtin.h"
bf9e1876
IM
11
12#include "perf.h"
13
6122e4e4 14#include "util/build-id.h"
6eda5838 15#include "util/util.h"
0e9b20b8 16#include "util/parse-options.h"
8ad8db37 17#include "util/parse-events.h"
6eda5838 18
7c6a1c65 19#include "util/header.h"
66e274f3 20#include "util/event.h"
361c99a6 21#include "util/evlist.h"
69aad6f1 22#include "util/evsel.h"
8f28827a 23#include "util/debug.h"
94c744b6 24#include "util/session.h"
45694aa7 25#include "util/tool.h"
8d06367f 26#include "util/symbol.h"
a12b51c4 27#include "util/cpumap.h"
fd78260b 28#include "util/thread_map.h"
7c6a1c65 29
97124d5e 30#include <unistd.h>
de9ac07b 31#include <sched.h>
a41794cd 32#include <sys/mman.h>
de9ac07b 33
26d33022
JO
/* Common prefix of the -g/--call-graph help text. */
#define CALLCHAIN_HELP "do call-graph (stack chain/backtrace) recording: "

#ifndef NO_LIBUNWIND_SUPPORT
/* Stack dump size used by "dwarf" mode when the user does not give one. */
static unsigned long default_stack_dump_size = 8192;
static char callchain_help[] = CALLCHAIN_HELP "[fp] dwarf";
#else
/* Without libunwind only frame-pointer call graphs are offered. */
static char callchain_help[] = CALLCHAIN_HELP "[fp]";
#endif
42
7865e817
FW
/*
 * How an already existing output file is handled by __cmd_record():
 *   WRITE_FORCE  - a non-empty file is renamed to "<name>.old" and the
 *                  output is opened with O_TRUNC.
 *   WRITE_APPEND - the output is opened without O_TRUNC and the existing
 *                  header is read back.
 */
enum write_mode_t {
	WRITE_FORCE,
	WRITE_APPEND,
};
47
d20deb64 48struct perf_record {
45694aa7 49 struct perf_tool tool;
d20deb64
ACM
50 struct perf_record_opts opts;
51 u64 bytes_written;
52 const char *output_name;
53 struct perf_evlist *evlist;
54 struct perf_session *session;
55 const char *progname;
56 int output;
57 unsigned int page_size;
58 int realtime_prio;
59 enum write_mode_t write_mode;
60 bool no_buildid;
61 bool no_buildid_cache;
62 bool force;
63 bool file_new;
64 bool append_file;
65 long samples;
66 off_t post_processing_offset;
0f82ebc4 67};
a21ca2ca 68
d20deb64 69static void advance_output(struct perf_record *rec, size_t size)
9215545e 70{
d20deb64 71 rec->bytes_written += size;
9215545e
TZ
72}
73
d20deb64 74static void write_output(struct perf_record *rec, void *buf, size_t size)
f5970550
PZ
75{
76 while (size) {
d20deb64 77 int ret = write(rec->output, buf, size);
f5970550
PZ
78
79 if (ret < 0)
80 die("failed to write");
81
82 size -= ret;
83 buf += ret;
84
d20deb64 85 rec->bytes_written += ret;
f5970550
PZ
86 }
87}
88
45694aa7 89static int process_synthesized_event(struct perf_tool *tool,
d20deb64 90 union perf_event *event,
8d50e5b4 91 struct perf_sample *sample __used,
743eb868 92 struct machine *machine __used)
234fbbf5 93{
45694aa7 94 struct perf_record *rec = container_of(tool, struct perf_record, tool);
d20deb64 95 write_output(rec, event, event->header.size);
234fbbf5
ACM
96 return 0;
97}
98
d20deb64
ACM
99static void perf_record__mmap_read(struct perf_record *rec,
100 struct perf_mmap *md)
de9ac07b 101{
744bd8aa 102 unsigned int head = perf_mmap__read_head(md);
de9ac07b 103 unsigned int old = md->prev;
d20deb64 104 unsigned char *data = md->base + rec->page_size;
de9ac07b
PZ
105 unsigned long size;
106 void *buf;
de9ac07b 107
dc82009a
ACM
108 if (old == head)
109 return;
110
d20deb64 111 rec->samples++;
de9ac07b
PZ
112
113 size = head - old;
114
115 if ((old & md->mask) + size != (head & md->mask)) {
116 buf = &data[old & md->mask];
117 size = md->mask + 1 - (old & md->mask);
118 old += size;
021e9f47 119
d20deb64 120 write_output(rec, buf, size);
de9ac07b
PZ
121 }
122
123 buf = &data[old & md->mask];
124 size = head - old;
125 old += size;
021e9f47 126
d20deb64 127 write_output(rec, buf, size);
de9ac07b
PZ
128
129 md->prev = old;
115d2d89 130 perf_mmap__write_tail(md, old);
de9ac07b
PZ
131}
132
/*
 * Flags shared between sig_handler() and the main flow.  They are written
 * from signal context, hence volatile sig_atomic_t.
 *
 *   done           - set by any handled signal; ends the record loop
 *   signr          - last signal received, -1 while none was seen
 *   child_finished - set once SIGCHLD was delivered
 */
static volatile sig_atomic_t done;
static volatile sig_atomic_t signr = -1;
static volatile sig_atomic_t child_finished;

/* Signal handler: only records what happened, the main flow reacts. */
static void sig_handler(int sig)
{
	if (sig == SIGCHLD)
		child_finished = 1;

	done = 1;
	signr = sig;
}
145
d20deb64 146static void perf_record__sig_exit(int exit_status __used, void *arg)
f7b7c26e 147{
d20deb64 148 struct perf_record *rec = arg;
33e49ea7
AK
149 int status;
150
d20deb64 151 if (rec->evlist->workload.pid > 0) {
33e49ea7 152 if (!child_finished)
d20deb64 153 kill(rec->evlist->workload.pid, SIGTERM);
33e49ea7
AK
154
155 wait(&status);
156 if (WIFSIGNALED(status))
d20deb64 157 psignal(WTERMSIG(status), rec->progname);
33e49ea7 158 }
933da83a 159
18483b81 160 if (signr == -1 || signr == SIGUSR1)
f7b7c26e
PZ
161 return;
162
163 signal(signr, SIG_DFL);
164 kill(getpid(), signr);
de9ac07b
PZ
165}
166
a91e5431
ACM
167static bool perf_evlist__equal(struct perf_evlist *evlist,
168 struct perf_evlist *other)
169{
170 struct perf_evsel *pos, *pair;
171
172 if (evlist->nr_entries != other->nr_entries)
173 return false;
174
175 pair = list_entry(other->entries.next, struct perf_evsel, node);
176
177 list_for_each_entry(pos, &evlist->entries, node) {
178 if (memcmp(&pos->attr, &pair->attr, sizeof(pos->attr) != 0))
179 return false;
180 pair = list_entry(pair->node.next, struct perf_evsel, node);
181 }
182
183 return true;
184}
185
d20deb64 186static void perf_record__open(struct perf_record *rec)
dd7927f4 187{
727ab04e 188 struct perf_evsel *pos, *first;
d20deb64
ACM
189 struct perf_evlist *evlist = rec->evlist;
190 struct perf_session *session = rec->session;
191 struct perf_record_opts *opts = &rec->opts;
dd7927f4 192
727ab04e
ACM
193 first = list_entry(evlist->entries.next, struct perf_evsel, node);
194
d20deb64 195 perf_evlist__config_attrs(evlist, opts);
0f82ebc4 196
dd7927f4
ACM
197 list_for_each_entry(pos, &evlist->entries, node) {
198 struct perf_event_attr *attr = &pos->attr;
727ab04e 199 struct xyarray *group_fd = NULL;
dd7927f4
ACM
200 /*
201 * Check if parse_single_tracepoint_event has already asked for
202 * PERF_SAMPLE_TIME.
203 *
204 * XXX this is kludgy but short term fix for problems introduced by
205 * eac23d1c that broke 'perf script' by having different sample_types
206 * when using multiple tracepoint events when we use a perf binary
207 * that tries to use sample_id_all on an older kernel.
208 *
209 * We need to move counter creation to perf_session, support
210 * different sample_types, etc.
211 */
212 bool time_needed = attr->sample_type & PERF_SAMPLE_TIME;
d6d901c2 213
d20deb64 214 if (opts->group && pos != first)
727ab04e 215 group_fd = first->fd;
0c978128
ACM
216fallback_missing_features:
217 if (opts->exclude_guest_missing)
218 attr->exclude_guest = attr->exclude_host = 0;
dd7927f4 219retry_sample_id:
808e1226 220 attr->sample_id_all = opts->sample_id_all_missing ? 0 : 1;
dd7927f4 221try_again:
ed80f581 222 if (perf_evsel__open(pos, evlist->cpus, evlist->threads,
d20deb64 223 opts->group, group_fd) < 0) {
d6d901c2
ZY
224 int err = errno;
225
c286c419 226 if (err == EPERM || err == EACCES) {
b8631e6e 227 ui__error_paranoid();
c286c419 228 exit(EXIT_FAILURE);
bea03405 229 } else if (err == ENODEV && opts->target.cpu_list) {
d6d901c2
ZY
230 die("No such device - did you specify"
231 " an out-of-range profile CPU?\n");
0c978128
ACM
232 } else if (err == EINVAL) {
233 if (!opts->exclude_guest_missing &&
234 (attr->exclude_guest || attr->exclude_host)) {
235 pr_debug("Old kernel, cannot exclude "
236 "guest or host samples.\n");
237 opts->exclude_guest_missing = true;
238 goto fallback_missing_features;
808e1226 239 } else if (!opts->sample_id_all_missing) {
0c978128
ACM
240 /*
241 * Old kernel, no attr->sample_id_type_all field
242 */
808e1226 243 opts->sample_id_all_missing = true;
0c978128
ACM
244 if (!opts->sample_time && !opts->raw_samples && !time_needed)
245 attr->sample_type &= ~PERF_SAMPLE_TIME;
246
247 goto retry_sample_id;
248 }
d6d901c2 249 }
3da297a6 250
d6d901c2
ZY
251 /*
252 * If it's cycles then fall back to hrtimer
253 * based cpu-clock-tick sw counter, which
028d455b
DA
254 * is always available even if no PMU support.
255 *
256 * PPC returns ENXIO until 2.6.37 (behavior changed
257 * with commit b0a873e).
d6d901c2 258 */
028d455b
DA
259 if ((err == ENOENT || err == ENXIO)
260 && attr->type == PERF_TYPE_HARDWARE
d6d901c2
ZY
261 && attr->config == PERF_COUNT_HW_CPU_CYCLES) {
262
263 if (verbose)
ca6a4258
DA
264 ui__warning("The cycles event is not supported, "
265 "trying to fall back to cpu-clock-ticks\n");
d6d901c2
ZY
266 attr->type = PERF_TYPE_SOFTWARE;
267 attr->config = PERF_COUNT_SW_CPU_CLOCK;
d1cae34d
DA
268 if (pos->name) {
269 free(pos->name);
270 pos->name = NULL;
271 }
d6d901c2
ZY
272 goto try_again;
273 }
ca6a4258
DA
274
275 if (err == ENOENT) {
3780f488 276 ui__error("The %s event is not supported.\n",
7289f83c 277 perf_evsel__name(pos));
ca6a4258
DA
278 exit(EXIT_FAILURE);
279 }
280
d6d901c2 281 printf("\n");
d9cf837e 282 error("sys_perf_event_open() syscall returned with %d (%s). /bin/dmesg may provide additional information.\n",
dd7927f4 283 err, strerror(err));
bfd45118
SK
284
285#if defined(__i386__) || defined(__x86_64__)
d6d901c2
ZY
286 if (attr->type == PERF_TYPE_HARDWARE && err == EOPNOTSUPP)
287 die("No hardware sampling interrupt available."
288 " No APIC? If so then you can boot the kernel"
289 " with the \"lapic\" boot parameter to"
290 " force-enable it.\n");
bfd45118
SK
291#endif
292
d6d901c2 293 die("No CONFIG_PERF_EVENTS=y kernel support configured?\n");
c171b552
LZ
294 }
295 }
a43d3f08 296
0a102479
FW
297 if (perf_evlist__set_filters(evlist)) {
298 error("failed to set filter with %d (%s)\n", errno,
299 strerror(errno));
300 exit(-1);
301 }
302
18e60939
NE
303 if (perf_evlist__mmap(evlist, opts->mmap_pages, false) < 0) {
304 if (errno == EPERM)
305 die("Permission error mapping pages.\n"
306 "Consider increasing "
307 "/proc/sys/kernel/perf_event_mlock_kb,\n"
308 "or try again with a smaller value of -m/--mmap_pages.\n"
309 "(current value: %d)\n", opts->mmap_pages);
41d0d933
NE
310 else if (!is_power_of_2(opts->mmap_pages))
311 die("--mmap_pages/-m value must be a power of two.");
312
0a27d7f9 313 die("failed to mmap with %d (%s)\n", errno, strerror(errno));
18e60939 314 }
0a27d7f9 315
d20deb64 316 if (rec->file_new)
a91e5431
ACM
317 session->evlist = evlist;
318 else {
319 if (!perf_evlist__equal(session->evlist, evlist)) {
320 fprintf(stderr, "incompatible append\n");
321 exit(-1);
322 }
323 }
324
7b56cce2 325 perf_session__set_id_hdr_size(session);
16c8a109
PZ
326}
327
d20deb64 328static int process_buildids(struct perf_record *rec)
6122e4e4 329{
d20deb64 330 u64 size = lseek(rec->output, 0, SEEK_CUR);
6122e4e4 331
9f591fd7
ACM
332 if (size == 0)
333 return 0;
334
d20deb64
ACM
335 rec->session->fd = rec->output;
336 return __perf_session__process_events(rec->session, rec->post_processing_offset,
337 size - rec->post_processing_offset,
6122e4e4
ACM
338 size, &build_id__mark_dso_hit_ops);
339}
340
d20deb64 341static void perf_record__exit(int status __used, void *arg)
f5970550 342{
d20deb64
ACM
343 struct perf_record *rec = arg;
344
345 if (!rec->opts.pipe_output) {
346 rec->session->header.data_size += rec->bytes_written;
347
348 if (!rec->no_buildid)
349 process_buildids(rec);
350 perf_session__write_header(rec->session, rec->evlist,
351 rec->output, true);
352 perf_session__delete(rec->session);
353 perf_evlist__delete(rec->evlist);
d65a458b 354 symbol__exit();
c7929e47 355 }
f5970550
PZ
356}
357
8115d60c 358static void perf_event__synthesize_guest_os(struct machine *machine, void *data)
a1645ce1
ZY
359{
360 int err;
45694aa7 361 struct perf_tool *tool = data;
a1645ce1 362
23346f21 363 if (machine__is_host(machine))
a1645ce1
ZY
364 return;
365
366 /*
367 *As for guest kernel when processing subcommand record&report,
368 *we arrange module mmap prior to guest kernel mmap and trigger
369 *a preload dso because default guest module symbols are loaded
370 *from guest kallsyms instead of /lib/modules/XXX/XXX. This
371 *method is used to avoid symbol missing when the first addr is
372 *in module instead of in guest kernel.
373 */
45694aa7 374 err = perf_event__synthesize_modules(tool, process_synthesized_event,
743eb868 375 machine);
a1645ce1
ZY
376 if (err < 0)
377 pr_err("Couldn't record guest kernel [%d]'s reference"
23346f21 378 " relocation symbol.\n", machine->pid);
a1645ce1 379
a1645ce1
ZY
380 /*
381 * We use _stext for guest kernel because guest kernel's /proc/kallsyms
382 * have no _text sometimes.
383 */
45694aa7 384 err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
743eb868 385 machine, "_text");
a1645ce1 386 if (err < 0)
45694aa7 387 err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
743eb868 388 machine, "_stext");
a1645ce1
ZY
389 if (err < 0)
390 pr_err("Couldn't record guest kernel [%d]'s reference"
23346f21 391 " relocation symbol.\n", machine->pid);
a1645ce1
ZY
392}
393
98402807
FW
394static struct perf_event_header finished_round_event = {
395 .size = sizeof(struct perf_event_header),
396 .type = PERF_RECORD_FINISHED_ROUND,
397};
398
d20deb64 399static void perf_record__mmap_read_all(struct perf_record *rec)
98402807 400{
0e2e63dd 401 int i;
98402807 402
d20deb64
ACM
403 for (i = 0; i < rec->evlist->nr_mmaps; i++) {
404 if (rec->evlist->mmap[i].base)
405 perf_record__mmap_read(rec, &rec->evlist->mmap[i]);
98402807
FW
406 }
407
2eeaaa09 408 if (perf_header__has_feat(&rec->session->header, HEADER_TRACING_DATA))
d20deb64 409 write_output(rec, &finished_round_event, sizeof(finished_round_event));
98402807
FW
410}
411
d20deb64 412static int __cmd_record(struct perf_record *rec, int argc, const char **argv)
16c8a109 413{
abaff32a 414 struct stat st;
abaff32a 415 int flags;
781ba9d2 416 int err, output, feat;
8b412664 417 unsigned long waking = 0;
46be604b 418 const bool forks = argc > 0;
23346f21 419 struct machine *machine;
45694aa7 420 struct perf_tool *tool = &rec->tool;
d20deb64
ACM
421 struct perf_record_opts *opts = &rec->opts;
422 struct perf_evlist *evsel_list = rec->evlist;
423 const char *output_name = rec->output_name;
424 struct perf_session *session;
de9ac07b 425
d20deb64 426 rec->progname = argv[0];
33e49ea7 427
d20deb64 428 rec->page_size = sysconf(_SC_PAGE_SIZE);
de9ac07b 429
d20deb64 430 on_exit(perf_record__sig_exit, rec);
f5970550
PZ
431 signal(SIGCHLD, sig_handler);
432 signal(SIGINT, sig_handler);
18483b81 433 signal(SIGUSR1, sig_handler);
f5970550 434
d7065adb
FBH
435 if (!output_name) {
436 if (!fstat(STDOUT_FILENO, &st) && S_ISFIFO(st.st_mode))
d20deb64 437 opts->pipe_output = true;
d7065adb 438 else
d20deb64 439 rec->output_name = output_name = "perf.data";
d7065adb
FBH
440 }
441 if (output_name) {
442 if (!strcmp(output_name, "-"))
d20deb64 443 opts->pipe_output = true;
d7065adb 444 else if (!stat(output_name, &st) && st.st_size) {
d20deb64 445 if (rec->write_mode == WRITE_FORCE) {
d7065adb
FBH
446 char oldname[PATH_MAX];
447 snprintf(oldname, sizeof(oldname), "%s.old",
448 output_name);
449 unlink(oldname);
450 rename(output_name, oldname);
451 }
d20deb64
ACM
452 } else if (rec->write_mode == WRITE_APPEND) {
453 rec->write_mode = WRITE_FORCE;
266e0e21 454 }
97124d5e
PZ
455 }
456
f887f301 457 flags = O_CREAT|O_RDWR;
d20deb64
ACM
458 if (rec->write_mode == WRITE_APPEND)
459 rec->file_new = 0;
abaff32a
IM
460 else
461 flags |= O_TRUNC;
462
d20deb64 463 if (opts->pipe_output)
529870e3
TZ
464 output = STDOUT_FILENO;
465 else
466 output = open(output_name, flags, S_IRUSR | S_IWUSR);
de9ac07b
PZ
467 if (output < 0) {
468 perror("failed to create output file");
469 exit(-1);
470 }
471
d20deb64
ACM
472 rec->output = output;
473
7865e817 474 session = perf_session__new(output_name, O_WRONLY,
d20deb64 475 rec->write_mode == WRITE_FORCE, false, NULL);
94c744b6 476 if (session == NULL) {
a9a70bbc
ACM
477 pr_err("Not enough memory for reading perf file header\n");
478 return -1;
479 }
480
d20deb64
ACM
481 rec->session = session;
482
781ba9d2
RR
483 for (feat = HEADER_FIRST_FEATURE; feat < HEADER_LAST_FEATURE; feat++)
484 perf_header__set_feat(&session->header, feat);
485
486 if (rec->no_buildid)
487 perf_header__clear_feat(&session->header, HEADER_BUILD_ID);
488
489 if (!have_tracepoints(&evsel_list->entries))
2eeaaa09 490 perf_header__clear_feat(&session->header, HEADER_TRACING_DATA);
baa2f6ce 491
330aa675
SE
492 if (!rec->opts.branch_stack)
493 perf_header__clear_feat(&session->header, HEADER_BRANCH_STACK);
494
d20deb64 495 if (!rec->file_new) {
a91e5431 496 err = perf_session__read_header(session, output);
4dc0a04b 497 if (err < 0)
39d17dac 498 goto out_delete_session;
4dc0a04b
ACM
499 }
500
d4db3f16 501 if (forks) {
d20deb64 502 err = perf_evlist__prepare_workload(evsel_list, opts, argv);
35b9d88e
ACM
503 if (err < 0) {
504 pr_err("Couldn't run the workload!\n");
505 goto out_delete_session;
856e9660 506 }
856e9660
PZ
507 }
508
d20deb64 509 perf_record__open(rec);
de9ac07b 510
712a4b60 511 /*
d20deb64 512 * perf_session__delete(session) will be called at perf_record__exit()
712a4b60 513 */
d20deb64 514 on_exit(perf_record__exit, rec);
712a4b60 515
d20deb64 516 if (opts->pipe_output) {
529870e3
TZ
517 err = perf_header__write_pipe(output);
518 if (err < 0)
519 return err;
d20deb64 520 } else if (rec->file_new) {
a91e5431
ACM
521 err = perf_session__write_header(session, evsel_list,
522 output, false);
d5eed904
ACM
523 if (err < 0)
524 return err;
56b03f3c
ACM
525 }
526
d3665498 527 if (!rec->no_buildid
e20960c0 528 && !perf_header__has_feat(&session->header, HEADER_BUILD_ID)) {
d3665498 529 pr_err("Couldn't generate buildids. "
e20960c0
RR
530 "Use --no-buildid to profile anyway.\n");
531 return -1;
532 }
533
d20deb64 534 rec->post_processing_offset = lseek(output, 0, SEEK_CUR);
6122e4e4 535
743eb868
ACM
536 machine = perf_session__find_host_machine(session);
537 if (!machine) {
538 pr_err("Couldn't find native kernel information.\n");
539 return -1;
540 }
541
d20deb64 542 if (opts->pipe_output) {
45694aa7 543 err = perf_event__synthesize_attrs(tool, session,
d20deb64 544 process_synthesized_event);
2c46dbb5
TZ
545 if (err < 0) {
546 pr_err("Couldn't synthesize attrs.\n");
547 return err;
548 }
cd19a035 549
45694aa7 550 err = perf_event__synthesize_event_types(tool, process_synthesized_event,
743eb868 551 machine);
cd19a035
TZ
552 if (err < 0) {
553 pr_err("Couldn't synthesize event_types.\n");
554 return err;
555 }
9215545e 556
361c99a6 557 if (have_tracepoints(&evsel_list->entries)) {
63e0c771
TZ
558 /*
559 * FIXME err <= 0 here actually means that
560 * there were no tracepoints so its not really
561 * an error, just that we don't need to
562 * synthesize anything. We really have to
563 * return this more properly and also
564 * propagate errors that now are calling die()
565 */
45694aa7 566 err = perf_event__synthesize_tracing_data(tool, output, evsel_list,
743eb868 567 process_synthesized_event);
63e0c771
TZ
568 if (err <= 0) {
569 pr_err("Couldn't record tracing data.\n");
570 return err;
571 }
d20deb64 572 advance_output(rec, err);
63e0c771 573 }
2c46dbb5
TZ
574 }
575
45694aa7 576 err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
743eb868 577 machine, "_text");
70162138 578 if (err < 0)
45694aa7 579 err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
743eb868 580 machine, "_stext");
c1a3a4b9
ACM
581 if (err < 0)
582 pr_err("Couldn't record kernel reference relocation symbol\n"
583 "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
584 "Check /proc/kallsyms permission or run as root.\n");
b7cece76 585
45694aa7 586 err = perf_event__synthesize_modules(tool, process_synthesized_event,
743eb868 587 machine);
c1a3a4b9
ACM
588 if (err < 0)
589 pr_err("Couldn't record kernel module information.\n"
590 "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
591 "Check /proc/modules permission or run as root.\n");
592
a1645ce1 593 if (perf_guest)
45694aa7 594 perf_session__process_machines(session, tool,
8115d60c 595 perf_event__synthesize_guest_os);
7c6a1c65 596
bea03405 597 if (!opts->target.system_wide)
45694aa7 598 perf_event__synthesize_thread_map(tool, evsel_list->threads,
7c940c18 599 process_synthesized_event,
743eb868 600 machine);
234fbbf5 601 else
45694aa7 602 perf_event__synthesize_threads(tool, process_synthesized_event,
743eb868 603 machine);
7c6a1c65 604
d20deb64 605 if (rec->realtime_prio) {
de9ac07b
PZ
606 struct sched_param param;
607
d20deb64 608 param.sched_priority = rec->realtime_prio;
de9ac07b 609 if (sched_setscheduler(0, SCHED_FIFO, &param)) {
6beba7ad 610 pr_err("Could not set realtime priority.\n");
de9ac07b
PZ
611 exit(-1);
612 }
613 }
614
764e16a3
DA
615 perf_evlist__enable(evsel_list);
616
856e9660
PZ
617 /*
618 * Let the child rip
619 */
d4db3f16 620 if (forks)
35b9d88e 621 perf_evlist__start_workload(evsel_list);
856e9660 622
649c48a9 623 for (;;) {
d20deb64 624 int hits = rec->samples;
de9ac07b 625
d20deb64 626 perf_record__mmap_read_all(rec);
de9ac07b 627
d20deb64 628 if (hits == rec->samples) {
649c48a9
PZ
629 if (done)
630 break;
5c581041 631 err = poll(evsel_list->pollfd, evsel_list->nr_fds, -1);
8b412664
PZ
632 waking++;
633 }
634
4152ab37
ACM
635 if (done)
636 perf_evlist__disable(evsel_list);
de9ac07b
PZ
637 }
638
18483b81 639 if (quiet || signr == SIGUSR1)
b44308f5
ACM
640 return 0;
641
8b412664
PZ
642 fprintf(stderr, "[ perf record: Woken up %ld times to write data ]\n", waking);
643
021e9f47
IM
644 /*
645 * Approximate RIP event size: 24 bytes.
646 */
647 fprintf(stderr,
9486aa38 648 "[ perf record: Captured and wrote %.3f MB %s (~%" PRIu64 " samples) ]\n",
d20deb64 649 (double)rec->bytes_written / 1024.0 / 1024.0,
021e9f47 650 output_name,
d20deb64 651 rec->bytes_written / 24);
addc2785 652
de9ac07b 653 return 0;
39d17dac
ACM
654
655out_delete_session:
656 perf_session__delete(session);
657 return err;
de9ac07b 658}
0e9b20b8 659
bdfebd84
RAV
660#define BRANCH_OPT(n, m) \
661 { .name = n, .mode = (m) }
662
663#define BRANCH_END { .name = NULL }
664
665struct branch_mode {
666 const char *name;
667 int mode;
668};
669
670static const struct branch_mode branch_modes[] = {
671 BRANCH_OPT("u", PERF_SAMPLE_BRANCH_USER),
672 BRANCH_OPT("k", PERF_SAMPLE_BRANCH_KERNEL),
673 BRANCH_OPT("hv", PERF_SAMPLE_BRANCH_HV),
674 BRANCH_OPT("any", PERF_SAMPLE_BRANCH_ANY),
675 BRANCH_OPT("any_call", PERF_SAMPLE_BRANCH_ANY_CALL),
676 BRANCH_OPT("any_ret", PERF_SAMPLE_BRANCH_ANY_RETURN),
677 BRANCH_OPT("ind_call", PERF_SAMPLE_BRANCH_IND_CALL),
678 BRANCH_END
679};
680
681static int
a5aabdac 682parse_branch_stack(const struct option *opt, const char *str, int unset)
bdfebd84
RAV
683{
684#define ONLY_PLM \
685 (PERF_SAMPLE_BRANCH_USER |\
686 PERF_SAMPLE_BRANCH_KERNEL |\
687 PERF_SAMPLE_BRANCH_HV)
688
689 uint64_t *mode = (uint64_t *)opt->value;
690 const struct branch_mode *br;
a5aabdac 691 char *s, *os = NULL, *p;
bdfebd84
RAV
692 int ret = -1;
693
a5aabdac
SE
694 if (unset)
695 return 0;
bdfebd84 696
a5aabdac
SE
697 /*
698 * cannot set it twice, -b + --branch-filter for instance
699 */
700 if (*mode)
bdfebd84
RAV
701 return -1;
702
a5aabdac
SE
703 /* str may be NULL in case no arg is passed to -b */
704 if (str) {
705 /* because str is read-only */
706 s = os = strdup(str);
707 if (!s)
708 return -1;
709
710 for (;;) {
711 p = strchr(s, ',');
712 if (p)
713 *p = '\0';
714
715 for (br = branch_modes; br->name; br++) {
716 if (!strcasecmp(s, br->name))
717 break;
718 }
719 if (!br->name) {
720 ui__warning("unknown branch filter %s,"
721 " check man page\n", s);
722 goto error;
723 }
bdfebd84 724
a5aabdac 725 *mode |= br->mode;
bdfebd84 726
a5aabdac
SE
727 if (!p)
728 break;
bdfebd84 729
a5aabdac
SE
730 s = p + 1;
731 }
bdfebd84
RAV
732 }
733 ret = 0;
734
a5aabdac 735 /* default to any branch */
bdfebd84 736 if ((*mode & ~ONLY_PLM) == 0) {
a5aabdac 737 *mode = PERF_SAMPLE_BRANCH_ANY;
bdfebd84
RAV
738 }
739error:
740 free(os);
741 return ret;
742}
743
26d33022
JO
744#ifndef NO_LIBUNWIND_SUPPORT
745static int get_stack_size(char *str, unsigned long *_size)
746{
747 char *endptr;
748 unsigned long size;
749 unsigned long max_size = round_down(USHRT_MAX, sizeof(u64));
750
751 size = strtoul(str, &endptr, 0);
752
753 do {
754 if (*endptr)
755 break;
756
757 size = round_up(size, sizeof(u64));
758 if (!size || size > max_size)
759 break;
760
761 *_size = size;
762 return 0;
763
764 } while (0);
765
766 pr_err("callchain: Incorrect stack dump size (max %ld): %s\n",
767 max_size, str);
768 return -1;
769}
770#endif /* !NO_LIBUNWIND_SUPPORT */
771
772static int
773parse_callchain_opt(const struct option *opt __used, const char *arg,
774 int unset)
775{
776 struct perf_record *rec = (struct perf_record *)opt->value;
777 char *tok, *name, *saveptr = NULL;
778 char *buf;
779 int ret = -1;
780
781 /* --no-call-graph */
782 if (unset)
783 return 0;
784
785 /* We specified default option if none is provided. */
786 BUG_ON(!arg);
787
788 /* We need buffer that we know we can write to. */
789 buf = malloc(strlen(arg) + 1);
790 if (!buf)
791 return -ENOMEM;
792
793 strcpy(buf, arg);
794
795 tok = strtok_r((char *)buf, ",", &saveptr);
796 name = tok ? : (char *)buf;
797
798 do {
799 /* Framepointer style */
800 if (!strncmp(name, "fp", sizeof("fp"))) {
801 if (!strtok_r(NULL, ",", &saveptr)) {
802 rec->opts.call_graph = CALLCHAIN_FP;
803 ret = 0;
804 } else
805 pr_err("callchain: No more arguments "
806 "needed for -g fp\n");
807 break;
808
809#ifndef NO_LIBUNWIND_SUPPORT
810 /* Dwarf style */
811 } else if (!strncmp(name, "dwarf", sizeof("dwarf"))) {
812 ret = 0;
813 rec->opts.call_graph = CALLCHAIN_DWARF;
814 rec->opts.stack_dump_size = default_stack_dump_size;
815
816 tok = strtok_r(NULL, ",", &saveptr);
817 if (tok) {
818 unsigned long size = 0;
819
820 ret = get_stack_size(tok, &size);
821 rec->opts.stack_dump_size = size;
822 }
823
824 if (!ret)
825 pr_debug("callchain: stack dump size %d\n",
826 rec->opts.stack_dump_size);
827#endif /* !NO_LIBUNWIND_SUPPORT */
828 } else {
829 pr_err("callchain: Unknown -g option "
830 "value: %s\n", arg);
831 break;
832 }
833
834 } while (0);
835
836 free(buf);
837
838 if (!ret)
839 pr_debug("callchain: type %d\n", rec->opts.call_graph);
840
841 return ret;
842}
843
/* NULL-terminated usage lines passed to parse_options()/usage_with_options(). */
static const char * const record_usage[] = {
	"perf record [<options>] [<command>]",
	"perf record [<options>] -- <command> [<options>]",
	NULL,
};
849
d20deb64
ACM
850/*
851 * XXX Ideally would be local to cmd_record() and passed to a perf_record__new
852 * because we need to have access to it in perf_record__exit, that is called
853 * after cmd_record() exits, but since record_options need to be accessible to
854 * builtin-script, leave it here.
855 *
856 * At least we don't touch it in all the other functions here directly.
857 *
858 * Just say no to tons of global variables, sigh.
859 */
860static struct perf_record record = {
861 .opts = {
d20deb64
ACM
862 .mmap_pages = UINT_MAX,
863 .user_freq = UINT_MAX,
864 .user_interval = ULLONG_MAX,
447a6013 865 .freq = 4000,
d1cb9fce
NK
866 .target = {
867 .uses_mmap = true,
868 },
d20deb64
ACM
869 },
870 .write_mode = WRITE_FORCE,
871 .file_new = true,
872};
7865e817 873
d20deb64
ACM
874/*
875 * XXX Will stay a global variable till we fix builtin-script.c to stop messing
876 * with it and switch to use the library functions in perf_evlist that came
877 * from builtin-record.c, i.e. use perf_record_opts,
878 * perf_evlist__prepare_workload, etc instead of fork+exec'in 'perf record',
879 * using pipes, etc.
880 */
bca647aa 881const struct option record_options[] = {
d20deb64 882 OPT_CALLBACK('e', "event", &record.evlist, "event",
86847b62 883 "event selector. use 'perf list' to list available events",
f120f9d5 884 parse_events_option),
d20deb64 885 OPT_CALLBACK(0, "filter", &record.evlist, "filter",
c171b552 886 "event filter", parse_filter),
bea03405 887 OPT_STRING('p', "pid", &record.opts.target.pid, "pid",
d6d901c2 888 "record events on existing process id"),
bea03405 889 OPT_STRING('t', "tid", &record.opts.target.tid, "tid",
d6d901c2 890 "record events on existing thread id"),
d20deb64 891 OPT_INTEGER('r', "realtime", &record.realtime_prio,
0e9b20b8 892 "collect data with this RT SCHED_FIFO priority"),
d20deb64 893 OPT_BOOLEAN('D', "no-delay", &record.opts.no_delay,
acac03fa 894 "collect data without buffering"),
d20deb64 895 OPT_BOOLEAN('R', "raw-samples", &record.opts.raw_samples,
daac07b2 896 "collect raw sample records from all opened counters"),
bea03405 897 OPT_BOOLEAN('a', "all-cpus", &record.opts.target.system_wide,
0e9b20b8 898 "system-wide collection from all CPUs"),
d20deb64 899 OPT_BOOLEAN('A', "append", &record.append_file,
abaff32a 900 "append to the output file to do incremental profiling"),
bea03405 901 OPT_STRING('C', "cpu", &record.opts.target.cpu_list, "cpu",
c45c6ea2 902 "list of cpus to monitor"),
d20deb64 903 OPT_BOOLEAN('f', "force", &record.force,
7865e817 904 "overwrite existing data file (deprecated)"),
d20deb64
ACM
905 OPT_U64('c', "count", &record.opts.user_interval, "event period to sample"),
906 OPT_STRING('o', "output", &record.output_name, "file",
abaff32a 907 "output file name"),
d20deb64 908 OPT_BOOLEAN('i', "no-inherit", &record.opts.no_inherit,
2e6cdf99 909 "child tasks do not inherit counters"),
d20deb64
ACM
910 OPT_UINTEGER('F', "freq", &record.opts.user_freq, "profile at this frequency"),
911 OPT_UINTEGER('m', "mmap-pages", &record.opts.mmap_pages,
01c2d99b 912 "number of mmap data pages"),
d20deb64 913 OPT_BOOLEAN(0, "group", &record.opts.group,
43bece79 914 "put the counters into a counter group"),
26d33022
JO
915 OPT_CALLBACK_DEFAULT('g', "call-graph", &record, "mode[,dump_size]",
916 callchain_help, &parse_callchain_opt,
917 "fp"),
c0555642 918 OPT_INCR('v', "verbose", &verbose,
3da297a6 919 "be more verbose (show counter open errors, etc)"),
b44308f5 920 OPT_BOOLEAN('q', "quiet", &quiet, "don't print any message"),
d20deb64 921 OPT_BOOLEAN('s', "stat", &record.opts.inherit_stat,
649c48a9 922 "per thread counts"),
d20deb64 923 OPT_BOOLEAN('d', "data", &record.opts.sample_address,
4bba828d 924 "Sample addresses"),
d20deb64 925 OPT_BOOLEAN('T', "timestamp", &record.opts.sample_time, "Sample timestamps"),
3e76ac78 926 OPT_BOOLEAN('P', "period", &record.opts.period, "Sample period"),
d20deb64 927 OPT_BOOLEAN('n', "no-samples", &record.opts.no_samples,
649c48a9 928 "don't sample"),
d20deb64 929 OPT_BOOLEAN('N', "no-buildid-cache", &record.no_buildid_cache,
a1ac1d3c 930 "do not update the buildid cache"),
d20deb64 931 OPT_BOOLEAN('B', "no-buildid", &record.no_buildid,
baa2f6ce 932 "do not collect buildids in perf.data"),
d20deb64 933 OPT_CALLBACK('G', "cgroup", &record.evlist, "name",
023695d9
SE
934 "monitor event in cgroup name only",
935 parse_cgroups),
bea03405
NK
936 OPT_STRING('u', "uid", &record.opts.target.uid_str, "user",
937 "user to profile"),
a5aabdac
SE
938
939 OPT_CALLBACK_NOOPT('b', "branch-any", &record.opts.branch_stack,
940 "branch any", "sample any taken branches",
941 parse_branch_stack),
942
943 OPT_CALLBACK('j', "branch-filter", &record.opts.branch_stack,
944 "branch filter mask", "branch stack filter modes",
bdfebd84 945 parse_branch_stack),
0e9b20b8
IM
946 OPT_END()
947};
948
f37a291c 949int cmd_record(int argc, const char **argv, const char *prefix __used)
0e9b20b8 950{
69aad6f1
ACM
951 int err = -ENOMEM;
952 struct perf_evsel *pos;
d20deb64
ACM
953 struct perf_evlist *evsel_list;
954 struct perf_record *rec = &record;
16ad2ffb 955 char errbuf[BUFSIZ];
0e9b20b8 956
7e2ed097 957 evsel_list = perf_evlist__new(NULL, NULL);
361c99a6
ACM
958 if (evsel_list == NULL)
959 return -ENOMEM;
960
d20deb64
ACM
961 rec->evlist = evsel_list;
962
bca647aa 963 argc = parse_options(argc, argv, record_options, record_usage,
655000e7 964 PARSE_OPT_STOP_AT_NON_OPTION);
d67356e7 965 if (!argc && perf_target__none(&rec->opts.target))
bca647aa 966 usage_with_options(record_usage, record_options);
0e9b20b8 967
d20deb64 968 if (rec->force && rec->append_file) {
3780f488
NK
969 ui__error("Can't overwrite and append at the same time."
970 " You need to choose between -f and -A");
bca647aa 971 usage_with_options(record_usage, record_options);
d20deb64
ACM
972 } else if (rec->append_file) {
973 rec->write_mode = WRITE_APPEND;
7865e817 974 } else {
d20deb64 975 rec->write_mode = WRITE_FORCE;
7865e817
FW
976 }
977
bea03405 978 if (nr_cgroups && !rec->opts.target.system_wide) {
3780f488
NK
979 ui__error("cgroup monitoring only available in"
980 " system-wide mode\n");
023695d9
SE
981 usage_with_options(record_usage, record_options);
982 }
983
655000e7 984 symbol__init();
baa2f6ce 985
ec80fde7 986 if (symbol_conf.kptr_restrict)
646aaea6
ACM
987 pr_warning(
988"WARNING: Kernel address maps (/proc/{kallsyms,modules}) are restricted,\n"
989"check /proc/sys/kernel/kptr_restrict.\n\n"
990"Samples in kernel functions may not be resolved if a suitable vmlinux\n"
991"file is not found in the buildid cache or in the vmlinux path.\n\n"
992"Samples in kernel modules won't be resolved at all.\n\n"
993"If some relocation was applied (e.g. kexec) symbols may be misresolved\n"
994"even with a suitable vmlinux or kallsyms file.\n\n");
ec80fde7 995
d20deb64 996 if (rec->no_buildid_cache || rec->no_buildid)
a1ac1d3c 997 disable_buildid_cache();
655000e7 998
361c99a6
ACM
999 if (evsel_list->nr_entries == 0 &&
1000 perf_evlist__add_default(evsel_list) < 0) {
69aad6f1
ACM
1001 pr_err("Not enough memory for event selector list\n");
1002 goto out_symbol_exit;
bbd36e5e 1003 }
0e9b20b8 1004
16ad2ffb
NK
1005 err = perf_target__validate(&rec->opts.target);
1006 if (err) {
1007 perf_target__strerror(&rec->opts.target, err, errbuf, BUFSIZ);
1008 ui__warning("%s", errbuf);
1009 }
1010
1011 err = perf_target__parse_uid(&rec->opts.target);
1012 if (err) {
1013 int saved_errno = errno;
4bd0f2d2 1014
16ad2ffb 1015 perf_target__strerror(&rec->opts.target, err, errbuf, BUFSIZ);
3780f488 1016 ui__error("%s", errbuf);
16ad2ffb
NK
1017
1018 err = -saved_errno;
0d37aa34 1019 goto out_free_fd;
16ad2ffb 1020 }
0d37aa34 1021
16ad2ffb 1022 err = -ENOMEM;
b809ac10 1023 if (perf_evlist__create_maps(evsel_list, &rec->opts.target) < 0)
dd7927f4 1024 usage_with_options(record_usage, record_options);
69aad6f1 1025
361c99a6 1026 list_for_each_entry(pos, &evsel_list->entries, node) {
7289f83c 1027 if (perf_header__push_event(pos->attr.config, perf_evsel__name(pos)))
ad7f4e3f 1028 goto out_free_fd;
d6d901c2 1029 }
5c581041 1030
d20deb64
ACM
1031 if (rec->opts.user_interval != ULLONG_MAX)
1032 rec->opts.default_interval = rec->opts.user_interval;
1033 if (rec->opts.user_freq != UINT_MAX)
1034 rec->opts.freq = rec->opts.user_freq;
f9212819 1035
7e4ff9e3
MG
1036 /*
1037 * User specified count overrides default frequency.
1038 */
d20deb64
ACM
1039 if (rec->opts.default_interval)
1040 rec->opts.freq = 0;
1041 else if (rec->opts.freq) {
1042 rec->opts.default_interval = rec->opts.freq;
7e4ff9e3 1043 } else {
3780f488 1044 ui__error("frequency and count are zero, aborting\n");
39d17dac 1045 err = -EINVAL;
5c581041 1046 goto out_free_fd;
7e4ff9e3
MG
1047 }
1048
d20deb64 1049 err = __cmd_record(&record, argc, argv);
39d17dac 1050out_free_fd:
7e2ed097 1051 perf_evlist__delete_maps(evsel_list);
d65a458b
ACM
1052out_symbol_exit:
1053 symbol__exit();
39d17dac 1054 return err;
0e9b20b8 1055}