perf evsel: Introduce event fallback method
[linux-2.6-block.git] / tools / perf / builtin-record.c
CommitLineData
abaff32a 1/*
bf9e1876
IM
2 * builtin-record.c
3 *
4 * Builtin record command: Record the profile of a workload
5 * (or a CPU, or a PID) into the perf.data output file - for
6 * later analysis via perf report.
abaff32a 7 */
b8f46c5a
XG
8#define _FILE_OFFSET_BITS 64
9
16f762a2 10#include "builtin.h"
bf9e1876
IM
11
12#include "perf.h"
13
6122e4e4 14#include "util/build-id.h"
6eda5838 15#include "util/util.h"
0e9b20b8 16#include "util/parse-options.h"
8ad8db37 17#include "util/parse-events.h"
6eda5838 18
7c6a1c65 19#include "util/header.h"
66e274f3 20#include "util/event.h"
361c99a6 21#include "util/evlist.h"
69aad6f1 22#include "util/evsel.h"
8f28827a 23#include "util/debug.h"
94c744b6 24#include "util/session.h"
45694aa7 25#include "util/tool.h"
8d06367f 26#include "util/symbol.h"
a12b51c4 27#include "util/cpumap.h"
fd78260b 28#include "util/thread_map.h"
7c6a1c65 29
97124d5e 30#include <unistd.h>
de9ac07b 31#include <sched.h>
a41794cd 32#include <sys/mman.h>
de9ac07b 33
78da39fa
BR
#ifndef HAVE_ON_EXIT
/*
 * Fallback on_exit() for C libraries that do not provide one, built on top
 * of atexit().  Up to ATEXIT_MAX (function, argument) pairs can be
 * registered; at process exit each one is called with the status that was
 * passed to exit() and its own argument.
 */
#ifndef ATEXIT_MAX
#define ATEXIT_MAX 32
#endif
static int __on_exit_count = 0;
typedef void (*on_exit_func_t) (int, void *);
static on_exit_func_t __on_exit_funcs[ATEXIT_MAX];
static void *__on_exit_args[ATEXIT_MAX];
static int __exitcode = 0;
static void __handle_on_exit_funcs(void);
static int on_exit(on_exit_func_t function, void *arg);
/* Remember the exit status so it can be handed to the registered handlers. */
#define exit(x) (exit)(__exitcode = (x))

/*
 * Register @function to be called at exit with the exit status and @arg.
 *
 * Returns 0 on success, -ENOMEM when the table is full or when the
 * dispatcher could not be registered with atexit().
 */
static int on_exit(on_exit_func_t function, void *arg)
{
	if (__on_exit_count == ATEXIT_MAX)
		return -ENOMEM;

	/* The dispatcher only needs to be hooked in once. */
	if (__on_exit_count == 0 && atexit(__handle_on_exit_funcs) != 0)
		return -ENOMEM;

	__on_exit_funcs[__on_exit_count] = function;
	__on_exit_args[__on_exit_count++] = arg;
	return 0;
}

/*
 * atexit() hook: run the registered handlers, most recently registered
 * first, which is the order on_exit(3)/atexit(3) guarantee.  Handlers that
 * terminate the process (e.g. by re-raising a signal) must therefore be
 * registered before handlers that still have work to do.
 */
static void __handle_on_exit_funcs(void)
{
	int i;

	for (i = __on_exit_count - 1; i >= 0; i--)
		__on_exit_funcs[i](__exitcode, __on_exit_args[i]);
}
#endif
65
7865e817
FW
66enum write_mode_t {
67 WRITE_FORCE,
68 WRITE_APPEND
69};
70
d20deb64 71struct perf_record {
45694aa7 72 struct perf_tool tool;
d20deb64
ACM
73 struct perf_record_opts opts;
74 u64 bytes_written;
75 const char *output_name;
76 struct perf_evlist *evlist;
77 struct perf_session *session;
78 const char *progname;
79 int output;
80 unsigned int page_size;
81 int realtime_prio;
82 enum write_mode_t write_mode;
83 bool no_buildid;
84 bool no_buildid_cache;
85 bool force;
86 bool file_new;
87 bool append_file;
88 long samples;
89 off_t post_processing_offset;
0f82ebc4 90};
a21ca2ca 91
d20deb64 92static void advance_output(struct perf_record *rec, size_t size)
9215545e 93{
d20deb64 94 rec->bytes_written += size;
9215545e
TZ
95}
96
8d3eca20 97static int write_output(struct perf_record *rec, void *buf, size_t size)
f5970550
PZ
98{
99 while (size) {
d20deb64 100 int ret = write(rec->output, buf, size);
f5970550 101
8d3eca20
DA
102 if (ret < 0) {
103 pr_err("failed to write\n");
104 return -1;
105 }
f5970550
PZ
106
107 size -= ret;
108 buf += ret;
109
d20deb64 110 rec->bytes_written += ret;
f5970550 111 }
8d3eca20
DA
112
113 return 0;
f5970550
PZ
114}
115
45694aa7 116static int process_synthesized_event(struct perf_tool *tool,
d20deb64 117 union perf_event *event,
1d037ca1
IT
118 struct perf_sample *sample __maybe_unused,
119 struct machine *machine __maybe_unused)
234fbbf5 120{
45694aa7 121 struct perf_record *rec = container_of(tool, struct perf_record, tool);
8d3eca20
DA
122 if (write_output(rec, event, event->header.size) < 0)
123 return -1;
124
234fbbf5
ACM
125 return 0;
126}
127
8d3eca20 128static int perf_record__mmap_read(struct perf_record *rec,
d20deb64 129 struct perf_mmap *md)
de9ac07b 130{
744bd8aa 131 unsigned int head = perf_mmap__read_head(md);
de9ac07b 132 unsigned int old = md->prev;
d20deb64 133 unsigned char *data = md->base + rec->page_size;
de9ac07b
PZ
134 unsigned long size;
135 void *buf;
8d3eca20 136 int rc = 0;
de9ac07b 137
dc82009a 138 if (old == head)
8d3eca20 139 return 0;
dc82009a 140
d20deb64 141 rec->samples++;
de9ac07b
PZ
142
143 size = head - old;
144
145 if ((old & md->mask) + size != (head & md->mask)) {
146 buf = &data[old & md->mask];
147 size = md->mask + 1 - (old & md->mask);
148 old += size;
021e9f47 149
8d3eca20
DA
150 if (write_output(rec, buf, size) < 0) {
151 rc = -1;
152 goto out;
153 }
de9ac07b
PZ
154 }
155
156 buf = &data[old & md->mask];
157 size = head - old;
158 old += size;
021e9f47 159
8d3eca20
DA
160 if (write_output(rec, buf, size) < 0) {
161 rc = -1;
162 goto out;
163 }
de9ac07b
PZ
164
165 md->prev = old;
115d2d89 166 perf_mmap__write_tail(md, old);
8d3eca20
DA
167
168out:
169 return rc;
de9ac07b
PZ
170}
171
/*
 * Flags shared between the signal handler and the main loop.  They are
 * sig_atomic_t because they are written from signal context.
 */
static volatile sig_atomic_t done = 0;			/* stop recording */
static volatile sig_atomic_t signr = -1;		/* last signal seen, -1 if none */
static volatile sig_atomic_t child_finished = 0;	/* SIGCHLD was received */

/*
 * Handler for the signals installed by __cmd_record(): note which signal
 * arrived and ask the main loop to finish.
 */
static void sig_handler(int sig)
{
	if (sig == SIGCHLD)
		child_finished = 1;

	done = 1;
	signr = sig;
}
184
1d037ca1 185static void perf_record__sig_exit(int exit_status __maybe_unused, void *arg)
f7b7c26e 186{
d20deb64 187 struct perf_record *rec = arg;
33e49ea7
AK
188 int status;
189
d20deb64 190 if (rec->evlist->workload.pid > 0) {
33e49ea7 191 if (!child_finished)
d20deb64 192 kill(rec->evlist->workload.pid, SIGTERM);
33e49ea7
AK
193
194 wait(&status);
195 if (WIFSIGNALED(status))
d20deb64 196 psignal(WTERMSIG(status), rec->progname);
33e49ea7 197 }
933da83a 198
18483b81 199 if (signr == -1 || signr == SIGUSR1)
f7b7c26e
PZ
200 return;
201
202 signal(signr, SIG_DFL);
203 kill(getpid(), signr);
de9ac07b
PZ
204}
205
a91e5431
ACM
206static bool perf_evlist__equal(struct perf_evlist *evlist,
207 struct perf_evlist *other)
208{
209 struct perf_evsel *pos, *pair;
210
211 if (evlist->nr_entries != other->nr_entries)
212 return false;
213
0c21f736 214 pair = perf_evlist__first(other);
a91e5431
ACM
215
216 list_for_each_entry(pos, &evlist->entries, node) {
217 if (memcmp(&pos->attr, &pair->attr, sizeof(pos->attr) != 0))
218 return false;
0c21f736 219 pair = perf_evsel__next(pair);
a91e5431
ACM
220 }
221
222 return true;
223}
224
8d3eca20 225static int perf_record__open(struct perf_record *rec)
dd7927f4 226{
c0a54341 227 char msg[128];
6a4bb04c 228 struct perf_evsel *pos;
d20deb64
ACM
229 struct perf_evlist *evlist = rec->evlist;
230 struct perf_session *session = rec->session;
231 struct perf_record_opts *opts = &rec->opts;
8d3eca20 232 int rc = 0;
dd7927f4 233
f77a9518 234 perf_evlist__config(evlist, opts);
cac21425 235
dd7927f4
ACM
236 list_for_each_entry(pos, &evlist->entries, node) {
237 struct perf_event_attr *attr = &pos->attr;
dd7927f4 238try_again:
6a4bb04c 239 if (perf_evsel__open(pos, evlist->cpus, evlist->threads) < 0) {
d6d901c2
ZY
240 int err = errno;
241
c286c419 242 if (err == EPERM || err == EACCES) {
b8631e6e 243 ui__error_paranoid();
8d3eca20
DA
244 rc = -err;
245 goto out;
bea03405 246 } else if (err == ENODEV && opts->target.cpu_list) {
8d3eca20
DA
247 pr_err("No such device - did you specify"
248 " an out-of-range profile CPU?\n");
249 rc = -err;
250 goto out;
d6d901c2 251 }
3da297a6 252
c0a54341 253 if (perf_evsel__fallback(pos, err, msg, sizeof(msg))) {
d6d901c2 254 if (verbose)
c0a54341 255 ui__warning("%s\n", msg);
d6d901c2
ZY
256 goto try_again;
257 }
ca6a4258
DA
258
259 if (err == ENOENT) {
3780f488 260 ui__error("The %s event is not supported.\n",
7289f83c 261 perf_evsel__name(pos));
8d3eca20
DA
262 rc = -err;
263 goto out;
2305c82f
DA
264 } else if ((err == EOPNOTSUPP) && (attr->precise_ip)) {
265 ui__error("\'precise\' request may not be supported. "
266 "Try removing 'p' modifier\n");
267 rc = -err;
268 goto out;
ca6a4258
DA
269 }
270
d6d901c2 271 printf("\n");
1863fbbb
SE
272 error("sys_perf_event_open() syscall returned with %d "
273 "(%s) for event %s. /bin/dmesg may provide "
274 "additional information.\n",
275 err, strerror(err), perf_evsel__name(pos));
bfd45118
SK
276
277#if defined(__i386__) || defined(__x86_64__)
8d3eca20
DA
278 if (attr->type == PERF_TYPE_HARDWARE &&
279 err == EOPNOTSUPP) {
280 pr_err("No hardware sampling interrupt available."
281 " No APIC? If so then you can boot the kernel"
282 " with the \"lapic\" boot parameter to"
283 " force-enable it.\n");
284 rc = -err;
285 goto out;
286 }
bfd45118
SK
287#endif
288
8d3eca20
DA
289 pr_err("No CONFIG_PERF_EVENTS=y kernel support configured?\n");
290 rc = -err;
291 goto out;
c171b552
LZ
292 }
293 }
a43d3f08 294
1491a632 295 if (perf_evlist__apply_filters(evlist)) {
0a102479
FW
296 error("failed to set filter with %d (%s)\n", errno,
297 strerror(errno));
8d3eca20
DA
298 rc = -1;
299 goto out;
0a102479
FW
300 }
301
18e60939 302 if (perf_evlist__mmap(evlist, opts->mmap_pages, false) < 0) {
8d3eca20
DA
303 if (errno == EPERM) {
304 pr_err("Permission error mapping pages.\n"
305 "Consider increasing "
306 "/proc/sys/kernel/perf_event_mlock_kb,\n"
307 "or try again with a smaller value of -m/--mmap_pages.\n"
308 "(current value: %d)\n", opts->mmap_pages);
309 rc = -errno;
0089fa98
JO
310 } else if (!is_power_of_2(opts->mmap_pages) &&
311 (opts->mmap_pages != UINT_MAX)) {
8d3eca20
DA
312 pr_err("--mmap_pages/-m value must be a power of two.");
313 rc = -EINVAL;
314 } else {
315 pr_err("failed to mmap with %d (%s)\n", errno, strerror(errno));
316 rc = -errno;
317 }
318 goto out;
18e60939 319 }
0a27d7f9 320
d20deb64 321 if (rec->file_new)
a91e5431
ACM
322 session->evlist = evlist;
323 else {
324 if (!perf_evlist__equal(session->evlist, evlist)) {
325 fprintf(stderr, "incompatible append\n");
8d3eca20
DA
326 rc = -1;
327 goto out;
a91e5431
ACM
328 }
329 }
330
7b56cce2 331 perf_session__set_id_hdr_size(session);
8d3eca20
DA
332out:
333 return rc;
16c8a109
PZ
334}
335
d20deb64 336static int process_buildids(struct perf_record *rec)
6122e4e4 337{
d20deb64 338 u64 size = lseek(rec->output, 0, SEEK_CUR);
6122e4e4 339
9f591fd7
ACM
340 if (size == 0)
341 return 0;
342
d20deb64
ACM
343 rec->session->fd = rec->output;
344 return __perf_session__process_events(rec->session, rec->post_processing_offset,
345 size - rec->post_processing_offset,
6122e4e4
ACM
346 size, &build_id__mark_dso_hit_ops);
347}
348
8d3eca20 349static void perf_record__exit(int status, void *arg)
f5970550 350{
d20deb64
ACM
351 struct perf_record *rec = arg;
352
8d3eca20
DA
353 if (status != 0)
354 return;
355
d20deb64
ACM
356 if (!rec->opts.pipe_output) {
357 rec->session->header.data_size += rec->bytes_written;
358
359 if (!rec->no_buildid)
360 process_buildids(rec);
361 perf_session__write_header(rec->session, rec->evlist,
362 rec->output, true);
363 perf_session__delete(rec->session);
364 perf_evlist__delete(rec->evlist);
d65a458b 365 symbol__exit();
c7929e47 366 }
f5970550
PZ
367}
368
8115d60c 369static void perf_event__synthesize_guest_os(struct machine *machine, void *data)
a1645ce1
ZY
370{
371 int err;
45694aa7 372 struct perf_tool *tool = data;
a1645ce1 373
23346f21 374 if (machine__is_host(machine))
a1645ce1
ZY
375 return;
376
377 /*
378 *As for guest kernel when processing subcommand record&report,
379 *we arrange module mmap prior to guest kernel mmap and trigger
380 *a preload dso because default guest module symbols are loaded
381 *from guest kallsyms instead of /lib/modules/XXX/XXX. This
382 *method is used to avoid symbol missing when the first addr is
383 *in module instead of in guest kernel.
384 */
45694aa7 385 err = perf_event__synthesize_modules(tool, process_synthesized_event,
743eb868 386 machine);
a1645ce1
ZY
387 if (err < 0)
388 pr_err("Couldn't record guest kernel [%d]'s reference"
23346f21 389 " relocation symbol.\n", machine->pid);
a1645ce1 390
a1645ce1
ZY
391 /*
392 * We use _stext for guest kernel because guest kernel's /proc/kallsyms
393 * have no _text sometimes.
394 */
45694aa7 395 err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
743eb868 396 machine, "_text");
a1645ce1 397 if (err < 0)
45694aa7 398 err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
743eb868 399 machine, "_stext");
a1645ce1
ZY
400 if (err < 0)
401 pr_err("Couldn't record guest kernel [%d]'s reference"
23346f21 402 " relocation symbol.\n", machine->pid);
a1645ce1
ZY
403}
404
98402807
FW
405static struct perf_event_header finished_round_event = {
406 .size = sizeof(struct perf_event_header),
407 .type = PERF_RECORD_FINISHED_ROUND,
408};
409
8d3eca20 410static int perf_record__mmap_read_all(struct perf_record *rec)
98402807 411{
0e2e63dd 412 int i;
8d3eca20 413 int rc = 0;
98402807 414
d20deb64 415 for (i = 0; i < rec->evlist->nr_mmaps; i++) {
8d3eca20
DA
416 if (rec->evlist->mmap[i].base) {
417 if (perf_record__mmap_read(rec, &rec->evlist->mmap[i]) != 0) {
418 rc = -1;
419 goto out;
420 }
421 }
98402807
FW
422 }
423
2eeaaa09 424 if (perf_header__has_feat(&rec->session->header, HEADER_TRACING_DATA))
8d3eca20
DA
425 rc = write_output(rec, &finished_round_event,
426 sizeof(finished_round_event));
427
428out:
429 return rc;
98402807
FW
430}
431
d20deb64 432static int __cmd_record(struct perf_record *rec, int argc, const char **argv)
16c8a109 433{
abaff32a 434 struct stat st;
abaff32a 435 int flags;
781ba9d2 436 int err, output, feat;
8b412664 437 unsigned long waking = 0;
46be604b 438 const bool forks = argc > 0;
23346f21 439 struct machine *machine;
45694aa7 440 struct perf_tool *tool = &rec->tool;
d20deb64
ACM
441 struct perf_record_opts *opts = &rec->opts;
442 struct perf_evlist *evsel_list = rec->evlist;
443 const char *output_name = rec->output_name;
444 struct perf_session *session;
2711926a 445 bool disabled = false;
de9ac07b 446
d20deb64 447 rec->progname = argv[0];
33e49ea7 448
d20deb64 449 rec->page_size = sysconf(_SC_PAGE_SIZE);
de9ac07b 450
d20deb64 451 on_exit(perf_record__sig_exit, rec);
f5970550
PZ
452 signal(SIGCHLD, sig_handler);
453 signal(SIGINT, sig_handler);
18483b81 454 signal(SIGUSR1, sig_handler);
f5970550 455
d7065adb
FBH
456 if (!output_name) {
457 if (!fstat(STDOUT_FILENO, &st) && S_ISFIFO(st.st_mode))
d20deb64 458 opts->pipe_output = true;
d7065adb 459 else
d20deb64 460 rec->output_name = output_name = "perf.data";
d7065adb
FBH
461 }
462 if (output_name) {
463 if (!strcmp(output_name, "-"))
d20deb64 464 opts->pipe_output = true;
d7065adb 465 else if (!stat(output_name, &st) && st.st_size) {
d20deb64 466 if (rec->write_mode == WRITE_FORCE) {
d7065adb
FBH
467 char oldname[PATH_MAX];
468 snprintf(oldname, sizeof(oldname), "%s.old",
469 output_name);
470 unlink(oldname);
471 rename(output_name, oldname);
472 }
d20deb64
ACM
473 } else if (rec->write_mode == WRITE_APPEND) {
474 rec->write_mode = WRITE_FORCE;
266e0e21 475 }
97124d5e
PZ
476 }
477
f887f301 478 flags = O_CREAT|O_RDWR;
d20deb64
ACM
479 if (rec->write_mode == WRITE_APPEND)
480 rec->file_new = 0;
abaff32a
IM
481 else
482 flags |= O_TRUNC;
483
d20deb64 484 if (opts->pipe_output)
529870e3
TZ
485 output = STDOUT_FILENO;
486 else
487 output = open(output_name, flags, S_IRUSR | S_IWUSR);
de9ac07b
PZ
488 if (output < 0) {
489 perror("failed to create output file");
8d3eca20 490 return -1;
de9ac07b
PZ
491 }
492
d20deb64
ACM
493 rec->output = output;
494
7865e817 495 session = perf_session__new(output_name, O_WRONLY,
d20deb64 496 rec->write_mode == WRITE_FORCE, false, NULL);
94c744b6 497 if (session == NULL) {
a9a70bbc
ACM
498 pr_err("Not enough memory for reading perf file header\n");
499 return -1;
500 }
501
d20deb64
ACM
502 rec->session = session;
503
781ba9d2
RR
504 for (feat = HEADER_FIRST_FEATURE; feat < HEADER_LAST_FEATURE; feat++)
505 perf_header__set_feat(&session->header, feat);
506
507 if (rec->no_buildid)
508 perf_header__clear_feat(&session->header, HEADER_BUILD_ID);
509
510 if (!have_tracepoints(&evsel_list->entries))
2eeaaa09 511 perf_header__clear_feat(&session->header, HEADER_TRACING_DATA);
baa2f6ce 512
330aa675
SE
513 if (!rec->opts.branch_stack)
514 perf_header__clear_feat(&session->header, HEADER_BRANCH_STACK);
515
d20deb64 516 if (!rec->file_new) {
a91e5431 517 err = perf_session__read_header(session, output);
4dc0a04b 518 if (err < 0)
39d17dac 519 goto out_delete_session;
4dc0a04b
ACM
520 }
521
d4db3f16 522 if (forks) {
d20deb64 523 err = perf_evlist__prepare_workload(evsel_list, opts, argv);
35b9d88e
ACM
524 if (err < 0) {
525 pr_err("Couldn't run the workload!\n");
526 goto out_delete_session;
856e9660 527 }
856e9660
PZ
528 }
529
8d3eca20
DA
530 if (perf_record__open(rec) != 0) {
531 err = -1;
532 goto out_delete_session;
533 }
de9ac07b 534
712a4b60 535 /*
d20deb64 536 * perf_session__delete(session) will be called at perf_record__exit()
712a4b60 537 */
d20deb64 538 on_exit(perf_record__exit, rec);
712a4b60 539
d20deb64 540 if (opts->pipe_output) {
529870e3
TZ
541 err = perf_header__write_pipe(output);
542 if (err < 0)
8d3eca20 543 goto out_delete_session;
d20deb64 544 } else if (rec->file_new) {
a91e5431
ACM
545 err = perf_session__write_header(session, evsel_list,
546 output, false);
d5eed904 547 if (err < 0)
8d3eca20 548 goto out_delete_session;
56b03f3c
ACM
549 }
550
d3665498 551 if (!rec->no_buildid
e20960c0 552 && !perf_header__has_feat(&session->header, HEADER_BUILD_ID)) {
d3665498 553 pr_err("Couldn't generate buildids. "
e20960c0 554 "Use --no-buildid to profile anyway.\n");
8d3eca20
DA
555 err = -1;
556 goto out_delete_session;
e20960c0
RR
557 }
558
d20deb64 559 rec->post_processing_offset = lseek(output, 0, SEEK_CUR);
6122e4e4 560
743eb868
ACM
561 machine = perf_session__find_host_machine(session);
562 if (!machine) {
563 pr_err("Couldn't find native kernel information.\n");
8d3eca20
DA
564 err = -1;
565 goto out_delete_session;
743eb868
ACM
566 }
567
d20deb64 568 if (opts->pipe_output) {
45694aa7 569 err = perf_event__synthesize_attrs(tool, session,
d20deb64 570 process_synthesized_event);
2c46dbb5
TZ
571 if (err < 0) {
572 pr_err("Couldn't synthesize attrs.\n");
8d3eca20 573 goto out_delete_session;
2c46dbb5 574 }
cd19a035 575
45694aa7 576 err = perf_event__synthesize_event_types(tool, process_synthesized_event,
743eb868 577 machine);
cd19a035
TZ
578 if (err < 0) {
579 pr_err("Couldn't synthesize event_types.\n");
8d3eca20 580 goto out_delete_session;
cd19a035 581 }
9215545e 582
361c99a6 583 if (have_tracepoints(&evsel_list->entries)) {
63e0c771
TZ
584 /*
585 * FIXME err <= 0 here actually means that
586 * there were no tracepoints so its not really
587 * an error, just that we don't need to
588 * synthesize anything. We really have to
589 * return this more properly and also
590 * propagate errors that now are calling die()
591 */
45694aa7 592 err = perf_event__synthesize_tracing_data(tool, output, evsel_list,
743eb868 593 process_synthesized_event);
63e0c771
TZ
594 if (err <= 0) {
595 pr_err("Couldn't record tracing data.\n");
8d3eca20 596 goto out_delete_session;
63e0c771 597 }
d20deb64 598 advance_output(rec, err);
63e0c771 599 }
2c46dbb5
TZ
600 }
601
45694aa7 602 err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
743eb868 603 machine, "_text");
70162138 604 if (err < 0)
45694aa7 605 err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
743eb868 606 machine, "_stext");
c1a3a4b9
ACM
607 if (err < 0)
608 pr_err("Couldn't record kernel reference relocation symbol\n"
609 "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
610 "Check /proc/kallsyms permission or run as root.\n");
b7cece76 611
45694aa7 612 err = perf_event__synthesize_modules(tool, process_synthesized_event,
743eb868 613 machine);
c1a3a4b9
ACM
614 if (err < 0)
615 pr_err("Couldn't record kernel module information.\n"
616 "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
617 "Check /proc/modules permission or run as root.\n");
618
a1645ce1 619 if (perf_guest)
45694aa7 620 perf_session__process_machines(session, tool,
8115d60c 621 perf_event__synthesize_guest_os);
7c6a1c65 622
bea03405 623 if (!opts->target.system_wide)
8d3eca20 624 err = perf_event__synthesize_thread_map(tool, evsel_list->threads,
7c940c18 625 process_synthesized_event,
743eb868 626 machine);
234fbbf5 627 else
8d3eca20 628 err = perf_event__synthesize_threads(tool, process_synthesized_event,
743eb868 629 machine);
7c6a1c65 630
8d3eca20
DA
631 if (err != 0)
632 goto out_delete_session;
633
d20deb64 634 if (rec->realtime_prio) {
de9ac07b
PZ
635 struct sched_param param;
636
d20deb64 637 param.sched_priority = rec->realtime_prio;
de9ac07b 638 if (sched_setscheduler(0, SCHED_FIFO, &param)) {
6beba7ad 639 pr_err("Could not set realtime priority.\n");
8d3eca20
DA
640 err = -1;
641 goto out_delete_session;
de9ac07b
PZ
642 }
643 }
644
774cb499
JO
645 /*
646 * When perf is starting the traced process, all the events
647 * (apart from group members) have enable_on_exec=1 set,
648 * so don't spoil it by prematurely enabling them.
649 */
650 if (!perf_target__none(&opts->target))
651 perf_evlist__enable(evsel_list);
764e16a3 652
856e9660
PZ
653 /*
654 * Let the child rip
655 */
d4db3f16 656 if (forks)
35b9d88e 657 perf_evlist__start_workload(evsel_list);
856e9660 658
649c48a9 659 for (;;) {
d20deb64 660 int hits = rec->samples;
de9ac07b 661
8d3eca20
DA
662 if (perf_record__mmap_read_all(rec) < 0) {
663 err = -1;
664 goto out_delete_session;
665 }
de9ac07b 666
d20deb64 667 if (hits == rec->samples) {
649c48a9
PZ
668 if (done)
669 break;
5c581041 670 err = poll(evsel_list->pollfd, evsel_list->nr_fds, -1);
8b412664
PZ
671 waking++;
672 }
673
774cb499
JO
674 /*
675 * When perf is starting the traced process, at the end events
676 * die with the process and we wait for that. Thus no need to
677 * disable events in this case.
678 */
2711926a 679 if (done && !disabled && !perf_target__none(&opts->target)) {
4152ab37 680 perf_evlist__disable(evsel_list);
2711926a
JO
681 disabled = true;
682 }
de9ac07b
PZ
683 }
684
18483b81 685 if (quiet || signr == SIGUSR1)
b44308f5
ACM
686 return 0;
687
8b412664
PZ
688 fprintf(stderr, "[ perf record: Woken up %ld times to write data ]\n", waking);
689
021e9f47
IM
690 /*
691 * Approximate RIP event size: 24 bytes.
692 */
693 fprintf(stderr,
9486aa38 694 "[ perf record: Captured and wrote %.3f MB %s (~%" PRIu64 " samples) ]\n",
d20deb64 695 (double)rec->bytes_written / 1024.0 / 1024.0,
021e9f47 696 output_name,
d20deb64 697 rec->bytes_written / 24);
addc2785 698
de9ac07b 699 return 0;
39d17dac
ACM
700
701out_delete_session:
702 perf_session__delete(session);
703 return err;
de9ac07b 704}
0e9b20b8 705
bdfebd84
RAV
706#define BRANCH_OPT(n, m) \
707 { .name = n, .mode = (m) }
708
709#define BRANCH_END { .name = NULL }
710
711struct branch_mode {
712 const char *name;
713 int mode;
714};
715
716static const struct branch_mode branch_modes[] = {
717 BRANCH_OPT("u", PERF_SAMPLE_BRANCH_USER),
718 BRANCH_OPT("k", PERF_SAMPLE_BRANCH_KERNEL),
719 BRANCH_OPT("hv", PERF_SAMPLE_BRANCH_HV),
720 BRANCH_OPT("any", PERF_SAMPLE_BRANCH_ANY),
721 BRANCH_OPT("any_call", PERF_SAMPLE_BRANCH_ANY_CALL),
722 BRANCH_OPT("any_ret", PERF_SAMPLE_BRANCH_ANY_RETURN),
723 BRANCH_OPT("ind_call", PERF_SAMPLE_BRANCH_IND_CALL),
724 BRANCH_END
725};
726
727static int
a5aabdac 728parse_branch_stack(const struct option *opt, const char *str, int unset)
bdfebd84
RAV
729{
730#define ONLY_PLM \
731 (PERF_SAMPLE_BRANCH_USER |\
732 PERF_SAMPLE_BRANCH_KERNEL |\
733 PERF_SAMPLE_BRANCH_HV)
734
735 uint64_t *mode = (uint64_t *)opt->value;
736 const struct branch_mode *br;
a5aabdac 737 char *s, *os = NULL, *p;
bdfebd84
RAV
738 int ret = -1;
739
a5aabdac
SE
740 if (unset)
741 return 0;
bdfebd84 742
a5aabdac
SE
743 /*
744 * cannot set it twice, -b + --branch-filter for instance
745 */
746 if (*mode)
bdfebd84
RAV
747 return -1;
748
a5aabdac
SE
749 /* str may be NULL in case no arg is passed to -b */
750 if (str) {
751 /* because str is read-only */
752 s = os = strdup(str);
753 if (!s)
754 return -1;
755
756 for (;;) {
757 p = strchr(s, ',');
758 if (p)
759 *p = '\0';
760
761 for (br = branch_modes; br->name; br++) {
762 if (!strcasecmp(s, br->name))
763 break;
764 }
765 if (!br->name) {
766 ui__warning("unknown branch filter %s,"
767 " check man page\n", s);
768 goto error;
769 }
bdfebd84 770
a5aabdac 771 *mode |= br->mode;
bdfebd84 772
a5aabdac
SE
773 if (!p)
774 break;
bdfebd84 775
a5aabdac
SE
776 s = p + 1;
777 }
bdfebd84
RAV
778 }
779 ret = 0;
780
a5aabdac 781 /* default to any branch */
bdfebd84 782 if ((*mode & ~ONLY_PLM) == 0) {
a5aabdac 783 *mode = PERF_SAMPLE_BRANCH_ANY;
bdfebd84
RAV
784 }
785error:
786 free(os);
787 return ret;
788}
789
#ifdef LIBUNWIND_SUPPORT
/*
 * Parse the stack dump size given in @str.  The value is rounded up to a
 * multiple of sizeof(u64) and must end up non-zero and no larger than
 * USHRT_MAX rounded down to a multiple of sizeof(u64).
 *
 * On success the size is stored in *@_size and 0 is returned; otherwise an
 * error is logged, *@_size is left untouched and -1 is returned.
 */
static int get_stack_size(char *str, unsigned long *_size)
{
	char *endptr;
	unsigned long size;
	unsigned long max_size = round_down(USHRT_MAX, sizeof(u64));

	size = strtoul(str, &endptr, 0);

	/* Only accept input that is a number with nothing following it. */
	if (!*endptr) {
		size = round_up(size, sizeof(u64));
		if (size && size <= max_size) {
			*_size = size;
			return 0;
		}
	}

	pr_err("callchain: Incorrect stack dump size (max %lu): %s\n",
	       max_size, str);
	return -1;
}
#endif /* LIBUNWIND_SUPPORT */
26d33022 817
75d9a108
ACM
818int record_parse_callchain_opt(const struct option *opt,
819 const char *arg, int unset)
26d33022 820{
c5ff78c3 821 struct perf_record_opts *opts = opt->value;
26d33022
JO
822 char *tok, *name, *saveptr = NULL;
823 char *buf;
824 int ret = -1;
825
826 /* --no-call-graph */
827 if (unset)
828 return 0;
829
830 /* We specified default option if none is provided. */
831 BUG_ON(!arg);
832
833 /* We need buffer that we know we can write to. */
834 buf = malloc(strlen(arg) + 1);
835 if (!buf)
836 return -ENOMEM;
837
838 strcpy(buf, arg);
839
840 tok = strtok_r((char *)buf, ",", &saveptr);
841 name = tok ? : (char *)buf;
842
843 do {
844 /* Framepointer style */
845 if (!strncmp(name, "fp", sizeof("fp"))) {
846 if (!strtok_r(NULL, ",", &saveptr)) {
c5ff78c3 847 opts->call_graph = CALLCHAIN_FP;
26d33022
JO
848 ret = 0;
849 } else
850 pr_err("callchain: No more arguments "
851 "needed for -g fp\n");
852 break;
853
95485b1c 854#ifdef LIBUNWIND_SUPPORT
26d33022
JO
855 /* Dwarf style */
856 } else if (!strncmp(name, "dwarf", sizeof("dwarf"))) {
61eaa3be
ACM
857 const unsigned long default_stack_dump_size = 8192;
858
26d33022 859 ret = 0;
c5ff78c3
ACM
860 opts->call_graph = CALLCHAIN_DWARF;
861 opts->stack_dump_size = default_stack_dump_size;
26d33022
JO
862
863 tok = strtok_r(NULL, ",", &saveptr);
864 if (tok) {
865 unsigned long size = 0;
866
867 ret = get_stack_size(tok, &size);
c5ff78c3 868 opts->stack_dump_size = size;
26d33022
JO
869 }
870
871 if (!ret)
872 pr_debug("callchain: stack dump size %d\n",
c5ff78c3 873 opts->stack_dump_size);
95485b1c 874#endif /* LIBUNWIND_SUPPORT */
26d33022
JO
875 } else {
876 pr_err("callchain: Unknown -g option "
877 "value: %s\n", arg);
878 break;
879 }
880
881 } while (0);
882
883 free(buf);
884
885 if (!ret)
c5ff78c3 886 pr_debug("callchain: type %d\n", opts->call_graph);
26d33022
JO
887
888 return ret;
889}
890
/* Usage lines for 'perf record', NULL terminated. */
static const char * const record_usage[] = {
	"perf record [<options>] [<command>]",
	"perf record [<options>] -- <command> [<options>]",
	NULL
};
896
d20deb64
ACM
897/*
898 * XXX Ideally would be local to cmd_record() and passed to a perf_record__new
899 * because we need to have access to it in perf_record__exit, that is called
900 * after cmd_record() exits, but since record_options need to be accessible to
901 * builtin-script, leave it here.
902 *
903 * At least we don't touch it in all the other functions here directly.
904 *
905 * Just say no to tons of global variables, sigh.
906 */
907static struct perf_record record = {
908 .opts = {
d20deb64
ACM
909 .mmap_pages = UINT_MAX,
910 .user_freq = UINT_MAX,
911 .user_interval = ULLONG_MAX,
447a6013 912 .freq = 4000,
d1cb9fce
NK
913 .target = {
914 .uses_mmap = true,
915 },
d20deb64
ACM
916 },
917 .write_mode = WRITE_FORCE,
918 .file_new = true,
919};
7865e817 920
61eaa3be
ACM
/* Common prefix of the call-graph help text; the supported modes follow. */
#define CALLCHAIN_HELP "do call-graph (stack chain/backtrace) recording: "

/* "dwarf" is only offered when built with libunwind support. */
#ifdef LIBUNWIND_SUPPORT
const char record_callchain_help[] = CALLCHAIN_HELP "[fp] dwarf";
#else
const char record_callchain_help[] = CALLCHAIN_HELP "[fp]";
#endif
928
d20deb64
ACM
929/*
930 * XXX Will stay a global variable till we fix builtin-script.c to stop messing
931 * with it and switch to use the library functions in perf_evlist that came
932 * from builtin-record.c, i.e. use perf_record_opts,
933 * perf_evlist__prepare_workload, etc instead of fork+exec'in 'perf record',
934 * using pipes, etc.
935 */
bca647aa 936const struct option record_options[] = {
d20deb64 937 OPT_CALLBACK('e', "event", &record.evlist, "event",
86847b62 938 "event selector. use 'perf list' to list available events",
f120f9d5 939 parse_events_option),
d20deb64 940 OPT_CALLBACK(0, "filter", &record.evlist, "filter",
c171b552 941 "event filter", parse_filter),
bea03405 942 OPT_STRING('p', "pid", &record.opts.target.pid, "pid",
d6d901c2 943 "record events on existing process id"),
bea03405 944 OPT_STRING('t', "tid", &record.opts.target.tid, "tid",
d6d901c2 945 "record events on existing thread id"),
d20deb64 946 OPT_INTEGER('r', "realtime", &record.realtime_prio,
0e9b20b8 947 "collect data with this RT SCHED_FIFO priority"),
d20deb64 948 OPT_BOOLEAN('D', "no-delay", &record.opts.no_delay,
acac03fa 949 "collect data without buffering"),
d20deb64 950 OPT_BOOLEAN('R', "raw-samples", &record.opts.raw_samples,
daac07b2 951 "collect raw sample records from all opened counters"),
bea03405 952 OPT_BOOLEAN('a', "all-cpus", &record.opts.target.system_wide,
0e9b20b8 953 "system-wide collection from all CPUs"),
d20deb64 954 OPT_BOOLEAN('A', "append", &record.append_file,
abaff32a 955 "append to the output file to do incremental profiling"),
bea03405 956 OPT_STRING('C', "cpu", &record.opts.target.cpu_list, "cpu",
c45c6ea2 957 "list of cpus to monitor"),
d20deb64 958 OPT_BOOLEAN('f', "force", &record.force,
7865e817 959 "overwrite existing data file (deprecated)"),
d20deb64
ACM
960 OPT_U64('c', "count", &record.opts.user_interval, "event period to sample"),
961 OPT_STRING('o', "output", &record.output_name, "file",
abaff32a 962 "output file name"),
d20deb64 963 OPT_BOOLEAN('i', "no-inherit", &record.opts.no_inherit,
2e6cdf99 964 "child tasks do not inherit counters"),
d20deb64
ACM
965 OPT_UINTEGER('F', "freq", &record.opts.user_freq, "profile at this frequency"),
966 OPT_UINTEGER('m', "mmap-pages", &record.opts.mmap_pages,
01c2d99b 967 "number of mmap data pages"),
d20deb64 968 OPT_BOOLEAN(0, "group", &record.opts.group,
43bece79 969 "put the counters into a counter group"),
c5ff78c3 970 OPT_CALLBACK_DEFAULT('g', "call-graph", &record.opts,
75d9a108
ACM
971 "mode[,dump_size]", record_callchain_help,
972 &record_parse_callchain_opt, "fp"),
c0555642 973 OPT_INCR('v', "verbose", &verbose,
3da297a6 974 "be more verbose (show counter open errors, etc)"),
b44308f5 975 OPT_BOOLEAN('q', "quiet", &quiet, "don't print any message"),
d20deb64 976 OPT_BOOLEAN('s', "stat", &record.opts.inherit_stat,
649c48a9 977 "per thread counts"),
d20deb64 978 OPT_BOOLEAN('d', "data", &record.opts.sample_address,
4bba828d 979 "Sample addresses"),
d20deb64 980 OPT_BOOLEAN('T', "timestamp", &record.opts.sample_time, "Sample timestamps"),
3e76ac78 981 OPT_BOOLEAN('P', "period", &record.opts.period, "Sample period"),
d20deb64 982 OPT_BOOLEAN('n', "no-samples", &record.opts.no_samples,
649c48a9 983 "don't sample"),
d20deb64 984 OPT_BOOLEAN('N', "no-buildid-cache", &record.no_buildid_cache,
a1ac1d3c 985 "do not update the buildid cache"),
d20deb64 986 OPT_BOOLEAN('B', "no-buildid", &record.no_buildid,
baa2f6ce 987 "do not collect buildids in perf.data"),
d20deb64 988 OPT_CALLBACK('G', "cgroup", &record.evlist, "name",
023695d9
SE
989 "monitor event in cgroup name only",
990 parse_cgroups),
bea03405
NK
991 OPT_STRING('u', "uid", &record.opts.target.uid_str, "user",
992 "user to profile"),
a5aabdac
SE
993
994 OPT_CALLBACK_NOOPT('b', "branch-any", &record.opts.branch_stack,
995 "branch any", "sample any taken branches",
996 parse_branch_stack),
997
998 OPT_CALLBACK('j', "branch-filter", &record.opts.branch_stack,
999 "branch filter mask", "branch stack filter modes",
bdfebd84 1000 parse_branch_stack),
0e9b20b8
IM
1001 OPT_END()
1002};
1003
1d037ca1 1004int cmd_record(int argc, const char **argv, const char *prefix __maybe_unused)
0e9b20b8 1005{
69aad6f1
ACM
1006 int err = -ENOMEM;
1007 struct perf_evsel *pos;
d20deb64
ACM
1008 struct perf_evlist *evsel_list;
1009 struct perf_record *rec = &record;
16ad2ffb 1010 char errbuf[BUFSIZ];
0e9b20b8 1011
7e2ed097 1012 evsel_list = perf_evlist__new(NULL, NULL);
361c99a6
ACM
1013 if (evsel_list == NULL)
1014 return -ENOMEM;
1015
d20deb64
ACM
1016 rec->evlist = evsel_list;
1017
bca647aa 1018 argc = parse_options(argc, argv, record_options, record_usage,
655000e7 1019 PARSE_OPT_STOP_AT_NON_OPTION);
d67356e7 1020 if (!argc && perf_target__none(&rec->opts.target))
bca647aa 1021 usage_with_options(record_usage, record_options);
0e9b20b8 1022
d20deb64 1023 if (rec->force && rec->append_file) {
3780f488
NK
1024 ui__error("Can't overwrite and append at the same time."
1025 " You need to choose between -f and -A");
bca647aa 1026 usage_with_options(record_usage, record_options);
d20deb64
ACM
1027 } else if (rec->append_file) {
1028 rec->write_mode = WRITE_APPEND;
7865e817 1029 } else {
d20deb64 1030 rec->write_mode = WRITE_FORCE;
7865e817
FW
1031 }
1032
bea03405 1033 if (nr_cgroups && !rec->opts.target.system_wide) {
3780f488
NK
1034 ui__error("cgroup monitoring only available in"
1035 " system-wide mode\n");
023695d9
SE
1036 usage_with_options(record_usage, record_options);
1037 }
1038
655000e7 1039 symbol__init();
baa2f6ce 1040
ec80fde7 1041 if (symbol_conf.kptr_restrict)
646aaea6
ACM
1042 pr_warning(
1043"WARNING: Kernel address maps (/proc/{kallsyms,modules}) are restricted,\n"
1044"check /proc/sys/kernel/kptr_restrict.\n\n"
1045"Samples in kernel functions may not be resolved if a suitable vmlinux\n"
1046"file is not found in the buildid cache or in the vmlinux path.\n\n"
1047"Samples in kernel modules won't be resolved at all.\n\n"
1048"If some relocation was applied (e.g. kexec) symbols may be misresolved\n"
1049"even with a suitable vmlinux or kallsyms file.\n\n");
ec80fde7 1050
d20deb64 1051 if (rec->no_buildid_cache || rec->no_buildid)
a1ac1d3c 1052 disable_buildid_cache();
655000e7 1053
361c99a6
ACM
1054 if (evsel_list->nr_entries == 0 &&
1055 perf_evlist__add_default(evsel_list) < 0) {
69aad6f1
ACM
1056 pr_err("Not enough memory for event selector list\n");
1057 goto out_symbol_exit;
bbd36e5e 1058 }
0e9b20b8 1059
16ad2ffb
NK
1060 err = perf_target__validate(&rec->opts.target);
1061 if (err) {
1062 perf_target__strerror(&rec->opts.target, err, errbuf, BUFSIZ);
1063 ui__warning("%s", errbuf);
1064 }
1065
1066 err = perf_target__parse_uid(&rec->opts.target);
1067 if (err) {
1068 int saved_errno = errno;
4bd0f2d2 1069
16ad2ffb 1070 perf_target__strerror(&rec->opts.target, err, errbuf, BUFSIZ);
3780f488 1071 ui__error("%s", errbuf);
16ad2ffb
NK
1072
1073 err = -saved_errno;
0d37aa34 1074 goto out_free_fd;
16ad2ffb 1075 }
0d37aa34 1076
16ad2ffb 1077 err = -ENOMEM;
b809ac10 1078 if (perf_evlist__create_maps(evsel_list, &rec->opts.target) < 0)
dd7927f4 1079 usage_with_options(record_usage, record_options);
69aad6f1 1080
361c99a6 1081 list_for_each_entry(pos, &evsel_list->entries, node) {
7289f83c 1082 if (perf_header__push_event(pos->attr.config, perf_evsel__name(pos)))
ad7f4e3f 1083 goto out_free_fd;
d6d901c2 1084 }
5c581041 1085
d20deb64
ACM
1086 if (rec->opts.user_interval != ULLONG_MAX)
1087 rec->opts.default_interval = rec->opts.user_interval;
1088 if (rec->opts.user_freq != UINT_MAX)
1089 rec->opts.freq = rec->opts.user_freq;
f9212819 1090
7e4ff9e3
MG
1091 /*
1092 * User specified count overrides default frequency.
1093 */
d20deb64
ACM
1094 if (rec->opts.default_interval)
1095 rec->opts.freq = 0;
1096 else if (rec->opts.freq) {
1097 rec->opts.default_interval = rec->opts.freq;
7e4ff9e3 1098 } else {
3780f488 1099 ui__error("frequency and count are zero, aborting\n");
39d17dac 1100 err = -EINVAL;
5c581041 1101 goto out_free_fd;
7e4ff9e3
MG
1102 }
1103
d20deb64 1104 err = __cmd_record(&record, argc, argv);
39d17dac 1105out_free_fd:
7e2ed097 1106 perf_evlist__delete_maps(evsel_list);
d65a458b
ACM
1107out_symbol_exit:
1108 symbol__exit();
39d17dac 1109 return err;
0e9b20b8 1110}