perf evsel: Do missing feature fallbacks in just one place
[linux-2.6-block.git] / tools / perf / builtin-record.c
CommitLineData
abaff32a 1/*
bf9e1876
IM
2 * builtin-record.c
3 *
4 * Builtin record command: Record the profile of a workload
5 * (or a CPU, or a PID) into the perf.data output file - for
6 * later analysis via perf report.
abaff32a 7 */
b8f46c5a
XG
8#define _FILE_OFFSET_BITS 64
9
16f762a2 10#include "builtin.h"
bf9e1876
IM
11
12#include "perf.h"
13
6122e4e4 14#include "util/build-id.h"
6eda5838 15#include "util/util.h"
0e9b20b8 16#include "util/parse-options.h"
8ad8db37 17#include "util/parse-events.h"
6eda5838 18
7c6a1c65 19#include "util/header.h"
66e274f3 20#include "util/event.h"
361c99a6 21#include "util/evlist.h"
69aad6f1 22#include "util/evsel.h"
8f28827a 23#include "util/debug.h"
94c744b6 24#include "util/session.h"
45694aa7 25#include "util/tool.h"
8d06367f 26#include "util/symbol.h"
a12b51c4 27#include "util/cpumap.h"
fd78260b 28#include "util/thread_map.h"
7c6a1c65 29
97124d5e 30#include <unistd.h>
de9ac07b 31#include <sched.h>
a41794cd 32#include <sys/mman.h>
de9ac07b 33
#ifndef HAVE_ON_EXIT
#ifndef ATEXIT_MAX
#define ATEXIT_MAX 32
#endif
/*
 * Minimal on_exit() emulation for C libraries that do not provide it.
 *
 * Up to ATEXIT_MAX (function, argument) pairs are stored in the arrays
 * below and dispatched from a single atexit() hook.  The exit status is
 * captured by the exit() wrapper macro so it can be handed to the handlers.
 */
static int __on_exit_count = 0;
typedef void (*on_exit_func_t) (int, void *);
static on_exit_func_t __on_exit_funcs[ATEXIT_MAX];
static void *__on_exit_args[ATEXIT_MAX];
static int __exitcode = 0;
static void __handle_on_exit_funcs(void);
static int on_exit(on_exit_func_t function, void *arg);
#define exit(x) (exit)(__exitcode = (x))

/*
 * Register @function to be called with the exit status and @arg at process
 * termination.
 *
 * Returns 0 on success, -ENOMEM when the table is full or when the atexit()
 * hook that drives the table could not be installed.
 */
static int on_exit(on_exit_func_t function, void *arg)
{
	if (__on_exit_count == ATEXIT_MAX)
		return -ENOMEM;

	/* The dispatcher is installed once, together with the first handler. */
	if (__on_exit_count == 0 && atexit(__handle_on_exit_funcs) != 0)
		return -ENOMEM;

	__on_exit_funcs[__on_exit_count] = function;
	__on_exit_args[__on_exit_count++] = arg;
	return 0;
}

/*
 * atexit() hook: run the registered handlers in reverse order of
 * registration, which is the order atexit()/on_exit() handlers run in.
 */
static void __handle_on_exit_funcs(void)
{
	int i;

	for (i = __on_exit_count - 1; i >= 0; i--)
		__on_exit_funcs[i](__exitcode, __on_exit_args[i]);
}
#endif
65
7865e817
FW
66enum write_mode_t {
67 WRITE_FORCE,
68 WRITE_APPEND
69};
70
d20deb64 71struct perf_record {
45694aa7 72 struct perf_tool tool;
d20deb64
ACM
73 struct perf_record_opts opts;
74 u64 bytes_written;
75 const char *output_name;
76 struct perf_evlist *evlist;
77 struct perf_session *session;
78 const char *progname;
79 int output;
80 unsigned int page_size;
81 int realtime_prio;
82 enum write_mode_t write_mode;
83 bool no_buildid;
84 bool no_buildid_cache;
85 bool force;
86 bool file_new;
87 bool append_file;
88 long samples;
89 off_t post_processing_offset;
0f82ebc4 90};
a21ca2ca 91
d20deb64 92static void advance_output(struct perf_record *rec, size_t size)
9215545e 93{
d20deb64 94 rec->bytes_written += size;
9215545e
TZ
95}
96
8d3eca20 97static int write_output(struct perf_record *rec, void *buf, size_t size)
f5970550
PZ
98{
99 while (size) {
d20deb64 100 int ret = write(rec->output, buf, size);
f5970550 101
8d3eca20
DA
102 if (ret < 0) {
103 pr_err("failed to write\n");
104 return -1;
105 }
f5970550
PZ
106
107 size -= ret;
108 buf += ret;
109
d20deb64 110 rec->bytes_written += ret;
f5970550 111 }
8d3eca20
DA
112
113 return 0;
f5970550
PZ
114}
115
45694aa7 116static int process_synthesized_event(struct perf_tool *tool,
d20deb64 117 union perf_event *event,
1d037ca1
IT
118 struct perf_sample *sample __maybe_unused,
119 struct machine *machine __maybe_unused)
234fbbf5 120{
45694aa7 121 struct perf_record *rec = container_of(tool, struct perf_record, tool);
8d3eca20
DA
122 if (write_output(rec, event, event->header.size) < 0)
123 return -1;
124
234fbbf5
ACM
125 return 0;
126}
127
8d3eca20 128static int perf_record__mmap_read(struct perf_record *rec,
d20deb64 129 struct perf_mmap *md)
de9ac07b 130{
744bd8aa 131 unsigned int head = perf_mmap__read_head(md);
de9ac07b 132 unsigned int old = md->prev;
d20deb64 133 unsigned char *data = md->base + rec->page_size;
de9ac07b
PZ
134 unsigned long size;
135 void *buf;
8d3eca20 136 int rc = 0;
de9ac07b 137
dc82009a 138 if (old == head)
8d3eca20 139 return 0;
dc82009a 140
d20deb64 141 rec->samples++;
de9ac07b
PZ
142
143 size = head - old;
144
145 if ((old & md->mask) + size != (head & md->mask)) {
146 buf = &data[old & md->mask];
147 size = md->mask + 1 - (old & md->mask);
148 old += size;
021e9f47 149
8d3eca20
DA
150 if (write_output(rec, buf, size) < 0) {
151 rc = -1;
152 goto out;
153 }
de9ac07b
PZ
154 }
155
156 buf = &data[old & md->mask];
157 size = head - old;
158 old += size;
021e9f47 159
8d3eca20
DA
160 if (write_output(rec, buf, size) < 0) {
161 rc = -1;
162 goto out;
163 }
de9ac07b
PZ
164
165 md->prev = old;
115d2d89 166 perf_mmap__write_tail(md, old);
8d3eca20
DA
167
168out:
169 return rc;
de9ac07b
PZ
170}
171
/* Flags written by sig_handler() and read by the record loop / exit hooks. */
static volatile int done = 0;
static volatile int signr = -1;
static volatile int child_finished = 0;

/*
 * Signal handler: note that recording should stop and which signal asked
 * for it; additionally remember when that signal was SIGCHLD.
 */
static void sig_handler(int sig)
{
	const int from_child = (sig == SIGCHLD);

	if (from_child)
		child_finished = 1;

	done = 1;
	signr = sig;
}
184
1d037ca1 185static void perf_record__sig_exit(int exit_status __maybe_unused, void *arg)
f7b7c26e 186{
d20deb64 187 struct perf_record *rec = arg;
33e49ea7
AK
188 int status;
189
d20deb64 190 if (rec->evlist->workload.pid > 0) {
33e49ea7 191 if (!child_finished)
d20deb64 192 kill(rec->evlist->workload.pid, SIGTERM);
33e49ea7
AK
193
194 wait(&status);
195 if (WIFSIGNALED(status))
d20deb64 196 psignal(WTERMSIG(status), rec->progname);
33e49ea7 197 }
933da83a 198
18483b81 199 if (signr == -1 || signr == SIGUSR1)
f7b7c26e
PZ
200 return;
201
202 signal(signr, SIG_DFL);
203 kill(getpid(), signr);
de9ac07b
PZ
204}
205
a91e5431
ACM
206static bool perf_evlist__equal(struct perf_evlist *evlist,
207 struct perf_evlist *other)
208{
209 struct perf_evsel *pos, *pair;
210
211 if (evlist->nr_entries != other->nr_entries)
212 return false;
213
0c21f736 214 pair = perf_evlist__first(other);
a91e5431
ACM
215
216 list_for_each_entry(pos, &evlist->entries, node) {
217 if (memcmp(&pos->attr, &pair->attr, sizeof(pos->attr) != 0))
218 return false;
0c21f736 219 pair = perf_evsel__next(pair);
a91e5431
ACM
220 }
221
222 return true;
223}
224
8d3eca20 225static int perf_record__open(struct perf_record *rec)
dd7927f4 226{
6a4bb04c 227 struct perf_evsel *pos;
d20deb64
ACM
228 struct perf_evlist *evlist = rec->evlist;
229 struct perf_session *session = rec->session;
230 struct perf_record_opts *opts = &rec->opts;
8d3eca20 231 int rc = 0;
dd7927f4 232
f77a9518 233 perf_evlist__config(evlist, opts);
cac21425 234
dd7927f4
ACM
235 list_for_each_entry(pos, &evlist->entries, node) {
236 struct perf_event_attr *attr = &pos->attr;
dd7927f4 237try_again:
6a4bb04c 238 if (perf_evsel__open(pos, evlist->cpus, evlist->threads) < 0) {
d6d901c2
ZY
239 int err = errno;
240
c286c419 241 if (err == EPERM || err == EACCES) {
b8631e6e 242 ui__error_paranoid();
8d3eca20
DA
243 rc = -err;
244 goto out;
bea03405 245 } else if (err == ENODEV && opts->target.cpu_list) {
8d3eca20
DA
246 pr_err("No such device - did you specify"
247 " an out-of-range profile CPU?\n");
248 rc = -err;
249 goto out;
d6d901c2 250 }
3da297a6 251
d6d901c2
ZY
252 /*
253 * If it's cycles then fall back to hrtimer
254 * based cpu-clock-tick sw counter, which
028d455b
DA
255 * is always available even if no PMU support.
256 *
257 * PPC returns ENXIO until 2.6.37 (behavior changed
258 * with commit b0a873e).
d6d901c2 259 */
028d455b
DA
260 if ((err == ENOENT || err == ENXIO)
261 && attr->type == PERF_TYPE_HARDWARE
d6d901c2
ZY
262 && attr->config == PERF_COUNT_HW_CPU_CYCLES) {
263
264 if (verbose)
ca6a4258
DA
265 ui__warning("The cycles event is not supported, "
266 "trying to fall back to cpu-clock-ticks\n");
d6d901c2
ZY
267 attr->type = PERF_TYPE_SOFTWARE;
268 attr->config = PERF_COUNT_SW_CPU_CLOCK;
d1cae34d
DA
269 if (pos->name) {
270 free(pos->name);
271 pos->name = NULL;
272 }
d6d901c2
ZY
273 goto try_again;
274 }
ca6a4258
DA
275
276 if (err == ENOENT) {
3780f488 277 ui__error("The %s event is not supported.\n",
7289f83c 278 perf_evsel__name(pos));
8d3eca20
DA
279 rc = -err;
280 goto out;
2305c82f
DA
281 } else if ((err == EOPNOTSUPP) && (attr->precise_ip)) {
282 ui__error("\'precise\' request may not be supported. "
283 "Try removing 'p' modifier\n");
284 rc = -err;
285 goto out;
ca6a4258
DA
286 }
287
d6d901c2 288 printf("\n");
1863fbbb
SE
289 error("sys_perf_event_open() syscall returned with %d "
290 "(%s) for event %s. /bin/dmesg may provide "
291 "additional information.\n",
292 err, strerror(err), perf_evsel__name(pos));
bfd45118
SK
293
294#if defined(__i386__) || defined(__x86_64__)
8d3eca20
DA
295 if (attr->type == PERF_TYPE_HARDWARE &&
296 err == EOPNOTSUPP) {
297 pr_err("No hardware sampling interrupt available."
298 " No APIC? If so then you can boot the kernel"
299 " with the \"lapic\" boot parameter to"
300 " force-enable it.\n");
301 rc = -err;
302 goto out;
303 }
bfd45118
SK
304#endif
305
8d3eca20
DA
306 pr_err("No CONFIG_PERF_EVENTS=y kernel support configured?\n");
307 rc = -err;
308 goto out;
c171b552
LZ
309 }
310 }
a43d3f08 311
1491a632 312 if (perf_evlist__apply_filters(evlist)) {
0a102479
FW
313 error("failed to set filter with %d (%s)\n", errno,
314 strerror(errno));
8d3eca20
DA
315 rc = -1;
316 goto out;
0a102479
FW
317 }
318
18e60939 319 if (perf_evlist__mmap(evlist, opts->mmap_pages, false) < 0) {
8d3eca20
DA
320 if (errno == EPERM) {
321 pr_err("Permission error mapping pages.\n"
322 "Consider increasing "
323 "/proc/sys/kernel/perf_event_mlock_kb,\n"
324 "or try again with a smaller value of -m/--mmap_pages.\n"
325 "(current value: %d)\n", opts->mmap_pages);
326 rc = -errno;
0089fa98
JO
327 } else if (!is_power_of_2(opts->mmap_pages) &&
328 (opts->mmap_pages != UINT_MAX)) {
8d3eca20
DA
329 pr_err("--mmap_pages/-m value must be a power of two.");
330 rc = -EINVAL;
331 } else {
332 pr_err("failed to mmap with %d (%s)\n", errno, strerror(errno));
333 rc = -errno;
334 }
335 goto out;
18e60939 336 }
0a27d7f9 337
d20deb64 338 if (rec->file_new)
a91e5431
ACM
339 session->evlist = evlist;
340 else {
341 if (!perf_evlist__equal(session->evlist, evlist)) {
342 fprintf(stderr, "incompatible append\n");
8d3eca20
DA
343 rc = -1;
344 goto out;
a91e5431
ACM
345 }
346 }
347
7b56cce2 348 perf_session__set_id_hdr_size(session);
8d3eca20
DA
349out:
350 return rc;
16c8a109
PZ
351}
352
d20deb64 353static int process_buildids(struct perf_record *rec)
6122e4e4 354{
d20deb64 355 u64 size = lseek(rec->output, 0, SEEK_CUR);
6122e4e4 356
9f591fd7
ACM
357 if (size == 0)
358 return 0;
359
d20deb64
ACM
360 rec->session->fd = rec->output;
361 return __perf_session__process_events(rec->session, rec->post_processing_offset,
362 size - rec->post_processing_offset,
6122e4e4
ACM
363 size, &build_id__mark_dso_hit_ops);
364}
365
8d3eca20 366static void perf_record__exit(int status, void *arg)
f5970550 367{
d20deb64
ACM
368 struct perf_record *rec = arg;
369
8d3eca20
DA
370 if (status != 0)
371 return;
372
d20deb64
ACM
373 if (!rec->opts.pipe_output) {
374 rec->session->header.data_size += rec->bytes_written;
375
376 if (!rec->no_buildid)
377 process_buildids(rec);
378 perf_session__write_header(rec->session, rec->evlist,
379 rec->output, true);
380 perf_session__delete(rec->session);
381 perf_evlist__delete(rec->evlist);
d65a458b 382 symbol__exit();
c7929e47 383 }
f5970550
PZ
384}
385
8115d60c 386static void perf_event__synthesize_guest_os(struct machine *machine, void *data)
a1645ce1
ZY
387{
388 int err;
45694aa7 389 struct perf_tool *tool = data;
a1645ce1 390
23346f21 391 if (machine__is_host(machine))
a1645ce1
ZY
392 return;
393
394 /*
395 *As for guest kernel when processing subcommand record&report,
396 *we arrange module mmap prior to guest kernel mmap and trigger
397 *a preload dso because default guest module symbols are loaded
398 *from guest kallsyms instead of /lib/modules/XXX/XXX. This
399 *method is used to avoid symbol missing when the first addr is
400 *in module instead of in guest kernel.
401 */
45694aa7 402 err = perf_event__synthesize_modules(tool, process_synthesized_event,
743eb868 403 machine);
a1645ce1
ZY
404 if (err < 0)
405 pr_err("Couldn't record guest kernel [%d]'s reference"
23346f21 406 " relocation symbol.\n", machine->pid);
a1645ce1 407
a1645ce1
ZY
408 /*
409 * We use _stext for guest kernel because guest kernel's /proc/kallsyms
410 * have no _text sometimes.
411 */
45694aa7 412 err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
743eb868 413 machine, "_text");
a1645ce1 414 if (err < 0)
45694aa7 415 err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
743eb868 416 machine, "_stext");
a1645ce1
ZY
417 if (err < 0)
418 pr_err("Couldn't record guest kernel [%d]'s reference"
23346f21 419 " relocation symbol.\n", machine->pid);
a1645ce1
ZY
420}
421
98402807
FW
422static struct perf_event_header finished_round_event = {
423 .size = sizeof(struct perf_event_header),
424 .type = PERF_RECORD_FINISHED_ROUND,
425};
426
8d3eca20 427static int perf_record__mmap_read_all(struct perf_record *rec)
98402807 428{
0e2e63dd 429 int i;
8d3eca20 430 int rc = 0;
98402807 431
d20deb64 432 for (i = 0; i < rec->evlist->nr_mmaps; i++) {
8d3eca20
DA
433 if (rec->evlist->mmap[i].base) {
434 if (perf_record__mmap_read(rec, &rec->evlist->mmap[i]) != 0) {
435 rc = -1;
436 goto out;
437 }
438 }
98402807
FW
439 }
440
2eeaaa09 441 if (perf_header__has_feat(&rec->session->header, HEADER_TRACING_DATA))
8d3eca20
DA
442 rc = write_output(rec, &finished_round_event,
443 sizeof(finished_round_event));
444
445out:
446 return rc;
98402807
FW
447}
448
d20deb64 449static int __cmd_record(struct perf_record *rec, int argc, const char **argv)
16c8a109 450{
abaff32a 451 struct stat st;
abaff32a 452 int flags;
781ba9d2 453 int err, output, feat;
8b412664 454 unsigned long waking = 0;
46be604b 455 const bool forks = argc > 0;
23346f21 456 struct machine *machine;
45694aa7 457 struct perf_tool *tool = &rec->tool;
d20deb64
ACM
458 struct perf_record_opts *opts = &rec->opts;
459 struct perf_evlist *evsel_list = rec->evlist;
460 const char *output_name = rec->output_name;
461 struct perf_session *session;
2711926a 462 bool disabled = false;
de9ac07b 463
d20deb64 464 rec->progname = argv[0];
33e49ea7 465
d20deb64 466 rec->page_size = sysconf(_SC_PAGE_SIZE);
de9ac07b 467
d20deb64 468 on_exit(perf_record__sig_exit, rec);
f5970550
PZ
469 signal(SIGCHLD, sig_handler);
470 signal(SIGINT, sig_handler);
18483b81 471 signal(SIGUSR1, sig_handler);
f5970550 472
d7065adb
FBH
473 if (!output_name) {
474 if (!fstat(STDOUT_FILENO, &st) && S_ISFIFO(st.st_mode))
d20deb64 475 opts->pipe_output = true;
d7065adb 476 else
d20deb64 477 rec->output_name = output_name = "perf.data";
d7065adb
FBH
478 }
479 if (output_name) {
480 if (!strcmp(output_name, "-"))
d20deb64 481 opts->pipe_output = true;
d7065adb 482 else if (!stat(output_name, &st) && st.st_size) {
d20deb64 483 if (rec->write_mode == WRITE_FORCE) {
d7065adb
FBH
484 char oldname[PATH_MAX];
485 snprintf(oldname, sizeof(oldname), "%s.old",
486 output_name);
487 unlink(oldname);
488 rename(output_name, oldname);
489 }
d20deb64
ACM
490 } else if (rec->write_mode == WRITE_APPEND) {
491 rec->write_mode = WRITE_FORCE;
266e0e21 492 }
97124d5e
PZ
493 }
494
f887f301 495 flags = O_CREAT|O_RDWR;
d20deb64
ACM
496 if (rec->write_mode == WRITE_APPEND)
497 rec->file_new = 0;
abaff32a
IM
498 else
499 flags |= O_TRUNC;
500
d20deb64 501 if (opts->pipe_output)
529870e3
TZ
502 output = STDOUT_FILENO;
503 else
504 output = open(output_name, flags, S_IRUSR | S_IWUSR);
de9ac07b
PZ
505 if (output < 0) {
506 perror("failed to create output file");
8d3eca20 507 return -1;
de9ac07b
PZ
508 }
509
d20deb64
ACM
510 rec->output = output;
511
7865e817 512 session = perf_session__new(output_name, O_WRONLY,
d20deb64 513 rec->write_mode == WRITE_FORCE, false, NULL);
94c744b6 514 if (session == NULL) {
a9a70bbc
ACM
515 pr_err("Not enough memory for reading perf file header\n");
516 return -1;
517 }
518
d20deb64
ACM
519 rec->session = session;
520
781ba9d2
RR
521 for (feat = HEADER_FIRST_FEATURE; feat < HEADER_LAST_FEATURE; feat++)
522 perf_header__set_feat(&session->header, feat);
523
524 if (rec->no_buildid)
525 perf_header__clear_feat(&session->header, HEADER_BUILD_ID);
526
527 if (!have_tracepoints(&evsel_list->entries))
2eeaaa09 528 perf_header__clear_feat(&session->header, HEADER_TRACING_DATA);
baa2f6ce 529
330aa675
SE
530 if (!rec->opts.branch_stack)
531 perf_header__clear_feat(&session->header, HEADER_BRANCH_STACK);
532
d20deb64 533 if (!rec->file_new) {
a91e5431 534 err = perf_session__read_header(session, output);
4dc0a04b 535 if (err < 0)
39d17dac 536 goto out_delete_session;
4dc0a04b
ACM
537 }
538
d4db3f16 539 if (forks) {
d20deb64 540 err = perf_evlist__prepare_workload(evsel_list, opts, argv);
35b9d88e
ACM
541 if (err < 0) {
542 pr_err("Couldn't run the workload!\n");
543 goto out_delete_session;
856e9660 544 }
856e9660
PZ
545 }
546
8d3eca20
DA
547 if (perf_record__open(rec) != 0) {
548 err = -1;
549 goto out_delete_session;
550 }
de9ac07b 551
712a4b60 552 /*
d20deb64 553 * perf_session__delete(session) will be called at perf_record__exit()
712a4b60 554 */
d20deb64 555 on_exit(perf_record__exit, rec);
712a4b60 556
d20deb64 557 if (opts->pipe_output) {
529870e3
TZ
558 err = perf_header__write_pipe(output);
559 if (err < 0)
8d3eca20 560 goto out_delete_session;
d20deb64 561 } else if (rec->file_new) {
a91e5431
ACM
562 err = perf_session__write_header(session, evsel_list,
563 output, false);
d5eed904 564 if (err < 0)
8d3eca20 565 goto out_delete_session;
56b03f3c
ACM
566 }
567
d3665498 568 if (!rec->no_buildid
e20960c0 569 && !perf_header__has_feat(&session->header, HEADER_BUILD_ID)) {
d3665498 570 pr_err("Couldn't generate buildids. "
e20960c0 571 "Use --no-buildid to profile anyway.\n");
8d3eca20
DA
572 err = -1;
573 goto out_delete_session;
e20960c0
RR
574 }
575
d20deb64 576 rec->post_processing_offset = lseek(output, 0, SEEK_CUR);
6122e4e4 577
743eb868
ACM
578 machine = perf_session__find_host_machine(session);
579 if (!machine) {
580 pr_err("Couldn't find native kernel information.\n");
8d3eca20
DA
581 err = -1;
582 goto out_delete_session;
743eb868
ACM
583 }
584
d20deb64 585 if (opts->pipe_output) {
45694aa7 586 err = perf_event__synthesize_attrs(tool, session,
d20deb64 587 process_synthesized_event);
2c46dbb5
TZ
588 if (err < 0) {
589 pr_err("Couldn't synthesize attrs.\n");
8d3eca20 590 goto out_delete_session;
2c46dbb5 591 }
cd19a035 592
45694aa7 593 err = perf_event__synthesize_event_types(tool, process_synthesized_event,
743eb868 594 machine);
cd19a035
TZ
595 if (err < 0) {
596 pr_err("Couldn't synthesize event_types.\n");
8d3eca20 597 goto out_delete_session;
cd19a035 598 }
9215545e 599
361c99a6 600 if (have_tracepoints(&evsel_list->entries)) {
63e0c771
TZ
601 /*
602 * FIXME err <= 0 here actually means that
603 * there were no tracepoints so its not really
604 * an error, just that we don't need to
605 * synthesize anything. We really have to
606 * return this more properly and also
607 * propagate errors that now are calling die()
608 */
45694aa7 609 err = perf_event__synthesize_tracing_data(tool, output, evsel_list,
743eb868 610 process_synthesized_event);
63e0c771
TZ
611 if (err <= 0) {
612 pr_err("Couldn't record tracing data.\n");
8d3eca20 613 goto out_delete_session;
63e0c771 614 }
d20deb64 615 advance_output(rec, err);
63e0c771 616 }
2c46dbb5
TZ
617 }
618
45694aa7 619 err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
743eb868 620 machine, "_text");
70162138 621 if (err < 0)
45694aa7 622 err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
743eb868 623 machine, "_stext");
c1a3a4b9
ACM
624 if (err < 0)
625 pr_err("Couldn't record kernel reference relocation symbol\n"
626 "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
627 "Check /proc/kallsyms permission or run as root.\n");
b7cece76 628
45694aa7 629 err = perf_event__synthesize_modules(tool, process_synthesized_event,
743eb868 630 machine);
c1a3a4b9
ACM
631 if (err < 0)
632 pr_err("Couldn't record kernel module information.\n"
633 "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
634 "Check /proc/modules permission or run as root.\n");
635
a1645ce1 636 if (perf_guest)
45694aa7 637 perf_session__process_machines(session, tool,
8115d60c 638 perf_event__synthesize_guest_os);
7c6a1c65 639
bea03405 640 if (!opts->target.system_wide)
8d3eca20 641 err = perf_event__synthesize_thread_map(tool, evsel_list->threads,
7c940c18 642 process_synthesized_event,
743eb868 643 machine);
234fbbf5 644 else
8d3eca20 645 err = perf_event__synthesize_threads(tool, process_synthesized_event,
743eb868 646 machine);
7c6a1c65 647
8d3eca20
DA
648 if (err != 0)
649 goto out_delete_session;
650
d20deb64 651 if (rec->realtime_prio) {
de9ac07b
PZ
652 struct sched_param param;
653
d20deb64 654 param.sched_priority = rec->realtime_prio;
de9ac07b 655 if (sched_setscheduler(0, SCHED_FIFO, &param)) {
6beba7ad 656 pr_err("Could not set realtime priority.\n");
8d3eca20
DA
657 err = -1;
658 goto out_delete_session;
de9ac07b
PZ
659 }
660 }
661
774cb499
JO
662 /*
663 * When perf is starting the traced process, all the events
664 * (apart from group members) have enable_on_exec=1 set,
665 * so don't spoil it by prematurely enabling them.
666 */
667 if (!perf_target__none(&opts->target))
668 perf_evlist__enable(evsel_list);
764e16a3 669
856e9660
PZ
670 /*
671 * Let the child rip
672 */
d4db3f16 673 if (forks)
35b9d88e 674 perf_evlist__start_workload(evsel_list);
856e9660 675
649c48a9 676 for (;;) {
d20deb64 677 int hits = rec->samples;
de9ac07b 678
8d3eca20
DA
679 if (perf_record__mmap_read_all(rec) < 0) {
680 err = -1;
681 goto out_delete_session;
682 }
de9ac07b 683
d20deb64 684 if (hits == rec->samples) {
649c48a9
PZ
685 if (done)
686 break;
5c581041 687 err = poll(evsel_list->pollfd, evsel_list->nr_fds, -1);
8b412664
PZ
688 waking++;
689 }
690
774cb499
JO
691 /*
692 * When perf is starting the traced process, at the end events
693 * die with the process and we wait for that. Thus no need to
694 * disable events in this case.
695 */
2711926a 696 if (done && !disabled && !perf_target__none(&opts->target)) {
4152ab37 697 perf_evlist__disable(evsel_list);
2711926a
JO
698 disabled = true;
699 }
de9ac07b
PZ
700 }
701
18483b81 702 if (quiet || signr == SIGUSR1)
b44308f5
ACM
703 return 0;
704
8b412664
PZ
705 fprintf(stderr, "[ perf record: Woken up %ld times to write data ]\n", waking);
706
021e9f47
IM
707 /*
708 * Approximate RIP event size: 24 bytes.
709 */
710 fprintf(stderr,
9486aa38 711 "[ perf record: Captured and wrote %.3f MB %s (~%" PRIu64 " samples) ]\n",
d20deb64 712 (double)rec->bytes_written / 1024.0 / 1024.0,
021e9f47 713 output_name,
d20deb64 714 rec->bytes_written / 24);
addc2785 715
de9ac07b 716 return 0;
39d17dac
ACM
717
718out_delete_session:
719 perf_session__delete(session);
720 return err;
de9ac07b 721}
0e9b20b8 722
bdfebd84
RAV
723#define BRANCH_OPT(n, m) \
724 { .name = n, .mode = (m) }
725
726#define BRANCH_END { .name = NULL }
727
728struct branch_mode {
729 const char *name;
730 int mode;
731};
732
733static const struct branch_mode branch_modes[] = {
734 BRANCH_OPT("u", PERF_SAMPLE_BRANCH_USER),
735 BRANCH_OPT("k", PERF_SAMPLE_BRANCH_KERNEL),
736 BRANCH_OPT("hv", PERF_SAMPLE_BRANCH_HV),
737 BRANCH_OPT("any", PERF_SAMPLE_BRANCH_ANY),
738 BRANCH_OPT("any_call", PERF_SAMPLE_BRANCH_ANY_CALL),
739 BRANCH_OPT("any_ret", PERF_SAMPLE_BRANCH_ANY_RETURN),
740 BRANCH_OPT("ind_call", PERF_SAMPLE_BRANCH_IND_CALL),
741 BRANCH_END
742};
743
744static int
a5aabdac 745parse_branch_stack(const struct option *opt, const char *str, int unset)
bdfebd84
RAV
746{
747#define ONLY_PLM \
748 (PERF_SAMPLE_BRANCH_USER |\
749 PERF_SAMPLE_BRANCH_KERNEL |\
750 PERF_SAMPLE_BRANCH_HV)
751
752 uint64_t *mode = (uint64_t *)opt->value;
753 const struct branch_mode *br;
a5aabdac 754 char *s, *os = NULL, *p;
bdfebd84
RAV
755 int ret = -1;
756
a5aabdac
SE
757 if (unset)
758 return 0;
bdfebd84 759
a5aabdac
SE
760 /*
761 * cannot set it twice, -b + --branch-filter for instance
762 */
763 if (*mode)
bdfebd84
RAV
764 return -1;
765
a5aabdac
SE
766 /* str may be NULL in case no arg is passed to -b */
767 if (str) {
768 /* because str is read-only */
769 s = os = strdup(str);
770 if (!s)
771 return -1;
772
773 for (;;) {
774 p = strchr(s, ',');
775 if (p)
776 *p = '\0';
777
778 for (br = branch_modes; br->name; br++) {
779 if (!strcasecmp(s, br->name))
780 break;
781 }
782 if (!br->name) {
783 ui__warning("unknown branch filter %s,"
784 " check man page\n", s);
785 goto error;
786 }
bdfebd84 787
a5aabdac 788 *mode |= br->mode;
bdfebd84 789
a5aabdac
SE
790 if (!p)
791 break;
bdfebd84 792
a5aabdac
SE
793 s = p + 1;
794 }
bdfebd84
RAV
795 }
796 ret = 0;
797
a5aabdac 798 /* default to any branch */
bdfebd84 799 if ((*mode & ~ONLY_PLM) == 0) {
a5aabdac 800 *mode = PERF_SAMPLE_BRANCH_ANY;
bdfebd84
RAV
801 }
802error:
803 free(os);
804 return ret;
805}
806
#ifdef LIBUNWIND_SUPPORT
/*
 * Parse @str as a stack dump size, round it up to a multiple of
 * sizeof(u64) and store it in *@_size.
 *
 * The value is rejected when it has trailing characters, rounds to zero,
 * or exceeds USHRT_MAX rounded down to a multiple of sizeof(u64).
 *
 * Returns 0 on success, -1 (after logging) on a rejected value; *@_size is
 * left untouched in that case.
 */
static int get_stack_size(char *str, unsigned long *_size)
{
	char *endptr;
	unsigned long size;
	unsigned long max_size = round_down(USHRT_MAX, sizeof(u64));

	size = strtoul(str, &endptr, 0);

	if (!*endptr) {
		size = round_up(size, sizeof(u64));
		if (size && size <= max_size) {
			*_size = size;
			return 0;
		}
	}

	/* max_size is unsigned long, so it needs %lu rather than %ld. */
	pr_err("callchain: Incorrect stack dump size (max %lu): %s\n",
	       max_size, str);
	return -1;
}
#endif /* LIBUNWIND_SUPPORT */
26d33022 834
75d9a108
ACM
835int record_parse_callchain_opt(const struct option *opt,
836 const char *arg, int unset)
26d33022 837{
c5ff78c3 838 struct perf_record_opts *opts = opt->value;
26d33022
JO
839 char *tok, *name, *saveptr = NULL;
840 char *buf;
841 int ret = -1;
842
843 /* --no-call-graph */
844 if (unset)
845 return 0;
846
847 /* We specified default option if none is provided. */
848 BUG_ON(!arg);
849
850 /* We need buffer that we know we can write to. */
851 buf = malloc(strlen(arg) + 1);
852 if (!buf)
853 return -ENOMEM;
854
855 strcpy(buf, arg);
856
857 tok = strtok_r((char *)buf, ",", &saveptr);
858 name = tok ? : (char *)buf;
859
860 do {
861 /* Framepointer style */
862 if (!strncmp(name, "fp", sizeof("fp"))) {
863 if (!strtok_r(NULL, ",", &saveptr)) {
c5ff78c3 864 opts->call_graph = CALLCHAIN_FP;
26d33022
JO
865 ret = 0;
866 } else
867 pr_err("callchain: No more arguments "
868 "needed for -g fp\n");
869 break;
870
95485b1c 871#ifdef LIBUNWIND_SUPPORT
26d33022
JO
872 /* Dwarf style */
873 } else if (!strncmp(name, "dwarf", sizeof("dwarf"))) {
61eaa3be
ACM
874 const unsigned long default_stack_dump_size = 8192;
875
26d33022 876 ret = 0;
c5ff78c3
ACM
877 opts->call_graph = CALLCHAIN_DWARF;
878 opts->stack_dump_size = default_stack_dump_size;
26d33022
JO
879
880 tok = strtok_r(NULL, ",", &saveptr);
881 if (tok) {
882 unsigned long size = 0;
883
884 ret = get_stack_size(tok, &size);
c5ff78c3 885 opts->stack_dump_size = size;
26d33022
JO
886 }
887
888 if (!ret)
889 pr_debug("callchain: stack dump size %d\n",
c5ff78c3 890 opts->stack_dump_size);
95485b1c 891#endif /* LIBUNWIND_SUPPORT */
26d33022
JO
892 } else {
893 pr_err("callchain: Unknown -g option "
894 "value: %s\n", arg);
895 break;
896 }
897
898 } while (0);
899
900 free(buf);
901
902 if (!ret)
c5ff78c3 903 pr_debug("callchain: type %d\n", opts->call_graph);
26d33022
JO
904
905 return ret;
906}
907
0e9b20b8 908static const char * const record_usage[] = {
9e096753
MG
909 "perf record [<options>] [<command>]",
910 "perf record [<options>] -- <command> [<options>]",
0e9b20b8
IM
911 NULL
912};
913
d20deb64
ACM
914/*
915 * XXX Ideally would be local to cmd_record() and passed to a perf_record__new
916 * because we need to have access to it in perf_record__exit, that is called
917 * after cmd_record() exits, but since record_options need to be accessible to
918 * builtin-script, leave it here.
919 *
920 * At least we don't touch it in all the other functions here directly.
921 *
922 * Just say no to tons of global variables, sigh.
923 */
924static struct perf_record record = {
925 .opts = {
d20deb64
ACM
926 .mmap_pages = UINT_MAX,
927 .user_freq = UINT_MAX,
928 .user_interval = ULLONG_MAX,
447a6013 929 .freq = 4000,
d1cb9fce
NK
930 .target = {
931 .uses_mmap = true,
932 },
d20deb64
ACM
933 },
934 .write_mode = WRITE_FORCE,
935 .file_new = true,
936};
7865e817 937
61eaa3be
ACM
938#define CALLCHAIN_HELP "do call-graph (stack chain/backtrace) recording: "
939
940#ifdef LIBUNWIND_SUPPORT
75d9a108 941const char record_callchain_help[] = CALLCHAIN_HELP "[fp] dwarf";
61eaa3be 942#else
75d9a108 943const char record_callchain_help[] = CALLCHAIN_HELP "[fp]";
61eaa3be
ACM
944#endif
945
d20deb64
ACM
946/*
947 * XXX Will stay a global variable till we fix builtin-script.c to stop messing
948 * with it and switch to use the library functions in perf_evlist that came
949 * from builtin-record.c, i.e. use perf_record_opts,
950 * perf_evlist__prepare_workload, etc instead of fork+exec'in 'perf record',
951 * using pipes, etc.
952 */
bca647aa 953const struct option record_options[] = {
d20deb64 954 OPT_CALLBACK('e', "event", &record.evlist, "event",
86847b62 955 "event selector. use 'perf list' to list available events",
f120f9d5 956 parse_events_option),
d20deb64 957 OPT_CALLBACK(0, "filter", &record.evlist, "filter",
c171b552 958 "event filter", parse_filter),
bea03405 959 OPT_STRING('p', "pid", &record.opts.target.pid, "pid",
d6d901c2 960 "record events on existing process id"),
bea03405 961 OPT_STRING('t', "tid", &record.opts.target.tid, "tid",
d6d901c2 962 "record events on existing thread id"),
d20deb64 963 OPT_INTEGER('r', "realtime", &record.realtime_prio,
0e9b20b8 964 "collect data with this RT SCHED_FIFO priority"),
d20deb64 965 OPT_BOOLEAN('D', "no-delay", &record.opts.no_delay,
acac03fa 966 "collect data without buffering"),
d20deb64 967 OPT_BOOLEAN('R', "raw-samples", &record.opts.raw_samples,
daac07b2 968 "collect raw sample records from all opened counters"),
bea03405 969 OPT_BOOLEAN('a', "all-cpus", &record.opts.target.system_wide,
0e9b20b8 970 "system-wide collection from all CPUs"),
d20deb64 971 OPT_BOOLEAN('A', "append", &record.append_file,
abaff32a 972 "append to the output file to do incremental profiling"),
bea03405 973 OPT_STRING('C', "cpu", &record.opts.target.cpu_list, "cpu",
c45c6ea2 974 "list of cpus to monitor"),
d20deb64 975 OPT_BOOLEAN('f', "force", &record.force,
7865e817 976 "overwrite existing data file (deprecated)"),
d20deb64
ACM
977 OPT_U64('c', "count", &record.opts.user_interval, "event period to sample"),
978 OPT_STRING('o', "output", &record.output_name, "file",
abaff32a 979 "output file name"),
d20deb64 980 OPT_BOOLEAN('i', "no-inherit", &record.opts.no_inherit,
2e6cdf99 981 "child tasks do not inherit counters"),
d20deb64
ACM
982 OPT_UINTEGER('F', "freq", &record.opts.user_freq, "profile at this frequency"),
983 OPT_UINTEGER('m', "mmap-pages", &record.opts.mmap_pages,
01c2d99b 984 "number of mmap data pages"),
d20deb64 985 OPT_BOOLEAN(0, "group", &record.opts.group,
43bece79 986 "put the counters into a counter group"),
c5ff78c3 987 OPT_CALLBACK_DEFAULT('g', "call-graph", &record.opts,
75d9a108
ACM
988 "mode[,dump_size]", record_callchain_help,
989 &record_parse_callchain_opt, "fp"),
c0555642 990 OPT_INCR('v', "verbose", &verbose,
3da297a6 991 "be more verbose (show counter open errors, etc)"),
b44308f5 992 OPT_BOOLEAN('q', "quiet", &quiet, "don't print any message"),
d20deb64 993 OPT_BOOLEAN('s', "stat", &record.opts.inherit_stat,
649c48a9 994 "per thread counts"),
d20deb64 995 OPT_BOOLEAN('d', "data", &record.opts.sample_address,
4bba828d 996 "Sample addresses"),
d20deb64 997 OPT_BOOLEAN('T', "timestamp", &record.opts.sample_time, "Sample timestamps"),
3e76ac78 998 OPT_BOOLEAN('P', "period", &record.opts.period, "Sample period"),
d20deb64 999 OPT_BOOLEAN('n', "no-samples", &record.opts.no_samples,
649c48a9 1000 "don't sample"),
d20deb64 1001 OPT_BOOLEAN('N', "no-buildid-cache", &record.no_buildid_cache,
a1ac1d3c 1002 "do not update the buildid cache"),
d20deb64 1003 OPT_BOOLEAN('B', "no-buildid", &record.no_buildid,
baa2f6ce 1004 "do not collect buildids in perf.data"),
d20deb64 1005 OPT_CALLBACK('G', "cgroup", &record.evlist, "name",
023695d9
SE
1006 "monitor event in cgroup name only",
1007 parse_cgroups),
bea03405
NK
1008 OPT_STRING('u', "uid", &record.opts.target.uid_str, "user",
1009 "user to profile"),
a5aabdac
SE
1010
1011 OPT_CALLBACK_NOOPT('b', "branch-any", &record.opts.branch_stack,
1012 "branch any", "sample any taken branches",
1013 parse_branch_stack),
1014
1015 OPT_CALLBACK('j', "branch-filter", &record.opts.branch_stack,
1016 "branch filter mask", "branch stack filter modes",
bdfebd84 1017 parse_branch_stack),
0e9b20b8
IM
1018 OPT_END()
1019};
1020
1d037ca1 1021int cmd_record(int argc, const char **argv, const char *prefix __maybe_unused)
0e9b20b8 1022{
69aad6f1
ACM
1023 int err = -ENOMEM;
1024 struct perf_evsel *pos;
d20deb64
ACM
1025 struct perf_evlist *evsel_list;
1026 struct perf_record *rec = &record;
16ad2ffb 1027 char errbuf[BUFSIZ];
0e9b20b8 1028
7e2ed097 1029 evsel_list = perf_evlist__new(NULL, NULL);
361c99a6
ACM
1030 if (evsel_list == NULL)
1031 return -ENOMEM;
1032
d20deb64
ACM
1033 rec->evlist = evsel_list;
1034
bca647aa 1035 argc = parse_options(argc, argv, record_options, record_usage,
655000e7 1036 PARSE_OPT_STOP_AT_NON_OPTION);
d67356e7 1037 if (!argc && perf_target__none(&rec->opts.target))
bca647aa 1038 usage_with_options(record_usage, record_options);
0e9b20b8 1039
d20deb64 1040 if (rec->force && rec->append_file) {
3780f488
NK
1041 ui__error("Can't overwrite and append at the same time."
1042 " You need to choose between -f and -A");
bca647aa 1043 usage_with_options(record_usage, record_options);
d20deb64
ACM
1044 } else if (rec->append_file) {
1045 rec->write_mode = WRITE_APPEND;
7865e817 1046 } else {
d20deb64 1047 rec->write_mode = WRITE_FORCE;
7865e817
FW
1048 }
1049
bea03405 1050 if (nr_cgroups && !rec->opts.target.system_wide) {
3780f488
NK
1051 ui__error("cgroup monitoring only available in"
1052 " system-wide mode\n");
023695d9
SE
1053 usage_with_options(record_usage, record_options);
1054 }
1055
655000e7 1056 symbol__init();
baa2f6ce 1057
ec80fde7 1058 if (symbol_conf.kptr_restrict)
646aaea6
ACM
1059 pr_warning(
1060"WARNING: Kernel address maps (/proc/{kallsyms,modules}) are restricted,\n"
1061"check /proc/sys/kernel/kptr_restrict.\n\n"
1062"Samples in kernel functions may not be resolved if a suitable vmlinux\n"
1063"file is not found in the buildid cache or in the vmlinux path.\n\n"
1064"Samples in kernel modules won't be resolved at all.\n\n"
1065"If some relocation was applied (e.g. kexec) symbols may be misresolved\n"
1066"even with a suitable vmlinux or kallsyms file.\n\n");
ec80fde7 1067
d20deb64 1068 if (rec->no_buildid_cache || rec->no_buildid)
a1ac1d3c 1069 disable_buildid_cache();
655000e7 1070
361c99a6
ACM
1071 if (evsel_list->nr_entries == 0 &&
1072 perf_evlist__add_default(evsel_list) < 0) {
69aad6f1
ACM
1073 pr_err("Not enough memory for event selector list\n");
1074 goto out_symbol_exit;
bbd36e5e 1075 }
0e9b20b8 1076
16ad2ffb
NK
1077 err = perf_target__validate(&rec->opts.target);
1078 if (err) {
1079 perf_target__strerror(&rec->opts.target, err, errbuf, BUFSIZ);
1080 ui__warning("%s", errbuf);
1081 }
1082
1083 err = perf_target__parse_uid(&rec->opts.target);
1084 if (err) {
1085 int saved_errno = errno;
4bd0f2d2 1086
16ad2ffb 1087 perf_target__strerror(&rec->opts.target, err, errbuf, BUFSIZ);
3780f488 1088 ui__error("%s", errbuf);
16ad2ffb
NK
1089
1090 err = -saved_errno;
0d37aa34 1091 goto out_free_fd;
16ad2ffb 1092 }
0d37aa34 1093
16ad2ffb 1094 err = -ENOMEM;
b809ac10 1095 if (perf_evlist__create_maps(evsel_list, &rec->opts.target) < 0)
dd7927f4 1096 usage_with_options(record_usage, record_options);
69aad6f1 1097
361c99a6 1098 list_for_each_entry(pos, &evsel_list->entries, node) {
7289f83c 1099 if (perf_header__push_event(pos->attr.config, perf_evsel__name(pos)))
ad7f4e3f 1100 goto out_free_fd;
d6d901c2 1101 }
5c581041 1102
d20deb64
ACM
1103 if (rec->opts.user_interval != ULLONG_MAX)
1104 rec->opts.default_interval = rec->opts.user_interval;
1105 if (rec->opts.user_freq != UINT_MAX)
1106 rec->opts.freq = rec->opts.user_freq;
f9212819 1107
7e4ff9e3
MG
1108 /*
1109 * User specified count overrides default frequency.
1110 */
d20deb64
ACM
1111 if (rec->opts.default_interval)
1112 rec->opts.freq = 0;
1113 else if (rec->opts.freq) {
1114 rec->opts.default_interval = rec->opts.freq;
7e4ff9e3 1115 } else {
3780f488 1116 ui__error("frequency and count are zero, aborting\n");
39d17dac 1117 err = -EINVAL;
5c581041 1118 goto out_free_fd;
7e4ff9e3
MG
1119 }
1120
d20deb64 1121 err = __cmd_record(&record, argc, argv);
39d17dac 1122out_free_fd:
7e2ed097 1123 perf_evlist__delete_maps(evsel_list);
d65a458b
ACM
1124out_symbol_exit:
1125 symbol__exit();
39d17dac 1126 return err;
0e9b20b8 1127}