perf tools: Use sscanf for parsing /proc/pid/maps
[linux-2.6-block.git] / tools / perf / builtin-record.c
CommitLineData
abaff32a 1/*
bf9e1876
IM
2 * builtin-record.c
3 *
4 * Builtin record command: Record the profile of a workload
5 * (or a CPU, or a PID) into the perf.data output file - for
6 * later analysis via perf report.
abaff32a 7 */
b8f46c5a
XG
8#define _FILE_OFFSET_BITS 64
9
16f762a2 10#include "builtin.h"
bf9e1876
IM
11
12#include "perf.h"
13
6122e4e4 14#include "util/build-id.h"
6eda5838 15#include "util/util.h"
0e9b20b8 16#include "util/parse-options.h"
8ad8db37 17#include "util/parse-events.h"
6eda5838 18
7c6a1c65 19#include "util/header.h"
66e274f3 20#include "util/event.h"
361c99a6 21#include "util/evlist.h"
69aad6f1 22#include "util/evsel.h"
8f28827a 23#include "util/debug.h"
94c744b6 24#include "util/session.h"
45694aa7 25#include "util/tool.h"
8d06367f 26#include "util/symbol.h"
a12b51c4 27#include "util/cpumap.h"
fd78260b 28#include "util/thread_map.h"
7c6a1c65 29
97124d5e 30#include <unistd.h>
de9ac07b 31#include <sched.h>
a41794cd 32#include <sys/mman.h>
de9ac07b 33
78da39fa
BR
#ifndef HAVE_ON_EXIT
/*
 * Fallback on_exit() for C libraries that do not provide one.
 *
 * Handlers are kept in a fixed-size table and invoked, in registration
 * order, by a single atexit() hook.  The exit status handed to them is
 * whatever was last passed to exit(), which is captured by the exit()
 * wrapper macro below.
 */
#ifndef ATEXIT_MAX
#define ATEXIT_MAX 32
#endif
static int __on_exit_count = 0;
typedef void (*on_exit_func_t) (int, void *);
static on_exit_func_t __on_exit_funcs[ATEXIT_MAX];
static void *__on_exit_args[ATEXIT_MAX];
static int __exitcode = 0;
static void __handle_on_exit_funcs(void);
static int on_exit(on_exit_func_t function, void *arg);
#define exit(x) (exit)(__exitcode = (x))

/*
 * Register @function to be called with the exit status and @arg when the
 * process exits.
 *
 * Returns 0 on success, -ENOMEM when the table is full or the atexit()
 * hook could not be installed.
 */
static int on_exit(on_exit_func_t function, void *arg)
{
	if (__on_exit_count == ATEXIT_MAX)
		return -ENOMEM;

	/*
	 * The dispatcher is installed once, on first registration.  If that
	 * fails nothing would ever run, so report it instead of pretending
	 * the handler was registered.
	 */
	if (__on_exit_count == 0 && atexit(__handle_on_exit_funcs) != 0)
		return -ENOMEM;

	__on_exit_funcs[__on_exit_count] = function;
	__on_exit_args[__on_exit_count++] = arg;
	return 0;
}

/* atexit() hook: run every registered handler in registration order. */
static void __handle_on_exit_funcs(void)
{
	int i;

	for (i = 0; i < __on_exit_count; i++)
		__on_exit_funcs[i] (__exitcode, __on_exit_args[i]);
}
#endif
65
7865e817
FW
/* How __cmd_record() treats the output file when it opens it. */
enum write_mode_t {
	WRITE_FORCE  = 0,	/* truncate; a non-empty existing file is renamed to <name>.old first */
	WRITE_APPEND = 1,	/* keep the existing contents and read its header */
};
70
d20deb64 71struct perf_record {
45694aa7 72 struct perf_tool tool;
d20deb64
ACM
73 struct perf_record_opts opts;
74 u64 bytes_written;
75 const char *output_name;
76 struct perf_evlist *evlist;
77 struct perf_session *session;
78 const char *progname;
79 int output;
80 unsigned int page_size;
81 int realtime_prio;
82 enum write_mode_t write_mode;
83 bool no_buildid;
84 bool no_buildid_cache;
85 bool force;
86 bool file_new;
87 bool append_file;
88 long samples;
89 off_t post_processing_offset;
0f82ebc4 90};
a21ca2ca 91
d20deb64 92static void advance_output(struct perf_record *rec, size_t size)
9215545e 93{
d20deb64 94 rec->bytes_written += size;
9215545e
TZ
95}
96
8d3eca20 97static int write_output(struct perf_record *rec, void *buf, size_t size)
f5970550
PZ
98{
99 while (size) {
d20deb64 100 int ret = write(rec->output, buf, size);
f5970550 101
8d3eca20
DA
102 if (ret < 0) {
103 pr_err("failed to write\n");
104 return -1;
105 }
f5970550
PZ
106
107 size -= ret;
108 buf += ret;
109
d20deb64 110 rec->bytes_written += ret;
f5970550 111 }
8d3eca20
DA
112
113 return 0;
f5970550
PZ
114}
115
45694aa7 116static int process_synthesized_event(struct perf_tool *tool,
d20deb64 117 union perf_event *event,
1d037ca1
IT
118 struct perf_sample *sample __maybe_unused,
119 struct machine *machine __maybe_unused)
234fbbf5 120{
45694aa7 121 struct perf_record *rec = container_of(tool, struct perf_record, tool);
8d3eca20
DA
122 if (write_output(rec, event, event->header.size) < 0)
123 return -1;
124
234fbbf5
ACM
125 return 0;
126}
127
8d3eca20 128static int perf_record__mmap_read(struct perf_record *rec,
d20deb64 129 struct perf_mmap *md)
de9ac07b 130{
744bd8aa 131 unsigned int head = perf_mmap__read_head(md);
de9ac07b 132 unsigned int old = md->prev;
d20deb64 133 unsigned char *data = md->base + rec->page_size;
de9ac07b
PZ
134 unsigned long size;
135 void *buf;
8d3eca20 136 int rc = 0;
de9ac07b 137
dc82009a 138 if (old == head)
8d3eca20 139 return 0;
dc82009a 140
d20deb64 141 rec->samples++;
de9ac07b
PZ
142
143 size = head - old;
144
145 if ((old & md->mask) + size != (head & md->mask)) {
146 buf = &data[old & md->mask];
147 size = md->mask + 1 - (old & md->mask);
148 old += size;
021e9f47 149
8d3eca20
DA
150 if (write_output(rec, buf, size) < 0) {
151 rc = -1;
152 goto out;
153 }
de9ac07b
PZ
154 }
155
156 buf = &data[old & md->mask];
157 size = head - old;
158 old += size;
021e9f47 159
8d3eca20
DA
160 if (write_output(rec, buf, size) < 0) {
161 rc = -1;
162 goto out;
163 }
de9ac07b
PZ
164
165 md->prev = old;
115d2d89 166 perf_mmap__write_tail(md, old);
8d3eca20
DA
167
168out:
169 return rc;
de9ac07b
PZ
170}
171
/* Flags shared between sig_handler() and the rest of the file. */
static volatile int done = 0;		/* a handled signal arrived: stop recording */
static volatile int signr = -1;		/* last signal seen, -1 if none */
static volatile int child_finished = 0;	/* SIGCHLD was seen */

/*
 * Common handler for the signals installed by __cmd_record(): remember
 * which signal fired and request the main loop to finish.
 */
static void sig_handler(int sig)
{
	switch (sig) {
	case SIGCHLD:
		child_finished = 1;
		break;
	default:
		break;
	}

	done = 1;
	signr = sig;
}
184
1d037ca1 185static void perf_record__sig_exit(int exit_status __maybe_unused, void *arg)
f7b7c26e 186{
d20deb64 187 struct perf_record *rec = arg;
33e49ea7
AK
188 int status;
189
d20deb64 190 if (rec->evlist->workload.pid > 0) {
33e49ea7 191 if (!child_finished)
d20deb64 192 kill(rec->evlist->workload.pid, SIGTERM);
33e49ea7
AK
193
194 wait(&status);
195 if (WIFSIGNALED(status))
d20deb64 196 psignal(WTERMSIG(status), rec->progname);
33e49ea7 197 }
933da83a 198
18483b81 199 if (signr == -1 || signr == SIGUSR1)
f7b7c26e
PZ
200 return;
201
202 signal(signr, SIG_DFL);
203 kill(getpid(), signr);
de9ac07b
PZ
204}
205
a91e5431
ACM
206static bool perf_evlist__equal(struct perf_evlist *evlist,
207 struct perf_evlist *other)
208{
209 struct perf_evsel *pos, *pair;
210
211 if (evlist->nr_entries != other->nr_entries)
212 return false;
213
0c21f736 214 pair = perf_evlist__first(other);
a91e5431
ACM
215
216 list_for_each_entry(pos, &evlist->entries, node) {
217 if (memcmp(&pos->attr, &pair->attr, sizeof(pos->attr) != 0))
218 return false;
0c21f736 219 pair = perf_evsel__next(pair);
a91e5431
ACM
220 }
221
222 return true;
223}
224
8d3eca20 225static int perf_record__open(struct perf_record *rec)
dd7927f4 226{
6a4bb04c 227 struct perf_evsel *pos;
d20deb64
ACM
228 struct perf_evlist *evlist = rec->evlist;
229 struct perf_session *session = rec->session;
230 struct perf_record_opts *opts = &rec->opts;
8d3eca20 231 int rc = 0;
dd7927f4 232
d20deb64 233 perf_evlist__config_attrs(evlist, opts);
0f82ebc4 234
6a4bb04c 235 if (opts->group)
63dab225 236 perf_evlist__set_leader(evlist);
6a4bb04c 237
dd7927f4
ACM
238 list_for_each_entry(pos, &evlist->entries, node) {
239 struct perf_event_attr *attr = &pos->attr;
240 /*
241 * Check if parse_single_tracepoint_event has already asked for
242 * PERF_SAMPLE_TIME.
243 *
244 * XXX this is kludgy but short term fix for problems introduced by
245 * eac23d1c that broke 'perf script' by having different sample_types
246 * when using multiple tracepoint events when we use a perf binary
247 * that tries to use sample_id_all on an older kernel.
248 *
249 * We need to move counter creation to perf_session, support
250 * different sample_types, etc.
251 */
252 bool time_needed = attr->sample_type & PERF_SAMPLE_TIME;
d6d901c2 253
0c978128
ACM
254fallback_missing_features:
255 if (opts->exclude_guest_missing)
256 attr->exclude_guest = attr->exclude_host = 0;
dd7927f4 257retry_sample_id:
808e1226 258 attr->sample_id_all = opts->sample_id_all_missing ? 0 : 1;
dd7927f4 259try_again:
6a4bb04c 260 if (perf_evsel__open(pos, evlist->cpus, evlist->threads) < 0) {
d6d901c2
ZY
261 int err = errno;
262
c286c419 263 if (err == EPERM || err == EACCES) {
b8631e6e 264 ui__error_paranoid();
8d3eca20
DA
265 rc = -err;
266 goto out;
bea03405 267 } else if (err == ENODEV && opts->target.cpu_list) {
8d3eca20
DA
268 pr_err("No such device - did you specify"
269 " an out-of-range profile CPU?\n");
270 rc = -err;
271 goto out;
0c978128
ACM
272 } else if (err == EINVAL) {
273 if (!opts->exclude_guest_missing &&
274 (attr->exclude_guest || attr->exclude_host)) {
275 pr_debug("Old kernel, cannot exclude "
276 "guest or host samples.\n");
277 opts->exclude_guest_missing = true;
278 goto fallback_missing_features;
808e1226 279 } else if (!opts->sample_id_all_missing) {
0c978128
ACM
280 /*
281 * Old kernel, no attr->sample_id_type_all field
282 */
808e1226 283 opts->sample_id_all_missing = true;
0c978128
ACM
284 if (!opts->sample_time && !opts->raw_samples && !time_needed)
285 attr->sample_type &= ~PERF_SAMPLE_TIME;
286
287 goto retry_sample_id;
288 }
d6d901c2 289 }
3da297a6 290
d6d901c2
ZY
291 /*
292 * If it's cycles then fall back to hrtimer
293 * based cpu-clock-tick sw counter, which
028d455b
DA
294 * is always available even if no PMU support.
295 *
296 * PPC returns ENXIO until 2.6.37 (behavior changed
297 * with commit b0a873e).
d6d901c2 298 */
028d455b
DA
299 if ((err == ENOENT || err == ENXIO)
300 && attr->type == PERF_TYPE_HARDWARE
d6d901c2
ZY
301 && attr->config == PERF_COUNT_HW_CPU_CYCLES) {
302
303 if (verbose)
ca6a4258
DA
304 ui__warning("The cycles event is not supported, "
305 "trying to fall back to cpu-clock-ticks\n");
d6d901c2
ZY
306 attr->type = PERF_TYPE_SOFTWARE;
307 attr->config = PERF_COUNT_SW_CPU_CLOCK;
d1cae34d
DA
308 if (pos->name) {
309 free(pos->name);
310 pos->name = NULL;
311 }
d6d901c2
ZY
312 goto try_again;
313 }
ca6a4258
DA
314
315 if (err == ENOENT) {
3780f488 316 ui__error("The %s event is not supported.\n",
7289f83c 317 perf_evsel__name(pos));
8d3eca20
DA
318 rc = -err;
319 goto out;
2305c82f
DA
320 } else if ((err == EOPNOTSUPP) && (attr->precise_ip)) {
321 ui__error("\'precise\' request may not be supported. "
322 "Try removing 'p' modifier\n");
323 rc = -err;
324 goto out;
ca6a4258
DA
325 }
326
d6d901c2 327 printf("\n");
1863fbbb
SE
328 error("sys_perf_event_open() syscall returned with %d "
329 "(%s) for event %s. /bin/dmesg may provide "
330 "additional information.\n",
331 err, strerror(err), perf_evsel__name(pos));
bfd45118
SK
332
333#if defined(__i386__) || defined(__x86_64__)
8d3eca20
DA
334 if (attr->type == PERF_TYPE_HARDWARE &&
335 err == EOPNOTSUPP) {
336 pr_err("No hardware sampling interrupt available."
337 " No APIC? If so then you can boot the kernel"
338 " with the \"lapic\" boot parameter to"
339 " force-enable it.\n");
340 rc = -err;
341 goto out;
342 }
bfd45118
SK
343#endif
344
8d3eca20
DA
345 pr_err("No CONFIG_PERF_EVENTS=y kernel support configured?\n");
346 rc = -err;
347 goto out;
c171b552
LZ
348 }
349 }
a43d3f08 350
1491a632 351 if (perf_evlist__apply_filters(evlist)) {
0a102479
FW
352 error("failed to set filter with %d (%s)\n", errno,
353 strerror(errno));
8d3eca20
DA
354 rc = -1;
355 goto out;
0a102479
FW
356 }
357
18e60939 358 if (perf_evlist__mmap(evlist, opts->mmap_pages, false) < 0) {
8d3eca20
DA
359 if (errno == EPERM) {
360 pr_err("Permission error mapping pages.\n"
361 "Consider increasing "
362 "/proc/sys/kernel/perf_event_mlock_kb,\n"
363 "or try again with a smaller value of -m/--mmap_pages.\n"
364 "(current value: %d)\n", opts->mmap_pages);
365 rc = -errno;
0089fa98
JO
366 } else if (!is_power_of_2(opts->mmap_pages) &&
367 (opts->mmap_pages != UINT_MAX)) {
8d3eca20
DA
368 pr_err("--mmap_pages/-m value must be a power of two.");
369 rc = -EINVAL;
370 } else {
371 pr_err("failed to mmap with %d (%s)\n", errno, strerror(errno));
372 rc = -errno;
373 }
374 goto out;
18e60939 375 }
0a27d7f9 376
d20deb64 377 if (rec->file_new)
a91e5431
ACM
378 session->evlist = evlist;
379 else {
380 if (!perf_evlist__equal(session->evlist, evlist)) {
381 fprintf(stderr, "incompatible append\n");
8d3eca20
DA
382 rc = -1;
383 goto out;
a91e5431
ACM
384 }
385 }
386
7b56cce2 387 perf_session__set_id_hdr_size(session);
8d3eca20
DA
388out:
389 return rc;
16c8a109
PZ
390}
391
d20deb64 392static int process_buildids(struct perf_record *rec)
6122e4e4 393{
d20deb64 394 u64 size = lseek(rec->output, 0, SEEK_CUR);
6122e4e4 395
9f591fd7
ACM
396 if (size == 0)
397 return 0;
398
d20deb64
ACM
399 rec->session->fd = rec->output;
400 return __perf_session__process_events(rec->session, rec->post_processing_offset,
401 size - rec->post_processing_offset,
6122e4e4
ACM
402 size, &build_id__mark_dso_hit_ops);
403}
404
8d3eca20 405static void perf_record__exit(int status, void *arg)
f5970550 406{
d20deb64
ACM
407 struct perf_record *rec = arg;
408
8d3eca20
DA
409 if (status != 0)
410 return;
411
d20deb64
ACM
412 if (!rec->opts.pipe_output) {
413 rec->session->header.data_size += rec->bytes_written;
414
415 if (!rec->no_buildid)
416 process_buildids(rec);
417 perf_session__write_header(rec->session, rec->evlist,
418 rec->output, true);
419 perf_session__delete(rec->session);
420 perf_evlist__delete(rec->evlist);
d65a458b 421 symbol__exit();
c7929e47 422 }
f5970550
PZ
423}
424
8115d60c 425static void perf_event__synthesize_guest_os(struct machine *machine, void *data)
a1645ce1
ZY
426{
427 int err;
45694aa7 428 struct perf_tool *tool = data;
a1645ce1 429
23346f21 430 if (machine__is_host(machine))
a1645ce1
ZY
431 return;
432
433 /*
434 *As for guest kernel when processing subcommand record&report,
435 *we arrange module mmap prior to guest kernel mmap and trigger
436 *a preload dso because default guest module symbols are loaded
437 *from guest kallsyms instead of /lib/modules/XXX/XXX. This
438 *method is used to avoid symbol missing when the first addr is
439 *in module instead of in guest kernel.
440 */
45694aa7 441 err = perf_event__synthesize_modules(tool, process_synthesized_event,
743eb868 442 machine);
a1645ce1
ZY
443 if (err < 0)
444 pr_err("Couldn't record guest kernel [%d]'s reference"
23346f21 445 " relocation symbol.\n", machine->pid);
a1645ce1 446
a1645ce1
ZY
447 /*
448 * We use _stext for guest kernel because guest kernel's /proc/kallsyms
449 * have no _text sometimes.
450 */
45694aa7 451 err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
743eb868 452 machine, "_text");
a1645ce1 453 if (err < 0)
45694aa7 454 err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
743eb868 455 machine, "_stext");
a1645ce1
ZY
456 if (err < 0)
457 pr_err("Couldn't record guest kernel [%d]'s reference"
23346f21 458 " relocation symbol.\n", machine->pid);
a1645ce1
ZY
459}
460
98402807
FW
461static struct perf_event_header finished_round_event = {
462 .size = sizeof(struct perf_event_header),
463 .type = PERF_RECORD_FINISHED_ROUND,
464};
465
8d3eca20 466static int perf_record__mmap_read_all(struct perf_record *rec)
98402807 467{
0e2e63dd 468 int i;
8d3eca20 469 int rc = 0;
98402807 470
d20deb64 471 for (i = 0; i < rec->evlist->nr_mmaps; i++) {
8d3eca20
DA
472 if (rec->evlist->mmap[i].base) {
473 if (perf_record__mmap_read(rec, &rec->evlist->mmap[i]) != 0) {
474 rc = -1;
475 goto out;
476 }
477 }
98402807
FW
478 }
479
2eeaaa09 480 if (perf_header__has_feat(&rec->session->header, HEADER_TRACING_DATA))
8d3eca20
DA
481 rc = write_output(rec, &finished_round_event,
482 sizeof(finished_round_event));
483
484out:
485 return rc;
98402807
FW
486}
487
d20deb64 488static int __cmd_record(struct perf_record *rec, int argc, const char **argv)
16c8a109 489{
abaff32a 490 struct stat st;
abaff32a 491 int flags;
781ba9d2 492 int err, output, feat;
8b412664 493 unsigned long waking = 0;
46be604b 494 const bool forks = argc > 0;
23346f21 495 struct machine *machine;
45694aa7 496 struct perf_tool *tool = &rec->tool;
d20deb64
ACM
497 struct perf_record_opts *opts = &rec->opts;
498 struct perf_evlist *evsel_list = rec->evlist;
499 const char *output_name = rec->output_name;
500 struct perf_session *session;
de9ac07b 501
d20deb64 502 rec->progname = argv[0];
33e49ea7 503
d20deb64 504 rec->page_size = sysconf(_SC_PAGE_SIZE);
de9ac07b 505
d20deb64 506 on_exit(perf_record__sig_exit, rec);
f5970550
PZ
507 signal(SIGCHLD, sig_handler);
508 signal(SIGINT, sig_handler);
18483b81 509 signal(SIGUSR1, sig_handler);
f5970550 510
d7065adb
FBH
511 if (!output_name) {
512 if (!fstat(STDOUT_FILENO, &st) && S_ISFIFO(st.st_mode))
d20deb64 513 opts->pipe_output = true;
d7065adb 514 else
d20deb64 515 rec->output_name = output_name = "perf.data";
d7065adb
FBH
516 }
517 if (output_name) {
518 if (!strcmp(output_name, "-"))
d20deb64 519 opts->pipe_output = true;
d7065adb 520 else if (!stat(output_name, &st) && st.st_size) {
d20deb64 521 if (rec->write_mode == WRITE_FORCE) {
d7065adb
FBH
522 char oldname[PATH_MAX];
523 snprintf(oldname, sizeof(oldname), "%s.old",
524 output_name);
525 unlink(oldname);
526 rename(output_name, oldname);
527 }
d20deb64
ACM
528 } else if (rec->write_mode == WRITE_APPEND) {
529 rec->write_mode = WRITE_FORCE;
266e0e21 530 }
97124d5e
PZ
531 }
532
f887f301 533 flags = O_CREAT|O_RDWR;
d20deb64
ACM
534 if (rec->write_mode == WRITE_APPEND)
535 rec->file_new = 0;
abaff32a
IM
536 else
537 flags |= O_TRUNC;
538
d20deb64 539 if (opts->pipe_output)
529870e3
TZ
540 output = STDOUT_FILENO;
541 else
542 output = open(output_name, flags, S_IRUSR | S_IWUSR);
de9ac07b
PZ
543 if (output < 0) {
544 perror("failed to create output file");
8d3eca20 545 return -1;
de9ac07b
PZ
546 }
547
d20deb64
ACM
548 rec->output = output;
549
7865e817 550 session = perf_session__new(output_name, O_WRONLY,
d20deb64 551 rec->write_mode == WRITE_FORCE, false, NULL);
94c744b6 552 if (session == NULL) {
a9a70bbc
ACM
553 pr_err("Not enough memory for reading perf file header\n");
554 return -1;
555 }
556
d20deb64
ACM
557 rec->session = session;
558
781ba9d2
RR
559 for (feat = HEADER_FIRST_FEATURE; feat < HEADER_LAST_FEATURE; feat++)
560 perf_header__set_feat(&session->header, feat);
561
562 if (rec->no_buildid)
563 perf_header__clear_feat(&session->header, HEADER_BUILD_ID);
564
565 if (!have_tracepoints(&evsel_list->entries))
2eeaaa09 566 perf_header__clear_feat(&session->header, HEADER_TRACING_DATA);
baa2f6ce 567
330aa675
SE
568 if (!rec->opts.branch_stack)
569 perf_header__clear_feat(&session->header, HEADER_BRANCH_STACK);
570
d20deb64 571 if (!rec->file_new) {
a91e5431 572 err = perf_session__read_header(session, output);
4dc0a04b 573 if (err < 0)
39d17dac 574 goto out_delete_session;
4dc0a04b
ACM
575 }
576
d4db3f16 577 if (forks) {
d20deb64 578 err = perf_evlist__prepare_workload(evsel_list, opts, argv);
35b9d88e
ACM
579 if (err < 0) {
580 pr_err("Couldn't run the workload!\n");
581 goto out_delete_session;
856e9660 582 }
856e9660
PZ
583 }
584
8d3eca20
DA
585 if (perf_record__open(rec) != 0) {
586 err = -1;
587 goto out_delete_session;
588 }
de9ac07b 589
712a4b60 590 /*
d20deb64 591 * perf_session__delete(session) will be called at perf_record__exit()
712a4b60 592 */
d20deb64 593 on_exit(perf_record__exit, rec);
712a4b60 594
d20deb64 595 if (opts->pipe_output) {
529870e3
TZ
596 err = perf_header__write_pipe(output);
597 if (err < 0)
8d3eca20 598 goto out_delete_session;
d20deb64 599 } else if (rec->file_new) {
a91e5431
ACM
600 err = perf_session__write_header(session, evsel_list,
601 output, false);
d5eed904 602 if (err < 0)
8d3eca20 603 goto out_delete_session;
56b03f3c
ACM
604 }
605
d3665498 606 if (!rec->no_buildid
e20960c0 607 && !perf_header__has_feat(&session->header, HEADER_BUILD_ID)) {
d3665498 608 pr_err("Couldn't generate buildids. "
e20960c0 609 "Use --no-buildid to profile anyway.\n");
8d3eca20
DA
610 err = -1;
611 goto out_delete_session;
e20960c0
RR
612 }
613
d20deb64 614 rec->post_processing_offset = lseek(output, 0, SEEK_CUR);
6122e4e4 615
743eb868
ACM
616 machine = perf_session__find_host_machine(session);
617 if (!machine) {
618 pr_err("Couldn't find native kernel information.\n");
8d3eca20
DA
619 err = -1;
620 goto out_delete_session;
743eb868
ACM
621 }
622
d20deb64 623 if (opts->pipe_output) {
45694aa7 624 err = perf_event__synthesize_attrs(tool, session,
d20deb64 625 process_synthesized_event);
2c46dbb5
TZ
626 if (err < 0) {
627 pr_err("Couldn't synthesize attrs.\n");
8d3eca20 628 goto out_delete_session;
2c46dbb5 629 }
cd19a035 630
45694aa7 631 err = perf_event__synthesize_event_types(tool, process_synthesized_event,
743eb868 632 machine);
cd19a035
TZ
633 if (err < 0) {
634 pr_err("Couldn't synthesize event_types.\n");
8d3eca20 635 goto out_delete_session;
cd19a035 636 }
9215545e 637
361c99a6 638 if (have_tracepoints(&evsel_list->entries)) {
63e0c771
TZ
639 /*
640 * FIXME err <= 0 here actually means that
641 * there were no tracepoints so its not really
642 * an error, just that we don't need to
643 * synthesize anything. We really have to
644 * return this more properly and also
645 * propagate errors that now are calling die()
646 */
45694aa7 647 err = perf_event__synthesize_tracing_data(tool, output, evsel_list,
743eb868 648 process_synthesized_event);
63e0c771
TZ
649 if (err <= 0) {
650 pr_err("Couldn't record tracing data.\n");
8d3eca20 651 goto out_delete_session;
63e0c771 652 }
d20deb64 653 advance_output(rec, err);
63e0c771 654 }
2c46dbb5
TZ
655 }
656
45694aa7 657 err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
743eb868 658 machine, "_text");
70162138 659 if (err < 0)
45694aa7 660 err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
743eb868 661 machine, "_stext");
c1a3a4b9
ACM
662 if (err < 0)
663 pr_err("Couldn't record kernel reference relocation symbol\n"
664 "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
665 "Check /proc/kallsyms permission or run as root.\n");
b7cece76 666
45694aa7 667 err = perf_event__synthesize_modules(tool, process_synthesized_event,
743eb868 668 machine);
c1a3a4b9
ACM
669 if (err < 0)
670 pr_err("Couldn't record kernel module information.\n"
671 "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
672 "Check /proc/modules permission or run as root.\n");
673
a1645ce1 674 if (perf_guest)
45694aa7 675 perf_session__process_machines(session, tool,
8115d60c 676 perf_event__synthesize_guest_os);
7c6a1c65 677
bea03405 678 if (!opts->target.system_wide)
8d3eca20 679 err = perf_event__synthesize_thread_map(tool, evsel_list->threads,
7c940c18 680 process_synthesized_event,
743eb868 681 machine);
234fbbf5 682 else
8d3eca20 683 err = perf_event__synthesize_threads(tool, process_synthesized_event,
743eb868 684 machine);
7c6a1c65 685
8d3eca20
DA
686 if (err != 0)
687 goto out_delete_session;
688
d20deb64 689 if (rec->realtime_prio) {
de9ac07b
PZ
690 struct sched_param param;
691
d20deb64 692 param.sched_priority = rec->realtime_prio;
de9ac07b 693 if (sched_setscheduler(0, SCHED_FIFO, &param)) {
6beba7ad 694 pr_err("Could not set realtime priority.\n");
8d3eca20
DA
695 err = -1;
696 goto out_delete_session;
de9ac07b
PZ
697 }
698 }
699
764e16a3
DA
700 perf_evlist__enable(evsel_list);
701
856e9660
PZ
702 /*
703 * Let the child rip
704 */
d4db3f16 705 if (forks)
35b9d88e 706 perf_evlist__start_workload(evsel_list);
856e9660 707
649c48a9 708 for (;;) {
d20deb64 709 int hits = rec->samples;
de9ac07b 710
8d3eca20
DA
711 if (perf_record__mmap_read_all(rec) < 0) {
712 err = -1;
713 goto out_delete_session;
714 }
de9ac07b 715
d20deb64 716 if (hits == rec->samples) {
649c48a9
PZ
717 if (done)
718 break;
5c581041 719 err = poll(evsel_list->pollfd, evsel_list->nr_fds, -1);
8b412664
PZ
720 waking++;
721 }
722
4152ab37
ACM
723 if (done)
724 perf_evlist__disable(evsel_list);
de9ac07b
PZ
725 }
726
18483b81 727 if (quiet || signr == SIGUSR1)
b44308f5
ACM
728 return 0;
729
8b412664
PZ
730 fprintf(stderr, "[ perf record: Woken up %ld times to write data ]\n", waking);
731
021e9f47
IM
732 /*
733 * Approximate RIP event size: 24 bytes.
734 */
735 fprintf(stderr,
9486aa38 736 "[ perf record: Captured and wrote %.3f MB %s (~%" PRIu64 " samples) ]\n",
d20deb64 737 (double)rec->bytes_written / 1024.0 / 1024.0,
021e9f47 738 output_name,
d20deb64 739 rec->bytes_written / 24);
addc2785 740
de9ac07b 741 return 0;
39d17dac
ACM
742
743out_delete_session:
744 perf_session__delete(session);
745 return err;
de9ac07b 746}
0e9b20b8 747
bdfebd84
RAV
748#define BRANCH_OPT(n, m) \
749 { .name = n, .mode = (m) }
750
751#define BRANCH_END { .name = NULL }
752
753struct branch_mode {
754 const char *name;
755 int mode;
756};
757
758static const struct branch_mode branch_modes[] = {
759 BRANCH_OPT("u", PERF_SAMPLE_BRANCH_USER),
760 BRANCH_OPT("k", PERF_SAMPLE_BRANCH_KERNEL),
761 BRANCH_OPT("hv", PERF_SAMPLE_BRANCH_HV),
762 BRANCH_OPT("any", PERF_SAMPLE_BRANCH_ANY),
763 BRANCH_OPT("any_call", PERF_SAMPLE_BRANCH_ANY_CALL),
764 BRANCH_OPT("any_ret", PERF_SAMPLE_BRANCH_ANY_RETURN),
765 BRANCH_OPT("ind_call", PERF_SAMPLE_BRANCH_IND_CALL),
766 BRANCH_END
767};
768
769static int
a5aabdac 770parse_branch_stack(const struct option *opt, const char *str, int unset)
bdfebd84
RAV
771{
772#define ONLY_PLM \
773 (PERF_SAMPLE_BRANCH_USER |\
774 PERF_SAMPLE_BRANCH_KERNEL |\
775 PERF_SAMPLE_BRANCH_HV)
776
777 uint64_t *mode = (uint64_t *)opt->value;
778 const struct branch_mode *br;
a5aabdac 779 char *s, *os = NULL, *p;
bdfebd84
RAV
780 int ret = -1;
781
a5aabdac
SE
782 if (unset)
783 return 0;
bdfebd84 784
a5aabdac
SE
785 /*
786 * cannot set it twice, -b + --branch-filter for instance
787 */
788 if (*mode)
bdfebd84
RAV
789 return -1;
790
a5aabdac
SE
791 /* str may be NULL in case no arg is passed to -b */
792 if (str) {
793 /* because str is read-only */
794 s = os = strdup(str);
795 if (!s)
796 return -1;
797
798 for (;;) {
799 p = strchr(s, ',');
800 if (p)
801 *p = '\0';
802
803 for (br = branch_modes; br->name; br++) {
804 if (!strcasecmp(s, br->name))
805 break;
806 }
807 if (!br->name) {
808 ui__warning("unknown branch filter %s,"
809 " check man page\n", s);
810 goto error;
811 }
bdfebd84 812
a5aabdac 813 *mode |= br->mode;
bdfebd84 814
a5aabdac
SE
815 if (!p)
816 break;
bdfebd84 817
a5aabdac
SE
818 s = p + 1;
819 }
bdfebd84
RAV
820 }
821 ret = 0;
822
a5aabdac 823 /* default to any branch */
bdfebd84 824 if ((*mode & ~ONLY_PLM) == 0) {
a5aabdac 825 *mode = PERF_SAMPLE_BRANCH_ANY;
bdfebd84
RAV
826 }
827error:
828 free(os);
829 return ret;
830}
831
#ifdef LIBUNWIND_SUPPORT
/*
 * Parse the stack dump size given to '-g dwarf,<size>'.
 *
 * @str must be a complete number (base auto-detected by strtoul()).  The
 * value is rounded up to a multiple of sizeof(u64) and must then be
 * non-zero and no larger than USHRT_MAX rounded down to that multiple.
 *
 * On success stores the rounded size in *@_size and returns 0; otherwise
 * prints an error and returns -1, leaving *@_size untouched.
 */
static int get_stack_size(char *str, unsigned long *_size)
{
	char *endptr;
	unsigned long size;
	unsigned long max_size = round_down(USHRT_MAX, sizeof(u64));

	size = strtoul(str, &endptr, 0);

	/* Trailing characters after the number make the whole value invalid. */
	if (!*endptr) {
		size = round_up(size, sizeof(u64));
		if (size && size <= max_size) {
			*_size = size;
			return 0;
		}
	}

	/* max_size is unsigned long, so it needs %lu rather than %ld. */
	pr_err("callchain: Incorrect stack dump size (max %lu): %s\n",
	       max_size, str);
	return -1;
}
#endif /* LIBUNWIND_SUPPORT */
26d33022
JO
859
860static int
1d037ca1 861parse_callchain_opt(const struct option *opt __maybe_unused, const char *arg,
26d33022
JO
862 int unset)
863{
864 struct perf_record *rec = (struct perf_record *)opt->value;
865 char *tok, *name, *saveptr = NULL;
866 char *buf;
867 int ret = -1;
868
869 /* --no-call-graph */
870 if (unset)
871 return 0;
872
873 /* We specified default option if none is provided. */
874 BUG_ON(!arg);
875
876 /* We need buffer that we know we can write to. */
877 buf = malloc(strlen(arg) + 1);
878 if (!buf)
879 return -ENOMEM;
880
881 strcpy(buf, arg);
882
883 tok = strtok_r((char *)buf, ",", &saveptr);
884 name = tok ? : (char *)buf;
885
886 do {
887 /* Framepointer style */
888 if (!strncmp(name, "fp", sizeof("fp"))) {
889 if (!strtok_r(NULL, ",", &saveptr)) {
890 rec->opts.call_graph = CALLCHAIN_FP;
891 ret = 0;
892 } else
893 pr_err("callchain: No more arguments "
894 "needed for -g fp\n");
895 break;
896
95485b1c 897#ifdef LIBUNWIND_SUPPORT
26d33022
JO
898 /* Dwarf style */
899 } else if (!strncmp(name, "dwarf", sizeof("dwarf"))) {
61eaa3be
ACM
900 const unsigned long default_stack_dump_size = 8192;
901
26d33022
JO
902 ret = 0;
903 rec->opts.call_graph = CALLCHAIN_DWARF;
904 rec->opts.stack_dump_size = default_stack_dump_size;
905
906 tok = strtok_r(NULL, ",", &saveptr);
907 if (tok) {
908 unsigned long size = 0;
909
910 ret = get_stack_size(tok, &size);
911 rec->opts.stack_dump_size = size;
912 }
913
914 if (!ret)
915 pr_debug("callchain: stack dump size %d\n",
916 rec->opts.stack_dump_size);
95485b1c 917#endif /* LIBUNWIND_SUPPORT */
26d33022
JO
918 } else {
919 pr_err("callchain: Unknown -g option "
920 "value: %s\n", arg);
921 break;
922 }
923
924 } while (0);
925
926 free(buf);
927
928 if (!ret)
929 pr_debug("callchain: type %d\n", rec->opts.call_graph);
930
931 return ret;
932}
933
/* NULL-terminated usage lines for 'perf record'. */
static const char * const record_usage[] = {
	"perf record [<options>] [<command>]",
	"perf record [<options>] -- <command> [<options>]",
	NULL,
};
939
d20deb64
ACM
940/*
941 * XXX Ideally would be local to cmd_record() and passed to a perf_record__new
942 * because we need to have access to it in perf_record__exit, that is called
943 * after cmd_record() exits, but since record_options need to be accessible to
944 * builtin-script, leave it here.
945 *
946 * At least we don't ouch it in all the other functions here directly.
947 *
948 * Just say no to tons of global variables, sigh.
949 */
950static struct perf_record record = {
951 .opts = {
d20deb64
ACM
952 .mmap_pages = UINT_MAX,
953 .user_freq = UINT_MAX,
954 .user_interval = ULLONG_MAX,
447a6013 955 .freq = 4000,
d1cb9fce
NK
956 .target = {
957 .uses_mmap = true,
958 },
d20deb64
ACM
959 },
960 .write_mode = WRITE_FORCE,
961 .file_new = true,
962};
7865e817 963
61eaa3be
ACM
#define CALLCHAIN_HELP "do call-graph (stack chain/backtrace) recording: "

/* The "dwarf" mode is only advertised when libunwind support is built in. */
#ifdef LIBUNWIND_SUPPORT
#define CALLCHAIN_MODES "[fp] dwarf"
#else
#define CALLCHAIN_MODES "[fp]"
#endif

/* Help text for the -g/--call-graph option. */
static const char callchain_help[] = CALLCHAIN_HELP CALLCHAIN_MODES;
971
d20deb64
ACM
972/*
973 * XXX Will stay a global variable till we fix builtin-script.c to stop messing
974 * with it and switch to use the library functions in perf_evlist that came
975 * from builtin-record.c, i.e. use perf_record_opts,
976 * perf_evlist__prepare_workload, etc instead of fork+exec'in 'perf record',
977 * using pipes, etc.
978 */
bca647aa 979const struct option record_options[] = {
d20deb64 980 OPT_CALLBACK('e', "event", &record.evlist, "event",
86847b62 981 "event selector. use 'perf list' to list available events",
f120f9d5 982 parse_events_option),
d20deb64 983 OPT_CALLBACK(0, "filter", &record.evlist, "filter",
c171b552 984 "event filter", parse_filter),
bea03405 985 OPT_STRING('p', "pid", &record.opts.target.pid, "pid",
d6d901c2 986 "record events on existing process id"),
bea03405 987 OPT_STRING('t', "tid", &record.opts.target.tid, "tid",
d6d901c2 988 "record events on existing thread id"),
d20deb64 989 OPT_INTEGER('r', "realtime", &record.realtime_prio,
0e9b20b8 990 "collect data with this RT SCHED_FIFO priority"),
d20deb64 991 OPT_BOOLEAN('D', "no-delay", &record.opts.no_delay,
acac03fa 992 "collect data without buffering"),
d20deb64 993 OPT_BOOLEAN('R', "raw-samples", &record.opts.raw_samples,
daac07b2 994 "collect raw sample records from all opened counters"),
bea03405 995 OPT_BOOLEAN('a', "all-cpus", &record.opts.target.system_wide,
0e9b20b8 996 "system-wide collection from all CPUs"),
d20deb64 997 OPT_BOOLEAN('A', "append", &record.append_file,
abaff32a 998 "append to the output file to do incremental profiling"),
bea03405 999 OPT_STRING('C', "cpu", &record.opts.target.cpu_list, "cpu",
c45c6ea2 1000 "list of cpus to monitor"),
d20deb64 1001 OPT_BOOLEAN('f', "force", &record.force,
7865e817 1002 "overwrite existing data file (deprecated)"),
d20deb64
ACM
1003 OPT_U64('c', "count", &record.opts.user_interval, "event period to sample"),
1004 OPT_STRING('o', "output", &record.output_name, "file",
abaff32a 1005 "output file name"),
d20deb64 1006 OPT_BOOLEAN('i', "no-inherit", &record.opts.no_inherit,
2e6cdf99 1007 "child tasks do not inherit counters"),
d20deb64
ACM
1008 OPT_UINTEGER('F', "freq", &record.opts.user_freq, "profile at this frequency"),
1009 OPT_UINTEGER('m', "mmap-pages", &record.opts.mmap_pages,
01c2d99b 1010 "number of mmap data pages"),
d20deb64 1011 OPT_BOOLEAN(0, "group", &record.opts.group,
43bece79 1012 "put the counters into a counter group"),
26d33022
JO
1013 OPT_CALLBACK_DEFAULT('g', "call-graph", &record, "mode[,dump_size]",
1014 callchain_help, &parse_callchain_opt,
1015 "fp"),
c0555642 1016 OPT_INCR('v', "verbose", &verbose,
3da297a6 1017 "be more verbose (show counter open errors, etc)"),
b44308f5 1018 OPT_BOOLEAN('q', "quiet", &quiet, "don't print any message"),
d20deb64 1019 OPT_BOOLEAN('s', "stat", &record.opts.inherit_stat,
649c48a9 1020 "per thread counts"),
d20deb64 1021 OPT_BOOLEAN('d', "data", &record.opts.sample_address,
4bba828d 1022 "Sample addresses"),
d20deb64 1023 OPT_BOOLEAN('T', "timestamp", &record.opts.sample_time, "Sample timestamps"),
3e76ac78 1024 OPT_BOOLEAN('P', "period", &record.opts.period, "Sample period"),
d20deb64 1025 OPT_BOOLEAN('n', "no-samples", &record.opts.no_samples,
649c48a9 1026 "don't sample"),
d20deb64 1027 OPT_BOOLEAN('N', "no-buildid-cache", &record.no_buildid_cache,
a1ac1d3c 1028 "do not update the buildid cache"),
d20deb64 1029 OPT_BOOLEAN('B', "no-buildid", &record.no_buildid,
baa2f6ce 1030 "do not collect buildids in perf.data"),
d20deb64 1031 OPT_CALLBACK('G', "cgroup", &record.evlist, "name",
023695d9
SE
1032 "monitor event in cgroup name only",
1033 parse_cgroups),
bea03405
NK
1034 OPT_STRING('u', "uid", &record.opts.target.uid_str, "user",
1035 "user to profile"),
a5aabdac
SE
1036
1037 OPT_CALLBACK_NOOPT('b', "branch-any", &record.opts.branch_stack,
1038 "branch any", "sample any taken branches",
1039 parse_branch_stack),
1040
1041 OPT_CALLBACK('j', "branch-filter", &record.opts.branch_stack,
1042 "branch filter mask", "branch stack filter modes",
bdfebd84 1043 parse_branch_stack),
0e9b20b8
IM
1044 OPT_END()
1045};
1046
1d037ca1 1047int cmd_record(int argc, const char **argv, const char *prefix __maybe_unused)
0e9b20b8 1048{
69aad6f1
ACM
1049 int err = -ENOMEM;
1050 struct perf_evsel *pos;
d20deb64
ACM
1051 struct perf_evlist *evsel_list;
1052 struct perf_record *rec = &record;
16ad2ffb 1053 char errbuf[BUFSIZ];
0e9b20b8 1054
7e2ed097 1055 evsel_list = perf_evlist__new(NULL, NULL);
361c99a6
ACM
1056 if (evsel_list == NULL)
1057 return -ENOMEM;
1058
d20deb64
ACM
1059 rec->evlist = evsel_list;
1060
bca647aa 1061 argc = parse_options(argc, argv, record_options, record_usage,
655000e7 1062 PARSE_OPT_STOP_AT_NON_OPTION);
d67356e7 1063 if (!argc && perf_target__none(&rec->opts.target))
bca647aa 1064 usage_with_options(record_usage, record_options);
0e9b20b8 1065
d20deb64 1066 if (rec->force && rec->append_file) {
3780f488
NK
1067 ui__error("Can't overwrite and append at the same time."
1068 " You need to choose between -f and -A");
bca647aa 1069 usage_with_options(record_usage, record_options);
d20deb64
ACM
1070 } else if (rec->append_file) {
1071 rec->write_mode = WRITE_APPEND;
7865e817 1072 } else {
d20deb64 1073 rec->write_mode = WRITE_FORCE;
7865e817
FW
1074 }
1075
bea03405 1076 if (nr_cgroups && !rec->opts.target.system_wide) {
3780f488
NK
1077 ui__error("cgroup monitoring only available in"
1078 " system-wide mode\n");
023695d9
SE
1079 usage_with_options(record_usage, record_options);
1080 }
1081
655000e7 1082 symbol__init();
baa2f6ce 1083
ec80fde7 1084 if (symbol_conf.kptr_restrict)
646aaea6
ACM
1085 pr_warning(
1086"WARNING: Kernel address maps (/proc/{kallsyms,modules}) are restricted,\n"
1087"check /proc/sys/kernel/kptr_restrict.\n\n"
1088"Samples in kernel functions may not be resolved if a suitable vmlinux\n"
1089"file is not found in the buildid cache or in the vmlinux path.\n\n"
1090"Samples in kernel modules won't be resolved at all.\n\n"
1091"If some relocation was applied (e.g. kexec) symbols may be misresolved\n"
1092"even with a suitable vmlinux or kallsyms file.\n\n");
ec80fde7 1093
d20deb64 1094 if (rec->no_buildid_cache || rec->no_buildid)
a1ac1d3c 1095 disable_buildid_cache();
655000e7 1096
361c99a6
ACM
1097 if (evsel_list->nr_entries == 0 &&
1098 perf_evlist__add_default(evsel_list) < 0) {
69aad6f1
ACM
1099 pr_err("Not enough memory for event selector list\n");
1100 goto out_symbol_exit;
bbd36e5e 1101 }
0e9b20b8 1102
16ad2ffb
NK
1103 err = perf_target__validate(&rec->opts.target);
1104 if (err) {
1105 perf_target__strerror(&rec->opts.target, err, errbuf, BUFSIZ);
1106 ui__warning("%s", errbuf);
1107 }
1108
1109 err = perf_target__parse_uid(&rec->opts.target);
1110 if (err) {
1111 int saved_errno = errno;
4bd0f2d2 1112
16ad2ffb 1113 perf_target__strerror(&rec->opts.target, err, errbuf, BUFSIZ);
3780f488 1114 ui__error("%s", errbuf);
16ad2ffb
NK
1115
1116 err = -saved_errno;
0d37aa34 1117 goto out_free_fd;
16ad2ffb 1118 }
0d37aa34 1119
16ad2ffb 1120 err = -ENOMEM;
b809ac10 1121 if (perf_evlist__create_maps(evsel_list, &rec->opts.target) < 0)
dd7927f4 1122 usage_with_options(record_usage, record_options);
69aad6f1 1123
361c99a6 1124 list_for_each_entry(pos, &evsel_list->entries, node) {
7289f83c 1125 if (perf_header__push_event(pos->attr.config, perf_evsel__name(pos)))
ad7f4e3f 1126 goto out_free_fd;
d6d901c2 1127 }
5c581041 1128
d20deb64
ACM
1129 if (rec->opts.user_interval != ULLONG_MAX)
1130 rec->opts.default_interval = rec->opts.user_interval;
1131 if (rec->opts.user_freq != UINT_MAX)
1132 rec->opts.freq = rec->opts.user_freq;
f9212819 1133
7e4ff9e3
MG
1134 /*
1135 * User specified count overrides default frequency.
1136 */
d20deb64
ACM
1137 if (rec->opts.default_interval)
1138 rec->opts.freq = 0;
1139 else if (rec->opts.freq) {
1140 rec->opts.default_interval = rec->opts.freq;
7e4ff9e3 1141 } else {
3780f488 1142 ui__error("frequency and count are zero, aborting\n");
39d17dac 1143 err = -EINVAL;
5c581041 1144 goto out_free_fd;
7e4ff9e3
MG
1145 }
1146
d20deb64 1147 err = __cmd_record(&record, argc, argv);
39d17dac 1148out_free_fd:
7e2ed097 1149 perf_evlist__delete_maps(evsel_list);
d65a458b
ACM
1150out_symbol_exit:
1151 symbol__exit();
39d17dac 1152 return err;
0e9b20b8 1153}