perf record: Remove use of die/exit
[linux-2.6-block.git] / tools / perf / builtin-record.c
CommitLineData
abaff32a 1/*
bf9e1876
IM
2 * builtin-record.c
3 *
4 * Builtin record command: Record the profile of a workload
5 * (or a CPU, or a PID) into the perf.data output file - for
6 * later analysis via perf report.
abaff32a 7 */
b8f46c5a
XG
8#define _FILE_OFFSET_BITS 64
9
16f762a2 10#include "builtin.h"
bf9e1876
IM
11
12#include "perf.h"
13
6122e4e4 14#include "util/build-id.h"
6eda5838 15#include "util/util.h"
0e9b20b8 16#include "util/parse-options.h"
8ad8db37 17#include "util/parse-events.h"
6eda5838 18
7c6a1c65 19#include "util/header.h"
66e274f3 20#include "util/event.h"
361c99a6 21#include "util/evlist.h"
69aad6f1 22#include "util/evsel.h"
8f28827a 23#include "util/debug.h"
94c744b6 24#include "util/session.h"
45694aa7 25#include "util/tool.h"
8d06367f 26#include "util/symbol.h"
a12b51c4 27#include "util/cpumap.h"
fd78260b 28#include "util/thread_map.h"
7c6a1c65 29
97124d5e 30#include <unistd.h>
de9ac07b 31#include <sched.h>
a41794cd 32#include <sys/mman.h>
de9ac07b 33
26d33022
JO
/* Common prefix of the -g/--call-graph option help text. */
#define CALLCHAIN_HELP "do call-graph (stack chain/backtrace) recording: "

#ifndef NO_LIBUNWIND_SUPPORT
/* Stack dump size used for "dwarf" mode when the user gives no size. */
static unsigned long default_stack_dump_size = 8192;
static char callchain_help[] = CALLCHAIN_HELP "[fp] dwarf";
#else
/* Without libunwind only the frame-pointer mode is advertised. */
static char callchain_help[] = CALLCHAIN_HELP "[fp]";
#endif
42
7865e817
FW
43enum write_mode_t {
44 WRITE_FORCE,
45 WRITE_APPEND
46};
47
d20deb64 48struct perf_record {
45694aa7 49 struct perf_tool tool;
d20deb64
ACM
50 struct perf_record_opts opts;
51 u64 bytes_written;
52 const char *output_name;
53 struct perf_evlist *evlist;
54 struct perf_session *session;
55 const char *progname;
56 int output;
57 unsigned int page_size;
58 int realtime_prio;
59 enum write_mode_t write_mode;
60 bool no_buildid;
61 bool no_buildid_cache;
62 bool force;
63 bool file_new;
64 bool append_file;
65 long samples;
66 off_t post_processing_offset;
0f82ebc4 67};
a21ca2ca 68
d20deb64 69static void advance_output(struct perf_record *rec, size_t size)
9215545e 70{
d20deb64 71 rec->bytes_written += size;
9215545e
TZ
72}
73
8d3eca20 74static int write_output(struct perf_record *rec, void *buf, size_t size)
f5970550
PZ
75{
76 while (size) {
d20deb64 77 int ret = write(rec->output, buf, size);
f5970550 78
8d3eca20
DA
79 if (ret < 0) {
80 pr_err("failed to write\n");
81 return -1;
82 }
f5970550
PZ
83
84 size -= ret;
85 buf += ret;
86
d20deb64 87 rec->bytes_written += ret;
f5970550 88 }
8d3eca20
DA
89
90 return 0;
f5970550
PZ
91}
92
45694aa7 93static int process_synthesized_event(struct perf_tool *tool,
d20deb64 94 union perf_event *event,
8d50e5b4 95 struct perf_sample *sample __used,
743eb868 96 struct machine *machine __used)
234fbbf5 97{
45694aa7 98 struct perf_record *rec = container_of(tool, struct perf_record, tool);
8d3eca20
DA
99 if (write_output(rec, event, event->header.size) < 0)
100 return -1;
101
234fbbf5
ACM
102 return 0;
103}
104
8d3eca20 105static int perf_record__mmap_read(struct perf_record *rec,
d20deb64 106 struct perf_mmap *md)
de9ac07b 107{
744bd8aa 108 unsigned int head = perf_mmap__read_head(md);
de9ac07b 109 unsigned int old = md->prev;
d20deb64 110 unsigned char *data = md->base + rec->page_size;
de9ac07b
PZ
111 unsigned long size;
112 void *buf;
8d3eca20 113 int rc = 0;
de9ac07b 114
dc82009a 115 if (old == head)
8d3eca20 116 return 0;
dc82009a 117
d20deb64 118 rec->samples++;
de9ac07b
PZ
119
120 size = head - old;
121
122 if ((old & md->mask) + size != (head & md->mask)) {
123 buf = &data[old & md->mask];
124 size = md->mask + 1 - (old & md->mask);
125 old += size;
021e9f47 126
8d3eca20
DA
127 if (write_output(rec, buf, size) < 0) {
128 rc = -1;
129 goto out;
130 }
de9ac07b
PZ
131 }
132
133 buf = &data[old & md->mask];
134 size = head - old;
135 old += size;
021e9f47 136
8d3eca20
DA
137 if (write_output(rec, buf, size) < 0) {
138 rc = -1;
139 goto out;
140 }
de9ac07b
PZ
141
142 md->prev = old;
115d2d89 143 perf_mmap__write_tail(md, old);
8d3eca20
DA
144
145out:
146 return rc;
de9ac07b
PZ
147}
148
/*
 * Flags shared between the signal handler and the main flow. They are
 * sig_atomic_t because that is the only object type the C standard lets a
 * signal handler store to safely.
 */
static volatile sig_atomic_t done = 0;
static volatile sig_atomic_t signr = -1;
static volatile sig_atomic_t child_finished = 0;

/*
 * Handler for SIGCHLD/SIGINT/SIGUSR1: asks the record loop to stop and
 * remembers which signal arrived; SIGCHLD additionally marks the workload
 * as finished.
 */
static void sig_handler(int sig)
{
	if (sig == SIGCHLD)
		child_finished = 1;

	done = 1;
	signr = sig;
}
161
d20deb64 162static void perf_record__sig_exit(int exit_status __used, void *arg)
f7b7c26e 163{
d20deb64 164 struct perf_record *rec = arg;
33e49ea7
AK
165 int status;
166
d20deb64 167 if (rec->evlist->workload.pid > 0) {
33e49ea7 168 if (!child_finished)
d20deb64 169 kill(rec->evlist->workload.pid, SIGTERM);
33e49ea7
AK
170
171 wait(&status);
172 if (WIFSIGNALED(status))
d20deb64 173 psignal(WTERMSIG(status), rec->progname);
33e49ea7 174 }
933da83a 175
18483b81 176 if (signr == -1 || signr == SIGUSR1)
f7b7c26e
PZ
177 return;
178
179 signal(signr, SIG_DFL);
180 kill(getpid(), signr);
de9ac07b
PZ
181}
182
a91e5431
ACM
183static bool perf_evlist__equal(struct perf_evlist *evlist,
184 struct perf_evlist *other)
185{
186 struct perf_evsel *pos, *pair;
187
188 if (evlist->nr_entries != other->nr_entries)
189 return false;
190
0c21f736 191 pair = perf_evlist__first(other);
a91e5431
ACM
192
193 list_for_each_entry(pos, &evlist->entries, node) {
194 if (memcmp(&pos->attr, &pair->attr, sizeof(pos->attr) != 0))
195 return false;
0c21f736 196 pair = perf_evsel__next(pair);
a91e5431
ACM
197 }
198
199 return true;
200}
201
8d3eca20 202static int perf_record__open(struct perf_record *rec)
dd7927f4 203{
6a4bb04c 204 struct perf_evsel *pos;
d20deb64
ACM
205 struct perf_evlist *evlist = rec->evlist;
206 struct perf_session *session = rec->session;
207 struct perf_record_opts *opts = &rec->opts;
8d3eca20 208 int rc = 0;
dd7927f4 209
d20deb64 210 perf_evlist__config_attrs(evlist, opts);
0f82ebc4 211
6a4bb04c 212 if (opts->group)
63dab225 213 perf_evlist__set_leader(evlist);
6a4bb04c 214
dd7927f4
ACM
215 list_for_each_entry(pos, &evlist->entries, node) {
216 struct perf_event_attr *attr = &pos->attr;
217 /*
218 * Check if parse_single_tracepoint_event has already asked for
219 * PERF_SAMPLE_TIME.
220 *
221 * XXX this is kludgy but short term fix for problems introduced by
222 * eac23d1c that broke 'perf script' by having different sample_types
223 * when using multiple tracepoint events when we use a perf binary
224 * that tries to use sample_id_all on an older kernel.
225 *
226 * We need to move counter creation to perf_session, support
227 * different sample_types, etc.
228 */
229 bool time_needed = attr->sample_type & PERF_SAMPLE_TIME;
d6d901c2 230
0c978128
ACM
231fallback_missing_features:
232 if (opts->exclude_guest_missing)
233 attr->exclude_guest = attr->exclude_host = 0;
dd7927f4 234retry_sample_id:
808e1226 235 attr->sample_id_all = opts->sample_id_all_missing ? 0 : 1;
dd7927f4 236try_again:
6a4bb04c 237 if (perf_evsel__open(pos, evlist->cpus, evlist->threads) < 0) {
d6d901c2
ZY
238 int err = errno;
239
c286c419 240 if (err == EPERM || err == EACCES) {
b8631e6e 241 ui__error_paranoid();
8d3eca20
DA
242 rc = -err;
243 goto out;
bea03405 244 } else if (err == ENODEV && opts->target.cpu_list) {
8d3eca20
DA
245 pr_err("No such device - did you specify"
246 " an out-of-range profile CPU?\n");
247 rc = -err;
248 goto out;
0c978128
ACM
249 } else if (err == EINVAL) {
250 if (!opts->exclude_guest_missing &&
251 (attr->exclude_guest || attr->exclude_host)) {
252 pr_debug("Old kernel, cannot exclude "
253 "guest or host samples.\n");
254 opts->exclude_guest_missing = true;
255 goto fallback_missing_features;
808e1226 256 } else if (!opts->sample_id_all_missing) {
0c978128
ACM
257 /*
258 * Old kernel, no attr->sample_id_type_all field
259 */
808e1226 260 opts->sample_id_all_missing = true;
0c978128
ACM
261 if (!opts->sample_time && !opts->raw_samples && !time_needed)
262 attr->sample_type &= ~PERF_SAMPLE_TIME;
263
264 goto retry_sample_id;
265 }
d6d901c2 266 }
3da297a6 267
d6d901c2
ZY
268 /*
269 * If it's cycles then fall back to hrtimer
270 * based cpu-clock-tick sw counter, which
028d455b
DA
271 * is always available even if no PMU support.
272 *
273 * PPC returns ENXIO until 2.6.37 (behavior changed
274 * with commit b0a873e).
d6d901c2 275 */
028d455b
DA
276 if ((err == ENOENT || err == ENXIO)
277 && attr->type == PERF_TYPE_HARDWARE
d6d901c2
ZY
278 && attr->config == PERF_COUNT_HW_CPU_CYCLES) {
279
280 if (verbose)
ca6a4258
DA
281 ui__warning("The cycles event is not supported, "
282 "trying to fall back to cpu-clock-ticks\n");
d6d901c2
ZY
283 attr->type = PERF_TYPE_SOFTWARE;
284 attr->config = PERF_COUNT_SW_CPU_CLOCK;
d1cae34d
DA
285 if (pos->name) {
286 free(pos->name);
287 pos->name = NULL;
288 }
d6d901c2
ZY
289 goto try_again;
290 }
ca6a4258
DA
291
292 if (err == ENOENT) {
3780f488 293 ui__error("The %s event is not supported.\n",
7289f83c 294 perf_evsel__name(pos));
8d3eca20
DA
295 rc = -err;
296 goto out;
ca6a4258
DA
297 }
298
d6d901c2 299 printf("\n");
d9cf837e 300 error("sys_perf_event_open() syscall returned with %d (%s). /bin/dmesg may provide additional information.\n",
dd7927f4 301 err, strerror(err));
bfd45118
SK
302
303#if defined(__i386__) || defined(__x86_64__)
8d3eca20
DA
304 if (attr->type == PERF_TYPE_HARDWARE &&
305 err == EOPNOTSUPP) {
306 pr_err("No hardware sampling interrupt available."
307 " No APIC? If so then you can boot the kernel"
308 " with the \"lapic\" boot parameter to"
309 " force-enable it.\n");
310 rc = -err;
311 goto out;
312 }
bfd45118
SK
313#endif
314
8d3eca20
DA
315 pr_err("No CONFIG_PERF_EVENTS=y kernel support configured?\n");
316 rc = -err;
317 goto out;
c171b552
LZ
318 }
319 }
a43d3f08 320
0a102479
FW
321 if (perf_evlist__set_filters(evlist)) {
322 error("failed to set filter with %d (%s)\n", errno,
323 strerror(errno));
8d3eca20
DA
324 rc = -1;
325 goto out;
0a102479
FW
326 }
327
18e60939 328 if (perf_evlist__mmap(evlist, opts->mmap_pages, false) < 0) {
8d3eca20
DA
329 if (errno == EPERM) {
330 pr_err("Permission error mapping pages.\n"
331 "Consider increasing "
332 "/proc/sys/kernel/perf_event_mlock_kb,\n"
333 "or try again with a smaller value of -m/--mmap_pages.\n"
334 "(current value: %d)\n", opts->mmap_pages);
335 rc = -errno;
336 } else if (!is_power_of_2(opts->mmap_pages)) {
337 pr_err("--mmap_pages/-m value must be a power of two.");
338 rc = -EINVAL;
339 } else {
340 pr_err("failed to mmap with %d (%s)\n", errno, strerror(errno));
341 rc = -errno;
342 }
343 goto out;
18e60939 344 }
0a27d7f9 345
d20deb64 346 if (rec->file_new)
a91e5431
ACM
347 session->evlist = evlist;
348 else {
349 if (!perf_evlist__equal(session->evlist, evlist)) {
350 fprintf(stderr, "incompatible append\n");
8d3eca20
DA
351 rc = -1;
352 goto out;
a91e5431
ACM
353 }
354 }
355
7b56cce2 356 perf_session__set_id_hdr_size(session);
8d3eca20
DA
357out:
358 return rc;
16c8a109
PZ
359}
360
d20deb64 361static int process_buildids(struct perf_record *rec)
6122e4e4 362{
d20deb64 363 u64 size = lseek(rec->output, 0, SEEK_CUR);
6122e4e4 364
9f591fd7
ACM
365 if (size == 0)
366 return 0;
367
d20deb64
ACM
368 rec->session->fd = rec->output;
369 return __perf_session__process_events(rec->session, rec->post_processing_offset,
370 size - rec->post_processing_offset,
6122e4e4
ACM
371 size, &build_id__mark_dso_hit_ops);
372}
373
8d3eca20 374static void perf_record__exit(int status, void *arg)
f5970550 375{
d20deb64
ACM
376 struct perf_record *rec = arg;
377
8d3eca20
DA
378 if (status != 0)
379 return;
380
d20deb64
ACM
381 if (!rec->opts.pipe_output) {
382 rec->session->header.data_size += rec->bytes_written;
383
384 if (!rec->no_buildid)
385 process_buildids(rec);
386 perf_session__write_header(rec->session, rec->evlist,
387 rec->output, true);
388 perf_session__delete(rec->session);
389 perf_evlist__delete(rec->evlist);
d65a458b 390 symbol__exit();
c7929e47 391 }
f5970550
PZ
392}
393
8115d60c 394static void perf_event__synthesize_guest_os(struct machine *machine, void *data)
a1645ce1
ZY
395{
396 int err;
45694aa7 397 struct perf_tool *tool = data;
a1645ce1 398
23346f21 399 if (machine__is_host(machine))
a1645ce1
ZY
400 return;
401
402 /*
403 *As for guest kernel when processing subcommand record&report,
404 *we arrange module mmap prior to guest kernel mmap and trigger
405 *a preload dso because default guest module symbols are loaded
406 *from guest kallsyms instead of /lib/modules/XXX/XXX. This
407 *method is used to avoid symbol missing when the first addr is
408 *in module instead of in guest kernel.
409 */
45694aa7 410 err = perf_event__synthesize_modules(tool, process_synthesized_event,
743eb868 411 machine);
a1645ce1
ZY
412 if (err < 0)
413 pr_err("Couldn't record guest kernel [%d]'s reference"
23346f21 414 " relocation symbol.\n", machine->pid);
a1645ce1 415
a1645ce1
ZY
416 /*
417 * We use _stext for guest kernel because guest kernel's /proc/kallsyms
418 * have no _text sometimes.
419 */
45694aa7 420 err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
743eb868 421 machine, "_text");
a1645ce1 422 if (err < 0)
45694aa7 423 err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
743eb868 424 machine, "_stext");
a1645ce1
ZY
425 if (err < 0)
426 pr_err("Couldn't record guest kernel [%d]'s reference"
23346f21 427 " relocation symbol.\n", machine->pid);
a1645ce1
ZY
428}
429
98402807
FW
430static struct perf_event_header finished_round_event = {
431 .size = sizeof(struct perf_event_header),
432 .type = PERF_RECORD_FINISHED_ROUND,
433};
434
8d3eca20 435static int perf_record__mmap_read_all(struct perf_record *rec)
98402807 436{
0e2e63dd 437 int i;
8d3eca20 438 int rc = 0;
98402807 439
d20deb64 440 for (i = 0; i < rec->evlist->nr_mmaps; i++) {
8d3eca20
DA
441 if (rec->evlist->mmap[i].base) {
442 if (perf_record__mmap_read(rec, &rec->evlist->mmap[i]) != 0) {
443 rc = -1;
444 goto out;
445 }
446 }
98402807
FW
447 }
448
2eeaaa09 449 if (perf_header__has_feat(&rec->session->header, HEADER_TRACING_DATA))
8d3eca20
DA
450 rc = write_output(rec, &finished_round_event,
451 sizeof(finished_round_event));
452
453out:
454 return rc;
98402807
FW
455}
456
d20deb64 457static int __cmd_record(struct perf_record *rec, int argc, const char **argv)
16c8a109 458{
abaff32a 459 struct stat st;
abaff32a 460 int flags;
781ba9d2 461 int err, output, feat;
8b412664 462 unsigned long waking = 0;
46be604b 463 const bool forks = argc > 0;
23346f21 464 struct machine *machine;
45694aa7 465 struct perf_tool *tool = &rec->tool;
d20deb64
ACM
466 struct perf_record_opts *opts = &rec->opts;
467 struct perf_evlist *evsel_list = rec->evlist;
468 const char *output_name = rec->output_name;
469 struct perf_session *session;
de9ac07b 470
d20deb64 471 rec->progname = argv[0];
33e49ea7 472
d20deb64 473 rec->page_size = sysconf(_SC_PAGE_SIZE);
de9ac07b 474
d20deb64 475 on_exit(perf_record__sig_exit, rec);
f5970550
PZ
476 signal(SIGCHLD, sig_handler);
477 signal(SIGINT, sig_handler);
18483b81 478 signal(SIGUSR1, sig_handler);
f5970550 479
d7065adb
FBH
480 if (!output_name) {
481 if (!fstat(STDOUT_FILENO, &st) && S_ISFIFO(st.st_mode))
d20deb64 482 opts->pipe_output = true;
d7065adb 483 else
d20deb64 484 rec->output_name = output_name = "perf.data";
d7065adb
FBH
485 }
486 if (output_name) {
487 if (!strcmp(output_name, "-"))
d20deb64 488 opts->pipe_output = true;
d7065adb 489 else if (!stat(output_name, &st) && st.st_size) {
d20deb64 490 if (rec->write_mode == WRITE_FORCE) {
d7065adb
FBH
491 char oldname[PATH_MAX];
492 snprintf(oldname, sizeof(oldname), "%s.old",
493 output_name);
494 unlink(oldname);
495 rename(output_name, oldname);
496 }
d20deb64
ACM
497 } else if (rec->write_mode == WRITE_APPEND) {
498 rec->write_mode = WRITE_FORCE;
266e0e21 499 }
97124d5e
PZ
500 }
501
f887f301 502 flags = O_CREAT|O_RDWR;
d20deb64
ACM
503 if (rec->write_mode == WRITE_APPEND)
504 rec->file_new = 0;
abaff32a
IM
505 else
506 flags |= O_TRUNC;
507
d20deb64 508 if (opts->pipe_output)
529870e3
TZ
509 output = STDOUT_FILENO;
510 else
511 output = open(output_name, flags, S_IRUSR | S_IWUSR);
de9ac07b
PZ
512 if (output < 0) {
513 perror("failed to create output file");
8d3eca20 514 return -1;
de9ac07b
PZ
515 }
516
d20deb64
ACM
517 rec->output = output;
518
7865e817 519 session = perf_session__new(output_name, O_WRONLY,
d20deb64 520 rec->write_mode == WRITE_FORCE, false, NULL);
94c744b6 521 if (session == NULL) {
a9a70bbc
ACM
522 pr_err("Not enough memory for reading perf file header\n");
523 return -1;
524 }
525
d20deb64
ACM
526 rec->session = session;
527
781ba9d2
RR
528 for (feat = HEADER_FIRST_FEATURE; feat < HEADER_LAST_FEATURE; feat++)
529 perf_header__set_feat(&session->header, feat);
530
531 if (rec->no_buildid)
532 perf_header__clear_feat(&session->header, HEADER_BUILD_ID);
533
534 if (!have_tracepoints(&evsel_list->entries))
2eeaaa09 535 perf_header__clear_feat(&session->header, HEADER_TRACING_DATA);
baa2f6ce 536
330aa675
SE
537 if (!rec->opts.branch_stack)
538 perf_header__clear_feat(&session->header, HEADER_BRANCH_STACK);
539
d20deb64 540 if (!rec->file_new) {
a91e5431 541 err = perf_session__read_header(session, output);
4dc0a04b 542 if (err < 0)
39d17dac 543 goto out_delete_session;
4dc0a04b
ACM
544 }
545
d4db3f16 546 if (forks) {
d20deb64 547 err = perf_evlist__prepare_workload(evsel_list, opts, argv);
35b9d88e
ACM
548 if (err < 0) {
549 pr_err("Couldn't run the workload!\n");
550 goto out_delete_session;
856e9660 551 }
856e9660
PZ
552 }
553
8d3eca20
DA
554 if (perf_record__open(rec) != 0) {
555 err = -1;
556 goto out_delete_session;
557 }
de9ac07b 558
712a4b60 559 /*
d20deb64 560 * perf_session__delete(session) will be called at perf_record__exit()
712a4b60 561 */
d20deb64 562 on_exit(perf_record__exit, rec);
712a4b60 563
d20deb64 564 if (opts->pipe_output) {
529870e3
TZ
565 err = perf_header__write_pipe(output);
566 if (err < 0)
8d3eca20 567 goto out_delete_session;
d20deb64 568 } else if (rec->file_new) {
a91e5431
ACM
569 err = perf_session__write_header(session, evsel_list,
570 output, false);
d5eed904 571 if (err < 0)
8d3eca20 572 goto out_delete_session;
56b03f3c
ACM
573 }
574
d3665498 575 if (!rec->no_buildid
e20960c0 576 && !perf_header__has_feat(&session->header, HEADER_BUILD_ID)) {
d3665498 577 pr_err("Couldn't generate buildids. "
e20960c0 578 "Use --no-buildid to profile anyway.\n");
8d3eca20
DA
579 err = -1;
580 goto out_delete_session;
e20960c0
RR
581 }
582
d20deb64 583 rec->post_processing_offset = lseek(output, 0, SEEK_CUR);
6122e4e4 584
743eb868
ACM
585 machine = perf_session__find_host_machine(session);
586 if (!machine) {
587 pr_err("Couldn't find native kernel information.\n");
8d3eca20
DA
588 err = -1;
589 goto out_delete_session;
743eb868
ACM
590 }
591
d20deb64 592 if (opts->pipe_output) {
45694aa7 593 err = perf_event__synthesize_attrs(tool, session,
d20deb64 594 process_synthesized_event);
2c46dbb5
TZ
595 if (err < 0) {
596 pr_err("Couldn't synthesize attrs.\n");
8d3eca20 597 goto out_delete_session;
2c46dbb5 598 }
cd19a035 599
45694aa7 600 err = perf_event__synthesize_event_types(tool, process_synthesized_event,
743eb868 601 machine);
cd19a035
TZ
602 if (err < 0) {
603 pr_err("Couldn't synthesize event_types.\n");
8d3eca20 604 goto out_delete_session;
cd19a035 605 }
9215545e 606
361c99a6 607 if (have_tracepoints(&evsel_list->entries)) {
63e0c771
TZ
608 /*
609 * FIXME err <= 0 here actually means that
610 * there were no tracepoints so its not really
611 * an error, just that we don't need to
612 * synthesize anything. We really have to
613 * return this more properly and also
614 * propagate errors that now are calling die()
615 */
45694aa7 616 err = perf_event__synthesize_tracing_data(tool, output, evsel_list,
743eb868 617 process_synthesized_event);
63e0c771
TZ
618 if (err <= 0) {
619 pr_err("Couldn't record tracing data.\n");
8d3eca20 620 goto out_delete_session;
63e0c771 621 }
d20deb64 622 advance_output(rec, err);
63e0c771 623 }
2c46dbb5
TZ
624 }
625
45694aa7 626 err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
743eb868 627 machine, "_text");
70162138 628 if (err < 0)
45694aa7 629 err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
743eb868 630 machine, "_stext");
c1a3a4b9
ACM
631 if (err < 0)
632 pr_err("Couldn't record kernel reference relocation symbol\n"
633 "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
634 "Check /proc/kallsyms permission or run as root.\n");
b7cece76 635
45694aa7 636 err = perf_event__synthesize_modules(tool, process_synthesized_event,
743eb868 637 machine);
c1a3a4b9
ACM
638 if (err < 0)
639 pr_err("Couldn't record kernel module information.\n"
640 "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
641 "Check /proc/modules permission or run as root.\n");
642
a1645ce1 643 if (perf_guest)
45694aa7 644 perf_session__process_machines(session, tool,
8115d60c 645 perf_event__synthesize_guest_os);
7c6a1c65 646
bea03405 647 if (!opts->target.system_wide)
8d3eca20 648 err = perf_event__synthesize_thread_map(tool, evsel_list->threads,
7c940c18 649 process_synthesized_event,
743eb868 650 machine);
234fbbf5 651 else
8d3eca20 652 err = perf_event__synthesize_threads(tool, process_synthesized_event,
743eb868 653 machine);
7c6a1c65 654
8d3eca20
DA
655 if (err != 0)
656 goto out_delete_session;
657
d20deb64 658 if (rec->realtime_prio) {
de9ac07b
PZ
659 struct sched_param param;
660
d20deb64 661 param.sched_priority = rec->realtime_prio;
de9ac07b 662 if (sched_setscheduler(0, SCHED_FIFO, &param)) {
6beba7ad 663 pr_err("Could not set realtime priority.\n");
8d3eca20
DA
664 err = -1;
665 goto out_delete_session;
de9ac07b
PZ
666 }
667 }
668
764e16a3
DA
669 perf_evlist__enable(evsel_list);
670
856e9660
PZ
671 /*
672 * Let the child rip
673 */
d4db3f16 674 if (forks)
35b9d88e 675 perf_evlist__start_workload(evsel_list);
856e9660 676
649c48a9 677 for (;;) {
d20deb64 678 int hits = rec->samples;
de9ac07b 679
8d3eca20
DA
680 if (perf_record__mmap_read_all(rec) < 0) {
681 err = -1;
682 goto out_delete_session;
683 }
de9ac07b 684
d20deb64 685 if (hits == rec->samples) {
649c48a9
PZ
686 if (done)
687 break;
5c581041 688 err = poll(evsel_list->pollfd, evsel_list->nr_fds, -1);
8b412664
PZ
689 waking++;
690 }
691
4152ab37
ACM
692 if (done)
693 perf_evlist__disable(evsel_list);
de9ac07b
PZ
694 }
695
18483b81 696 if (quiet || signr == SIGUSR1)
b44308f5
ACM
697 return 0;
698
8b412664
PZ
699 fprintf(stderr, "[ perf record: Woken up %ld times to write data ]\n", waking);
700
021e9f47
IM
701 /*
702 * Approximate RIP event size: 24 bytes.
703 */
704 fprintf(stderr,
9486aa38 705 "[ perf record: Captured and wrote %.3f MB %s (~%" PRIu64 " samples) ]\n",
d20deb64 706 (double)rec->bytes_written / 1024.0 / 1024.0,
021e9f47 707 output_name,
d20deb64 708 rec->bytes_written / 24);
addc2785 709
de9ac07b 710 return 0;
39d17dac
ACM
711
712out_delete_session:
713 perf_session__delete(session);
714 return err;
de9ac07b 715}
0e9b20b8 716
bdfebd84
RAV
717#define BRANCH_OPT(n, m) \
718 { .name = n, .mode = (m) }
719
720#define BRANCH_END { .name = NULL }
721
722struct branch_mode {
723 const char *name;
724 int mode;
725};
726
727static const struct branch_mode branch_modes[] = {
728 BRANCH_OPT("u", PERF_SAMPLE_BRANCH_USER),
729 BRANCH_OPT("k", PERF_SAMPLE_BRANCH_KERNEL),
730 BRANCH_OPT("hv", PERF_SAMPLE_BRANCH_HV),
731 BRANCH_OPT("any", PERF_SAMPLE_BRANCH_ANY),
732 BRANCH_OPT("any_call", PERF_SAMPLE_BRANCH_ANY_CALL),
733 BRANCH_OPT("any_ret", PERF_SAMPLE_BRANCH_ANY_RETURN),
734 BRANCH_OPT("ind_call", PERF_SAMPLE_BRANCH_IND_CALL),
735 BRANCH_END
736};
737
738static int
a5aabdac 739parse_branch_stack(const struct option *opt, const char *str, int unset)
bdfebd84
RAV
740{
741#define ONLY_PLM \
742 (PERF_SAMPLE_BRANCH_USER |\
743 PERF_SAMPLE_BRANCH_KERNEL |\
744 PERF_SAMPLE_BRANCH_HV)
745
746 uint64_t *mode = (uint64_t *)opt->value;
747 const struct branch_mode *br;
a5aabdac 748 char *s, *os = NULL, *p;
bdfebd84
RAV
749 int ret = -1;
750
a5aabdac
SE
751 if (unset)
752 return 0;
bdfebd84 753
a5aabdac
SE
754 /*
755 * cannot set it twice, -b + --branch-filter for instance
756 */
757 if (*mode)
bdfebd84
RAV
758 return -1;
759
a5aabdac
SE
760 /* str may be NULL in case no arg is passed to -b */
761 if (str) {
762 /* because str is read-only */
763 s = os = strdup(str);
764 if (!s)
765 return -1;
766
767 for (;;) {
768 p = strchr(s, ',');
769 if (p)
770 *p = '\0';
771
772 for (br = branch_modes; br->name; br++) {
773 if (!strcasecmp(s, br->name))
774 break;
775 }
776 if (!br->name) {
777 ui__warning("unknown branch filter %s,"
778 " check man page\n", s);
779 goto error;
780 }
bdfebd84 781
a5aabdac 782 *mode |= br->mode;
bdfebd84 783
a5aabdac
SE
784 if (!p)
785 break;
bdfebd84 786
a5aabdac
SE
787 s = p + 1;
788 }
bdfebd84
RAV
789 }
790 ret = 0;
791
a5aabdac 792 /* default to any branch */
bdfebd84 793 if ((*mode & ~ONLY_PLM) == 0) {
a5aabdac 794 *mode = PERF_SAMPLE_BRANCH_ANY;
bdfebd84
RAV
795 }
796error:
797 free(os);
798 return ret;
799}
800
26d33022
JO
801#ifndef NO_LIBUNWIND_SUPPORT
802static int get_stack_size(char *str, unsigned long *_size)
803{
804 char *endptr;
805 unsigned long size;
806 unsigned long max_size = round_down(USHRT_MAX, sizeof(u64));
807
808 size = strtoul(str, &endptr, 0);
809
810 do {
811 if (*endptr)
812 break;
813
814 size = round_up(size, sizeof(u64));
815 if (!size || size > max_size)
816 break;
817
818 *_size = size;
819 return 0;
820
821 } while (0);
822
823 pr_err("callchain: Incorrect stack dump size (max %ld): %s\n",
824 max_size, str);
825 return -1;
826}
827#endif /* !NO_LIBUNWIND_SUPPORT */
828
829static int
830parse_callchain_opt(const struct option *opt __used, const char *arg,
831 int unset)
832{
833 struct perf_record *rec = (struct perf_record *)opt->value;
834 char *tok, *name, *saveptr = NULL;
835 char *buf;
836 int ret = -1;
837
838 /* --no-call-graph */
839 if (unset)
840 return 0;
841
842 /* We specified default option if none is provided. */
843 BUG_ON(!arg);
844
845 /* We need buffer that we know we can write to. */
846 buf = malloc(strlen(arg) + 1);
847 if (!buf)
848 return -ENOMEM;
849
850 strcpy(buf, arg);
851
852 tok = strtok_r((char *)buf, ",", &saveptr);
853 name = tok ? : (char *)buf;
854
855 do {
856 /* Framepointer style */
857 if (!strncmp(name, "fp", sizeof("fp"))) {
858 if (!strtok_r(NULL, ",", &saveptr)) {
859 rec->opts.call_graph = CALLCHAIN_FP;
860 ret = 0;
861 } else
862 pr_err("callchain: No more arguments "
863 "needed for -g fp\n");
864 break;
865
866#ifndef NO_LIBUNWIND_SUPPORT
867 /* Dwarf style */
868 } else if (!strncmp(name, "dwarf", sizeof("dwarf"))) {
869 ret = 0;
870 rec->opts.call_graph = CALLCHAIN_DWARF;
871 rec->opts.stack_dump_size = default_stack_dump_size;
872
873 tok = strtok_r(NULL, ",", &saveptr);
874 if (tok) {
875 unsigned long size = 0;
876
877 ret = get_stack_size(tok, &size);
878 rec->opts.stack_dump_size = size;
879 }
880
881 if (!ret)
882 pr_debug("callchain: stack dump size %d\n",
883 rec->opts.stack_dump_size);
884#endif /* !NO_LIBUNWIND_SUPPORT */
885 } else {
886 pr_err("callchain: Unknown -g option "
887 "value: %s\n", arg);
888 break;
889 }
890
891 } while (0);
892
893 free(buf);
894
895 if (!ret)
896 pr_debug("callchain: type %d\n", rec->opts.call_graph);
897
898 return ret;
899}
900
/* NULL-terminated usage lines handed to parse_options()/usage_with_options(). */
static const char * const record_usage[] = {
	"perf record [<options>] [<command>]",
	"perf record [<options>] -- <command> [<options>]",
	NULL
};
906
d20deb64
ACM
907/*
908 * XXX Ideally would be local to cmd_record() and passed to a perf_record__new
909 * because we need to have access to it in perf_record__exit, that is called
910 * after cmd_record() exits, but since record_options need to be accessible to
911 * builtin-script, leave it here.
912 *
 * At least we don't touch it in all the other functions here directly.
914 *
915 * Just say no to tons of global variables, sigh.
916 */
917static struct perf_record record = {
918 .opts = {
d20deb64
ACM
919 .mmap_pages = UINT_MAX,
920 .user_freq = UINT_MAX,
921 .user_interval = ULLONG_MAX,
447a6013 922 .freq = 4000,
d1cb9fce
NK
923 .target = {
924 .uses_mmap = true,
925 },
d20deb64
ACM
926 },
927 .write_mode = WRITE_FORCE,
928 .file_new = true,
929};
7865e817 930
d20deb64
ACM
931/*
932 * XXX Will stay a global variable till we fix builtin-script.c to stop messing
933 * with it and switch to use the library functions in perf_evlist that came
934 * from builtin-record.c, i.e. use perf_record_opts,
935 * perf_evlist__prepare_workload, etc instead of fork+exec'in 'perf record',
936 * using pipes, etc.
937 */
bca647aa 938const struct option record_options[] = {
d20deb64 939 OPT_CALLBACK('e', "event", &record.evlist, "event",
86847b62 940 "event selector. use 'perf list' to list available events",
f120f9d5 941 parse_events_option),
d20deb64 942 OPT_CALLBACK(0, "filter", &record.evlist, "filter",
c171b552 943 "event filter", parse_filter),
bea03405 944 OPT_STRING('p', "pid", &record.opts.target.pid, "pid",
d6d901c2 945 "record events on existing process id"),
bea03405 946 OPT_STRING('t', "tid", &record.opts.target.tid, "tid",
d6d901c2 947 "record events on existing thread id"),
d20deb64 948 OPT_INTEGER('r', "realtime", &record.realtime_prio,
0e9b20b8 949 "collect data with this RT SCHED_FIFO priority"),
d20deb64 950 OPT_BOOLEAN('D', "no-delay", &record.opts.no_delay,
acac03fa 951 "collect data without buffering"),
d20deb64 952 OPT_BOOLEAN('R', "raw-samples", &record.opts.raw_samples,
daac07b2 953 "collect raw sample records from all opened counters"),
bea03405 954 OPT_BOOLEAN('a', "all-cpus", &record.opts.target.system_wide,
0e9b20b8 955 "system-wide collection from all CPUs"),
d20deb64 956 OPT_BOOLEAN('A', "append", &record.append_file,
abaff32a 957 "append to the output file to do incremental profiling"),
bea03405 958 OPT_STRING('C', "cpu", &record.opts.target.cpu_list, "cpu",
c45c6ea2 959 "list of cpus to monitor"),
d20deb64 960 OPT_BOOLEAN('f', "force", &record.force,
7865e817 961 "overwrite existing data file (deprecated)"),
d20deb64
ACM
962 OPT_U64('c', "count", &record.opts.user_interval, "event period to sample"),
963 OPT_STRING('o', "output", &record.output_name, "file",
abaff32a 964 "output file name"),
d20deb64 965 OPT_BOOLEAN('i', "no-inherit", &record.opts.no_inherit,
2e6cdf99 966 "child tasks do not inherit counters"),
d20deb64
ACM
967 OPT_UINTEGER('F', "freq", &record.opts.user_freq, "profile at this frequency"),
968 OPT_UINTEGER('m', "mmap-pages", &record.opts.mmap_pages,
01c2d99b 969 "number of mmap data pages"),
d20deb64 970 OPT_BOOLEAN(0, "group", &record.opts.group,
43bece79 971 "put the counters into a counter group"),
26d33022
JO
972 OPT_CALLBACK_DEFAULT('g', "call-graph", &record, "mode[,dump_size]",
973 callchain_help, &parse_callchain_opt,
974 "fp"),
c0555642 975 OPT_INCR('v', "verbose", &verbose,
3da297a6 976 "be more verbose (show counter open errors, etc)"),
b44308f5 977 OPT_BOOLEAN('q', "quiet", &quiet, "don't print any message"),
d20deb64 978 OPT_BOOLEAN('s', "stat", &record.opts.inherit_stat,
649c48a9 979 "per thread counts"),
d20deb64 980 OPT_BOOLEAN('d', "data", &record.opts.sample_address,
4bba828d 981 "Sample addresses"),
d20deb64 982 OPT_BOOLEAN('T', "timestamp", &record.opts.sample_time, "Sample timestamps"),
3e76ac78 983 OPT_BOOLEAN('P', "period", &record.opts.period, "Sample period"),
d20deb64 984 OPT_BOOLEAN('n', "no-samples", &record.opts.no_samples,
649c48a9 985 "don't sample"),
d20deb64 986 OPT_BOOLEAN('N', "no-buildid-cache", &record.no_buildid_cache,
a1ac1d3c 987 "do not update the buildid cache"),
d20deb64 988 OPT_BOOLEAN('B', "no-buildid", &record.no_buildid,
baa2f6ce 989 "do not collect buildids in perf.data"),
d20deb64 990 OPT_CALLBACK('G', "cgroup", &record.evlist, "name",
023695d9
SE
991 "monitor event in cgroup name only",
992 parse_cgroups),
bea03405
NK
993 OPT_STRING('u', "uid", &record.opts.target.uid_str, "user",
994 "user to profile"),
a5aabdac
SE
995
996 OPT_CALLBACK_NOOPT('b', "branch-any", &record.opts.branch_stack,
997 "branch any", "sample any taken branches",
998 parse_branch_stack),
999
1000 OPT_CALLBACK('j', "branch-filter", &record.opts.branch_stack,
1001 "branch filter mask", "branch stack filter modes",
bdfebd84 1002 parse_branch_stack),
0e9b20b8
IM
1003 OPT_END()
1004};
1005
f37a291c 1006int cmd_record(int argc, const char **argv, const char *prefix __used)
0e9b20b8 1007{
69aad6f1
ACM
1008 int err = -ENOMEM;
1009 struct perf_evsel *pos;
d20deb64
ACM
1010 struct perf_evlist *evsel_list;
1011 struct perf_record *rec = &record;
16ad2ffb 1012 char errbuf[BUFSIZ];
0e9b20b8 1013
7e2ed097 1014 evsel_list = perf_evlist__new(NULL, NULL);
361c99a6
ACM
1015 if (evsel_list == NULL)
1016 return -ENOMEM;
1017
d20deb64
ACM
1018 rec->evlist = evsel_list;
1019
bca647aa 1020 argc = parse_options(argc, argv, record_options, record_usage,
655000e7 1021 PARSE_OPT_STOP_AT_NON_OPTION);
d67356e7 1022 if (!argc && perf_target__none(&rec->opts.target))
bca647aa 1023 usage_with_options(record_usage, record_options);
0e9b20b8 1024
d20deb64 1025 if (rec->force && rec->append_file) {
3780f488
NK
1026 ui__error("Can't overwrite and append at the same time."
1027 " You need to choose between -f and -A");
bca647aa 1028 usage_with_options(record_usage, record_options);
d20deb64
ACM
1029 } else if (rec->append_file) {
1030 rec->write_mode = WRITE_APPEND;
7865e817 1031 } else {
d20deb64 1032 rec->write_mode = WRITE_FORCE;
7865e817
FW
1033 }
1034
bea03405 1035 if (nr_cgroups && !rec->opts.target.system_wide) {
3780f488
NK
1036 ui__error("cgroup monitoring only available in"
1037 " system-wide mode\n");
023695d9
SE
1038 usage_with_options(record_usage, record_options);
1039 }
1040
655000e7 1041 symbol__init();
baa2f6ce 1042
ec80fde7 1043 if (symbol_conf.kptr_restrict)
646aaea6
ACM
1044 pr_warning(
1045"WARNING: Kernel address maps (/proc/{kallsyms,modules}) are restricted,\n"
1046"check /proc/sys/kernel/kptr_restrict.\n\n"
1047"Samples in kernel functions may not be resolved if a suitable vmlinux\n"
1048"file is not found in the buildid cache or in the vmlinux path.\n\n"
1049"Samples in kernel modules won't be resolved at all.\n\n"
1050"If some relocation was applied (e.g. kexec) symbols may be misresolved\n"
1051"even with a suitable vmlinux or kallsyms file.\n\n");
ec80fde7 1052
d20deb64 1053 if (rec->no_buildid_cache || rec->no_buildid)
a1ac1d3c 1054 disable_buildid_cache();
655000e7 1055
361c99a6
ACM
1056 if (evsel_list->nr_entries == 0 &&
1057 perf_evlist__add_default(evsel_list) < 0) {
69aad6f1
ACM
1058 pr_err("Not enough memory for event selector list\n");
1059 goto out_symbol_exit;
bbd36e5e 1060 }
0e9b20b8 1061
16ad2ffb
NK
1062 err = perf_target__validate(&rec->opts.target);
1063 if (err) {
1064 perf_target__strerror(&rec->opts.target, err, errbuf, BUFSIZ);
1065 ui__warning("%s", errbuf);
1066 }
1067
1068 err = perf_target__parse_uid(&rec->opts.target);
1069 if (err) {
1070 int saved_errno = errno;
4bd0f2d2 1071
16ad2ffb 1072 perf_target__strerror(&rec->opts.target, err, errbuf, BUFSIZ);
3780f488 1073 ui__error("%s", errbuf);
16ad2ffb
NK
1074
1075 err = -saved_errno;
0d37aa34 1076 goto out_free_fd;
16ad2ffb 1077 }
0d37aa34 1078
16ad2ffb 1079 err = -ENOMEM;
b809ac10 1080 if (perf_evlist__create_maps(evsel_list, &rec->opts.target) < 0)
dd7927f4 1081 usage_with_options(record_usage, record_options);
69aad6f1 1082
361c99a6 1083 list_for_each_entry(pos, &evsel_list->entries, node) {
7289f83c 1084 if (perf_header__push_event(pos->attr.config, perf_evsel__name(pos)))
ad7f4e3f 1085 goto out_free_fd;
d6d901c2 1086 }
5c581041 1087
d20deb64
ACM
1088 if (rec->opts.user_interval != ULLONG_MAX)
1089 rec->opts.default_interval = rec->opts.user_interval;
1090 if (rec->opts.user_freq != UINT_MAX)
1091 rec->opts.freq = rec->opts.user_freq;
f9212819 1092
7e4ff9e3
MG
1093 /*
1094 * User specified count overrides default frequency.
1095 */
d20deb64
ACM
1096 if (rec->opts.default_interval)
1097 rec->opts.freq = 0;
1098 else if (rec->opts.freq) {
1099 rec->opts.default_interval = rec->opts.freq;
7e4ff9e3 1100 } else {
3780f488 1101 ui__error("frequency and count are zero, aborting\n");
39d17dac 1102 err = -EINVAL;
5c581041 1103 goto out_free_fd;
7e4ff9e3
MG
1104 }
1105
d20deb64 1106 err = __cmd_record(&record, argc, argv);
39d17dac 1107out_free_fd:
7e2ed097 1108 perf_evlist__delete_maps(evsel_list);
d65a458b
ACM
1109out_symbol_exit:
1110 symbol__exit();
39d17dac 1111 return err;
0e9b20b8 1112}