perf session: Use perf_evlist__sample_id_all more extensively
[linux-2.6-block.git] / tools / perf / builtin-record.c
CommitLineData
abaff32a 1/*
bf9e1876
IM
2 * builtin-record.c
3 *
4 * Builtin record command: Record the profile of a workload
5 * (or a CPU, or a PID) into the perf.data output file - for
6 * later analysis via perf report.
abaff32a 7 */
b8f46c5a
XG
8#define _FILE_OFFSET_BITS 64
9
16f762a2 10#include "builtin.h"
bf9e1876
IM
11
12#include "perf.h"
13
6122e4e4 14#include "util/build-id.h"
6eda5838 15#include "util/util.h"
0e9b20b8 16#include "util/parse-options.h"
8ad8db37 17#include "util/parse-events.h"
6eda5838 18
7c6a1c65 19#include "util/header.h"
66e274f3 20#include "util/event.h"
361c99a6 21#include "util/evlist.h"
69aad6f1 22#include "util/evsel.h"
8f28827a 23#include "util/debug.h"
94c744b6 24#include "util/session.h"
45694aa7 25#include "util/tool.h"
8d06367f 26#include "util/symbol.h"
a12b51c4 27#include "util/cpumap.h"
fd78260b 28#include "util/thread_map.h"
7c6a1c65 29
97124d5e 30#include <unistd.h>
de9ac07b 31#include <sched.h>
a41794cd 32#include <sys/mman.h>
de9ac07b 33
/*
 * How an already existing output file is treated when recording starts.
 */
enum write_mode_t {
	WRITE_FORCE  = 0,	/* start from an empty (truncated) file */
	WRITE_APPEND = 1	/* keep the existing contents and add to them */
};
38
d20deb64 39struct perf_record {
45694aa7 40 struct perf_tool tool;
d20deb64
ACM
41 struct perf_record_opts opts;
42 u64 bytes_written;
43 const char *output_name;
44 struct perf_evlist *evlist;
45 struct perf_session *session;
46 const char *progname;
47 int output;
48 unsigned int page_size;
49 int realtime_prio;
50 enum write_mode_t write_mode;
51 bool no_buildid;
52 bool no_buildid_cache;
53 bool force;
54 bool file_new;
55 bool append_file;
56 long samples;
57 off_t post_processing_offset;
0f82ebc4 58};
a21ca2ca 59
d20deb64 60static void advance_output(struct perf_record *rec, size_t size)
9215545e 61{
d20deb64 62 rec->bytes_written += size;
9215545e
TZ
63}
64
d20deb64 65static void write_output(struct perf_record *rec, void *buf, size_t size)
f5970550
PZ
66{
67 while (size) {
d20deb64 68 int ret = write(rec->output, buf, size);
f5970550
PZ
69
70 if (ret < 0)
71 die("failed to write");
72
73 size -= ret;
74 buf += ret;
75
d20deb64 76 rec->bytes_written += ret;
f5970550
PZ
77 }
78}
79
45694aa7 80static int process_synthesized_event(struct perf_tool *tool,
d20deb64 81 union perf_event *event,
8d50e5b4 82 struct perf_sample *sample __used,
743eb868 83 struct machine *machine __used)
234fbbf5 84{
45694aa7 85 struct perf_record *rec = container_of(tool, struct perf_record, tool);
d20deb64 86 write_output(rec, event, event->header.size);
234fbbf5
ACM
87 return 0;
88}
89
d20deb64
ACM
90static void perf_record__mmap_read(struct perf_record *rec,
91 struct perf_mmap *md)
de9ac07b 92{
744bd8aa 93 unsigned int head = perf_mmap__read_head(md);
de9ac07b 94 unsigned int old = md->prev;
d20deb64 95 unsigned char *data = md->base + rec->page_size;
de9ac07b
PZ
96 unsigned long size;
97 void *buf;
de9ac07b 98
dc82009a
ACM
99 if (old == head)
100 return;
101
d20deb64 102 rec->samples++;
de9ac07b
PZ
103
104 size = head - old;
105
106 if ((old & md->mask) + size != (head & md->mask)) {
107 buf = &data[old & md->mask];
108 size = md->mask + 1 - (old & md->mask);
109 old += size;
021e9f47 110
d20deb64 111 write_output(rec, buf, size);
de9ac07b
PZ
112 }
113
114 buf = &data[old & md->mask];
115 size = head - old;
116 old += size;
021e9f47 117
d20deb64 118 write_output(rec, buf, size);
de9ac07b
PZ
119
120 md->prev = old;
115d2d89 121 perf_mmap__write_tail(md, old);
de9ac07b
PZ
122}
123
/*
 * State shared between sig_handler() and the rest of the file.  These are
 * written from signal context, hence volatile sig_atomic_t.
 */
static volatile sig_atomic_t done = 0;			/* a handled signal arrived */
static volatile sig_atomic_t signr = -1;		/* last signal received, -1 if none */
static volatile sig_atomic_t child_finished = 0;	/* SIGCHLD was received */

/*
 * Handler installed for SIGCHLD, SIGINT and SIGUSR1: remember which signal
 * arrived and raise the 'done' flag.
 */
static void sig_handler(int sig)
{
	if (sig == SIGCHLD)
		child_finished = 1;

	done = 1;
	signr = sig;
}
136
d20deb64 137static void perf_record__sig_exit(int exit_status __used, void *arg)
f7b7c26e 138{
d20deb64 139 struct perf_record *rec = arg;
33e49ea7
AK
140 int status;
141
d20deb64 142 if (rec->evlist->workload.pid > 0) {
33e49ea7 143 if (!child_finished)
d20deb64 144 kill(rec->evlist->workload.pid, SIGTERM);
33e49ea7
AK
145
146 wait(&status);
147 if (WIFSIGNALED(status))
d20deb64 148 psignal(WTERMSIG(status), rec->progname);
33e49ea7 149 }
933da83a 150
18483b81 151 if (signr == -1 || signr == SIGUSR1)
f7b7c26e
PZ
152 return;
153
154 signal(signr, SIG_DFL);
155 kill(getpid(), signr);
de9ac07b
PZ
156}
157
a91e5431
ACM
158static bool perf_evlist__equal(struct perf_evlist *evlist,
159 struct perf_evlist *other)
160{
161 struct perf_evsel *pos, *pair;
162
163 if (evlist->nr_entries != other->nr_entries)
164 return false;
165
166 pair = list_entry(other->entries.next, struct perf_evsel, node);
167
168 list_for_each_entry(pos, &evlist->entries, node) {
169 if (memcmp(&pos->attr, &pair->attr, sizeof(pos->attr) != 0))
170 return false;
171 pair = list_entry(pair->node.next, struct perf_evsel, node);
172 }
173
174 return true;
175}
176
d20deb64 177static void perf_record__open(struct perf_record *rec)
dd7927f4 178{
727ab04e 179 struct perf_evsel *pos, *first;
d20deb64
ACM
180 struct perf_evlist *evlist = rec->evlist;
181 struct perf_session *session = rec->session;
182 struct perf_record_opts *opts = &rec->opts;
dd7927f4 183
727ab04e
ACM
184 first = list_entry(evlist->entries.next, struct perf_evsel, node);
185
d20deb64 186 perf_evlist__config_attrs(evlist, opts);
0f82ebc4 187
dd7927f4
ACM
188 list_for_each_entry(pos, &evlist->entries, node) {
189 struct perf_event_attr *attr = &pos->attr;
727ab04e 190 struct xyarray *group_fd = NULL;
dd7927f4
ACM
191 /*
192 * Check if parse_single_tracepoint_event has already asked for
193 * PERF_SAMPLE_TIME.
194 *
195 * XXX this is kludgy but short term fix for problems introduced by
196 * eac23d1c that broke 'perf script' by having different sample_types
197 * when using multiple tracepoint events when we use a perf binary
198 * that tries to use sample_id_all on an older kernel.
199 *
200 * We need to move counter creation to perf_session, support
201 * different sample_types, etc.
202 */
203 bool time_needed = attr->sample_type & PERF_SAMPLE_TIME;
d6d901c2 204
d20deb64 205 if (opts->group && pos != first)
727ab04e 206 group_fd = first->fd;
0c978128
ACM
207fallback_missing_features:
208 if (opts->exclude_guest_missing)
209 attr->exclude_guest = attr->exclude_host = 0;
dd7927f4 210retry_sample_id:
808e1226 211 attr->sample_id_all = opts->sample_id_all_missing ? 0 : 1;
dd7927f4 212try_again:
ed80f581 213 if (perf_evsel__open(pos, evlist->cpus, evlist->threads,
d20deb64 214 opts->group, group_fd) < 0) {
d6d901c2
ZY
215 int err = errno;
216
c286c419 217 if (err == EPERM || err == EACCES) {
b8631e6e 218 ui__error_paranoid();
c286c419 219 exit(EXIT_FAILURE);
bea03405 220 } else if (err == ENODEV && opts->target.cpu_list) {
d6d901c2
ZY
221 die("No such device - did you specify"
222 " an out-of-range profile CPU?\n");
0c978128
ACM
223 } else if (err == EINVAL) {
224 if (!opts->exclude_guest_missing &&
225 (attr->exclude_guest || attr->exclude_host)) {
226 pr_debug("Old kernel, cannot exclude "
227 "guest or host samples.\n");
228 opts->exclude_guest_missing = true;
229 goto fallback_missing_features;
808e1226 230 } else if (!opts->sample_id_all_missing) {
0c978128
ACM
231 /*
232 * Old kernel, no attr->sample_id_type_all field
233 */
808e1226 234 opts->sample_id_all_missing = true;
0c978128
ACM
235 if (!opts->sample_time && !opts->raw_samples && !time_needed)
236 attr->sample_type &= ~PERF_SAMPLE_TIME;
237
238 goto retry_sample_id;
239 }
d6d901c2 240 }
3da297a6 241
d6d901c2
ZY
242 /*
243 * If it's cycles then fall back to hrtimer
244 * based cpu-clock-tick sw counter, which
028d455b
DA
245 * is always available even if no PMU support.
246 *
247 * PPC returns ENXIO until 2.6.37 (behavior changed
248 * with commit b0a873e).
d6d901c2 249 */
028d455b
DA
250 if ((err == ENOENT || err == ENXIO)
251 && attr->type == PERF_TYPE_HARDWARE
d6d901c2
ZY
252 && attr->config == PERF_COUNT_HW_CPU_CYCLES) {
253
254 if (verbose)
ca6a4258
DA
255 ui__warning("The cycles event is not supported, "
256 "trying to fall back to cpu-clock-ticks\n");
d6d901c2
ZY
257 attr->type = PERF_TYPE_SOFTWARE;
258 attr->config = PERF_COUNT_SW_CPU_CLOCK;
d1cae34d
DA
259 if (pos->name) {
260 free(pos->name);
261 pos->name = NULL;
262 }
d6d901c2
ZY
263 goto try_again;
264 }
ca6a4258
DA
265
266 if (err == ENOENT) {
3780f488 267 ui__error("The %s event is not supported.\n",
7289f83c 268 perf_evsel__name(pos));
ca6a4258
DA
269 exit(EXIT_FAILURE);
270 }
271
d6d901c2 272 printf("\n");
d9cf837e 273 error("sys_perf_event_open() syscall returned with %d (%s). /bin/dmesg may provide additional information.\n",
dd7927f4 274 err, strerror(err));
bfd45118
SK
275
276#if defined(__i386__) || defined(__x86_64__)
d6d901c2
ZY
277 if (attr->type == PERF_TYPE_HARDWARE && err == EOPNOTSUPP)
278 die("No hardware sampling interrupt available."
279 " No APIC? If so then you can boot the kernel"
280 " with the \"lapic\" boot parameter to"
281 " force-enable it.\n");
bfd45118
SK
282#endif
283
d6d901c2 284 die("No CONFIG_PERF_EVENTS=y kernel support configured?\n");
c171b552
LZ
285 }
286 }
a43d3f08 287
0a102479
FW
288 if (perf_evlist__set_filters(evlist)) {
289 error("failed to set filter with %d (%s)\n", errno,
290 strerror(errno));
291 exit(-1);
292 }
293
18e60939
NE
294 if (perf_evlist__mmap(evlist, opts->mmap_pages, false) < 0) {
295 if (errno == EPERM)
296 die("Permission error mapping pages.\n"
297 "Consider increasing "
298 "/proc/sys/kernel/perf_event_mlock_kb,\n"
299 "or try again with a smaller value of -m/--mmap_pages.\n"
300 "(current value: %d)\n", opts->mmap_pages);
41d0d933
NE
301 else if (!is_power_of_2(opts->mmap_pages))
302 die("--mmap_pages/-m value must be a power of two.");
303
0a27d7f9 304 die("failed to mmap with %d (%s)\n", errno, strerror(errno));
18e60939 305 }
0a27d7f9 306
d20deb64 307 if (rec->file_new)
a91e5431
ACM
308 session->evlist = evlist;
309 else {
310 if (!perf_evlist__equal(session->evlist, evlist)) {
311 fprintf(stderr, "incompatible append\n");
312 exit(-1);
313 }
314 }
315
316 perf_session__update_sample_type(session);
16c8a109
PZ
317}
318
d20deb64 319static int process_buildids(struct perf_record *rec)
6122e4e4 320{
d20deb64 321 u64 size = lseek(rec->output, 0, SEEK_CUR);
6122e4e4 322
9f591fd7
ACM
323 if (size == 0)
324 return 0;
325
d20deb64
ACM
326 rec->session->fd = rec->output;
327 return __perf_session__process_events(rec->session, rec->post_processing_offset,
328 size - rec->post_processing_offset,
6122e4e4
ACM
329 size, &build_id__mark_dso_hit_ops);
330}
331
d20deb64 332static void perf_record__exit(int status __used, void *arg)
f5970550 333{
d20deb64
ACM
334 struct perf_record *rec = arg;
335
336 if (!rec->opts.pipe_output) {
337 rec->session->header.data_size += rec->bytes_written;
338
339 if (!rec->no_buildid)
340 process_buildids(rec);
341 perf_session__write_header(rec->session, rec->evlist,
342 rec->output, true);
343 perf_session__delete(rec->session);
344 perf_evlist__delete(rec->evlist);
d65a458b 345 symbol__exit();
c7929e47 346 }
f5970550
PZ
347}
348
8115d60c 349static void perf_event__synthesize_guest_os(struct machine *machine, void *data)
a1645ce1
ZY
350{
351 int err;
45694aa7 352 struct perf_tool *tool = data;
a1645ce1 353
23346f21 354 if (machine__is_host(machine))
a1645ce1
ZY
355 return;
356
357 /*
358 *As for guest kernel when processing subcommand record&report,
359 *we arrange module mmap prior to guest kernel mmap and trigger
360 *a preload dso because default guest module symbols are loaded
361 *from guest kallsyms instead of /lib/modules/XXX/XXX. This
362 *method is used to avoid symbol missing when the first addr is
363 *in module instead of in guest kernel.
364 */
45694aa7 365 err = perf_event__synthesize_modules(tool, process_synthesized_event,
743eb868 366 machine);
a1645ce1
ZY
367 if (err < 0)
368 pr_err("Couldn't record guest kernel [%d]'s reference"
23346f21 369 " relocation symbol.\n", machine->pid);
a1645ce1 370
a1645ce1
ZY
371 /*
372 * We use _stext for guest kernel because guest kernel's /proc/kallsyms
373 * have no _text sometimes.
374 */
45694aa7 375 err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
743eb868 376 machine, "_text");
a1645ce1 377 if (err < 0)
45694aa7 378 err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
743eb868 379 machine, "_stext");
a1645ce1
ZY
380 if (err < 0)
381 pr_err("Couldn't record guest kernel [%d]'s reference"
23346f21 382 " relocation symbol.\n", machine->pid);
a1645ce1
ZY
383}
384
98402807
FW
385static struct perf_event_header finished_round_event = {
386 .size = sizeof(struct perf_event_header),
387 .type = PERF_RECORD_FINISHED_ROUND,
388};
389
d20deb64 390static void perf_record__mmap_read_all(struct perf_record *rec)
98402807 391{
0e2e63dd 392 int i;
98402807 393
d20deb64
ACM
394 for (i = 0; i < rec->evlist->nr_mmaps; i++) {
395 if (rec->evlist->mmap[i].base)
396 perf_record__mmap_read(rec, &rec->evlist->mmap[i]);
98402807
FW
397 }
398
2eeaaa09 399 if (perf_header__has_feat(&rec->session->header, HEADER_TRACING_DATA))
d20deb64 400 write_output(rec, &finished_round_event, sizeof(finished_round_event));
98402807
FW
401}
402
d20deb64 403static int __cmd_record(struct perf_record *rec, int argc, const char **argv)
16c8a109 404{
abaff32a 405 struct stat st;
abaff32a 406 int flags;
781ba9d2 407 int err, output, feat;
8b412664 408 unsigned long waking = 0;
46be604b 409 const bool forks = argc > 0;
23346f21 410 struct machine *machine;
45694aa7 411 struct perf_tool *tool = &rec->tool;
d20deb64
ACM
412 struct perf_record_opts *opts = &rec->opts;
413 struct perf_evlist *evsel_list = rec->evlist;
414 const char *output_name = rec->output_name;
415 struct perf_session *session;
de9ac07b 416
d20deb64 417 rec->progname = argv[0];
33e49ea7 418
d20deb64 419 rec->page_size = sysconf(_SC_PAGE_SIZE);
de9ac07b 420
d20deb64 421 on_exit(perf_record__sig_exit, rec);
f5970550
PZ
422 signal(SIGCHLD, sig_handler);
423 signal(SIGINT, sig_handler);
18483b81 424 signal(SIGUSR1, sig_handler);
f5970550 425
d7065adb
FBH
426 if (!output_name) {
427 if (!fstat(STDOUT_FILENO, &st) && S_ISFIFO(st.st_mode))
d20deb64 428 opts->pipe_output = true;
d7065adb 429 else
d20deb64 430 rec->output_name = output_name = "perf.data";
d7065adb
FBH
431 }
432 if (output_name) {
433 if (!strcmp(output_name, "-"))
d20deb64 434 opts->pipe_output = true;
d7065adb 435 else if (!stat(output_name, &st) && st.st_size) {
d20deb64 436 if (rec->write_mode == WRITE_FORCE) {
d7065adb
FBH
437 char oldname[PATH_MAX];
438 snprintf(oldname, sizeof(oldname), "%s.old",
439 output_name);
440 unlink(oldname);
441 rename(output_name, oldname);
442 }
d20deb64
ACM
443 } else if (rec->write_mode == WRITE_APPEND) {
444 rec->write_mode = WRITE_FORCE;
266e0e21 445 }
97124d5e
PZ
446 }
447
f887f301 448 flags = O_CREAT|O_RDWR;
d20deb64
ACM
449 if (rec->write_mode == WRITE_APPEND)
450 rec->file_new = 0;
abaff32a
IM
451 else
452 flags |= O_TRUNC;
453
d20deb64 454 if (opts->pipe_output)
529870e3
TZ
455 output = STDOUT_FILENO;
456 else
457 output = open(output_name, flags, S_IRUSR | S_IWUSR);
de9ac07b
PZ
458 if (output < 0) {
459 perror("failed to create output file");
460 exit(-1);
461 }
462
d20deb64
ACM
463 rec->output = output;
464
7865e817 465 session = perf_session__new(output_name, O_WRONLY,
d20deb64 466 rec->write_mode == WRITE_FORCE, false, NULL);
94c744b6 467 if (session == NULL) {
a9a70bbc
ACM
468 pr_err("Not enough memory for reading perf file header\n");
469 return -1;
470 }
471
d20deb64
ACM
472 rec->session = session;
473
781ba9d2
RR
474 for (feat = HEADER_FIRST_FEATURE; feat < HEADER_LAST_FEATURE; feat++)
475 perf_header__set_feat(&session->header, feat);
476
477 if (rec->no_buildid)
478 perf_header__clear_feat(&session->header, HEADER_BUILD_ID);
479
480 if (!have_tracepoints(&evsel_list->entries))
2eeaaa09 481 perf_header__clear_feat(&session->header, HEADER_TRACING_DATA);
baa2f6ce 482
330aa675
SE
483 if (!rec->opts.branch_stack)
484 perf_header__clear_feat(&session->header, HEADER_BRANCH_STACK);
485
d20deb64 486 if (!rec->file_new) {
a91e5431 487 err = perf_session__read_header(session, output);
4dc0a04b 488 if (err < 0)
39d17dac 489 goto out_delete_session;
4dc0a04b
ACM
490 }
491
d4db3f16 492 if (forks) {
d20deb64 493 err = perf_evlist__prepare_workload(evsel_list, opts, argv);
35b9d88e
ACM
494 if (err < 0) {
495 pr_err("Couldn't run the workload!\n");
496 goto out_delete_session;
856e9660 497 }
856e9660
PZ
498 }
499
d20deb64 500 perf_record__open(rec);
de9ac07b 501
712a4b60 502 /*
d20deb64 503 * perf_session__delete(session) will be called at perf_record__exit()
712a4b60 504 */
d20deb64 505 on_exit(perf_record__exit, rec);
712a4b60 506
d20deb64 507 if (opts->pipe_output) {
529870e3
TZ
508 err = perf_header__write_pipe(output);
509 if (err < 0)
510 return err;
d20deb64 511 } else if (rec->file_new) {
a91e5431
ACM
512 err = perf_session__write_header(session, evsel_list,
513 output, false);
d5eed904
ACM
514 if (err < 0)
515 return err;
56b03f3c
ACM
516 }
517
d3665498 518 if (!rec->no_buildid
e20960c0 519 && !perf_header__has_feat(&session->header, HEADER_BUILD_ID)) {
d3665498 520 pr_err("Couldn't generate buildids. "
e20960c0
RR
521 "Use --no-buildid to profile anyway.\n");
522 return -1;
523 }
524
d20deb64 525 rec->post_processing_offset = lseek(output, 0, SEEK_CUR);
6122e4e4 526
743eb868
ACM
527 machine = perf_session__find_host_machine(session);
528 if (!machine) {
529 pr_err("Couldn't find native kernel information.\n");
530 return -1;
531 }
532
d20deb64 533 if (opts->pipe_output) {
45694aa7 534 err = perf_event__synthesize_attrs(tool, session,
d20deb64 535 process_synthesized_event);
2c46dbb5
TZ
536 if (err < 0) {
537 pr_err("Couldn't synthesize attrs.\n");
538 return err;
539 }
cd19a035 540
45694aa7 541 err = perf_event__synthesize_event_types(tool, process_synthesized_event,
743eb868 542 machine);
cd19a035
TZ
543 if (err < 0) {
544 pr_err("Couldn't synthesize event_types.\n");
545 return err;
546 }
9215545e 547
361c99a6 548 if (have_tracepoints(&evsel_list->entries)) {
63e0c771
TZ
549 /*
550 * FIXME err <= 0 here actually means that
551 * there were no tracepoints so its not really
552 * an error, just that we don't need to
553 * synthesize anything. We really have to
554 * return this more properly and also
555 * propagate errors that now are calling die()
556 */
45694aa7 557 err = perf_event__synthesize_tracing_data(tool, output, evsel_list,
743eb868 558 process_synthesized_event);
63e0c771
TZ
559 if (err <= 0) {
560 pr_err("Couldn't record tracing data.\n");
561 return err;
562 }
d20deb64 563 advance_output(rec, err);
63e0c771 564 }
2c46dbb5
TZ
565 }
566
45694aa7 567 err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
743eb868 568 machine, "_text");
70162138 569 if (err < 0)
45694aa7 570 err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
743eb868 571 machine, "_stext");
c1a3a4b9
ACM
572 if (err < 0)
573 pr_err("Couldn't record kernel reference relocation symbol\n"
574 "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
575 "Check /proc/kallsyms permission or run as root.\n");
b7cece76 576
45694aa7 577 err = perf_event__synthesize_modules(tool, process_synthesized_event,
743eb868 578 machine);
c1a3a4b9
ACM
579 if (err < 0)
580 pr_err("Couldn't record kernel module information.\n"
581 "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
582 "Check /proc/modules permission or run as root.\n");
583
a1645ce1 584 if (perf_guest)
45694aa7 585 perf_session__process_machines(session, tool,
8115d60c 586 perf_event__synthesize_guest_os);
7c6a1c65 587
bea03405 588 if (!opts->target.system_wide)
45694aa7 589 perf_event__synthesize_thread_map(tool, evsel_list->threads,
7c940c18 590 process_synthesized_event,
743eb868 591 machine);
234fbbf5 592 else
45694aa7 593 perf_event__synthesize_threads(tool, process_synthesized_event,
743eb868 594 machine);
7c6a1c65 595
d20deb64 596 if (rec->realtime_prio) {
de9ac07b
PZ
597 struct sched_param param;
598
d20deb64 599 param.sched_priority = rec->realtime_prio;
de9ac07b 600 if (sched_setscheduler(0, SCHED_FIFO, &param)) {
6beba7ad 601 pr_err("Could not set realtime priority.\n");
de9ac07b
PZ
602 exit(-1);
603 }
604 }
605
764e16a3
DA
606 perf_evlist__enable(evsel_list);
607
856e9660
PZ
608 /*
609 * Let the child rip
610 */
d4db3f16 611 if (forks)
35b9d88e 612 perf_evlist__start_workload(evsel_list);
856e9660 613
649c48a9 614 for (;;) {
d20deb64 615 int hits = rec->samples;
de9ac07b 616
d20deb64 617 perf_record__mmap_read_all(rec);
de9ac07b 618
d20deb64 619 if (hits == rec->samples) {
649c48a9
PZ
620 if (done)
621 break;
5c581041 622 err = poll(evsel_list->pollfd, evsel_list->nr_fds, -1);
8b412664
PZ
623 waking++;
624 }
625
4152ab37
ACM
626 if (done)
627 perf_evlist__disable(evsel_list);
de9ac07b
PZ
628 }
629
18483b81 630 if (quiet || signr == SIGUSR1)
b44308f5
ACM
631 return 0;
632
8b412664
PZ
633 fprintf(stderr, "[ perf record: Woken up %ld times to write data ]\n", waking);
634
021e9f47
IM
635 /*
636 * Approximate RIP event size: 24 bytes.
637 */
638 fprintf(stderr,
9486aa38 639 "[ perf record: Captured and wrote %.3f MB %s (~%" PRIu64 " samples) ]\n",
d20deb64 640 (double)rec->bytes_written / 1024.0 / 1024.0,
021e9f47 641 output_name,
d20deb64 642 rec->bytes_written / 24);
addc2785 643
de9ac07b 644 return 0;
39d17dac
ACM
645
646out_delete_session:
647 perf_session__delete(session);
648 return err;
de9ac07b 649}
0e9b20b8 650
bdfebd84
RAV
651#define BRANCH_OPT(n, m) \
652 { .name = n, .mode = (m) }
653
654#define BRANCH_END { .name = NULL }
655
656struct branch_mode {
657 const char *name;
658 int mode;
659};
660
661static const struct branch_mode branch_modes[] = {
662 BRANCH_OPT("u", PERF_SAMPLE_BRANCH_USER),
663 BRANCH_OPT("k", PERF_SAMPLE_BRANCH_KERNEL),
664 BRANCH_OPT("hv", PERF_SAMPLE_BRANCH_HV),
665 BRANCH_OPT("any", PERF_SAMPLE_BRANCH_ANY),
666 BRANCH_OPT("any_call", PERF_SAMPLE_BRANCH_ANY_CALL),
667 BRANCH_OPT("any_ret", PERF_SAMPLE_BRANCH_ANY_RETURN),
668 BRANCH_OPT("ind_call", PERF_SAMPLE_BRANCH_IND_CALL),
669 BRANCH_END
670};
671
672static int
a5aabdac 673parse_branch_stack(const struct option *opt, const char *str, int unset)
bdfebd84
RAV
674{
675#define ONLY_PLM \
676 (PERF_SAMPLE_BRANCH_USER |\
677 PERF_SAMPLE_BRANCH_KERNEL |\
678 PERF_SAMPLE_BRANCH_HV)
679
680 uint64_t *mode = (uint64_t *)opt->value;
681 const struct branch_mode *br;
a5aabdac 682 char *s, *os = NULL, *p;
bdfebd84
RAV
683 int ret = -1;
684
a5aabdac
SE
685 if (unset)
686 return 0;
bdfebd84 687
a5aabdac
SE
688 /*
689 * cannot set it twice, -b + --branch-filter for instance
690 */
691 if (*mode)
bdfebd84
RAV
692 return -1;
693
a5aabdac
SE
694 /* str may be NULL in case no arg is passed to -b */
695 if (str) {
696 /* because str is read-only */
697 s = os = strdup(str);
698 if (!s)
699 return -1;
700
701 for (;;) {
702 p = strchr(s, ',');
703 if (p)
704 *p = '\0';
705
706 for (br = branch_modes; br->name; br++) {
707 if (!strcasecmp(s, br->name))
708 break;
709 }
710 if (!br->name) {
711 ui__warning("unknown branch filter %s,"
712 " check man page\n", s);
713 goto error;
714 }
bdfebd84 715
a5aabdac 716 *mode |= br->mode;
bdfebd84 717
a5aabdac
SE
718 if (!p)
719 break;
bdfebd84 720
a5aabdac
SE
721 s = p + 1;
722 }
bdfebd84
RAV
723 }
724 ret = 0;
725
a5aabdac 726 /* default to any branch */
bdfebd84 727 if ((*mode & ~ONLY_PLM) == 0) {
a5aabdac 728 *mode = PERF_SAMPLE_BRANCH_ANY;
bdfebd84
RAV
729 }
730error:
731 free(os);
732 return ret;
733}
734
/* NULL terminated usage lines shown for 'perf record'. */
static const char * const record_usage[] = {
	/* workload given directly */
	"perf record [<options>] [<command>]",
	/* workload with its own options, separated by "--" */
	"perf record [<options>] -- <command> [<options>]",
	NULL
};
740
d20deb64
ACM
/*
 * XXX Ideally would be local to cmd_record() and passed to a perf_record__new
 * because we need to have access to it in perf_record__exit, that is called
 * after cmd_record() exits, but since record_options need to be accessible to
 * builtin-script, leave it here.
 *
 * At least we don't touch it in all the other functions here directly.
 *
 * Just say no to tons of global variables, sigh.
 */
751static struct perf_record record = {
752 .opts = {
d20deb64
ACM
753 .mmap_pages = UINT_MAX,
754 .user_freq = UINT_MAX,
755 .user_interval = ULLONG_MAX,
447a6013 756 .freq = 4000,
d1cb9fce
NK
757 .target = {
758 .uses_mmap = true,
759 },
d20deb64
ACM
760 },
761 .write_mode = WRITE_FORCE,
762 .file_new = true,
763};
7865e817 764
d20deb64
ACM
765/*
766 * XXX Will stay a global variable till we fix builtin-script.c to stop messing
767 * with it and switch to use the library functions in perf_evlist that came
768 * from builtin-record.c, i.e. use perf_record_opts,
769 * perf_evlist__prepare_workload, etc instead of fork+exec'in 'perf record',
770 * using pipes, etc.
771 */
bca647aa 772const struct option record_options[] = {
d20deb64 773 OPT_CALLBACK('e', "event", &record.evlist, "event",
86847b62 774 "event selector. use 'perf list' to list available events",
f120f9d5 775 parse_events_option),
d20deb64 776 OPT_CALLBACK(0, "filter", &record.evlist, "filter",
c171b552 777 "event filter", parse_filter),
bea03405 778 OPT_STRING('p', "pid", &record.opts.target.pid, "pid",
d6d901c2 779 "record events on existing process id"),
bea03405 780 OPT_STRING('t', "tid", &record.opts.target.tid, "tid",
d6d901c2 781 "record events on existing thread id"),
d20deb64 782 OPT_INTEGER('r', "realtime", &record.realtime_prio,
0e9b20b8 783 "collect data with this RT SCHED_FIFO priority"),
d20deb64 784 OPT_BOOLEAN('D', "no-delay", &record.opts.no_delay,
acac03fa 785 "collect data without buffering"),
d20deb64 786 OPT_BOOLEAN('R', "raw-samples", &record.opts.raw_samples,
daac07b2 787 "collect raw sample records from all opened counters"),
bea03405 788 OPT_BOOLEAN('a', "all-cpus", &record.opts.target.system_wide,
0e9b20b8 789 "system-wide collection from all CPUs"),
d20deb64 790 OPT_BOOLEAN('A', "append", &record.append_file,
abaff32a 791 "append to the output file to do incremental profiling"),
bea03405 792 OPT_STRING('C', "cpu", &record.opts.target.cpu_list, "cpu",
c45c6ea2 793 "list of cpus to monitor"),
d20deb64 794 OPT_BOOLEAN('f', "force", &record.force,
7865e817 795 "overwrite existing data file (deprecated)"),
d20deb64
ACM
796 OPT_U64('c', "count", &record.opts.user_interval, "event period to sample"),
797 OPT_STRING('o', "output", &record.output_name, "file",
abaff32a 798 "output file name"),
d20deb64 799 OPT_BOOLEAN('i', "no-inherit", &record.opts.no_inherit,
2e6cdf99 800 "child tasks do not inherit counters"),
d20deb64
ACM
801 OPT_UINTEGER('F', "freq", &record.opts.user_freq, "profile at this frequency"),
802 OPT_UINTEGER('m', "mmap-pages", &record.opts.mmap_pages,
01c2d99b 803 "number of mmap data pages"),
d20deb64 804 OPT_BOOLEAN(0, "group", &record.opts.group,
43bece79 805 "put the counters into a counter group"),
d20deb64 806 OPT_BOOLEAN('g', "call-graph", &record.opts.call_graph,
3efa1cc9 807 "do call-graph (stack chain/backtrace) recording"),
c0555642 808 OPT_INCR('v', "verbose", &verbose,
3da297a6 809 "be more verbose (show counter open errors, etc)"),
b44308f5 810 OPT_BOOLEAN('q', "quiet", &quiet, "don't print any message"),
d20deb64 811 OPT_BOOLEAN('s', "stat", &record.opts.inherit_stat,
649c48a9 812 "per thread counts"),
d20deb64 813 OPT_BOOLEAN('d', "data", &record.opts.sample_address,
4bba828d 814 "Sample addresses"),
d20deb64 815 OPT_BOOLEAN('T', "timestamp", &record.opts.sample_time, "Sample timestamps"),
3e76ac78 816 OPT_BOOLEAN('P', "period", &record.opts.period, "Sample period"),
d20deb64 817 OPT_BOOLEAN('n', "no-samples", &record.opts.no_samples,
649c48a9 818 "don't sample"),
d20deb64 819 OPT_BOOLEAN('N', "no-buildid-cache", &record.no_buildid_cache,
a1ac1d3c 820 "do not update the buildid cache"),
d20deb64 821 OPT_BOOLEAN('B', "no-buildid", &record.no_buildid,
baa2f6ce 822 "do not collect buildids in perf.data"),
d20deb64 823 OPT_CALLBACK('G', "cgroup", &record.evlist, "name",
023695d9
SE
824 "monitor event in cgroup name only",
825 parse_cgroups),
bea03405
NK
826 OPT_STRING('u', "uid", &record.opts.target.uid_str, "user",
827 "user to profile"),
a5aabdac
SE
828
829 OPT_CALLBACK_NOOPT('b', "branch-any", &record.opts.branch_stack,
830 "branch any", "sample any taken branches",
831 parse_branch_stack),
832
833 OPT_CALLBACK('j', "branch-filter", &record.opts.branch_stack,
834 "branch filter mask", "branch stack filter modes",
bdfebd84 835 parse_branch_stack),
0e9b20b8
IM
836 OPT_END()
837};
838
f37a291c 839int cmd_record(int argc, const char **argv, const char *prefix __used)
0e9b20b8 840{
69aad6f1
ACM
841 int err = -ENOMEM;
842 struct perf_evsel *pos;
d20deb64
ACM
843 struct perf_evlist *evsel_list;
844 struct perf_record *rec = &record;
16ad2ffb 845 char errbuf[BUFSIZ];
0e9b20b8 846
fbe96f29
SE
847 perf_header__set_cmdline(argc, argv);
848
7e2ed097 849 evsel_list = perf_evlist__new(NULL, NULL);
361c99a6
ACM
850 if (evsel_list == NULL)
851 return -ENOMEM;
852
d20deb64
ACM
853 rec->evlist = evsel_list;
854
bca647aa 855 argc = parse_options(argc, argv, record_options, record_usage,
655000e7 856 PARSE_OPT_STOP_AT_NON_OPTION);
d67356e7 857 if (!argc && perf_target__none(&rec->opts.target))
bca647aa 858 usage_with_options(record_usage, record_options);
0e9b20b8 859
d20deb64 860 if (rec->force && rec->append_file) {
3780f488
NK
861 ui__error("Can't overwrite and append at the same time."
862 " You need to choose between -f and -A");
bca647aa 863 usage_with_options(record_usage, record_options);
d20deb64
ACM
864 } else if (rec->append_file) {
865 rec->write_mode = WRITE_APPEND;
7865e817 866 } else {
d20deb64 867 rec->write_mode = WRITE_FORCE;
7865e817
FW
868 }
869
bea03405 870 if (nr_cgroups && !rec->opts.target.system_wide) {
3780f488
NK
871 ui__error("cgroup monitoring only available in"
872 " system-wide mode\n");
023695d9
SE
873 usage_with_options(record_usage, record_options);
874 }
875
655000e7 876 symbol__init();
baa2f6ce 877
ec80fde7 878 if (symbol_conf.kptr_restrict)
646aaea6
ACM
879 pr_warning(
880"WARNING: Kernel address maps (/proc/{kallsyms,modules}) are restricted,\n"
881"check /proc/sys/kernel/kptr_restrict.\n\n"
882"Samples in kernel functions may not be resolved if a suitable vmlinux\n"
883"file is not found in the buildid cache or in the vmlinux path.\n\n"
884"Samples in kernel modules won't be resolved at all.\n\n"
885"If some relocation was applied (e.g. kexec) symbols may be misresolved\n"
886"even with a suitable vmlinux or kallsyms file.\n\n");
ec80fde7 887
d20deb64 888 if (rec->no_buildid_cache || rec->no_buildid)
a1ac1d3c 889 disable_buildid_cache();
655000e7 890
361c99a6
ACM
891 if (evsel_list->nr_entries == 0 &&
892 perf_evlist__add_default(evsel_list) < 0) {
69aad6f1
ACM
893 pr_err("Not enough memory for event selector list\n");
894 goto out_symbol_exit;
bbd36e5e 895 }
0e9b20b8 896
16ad2ffb
NK
897 err = perf_target__validate(&rec->opts.target);
898 if (err) {
899 perf_target__strerror(&rec->opts.target, err, errbuf, BUFSIZ);
900 ui__warning("%s", errbuf);
901 }
902
903 err = perf_target__parse_uid(&rec->opts.target);
904 if (err) {
905 int saved_errno = errno;
4bd0f2d2 906
16ad2ffb 907 perf_target__strerror(&rec->opts.target, err, errbuf, BUFSIZ);
3780f488 908 ui__error("%s", errbuf);
16ad2ffb
NK
909
910 err = -saved_errno;
0d37aa34 911 goto out_free_fd;
16ad2ffb 912 }
0d37aa34 913
16ad2ffb 914 err = -ENOMEM;
b809ac10 915 if (perf_evlist__create_maps(evsel_list, &rec->opts.target) < 0)
dd7927f4 916 usage_with_options(record_usage, record_options);
69aad6f1 917
361c99a6 918 list_for_each_entry(pos, &evsel_list->entries, node) {
7289f83c 919 if (perf_header__push_event(pos->attr.config, perf_evsel__name(pos)))
ad7f4e3f 920 goto out_free_fd;
d6d901c2 921 }
5c581041 922
d20deb64
ACM
923 if (rec->opts.user_interval != ULLONG_MAX)
924 rec->opts.default_interval = rec->opts.user_interval;
925 if (rec->opts.user_freq != UINT_MAX)
926 rec->opts.freq = rec->opts.user_freq;
f9212819 927
7e4ff9e3
MG
928 /*
929 * User specified count overrides default frequency.
930 */
d20deb64
ACM
931 if (rec->opts.default_interval)
932 rec->opts.freq = 0;
933 else if (rec->opts.freq) {
934 rec->opts.default_interval = rec->opts.freq;
7e4ff9e3 935 } else {
3780f488 936 ui__error("frequency and count are zero, aborting\n");
39d17dac 937 err = -EINVAL;
5c581041 938 goto out_free_fd;
7e4ff9e3
MG
939 }
940
d20deb64 941 err = __cmd_record(&record, argc, argv);
39d17dac 942out_free_fd:
7e2ed097 943 perf_evlist__delete_maps(evsel_list);
d65a458b
ACM
944out_symbol_exit:
945 symbol__exit();
39d17dac 946 return err;
0e9b20b8 947}