/*
 * builtin-record.c
 *
 * Builtin record command: Record the profile of a workload
 * (or a CPU, or a PID) into the perf.data output file - for
 * later analysis via perf report.
 */
#define _FILE_OFFSET_BITS 64

#include "builtin.h"

#include "perf.h"

#include "util/build-id.h"
#include "util/util.h"
#include "util/parse-options.h"
#include "util/parse-events.h"

#include "util/header.h"
#include "util/event.h"
#include "util/evlist.h"
#include "util/evsel.h"
#include "util/debug.h"
#include "util/session.h"
#include "util/tool.h"
#include "util/symbol.h"
#include "util/cpumap.h"
#include "util/thread_map.h"

#include <unistd.h>
#include <sched.h>
#include <sys/mman.h>

enum write_mode_t {
	WRITE_FORCE,
	WRITE_APPEND
};

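/*
 * Per-invocation state of 'perf record': the output file, the selected
 * event list and session, and bookkeeping such as the number of bytes
 * written and the offset at which post-processing (build-id collection)
 * starts.
 */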
struct perf_record {
	struct perf_tool	tool;
	struct perf_record_opts	opts;
	u64			bytes_written;
	const char		*output_name;
	struct perf_evlist	*evlist;
	struct perf_session	*session;
	const char		*progname;
	int			output;
	unsigned int		page_size;
	int			realtime_prio;
	enum write_mode_t	write_mode;
	bool			no_buildid;
	bool			no_buildid_cache;
	bool			force;
	bool			file_new;
	bool			append_file;
	long			samples;
	off_t			post_processing_offset;
};

static void advance_output(struct perf_record *rec, size_t size)
{
	rec->bytes_written += size;
}

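/*
 * Write a buffer to the output file, retrying on short writes so that
 * either the whole buffer reaches disk or an error is reported.  Every
 * byte successfully written is accounted in rec->bytes_written.
 */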
static int write_output(struct perf_record *rec, void *buf, size_t size)
{
	while (size) {
		int ret = write(rec->output, buf, size);

		if (ret < 0) {
			pr_err("failed to write\n");
			return -1;
		}

		size -= ret;
		buf += ret;

		rec->bytes_written += ret;
	}

	return 0;
}

static int process_synthesized_event(struct perf_tool *tool,
				     union perf_event *event,
				     struct perf_sample *sample __maybe_unused,
				     struct machine *machine __maybe_unused)
{
	struct perf_record *rec = container_of(tool, struct perf_record, tool);
	if (write_output(rec, event, event->header.size) < 0)
		return -1;

	return 0;
}

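/*
 * Drain one mmap'ed ring buffer into the output file.  The data between
 * our last position (md->prev) and the kernel's head pointer may wrap
 * around the end of the buffer, in which case it is written out in two
 * chunks.  Once written, the tail pointer is updated so the kernel can
 * reuse the space.
 */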
static int perf_record__mmap_read(struct perf_record *rec,
				  struct perf_mmap *md)
{
	unsigned int head = perf_mmap__read_head(md);
	unsigned int old = md->prev;
	unsigned char *data = md->base + rec->page_size;
	unsigned long size;
	void *buf;
	int rc = 0;

	if (old == head)
		return 0;

	rec->samples++;

	size = head - old;

	if ((old & md->mask) + size != (head & md->mask)) {
		buf = &data[old & md->mask];
		size = md->mask + 1 - (old & md->mask);
		old += size;

		if (write_output(rec, buf, size) < 0) {
			rc = -1;
			goto out;
		}
	}

	buf = &data[old & md->mask];
	size = head - old;
	old += size;

	if (write_output(rec, buf, size) < 0) {
		rc = -1;
		goto out;
	}

	md->prev = old;
	perf_mmap__write_tail(md, old);

out:
	return rc;
}

static volatile int done = 0;
static volatile int signr = -1;
static volatile int child_finished = 0;

static void sig_handler(int sig)
{
	if (sig == SIGCHLD)
		child_finished = 1;

	done = 1;
	signr = sig;
}

static void perf_record__sig_exit(int exit_status __maybe_unused, void *arg)
{
	struct perf_record *rec = arg;
	int status;

	if (rec->evlist->workload.pid > 0) {
		if (!child_finished)
			kill(rec->evlist->workload.pid, SIGTERM);

		wait(&status);
		if (WIFSIGNALED(status))
			psignal(WTERMSIG(status), rec->progname);
	}

	if (signr == -1 || signr == SIGUSR1)
		return;

	signal(signr, SIG_DFL);
	kill(getpid(), signr);
}

static bool perf_evlist__equal(struct perf_evlist *evlist,
			       struct perf_evlist *other)
{
	struct perf_evsel *pos, *pair;

	if (evlist->nr_entries != other->nr_entries)
		return false;

	pair = perf_evlist__first(other);

	list_for_each_entry(pos, &evlist->entries, node) {
		if (memcmp(&pos->attr, &pair->attr, sizeof(pos->attr)) != 0)
			return false;
		pair = perf_evsel__next(pair);
	}

	return true;
}

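/*
 * Create the counters for every event in the evlist.  When
 * sys_perf_event_open() fails, a series of fallbacks is attempted so a
 * newer tool keeps working on older kernels: first drop the
 * exclude_guest/exclude_host bits, then sample_id_all, and finally
 * replace the hardware cycles event with the software cpu-clock event.
 */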
static int perf_record__open(struct perf_record *rec)
{
	struct perf_evsel *pos;
	struct perf_evlist *evlist = rec->evlist;
	struct perf_session *session = rec->session;
	struct perf_record_opts *opts = &rec->opts;
	int rc = 0;

	perf_evlist__config_attrs(evlist, opts);

	if (opts->group)
		perf_evlist__set_leader(evlist);

	list_for_each_entry(pos, &evlist->entries, node) {
		struct perf_event_attr *attr = &pos->attr;
		/*
		 * Check if parse_single_tracepoint_event has already asked for
		 * PERF_SAMPLE_TIME.
		 *
		 * XXX this is kludgy but short term fix for problems introduced by
		 * eac23d1c that broke 'perf script' by having different sample_types
		 * when using multiple tracepoint events when we use a perf binary
		 * that tries to use sample_id_all on an older kernel.
		 *
		 * We need to move counter creation to perf_session, support
		 * different sample_types, etc.
		 */
		bool time_needed = attr->sample_type & PERF_SAMPLE_TIME;

fallback_missing_features:
		if (opts->exclude_guest_missing)
			attr->exclude_guest = attr->exclude_host = 0;
retry_sample_id:
		attr->sample_id_all = opts->sample_id_all_missing ? 0 : 1;
try_again:
		if (perf_evsel__open(pos, evlist->cpus, evlist->threads) < 0) {
			int err = errno;

			if (err == EPERM || err == EACCES) {
				ui__error_paranoid();
				rc = -err;
				goto out;
			} else if (err == ENODEV && opts->target.cpu_list) {
				pr_err("No such device - did you specify"
				       " an out-of-range profile CPU?\n");
				rc = -err;
				goto out;
			} else if (err == EINVAL) {
				if (!opts->exclude_guest_missing &&
				    (attr->exclude_guest || attr->exclude_host)) {
					pr_debug("Old kernel, cannot exclude "
						 "guest or host samples.\n");
					opts->exclude_guest_missing = true;
					goto fallback_missing_features;
				} else if (!opts->sample_id_all_missing) {
					/*
					 * Old kernel, no attr->sample_id_type_all field
					 */
					opts->sample_id_all_missing = true;
					if (!opts->sample_time && !opts->raw_samples && !time_needed)
						attr->sample_type &= ~PERF_SAMPLE_TIME;

					goto retry_sample_id;
				}
			}

			/*
			 * If it's cycles then fall back to hrtimer
			 * based cpu-clock-tick sw counter, which
			 * is always available even if no PMU support.
			 *
			 * PPC returns ENXIO until 2.6.37 (behavior changed
			 * with commit b0a873e).
			 */
			if ((err == ENOENT || err == ENXIO)
			    && attr->type == PERF_TYPE_HARDWARE
			    && attr->config == PERF_COUNT_HW_CPU_CYCLES) {

				if (verbose)
					ui__warning("The cycles event is not supported, "
						    "trying to fall back to cpu-clock-ticks\n");
				attr->type = PERF_TYPE_SOFTWARE;
				attr->config = PERF_COUNT_SW_CPU_CLOCK;
				if (pos->name) {
					free(pos->name);
					pos->name = NULL;
				}
				goto try_again;
			}

			if (err == ENOENT) {
				ui__error("The %s event is not supported.\n",
					  perf_evsel__name(pos));
				rc = -err;
				goto out;
			}

			printf("\n");
			error("sys_perf_event_open() syscall returned with %d "
			      "(%s) for event %s. /bin/dmesg may provide "
			      "additional information.\n",
			      err, strerror(err), perf_evsel__name(pos));

#if defined(__i386__) || defined(__x86_64__)
			if (attr->type == PERF_TYPE_HARDWARE &&
			    err == EOPNOTSUPP) {
				pr_err("No hardware sampling interrupt available."
				       " No APIC? If so then you can boot the kernel"
				       " with the \"lapic\" boot parameter to"
				       " force-enable it.\n");
				rc = -err;
				goto out;
			}
#endif

			pr_err("No CONFIG_PERF_EVENTS=y kernel support configured?\n");
			rc = -err;
			goto out;
		}
	}

	if (perf_evlist__apply_filters(evlist)) {
		error("failed to set filter with %d (%s)\n", errno,
		      strerror(errno));
		rc = -1;
		goto out;
	}

	if (perf_evlist__mmap(evlist, opts->mmap_pages, false) < 0) {
		if (errno == EPERM) {
			pr_err("Permission error mapping pages.\n"
			       "Consider increasing "
			       "/proc/sys/kernel/perf_event_mlock_kb,\n"
			       "or try again with a smaller value of -m/--mmap_pages.\n"
			       "(current value: %d)\n", opts->mmap_pages);
			rc = -errno;
		} else if (!is_power_of_2(opts->mmap_pages)) {
			pr_err("--mmap_pages/-m value must be a power of two.");
			rc = -EINVAL;
		} else {
			pr_err("failed to mmap with %d (%s)\n", errno, strerror(errno));
			rc = -errno;
		}
		goto out;
	}

	if (rec->file_new)
		session->evlist = evlist;
	else {
		if (!perf_evlist__equal(session->evlist, evlist)) {
			fprintf(stderr, "incompatible append\n");
			rc = -1;
			goto out;
		}
	}

	perf_session__set_id_hdr_size(session);
out:
	return rc;
}

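/*
 * Re-read everything recorded after post_processing_offset with the
 * build_id__mark_dso_hit_ops tool, so that the DSOs actually hit by
 * samples can have their build-ids stored in the perf.data header.
 */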
static int process_buildids(struct perf_record *rec)
{
	u64 size = lseek(rec->output, 0, SEEK_CUR);

	if (size == 0)
		return 0;

	rec->session->fd = rec->output;
	return __perf_session__process_events(rec->session, rec->post_processing_offset,
					      size - rec->post_processing_offset,
					      size, &build_id__mark_dso_hit_ops);
}

static void perf_record__exit(int status, void *arg)
{
	struct perf_record *rec = arg;

	if (status != 0)
		return;

	if (!rec->opts.pipe_output) {
		rec->session->header.data_size += rec->bytes_written;

		if (!rec->no_buildid)
			process_buildids(rec);
		perf_session__write_header(rec->session, rec->evlist,
					   rec->output, true);
		perf_session__delete(rec->session);
		perf_evlist__delete(rec->evlist);
		symbol__exit();
	}
}

static void perf_event__synthesize_guest_os(struct machine *machine, void *data)
{
	int err;
	struct perf_tool *tool = data;

	if (machine__is_host(machine))
		return;

	/*
	 * As for guest kernel when processing subcommand record&report,
	 * we arrange module mmap prior to guest kernel mmap and trigger
	 * a preload dso because default guest module symbols are loaded
	 * from guest kallsyms instead of /lib/modules/XXX/XXX. This
	 * method is used to avoid symbol missing when the first addr is
	 * in module instead of in guest kernel.
	 */
	err = perf_event__synthesize_modules(tool, process_synthesized_event,
					     machine);
	if (err < 0)
		pr_err("Couldn't record guest kernel [%d]'s reference"
		       " relocation symbol.\n", machine->pid);

	/*
	 * We use _stext for guest kernel because guest kernel's /proc/kallsyms
	 * have no _text sometimes.
	 */
	err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
						 machine, "_text");
	if (err < 0)
		err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
							 machine, "_stext");
	if (err < 0)
		pr_err("Couldn't record guest kernel [%d]'s reference"
		       " relocation symbol.\n", machine->pid);
}

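/*
 * Synthesized PERF_RECORD_FINISHED_ROUND marker, appended after each
 * pass over all the mmap buffers (see perf_record__mmap_read_all) so
 * that the reporting side knows it can sort and flush the events
 * buffered so far.
 */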
static struct perf_event_header finished_round_event = {
	.size = sizeof(struct perf_event_header),
	.type = PERF_RECORD_FINISHED_ROUND,
};

static int perf_record__mmap_read_all(struct perf_record *rec)
{
	int i;
	int rc = 0;

	for (i = 0; i < rec->evlist->nr_mmaps; i++) {
		if (rec->evlist->mmap[i].base) {
			if (perf_record__mmap_read(rec, &rec->evlist->mmap[i]) != 0) {
				rc = -1;
				goto out;
			}
		}
	}

	if (perf_header__has_feat(&rec->session->header, HEADER_TRACING_DATA))
		rc = write_output(rec, &finished_round_event,
				  sizeof(finished_round_event));

out:
	return rc;
}

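/*
 * Main body of the record command: open the output file and session,
 * create the counters, synthesize the initial metadata events (kernel
 * and module mmaps, threads, tracing data, guest machines), then loop
 * draining the mmap buffers until the workload finishes or the user
 * interrupts the run.
 */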
static int __cmd_record(struct perf_record *rec, int argc, const char **argv)
{
	struct stat st;
	int flags;
	int err, output, feat;
	unsigned long waking = 0;
	const bool forks = argc > 0;
	struct machine *machine;
	struct perf_tool *tool = &rec->tool;
	struct perf_record_opts *opts = &rec->opts;
	struct perf_evlist *evsel_list = rec->evlist;
	const char *output_name = rec->output_name;
	struct perf_session *session;

	rec->progname = argv[0];

	rec->page_size = sysconf(_SC_PAGE_SIZE);

	on_exit(perf_record__sig_exit, rec);
	signal(SIGCHLD, sig_handler);
	signal(SIGINT, sig_handler);
	signal(SIGUSR1, sig_handler);

	if (!output_name) {
		if (!fstat(STDOUT_FILENO, &st) && S_ISFIFO(st.st_mode))
			opts->pipe_output = true;
		else
			rec->output_name = output_name = "perf.data";
	}
	if (output_name) {
		if (!strcmp(output_name, "-"))
			opts->pipe_output = true;
		else if (!stat(output_name, &st) && st.st_size) {
			if (rec->write_mode == WRITE_FORCE) {
				char oldname[PATH_MAX];
				snprintf(oldname, sizeof(oldname), "%s.old",
					 output_name);
				unlink(oldname);
				rename(output_name, oldname);
			}
		} else if (rec->write_mode == WRITE_APPEND) {
			rec->write_mode = WRITE_FORCE;
		}
	}

	flags = O_CREAT|O_RDWR;
	if (rec->write_mode == WRITE_APPEND)
		rec->file_new = 0;
	else
		flags |= O_TRUNC;

	if (opts->pipe_output)
		output = STDOUT_FILENO;
	else
		output = open(output_name, flags, S_IRUSR | S_IWUSR);
	if (output < 0) {
		perror("failed to create output file");
		return -1;
	}

	rec->output = output;

	session = perf_session__new(output_name, O_WRONLY,
				    rec->write_mode == WRITE_FORCE, false, NULL);
	if (session == NULL) {
		pr_err("Not enough memory for reading perf file header\n");
		return -1;
	}

	rec->session = session;

	for (feat = HEADER_FIRST_FEATURE; feat < HEADER_LAST_FEATURE; feat++)
		perf_header__set_feat(&session->header, feat);

	if (rec->no_buildid)
		perf_header__clear_feat(&session->header, HEADER_BUILD_ID);

	if (!have_tracepoints(&evsel_list->entries))
		perf_header__clear_feat(&session->header, HEADER_TRACING_DATA);

	if (!rec->opts.branch_stack)
		perf_header__clear_feat(&session->header, HEADER_BRANCH_STACK);

	if (!rec->file_new) {
		err = perf_session__read_header(session, output);
		if (err < 0)
			goto out_delete_session;
	}

	if (forks) {
		err = perf_evlist__prepare_workload(evsel_list, opts, argv);
		if (err < 0) {
			pr_err("Couldn't run the workload!\n");
			goto out_delete_session;
		}
	}

	if (perf_record__open(rec) != 0) {
		err = -1;
		goto out_delete_session;
	}

	/*
	 * perf_session__delete(session) will be called at perf_record__exit()
	 */
	on_exit(perf_record__exit, rec);

	if (opts->pipe_output) {
		err = perf_header__write_pipe(output);
		if (err < 0)
			goto out_delete_session;
	} else if (rec->file_new) {
		err = perf_session__write_header(session, evsel_list,
						 output, false);
		if (err < 0)
			goto out_delete_session;
	}

	if (!rec->no_buildid
	    && !perf_header__has_feat(&session->header, HEADER_BUILD_ID)) {
		pr_err("Couldn't generate buildids. "
		       "Use --no-buildid to profile anyway.\n");
		err = -1;
		goto out_delete_session;
	}

	rec->post_processing_offset = lseek(output, 0, SEEK_CUR);

	machine = perf_session__find_host_machine(session);
	if (!machine) {
		pr_err("Couldn't find native kernel information.\n");
		err = -1;
		goto out_delete_session;
	}

	if (opts->pipe_output) {
		err = perf_event__synthesize_attrs(tool, session,
						   process_synthesized_event);
		if (err < 0) {
			pr_err("Couldn't synthesize attrs.\n");
			goto out_delete_session;
		}

		err = perf_event__synthesize_event_types(tool, process_synthesized_event,
							 machine);
		if (err < 0) {
			pr_err("Couldn't synthesize event_types.\n");
			goto out_delete_session;
		}

		if (have_tracepoints(&evsel_list->entries)) {
			/*
			 * FIXME err <= 0 here actually means that
			 * there were no tracepoints so its not really
			 * an error, just that we don't need to
			 * synthesize anything.  We really have to
			 * return this more properly and also
			 * propagate errors that now are calling die()
			 */
			err = perf_event__synthesize_tracing_data(tool, output, evsel_list,
								  process_synthesized_event);
			if (err <= 0) {
				pr_err("Couldn't record tracing data.\n");
				goto out_delete_session;
			}
			advance_output(rec, err);
		}
	}

	err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
						 machine, "_text");
	if (err < 0)
		err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
							 machine, "_stext");
	if (err < 0)
		pr_err("Couldn't record kernel reference relocation symbol\n"
		       "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
		       "Check /proc/kallsyms permission or run as root.\n");

	err = perf_event__synthesize_modules(tool, process_synthesized_event,
					     machine);
	if (err < 0)
		pr_err("Couldn't record kernel module information.\n"
		       "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
		       "Check /proc/modules permission or run as root.\n");

	if (perf_guest)
		perf_session__process_machines(session, tool,
					       perf_event__synthesize_guest_os);

	if (!opts->target.system_wide)
		err = perf_event__synthesize_thread_map(tool, evsel_list->threads,
							process_synthesized_event,
							machine);
	else
		err = perf_event__synthesize_threads(tool, process_synthesized_event,
						     machine);

	if (err != 0)
		goto out_delete_session;

	if (rec->realtime_prio) {
		struct sched_param param;

		param.sched_priority = rec->realtime_prio;
		if (sched_setscheduler(0, SCHED_FIFO, &param)) {
			pr_err("Could not set realtime priority.\n");
			err = -1;
			goto out_delete_session;
		}
	}

	perf_evlist__enable(evsel_list);

	/*
	 * Let the child rip
	 */
	if (forks)
		perf_evlist__start_workload(evsel_list);

	for (;;) {
		int hits = rec->samples;

		if (perf_record__mmap_read_all(rec) < 0) {
			err = -1;
			goto out_delete_session;
		}

		if (hits == rec->samples) {
			if (done)
				break;
			err = poll(evsel_list->pollfd, evsel_list->nr_fds, -1);
			waking++;
		}

		if (done)
			perf_evlist__disable(evsel_list);
	}

	if (quiet || signr == SIGUSR1)
		return 0;

	fprintf(stderr, "[ perf record: Woken up %ld times to write data ]\n", waking);

	/*
	 * Approximate RIP event size: 24 bytes.
	 */
	fprintf(stderr,
		"[ perf record: Captured and wrote %.3f MB %s (~%" PRIu64 " samples) ]\n",
		(double)rec->bytes_written / 1024.0 / 1024.0,
		output_name,
		rec->bytes_written / 24);

	return 0;

out_delete_session:
	perf_session__delete(session);
	return err;
}

#define BRANCH_OPT(n, m) \
	{ .name = n, .mode = (m) }

#define BRANCH_END { .name = NULL }

struct branch_mode {
	const char *name;
	int mode;
};

static const struct branch_mode branch_modes[] = {
	BRANCH_OPT("u", PERF_SAMPLE_BRANCH_USER),
	BRANCH_OPT("k", PERF_SAMPLE_BRANCH_KERNEL),
	BRANCH_OPT("hv", PERF_SAMPLE_BRANCH_HV),
	BRANCH_OPT("any", PERF_SAMPLE_BRANCH_ANY),
	BRANCH_OPT("any_call", PERF_SAMPLE_BRANCH_ANY_CALL),
	BRANCH_OPT("any_ret", PERF_SAMPLE_BRANCH_ANY_RETURN),
	BRANCH_OPT("ind_call", PERF_SAMPLE_BRANCH_IND_CALL),
	BRANCH_END
};

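/*
 * Parse the argument of -j/--branch-filter (and of -b, which takes no
 * argument): a comma separated list of the names in branch_modes above,
 * e.g. "any_call,u".  If nothing beyond a privilege level is selected,
 * the mode defaults to PERF_SAMPLE_BRANCH_ANY.
 */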
static int
parse_branch_stack(const struct option *opt, const char *str, int unset)
{
#define ONLY_PLM \
	(PERF_SAMPLE_BRANCH_USER	|\
	 PERF_SAMPLE_BRANCH_KERNEL	|\
	 PERF_SAMPLE_BRANCH_HV)

	uint64_t *mode = (uint64_t *)opt->value;
	const struct branch_mode *br;
	char *s, *os = NULL, *p;
	int ret = -1;

	if (unset)
		return 0;

	/*
	 * cannot set it twice, -b + --branch-filter for instance
	 */
	if (*mode)
		return -1;

	/* str may be NULL in case no arg is passed to -b */
	if (str) {
		/* because str is read-only */
		s = os = strdup(str);
		if (!s)
			return -1;

		for (;;) {
			p = strchr(s, ',');
			if (p)
				*p = '\0';

			for (br = branch_modes; br->name; br++) {
				if (!strcasecmp(s, br->name))
					break;
			}
			if (!br->name) {
				ui__warning("unknown branch filter %s,"
					    " check man page\n", s);
				goto error;
			}

			*mode |= br->mode;

			if (!p)
				break;

			s = p + 1;
		}
	}
	ret = 0;

	/* default to any branch */
	if ((*mode & ~ONLY_PLM) == 0) {
		*mode = PERF_SAMPLE_BRANCH_ANY;
	}
error:
	free(os);
	return ret;
}

#ifdef LIBUNWIND_SUPPORT
static int get_stack_size(char *str, unsigned long *_size)
{
	char *endptr;
	unsigned long size;
	unsigned long max_size = round_down(USHRT_MAX, sizeof(u64));

	size = strtoul(str, &endptr, 0);

	do {
		if (*endptr)
			break;

		size = round_up(size, sizeof(u64));
		if (!size || size > max_size)
			break;

		*_size = size;
		return 0;

	} while (0);

	pr_err("callchain: Incorrect stack dump size (max %ld): %s\n",
	       max_size, str);
	return -1;
}
#endif /* LIBUNWIND_SUPPORT */

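/*
 * Parse the argument of -g/--call-graph.  "fp" selects frame pointer
 * based unwinding and takes no further argument; with libunwind
 * support, "dwarf[,<size>]" (e.g. -g dwarf,8192) selects DWARF
 * post-unwinding with an optional user stack dump size, 8192 bytes by
 * default.
 */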
static int
parse_callchain_opt(const struct option *opt __maybe_unused, const char *arg,
		    int unset)
{
	struct perf_record *rec = (struct perf_record *)opt->value;
	char *tok, *name, *saveptr = NULL;
	char *buf;
	int ret = -1;

	/* --no-call-graph */
	if (unset)
		return 0;

	/* We specified default option if none is provided. */
	BUG_ON(!arg);

	/* We need buffer that we know we can write to. */
	buf = malloc(strlen(arg) + 1);
	if (!buf)
		return -ENOMEM;

	strcpy(buf, arg);

	tok = strtok_r((char *)buf, ",", &saveptr);
	name = tok ? : (char *)buf;

	do {
		/* Framepointer style */
		if (!strncmp(name, "fp", sizeof("fp"))) {
			if (!strtok_r(NULL, ",", &saveptr)) {
				rec->opts.call_graph = CALLCHAIN_FP;
				ret = 0;
			} else
				pr_err("callchain: No more arguments "
				       "needed for -g fp\n");
			break;

#ifdef LIBUNWIND_SUPPORT
		/* Dwarf style */
		} else if (!strncmp(name, "dwarf", sizeof("dwarf"))) {
			const unsigned long default_stack_dump_size = 8192;

			ret = 0;
			rec->opts.call_graph = CALLCHAIN_DWARF;
			rec->opts.stack_dump_size = default_stack_dump_size;

			tok = strtok_r(NULL, ",", &saveptr);
			if (tok) {
				unsigned long size = 0;

				ret = get_stack_size(tok, &size);
				rec->opts.stack_dump_size = size;
			}

			if (!ret)
				pr_debug("callchain: stack dump size %d\n",
					 rec->opts.stack_dump_size);
#endif /* LIBUNWIND_SUPPORT */
		} else {
			pr_err("callchain: Unknown -g option "
			       "value: %s\n", arg);
			break;
		}

	} while (0);

	free(buf);

	if (!ret)
		pr_debug("callchain: type %d\n", rec->opts.call_graph);

	return ret;
}

static const char * const record_usage[] = {
	"perf record [<options>] [<command>]",
	"perf record [<options>] -- <command> [<options>]",
	NULL
};

/*
 * XXX Ideally would be local to cmd_record() and passed to a perf_record__new
 * because we need to have access to it in perf_record__exit, which is called
 * after cmd_record() exits, but since record_options need to be accessible to
 * builtin-script, leave it here.
 *
 * At least we don't touch it in all the other functions here directly.
 *
 * Just say no to tons of global variables, sigh.
 */
static struct perf_record record = {
	.opts = {
		.mmap_pages	= UINT_MAX,
		.user_freq	= UINT_MAX,
		.user_interval	= ULLONG_MAX,
		.freq		= 4000,
		.target		= {
			.uses_mmap = true,
		},
	},
	.write_mode = WRITE_FORCE,
	.file_new   = true,
};

#define CALLCHAIN_HELP "do call-graph (stack chain/backtrace) recording: "

#ifdef LIBUNWIND_SUPPORT
static const char callchain_help[] = CALLCHAIN_HELP "[fp] dwarf";
#else
static const char callchain_help[] = CALLCHAIN_HELP "[fp]";
#endif

/*
 * XXX Will stay a global variable till we fix builtin-script.c to stop messing
 * with it and switch to use the library functions in perf_evlist that came
 * from builtin-record.c, i.e. use perf_record_opts,
 * perf_evlist__prepare_workload, etc instead of fork+exec'in 'perf record',
 * using pipes, etc.
 */
const struct option record_options[] = {
	OPT_CALLBACK('e', "event", &record.evlist, "event",
		     "event selector. use 'perf list' to list available events",
		     parse_events_option),
	OPT_CALLBACK(0, "filter", &record.evlist, "filter",
		     "event filter", parse_filter),
	OPT_STRING('p', "pid", &record.opts.target.pid, "pid",
		   "record events on existing process id"),
	OPT_STRING('t', "tid", &record.opts.target.tid, "tid",
		   "record events on existing thread id"),
	OPT_INTEGER('r', "realtime", &record.realtime_prio,
		    "collect data with this RT SCHED_FIFO priority"),
	OPT_BOOLEAN('D', "no-delay", &record.opts.no_delay,
		    "collect data without buffering"),
	OPT_BOOLEAN('R', "raw-samples", &record.opts.raw_samples,
		    "collect raw sample records from all opened counters"),
	OPT_BOOLEAN('a', "all-cpus", &record.opts.target.system_wide,
		    "system-wide collection from all CPUs"),
	OPT_BOOLEAN('A', "append", &record.append_file,
		    "append to the output file to do incremental profiling"),
	OPT_STRING('C', "cpu", &record.opts.target.cpu_list, "cpu",
		   "list of cpus to monitor"),
	OPT_BOOLEAN('f', "force", &record.force,
		    "overwrite existing data file (deprecated)"),
	OPT_U64('c', "count", &record.opts.user_interval, "event period to sample"),
	OPT_STRING('o', "output", &record.output_name, "file",
		   "output file name"),
	OPT_BOOLEAN('i', "no-inherit", &record.opts.no_inherit,
		    "child tasks do not inherit counters"),
	OPT_UINTEGER('F', "freq", &record.opts.user_freq, "profile at this frequency"),
	OPT_UINTEGER('m', "mmap-pages", &record.opts.mmap_pages,
		     "number of mmap data pages"),
	OPT_BOOLEAN(0, "group", &record.opts.group,
		    "put the counters into a counter group"),
	OPT_CALLBACK_DEFAULT('g', "call-graph", &record, "mode[,dump_size]",
			     callchain_help, &parse_callchain_opt,
			     "fp"),
	OPT_INCR('v', "verbose", &verbose,
		 "be more verbose (show counter open errors, etc)"),
	OPT_BOOLEAN('q', "quiet", &quiet, "don't print any message"),
	OPT_BOOLEAN('s', "stat", &record.opts.inherit_stat,
		    "per thread counts"),
	OPT_BOOLEAN('d', "data", &record.opts.sample_address,
		    "Sample addresses"),
	OPT_BOOLEAN('T', "timestamp", &record.opts.sample_time, "Sample timestamps"),
	OPT_BOOLEAN('P', "period", &record.opts.period, "Sample period"),
	OPT_BOOLEAN('n', "no-samples", &record.opts.no_samples,
		    "don't sample"),
	OPT_BOOLEAN('N', "no-buildid-cache", &record.no_buildid_cache,
		    "do not update the buildid cache"),
	OPT_BOOLEAN('B', "no-buildid", &record.no_buildid,
		    "do not collect buildids in perf.data"),
	OPT_CALLBACK('G', "cgroup", &record.evlist, "name",
		     "monitor event in cgroup name only",
		     parse_cgroups),
	OPT_STRING('u', "uid", &record.opts.target.uid_str, "user",
		   "user to profile"),

	OPT_CALLBACK_NOOPT('b', "branch-any", &record.opts.branch_stack,
			   "branch any", "sample any taken branches",
			   parse_branch_stack),

	OPT_CALLBACK('j', "branch-filter", &record.opts.branch_stack,
		     "branch filter mask", "branch stack filter modes",
		     parse_branch_stack),
	OPT_END()
};

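/*
 * Typical invocations (illustrative; the workload name is just a
 * placeholder):
 *
 *	perf record -F 1000 -g -- ./workload	# sample the workload at
 *						# 1000 Hz with call graphs
 *	perf record -a -e cycles,instructions	# system wide, two events
 *
 * cmd_record() parses the options above, validates the target
 * (pid/tid/cpu/uid), creates the cpu and thread maps and then hands
 * control to __cmd_record().
 */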
int cmd_record(int argc, const char **argv, const char *prefix __maybe_unused)
{
	int err = -ENOMEM;
	struct perf_evsel *pos;
	struct perf_evlist *evsel_list;
	struct perf_record *rec = &record;
	char errbuf[BUFSIZ];

	evsel_list = perf_evlist__new(NULL, NULL);
	if (evsel_list == NULL)
		return -ENOMEM;

	rec->evlist = evsel_list;

	argc = parse_options(argc, argv, record_options, record_usage,
			     PARSE_OPT_STOP_AT_NON_OPTION);
	if (!argc && perf_target__none(&rec->opts.target))
		usage_with_options(record_usage, record_options);

	if (rec->force && rec->append_file) {
		ui__error("Can't overwrite and append at the same time."
			  " You need to choose between -f and -A");
		usage_with_options(record_usage, record_options);
	} else if (rec->append_file) {
		rec->write_mode = WRITE_APPEND;
	} else {
		rec->write_mode = WRITE_FORCE;
	}

	if (nr_cgroups && !rec->opts.target.system_wide) {
		ui__error("cgroup monitoring only available in"
			  " system-wide mode\n");
		usage_with_options(record_usage, record_options);
	}

	symbol__init();

	if (symbol_conf.kptr_restrict)
		pr_warning(
"WARNING: Kernel address maps (/proc/{kallsyms,modules}) are restricted,\n"
"check /proc/sys/kernel/kptr_restrict.\n\n"
"Samples in kernel functions may not be resolved if a suitable vmlinux\n"
"file is not found in the buildid cache or in the vmlinux path.\n\n"
"Samples in kernel modules won't be resolved at all.\n\n"
"If some relocation was applied (e.g. kexec) symbols may be misresolved\n"
"even with a suitable vmlinux or kallsyms file.\n\n");

	if (rec->no_buildid_cache || rec->no_buildid)
		disable_buildid_cache();

	if (evsel_list->nr_entries == 0 &&
	    perf_evlist__add_default(evsel_list) < 0) {
		pr_err("Not enough memory for event selector list\n");
		goto out_symbol_exit;
	}

	err = perf_target__validate(&rec->opts.target);
	if (err) {
		perf_target__strerror(&rec->opts.target, err, errbuf, BUFSIZ);
		ui__warning("%s", errbuf);
	}

	err = perf_target__parse_uid(&rec->opts.target);
	if (err) {
		int saved_errno = errno;

		perf_target__strerror(&rec->opts.target, err, errbuf, BUFSIZ);
		ui__error("%s", errbuf);

		err = -saved_errno;
		goto out_free_fd;
	}

	err = -ENOMEM;
	if (perf_evlist__create_maps(evsel_list, &rec->opts.target) < 0)
		usage_with_options(record_usage, record_options);

	list_for_each_entry(pos, &evsel_list->entries, node) {
		if (perf_header__push_event(pos->attr.config, perf_evsel__name(pos)))
			goto out_free_fd;
	}

	if (rec->opts.user_interval != ULLONG_MAX)
		rec->opts.default_interval = rec->opts.user_interval;
	if (rec->opts.user_freq != UINT_MAX)
		rec->opts.freq = rec->opts.user_freq;

	/*
	 * User specified count overrides default frequency.
	 */
	if (rec->opts.default_interval)
		rec->opts.freq = 0;
	else if (rec->opts.freq) {
		rec->opts.default_interval = rec->opts.freq;
	} else {
		ui__error("frequency and count are zero, aborting\n");
		err = -EINVAL;
		goto out_free_fd;
	}

	err = __cmd_record(&record, argc, argv);
out_free_fd:
	perf_evlist__delete_maps(evsel_list);
out_symbol_exit:
	symbol__exit();
	return err;
}