perf tools: Add AUX area tracing Snapshot Mode
[linux-2.6-block.git] / tools / perf / builtin-record.c
CommitLineData
abaff32a 1/*
bf9e1876
IM
2 * builtin-record.c
3 *
4 * Builtin record command: Record the profile of a workload
5 * (or a CPU, or a PID) into the perf.data output file - for
6 * later analysis via perf report.
abaff32a 7 */
16f762a2 8#include "builtin.h"
bf9e1876
IM
9
10#include "perf.h"
11
6122e4e4 12#include "util/build-id.h"
6eda5838 13#include "util/util.h"
0e9b20b8 14#include "util/parse-options.h"
8ad8db37 15#include "util/parse-events.h"
6eda5838 16
8f651eae 17#include "util/callchain.h"
f14d5707 18#include "util/cgroup.h"
7c6a1c65 19#include "util/header.h"
66e274f3 20#include "util/event.h"
361c99a6 21#include "util/evlist.h"
69aad6f1 22#include "util/evsel.h"
8f28827a 23#include "util/debug.h"
94c744b6 24#include "util/session.h"
45694aa7 25#include "util/tool.h"
8d06367f 26#include "util/symbol.h"
a12b51c4 27#include "util/cpumap.h"
fd78260b 28#include "util/thread_map.h"
f5fc1412 29#include "util/data.h"
ef149c25 30#include "util/auxtrace.h"
7c6a1c65 31
97124d5e 32#include <unistd.h>
de9ac07b 33#include <sched.h>
a41794cd 34#include <sys/mman.h>
de9ac07b 35
78da39fa 36
8c6f45a7 37struct record {
45694aa7 38 struct perf_tool tool;
b4006796 39 struct record_opts opts;
d20deb64 40 u64 bytes_written;
f5fc1412 41 struct perf_data_file file;
ef149c25 42 struct auxtrace_record *itr;
d20deb64
ACM
43 struct perf_evlist *evlist;
44 struct perf_session *session;
45 const char *progname;
d20deb64 46 int realtime_prio;
d20deb64
ACM
47 bool no_buildid;
48 bool no_buildid_cache;
d20deb64 49 long samples;
0f82ebc4 50};
a21ca2ca 51
8c6f45a7 52static int record__write(struct record *rec, void *bf, size_t size)
f5970550 53{
cf8b2e69 54 if (perf_data_file__write(rec->session->file, bf, size) < 0) {
50a9b868
JO
55 pr_err("failed to write perf data, error: %m\n");
56 return -1;
f5970550 57 }
8d3eca20 58
cf8b2e69 59 rec->bytes_written += size;
8d3eca20 60 return 0;
f5970550
PZ
61}
62
45694aa7 63static int process_synthesized_event(struct perf_tool *tool,
d20deb64 64 union perf_event *event,
1d037ca1
IT
65 struct perf_sample *sample __maybe_unused,
66 struct machine *machine __maybe_unused)
234fbbf5 67{
8c6f45a7
ACM
68 struct record *rec = container_of(tool, struct record, tool);
69 return record__write(rec, event, event->header.size);
234fbbf5
ACM
70}
71
e5685730 72static int record__mmap_read(struct record *rec, int idx)
de9ac07b 73{
e5685730 74 struct perf_mmap *md = &rec->evlist->mmap[idx];
7b8283b5
DA
75 u64 head = perf_mmap__read_head(md);
76 u64 old = md->prev;
918512b4 77 unsigned char *data = md->base + page_size;
de9ac07b
PZ
78 unsigned long size;
79 void *buf;
8d3eca20 80 int rc = 0;
de9ac07b 81
dc82009a 82 if (old == head)
8d3eca20 83 return 0;
dc82009a 84
d20deb64 85 rec->samples++;
de9ac07b
PZ
86
87 size = head - old;
88
89 if ((old & md->mask) + size != (head & md->mask)) {
90 buf = &data[old & md->mask];
91 size = md->mask + 1 - (old & md->mask);
92 old += size;
021e9f47 93
8c6f45a7 94 if (record__write(rec, buf, size) < 0) {
8d3eca20
DA
95 rc = -1;
96 goto out;
97 }
de9ac07b
PZ
98 }
99
100 buf = &data[old & md->mask];
101 size = head - old;
102 old += size;
021e9f47 103
8c6f45a7 104 if (record__write(rec, buf, size) < 0) {
8d3eca20
DA
105 rc = -1;
106 goto out;
107 }
de9ac07b
PZ
108
109 md->prev = old;
e5685730 110 perf_evlist__mmap_consume(rec->evlist, idx);
8d3eca20
DA
111out:
112 return rc;
de9ac07b
PZ
113}
114
e31f0d01
AH
115#ifdef HAVE_AUXTRACE_SUPPORT
116
ef149c25
AH
117static int record__process_auxtrace(struct perf_tool *tool,
118 union perf_event *event, void *data1,
119 size_t len1, void *data2, size_t len2)
120{
121 struct record *rec = container_of(tool, struct record, tool);
99fa2984 122 struct perf_data_file *file = &rec->file;
ef149c25
AH
123 size_t padding;
124 u8 pad[8] = {0};
125
99fa2984
AH
126 if (!perf_data_file__is_pipe(file)) {
127 off_t file_offset;
128 int fd = perf_data_file__fd(file);
129 int err;
130
131 file_offset = lseek(fd, 0, SEEK_CUR);
132 if (file_offset == -1)
133 return -1;
134 err = auxtrace_index__auxtrace_event(&rec->session->auxtrace_index,
135 event, file_offset);
136 if (err)
137 return err;
138 }
139
ef149c25
AH
140 /* event.auxtrace.size includes padding, see __auxtrace_mmap__read() */
141 padding = (len1 + len2) & 7;
142 if (padding)
143 padding = 8 - padding;
144
145 record__write(rec, event, event->header.size);
146 record__write(rec, data1, len1);
147 if (len2)
148 record__write(rec, data2, len2);
149 record__write(rec, &pad, padding);
150
151 return 0;
152}
153
154static int record__auxtrace_mmap_read(struct record *rec,
155 struct auxtrace_mmap *mm)
156{
157 int ret;
158
159 ret = auxtrace_mmap__read(mm, rec->itr, &rec->tool,
160 record__process_auxtrace);
161 if (ret < 0)
162 return ret;
163
164 if (ret)
165 rec->samples++;
166
167 return 0;
168}
169
e31f0d01
AH
170#else
171
172static inline
173int record__auxtrace_mmap_read(struct record *rec __maybe_unused,
174 struct auxtrace_mmap *mm __maybe_unused)
175{
176 return 0;
177}
178
179#endif
180
/* Flags written from signal handlers and polled by the record loop. */
static volatile int done;		/* a handled signal arrived */
static volatile int signr = -1;		/* last non-SIGCHLD signal, -1 if none */
static volatile int child_finished;	/* SIGCHLD was received */

/*
 * Common handler for SIGCHLD, SIGINT and SIGTERM: note which kind of
 * signal arrived and ask the record loop to stop.
 */
static void sig_handler(int sig)
{
	switch (sig) {
	case SIGCHLD:
		child_finished = 1;
		break;
	default:
		signr = sig;
		break;
	}

	done = 1;
}
194
45604710 195static void record__sig_exit(void)
f7b7c26e 196{
45604710 197 if (signr == -1)
f7b7c26e
PZ
198 return;
199
200 signal(signr, SIG_DFL);
45604710 201 raise(signr);
de9ac07b
PZ
202}
203
8c6f45a7 204static int record__open(struct record *rec)
dd7927f4 205{
56e52e85 206 char msg[512];
6a4bb04c 207 struct perf_evsel *pos;
d20deb64
ACM
208 struct perf_evlist *evlist = rec->evlist;
209 struct perf_session *session = rec->session;
b4006796 210 struct record_opts *opts = &rec->opts;
8d3eca20 211 int rc = 0;
dd7927f4 212
f77a9518 213 perf_evlist__config(evlist, opts);
cac21425 214
0050f7aa 215 evlist__for_each(evlist, pos) {
dd7927f4 216try_again:
6a4bb04c 217 if (perf_evsel__open(pos, evlist->cpus, evlist->threads) < 0) {
56e52e85 218 if (perf_evsel__fallback(pos, errno, msg, sizeof(msg))) {
d6d901c2 219 if (verbose)
c0a54341 220 ui__warning("%s\n", msg);
d6d901c2
ZY
221 goto try_again;
222 }
ca6a4258 223
56e52e85
ACM
224 rc = -errno;
225 perf_evsel__open_strerror(pos, &opts->target,
226 errno, msg, sizeof(msg));
227 ui__error("%s\n", msg);
8d3eca20 228 goto out;
c171b552
LZ
229 }
230 }
a43d3f08 231
23d4aad4
ACM
232 if (perf_evlist__apply_filters(evlist, &pos)) {
233 error("failed to set filter \"%s\" on event %s with %d (%s)\n",
234 pos->filter, perf_evsel__name(pos), errno,
35550da3 235 strerror_r(errno, msg, sizeof(msg)));
8d3eca20
DA
236 rc = -1;
237 goto out;
0a102479
FW
238 }
239
ef149c25
AH
240 if (perf_evlist__mmap_ex(evlist, opts->mmap_pages, false,
241 opts->auxtrace_mmap_pages, false) < 0) {
8d3eca20
DA
242 if (errno == EPERM) {
243 pr_err("Permission error mapping pages.\n"
244 "Consider increasing "
245 "/proc/sys/kernel/perf_event_mlock_kb,\n"
246 "or try again with a smaller value of -m/--mmap_pages.\n"
ef149c25
AH
247 "(current value: %u,%u)\n",
248 opts->mmap_pages, opts->auxtrace_mmap_pages);
8d3eca20 249 rc = -errno;
8d3eca20 250 } else {
35550da3
MH
251 pr_err("failed to mmap with %d (%s)\n", errno,
252 strerror_r(errno, msg, sizeof(msg)));
8d3eca20
DA
253 rc = -errno;
254 }
255 goto out;
18e60939 256 }
0a27d7f9 257
563aecb2 258 session->evlist = evlist;
7b56cce2 259 perf_session__set_id_hdr_size(session);
8d3eca20
DA
260out:
261 return rc;
16c8a109
PZ
262}
263
e3d59112
NK
264static int process_sample_event(struct perf_tool *tool,
265 union perf_event *event,
266 struct perf_sample *sample,
267 struct perf_evsel *evsel,
268 struct machine *machine)
269{
270 struct record *rec = container_of(tool, struct record, tool);
271
272 rec->samples++;
273
274 return build_id__mark_dso_hit(tool, event, sample, evsel, machine);
275}
276
8c6f45a7 277static int process_buildids(struct record *rec)
6122e4e4 278{
f5fc1412
JO
279 struct perf_data_file *file = &rec->file;
280 struct perf_session *session = rec->session;
6122e4e4 281
42aa276f 282 u64 size = lseek(perf_data_file__fd(file), 0, SEEK_CUR);
9f591fd7
ACM
283 if (size == 0)
284 return 0;
285
4ac30cf7
NK
286 file->size = size;
287
00dc8657
NK
288 /*
289 * During this process, it'll load kernel map and replace the
290 * dso->long_name to a real pathname it found. In this case
291 * we prefer the vmlinux path like
292 * /lib/modules/3.16.4/build/vmlinux
293 *
294 * rather than build-id path (in debug directory).
295 * $HOME/.debug/.build-id/f0/6e17aa50adf4d00b88925e03775de107611551
296 */
297 symbol_conf.ignore_vmlinux_buildid = true;
298
b7b61cbe 299 return perf_session__process_events(session);
6122e4e4
ACM
300}
301
8115d60c 302static void perf_event__synthesize_guest_os(struct machine *machine, void *data)
a1645ce1
ZY
303{
304 int err;
45694aa7 305 struct perf_tool *tool = data;
a1645ce1
ZY
306 /*
307 *As for guest kernel when processing subcommand record&report,
308 *we arrange module mmap prior to guest kernel mmap and trigger
309 *a preload dso because default guest module symbols are loaded
310 *from guest kallsyms instead of /lib/modules/XXX/XXX. This
311 *method is used to avoid symbol missing when the first addr is
312 *in module instead of in guest kernel.
313 */
45694aa7 314 err = perf_event__synthesize_modules(tool, process_synthesized_event,
743eb868 315 machine);
a1645ce1
ZY
316 if (err < 0)
317 pr_err("Couldn't record guest kernel [%d]'s reference"
23346f21 318 " relocation symbol.\n", machine->pid);
a1645ce1 319
a1645ce1
ZY
320 /*
321 * We use _stext for guest kernel because guest kernel's /proc/kallsyms
322 * have no _text sometimes.
323 */
45694aa7 324 err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
0ae617be 325 machine);
a1645ce1
ZY
326 if (err < 0)
327 pr_err("Couldn't record guest kernel [%d]'s reference"
23346f21 328 " relocation symbol.\n", machine->pid);
a1645ce1
ZY
329}
330
98402807
FW
331static struct perf_event_header finished_round_event = {
332 .size = sizeof(struct perf_event_header),
333 .type = PERF_RECORD_FINISHED_ROUND,
334};
335
8c6f45a7 336static int record__mmap_read_all(struct record *rec)
98402807 337{
dcabb507 338 u64 bytes_written = rec->bytes_written;
0e2e63dd 339 int i;
8d3eca20 340 int rc = 0;
98402807 341
d20deb64 342 for (i = 0; i < rec->evlist->nr_mmaps; i++) {
ef149c25
AH
343 struct auxtrace_mmap *mm = &rec->evlist->mmap[i].auxtrace_mmap;
344
8d3eca20 345 if (rec->evlist->mmap[i].base) {
e5685730 346 if (record__mmap_read(rec, i) != 0) {
8d3eca20
DA
347 rc = -1;
348 goto out;
349 }
350 }
ef149c25
AH
351
352 if (mm->base &&
353 record__auxtrace_mmap_read(rec, mm) != 0) {
354 rc = -1;
355 goto out;
356 }
98402807
FW
357 }
358
dcabb507
JO
359 /*
360 * Mark the round finished in case we wrote
361 * at least one event.
362 */
363 if (bytes_written != rec->bytes_written)
364 rc = record__write(rec, &finished_round_event, sizeof(finished_round_event));
8d3eca20
DA
365
366out:
367 return rc;
98402807
FW
368}
369
8c6f45a7 370static void record__init_features(struct record *rec)
57706abc 371{
57706abc
DA
372 struct perf_session *session = rec->session;
373 int feat;
374
375 for (feat = HEADER_FIRST_FEATURE; feat < HEADER_LAST_FEATURE; feat++)
376 perf_header__set_feat(&session->header, feat);
377
378 if (rec->no_buildid)
379 perf_header__clear_feat(&session->header, HEADER_BUILD_ID);
380
3e2be2da 381 if (!have_tracepoints(&rec->evlist->entries))
57706abc
DA
382 perf_header__clear_feat(&session->header, HEADER_TRACING_DATA);
383
384 if (!rec->opts.branch_stack)
385 perf_header__clear_feat(&session->header, HEADER_BRANCH_STACK);
ef149c25
AH
386
387 if (!rec->opts.full_auxtrace)
388 perf_header__clear_feat(&session->header, HEADER_AUXTRACE);
57706abc
DA
389}
390
f33cbe72
ACM
391static volatile int workload_exec_errno;
392
393/*
394 * perf_evlist__prepare_workload will send a SIGUSR1
395 * if the fork fails, since we asked by setting its
396 * want_signal to true.
397 */
45604710
NK
398static void workload_exec_failed_signal(int signo __maybe_unused,
399 siginfo_t *info,
f33cbe72
ACM
400 void *ucontext __maybe_unused)
401{
402 workload_exec_errno = info->si_value.sival_int;
403 done = 1;
f33cbe72
ACM
404 child_finished = 1;
405}
406
8c6f45a7 407static int __cmd_record(struct record *rec, int argc, const char **argv)
16c8a109 408{
57706abc 409 int err;
45604710 410 int status = 0;
8b412664 411 unsigned long waking = 0;
46be604b 412 const bool forks = argc > 0;
23346f21 413 struct machine *machine;
45694aa7 414 struct perf_tool *tool = &rec->tool;
b4006796 415 struct record_opts *opts = &rec->opts;
f5fc1412 416 struct perf_data_file *file = &rec->file;
d20deb64 417 struct perf_session *session;
6dcf45ef 418 bool disabled = false, draining = false;
42aa276f 419 int fd;
de9ac07b 420
d20deb64 421 rec->progname = argv[0];
33e49ea7 422
45604710 423 atexit(record__sig_exit);
f5970550
PZ
424 signal(SIGCHLD, sig_handler);
425 signal(SIGINT, sig_handler);
804f7ac7 426 signal(SIGTERM, sig_handler);
f5970550 427
b7b61cbe 428 session = perf_session__new(file, false, tool);
94c744b6 429 if (session == NULL) {
ffa91880 430 pr_err("Perf session creation failed.\n");
a9a70bbc
ACM
431 return -1;
432 }
433
42aa276f 434 fd = perf_data_file__fd(file);
d20deb64
ACM
435 rec->session = session;
436
8c6f45a7 437 record__init_features(rec);
330aa675 438
d4db3f16 439 if (forks) {
3e2be2da 440 err = perf_evlist__prepare_workload(rec->evlist, &opts->target,
f5fc1412 441 argv, file->is_pipe,
735f7e0b 442 workload_exec_failed_signal);
35b9d88e
ACM
443 if (err < 0) {
444 pr_err("Couldn't run the workload!\n");
45604710 445 status = err;
35b9d88e 446 goto out_delete_session;
856e9660 447 }
856e9660
PZ
448 }
449
8c6f45a7 450 if (record__open(rec) != 0) {
8d3eca20 451 err = -1;
45604710 452 goto out_child;
8d3eca20 453 }
de9ac07b 454
3e2be2da 455 if (!rec->evlist->nr_groups)
a8bb559b
NK
456 perf_header__clear_feat(&session->header, HEADER_GROUP_DESC);
457
f5fc1412 458 if (file->is_pipe) {
42aa276f 459 err = perf_header__write_pipe(fd);
529870e3 460 if (err < 0)
45604710 461 goto out_child;
563aecb2 462 } else {
42aa276f 463 err = perf_session__write_header(session, rec->evlist, fd, false);
d5eed904 464 if (err < 0)
45604710 465 goto out_child;
56b03f3c
ACM
466 }
467
d3665498 468 if (!rec->no_buildid
e20960c0 469 && !perf_header__has_feat(&session->header, HEADER_BUILD_ID)) {
d3665498 470 pr_err("Couldn't generate buildids. "
e20960c0 471 "Use --no-buildid to profile anyway.\n");
8d3eca20 472 err = -1;
45604710 473 goto out_child;
e20960c0
RR
474 }
475
34ba5122 476 machine = &session->machines.host;
743eb868 477
f5fc1412 478 if (file->is_pipe) {
45694aa7 479 err = perf_event__synthesize_attrs(tool, session,
d20deb64 480 process_synthesized_event);
2c46dbb5
TZ
481 if (err < 0) {
482 pr_err("Couldn't synthesize attrs.\n");
45604710 483 goto out_child;
2c46dbb5 484 }
cd19a035 485
3e2be2da 486 if (have_tracepoints(&rec->evlist->entries)) {
63e0c771
TZ
487 /*
488 * FIXME err <= 0 here actually means that
489 * there were no tracepoints so its not really
490 * an error, just that we don't need to
491 * synthesize anything. We really have to
492 * return this more properly and also
493 * propagate errors that now are calling die()
494 */
42aa276f 495 err = perf_event__synthesize_tracing_data(tool, fd, rec->evlist,
743eb868 496 process_synthesized_event);
63e0c771
TZ
497 if (err <= 0) {
498 pr_err("Couldn't record tracing data.\n");
45604710 499 goto out_child;
63e0c771 500 }
f34b9001 501 rec->bytes_written += err;
63e0c771 502 }
2c46dbb5
TZ
503 }
504
ef149c25
AH
505 if (rec->opts.full_auxtrace) {
506 err = perf_event__synthesize_auxtrace_info(rec->itr, tool,
507 session, process_synthesized_event);
508 if (err)
509 goto out_delete_session;
510 }
511
45694aa7 512 err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
0ae617be 513 machine);
c1a3a4b9
ACM
514 if (err < 0)
515 pr_err("Couldn't record kernel reference relocation symbol\n"
516 "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
517 "Check /proc/kallsyms permission or run as root.\n");
b7cece76 518
45694aa7 519 err = perf_event__synthesize_modules(tool, process_synthesized_event,
743eb868 520 machine);
c1a3a4b9
ACM
521 if (err < 0)
522 pr_err("Couldn't record kernel module information.\n"
523 "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
524 "Check /proc/modules permission or run as root.\n");
525
7e383de4 526 if (perf_guest) {
876650e6
ACM
527 machines__process_guests(&session->machines,
528 perf_event__synthesize_guest_os, tool);
7e383de4 529 }
7c6a1c65 530
3e2be2da 531 err = __machine__synthesize_threads(machine, tool, &opts->target, rec->evlist->threads,
a33fbd56 532 process_synthesized_event, opts->sample_address);
8d3eca20 533 if (err != 0)
45604710 534 goto out_child;
8d3eca20 535
d20deb64 536 if (rec->realtime_prio) {
de9ac07b
PZ
537 struct sched_param param;
538
d20deb64 539 param.sched_priority = rec->realtime_prio;
de9ac07b 540 if (sched_setscheduler(0, SCHED_FIFO, &param)) {
6beba7ad 541 pr_err("Could not set realtime priority.\n");
8d3eca20 542 err = -1;
45604710 543 goto out_child;
de9ac07b
PZ
544 }
545 }
546
774cb499
JO
547 /*
548 * When perf is starting the traced process, all the events
549 * (apart from group members) have enable_on_exec=1 set,
550 * so don't spoil it by prematurely enabling them.
551 */
6619a53e 552 if (!target__none(&opts->target) && !opts->initial_delay)
3e2be2da 553 perf_evlist__enable(rec->evlist);
764e16a3 554
856e9660
PZ
555 /*
556 * Let the child rip
557 */
735f7e0b 558 if (forks)
3e2be2da 559 perf_evlist__start_workload(rec->evlist);
856e9660 560
6619a53e
AK
561 if (opts->initial_delay) {
562 usleep(opts->initial_delay * 1000);
563 perf_evlist__enable(rec->evlist);
564 }
565
649c48a9 566 for (;;) {
d20deb64 567 int hits = rec->samples;
de9ac07b 568
8c6f45a7 569 if (record__mmap_read_all(rec) < 0) {
8d3eca20 570 err = -1;
45604710 571 goto out_child;
8d3eca20 572 }
de9ac07b 573
d20deb64 574 if (hits == rec->samples) {
6dcf45ef 575 if (done || draining)
649c48a9 576 break;
f66a889d 577 err = perf_evlist__poll(rec->evlist, -1);
a515114f
JO
578 /*
579 * Propagate error, only if there's any. Ignore positive
580 * number of returned events and interrupt error.
581 */
582 if (err > 0 || (err < 0 && errno == EINTR))
45604710 583 err = 0;
8b412664 584 waking++;
6dcf45ef
ACM
585
586 if (perf_evlist__filter_pollfd(rec->evlist, POLLERR | POLLHUP) == 0)
587 draining = true;
8b412664
PZ
588 }
589
774cb499
JO
590 /*
591 * When perf is starting the traced process, at the end events
592 * die with the process and we wait for that. Thus no need to
593 * disable events in this case.
594 */
602ad878 595 if (done && !disabled && !target__none(&opts->target)) {
3e2be2da 596 perf_evlist__disable(rec->evlist);
2711926a
JO
597 disabled = true;
598 }
de9ac07b
PZ
599 }
600
f33cbe72 601 if (forks && workload_exec_errno) {
35550da3 602 char msg[STRERR_BUFSIZE];
f33cbe72
ACM
603 const char *emsg = strerror_r(workload_exec_errno, msg, sizeof(msg));
604 pr_err("Workload failed: %s\n", emsg);
605 err = -1;
45604710 606 goto out_child;
f33cbe72
ACM
607 }
608
e3d59112 609 if (!quiet)
45604710 610 fprintf(stderr, "[ perf record: Woken up %ld times to write data ]\n", waking);
b44308f5 611
45604710
NK
612out_child:
613 if (forks) {
614 int exit_status;
addc2785 615
45604710
NK
616 if (!child_finished)
617 kill(rec->evlist->workload.pid, SIGTERM);
618
619 wait(&exit_status);
620
621 if (err < 0)
622 status = err;
623 else if (WIFEXITED(exit_status))
624 status = WEXITSTATUS(exit_status);
625 else if (WIFSIGNALED(exit_status))
626 signr = WTERMSIG(exit_status);
627 } else
628 status = err;
629
e3d59112
NK
630 /* this will be recalculated during process_buildids() */
631 rec->samples = 0;
632
45604710
NK
633 if (!err && !file->is_pipe) {
634 rec->session->header.data_size += rec->bytes_written;
635
cd10b289 636 if (!rec->no_buildid) {
45604710 637 process_buildids(rec);
cd10b289
AH
638 /*
639 * We take all buildids when the file contains
640 * AUX area tracing data because we do not decode the
641 * trace because it would take too long.
642 */
643 if (rec->opts.full_auxtrace)
644 dsos__hit_all(rec->session);
645 }
42aa276f 646 perf_session__write_header(rec->session, rec->evlist, fd, true);
45604710 647 }
39d17dac 648
e3d59112
NK
649 if (!err && !quiet) {
650 char samples[128];
651
ef149c25 652 if (rec->samples && !rec->opts.full_auxtrace)
e3d59112
NK
653 scnprintf(samples, sizeof(samples),
654 " (%" PRIu64 " samples)", rec->samples);
655 else
656 samples[0] = '\0';
657
658 fprintf(stderr, "[ perf record: Captured and wrote %.3f MB %s%s ]\n",
659 perf_data_file__size(file) / 1024.0 / 1024.0,
660 file->path, samples);
661 }
662
39d17dac
ACM
663out_delete_session:
664 perf_session__delete(session);
45604710 665 return status;
de9ac07b 666}
0e9b20b8 667
bdfebd84
RAV
668#define BRANCH_OPT(n, m) \
669 { .name = n, .mode = (m) }
670
671#define BRANCH_END { .name = NULL }
672
673struct branch_mode {
674 const char *name;
675 int mode;
676};
677
678static const struct branch_mode branch_modes[] = {
679 BRANCH_OPT("u", PERF_SAMPLE_BRANCH_USER),
680 BRANCH_OPT("k", PERF_SAMPLE_BRANCH_KERNEL),
681 BRANCH_OPT("hv", PERF_SAMPLE_BRANCH_HV),
682 BRANCH_OPT("any", PERF_SAMPLE_BRANCH_ANY),
683 BRANCH_OPT("any_call", PERF_SAMPLE_BRANCH_ANY_CALL),
684 BRANCH_OPT("any_ret", PERF_SAMPLE_BRANCH_ANY_RETURN),
685 BRANCH_OPT("ind_call", PERF_SAMPLE_BRANCH_IND_CALL),
0126d493
AK
686 BRANCH_OPT("abort_tx", PERF_SAMPLE_BRANCH_ABORT_TX),
687 BRANCH_OPT("in_tx", PERF_SAMPLE_BRANCH_IN_TX),
688 BRANCH_OPT("no_tx", PERF_SAMPLE_BRANCH_NO_TX),
0fffa5df 689 BRANCH_OPT("cond", PERF_SAMPLE_BRANCH_COND),
bdfebd84
RAV
690 BRANCH_END
691};
692
693static int
a5aabdac 694parse_branch_stack(const struct option *opt, const char *str, int unset)
bdfebd84
RAV
695{
696#define ONLY_PLM \
697 (PERF_SAMPLE_BRANCH_USER |\
698 PERF_SAMPLE_BRANCH_KERNEL |\
699 PERF_SAMPLE_BRANCH_HV)
700
701 uint64_t *mode = (uint64_t *)opt->value;
702 const struct branch_mode *br;
a5aabdac 703 char *s, *os = NULL, *p;
bdfebd84
RAV
704 int ret = -1;
705
a5aabdac
SE
706 if (unset)
707 return 0;
bdfebd84 708
a5aabdac
SE
709 /*
710 * cannot set it twice, -b + --branch-filter for instance
711 */
712 if (*mode)
bdfebd84
RAV
713 return -1;
714
a5aabdac
SE
715 /* str may be NULL in case no arg is passed to -b */
716 if (str) {
717 /* because str is read-only */
718 s = os = strdup(str);
719 if (!s)
720 return -1;
721
722 for (;;) {
723 p = strchr(s, ',');
724 if (p)
725 *p = '\0';
726
727 for (br = branch_modes; br->name; br++) {
728 if (!strcasecmp(s, br->name))
729 break;
730 }
731 if (!br->name) {
732 ui__warning("unknown branch filter %s,"
733 " check man page\n", s);
734 goto error;
735 }
bdfebd84 736
a5aabdac 737 *mode |= br->mode;
bdfebd84 738
a5aabdac
SE
739 if (!p)
740 break;
bdfebd84 741
a5aabdac
SE
742 s = p + 1;
743 }
bdfebd84
RAV
744 }
745 ret = 0;
746
a5aabdac 747 /* default to any branch */
bdfebd84 748 if ((*mode & ~ONLY_PLM) == 0) {
a5aabdac 749 *mode = PERF_SAMPLE_BRANCH_ANY;
bdfebd84
RAV
750 }
751error:
752 free(os);
753 return ret;
754}
755
72a128aa 756static void callchain_debug(void)
09b0fd45 757{
aad2b21c 758 static const char *str[CALLCHAIN_MAX] = { "NONE", "FP", "DWARF", "LBR" };
a601fdff 759
72a128aa 760 pr_debug("callchain: type %s\n", str[callchain_param.record_mode]);
26d33022 761
72a128aa 762 if (callchain_param.record_mode == CALLCHAIN_DWARF)
09b0fd45 763 pr_debug("callchain: stack dump size %d\n",
72a128aa 764 callchain_param.dump_size);
09b0fd45
JO
765}
766
72a128aa 767int record_parse_callchain_opt(const struct option *opt __maybe_unused,
09b0fd45
JO
768 const char *arg,
769 int unset)
770{
09b0fd45
JO
771 int ret;
772
72a128aa 773 callchain_param.enabled = !unset;
eb853e80 774
09b0fd45
JO
775 /* --no-call-graph */
776 if (unset) {
72a128aa 777 callchain_param.record_mode = CALLCHAIN_NONE;
09b0fd45
JO
778 pr_debug("callchain: disabled\n");
779 return 0;
780 }
781
f7f084f4 782 ret = parse_callchain_record_opt(arg);
26d33022 783 if (!ret)
72a128aa 784 callchain_debug();
26d33022
JO
785
786 return ret;
787}
788
72a128aa 789int record_callchain_opt(const struct option *opt __maybe_unused,
09b0fd45
JO
790 const char *arg __maybe_unused,
791 int unset __maybe_unused)
792{
72a128aa 793 callchain_param.enabled = true;
09b0fd45 794
72a128aa
NK
795 if (callchain_param.record_mode == CALLCHAIN_NONE)
796 callchain_param.record_mode = CALLCHAIN_FP;
eb853e80 797
72a128aa 798 callchain_debug();
09b0fd45
JO
799 return 0;
800}
801
eb853e80
JO
/*
 * Config callback: "record.call-graph" is handled as an alias of
 * "call-graph.record-mode"; every variable is then passed on to
 * perf_default_config().
 */
static int perf_record_config(const char *var, const char *value, void *cb)
{
	const char *key = strcmp(var, "record.call-graph") ?
				var : "call-graph.record-mode";

	return perf_default_config(key, value, cb);
}
809
814c8c38
PZ
/* Maps a clock name accepted on the command line to its clockid value. */
struct clockid_map {
	const char *name;
	int clockid;
};

#define CLOCKID_MAP(n, c)	\
	{ .name = n, .clockid = (c), }

#define CLOCKID_END	{ .name = NULL, }


/*
 * Fallback definitions for clock ids that the system headers of some
 * distros do not provide.
 */
#ifndef CLOCK_MONOTONIC_RAW
#define CLOCK_MONOTONIC_RAW 4
#endif
#ifndef CLOCK_BOOTTIME
#define CLOCK_BOOTTIME 7
#endif
#ifndef CLOCK_TAI
#define CLOCK_TAI 11
#endif

/* Name table searched by parse_clockid(); terminated by a NULL name. */
static const struct clockid_map clockids[] = {
	/* available for all events, NMI safe */
	CLOCKID_MAP("monotonic", CLOCK_MONOTONIC),
	CLOCKID_MAP("monotonic_raw", CLOCK_MONOTONIC_RAW),

	/* available for some events */
	CLOCKID_MAP("realtime", CLOCK_REALTIME),
	CLOCKID_MAP("boottime", CLOCK_BOOTTIME),
	CLOCKID_MAP("tai", CLOCK_TAI),

	/* short aliases */
	CLOCKID_MAP("mono", CLOCK_MONOTONIC),
	CLOCKID_MAP("raw", CLOCK_MONOTONIC_RAW),
	CLOCKID_MAP("real", CLOCK_REALTIME),
	CLOCKID_MAP("boot", CLOCK_BOOTTIME),

	CLOCKID_END,
};
852
853static int parse_clockid(const struct option *opt, const char *str, int unset)
854{
855 struct record_opts *opts = (struct record_opts *)opt->value;
856 const struct clockid_map *cm;
857 const char *ostr = str;
858
859 if (unset) {
860 opts->use_clockid = 0;
861 return 0;
862 }
863
864 /* no arg passed */
865 if (!str)
866 return 0;
867
868 /* no setting it twice */
869 if (opts->use_clockid)
870 return -1;
871
872 opts->use_clockid = true;
873
874 /* if its a number, we're done */
875 if (sscanf(str, "%d", &opts->clockid) == 1)
876 return 0;
877
878 /* allow a "CLOCK_" prefix to the name */
879 if (!strncasecmp(str, "CLOCK_", 6))
880 str += 6;
881
882 for (cm = clockids; cm->name; cm++) {
883 if (!strcasecmp(str, cm->name)) {
884 opts->clockid = cm->clockid;
885 return 0;
886 }
887 }
888
889 opts->use_clockid = false;
890 ui__warning("unknown clockid %s, check man page\n", ostr);
891 return -1;
892}
893
e9db1310
AH
894static int record__parse_mmap_pages(const struct option *opt,
895 const char *str,
896 int unset __maybe_unused)
897{
898 struct record_opts *opts = opt->value;
899 char *s, *p;
900 unsigned int mmap_pages;
901 int ret;
902
903 if (!str)
904 return -EINVAL;
905
906 s = strdup(str);
907 if (!s)
908 return -ENOMEM;
909
910 p = strchr(s, ',');
911 if (p)
912 *p = '\0';
913
914 if (*s) {
915 ret = __perf_evlist__parse_mmap_pages(&mmap_pages, s);
916 if (ret)
917 goto out_free;
918 opts->mmap_pages = mmap_pages;
919 }
920
921 if (!p) {
922 ret = 0;
923 goto out_free;
924 }
925
926 ret = __perf_evlist__parse_mmap_pages(&mmap_pages, p + 1);
927 if (ret)
928 goto out_free;
929
930 opts->auxtrace_mmap_pages = mmap_pages;
931
932out_free:
933 free(s);
934 return ret;
935}
936
/* Usage lines for 'perf record', NULL-terminated. */
static const char * const __record_usage[] = {
	"perf record [<options>] [<command>]",
	"perf record [<options>] -- <command> [<options>]",
	NULL
};

/* Exported handle to the usage lines above. */
const char * const *record_usage = __record_usage;
0e9b20b8 943
d20deb64 944/*
8c6f45a7
ACM
945 * XXX Ideally would be local to cmd_record() and passed to a record__new
946 * because we need to have access to it in record__exit, that is called
d20deb64
ACM
947 * after cmd_record() exits, but since record_options need to be accessible to
948 * builtin-script, leave it here.
949 *
950 * At least we don't touch it in all the other functions here directly.
951 *
952 * Just say no to tons of global variables, sigh.
953 */
8c6f45a7 954static struct record record = {
d20deb64 955 .opts = {
8affc2b8 956 .sample_time = true,
d20deb64
ACM
957 .mmap_pages = UINT_MAX,
958 .user_freq = UINT_MAX,
959 .user_interval = ULLONG_MAX,
447a6013 960 .freq = 4000,
d1cb9fce
NK
961 .target = {
962 .uses_mmap = true,
3aa5939d 963 .default_per_cpu = true,
d1cb9fce 964 },
d20deb64 965 },
e3d59112
NK
966 .tool = {
967 .sample = process_sample_event,
968 .fork = perf_event__process_fork,
969 .comm = perf_event__process_comm,
970 .mmap = perf_event__process_mmap,
971 .mmap2 = perf_event__process_mmap2,
972 },
d20deb64 973};
7865e817 974
#define CALLCHAIN_HELP "setup and enables call-graph (stack chain/backtrace) recording: "

/* The "dwarf" mode is only advertised when DWARF unwinding is built in. */
#ifdef HAVE_DWARF_UNWIND_SUPPORT
#define RECORD_CALLCHAIN_MODES "fp dwarf lbr"
#else
#define RECORD_CALLCHAIN_MODES "fp lbr"
#endif

/* Help text of the --call-graph option. */
const char record_callchain_help[] = CALLCHAIN_HELP RECORD_CALLCHAIN_MODES;
982
d20deb64
ACM
983/*
984 * XXX Will stay a global variable till we fix builtin-script.c to stop messing
985 * with it and switch to use the library functions in perf_evlist that came
b4006796 986 * from builtin-record.c, i.e. use record_opts,
d20deb64
ACM
987 * perf_evlist__prepare_workload, etc instead of fork+exec'in 'perf record',
988 * using pipes, etc.
989 */
e5b2c207 990struct option __record_options[] = {
d20deb64 991 OPT_CALLBACK('e', "event", &record.evlist, "event",
86847b62 992 "event selector. use 'perf list' to list available events",
f120f9d5 993 parse_events_option),
d20deb64 994 OPT_CALLBACK(0, "filter", &record.evlist, "filter",
c171b552 995 "event filter", parse_filter),
bea03405 996 OPT_STRING('p', "pid", &record.opts.target.pid, "pid",
d6d901c2 997 "record events on existing process id"),
bea03405 998 OPT_STRING('t', "tid", &record.opts.target.tid, "tid",
d6d901c2 999 "record events on existing thread id"),
d20deb64 1000 OPT_INTEGER('r', "realtime", &record.realtime_prio,
0e9b20b8 1001 "collect data with this RT SCHED_FIFO priority"),
509051ea 1002 OPT_BOOLEAN(0, "no-buffering", &record.opts.no_buffering,
acac03fa 1003 "collect data without buffering"),
d20deb64 1004 OPT_BOOLEAN('R', "raw-samples", &record.opts.raw_samples,
daac07b2 1005 "collect raw sample records from all opened counters"),
bea03405 1006 OPT_BOOLEAN('a', "all-cpus", &record.opts.target.system_wide,
0e9b20b8 1007 "system-wide collection from all CPUs"),
bea03405 1008 OPT_STRING('C', "cpu", &record.opts.target.cpu_list, "cpu",
c45c6ea2 1009 "list of cpus to monitor"),
d20deb64 1010 OPT_U64('c', "count", &record.opts.user_interval, "event period to sample"),
f5fc1412 1011 OPT_STRING('o', "output", &record.file.path, "file",
abaff32a 1012 "output file name"),
69e7e5b0
AH
1013 OPT_BOOLEAN_SET('i', "no-inherit", &record.opts.no_inherit,
1014 &record.opts.no_inherit_set,
1015 "child tasks do not inherit counters"),
d20deb64 1016 OPT_UINTEGER('F', "freq", &record.opts.user_freq, "profile at this frequency"),
e9db1310
AH
1017 OPT_CALLBACK('m', "mmap-pages", &record.opts, "pages[,pages]",
1018 "number of mmap data pages and AUX area tracing mmap pages",
1019 record__parse_mmap_pages),
d20deb64 1020 OPT_BOOLEAN(0, "group", &record.opts.group,
43bece79 1021 "put the counters into a counter group"),
09b0fd45
JO
1022 OPT_CALLBACK_NOOPT('g', NULL, &record.opts,
1023 NULL, "enables call-graph recording" ,
1024 &record_callchain_opt),
1025 OPT_CALLBACK(0, "call-graph", &record.opts,
1026 "mode[,dump_size]", record_callchain_help,
1027 &record_parse_callchain_opt),
c0555642 1028 OPT_INCR('v', "verbose", &verbose,
3da297a6 1029 "be more verbose (show counter open errors, etc)"),
b44308f5 1030 OPT_BOOLEAN('q', "quiet", &quiet, "don't print any message"),
d20deb64 1031 OPT_BOOLEAN('s', "stat", &record.opts.inherit_stat,
649c48a9 1032 "per thread counts"),
d20deb64 1033 OPT_BOOLEAN('d', "data", &record.opts.sample_address,
4bba828d 1034 "Sample addresses"),
d20deb64 1035 OPT_BOOLEAN('T', "timestamp", &record.opts.sample_time, "Sample timestamps"),
3e76ac78 1036 OPT_BOOLEAN('P', "period", &record.opts.period, "Sample period"),
d20deb64 1037 OPT_BOOLEAN('n', "no-samples", &record.opts.no_samples,
649c48a9 1038 "don't sample"),
d20deb64 1039 OPT_BOOLEAN('N', "no-buildid-cache", &record.no_buildid_cache,
a1ac1d3c 1040 "do not update the buildid cache"),
d20deb64 1041 OPT_BOOLEAN('B', "no-buildid", &record.no_buildid,
baa2f6ce 1042 "do not collect buildids in perf.data"),
d20deb64 1043 OPT_CALLBACK('G', "cgroup", &record.evlist, "name",
023695d9
SE
1044 "monitor event in cgroup name only",
1045 parse_cgroups),
a6205a35 1046 OPT_UINTEGER('D', "delay", &record.opts.initial_delay,
6619a53e 1047 "ms to wait before starting measurement after program start"),
bea03405
NK
1048 OPT_STRING('u', "uid", &record.opts.target.uid_str, "user",
1049 "user to profile"),
a5aabdac
SE
1050
1051 OPT_CALLBACK_NOOPT('b', "branch-any", &record.opts.branch_stack,
1052 "branch any", "sample any taken branches",
1053 parse_branch_stack),
1054
1055 OPT_CALLBACK('j', "branch-filter", &record.opts.branch_stack,
1056 "branch filter mask", "branch stack filter modes",
bdfebd84 1057 parse_branch_stack),
05484298
AK
1058 OPT_BOOLEAN('W', "weight", &record.opts.sample_weight,
1059 "sample by weight (on special events only)"),
475eeab9
AK
1060 OPT_BOOLEAN(0, "transaction", &record.opts.sample_transaction,
1061 "sample transaction flags (special events only)"),
3aa5939d
AH
1062 OPT_BOOLEAN(0, "per-thread", &record.opts.target.per_thread,
1063 "use per-thread mmaps"),
4b6c5177
SE
1064 OPT_BOOLEAN('I', "intr-regs", &record.opts.sample_intr_regs,
1065 "Sample machine registers on interrupt"),
85c273d2
AK
1066 OPT_BOOLEAN(0, "running-time", &record.opts.running_time,
1067 "Record running/enabled time of read (:S) events"),
814c8c38
PZ
1068 OPT_CALLBACK('k', "clockid", &record.opts,
1069 "clockid", "clockid to use for events, see clock_gettime()",
1070 parse_clockid),
0e9b20b8
IM
1071 OPT_END()
1072};
1073
e5b2c207
NK
/*
 * Non-static pointer to the option table defined above; cmd_record() hands
 * it to parse_options() and usage_with_options().
 * NOTE(review): presumably kept non-static so other perf code can reach the
 * record options - confirm against the users outside this file.
 */
1074struct option *record_options = __record_options;
1075
1d037ca1 1076int cmd_record(int argc, const char **argv, const char *prefix __maybe_unused)
0e9b20b8 1077{
ef149c25 1078 int err;
8c6f45a7 1079 struct record *rec = &record;
16ad2ffb 1080 char errbuf[BUFSIZ];
0e9b20b8 1081
3e2be2da
ACM
1082 rec->evlist = perf_evlist__new();
1083 if (rec->evlist == NULL)
361c99a6
ACM
1084 return -ENOMEM;
1085
eb853e80
JO
1086 perf_config(perf_record_config, rec);
1087
bca647aa 1088 argc = parse_options(argc, argv, record_options, record_usage,
655000e7 1089 PARSE_OPT_STOP_AT_NON_OPTION);
602ad878 1090 if (!argc && target__none(&rec->opts.target))
bca647aa 1091 usage_with_options(record_usage, record_options);
0e9b20b8 1092
bea03405 1093 if (nr_cgroups && !rec->opts.target.system_wide) {
3780f488
NK
1094 ui__error("cgroup monitoring only available in"
1095 " system-wide mode\n");
023695d9
SE
1096 usage_with_options(record_usage, record_options);
1097 }
1098
ef149c25
AH
1099 if (!rec->itr) {
1100 rec->itr = auxtrace_record__init(rec->evlist, &err);
1101 if (err)
1102 return err;
1103 }
1104
1105 err = -ENOMEM;
1106
0a7e6d1b 1107 symbol__init(NULL);
baa2f6ce 1108
ec80fde7 1109 if (symbol_conf.kptr_restrict)
646aaea6
ACM
1110 pr_warning(
1111"WARNING: Kernel address maps (/proc/{kallsyms,modules}) are restricted,\n"
1112"check /proc/sys/kernel/kptr_restrict.\n\n"
1113"Samples in kernel functions may not be resolved if a suitable vmlinux\n"
1114"file is not found in the buildid cache or in the vmlinux path.\n\n"
1115"Samples in kernel modules won't be resolved at all.\n\n"
1116"If some relocation was applied (e.g. kexec) symbols may be misresolved\n"
1117"even with a suitable vmlinux or kallsyms file.\n\n");
ec80fde7 1118
d20deb64 1119 if (rec->no_buildid_cache || rec->no_buildid)
a1ac1d3c 1120 disable_buildid_cache();
655000e7 1121
3e2be2da
ACM
1122 if (rec->evlist->nr_entries == 0 &&
1123 perf_evlist__add_default(rec->evlist) < 0) {
69aad6f1
ACM
1124 pr_err("Not enough memory for event selector list\n");
1125 goto out_symbol_exit;
bbd36e5e 1126 }
0e9b20b8 1127
69e7e5b0
AH
1128 if (rec->opts.target.tid && !rec->opts.no_inherit_set)
1129 rec->opts.no_inherit = true;
1130
602ad878 1131 err = target__validate(&rec->opts.target);
16ad2ffb 1132 if (err) {
602ad878 1133 target__strerror(&rec->opts.target, err, errbuf, BUFSIZ);
16ad2ffb
NK
1134 ui__warning("%s", errbuf);
1135 }
1136
602ad878 1137 err = target__parse_uid(&rec->opts.target);
16ad2ffb
NK
1138 if (err) {
1139 int saved_errno = errno;
4bd0f2d2 1140
602ad878 1141 target__strerror(&rec->opts.target, err, errbuf, BUFSIZ);
3780f488 1142 ui__error("%s", errbuf);
16ad2ffb
NK
1143
1144 err = -saved_errno;
8fa60e1f 1145 goto out_symbol_exit;
16ad2ffb 1146 }
0d37aa34 1147
16ad2ffb 1148 err = -ENOMEM;
3e2be2da 1149 if (perf_evlist__create_maps(rec->evlist, &rec->opts.target) < 0)
dd7927f4 1150 usage_with_options(record_usage, record_options);
69aad6f1 1151
ef149c25
AH
1152 err = auxtrace_record__options(rec->itr, rec->evlist, &rec->opts);
1153 if (err)
1154 goto out_symbol_exit;
1155
b4006796 1156 if (record_opts__config(&rec->opts)) {
39d17dac 1157 err = -EINVAL;
03ad9747 1158 goto out_symbol_exit;
7e4ff9e3
MG
1159 }
1160
d20deb64 1161 err = __cmd_record(&record, argc, argv);
d65a458b 1162out_symbol_exit:
45604710 1163 perf_evlist__delete(rec->evlist);
d65a458b 1164 symbol__exit();
ef149c25 1165 auxtrace_record__free(rec->itr);
39d17dac 1166 return err;
0e9b20b8 1167}