perf evlist: Introduce side band thread
[linux-2.6-block.git] / tools / perf / builtin-record.c
CommitLineData
b2441318 1// SPDX-License-Identifier: GPL-2.0
abaff32a 2/*
bf9e1876
IM
3 * builtin-record.c
4 *
5 * Builtin record command: Record the profile of a workload
6 * (or a CPU, or a PID) into the perf.data output file - for
7 * later analysis via perf report.
abaff32a 8 */
16f762a2 9#include "builtin.h"
bf9e1876
IM
10
11#include "perf.h"
12
6122e4e4 13#include "util/build-id.h"
6eda5838 14#include "util/util.h"
4b6ab94e 15#include <subcmd/parse-options.h>
8ad8db37 16#include "util/parse-events.h"
41840d21 17#include "util/config.h"
6eda5838 18
8f651eae 19#include "util/callchain.h"
f14d5707 20#include "util/cgroup.h"
7c6a1c65 21#include "util/header.h"
66e274f3 22#include "util/event.h"
361c99a6 23#include "util/evlist.h"
69aad6f1 24#include "util/evsel.h"
8f28827a 25#include "util/debug.h"
94c744b6 26#include "util/session.h"
45694aa7 27#include "util/tool.h"
8d06367f 28#include "util/symbol.h"
a12b51c4 29#include "util/cpumap.h"
fd78260b 30#include "util/thread_map.h"
f5fc1412 31#include "util/data.h"
bcc84ec6 32#include "util/perf_regs.h"
ef149c25 33#include "util/auxtrace.h"
46bc29b9 34#include "util/tsc.h"
f00898f4 35#include "util/parse-branch-options.h"
bcc84ec6 36#include "util/parse-regs-options.h"
71dc2326 37#include "util/llvm-utils.h"
8690a2a7 38#include "util/bpf-loader.h"
5f9cf599 39#include "util/trigger.h"
a074865e 40#include "util/perf-hooks.h"
f13de660 41#include "util/cpu-set-sched.h"
c5e4027e 42#include "util/time-utils.h"
58db1d6e 43#include "util/units.h"
7b612e29 44#include "util/bpf-event.h"
d8871ea7 45#include "asm/bug.h"
7c6a1c65 46
a43783ae 47#include <errno.h>
fd20e811 48#include <inttypes.h>
67230479 49#include <locale.h>
4208735d 50#include <poll.h>
97124d5e 51#include <unistd.h>
de9ac07b 52#include <sched.h>
9607ad3a 53#include <signal.h>
a41794cd 54#include <sys/mman.h>
4208735d 55#include <sys/wait.h>
0693e680 56#include <linux/time64.h>
78da39fa 57
1b43b704 58struct switch_output {
dc0c6127 59 bool enabled;
1b43b704 60 bool signal;
dc0c6127 61 unsigned long size;
bfacbe3b 62 unsigned long time;
cb4e1ebb
JO
63 const char *str;
64 bool set;
03724b2e
AK
65 char **filenames;
66 int num_files;
67 int cur_file;
1b43b704
JO
68};
69
8c6f45a7 70struct record {
45694aa7 71 struct perf_tool tool;
b4006796 72 struct record_opts opts;
d20deb64 73 u64 bytes_written;
8ceb41d7 74 struct perf_data data;
ef149c25 75 struct auxtrace_record *itr;
d20deb64
ACM
76 struct perf_evlist *evlist;
77 struct perf_session *session;
d20deb64 78 int realtime_prio;
d20deb64 79 bool no_buildid;
d2db9a98 80 bool no_buildid_set;
d20deb64 81 bool no_buildid_cache;
d2db9a98 82 bool no_buildid_cache_set;
6156681b 83 bool buildid_all;
ecfd7a9c 84 bool timestamp_filename;
68588baf 85 bool timestamp_boundary;
1b43b704 86 struct switch_output switch_output;
9f065194 87 unsigned long long samples;
9d2ed645 88 cpu_set_t affinity_mask;
0f82ebc4 89};
a21ca2ca 90
dc0c6127
JO
91static volatile int auxtrace_record__snapshot_started;
92static DEFINE_TRIGGER(auxtrace_snapshot_trigger);
93static DEFINE_TRIGGER(switch_output_trigger);
94
9d2ed645
AB
95static const char *affinity_tags[PERF_AFFINITY_MAX] = {
96 "SYS", "NODE", "CPU"
97};
98
dc0c6127
JO
99static bool switch_output_signal(struct record *rec)
100{
101 return rec->switch_output.signal &&
102 trigger_is_ready(&switch_output_trigger);
103}
104
105static bool switch_output_size(struct record *rec)
106{
107 return rec->switch_output.size &&
108 trigger_is_ready(&switch_output_trigger) &&
109 (rec->bytes_written >= rec->switch_output.size);
110}
111
bfacbe3b
JO
112static bool switch_output_time(struct record *rec)
113{
114 return rec->switch_output.time &&
115 trigger_is_ready(&switch_output_trigger);
116}
117
ded2b8fe
JO
118static int record__write(struct record *rec, struct perf_mmap *map __maybe_unused,
119 void *bf, size_t size)
f5970550 120{
ded2b8fe
JO
121 struct perf_data_file *file = &rec->session->data->file;
122
123 if (perf_data_file__write(file, bf, size) < 0) {
50a9b868
JO
124 pr_err("failed to write perf data, error: %m\n");
125 return -1;
f5970550 126 }
8d3eca20 127
cf8b2e69 128 rec->bytes_written += size;
dc0c6127
JO
129
130 if (switch_output_size(rec))
131 trigger_hit(&switch_output_trigger);
132
8d3eca20 133 return 0;
f5970550
PZ
134}
135
d3d1af6f
AB
136#ifdef HAVE_AIO_SUPPORT
137static int record__aio_write(struct aiocb *cblock, int trace_fd,
138 void *buf, size_t size, off_t off)
139{
140 int rc;
141
142 cblock->aio_fildes = trace_fd;
143 cblock->aio_buf = buf;
144 cblock->aio_nbytes = size;
145 cblock->aio_offset = off;
146 cblock->aio_sigevent.sigev_notify = SIGEV_NONE;
147
148 do {
149 rc = aio_write(cblock);
150 if (rc == 0) {
151 break;
152 } else if (errno != EAGAIN) {
153 cblock->aio_fildes = -1;
154 pr_err("failed to queue perf data, error: %m\n");
155 break;
156 }
157 } while (1);
158
159 return rc;
160}
161
162static int record__aio_complete(struct perf_mmap *md, struct aiocb *cblock)
163{
164 void *rem_buf;
165 off_t rem_off;
166 size_t rem_size;
167 int rc, aio_errno;
168 ssize_t aio_ret, written;
169
170 aio_errno = aio_error(cblock);
171 if (aio_errno == EINPROGRESS)
172 return 0;
173
174 written = aio_ret = aio_return(cblock);
175 if (aio_ret < 0) {
176 if (aio_errno != EINTR)
177 pr_err("failed to write perf data, error: %m\n");
178 written = 0;
179 }
180
181 rem_size = cblock->aio_nbytes - written;
182
183 if (rem_size == 0) {
184 cblock->aio_fildes = -1;
185 /*
186 * md->refcount is incremented in perf_mmap__push() for
187 * every enqueued aio write request so decrement it because
188 * the request is now complete.
189 */
190 perf_mmap__put(md);
191 rc = 1;
192 } else {
193 /*
194 * aio write request may require restart with the
195 * reminder if the kernel didn't write whole
196 * chunk at once.
197 */
198 rem_off = cblock->aio_offset + written;
199 rem_buf = (void *)(cblock->aio_buf + written);
200 record__aio_write(cblock, cblock->aio_fildes,
201 rem_buf, rem_size, rem_off);
202 rc = 0;
203 }
204
205 return rc;
206}
207
93f20c0f 208static int record__aio_sync(struct perf_mmap *md, bool sync_all)
d3d1af6f 209{
93f20c0f
AB
210 struct aiocb **aiocb = md->aio.aiocb;
211 struct aiocb *cblocks = md->aio.cblocks;
d3d1af6f 212 struct timespec timeout = { 0, 1000 * 1000 * 1 }; /* 1ms */
93f20c0f 213 int i, do_suspend;
d3d1af6f
AB
214
215 do {
93f20c0f
AB
216 do_suspend = 0;
217 for (i = 0; i < md->aio.nr_cblocks; ++i) {
218 if (cblocks[i].aio_fildes == -1 || record__aio_complete(md, &cblocks[i])) {
219 if (sync_all)
220 aiocb[i] = NULL;
221 else
222 return i;
223 } else {
224 /*
225 * Started aio write is not complete yet
226 * so it has to be waited before the
227 * next allocation.
228 */
229 aiocb[i] = &cblocks[i];
230 do_suspend = 1;
231 }
232 }
233 if (!do_suspend)
234 return -1;
d3d1af6f 235
93f20c0f 236 while (aio_suspend((const struct aiocb **)aiocb, md->aio.nr_cblocks, &timeout)) {
d3d1af6f
AB
237 if (!(errno == EAGAIN || errno == EINTR))
238 pr_err("failed to sync perf data, error: %m\n");
239 }
240 } while (1);
241}
242
243static int record__aio_pushfn(void *to, struct aiocb *cblock, void *bf, size_t size, off_t off)
244{
245 struct record *rec = to;
246 int ret, trace_fd = rec->session->data->file.fd;
247
248 rec->samples++;
249
250 ret = record__aio_write(cblock, trace_fd, bf, size, off);
251 if (!ret) {
252 rec->bytes_written += size;
253 if (switch_output_size(rec))
254 trigger_hit(&switch_output_trigger);
255 }
256
257 return ret;
258}
259
260static off_t record__aio_get_pos(int trace_fd)
261{
262 return lseek(trace_fd, 0, SEEK_CUR);
263}
264
265static void record__aio_set_pos(int trace_fd, off_t pos)
266{
267 lseek(trace_fd, pos, SEEK_SET);
268}
269
270static void record__aio_mmap_read_sync(struct record *rec)
271{
272 int i;
273 struct perf_evlist *evlist = rec->evlist;
274 struct perf_mmap *maps = evlist->mmap;
275
276 if (!rec->opts.nr_cblocks)
277 return;
278
279 for (i = 0; i < evlist->nr_mmaps; i++) {
280 struct perf_mmap *map = &maps[i];
281
282 if (map->base)
93f20c0f 283 record__aio_sync(map, true);
d3d1af6f
AB
284 }
285}
286
287static int nr_cblocks_default = 1;
93f20c0f 288static int nr_cblocks_max = 4;
d3d1af6f
AB
289
290static int record__aio_parse(const struct option *opt,
93f20c0f 291 const char *str,
d3d1af6f
AB
292 int unset)
293{
294 struct record_opts *opts = (struct record_opts *)opt->value;
295
93f20c0f 296 if (unset) {
d3d1af6f 297 opts->nr_cblocks = 0;
93f20c0f
AB
298 } else {
299 if (str)
300 opts->nr_cblocks = strtol(str, NULL, 0);
301 if (!opts->nr_cblocks)
302 opts->nr_cblocks = nr_cblocks_default;
303 }
d3d1af6f
AB
304
305 return 0;
306}
307#else /* HAVE_AIO_SUPPORT */
93f20c0f
AB
308static int nr_cblocks_max = 0;
309
310static int record__aio_sync(struct perf_mmap *md __maybe_unused, bool sync_all __maybe_unused)
d3d1af6f 311{
93f20c0f 312 return -1;
d3d1af6f
AB
313}
314
315static int record__aio_pushfn(void *to __maybe_unused, struct aiocb *cblock __maybe_unused,
316 void *bf __maybe_unused, size_t size __maybe_unused, off_t off __maybe_unused)
317{
318 return -1;
319}
320
321static off_t record__aio_get_pos(int trace_fd __maybe_unused)
322{
323 return -1;
324}
325
326static void record__aio_set_pos(int trace_fd __maybe_unused, off_t pos __maybe_unused)
327{
328}
329
330static void record__aio_mmap_read_sync(struct record *rec __maybe_unused)
331{
332}
333#endif
334
335static int record__aio_enabled(struct record *rec)
336{
337 return rec->opts.nr_cblocks > 0;
338}
339
45694aa7 340static int process_synthesized_event(struct perf_tool *tool,
d20deb64 341 union perf_event *event,
1d037ca1
IT
342 struct perf_sample *sample __maybe_unused,
343 struct machine *machine __maybe_unused)
234fbbf5 344{
8c6f45a7 345 struct record *rec = container_of(tool, struct record, tool);
ded2b8fe 346 return record__write(rec, NULL, event, event->header.size);
234fbbf5
ACM
347}
348
ded2b8fe 349static int record__pushfn(struct perf_mmap *map, void *to, void *bf, size_t size)
d37f1586
ACM
350{
351 struct record *rec = to;
352
353 rec->samples++;
ded2b8fe 354 return record__write(rec, map, bf, size);
d37f1586
ACM
355}
356
2dd6d8a1
AH
static volatile int done;
static volatile int signr = -1;
static volatile int child_finished;

/* Common handler: remember what happened and ask the main loop to stop. */
static void sig_handler(int sig)
{
	if (sig == SIGCHLD)
		child_finished = 1;
	else
		signr = sig;

	done = 1;
}

/* On SIGSEGV let perf hooks clean up, then dump a stack trace. */
static void sigsegv_handler(int sig)
{
	perf_hooks__recover();
	sighandler_dump_stack(sig);
}

/* Re-raise the fatal signal with default disposition at exit time. */
static void record__sig_exit(void)
{
	if (signr == -1)
		return;

	signal(signr, SIG_DFL);
	raise(signr);
}
385
e31f0d01
AH
386#ifdef HAVE_AUXTRACE_SUPPORT
387
ef149c25 388static int record__process_auxtrace(struct perf_tool *tool,
ded2b8fe 389 struct perf_mmap *map,
ef149c25
AH
390 union perf_event *event, void *data1,
391 size_t len1, void *data2, size_t len2)
392{
393 struct record *rec = container_of(tool, struct record, tool);
8ceb41d7 394 struct perf_data *data = &rec->data;
ef149c25
AH
395 size_t padding;
396 u8 pad[8] = {0};
397
cd3dd8dd 398 if (!perf_data__is_pipe(data) && !perf_data__is_dir(data)) {
99fa2984 399 off_t file_offset;
8ceb41d7 400 int fd = perf_data__fd(data);
99fa2984
AH
401 int err;
402
403 file_offset = lseek(fd, 0, SEEK_CUR);
404 if (file_offset == -1)
405 return -1;
406 err = auxtrace_index__auxtrace_event(&rec->session->auxtrace_index,
407 event, file_offset);
408 if (err)
409 return err;
410 }
411
ef149c25
AH
412 /* event.auxtrace.size includes padding, see __auxtrace_mmap__read() */
413 padding = (len1 + len2) & 7;
414 if (padding)
415 padding = 8 - padding;
416
ded2b8fe
JO
417 record__write(rec, map, event, event->header.size);
418 record__write(rec, map, data1, len1);
ef149c25 419 if (len2)
ded2b8fe
JO
420 record__write(rec, map, data2, len2);
421 record__write(rec, map, &pad, padding);
ef149c25
AH
422
423 return 0;
424}
425
426static int record__auxtrace_mmap_read(struct record *rec,
e035f4ca 427 struct perf_mmap *map)
ef149c25
AH
428{
429 int ret;
430
e035f4ca 431 ret = auxtrace_mmap__read(map, rec->itr, &rec->tool,
ef149c25
AH
432 record__process_auxtrace);
433 if (ret < 0)
434 return ret;
435
436 if (ret)
437 rec->samples++;
438
439 return 0;
440}
441
2dd6d8a1 442static int record__auxtrace_mmap_read_snapshot(struct record *rec,
e035f4ca 443 struct perf_mmap *map)
2dd6d8a1
AH
444{
445 int ret;
446
e035f4ca 447 ret = auxtrace_mmap__read_snapshot(map, rec->itr, &rec->tool,
2dd6d8a1
AH
448 record__process_auxtrace,
449 rec->opts.auxtrace_snapshot_size);
450 if (ret < 0)
451 return ret;
452
453 if (ret)
454 rec->samples++;
455
456 return 0;
457}
458
459static int record__auxtrace_read_snapshot_all(struct record *rec)
460{
461 int i;
462 int rc = 0;
463
464 for (i = 0; i < rec->evlist->nr_mmaps; i++) {
e035f4ca 465 struct perf_mmap *map = &rec->evlist->mmap[i];
2dd6d8a1 466
e035f4ca 467 if (!map->auxtrace_mmap.base)
2dd6d8a1
AH
468 continue;
469
e035f4ca 470 if (record__auxtrace_mmap_read_snapshot(rec, map) != 0) {
2dd6d8a1
AH
471 rc = -1;
472 goto out;
473 }
474 }
475out:
476 return rc;
477}
478
479static void record__read_auxtrace_snapshot(struct record *rec)
480{
481 pr_debug("Recording AUX area tracing snapshot\n");
482 if (record__auxtrace_read_snapshot_all(rec) < 0) {
5f9cf599 483 trigger_error(&auxtrace_snapshot_trigger);
2dd6d8a1 484 } else {
5f9cf599
WN
485 if (auxtrace_record__snapshot_finish(rec->itr))
486 trigger_error(&auxtrace_snapshot_trigger);
487 else
488 trigger_ready(&auxtrace_snapshot_trigger);
2dd6d8a1
AH
489 }
490}
491
4b5ea3bd
AH
492static int record__auxtrace_init(struct record *rec)
493{
494 int err;
495
496 if (!rec->itr) {
497 rec->itr = auxtrace_record__init(rec->evlist, &err);
498 if (err)
499 return err;
500 }
501
502 err = auxtrace_parse_snapshot_options(rec->itr, &rec->opts,
503 rec->opts.auxtrace_snapshot_opts);
504 if (err)
505 return err;
506
507 return auxtrace_parse_filters(rec->evlist);
508}
509
e31f0d01
AH
510#else
511
512static inline
513int record__auxtrace_mmap_read(struct record *rec __maybe_unused,
e035f4ca 514 struct perf_mmap *map __maybe_unused)
e31f0d01
AH
515{
516 return 0;
517}
518
2dd6d8a1
AH
519static inline
520void record__read_auxtrace_snapshot(struct record *rec __maybe_unused)
de9ac07b 521{
f7b7c26e
PZ
522}
523
2dd6d8a1
AH
524static inline
525int auxtrace_record__snapshot_start(struct auxtrace_record *itr __maybe_unused)
f7b7c26e 526{
2dd6d8a1 527 return 0;
de9ac07b
PZ
528}
529
4b5ea3bd
AH
530static int record__auxtrace_init(struct record *rec __maybe_unused)
531{
532 return 0;
533}
534
2dd6d8a1
AH
535#endif
536
cda57a8c
WN
537static int record__mmap_evlist(struct record *rec,
538 struct perf_evlist *evlist)
539{
540 struct record_opts *opts = &rec->opts;
541 char msg[512];
542
f13de660
AB
543 if (opts->affinity != PERF_AFFINITY_SYS)
544 cpu__setup_cpunode_map();
545
7a276ff6 546 if (perf_evlist__mmap_ex(evlist, opts->mmap_pages,
cda57a8c 547 opts->auxtrace_mmap_pages,
9d2ed645
AB
548 opts->auxtrace_snapshot_mode,
549 opts->nr_cblocks, opts->affinity) < 0) {
cda57a8c
WN
550 if (errno == EPERM) {
551 pr_err("Permission error mapping pages.\n"
552 "Consider increasing "
553 "/proc/sys/kernel/perf_event_mlock_kb,\n"
554 "or try again with a smaller value of -m/--mmap_pages.\n"
555 "(current value: %u,%u)\n",
556 opts->mmap_pages, opts->auxtrace_mmap_pages);
557 return -errno;
558 } else {
559 pr_err("failed to mmap with %d (%s)\n", errno,
c8b5f2c9 560 str_error_r(errno, msg, sizeof(msg)));
cda57a8c
WN
561 if (errno)
562 return -errno;
563 else
564 return -EINVAL;
565 }
566 }
567 return 0;
568}
569
570static int record__mmap(struct record *rec)
571{
572 return record__mmap_evlist(rec, rec->evlist);
573}
574
8c6f45a7 575static int record__open(struct record *rec)
dd7927f4 576{
d6195a6a 577 char msg[BUFSIZ];
6a4bb04c 578 struct perf_evsel *pos;
d20deb64
ACM
579 struct perf_evlist *evlist = rec->evlist;
580 struct perf_session *session = rec->session;
b4006796 581 struct record_opts *opts = &rec->opts;
8d3eca20 582 int rc = 0;
dd7927f4 583
d3dbf43c
ACM
584 /*
585 * For initial_delay we need to add a dummy event so that we can track
586 * PERF_RECORD_MMAP while we wait for the initial delay to enable the
587 * real events, the ones asked by the user.
588 */
589 if (opts->initial_delay) {
590 if (perf_evlist__add_dummy(evlist))
591 return -ENOMEM;
592
593 pos = perf_evlist__first(evlist);
594 pos->tracking = 0;
595 pos = perf_evlist__last(evlist);
596 pos->tracking = 1;
597 pos->attr.enable_on_exec = 1;
598 }
599
e68ae9cf 600 perf_evlist__config(evlist, opts, &callchain_param);
cac21425 601
e5cadb93 602 evlist__for_each_entry(evlist, pos) {
dd7927f4 603try_again:
d988d5ee 604 if (perf_evsel__open(pos, pos->cpus, pos->threads) < 0) {
56e52e85 605 if (perf_evsel__fallback(pos, errno, msg, sizeof(msg))) {
bb963e16 606 if (verbose > 0)
c0a54341 607 ui__warning("%s\n", msg);
d6d901c2
ZY
608 goto try_again;
609 }
cf99ad14
AK
610 if ((errno == EINVAL || errno == EBADF) &&
611 pos->leader != pos &&
612 pos->weak_group) {
613 pos = perf_evlist__reset_weak_group(evlist, pos);
614 goto try_again;
615 }
56e52e85
ACM
616 rc = -errno;
617 perf_evsel__open_strerror(pos, &opts->target,
618 errno, msg, sizeof(msg));
619 ui__error("%s\n", msg);
8d3eca20 620 goto out;
c171b552 621 }
bfd8f72c
AK
622
623 pos->supported = true;
c171b552 624 }
a43d3f08 625
23d4aad4 626 if (perf_evlist__apply_filters(evlist, &pos)) {
62d94b00 627 pr_err("failed to set filter \"%s\" on event %s with %d (%s)\n",
23d4aad4 628 pos->filter, perf_evsel__name(pos), errno,
c8b5f2c9 629 str_error_r(errno, msg, sizeof(msg)));
8d3eca20 630 rc = -1;
5d8bb1ec
MP
631 goto out;
632 }
633
cda57a8c
WN
634 rc = record__mmap(rec);
635 if (rc)
8d3eca20 636 goto out;
0a27d7f9 637
563aecb2 638 session->evlist = evlist;
7b56cce2 639 perf_session__set_id_hdr_size(session);
8d3eca20
DA
640out:
641 return rc;
16c8a109
PZ
642}
643
e3d59112
NK
644static int process_sample_event(struct perf_tool *tool,
645 union perf_event *event,
646 struct perf_sample *sample,
647 struct perf_evsel *evsel,
648 struct machine *machine)
649{
650 struct record *rec = container_of(tool, struct record, tool);
651
68588baf
JY
652 if (rec->evlist->first_sample_time == 0)
653 rec->evlist->first_sample_time = sample->time;
654
655 rec->evlist->last_sample_time = sample->time;
e3d59112 656
68588baf
JY
657 if (rec->buildid_all)
658 return 0;
659
660 rec->samples++;
e3d59112
NK
661 return build_id__mark_dso_hit(tool, event, sample, evsel, machine);
662}
663
8c6f45a7 664static int process_buildids(struct record *rec)
6122e4e4 665{
f5fc1412 666 struct perf_session *session = rec->session;
6122e4e4 667
45112e89 668 if (perf_data__size(&rec->data) == 0)
9f591fd7
ACM
669 return 0;
670
00dc8657
NK
671 /*
672 * During this process, it'll load kernel map and replace the
673 * dso->long_name to a real pathname it found. In this case
674 * we prefer the vmlinux path like
675 * /lib/modules/3.16.4/build/vmlinux
676 *
677 * rather than build-id path (in debug directory).
678 * $HOME/.debug/.build-id/f0/6e17aa50adf4d00b88925e03775de107611551
679 */
680 symbol_conf.ignore_vmlinux_buildid = true;
681
6156681b
NK
682 /*
683 * If --buildid-all is given, it marks all DSO regardless of hits,
68588baf
JY
684 * so no need to process samples. But if timestamp_boundary is enabled,
685 * it still needs to walk on all samples to get the timestamps of
686 * first/last samples.
6156681b 687 */
68588baf 688 if (rec->buildid_all && !rec->timestamp_boundary)
6156681b
NK
689 rec->tool.sample = NULL;
690
b7b61cbe 691 return perf_session__process_events(session);
6122e4e4
ACM
692}
693
8115d60c 694static void perf_event__synthesize_guest_os(struct machine *machine, void *data)
a1645ce1
ZY
695{
696 int err;
45694aa7 697 struct perf_tool *tool = data;
a1645ce1
ZY
698 /*
699 *As for guest kernel when processing subcommand record&report,
700 *we arrange module mmap prior to guest kernel mmap and trigger
701 *a preload dso because default guest module symbols are loaded
702 *from guest kallsyms instead of /lib/modules/XXX/XXX. This
703 *method is used to avoid symbol missing when the first addr is
704 *in module instead of in guest kernel.
705 */
45694aa7 706 err = perf_event__synthesize_modules(tool, process_synthesized_event,
743eb868 707 machine);
a1645ce1
ZY
708 if (err < 0)
709 pr_err("Couldn't record guest kernel [%d]'s reference"
23346f21 710 " relocation symbol.\n", machine->pid);
a1645ce1 711
a1645ce1
ZY
712 /*
713 * We use _stext for guest kernel because guest kernel's /proc/kallsyms
714 * have no _text sometimes.
715 */
45694aa7 716 err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
0ae617be 717 machine);
a1645ce1
ZY
718 if (err < 0)
719 pr_err("Couldn't record guest kernel [%d]'s reference"
23346f21 720 " relocation symbol.\n", machine->pid);
a1645ce1
ZY
721}
722
98402807
FW
723static struct perf_event_header finished_round_event = {
724 .size = sizeof(struct perf_event_header),
725 .type = PERF_RECORD_FINISHED_ROUND,
726};
727
f13de660
AB
728static void record__adjust_affinity(struct record *rec, struct perf_mmap *map)
729{
730 if (rec->opts.affinity != PERF_AFFINITY_SYS &&
731 !CPU_EQUAL(&rec->affinity_mask, &map->affinity_mask)) {
732 CPU_ZERO(&rec->affinity_mask);
733 CPU_OR(&rec->affinity_mask, &rec->affinity_mask, &map->affinity_mask);
734 sched_setaffinity(0, sizeof(rec->affinity_mask), &rec->affinity_mask);
735 }
736}
737
a4ea0ec4 738static int record__mmap_read_evlist(struct record *rec, struct perf_evlist *evlist,
0b72d69a 739 bool overwrite)
98402807 740{
dcabb507 741 u64 bytes_written = rec->bytes_written;
0e2e63dd 742 int i;
8d3eca20 743 int rc = 0;
a4ea0ec4 744 struct perf_mmap *maps;
d3d1af6f
AB
745 int trace_fd = rec->data.file.fd;
746 off_t off;
98402807 747
cb21686b
WN
748 if (!evlist)
749 return 0;
ef149c25 750
0b72d69a 751 maps = overwrite ? evlist->overwrite_mmap : evlist->mmap;
a4ea0ec4
WN
752 if (!maps)
753 return 0;
754
0b72d69a 755 if (overwrite && evlist->bkw_mmap_state != BKW_MMAP_DATA_PENDING)
54cc54de
WN
756 return 0;
757
d3d1af6f
AB
758 if (record__aio_enabled(rec))
759 off = record__aio_get_pos(trace_fd);
760
cb21686b 761 for (i = 0; i < evlist->nr_mmaps; i++) {
e035f4ca 762 struct perf_mmap *map = &maps[i];
cb21686b 763
e035f4ca 764 if (map->base) {
f13de660 765 record__adjust_affinity(rec, map);
d3d1af6f
AB
766 if (!record__aio_enabled(rec)) {
767 if (perf_mmap__push(map, rec, record__pushfn) != 0) {
768 rc = -1;
769 goto out;
770 }
771 } else {
93f20c0f 772 int idx;
d3d1af6f
AB
773 /*
774 * Call record__aio_sync() to wait till map->data buffer
775 * becomes available after previous aio write request.
776 */
93f20c0f
AB
777 idx = record__aio_sync(map, false);
778 if (perf_mmap__aio_push(map, rec, idx, record__aio_pushfn, &off) != 0) {
d3d1af6f
AB
779 record__aio_set_pos(trace_fd, off);
780 rc = -1;
781 goto out;
782 }
8d3eca20
DA
783 }
784 }
ef149c25 785
e035f4ca
JO
786 if (map->auxtrace_mmap.base && !rec->opts.auxtrace_snapshot_mode &&
787 record__auxtrace_mmap_read(rec, map) != 0) {
ef149c25
AH
788 rc = -1;
789 goto out;
790 }
98402807
FW
791 }
792
d3d1af6f
AB
793 if (record__aio_enabled(rec))
794 record__aio_set_pos(trace_fd, off);
795
dcabb507
JO
796 /*
797 * Mark the round finished in case we wrote
798 * at least one event.
799 */
800 if (bytes_written != rec->bytes_written)
ded2b8fe 801 rc = record__write(rec, NULL, &finished_round_event, sizeof(finished_round_event));
8d3eca20 802
0b72d69a 803 if (overwrite)
54cc54de 804 perf_evlist__toggle_bkw_mmap(evlist, BKW_MMAP_EMPTY);
8d3eca20
DA
805out:
806 return rc;
98402807
FW
807}
808
cb21686b
WN
809static int record__mmap_read_all(struct record *rec)
810{
811 int err;
812
a4ea0ec4 813 err = record__mmap_read_evlist(rec, rec->evlist, false);
cb21686b
WN
814 if (err)
815 return err;
816
05737464 817 return record__mmap_read_evlist(rec, rec->evlist, true);
cb21686b
WN
818}
819
8c6f45a7 820static void record__init_features(struct record *rec)
57706abc 821{
57706abc
DA
822 struct perf_session *session = rec->session;
823 int feat;
824
825 for (feat = HEADER_FIRST_FEATURE; feat < HEADER_LAST_FEATURE; feat++)
826 perf_header__set_feat(&session->header, feat);
827
828 if (rec->no_buildid)
829 perf_header__clear_feat(&session->header, HEADER_BUILD_ID);
830
3e2be2da 831 if (!have_tracepoints(&rec->evlist->entries))
57706abc
DA
832 perf_header__clear_feat(&session->header, HEADER_TRACING_DATA);
833
834 if (!rec->opts.branch_stack)
835 perf_header__clear_feat(&session->header, HEADER_BRANCH_STACK);
ef149c25
AH
836
837 if (!rec->opts.full_auxtrace)
838 perf_header__clear_feat(&session->header, HEADER_AUXTRACE);
ffa517ad 839
cf790516
AB
840 if (!(rec->opts.use_clockid && rec->opts.clockid_res_ns))
841 perf_header__clear_feat(&session->header, HEADER_CLOCKID);
842
258031c0
JO
843 perf_header__clear_feat(&session->header, HEADER_DIR_FORMAT);
844
ffa517ad 845 perf_header__clear_feat(&session->header, HEADER_STAT);
57706abc
DA
846}
847
e1ab48ba
WN
848static void
849record__finish_output(struct record *rec)
850{
8ceb41d7
JO
851 struct perf_data *data = &rec->data;
852 int fd = perf_data__fd(data);
e1ab48ba 853
8ceb41d7 854 if (data->is_pipe)
e1ab48ba
WN
855 return;
856
857 rec->session->header.data_size += rec->bytes_written;
45112e89 858 data->file.size = lseek(perf_data__fd(data), 0, SEEK_CUR);
e1ab48ba
WN
859
860 if (!rec->no_buildid) {
861 process_buildids(rec);
862
863 if (rec->buildid_all)
864 dsos__hit_all(rec->session);
865 }
866 perf_session__write_header(rec->session, rec->evlist, fd, true);
867
868 return;
869}
870
4ea648ae 871static int record__synthesize_workload(struct record *rec, bool tail)
be7b0c9e 872{
9d6aae72
ACM
873 int err;
874 struct thread_map *thread_map;
be7b0c9e 875
4ea648ae
WN
876 if (rec->opts.tail_synthesize != tail)
877 return 0;
878
9d6aae72
ACM
879 thread_map = thread_map__new_by_tid(rec->evlist->workload.pid);
880 if (thread_map == NULL)
881 return -1;
882
883 err = perf_event__synthesize_thread_map(&rec->tool, thread_map,
be7b0c9e
WN
884 process_synthesized_event,
885 &rec->session->machines.host,
3fcb10e4 886 rec->opts.sample_address);
9d6aae72
ACM
887 thread_map__put(thread_map);
888 return err;
be7b0c9e
WN
889}
890
4ea648ae 891static int record__synthesize(struct record *rec, bool tail);
3c1cb7e3 892
ecfd7a9c
WN
893static int
894record__switch_output(struct record *rec, bool at_exit)
895{
8ceb41d7 896 struct perf_data *data = &rec->data;
ecfd7a9c 897 int fd, err;
03724b2e 898 char *new_filename;
ecfd7a9c
WN
899
900 /* Same Size: "2015122520103046"*/
901 char timestamp[] = "InvalidTimestamp";
902
d3d1af6f
AB
903 record__aio_mmap_read_sync(rec);
904
4ea648ae
WN
905 record__synthesize(rec, true);
906 if (target__none(&rec->opts.target))
907 record__synthesize_workload(rec, true);
908
ecfd7a9c
WN
909 rec->samples = 0;
910 record__finish_output(rec);
911 err = fetch_current_timestamp(timestamp, sizeof(timestamp));
912 if (err) {
913 pr_err("Failed to get current timestamp\n");
914 return -EINVAL;
915 }
916
8ceb41d7 917 fd = perf_data__switch(data, timestamp,
ecfd7a9c 918 rec->session->header.data_offset,
03724b2e 919 at_exit, &new_filename);
ecfd7a9c
WN
920 if (fd >= 0 && !at_exit) {
921 rec->bytes_written = 0;
922 rec->session->header.data_size = 0;
923 }
924
925 if (!quiet)
926 fprintf(stderr, "[ perf record: Dump %s.%s ]\n",
2d4f2799 927 data->path, timestamp);
3c1cb7e3 928
03724b2e
AK
929 if (rec->switch_output.num_files) {
930 int n = rec->switch_output.cur_file + 1;
931
932 if (n >= rec->switch_output.num_files)
933 n = 0;
934 rec->switch_output.cur_file = n;
935 if (rec->switch_output.filenames[n]) {
936 remove(rec->switch_output.filenames[n]);
937 free(rec->switch_output.filenames[n]);
938 }
939 rec->switch_output.filenames[n] = new_filename;
940 } else {
941 free(new_filename);
942 }
943
3c1cb7e3 944 /* Output tracking events */
be7b0c9e 945 if (!at_exit) {
4ea648ae 946 record__synthesize(rec, false);
3c1cb7e3 947
be7b0c9e
WN
948 /*
949 * In 'perf record --switch-output' without -a,
950 * record__synthesize() in record__switch_output() won't
951 * generate tracking events because there's no thread_map
952 * in evlist. Which causes newly created perf.data doesn't
953 * contain map and comm information.
954 * Create a fake thread_map and directly call
955 * perf_event__synthesize_thread_map() for those events.
956 */
957 if (target__none(&rec->opts.target))
4ea648ae 958 record__synthesize_workload(rec, false);
be7b0c9e 959 }
ecfd7a9c
WN
960 return fd;
961}
962
f33cbe72
ACM
963static volatile int workload_exec_errno;
964
965/*
966 * perf_evlist__prepare_workload will send a SIGUSR1
967 * if the fork fails, since we asked by setting its
968 * want_signal to true.
969 */
45604710
NK
970static void workload_exec_failed_signal(int signo __maybe_unused,
971 siginfo_t *info,
f33cbe72
ACM
972 void *ucontext __maybe_unused)
973{
974 workload_exec_errno = info->si_value.sival_int;
975 done = 1;
f33cbe72
ACM
976 child_finished = 1;
977}
978
2dd6d8a1 979static void snapshot_sig_handler(int sig);
bfacbe3b 980static void alarm_sig_handler(int sig);
2dd6d8a1 981
46bc29b9
AH
982int __weak
983perf_event__synth_time_conv(const struct perf_event_mmap_page *pc __maybe_unused,
984 struct perf_tool *tool __maybe_unused,
985 perf_event__handler_t process __maybe_unused,
986 struct machine *machine __maybe_unused)
987{
988 return 0;
989}
990
ee667f94
WN
991static const struct perf_event_mmap_page *
992perf_evlist__pick_pc(struct perf_evlist *evlist)
993{
b2cb615d
WN
994 if (evlist) {
995 if (evlist->mmap && evlist->mmap[0].base)
996 return evlist->mmap[0].base;
0b72d69a
WN
997 if (evlist->overwrite_mmap && evlist->overwrite_mmap[0].base)
998 return evlist->overwrite_mmap[0].base;
b2cb615d 999 }
ee667f94
WN
1000 return NULL;
1001}
1002
c45628b0
WN
1003static const struct perf_event_mmap_page *record__pick_pc(struct record *rec)
1004{
ee667f94
WN
1005 const struct perf_event_mmap_page *pc;
1006
1007 pc = perf_evlist__pick_pc(rec->evlist);
1008 if (pc)
1009 return pc;
c45628b0
WN
1010 return NULL;
1011}
1012
4ea648ae 1013static int record__synthesize(struct record *rec, bool tail)
c45c86eb
WN
1014{
1015 struct perf_session *session = rec->session;
1016 struct machine *machine = &session->machines.host;
8ceb41d7 1017 struct perf_data *data = &rec->data;
c45c86eb
WN
1018 struct record_opts *opts = &rec->opts;
1019 struct perf_tool *tool = &rec->tool;
8ceb41d7 1020 int fd = perf_data__fd(data);
c45c86eb
WN
1021 int err = 0;
1022
4ea648ae
WN
1023 if (rec->opts.tail_synthesize != tail)
1024 return 0;
1025
8ceb41d7 1026 if (data->is_pipe) {
a2015516
JO
1027 /*
1028 * We need to synthesize events first, because some
1029 * features works on top of them (on report side).
1030 */
318ec184 1031 err = perf_event__synthesize_attrs(tool, rec->evlist,
c45c86eb
WN
1032 process_synthesized_event);
1033 if (err < 0) {
1034 pr_err("Couldn't synthesize attrs.\n");
1035 goto out;
1036 }
1037
a2015516
JO
1038 err = perf_event__synthesize_features(tool, session, rec->evlist,
1039 process_synthesized_event);
1040 if (err < 0) {
1041 pr_err("Couldn't synthesize features.\n");
1042 return err;
1043 }
1044
c45c86eb
WN
1045 if (have_tracepoints(&rec->evlist->entries)) {
1046 /*
1047 * FIXME err <= 0 here actually means that
1048 * there were no tracepoints so its not really
1049 * an error, just that we don't need to
1050 * synthesize anything. We really have to
1051 * return this more properly and also
1052 * propagate errors that now are calling die()
1053 */
1054 err = perf_event__synthesize_tracing_data(tool, fd, rec->evlist,
1055 process_synthesized_event);
1056 if (err <= 0) {
1057 pr_err("Couldn't record tracing data.\n");
1058 goto out;
1059 }
1060 rec->bytes_written += err;
1061 }
1062 }
1063
c45628b0 1064 err = perf_event__synth_time_conv(record__pick_pc(rec), tool,
46bc29b9
AH
1065 process_synthesized_event, machine);
1066 if (err)
1067 goto out;
1068
c45c86eb
WN
1069 if (rec->opts.full_auxtrace) {
1070 err = perf_event__synthesize_auxtrace_info(rec->itr, tool,
1071 session, process_synthesized_event);
1072 if (err)
1073 goto out;
1074 }
1075
6c443954
ACM
1076 if (!perf_evlist__exclude_kernel(rec->evlist)) {
1077 err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
1078 machine);
1079 WARN_ONCE(err < 0, "Couldn't record kernel reference relocation symbol\n"
1080 "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
1081 "Check /proc/kallsyms permission or run as root.\n");
1082
1083 err = perf_event__synthesize_modules(tool, process_synthesized_event,
1084 machine);
1085 WARN_ONCE(err < 0, "Couldn't record kernel module information.\n"
1086 "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
1087 "Check /proc/modules permission or run as root.\n");
1088 }
c45c86eb
WN
1089
1090 if (perf_guest) {
1091 machines__process_guests(&session->machines,
1092 perf_event__synthesize_guest_os, tool);
1093 }
1094
bfd8f72c
AK
1095 err = perf_event__synthesize_extra_attr(&rec->tool,
1096 rec->evlist,
1097 process_synthesized_event,
1098 data->is_pipe);
1099 if (err)
1100 goto out;
1101
373565d2
AK
1102 err = perf_event__synthesize_thread_map2(&rec->tool, rec->evlist->threads,
1103 process_synthesized_event,
1104 NULL);
1105 if (err < 0) {
1106 pr_err("Couldn't synthesize thread map.\n");
1107 return err;
1108 }
1109
1110 err = perf_event__synthesize_cpu_map(&rec->tool, rec->evlist->cpus,
1111 process_synthesized_event, NULL);
1112 if (err < 0) {
1113 pr_err("Couldn't synthesize cpu map.\n");
1114 return err;
1115 }
1116
e5416950 1117 err = perf_event__synthesize_bpf_events(session, process_synthesized_event,
7b612e29
SL
1118 machine, opts);
1119 if (err < 0)
1120 pr_warning("Couldn't synthesize bpf events.\n");
1121
c45c86eb
WN
1122 err = __machine__synthesize_threads(machine, tool, &opts->target, rec->evlist->threads,
1123 process_synthesized_event, opts->sample_address,
3fcb10e4 1124 1);
c45c86eb
WN
1125out:
1126 return err;
1127}
1128
/*
 * Core of "perf record": set up the session, open the events, start the
 * (optional) forked workload, then loop draining the mmap ring buffers
 * to the output until done/draining, and finally tear everything down.
 *
 * @rec:  the global record state (options, evlist, session, counters)
 * @argc/@argv: the workload command line; argc > 0 means we fork a child
 *
 * Returns 0 on success; otherwise a negative error code or the child's
 * exit status.
 */
static int __cmd_record(struct record *rec, int argc, const char **argv)
{
	int err;
	int status = 0;
	unsigned long waking = 0;		/* poll() wakeups, for the final report */
	const bool forks = argc > 0;
	struct perf_tool *tool = &rec->tool;
	struct record_opts *opts = &rec->opts;
	struct perf_data *data = &rec->data;
	struct perf_session *session;
	bool disabled = false, draining = false;
	struct perf_evlist *sb_evlist = NULL;
	int fd;

	/* Install signal handling before anything that can fail or fork. */
	atexit(record__sig_exit);
	signal(SIGCHLD, sig_handler);
	signal(SIGINT, sig_handler);
	signal(SIGTERM, sig_handler);
	signal(SIGSEGV, sigsegv_handler);

	if (rec->opts.record_namespaces)
		tool->namespace_events = true;

	/* SIGUSR2 drives auxtrace snapshots and/or output switching. */
	if (rec->opts.auxtrace_snapshot_mode || rec->switch_output.enabled) {
		signal(SIGUSR2, snapshot_sig_handler);
		if (rec->opts.auxtrace_snapshot_mode)
			trigger_on(&auxtrace_snapshot_trigger);
		if (rec->switch_output.enabled)
			trigger_on(&switch_output_trigger);
	} else {
		signal(SIGUSR2, SIG_IGN);
	}

	session = perf_session__new(data, false, tool);
	if (session == NULL) {
		pr_err("Perf session creation failed.\n");
		return -1;
	}

	fd = perf_data__fd(data);
	rec->session = session;

	record__init_features(rec);

	if (rec->opts.use_clockid && rec->opts.clockid_res_ns)
		session->header.env.clockid_res_ns = rec->opts.clockid_res_ns;

	/* Fork the workload stopped; it is released further down. */
	if (forks) {
		err = perf_evlist__prepare_workload(rec->evlist, &opts->target,
						    argv, data->is_pipe,
						    workload_exec_failed_signal);
		if (err < 0) {
			pr_err("Couldn't run the workload!\n");
			status = err;
			goto out_delete_session;
		}
	}

	/*
	 * If we have just single event and are sending data
	 * through pipe, we need to force the ids allocation,
	 * because we synthesize event name through the pipe
	 * and need the id for that.
	 */
	if (data->is_pipe && rec->evlist->nr_entries == 1)
		rec->opts.sample_id = true;

	if (record__open(rec) != 0) {
		err = -1;
		goto out_child;
	}

	err = bpf__apply_obj_config();
	if (err) {
		char errbuf[BUFSIZ];

		bpf__strerror_apply_obj_config(err, errbuf, sizeof(errbuf));
		pr_err("ERROR: Apply config to BPF failed: %s\n",
			 errbuf);
		goto out_child;
	}

	/*
	 * Normally perf_session__new would do this, but it doesn't have the
	 * evlist.
	 */
	if (rec->tool.ordered_events && !perf_evlist__sample_id_all(rec->evlist)) {
		pr_warning("WARNING: No sample_id_all support, falling back to unordered processing\n");
		rec->tool.ordered_events = false;
	}

	if (!rec->evlist->nr_groups)
		perf_header__clear_feat(&session->header, HEADER_GROUP_DESC);

	/* Pipe output gets a minimal header; files get the full one (fixed up later). */
	if (data->is_pipe) {
		err = perf_header__write_pipe(fd);
		if (err < 0)
			goto out_child;
	} else {
		err = perf_session__write_header(session, rec->evlist, fd, false);
		if (err < 0)
			goto out_child;
	}

	if (!rec->no_buildid
	    && !perf_header__has_feat(&session->header, HEADER_BUILD_ID)) {
		pr_err("Couldn't generate buildids. "
		       "Use --no-buildid to profile anyway.\n");
		err = -1;
		goto out_child;
	}

	/*
	 * NOTE(review): sb_evlist is still NULL at this point; presumably
	 * perf_evlist__start_sb_thread() tolerates/handles that — verify
	 * against its implementation.
	 */
	if (perf_evlist__start_sb_thread(sb_evlist, &rec->opts.target)) {
		pr_debug("Couldn't start the BPF side band thread:\nBPF programs starting from now on won't be annotatable\n");
		opts->no_bpf_event = true;
	}

	err = record__synthesize(rec, false);
	if (err < 0)
		goto out_child;

	if (rec->realtime_prio) {
		struct sched_param param;

		param.sched_priority = rec->realtime_prio;
		if (sched_setscheduler(0, SCHED_FIFO, &param)) {
			pr_err("Could not set realtime priority.\n");
			err = -1;
			goto out_child;
		}
	}

	/*
	 * When perf is starting the traced process, all the events
	 * (apart from group members) have enable_on_exec=1 set,
	 * so don't spoil it by prematurely enabling them.
	 */
	if (!target__none(&opts->target) && !opts->initial_delay)
		perf_evlist__enable(rec->evlist);

	/*
	 * Let the child rip
	 */
	if (forks) {
		struct machine *machine = &session->machines.host;
		union perf_event *event;
		pid_t tgid;

		event = malloc(sizeof(event->comm) + machine->id_hdr_size);
		if (event == NULL) {
			err = -ENOMEM;
			goto out_child;
		}

		/*
		 * Some H/W events are generated before COMM event
		 * which is emitted during exec(), so perf script
		 * cannot see a correct process name for those events.
		 * Synthesize COMM event to prevent it.
		 */
		tgid = perf_event__synthesize_comm(tool, event,
						   rec->evlist->workload.pid,
						   process_synthesized_event,
						   machine);
		free(event);

		if (tgid == -1)
			goto out_child;

		event = malloc(sizeof(event->namespaces) +
			       (NR_NAMESPACES * sizeof(struct perf_ns_link_info)) +
			       machine->id_hdr_size);
		if (event == NULL) {
			err = -ENOMEM;
			goto out_child;
		}

		/*
		 * Synthesize NAMESPACES event for the command specified.
		 */
		perf_event__synthesize_namespaces(tool, event,
						  rec->evlist->workload.pid,
						  tgid, process_synthesized_event,
						  machine);
		free(event);

		perf_evlist__start_workload(rec->evlist);
	}

	if (opts->initial_delay) {
		usleep(opts->initial_delay * USEC_PER_MSEC);
		perf_evlist__enable(rec->evlist);
	}

	trigger_ready(&auxtrace_snapshot_trigger);
	trigger_ready(&switch_output_trigger);
	perf_hooks__invoke_record_start();

	/* Main capture loop: drain mmaps, service triggers, poll for data. */
	for (;;) {
		unsigned long long hits = rec->samples;

		/*
		 * rec->evlist->bkw_mmap_state is possible to be
		 * BKW_MMAP_EMPTY here: when done == true and
		 * hits != rec->samples in previous round.
		 *
		 * perf_evlist__toggle_bkw_mmap ensure we never
		 * convert BKW_MMAP_EMPTY to BKW_MMAP_DATA_PENDING.
		 */
		if (trigger_is_hit(&switch_output_trigger) || done || draining)
			perf_evlist__toggle_bkw_mmap(rec->evlist, BKW_MMAP_DATA_PENDING);

		if (record__mmap_read_all(rec) < 0) {
			trigger_error(&auxtrace_snapshot_trigger);
			trigger_error(&switch_output_trigger);
			err = -1;
			goto out_child;
		}

		if (auxtrace_record__snapshot_started) {
			auxtrace_record__snapshot_started = 0;
			if (!trigger_is_error(&auxtrace_snapshot_trigger))
				record__read_auxtrace_snapshot(rec);
			if (trigger_is_error(&auxtrace_snapshot_trigger)) {
				pr_err("AUX area tracing snapshot failed\n");
				err = -1;
				goto out_child;
			}
		}

		if (trigger_is_hit(&switch_output_trigger)) {
			/*
			 * If switch_output_trigger is hit, the data in
			 * overwritable ring buffer should have been collected,
			 * so bkw_mmap_state should be set to BKW_MMAP_EMPTY.
			 *
			 * If SIGUSR2 raise after or during record__mmap_read_all(),
			 * record__mmap_read_all() didn't collect data from
			 * overwritable ring buffer. Read again.
			 */
			if (rec->evlist->bkw_mmap_state == BKW_MMAP_RUNNING)
				continue;
			trigger_ready(&switch_output_trigger);

			/*
			 * Reenable events in overwrite ring buffer after
			 * record__mmap_read_all(): we should have collected
			 * data from it.
			 */
			perf_evlist__toggle_bkw_mmap(rec->evlist, BKW_MMAP_RUNNING);

			if (!quiet)
				fprintf(stderr, "[ perf record: dump data: Woken up %ld times ]\n",
					waking);
			waking = 0;
			fd = record__switch_output(rec, false);
			if (fd < 0) {
				pr_err("Failed to switch to new file\n");
				trigger_error(&switch_output_trigger);
				err = fd;
				goto out_child;
			}

			/* re-arm the alarm */
			if (rec->switch_output.time)
				alarm(rec->switch_output.time);
		}

		/* No new samples since last round: block until data or exit. */
		if (hits == rec->samples) {
			if (done || draining)
				break;
			err = perf_evlist__poll(rec->evlist, -1);
			/*
			 * Propagate error, only if there's any. Ignore positive
			 * number of returned events and interrupt error.
			 */
			if (err > 0 || (err < 0 && errno == EINTR))
				err = 0;
			waking++;

			if (perf_evlist__filter_pollfd(rec->evlist, POLLERR | POLLHUP) == 0)
				draining = true;
		}

		/*
		 * When perf is starting the traced process, at the end events
		 * die with the process and we wait for that. Thus no need to
		 * disable events in this case.
		 */
		if (done && !disabled && !target__none(&opts->target)) {
			trigger_off(&auxtrace_snapshot_trigger);
			perf_evlist__disable(rec->evlist);
			disabled = true;
		}
	}
	trigger_off(&auxtrace_snapshot_trigger);
	trigger_off(&switch_output_trigger);

	if (forks && workload_exec_errno) {
		char msg[STRERR_BUFSIZE];
		const char *emsg = str_error_r(workload_exec_errno, msg, sizeof(msg));
		pr_err("Workload failed: %s\n", emsg);
		err = -1;
		goto out_child;
	}

	if (!quiet)
		fprintf(stderr, "[ perf record: Woken up %ld times to write data ]\n", waking);

	if (target__none(&rec->opts.target))
		record__synthesize_workload(rec, true);

out_child:
	/* Flush any in-flight asynchronous (AIO) trace writes first. */
	record__aio_mmap_read_sync(rec);

	if (forks) {
		int exit_status;

		if (!child_finished)
			kill(rec->evlist->workload.pid, SIGTERM);

		wait(&exit_status);

		/* A recording error wins; otherwise report the child's fate. */
		if (err < 0)
			status = err;
		else if (WIFEXITED(exit_status))
			status = WEXITSTATUS(exit_status);
		else if (WIFSIGNALED(exit_status))
			signr = WTERMSIG(exit_status);
	} else
		status = err;

	/* Tail-synthesize pass (no-op unless --tail-synthesize). */
	record__synthesize(rec, true);
	/* this will be recalculated during process_buildids() */
	rec->samples = 0;

	if (!err) {
		if (!rec->timestamp_filename) {
			record__finish_output(rec);
		} else {
			fd = record__switch_output(rec, true);
			if (fd < 0) {
				status = fd;
				goto out_delete_session;
			}
		}
	}

	perf_hooks__invoke_record_end();

	if (!err && !quiet) {
		char samples[128];
		const char *postfix = rec->timestamp_filename ?
					".<timestamp>" : "";

		if (rec->samples && !rec->opts.full_auxtrace)
			scnprintf(samples, sizeof(samples),
				  " (%" PRIu64 " samples)", rec->samples);
		else
			samples[0] = '\0';

		fprintf(stderr, "[ perf record: Captured and wrote %.3f MB %s%s%s ]\n",
			perf_data__size(data) / 1024.0 / 1024.0,
			data->path, postfix, samples);
	}

out_delete_session:
	perf_session__delete(session);

	if (!opts->no_bpf_event)
		perf_evlist__stop_sb_thread(sb_evlist);
	return status;
}
0e9b20b8 1501
0883e820 1502static void callchain_debug(struct callchain_param *callchain)
09b0fd45 1503{
aad2b21c 1504 static const char *str[CALLCHAIN_MAX] = { "NONE", "FP", "DWARF", "LBR" };
a601fdff 1505
0883e820 1506 pr_debug("callchain: type %s\n", str[callchain->record_mode]);
26d33022 1507
0883e820 1508 if (callchain->record_mode == CALLCHAIN_DWARF)
09b0fd45 1509 pr_debug("callchain: stack dump size %d\n",
0883e820 1510 callchain->dump_size);
09b0fd45
JO
1511}
1512
0883e820
ACM
1513int record_opts__parse_callchain(struct record_opts *record,
1514 struct callchain_param *callchain,
1515 const char *arg, bool unset)
09b0fd45 1516{
09b0fd45 1517 int ret;
0883e820 1518 callchain->enabled = !unset;
eb853e80 1519
09b0fd45
JO
1520 /* --no-call-graph */
1521 if (unset) {
0883e820 1522 callchain->record_mode = CALLCHAIN_NONE;
09b0fd45
JO
1523 pr_debug("callchain: disabled\n");
1524 return 0;
1525 }
1526
0883e820 1527 ret = parse_callchain_record_opt(arg, callchain);
5c0cf224
JO
1528 if (!ret) {
1529 /* Enable data address sampling for DWARF unwind. */
0883e820 1530 if (callchain->record_mode == CALLCHAIN_DWARF)
5c0cf224 1531 record->sample_address = true;
0883e820 1532 callchain_debug(callchain);
5c0cf224 1533 }
26d33022
JO
1534
1535 return ret;
1536}
1537
/*
 * Option callback for "--call-graph <mode[,size]>": parses the argument
 * into the global callchain_param and the record_opts stored in
 * opt->value, via record_opts__parse_callchain().
 */
int record_parse_callchain_opt(const struct option *opt,
			       const char *arg,
			       int unset)
{
	return record_opts__parse_callchain(opt->value, &callchain_param, arg, unset);
}
1544
c421e80b 1545int record_callchain_opt(const struct option *opt,
09b0fd45
JO
1546 const char *arg __maybe_unused,
1547 int unset __maybe_unused)
1548{
2ddd5c04 1549 struct callchain_param *callchain = opt->value;
c421e80b 1550
2ddd5c04 1551 callchain->enabled = true;
09b0fd45 1552
2ddd5c04
ACM
1553 if (callchain->record_mode == CALLCHAIN_NONE)
1554 callchain->record_mode = CALLCHAIN_FP;
eb853e80 1555
2ddd5c04 1556 callchain_debug(callchain);
09b0fd45
JO
1557 return 0;
1558}
1559
eb853e80
JO
1560static int perf_record_config(const char *var, const char *value, void *cb)
1561{
7a29c087
NK
1562 struct record *rec = cb;
1563
1564 if (!strcmp(var, "record.build-id")) {
1565 if (!strcmp(value, "cache"))
1566 rec->no_buildid_cache = false;
1567 else if (!strcmp(value, "no-cache"))
1568 rec->no_buildid_cache = true;
1569 else if (!strcmp(value, "skip"))
1570 rec->no_buildid = true;
1571 else
1572 return -1;
1573 return 0;
1574 }
cff17205
YX
1575 if (!strcmp(var, "record.call-graph")) {
1576 var = "call-graph.record-mode";
1577 return perf_default_config(var, value, cb);
1578 }
93f20c0f
AB
1579#ifdef HAVE_AIO_SUPPORT
1580 if (!strcmp(var, "record.aio")) {
1581 rec->opts.nr_cblocks = strtol(value, NULL, 0);
1582 if (!rec->opts.nr_cblocks)
1583 rec->opts.nr_cblocks = nr_cblocks_default;
1584 }
1585#endif
eb853e80 1586
cff17205 1587 return 0;
eb853e80
JO
1588}
1589
/* Maps a user-supplied --clockid/-k name to its kernel clockid value. */
struct clockid_map {
	const char *name;
	int clockid;
};

#define CLOCKID_MAP(n, c)	\
	{ .name = n, .clockid = (c), }

/* Sentinel entry terminating the clockids[] table below. */
#define CLOCKID_END	{ .name = NULL, }


/*
 * Add the missing ones, we need to build on many distros...
 */
#ifndef CLOCK_MONOTONIC_RAW
#define CLOCK_MONOTONIC_RAW 4
#endif
#ifndef CLOCK_BOOTTIME
#define CLOCK_BOOTTIME 7
#endif
#ifndef CLOCK_TAI
#define CLOCK_TAI 11
#endif

/* Name -> clockid table searched (case-insensitively) by parse_clockid(). */
static const struct clockid_map clockids[] = {
	/* available for all events, NMI safe */
	CLOCKID_MAP("monotonic", CLOCK_MONOTONIC),
	CLOCKID_MAP("monotonic_raw", CLOCK_MONOTONIC_RAW),

	/* available for some events */
	CLOCKID_MAP("realtime", CLOCK_REALTIME),
	CLOCKID_MAP("boottime", CLOCK_BOOTTIME),
	CLOCKID_MAP("tai", CLOCK_TAI),

	/* available for the lazy */
	CLOCKID_MAP("mono", CLOCK_MONOTONIC),
	CLOCKID_MAP("raw", CLOCK_MONOTONIC_RAW),
	CLOCKID_MAP("real", CLOCK_REALTIME),
	CLOCKID_MAP("boot", CLOCK_BOOTTIME),

	CLOCKID_END,
};
1632
cf790516
AB
1633static int get_clockid_res(clockid_t clk_id, u64 *res_ns)
1634{
1635 struct timespec res;
1636
1637 *res_ns = 0;
1638 if (!clock_getres(clk_id, &res))
1639 *res_ns = res.tv_nsec + res.tv_sec * NSEC_PER_SEC;
1640 else
1641 pr_warning("WARNING: Failed to determine specified clock resolution.\n");
1642
1643 return 0;
1644}
1645
814c8c38
PZ
1646static int parse_clockid(const struct option *opt, const char *str, int unset)
1647{
1648 struct record_opts *opts = (struct record_opts *)opt->value;
1649 const struct clockid_map *cm;
1650 const char *ostr = str;
1651
1652 if (unset) {
1653 opts->use_clockid = 0;
1654 return 0;
1655 }
1656
1657 /* no arg passed */
1658 if (!str)
1659 return 0;
1660
1661 /* no setting it twice */
1662 if (opts->use_clockid)
1663 return -1;
1664
1665 opts->use_clockid = true;
1666
1667 /* if its a number, we're done */
1668 if (sscanf(str, "%d", &opts->clockid) == 1)
cf790516 1669 return get_clockid_res(opts->clockid, &opts->clockid_res_ns);
814c8c38
PZ
1670
1671 /* allow a "CLOCK_" prefix to the name */
1672 if (!strncasecmp(str, "CLOCK_", 6))
1673 str += 6;
1674
1675 for (cm = clockids; cm->name; cm++) {
1676 if (!strcasecmp(str, cm->name)) {
1677 opts->clockid = cm->clockid;
cf790516
AB
1678 return get_clockid_res(opts->clockid,
1679 &opts->clockid_res_ns);
814c8c38
PZ
1680 }
1681 }
1682
1683 opts->use_clockid = false;
1684 ui__warning("unknown clockid %s, check man page\n", ostr);
1685 return -1;
1686}
1687
f4fe11b7
AB
1688static int record__parse_affinity(const struct option *opt, const char *str, int unset)
1689{
1690 struct record_opts *opts = (struct record_opts *)opt->value;
1691
1692 if (unset || !str)
1693 return 0;
1694
1695 if (!strcasecmp(str, "node"))
1696 opts->affinity = PERF_AFFINITY_NODE;
1697 else if (!strcasecmp(str, "cpu"))
1698 opts->affinity = PERF_AFFINITY_CPU;
1699
1700 return 0;
1701}
1702
e9db1310
AH
1703static int record__parse_mmap_pages(const struct option *opt,
1704 const char *str,
1705 int unset __maybe_unused)
1706{
1707 struct record_opts *opts = opt->value;
1708 char *s, *p;
1709 unsigned int mmap_pages;
1710 int ret;
1711
1712 if (!str)
1713 return -EINVAL;
1714
1715 s = strdup(str);
1716 if (!s)
1717 return -ENOMEM;
1718
1719 p = strchr(s, ',');
1720 if (p)
1721 *p = '\0';
1722
1723 if (*s) {
1724 ret = __perf_evlist__parse_mmap_pages(&mmap_pages, s);
1725 if (ret)
1726 goto out_free;
1727 opts->mmap_pages = mmap_pages;
1728 }
1729
1730 if (!p) {
1731 ret = 0;
1732 goto out_free;
1733 }
1734
1735 ret = __perf_evlist__parse_mmap_pages(&mmap_pages, p + 1);
1736 if (ret)
1737 goto out_free;
1738
1739 opts->auxtrace_mmap_pages = mmap_pages;
1740
1741out_free:
1742 free(s);
1743 return ret;
1744}
1745
0c582449
JO
1746static void switch_output_size_warn(struct record *rec)
1747{
1748 u64 wakeup_size = perf_evlist__mmap_size(rec->opts.mmap_pages);
1749 struct switch_output *s = &rec->switch_output;
1750
1751 wakeup_size /= 2;
1752
1753 if (s->size < wakeup_size) {
1754 char buf[100];
1755
1756 unit_number__scnprintf(buf, sizeof(buf), wakeup_size);
1757 pr_warning("WARNING: switch-output data size lower than "
1758 "wakeup kernel buffer size (%s) "
1759 "expect bigger perf.data sizes\n", buf);
1760 }
1761}
1762
cb4e1ebb
JO
1763static int switch_output_setup(struct record *rec)
1764{
1765 struct switch_output *s = &rec->switch_output;
dc0c6127
JO
1766 static struct parse_tag tags_size[] = {
1767 { .tag = 'B', .mult = 1 },
1768 { .tag = 'K', .mult = 1 << 10 },
1769 { .tag = 'M', .mult = 1 << 20 },
1770 { .tag = 'G', .mult = 1 << 30 },
1771 { .tag = 0 },
1772 };
bfacbe3b
JO
1773 static struct parse_tag tags_time[] = {
1774 { .tag = 's', .mult = 1 },
1775 { .tag = 'm', .mult = 60 },
1776 { .tag = 'h', .mult = 60*60 },
1777 { .tag = 'd', .mult = 60*60*24 },
1778 { .tag = 0 },
1779 };
dc0c6127 1780 unsigned long val;
cb4e1ebb
JO
1781
1782 if (!s->set)
1783 return 0;
1784
1785 if (!strcmp(s->str, "signal")) {
1786 s->signal = true;
1787 pr_debug("switch-output with SIGUSR2 signal\n");
dc0c6127
JO
1788 goto enabled;
1789 }
1790
1791 val = parse_tag_value(s->str, tags_size);
1792 if (val != (unsigned long) -1) {
1793 s->size = val;
1794 pr_debug("switch-output with %s size threshold\n", s->str);
1795 goto enabled;
cb4e1ebb
JO
1796 }
1797
bfacbe3b
JO
1798 val = parse_tag_value(s->str, tags_time);
1799 if (val != (unsigned long) -1) {
1800 s->time = val;
1801 pr_debug("switch-output with %s time threshold (%lu seconds)\n",
1802 s->str, s->time);
1803 goto enabled;
1804 }
1805
cb4e1ebb 1806 return -1;
dc0c6127
JO
1807
1808enabled:
1809 rec->timestamp_filename = true;
1810 s->enabled = true;
0c582449
JO
1811
1812 if (s->size && !rec->opts.no_buffering)
1813 switch_output_size_warn(rec);
1814
dc0c6127 1815 return 0;
cb4e1ebb
JO
1816}
1817
/* Usage lines shown by "perf record -h". */
static const char * const __record_usage[] = {
	"perf record [<options>] [<command>]",
	"perf record [<options>] -- <command> [<options>]",
	NULL
};
/* Non-static alias so the usage text is visible outside this file. */
const char * const *record_usage = __record_usage;
0e9b20b8 1824
/*
 * XXX Ideally would be local to cmd_record() and passed to a record__new
 * because we need to have access to it in record__exit, that is called
 * after cmd_record() exits, but since record_options need to be accessible to
 * builtin-script, leave it here.
 *
 * At least we don't ouch it in all the other functions here directly.
 *
 * Just say no to tons of global variables, sigh.
 */
static struct record record = {
	/* Defaults for the recording options; overridden by command line. */
	.opts = {
		.sample_time	     = true,
		.mmap_pages	     = UINT_MAX,
		.user_freq	     = UINT_MAX,
		.user_interval	     = ULLONG_MAX,
		.freq		     = 4000,
		.target		     = {
			.uses_mmap   = true,
			.default_per_cpu = true,
		},
	},
	/* Event-processing callbacks used when this tool reads back events. */
	.tool = {
		.sample		= process_sample_event,
		.fork		= perf_event__process_fork,
		.exit		= perf_event__process_exit,
		.comm		= perf_event__process_comm,
		.namespaces	= perf_event__process_namespaces,
		.mmap		= perf_event__process_mmap,
		.mmap2		= perf_event__process_mmap2,
		.ordered_events	= true,
	},
};

/* Help text for --call-graph; frame pointers ("fp") are the default mode. */
const char record_callchain_help[] = CALLCHAIN_RECORD_HELP
	"\n\t\t\t\tDefault: fp";

/* --dry-run: parse the options, then exit without recording. */
static bool dry_run;
d20deb64
ACM
1864/*
1865 * XXX Will stay a global variable till we fix builtin-script.c to stop messing
1866 * with it and switch to use the library functions in perf_evlist that came
b4006796 1867 * from builtin-record.c, i.e. use record_opts,
d20deb64
ACM
1868 * perf_evlist__prepare_workload, etc instead of fork+exec'in 'perf record',
1869 * using pipes, etc.
1870 */
efd21307 1871static struct option __record_options[] = {
d20deb64 1872 OPT_CALLBACK('e', "event", &record.evlist, "event",
86847b62 1873 "event selector. use 'perf list' to list available events",
f120f9d5 1874 parse_events_option),
d20deb64 1875 OPT_CALLBACK(0, "filter", &record.evlist, "filter",
c171b552 1876 "event filter", parse_filter),
4ba1faa1
WN
1877 OPT_CALLBACK_NOOPT(0, "exclude-perf", &record.evlist,
1878 NULL, "don't record events from perf itself",
1879 exclude_perf),
bea03405 1880 OPT_STRING('p', "pid", &record.opts.target.pid, "pid",
d6d901c2 1881 "record events on existing process id"),
bea03405 1882 OPT_STRING('t', "tid", &record.opts.target.tid, "tid",
d6d901c2 1883 "record events on existing thread id"),
d20deb64 1884 OPT_INTEGER('r', "realtime", &record.realtime_prio,
0e9b20b8 1885 "collect data with this RT SCHED_FIFO priority"),
509051ea 1886 OPT_BOOLEAN(0, "no-buffering", &record.opts.no_buffering,
acac03fa 1887 "collect data without buffering"),
d20deb64 1888 OPT_BOOLEAN('R', "raw-samples", &record.opts.raw_samples,
daac07b2 1889 "collect raw sample records from all opened counters"),
bea03405 1890 OPT_BOOLEAN('a', "all-cpus", &record.opts.target.system_wide,
0e9b20b8 1891 "system-wide collection from all CPUs"),
bea03405 1892 OPT_STRING('C', "cpu", &record.opts.target.cpu_list, "cpu",
c45c6ea2 1893 "list of cpus to monitor"),
d20deb64 1894 OPT_U64('c', "count", &record.opts.user_interval, "event period to sample"),
2d4f2799 1895 OPT_STRING('o', "output", &record.data.path, "file",
abaff32a 1896 "output file name"),
69e7e5b0
AH
1897 OPT_BOOLEAN_SET('i', "no-inherit", &record.opts.no_inherit,
1898 &record.opts.no_inherit_set,
1899 "child tasks do not inherit counters"),
4ea648ae
WN
1900 OPT_BOOLEAN(0, "tail-synthesize", &record.opts.tail_synthesize,
1901 "synthesize non-sample events at the end of output"),
626a6b78 1902 OPT_BOOLEAN(0, "overwrite", &record.opts.overwrite, "use overwrite mode"),
71184c6a 1903 OPT_BOOLEAN(0, "no-bpf-event", &record.opts.no_bpf_event, "record bpf events"),
b09c2364
ACM
1904 OPT_BOOLEAN(0, "strict-freq", &record.opts.strict_freq,
1905 "Fail if the specified frequency can't be used"),
67230479
ACM
1906 OPT_CALLBACK('F', "freq", &record.opts, "freq or 'max'",
1907 "profile at this frequency",
1908 record__parse_freq),
e9db1310
AH
1909 OPT_CALLBACK('m', "mmap-pages", &record.opts, "pages[,pages]",
1910 "number of mmap data pages and AUX area tracing mmap pages",
1911 record__parse_mmap_pages),
d20deb64 1912 OPT_BOOLEAN(0, "group", &record.opts.group,
43bece79 1913 "put the counters into a counter group"),
2ddd5c04 1914 OPT_CALLBACK_NOOPT('g', NULL, &callchain_param,
09b0fd45
JO
1915 NULL, "enables call-graph recording" ,
1916 &record_callchain_opt),
1917 OPT_CALLBACK(0, "call-graph", &record.opts,
76a26549 1918 "record_mode[,record_size]", record_callchain_help,
09b0fd45 1919 &record_parse_callchain_opt),
c0555642 1920 OPT_INCR('v', "verbose", &verbose,
3da297a6 1921 "be more verbose (show counter open errors, etc)"),
b44308f5 1922 OPT_BOOLEAN('q', "quiet", &quiet, "don't print any message"),
d20deb64 1923 OPT_BOOLEAN('s', "stat", &record.opts.inherit_stat,
649c48a9 1924 "per thread counts"),
56100321 1925 OPT_BOOLEAN('d', "data", &record.opts.sample_address, "Record the sample addresses"),
3b0a5daa
KL
1926 OPT_BOOLEAN(0, "phys-data", &record.opts.sample_phys_addr,
1927 "Record the sample physical addresses"),
b6f35ed7 1928 OPT_BOOLEAN(0, "sample-cpu", &record.opts.sample_cpu, "Record the sample cpu"),
3abebc55
AH
1929 OPT_BOOLEAN_SET('T', "timestamp", &record.opts.sample_time,
1930 &record.opts.sample_time_set,
1931 "Record the sample timestamps"),
f290aa1f
JO
1932 OPT_BOOLEAN_SET('P', "period", &record.opts.period, &record.opts.period_set,
1933 "Record the sample period"),
d20deb64 1934 OPT_BOOLEAN('n', "no-samples", &record.opts.no_samples,
649c48a9 1935 "don't sample"),
d2db9a98
WN
1936 OPT_BOOLEAN_SET('N', "no-buildid-cache", &record.no_buildid_cache,
1937 &record.no_buildid_cache_set,
1938 "do not update the buildid cache"),
1939 OPT_BOOLEAN_SET('B', "no-buildid", &record.no_buildid,
1940 &record.no_buildid_set,
1941 "do not collect buildids in perf.data"),
d20deb64 1942 OPT_CALLBACK('G', "cgroup", &record.evlist, "name",
023695d9
SE
1943 "monitor event in cgroup name only",
1944 parse_cgroups),
a6205a35 1945 OPT_UINTEGER('D', "delay", &record.opts.initial_delay,
6619a53e 1946 "ms to wait before starting measurement after program start"),
bea03405
NK
1947 OPT_STRING('u', "uid", &record.opts.target.uid_str, "user",
1948 "user to profile"),
a5aabdac
SE
1949
1950 OPT_CALLBACK_NOOPT('b', "branch-any", &record.opts.branch_stack,
1951 "branch any", "sample any taken branches",
1952 parse_branch_stack),
1953
1954 OPT_CALLBACK('j', "branch-filter", &record.opts.branch_stack,
1955 "branch filter mask", "branch stack filter modes",
bdfebd84 1956 parse_branch_stack),
05484298
AK
1957 OPT_BOOLEAN('W', "weight", &record.opts.sample_weight,
1958 "sample by weight (on special events only)"),
475eeab9
AK
1959 OPT_BOOLEAN(0, "transaction", &record.opts.sample_transaction,
1960 "sample transaction flags (special events only)"),
3aa5939d
AH
1961 OPT_BOOLEAN(0, "per-thread", &record.opts.target.per_thread,
1962 "use per-thread mmaps"),
bcc84ec6
SE
1963 OPT_CALLBACK_OPTARG('I', "intr-regs", &record.opts.sample_intr_regs, NULL, "any register",
1964 "sample selected machine registers on interrupt,"
1965 " use -I ? to list register names", parse_regs),
84c41742
AK
1966 OPT_CALLBACK_OPTARG(0, "user-regs", &record.opts.sample_user_regs, NULL, "any register",
1967 "sample selected machine registers on interrupt,"
1968 " use -I ? to list register names", parse_regs),
85c273d2
AK
1969 OPT_BOOLEAN(0, "running-time", &record.opts.running_time,
1970 "Record running/enabled time of read (:S) events"),
814c8c38
PZ
1971 OPT_CALLBACK('k', "clockid", &record.opts,
1972 "clockid", "clockid to use for events, see clock_gettime()",
1973 parse_clockid),
2dd6d8a1
AH
1974 OPT_STRING_OPTARG('S', "snapshot", &record.opts.auxtrace_snapshot_opts,
1975 "opts", "AUX area tracing Snapshot Mode", ""),
3fcb10e4 1976 OPT_UINTEGER(0, "proc-map-timeout", &proc_map_timeout,
9d9cad76 1977 "per thread proc mmap processing timeout in ms"),
f3b3614a
HB
1978 OPT_BOOLEAN(0, "namespaces", &record.opts.record_namespaces,
1979 "Record namespaces events"),
b757bb09
AH
1980 OPT_BOOLEAN(0, "switch-events", &record.opts.record_switch_events,
1981 "Record context switch events"),
85723885
JO
1982 OPT_BOOLEAN_FLAG(0, "all-kernel", &record.opts.all_kernel,
1983 "Configure all used events to run in kernel space.",
1984 PARSE_OPT_EXCLUSIVE),
1985 OPT_BOOLEAN_FLAG(0, "all-user", &record.opts.all_user,
1986 "Configure all used events to run in user space.",
1987 PARSE_OPT_EXCLUSIVE),
71dc2326
WN
1988 OPT_STRING(0, "clang-path", &llvm_param.clang_path, "clang path",
1989 "clang binary to use for compiling BPF scriptlets"),
1990 OPT_STRING(0, "clang-opt", &llvm_param.clang_opt, "clang options",
1991 "options passed to clang when compiling BPF scriptlets"),
7efe0e03
HK
1992 OPT_STRING(0, "vmlinux", &symbol_conf.vmlinux_name,
1993 "file", "vmlinux pathname"),
6156681b
NK
1994 OPT_BOOLEAN(0, "buildid-all", &record.buildid_all,
1995 "Record build-id of all DSOs regardless of hits"),
ecfd7a9c
WN
1996 OPT_BOOLEAN(0, "timestamp-filename", &record.timestamp_filename,
1997 "append timestamp to output filename"),
68588baf
JY
1998 OPT_BOOLEAN(0, "timestamp-boundary", &record.timestamp_boundary,
1999 "Record timestamp boundary (time of first/last samples)"),
cb4e1ebb 2000 OPT_STRING_OPTARG_SET(0, "switch-output", &record.switch_output.str,
c38dab7d
AK
2001 &record.switch_output.set, "signal or size[BKMG] or time[smhd]",
2002 "Switch output when receiving SIGUSR2 (signal) or cross a size or time threshold",
dc0c6127 2003 "signal"),
03724b2e
AK
2004 OPT_INTEGER(0, "switch-max-files", &record.switch_output.num_files,
2005 "Limit number of switch output generated files"),
0aab2136
WN
2006 OPT_BOOLEAN(0, "dry-run", &dry_run,
2007 "Parse options then exit"),
d3d1af6f 2008#ifdef HAVE_AIO_SUPPORT
93f20c0f
AB
2009 OPT_CALLBACK_OPTARG(0, "aio", &record.opts,
2010 &nr_cblocks_default, "n", "Use <n> control blocks in asynchronous trace writing mode (default: 1, max: 4)",
d3d1af6f
AB
2011 record__aio_parse),
2012#endif
f4fe11b7
AB
2013 OPT_CALLBACK(0, "affinity", &record.opts, "node|cpu",
2014 "Set affinity mask of trace reading thread to NUMA node cpu mask or cpu of processed mmap buffer",
2015 record__parse_affinity),
0e9b20b8
IM
2016 OPT_END()
2017};
2018
/*
 * Non-static pointer to the option table above, so other translation
 * units that declare 'extern struct option *record_options' can use it.
 */
struct option *record_options = __record_options;
2020
b0ad8ea6 2021int cmd_record(int argc, const char **argv)
0e9b20b8 2022{
ef149c25 2023 int err;
8c6f45a7 2024 struct record *rec = &record;
16ad2ffb 2025 char errbuf[BUFSIZ];
0e9b20b8 2026
67230479
ACM
2027 setlocale(LC_ALL, "");
2028
48e1cab1
WN
2029#ifndef HAVE_LIBBPF_SUPPORT
2030# define set_nobuild(s, l, c) set_option_nobuild(record_options, s, l, "NO_LIBBPF=1", c)
2031 set_nobuild('\0', "clang-path", true);
2032 set_nobuild('\0', "clang-opt", true);
2033# undef set_nobuild
7efe0e03
HK
2034#endif
2035
2036#ifndef HAVE_BPF_PROLOGUE
2037# if !defined (HAVE_DWARF_SUPPORT)
2038# define REASON "NO_DWARF=1"
2039# elif !defined (HAVE_LIBBPF_SUPPORT)
2040# define REASON "NO_LIBBPF=1"
2041# else
2042# define REASON "this architecture doesn't support BPF prologue"
2043# endif
2044# define set_nobuild(s, l, c) set_option_nobuild(record_options, s, l, REASON, c)
2045 set_nobuild('\0', "vmlinux", true);
2046# undef set_nobuild
2047# undef REASON
48e1cab1
WN
2048#endif
2049
9d2ed645
AB
2050 CPU_ZERO(&rec->affinity_mask);
2051 rec->opts.affinity = PERF_AFFINITY_SYS;
2052
3e2be2da
ACM
2053 rec->evlist = perf_evlist__new();
2054 if (rec->evlist == NULL)
361c99a6
ACM
2055 return -ENOMEM;
2056
ecc4c561
ACM
2057 err = perf_config(perf_record_config, rec);
2058 if (err)
2059 return err;
eb853e80 2060
bca647aa 2061 argc = parse_options(argc, argv, record_options, record_usage,
655000e7 2062 PARSE_OPT_STOP_AT_NON_OPTION);
68ba3235
NK
2063 if (quiet)
2064 perf_quiet_option();
483635a9
JO
2065
2066 /* Make system wide (-a) the default target. */
602ad878 2067 if (!argc && target__none(&rec->opts.target))
483635a9 2068 rec->opts.target.system_wide = true;
0e9b20b8 2069
bea03405 2070 if (nr_cgroups && !rec->opts.target.system_wide) {
c7118369
NK
2071 usage_with_options_msg(record_usage, record_options,
2072 "cgroup monitoring only available in system-wide mode");
2073
023695d9 2074 }
b757bb09
AH
2075 if (rec->opts.record_switch_events &&
2076 !perf_can_record_switch_events()) {
c7118369
NK
2077 ui__error("kernel does not support recording context switch events\n");
2078 parse_options_usage(record_usage, record_options, "switch-events", 0);
2079 return -EINVAL;
b757bb09 2080 }
023695d9 2081
cb4e1ebb
JO
2082 if (switch_output_setup(rec)) {
2083 parse_options_usage(record_usage, record_options, "switch-output", 0);
2084 return -EINVAL;
2085 }
2086
bfacbe3b
JO
2087 if (rec->switch_output.time) {
2088 signal(SIGALRM, alarm_sig_handler);
2089 alarm(rec->switch_output.time);
2090 }
2091
03724b2e
AK
2092 if (rec->switch_output.num_files) {
2093 rec->switch_output.filenames = calloc(sizeof(char *),
2094 rec->switch_output.num_files);
2095 if (!rec->switch_output.filenames)
2096 return -EINVAL;
2097 }
2098
1b36c03e
AH
2099 /*
2100 * Allow aliases to facilitate the lookup of symbols for address
2101 * filters. Refer to auxtrace_parse_filters().
2102 */
2103 symbol_conf.allow_aliases = true;
2104
2105 symbol__init(NULL);
2106
4b5ea3bd 2107 err = record__auxtrace_init(rec);
1b36c03e
AH
2108 if (err)
2109 goto out;
2110
0aab2136 2111 if (dry_run)
5c01ad60 2112 goto out;
0aab2136 2113
d7888573
WN
2114 err = bpf__setup_stdout(rec->evlist);
2115 if (err) {
2116 bpf__strerror_setup_stdout(rec->evlist, err, errbuf, sizeof(errbuf));
2117 pr_err("ERROR: Setup BPF stdout failed: %s\n",
2118 errbuf);
5c01ad60 2119 goto out;
d7888573
WN
2120 }
2121
ef149c25
AH
2122 err = -ENOMEM;
2123
6c443954 2124 if (symbol_conf.kptr_restrict && !perf_evlist__exclude_kernel(rec->evlist))
646aaea6
ACM
2125 pr_warning(
2126"WARNING: Kernel address maps (/proc/{kallsyms,modules}) are restricted,\n"
2127"check /proc/sys/kernel/kptr_restrict.\n\n"
2128"Samples in kernel functions may not be resolved if a suitable vmlinux\n"
2129"file is not found in the buildid cache or in the vmlinux path.\n\n"
2130"Samples in kernel modules won't be resolved at all.\n\n"
2131"If some relocation was applied (e.g. kexec) symbols may be misresolved\n"
2132"even with a suitable vmlinux or kallsyms file.\n\n");
ec80fde7 2133
0c1d46a8 2134 if (rec->no_buildid_cache || rec->no_buildid) {
a1ac1d3c 2135 disable_buildid_cache();
dc0c6127 2136 } else if (rec->switch_output.enabled) {
0c1d46a8
WN
2137 /*
2138 * In 'perf record --switch-output', disable buildid
2139 * generation by default to reduce data file switching
2140 * overhead. Still generate buildid if they are required
2141 * explicitly using
2142 *
60437ac0 2143 * perf record --switch-output --no-no-buildid \
0c1d46a8
WN
2144 * --no-no-buildid-cache
2145 *
2146 * Following code equals to:
2147 *
2148 * if ((rec->no_buildid || !rec->no_buildid_set) &&
2149 * (rec->no_buildid_cache || !rec->no_buildid_cache_set))
2150 * disable_buildid_cache();
2151 */
2152 bool disable = true;
2153
2154 if (rec->no_buildid_set && !rec->no_buildid)
2155 disable = false;
2156 if (rec->no_buildid_cache_set && !rec->no_buildid_cache)
2157 disable = false;
2158 if (disable) {
2159 rec->no_buildid = true;
2160 rec->no_buildid_cache = true;
2161 disable_buildid_cache();
2162 }
2163 }
655000e7 2164
4ea648ae
WN
2165 if (record.opts.overwrite)
2166 record.opts.tail_synthesize = true;
2167
3e2be2da 2168 if (rec->evlist->nr_entries == 0 &&
4b4cd503 2169 __perf_evlist__add_default(rec->evlist, !record.opts.no_samples) < 0) {
69aad6f1 2170 pr_err("Not enough memory for event selector list\n");
394c01ed 2171 goto out;
bbd36e5e 2172 }
0e9b20b8 2173
69e7e5b0
AH
2174 if (rec->opts.target.tid && !rec->opts.no_inherit_set)
2175 rec->opts.no_inherit = true;
2176
602ad878 2177 err = target__validate(&rec->opts.target);
16ad2ffb 2178 if (err) {
602ad878 2179 target__strerror(&rec->opts.target, err, errbuf, BUFSIZ);
c3dec27b 2180 ui__warning("%s\n", errbuf);
16ad2ffb
NK
2181 }
2182
602ad878 2183 err = target__parse_uid(&rec->opts.target);
16ad2ffb
NK
2184 if (err) {
2185 int saved_errno = errno;
4bd0f2d2 2186
602ad878 2187 target__strerror(&rec->opts.target, err, errbuf, BUFSIZ);
3780f488 2188 ui__error("%s", errbuf);
16ad2ffb
NK
2189
2190 err = -saved_errno;
394c01ed 2191 goto out;
16ad2ffb 2192 }
0d37aa34 2193
ca800068
MZ
2194 /* Enable ignoring missing threads when -u/-p option is defined. */
2195 rec->opts.ignore_missing_thread = rec->opts.target.uid != UINT_MAX || rec->opts.target.pid;
23dc4f15 2196
16ad2ffb 2197 err = -ENOMEM;
3e2be2da 2198 if (perf_evlist__create_maps(rec->evlist, &rec->opts.target) < 0)
dd7927f4 2199 usage_with_options(record_usage, record_options);
69aad6f1 2200
ef149c25
AH
2201 err = auxtrace_record__options(rec->itr, rec->evlist, &rec->opts);
2202 if (err)
394c01ed 2203 goto out;
ef149c25 2204
6156681b
NK
2205 /*
2206 * We take all buildids when the file contains
2207 * AUX area tracing data because we do not decode the
2208 * trace because it would take too long.
2209 */
2210 if (rec->opts.full_auxtrace)
2211 rec->buildid_all = true;
2212
b4006796 2213 if (record_opts__config(&rec->opts)) {
39d17dac 2214 err = -EINVAL;
394c01ed 2215 goto out;
7e4ff9e3
MG
2216 }
2217
93f20c0f
AB
2218 if (rec->opts.nr_cblocks > nr_cblocks_max)
2219 rec->opts.nr_cblocks = nr_cblocks_max;
d3d1af6f
AB
2220 if (verbose > 0)
2221 pr_info("nr_cblocks: %d\n", rec->opts.nr_cblocks);
2222
9d2ed645
AB
2223 pr_debug("affinity: %s\n", affinity_tags[rec->opts.affinity]);
2224
d20deb64 2225 err = __cmd_record(&record, argc, argv);
394c01ed 2226out:
45604710 2227 perf_evlist__delete(rec->evlist);
d65a458b 2228 symbol__exit();
ef149c25 2229 auxtrace_record__free(rec->itr);
39d17dac 2230 return err;
0e9b20b8 2231}
2dd6d8a1
AH
2232
2233static void snapshot_sig_handler(int sig __maybe_unused)
2234{
dc0c6127
JO
2235 struct record *rec = &record;
2236
5f9cf599
WN
2237 if (trigger_is_ready(&auxtrace_snapshot_trigger)) {
2238 trigger_hit(&auxtrace_snapshot_trigger);
2239 auxtrace_record__snapshot_started = 1;
2240 if (auxtrace_record__snapshot_start(record.itr))
2241 trigger_error(&auxtrace_snapshot_trigger);
2242 }
3c1cb7e3 2243
dc0c6127 2244 if (switch_output_signal(rec))
3c1cb7e3 2245 trigger_hit(&switch_output_trigger);
2dd6d8a1 2246}
bfacbe3b
JO
2247
2248static void alarm_sig_handler(int sig __maybe_unused)
2249{
2250 struct record *rec = &record;
2251
2252 if (switch_output_time(rec))
2253 trigger_hit(&switch_output_trigger);
2254}