tools feature: Rename HAVE_EVENTFD to HAVE_EVENTFD_SUPPORT
[linux-2.6-block.git] / tools / perf / builtin-record.c
CommitLineData
b2441318 1// SPDX-License-Identifier: GPL-2.0
abaff32a 2/*
bf9e1876
IM
3 * builtin-record.c
4 *
5 * Builtin record command: Record the profile of a workload
6 * (or a CPU, or a PID) into the perf.data output file - for
7 * later analysis via perf report.
abaff32a 8 */
16f762a2 9#include "builtin.h"
bf9e1876 10
6122e4e4 11#include "util/build-id.h"
4b6ab94e 12#include <subcmd/parse-options.h>
8ad8db37 13#include "util/parse-events.h"
41840d21 14#include "util/config.h"
6eda5838 15
8f651eae 16#include "util/callchain.h"
f14d5707 17#include "util/cgroup.h"
7c6a1c65 18#include "util/header.h"
66e274f3 19#include "util/event.h"
361c99a6 20#include "util/evlist.h"
69aad6f1 21#include "util/evsel.h"
8f28827a 22#include "util/debug.h"
e0fcfb08 23#include "util/mmap.h"
aeb00b1a 24#include "util/target.h"
94c744b6 25#include "util/session.h"
45694aa7 26#include "util/tool.h"
8d06367f 27#include "util/symbol.h"
aeb00b1a 28#include "util/record.h"
a12b51c4 29#include "util/cpumap.h"
fd78260b 30#include "util/thread_map.h"
f5fc1412 31#include "util/data.h"
bcc84ec6 32#include "util/perf_regs.h"
ef149c25 33#include "util/auxtrace.h"
46bc29b9 34#include "util/tsc.h"
f00898f4 35#include "util/parse-branch-options.h"
bcc84ec6 36#include "util/parse-regs-options.h"
40c7d246 37#include "util/perf_api_probe.h"
71dc2326 38#include "util/llvm-utils.h"
8690a2a7 39#include "util/bpf-loader.h"
5f9cf599 40#include "util/trigger.h"
a074865e 41#include "util/perf-hooks.h"
f13de660 42#include "util/cpu-set-sched.h"
ea49e01c 43#include "util/synthetic-events.h"
c5e4027e 44#include "util/time-utils.h"
58db1d6e 45#include "util/units.h"
7b612e29 46#include "util/bpf-event.h"
d99c22ea 47#include "util/util.h"
d8871ea7 48#include "asm/bug.h"
c1a604df 49#include "perf.h"
7c6a1c65 50
a43783ae 51#include <errno.h>
fd20e811 52#include <inttypes.h>
67230479 53#include <locale.h>
4208735d 54#include <poll.h>
d99c22ea 55#include <pthread.h>
97124d5e 56#include <unistd.h>
de9ac07b 57#include <sched.h>
9607ad3a 58#include <signal.h>
a41794cd 59#include <sys/mman.h>
4208735d 60#include <sys/wait.h>
eeb399b5
AH
61#include <sys/types.h>
62#include <sys/stat.h>
63#include <fcntl.h>
6ef81c55 64#include <linux/err.h>
8520a98d 65#include <linux/string.h>
0693e680 66#include <linux/time64.h>
d8f9da24 67#include <linux/zalloc.h>
8384a260 68#include <linux/bitmap.h>
78da39fa 69
/*
 * Configuration/state for output-file rotation (--switch-output).
 * Which trigger applies is decided by the switch_output_*() helpers below.
 */
struct switch_output {
	bool		 enabled;	/* any rotation mode is active */
	bool		 signal;	/* rotate on a signal (see switch_output_signal()) */
	unsigned long	 size;		/* rotate once bytes_written reaches this (0 = off) */
	unsigned long	 time;		/* time-based rotation period (0 = off) */
	const char	*str;		/* raw command-line argument — parsed elsewhere */
	bool		 set;		/* option was explicitly given */
	char		 **filenames;	/* previously generated output names; ring indexed by cur_file */
	int		 num_files;	/* capacity of filenames[] */
	int		 cur_file;	/* current slot in filenames[] */
};
81
/*
 * Top-level state of one 'perf record' session.  Embeds the perf_tool
 * callbacks so container_of(tool, struct record, tool) recovers it inside
 * event-processing callbacks.
 */
struct record {
	struct perf_tool	tool;		/* callbacks; must stay first-class for container_of() */
	struct record_opts	opts;
	u64			bytes_written;	/* payload written so far; drives size-based rotation */
	struct perf_data	data;		/* output perf.data handle */
	struct auxtrace_record	*itr;		/* AUX area (e.g. PT) recording context, may be NULL */
	struct evlist	*evlist;
	struct perf_session	*session;
	struct evlist		*sb_evlist;	/* side-band event list */
	pthread_t		thread_id;
	int			realtime_prio;
	bool			switch_output_event_set;
	bool			no_buildid;
	bool			no_buildid_set;
	bool			no_buildid_cache;
	bool			no_buildid_cache_set;
	bool			buildid_all;	/* mark all DSOs regardless of sample hits */
	bool			timestamp_filename;
	bool			timestamp_boundary;
	struct switch_output	switch_output;	/* output rotation config */
	unsigned long long	samples;
	struct mmap_cpu_mask	affinity_mask;	/* last affinity applied in record__adjust_affinity() */
	unsigned long		output_max_size;	/* = 0: unlimited */
};
a21ca2ca 106
/* Set from signal handlers / size-limit check to end the record loop. */
static volatile int done;

/* Snapshot state and triggers coordinating rotation/snapshot requests. */
static volatile int auxtrace_record__snapshot_started;
static DEFINE_TRIGGER(auxtrace_snapshot_trigger);
static DEFINE_TRIGGER(switch_output_trigger);

/* Printable names indexed by enum perf affinity mode (PERF_AFFINITY_*). */
static const char *affinity_tags[PERF_AFFINITY_MAX] = {
	"SYS", "NODE", "CPU"
};
116
dc0c6127
JO
117static bool switch_output_signal(struct record *rec)
118{
119 return rec->switch_output.signal &&
120 trigger_is_ready(&switch_output_trigger);
121}
122
123static bool switch_output_size(struct record *rec)
124{
125 return rec->switch_output.size &&
126 trigger_is_ready(&switch_output_trigger) &&
127 (rec->bytes_written >= rec->switch_output.size);
128}
129
bfacbe3b
JO
130static bool switch_output_time(struct record *rec)
131{
132 return rec->switch_output.time &&
133 trigger_is_ready(&switch_output_trigger);
134}
135
6d575816
JS
136static bool record__output_max_size_exceeded(struct record *rec)
137{
138 return rec->output_max_size &&
139 (rec->bytes_written >= rec->output_max_size);
140}
141
a5830532 142static int record__write(struct record *rec, struct mmap *map __maybe_unused,
ded2b8fe 143 void *bf, size_t size)
f5970550 144{
ded2b8fe
JO
145 struct perf_data_file *file = &rec->session->data->file;
146
147 if (perf_data_file__write(file, bf, size) < 0) {
50a9b868
JO
148 pr_err("failed to write perf data, error: %m\n");
149 return -1;
f5970550 150 }
8d3eca20 151
cf8b2e69 152 rec->bytes_written += size;
dc0c6127 153
6d575816
JS
154 if (record__output_max_size_exceeded(rec) && !done) {
155 fprintf(stderr, "[ perf record: perf size limit reached (%" PRIu64 " KB),"
156 " stopping session ]\n",
157 rec->bytes_written >> 10);
158 done = 1;
159 }
160
dc0c6127
JO
161 if (switch_output_size(rec))
162 trigger_hit(&switch_output_trigger);
163
8d3eca20 164 return 0;
f5970550
PZ
165}
166
ef781128
AB
167static int record__aio_enabled(struct record *rec);
168static int record__comp_enabled(struct record *rec);
5d7f4116
AB
169static size_t zstd_compress(struct perf_session *session, void *dst, size_t dst_size,
170 void *src, size_t src_size);
171
d3d1af6f
AB
172#ifdef HAVE_AIO_SUPPORT
/*
 * Queue one POSIX aio write of @size bytes at file offset @off.
 * On unrecoverable error the control block's fd is poisoned to -1.
 * Returns aio_write()'s result (0 on success, -1 on failure).
 */
static int record__aio_write(struct aiocb *cblock, int trace_fd,
			     void *buf, size_t size, off_t off)
{
	int err;

	/* Describe the request. */
	cblock->aio_fildes = trace_fd;
	cblock->aio_buf = buf;
	cblock->aio_nbytes = size;
	cblock->aio_offset = off;
	cblock->aio_sigevent.sigev_notify = SIGEV_NONE;

	for (;;) {
		err = aio_write(cblock);
		if (!err)
			break;
		if (errno == EAGAIN)
			continue;	/* queue temporarily full — retry */
		cblock->aio_fildes = -1;
		pr_err("failed to queue perf data, error: %m\n");
		break;
	}

	return err;
}
197
a5830532 198static int record__aio_complete(struct mmap *md, struct aiocb *cblock)
d3d1af6f
AB
199{
200 void *rem_buf;
201 off_t rem_off;
202 size_t rem_size;
203 int rc, aio_errno;
204 ssize_t aio_ret, written;
205
206 aio_errno = aio_error(cblock);
207 if (aio_errno == EINPROGRESS)
208 return 0;
209
210 written = aio_ret = aio_return(cblock);
211 if (aio_ret < 0) {
212 if (aio_errno != EINTR)
213 pr_err("failed to write perf data, error: %m\n");
214 written = 0;
215 }
216
217 rem_size = cblock->aio_nbytes - written;
218
219 if (rem_size == 0) {
220 cblock->aio_fildes = -1;
221 /*
ef781128
AB
222 * md->refcount is incremented in record__aio_pushfn() for
223 * every aio write request started in record__aio_push() so
224 * decrement it because the request is now complete.
d3d1af6f 225 */
80e53d11 226 perf_mmap__put(&md->core);
d3d1af6f
AB
227 rc = 1;
228 } else {
229 /*
230 * aio write request may require restart with the
231 * reminder if the kernel didn't write whole
232 * chunk at once.
233 */
234 rem_off = cblock->aio_offset + written;
235 rem_buf = (void *)(cblock->aio_buf + written);
236 record__aio_write(cblock, cblock->aio_fildes,
237 rem_buf, rem_size, rem_off);
238 rc = 0;
239 }
240
241 return rc;
242}
243
a5830532 244static int record__aio_sync(struct mmap *md, bool sync_all)
d3d1af6f 245{
93f20c0f
AB
246 struct aiocb **aiocb = md->aio.aiocb;
247 struct aiocb *cblocks = md->aio.cblocks;
d3d1af6f 248 struct timespec timeout = { 0, 1000 * 1000 * 1 }; /* 1ms */
93f20c0f 249 int i, do_suspend;
d3d1af6f
AB
250
251 do {
93f20c0f
AB
252 do_suspend = 0;
253 for (i = 0; i < md->aio.nr_cblocks; ++i) {
254 if (cblocks[i].aio_fildes == -1 || record__aio_complete(md, &cblocks[i])) {
255 if (sync_all)
256 aiocb[i] = NULL;
257 else
258 return i;
259 } else {
260 /*
261 * Started aio write is not complete yet
262 * so it has to be waited before the
263 * next allocation.
264 */
265 aiocb[i] = &cblocks[i];
266 do_suspend = 1;
267 }
268 }
269 if (!do_suspend)
270 return -1;
d3d1af6f 271
93f20c0f 272 while (aio_suspend((const struct aiocb **)aiocb, md->aio.nr_cblocks, &timeout)) {
d3d1af6f
AB
273 if (!(errno == EAGAIN || errno == EINTR))
274 pr_err("failed to sync perf data, error: %m\n");
275 }
276 } while (1);
277}
278
/* Per-push context handed to record__aio_pushfn() via perf_mmap__push(). */
struct record_aio {
	struct record	*rec;
	void		*data;	/* staging buffer: map->aio.data[idx] */
	size_t		size;	/* bytes staged into data so far */
};
284
a5830532 285static int record__aio_pushfn(struct mmap *map, void *to, void *buf, size_t size)
d3d1af6f 286{
ef781128 287 struct record_aio *aio = to;
d3d1af6f 288
ef781128 289 /*
547740f7 290 * map->core.base data pointed by buf is copied into free map->aio.data[] buffer
ef781128
AB
291 * to release space in the kernel buffer as fast as possible, calling
292 * perf_mmap__consume() from perf_mmap__push() function.
293 *
294 * That lets the kernel to proceed with storing more profiling data into
295 * the kernel buffer earlier than other per-cpu kernel buffers are handled.
296 *
297 * Coping can be done in two steps in case the chunk of profiling data
298 * crosses the upper bound of the kernel buffer. In this case we first move
299 * part of data from map->start till the upper bound and then the reminder
300 * from the beginning of the kernel buffer till the end of the data chunk.
301 */
302
303 if (record__comp_enabled(aio->rec)) {
304 size = zstd_compress(aio->rec->session, aio->data + aio->size,
bf59b305 305 mmap__mmap_len(map) - aio->size,
ef781128
AB
306 buf, size);
307 } else {
308 memcpy(aio->data + aio->size, buf, size);
309 }
310
311 if (!aio->size) {
312 /*
313 * Increment map->refcount to guard map->aio.data[] buffer
314 * from premature deallocation because map object can be
315 * released earlier than aio write request started on
316 * map->aio.data[] buffer is complete.
317 *
318 * perf_mmap__put() is done at record__aio_complete()
319 * after started aio request completion or at record__aio_push()
320 * if the request failed to start.
321 */
e75710f0 322 perf_mmap__get(&map->core);
ef781128
AB
323 }
324
325 aio->size += size;
326
327 return size;
328}
329
a5830532 330static int record__aio_push(struct record *rec, struct mmap *map, off_t *off)
ef781128
AB
331{
332 int ret, idx;
333 int trace_fd = rec->session->data->file.fd;
334 struct record_aio aio = { .rec = rec, .size = 0 };
d3d1af6f 335
ef781128
AB
336 /*
337 * Call record__aio_sync() to wait till map->aio.data[] buffer
338 * becomes available after previous aio write operation.
339 */
340
341 idx = record__aio_sync(map, false);
342 aio.data = map->aio.data[idx];
343 ret = perf_mmap__push(map, &aio, record__aio_pushfn);
344 if (ret != 0) /* ret > 0 - no data, ret < 0 - error */
345 return ret;
346
347 rec->samples++;
348 ret = record__aio_write(&(map->aio.cblocks[idx]), trace_fd, aio.data, aio.size, *off);
d3d1af6f 349 if (!ret) {
ef781128
AB
350 *off += aio.size;
351 rec->bytes_written += aio.size;
d3d1af6f
AB
352 if (switch_output_size(rec))
353 trigger_hit(&switch_output_trigger);
ef781128
AB
354 } else {
355 /*
356 * Decrement map->refcount incremented in record__aio_pushfn()
357 * back if record__aio_write() operation failed to start, otherwise
358 * map->refcount is decremented in record__aio_complete() after
359 * aio write operation finishes successfully.
360 */
80e53d11 361 perf_mmap__put(&map->core);
d3d1af6f
AB
362 }
363
364 return ret;
365}
366
/* Current file offset of the trace output fd. */
static off_t record__aio_get_pos(int trace_fd)
{
	return lseek(trace_fd, 0, SEEK_CUR);
}
371
/* Reposition the trace output fd to @pos. */
static void record__aio_set_pos(int trace_fd, off_t pos)
{
	lseek(trace_fd, pos, SEEK_SET);
}
376
377static void record__aio_mmap_read_sync(struct record *rec)
378{
379 int i;
63503dba 380 struct evlist *evlist = rec->evlist;
a5830532 381 struct mmap *maps = evlist->mmap;
d3d1af6f 382
ef781128 383 if (!record__aio_enabled(rec))
d3d1af6f
AB
384 return;
385
c976ee11 386 for (i = 0; i < evlist->core.nr_mmaps; i++) {
a5830532 387 struct mmap *map = &maps[i];
d3d1af6f 388
547740f7 389 if (map->core.base)
93f20c0f 390 record__aio_sync(map, true);
d3d1af6f
AB
391 }
392}
393
394static int nr_cblocks_default = 1;
93f20c0f 395static int nr_cblocks_max = 4;
d3d1af6f
AB
396
397static int record__aio_parse(const struct option *opt,
93f20c0f 398 const char *str,
d3d1af6f
AB
399 int unset)
400{
401 struct record_opts *opts = (struct record_opts *)opt->value;
402
93f20c0f 403 if (unset) {
d3d1af6f 404 opts->nr_cblocks = 0;
93f20c0f
AB
405 } else {
406 if (str)
407 opts->nr_cblocks = strtol(str, NULL, 0);
408 if (!opts->nr_cblocks)
409 opts->nr_cblocks = nr_cblocks_default;
410 }
d3d1af6f
AB
411
412 return 0;
413}
414#else /* HAVE_AIO_SUPPORT */
93f20c0f
AB
415static int nr_cblocks_max = 0;
416
a5830532 417static int record__aio_push(struct record *rec __maybe_unused, struct mmap *map __maybe_unused,
ef781128 418 off_t *off __maybe_unused)
d3d1af6f
AB
419{
420 return -1;
421}
422
423static off_t record__aio_get_pos(int trace_fd __maybe_unused)
424{
425 return -1;
426}
427
428static void record__aio_set_pos(int trace_fd __maybe_unused, off_t pos __maybe_unused)
429{
430}
431
432static void record__aio_mmap_read_sync(struct record *rec __maybe_unused)
433{
434}
435#endif
436
437static int record__aio_enabled(struct record *rec)
438{
439 return rec->opts.nr_cblocks > 0;
440}
441
470530bb
AB
442#define MMAP_FLUSH_DEFAULT 1
443static int record__mmap_flush_parse(const struct option *opt,
444 const char *str,
445 int unset)
446{
447 int flush_max;
448 struct record_opts *opts = (struct record_opts *)opt->value;
449 static struct parse_tag tags[] = {
450 { .tag = 'B', .mult = 1 },
451 { .tag = 'K', .mult = 1 << 10 },
452 { .tag = 'M', .mult = 1 << 20 },
453 { .tag = 'G', .mult = 1 << 30 },
454 { .tag = 0 },
455 };
456
457 if (unset)
458 return 0;
459
460 if (str) {
461 opts->mmap_flush = parse_tag_value(str, tags);
462 if (opts->mmap_flush == (int)-1)
463 opts->mmap_flush = strtol(str, NULL, 0);
464 }
465
466 if (!opts->mmap_flush)
467 opts->mmap_flush = MMAP_FLUSH_DEFAULT;
468
9521b5f2 469 flush_max = evlist__mmap_size(opts->mmap_pages);
470530bb
AB
470 flush_max /= 4;
471 if (opts->mmap_flush > flush_max)
472 opts->mmap_flush = flush_max;
473
474 return 0;
475}
476
#ifdef HAVE_ZSTD_SUPPORT
static unsigned int comp_level_default = 1;

/*
 * Option callback for -z/--compression-level: unset disables compression,
 * a bare flag or zero selects the default level.
 */
static int record__parse_comp_level(const struct option *opt, const char *str, int unset)
{
	struct record_opts *opts = opt->value;

	if (unset) {
		opts->comp_level = 0;
		return 0;
	}

	if (str)
		opts->comp_level = strtol(str, NULL, 0);
	if (!opts->comp_level)
		opts->comp_level = comp_level_default;

	return 0;
}
#endif
/* Highest compression level accepted on the command line. */
static unsigned int comp_level_max = 22;
497
42e1fd80
AB
498static int record__comp_enabled(struct record *rec)
499{
500 return rec->opts.comp_level > 0;
501}
502
45694aa7 503static int process_synthesized_event(struct perf_tool *tool,
d20deb64 504 union perf_event *event,
1d037ca1
IT
505 struct perf_sample *sample __maybe_unused,
506 struct machine *machine __maybe_unused)
234fbbf5 507{
8c6f45a7 508 struct record *rec = container_of(tool, struct record, tool);
ded2b8fe 509 return record__write(rec, NULL, event, event->header.size);
234fbbf5
ACM
510}
511
d99c22ea
SE
512static int process_locked_synthesized_event(struct perf_tool *tool,
513 union perf_event *event,
514 struct perf_sample *sample __maybe_unused,
515 struct machine *machine __maybe_unused)
516{
517 static pthread_mutex_t synth_lock = PTHREAD_MUTEX_INITIALIZER;
518 int ret;
519
520 pthread_mutex_lock(&synth_lock);
521 ret = process_synthesized_event(tool, event, sample, machine);
522 pthread_mutex_unlock(&synth_lock);
523 return ret;
524}
525
a5830532 526static int record__pushfn(struct mmap *map, void *to, void *bf, size_t size)
d37f1586
ACM
527{
528 struct record *rec = to;
529
5d7f4116 530 if (record__comp_enabled(rec)) {
bf59b305 531 size = zstd_compress(rec->session, map->data, mmap__mmap_len(map), bf, size);
5d7f4116
AB
532 bf = map->data;
533 }
534
d37f1586 535 rec->samples++;
ded2b8fe 536 return record__write(rec, map, bf, size);
d37f1586
ACM
537}
538
/* Signal that requested termination (-1 = none); re-raised in record__sig_exit(). */
static volatile int signr = -1;
/* Set by sig_handler() on SIGCHLD: the workload child has exited. */
static volatile int child_finished;
2dd6d8a1
AH
542static void sig_handler(int sig)
543{
544 if (sig == SIGCHLD)
545 child_finished = 1;
546 else
547 signr = sig;
548
549 done = 1;
550}
551
/* SIGSEGV handler: run perf-hook recovery, then dump a stack trace. */
static void sigsegv_handler(int sig)
{
	perf_hooks__recover();
	sighandler_dump_stack(sig);
}
557
2dd6d8a1
AH
558static void record__sig_exit(void)
559{
560 if (signr == -1)
561 return;
562
563 signal(signr, SIG_DFL);
564 raise(signr);
565}
566
e31f0d01
AH
567#ifdef HAVE_AUXTRACE_SUPPORT
568
ef149c25 569static int record__process_auxtrace(struct perf_tool *tool,
a5830532 570 struct mmap *map,
ef149c25
AH
571 union perf_event *event, void *data1,
572 size_t len1, void *data2, size_t len2)
573{
574 struct record *rec = container_of(tool, struct record, tool);
8ceb41d7 575 struct perf_data *data = &rec->data;
ef149c25
AH
576 size_t padding;
577 u8 pad[8] = {0};
578
46e201ef 579 if (!perf_data__is_pipe(data) && perf_data__is_single_file(data)) {
99fa2984 580 off_t file_offset;
8ceb41d7 581 int fd = perf_data__fd(data);
99fa2984
AH
582 int err;
583
584 file_offset = lseek(fd, 0, SEEK_CUR);
585 if (file_offset == -1)
586 return -1;
587 err = auxtrace_index__auxtrace_event(&rec->session->auxtrace_index,
588 event, file_offset);
589 if (err)
590 return err;
591 }
592
ef149c25
AH
593 /* event.auxtrace.size includes padding, see __auxtrace_mmap__read() */
594 padding = (len1 + len2) & 7;
595 if (padding)
596 padding = 8 - padding;
597
ded2b8fe
JO
598 record__write(rec, map, event, event->header.size);
599 record__write(rec, map, data1, len1);
ef149c25 600 if (len2)
ded2b8fe
JO
601 record__write(rec, map, data2, len2);
602 record__write(rec, map, &pad, padding);
ef149c25
AH
603
604 return 0;
605}
606
607static int record__auxtrace_mmap_read(struct record *rec,
a5830532 608 struct mmap *map)
ef149c25
AH
609{
610 int ret;
611
e035f4ca 612 ret = auxtrace_mmap__read(map, rec->itr, &rec->tool,
ef149c25
AH
613 record__process_auxtrace);
614 if (ret < 0)
615 return ret;
616
617 if (ret)
618 rec->samples++;
619
620 return 0;
621}
622
2dd6d8a1 623static int record__auxtrace_mmap_read_snapshot(struct record *rec,
a5830532 624 struct mmap *map)
2dd6d8a1
AH
625{
626 int ret;
627
e035f4ca 628 ret = auxtrace_mmap__read_snapshot(map, rec->itr, &rec->tool,
2dd6d8a1
AH
629 record__process_auxtrace,
630 rec->opts.auxtrace_snapshot_size);
631 if (ret < 0)
632 return ret;
633
634 if (ret)
635 rec->samples++;
636
637 return 0;
638}
639
640static int record__auxtrace_read_snapshot_all(struct record *rec)
641{
642 int i;
643 int rc = 0;
644
c976ee11 645 for (i = 0; i < rec->evlist->core.nr_mmaps; i++) {
a5830532 646 struct mmap *map = &rec->evlist->mmap[i];
2dd6d8a1 647
e035f4ca 648 if (!map->auxtrace_mmap.base)
2dd6d8a1
AH
649 continue;
650
e035f4ca 651 if (record__auxtrace_mmap_read_snapshot(rec, map) != 0) {
2dd6d8a1
AH
652 rc = -1;
653 goto out;
654 }
655 }
656out:
657 return rc;
658}
659
ce7b0e42 660static void record__read_auxtrace_snapshot(struct record *rec, bool on_exit)
2dd6d8a1
AH
661{
662 pr_debug("Recording AUX area tracing snapshot\n");
663 if (record__auxtrace_read_snapshot_all(rec) < 0) {
5f9cf599 664 trigger_error(&auxtrace_snapshot_trigger);
2dd6d8a1 665 } else {
ce7b0e42 666 if (auxtrace_record__snapshot_finish(rec->itr, on_exit))
5f9cf599
WN
667 trigger_error(&auxtrace_snapshot_trigger);
668 else
669 trigger_ready(&auxtrace_snapshot_trigger);
2dd6d8a1
AH
670 }
671}
672
ce7b0e42
AS
673static int record__auxtrace_snapshot_exit(struct record *rec)
674{
675 if (trigger_is_error(&auxtrace_snapshot_trigger))
676 return 0;
677
678 if (!auxtrace_record__snapshot_started &&
679 auxtrace_record__snapshot_start(rec->itr))
680 return -1;
681
682 record__read_auxtrace_snapshot(rec, true);
683 if (trigger_is_error(&auxtrace_snapshot_trigger))
684 return -1;
685
686 return 0;
687}
688
4b5ea3bd
AH
689static int record__auxtrace_init(struct record *rec)
690{
691 int err;
692
693 if (!rec->itr) {
694 rec->itr = auxtrace_record__init(rec->evlist, &err);
695 if (err)
696 return err;
697 }
698
699 err = auxtrace_parse_snapshot_options(rec->itr, &rec->opts,
700 rec->opts.auxtrace_snapshot_opts);
701 if (err)
702 return err;
703
c0a6de06
AH
704 err = auxtrace_parse_sample_options(rec->itr, rec->evlist, &rec->opts,
705 rec->opts.auxtrace_sample_opts);
706 if (err)
707 return err;
708
4b5ea3bd
AH
709 return auxtrace_parse_filters(rec->evlist);
710}
711
e31f0d01
AH
712#else
713
714static inline
715int record__auxtrace_mmap_read(struct record *rec __maybe_unused,
a5830532 716 struct mmap *map __maybe_unused)
e31f0d01
AH
717{
718 return 0;
719}
720
2dd6d8a1 721static inline
ce7b0e42
AS
722void record__read_auxtrace_snapshot(struct record *rec __maybe_unused,
723 bool on_exit __maybe_unused)
de9ac07b 724{
f7b7c26e
PZ
725}
726
2dd6d8a1
AH
727static inline
728int auxtrace_record__snapshot_start(struct auxtrace_record *itr __maybe_unused)
f7b7c26e 729{
2dd6d8a1 730 return 0;
de9ac07b
PZ
731}
732
ce7b0e42
AS
733static inline
734int record__auxtrace_snapshot_exit(struct record *rec __maybe_unused)
735{
736 return 0;
737}
738
4b5ea3bd
AH
739static int record__auxtrace_init(struct record *rec __maybe_unused)
740{
741 return 0;
742}
743
2dd6d8a1
AH
744#endif
745
eeb399b5
AH
746static bool record__kcore_readable(struct machine *machine)
747{
748 char kcore[PATH_MAX];
749 int fd;
750
751 scnprintf(kcore, sizeof(kcore), "%s/proc/kcore", machine->root_dir);
752
753 fd = open(kcore, O_RDONLY);
754 if (fd < 0)
755 return false;
756
757 close(fd);
758
759 return true;
760}
761
762static int record__kcore_copy(struct machine *machine, struct perf_data *data)
763{
764 char from_dir[PATH_MAX];
765 char kcore_dir[PATH_MAX];
766 int ret;
767
768 snprintf(from_dir, sizeof(from_dir), "%s/proc", machine->root_dir);
769
770 ret = perf_data__make_kcore_dir(data, kcore_dir, sizeof(kcore_dir));
771 if (ret)
772 return ret;
773
774 return kcore_copy(from_dir, kcore_dir);
775}
776
cda57a8c 777static int record__mmap_evlist(struct record *rec,
63503dba 778 struct evlist *evlist)
cda57a8c
WN
779{
780 struct record_opts *opts = &rec->opts;
c0a6de06
AH
781 bool auxtrace_overwrite = opts->auxtrace_snapshot_mode ||
782 opts->auxtrace_sample_mode;
cda57a8c
WN
783 char msg[512];
784
f13de660
AB
785 if (opts->affinity != PERF_AFFINITY_SYS)
786 cpu__setup_cpunode_map();
787
9521b5f2 788 if (evlist__mmap_ex(evlist, opts->mmap_pages,
cda57a8c 789 opts->auxtrace_mmap_pages,
c0a6de06 790 auxtrace_overwrite,
470530bb 791 opts->nr_cblocks, opts->affinity,
51255a8a 792 opts->mmap_flush, opts->comp_level) < 0) {
cda57a8c
WN
793 if (errno == EPERM) {
794 pr_err("Permission error mapping pages.\n"
795 "Consider increasing "
796 "/proc/sys/kernel/perf_event_mlock_kb,\n"
797 "or try again with a smaller value of -m/--mmap_pages.\n"
798 "(current value: %u,%u)\n",
799 opts->mmap_pages, opts->auxtrace_mmap_pages);
800 return -errno;
801 } else {
802 pr_err("failed to mmap with %d (%s)\n", errno,
c8b5f2c9 803 str_error_r(errno, msg, sizeof(msg)));
cda57a8c
WN
804 if (errno)
805 return -errno;
806 else
807 return -EINVAL;
808 }
809 }
810 return 0;
811}
812
813static int record__mmap(struct record *rec)
814{
815 return record__mmap_evlist(rec, rec->evlist);
816}
817
8c6f45a7 818static int record__open(struct record *rec)
dd7927f4 819{
d6195a6a 820 char msg[BUFSIZ];
32dcd021 821 struct evsel *pos;
63503dba 822 struct evlist *evlist = rec->evlist;
d20deb64 823 struct perf_session *session = rec->session;
b4006796 824 struct record_opts *opts = &rec->opts;
8d3eca20 825 int rc = 0;
dd7927f4 826
d3dbf43c 827 /*
0a892c1c
IR
828 * For initial_delay or system wide, we need to add a dummy event so
829 * that we can track PERF_RECORD_MMAP to cover the delay of waiting or
830 * event synthesis.
d3dbf43c 831 */
0a892c1c 832 if (opts->initial_delay || target__has_cpu(&opts->target)) {
d3dbf43c
ACM
833 if (perf_evlist__add_dummy(evlist))
834 return -ENOMEM;
835
0a892c1c 836 /* Disable tracking of mmaps on lead event. */
515dbe48 837 pos = evlist__first(evlist);
d3dbf43c 838 pos->tracking = 0;
0a892c1c 839 /* Set up dummy event. */
515dbe48 840 pos = evlist__last(evlist);
d3dbf43c 841 pos->tracking = 1;
0a892c1c
IR
842 /*
843 * Enable the dummy event when the process is forked for
844 * initial_delay, immediately for system wide.
845 */
846 if (opts->initial_delay)
847 pos->core.attr.enable_on_exec = 1;
848 else
849 pos->immediate = 1;
d3dbf43c
ACM
850 }
851
e68ae9cf 852 perf_evlist__config(evlist, opts, &callchain_param);
cac21425 853
e5cadb93 854 evlist__for_each_entry(evlist, pos) {
dd7927f4 855try_again:
af663bd0 856 if (evsel__open(pos, pos->core.cpus, pos->core.threads) < 0) {
ae430892 857 if (evsel__fallback(pos, errno, msg, sizeof(msg))) {
bb963e16 858 if (verbose > 0)
c0a54341 859 ui__warning("%s\n", msg);
d6d901c2
ZY
860 goto try_again;
861 }
cf99ad14
AK
862 if ((errno == EINVAL || errno == EBADF) &&
863 pos->leader != pos &&
864 pos->weak_group) {
4804e011 865 pos = perf_evlist__reset_weak_group(evlist, pos, true);
cf99ad14
AK
866 goto try_again;
867 }
56e52e85 868 rc = -errno;
2bb72dbb 869 evsel__open_strerror(pos, &opts->target, errno, msg, sizeof(msg));
56e52e85 870 ui__error("%s\n", msg);
8d3eca20 871 goto out;
c171b552 872 }
bfd8f72c
AK
873
874 pos->supported = true;
c171b552 875 }
a43d3f08 876
c8b567c8
ACM
877 if (symbol_conf.kptr_restrict && !perf_evlist__exclude_kernel(evlist)) {
878 pr_warning(
879"WARNING: Kernel address maps (/proc/{kallsyms,modules}) are restricted,\n"
880"check /proc/sys/kernel/kptr_restrict and /proc/sys/kernel/perf_event_paranoid.\n\n"
881"Samples in kernel functions may not be resolved if a suitable vmlinux\n"
882"file is not found in the buildid cache or in the vmlinux path.\n\n"
883"Samples in kernel modules won't be resolved at all.\n\n"
884"If some relocation was applied (e.g. kexec) symbols may be misresolved\n"
885"even with a suitable vmlinux or kallsyms file.\n\n");
886 }
887
23d4aad4 888 if (perf_evlist__apply_filters(evlist, &pos)) {
62d94b00 889 pr_err("failed to set filter \"%s\" on event %s with %d (%s)\n",
8ab2e96d 890 pos->filter, evsel__name(pos), errno,
c8b5f2c9 891 str_error_r(errno, msg, sizeof(msg)));
8d3eca20 892 rc = -1;
5d8bb1ec
MP
893 goto out;
894 }
895
cda57a8c
WN
896 rc = record__mmap(rec);
897 if (rc)
8d3eca20 898 goto out;
0a27d7f9 899
563aecb2 900 session->evlist = evlist;
7b56cce2 901 perf_session__set_id_hdr_size(session);
8d3eca20
DA
902out:
903 return rc;
16c8a109
PZ
904}
905
e3d59112
NK
906static int process_sample_event(struct perf_tool *tool,
907 union perf_event *event,
908 struct perf_sample *sample,
32dcd021 909 struct evsel *evsel,
e3d59112
NK
910 struct machine *machine)
911{
912 struct record *rec = container_of(tool, struct record, tool);
913
68588baf
JY
914 if (rec->evlist->first_sample_time == 0)
915 rec->evlist->first_sample_time = sample->time;
916
917 rec->evlist->last_sample_time = sample->time;
e3d59112 918
68588baf
JY
919 if (rec->buildid_all)
920 return 0;
921
922 rec->samples++;
e3d59112
NK
923 return build_id__mark_dso_hit(tool, event, sample, evsel, machine);
924}
925
8c6f45a7 926static int process_buildids(struct record *rec)
6122e4e4 927{
f5fc1412 928 struct perf_session *session = rec->session;
6122e4e4 929
45112e89 930 if (perf_data__size(&rec->data) == 0)
9f591fd7
ACM
931 return 0;
932
00dc8657
NK
933 /*
934 * During this process, it'll load kernel map and replace the
935 * dso->long_name to a real pathname it found. In this case
936 * we prefer the vmlinux path like
937 * /lib/modules/3.16.4/build/vmlinux
938 *
939 * rather than build-id path (in debug directory).
940 * $HOME/.debug/.build-id/f0/6e17aa50adf4d00b88925e03775de107611551
941 */
942 symbol_conf.ignore_vmlinux_buildid = true;
943
6156681b
NK
944 /*
945 * If --buildid-all is given, it marks all DSO regardless of hits,
68588baf
JY
946 * so no need to process samples. But if timestamp_boundary is enabled,
947 * it still needs to walk on all samples to get the timestamps of
948 * first/last samples.
6156681b 949 */
68588baf 950 if (rec->buildid_all && !rec->timestamp_boundary)
6156681b
NK
951 rec->tool.sample = NULL;
952
b7b61cbe 953 return perf_session__process_events(session);
6122e4e4
ACM
954}
955
8115d60c 956static void perf_event__synthesize_guest_os(struct machine *machine, void *data)
a1645ce1
ZY
957{
958 int err;
45694aa7 959 struct perf_tool *tool = data;
a1645ce1
ZY
960 /*
961 *As for guest kernel when processing subcommand record&report,
962 *we arrange module mmap prior to guest kernel mmap and trigger
963 *a preload dso because default guest module symbols are loaded
964 *from guest kallsyms instead of /lib/modules/XXX/XXX. This
965 *method is used to avoid symbol missing when the first addr is
966 *in module instead of in guest kernel.
967 */
45694aa7 968 err = perf_event__synthesize_modules(tool, process_synthesized_event,
743eb868 969 machine);
a1645ce1
ZY
970 if (err < 0)
971 pr_err("Couldn't record guest kernel [%d]'s reference"
23346f21 972 " relocation symbol.\n", machine->pid);
a1645ce1 973
a1645ce1
ZY
974 /*
975 * We use _stext for guest kernel because guest kernel's /proc/kallsyms
976 * have no _text sometimes.
977 */
45694aa7 978 err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
0ae617be 979 machine);
a1645ce1
ZY
980 if (err < 0)
981 pr_err("Couldn't record guest kernel [%d]'s reference"
23346f21 982 " relocation symbol.\n", machine->pid);
a1645ce1
ZY
983}
984
/* Header-only event emitted after each flush round so the reader can sort. */
static struct perf_event_header finished_round_event = {
	.size = sizeof(struct perf_event_header),
	.type = PERF_RECORD_FINISHED_ROUND,
};
989
a5830532 990static void record__adjust_affinity(struct record *rec, struct mmap *map)
f13de660
AB
991{
992 if (rec->opts.affinity != PERF_AFFINITY_SYS &&
8384a260
AB
993 !bitmap_equal(rec->affinity_mask.bits, map->affinity_mask.bits,
994 rec->affinity_mask.nbits)) {
995 bitmap_zero(rec->affinity_mask.bits, rec->affinity_mask.nbits);
996 bitmap_or(rec->affinity_mask.bits, rec->affinity_mask.bits,
997 map->affinity_mask.bits, rec->affinity_mask.nbits);
998 sched_setaffinity(0, MMAP_CPU_MASK_BYTES(&rec->affinity_mask),
999 (cpu_set_t *)rec->affinity_mask.bits);
1000 if (verbose == 2)
1001 mmap_cpu_mask__scnprintf(&rec->affinity_mask, "thread");
f13de660
AB
1002 }
1003}
1004
5d7f4116
AB
1005static size_t process_comp_header(void *record, size_t increment)
1006{
72932371 1007 struct perf_record_compressed *event = record;
5d7f4116
AB
1008 size_t size = sizeof(*event);
1009
1010 if (increment) {
1011 event->header.size += increment;
1012 return increment;
1013 }
1014
1015 event->header.type = PERF_RECORD_COMPRESSED;
1016 event->header.size = size;
1017
1018 return size;
1019}
1020
1021static size_t zstd_compress(struct perf_session *session, void *dst, size_t dst_size,
1022 void *src, size_t src_size)
1023{
1024 size_t compressed;
72932371 1025 size_t max_record_size = PERF_SAMPLE_MAX_SIZE - sizeof(struct perf_record_compressed) - 1;
5d7f4116
AB
1026
1027 compressed = zstd_compress_stream_to_records(&session->zstd_data, dst, dst_size, src, src_size,
1028 max_record_size, process_comp_header);
1029
1030 session->bytes_transferred += src_size;
1031 session->bytes_compressed += compressed;
1032
1033 return compressed;
1034}
1035
/*
 * Drain the ring buffers of @evlist into the output, either synchronously
 * (perf_mmap__push) or via AIO when enabled.  @overwrite selects the
 * overwritable ("backward") mmaps instead of the regular ones; @synch
 * temporarily forces each map's flush threshold to 1 so everything buffered
 * is pushed out.  Returns 0 on success, -1 on failure.
 */
static int record__mmap_read_evlist(struct record *rec, struct evlist *evlist,
				    bool overwrite, bool synch)
{
	u64 bytes_written = rec->bytes_written;
	int i;
	int rc = 0;
	struct mmap *maps;
	int trace_fd = rec->data.file.fd;
	off_t off = 0;

	if (!evlist)
		return 0;

	maps = overwrite ? evlist->overwrite_mmap : evlist->mmap;
	if (!maps)
		return 0;

	/* Backward buffers are only read when data is pending collection. */
	if (overwrite && evlist->bkw_mmap_state != BKW_MMAP_DATA_PENDING)
		return 0;

	/* With AIO, the output file position is tracked manually. */
	if (record__aio_enabled(rec))
		off = record__aio_get_pos(trace_fd);

	for (i = 0; i < evlist->core.nr_mmaps; i++) {
		u64 flush = 0;
		struct mmap *map = &maps[i];

		if (map->core.base) {
			record__adjust_affinity(rec, map);
			if (synch) {
				/* Save the flush threshold; restored on every exit path. */
				flush = map->core.flush;
				map->core.flush = 1;
			}
			if (!record__aio_enabled(rec)) {
				if (perf_mmap__push(map, rec, record__pushfn) < 0) {
					if (synch)
						map->core.flush = flush;
					rc = -1;
					goto out;
				}
			} else {
				if (record__aio_push(rec, map, &off) < 0) {
					record__aio_set_pos(trace_fd, off);
					if (synch)
						map->core.flush = flush;
					rc = -1;
					goto out;
				}
			}
			if (synch)
				map->core.flush = flush;
		}

		if (map->auxtrace_mmap.base && !rec->opts.auxtrace_snapshot_mode &&
		    !rec->opts.auxtrace_sample_mode &&
		    record__auxtrace_mmap_read(rec, map) != 0) {
			rc = -1;
			goto out;
		}
	}

	if (record__aio_enabled(rec))
		record__aio_set_pos(trace_fd, off);

	/*
	 * Mark the round finished in case we wrote
	 * at least one event.
	 */
	if (bytes_written != rec->bytes_written)
		rc = record__write(rec, NULL, &finished_round_event, sizeof(finished_round_event));

	if (overwrite)
		perf_evlist__toggle_bkw_mmap(evlist, BKW_MMAP_EMPTY);
out:
	return rc;
}
1112
470530bb 1113static int record__mmap_read_all(struct record *rec, bool synch)
cb21686b
WN
1114{
1115 int err;
1116
470530bb 1117 err = record__mmap_read_evlist(rec, rec->evlist, false, synch);
cb21686b
WN
1118 if (err)
1119 return err;
1120
470530bb 1121 return record__mmap_read_evlist(rec, rec->evlist, true, synch);
cb21686b
WN
1122}
1123
/*
 * Start with every perf.data header feature enabled, then clear the ones
 * that do not apply to this session (build-ids, tracing data, branch
 * stacks, auxtrace, clockid resolution, directory format, compression,
 * stat data).
 */
static void record__init_features(struct record *rec)
{
	struct perf_session *session = rec->session;
	int feat;

	for (feat = HEADER_FIRST_FEATURE; feat < HEADER_LAST_FEATURE; feat++)
		perf_header__set_feat(&session->header, feat);

	if (rec->no_buildid)
		perf_header__clear_feat(&session->header, HEADER_BUILD_ID);

	if (!have_tracepoints(&rec->evlist->core.entries))
		perf_header__clear_feat(&session->header, HEADER_TRACING_DATA);

	if (!rec->opts.branch_stack)
		perf_header__clear_feat(&session->header, HEADER_BRANCH_STACK);

	if (!rec->opts.full_auxtrace)
		perf_header__clear_feat(&session->header, HEADER_AUXTRACE);

	if (!(rec->opts.use_clockid && rec->opts.clockid_res_ns))
		perf_header__clear_feat(&session->header, HEADER_CLOCKID);

	/* Plain 'perf record' never writes the directory data layout. */
	perf_header__clear_feat(&session->header, HEADER_DIR_FORMAT);
	if (!record__comp_enabled(rec))
		perf_header__clear_feat(&session->header, HEADER_COMPRESSED);

	perf_header__clear_feat(&session->header, HEADER_STAT);
}
1153
e1ab48ba
WN
1154static void
1155record__finish_output(struct record *rec)
1156{
8ceb41d7
JO
1157 struct perf_data *data = &rec->data;
1158 int fd = perf_data__fd(data);
e1ab48ba 1159
8ceb41d7 1160 if (data->is_pipe)
e1ab48ba
WN
1161 return;
1162
1163 rec->session->header.data_size += rec->bytes_written;
45112e89 1164 data->file.size = lseek(perf_data__fd(data), 0, SEEK_CUR);
e1ab48ba
WN
1165
1166 if (!rec->no_buildid) {
1167 process_buildids(rec);
1168
1169 if (rec->buildid_all)
1170 dsos__hit_all(rec->session);
1171 }
1172 perf_session__write_header(rec->session, rec->evlist, fd, true);
1173
1174 return;
1175}
1176
/*
 * Synthesize task events for the forked workload only, via a single-tid
 * thread map built from the workload pid.  Runs either at startup or, with
 * --tail-synthesize, at the end, depending on @tail.  Returns 0 when
 * skipped or successful, negative on error.
 */
static int record__synthesize_workload(struct record *rec, bool tail)
{
	int err;
	struct perf_thread_map *thread_map;

	if (rec->opts.tail_synthesize != tail)
		return 0;

	thread_map = thread_map__new_by_tid(rec->evlist->workload.pid);
	if (thread_map == NULL)
		return -1;

	err = perf_event__synthesize_thread_map(&rec->tool, thread_map,
						 process_synthesized_event,
						 &rec->session->machines.host,
						 rec->opts.sample_address);
	perf_thread_map__put(thread_map);
	return err;
}
1196
/* Forward declaration: record__switch_output() below needs it. */
static int record__synthesize(struct record *rec, bool tail);
/*
 * Rotate the output file: flush pending data, synthesize tail events,
 * finalize the current perf.data, then switch to a new file named after
 * the current timestamp.  @at_exit marks the final rotation, after which
 * no new tracking events are generated.  Returns the new output fd, or a
 * negative error code.
 */
static int
record__switch_output(struct record *rec, bool at_exit)
{
	struct perf_data *data = &rec->data;
	int fd, err;
	char *new_filename;

	/* Same Size: "2015122520103046"*/
	char timestamp[] = "InvalidTimestamp";

	record__aio_mmap_read_sync(rec);

	record__synthesize(rec, true);
	if (target__none(&rec->opts.target))
		record__synthesize_workload(rec, true);

	rec->samples = 0;
	record__finish_output(rec);
	err = fetch_current_timestamp(timestamp, sizeof(timestamp));
	if (err) {
		pr_err("Failed to get current timestamp\n");
		return -EINVAL;
	}

	fd = perf_data__switch(data, timestamp,
			       rec->session->header.data_offset,
			       at_exit, &new_filename);
	if (fd >= 0 && !at_exit) {
		/* Fresh file: restart the byte accounting. */
		rec->bytes_written = 0;
		rec->session->header.data_size = 0;
	}

	if (!quiet)
		fprintf(stderr, "[ perf record: Dump %s.%s ]\n",
			data->path, timestamp);

	/* With --switch-output + --switch-max-files, keep a rotating window. */
	if (rec->switch_output.num_files) {
		int n = rec->switch_output.cur_file + 1;

		if (n >= rec->switch_output.num_files)
			n = 0;
		rec->switch_output.cur_file = n;
		if (rec->switch_output.filenames[n]) {
			remove(rec->switch_output.filenames[n]);
			zfree(&rec->switch_output.filenames[n]);
		}
		rec->switch_output.filenames[n] = new_filename;
	} else {
		free(new_filename);
	}

	/* Output tracking events */
	if (!at_exit) {
		record__synthesize(rec, false);

		/*
		 * In 'perf record --switch-output' without -a,
		 * record__synthesize() in record__switch_output() won't
		 * generate tracking events because there's no thread_map
		 * in evlist. Which causes newly created perf.data doesn't
		 * contain map and comm information.
		 * Create a fake thread_map and directly call
		 * perf_event__synthesize_thread_map() for those events.
		 */
		if (target__none(&rec->opts.target))
			record__synthesize_workload(rec, false);
	}
	return fd;
}
1268
/* errno of a failed workload exec, delivered via the SIGUSR1 handler below. */
static volatile int workload_exec_errno;

/*
 * perf_evlist__prepare_workload will send a SIGUSR1
 * if the fork fails, since we asked by setting its
 * want_signal to true.
 */
static void workload_exec_failed_signal(int signo __maybe_unused,
					siginfo_t *info,
					void *ucontext __maybe_unused)
{
	/* sival_int carries the child's errno; make the main loop terminate. */
	workload_exec_errno = info->si_value.sival_int;
	done = 1;
	child_finished = 1;
}
1284
/* Defined later in this file; __cmd_record()'s signal setup references them. */
static void snapshot_sig_handler(int sig);
static void alarm_sig_handler(int sig);
ee667f94 1288static const struct perf_event_mmap_page *
63503dba 1289perf_evlist__pick_pc(struct evlist *evlist)
ee667f94 1290{
b2cb615d 1291 if (evlist) {
547740f7
JO
1292 if (evlist->mmap && evlist->mmap[0].core.base)
1293 return evlist->mmap[0].core.base;
1294 if (evlist->overwrite_mmap && evlist->overwrite_mmap[0].core.base)
1295 return evlist->overwrite_mmap[0].core.base;
b2cb615d 1296 }
ee667f94
WN
1297 return NULL;
1298}
1299
c45628b0
WN
1300static const struct perf_event_mmap_page *record__pick_pc(struct record *rec)
1301{
ee667f94
WN
1302 const struct perf_event_mmap_page *pc;
1303
1304 pc = perf_evlist__pick_pc(rec->evlist);
1305 if (pc)
1306 return pc;
c45628b0
WN
1307 return NULL;
1308}
1309
/*
 * Emit the synthetic (non-sample) events describing the recording
 * environment: attrs/features/tracing data for pipe output, time
 * conversion, id index and auxtrace info, kernel and module maps, thread
 * and CPU maps, BPF and cgroup events, and finally the pre-existing
 * threads of the target.  Runs once at start (@tail == false) and, with
 * --tail-synthesize, once more at the end.
 */
static int record__synthesize(struct record *rec, bool tail)
{
	struct perf_session *session = rec->session;
	struct machine *machine = &session->machines.host;
	struct perf_data *data = &rec->data;
	struct record_opts *opts = &rec->opts;
	struct perf_tool *tool = &rec->tool;
	int fd = perf_data__fd(data);
	int err = 0;
	event_op f = process_synthesized_event;

	if (rec->opts.tail_synthesize != tail)
		return 0;

	if (data->is_pipe) {
		/*
		 * We need to synthesize events first, because some
		 * features works on top of them (on report side).
		 */
		err = perf_event__synthesize_attrs(tool, rec->evlist,
						   process_synthesized_event);
		if (err < 0) {
			pr_err("Couldn't synthesize attrs.\n");
			goto out;
		}

		err = perf_event__synthesize_features(tool, session, rec->evlist,
						      process_synthesized_event);
		if (err < 0) {
			pr_err("Couldn't synthesize features.\n");
			return err;
		}

		if (have_tracepoints(&rec->evlist->core.entries)) {
			/*
			 * FIXME err <= 0 here actually means that
			 * there were no tracepoints so its not really
			 * an error, just that we don't need to
			 * synthesize anything. We really have to
			 * return this more properly and also
			 * propagate errors that now are calling die()
			 */
			err = perf_event__synthesize_tracing_data(tool, fd, rec->evlist,
								  process_synthesized_event);
			if (err <= 0) {
				pr_err("Couldn't record tracing data.\n");
				goto out;
			}
			rec->bytes_written += err;
		}
	}

	err = perf_event__synth_time_conv(record__pick_pc(rec), tool,
					  process_synthesized_event, machine);
	if (err)
		goto out;

	/* Synthesize id_index before auxtrace_info */
	if (rec->opts.auxtrace_sample_mode) {
		err = perf_event__synthesize_id_index(tool,
						      process_synthesized_event,
						      session->evlist, machine);
		if (err)
			goto out;
	}

	if (rec->opts.full_auxtrace) {
		err = perf_event__synthesize_auxtrace_info(rec->itr, tool,
					session, process_synthesized_event);
		if (err)
			goto out;
	}

	if (!perf_evlist__exclude_kernel(rec->evlist)) {
		err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
							 machine);
		WARN_ONCE(err < 0, "Couldn't record kernel reference relocation symbol\n"
				   "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
				   "Check /proc/kallsyms permission or run as root.\n");

		err = perf_event__synthesize_modules(tool, process_synthesized_event,
						     machine);
		WARN_ONCE(err < 0, "Couldn't record kernel module information.\n"
				   "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
				   "Check /proc/modules permission or run as root.\n");
	}

	if (perf_guest) {
		machines__process_guests(&session->machines,
					 perf_event__synthesize_guest_os, tool);
	}

	err = perf_event__synthesize_extra_attr(&rec->tool,
						rec->evlist,
						process_synthesized_event,
						data->is_pipe);
	if (err)
		goto out;

	err = perf_event__synthesize_thread_map2(&rec->tool, rec->evlist->core.threads,
						 process_synthesized_event,
						NULL);
	if (err < 0) {
		pr_err("Couldn't synthesize thread map.\n");
		return err;
	}

	err = perf_event__synthesize_cpu_map(&rec->tool, rec->evlist->core.cpus,
					     process_synthesized_event, NULL);
	if (err < 0) {
		pr_err("Couldn't synthesize cpu map.\n");
		return err;
	}

	err = perf_event__synthesize_bpf_events(session, process_synthesized_event,
						machine, opts);
	if (err < 0)
		pr_warning("Couldn't synthesize bpf events.\n");

	err = perf_event__synthesize_cgroups(tool, process_synthesized_event,
					     machine);
	if (err < 0)
		pr_warning("Couldn't synthesize cgroup events.\n");

	/* Thread synthesis can be parallelized via --num-thread-synthesize. */
	if (rec->opts.nr_threads_synthesize > 1) {
		perf_set_multithreaded();
		f = process_locked_synthesized_event;
	}

	err = __machine__synthesize_threads(machine, tool, &opts->target, rec->evlist->core.threads,
					    f, opts->sample_address,
					    rec->opts.nr_threads_synthesize);

	if (rec->opts.nr_threads_synthesize > 1)
		perf_set_singlethreaded();

out:
	return err;
}
1449
899e5ffb
ACM
1450static int record__process_signal_event(union perf_event *event __maybe_unused, void *data)
1451{
1452 struct record *rec = data;
1453 pthread_kill(rec->thread_id, SIGUSR2);
1454 return 0;
1455}
1456
/*
 * Prepare the side-band evlist: attach the SIGUSR2 callback when
 * --switch-output-event populated it, add the BPF side-band event unless
 * disabled, and start the side-band reader thread.  Returns 0 on success,
 * -1 on failure.
 */
static int record__setup_sb_evlist(struct record *rec)
{
	struct record_opts *opts = &rec->opts;

	if (rec->sb_evlist != NULL) {
		/*
		 * We get here if --switch-output-event populated the
		 * sb_evlist, so associate a callback that will send a SIGUSR2
		 * to the main thread.
		 */
		evlist__set_cb(rec->sb_evlist, record__process_signal_event, rec);
		rec->thread_id = pthread_self();
	}

	if (!opts->no_bpf_event) {
		if (rec->sb_evlist == NULL) {
			rec->sb_evlist = evlist__new();

			if (rec->sb_evlist == NULL) {
				pr_err("Couldn't create side band evlist.\n.");
				return -1;
			}
		}

		if (evlist__add_bpf_sb_event(rec->sb_evlist, &rec->session->header.env)) {
			pr_err("Couldn't ask for PERF_RECORD_BPF_EVENT side band events.\n.");
			return -1;
		}
	}

	/* A failed side-band thread only costs BPF annotatability, not the record. */
	if (perf_evlist__start_sb_thread(rec->sb_evlist, &rec->opts.target)) {
		pr_debug("Couldn't start the BPF side band thread:\nBPF programs starting from now on won't be annotatable\n");
		opts->no_bpf_event = true;
	}

	return 0;
}
1494
/*
 * The main body of 'perf record': set up signals, create the session,
 * prepare (and later release) the traced workload, open the events and
 * write the header, synthesize the environment, then loop draining the
 * ring buffers until done/drained, handling auxtrace snapshots and output
 * switching on the way.  Finalizes the output and reports statistics on
 * exit.  Returns the command's exit status.
 */
static int __cmd_record(struct record *rec, int argc, const char **argv)
{
	int err;
	int status = 0;
	unsigned long waking = 0;
	const bool forks = argc > 0;
	struct perf_tool *tool = &rec->tool;
	struct record_opts *opts = &rec->opts;
	struct perf_data *data = &rec->data;
	struct perf_session *session;
	bool disabled = false, draining = false;
	int fd;
	float ratio = 0;

	atexit(record__sig_exit);
	signal(SIGCHLD, sig_handler);
	signal(SIGINT, sig_handler);
	signal(SIGTERM, sig_handler);
	signal(SIGSEGV, sigsegv_handler);

	if (rec->opts.record_namespaces)
		tool->namespace_events = true;

	if (rec->opts.record_cgroup) {
#ifdef HAVE_FILE_HANDLE
		tool->cgroup_events = true;
#else
		pr_err("cgroup tracking is not supported\n");
		return -1;
#endif
	}

	/* SIGUSR2 drives auxtrace snapshots and/or output switching. */
	if (rec->opts.auxtrace_snapshot_mode || rec->switch_output.enabled) {
		signal(SIGUSR2, snapshot_sig_handler);
		if (rec->opts.auxtrace_snapshot_mode)
			trigger_on(&auxtrace_snapshot_trigger);
		if (rec->switch_output.enabled)
			trigger_on(&switch_output_trigger);
	} else {
		signal(SIGUSR2, SIG_IGN);
	}

	session = perf_session__new(data, false, tool);
	if (IS_ERR(session)) {
		pr_err("Perf session creation failed.\n");
		return PTR_ERR(session);
	}

	fd = perf_data__fd(data);
	rec->session = session;

	if (zstd_init(&session->zstd_data, rec->opts.comp_level) < 0) {
		pr_err("Compression initialization failed.\n");
		return -1;
	}

	session->header.env.comp_type = PERF_COMP_ZSTD;
	session->header.env.comp_level = rec->opts.comp_level;

	if (rec->opts.kcore &&
	    !record__kcore_readable(&session->machines.host)) {
		pr_err("ERROR: kcore is not readable.\n");
		return -1;
	}

	record__init_features(rec);

	if (rec->opts.use_clockid && rec->opts.clockid_res_ns)
		session->header.env.clockid_res_ns = rec->opts.clockid_res_ns;

	/* Fork the workload now but keep it waiting until events are live. */
	if (forks) {
		err = perf_evlist__prepare_workload(rec->evlist, &opts->target,
						    argv, data->is_pipe,
						    workload_exec_failed_signal);
		if (err < 0) {
			pr_err("Couldn't run the workload!\n");
			status = err;
			goto out_delete_session;
		}
	}

	/*
	 * If we have just single event and are sending data
	 * through pipe, we need to force the ids allocation,
	 * because we synthesize event name through the pipe
	 * and need the id for that.
	 */
	if (data->is_pipe && rec->evlist->core.nr_entries == 1)
		rec->opts.sample_id = true;

	if (record__open(rec) != 0) {
		err = -1;
		goto out_child;
	}
	session->header.env.comp_mmap_len = session->evlist->core.mmap_len;

	if (rec->opts.kcore) {
		err = record__kcore_copy(&session->machines.host, data);
		if (err) {
			pr_err("ERROR: Failed to copy kcore\n");
			goto out_child;
		}
	}

	err = bpf__apply_obj_config();
	if (err) {
		char errbuf[BUFSIZ];

		bpf__strerror_apply_obj_config(err, errbuf, sizeof(errbuf));
		pr_err("ERROR: Apply config to BPF failed: %s\n",
			 errbuf);
		goto out_child;
	}

	/*
	 * Normally perf_session__new would do this, but it doesn't have the
	 * evlist.
	 */
	if (rec->tool.ordered_events && !perf_evlist__sample_id_all(rec->evlist)) {
		pr_warning("WARNING: No sample_id_all support, falling back to unordered processing\n");
		rec->tool.ordered_events = false;
	}

	if (!rec->evlist->nr_groups)
		perf_header__clear_feat(&session->header, HEADER_GROUP_DESC);

	if (data->is_pipe) {
		err = perf_header__write_pipe(fd);
		if (err < 0)
			goto out_child;
	} else {
		err = perf_session__write_header(session, rec->evlist, fd, false);
		if (err < 0)
			goto out_child;
	}

	err = -1;
	if (!rec->no_buildid
	    && !perf_header__has_feat(&session->header, HEADER_BUILD_ID)) {
		pr_err("Couldn't generate buildids. "
		       "Use --no-buildid to profile anyway.\n");
		goto out_child;
	}

	err = record__setup_sb_evlist(rec);
	if (err)
		goto out_child;

	err = record__synthesize(rec, false);
	if (err < 0)
		goto out_child;

	if (rec->realtime_prio) {
		struct sched_param param;

		param.sched_priority = rec->realtime_prio;
		if (sched_setscheduler(0, SCHED_FIFO, &param)) {
			pr_err("Could not set realtime priority.\n");
			err = -1;
			goto out_child;
		}
	}

	/*
	 * When perf is starting the traced process, all the events
	 * (apart from group members) have enable_on_exec=1 set,
	 * so don't spoil it by prematurely enabling them.
	 */
	if (!target__none(&opts->target) && !opts->initial_delay)
		evlist__enable(rec->evlist);

	/*
	 * Let the child rip
	 */
	if (forks) {
		struct machine *machine = &session->machines.host;
		union perf_event *event;
		pid_t tgid;

		event = malloc(sizeof(event->comm) + machine->id_hdr_size);
		if (event == NULL) {
			err = -ENOMEM;
			goto out_child;
		}

		/*
		 * Some H/W events are generated before COMM event
		 * which is emitted during exec(), so perf script
		 * cannot see a correct process name for those events.
		 * Synthesize COMM event to prevent it.
		 */
		tgid = perf_event__synthesize_comm(tool, event,
						   rec->evlist->workload.pid,
						   process_synthesized_event,
						   machine);
		free(event);

		if (tgid == -1)
			goto out_child;

		event = malloc(sizeof(event->namespaces) +
			       (NR_NAMESPACES * sizeof(struct perf_ns_link_info)) +
			       machine->id_hdr_size);
		if (event == NULL) {
			err = -ENOMEM;
			goto out_child;
		}

		/*
		 * Synthesize NAMESPACES event for the command specified.
		 */
		perf_event__synthesize_namespaces(tool, event,
						  rec->evlist->workload.pid,
						  tgid, process_synthesized_event,
						  machine);
		free(event);

		perf_evlist__start_workload(rec->evlist);
	}

	if (opts->initial_delay) {
		usleep(opts->initial_delay * USEC_PER_MSEC);
		evlist__enable(rec->evlist);
	}

	trigger_ready(&auxtrace_snapshot_trigger);
	trigger_ready(&switch_output_trigger);
	perf_hooks__invoke_record_start();
	/* Main capture loop: drain ring buffers until done and drained. */
	for (;;) {
		unsigned long long hits = rec->samples;

		/*
		 * rec->evlist->bkw_mmap_state is possible to be
		 * BKW_MMAP_EMPTY here: when done == true and
		 * hits != rec->samples in previous round.
		 *
		 * perf_evlist__toggle_bkw_mmap ensure we never
		 * convert BKW_MMAP_EMPTY to BKW_MMAP_DATA_PENDING.
		 */
		if (trigger_is_hit(&switch_output_trigger) || done || draining)
			perf_evlist__toggle_bkw_mmap(rec->evlist, BKW_MMAP_DATA_PENDING);

		if (record__mmap_read_all(rec, false) < 0) {
			trigger_error(&auxtrace_snapshot_trigger);
			trigger_error(&switch_output_trigger);
			err = -1;
			goto out_child;
		}

		if (auxtrace_record__snapshot_started) {
			auxtrace_record__snapshot_started = 0;
			if (!trigger_is_error(&auxtrace_snapshot_trigger))
				record__read_auxtrace_snapshot(rec, false);
			if (trigger_is_error(&auxtrace_snapshot_trigger)) {
				pr_err("AUX area tracing snapshot failed\n");
				err = -1;
				goto out_child;
			}
		}

		if (trigger_is_hit(&switch_output_trigger)) {
			/*
			 * If switch_output_trigger is hit, the data in
			 * overwritable ring buffer should have been collected,
			 * so bkw_mmap_state should be set to BKW_MMAP_EMPTY.
			 *
			 * If SIGUSR2 raise after or during record__mmap_read_all(),
			 * record__mmap_read_all() didn't collect data from
			 * overwritable ring buffer. Read again.
			 */
			if (rec->evlist->bkw_mmap_state == BKW_MMAP_RUNNING)
				continue;
			trigger_ready(&switch_output_trigger);

			/*
			 * Reenable events in overwrite ring buffer after
			 * record__mmap_read_all(): we should have collected
			 * data from it.
			 */
			perf_evlist__toggle_bkw_mmap(rec->evlist, BKW_MMAP_RUNNING);

			if (!quiet)
				fprintf(stderr, "[ perf record: dump data: Woken up %ld times ]\n",
					waking);
			waking = 0;
			fd = record__switch_output(rec, false);
			if (fd < 0) {
				pr_err("Failed to switch to new file\n");
				trigger_error(&switch_output_trigger);
				err = fd;
				goto out_child;
			}

			/* re-arm the alarm */
			if (rec->switch_output.time)
				alarm(rec->switch_output.time);
		}

		if (hits == rec->samples) {
			if (done || draining)
				break;
			err = evlist__poll(rec->evlist, -1);
			/*
			 * Propagate error, only if there's any. Ignore positive
			 * number of returned events and interrupt error.
			 */
			if (err > 0 || (err < 0 && errno == EINTR))
				err = 0;
			waking++;

			if (evlist__filter_pollfd(rec->evlist, POLLERR | POLLHUP) == 0)
				draining = true;
		}

		/*
		 * When perf is starting the traced process, at the end events
		 * die with the process and we wait for that. Thus no need to
		 * disable events in this case.
		 */
		if (done && !disabled && !target__none(&opts->target)) {
			trigger_off(&auxtrace_snapshot_trigger);
			evlist__disable(rec->evlist);
			disabled = true;
		}
	}

	trigger_off(&auxtrace_snapshot_trigger);
	trigger_off(&switch_output_trigger);

	if (opts->auxtrace_snapshot_on_exit)
		record__auxtrace_snapshot_exit(rec);

	if (forks && workload_exec_errno) {
		char msg[STRERR_BUFSIZE];
		const char *emsg = str_error_r(workload_exec_errno, msg, sizeof(msg));
		pr_err("Workload failed: %s\n", emsg);
		err = -1;
		goto out_child;
	}

	if (!quiet)
		fprintf(stderr, "[ perf record: Woken up %ld times to write data ]\n", waking);

	if (target__none(&rec->opts.target))
		record__synthesize_workload(rec, true);

out_child:
	/* Final synchronous drain of everything still buffered. */
	record__mmap_read_all(rec, true);
	record__aio_mmap_read_sync(rec);

	if (rec->session->bytes_transferred && rec->session->bytes_compressed) {
		ratio = (float)rec->session->bytes_transferred/(float)rec->session->bytes_compressed;
		session->header.env.comp_ratio = ratio + 0.5;
	}

	if (forks) {
		int exit_status;

		if (!child_finished)
			kill(rec->evlist->workload.pid, SIGTERM);

		wait(&exit_status);

		if (err < 0)
			status = err;
		else if (WIFEXITED(exit_status))
			status = WEXITSTATUS(exit_status);
		else if (WIFSIGNALED(exit_status))
			signr = WTERMSIG(exit_status);
	} else
		status = err;

	record__synthesize(rec, true);
	/* this will be recalculated during process_buildids() */
	rec->samples = 0;

	if (!err) {
		if (!rec->timestamp_filename) {
			record__finish_output(rec);
		} else {
			fd = record__switch_output(rec, true);
			if (fd < 0) {
				status = fd;
				goto out_delete_session;
			}
		}
	}

	perf_hooks__invoke_record_end();

	if (!err && !quiet) {
		char samples[128];
		const char *postfix = rec->timestamp_filename ?
					".<timestamp>" : "";

		if (rec->samples && !rec->opts.full_auxtrace)
			scnprintf(samples, sizeof(samples),
				  " (%" PRIu64 " samples)", rec->samples);
		else
			samples[0] = '\0';

		fprintf(stderr, "[ perf record: Captured and wrote %.3f MB %s%s%s",
			perf_data__size(data) / 1024.0 / 1024.0,
			data->path, postfix, samples);
		if (ratio) {
			fprintf(stderr, ", compressed (original %.3f MB, ratio is %.3f)",
				rec->session->bytes_transferred / 1024.0 / 1024.0,
				ratio);
		}
		fprintf(stderr, " ]\n");
	}

out_delete_session:
	zstd_fini(&session->zstd_data);
	perf_session__delete(session);

	if (!opts->no_bpf_event)
		perf_evlist__stop_sb_thread(rec->sb_evlist);
	return status;
}
0e9b20b8 1915
/* Log the configured call-graph record mode (and DWARF stack dump size). */
static void callchain_debug(struct callchain_param *callchain)
{
	/* Indexed by the values of callchain->record_mode. */
	static const char *str[CALLCHAIN_MAX] = { "NONE", "FP", "DWARF", "LBR" };

	pr_debug("callchain: type %s\n", str[callchain->record_mode]);

	if (callchain->record_mode == CALLCHAIN_DWARF)
		pr_debug("callchain: stack dump size %d\n",
			 callchain->dump_size);
}
1926
/*
 * Parse a call-graph option argument into @callchain and adjust @record
 * accordingly (DWARF unwinding needs data addresses in samples).  @unset
 * disables call graphs entirely.  Returns 0 on success, the parser's
 * error code otherwise.
 */
int record_opts__parse_callchain(struct record_opts *record,
				 struct callchain_param *callchain,
				 const char *arg, bool unset)
{
	int ret;
	callchain->enabled = !unset;

	/* --no-call-graph */
	if (unset) {
		callchain->record_mode = CALLCHAIN_NONE;
		pr_debug("callchain: disabled\n");
		return 0;
	}

	ret = parse_callchain_record_opt(arg, callchain);
	if (!ret) {
		/* Enable data address sampling for DWARF unwind. */
		if (callchain->record_mode == CALLCHAIN_DWARF)
			record->sample_address = true;
		callchain_debug(callchain);
	}

	return ret;
}
1951
0883e820
ACM
1952int record_parse_callchain_opt(const struct option *opt,
1953 const char *arg,
1954 int unset)
1955{
1956 return record_opts__parse_callchain(opt->value, &callchain_param, arg, unset);
1957}
1958
/*
 * Option handler that switches call graphs on; falls back to frame-pointer
 * mode when no record mode was configured yet.  Always succeeds.
 */
int record_callchain_opt(const struct option *opt,
			 const char *arg __maybe_unused,
			 int unset __maybe_unused)
{
	struct callchain_param *callchain = opt->value;

	callchain->enabled = true;

	if (callchain->record_mode == CALLCHAIN_NONE)
		callchain->record_mode = CALLCHAIN_FP;

	callchain_debug(callchain);
	return 0;
}
1973
eb853e80
JO
1974static int perf_record_config(const char *var, const char *value, void *cb)
1975{
7a29c087
NK
1976 struct record *rec = cb;
1977
1978 if (!strcmp(var, "record.build-id")) {
1979 if (!strcmp(value, "cache"))
1980 rec->no_buildid_cache = false;
1981 else if (!strcmp(value, "no-cache"))
1982 rec->no_buildid_cache = true;
1983 else if (!strcmp(value, "skip"))
1984 rec->no_buildid = true;
1985 else
1986 return -1;
1987 return 0;
1988 }
cff17205
YX
1989 if (!strcmp(var, "record.call-graph")) {
1990 var = "call-graph.record-mode";
1991 return perf_default_config(var, value, cb);
1992 }
93f20c0f
AB
1993#ifdef HAVE_AIO_SUPPORT
1994 if (!strcmp(var, "record.aio")) {
1995 rec->opts.nr_cblocks = strtol(value, NULL, 0);
1996 if (!rec->opts.nr_cblocks)
1997 rec->opts.nr_cblocks = nr_cblocks_default;
1998 }
1999#endif
eb853e80 2000
cff17205 2001 return 0;
eb853e80
JO
2002}
2003
/* Maps a --clockid option name to the clockid value handed to the kernel. */
struct clockid_map {
	const char *name;
	int clockid;
};

#define CLOCKID_MAP(n, c)	\
	{ .name = n, .clockid = (c), }

/* Table terminator: a NULL name ends the scan. */
#define CLOCKID_END	{ .name = NULL, }


/*
 * Add the missing ones, we need to build on many distros...
 */
#ifndef CLOCK_MONOTONIC_RAW
#define CLOCK_MONOTONIC_RAW 4
#endif
#ifndef CLOCK_BOOTTIME
#define CLOCK_BOOTTIME 7
#endif
#ifndef CLOCK_TAI
#define CLOCK_TAI 11
#endif

static const struct clockid_map clockids[] = {
	/* available for all events, NMI safe */
	CLOCKID_MAP("monotonic", CLOCK_MONOTONIC),
	CLOCKID_MAP("monotonic_raw", CLOCK_MONOTONIC_RAW),

	/* available for some events */
	CLOCKID_MAP("realtime", CLOCK_REALTIME),
	CLOCKID_MAP("boottime", CLOCK_BOOTTIME),
	CLOCKID_MAP("tai", CLOCK_TAI),

	/* available for the lazy */
	CLOCKID_MAP("mono", CLOCK_MONOTONIC),
	CLOCKID_MAP("raw", CLOCK_MONOTONIC_RAW),
	CLOCKID_MAP("real", CLOCK_REALTIME),
	CLOCKID_MAP("boot", CLOCK_BOOTTIME),

	CLOCKID_END,
};
2046
cf790516
AB
2047static int get_clockid_res(clockid_t clk_id, u64 *res_ns)
2048{
2049 struct timespec res;
2050
2051 *res_ns = 0;
2052 if (!clock_getres(clk_id, &res))
2053 *res_ns = res.tv_nsec + res.tv_sec * NSEC_PER_SEC;
2054 else
2055 pr_warning("WARNING: Failed to determine specified clock resolution.\n");
2056
2057 return 0;
2058}
2059
814c8c38
PZ
2060static int parse_clockid(const struct option *opt, const char *str, int unset)
2061{
2062 struct record_opts *opts = (struct record_opts *)opt->value;
2063 const struct clockid_map *cm;
2064 const char *ostr = str;
2065
2066 if (unset) {
2067 opts->use_clockid = 0;
2068 return 0;
2069 }
2070
2071 /* no arg passed */
2072 if (!str)
2073 return 0;
2074
2075 /* no setting it twice */
2076 if (opts->use_clockid)
2077 return -1;
2078
2079 opts->use_clockid = true;
2080
2081 /* if its a number, we're done */
2082 if (sscanf(str, "%d", &opts->clockid) == 1)
cf790516 2083 return get_clockid_res(opts->clockid, &opts->clockid_res_ns);
814c8c38
PZ
2084
2085 /* allow a "CLOCK_" prefix to the name */
2086 if (!strncasecmp(str, "CLOCK_", 6))
2087 str += 6;
2088
2089 for (cm = clockids; cm->name; cm++) {
2090 if (!strcasecmp(str, cm->name)) {
2091 opts->clockid = cm->clockid;
cf790516
AB
2092 return get_clockid_res(opts->clockid,
2093 &opts->clockid_res_ns);
814c8c38
PZ
2094 }
2095 }
2096
2097 opts->use_clockid = false;
2098 ui__warning("unknown clockid %s, check man page\n", ostr);
2099 return -1;
2100}
2101
f4fe11b7
AB
2102static int record__parse_affinity(const struct option *opt, const char *str, int unset)
2103{
2104 struct record_opts *opts = (struct record_opts *)opt->value;
2105
2106 if (unset || !str)
2107 return 0;
2108
2109 if (!strcasecmp(str, "node"))
2110 opts->affinity = PERF_AFFINITY_NODE;
2111 else if (!strcasecmp(str, "cpu"))
2112 opts->affinity = PERF_AFFINITY_CPU;
2113
2114 return 0;
2115}
2116
6d575816
JS
2117static int parse_output_max_size(const struct option *opt,
2118 const char *str, int unset)
2119{
2120 unsigned long *s = (unsigned long *)opt->value;
2121 static struct parse_tag tags_size[] = {
2122 { .tag = 'B', .mult = 1 },
2123 { .tag = 'K', .mult = 1 << 10 },
2124 { .tag = 'M', .mult = 1 << 20 },
2125 { .tag = 'G', .mult = 1 << 30 },
2126 { .tag = 0 },
2127 };
2128 unsigned long val;
2129
2130 if (unset) {
2131 *s = 0;
2132 return 0;
2133 }
2134
2135 val = parse_tag_value(str, tags_size);
2136 if (val != (unsigned long) -1) {
2137 *s = val;
2138 return 0;
2139 }
2140
2141 return -1;
2142}
2143
e9db1310
AH
2144static int record__parse_mmap_pages(const struct option *opt,
2145 const char *str,
2146 int unset __maybe_unused)
2147{
2148 struct record_opts *opts = opt->value;
2149 char *s, *p;
2150 unsigned int mmap_pages;
2151 int ret;
2152
2153 if (!str)
2154 return -EINVAL;
2155
2156 s = strdup(str);
2157 if (!s)
2158 return -ENOMEM;
2159
2160 p = strchr(s, ',');
2161 if (p)
2162 *p = '\0';
2163
2164 if (*s) {
2165 ret = __perf_evlist__parse_mmap_pages(&mmap_pages, s);
2166 if (ret)
2167 goto out_free;
2168 opts->mmap_pages = mmap_pages;
2169 }
2170
2171 if (!p) {
2172 ret = 0;
2173 goto out_free;
2174 }
2175
2176 ret = __perf_evlist__parse_mmap_pages(&mmap_pages, p + 1);
2177 if (ret)
2178 goto out_free;
2179
2180 opts->auxtrace_mmap_pages = mmap_pages;
2181
2182out_free:
2183 free(s);
2184 return ret;
2185}
2186
0c582449
JO
2187static void switch_output_size_warn(struct record *rec)
2188{
9521b5f2 2189 u64 wakeup_size = evlist__mmap_size(rec->opts.mmap_pages);
0c582449
JO
2190 struct switch_output *s = &rec->switch_output;
2191
2192 wakeup_size /= 2;
2193
2194 if (s->size < wakeup_size) {
2195 char buf[100];
2196
2197 unit_number__scnprintf(buf, sizeof(buf), wakeup_size);
2198 pr_warning("WARNING: switch-output data size lower than "
2199 "wakeup kernel buffer size (%s) "
2200 "expect bigger perf.data sizes\n", buf);
2201 }
2202}
2203
/*
 * Configure --switch-output from rec->switch_output.str.  The argument
 * is either "signal" (rotate on SIGUSR2), a size with B/K/M/G suffix,
 * or a time with s/m/h/d suffix.  --switch-output-events implies the
 * signal mode.  Returns 0 on success (or when the option is unused),
 * -1 when the string matches none of the accepted forms.
 */
static int switch_output_setup(struct record *rec)
{
	struct switch_output *s = &rec->switch_output;
	static struct parse_tag tags_size[] = {
		{ .tag = 'B', .mult = 1 },
		{ .tag = 'K', .mult = 1 << 10 },
		{ .tag = 'M', .mult = 1 << 20 },
		{ .tag = 'G', .mult = 1 << 30 },
		{ .tag = 0 },
	};
	static struct parse_tag tags_time[] = {
		{ .tag = 's', .mult = 1 },
		{ .tag = 'm', .mult = 60 },
		{ .tag = 'h', .mult = 60*60 },
		{ .tag = 'd', .mult = 60*60*24 },
		{ .tag = 0 },
	};
	unsigned long val;

	/*
	 * If we're using --switch-output-events, then we imply its
	 * --switch-output=signal, as we'll send a SIGUSR2 from the side band
	 * thread to its parent.
	 */
	if (rec->switch_output_event_set)
		goto do_signal;

	if (!s->set)
		return 0;

	/* Note: the do_signal label sits inside this if's body on purpose. */
	if (!strcmp(s->str, "signal")) {
do_signal:
		s->signal = true;
		pr_debug("switch-output with SIGUSR2 signal\n");
		goto enabled;
	}

	/* Try a size threshold first... */
	val = parse_tag_value(s->str, tags_size);
	if (val != (unsigned long) -1) {
		s->size = val;
		pr_debug("switch-output with %s size threshold\n", s->str);
		goto enabled;
	}

	/* ...then a time threshold. */
	val = parse_tag_value(s->str, tags_time);
	if (val != (unsigned long) -1) {
		s->time = val;
		pr_debug("switch-output with %s time threshold (%lu seconds)\n",
			 s->str, s->time);
		goto enabled;
	}

	return -1;

enabled:
	/* Rotated files need distinct names, so timestamps are forced on. */
	rec->timestamp_filename = true;
	s->enabled = true;

	if (s->size && !rec->opts.no_buffering)
		switch_output_size_warn(rec);

	return 0;
}
2267
/* Usage strings shown by parse_options()/usage_with_options(). */
static const char * const __record_usage[] = {
	"perf record [<options>] [<command>]",
	"perf record [<options>] -- <command> [<options>]",
	NULL
};
/* Exported (non-static) pointer so other builtins can print this usage. */
const char * const *record_usage = __record_usage;
0e9b20b8 2274
6e0a9b3d
ACM
2275static int build_id__process_mmap(struct perf_tool *tool, union perf_event *event,
2276 struct perf_sample *sample, struct machine *machine)
2277{
2278 /*
2279 * We already have the kernel maps, put in place via perf_session__create_kernel_maps()
2280 * no need to add them twice.
2281 */
2282 if (!(event->header.misc & PERF_RECORD_MISC_USER))
2283 return 0;
2284 return perf_event__process_mmap(tool, event, sample, machine);
2285}
2286
2287static int build_id__process_mmap2(struct perf_tool *tool, union perf_event *event,
2288 struct perf_sample *sample, struct machine *machine)
2289{
2290 /*
2291 * We already have the kernel maps, put in place via perf_session__create_kernel_maps()
2292 * no need to add them twice.
2293 */
2294 if (!(event->header.misc & PERF_RECORD_MISC_USER))
2295 return 0;
2296
2297 return perf_event__process_mmap2(tool, event, sample, machine);
2298}
2299
/*
 * XXX Ideally would be local to cmd_record() and passed to a record__new
 * because we need to have access to it in record__exit, that is called
 * after cmd_record() exits, but since record_options need to be accessible to
 * builtin-script, leave it here.
 *
 * At least we don't ouch it in all the other functions here directly.
 *
 * Just say no to tons of global variables, sigh.
 */
static struct record record = {
	/* Default recording options; many are overridden by command line. */
	.opts = {
		.sample_time	     = true,
		.mmap_pages	     = UINT_MAX,
		.user_freq	     = UINT_MAX,
		.user_interval	     = ULLONG_MAX,
		.freq		     = 4000,
		.target		     = {
			.uses_mmap   = true,
			.default_per_cpu = true,
		},
		.mmap_flush          = MMAP_FLUSH_DEFAULT,
		.nr_threads_synthesize = 1,
	},
	/* Event handlers used when processing the recorded data. */
	.tool = {
		.sample		= process_sample_event,
		.fork		= perf_event__process_fork,
		.exit		= perf_event__process_exit,
		.comm		= perf_event__process_comm,
		.namespaces	= perf_event__process_namespaces,
		.mmap		= build_id__process_mmap,
		.mmap2		= build_id__process_mmap2,
		.ordered_events	= true,
	},
};
7865e817 2335
/* Help text for --call-graph, extending the shared CALLCHAIN help. */
const char record_callchain_help[] = CALLCHAIN_RECORD_HELP
	"\n\t\t\t\tDefault: fp";

/* --dry-run: parse options, then exit without recording anything. */
static bool dry_run;
2340
/*
 * XXX Will stay a global variable till we fix builtin-script.c to stop messing
 * with it and switch to use the library functions in perf_evlist that came
 * from builtin-record.c, i.e. use record_opts,
 * perf_evlist__prepare_workload, etc instead of fork+exec'in 'perf record',
 * using pipes, etc.
 *
 * Full option table for 'perf record'; the help strings double as the
 * documentation for each entry.
 */
static struct option __record_options[] = {
	OPT_CALLBACK('e', "event", &record.evlist, "event",
		     "event selector. use 'perf list' to list available events",
		     parse_events_option),
	OPT_CALLBACK(0, "filter", &record.evlist, "filter",
		     "event filter", parse_filter),
	OPT_CALLBACK_NOOPT(0, "exclude-perf", &record.evlist,
			   NULL, "don't record events from perf itself",
			   exclude_perf),
	OPT_STRING('p', "pid", &record.opts.target.pid, "pid",
		    "record events on existing process id"),
	OPT_STRING('t', "tid", &record.opts.target.tid, "tid",
		    "record events on existing thread id"),
	OPT_INTEGER('r', "realtime", &record.realtime_prio,
		    "collect data with this RT SCHED_FIFO priority"),
	OPT_BOOLEAN(0, "no-buffering", &record.opts.no_buffering,
		    "collect data without buffering"),
	OPT_BOOLEAN('R', "raw-samples", &record.opts.raw_samples,
		    "collect raw sample records from all opened counters"),
	OPT_BOOLEAN('a', "all-cpus", &record.opts.target.system_wide,
			    "system-wide collection from all CPUs"),
	OPT_STRING('C', "cpu", &record.opts.target.cpu_list, "cpu",
		    "list of cpus to monitor"),
	OPT_U64('c', "count", &record.opts.user_interval, "event period to sample"),
	OPT_STRING('o', "output", &record.data.path, "file",
		    "output file name"),
	OPT_BOOLEAN_SET('i', "no-inherit", &record.opts.no_inherit,
			&record.opts.no_inherit_set,
			"child tasks do not inherit counters"),
	OPT_BOOLEAN(0, "tail-synthesize", &record.opts.tail_synthesize,
		    "synthesize non-sample events at the end of output"),
	OPT_BOOLEAN(0, "overwrite", &record.opts.overwrite, "use overwrite mode"),
	OPT_BOOLEAN(0, "no-bpf-event", &record.opts.no_bpf_event, "record bpf events"),
	OPT_BOOLEAN(0, "strict-freq", &record.opts.strict_freq,
		    "Fail if the specified frequency can't be used"),
	OPT_CALLBACK('F', "freq", &record.opts, "freq or 'max'",
		     "profile at this frequency",
		      record__parse_freq),
	OPT_CALLBACK('m', "mmap-pages", &record.opts, "pages[,pages]",
		     "number of mmap data pages and AUX area tracing mmap pages",
		     record__parse_mmap_pages),
	OPT_CALLBACK(0, "mmap-flush", &record.opts, "number",
		     "Minimal number of bytes that is extracted from mmap data pages (default: 1)",
		     record__mmap_flush_parse),
	OPT_BOOLEAN(0, "group", &record.opts.group,
		    "put the counters into a counter group"),
	OPT_CALLBACK_NOOPT('g', NULL, &callchain_param,
			   NULL, "enables call-graph recording" ,
			   &record_callchain_opt),
	OPT_CALLBACK(0, "call-graph", &record.opts,
		     "record_mode[,record_size]", record_callchain_help,
		     &record_parse_callchain_opt),
	OPT_INCR('v', "verbose", &verbose,
		    "be more verbose (show counter open errors, etc)"),
	OPT_BOOLEAN('q', "quiet", &quiet, "don't print any message"),
	OPT_BOOLEAN('s', "stat", &record.opts.inherit_stat,
		    "per thread counts"),
	OPT_BOOLEAN('d', "data", &record.opts.sample_address, "Record the sample addresses"),
	OPT_BOOLEAN(0, "phys-data", &record.opts.sample_phys_addr,
		    "Record the sample physical addresses"),
	OPT_BOOLEAN(0, "sample-cpu", &record.opts.sample_cpu, "Record the sample cpu"),
	OPT_BOOLEAN_SET('T', "timestamp", &record.opts.sample_time,
			&record.opts.sample_time_set,
			"Record the sample timestamps"),
	OPT_BOOLEAN_SET('P', "period", &record.opts.period, &record.opts.period_set,
			"Record the sample period"),
	OPT_BOOLEAN('n', "no-samples", &record.opts.no_samples,
		    "don't sample"),
	OPT_BOOLEAN_SET('N', "no-buildid-cache", &record.no_buildid_cache,
			&record.no_buildid_cache_set,
			"do not update the buildid cache"),
	OPT_BOOLEAN_SET('B', "no-buildid", &record.no_buildid,
			&record.no_buildid_set,
			"do not collect buildids in perf.data"),
	OPT_CALLBACK('G', "cgroup", &record.evlist, "name",
		     "monitor event in cgroup name only",
		     parse_cgroups),
	OPT_UINTEGER('D', "delay", &record.opts.initial_delay,
		  "ms to wait before starting measurement after program start"),
	OPT_BOOLEAN(0, "kcore", &record.opts.kcore, "copy /proc/kcore"),
	OPT_STRING('u', "uid", &record.opts.target.uid_str, "user",
		   "user to profile"),

	OPT_CALLBACK_NOOPT('b', "branch-any", &record.opts.branch_stack,
		     "branch any", "sample any taken branches",
		     parse_branch_stack),

	OPT_CALLBACK('j', "branch-filter", &record.opts.branch_stack,
		     "branch filter mask", "branch stack filter modes",
		     parse_branch_stack),
	OPT_BOOLEAN('W', "weight", &record.opts.sample_weight,
		    "sample by weight (on special events only)"),
	OPT_BOOLEAN(0, "transaction", &record.opts.sample_transaction,
		    "sample transaction flags (special events only)"),
	OPT_BOOLEAN(0, "per-thread", &record.opts.target.per_thread,
		    "use per-thread mmaps"),
	OPT_CALLBACK_OPTARG('I', "intr-regs", &record.opts.sample_intr_regs, NULL, "any register",
		    "sample selected machine registers on interrupt,"
		    " use '-I?' to list register names", parse_intr_regs),
	OPT_CALLBACK_OPTARG(0, "user-regs", &record.opts.sample_user_regs, NULL, "any register",
		    "sample selected machine registers on interrupt,"
		    " use '--user-regs=?' to list register names", parse_user_regs),
	OPT_BOOLEAN(0, "running-time", &record.opts.running_time,
		    "Record running/enabled time of read (:S) events"),
	OPT_CALLBACK('k', "clockid", &record.opts,
	"clockid", "clockid to use for events, see clock_gettime()",
	parse_clockid),
	OPT_STRING_OPTARG('S', "snapshot", &record.opts.auxtrace_snapshot_opts,
			  "opts", "AUX area tracing Snapshot Mode", ""),
	OPT_STRING_OPTARG(0, "aux-sample", &record.opts.auxtrace_sample_opts,
			  "opts", "sample AUX area", ""),
	OPT_UINTEGER(0, "proc-map-timeout", &proc_map_timeout,
			"per thread proc mmap processing timeout in ms"),
	OPT_BOOLEAN(0, "namespaces", &record.opts.record_namespaces,
		    "Record namespaces events"),
	OPT_BOOLEAN(0, "all-cgroups", &record.opts.record_cgroup,
		    "Record cgroup events"),
	OPT_BOOLEAN(0, "switch-events", &record.opts.record_switch_events,
		    "Record context switch events"),
	OPT_BOOLEAN_FLAG(0, "all-kernel", &record.opts.all_kernel,
			 "Configure all used events to run in kernel space.",
			 PARSE_OPT_EXCLUSIVE),
	OPT_BOOLEAN_FLAG(0, "all-user", &record.opts.all_user,
			 "Configure all used events to run in user space.",
			 PARSE_OPT_EXCLUSIVE),
	OPT_BOOLEAN(0, "kernel-callchains", &record.opts.kernel_callchains,
		    "collect kernel callchains"),
	OPT_BOOLEAN(0, "user-callchains", &record.opts.user_callchains,
		    "collect user callchains"),
	OPT_STRING(0, "clang-path", &llvm_param.clang_path, "clang path",
		   "clang binary to use for compiling BPF scriptlets"),
	OPT_STRING(0, "clang-opt", &llvm_param.clang_opt, "clang options",
		   "options passed to clang when compiling BPF scriptlets"),
	OPT_STRING(0, "vmlinux", &symbol_conf.vmlinux_name,
		   "file", "vmlinux pathname"),
	OPT_BOOLEAN(0, "buildid-all", &record.buildid_all,
		    "Record build-id of all DSOs regardless of hits"),
	OPT_BOOLEAN(0, "timestamp-filename", &record.timestamp_filename,
		    "append timestamp to output filename"),
	OPT_BOOLEAN(0, "timestamp-boundary", &record.timestamp_boundary,
		    "Record timestamp boundary (time of first/last samples)"),
	OPT_STRING_OPTARG_SET(0, "switch-output", &record.switch_output.str,
			  &record.switch_output.set, "signal or size[BKMG] or time[smhd]",
			  "Switch output when receiving SIGUSR2 (signal) or cross a size or time threshold",
			  "signal"),
	OPT_CALLBACK_SET(0, "switch-output-event", &record.sb_evlist, &record.switch_output_event_set, "switch output event",
			 "switch output event selector. use 'perf list' to list available events",
			 parse_events_option_new_evlist),
	OPT_INTEGER(0, "switch-max-files", &record.switch_output.num_files,
		   "Limit number of switch output generated files"),
	OPT_BOOLEAN(0, "dry-run", &dry_run,
		    "Parse options then exit"),
#ifdef HAVE_AIO_SUPPORT
	OPT_CALLBACK_OPTARG(0, "aio", &record.opts,
		     &nr_cblocks_default, "n", "Use <n> control blocks in asynchronous trace writing mode (default: 1, max: 4)",
		     record__aio_parse),
#endif
	OPT_CALLBACK(0, "affinity", &record.opts, "node|cpu",
		     "Set affinity mask of trace reading thread to NUMA node cpu mask or cpu of processed mmap buffer",
		     record__parse_affinity),
#ifdef HAVE_ZSTD_SUPPORT
	OPT_CALLBACK_OPTARG('z', "compression-level", &record.opts, &comp_level_default,
			    "n", "Compressed records using specified level (default: 1 - fastest compression, 22 - greatest compression)",
			    record__parse_comp_level),
#endif
	OPT_CALLBACK(0, "max-size", &record.output_max_size,
		     "size", "Limit the maximum size of the output file", parse_output_max_size),
	OPT_UINTEGER(0, "num-thread-synthesize",
		     &record.opts.nr_threads_synthesize,
		     "number of threads to run for event synthesis"),
	OPT_END()
};
2520
/* Exported so builtin-script.c can reuse this option table (see above). */
struct option *record_options = __record_options;
2522
/*
 * Entry point for 'perf record': parse options, validate the target and
 * event list, configure auxiliary features (auxtrace, build-id cache,
 * switch-output, AIO, compression) and then hand off to __cmd_record().
 * Returns 0 on success or a negative errno-style value.
 */
int cmd_record(int argc, const char **argv)
{
	int err;
	struct record *rec = &record;
	char errbuf[BUFSIZ];

	setlocale(LC_ALL, "");

	/* Disable options whose backing feature was compiled out. */
#ifndef HAVE_LIBBPF_SUPPORT
# define set_nobuild(s, l, c) set_option_nobuild(record_options, s, l, "NO_LIBBPF=1", c)
	set_nobuild('\0', "clang-path", true);
	set_nobuild('\0', "clang-opt", true);
# undef set_nobuild
#endif

#ifndef HAVE_BPF_PROLOGUE
# if !defined (HAVE_DWARF_SUPPORT)
#  define REASON  "NO_DWARF=1"
# elif !defined (HAVE_LIBBPF_SUPPORT)
#  define REASON  "NO_LIBBPF=1"
# else
#  define REASON  "this architecture doesn't support BPF prologue"
# endif
# define set_nobuild(s, l, c) set_option_nobuild(record_options, s, l, REASON, c)
	set_nobuild('\0', "vmlinux", true);
# undef set_nobuild
# undef REASON
#endif

	rec->opts.affinity = PERF_AFFINITY_SYS;

	rec->evlist = evlist__new();
	if (rec->evlist == NULL)
		return -ENOMEM;

	/* Apply 'record.*' perfconfig settings before the command line. */
	err = perf_config(perf_record_config, rec);
	if (err)
		return err;

	argc = parse_options(argc, argv, record_options, record_usage,
			    PARSE_OPT_STOP_AT_NON_OPTION);
	if (quiet)
		perf_quiet_option();

	/* Make system wide (-a) the default target. */
	if (!argc && target__none(&rec->opts.target))
		rec->opts.target.system_wide = true;

	if (nr_cgroups && !rec->opts.target.system_wide) {
		usage_with_options_msg(record_usage, record_options,
			"cgroup monitoring only available in system-wide mode");

	}

	/* --kcore stores a directory (perf.data + kcore copy), not a file. */
	if (rec->opts.kcore)
		rec->data.is_dir = true;

	if (rec->opts.comp_level != 0) {
		pr_debug("Compression enabled, disabling build id collection at the end of the session.\n");
		rec->no_buildid = true;
	}

	if (rec->opts.record_switch_events &&
	    !perf_can_record_switch_events()) {
		ui__error("kernel does not support recording context switch events\n");
		parse_options_usage(record_usage, record_options, "switch-events", 0);
		return -EINVAL;
	}

	if (switch_output_setup(rec)) {
		parse_options_usage(record_usage, record_options, "switch-output", 0);
		return -EINVAL;
	}

	/* Time-based switch-output: arm a periodic SIGALRM. */
	if (rec->switch_output.time) {
		signal(SIGALRM, alarm_sig_handler);
		alarm(rec->switch_output.time);
	}

	if (rec->switch_output.num_files) {
		/*
		 * NOTE(review): calloc() arguments are (size, nmemb), i.e.
		 * swapped relative to the (nmemb, size) convention.  The
		 * product is the same, but worth normalizing.
		 */
		rec->switch_output.filenames = calloc(sizeof(char *),
						      rec->switch_output.num_files);
		if (!rec->switch_output.filenames)
			return -EINVAL;
	}

	/*
	 * Allow aliases to facilitate the lookup of symbols for address
	 * filters. Refer to auxtrace_parse_filters().
	 */
	symbol_conf.allow_aliases = true;

	symbol__init(NULL);

	if (rec->opts.affinity != PERF_AFFINITY_SYS) {
		rec->affinity_mask.nbits = cpu__max_cpu();
		rec->affinity_mask.bits = bitmap_alloc(rec->affinity_mask.nbits);
		if (!rec->affinity_mask.bits) {
			pr_err("Failed to allocate thread mask for %zd cpus\n", rec->affinity_mask.nbits);
			return -ENOMEM;
		}
		pr_debug2("thread mask[%zd]: empty\n", rec->affinity_mask.nbits);
	}

	err = record__auxtrace_init(rec);
	if (err)
		goto out;

	if (dry_run)
		goto out;

	err = bpf__setup_stdout(rec->evlist);
	if (err) {
		bpf__strerror_setup_stdout(rec->evlist, err, errbuf, sizeof(errbuf));
		pr_err("ERROR: Setup BPF stdout failed: %s\n",
			 errbuf);
		goto out;
	}

	err = -ENOMEM;

	if (rec->no_buildid_cache || rec->no_buildid) {
		disable_buildid_cache();
	} else if (rec->switch_output.enabled) {
		/*
		 * In 'perf record --switch-output', disable buildid
		 * generation by default to reduce data file switching
		 * overhead. Still generate buildid if they are required
		 * explicitly using
		 *
		 *  perf record --switch-output --no-no-buildid \
		 *              --no-no-buildid-cache
		 *
		 * Following code equals to:
		 *
		 * if ((rec->no_buildid || !rec->no_buildid_set) &&
		 *     (rec->no_buildid_cache || !rec->no_buildid_cache_set))
		 *         disable_buildid_cache();
		 */
		bool disable = true;

		if (rec->no_buildid_set && !rec->no_buildid)
			disable = false;
		if (rec->no_buildid_cache_set && !rec->no_buildid_cache)
			disable = false;
		if (disable) {
			rec->no_buildid = true;
			rec->no_buildid_cache = true;
			disable_buildid_cache();
		}
	}

	if (record.opts.overwrite)
		record.opts.tail_synthesize = true;

	/* No events given: fall back to the default event. */
	if (rec->evlist->core.nr_entries == 0 &&
	    __perf_evlist__add_default(rec->evlist, !record.opts.no_samples) < 0) {
		pr_err("Not enough memory for event selector list\n");
		goto out;
	}

	if (rec->opts.target.tid && !rec->opts.no_inherit_set)
		rec->opts.no_inherit = true;

	err = target__validate(&rec->opts.target);
	if (err) {
		target__strerror(&rec->opts.target, err, errbuf, BUFSIZ);
		ui__warning("%s\n", errbuf);
	}

	err = target__parse_uid(&rec->opts.target);
	if (err) {
		int saved_errno = errno;

		target__strerror(&rec->opts.target, err, errbuf, BUFSIZ);
		ui__error("%s", errbuf);

		err = -saved_errno;
		goto out;
	}

	/* Enable ignoring missing threads when -u/-p option is defined. */
	rec->opts.ignore_missing_thread = rec->opts.target.uid != UINT_MAX || rec->opts.target.pid;

	err = -ENOMEM;
	if (perf_evlist__create_maps(rec->evlist, &rec->opts.target) < 0)
		usage_with_options(record_usage, record_options);

	err = auxtrace_record__options(rec->itr, rec->evlist, &rec->opts);
	if (err)
		goto out;

	/*
	 * We take all buildids when the file contains
	 * AUX area tracing data because we do not decode the
	 * trace because it would take too long.
	 */
	if (rec->opts.full_auxtrace)
		rec->buildid_all = true;

	if (record_opts__config(&rec->opts)) {
		err = -EINVAL;
		goto out;
	}

	/* Clamp AIO blocks and compression level to their supported maxima. */
	if (rec->opts.nr_cblocks > nr_cblocks_max)
		rec->opts.nr_cblocks = nr_cblocks_max;
	pr_debug("nr_cblocks: %d\n", rec->opts.nr_cblocks);

	pr_debug("affinity: %s\n", affinity_tags[rec->opts.affinity]);
	pr_debug("mmap flush: %d\n", rec->opts.mmap_flush);

	if (rec->opts.comp_level > comp_level_max)
		rec->opts.comp_level = comp_level_max;
	pr_debug("comp level: %d\n", rec->opts.comp_level);

	err = __cmd_record(&record, argc, argv);
out:
	bitmap_free(rec->affinity_mask.bits);
	evlist__delete(rec->evlist);
	symbol__exit();
	auxtrace_record__free(rec->itr);
	return err;
}
2dd6d8a1
AH
2747
2748static void snapshot_sig_handler(int sig __maybe_unused)
2749{
dc0c6127
JO
2750 struct record *rec = &record;
2751
5f9cf599
WN
2752 if (trigger_is_ready(&auxtrace_snapshot_trigger)) {
2753 trigger_hit(&auxtrace_snapshot_trigger);
2754 auxtrace_record__snapshot_started = 1;
2755 if (auxtrace_record__snapshot_start(record.itr))
2756 trigger_error(&auxtrace_snapshot_trigger);
2757 }
3c1cb7e3 2758
dc0c6127 2759 if (switch_output_signal(rec))
3c1cb7e3 2760 trigger_hit(&switch_output_trigger);
2dd6d8a1 2761}
bfacbe3b
JO
2762
2763static void alarm_sig_handler(int sig __maybe_unused)
2764{
2765 struct record *rec = &record;
2766
2767 if (switch_output_time(rec))
2768 trigger_hit(&switch_output_trigger);
2769}