// SPDX-License-Identifier: GPL-2.0
/*
 * builtin-record.c
 *
 * Builtin record command: Record the profile of a workload
 * (or a CPU, or a PID) into the perf.data output file - for
 * later analysis via perf report.
 */
#include "builtin.h"

#include "util/build-id.h"
#include <subcmd/parse-options.h>
#include "util/parse-events.h"
#include "util/config.h"

#include "util/callchain.h"
#include "util/cgroup.h"
#include "util/header.h"
#include "util/event.h"
#include "util/evlist.h"
#include "util/evsel.h"
#include "util/debug.h"
#include "util/mmap.h"
#include "util/target.h"
#include "util/session.h"
#include "util/tool.h"
#include "util/symbol.h"
#include "util/record.h"
#include "util/cpumap.h"
#include "util/thread_map.h"
#include "util/data.h"
#include "util/perf_regs.h"
#include "util/auxtrace.h"
#include "util/tsc.h"
#include "util/parse-branch-options.h"
#include "util/parse-regs-options.h"
#include "util/perf_api_probe.h"
#include "util/llvm-utils.h"
#include "util/bpf-loader.h"
#include "util/trigger.h"
#include "util/perf-hooks.h"
#include "util/cpu-set-sched.h"
#include "util/synthetic-events.h"
#include "util/time-utils.h"
#include "util/units.h"
#include "util/bpf-event.h"
#include "util/util.h"
#include "asm/bug.h"
#include "perf.h"

#include <errno.h>
#include <inttypes.h>
#include <locale.h>
#include <poll.h>
#include <pthread.h>
#include <unistd.h>
#include <sched.h>
#include <signal.h>
#include <sys/mman.h>
#include <sys/wait.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <linux/err.h>
#include <linux/string.h>
#include <linux/time64.h>
#include <linux/zalloc.h>
#include <linux/bitmap.h>

struct switch_output {
	bool		enabled;
	bool		signal;
	unsigned long	size;
	unsigned long	time;
	const char	*str;
	bool		set;
	char		**filenames;
	int		num_files;
	int		cur_file;
};
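/*
 * Output rotation ("switch output") can be requested three ways: by
 * SIGUSR2 (->signal), once ->size bytes have been written, or every
 * ->time seconds via alarm().  When a rotation file limit is set,
 * ->filenames/->num_files/->cur_file implement a ring of kept files:
 * the oldest one is removed before a new name is stored (see
 * record__switch_output()).
 */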

struct record {
	struct perf_tool	tool;
	struct record_opts	opts;
	u64			bytes_written;
	struct perf_data	data;
	struct auxtrace_record	*itr;
	struct evlist		*evlist;
	struct perf_session	*session;
	struct evlist		*sb_evlist;
	pthread_t		thread_id;
	int			realtime_prio;
	bool			switch_output_event_set;
	bool			no_buildid;
	bool			no_buildid_set;
	bool			no_buildid_cache;
	bool			no_buildid_cache_set;
	bool			buildid_all;
	bool			timestamp_filename;
	bool			timestamp_boundary;
	struct switch_output	switch_output;
	unsigned long long	samples;
	struct mmap_cpu_mask	affinity_mask;
	unsigned long		output_max_size;	/* = 0: unlimited */
};

static volatile int done;

static volatile int auxtrace_record__snapshot_started;
static DEFINE_TRIGGER(auxtrace_snapshot_trigger);
static DEFINE_TRIGGER(switch_output_trigger);

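/*
 * The two triggers are tiny state machines (util/trigger.h) moving
 * between OFF, READY, HIT and ERROR.  The signal/alarm handlers
 * (defined later in this file) mostly just flip a READY trigger to
 * HIT; the main loop in __cmd_record() polls the state and performs
 * the actual snapshot or output switch.
 */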
static const char *affinity_tags[PERF_AFFINITY_MAX] = {
	"SYS", "NODE", "CPU"
};

static bool switch_output_signal(struct record *rec)
{
	return rec->switch_output.signal &&
	       trigger_is_ready(&switch_output_trigger);
}

static bool switch_output_size(struct record *rec)
{
	return rec->switch_output.size &&
	       trigger_is_ready(&switch_output_trigger) &&
	       (rec->bytes_written >= rec->switch_output.size);
}

static bool switch_output_time(struct record *rec)
{
	return rec->switch_output.time &&
	       trigger_is_ready(&switch_output_trigger);
}

static bool record__output_max_size_exceeded(struct record *rec)
{
	return rec->output_max_size &&
	       (rec->bytes_written >= rec->output_max_size);
}

static int record__write(struct record *rec, struct mmap *map __maybe_unused,
			 void *bf, size_t size)
{
	struct perf_data_file *file = &rec->session->data->file;

	if (perf_data_file__write(file, bf, size) < 0) {
		pr_err("failed to write perf data, error: %m\n");
		return -1;
	}

	rec->bytes_written += size;

	if (record__output_max_size_exceeded(rec) && !done) {
		fprintf(stderr, "[ perf record: perf size limit reached (%" PRIu64 " KB),"
				" stopping session ]\n",
				rec->bytes_written >> 10);
		done = 1;
	}

	if (switch_output_size(rec))
		trigger_hit(&switch_output_trigger);

	return 0;
}
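/*
 * Every byte of output, whether synthesized by the tool itself or
 * copied out of the mmap rings, goes through record__write() (or
 * record__aio_push() below), so rec->bytes_written stays an exact
 * running total for both the size-based switch-output trigger and
 * the output size limit checked above.
 */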

static int record__aio_enabled(struct record *rec);
static int record__comp_enabled(struct record *rec);
static size_t zstd_compress(struct perf_session *session, void *dst, size_t dst_size,
			    void *src, size_t src_size);

#ifdef HAVE_AIO_SUPPORT
static int record__aio_write(struct aiocb *cblock, int trace_fd,
			     void *buf, size_t size, off_t off)
{
	int rc;

	cblock->aio_fildes = trace_fd;
	cblock->aio_buf    = buf;
	cblock->aio_nbytes = size;
	cblock->aio_offset = off;
	cblock->aio_sigevent.sigev_notify = SIGEV_NONE;

	do {
		rc = aio_write(cblock);
		if (rc == 0) {
			break;
		} else if (errno != EAGAIN) {
			cblock->aio_fildes = -1;
			pr_err("failed to queue perf data, error: %m\n");
			break;
		}
	} while (1);

	return rc;
}

static int record__aio_complete(struct mmap *md, struct aiocb *cblock)
{
	void *rem_buf;
	off_t rem_off;
	size_t rem_size;
	int rc, aio_errno;
	ssize_t aio_ret, written;

	aio_errno = aio_error(cblock);
	if (aio_errno == EINPROGRESS)
		return 0;

	written = aio_ret = aio_return(cblock);
	if (aio_ret < 0) {
		if (aio_errno != EINTR)
			pr_err("failed to write perf data, error: %m\n");
		written = 0;
	}

	rem_size = cblock->aio_nbytes - written;

	if (rem_size == 0) {
		cblock->aio_fildes = -1;
		/*
		 * md->refcount is incremented in record__aio_pushfn() for
		 * every aio write request started in record__aio_push(), so
		 * decrement it because the request is now complete.
		 */
		perf_mmap__put(&md->core);
		rc = 1;
	} else {
		/*
		 * The aio write request may require a restart with the
		 * remainder if the kernel didn't write the whole chunk
		 * at once.
		 */
		rem_off = cblock->aio_offset + written;
		rem_buf = (void *)(cblock->aio_buf + written);
		record__aio_write(cblock, cblock->aio_fildes,
				  rem_buf, rem_size, rem_off);
		rc = 0;
	}

	return rc;
}

static int record__aio_sync(struct mmap *md, bool sync_all)
{
	struct aiocb **aiocb = md->aio.aiocb;
	struct aiocb *cblocks = md->aio.cblocks;
	struct timespec timeout = { 0, 1000 * 1000 * 1 }; /* 1ms */
	int i, do_suspend;

	do {
		do_suspend = 0;
		for (i = 0; i < md->aio.nr_cblocks; ++i) {
			if (cblocks[i].aio_fildes == -1 || record__aio_complete(md, &cblocks[i])) {
				if (sync_all)
					aiocb[i] = NULL;
				else
					return i;
			} else {
				/*
				 * A started aio write is not complete yet,
				 * so it has to be waited for before the
				 * next allocation.
				 */
				aiocb[i] = &cblocks[i];
				do_suspend = 1;
			}
		}
		if (!do_suspend)
			return -1;

		while (aio_suspend((const struct aiocb **)aiocb, md->aio.nr_cblocks, &timeout)) {
			if (!(errno == EAGAIN || errno == EINTR))
				pr_err("failed to sync perf data, error: %m\n");
		}
	} while (1);
}
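/*
 * Each mmap owns a small ring of nr_cblocks in-flight asynchronous
 * writes.  record__aio_sync(md, false) blocks until at least one
 * control block is free and returns its index for reuse;
 * record__aio_sync(md, true) drains every outstanding write, which is
 * what record__aio_mmap_read_sync() below uses before rotating or
 * closing the output file.
 */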

struct record_aio {
	struct record	*rec;
	void		*data;
	size_t		size;
};

static int record__aio_pushfn(struct mmap *map, void *to, void *buf, size_t size)
{
	struct record_aio *aio = to;

	/*
	 * map->core.base data pointed to by buf is copied into a free
	 * map->aio.data[] buffer to release space in the kernel buffer as
	 * fast as possible, calling perf_mmap__consume() from
	 * perf_mmap__push().
	 *
	 * That lets the kernel proceed with storing more profiling data into
	 * the kernel buffer earlier than other per-cpu kernel buffers are
	 * handled.
	 *
	 * Copying can be done in two steps in case the chunk of profiling
	 * data crosses the upper bound of the kernel buffer. In this case
	 * we first move part of the data from map->start till the upper
	 * bound and then the remainder from the beginning of the kernel
	 * buffer till the end of the data chunk.
	 */

	if (record__comp_enabled(aio->rec)) {
		size = zstd_compress(aio->rec->session, aio->data + aio->size,
				     mmap__mmap_len(map) - aio->size,
				     buf, size);
	} else {
		memcpy(aio->data + aio->size, buf, size);
	}

	if (!aio->size) {
		/*
		 * Increment map->refcount to guard the map->aio.data[] buffer
		 * from premature deallocation, because the map object can be
		 * released earlier than the aio write request started on
		 * the map->aio.data[] buffer is complete.
		 *
		 * perf_mmap__put() is done at record__aio_complete()
		 * after started aio request completion or at record__aio_push()
		 * if the request failed to start.
		 */
		perf_mmap__get(&map->core);
	}

	aio->size += size;

	return size;
}

static int record__aio_push(struct record *rec, struct mmap *map, off_t *off)
{
	int ret, idx;
	int trace_fd = rec->session->data->file.fd;
	struct record_aio aio = { .rec = rec, .size = 0 };

	/*
	 * Call record__aio_sync() to wait till the map->aio.data[] buffer
	 * becomes available after the previous aio write operation.
	 */

	idx = record__aio_sync(map, false);
	aio.data = map->aio.data[idx];
	ret = perf_mmap__push(map, &aio, record__aio_pushfn);
	if (ret != 0) /* ret > 0 - no data, ret < 0 - error */
		return ret;

	rec->samples++;
	ret = record__aio_write(&(map->aio.cblocks[idx]), trace_fd, aio.data, aio.size, *off);
	if (!ret) {
		*off += aio.size;
		rec->bytes_written += aio.size;
		if (switch_output_size(rec))
			trigger_hit(&switch_output_trigger);
	} else {
		/*
		 * Decrement map->refcount incremented in record__aio_pushfn()
		 * back if the record__aio_write() operation failed to start;
		 * otherwise map->refcount is decremented in record__aio_complete()
		 * after the aio write operation finishes successfully.
		 */
		perf_mmap__put(&map->core);
	}

	return ret;
}

static off_t record__aio_get_pos(int trace_fd)
{
	return lseek(trace_fd, 0, SEEK_CUR);
}

static void record__aio_set_pos(int trace_fd, off_t pos)
{
	lseek(trace_fd, pos, SEEK_SET);
}

static void record__aio_mmap_read_sync(struct record *rec)
{
	int i;
	struct evlist *evlist = rec->evlist;
	struct mmap *maps = evlist->mmap;

	if (!record__aio_enabled(rec))
		return;

	for (i = 0; i < evlist->core.nr_mmaps; i++) {
		struct mmap *map = &maps[i];

		if (map->core.base)
			record__aio_sync(map, true);
	}
}

static int nr_cblocks_default = 1;
static int nr_cblocks_max = 4;

static int record__aio_parse(const struct option *opt,
			     const char *str,
			     int unset)
{
	struct record_opts *opts = (struct record_opts *)opt->value;

	if (unset) {
		opts->nr_cblocks = 0;
	} else {
		if (str)
			opts->nr_cblocks = strtol(str, NULL, 0);
		if (!opts->nr_cblocks)
			opts->nr_cblocks = nr_cblocks_default;
	}

	return 0;
}
#else /* HAVE_AIO_SUPPORT */
static int nr_cblocks_max = 0;

static int record__aio_push(struct record *rec __maybe_unused, struct mmap *map __maybe_unused,
			    off_t *off __maybe_unused)
{
	return -1;
}

static off_t record__aio_get_pos(int trace_fd __maybe_unused)
{
	return -1;
}

static void record__aio_set_pos(int trace_fd __maybe_unused, off_t pos __maybe_unused)
{
}

static void record__aio_mmap_read_sync(struct record *rec __maybe_unused)
{
}
#endif

static int record__aio_enabled(struct record *rec)
{
	return rec->opts.nr_cblocks > 0;
}
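/*
 * AIO stays off unless requested: rec->opts.nr_cblocks is 0 by
 * default, so record__aio_enabled() is false and the synchronous
 * perf_data_file__write() path is used.  The knob is the --aio[=n]
 * option (its option-table entry lives further down this file, past
 * this excerpt), with nr_cblocks_max above as the intended upper
 * bound.  Illustrative usage:
 *
 *	perf record --aio=2 -o perf.data -- ./workload
 */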

#define MMAP_FLUSH_DEFAULT 1
static int record__mmap_flush_parse(const struct option *opt,
				    const char *str,
				    int unset)
{
	int flush_max;
	struct record_opts *opts = (struct record_opts *)opt->value;
	static struct parse_tag tags[] = {
			{ .tag  = 'B', .mult = 1       },
			{ .tag  = 'K', .mult = 1 << 10 },
			{ .tag  = 'M', .mult = 1 << 20 },
			{ .tag  = 'G', .mult = 1 << 30 },
			{ .tag  = 0 },
	};

	if (unset)
		return 0;

	if (str) {
		opts->mmap_flush = parse_tag_value(str, tags);
		if (opts->mmap_flush == (int)-1)
			opts->mmap_flush = strtol(str, NULL, 0);
	}

	if (!opts->mmap_flush)
		opts->mmap_flush = MMAP_FLUSH_DEFAULT;

	flush_max = evlist__mmap_size(opts->mmap_pages);
	flush_max /= 4;
	if (opts->mmap_flush > flush_max)
		opts->mmap_flush = flush_max;

	return 0;
}
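/*
 * Example: --mmap-flush=48K only drains a ring once at least 48KiB
 * are pending, reducing wakeups on lightly loaded CPUs.  Whatever the
 * user asks for is silently capped at a quarter of the mmap size
 * (flush_max above), so a ring can never fill up before a flush is
 * attempted.
 */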

#ifdef HAVE_ZSTD_SUPPORT
static unsigned int comp_level_default = 1;

static int record__parse_comp_level(const struct option *opt, const char *str, int unset)
{
	struct record_opts *opts = opt->value;

	if (unset) {
		opts->comp_level = 0;
	} else {
		if (str)
			opts->comp_level = strtol(str, NULL, 0);
		if (!opts->comp_level)
			opts->comp_level = comp_level_default;
	}

	return 0;
}
#endif
static unsigned int comp_level_max = 22;

static int record__comp_enabled(struct record *rec)
{
	return rec->opts.comp_level > 0;
}
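/*
 * Trace compression is requested with -z/--compression-level[=n]:
 * level 0 means off, a bare -z selects comp_level_default (1), and
 * comp_level_max mirrors the deepest level zstd offers (22).
 */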

static int process_synthesized_event(struct perf_tool *tool,
				     union perf_event *event,
				     struct perf_sample *sample __maybe_unused,
				     struct machine *machine __maybe_unused)
{
	struct record *rec = container_of(tool, struct record, tool);
	return record__write(rec, NULL, event, event->header.size);
}

static int process_locked_synthesized_event(struct perf_tool *tool,
				     union perf_event *event,
				     struct perf_sample *sample __maybe_unused,
				     struct machine *machine __maybe_unused)
{
	static pthread_mutex_t synth_lock = PTHREAD_MUTEX_INITIALIZER;
	int ret;

	pthread_mutex_lock(&synth_lock);
	ret = process_synthesized_event(tool, event, sample, machine);
	pthread_mutex_unlock(&synth_lock);
	return ret;
}
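/*
 * When event synthesis runs on several threads (see
 * rec->opts.nr_threads_synthesize in record__synthesize()), this
 * locked variant is swapped in so concurrent writers serialize on
 * synth_lock and cannot interleave partial events in the output.
 */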

static int record__pushfn(struct mmap *map, void *to, void *bf, size_t size)
{
	struct record *rec = to;

	if (record__comp_enabled(rec)) {
		size = zstd_compress(rec->session, map->data, mmap__mmap_len(map), bf, size);
		bf = map->data;
	}

	rec->samples++;
	return record__write(rec, map, bf, size);
}

static volatile int signr = -1;
static volatile int child_finished;

static void sig_handler(int sig)
{
	if (sig == SIGCHLD)
		child_finished = 1;
	else
		signr = sig;

	done = 1;
}

static void sigsegv_handler(int sig)
{
	perf_hooks__recover();
	sighandler_dump_stack(sig);
}

static void record__sig_exit(void)
{
	if (signr == -1)
		return;

	signal(signr, SIG_DFL);
	raise(signr);
}

#ifdef HAVE_AUXTRACE_SUPPORT

static int record__process_auxtrace(struct perf_tool *tool,
				    struct mmap *map,
				    union perf_event *event, void *data1,
				    size_t len1, void *data2, size_t len2)
{
	struct record *rec = container_of(tool, struct record, tool);
	struct perf_data *data = &rec->data;
	size_t padding;
	u8 pad[8] = {0};

	if (!perf_data__is_pipe(data) && perf_data__is_single_file(data)) {
		off_t file_offset;
		int fd = perf_data__fd(data);
		int err;

		file_offset = lseek(fd, 0, SEEK_CUR);
		if (file_offset == -1)
			return -1;
		err = auxtrace_index__auxtrace_event(&rec->session->auxtrace_index,
						     event, file_offset);
		if (err)
			return err;
	}

	/* event.auxtrace.size includes padding, see __auxtrace_mmap__read() */
	padding = (len1 + len2) & 7;
	if (padding)
		padding = 8 - padding;

	record__write(rec, map, event, event->header.size);
	record__write(rec, map, data1, len1);
	if (len2)
		record__write(rec, map, data2, len2);
	record__write(rec, map, &pad, padding);

	return 0;
}

static int record__auxtrace_mmap_read(struct record *rec,
				      struct mmap *map)
{
	int ret;

	ret = auxtrace_mmap__read(map, rec->itr, &rec->tool,
				  record__process_auxtrace);
	if (ret < 0)
		return ret;

	if (ret)
		rec->samples++;

	return 0;
}

static int record__auxtrace_mmap_read_snapshot(struct record *rec,
					       struct mmap *map)
{
	int ret;

	ret = auxtrace_mmap__read_snapshot(map, rec->itr, &rec->tool,
					   record__process_auxtrace,
					   rec->opts.auxtrace_snapshot_size);
	if (ret < 0)
		return ret;

	if (ret)
		rec->samples++;

	return 0;
}

static int record__auxtrace_read_snapshot_all(struct record *rec)
{
	int i;
	int rc = 0;

	for (i = 0; i < rec->evlist->core.nr_mmaps; i++) {
		struct mmap *map = &rec->evlist->mmap[i];

		if (!map->auxtrace_mmap.base)
			continue;

		if (record__auxtrace_mmap_read_snapshot(rec, map) != 0) {
			rc = -1;
			goto out;
		}
	}
out:
	return rc;
}

static void record__read_auxtrace_snapshot(struct record *rec, bool on_exit)
{
	pr_debug("Recording AUX area tracing snapshot\n");
	if (record__auxtrace_read_snapshot_all(rec) < 0) {
		trigger_error(&auxtrace_snapshot_trigger);
	} else {
		if (auxtrace_record__snapshot_finish(rec->itr, on_exit))
			trigger_error(&auxtrace_snapshot_trigger);
		else
			trigger_ready(&auxtrace_snapshot_trigger);
	}
}

static int record__auxtrace_snapshot_exit(struct record *rec)
{
	if (trigger_is_error(&auxtrace_snapshot_trigger))
		return 0;

	if (!auxtrace_record__snapshot_started &&
	    auxtrace_record__snapshot_start(rec->itr))
		return -1;

	record__read_auxtrace_snapshot(rec, true);
	if (trigger_is_error(&auxtrace_snapshot_trigger))
		return -1;

	return 0;
}
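/*
 * In AUX area snapshot mode the trace ring is never drained
 * continuously; a SIGUSR2 (see snapshot_sig_handler(), later in this
 * file) or process exit arms a snapshot, and
 * record__read_auxtrace_snapshot() then grabs whatever the hardware
 * wrote since the previous snapshot and re-arms the trigger.
 */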

static int record__auxtrace_init(struct record *rec)
{
	int err;

	if (!rec->itr) {
		rec->itr = auxtrace_record__init(rec->evlist, &err);
		if (err)
			return err;
	}

	err = auxtrace_parse_snapshot_options(rec->itr, &rec->opts,
					      rec->opts.auxtrace_snapshot_opts);
	if (err)
		return err;

	err = auxtrace_parse_sample_options(rec->itr, rec->evlist, &rec->opts,
					    rec->opts.auxtrace_sample_opts);
	if (err)
		return err;

	return auxtrace_parse_filters(rec->evlist);
}

#else

static inline
int record__auxtrace_mmap_read(struct record *rec __maybe_unused,
			       struct mmap *map __maybe_unused)
{
	return 0;
}

static inline
void record__read_auxtrace_snapshot(struct record *rec __maybe_unused,
				    bool on_exit __maybe_unused)
{
}

static inline
int auxtrace_record__snapshot_start(struct auxtrace_record *itr __maybe_unused)
{
	return 0;
}

static inline
int record__auxtrace_snapshot_exit(struct record *rec __maybe_unused)
{
	return 0;
}

static int record__auxtrace_init(struct record *rec __maybe_unused)
{
	return 0;
}

#endif

static bool record__kcore_readable(struct machine *machine)
{
	char kcore[PATH_MAX];
	int fd;

	scnprintf(kcore, sizeof(kcore), "%s/proc/kcore", machine->root_dir);

	fd = open(kcore, O_RDONLY);
	if (fd < 0)
		return false;

	close(fd);

	return true;
}

static int record__kcore_copy(struct machine *machine, struct perf_data *data)
{
	char from_dir[PATH_MAX];
	char kcore_dir[PATH_MAX];
	int ret;

	snprintf(from_dir, sizeof(from_dir), "%s/proc", machine->root_dir);

	ret = perf_data__make_kcore_dir(data, kcore_dir, sizeof(kcore_dir));
	if (ret)
		return ret;

	return kcore_copy(from_dir, kcore_dir);
}
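/*
 * For --kcore recordings a private copy of /proc/kcore (together with
 * the kallsyms and modules files that kcore_copy() gathers) is stored
 * inside the perf.data directory, so later annotation and disassembly
 * see exactly the kernel text that was running.
 */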

static int record__mmap_evlist(struct record *rec,
			       struct evlist *evlist)
{
	struct record_opts *opts = &rec->opts;
	bool auxtrace_overwrite = opts->auxtrace_snapshot_mode ||
				  opts->auxtrace_sample_mode;
	char msg[512];

	if (opts->affinity != PERF_AFFINITY_SYS)
		cpu__setup_cpunode_map();

	if (evlist__mmap_ex(evlist, opts->mmap_pages,
			    opts->auxtrace_mmap_pages,
			    auxtrace_overwrite,
			    opts->nr_cblocks, opts->affinity,
			    opts->mmap_flush, opts->comp_level) < 0) {
		if (errno == EPERM) {
			pr_err("Permission error mapping pages.\n"
			       "Consider increasing "
			       "/proc/sys/kernel/perf_event_mlock_kb,\n"
			       "or try again with a smaller value of -m/--mmap_pages.\n"
			       "(current value: %u,%u)\n",
			       opts->mmap_pages, opts->auxtrace_mmap_pages);
			return -errno;
		} else {
			pr_err("failed to mmap with %d (%s)\n", errno,
				str_error_r(errno, msg, sizeof(msg)));
			if (errno)
				return -errno;
			else
				return -EINVAL;
		}
	}
	return 0;
}

static int record__mmap(struct record *rec)
{
	return record__mmap_evlist(rec, rec->evlist);
}

static int record__open(struct record *rec)
{
	char msg[BUFSIZ];
	struct evsel *pos;
	struct evlist *evlist = rec->evlist;
	struct perf_session *session = rec->session;
	struct record_opts *opts = &rec->opts;
	int rc = 0;

	/*
	 * For initial_delay we need to add a dummy event so that we can track
	 * PERF_RECORD_MMAP while we wait for the initial delay to enable the
	 * real events, the ones asked for by the user.
	 */
	if (opts->initial_delay) {
		if (perf_evlist__add_dummy(evlist))
			return -ENOMEM;

		pos = evlist__first(evlist);
		pos->tracking = 0;
		pos = evlist__last(evlist);
		pos->tracking = 1;
		pos->core.attr.enable_on_exec = 1;
	}

	perf_evlist__config(evlist, opts, &callchain_param);

	evlist__for_each_entry(evlist, pos) {
try_again:
		if (evsel__open(pos, pos->core.cpus, pos->core.threads) < 0) {
			if (evsel__fallback(pos, errno, msg, sizeof(msg))) {
				if (verbose > 0)
					ui__warning("%s\n", msg);
				goto try_again;
			}
			if ((errno == EINVAL || errno == EBADF) &&
			    pos->leader != pos &&
			    pos->weak_group) {
				pos = perf_evlist__reset_weak_group(evlist, pos, true);
				goto try_again;
			}
			rc = -errno;
			evsel__open_strerror(pos, &opts->target, errno, msg, sizeof(msg));
			ui__error("%s\n", msg);
			goto out;
		}

		pos->supported = true;
	}

	if (symbol_conf.kptr_restrict && !perf_evlist__exclude_kernel(evlist)) {
		pr_warning(
"WARNING: Kernel address maps (/proc/{kallsyms,modules}) are restricted,\n"
"check /proc/sys/kernel/kptr_restrict and /proc/sys/kernel/perf_event_paranoid.\n\n"
"Samples in kernel functions may not be resolved if a suitable vmlinux\n"
"file is not found in the buildid cache or in the vmlinux path.\n\n"
"Samples in kernel modules won't be resolved at all.\n\n"
"If some relocation was applied (e.g. kexec) symbols may be misresolved\n"
"even with a suitable vmlinux or kallsyms file.\n\n");
	}

	if (perf_evlist__apply_filters(evlist, &pos)) {
		pr_err("failed to set filter \"%s\" on event %s with %d (%s)\n",
			pos->filter, evsel__name(pos), errno,
			str_error_r(errno, msg, sizeof(msg)));
		rc = -1;
		goto out;
	}

	rc = record__mmap(rec);
	if (rc)
		goto out;

	session->evlist = evlist;
	perf_session__set_id_hdr_size(session);
out:
	return rc;
}
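/*
 * The dummy event set up above is a software event that never counts
 * on its own (hence "dummy"); it exists only so side-band records
 * such as PERF_RECORD_MMAP and PERF_RECORD_COMM keep flowing while
 * the real events wait, disabled, for the delay to elapse.
 * Illustrative usage, delaying measurement by 500ms:
 *
 *	perf record --delay=500 -- ./workload
 */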

static int process_sample_event(struct perf_tool *tool,
				union perf_event *event,
				struct perf_sample *sample,
				struct evsel *evsel,
				struct machine *machine)
{
	struct record *rec = container_of(tool, struct record, tool);

	if (rec->evlist->first_sample_time == 0)
		rec->evlist->first_sample_time = sample->time;

	rec->evlist->last_sample_time = sample->time;

	if (rec->buildid_all)
		return 0;

	rec->samples++;
	return build_id__mark_dso_hit(tool, event, sample, evsel, machine);
}

static int process_buildids(struct record *rec)
{
	struct perf_session *session = rec->session;

	if (perf_data__size(&rec->data) == 0)
		return 0;

	/*
	 * During this process, it'll load the kernel map and replace
	 * dso->long_name with the real pathname it found. In this case
	 * we prefer the vmlinux path like
	 *   /lib/modules/3.16.4/build/vmlinux
	 *
	 * rather than the build-id path (in the debug directory):
	 *   $HOME/.debug/.build-id/f0/6e17aa50adf4d00b88925e03775de107611551
	 */
	symbol_conf.ignore_vmlinux_buildid = true;

	/*
	 * If --buildid-all is given, it marks all DSOs regardless of hits,
	 * so there is no need to process samples. But if timestamp_boundary
	 * is enabled, it still needs to walk all samples to get the
	 * timestamps of the first/last samples.
	 */
	if (rec->buildid_all && !rec->timestamp_boundary)
		rec->tool.sample = NULL;

	return perf_session__process_events(session);
}

static void perf_event__synthesize_guest_os(struct machine *machine, void *data)
{
	int err;
	struct perf_tool *tool = data;
	/*
	 * As for the guest kernel when processing the record & report
	 * subcommands, we arrange the module mmaps prior to the guest
	 * kernel mmap and trigger a preload dso, because default guest
	 * module symbols are loaded from the guest kallsyms instead of
	 * /lib/modules/XXX/XXX. This method is used to avoid missing
	 * symbols when the first addr is in a module instead of in the
	 * guest kernel.
	 */
	err = perf_event__synthesize_modules(tool, process_synthesized_event,
					     machine);
	if (err < 0)
		pr_err("Couldn't record guest kernel [%d]'s reference"
		       " relocation symbol.\n", machine->pid);

	/*
	 * We use _stext for the guest kernel because the guest kernel's
	 * /proc/kallsyms sometimes has no _text.
	 */
	err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
						 machine);
	if (err < 0)
		pr_err("Couldn't record guest kernel [%d]'s reference"
		       " relocation symbol.\n", machine->pid);
}

static struct perf_event_header finished_round_event = {
	.size = sizeof(struct perf_event_header),
	.type = PERF_RECORD_FINISHED_ROUND,
};
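/*
 * PERF_RECORD_FINISHED_ROUND is pure bookkeeping for the reader: it
 * marks a point where buffered events can be sorted and flushed,
 * because later rounds only carry newer data, so perf report/script
 * need not buffer the whole file to restore timestamp order.
 */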

static void record__adjust_affinity(struct record *rec, struct mmap *map)
{
	if (rec->opts.affinity != PERF_AFFINITY_SYS &&
	    !bitmap_equal(rec->affinity_mask.bits, map->affinity_mask.bits,
			  rec->affinity_mask.nbits)) {
		bitmap_zero(rec->affinity_mask.bits, rec->affinity_mask.nbits);
		bitmap_or(rec->affinity_mask.bits, rec->affinity_mask.bits,
			  map->affinity_mask.bits, rec->affinity_mask.nbits);
		sched_setaffinity(0, MMAP_CPU_MASK_BYTES(&rec->affinity_mask),
				  (cpu_set_t *)rec->affinity_mask.bits);
		if (verbose == 2)
			mmap_cpu_mask__scnprintf(&rec->affinity_mask, "thread");
	}
}

static size_t process_comp_header(void *record, size_t increment)
{
	struct perf_record_compressed *event = record;
	size_t size = sizeof(*event);

	if (increment) {
		event->header.size += increment;
		return increment;
	}

	event->header.type = PERF_RECORD_COMPRESSED;
	event->header.size = size;

	return size;
}

static size_t zstd_compress(struct perf_session *session, void *dst, size_t dst_size,
			    void *src, size_t src_size)
{
	size_t compressed;
	size_t max_record_size = PERF_SAMPLE_MAX_SIZE - sizeof(struct perf_record_compressed) - 1;

	compressed = zstd_compress_stream_to_records(&session->zstd_data, dst, dst_size, src, src_size,
						     max_record_size, process_comp_header);

	session->bytes_transferred += src_size;
	session->bytes_compressed += compressed;

	return compressed;
}
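/*
 * Compressed output is self-framing: process_comp_header() wraps each
 * compressed chunk in a PERF_RECORD_COMPRESSED header no larger than
 * PERF_SAMPLE_MAX_SIZE, so a reader can decompress record by record.
 * bytes_transferred/bytes_compressed feed the ratio printed when the
 * session ends.
 */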

static int record__mmap_read_evlist(struct record *rec, struct evlist *evlist,
				    bool overwrite, bool synch)
{
	u64 bytes_written = rec->bytes_written;
	int i;
	int rc = 0;
	struct mmap *maps;
	int trace_fd = rec->data.file.fd;
	off_t off = 0;

	if (!evlist)
		return 0;

	maps = overwrite ? evlist->overwrite_mmap : evlist->mmap;
	if (!maps)
		return 0;

	if (overwrite && evlist->bkw_mmap_state != BKW_MMAP_DATA_PENDING)
		return 0;

	if (record__aio_enabled(rec))
		off = record__aio_get_pos(trace_fd);

	for (i = 0; i < evlist->core.nr_mmaps; i++) {
		u64 flush = 0;
		struct mmap *map = &maps[i];

		if (map->core.base) {
			record__adjust_affinity(rec, map);
			if (synch) {
				flush = map->core.flush;
				map->core.flush = 1;
			}
			if (!record__aio_enabled(rec)) {
				if (perf_mmap__push(map, rec, record__pushfn) < 0) {
					if (synch)
						map->core.flush = flush;
					rc = -1;
					goto out;
				}
			} else {
				if (record__aio_push(rec, map, &off) < 0) {
					record__aio_set_pos(trace_fd, off);
					if (synch)
						map->core.flush = flush;
					rc = -1;
					goto out;
				}
			}
			if (synch)
				map->core.flush = flush;
		}

		if (map->auxtrace_mmap.base && !rec->opts.auxtrace_snapshot_mode &&
		    !rec->opts.auxtrace_sample_mode &&
		    record__auxtrace_mmap_read(rec, map) != 0) {
			rc = -1;
			goto out;
		}
	}

	if (record__aio_enabled(rec))
		record__aio_set_pos(trace_fd, off);

	/*
	 * Mark the round finished in case we wrote
	 * at least one event.
	 */
	if (bytes_written != rec->bytes_written)
		rc = record__write(rec, NULL, &finished_round_event, sizeof(finished_round_event));

	if (overwrite)
		perf_evlist__toggle_bkw_mmap(evlist, BKW_MMAP_EMPTY);
out:
	return rc;
}

static int record__mmap_read_all(struct record *rec, bool synch)
{
	int err;

	err = record__mmap_read_evlist(rec, rec->evlist, false, synch);
	if (err)
		return err;

	return record__mmap_read_evlist(rec, rec->evlist, true, synch);
}

static void record__init_features(struct record *rec)
{
	struct perf_session *session = rec->session;
	int feat;

	for (feat = HEADER_FIRST_FEATURE; feat < HEADER_LAST_FEATURE; feat++)
		perf_header__set_feat(&session->header, feat);

	if (rec->no_buildid)
		perf_header__clear_feat(&session->header, HEADER_BUILD_ID);

	if (!have_tracepoints(&rec->evlist->core.entries))
		perf_header__clear_feat(&session->header, HEADER_TRACING_DATA);

	if (!rec->opts.branch_stack)
		perf_header__clear_feat(&session->header, HEADER_BRANCH_STACK);

	if (!rec->opts.full_auxtrace)
		perf_header__clear_feat(&session->header, HEADER_AUXTRACE);

	if (!(rec->opts.use_clockid && rec->opts.clockid_res_ns))
		perf_header__clear_feat(&session->header, HEADER_CLOCKID);

	perf_header__clear_feat(&session->header, HEADER_DIR_FORMAT);
	if (!record__comp_enabled(rec))
		perf_header__clear_feat(&session->header, HEADER_COMPRESSED);

	perf_header__clear_feat(&session->header, HEADER_STAT);
}

static void
record__finish_output(struct record *rec)
{
	struct perf_data *data = &rec->data;
	int fd = perf_data__fd(data);

	if (data->is_pipe)
		return;

	rec->session->header.data_size += rec->bytes_written;
	data->file.size = lseek(perf_data__fd(data), 0, SEEK_CUR);

	if (!rec->no_buildid) {
		process_buildids(rec);

		if (rec->buildid_all)
			dsos__hit_all(rec->session);
	}
	perf_session__write_header(rec->session, rec->evlist, fd, true);

	return;
}

static int record__synthesize_workload(struct record *rec, bool tail)
{
	int err;
	struct perf_thread_map *thread_map;

	if (rec->opts.tail_synthesize != tail)
		return 0;

	thread_map = thread_map__new_by_tid(rec->evlist->workload.pid);
	if (thread_map == NULL)
		return -1;

	err = perf_event__synthesize_thread_map(&rec->tool, thread_map,
						process_synthesized_event,
						&rec->session->machines.host,
						rec->opts.sample_address);
	perf_thread_map__put(thread_map);
	return err;
}

static int record__synthesize(struct record *rec, bool tail);

static int
record__switch_output(struct record *rec, bool at_exit)
{
	struct perf_data *data = &rec->data;
	int fd, err;
	char *new_filename;

	/* Same size: "2015122520103046" */
	char timestamp[] = "InvalidTimestamp";

	record__aio_mmap_read_sync(rec);

	record__synthesize(rec, true);
	if (target__none(&rec->opts.target))
		record__synthesize_workload(rec, true);

	rec->samples = 0;
	record__finish_output(rec);
	err = fetch_current_timestamp(timestamp, sizeof(timestamp));
	if (err) {
		pr_err("Failed to get current timestamp\n");
		return -EINVAL;
	}

	fd = perf_data__switch(data, timestamp,
			       rec->session->header.data_offset,
			       at_exit, &new_filename);
	if (fd >= 0 && !at_exit) {
		rec->bytes_written = 0;
		rec->session->header.data_size = 0;
	}

	if (!quiet)
		fprintf(stderr, "[ perf record: Dump %s.%s ]\n",
			data->path, timestamp);

	if (rec->switch_output.num_files) {
		int n = rec->switch_output.cur_file + 1;

		if (n >= rec->switch_output.num_files)
			n = 0;
		rec->switch_output.cur_file = n;
		if (rec->switch_output.filenames[n]) {
			remove(rec->switch_output.filenames[n]);
			zfree(&rec->switch_output.filenames[n]);
		}
		rec->switch_output.filenames[n] = new_filename;
	} else {
		free(new_filename);
	}

	/* Output tracking events */
	if (!at_exit) {
		record__synthesize(rec, false);

		/*
		 * In 'perf record --switch-output' without -a,
		 * record__synthesize() in record__switch_output() won't
		 * generate tracking events, because there's no thread_map
		 * in the evlist, which would leave the newly created
		 * perf.data without map and comm information.
		 * Create a fake thread_map and directly call
		 * perf_event__synthesize_thread_map() for those events.
		 */
		if (target__none(&rec->opts.target))
			record__synthesize_workload(rec, false);
	}
	return fd;
}
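/*
 * A rotation, then, looks like this (illustrative):
 *
 *	perf record --switch-output=1G -o perf.data -- ./workload
 *
 * perf.data stays the live file; each completed chunk is renamed to
 * perf.data.<timestamp> (the "Dump" message above), and with
 * --switch-max-files=N only the newest N chunks survive, courtesy of
 * the filenames ring handled a few lines up.
 */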

static volatile int workload_exec_errno;

/*
 * perf_evlist__prepare_workload will send a SIGUSR1
 * if the fork fails, since we asked by setting its
 * want_signal to true.
 */
static void workload_exec_failed_signal(int signo __maybe_unused,
					siginfo_t *info,
					void *ucontext __maybe_unused)
{
	workload_exec_errno = info->si_value.sival_int;
	done = 1;
	child_finished = 1;
}

static void snapshot_sig_handler(int sig);
static void alarm_sig_handler(int sig);

static const struct perf_event_mmap_page *
perf_evlist__pick_pc(struct evlist *evlist)
{
	if (evlist) {
		if (evlist->mmap && evlist->mmap[0].core.base)
			return evlist->mmap[0].core.base;
		if (evlist->overwrite_mmap && evlist->overwrite_mmap[0].core.base)
			return evlist->overwrite_mmap[0].core.base;
	}
	return NULL;
}

static const struct perf_event_mmap_page *record__pick_pc(struct record *rec)
{
	const struct perf_event_mmap_page *pc;

	pc = perf_evlist__pick_pc(rec->evlist);
	if (pc)
		return pc;
	return NULL;
}

static int record__synthesize(struct record *rec, bool tail)
{
	struct perf_session *session = rec->session;
	struct machine *machine = &session->machines.host;
	struct perf_data *data = &rec->data;
	struct record_opts *opts = &rec->opts;
	struct perf_tool *tool = &rec->tool;
	int fd = perf_data__fd(data);
	int err = 0;
	event_op f = process_synthesized_event;

	if (rec->opts.tail_synthesize != tail)
		return 0;

	if (data->is_pipe) {
		/*
		 * We need to synthesize events first, because some
		 * features work on top of them (on the report side).
		 */
		err = perf_event__synthesize_attrs(tool, rec->evlist,
						   process_synthesized_event);
		if (err < 0) {
			pr_err("Couldn't synthesize attrs.\n");
			goto out;
		}

		err = perf_event__synthesize_features(tool, session, rec->evlist,
						      process_synthesized_event);
		if (err < 0) {
			pr_err("Couldn't synthesize features.\n");
			return err;
		}

		if (have_tracepoints(&rec->evlist->core.entries)) {
			/*
			 * FIXME err <= 0 here actually means that
			 * there were no tracepoints, so it's not really
			 * an error, just that we don't need to
			 * synthesize anything.  We really have to
			 * return this more properly and also
			 * propagate errors that now are calling die()
			 */
			err = perf_event__synthesize_tracing_data(tool, fd, rec->evlist,
								  process_synthesized_event);
			if (err <= 0) {
				pr_err("Couldn't record tracing data.\n");
				goto out;
			}
			rec->bytes_written += err;
		}
	}

	err = perf_event__synth_time_conv(record__pick_pc(rec), tool,
					  process_synthesized_event, machine);
	if (err)
		goto out;

	/* Synthesize id_index before auxtrace_info */
	if (rec->opts.auxtrace_sample_mode) {
		err = perf_event__synthesize_id_index(tool,
						      process_synthesized_event,
						      session->evlist, machine);
		if (err)
			goto out;
	}

	if (rec->opts.full_auxtrace) {
		err = perf_event__synthesize_auxtrace_info(rec->itr, tool,
					session, process_synthesized_event);
		if (err)
			goto out;
	}

	if (!perf_evlist__exclude_kernel(rec->evlist)) {
		err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
							 machine);
		WARN_ONCE(err < 0, "Couldn't record kernel reference relocation symbol\n"
				   "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
				   "Check /proc/kallsyms permission or run as root.\n");

		err = perf_event__synthesize_modules(tool, process_synthesized_event,
						     machine);
		WARN_ONCE(err < 0, "Couldn't record kernel module information.\n"
				   "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
				   "Check /proc/modules permission or run as root.\n");
	}

	if (perf_guest) {
		machines__process_guests(&session->machines,
					 perf_event__synthesize_guest_os, tool);
	}

	err = perf_event__synthesize_extra_attr(&rec->tool,
						rec->evlist,
						process_synthesized_event,
						data->is_pipe);
	if (err)
		goto out;

	err = perf_event__synthesize_thread_map2(&rec->tool, rec->evlist->core.threads,
						 process_synthesized_event,
						 NULL);
	if (err < 0) {
		pr_err("Couldn't synthesize thread map.\n");
		return err;
	}

	err = perf_event__synthesize_cpu_map(&rec->tool, rec->evlist->core.cpus,
					     process_synthesized_event, NULL);
	if (err < 0) {
		pr_err("Couldn't synthesize cpu map.\n");
		return err;
	}

	err = perf_event__synthesize_bpf_events(session, process_synthesized_event,
						machine, opts);
	if (err < 0)
		pr_warning("Couldn't synthesize bpf events.\n");

	err = perf_event__synthesize_cgroups(tool, process_synthesized_event,
					     machine);
	if (err < 0)
		pr_warning("Couldn't synthesize cgroup events.\n");

	if (rec->opts.nr_threads_synthesize > 1) {
		perf_set_multithreaded();
		f = process_locked_synthesized_event;
	}

	err = __machine__synthesize_threads(machine, tool, &opts->target, rec->evlist->core.threads,
					    f, opts->sample_address,
					    rec->opts.nr_threads_synthesize);

	if (rec->opts.nr_threads_synthesize > 1)
		perf_set_singlethreaded();

out:
	return err;
}

static int record__process_signal_event(union perf_event *event __maybe_unused, void *data)
{
	struct record *rec = data;
	pthread_kill(rec->thread_id, SIGUSR2);
	return 0;
}

static int record__setup_sb_evlist(struct record *rec)
{
	struct record_opts *opts = &rec->opts;

	if (rec->sb_evlist != NULL) {
		/*
		 * We get here if --switch-output-event populated the
		 * sb_evlist, so associate a callback that will send a SIGUSR2
		 * to the main thread.
		 */
		evlist__set_cb(rec->sb_evlist, record__process_signal_event, rec);
		rec->thread_id = pthread_self();
	}

	if (!opts->no_bpf_event) {
		if (rec->sb_evlist == NULL) {
			rec->sb_evlist = evlist__new();

			if (rec->sb_evlist == NULL) {
				pr_err("Couldn't create side band evlist.\n");
				return -1;
			}
		}

		if (evlist__add_bpf_sb_event(rec->sb_evlist, &rec->session->header.env)) {
			pr_err("Couldn't ask for PERF_RECORD_BPF_EVENT side band events.\n");
			return -1;
		}
	}

	if (perf_evlist__start_sb_thread(rec->sb_evlist, &rec->opts.target)) {
		pr_debug("Couldn't start the BPF side band thread:\nBPF programs starting from now on won't be annotatable\n");
		opts->no_bpf_event = true;
	}

	return 0;
}
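/*
 * The side-band evlist runs in its own thread (started by
 * perf_evlist__start_sb_thread() above).  It watches
 * PERF_RECORD_BPF_EVENT so BPF programs loaded mid-session remain
 * annotatable, and it is also where --switch-output-event lands:
 * record__process_signal_event() converts a matching event into a
 * SIGUSR2 for the main thread, reusing the signal-driven rotation
 * path.
 */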
1485
8c6f45a7 1486static int __cmd_record(struct record *rec, int argc, const char **argv)
16c8a109 1487{
57706abc 1488 int err;
45604710 1489 int status = 0;
8b412664 1490 unsigned long waking = 0;
46be604b 1491 const bool forks = argc > 0;
45694aa7 1492 struct perf_tool *tool = &rec->tool;
b4006796 1493 struct record_opts *opts = &rec->opts;
8ceb41d7 1494 struct perf_data *data = &rec->data;
d20deb64 1495 struct perf_session *session;
6dcf45ef 1496 bool disabled = false, draining = false;
42aa276f 1497 int fd;
d3c8c08e 1498 float ratio = 0;
de9ac07b 1499
45604710 1500 atexit(record__sig_exit);
f5970550
PZ
1501 signal(SIGCHLD, sig_handler);
1502 signal(SIGINT, sig_handler);
804f7ac7 1503 signal(SIGTERM, sig_handler);
a074865e 1504 signal(SIGSEGV, sigsegv_handler);
c0bdc1c4 1505
f3b3614a
HB
1506 if (rec->opts.record_namespaces)
1507 tool->namespace_events = true;
1508
8fb4b679
NK
1509 if (rec->opts.record_cgroup) {
1510#ifdef HAVE_FILE_HANDLE
1511 tool->cgroup_events = true;
1512#else
1513 pr_err("cgroup tracking is not supported\n");
1514 return -1;
1515#endif
1516 }
1517
dc0c6127 1518 if (rec->opts.auxtrace_snapshot_mode || rec->switch_output.enabled) {
2dd6d8a1 1519 signal(SIGUSR2, snapshot_sig_handler);
3c1cb7e3
WN
1520 if (rec->opts.auxtrace_snapshot_mode)
1521 trigger_on(&auxtrace_snapshot_trigger);
dc0c6127 1522 if (rec->switch_output.enabled)
3c1cb7e3 1523 trigger_on(&switch_output_trigger);
c0bdc1c4 1524 } else {
2dd6d8a1 1525 signal(SIGUSR2, SIG_IGN);
c0bdc1c4 1526 }
f5970550 1527
8ceb41d7 1528 session = perf_session__new(data, false, tool);
6ef81c55 1529 if (IS_ERR(session)) {
ffa91880 1530 pr_err("Perf session creation failed.\n");
6ef81c55 1531 return PTR_ERR(session);
a9a70bbc
ACM
1532 }
1533
8ceb41d7 1534 fd = perf_data__fd(data);
d20deb64
ACM
1535 rec->session = session;
1536
5d7f4116
AB
1537 if (zstd_init(&session->zstd_data, rec->opts.comp_level) < 0) {
1538 pr_err("Compression initialization failed.\n");
1539 return -1;
1540 }
1541
1542 session->header.env.comp_type = PERF_COMP_ZSTD;
1543 session->header.env.comp_level = rec->opts.comp_level;
1544
eeb399b5
AH
1545 if (rec->opts.kcore &&
1546 !record__kcore_readable(&session->machines.host)) {
1547 pr_err("ERROR: kcore is not readable.\n");
1548 return -1;
1549 }
1550
8c6f45a7 1551 record__init_features(rec);
330aa675 1552
cf790516
AB
1553 if (rec->opts.use_clockid && rec->opts.clockid_res_ns)
1554 session->header.env.clockid_res_ns = rec->opts.clockid_res_ns;
1555
d4db3f16 1556 if (forks) {
3e2be2da 1557 err = perf_evlist__prepare_workload(rec->evlist, &opts->target,
8ceb41d7 1558 argv, data->is_pipe,
735f7e0b 1559 workload_exec_failed_signal);
35b9d88e
ACM
1560 if (err < 0) {
1561 pr_err("Couldn't run the workload!\n");
45604710 1562 status = err;
35b9d88e 1563 goto out_delete_session;
856e9660 1564 }
856e9660
PZ
1565 }
1566
ad46e48c
JO
1567 /*
1568 * If we have just single event and are sending data
1569 * through pipe, we need to force the ids allocation,
1570 * because we synthesize event name through the pipe
1571 * and need the id for that.
1572 */
6484d2f9 1573 if (data->is_pipe && rec->evlist->core.nr_entries == 1)
ad46e48c
JO
1574 rec->opts.sample_id = true;
1575
8c6f45a7 1576 if (record__open(rec) != 0) {
8d3eca20 1577 err = -1;
45604710 1578 goto out_child;
8d3eca20 1579 }
f6fa4375 1580 session->header.env.comp_mmap_len = session->evlist->core.mmap_len;
de9ac07b 1581
eeb399b5
AH
1582 if (rec->opts.kcore) {
1583 err = record__kcore_copy(&session->machines.host, data);
1584 if (err) {
1585 pr_err("ERROR: Failed to copy kcore\n");
1586 goto out_child;
1587 }
1588 }
1589
8690a2a7
WN
1590 err = bpf__apply_obj_config();
1591 if (err) {
1592 char errbuf[BUFSIZ];
1593
1594 bpf__strerror_apply_obj_config(err, errbuf, sizeof(errbuf));
1595 pr_err("ERROR: Apply config to BPF failed: %s\n",
1596 errbuf);
1597 goto out_child;
1598 }
1599
cca8482c
AH
1600 /*
1601 * Normally perf_session__new would do this, but it doesn't have the
1602 * evlist.
1603 */
1604 if (rec->tool.ordered_events && !perf_evlist__sample_id_all(rec->evlist)) {
1605 pr_warning("WARNING: No sample_id_all support, falling back to unordered processing\n");
1606 rec->tool.ordered_events = false;
1607 }
1608
3e2be2da 1609 if (!rec->evlist->nr_groups)
a8bb559b
NK
1610 perf_header__clear_feat(&session->header, HEADER_GROUP_DESC);
1611
8ceb41d7 1612 if (data->is_pipe) {
42aa276f 1613 err = perf_header__write_pipe(fd);
529870e3 1614 if (err < 0)
45604710 1615 goto out_child;
563aecb2 1616 } else {
42aa276f 1617 err = perf_session__write_header(session, rec->evlist, fd, false);
d5eed904 1618 if (err < 0)
45604710 1619 goto out_child;
56b03f3c
ACM
1620 }
1621
b38d85ef 1622 err = -1;
d3665498 1623 if (!rec->no_buildid
e20960c0 1624 && !perf_header__has_feat(&session->header, HEADER_BUILD_ID)) {
d3665498 1625 pr_err("Couldn't generate buildids. "
e20960c0 1626 "Use --no-buildid to profile anyway.\n");
45604710 1627 goto out_child;
e20960c0
RR
1628 }
1629
23cbb41c
ACM
1630 err = record__setup_sb_evlist(rec);
1631 if (err)
1632 goto out_child;
657ee553 1633
4ea648ae 1634 err = record__synthesize(rec, false);
c45c86eb 1635 if (err < 0)
45604710 1636 goto out_child;
8d3eca20 1637
d20deb64 1638 if (rec->realtime_prio) {
de9ac07b
PZ
1639 struct sched_param param;
1640
d20deb64 1641 param.sched_priority = rec->realtime_prio;
de9ac07b 1642 if (sched_setscheduler(0, SCHED_FIFO, &param)) {
6beba7ad 1643 pr_err("Could not set realtime priority.\n");
8d3eca20 1644 err = -1;
45604710 1645 goto out_child;
de9ac07b
PZ
1646 }
1647 }
1648
774cb499
JO
1649 /*
1650 * When perf is starting the traced process, all the events
1651 * (apart from group members) have enable_on_exec=1 set,
1652 * so don't spoil it by prematurely enabling them.
1653 */
6619a53e 1654 if (!target__none(&opts->target) && !opts->initial_delay)
1c87f165 1655 evlist__enable(rec->evlist);
764e16a3 1656
856e9660
PZ
1657 /*
1658 * Let the child rip
1659 */
e803cf97 1660 if (forks) {
20a8a3cf 1661 struct machine *machine = &session->machines.host;
e5bed564 1662 union perf_event *event;
e907caf3 1663 pid_t tgid;
e5bed564
NK
1664
1665 event = malloc(sizeof(event->comm) + machine->id_hdr_size);
1666 if (event == NULL) {
1667 err = -ENOMEM;
1668 goto out_child;
1669 }
1670
e803cf97
NK
1671 /*
1672 * Some H/W events are generated before COMM event
1673 * which is emitted during exec(), so perf script
1674 * cannot see a correct process name for those events.
1675 * Synthesize COMM event to prevent it.
1676 */
e907caf3
HB
1677 tgid = perf_event__synthesize_comm(tool, event,
1678 rec->evlist->workload.pid,
1679 process_synthesized_event,
1680 machine);
1681 free(event);
1682
1683 if (tgid == -1)
1684 goto out_child;
1685
1686 event = malloc(sizeof(event->namespaces) +
1687 (NR_NAMESPACES * sizeof(struct perf_ns_link_info)) +
1688 machine->id_hdr_size);
1689 if (event == NULL) {
1690 err = -ENOMEM;
1691 goto out_child;
1692 }
1693
1694 /*
1695 * Synthesize NAMESPACES event for the command specified.
1696 */
1697 perf_event__synthesize_namespaces(tool, event,
1698 rec->evlist->workload.pid,
1699 tgid, process_synthesized_event,
1700 machine);
e5bed564 1701 free(event);
e803cf97 1702
3e2be2da 1703 perf_evlist__start_workload(rec->evlist);
e803cf97 1704 }
856e9660 1705
6619a53e 1706 if (opts->initial_delay) {
0693e680 1707 usleep(opts->initial_delay * USEC_PER_MSEC);
1c87f165 1708 evlist__enable(rec->evlist);
6619a53e
AK
1709 }
1710
5f9cf599 1711 trigger_ready(&auxtrace_snapshot_trigger);
3c1cb7e3 1712 trigger_ready(&switch_output_trigger);
a074865e 1713 perf_hooks__invoke_record_start();
        for (;;) {
                unsigned long long hits = rec->samples;

                /*
                 * rec->evlist->bkw_mmap_state may be BKW_MMAP_EMPTY here:
                 * when done == true and hits != rec->samples in the
                 * previous round.
                 *
                 * perf_evlist__toggle_bkw_mmap() ensures we never convert
                 * BKW_MMAP_EMPTY to BKW_MMAP_DATA_PENDING.
                 */
                if (trigger_is_hit(&switch_output_trigger) || done || draining)
                        perf_evlist__toggle_bkw_mmap(rec->evlist, BKW_MMAP_DATA_PENDING);

                if (record__mmap_read_all(rec, false) < 0) {
                        trigger_error(&auxtrace_snapshot_trigger);
                        trigger_error(&switch_output_trigger);
                        err = -1;
                        goto out_child;
                }

                if (auxtrace_record__snapshot_started) {
                        auxtrace_record__snapshot_started = 0;
                        if (!trigger_is_error(&auxtrace_snapshot_trigger))
                                record__read_auxtrace_snapshot(rec, false);
                        if (trigger_is_error(&auxtrace_snapshot_trigger)) {
                                pr_err("AUX area tracing snapshot failed\n");
                                err = -1;
                                goto out_child;
                        }
                }

                if (trigger_is_hit(&switch_output_trigger)) {
                        /*
                         * If switch_output_trigger is hit, the data in the
                         * overwritable ring buffer should have been collected,
                         * so bkw_mmap_state should be set to BKW_MMAP_EMPTY.
                         *
                         * If SIGUSR2 is raised after or during
                         * record__mmap_read_all(), it didn't collect data
                         * from the overwritable ring buffer. Read again.
                         */
                        if (rec->evlist->bkw_mmap_state == BKW_MMAP_RUNNING)
                                continue;
                        trigger_ready(&switch_output_trigger);

                        /*
                         * Re-enable events in the overwrite ring buffer after
                         * record__mmap_read_all(): we should have collected
                         * data from it.
                         */
                        perf_evlist__toggle_bkw_mmap(rec->evlist, BKW_MMAP_RUNNING);

                        if (!quiet)
                                fprintf(stderr, "[ perf record: dump data: Woken up %ld times ]\n",
                                        waking);
                        waking = 0;
                        fd = record__switch_output(rec, false);
                        if (fd < 0) {
                                pr_err("Failed to switch to new file\n");
                                trigger_error(&switch_output_trigger);
                                err = fd;
                                goto out_child;
                        }

                        /* re-arm the alarm */
                        if (rec->switch_output.time)
                                alarm(rec->switch_output.time);
                }

                if (hits == rec->samples) {
                        if (done || draining)
                                break;
                        err = evlist__poll(rec->evlist, -1);
                        /*
                         * Propagate the error only if there is one: ignore a
                         * positive number of returned events and interrupted
                         * polls (EINTR).
                         */
                        if (err > 0 || (err < 0 && errno == EINTR))
                                err = 0;
                        waking++;

                        if (evlist__filter_pollfd(rec->evlist, POLLERR | POLLHUP) == 0)
                                draining = true;
                }

                /*
                 * When perf is starting the traced process, the events die
                 * with the process at the end and we wait for that, so there
                 * is no need to disable the events in this case.
                 */
                if (done && !disabled && !target__none(&opts->target)) {
                        trigger_off(&auxtrace_snapshot_trigger);
                        evlist__disable(rec->evlist);
                        disabled = true;
                }
        }

        trigger_off(&auxtrace_snapshot_trigger);
        trigger_off(&switch_output_trigger);

        if (opts->auxtrace_snapshot_on_exit)
                record__auxtrace_snapshot_exit(rec);

        if (forks && workload_exec_errno) {
                char msg[STRERR_BUFSIZE];
                const char *emsg = str_error_r(workload_exec_errno, msg, sizeof(msg));

                pr_err("Workload failed: %s\n", emsg);
                err = -1;
                goto out_child;
        }

        if (!quiet)
                fprintf(stderr, "[ perf record: Woken up %ld times to write data ]\n", waking);

        if (target__none(&rec->opts.target))
                record__synthesize_workload(rec, true);

out_child:
        record__mmap_read_all(rec, true);
        record__aio_mmap_read_sync(rec);

        if (rec->session->bytes_transferred && rec->session->bytes_compressed) {
                ratio = (float)rec->session->bytes_transferred / (float)rec->session->bytes_compressed;
                session->header.env.comp_ratio = ratio + 0.5;
        }

        if (forks) {
                int exit_status;

                if (!child_finished)
                        kill(rec->evlist->workload.pid, SIGTERM);

                wait(&exit_status);

                if (err < 0)
                        status = err;
                else if (WIFEXITED(exit_status))
                        status = WEXITSTATUS(exit_status);
                else if (WIFSIGNALED(exit_status))
                        signr = WTERMSIG(exit_status);
        } else
                status = err;

        record__synthesize(rec, true);
        /* this will be recalculated during process_buildids() */
        rec->samples = 0;

        if (!err) {
                if (!rec->timestamp_filename) {
                        record__finish_output(rec);
                } else {
                        fd = record__switch_output(rec, true);
                        if (fd < 0) {
                                status = fd;
                                goto out_delete_session;
                        }
                }
        }

        perf_hooks__invoke_record_end();

        if (!err && !quiet) {
                char samples[128];
                const char *postfix = rec->timestamp_filename ?
                                        ".<timestamp>" : "";

                if (rec->samples && !rec->opts.full_auxtrace)
                        scnprintf(samples, sizeof(samples),
                                  " (%" PRIu64 " samples)", rec->samples);
                else
                        samples[0] = '\0';

                fprintf(stderr, "[ perf record: Captured and wrote %.3f MB %s%s%s",
                        perf_data__size(data) / 1024.0 / 1024.0,
                        data->path, postfix, samples);
                if (ratio) {
                        fprintf(stderr, ", compressed (original %.3f MB, ratio is %.3f)",
                                rec->session->bytes_transferred / 1024.0 / 1024.0,
                                ratio);
                }
                fprintf(stderr, " ]\n");
        }

out_delete_session:
        zstd_fini(&session->zstd_data);
        perf_session__delete(session);

        if (!opts->no_bpf_event)
                perf_evlist__stop_sb_thread(rec->sb_evlist);
        return status;
}
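
/*
 * A sketch of the trigger lifecycle from util/trigger.h as used above:
 * trigger_ready() arms a trigger, an asynchronous source (e.g. SIGUSR2 or
 * SIGALRM) moves it to the hit state via trigger_hit(), the main loop
 * observes trigger_is_hit(), acts on it and re-arms with trigger_ready(),
 * while trigger_error() and trigger_off() park it on failure or shutdown.
 */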

static void callchain_debug(struct callchain_param *callchain)
{
        static const char *str[CALLCHAIN_MAX] = { "NONE", "FP", "DWARF", "LBR" };

        pr_debug("callchain: type %s\n", str[callchain->record_mode]);

        if (callchain->record_mode == CALLCHAIN_DWARF)
                pr_debug("callchain: stack dump size %d\n",
                         callchain->dump_size);
}

int record_opts__parse_callchain(struct record_opts *record,
                                 struct callchain_param *callchain,
                                 const char *arg, bool unset)
{
        int ret;
        callchain->enabled = !unset;

        /* --no-call-graph */
        if (unset) {
                callchain->record_mode = CALLCHAIN_NONE;
                pr_debug("callchain: disabled\n");
                return 0;
        }

        ret = parse_callchain_record_opt(arg, callchain);
        if (!ret) {
                /* Enable data address sampling for DWARF unwind. */
                if (callchain->record_mode == CALLCHAIN_DWARF)
                        record->sample_address = true;
                callchain_debug(callchain);
        }

        return ret;
}
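
/*
 * Example: "--call-graph dwarf,4096" should make parse_callchain_record_opt()
 * select CALLCHAIN_DWARF with a 4096 byte stack dump, after which the DWARF
 * branch above also enables record->sample_address so the unwinder gets the
 * sample data it needs.
 */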

int record_parse_callchain_opt(const struct option *opt,
                               const char *arg,
                               int unset)
{
        return record_opts__parse_callchain(opt->value, &callchain_param, arg, unset);
}

int record_callchain_opt(const struct option *opt,
                         const char *arg __maybe_unused,
                         int unset __maybe_unused)
{
        struct callchain_param *callchain = opt->value;

        callchain->enabled = true;

        if (callchain->record_mode == CALLCHAIN_NONE)
                callchain->record_mode = CALLCHAIN_FP;

        callchain_debug(callchain);
        return 0;
}

static int perf_record_config(const char *var, const char *value, void *cb)
{
        struct record *rec = cb;

        if (!strcmp(var, "record.build-id")) {
                if (!strcmp(value, "cache"))
                        rec->no_buildid_cache = false;
                else if (!strcmp(value, "no-cache"))
                        rec->no_buildid_cache = true;
                else if (!strcmp(value, "skip"))
                        rec->no_buildid = true;
                else
                        return -1;
                return 0;
        }
        if (!strcmp(var, "record.call-graph")) {
                var = "call-graph.record-mode";
                return perf_default_config(var, value, cb);
        }
#ifdef HAVE_AIO_SUPPORT
        if (!strcmp(var, "record.aio")) {
                rec->opts.nr_cblocks = strtol(value, NULL, 0);
                if (!rec->opts.nr_cblocks)
                        rec->opts.nr_cblocks = nr_cblocks_default;
        }
#endif

        return 0;
}
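
/*
 * For instance, a ~/.perfconfig along these lines exercises all three keys
 * handled above:
 *
 *	[record]
 *		build-id = no-cache
 *		call-graph = dwarf
 *		aio = 4
 */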

struct clockid_map {
        const char *name;
        int clockid;
};

#define CLOCKID_MAP(n, c)	\
        { .name = n, .clockid = (c), }

#define CLOCKID_END	{ .name = NULL, }


/*
 * Add the missing ones, we need to build on many distros...
 */
#ifndef CLOCK_MONOTONIC_RAW
#define CLOCK_MONOTONIC_RAW 4
#endif
#ifndef CLOCK_BOOTTIME
#define CLOCK_BOOTTIME 7
#endif
#ifndef CLOCK_TAI
#define CLOCK_TAI 11
#endif

static const struct clockid_map clockids[] = {
        /* available for all events, NMI safe */
        CLOCKID_MAP("monotonic", CLOCK_MONOTONIC),
        CLOCKID_MAP("monotonic_raw", CLOCK_MONOTONIC_RAW),

        /* available for some events */
        CLOCKID_MAP("realtime", CLOCK_REALTIME),
        CLOCKID_MAP("boottime", CLOCK_BOOTTIME),
        CLOCKID_MAP("tai", CLOCK_TAI),

        /* available for the lazy */
        CLOCKID_MAP("mono", CLOCK_MONOTONIC),
        CLOCKID_MAP("raw", CLOCK_MONOTONIC_RAW),
        CLOCKID_MAP("real", CLOCK_REALTIME),
        CLOCKID_MAP("boot", CLOCK_BOOTTIME),

        CLOCKID_END,
};

static int get_clockid_res(clockid_t clk_id, u64 *res_ns)
{
        struct timespec res;

        *res_ns = 0;
        if (!clock_getres(clk_id, &res))
                *res_ns = res.tv_nsec + res.tv_sec * NSEC_PER_SEC;
        else
                pr_warning("WARNING: Failed to determine specified clock resolution.\n");

        return 0;
}

static int parse_clockid(const struct option *opt, const char *str, int unset)
{
        struct record_opts *opts = (struct record_opts *)opt->value;
        const struct clockid_map *cm;
        const char *ostr = str;

        if (unset) {
                opts->use_clockid = 0;
                return 0;
        }

        /* no arg passed */
        if (!str)
                return 0;

        /* no setting it twice */
        if (opts->use_clockid)
                return -1;

        opts->use_clockid = true;

        /* if it's a number, we're done */
        if (sscanf(str, "%d", &opts->clockid) == 1)
                return get_clockid_res(opts->clockid, &opts->clockid_res_ns);

        /* allow a "CLOCK_" prefix to the name */
        if (!strncasecmp(str, "CLOCK_", 6))
                str += 6;

        for (cm = clockids; cm->name; cm++) {
                if (!strcasecmp(str, cm->name)) {
                        opts->clockid = cm->clockid;
                        return get_clockid_res(opts->clockid,
                                               &opts->clockid_res_ns);
                }
        }

        opts->use_clockid = false;
        ui__warning("unknown clockid %s, check man page\n", ostr);
        return -1;
}
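
/*
 * Example: "-k monotonic_raw", "-k CLOCK_MONOTONIC_RAW" and "-k 4" all end
 * up selecting CLOCK_MONOTONIC_RAW here: a plain number is taken as the raw
 * clockid, otherwise the optional CLOCK_ prefix is stripped and the name is
 * matched case-insensitively against the clockids[] table.
 */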

static int record__parse_affinity(const struct option *opt, const char *str, int unset)
{
        struct record_opts *opts = (struct record_opts *)opt->value;

        if (unset || !str)
                return 0;

        if (!strcasecmp(str, "node"))
                opts->affinity = PERF_AFFINITY_NODE;
        else if (!strcasecmp(str, "cpu"))
                opts->affinity = PERF_AFFINITY_CPU;

        return 0;
}
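
/*
 * Example: "--affinity=node" pins the trace reading thread to the NUMA node
 * of the mmap buffer being flushed, "--affinity=cpu" to that buffer's cpu;
 * any other string silently keeps the PERF_AFFINITY_SYS default.
 */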

static int parse_output_max_size(const struct option *opt,
                                 const char *str, int unset)
{
        unsigned long *s = (unsigned long *)opt->value;
        static struct parse_tag tags_size[] = {
                { .tag = 'B', .mult = 1 },
                { .tag = 'K', .mult = 1 << 10 },
                { .tag = 'M', .mult = 1 << 20 },
                { .tag = 'G', .mult = 1 << 30 },
                { .tag = 0 },
        };
        unsigned long val;

        if (unset) {
                *s = 0;
                return 0;
        }

        val = parse_tag_value(str, tags_size);
        if (val != (unsigned long) -1) {
                *s = val;
                return 0;
        }

        return -1;
}
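
/*
 * Example: "--max-size=1G" stores 1 << 30 in record.output_max_size, while
 * a value without a valid B/K/M/G suffix makes parse_tag_value() return
 * (unsigned long) -1, so the option is rejected with -1 above.
 */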

static int record__parse_mmap_pages(const struct option *opt,
                                    const char *str,
                                    int unset __maybe_unused)
{
        struct record_opts *opts = opt->value;
        char *s, *p;
        unsigned int mmap_pages;
        int ret;

        if (!str)
                return -EINVAL;

        s = strdup(str);
        if (!s)
                return -ENOMEM;

        p = strchr(s, ',');
        if (p)
                *p = '\0';

        if (*s) {
                ret = __perf_evlist__parse_mmap_pages(&mmap_pages, s);
                if (ret)
                        goto out_free;
                opts->mmap_pages = mmap_pages;
        }

        if (!p) {
                ret = 0;
                goto out_free;
        }

        ret = __perf_evlist__parse_mmap_pages(&mmap_pages, p + 1);
        if (ret)
                goto out_free;

        opts->auxtrace_mmap_pages = mmap_pages;

out_free:
        free(s);
        return ret;
}
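
/*
 * Example: "-m 512,64" is split at the comma: 512 pages go to
 * opts->mmap_pages and 64 pages to opts->auxtrace_mmap_pages, a plain
 * "-m 512" leaves the AUX area mmap size unchanged, and "-m ,64" sets only
 * the AUX part.
 */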

static void switch_output_size_warn(struct record *rec)
{
        u64 wakeup_size = evlist__mmap_size(rec->opts.mmap_pages);
        struct switch_output *s = &rec->switch_output;

        wakeup_size /= 2;

        if (s->size < wakeup_size) {
                char buf[100];

                unit_number__scnprintf(buf, sizeof(buf), wakeup_size);
                pr_warning("WARNING: switch-output data size lower than "
                           "wakeup kernel buffer size (%s), "
                           "expect bigger perf.data sizes\n", buf);
        }
}

static int switch_output_setup(struct record *rec)
{
        struct switch_output *s = &rec->switch_output;
        static struct parse_tag tags_size[] = {
                { .tag = 'B', .mult = 1 },
                { .tag = 'K', .mult = 1 << 10 },
                { .tag = 'M', .mult = 1 << 20 },
                { .tag = 'G', .mult = 1 << 30 },
                { .tag = 0 },
        };
        static struct parse_tag tags_time[] = {
                { .tag = 's', .mult = 1 },
                { .tag = 'm', .mult = 60 },
                { .tag = 'h', .mult = 60*60 },
                { .tag = 'd', .mult = 60*60*24 },
                { .tag = 0 },
        };
        unsigned long val;

        /*
         * If we're using --switch-output-events, then we imply
         * --switch-output=signal, as we'll send a SIGUSR2 from the side band
         * thread to its parent.
         */
        if (rec->switch_output_event_set)
                goto do_signal;

        if (!s->set)
                return 0;

        if (!strcmp(s->str, "signal")) {
do_signal:
                s->signal = true;
                pr_debug("switch-output with SIGUSR2 signal\n");
                goto enabled;
        }

        val = parse_tag_value(s->str, tags_size);
        if (val != (unsigned long) -1) {
                s->size = val;
                pr_debug("switch-output with %s size threshold\n", s->str);
                goto enabled;
        }

        val = parse_tag_value(s->str, tags_time);
        if (val != (unsigned long) -1) {
                s->time = val;
                pr_debug("switch-output with %s time threshold (%lu seconds)\n",
                         s->str, s->time);
                goto enabled;
        }

        return -1;

enabled:
        rec->timestamp_filename = true;
        s->enabled = true;

        if (s->size && !rec->opts.no_buffering)
                switch_output_size_warn(rec);

        return 0;
}
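
/*
 * Example: "--switch-output=signal" rotates the output file on SIGUSR2,
 * "--switch-output=2G" after roughly 2GiB of data, and
 * "--switch-output=30m" every 30 minutes; all three paths land in
 * 'enabled:' above and so also imply --timestamp-filename, giving each
 * rotated file a distinct name.
 */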

static const char * const __record_usage[] = {
        "perf record [<options>] [<command>]",
        "perf record [<options>] -- <command> [<options>]",
        NULL
};
const char * const *record_usage = __record_usage;

static int build_id__process_mmap(struct perf_tool *tool, union perf_event *event,
                                  struct perf_sample *sample, struct machine *machine)
{
        /*
         * We already have the kernel maps, put in place via
         * perf_session__create_kernel_maps(), no need to add them twice.
         */
        if (!(event->header.misc & PERF_RECORD_MISC_USER))
                return 0;
        return perf_event__process_mmap(tool, event, sample, machine);
}

static int build_id__process_mmap2(struct perf_tool *tool, union perf_event *event,
                                   struct perf_sample *sample, struct machine *machine)
{
        /*
         * We already have the kernel maps, put in place via
         * perf_session__create_kernel_maps(), no need to add them twice.
         */
        if (!(event->header.misc & PERF_RECORD_MISC_USER))
                return 0;

        return perf_event__process_mmap2(tool, event, sample, machine);
}

/*
 * XXX Ideally this would be local to cmd_record() and passed to a record__new
 * because we need to have access to it in record__exit, that is called
 * after cmd_record() exits, but since record_options need to be accessible to
 * builtin-script, leave it here.
 *
 * At least we don't touch it in all the other functions here directly.
 *
 * Just say no to tons of global variables, sigh.
 */
static struct record record = {
        .opts = {
                .sample_time         = true,
                .mmap_pages          = UINT_MAX,
                .user_freq           = UINT_MAX,
                .user_interval       = ULLONG_MAX,
                .freq                = 4000,
                .target              = {
                        .uses_mmap   = true,
                        .default_per_cpu = true,
                },
                .mmap_flush          = MMAP_FLUSH_DEFAULT,
                .nr_threads_synthesize = 1,
        },
        .tool = {
                .sample         = process_sample_event,
                .fork           = perf_event__process_fork,
                .exit           = perf_event__process_exit,
                .comm           = perf_event__process_comm,
                .namespaces     = perf_event__process_namespaces,
                .mmap           = build_id__process_mmap,
                .mmap2          = build_id__process_mmap2,
                .ordered_events = true,
        },
};

const char record_callchain_help[] = CALLCHAIN_RECORD_HELP
        "\n\t\t\t\tDefault: fp";

static bool dry_run;

/*
 * XXX Will stay a global variable till we fix builtin-script.c to stop messing
 * with it and switch to use the library functions in perf_evlist that came
 * from builtin-record.c, i.e. use record_opts,
 * perf_evlist__prepare_workload, etc instead of fork+exec'in 'perf record',
 * using pipes, etc.
 */
static struct option __record_options[] = {
        OPT_CALLBACK('e', "event", &record.evlist, "event",
                     "event selector. use 'perf list' to list available events",
                     parse_events_option),
        OPT_CALLBACK(0, "filter", &record.evlist, "filter",
                     "event filter", parse_filter),
        OPT_CALLBACK_NOOPT(0, "exclude-perf", &record.evlist,
                           NULL, "don't record events from perf itself",
                           exclude_perf),
        OPT_STRING('p', "pid", &record.opts.target.pid, "pid",
                   "record events on existing process id"),
        OPT_STRING('t', "tid", &record.opts.target.tid, "tid",
                   "record events on existing thread id"),
        OPT_INTEGER('r', "realtime", &record.realtime_prio,
                    "collect data with this RT SCHED_FIFO priority"),
        OPT_BOOLEAN(0, "no-buffering", &record.opts.no_buffering,
                    "collect data without buffering"),
        OPT_BOOLEAN('R', "raw-samples", &record.opts.raw_samples,
                    "collect raw sample records from all opened counters"),
        OPT_BOOLEAN('a', "all-cpus", &record.opts.target.system_wide,
                    "system-wide collection from all CPUs"),
        OPT_STRING('C', "cpu", &record.opts.target.cpu_list, "cpu",
                   "list of cpus to monitor"),
        OPT_U64('c', "count", &record.opts.user_interval, "event period to sample"),
        OPT_STRING('o', "output", &record.data.path, "file",
                   "output file name"),
        OPT_BOOLEAN_SET('i', "no-inherit", &record.opts.no_inherit,
                        &record.opts.no_inherit_set,
                        "child tasks do not inherit counters"),
        OPT_BOOLEAN(0, "tail-synthesize", &record.opts.tail_synthesize,
                    "synthesize non-sample events at the end of output"),
        OPT_BOOLEAN(0, "overwrite", &record.opts.overwrite, "use overwrite mode"),
        OPT_BOOLEAN(0, "no-bpf-event", &record.opts.no_bpf_event, "do not record bpf events"),
        OPT_BOOLEAN(0, "strict-freq", &record.opts.strict_freq,
                    "Fail if the specified frequency can't be used"),
        OPT_CALLBACK('F', "freq", &record.opts, "freq or 'max'",
                     "profile at this frequency",
                     record__parse_freq),
        OPT_CALLBACK('m', "mmap-pages", &record.opts, "pages[,pages]",
                     "number of mmap data pages and AUX area tracing mmap pages",
                     record__parse_mmap_pages),
        OPT_CALLBACK(0, "mmap-flush", &record.opts, "number",
                     "Minimal number of bytes that is extracted from mmap data pages (default: 1)",
                     record__mmap_flush_parse),
        OPT_BOOLEAN(0, "group", &record.opts.group,
                    "put the counters into a counter group"),
        OPT_CALLBACK_NOOPT('g', NULL, &callchain_param,
                           NULL, "enables call-graph recording",
                           &record_callchain_opt),
        OPT_CALLBACK(0, "call-graph", &record.opts,
                     "record_mode[,record_size]", record_callchain_help,
                     &record_parse_callchain_opt),
        OPT_INCR('v', "verbose", &verbose,
                 "be more verbose (show counter open errors, etc)"),
        OPT_BOOLEAN('q', "quiet", &quiet, "don't print any message"),
        OPT_BOOLEAN('s', "stat", &record.opts.inherit_stat,
                    "per thread counts"),
        OPT_BOOLEAN('d', "data", &record.opts.sample_address, "Record the sample addresses"),
        OPT_BOOLEAN(0, "phys-data", &record.opts.sample_phys_addr,
                    "Record the sample physical addresses"),
        OPT_BOOLEAN(0, "sample-cpu", &record.opts.sample_cpu, "Record the sample cpu"),
        OPT_BOOLEAN_SET('T', "timestamp", &record.opts.sample_time,
                        &record.opts.sample_time_set,
                        "Record the sample timestamps"),
        OPT_BOOLEAN_SET('P', "period", &record.opts.period, &record.opts.period_set,
                        "Record the sample period"),
        OPT_BOOLEAN('n', "no-samples", &record.opts.no_samples,
                    "don't sample"),
        OPT_BOOLEAN_SET('N', "no-buildid-cache", &record.no_buildid_cache,
                        &record.no_buildid_cache_set,
                        "do not update the buildid cache"),
        OPT_BOOLEAN_SET('B', "no-buildid", &record.no_buildid,
                        &record.no_buildid_set,
                        "do not collect buildids in perf.data"),
        OPT_CALLBACK('G', "cgroup", &record.evlist, "name",
                     "monitor event in cgroup name only",
                     parse_cgroups),
        OPT_UINTEGER('D', "delay", &record.opts.initial_delay,
                     "ms to wait before starting measurement after program start"),
        OPT_BOOLEAN(0, "kcore", &record.opts.kcore, "copy /proc/kcore"),
        OPT_STRING('u', "uid", &record.opts.target.uid_str, "user",
                   "user to profile"),

        OPT_CALLBACK_NOOPT('b', "branch-any", &record.opts.branch_stack,
                           "branch any", "sample any taken branches",
                           parse_branch_stack),

        OPT_CALLBACK('j', "branch-filter", &record.opts.branch_stack,
                     "branch filter mask", "branch stack filter modes",
                     parse_branch_stack),
        OPT_BOOLEAN('W', "weight", &record.opts.sample_weight,
                    "sample by weight (on special events only)"),
        OPT_BOOLEAN(0, "transaction", &record.opts.sample_transaction,
                    "sample transaction flags (special events only)"),
        OPT_BOOLEAN(0, "per-thread", &record.opts.target.per_thread,
                    "use per-thread mmaps"),
        OPT_CALLBACK_OPTARG('I', "intr-regs", &record.opts.sample_intr_regs, NULL, "any register",
                            "sample selected machine registers on interrupt,"
                            " use '-I?' to list register names", parse_intr_regs),
        OPT_CALLBACK_OPTARG(0, "user-regs", &record.opts.sample_user_regs, NULL, "any register",
                            "sample selected machine registers on interrupt,"
                            " use '--user-regs=?' to list register names", parse_user_regs),
        OPT_BOOLEAN(0, "running-time", &record.opts.running_time,
                    "Record running/enabled time of read (:S) events"),
        OPT_CALLBACK('k', "clockid", &record.opts,
                     "clockid", "clockid to use for events, see clock_gettime()",
                     parse_clockid),
        OPT_STRING_OPTARG('S', "snapshot", &record.opts.auxtrace_snapshot_opts,
                          "opts", "AUX area tracing Snapshot Mode", ""),
        OPT_STRING_OPTARG(0, "aux-sample", &record.opts.auxtrace_sample_opts,
                          "opts", "sample AUX area", ""),
        OPT_UINTEGER(0, "proc-map-timeout", &proc_map_timeout,
                     "per thread proc mmap processing timeout in ms"),
        OPT_BOOLEAN(0, "namespaces", &record.opts.record_namespaces,
                    "Record namespaces events"),
        OPT_BOOLEAN(0, "all-cgroups", &record.opts.record_cgroup,
                    "Record cgroup events"),
        OPT_BOOLEAN(0, "switch-events", &record.opts.record_switch_events,
                    "Record context switch events"),
        OPT_BOOLEAN_FLAG(0, "all-kernel", &record.opts.all_kernel,
                         "Configure all used events to run in kernel space.",
                         PARSE_OPT_EXCLUSIVE),
        OPT_BOOLEAN_FLAG(0, "all-user", &record.opts.all_user,
                         "Configure all used events to run in user space.",
                         PARSE_OPT_EXCLUSIVE),
        OPT_BOOLEAN(0, "kernel-callchains", &record.opts.kernel_callchains,
                    "collect kernel callchains"),
        OPT_BOOLEAN(0, "user-callchains", &record.opts.user_callchains,
                    "collect user callchains"),
        OPT_STRING(0, "clang-path", &llvm_param.clang_path, "clang path",
                   "clang binary to use for compiling BPF scriptlets"),
        OPT_STRING(0, "clang-opt", &llvm_param.clang_opt, "clang options",
                   "options passed to clang when compiling BPF scriptlets"),
        OPT_STRING(0, "vmlinux", &symbol_conf.vmlinux_name,
                   "file", "vmlinux pathname"),
        OPT_BOOLEAN(0, "buildid-all", &record.buildid_all,
                    "Record build-id of all DSOs regardless of hits"),
        OPT_BOOLEAN(0, "timestamp-filename", &record.timestamp_filename,
                    "append timestamp to output filename"),
        OPT_BOOLEAN(0, "timestamp-boundary", &record.timestamp_boundary,
                    "Record timestamp boundary (time of first/last samples)"),
        OPT_STRING_OPTARG_SET(0, "switch-output", &record.switch_output.str,
                          &record.switch_output.set, "signal or size[BKMG] or time[smhd]",
                          "Switch output when receiving SIGUSR2 (signal) or cross a size or time threshold",
                          "signal"),
        OPT_CALLBACK_SET(0, "switch-output-event", &record.sb_evlist, &record.switch_output_event_set, "switch output event",
                         "switch output event selector. use 'perf list' to list available events",
                         parse_events_option_new_evlist),
        OPT_INTEGER(0, "switch-max-files", &record.switch_output.num_files,
                    "Limit number of switch output generated files"),
        OPT_BOOLEAN(0, "dry-run", &dry_run,
                    "Parse options then exit"),
#ifdef HAVE_AIO_SUPPORT
        OPT_CALLBACK_OPTARG(0, "aio", &record.opts,
                            &nr_cblocks_default, "n", "Use <n> control blocks in asynchronous trace writing mode (default: 1, max: 4)",
                            record__aio_parse),
#endif
        OPT_CALLBACK(0, "affinity", &record.opts, "node|cpu",
                     "Set affinity mask of trace reading thread to NUMA node cpu mask or cpu of processed mmap buffer",
                     record__parse_affinity),
#ifdef HAVE_ZSTD_SUPPORT
        OPT_CALLBACK_OPTARG('z', "compression-level", &record.opts, &comp_level_default,
                            "n", "Compressed records using specified level (default: 1 - fastest compression, 22 - greatest compression)",
                            record__parse_comp_level),
#endif
        OPT_CALLBACK(0, "max-size", &record.output_max_size,
                     "size", "Limit the maximum size of the output file", parse_output_max_size),
        OPT_UINTEGER(0, "num-thread-synthesize",
                     &record.opts.nr_threads_synthesize,
                     "number of threads to run for event synthesis"),
        OPT_END()
};
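
/*
 * A few illustrative invocations wired up by the table above:
 *
 *	perf record -F 4000 -g -- ./workload
 *	perf record -a --switch-output=1G --switch-max-files=8
 *	perf record -e cycles:u -C 0-3 -o cpu03.data -- sleep 10
 */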

struct option *record_options = __record_options;

int cmd_record(int argc, const char **argv)
{
        int err;
        struct record *rec = &record;
        char errbuf[BUFSIZ];

        setlocale(LC_ALL, "");

#ifndef HAVE_LIBBPF_SUPPORT
# define set_nobuild(s, l, c) set_option_nobuild(record_options, s, l, "NO_LIBBPF=1", c)
        set_nobuild('\0', "clang-path", true);
        set_nobuild('\0', "clang-opt", true);
# undef set_nobuild
#endif

#ifndef HAVE_BPF_PROLOGUE
# if !defined (HAVE_DWARF_SUPPORT)
#  define REASON  "NO_DWARF=1"
# elif !defined (HAVE_LIBBPF_SUPPORT)
#  define REASON  "NO_LIBBPF=1"
# else
#  define REASON  "this architecture doesn't support BPF prologue"
# endif
# define set_nobuild(s, l, c) set_option_nobuild(record_options, s, l, REASON, c)
        set_nobuild('\0', "vmlinux", true);
# undef set_nobuild
# undef REASON
#endif

        rec->opts.affinity = PERF_AFFINITY_SYS;

        rec->evlist = evlist__new();
        if (rec->evlist == NULL)
                return -ENOMEM;

        err = perf_config(perf_record_config, rec);
        if (err)
                return err;

        argc = parse_options(argc, argv, record_options, record_usage,
                             PARSE_OPT_STOP_AT_NON_OPTION);
        if (quiet)
                perf_quiet_option();

        /* Make system wide (-a) the default target. */
        if (!argc && target__none(&rec->opts.target))
                rec->opts.target.system_wide = true;

        if (nr_cgroups && !rec->opts.target.system_wide) {
                usage_with_options_msg(record_usage, record_options,
                        "cgroup monitoring only available in system-wide mode");
        }

        if (rec->opts.kcore)
                rec->data.is_dir = true;

        if (rec->opts.comp_level != 0) {
                pr_debug("Compression enabled, disabling build id collection at the end of the session.\n");
                rec->no_buildid = true;
        }

        if (rec->opts.record_switch_events &&
            !perf_can_record_switch_events()) {
                ui__error("kernel does not support recording context switch events\n");
                parse_options_usage(record_usage, record_options, "switch-events", 0);
                return -EINVAL;
        }

        if (switch_output_setup(rec)) {
                parse_options_usage(record_usage, record_options, "switch-output", 0);
                return -EINVAL;
        }

        if (rec->switch_output.time) {
                signal(SIGALRM, alarm_sig_handler);
                alarm(rec->switch_output.time);
        }

        if (rec->switch_output.num_files) {
                rec->switch_output.filenames = calloc(rec->switch_output.num_files,
                                                      sizeof(char *));
                if (!rec->switch_output.filenames)
                        return -EINVAL;
        }

        /*
         * Allow aliases to facilitate the lookup of symbols for address
         * filters. Refer to auxtrace_parse_filters().
         */
        symbol_conf.allow_aliases = true;

        symbol__init(NULL);

        if (rec->opts.affinity != PERF_AFFINITY_SYS) {
                rec->affinity_mask.nbits = cpu__max_cpu();
                rec->affinity_mask.bits = bitmap_alloc(rec->affinity_mask.nbits);
                if (!rec->affinity_mask.bits) {
                        pr_err("Failed to allocate thread mask for %zd cpus\n", rec->affinity_mask.nbits);
                        return -ENOMEM;
                }
                pr_debug2("thread mask[%zd]: empty\n", rec->affinity_mask.nbits);
        }

        err = record__auxtrace_init(rec);
        if (err)
                goto out;

        if (dry_run)
                goto out;

        err = bpf__setup_stdout(rec->evlist);
        if (err) {
                bpf__strerror_setup_stdout(rec->evlist, err, errbuf, sizeof(errbuf));
                pr_err("ERROR: Setup BPF stdout failed: %s\n",
                       errbuf);
                goto out;
        }

        err = -ENOMEM;

        if (rec->no_buildid_cache || rec->no_buildid) {
                disable_buildid_cache();
        } else if (rec->switch_output.enabled) {
                /*
                 * In 'perf record --switch-output', disable buildid
                 * generation by default to reduce data file switching
                 * overhead. Still generate buildids if they are explicitly
                 * required, using:
                 *
                 * perf record --switch-output --no-no-buildid \
                 *	      --no-no-buildid-cache
                 *
                 * The following code is equivalent to:
                 *
                 * if ((rec->no_buildid || !rec->no_buildid_set) &&
                 *     (rec->no_buildid_cache || !rec->no_buildid_cache_set))
                 *         disable_buildid_cache();
                 */
                bool disable = true;

                if (rec->no_buildid_set && !rec->no_buildid)
                        disable = false;
                if (rec->no_buildid_cache_set && !rec->no_buildid_cache)
                        disable = false;
                if (disable) {
                        rec->no_buildid = true;
                        rec->no_buildid_cache = true;
                        disable_buildid_cache();
                }
        }

        if (record.opts.overwrite)
                record.opts.tail_synthesize = true;

        if (rec->evlist->core.nr_entries == 0 &&
            __perf_evlist__add_default(rec->evlist, !record.opts.no_samples) < 0) {
                pr_err("Not enough memory for event selector list\n");
                goto out;
        }

        if (rec->opts.target.tid && !rec->opts.no_inherit_set)
                rec->opts.no_inherit = true;

        err = target__validate(&rec->opts.target);
        if (err) {
                target__strerror(&rec->opts.target, err, errbuf, BUFSIZ);
                ui__warning("%s\n", errbuf);
        }

        err = target__parse_uid(&rec->opts.target);
        if (err) {
                int saved_errno = errno;

                target__strerror(&rec->opts.target, err, errbuf, BUFSIZ);
                ui__error("%s", errbuf);

                err = -saved_errno;
                goto out;
        }

        /* Enable ignoring missing threads when -u/-p option is defined. */
        rec->opts.ignore_missing_thread = rec->opts.target.uid != UINT_MAX || rec->opts.target.pid;

        err = -ENOMEM;
        if (perf_evlist__create_maps(rec->evlist, &rec->opts.target) < 0)
                usage_with_options(record_usage, record_options);

        err = auxtrace_record__options(rec->itr, rec->evlist, &rec->opts);
        if (err)
                goto out;

        /*
         * We take all buildids when the file contains AUX area tracing data
         * because we do not decode the trace, which would take too long.
         */
        if (rec->opts.full_auxtrace)
                rec->buildid_all = true;

        if (record_opts__config(&rec->opts)) {
                err = -EINVAL;
                goto out;
        }

        if (rec->opts.nr_cblocks > nr_cblocks_max)
                rec->opts.nr_cblocks = nr_cblocks_max;
        pr_debug("nr_cblocks: %d\n", rec->opts.nr_cblocks);

        pr_debug("affinity: %s\n", affinity_tags[rec->opts.affinity]);
        pr_debug("mmap flush: %d\n", rec->opts.mmap_flush);

        if (rec->opts.comp_level > comp_level_max)
                rec->opts.comp_level = comp_level_max;
        pr_debug("comp level: %d\n", rec->opts.comp_level);

        err = __cmd_record(&record, argc, argv);
out:
        bitmap_free(rec->affinity_mask.bits);
        evlist__delete(rec->evlist);
        symbol__exit();
        auxtrace_record__free(rec->itr);
        return err;
}

static void snapshot_sig_handler(int sig __maybe_unused)
{
        struct record *rec = &record;

        if (trigger_is_ready(&auxtrace_snapshot_trigger)) {
                trigger_hit(&auxtrace_snapshot_trigger);
                auxtrace_record__snapshot_started = 1;
                if (auxtrace_record__snapshot_start(record.itr))
                        trigger_error(&auxtrace_snapshot_trigger);
        }

        if (switch_output_signal(rec))
                trigger_hit(&switch_output_trigger);
}

static void alarm_sig_handler(int sig __maybe_unused)
{
        struct record *rec = &record;

        if (switch_output_time(rec))
                trigger_hit(&switch_output_trigger);
}
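
/*
 * Both handlers are driven from outside the main loop: for example,
 * "kill -USR2 $(pidof perf)" runs snapshot_sig_handler() to request an AUX
 * area snapshot or an output switch, while the alarm(2) armed in
 * switch_output_setup() fires alarm_sig_handler() for the time-based
 * --switch-output mode.
 */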