perf tools: Fix segfault accessing sample_id xyarray
[linux-2.6-block.git] / tools/perf/builtin-record.c
b2441318 1// SPDX-License-Identifier: GPL-2.0
abaff32a 2/*
bf9e1876
IM
3 * builtin-record.c
4 *
5 * Builtin record command: Record the profile of a workload
6 * (or a CPU, or a PID) into the perf.data output file - for
7 * later analysis via perf report.
abaff32a 8 */
16f762a2 9#include "builtin.h"
bf9e1876 10
6122e4e4 11#include "util/build-id.h"
4b6ab94e 12#include <subcmd/parse-options.h>
8ad8db37 13#include "util/parse-events.h"
41840d21 14#include "util/config.h"
6eda5838 15
8f651eae 16#include "util/callchain.h"
f14d5707 17#include "util/cgroup.h"
7c6a1c65 18#include "util/header.h"
66e274f3 19#include "util/event.h"
361c99a6 20#include "util/evlist.h"
69aad6f1 21#include "util/evsel.h"
8f28827a 22#include "util/debug.h"
e0fcfb08 23#include "util/mmap.h"
aeb00b1a 24#include "util/target.h"
94c744b6 25#include "util/session.h"
45694aa7 26#include "util/tool.h"
8d06367f 27#include "util/symbol.h"
aeb00b1a 28#include "util/record.h"
a12b51c4 29#include "util/cpumap.h"
fd78260b 30#include "util/thread_map.h"
f5fc1412 31#include "util/data.h"
bcc84ec6 32#include "util/perf_regs.h"
ef149c25 33#include "util/auxtrace.h"
46bc29b9 34#include "util/tsc.h"
f00898f4 35#include "util/parse-branch-options.h"
bcc84ec6 36#include "util/parse-regs-options.h"
40c7d246 37#include "util/perf_api_probe.h"
71dc2326 38#include "util/llvm-utils.h"
8690a2a7 39#include "util/bpf-loader.h"
5f9cf599 40#include "util/trigger.h"
a074865e 41#include "util/perf-hooks.h"
f13de660 42#include "util/cpu-set-sched.h"
ea49e01c 43#include "util/synthetic-events.h"
c5e4027e 44#include "util/time-utils.h"
58db1d6e 45#include "util/units.h"
7b612e29 46#include "util/bpf-event.h"
d99c22ea 47#include "util/util.h"
70943490 48#include "util/pfm.h"
6953beb4 49#include "util/clockid.h"
b53a0755
JY
50#include "util/pmu-hybrid.h"
51#include "util/evlist-hybrid.h"
d8871ea7 52#include "asm/bug.h"
c1a604df 53#include "perf.h"
f466e5ed 54#include "cputopo.h"
7c6a1c65 55
a43783ae 56#include <errno.h>
fd20e811 57#include <inttypes.h>
67230479 58#include <locale.h>
4208735d 59#include <poll.h>
d99c22ea 60#include <pthread.h>
97124d5e 61#include <unistd.h>
415ccb58
AB
62#ifndef HAVE_GETTID
63#include <syscall.h>
64#endif
de9ac07b 65#include <sched.h>
9607ad3a 66#include <signal.h>
da231338
AM
67#ifdef HAVE_EVENTFD_SUPPORT
68#include <sys/eventfd.h>
69#endif
a41794cd 70#include <sys/mman.h>
4208735d 71#include <sys/wait.h>
eeb399b5
AH
72#include <sys/types.h>
73#include <sys/stat.h>
74#include <fcntl.h>
6ef81c55 75#include <linux/err.h>
8520a98d 76#include <linux/string.h>
0693e680 77#include <linux/time64.h>
d8f9da24 78#include <linux/zalloc.h>
8384a260 79#include <linux/bitmap.h>
d1e325cf 80#include <sys/time.h>
78da39fa 81
1b43b704 82struct switch_output {
dc0c6127 83 bool enabled;
1b43b704 84 bool signal;
dc0c6127 85 unsigned long size;
bfacbe3b 86 unsigned long time;
cb4e1ebb
JO
87 const char *str;
88 bool set;
03724b2e
AK
89 char **filenames;
90 int num_files;
91 int cur_file;
1b43b704
JO
92};
93
7954f716
AB
94struct thread_mask {
95 struct mmap_cpu_mask maps;
96 struct mmap_cpu_mask affinity;
97};
98
415ccb58
AB
99struct record_thread {
100 pid_t tid;
101 struct thread_mask *mask;
102 struct {
103 int msg[2];
104 int ack[2];
105 } pipes;
106 struct fdarray pollfd;
107 int ctlfd_pos;
108 int nr_mmaps;
109 struct mmap **maps;
110 struct mmap **overwrite_maps;
111 struct record *rec;
396b626b
AB
112 unsigned long long samples;
113 unsigned long waking;
ae9c7242 114 u64 bytes_written;
610fbc01
AB
115 u64 bytes_transferred;
116 u64 bytes_compressed;
415ccb58
AB
117};
118
396b626b
AB
119static __thread struct record_thread *thread;
120
1e5de7d9
AB
121enum thread_msg {
122 THREAD_MSG__UNDEFINED = 0,
123 THREAD_MSG__READY,
124 THREAD_MSG__MAX,
125};
126
127static const char *thread_msg_tags[THREAD_MSG__MAX] = {
128 "UNDEFINED", "READY"
129};
130
06380a84
AB
131enum thread_spec {
132 THREAD_SPEC__UNDEFINED = 0,
133 THREAD_SPEC__CPU,
f466e5ed
AB
134 THREAD_SPEC__CORE,
135 THREAD_SPEC__PACKAGE,
136 THREAD_SPEC__NUMA,
137 THREAD_SPEC__USER,
138 THREAD_SPEC__MAX,
139};
140
141static const char *thread_spec_tags[THREAD_SPEC__MAX] = {
142 "undefined", "cpu", "core", "package", "numa", "user"
06380a84
AB
143};
144
8c6f45a7 145struct record {
45694aa7 146 struct perf_tool tool;
b4006796 147 struct record_opts opts;
d20deb64 148 u64 bytes_written;
8ceb41d7 149 struct perf_data data;
ef149c25 150 struct auxtrace_record *itr;
63503dba 151 struct evlist *evlist;
d20deb64 152 struct perf_session *session;
bc477d79 153 struct evlist *sb_evlist;
899e5ffb 154 pthread_t thread_id;
d20deb64 155 int realtime_prio;
899e5ffb 156 bool switch_output_event_set;
d20deb64 157 bool no_buildid;
d2db9a98 158 bool no_buildid_set;
d20deb64 159 bool no_buildid_cache;
d2db9a98 160 bool no_buildid_cache_set;
6156681b 161 bool buildid_all;
e29386c8 162 bool buildid_mmap;
ecfd7a9c 163 bool timestamp_filename;
68588baf 164 bool timestamp_boundary;
1b43b704 165 struct switch_output switch_output;
9f065194 166 unsigned long long samples;
6d575816 167 unsigned long output_max_size; /* = 0: unlimited */
9bce13ea 168 struct perf_debuginfod debuginfod;
7954f716
AB
169 int nr_threads;
170 struct thread_mask *thread_masks;
415ccb58 171 struct record_thread *thread_data;
0f82ebc4 172};
a21ca2ca 173
6d575816
JS
174static volatile int done;
175
dc0c6127
JO
176static volatile int auxtrace_record__snapshot_started;
177static DEFINE_TRIGGER(auxtrace_snapshot_trigger);
178static DEFINE_TRIGGER(switch_output_trigger);
179
9d2ed645
AB
180static const char *affinity_tags[PERF_AFFINITY_MAX] = {
181 "SYS", "NODE", "CPU"
182};
183
415ccb58
AB
184#ifndef HAVE_GETTID
185static inline pid_t gettid(void)
186{
187 return (pid_t)syscall(__NR_gettid);
188}
189#endif
190
3217e9fe
AB
191static int record__threads_enabled(struct record *rec)
192{
193 return rec->opts.threads_spec;
194}
195
dc0c6127
JO
196static bool switch_output_signal(struct record *rec)
197{
198 return rec->switch_output.signal &&
199 trigger_is_ready(&switch_output_trigger);
200}
201
202static bool switch_output_size(struct record *rec)
203{
204 return rec->switch_output.size &&
205 trigger_is_ready(&switch_output_trigger) &&
206 (rec->bytes_written >= rec->switch_output.size);
207}
208
bfacbe3b
JO
209static bool switch_output_time(struct record *rec)
210{
211 return rec->switch_output.time &&
212 trigger_is_ready(&switch_output_trigger);
213}
214
ae9c7242
AB
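/* Total output size so far: bytes written by the main thread plus each worker thread. */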
215static u64 record__bytes_written(struct record *rec)
216{
217 int t;
218 u64 bytes_written = rec->bytes_written;
219 struct record_thread *thread_data = rec->thread_data;
220
221 for (t = 0; t < rec->nr_threads; t++)
222 bytes_written += thread_data[t].bytes_written;
223
224 return bytes_written;
225}
226
6d575816
JS
227static bool record__output_max_size_exceeded(struct record *rec)
228{
229 return rec->output_max_size &&
ae9c7242 230 (record__bytes_written(rec) >= rec->output_max_size);
6d575816
JS
231}
232
a5830532 233static int record__write(struct record *rec, struct mmap *map __maybe_unused,
ded2b8fe 234 void *bf, size_t size)
f5970550 235{
ded2b8fe
JO
236 struct perf_data_file *file = &rec->session->data->file;
237
56f735ff
AB
238 if (map && map->file)
239 file = map->file;
240
ded2b8fe 241 if (perf_data_file__write(file, bf, size) < 0) {
50a9b868
JO
242 pr_err("failed to write perf data, error: %m\n");
243 return -1;
f5970550 244 }
8d3eca20 245
ae9c7242
AB
246 if (map && map->file)
247 thread->bytes_written += size;
248 else
56f735ff 249 rec->bytes_written += size;
dc0c6127 250
6d575816
JS
251 if (record__output_max_size_exceeded(rec) && !done) {
252 fprintf(stderr, "[ perf record: perf size limit reached (%" PRIu64 " KB),"
253 " stopping session ]\n",
ae9c7242 254 record__bytes_written(rec) >> 10);
6d575816
JS
255 done = 1;
256 }
257
dc0c6127
JO
258 if (switch_output_size(rec))
259 trigger_hit(&switch_output_trigger);
260
8d3eca20 261 return 0;
f5970550
PZ
262}
263
ef781128
AB
264static int record__aio_enabled(struct record *rec);
265static int record__comp_enabled(struct record *rec);
75f5f1fc
AB
266static size_t zstd_compress(struct perf_session *session, struct mmap *map,
267 void *dst, size_t dst_size, void *src, size_t src_size);
5d7f4116 268
d3d1af6f
AB
269#ifdef HAVE_AIO_SUPPORT
270static int record__aio_write(struct aiocb *cblock, int trace_fd,
271 void *buf, size_t size, off_t off)
272{
273 int rc;
274
275 cblock->aio_fildes = trace_fd;
276 cblock->aio_buf = buf;
277 cblock->aio_nbytes = size;
278 cblock->aio_offset = off;
279 cblock->aio_sigevent.sigev_notify = SIGEV_NONE;
280
281 do {
282 rc = aio_write(cblock);
283 if (rc == 0) {
284 break;
285 } else if (errno != EAGAIN) {
286 cblock->aio_fildes = -1;
287 pr_err("failed to queue perf data, error: %m\n");
288 break;
289 }
290 } while (1);
291
292 return rc;
293}
294
a5830532 295static int record__aio_complete(struct mmap *md, struct aiocb *cblock)
d3d1af6f
AB
296{
297 void *rem_buf;
298 off_t rem_off;
299 size_t rem_size;
300 int rc, aio_errno;
301 ssize_t aio_ret, written;
302
303 aio_errno = aio_error(cblock);
304 if (aio_errno == EINPROGRESS)
305 return 0;
306
307 written = aio_ret = aio_return(cblock);
308 if (aio_ret < 0) {
309 if (aio_errno != EINTR)
310 pr_err("failed to write perf data, error: %m\n");
311 written = 0;
312 }
313
314 rem_size = cblock->aio_nbytes - written;
315
316 if (rem_size == 0) {
317 cblock->aio_fildes = -1;
318 /*
ef781128
AB
319 * md->refcount is incremented in record__aio_pushfn() for
320 * every aio write request started in record__aio_push() so
321 * decrement it because the request is now complete.
d3d1af6f 322 */
80e53d11 323 perf_mmap__put(&md->core);
d3d1af6f
AB
324 rc = 1;
325 } else {
326 /*
 327 * aio write request may require a restart with the
 328 * remainder if the kernel didn't write the whole
 329 * chunk at once.
330 */
331 rem_off = cblock->aio_offset + written;
332 rem_buf = (void *)(cblock->aio_buf + written);
333 record__aio_write(cblock, cblock->aio_fildes,
334 rem_buf, rem_size, rem_off);
335 rc = 0;
336 }
337
338 return rc;
339}
340
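/*
 * Wait for in-flight aio writes: without sync_all, return the index of the
 * first control block that is free or just completed; with sync_all, block
 * until every outstanding write has finished (then return -1).
 */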
a5830532 341static int record__aio_sync(struct mmap *md, bool sync_all)
d3d1af6f 342{
93f20c0f
AB
343 struct aiocb **aiocb = md->aio.aiocb;
344 struct aiocb *cblocks = md->aio.cblocks;
d3d1af6f 345 struct timespec timeout = { 0, 1000 * 1000 * 1 }; /* 1ms */
93f20c0f 346 int i, do_suspend;
d3d1af6f
AB
347
348 do {
93f20c0f
AB
349 do_suspend = 0;
350 for (i = 0; i < md->aio.nr_cblocks; ++i) {
351 if (cblocks[i].aio_fildes == -1 || record__aio_complete(md, &cblocks[i])) {
352 if (sync_all)
353 aiocb[i] = NULL;
354 else
355 return i;
356 } else {
357 /*
 358 * A started aio write is not complete yet,
 359 * so it has to be waited for before the
 360 * next allocation.
361 */
362 aiocb[i] = &cblocks[i];
363 do_suspend = 1;
364 }
365 }
366 if (!do_suspend)
367 return -1;
d3d1af6f 368
93f20c0f 369 while (aio_suspend((const struct aiocb **)aiocb, md->aio.nr_cblocks, &timeout)) {
d3d1af6f
AB
370 if (!(errno == EAGAIN || errno == EINTR))
371 pr_err("failed to sync perf data, error: %m\n");
372 }
373 } while (1);
374}
375
ef781128
AB
376struct record_aio {
377 struct record *rec;
378 void *data;
379 size_t size;
380};
381
a5830532 382static int record__aio_pushfn(struct mmap *map, void *to, void *buf, size_t size)
d3d1af6f 383{
ef781128 384 struct record_aio *aio = to;
d3d1af6f 385
ef781128 386 /*
547740f7 387 * map->core.base data pointed to by buf is copied into a free map->aio.data[] buffer
 388 * to release space in the kernel buffer as fast as possible, calling
 389 * perf_mmap__consume() from the perf_mmap__push() function.
 390 *
 391 * This lets the kernel proceed with storing more profiling data into
 392 * the kernel buffer earlier than other per-cpu kernel buffers are handled.
 393 *
 394 * Copying can be done in two steps in case the chunk of profiling data
 395 * crosses the upper bound of the kernel buffer. In this case we first move
 396 * part of the data from map->start till the upper bound and then the remainder
 397 * from the beginning of the kernel buffer till the end of the data chunk.
398 */
399
400 if (record__comp_enabled(aio->rec)) {
75f5f1fc 401 size = zstd_compress(aio->rec->session, NULL, aio->data + aio->size,
bf59b305 402 mmap__mmap_len(map) - aio->size,
ef781128
AB
403 buf, size);
404 } else {
405 memcpy(aio->data + aio->size, buf, size);
406 }
407
408 if (!aio->size) {
409 /*
410 * Increment map->refcount to guard map->aio.data[] buffer
 411 * from premature deallocation because the map object can be
 412 * released before the aio write request started on the
 413 * map->aio.data[] buffer completes.
414 *
415 * perf_mmap__put() is done at record__aio_complete()
416 * after started aio request completion or at record__aio_push()
417 * if the request failed to start.
418 */
e75710f0 419 perf_mmap__get(&map->core);
ef781128
AB
420 }
421
422 aio->size += size;
423
424 return size;
425}
426
a5830532 427static int record__aio_push(struct record *rec, struct mmap *map, off_t *off)
ef781128
AB
428{
429 int ret, idx;
430 int trace_fd = rec->session->data->file.fd;
431 struct record_aio aio = { .rec = rec, .size = 0 };
d3d1af6f 432
ef781128
AB
433 /*
434 * Call record__aio_sync() to wait till map->aio.data[] buffer
435 * becomes available after previous aio write operation.
436 */
437
438 idx = record__aio_sync(map, false);
439 aio.data = map->aio.data[idx];
440 ret = perf_mmap__push(map, &aio, record__aio_pushfn);
441 if (ret != 0) /* ret > 0 - no data, ret < 0 - error */
442 return ret;
443
444 rec->samples++;
445 ret = record__aio_write(&(map->aio.cblocks[idx]), trace_fd, aio.data, aio.size, *off);
d3d1af6f 446 if (!ret) {
ef781128
AB
447 *off += aio.size;
448 rec->bytes_written += aio.size;
d3d1af6f
AB
449 if (switch_output_size(rec))
450 trigger_hit(&switch_output_trigger);
ef781128
AB
451 } else {
452 /*
 453 * Decrement the map->refcount incremented in record__aio_pushfn()
 454 * if the record__aio_write() operation failed to start; otherwise
455 * map->refcount is decremented in record__aio_complete() after
456 * aio write operation finishes successfully.
457 */
80e53d11 458 perf_mmap__put(&map->core);
d3d1af6f
AB
459 }
460
461 return ret;
462}
463
464static off_t record__aio_get_pos(int trace_fd)
465{
466 return lseek(trace_fd, 0, SEEK_CUR);
467}
468
469static void record__aio_set_pos(int trace_fd, off_t pos)
470{
471 lseek(trace_fd, pos, SEEK_SET);
472}
473
474static void record__aio_mmap_read_sync(struct record *rec)
475{
476 int i;
63503dba 477 struct evlist *evlist = rec->evlist;
a5830532 478 struct mmap *maps = evlist->mmap;
d3d1af6f 479
ef781128 480 if (!record__aio_enabled(rec))
d3d1af6f
AB
481 return;
482
c976ee11 483 for (i = 0; i < evlist->core.nr_mmaps; i++) {
a5830532 484 struct mmap *map = &maps[i];
d3d1af6f 485
547740f7 486 if (map->core.base)
93f20c0f 487 record__aio_sync(map, true);
d3d1af6f
AB
488 }
489}
490
491static int nr_cblocks_default = 1;
93f20c0f 492static int nr_cblocks_max = 4;
d3d1af6f
AB
493
494static int record__aio_parse(const struct option *opt,
93f20c0f 495 const char *str,
d3d1af6f
AB
496 int unset)
497{
498 struct record_opts *opts = (struct record_opts *)opt->value;
499
93f20c0f 500 if (unset) {
d3d1af6f 501 opts->nr_cblocks = 0;
93f20c0f
AB
502 } else {
503 if (str)
504 opts->nr_cblocks = strtol(str, NULL, 0);
505 if (!opts->nr_cblocks)
506 opts->nr_cblocks = nr_cblocks_default;
507 }
d3d1af6f
AB
508
509 return 0;
510}
511#else /* HAVE_AIO_SUPPORT */
93f20c0f
AB
512static int nr_cblocks_max = 0;
513
a5830532 514static int record__aio_push(struct record *rec __maybe_unused, struct mmap *map __maybe_unused,
ef781128 515 off_t *off __maybe_unused)
d3d1af6f
AB
516{
517 return -1;
518}
519
520static off_t record__aio_get_pos(int trace_fd __maybe_unused)
521{
522 return -1;
523}
524
525static void record__aio_set_pos(int trace_fd __maybe_unused, off_t pos __maybe_unused)
526{
527}
528
529static void record__aio_mmap_read_sync(struct record *rec __maybe_unused)
530{
531}
532#endif
533
534static int record__aio_enabled(struct record *rec)
535{
536 return rec->opts.nr_cblocks > 0;
537}
538
470530bb
AB
539#define MMAP_FLUSH_DEFAULT 1
540static int record__mmap_flush_parse(const struct option *opt,
541 const char *str,
542 int unset)
543{
544 int flush_max;
545 struct record_opts *opts = (struct record_opts *)opt->value;
546 static struct parse_tag tags[] = {
547 { .tag = 'B', .mult = 1 },
548 { .tag = 'K', .mult = 1 << 10 },
549 { .tag = 'M', .mult = 1 << 20 },
550 { .tag = 'G', .mult = 1 << 30 },
551 { .tag = 0 },
552 };
553
554 if (unset)
555 return 0;
556
557 if (str) {
558 opts->mmap_flush = parse_tag_value(str, tags);
559 if (opts->mmap_flush == (int)-1)
560 opts->mmap_flush = strtol(str, NULL, 0);
561 }
562
563 if (!opts->mmap_flush)
564 opts->mmap_flush = MMAP_FLUSH_DEFAULT;
565
9521b5f2 566 flush_max = evlist__mmap_size(opts->mmap_pages);
470530bb
AB
567 flush_max /= 4;
568 if (opts->mmap_flush > flush_max)
569 opts->mmap_flush = flush_max;
570
571 return 0;
572}
573
504c1ad1
AB
574#ifdef HAVE_ZSTD_SUPPORT
575static unsigned int comp_level_default = 1;
576
577static int record__parse_comp_level(const struct option *opt, const char *str, int unset)
578{
579 struct record_opts *opts = opt->value;
580
581 if (unset) {
582 opts->comp_level = 0;
583 } else {
584 if (str)
585 opts->comp_level = strtol(str, NULL, 0);
586 if (!opts->comp_level)
587 opts->comp_level = comp_level_default;
588 }
589
590 return 0;
591}
592#endif
51255a8a
AB
593static unsigned int comp_level_max = 22;
594
42e1fd80
AB
595static int record__comp_enabled(struct record *rec)
596{
597 return rec->opts.comp_level > 0;
598}
599
45694aa7 600static int process_synthesized_event(struct perf_tool *tool,
d20deb64 601 union perf_event *event,
1d037ca1
IT
602 struct perf_sample *sample __maybe_unused,
603 struct machine *machine __maybe_unused)
234fbbf5 604{
8c6f45a7 605 struct record *rec = container_of(tool, struct record, tool);
ded2b8fe 606 return record__write(rec, NULL, event, event->header.size);
234fbbf5
ACM
607}
608
d99c22ea
SE
609static int process_locked_synthesized_event(struct perf_tool *tool,
610 union perf_event *event,
611 struct perf_sample *sample __maybe_unused,
612 struct machine *machine __maybe_unused)
613{
614 static pthread_mutex_t synth_lock = PTHREAD_MUTEX_INITIALIZER;
615 int ret;
616
617 pthread_mutex_lock(&synth_lock);
618 ret = process_synthesized_event(tool, event, sample, machine);
619 pthread_mutex_unlock(&synth_lock);
620 return ret;
621}
622
a5830532 623static int record__pushfn(struct mmap *map, void *to, void *bf, size_t size)
d37f1586
ACM
624{
625 struct record *rec = to;
626
5d7f4116 627 if (record__comp_enabled(rec)) {
75f5f1fc 628 size = zstd_compress(rec->session, map, map->data, mmap__mmap_len(map), bf, size);
5d7f4116
AB
629 bf = map->data;
630 }
631
396b626b 632 thread->samples++;
ded2b8fe 633 return record__write(rec, map, bf, size);
d37f1586
ACM
634}
635
2dd6d8a1
AH
636static volatile int signr = -1;
637static volatile int child_finished;
da231338
AM
638#ifdef HAVE_EVENTFD_SUPPORT
639static int done_fd = -1;
640#endif
c0bdc1c4 641
2dd6d8a1
AH
642static void sig_handler(int sig)
643{
644 if (sig == SIGCHLD)
645 child_finished = 1;
646 else
647 signr = sig;
648
649 done = 1;
da231338
AM
650#ifdef HAVE_EVENTFD_SUPPORT
651{
652 u64 tmp = 1;
653 /*
654 * It is possible for this signal handler to run after done is checked
655 * in the main loop, but before the perf counter fds are polled. If this
656 * happens, the poll() will continue to wait even though done is set,
657 * and will only break out if either another signal is received, or the
658 * counters are ready for read. To ensure the poll() doesn't sleep when
659 * done is set, use an eventfd (done_fd) to wake up the poll().
660 */
661 if (write(done_fd, &tmp, sizeof(tmp)) < 0)
662 pr_err("failed to signal wakeup fd, error: %m\n");
663}
664#endif // HAVE_EVENTFD_SUPPORT
2dd6d8a1
AH
665}
666
a074865e
WN
667static void sigsegv_handler(int sig)
668{
669 perf_hooks__recover();
670 sighandler_dump_stack(sig);
671}
672
2dd6d8a1
AH
673static void record__sig_exit(void)
674{
675 if (signr == -1)
676 return;
677
678 signal(signr, SIG_DFL);
679 raise(signr);
680}
681
e31f0d01
AH
682#ifdef HAVE_AUXTRACE_SUPPORT
683
ef149c25 684static int record__process_auxtrace(struct perf_tool *tool,
a5830532 685 struct mmap *map,
ef149c25
AH
686 union perf_event *event, void *data1,
687 size_t len1, void *data2, size_t len2)
688{
689 struct record *rec = container_of(tool, struct record, tool);
8ceb41d7 690 struct perf_data *data = &rec->data;
ef149c25
AH
691 size_t padding;
692 u8 pad[8] = {0};
693
46e201ef 694 if (!perf_data__is_pipe(data) && perf_data__is_single_file(data)) {
99fa2984 695 off_t file_offset;
8ceb41d7 696 int fd = perf_data__fd(data);
99fa2984
AH
697 int err;
698
699 file_offset = lseek(fd, 0, SEEK_CUR);
700 if (file_offset == -1)
701 return -1;
702 err = auxtrace_index__auxtrace_event(&rec->session->auxtrace_index,
703 event, file_offset);
704 if (err)
705 return err;
706 }
707
ef149c25
AH
708 /* event.auxtrace.size includes padding, see __auxtrace_mmap__read() */
709 padding = (len1 + len2) & 7;
710 if (padding)
711 padding = 8 - padding;
712
ded2b8fe
JO
713 record__write(rec, map, event, event->header.size);
714 record__write(rec, map, data1, len1);
ef149c25 715 if (len2)
ded2b8fe
JO
716 record__write(rec, map, data2, len2);
717 record__write(rec, map, &pad, padding);
ef149c25
AH
718
719 return 0;
720}
721
722static int record__auxtrace_mmap_read(struct record *rec,
a5830532 723 struct mmap *map)
ef149c25
AH
724{
725 int ret;
726
e035f4ca 727 ret = auxtrace_mmap__read(map, rec->itr, &rec->tool,
ef149c25
AH
728 record__process_auxtrace);
729 if (ret < 0)
730 return ret;
731
732 if (ret)
733 rec->samples++;
734
735 return 0;
736}
737
2dd6d8a1 738static int record__auxtrace_mmap_read_snapshot(struct record *rec,
a5830532 739 struct mmap *map)
2dd6d8a1
AH
740{
741 int ret;
742
e035f4ca 743 ret = auxtrace_mmap__read_snapshot(map, rec->itr, &rec->tool,
2dd6d8a1
AH
744 record__process_auxtrace,
745 rec->opts.auxtrace_snapshot_size);
746 if (ret < 0)
747 return ret;
748
749 if (ret)
750 rec->samples++;
751
752 return 0;
753}
754
755static int record__auxtrace_read_snapshot_all(struct record *rec)
756{
757 int i;
758 int rc = 0;
759
c976ee11 760 for (i = 0; i < rec->evlist->core.nr_mmaps; i++) {
a5830532 761 struct mmap *map = &rec->evlist->mmap[i];
2dd6d8a1 762
e035f4ca 763 if (!map->auxtrace_mmap.base)
2dd6d8a1
AH
764 continue;
765
e035f4ca 766 if (record__auxtrace_mmap_read_snapshot(rec, map) != 0) {
2dd6d8a1
AH
767 rc = -1;
768 goto out;
769 }
770 }
771out:
772 return rc;
773}
774
ce7b0e42 775static void record__read_auxtrace_snapshot(struct record *rec, bool on_exit)
2dd6d8a1
AH
776{
777 pr_debug("Recording AUX area tracing snapshot\n");
778 if (record__auxtrace_read_snapshot_all(rec) < 0) {
5f9cf599 779 trigger_error(&auxtrace_snapshot_trigger);
2dd6d8a1 780 } else {
ce7b0e42 781 if (auxtrace_record__snapshot_finish(rec->itr, on_exit))
5f9cf599
WN
782 trigger_error(&auxtrace_snapshot_trigger);
783 else
784 trigger_ready(&auxtrace_snapshot_trigger);
2dd6d8a1
AH
785 }
786}
787
ce7b0e42
AS
788static int record__auxtrace_snapshot_exit(struct record *rec)
789{
790 if (trigger_is_error(&auxtrace_snapshot_trigger))
791 return 0;
792
793 if (!auxtrace_record__snapshot_started &&
794 auxtrace_record__snapshot_start(rec->itr))
795 return -1;
796
797 record__read_auxtrace_snapshot(rec, true);
798 if (trigger_is_error(&auxtrace_snapshot_trigger))
799 return -1;
800
801 return 0;
802}
803
4b5ea3bd
AH
804static int record__auxtrace_init(struct record *rec)
805{
806 int err;
807
b5f2511d
AB
808 if ((rec->opts.auxtrace_snapshot_opts || rec->opts.auxtrace_sample_opts)
809 && record__threads_enabled(rec)) {
810 pr_err("AUX area tracing options are not available in parallel streaming mode.\n");
811 return -EINVAL;
812 }
813
4b5ea3bd
AH
814 if (!rec->itr) {
815 rec->itr = auxtrace_record__init(rec->evlist, &err);
816 if (err)
817 return err;
818 }
819
820 err = auxtrace_parse_snapshot_options(rec->itr, &rec->opts,
821 rec->opts.auxtrace_snapshot_opts);
822 if (err)
823 return err;
824
c0a6de06
AH
825 err = auxtrace_parse_sample_options(rec->itr, rec->evlist, &rec->opts,
826 rec->opts.auxtrace_sample_opts);
827 if (err)
828 return err;
829
d58b3f7e
AH
830 auxtrace_regroup_aux_output(rec->evlist);
831
4b5ea3bd
AH
832 return auxtrace_parse_filters(rec->evlist);
833}
834
e31f0d01
AH
835#else
836
837static inline
838int record__auxtrace_mmap_read(struct record *rec __maybe_unused,
a5830532 839 struct mmap *map __maybe_unused)
e31f0d01
AH
840{
841 return 0;
842}
843
2dd6d8a1 844static inline
ce7b0e42
AS
845void record__read_auxtrace_snapshot(struct record *rec __maybe_unused,
846 bool on_exit __maybe_unused)
de9ac07b 847{
f7b7c26e
PZ
848}
849
2dd6d8a1
AH
850static inline
851int auxtrace_record__snapshot_start(struct auxtrace_record *itr __maybe_unused)
f7b7c26e 852{
2dd6d8a1 853 return 0;
de9ac07b
PZ
854}
855
ce7b0e42
AS
856static inline
857int record__auxtrace_snapshot_exit(struct record *rec __maybe_unused)
858{
859 return 0;
860}
861
4b5ea3bd
AH
862static int record__auxtrace_init(struct record *rec __maybe_unused)
863{
864 return 0;
865}
866
2dd6d8a1
AH
867#endif
868
246eba8e
AH
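/* Ensure a dummy tracking event exists, configured to capture text poke and ksymbol records on all CPUs. */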
869static int record__config_text_poke(struct evlist *evlist)
870{
871 struct evsel *evsel;
872 int err;
873
874 /* Nothing to do if text poke is already configured */
875 evlist__for_each_entry(evlist, evsel) {
876 if (evsel->core.attr.text_poke)
877 return 0;
878 }
879
880 err = parse_events(evlist, "dummy:u", NULL);
881 if (err)
882 return err;
883
884 evsel = evlist__last(evlist);
885
886 evsel->core.attr.freq = 0;
887 evsel->core.attr.sample_period = 1;
888 evsel->core.attr.text_poke = 1;
889 evsel->core.attr.ksymbol = 1;
890
891 evsel->core.system_wide = true;
892 evsel->no_aux_samples = true;
893 evsel->immediate = true;
894
895 /* Text poke must be collected on all CPUs */
896 perf_cpu_map__put(evsel->core.own_cpus);
897 evsel->core.own_cpus = perf_cpu_map__new(NULL);
898 perf_cpu_map__put(evsel->core.cpus);
899 evsel->core.cpus = perf_cpu_map__get(evsel->core.own_cpus);
900
901 evsel__set_sample_bit(evsel, TIME);
902
903 return 0;
904}
905
eeb399b5
AH
906static bool record__kcore_readable(struct machine *machine)
907{
908 char kcore[PATH_MAX];
909 int fd;
910
911 scnprintf(kcore, sizeof(kcore), "%s/proc/kcore", machine->root_dir);
912
913 fd = open(kcore, O_RDONLY);
914 if (fd < 0)
915 return false;
916
917 close(fd);
918
919 return true;
920}
921
922static int record__kcore_copy(struct machine *machine, struct perf_data *data)
923{
924 char from_dir[PATH_MAX];
925 char kcore_dir[PATH_MAX];
926 int ret;
927
928 snprintf(from_dir, sizeof(from_dir), "%s/proc", machine->root_dir);
929
930 ret = perf_data__make_kcore_dir(data, kcore_dir, sizeof(kcore_dir));
931 if (ret)
932 return ret;
933
934 return kcore_copy(from_dir, kcore_dir);
935}
936
415ccb58
AB
937static void record__thread_data_init_pipes(struct record_thread *thread_data)
938{
939 thread_data->pipes.msg[0] = -1;
940 thread_data->pipes.msg[1] = -1;
941 thread_data->pipes.ack[0] = -1;
942 thread_data->pipes.ack[1] = -1;
943}
944
945static int record__thread_data_open_pipes(struct record_thread *thread_data)
946{
947 if (pipe(thread_data->pipes.msg))
948 return -EINVAL;
949
950 if (pipe(thread_data->pipes.ack)) {
951 close(thread_data->pipes.msg[0]);
952 thread_data->pipes.msg[0] = -1;
953 close(thread_data->pipes.msg[1]);
954 thread_data->pipes.msg[1] = -1;
955 return -EINVAL;
956 }
957
958 pr_debug2("thread_data[%p]: msg=[%d,%d], ack=[%d,%d]\n", thread_data,
959 thread_data->pipes.msg[0], thread_data->pipes.msg[1],
960 thread_data->pipes.ack[0], thread_data->pipes.ack[1]);
961
962 return 0;
963}
964
965static void record__thread_data_close_pipes(struct record_thread *thread_data)
966{
967 if (thread_data->pipes.msg[0] != -1) {
968 close(thread_data->pipes.msg[0]);
969 thread_data->pipes.msg[0] = -1;
970 }
971 if (thread_data->pipes.msg[1] != -1) {
972 close(thread_data->pipes.msg[1]);
973 thread_data->pipes.msg[1] = -1;
974 }
975 if (thread_data->pipes.ack[0] != -1) {
976 close(thread_data->pipes.ack[0]);
977 thread_data->pipes.ack[0] = -1;
978 }
979 if (thread_data->pipes.ack[1] != -1) {
980 close(thread_data->pipes.ack[1]);
981 thread_data->pipes.ack[1] = -1;
982 }
983}
984
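/* Hand this thread the subset of the evlist's per-CPU mmaps whose CPUs are set in the thread's maps mask. */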
985static int record__thread_data_init_maps(struct record_thread *thread_data, struct evlist *evlist)
986{
987 int m, tm, nr_mmaps = evlist->core.nr_mmaps;
988 struct mmap *mmap = evlist->mmap;
989 struct mmap *overwrite_mmap = evlist->overwrite_mmap;
0df6ade7 990 struct perf_cpu_map *cpus = evlist->core.user_requested_cpus;
415ccb58
AB
991
992 thread_data->nr_mmaps = bitmap_weight(thread_data->mask->maps.bits,
993 thread_data->mask->maps.nbits);
994 if (mmap) {
995 thread_data->maps = zalloc(thread_data->nr_mmaps * sizeof(struct mmap *));
996 if (!thread_data->maps)
997 return -ENOMEM;
998 }
999 if (overwrite_mmap) {
1000 thread_data->overwrite_maps = zalloc(thread_data->nr_mmaps * sizeof(struct mmap *));
1001 if (!thread_data->overwrite_maps) {
1002 zfree(&thread_data->maps);
1003 return -ENOMEM;
1004 }
1005 }
1006 pr_debug2("thread_data[%p]: nr_mmaps=%d, maps=%p, ow_maps=%p\n", thread_data,
1007 thread_data->nr_mmaps, thread_data->maps, thread_data->overwrite_maps);
1008
1009 for (m = 0, tm = 0; m < nr_mmaps && tm < thread_data->nr_mmaps; m++) {
1010 if (test_bit(cpus->map[m].cpu, thread_data->mask->maps.bits)) {
1011 if (thread_data->maps) {
1012 thread_data->maps[tm] = &mmap[m];
1013 pr_debug2("thread_data[%p]: cpu%d: maps[%d] -> mmap[%d]\n",
1014 thread_data, cpus->map[m].cpu, tm, m);
1015 }
1016 if (thread_data->overwrite_maps) {
1017 thread_data->overwrite_maps[tm] = &overwrite_mmap[m];
1018 pr_debug2("thread_data[%p]: cpu%d: ow_maps[%d] -> ow_mmap[%d]\n",
1019 thread_data, cpus->map[m].cpu, tm, m);
1020 }
1021 tm++;
1022 }
1023 }
1024
1025 return 0;
1026}
1027
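/* Build the thread's private pollfd array by duplicating the evlist pollfd entries that back this thread's mmaps. */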
1028static int record__thread_data_init_pollfd(struct record_thread *thread_data, struct evlist *evlist)
1029{
1030 int f, tm, pos;
1031 struct mmap *map, *overwrite_map;
1032
1033 fdarray__init(&thread_data->pollfd, 64);
1034
1035 for (tm = 0; tm < thread_data->nr_mmaps; tm++) {
1036 map = thread_data->maps ? thread_data->maps[tm] : NULL;
1037 overwrite_map = thread_data->overwrite_maps ?
1038 thread_data->overwrite_maps[tm] : NULL;
1039
1040 for (f = 0; f < evlist->core.pollfd.nr; f++) {
1041 void *ptr = evlist->core.pollfd.priv[f].ptr;
1042
1043 if ((map && ptr == map) || (overwrite_map && ptr == overwrite_map)) {
1044 pos = fdarray__dup_entry_from(&thread_data->pollfd, f,
1045 &evlist->core.pollfd);
1046 if (pos < 0)
1047 return pos;
1048 pr_debug2("thread_data[%p]: pollfd[%d] <- event_fd=%d\n",
1049 thread_data, pos, evlist->core.pollfd.entries[f].fd);
1050 }
1051 }
1052 }
1053
1054 return 0;
1055}
1056
1057static void record__free_thread_data(struct record *rec)
1058{
1059 int t;
1060 struct record_thread *thread_data = rec->thread_data;
1061
1062 if (thread_data == NULL)
1063 return;
1064
1065 for (t = 0; t < rec->nr_threads; t++) {
1066 record__thread_data_close_pipes(&thread_data[t]);
1067 zfree(&thread_data[t].maps);
1068 zfree(&thread_data[t].overwrite_maps);
1069 fdarray__exit(&thread_data[t].pollfd);
1070 }
1071
1072 zfree(&rec->thread_data);
1073}
1074
1075static int record__alloc_thread_data(struct record *rec, struct evlist *evlist)
1076{
1077 int t, ret;
1078 struct record_thread *thread_data;
1079
1080 rec->thread_data = zalloc(rec->nr_threads * sizeof(*(rec->thread_data)));
1081 if (!rec->thread_data) {
1082 pr_err("Failed to allocate thread data\n");
1083 return -ENOMEM;
1084 }
1085 thread_data = rec->thread_data;
1086
1087 for (t = 0; t < rec->nr_threads; t++)
1088 record__thread_data_init_pipes(&thread_data[t]);
1089
1090 for (t = 0; t < rec->nr_threads; t++) {
1091 thread_data[t].rec = rec;
1092 thread_data[t].mask = &rec->thread_masks[t];
1093 ret = record__thread_data_init_maps(&thread_data[t], evlist);
1094 if (ret) {
1095 pr_err("Failed to initialize thread[%d] maps\n", t);
1096 goto out_free;
1097 }
1098 ret = record__thread_data_init_pollfd(&thread_data[t], evlist);
1099 if (ret) {
1100 pr_err("Failed to initialize thread[%d] pollfd\n", t);
1101 goto out_free;
1102 }
1103 if (t) {
1104 thread_data[t].tid = -1;
1105 ret = record__thread_data_open_pipes(&thread_data[t]);
1106 if (ret) {
1107 pr_err("Failed to open thread[%d] communication pipes\n", t);
1108 goto out_free;
1109 }
1110 ret = fdarray__add(&thread_data[t].pollfd, thread_data[t].pipes.msg[0],
1111 POLLIN | POLLERR | POLLHUP, fdarray_flag__nonfilterable);
1112 if (ret < 0) {
1113 pr_err("Failed to add descriptor to thread[%d] pollfd\n", t);
1114 goto out_free;
1115 }
1116 thread_data[t].ctlfd_pos = ret;
1117 pr_debug2("thread_data[%p]: pollfd[%d] <- ctl_fd=%d\n",
1118 thread_data, thread_data[t].ctlfd_pos,
1119 thread_data[t].pipes.msg[0]);
1120 } else {
1121 thread_data[t].tid = gettid();
1122 if (evlist->ctl_fd.pos == -1)
1123 continue;
1124 ret = fdarray__dup_entry_from(&thread_data[t].pollfd, evlist->ctl_fd.pos,
1125 &evlist->core.pollfd);
1126 if (ret < 0) {
1127 pr_err("Failed to duplicate descriptor in main thread pollfd\n");
1128 goto out_free;
1129 }
1130 thread_data[t].ctlfd_pos = ret;
1131 pr_debug2("thread_data[%p]: pollfd[%d] <- ctl_fd=%d\n",
1132 thread_data, thread_data[t].ctlfd_pos,
1133 evlist->core.pollfd.entries[evlist->ctl_fd.pos].fd);
1134 }
1135 }
1136
1137 return 0;
1138
1139out_free:
1140 record__free_thread_data(rec);
1141
1142 return ret;
1143}
1144
cda57a8c 1145static int record__mmap_evlist(struct record *rec,
63503dba 1146 struct evlist *evlist)
cda57a8c 1147{
56f735ff 1148 int i, ret;
cda57a8c 1149 struct record_opts *opts = &rec->opts;
c0a6de06
AH
1150 bool auxtrace_overwrite = opts->auxtrace_snapshot_mode ||
1151 opts->auxtrace_sample_mode;
cda57a8c
WN
1152 char msg[512];
1153
f13de660
AB
1154 if (opts->affinity != PERF_AFFINITY_SYS)
1155 cpu__setup_cpunode_map();
1156
9521b5f2 1157 if (evlist__mmap_ex(evlist, opts->mmap_pages,
cda57a8c 1158 opts->auxtrace_mmap_pages,
c0a6de06 1159 auxtrace_overwrite,
470530bb 1160 opts->nr_cblocks, opts->affinity,
51255a8a 1161 opts->mmap_flush, opts->comp_level) < 0) {
cda57a8c
WN
1162 if (errno == EPERM) {
1163 pr_err("Permission error mapping pages.\n"
1164 "Consider increasing "
1165 "/proc/sys/kernel/perf_event_mlock_kb,\n"
1166 "or try again with a smaller value of -m/--mmap_pages.\n"
1167 "(current value: %u,%u)\n",
1168 opts->mmap_pages, opts->auxtrace_mmap_pages);
1169 return -errno;
1170 } else {
1171 pr_err("failed to mmap with %d (%s)\n", errno,
c8b5f2c9 1172 str_error_r(errno, msg, sizeof(msg)));
cda57a8c
WN
1173 if (errno)
1174 return -errno;
1175 else
1176 return -EINVAL;
1177 }
1178 }
415ccb58
AB
1179
1180 if (evlist__initialize_ctlfd(evlist, opts->ctl_fd, opts->ctl_fd_ack))
1181 return -1;
1182
1183 ret = record__alloc_thread_data(rec, evlist);
1184 if (ret)
1185 return ret;
1186
56f735ff
AB
1187 if (record__threads_enabled(rec)) {
1188 ret = perf_data__create_dir(&rec->data, evlist->core.nr_mmaps);
65e7c963
AB
1189 if (ret) {
1190 pr_err("Failed to create data directory: %s\n", strerror(-ret));
56f735ff 1191 return ret;
65e7c963 1192 }
56f735ff
AB
1193 for (i = 0; i < evlist->core.nr_mmaps; i++) {
1194 if (evlist->mmap)
1195 evlist->mmap[i].file = &rec->data.dir.files[i];
1196 if (evlist->overwrite_mmap)
1197 evlist->overwrite_mmap[i].file = &rec->data.dir.files[i];
1198 }
1199 }
1200
cda57a8c
WN
1201 return 0;
1202}
1203
1204static int record__mmap(struct record *rec)
1205{
1206 return record__mmap_evlist(rec, rec->evlist);
1207}
1208
8c6f45a7 1209static int record__open(struct record *rec)
dd7927f4 1210{
d6195a6a 1211 char msg[BUFSIZ];
32dcd021 1212 struct evsel *pos;
63503dba 1213 struct evlist *evlist = rec->evlist;
d20deb64 1214 struct perf_session *session = rec->session;
b4006796 1215 struct record_opts *opts = &rec->opts;
8d3eca20 1216 int rc = 0;
dd7927f4 1217
d3dbf43c 1218 /*
b91e5492
KL
 1219 * For initial_delay, system-wide recording, or a hybrid system, we need to add a
1220 * dummy event so that we can track PERF_RECORD_MMAP to cover the delay
1221 * of waiting or event synthesis.
d3dbf43c 1222 */
b91e5492
KL
1223 if (opts->initial_delay || target__has_cpu(&opts->target) ||
1224 perf_pmu__has_hybrid()) {
e80db255 1225 pos = evlist__get_tracking_event(evlist);
442ad225
AH
1226 if (!evsel__is_dummy_event(pos)) {
1227 /* Set up dummy event. */
facbf0b9 1228 if (evlist__add_dummy(evlist))
442ad225
AH
1229 return -ENOMEM;
1230 pos = evlist__last(evlist);
e80db255 1231 evlist__set_tracking_event(evlist, pos);
442ad225
AH
1232 }
1233
0a892c1c
IR
1234 /*
1235 * Enable the dummy event when the process is forked for
 1236 * initial_delay, or immediately for system-wide recording.
1237 */
bb07d62e
NK
1238 if (opts->initial_delay && !pos->immediate &&
1239 !target__has_cpu(&opts->target))
0a892c1c
IR
1240 pos->core.attr.enable_on_exec = 1;
1241 else
1242 pos->immediate = 1;
d3dbf43c
ACM
1243 }
1244
78e1bc25 1245 evlist__config(evlist, opts, &callchain_param);
cac21425 1246
e5cadb93 1247 evlist__for_each_entry(evlist, pos) {
dd7927f4 1248try_again:
af663bd0 1249 if (evsel__open(pos, pos->core.cpus, pos->core.threads) < 0) {
ae430892 1250 if (evsel__fallback(pos, errno, msg, sizeof(msg))) {
bb963e16 1251 if (verbose > 0)
c0a54341 1252 ui__warning("%s\n", msg);
d6d901c2
ZY
1253 goto try_again;
1254 }
cf99ad14 1255 if ((errno == EINVAL || errno == EBADF) &&
fba7c866 1256 pos->core.leader != &pos->core &&
cf99ad14 1257 pos->weak_group) {
64b4778b 1258 pos = evlist__reset_weak_group(evlist, pos, true);
cf99ad14
AK
1259 goto try_again;
1260 }
56e52e85 1261 rc = -errno;
2bb72dbb 1262 evsel__open_strerror(pos, &opts->target, errno, msg, sizeof(msg));
56e52e85 1263 ui__error("%s\n", msg);
8d3eca20 1264 goto out;
c171b552 1265 }
bfd8f72c
AK
1266
1267 pos->supported = true;
c171b552 1268 }
a43d3f08 1269
78e1bc25 1270 if (symbol_conf.kptr_restrict && !evlist__exclude_kernel(evlist)) {
c8b567c8
ACM
1271 pr_warning(
1272"WARNING: Kernel address maps (/proc/{kallsyms,modules}) are restricted,\n"
1273"check /proc/sys/kernel/kptr_restrict and /proc/sys/kernel/perf_event_paranoid.\n\n"
1274"Samples in kernel functions may not be resolved if a suitable vmlinux\n"
1275"file is not found in the buildid cache or in the vmlinux path.\n\n"
1276"Samples in kernel modules won't be resolved at all.\n\n"
1277"If some relocation was applied (e.g. kexec) symbols may be misresolved\n"
1278"even with a suitable vmlinux or kallsyms file.\n\n");
1279 }
1280
24bf91a7 1281 if (evlist__apply_filters(evlist, &pos)) {
62d94b00 1282 pr_err("failed to set filter \"%s\" on event %s with %d (%s)\n",
8ab2e96d 1283 pos->filter, evsel__name(pos), errno,
c8b5f2c9 1284 str_error_r(errno, msg, sizeof(msg)));
8d3eca20 1285 rc = -1;
5d8bb1ec
MP
1286 goto out;
1287 }
1288
cda57a8c
WN
1289 rc = record__mmap(rec);
1290 if (rc)
8d3eca20 1291 goto out;
0a27d7f9 1292
563aecb2 1293 session->evlist = evlist;
7b56cce2 1294 perf_session__set_id_hdr_size(session);
8d3eca20
DA
1295out:
1296 return rc;
16c8a109
PZ
1297}
1298
66286ed3
AH
1299static void set_timestamp_boundary(struct record *rec, u64 sample_time)
1300{
1301 if (rec->evlist->first_sample_time == 0)
1302 rec->evlist->first_sample_time = sample_time;
1303
1304 if (sample_time)
1305 rec->evlist->last_sample_time = sample_time;
1306}
1307
e3d59112
NK
1308static int process_sample_event(struct perf_tool *tool,
1309 union perf_event *event,
1310 struct perf_sample *sample,
32dcd021 1311 struct evsel *evsel,
e3d59112
NK
1312 struct machine *machine)
1313{
1314 struct record *rec = container_of(tool, struct record, tool);
1315
66286ed3 1316 set_timestamp_boundary(rec, sample->time);
e3d59112 1317
68588baf
JY
1318 if (rec->buildid_all)
1319 return 0;
1320
1321 rec->samples++;
e3d59112
NK
1322 return build_id__mark_dso_hit(tool, event, sample, evsel, machine);
1323}
1324
8c6f45a7 1325static int process_buildids(struct record *rec)
6122e4e4 1326{
f5fc1412 1327 struct perf_session *session = rec->session;
6122e4e4 1328
45112e89 1329 if (perf_data__size(&rec->data) == 0)
9f591fd7
ACM
1330 return 0;
1331
00dc8657
NK
1332 /*
1333 * During this process, it'll load kernel map and replace the
1334 * dso->long_name to a real pathname it found. In this case
1335 * we prefer the vmlinux path like
1336 * /lib/modules/3.16.4/build/vmlinux
1337 *
1338 * rather than build-id path (in debug directory).
1339 * $HOME/.debug/.build-id/f0/6e17aa50adf4d00b88925e03775de107611551
1340 */
1341 symbol_conf.ignore_vmlinux_buildid = true;
1342
6156681b
NK
1343 /*
 1344 * If --buildid-all is given, it marks all DSOs regardless of hits,
 1345 * so there is no need to process samples. But if timestamp_boundary is
 1346 * enabled, it still needs to walk all samples to get the timestamps of
 1347 * the first/last samples.
6156681b 1348 */
68588baf 1349 if (rec->buildid_all && !rec->timestamp_boundary)
6156681b
NK
1350 rec->tool.sample = NULL;
1351
b7b61cbe 1352 return perf_session__process_events(session);
6122e4e4
ACM
1353}
1354
8115d60c 1355static void perf_event__synthesize_guest_os(struct machine *machine, void *data)
a1645ce1
ZY
1356{
1357 int err;
45694aa7 1358 struct perf_tool *tool = data;
a1645ce1
ZY
1359 /*
 1360 * For the guest kernel, when processing the record & report subcommands,
 1361 * we arrange the module mmap prior to the guest kernel mmap and trigger
 1362 * a dso preload, because default guest module symbols are loaded
 1363 * from guest kallsyms instead of /lib/modules/XXX/XXX. This
 1364 * method is used to avoid missing symbols when the first address is
 1365 * in a module instead of in the guest kernel.
1366 */
45694aa7 1367 err = perf_event__synthesize_modules(tool, process_synthesized_event,
743eb868 1368 machine);
a1645ce1
ZY
1369 if (err < 0)
1370 pr_err("Couldn't record guest kernel [%d]'s reference"
23346f21 1371 " relocation symbol.\n", machine->pid);
a1645ce1 1372
a1645ce1
ZY
1373 /*
1374 * We use _stext for guest kernel because guest kernel's /proc/kallsyms
1375 * have no _text sometimes.
1376 */
45694aa7 1377 err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
0ae617be 1378 machine);
a1645ce1
ZY
1379 if (err < 0)
1380 pr_err("Couldn't record guest kernel [%d]'s reference"
23346f21 1381 " relocation symbol.\n", machine->pid);
a1645ce1
ZY
1382}
1383
98402807
FW
1384static struct perf_event_header finished_round_event = {
1385 .size = sizeof(struct perf_event_header),
1386 .type = PERF_RECORD_FINISHED_ROUND,
1387};
1388
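/* When affinity mode is not the default (sys), bind the current thread to this mmap's affinity mask. */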
a5830532 1389static void record__adjust_affinity(struct record *rec, struct mmap *map)
f13de660
AB
1390{
1391 if (rec->opts.affinity != PERF_AFFINITY_SYS &&
396b626b
AB
1392 !bitmap_equal(thread->mask->affinity.bits, map->affinity_mask.bits,
1393 thread->mask->affinity.nbits)) {
1394 bitmap_zero(thread->mask->affinity.bits, thread->mask->affinity.nbits);
1395 bitmap_or(thread->mask->affinity.bits, thread->mask->affinity.bits,
1396 map->affinity_mask.bits, thread->mask->affinity.nbits);
1397 sched_setaffinity(0, MMAP_CPU_MASK_BYTES(&thread->mask->affinity),
1398 (cpu_set_t *)thread->mask->affinity.bits);
1399 if (verbose == 2) {
1400 pr_debug("threads[%d]: running on cpu%d: ", thread->tid, sched_getcpu());
1401 mmap_cpu_mask__scnprintf(&thread->mask->affinity, "affinity");
1402 }
f13de660
AB
1403 }
1404}
1405
5d7f4116
AB
1406static size_t process_comp_header(void *record, size_t increment)
1407{
72932371 1408 struct perf_record_compressed *event = record;
5d7f4116
AB
1409 size_t size = sizeof(*event);
1410
1411 if (increment) {
1412 event->header.size += increment;
1413 return increment;
1414 }
1415
1416 event->header.type = PERF_RECORD_COMPRESSED;
1417 event->header.size = size;
1418
1419 return size;
1420}
1421
75f5f1fc
AB
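/*
 * Compress src into dst as PERF_RECORD_COMPRESSED records, using the per-mmap
 * zstd state in threaded (directory) mode, and account the transferred and
 * compressed byte counts per thread or per session.
 */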
1422static size_t zstd_compress(struct perf_session *session, struct mmap *map,
1423 void *dst, size_t dst_size, void *src, size_t src_size)
5d7f4116
AB
1424{
1425 size_t compressed;
72932371 1426 size_t max_record_size = PERF_SAMPLE_MAX_SIZE - sizeof(struct perf_record_compressed) - 1;
75f5f1fc 1427 struct zstd_data *zstd_data = &session->zstd_data;
5d7f4116 1428
75f5f1fc
AB
1429 if (map && map->file)
1430 zstd_data = &map->zstd_data;
1431
1432 compressed = zstd_compress_stream_to_records(zstd_data, dst, dst_size, src, src_size,
5d7f4116
AB
1433 max_record_size, process_comp_header);
1434
610fbc01
AB
1435 if (map && map->file) {
1436 thread->bytes_transferred += src_size;
1437 thread->bytes_compressed += compressed;
1438 } else {
1439 session->bytes_transferred += src_size;
1440 session->bytes_compressed += compressed;
1441 }
5d7f4116
AB
1442
1443 return compressed;
1444}
1445
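/*
 * Push this thread's mmaps (regular or overwrite) to the output, via aio when
 * enabled; 'synch' temporarily sets the flush threshold to 1 so every buffer
 * is fully drained.
 */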
63503dba 1446static int record__mmap_read_evlist(struct record *rec, struct evlist *evlist,
470530bb 1447 bool overwrite, bool synch)
98402807 1448{
dcabb507 1449 u64 bytes_written = rec->bytes_written;
0e2e63dd 1450 int i;
8d3eca20 1451 int rc = 0;
396b626b
AB
1452 int nr_mmaps;
1453 struct mmap **maps;
d3d1af6f 1454 int trace_fd = rec->data.file.fd;
ef781128 1455 off_t off = 0;
98402807 1456
cb21686b
WN
1457 if (!evlist)
1458 return 0;
ef149c25 1459
396b626b
AB
1460 nr_mmaps = thread->nr_mmaps;
1461 maps = overwrite ? thread->overwrite_maps : thread->maps;
1462
a4ea0ec4
WN
1463 if (!maps)
1464 return 0;
1465
0b72d69a 1466 if (overwrite && evlist->bkw_mmap_state != BKW_MMAP_DATA_PENDING)
54cc54de
WN
1467 return 0;
1468
d3d1af6f
AB
1469 if (record__aio_enabled(rec))
1470 off = record__aio_get_pos(trace_fd);
1471
396b626b 1472 for (i = 0; i < nr_mmaps; i++) {
470530bb 1473 u64 flush = 0;
396b626b 1474 struct mmap *map = maps[i];
cb21686b 1475
547740f7 1476 if (map->core.base) {
f13de660 1477 record__adjust_affinity(rec, map);
470530bb 1478 if (synch) {
65aa2e6b
JO
1479 flush = map->core.flush;
1480 map->core.flush = 1;
470530bb 1481 }
d3d1af6f 1482 if (!record__aio_enabled(rec)) {
ef781128 1483 if (perf_mmap__push(map, rec, record__pushfn) < 0) {
470530bb 1484 if (synch)
65aa2e6b 1485 map->core.flush = flush;
d3d1af6f
AB
1486 rc = -1;
1487 goto out;
1488 }
1489 } else {
ef781128 1490 if (record__aio_push(rec, map, &off) < 0) {
d3d1af6f 1491 record__aio_set_pos(trace_fd, off);
470530bb 1492 if (synch)
65aa2e6b 1493 map->core.flush = flush;
d3d1af6f
AB
1494 rc = -1;
1495 goto out;
1496 }
8d3eca20 1497 }
470530bb 1498 if (synch)
65aa2e6b 1499 map->core.flush = flush;
8d3eca20 1500 }
ef149c25 1501
e035f4ca 1502 if (map->auxtrace_mmap.base && !rec->opts.auxtrace_snapshot_mode &&
c0a6de06 1503 !rec->opts.auxtrace_sample_mode &&
e035f4ca 1504 record__auxtrace_mmap_read(rec, map) != 0) {
ef149c25
AH
1505 rc = -1;
1506 goto out;
1507 }
98402807
FW
1508 }
1509
d3d1af6f
AB
1510 if (record__aio_enabled(rec))
1511 record__aio_set_pos(trace_fd, off);
1512
dcabb507
JO
1513 /*
1514 * Mark the round finished in case we wrote
1515 * at least one event.
56f735ff
AB
1516 *
1517 * No need for round events in directory mode,
1518 * because per-cpu maps and files have data
1519 * sorted by kernel.
dcabb507 1520 */
56f735ff 1521 if (!record__threads_enabled(rec) && bytes_written != rec->bytes_written)
ded2b8fe 1522 rc = record__write(rec, NULL, &finished_round_event, sizeof(finished_round_event));
8d3eca20 1523
0b72d69a 1524 if (overwrite)
ade9d208 1525 evlist__toggle_bkw_mmap(evlist, BKW_MMAP_EMPTY);
8d3eca20
DA
1526out:
1527 return rc;
98402807
FW
1528}
1529
470530bb 1530static int record__mmap_read_all(struct record *rec, bool synch)
cb21686b
WN
1531{
1532 int err;
1533
470530bb 1534 err = record__mmap_read_evlist(rec, rec->evlist, false, synch);
cb21686b
WN
1535 if (err)
1536 return err;
1537
470530bb 1538 return record__mmap_read_evlist(rec, rec->evlist, true, synch);
cb21686b
WN
1539}
1540
396b626b
AB
1541static void record__thread_munmap_filtered(struct fdarray *fda, int fd,
1542 void *arg __maybe_unused)
1543{
1544 struct perf_mmap *map = fda->priv[fd].ptr;
1545
1546 if (map)
1547 perf_mmap__put(map);
1548}
1549
3217e9fe
AB
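/*
 * Worker thread body: drain the thread's mmaps, poll when no new samples have
 * arrived, and terminate once the main thread closes the message pipe.
 */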
1550static void *record__thread(void *arg)
1551{
1552 enum thread_msg msg = THREAD_MSG__READY;
1553 bool terminate = false;
1554 struct fdarray *pollfd;
1555 int err, ctlfd_pos;
1556
1557 thread = arg;
1558 thread->tid = gettid();
1559
1560 err = write(thread->pipes.ack[1], &msg, sizeof(msg));
1561 if (err == -1)
1562 pr_warning("threads[%d]: failed to notify on start: %s\n",
1563 thread->tid, strerror(errno));
1564
1565 pr_debug("threads[%d]: started on cpu%d\n", thread->tid, sched_getcpu());
1566
1567 pollfd = &thread->pollfd;
1568 ctlfd_pos = thread->ctlfd_pos;
1569
1570 for (;;) {
1571 unsigned long long hits = thread->samples;
1572
1573 if (record__mmap_read_all(thread->rec, false) < 0 || terminate)
1574 break;
1575
1576 if (hits == thread->samples) {
1577
1578 err = fdarray__poll(pollfd, -1);
1579 /*
 1580 * Propagate an error only if there is one. Ignore a positive
 1581 * number of returned events and interrupt errors.
1582 */
1583 if (err > 0 || (err < 0 && errno == EINTR))
1584 err = 0;
1585 thread->waking++;
1586
1587 if (fdarray__filter(pollfd, POLLERR | POLLHUP,
1588 record__thread_munmap_filtered, NULL) == 0)
1589 break;
1590 }
1591
1592 if (pollfd->entries[ctlfd_pos].revents & POLLHUP) {
1593 terminate = true;
1594 close(thread->pipes.msg[0]);
1595 thread->pipes.msg[0] = -1;
1596 pollfd->entries[ctlfd_pos].fd = -1;
1597 pollfd->entries[ctlfd_pos].events = 0;
1598 }
1599
1600 pollfd->entries[ctlfd_pos].revents = 0;
1601 }
1602 record__mmap_read_all(thread->rec, true);
1603
1604 err = write(thread->pipes.ack[1], &msg, sizeof(msg));
1605 if (err == -1)
1606 pr_warning("threads[%d]: failed to notify on termination: %s\n",
1607 thread->tid, strerror(errno));
1608
1609 return NULL;
1610}
1611
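/* Set every perf.data header feature bit, then clear the ones not applicable to the current recording options. */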
8c6f45a7 1612static void record__init_features(struct record *rec)
57706abc 1613{
57706abc
DA
1614 struct perf_session *session = rec->session;
1615 int feat;
1616
1617 for (feat = HEADER_FIRST_FEATURE; feat < HEADER_LAST_FEATURE; feat++)
1618 perf_header__set_feat(&session->header, feat);
1619
1620 if (rec->no_buildid)
1621 perf_header__clear_feat(&session->header, HEADER_BUILD_ID);
1622
ce9036a6 1623 if (!have_tracepoints(&rec->evlist->core.entries))
57706abc
DA
1624 perf_header__clear_feat(&session->header, HEADER_TRACING_DATA);
1625
1626 if (!rec->opts.branch_stack)
1627 perf_header__clear_feat(&session->header, HEADER_BRANCH_STACK);
ef149c25
AH
1628
1629 if (!rec->opts.full_auxtrace)
1630 perf_header__clear_feat(&session->header, HEADER_AUXTRACE);
ffa517ad 1631
cf790516
AB
1632 if (!(rec->opts.use_clockid && rec->opts.clockid_res_ns))
1633 perf_header__clear_feat(&session->header, HEADER_CLOCKID);
1634
d1e325cf
JO
1635 if (!rec->opts.use_clockid)
1636 perf_header__clear_feat(&session->header, HEADER_CLOCK_DATA);
1637
56f735ff
AB
1638 if (!record__threads_enabled(rec))
1639 perf_header__clear_feat(&session->header, HEADER_DIR_FORMAT);
1640
42e1fd80
AB
1641 if (!record__comp_enabled(rec))
1642 perf_header__clear_feat(&session->header, HEADER_COMPRESSED);
258031c0 1643
ffa517ad 1644 perf_header__clear_feat(&session->header, HEADER_STAT);
57706abc
DA
1645}
1646
e1ab48ba
WN
1647static void
1648record__finish_output(struct record *rec)
1649{
56f735ff 1650 int i;
8ceb41d7
JO
1651 struct perf_data *data = &rec->data;
1652 int fd = perf_data__fd(data);
e1ab48ba 1653
8ceb41d7 1654 if (data->is_pipe)
e1ab48ba
WN
1655 return;
1656
1657 rec->session->header.data_size += rec->bytes_written;
45112e89 1658 data->file.size = lseek(perf_data__fd(data), 0, SEEK_CUR);
56f735ff
AB
1659 if (record__threads_enabled(rec)) {
1660 for (i = 0; i < data->dir.nr; i++)
1661 data->dir.files[i].size = lseek(data->dir.files[i].fd, 0, SEEK_CUR);
1662 }
e1ab48ba
WN
1663
1664 if (!rec->no_buildid) {
1665 process_buildids(rec);
1666
1667 if (rec->buildid_all)
1668 dsos__hit_all(rec->session);
1669 }
1670 perf_session__write_header(rec->session, rec->evlist, fd, true);
1671
1672 return;
1673}
1674
4ea648ae 1675static int record__synthesize_workload(struct record *rec, bool tail)
be7b0c9e 1676{
9d6aae72 1677 int err;
9749b90e 1678 struct perf_thread_map *thread_map;
41b740b6 1679 bool needs_mmap = rec->opts.synth & PERF_SYNTH_MMAP;
be7b0c9e 1680
4ea648ae
WN
1681 if (rec->opts.tail_synthesize != tail)
1682 return 0;
1683
9d6aae72
ACM
1684 thread_map = thread_map__new_by_tid(rec->evlist->workload.pid);
1685 if (thread_map == NULL)
1686 return -1;
1687
1688 err = perf_event__synthesize_thread_map(&rec->tool, thread_map,
be7b0c9e
WN
1689 process_synthesized_event,
1690 &rec->session->machines.host,
41b740b6 1691 needs_mmap,
3fcb10e4 1692 rec->opts.sample_address);
7836e52e 1693 perf_thread_map__put(thread_map);
9d6aae72 1694 return err;
be7b0c9e
WN
1695}
1696
4ea648ae 1697static int record__synthesize(struct record *rec, bool tail);
3c1cb7e3 1698
ecfd7a9c
WN
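/*
 * Finalize the current output file, rotate to a new timestamp-suffixed
 * perf.data and, unless exiting, resynthesize tracking events so the new
 * file is self-contained.
 */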
1699static int
1700record__switch_output(struct record *rec, bool at_exit)
1701{
8ceb41d7 1702 struct perf_data *data = &rec->data;
ecfd7a9c 1703 int fd, err;
03724b2e 1704 char *new_filename;
ecfd7a9c
WN
1705
1706 /* Same Size: "2015122520103046"*/
1707 char timestamp[] = "InvalidTimestamp";
1708
d3d1af6f
AB
1709 record__aio_mmap_read_sync(rec);
1710
4ea648ae
WN
1711 record__synthesize(rec, true);
1712 if (target__none(&rec->opts.target))
1713 record__synthesize_workload(rec, true);
1714
ecfd7a9c
WN
1715 rec->samples = 0;
1716 record__finish_output(rec);
1717 err = fetch_current_timestamp(timestamp, sizeof(timestamp));
1718 if (err) {
1719 pr_err("Failed to get current timestamp\n");
1720 return -EINVAL;
1721 }
1722
8ceb41d7 1723 fd = perf_data__switch(data, timestamp,
ecfd7a9c 1724 rec->session->header.data_offset,
03724b2e 1725 at_exit, &new_filename);
ecfd7a9c
WN
1726 if (fd >= 0 && !at_exit) {
1727 rec->bytes_written = 0;
1728 rec->session->header.data_size = 0;
1729 }
1730
1731 if (!quiet)
1732 fprintf(stderr, "[ perf record: Dump %s.%s ]\n",
2d4f2799 1733 data->path, timestamp);
3c1cb7e3 1734
03724b2e
AK
1735 if (rec->switch_output.num_files) {
1736 int n = rec->switch_output.cur_file + 1;
1737
1738 if (n >= rec->switch_output.num_files)
1739 n = 0;
1740 rec->switch_output.cur_file = n;
1741 if (rec->switch_output.filenames[n]) {
1742 remove(rec->switch_output.filenames[n]);
d8f9da24 1743 zfree(&rec->switch_output.filenames[n]);
03724b2e
AK
1744 }
1745 rec->switch_output.filenames[n] = new_filename;
1746 } else {
1747 free(new_filename);
1748 }
1749
3c1cb7e3 1750 /* Output tracking events */
be7b0c9e 1751 if (!at_exit) {
4ea648ae 1752 record__synthesize(rec, false);
3c1cb7e3 1753
be7b0c9e
WN
1754 /*
1755 * In 'perf record --switch-output' without -a,
1756 * record__synthesize() in record__switch_output() won't
 1757 * generate tracking events because there's no thread_map
 1758 * in evlist, which causes the newly created perf.data to not
 1759 * contain map and comm information.
1760 * Create a fake thread_map and directly call
1761 * perf_event__synthesize_thread_map() for those events.
1762 */
1763 if (target__none(&rec->opts.target))
4ea648ae 1764 record__synthesize_workload(rec, false);
be7b0c9e 1765 }
ecfd7a9c
WN
1766 return fd;
1767}
1768
f33cbe72
ACM
1769static volatile int workload_exec_errno;
1770
1771/*
7b392ef0 1772 * evlist__prepare_workload will send a SIGUSR1
f33cbe72
ACM
 1773 * if the fork fails, since we asked for it by setting its
1774 * want_signal to true.
1775 */
45604710
NK
1776static void workload_exec_failed_signal(int signo __maybe_unused,
1777 siginfo_t *info,
f33cbe72
ACM
1778 void *ucontext __maybe_unused)
1779{
1780 workload_exec_errno = info->si_value.sival_int;
1781 done = 1;
f33cbe72
ACM
1782 child_finished = 1;
1783}
1784
2dd6d8a1 1785static void snapshot_sig_handler(int sig);
bfacbe3b 1786static void alarm_sig_handler(int sig);
2dd6d8a1 1787
db0ea13c 1788static const struct perf_event_mmap_page *evlist__pick_pc(struct evlist *evlist)
ee667f94 1789{
b2cb615d 1790 if (evlist) {
547740f7
JO
1791 if (evlist->mmap && evlist->mmap[0].core.base)
1792 return evlist->mmap[0].core.base;
1793 if (evlist->overwrite_mmap && evlist->overwrite_mmap[0].core.base)
1794 return evlist->overwrite_mmap[0].core.base;
b2cb615d 1795 }
ee667f94
WN
1796 return NULL;
1797}
1798
c45628b0
WN
1799static const struct perf_event_mmap_page *record__pick_pc(struct record *rec)
1800{
db0ea13c 1801 const struct perf_event_mmap_page *pc = evlist__pick_pc(rec->evlist);
ee667f94
WN
1802 if (pc)
1803 return pc;
c45628b0
WN
1804 return NULL;
1805}
1806
4ea648ae 1807static int record__synthesize(struct record *rec, bool tail)
c45c86eb
WN
1808{
1809 struct perf_session *session = rec->session;
1810 struct machine *machine = &session->machines.host;
8ceb41d7 1811 struct perf_data *data = &rec->data;
c45c86eb
WN
1812 struct record_opts *opts = &rec->opts;
1813 struct perf_tool *tool = &rec->tool;
c45c86eb 1814 int err = 0;
d99c22ea 1815 event_op f = process_synthesized_event;
c45c86eb 1816
4ea648ae
WN
1817 if (rec->opts.tail_synthesize != tail)
1818 return 0;
1819
8ceb41d7 1820 if (data->is_pipe) {
c3a057dc 1821 err = perf_event__synthesize_for_pipe(tool, session, data,
a2015516 1822 process_synthesized_event);
c3a057dc
NK
1823 if (err < 0)
1824 goto out;
a2015516 1825
c3a057dc 1826 rec->bytes_written += err;
c45c86eb
WN
1827 }
1828
c45628b0 1829 err = perf_event__synth_time_conv(record__pick_pc(rec), tool,
46bc29b9
AH
1830 process_synthesized_event, machine);
1831 if (err)
1832 goto out;
1833
c0a6de06 1834 /* Synthesize id_index before auxtrace_info */
61750473 1835 if (rec->opts.auxtrace_sample_mode || rec->opts.full_auxtrace) {
c0a6de06
AH
1836 err = perf_event__synthesize_id_index(tool,
1837 process_synthesized_event,
1838 session->evlist, machine);
1839 if (err)
1840 goto out;
1841 }
1842
c45c86eb
WN
1843 if (rec->opts.full_auxtrace) {
1844 err = perf_event__synthesize_auxtrace_info(rec->itr, tool,
1845 session, process_synthesized_event);
1846 if (err)
1847 goto out;
1848 }
1849
78e1bc25 1850 if (!evlist__exclude_kernel(rec->evlist)) {
6c443954
ACM
1851 err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
1852 machine);
1853 WARN_ONCE(err < 0, "Couldn't record kernel reference relocation symbol\n"
1854 "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
1855 "Check /proc/kallsyms permission or run as root.\n");
1856
1857 err = perf_event__synthesize_modules(tool, process_synthesized_event,
1858 machine);
1859 WARN_ONCE(err < 0, "Couldn't record kernel module information.\n"
1860 "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
1861 "Check /proc/modules permission or run as root.\n");
1862 }
c45c86eb
WN
1863
1864 if (perf_guest) {
1865 machines__process_guests(&session->machines,
1866 perf_event__synthesize_guest_os, tool);
1867 }
1868
bfd8f72c
AK
1869 err = perf_event__synthesize_extra_attr(&rec->tool,
1870 rec->evlist,
1871 process_synthesized_event,
1872 data->is_pipe);
1873 if (err)
1874 goto out;
1875
03617c22 1876 err = perf_event__synthesize_thread_map2(&rec->tool, rec->evlist->core.threads,
373565d2
AK
1877 process_synthesized_event,
1878 NULL);
1879 if (err < 0) {
1880 pr_err("Couldn't synthesize thread map.\n");
1881 return err;
1882 }
1883
0df6ade7 1884 err = perf_event__synthesize_cpu_map(&rec->tool, rec->evlist->core.user_requested_cpus,
373565d2
AK
1885 process_synthesized_event, NULL);
1886 if (err < 0) {
1887 pr_err("Couldn't synthesize cpu map.\n");
1888 return err;
1889 }
1890
e5416950 1891 err = perf_event__synthesize_bpf_events(session, process_synthesized_event,
7b612e29
SL
1892 machine, opts);
1893 if (err < 0)
1894 pr_warning("Couldn't synthesize bpf events.\n");
1895
41b740b6
NK
1896 if (rec->opts.synth & PERF_SYNTH_CGROUP) {
1897 err = perf_event__synthesize_cgroups(tool, process_synthesized_event,
1898 machine);
1899 if (err < 0)
1900 pr_warning("Couldn't synthesize cgroup events.\n");
1901 }
ab64069f 1902
d99c22ea
SE
1903 if (rec->opts.nr_threads_synthesize > 1) {
1904 perf_set_multithreaded();
1905 f = process_locked_synthesized_event;
1906 }
1907
41b740b6
NK
1908 if (rec->opts.synth & PERF_SYNTH_TASK) {
1909 bool needs_mmap = rec->opts.synth & PERF_SYNTH_MMAP;
1910
1911 err = __machine__synthesize_threads(machine, tool, &opts->target,
1912 rec->evlist->core.threads,
1913 f, needs_mmap, opts->sample_address,
1914 rec->opts.nr_threads_synthesize);
1915 }
d99c22ea
SE
1916
1917 if (rec->opts.nr_threads_synthesize > 1)
1918 perf_set_singlethreaded();
1919
c45c86eb
WN
1920out:
1921 return err;
1922}
1923
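/*
 * Side-band evlist callback: when a --switch-output-event sample shows
 * up, poke the main record thread with SIGUSR2 so it switches to a new
 * output file, the same path taken for --switch-output=signal.
 */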
899e5ffb
ACM
1924static int record__process_signal_event(union perf_event *event __maybe_unused, void *data)
1925{
1926 struct record *rec = data;
1927 pthread_kill(rec->thread_id, SIGUSR2);
1928 return 0;
1929}
1930
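/*
 * Set up the side-band evlist: attach the SIGUSR2 callback when
 * --switch-output-event was used and, when built with libbpf support,
 * add the PERF_RECORD_BPF_EVENT side-band event so BPF programs loaded
 * during the session remain annotatable, then start the side-band thread.
 */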
23cbb41c
ACM
1931static int record__setup_sb_evlist(struct record *rec)
1932{
1933 struct record_opts *opts = &rec->opts;
1934
1935 if (rec->sb_evlist != NULL) {
1936 /*
1937 * We get here if --switch-output-event populated the
1938 * sb_evlist, so associate a callback that will send a SIGUSR2
1939 * to the main thread.
1940 */
1941 evlist__set_cb(rec->sb_evlist, record__process_signal_event, rec);
1942 rec->thread_id = pthread_self();
1943 }
1101c872 1944#ifdef HAVE_LIBBPF_SUPPORT
23cbb41c
ACM
1945 if (!opts->no_bpf_event) {
1946 if (rec->sb_evlist == NULL) {
1947 rec->sb_evlist = evlist__new();
1948
1949 if (rec->sb_evlist == NULL) {
1950 pr_err("Couldn't create side band evlist.\n.");
1951 return -1;
1952 }
1953 }
1954
1955 if (evlist__add_bpf_sb_event(rec->sb_evlist, &rec->session->header.env)) {
1956 pr_err("Couldn't ask for PERF_RECORD_BPF_EVENT side band events.\n.");
1957 return -1;
1958 }
1959 }
1101c872 1960#endif
08c83997 1961 if (evlist__start_sb_thread(rec->sb_evlist, &rec->opts.target)) {
23cbb41c
ACM
1962 pr_debug("Couldn't start the BPF side band thread:\nBPF programs starting from now on won't be annotatable\n");
1963 opts->no_bpf_event = true;
1964 }
1965
1966 return 0;
1967}
1968
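/*
 * When -k/--clockid is used, take one reference reading of both
 * gettimeofday() and the selected clockid and store them (plus the
 * clock id and its resolution) in the header env, so that readers of
 * the perf.data file can relate sample timestamps to wall-clock time.
 */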
d1e325cf
JO
1969static int record__init_clock(struct record *rec)
1970{
1971 struct perf_session *session = rec->session;
1972 struct timespec ref_clockid;
1973 struct timeval ref_tod;
1974 u64 ref;
1975
1976 if (!rec->opts.use_clockid)
1977 return 0;
1978
9d88a1a1
JO
1979 if (rec->opts.use_clockid && rec->opts.clockid_res_ns)
1980 session->header.env.clock.clockid_res_ns = rec->opts.clockid_res_ns;
1981
d1e325cf
JO
1982 session->header.env.clock.clockid = rec->opts.clockid;
1983
1984 if (gettimeofday(&ref_tod, NULL) != 0) {
1985 pr_err("gettimeofday failed, cannot set reference time.\n");
1986 return -1;
1987 }
1988
1989 if (clock_gettime(rec->opts.clockid, &ref_clockid)) {
1990 pr_err("clock_gettime failed, cannot set reference time.\n");
1991 return -1;
1992 }
1993
1994 ref = (u64) ref_tod.tv_sec * NSEC_PER_SEC +
1995 (u64) ref_tod.tv_usec * NSEC_PER_USEC;
1996
1997 session->header.env.clock.tod_ns = ref;
1998
1999 ref = (u64) ref_clockid.tv_sec * NSEC_PER_SEC +
2000 (u64) ref_clockid.tv_nsec;
2001
2002 session->header.env.clock.clockid_ns = ref;
2003 return 0;
2004}
2005
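/*
 * Common path for SIGUSR2 and the 'snapshot' control command: mark the
 * auxtrace snapshot trigger as hit and start an AUX area snapshot,
 * recording an error on the trigger if the snapshot cannot be started.
 */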
d20aff15
AH
2006static void hit_auxtrace_snapshot_trigger(struct record *rec)
2007{
2008 if (trigger_is_ready(&auxtrace_snapshot_trigger)) {
2009 trigger_hit(&auxtrace_snapshot_trigger);
2010 auxtrace_record__snapshot_started = 1;
2011 if (auxtrace_record__snapshot_start(rec->itr))
2012 trigger_error(&auxtrace_snapshot_trigger);
2013 }
2014}
2015
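/*
 * On hybrid systems (multiple core PMUs, e.g. cpu_core/cpu_atom) the
 * same event name can be opened on several PMUs, so rewrite plain
 * hybrid event names as "pmu/event/" to keep them distinguishable in
 * the output.
 */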
91c0f5ec
JY
2016static void record__uniquify_name(struct record *rec)
2017{
2018 struct evsel *pos;
2019 struct evlist *evlist = rec->evlist;
2020 char *new_name;
2021 int ret;
2022
2023 if (!perf_pmu__has_hybrid())
2024 return;
2025
2026 evlist__for_each_entry(evlist, pos) {
2027 if (!evsel__is_hybrid(pos))
2028 continue;
2029
2030 if (strchr(pos->name, '/'))
2031 continue;
2032
2033 ret = asprintf(&new_name, "%s/%s/",
2034 pos->pmu_name, pos->name);
2035 if (ret > 0) { /* asprintf() returns -1 on error */
2036 free(pos->name);
2037 pos->name = new_name;
2038 }
2039 }
2040}
2041
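/*
 * Ask a worker thread to stop by closing the write end of its message
 * pipe, then wait for the thread's acknowledgment on the ack pipe
 * before returning.
 */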
1e5de7d9
AB
2042static int record__terminate_thread(struct record_thread *thread_data)
2043{
2044 int err;
2045 enum thread_msg ack = THREAD_MSG__UNDEFINED;
2046 pid_t tid = thread_data->tid;
2047
2048 close(thread_data->pipes.msg[1]);
2049 thread_data->pipes.msg[1] = -1;
2050 err = read(thread_data->pipes.ack[0], &ack, sizeof(ack));
2051 if (err > 0)
2052 pr_debug2("threads[%d]: sent %s\n", tid, thread_msg_tags[ack]);
2053 else
2054 pr_warning("threads[%d]: failed to receive termination notification from %d\n",
2055 thread->tid, tid);
2056
2057 return 0;
2058}
2059
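/*
 * Spawn the worker threads (thread_data[1..n-1]) with all signals
 * blocked and, where supported, an affinity mask matching the mmaps
 * they will read; wait for each thread's start acknowledgment, then
 * pin thread_data[0] (this thread) to its own affinity mask.
 */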
396b626b
AB
2060static int record__start_threads(struct record *rec)
2061{
3217e9fe 2062 int t, tt, err, ret = 0, nr_threads = rec->nr_threads;
396b626b 2063 struct record_thread *thread_data = rec->thread_data;
3217e9fe
AB
2064 sigset_t full, mask;
2065 pthread_t handle;
2066 pthread_attr_t attrs;
396b626b
AB
2067
2068 thread = &thread_data[0];
2069
3217e9fe
AB
2070 if (!record__threads_enabled(rec))
2071 return 0;
2072
2073 sigfillset(&full);
2074 if (sigprocmask(SIG_SETMASK, &full, &mask)) {
2075 pr_err("Failed to block signals on threads start: %s\n", strerror(errno));
2076 return -1;
2077 }
2078
2079 pthread_attr_init(&attrs);
2080 pthread_attr_setdetachstate(&attrs, PTHREAD_CREATE_DETACHED);
2081
2082 for (t = 1; t < nr_threads; t++) {
2083 enum thread_msg msg = THREAD_MSG__UNDEFINED;
2084
2085#ifdef HAVE_PTHREAD_ATTR_SETAFFINITY_NP
2086 pthread_attr_setaffinity_np(&attrs,
2087 MMAP_CPU_MASK_BYTES(&(thread_data[t].mask->affinity)),
2088 (cpu_set_t *)(thread_data[t].mask->affinity.bits));
2089#endif
2090 if (pthread_create(&handle, &attrs, record__thread, &thread_data[t])) {
2091 for (tt = 1; tt < t; tt++)
2092 record__terminate_thread(&thread_data[t]);
2093 pr_err("Failed to start threads: %s\n", strerror(errno));
2094 ret = -1;
2095 goto out_err;
2096 }
2097
2098 err = read(thread_data[t].pipes.ack[0], &msg, sizeof(msg));
2099 if (err > 0)
2100 pr_debug2("threads[%d]: sent %s\n", rec->thread_data[t].tid,
2101 thread_msg_tags[msg]);
2102 else
2103 pr_warning("threads[%d]: failed to receive start notification from %d\n",
2104 thread->tid, rec->thread_data[t].tid);
2105 }
2106
2107 sched_setaffinity(0, MMAP_CPU_MASK_BYTES(&thread->mask->affinity),
2108 (cpu_set_t *)thread->mask->affinity.bits);
2109
396b626b
AB
2110 pr_debug("threads[%d]: started on cpu%d\n", thread->tid, sched_getcpu());
2111
3217e9fe
AB
2112out_err:
2113 pthread_attr_destroy(&attrs);
2114
2115 if (sigprocmask(SIG_SETMASK, &mask, NULL)) {
2116 pr_err("Failed to unblock signals on threads start: %s\n", strerror(errno));
2117 ret = -1;
2118 }
2119
2120 return ret;
396b626b
AB
2121}
2122
2123static int record__stop_threads(struct record *rec)
2124{
2125 int t;
2126 struct record_thread *thread_data = rec->thread_data;
2127
1e5de7d9
AB
2128 for (t = 1; t < rec->nr_threads; t++)
2129 record__terminate_thread(&thread_data[t]);
2130
610fbc01 2131 for (t = 0; t < rec->nr_threads; t++) {
396b626b 2132 rec->samples += thread_data[t].samples;
610fbc01
AB
2133 if (!record__threads_enabled(rec))
2134 continue;
2135 rec->session->bytes_transferred += thread_data[t].bytes_transferred;
2136 rec->session->bytes_compressed += thread_data[t].bytes_compressed;
2137 pr_debug("threads[%d]: samples=%lld, wakes=%ld, ", thread_data[t].tid,
2138 thread_data[t].samples, thread_data[t].waking);
2139 if (thread_data[t].bytes_transferred && thread_data[t].bytes_compressed)
2140 pr_debug("transferred=%" PRIu64 ", compressed=%" PRIu64 "\n",
2141 thread_data[t].bytes_transferred, thread_data[t].bytes_compressed);
2142 else
2143 pr_debug("written=%" PRIu64 "\n", thread_data[t].bytes_written);
2144 }
396b626b
AB
2145
2146 return 0;
2147}
2148
2149static unsigned long record__waking(struct record *rec)
2150{
2151 int t;
2152 unsigned long waking = 0;
2153 struct record_thread *thread_data = rec->thread_data;
2154
2155 for (t = 0; t < rec->nr_threads; t++)
2156 waking += thread_data[t].waking;
2157
2158 return waking;
2159}
2160
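/*
 * Main body of 'perf record': install signal handlers, create the
 * session and output file, synthesize the startup events, start the
 * workload and worker threads, then loop reading the mmaps until done
 * or draining, and finally stop the threads, synthesize the tail
 * events and finish (or switch) the output.
 */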
8c6f45a7 2161static int __cmd_record(struct record *rec, int argc, const char **argv)
16c8a109 2162{
57706abc 2163 int err;
45604710 2164 int status = 0;
46be604b 2165 const bool forks = argc > 0;
45694aa7 2166 struct perf_tool *tool = &rec->tool;
b4006796 2167 struct record_opts *opts = &rec->opts;
8ceb41d7 2168 struct perf_data *data = &rec->data;
d20deb64 2169 struct perf_session *session;
6dcf45ef 2170 bool disabled = false, draining = false;
42aa276f 2171 int fd;
d3c8c08e 2172 float ratio = 0;
acce0223 2173 enum evlist_ctl_cmd cmd = EVLIST_CTL_CMD_UNSUPPORTED;
de9ac07b 2174
45604710 2175 atexit(record__sig_exit);
f5970550
PZ
2176 signal(SIGCHLD, sig_handler);
2177 signal(SIGINT, sig_handler);
804f7ac7 2178 signal(SIGTERM, sig_handler);
a074865e 2179 signal(SIGSEGV, sigsegv_handler);
c0bdc1c4 2180
f3b3614a
HB
2181 if (rec->opts.record_namespaces)
2182 tool->namespace_events = true;
2183
8fb4b679
NK
2184 if (rec->opts.record_cgroup) {
2185#ifdef HAVE_FILE_HANDLE
2186 tool->cgroup_events = true;
2187#else
2188 pr_err("cgroup tracking is not supported\n");
2189 return -1;
2190#endif
2191 }
2192
dc0c6127 2193 if (rec->opts.auxtrace_snapshot_mode || rec->switch_output.enabled) {
2dd6d8a1 2194 signal(SIGUSR2, snapshot_sig_handler);
3c1cb7e3
WN
2195 if (rec->opts.auxtrace_snapshot_mode)
2196 trigger_on(&auxtrace_snapshot_trigger);
dc0c6127 2197 if (rec->switch_output.enabled)
3c1cb7e3 2198 trigger_on(&switch_output_trigger);
c0bdc1c4 2199 } else {
2dd6d8a1 2200 signal(SIGUSR2, SIG_IGN);
c0bdc1c4 2201 }
f5970550 2202
2681bd85 2203 session = perf_session__new(data, tool);
6ef81c55 2204 if (IS_ERR(session)) {
ffa91880 2205 pr_err("Perf session creation failed.\n");
6ef81c55 2206 return PTR_ERR(session);
a9a70bbc
ACM
2207 }
2208
b5f2511d
AB
2209 if (record__threads_enabled(rec)) {
2210 if (perf_data__is_pipe(&rec->data)) {
2211 pr_err("Parallel trace streaming is not available in pipe mode.\n");
2212 return -1;
2213 }
2214 if (rec->opts.full_auxtrace) {
2215 pr_err("Parallel trace streaming is not available in AUX area tracing mode.\n");
2216 return -1;
2217 }
2218 }
2219
8ceb41d7 2220 fd = perf_data__fd(data);
d20deb64
ACM
2221 rec->session = session;
2222
5d7f4116
AB
2223 if (zstd_init(&session->zstd_data, rec->opts.comp_level) < 0) {
2224 pr_err("Compression initialization failed.\n");
2225 return -1;
2226 }
da231338
AM
2227#ifdef HAVE_EVENTFD_SUPPORT
2228 done_fd = eventfd(0, EFD_NONBLOCK);
2229 if (done_fd < 0) {
2230 pr_err("Failed to create wakeup eventfd, error: %m\n");
2231 status = -1;
2232 goto out_delete_session;
2233 }
e16c2ce7 2234 err = evlist__add_wakeup_eventfd(rec->evlist, done_fd);
da231338
AM
2235 if (err < 0) {
2236 pr_err("Failed to add wakeup eventfd to poll list\n");
2237 status = err;
2238 goto out_delete_session;
2239 }
2240#endif // HAVE_EVENTFD_SUPPORT
5d7f4116
AB
2241
2242 session->header.env.comp_type = PERF_COMP_ZSTD;
2243 session->header.env.comp_level = rec->opts.comp_level;
2244
eeb399b5
AH
2245 if (rec->opts.kcore &&
2246 !record__kcore_readable(&session->machines.host)) {
2247 pr_err("ERROR: kcore is not readable.\n");
2248 return -1;
2249 }
2250
d1e325cf
JO
2251 if (record__init_clock(rec))
2252 return -1;
2253
8c6f45a7 2254 record__init_features(rec);
330aa675 2255
d4db3f16 2256 if (forks) {
7b392ef0
ACM
2257 err = evlist__prepare_workload(rec->evlist, &opts->target, argv, data->is_pipe,
2258 workload_exec_failed_signal);
35b9d88e
ACM
2259 if (err < 0) {
2260 pr_err("Couldn't run the workload!\n");
45604710 2261 status = err;
35b9d88e 2262 goto out_delete_session;
856e9660 2263 }
856e9660
PZ
2264 }
2265
ad46e48c
JO
2266 /*
2267 * If we have just a single event and are sending data
2268 * through a pipe, we need to force id allocation,
2269 * because we synthesize the event name through the pipe
2270 * and need the id for that.
2271 */
6484d2f9 2272 if (data->is_pipe && rec->evlist->core.nr_entries == 1)
ad46e48c
JO
2273 rec->opts.sample_id = true;
2274
91c0f5ec
JY
2275 record__uniquify_name(rec);
2276
8c6f45a7 2277 if (record__open(rec) != 0) {
8d3eca20 2278 err = -1;
396b626b 2279 goto out_free_threads;
8d3eca20 2280 }
f6fa4375 2281 session->header.env.comp_mmap_len = session->evlist->core.mmap_len;
de9ac07b 2282
eeb399b5
AH
2283 if (rec->opts.kcore) {
2284 err = record__kcore_copy(&session->machines.host, data);
2285 if (err) {
2286 pr_err("ERROR: Failed to copy kcore\n");
396b626b 2287 goto out_free_threads;
eeb399b5
AH
2288 }
2289 }
2290
8690a2a7
WN
2291 err = bpf__apply_obj_config();
2292 if (err) {
2293 char errbuf[BUFSIZ];
2294
2295 bpf__strerror_apply_obj_config(err, errbuf, sizeof(errbuf));
2296 pr_err("ERROR: Apply config to BPF failed: %s\n",
2297 errbuf);
396b626b 2298 goto out_free_threads;
8690a2a7
WN
2299 }
2300
cca8482c
AH
2301 /*
2302 * Normally perf_session__new would do this, but it doesn't have the
2303 * evlist.
2304 */
8cedf3a5 2305 if (rec->tool.ordered_events && !evlist__sample_id_all(rec->evlist)) {
cca8482c
AH
2306 pr_warning("WARNING: No sample_id_all support, falling back to unordered processing\n");
2307 rec->tool.ordered_events = false;
2308 }
2309
3a683120 2310 if (!rec->evlist->core.nr_groups)
a8bb559b
NK
2311 perf_header__clear_feat(&session->header, HEADER_GROUP_DESC);
2312
8ceb41d7 2313 if (data->is_pipe) {
42aa276f 2314 err = perf_header__write_pipe(fd);
529870e3 2315 if (err < 0)
396b626b 2316 goto out_free_threads;
563aecb2 2317 } else {
42aa276f 2318 err = perf_session__write_header(session, rec->evlist, fd, false);
d5eed904 2319 if (err < 0)
396b626b 2320 goto out_free_threads;
56b03f3c
ACM
2321 }
2322
b38d85ef 2323 err = -1;
d3665498 2324 if (!rec->no_buildid
e20960c0 2325 && !perf_header__has_feat(&session->header, HEADER_BUILD_ID)) {
d3665498 2326 pr_err("Couldn't generate buildids. "
e20960c0 2327 "Use --no-buildid to profile anyway.\n");
396b626b 2328 goto out_free_threads;
e20960c0
RR
2329 }
2330
23cbb41c
ACM
2331 err = record__setup_sb_evlist(rec);
2332 if (err)
396b626b 2333 goto out_free_threads;
657ee553 2334
4ea648ae 2335 err = record__synthesize(rec, false);
c45c86eb 2336 if (err < 0)
396b626b 2337 goto out_free_threads;
8d3eca20 2338
d20deb64 2339 if (rec->realtime_prio) {
de9ac07b
PZ
2340 struct sched_param param;
2341
d20deb64 2342 param.sched_priority = rec->realtime_prio;
de9ac07b 2343 if (sched_setscheduler(0, SCHED_FIFO, &param)) {
6beba7ad 2344 pr_err("Could not set realtime priority.\n");
8d3eca20 2345 err = -1;
396b626b 2346 goto out_free_threads;
de9ac07b
PZ
2347 }
2348 }
2349
396b626b
AB
2350 if (record__start_threads(rec))
2351 goto out_free_threads;
2352
774cb499
JO
2353 /*
2354 * When perf is starting the traced process, all the events
2355 * (apart from group members) have enable_on_exec=1 set,
2356 * so don't spoil it by prematurely enabling them.
2357 */
6619a53e 2358 if (!target__none(&opts->target) && !opts->initial_delay)
1c87f165 2359 evlist__enable(rec->evlist);
764e16a3 2360
856e9660
PZ
2361 /*
2362 * Let the child rip
2363 */
e803cf97 2364 if (forks) {
20a8a3cf 2365 struct machine *machine = &session->machines.host;
e5bed564 2366 union perf_event *event;
e907caf3 2367 pid_t tgid;
e5bed564
NK
2368
2369 event = malloc(sizeof(event->comm) + machine->id_hdr_size);
2370 if (event == NULL) {
2371 err = -ENOMEM;
2372 goto out_child;
2373 }
2374
e803cf97
NK
2375 /*
2376 * Some H/W events are generated before the COMM event,
2377 * which is emitted during exec(), so perf script
2378 * cannot see a correct process name for those events.
2379 * Synthesize a COMM event to prevent that.
2380 */
e907caf3
HB
2381 tgid = perf_event__synthesize_comm(tool, event,
2382 rec->evlist->workload.pid,
2383 process_synthesized_event,
2384 machine);
2385 free(event);
2386
2387 if (tgid == -1)
2388 goto out_child;
2389
2390 event = malloc(sizeof(event->namespaces) +
2391 (NR_NAMESPACES * sizeof(struct perf_ns_link_info)) +
2392 machine->id_hdr_size);
2393 if (event == NULL) {
2394 err = -ENOMEM;
2395 goto out_child;
2396 }
2397
2398 /*
2399 * Synthesize NAMESPACES event for the command specified.
2400 */
2401 perf_event__synthesize_namespaces(tool, event,
2402 rec->evlist->workload.pid,
2403 tgid, process_synthesized_event,
2404 machine);
e5bed564 2405 free(event);
e803cf97 2406
7b392ef0 2407 evlist__start_workload(rec->evlist);
e803cf97 2408 }
856e9660 2409
6619a53e 2410 if (opts->initial_delay) {
68cd3b45
AB
2411 pr_info(EVLIST_DISABLED_MSG);
2412 if (opts->initial_delay > 0) {
2413 usleep(opts->initial_delay * USEC_PER_MSEC);
2414 evlist__enable(rec->evlist);
2415 pr_info(EVLIST_ENABLED_MSG);
2416 }
6619a53e
AK
2417 }
2418
5f9cf599 2419 trigger_ready(&auxtrace_snapshot_trigger);
3c1cb7e3 2420 trigger_ready(&switch_output_trigger);
a074865e 2421 perf_hooks__invoke_record_start();
649c48a9 2422 for (;;) {
396b626b 2423 unsigned long long hits = thread->samples;
de9ac07b 2424
05737464
WN
2425 /*
2426 * rec->evlist->bkw_mmap_state may be
2427 * BKW_MMAP_EMPTY here: when done == true and
2428 * hits != rec->samples in the previous round.
2429 *
ade9d208 2430 * evlist__toggle_bkw_mmap ensures we never
05737464
WN
2431 * convert BKW_MMAP_EMPTY to BKW_MMAP_DATA_PENDING.
2432 */
2433 if (trigger_is_hit(&switch_output_trigger) || done || draining)
ade9d208 2434 evlist__toggle_bkw_mmap(rec->evlist, BKW_MMAP_DATA_PENDING);
05737464 2435
470530bb 2436 if (record__mmap_read_all(rec, false) < 0) {
5f9cf599 2437 trigger_error(&auxtrace_snapshot_trigger);
3c1cb7e3 2438 trigger_error(&switch_output_trigger);
8d3eca20 2439 err = -1;
45604710 2440 goto out_child;
8d3eca20 2441 }
de9ac07b 2442
2dd6d8a1
AH
2443 if (auxtrace_record__snapshot_started) {
2444 auxtrace_record__snapshot_started = 0;
5f9cf599 2445 if (!trigger_is_error(&auxtrace_snapshot_trigger))
ce7b0e42 2446 record__read_auxtrace_snapshot(rec, false);
5f9cf599 2447 if (trigger_is_error(&auxtrace_snapshot_trigger)) {
2dd6d8a1
AH
2448 pr_err("AUX area tracing snapshot failed\n");
2449 err = -1;
2450 goto out_child;
2451 }
2452 }
2453
3c1cb7e3 2454 if (trigger_is_hit(&switch_output_trigger)) {
05737464
WN
2455 /*
2456 * If switch_output_trigger is hit, the data in
2457 * overwritable ring buffer should have been collected,
2458 * so bkw_mmap_state should be set to BKW_MMAP_EMPTY.
2459 *
2460 * If SIGUSR2 is raised after or during record__mmap_read_all(),
2461 * record__mmap_read_all() didn't collect data from
2462 * overwritable ring buffer. Read again.
2463 */
2464 if (rec->evlist->bkw_mmap_state == BKW_MMAP_RUNNING)
2465 continue;
3c1cb7e3
WN
2466 trigger_ready(&switch_output_trigger);
2467
05737464
WN
2468 /*
2469 * Reenable events in overwrite ring buffer after
2470 * record__mmap_read_all(): we should have collected
2471 * data from it.
2472 */
ade9d208 2473 evlist__toggle_bkw_mmap(rec->evlist, BKW_MMAP_RUNNING);
05737464 2474
3c1cb7e3
WN
2475 if (!quiet)
2476 fprintf(stderr, "[ perf record: dump data: Woken up %ld times ]\n",
396b626b
AB
2477 record__waking(rec));
2478 thread->waking = 0;
3c1cb7e3
WN
2479 fd = record__switch_output(rec, false);
2480 if (fd < 0) {
2481 pr_err("Failed to switch to new file\n");
2482 trigger_error(&switch_output_trigger);
2483 err = fd;
2484 goto out_child;
2485 }
bfacbe3b
JO
2486
2487 /* re-arm the alarm */
2488 if (rec->switch_output.time)
2489 alarm(rec->switch_output.time);
3c1cb7e3
WN
2490 }
2491
396b626b 2492 if (hits == thread->samples) {
6dcf45ef 2493 if (done || draining)
649c48a9 2494 break;
396b626b 2495 err = fdarray__poll(&thread->pollfd, -1);
a515114f
JO
2496 /*
2497 * Propagate error, only if there's any. Ignore positive
2498 * number of returned events and interrupt error.
2499 */
2500 if (err > 0 || (err < 0 && errno == EINTR))
45604710 2501 err = 0;
396b626b 2502 thread->waking++;
6dcf45ef 2503
396b626b
AB
2504 if (fdarray__filter(&thread->pollfd, POLLERR | POLLHUP,
2505 record__thread_munmap_filtered, NULL) == 0)
6dcf45ef 2506 draining = true;
396b626b
AB
2507
2508 evlist__ctlfd_update(rec->evlist,
2509 &thread->pollfd.entries[thread->ctlfd_pos]);
8b412664
PZ
2510 }
2511
acce0223
AB
2512 if (evlist__ctlfd_process(rec->evlist, &cmd) > 0) {
2513 switch (cmd) {
d20aff15
AH
2514 case EVLIST_CTL_CMD_SNAPSHOT:
2515 hit_auxtrace_snapshot_trigger(rec);
2516 evlist__ctlfd_ack(rec->evlist);
2517 break;
f186cd61
JO
2518 case EVLIST_CTL_CMD_STOP:
2519 done = 1;
2520 break;
acce0223
AB
2521 case EVLIST_CTL_CMD_ACK:
2522 case EVLIST_CTL_CMD_UNSUPPORTED:
991ae4eb
JO
2523 case EVLIST_CTL_CMD_ENABLE:
2524 case EVLIST_CTL_CMD_DISABLE:
142544a9 2525 case EVLIST_CTL_CMD_EVLIST:
47fddcb4 2526 case EVLIST_CTL_CMD_PING:
acce0223
AB
2527 default:
2528 break;
2529 }
2530 }
2531
774cb499
JO
2532 /*
2533 * When perf is starting the traced process, at the end events
2534 * die with the process and we wait for that. Thus no need to
2535 * disable events in this case.
2536 */
602ad878 2537 if (done && !disabled && !target__none(&opts->target)) {
5f9cf599 2538 trigger_off(&auxtrace_snapshot_trigger);
e74676de 2539 evlist__disable(rec->evlist);
2711926a
JO
2540 disabled = true;
2541 }
de9ac07b 2542 }
ce7b0e42 2543
5f9cf599 2544 trigger_off(&auxtrace_snapshot_trigger);
3c1cb7e3 2545 trigger_off(&switch_output_trigger);
de9ac07b 2546
ce7b0e42
AS
2547 if (opts->auxtrace_snapshot_on_exit)
2548 record__auxtrace_snapshot_exit(rec);
2549
f33cbe72 2550 if (forks && workload_exec_errno) {
3535a696 2551 char msg[STRERR_BUFSIZE], strevsels[2048];
c8b5f2c9 2552 const char *emsg = str_error_r(workload_exec_errno, msg, sizeof(msg));
3535a696
ACM
2553
2554 evlist__scnprintf_evsels(rec->evlist, sizeof(strevsels), strevsels);
2555
2556 pr_err("Failed to collect '%s' for the '%s' workload: %s\n",
2557 strevsels, argv[0], emsg);
f33cbe72 2558 err = -1;
45604710 2559 goto out_child;
f33cbe72
ACM
2560 }
2561
e3d59112 2562 if (!quiet)
396b626b
AB
2563 fprintf(stderr, "[ perf record: Woken up %ld times to write data ]\n",
2564 record__waking(rec));
b44308f5 2565
4ea648ae
WN
2566 if (target__none(&rec->opts.target))
2567 record__synthesize_workload(rec, true);
2568
45604710 2569out_child:
396b626b 2570 record__stop_threads(rec);
470530bb 2571 record__mmap_read_all(rec, true);
396b626b 2572out_free_threads:
415ccb58 2573 record__free_thread_data(rec);
396b626b 2574 evlist__finalize_ctlfd(rec->evlist);
d3d1af6f
AB
2575 record__aio_mmap_read_sync(rec);
2576
d3c8c08e
AB
2577 if (rec->session->bytes_transferred && rec->session->bytes_compressed) {
2578 ratio = (float)rec->session->bytes_transferred/(float)rec->session->bytes_compressed;
2579 session->header.env.comp_ratio = ratio + 0.5;
2580 }
2581
45604710
NK
2582 if (forks) {
2583 int exit_status;
addc2785 2584
45604710
NK
2585 if (!child_finished)
2586 kill(rec->evlist->workload.pid, SIGTERM);
2587
2588 wait(&exit_status);
2589
2590 if (err < 0)
2591 status = err;
2592 else if (WIFEXITED(exit_status))
2593 status = WEXITSTATUS(exit_status);
2594 else if (WIFSIGNALED(exit_status))
2595 signr = WTERMSIG(exit_status);
2596 } else
2597 status = err;
2598
4ea648ae 2599 record__synthesize(rec, true);
e3d59112
NK
2600 /* this will be recalculated during process_buildids() */
2601 rec->samples = 0;
2602
ecfd7a9c
WN
2603 if (!err) {
2604 if (!rec->timestamp_filename) {
2605 record__finish_output(rec);
2606 } else {
2607 fd = record__switch_output(rec, true);
2608 if (fd < 0) {
2609 status = fd;
2610 goto out_delete_session;
2611 }
2612 }
2613 }
39d17dac 2614
a074865e
WN
2615 perf_hooks__invoke_record_end();
2616
e3d59112
NK
2617 if (!err && !quiet) {
2618 char samples[128];
ecfd7a9c
WN
2619 const char *postfix = rec->timestamp_filename ?
2620 ".<timestamp>" : "";
e3d59112 2621
ef149c25 2622 if (rec->samples && !rec->opts.full_auxtrace)
e3d59112
NK
2623 scnprintf(samples, sizeof(samples),
2624 " (%" PRIu64 " samples)", rec->samples);
2625 else
2626 samples[0] = '\0';
2627
d3c8c08e 2628 fprintf(stderr, "[ perf record: Captured and wrote %.3f MB %s%s%s",
8ceb41d7 2629 perf_data__size(data) / 1024.0 / 1024.0,
2d4f2799 2630 data->path, postfix, samples);
d3c8c08e
AB
2631 if (ratio) {
2632 fprintf(stderr, ", compressed (original %.3f MB, ratio is %.3f)",
2633 rec->session->bytes_transferred / 1024.0 / 1024.0,
2634 ratio);
2635 }
2636 fprintf(stderr, " ]\n");
e3d59112
NK
2637 }
2638
39d17dac 2639out_delete_session:
da231338
AM
2640#ifdef HAVE_EVENTFD_SUPPORT
2641 if (done_fd >= 0)
2642 close(done_fd);
2643#endif
5d7f4116 2644 zstd_fini(&session->zstd_data);
39d17dac 2645 perf_session__delete(session);
657ee553
SL
2646
2647 if (!opts->no_bpf_event)
08c83997 2648 evlist__stop_sb_thread(rec->sb_evlist);
45604710 2649 return status;
de9ac07b 2650}
0e9b20b8 2651
0883e820 2652static void callchain_debug(struct callchain_param *callchain)
09b0fd45 2653{
aad2b21c 2654 static const char *str[CALLCHAIN_MAX] = { "NONE", "FP", "DWARF", "LBR" };
a601fdff 2655
0883e820 2656 pr_debug("callchain: type %s\n", str[callchain->record_mode]);
26d33022 2657
0883e820 2658 if (callchain->record_mode == CALLCHAIN_DWARF)
09b0fd45 2659 pr_debug("callchain: stack dump size %d\n",
0883e820 2660 callchain->dump_size);
09b0fd45
JO
2661}
2662
0883e820
ACM
2663int record_opts__parse_callchain(struct record_opts *record,
2664 struct callchain_param *callchain,
2665 const char *arg, bool unset)
09b0fd45 2666{
09b0fd45 2667 int ret;
0883e820 2668 callchain->enabled = !unset;
eb853e80 2669
09b0fd45
JO
2670 /* --no-call-graph */
2671 if (unset) {
0883e820 2672 callchain->record_mode = CALLCHAIN_NONE;
09b0fd45
JO
2673 pr_debug("callchain: disabled\n");
2674 return 0;
2675 }
2676
0883e820 2677 ret = parse_callchain_record_opt(arg, callchain);
5c0cf224
JO
2678 if (!ret) {
2679 /* Enable data address sampling for DWARF unwind. */
0883e820 2680 if (callchain->record_mode == CALLCHAIN_DWARF)
5c0cf224 2681 record->sample_address = true;
0883e820 2682 callchain_debug(callchain);
5c0cf224 2683 }
26d33022
JO
2684
2685 return ret;
2686}
2687
0883e820
ACM
2688int record_parse_callchain_opt(const struct option *opt,
2689 const char *arg,
2690 int unset)
2691{
2692 return record_opts__parse_callchain(opt->value, &callchain_param, arg, unset);
2693}
2694
c421e80b 2695int record_callchain_opt(const struct option *opt,
09b0fd45
JO
2696 const char *arg __maybe_unused,
2697 int unset __maybe_unused)
2698{
2ddd5c04 2699 struct callchain_param *callchain = opt->value;
c421e80b 2700
2ddd5c04 2701 callchain->enabled = true;
09b0fd45 2702
2ddd5c04
ACM
2703 if (callchain->record_mode == CALLCHAIN_NONE)
2704 callchain->record_mode = CALLCHAIN_FP;
eb853e80 2705
2ddd5c04 2706 callchain_debug(callchain);
09b0fd45
JO
2707 return 0;
2708}
2709
eb853e80
JO
2710static int perf_record_config(const char *var, const char *value, void *cb)
2711{
7a29c087
NK
2712 struct record *rec = cb;
2713
2714 if (!strcmp(var, "record.build-id")) {
2715 if (!strcmp(value, "cache"))
2716 rec->no_buildid_cache = false;
2717 else if (!strcmp(value, "no-cache"))
2718 rec->no_buildid_cache = true;
2719 else if (!strcmp(value, "skip"))
2720 rec->no_buildid = true;
e29386c8
JO
2721 else if (!strcmp(value, "mmap"))
2722 rec->buildid_mmap = true;
7a29c087
NK
2723 else
2724 return -1;
2725 return 0;
2726 }
cff17205
YX
2727 if (!strcmp(var, "record.call-graph")) {
2728 var = "call-graph.record-mode";
2729 return perf_default_config(var, value, cb);
2730 }
93f20c0f
AB
2731#ifdef HAVE_AIO_SUPPORT
2732 if (!strcmp(var, "record.aio")) {
2733 rec->opts.nr_cblocks = strtol(value, NULL, 0);
2734 if (!rec->opts.nr_cblocks)
2735 rec->opts.nr_cblocks = nr_cblocks_default;
2736 }
2737#endif
9bce13ea
JO
2738 if (!strcmp(var, "record.debuginfod")) {
2739 rec->debuginfod.urls = strdup(value);
2740 if (!rec->debuginfod.urls)
2741 return -ENOMEM;
2742 rec->debuginfod.set = true;
2743 }
eb853e80 2744
cff17205 2745 return 0;
eb853e80
JO
2746}
2747
814c8c38 2748
f4fe11b7
AB
2749static int record__parse_affinity(const struct option *opt, const char *str, int unset)
2750{
2751 struct record_opts *opts = (struct record_opts *)opt->value;
2752
2753 if (unset || !str)
2754 return 0;
2755
2756 if (!strcasecmp(str, "node"))
2757 opts->affinity = PERF_AFFINITY_NODE;
2758 else if (!strcasecmp(str, "cpu"))
2759 opts->affinity = PERF_AFFINITY_CPU;
2760
2761 return 0;
2762}
2763
7954f716
AB
2764static int record__mmap_cpu_mask_alloc(struct mmap_cpu_mask *mask, int nr_bits)
2765{
2766 mask->nbits = nr_bits;
2767 mask->bits = bitmap_zalloc(mask->nbits);
2768 if (!mask->bits)
2769 return -ENOMEM;
2770
2771 return 0;
2772}
2773
2774static void record__mmap_cpu_mask_free(struct mmap_cpu_mask *mask)
2775{
2776 bitmap_free(mask->bits);
2777 mask->nbits = 0;
2778}
2779
2780static int record__thread_mask_alloc(struct thread_mask *mask, int nr_bits)
2781{
2782 int ret;
2783
2784 ret = record__mmap_cpu_mask_alloc(&mask->maps, nr_bits);
2785 if (ret) {
2786 mask->affinity.bits = NULL;
2787 return ret;
2788 }
2789
2790 ret = record__mmap_cpu_mask_alloc(&mask->affinity, nr_bits);
2791 if (ret) {
2792 record__mmap_cpu_mask_free(&mask->maps);
2793 mask->maps.bits = NULL;
2794 }
2795
2796 return ret;
2797}
2798
2799static void record__thread_mask_free(struct thread_mask *mask)
2800{
2801 record__mmap_cpu_mask_free(&mask->maps);
2802 record__mmap_cpu_mask_free(&mask->affinity);
2803}
2804
06380a84
AB
2805static int record__parse_threads(const struct option *opt, const char *str, int unset)
2806{
f466e5ed 2807 int s;
06380a84
AB
2808 struct record_opts *opts = opt->value;
2809
f466e5ed 2810 if (unset || !str || !strlen(str)) {
06380a84 2811 opts->threads_spec = THREAD_SPEC__CPU;
f466e5ed
AB
2812 } else {
2813 for (s = 1; s < THREAD_SPEC__MAX; s++) {
2814 if (s == THREAD_SPEC__USER) {
2815 opts->threads_user_spec = strdup(str);
2816 if (!opts->threads_user_spec)
2817 return -ENOMEM;
2818 opts->threads_spec = THREAD_SPEC__USER;
2819 break;
2820 }
2821 if (!strncasecmp(str, thread_spec_tags[s], strlen(thread_spec_tags[s]))) {
2822 opts->threads_spec = s;
2823 break;
2824 }
2825 }
2826 }
2827
2828 if (opts->threads_spec == THREAD_SPEC__USER)
2829 pr_debug("threads_spec: %s\n", opts->threads_user_spec);
2830 else
2831 pr_debug("threads_spec: %s\n", thread_spec_tags[opts->threads_spec]);
06380a84
AB
2832
2833 return 0;
2834}
2835
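/*
 * --max-size accepts a plain byte count or a B/K/M/G suffixed value
 * (parsed via parse_tag_value) used to limit the maximum size of the
 * output file.
 */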
6d575816
JS
2836static int parse_output_max_size(const struct option *opt,
2837 const char *str, int unset)
2838{
2839 unsigned long *s = (unsigned long *)opt->value;
2840 static struct parse_tag tags_size[] = {
2841 { .tag = 'B', .mult = 1 },
2842 { .tag = 'K', .mult = 1 << 10 },
2843 { .tag = 'M', .mult = 1 << 20 },
2844 { .tag = 'G', .mult = 1 << 30 },
2845 { .tag = 0 },
2846 };
2847 unsigned long val;
2848
2849 if (unset) {
2850 *s = 0;
2851 return 0;
2852 }
2853
2854 val = parse_tag_value(str, tags_size);
2855 if (val != (unsigned long) -1) {
2856 *s = val;
2857 return 0;
2858 }
2859
2860 return -1;
2861}
2862
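/*
 * -m/--mmap-pages takes "pages[,pages]": the first value sizes the
 * regular event mmaps, the optional second value sizes the AUX area
 * tracing mmaps.
 */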
e9db1310
AH
2863static int record__parse_mmap_pages(const struct option *opt,
2864 const char *str,
2865 int unset __maybe_unused)
2866{
2867 struct record_opts *opts = opt->value;
2868 char *s, *p;
2869 unsigned int mmap_pages;
2870 int ret;
2871
2872 if (!str)
2873 return -EINVAL;
2874
2875 s = strdup(str);
2876 if (!s)
2877 return -ENOMEM;
2878
2879 p = strchr(s, ',');
2880 if (p)
2881 *p = '\0';
2882
2883 if (*s) {
25f84702 2884 ret = __evlist__parse_mmap_pages(&mmap_pages, s);
e9db1310
AH
2885 if (ret)
2886 goto out_free;
2887 opts->mmap_pages = mmap_pages;
2888 }
2889
2890 if (!p) {
2891 ret = 0;
2892 goto out_free;
2893 }
2894
25f84702 2895 ret = __evlist__parse_mmap_pages(&mmap_pages, p + 1);
e9db1310
AH
2896 if (ret)
2897 goto out_free;
2898
2899 opts->auxtrace_mmap_pages = mmap_pages;
2900
2901out_free:
2902 free(s);
2903 return ret;
2904}
2905
7248e308
AT
2906void __weak arch__add_leaf_frame_record_opts(struct record_opts *opts __maybe_unused)
2907{
2908}
2909
1d078ccb
AB
2910static int parse_control_option(const struct option *opt,
2911 const char *str,
2912 int unset __maybe_unused)
2913{
9864a66d 2914 struct record_opts *opts = opt->value;
1d078ccb 2915
a8fcbd26
AH
2916 return evlist__parse_control(str, &opts->ctl_fd, &opts->ctl_fd_ack, &opts->ctl_fd_close);
2917}
2918
0c582449
JO
2919static void switch_output_size_warn(struct record *rec)
2920{
9521b5f2 2921 u64 wakeup_size = evlist__mmap_size(rec->opts.mmap_pages);
0c582449
JO
2922 struct switch_output *s = &rec->switch_output;
2923
2924 wakeup_size /= 2;
2925
2926 if (s->size < wakeup_size) {
2927 char buf[100];
2928
2929 unit_number__scnprintf(buf, sizeof(buf), wakeup_size);
2930 pr_warning("WARNING: switch-output data size lower than "
2931 "wakeup kernel buffer size (%s) "
2932 "expect bigger perf.data sizes\n", buf);
2933 }
2934}
2935
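/*
 * Parse the --switch-output argument: "signal" switches output on
 * SIGUSR2, a value with a B/K/M/G suffix switches when the output
 * crosses that size, and a value with an s/m/h/d suffix switches on a
 * time period. Any of these implies timestamped output file names.
 */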
cb4e1ebb
JO
2936static int switch_output_setup(struct record *rec)
2937{
2938 struct switch_output *s = &rec->switch_output;
dc0c6127
JO
2939 static struct parse_tag tags_size[] = {
2940 { .tag = 'B', .mult = 1 },
2941 { .tag = 'K', .mult = 1 << 10 },
2942 { .tag = 'M', .mult = 1 << 20 },
2943 { .tag = 'G', .mult = 1 << 30 },
2944 { .tag = 0 },
2945 };
bfacbe3b
JO
2946 static struct parse_tag tags_time[] = {
2947 { .tag = 's', .mult = 1 },
2948 { .tag = 'm', .mult = 60 },
2949 { .tag = 'h', .mult = 60*60 },
2950 { .tag = 'd', .mult = 60*60*24 },
2951 { .tag = 0 },
2952 };
dc0c6127 2953 unsigned long val;
cb4e1ebb 2954
899e5ffb
ACM
2955 /*
2956 * If we're using --switch-output-events, then we imply
2957 * --switch-output=signal, as we'll send a SIGUSR2 from the side band
2958 * thread to its parent.
2959 */
b5f2511d
AB
2960 if (rec->switch_output_event_set) {
2961 if (record__threads_enabled(rec)) {
2962 pr_warning("WARNING: --switch-output-event option is not available in parallel streaming mode.\n");
2963 return 0;
2964 }
899e5ffb 2965 goto do_signal;
b5f2511d 2966 }
899e5ffb 2967
cb4e1ebb
JO
2968 if (!s->set)
2969 return 0;
2970
b5f2511d
AB
2971 if (record__threads_enabled(rec)) {
2972 pr_warning("WARNING: --switch-output option is not available in parallel streaming mode.\n");
2973 return 0;
2974 }
2975
cb4e1ebb 2976 if (!strcmp(s->str, "signal")) {
899e5ffb 2977do_signal:
cb4e1ebb
JO
2978 s->signal = true;
2979 pr_debug("switch-output with SIGUSR2 signal\n");
dc0c6127
JO
2980 goto enabled;
2981 }
2982
2983 val = parse_tag_value(s->str, tags_size);
2984 if (val != (unsigned long) -1) {
2985 s->size = val;
2986 pr_debug("switch-output with %s size threshold\n", s->str);
2987 goto enabled;
cb4e1ebb
JO
2988 }
2989
bfacbe3b
JO
2990 val = parse_tag_value(s->str, tags_time);
2991 if (val != (unsigned long) -1) {
2992 s->time = val;
2993 pr_debug("switch-output with %s time threshold (%lu seconds)\n",
2994 s->str, s->time);
2995 goto enabled;
2996 }
2997
cb4e1ebb 2998 return -1;
dc0c6127
JO
2999
3000enabled:
3001 rec->timestamp_filename = true;
3002 s->enabled = true;
0c582449
JO
3003
3004 if (s->size && !rec->opts.no_buffering)
3005 switch_output_size_warn(rec);
3006
dc0c6127 3007 return 0;
cb4e1ebb
JO
3008}
3009
e5b2c207 3010static const char * const __record_usage[] = {
9e096753
MG
3011 "perf record [<options>] [<command>]",
3012 "perf record [<options>] -- <command> [<options>]",
0e9b20b8
IM
3013 NULL
3014};
e5b2c207 3015const char * const *record_usage = __record_usage;
0e9b20b8 3016
6e0a9b3d
ACM
3017static int build_id__process_mmap(struct perf_tool *tool, union perf_event *event,
3018 struct perf_sample *sample, struct machine *machine)
3019{
3020 /*
3021 * We already have the kernel maps, put in place via perf_session__create_kernel_maps()
3022 * no need to add them twice.
3023 */
3024 if (!(event->header.misc & PERF_RECORD_MISC_USER))
3025 return 0;
3026 return perf_event__process_mmap(tool, event, sample, machine);
3027}
3028
3029static int build_id__process_mmap2(struct perf_tool *tool, union perf_event *event,
3030 struct perf_sample *sample, struct machine *machine)
3031{
3032 /*
3033 * We already have the kernel maps, put in place via perf_session__create_kernel_maps()
3034 * no need to add them twice.
3035 */
3036 if (!(event->header.misc & PERF_RECORD_MISC_USER))
3037 return 0;
3038
3039 return perf_event__process_mmap2(tool, event, sample, machine);
3040}
3041
66286ed3
AH
3042static int process_timestamp_boundary(struct perf_tool *tool,
3043 union perf_event *event __maybe_unused,
3044 struct perf_sample *sample,
3045 struct machine *machine __maybe_unused)
3046{
3047 struct record *rec = container_of(tool, struct record, tool);
3048
3049 set_timestamp_boundary(rec, sample->time);
3050 return 0;
3051}
3052
41b740b6
NK
3053static int parse_record_synth_option(const struct option *opt,
3054 const char *str,
3055 int unset __maybe_unused)
3056{
3057 struct record_opts *opts = opt->value;
3058 char *p = strdup(str);
3059
3060 if (p == NULL)
3061 return -1;
3062
3063 opts->synth = parse_synth_opt(p);
3064 free(p);
3065
3066 if (opts->synth < 0) {
3067 pr_err("Invalid synth option: %s\n", str);
3068 return -1;
3069 }
3070 return 0;
3071}
3072
d20deb64 3073/*
8c6f45a7
ACM
3074 * XXX Ideally would be local to cmd_record() and passed to a record__new
3075 * because we need to have access to it in record__exit, that is called
d20deb64
ACM
3076 * after cmd_record() exits, but since record_options need to be accessible to
3077 * builtin-script, leave it here.
3078 *
3079 * At least we don't touch it in all the other functions here directly.
3080 *
3081 * Just say no to tons of global variables, sigh.
3082 */
8c6f45a7 3083static struct record record = {
d20deb64 3084 .opts = {
8affc2b8 3085 .sample_time = true,
d20deb64
ACM
3086 .mmap_pages = UINT_MAX,
3087 .user_freq = UINT_MAX,
3088 .user_interval = ULLONG_MAX,
447a6013 3089 .freq = 4000,
d1cb9fce
NK
3090 .target = {
3091 .uses_mmap = true,
3aa5939d 3092 .default_per_cpu = true,
d1cb9fce 3093 },
470530bb 3094 .mmap_flush = MMAP_FLUSH_DEFAULT,
d99c22ea 3095 .nr_threads_synthesize = 1,
1d078ccb
AB
3096 .ctl_fd = -1,
3097 .ctl_fd_ack = -1,
41b740b6 3098 .synth = PERF_SYNTH_ALL,
d20deb64 3099 },
e3d59112
NK
3100 .tool = {
3101 .sample = process_sample_event,
3102 .fork = perf_event__process_fork,
cca8482c 3103 .exit = perf_event__process_exit,
e3d59112 3104 .comm = perf_event__process_comm,
f3b3614a 3105 .namespaces = perf_event__process_namespaces,
6e0a9b3d
ACM
3106 .mmap = build_id__process_mmap,
3107 .mmap2 = build_id__process_mmap2,
66286ed3
AH
3108 .itrace_start = process_timestamp_boundary,
3109 .aux = process_timestamp_boundary,
cca8482c 3110 .ordered_events = true,
e3d59112 3111 },
d20deb64 3112};
7865e817 3113
76a26549
NK
3114const char record_callchain_help[] = CALLCHAIN_RECORD_HELP
3115 "\n\t\t\t\tDefault: fp";
61eaa3be 3116
0aab2136
WN
3117static bool dry_run;
3118
d20deb64
ACM
3119/*
3120 * XXX Will stay a global variable till we fix builtin-script.c to stop messing
3121 * with it and switch to use the library functions in perf_evlist that came
b4006796 3122 * from builtin-record.c, i.e. use record_opts,
7b392ef0 3123 * evlist__prepare_workload, etc instead of fork+exec'in 'perf record',
d20deb64
ACM
3124 * using pipes, etc.
3125 */
efd21307 3126static struct option __record_options[] = {
d20deb64 3127 OPT_CALLBACK('e', "event", &record.evlist, "event",
86847b62 3128 "event selector. use 'perf list' to list available events",
f120f9d5 3129 parse_events_option),
d20deb64 3130 OPT_CALLBACK(0, "filter", &record.evlist, "filter",
c171b552 3131 "event filter", parse_filter),
4ba1faa1
WN
3132 OPT_CALLBACK_NOOPT(0, "exclude-perf", &record.evlist,
3133 NULL, "don't record events from perf itself",
3134 exclude_perf),
bea03405 3135 OPT_STRING('p', "pid", &record.opts.target.pid, "pid",
d6d901c2 3136 "record events on existing process id"),
bea03405 3137 OPT_STRING('t', "tid", &record.opts.target.tid, "tid",
d6d901c2 3138 "record events on existing thread id"),
d20deb64 3139 OPT_INTEGER('r', "realtime", &record.realtime_prio,
0e9b20b8 3140 "collect data with this RT SCHED_FIFO priority"),
509051ea 3141 OPT_BOOLEAN(0, "no-buffering", &record.opts.no_buffering,
acac03fa 3142 "collect data without buffering"),
d20deb64 3143 OPT_BOOLEAN('R', "raw-samples", &record.opts.raw_samples,
daac07b2 3144 "collect raw sample records from all opened counters"),
bea03405 3145 OPT_BOOLEAN('a', "all-cpus", &record.opts.target.system_wide,
0e9b20b8 3146 "system-wide collection from all CPUs"),
bea03405 3147 OPT_STRING('C', "cpu", &record.opts.target.cpu_list, "cpu",
c45c6ea2 3148 "list of cpus to monitor"),
d20deb64 3149 OPT_U64('c', "count", &record.opts.user_interval, "event period to sample"),
2d4f2799 3150 OPT_STRING('o', "output", &record.data.path, "file",
abaff32a 3151 "output file name"),
69e7e5b0
AH
3152 OPT_BOOLEAN_SET('i', "no-inherit", &record.opts.no_inherit,
3153 &record.opts.no_inherit_set,
3154 "child tasks do not inherit counters"),
4ea648ae
WN
3155 OPT_BOOLEAN(0, "tail-synthesize", &record.opts.tail_synthesize,
3156 "synthesize non-sample events at the end of output"),
626a6b78 3157 OPT_BOOLEAN(0, "overwrite", &record.opts.overwrite, "use overwrite mode"),
a060c1f1 3158 OPT_BOOLEAN(0, "no-bpf-event", &record.opts.no_bpf_event, "do not record bpf events"),
b09c2364
ACM
3159 OPT_BOOLEAN(0, "strict-freq", &record.opts.strict_freq,
3160 "Fail if the specified frequency can't be used"),
67230479
ACM
3161 OPT_CALLBACK('F', "freq", &record.opts, "freq or 'max'",
3162 "profile at this frequency",
3163 record__parse_freq),
e9db1310
AH
3164 OPT_CALLBACK('m', "mmap-pages", &record.opts, "pages[,pages]",
3165 "number of mmap data pages and AUX area tracing mmap pages",
3166 record__parse_mmap_pages),
470530bb
AB
3167 OPT_CALLBACK(0, "mmap-flush", &record.opts, "number",
3168 "Minimal number of bytes that is extracted from mmap data pages (default: 1)",
3169 record__mmap_flush_parse),
d20deb64 3170 OPT_BOOLEAN(0, "group", &record.opts.group,
43bece79 3171 "put the counters into a counter group"),
2ddd5c04 3172 OPT_CALLBACK_NOOPT('g', NULL, &callchain_param,
09b0fd45
JO
3173 NULL, "enables call-graph recording" ,
3174 &record_callchain_opt),
3175 OPT_CALLBACK(0, "call-graph", &record.opts,
76a26549 3176 "record_mode[,record_size]", record_callchain_help,
09b0fd45 3177 &record_parse_callchain_opt),
c0555642 3178 OPT_INCR('v', "verbose", &verbose,
3da297a6 3179 "be more verbose (show counter open errors, etc)"),
b44308f5 3180 OPT_BOOLEAN('q', "quiet", &quiet, "don't print any message"),
d20deb64 3181 OPT_BOOLEAN('s', "stat", &record.opts.inherit_stat,
649c48a9 3182 "per thread counts"),
56100321 3183 OPT_BOOLEAN('d', "data", &record.opts.sample_address, "Record the sample addresses"),
3b0a5daa
KL
3184 OPT_BOOLEAN(0, "phys-data", &record.opts.sample_phys_addr,
3185 "Record the sample physical addresses"),
542b88fd
KL
3186 OPT_BOOLEAN(0, "data-page-size", &record.opts.sample_data_page_size,
3187 "Record the sampled data address data page size"),
c1de7f3d
KL
3188 OPT_BOOLEAN(0, "code-page-size", &record.opts.sample_code_page_size,
3189 "Record the sampled code address (ip) page size"),
b6f35ed7 3190 OPT_BOOLEAN(0, "sample-cpu", &record.opts.sample_cpu, "Record the sample cpu"),
3abebc55
AH
3191 OPT_BOOLEAN_SET('T', "timestamp", &record.opts.sample_time,
3192 &record.opts.sample_time_set,
3193 "Record the sample timestamps"),
f290aa1f
JO
3194 OPT_BOOLEAN_SET('P', "period", &record.opts.period, &record.opts.period_set,
3195 "Record the sample period"),
d20deb64 3196 OPT_BOOLEAN('n', "no-samples", &record.opts.no_samples,
649c48a9 3197 "don't sample"),
d2db9a98
WN
3198 OPT_BOOLEAN_SET('N', "no-buildid-cache", &record.no_buildid_cache,
3199 &record.no_buildid_cache_set,
3200 "do not update the buildid cache"),
3201 OPT_BOOLEAN_SET('B', "no-buildid", &record.no_buildid,
3202 &record.no_buildid_set,
3203 "do not collect buildids in perf.data"),
d20deb64 3204 OPT_CALLBACK('G', "cgroup", &record.evlist, "name",
023695d9
SE
3205 "monitor event in cgroup name only",
3206 parse_cgroups),
68cd3b45
AB
3207 OPT_INTEGER('D', "delay", &record.opts.initial_delay,
3208 "ms to wait before starting measurement after program start (-1: start with events disabled)"),
eeb399b5 3209 OPT_BOOLEAN(0, "kcore", &record.opts.kcore, "copy /proc/kcore"),
bea03405
NK
3210 OPT_STRING('u', "uid", &record.opts.target.uid_str, "user",
3211 "user to profile"),
a5aabdac
SE
3212
3213 OPT_CALLBACK_NOOPT('b', "branch-any", &record.opts.branch_stack,
3214 "branch any", "sample any taken branches",
3215 parse_branch_stack),
3216
3217 OPT_CALLBACK('j', "branch-filter", &record.opts.branch_stack,
3218 "branch filter mask", "branch stack filter modes",
bdfebd84 3219 parse_branch_stack),
05484298
AK
3220 OPT_BOOLEAN('W', "weight", &record.opts.sample_weight,
3221 "sample by weight (on special events only)"),
475eeab9
AK
3222 OPT_BOOLEAN(0, "transaction", &record.opts.sample_transaction,
3223 "sample transaction flags (special events only)"),
3aa5939d
AH
3224 OPT_BOOLEAN(0, "per-thread", &record.opts.target.per_thread,
3225 "use per-thread mmaps"),
bcc84ec6
SE
3226 OPT_CALLBACK_OPTARG('I', "intr-regs", &record.opts.sample_intr_regs, NULL, "any register",
3227 "sample selected machine registers on interrupt,"
aeea9062 3228 " use '-I?' to list register names", parse_intr_regs),
84c41742
AK
3229 OPT_CALLBACK_OPTARG(0, "user-regs", &record.opts.sample_user_regs, NULL, "any register",
3230 "sample selected machine registers on interrupt,"
aeea9062 3231 " use '--user-regs=?' to list register names", parse_user_regs),
85c273d2
AK
3232 OPT_BOOLEAN(0, "running-time", &record.opts.running_time,
3233 "Record running/enabled time of read (:S) events"),
814c8c38
PZ
3234 OPT_CALLBACK('k', "clockid", &record.opts,
3235 "clockid", "clockid to use for events, see clock_gettime()",
3236 parse_clockid),
2dd6d8a1
AH
3237 OPT_STRING_OPTARG('S', "snapshot", &record.opts.auxtrace_snapshot_opts,
3238 "opts", "AUX area tracing Snapshot Mode", ""),
c0a6de06
AH
3239 OPT_STRING_OPTARG(0, "aux-sample", &record.opts.auxtrace_sample_opts,
3240 "opts", "sample AUX area", ""),
3fcb10e4 3241 OPT_UINTEGER(0, "proc-map-timeout", &proc_map_timeout,
9d9cad76 3242 "per thread proc mmap processing timeout in ms"),
f3b3614a
HB
3243 OPT_BOOLEAN(0, "namespaces", &record.opts.record_namespaces,
3244 "Record namespaces events"),
8fb4b679
NK
3245 OPT_BOOLEAN(0, "all-cgroups", &record.opts.record_cgroup,
3246 "Record cgroup events"),
16b4b4e1
AH
3247 OPT_BOOLEAN_SET(0, "switch-events", &record.opts.record_switch_events,
3248 &record.opts.record_switch_events_set,
3249 "Record context switch events"),
85723885
JO
3250 OPT_BOOLEAN_FLAG(0, "all-kernel", &record.opts.all_kernel,
3251 "Configure all used events to run in kernel space.",
3252 PARSE_OPT_EXCLUSIVE),
3253 OPT_BOOLEAN_FLAG(0, "all-user", &record.opts.all_user,
3254 "Configure all used events to run in user space.",
3255 PARSE_OPT_EXCLUSIVE),
53651b28 3256 OPT_BOOLEAN(0, "kernel-callchains", &record.opts.kernel_callchains,
3257 "collect kernel callchains"),
3258 OPT_BOOLEAN(0, "user-callchains", &record.opts.user_callchains,
3259 "collect user callchains"),
71dc2326
WN
3260 OPT_STRING(0, "clang-path", &llvm_param.clang_path, "clang path",
3261 "clang binary to use for compiling BPF scriptlets"),
3262 OPT_STRING(0, "clang-opt", &llvm_param.clang_opt, "clang options",
3263 "options passed to clang when compiling BPF scriptlets"),
7efe0e03
HK
3264 OPT_STRING(0, "vmlinux", &symbol_conf.vmlinux_name,
3265 "file", "vmlinux pathname"),
6156681b
NK
3266 OPT_BOOLEAN(0, "buildid-all", &record.buildid_all,
3267 "Record build-id of all DSOs regardless of hits"),
e29386c8
JO
3268 OPT_BOOLEAN(0, "buildid-mmap", &record.buildid_mmap,
3269 "Record build-id in map events"),
ecfd7a9c
WN
3270 OPT_BOOLEAN(0, "timestamp-filename", &record.timestamp_filename,
3271 "append timestamp to output filename"),
68588baf
JY
3272 OPT_BOOLEAN(0, "timestamp-boundary", &record.timestamp_boundary,
3273 "Record timestamp boundary (time of first/last samples)"),
cb4e1ebb 3274 OPT_STRING_OPTARG_SET(0, "switch-output", &record.switch_output.str,
c38dab7d
AK
3275 &record.switch_output.set, "signal or size[BKMG] or time[smhd]",
3276 "Switch output when receiving SIGUSR2 (signal) or cross a size or time threshold",
dc0c6127 3277 "signal"),
899e5ffb
ACM
3278 OPT_CALLBACK_SET(0, "switch-output-event", &record.sb_evlist, &record.switch_output_event_set, "switch output event",
3279 "switch output event selector. use 'perf list' to list available events",
3280 parse_events_option_new_evlist),
03724b2e
AK
3281 OPT_INTEGER(0, "switch-max-files", &record.switch_output.num_files,
3282 "Limit number of switch output generated files"),
0aab2136
WN
3283 OPT_BOOLEAN(0, "dry-run", &dry_run,
3284 "Parse options then exit"),
d3d1af6f 3285#ifdef HAVE_AIO_SUPPORT
93f20c0f
AB
3286 OPT_CALLBACK_OPTARG(0, "aio", &record.opts,
3287 &nr_cblocks_default, "n", "Use <n> control blocks in asynchronous trace writing mode (default: 1, max: 4)",
d3d1af6f
AB
3288 record__aio_parse),
3289#endif
f4fe11b7
AB
3290 OPT_CALLBACK(0, "affinity", &record.opts, "node|cpu",
3291 "Set affinity mask of trace reading thread to NUMA node cpu mask or cpu of processed mmap buffer",
3292 record__parse_affinity),
504c1ad1 3293#ifdef HAVE_ZSTD_SUPPORT
b5f2511d
AB
3294 OPT_CALLBACK_OPTARG('z', "compression-level", &record.opts, &comp_level_default, "n",
3295 "Compress records using specified level (default: 1 - fastest compression, 22 - greatest compression)",
504c1ad1
AB
3296 record__parse_comp_level),
3297#endif
6d575816
JS
3298 OPT_CALLBACK(0, "max-size", &record.output_max_size,
3299 "size", "Limit the maximum size of the output file", parse_output_max_size),
d99c22ea
SE
3300 OPT_UINTEGER(0, "num-thread-synthesize",
3301 &record.opts.nr_threads_synthesize,
3302 "number of threads to run for event synthesis"),
70943490
SE
3303#ifdef HAVE_LIBPFM
3304 OPT_CALLBACK(0, "pfm-events", &record.evlist, "event",
3305 "libpfm4 event selector. use 'perf list' to list available events",
3306 parse_libpfm_events_option),
3307#endif
a8fcbd26 3308 OPT_CALLBACK(0, "control", &record.opts, "fd:ctl-fd[,ack-fd] or fifo:ctl-fifo[,ack-fifo]",
d20aff15
AH
3309 "Listen on ctl-fd descriptor for command to control measurement ('enable': enable events, 'disable': disable events,\n"
3310 "\t\t\t 'snapshot': AUX area tracing snapshot).\n"
a8fcbd26
AH
3311 "\t\t\t Optionally send control command completion ('ack\\n') to ack-fd descriptor.\n"
3312 "\t\t\t Alternatively, ctl-fifo / ack-fifo will be opened and used as ctl-fd / ack-fd.",
1d078ccb 3313 parse_control_option),
41b740b6
NK
3314 OPT_CALLBACK(0, "synth", &record.opts, "no|all|task|mmap|cgroup",
3315 "Fine-tune event synthesis: default=all", parse_record_synth_option),
9bce13ea
JO
3316 OPT_STRING_OPTARG_SET(0, "debuginfod", &record.debuginfod.urls,
3317 &record.debuginfod.set, "debuginfod urls",
3318 "Enable debuginfod data retrieval from DEBUGINFOD_URLS or specified urls",
3319 "system"),
06380a84
AB
3320 OPT_CALLBACK_OPTARG(0, "threads", &record.opts, NULL, "spec",
3321 "write collected trace data into several data files using parallel threads",
3322 record__parse_threads),
0e9b20b8
IM
3323 OPT_END()
3324};
3325
e5b2c207
NK
3326struct option *record_options = __record_options;
3327
7954f716
AB
3328static void record__mmap_cpu_mask_init(struct mmap_cpu_mask *mask, struct perf_cpu_map *cpus)
3329{
3330 int c;
3331
3332 for (c = 0; c < cpus->nr; c++)
3333 set_bit(cpus->map[c].cpu, mask->bits);
3334}
3335
f466e5ed
AB
3336static int record__mmap_cpu_mask_init_spec(struct mmap_cpu_mask *mask, const char *mask_spec)
3337{
3338 struct perf_cpu_map *cpus;
3339
3340 cpus = perf_cpu_map__new(mask_spec);
3341 if (!cpus)
3342 return -ENOMEM;
3343
3344 bitmap_zero(mask->bits, mask->nbits);
3345 record__mmap_cpu_mask_init(mask, cpus);
3346 perf_cpu_map__put(cpus);
3347
3348 return 0;
3349}
3350
7954f716
AB
3351static void record__free_thread_masks(struct record *rec, int nr_threads)
3352{
3353 int t;
3354
3355 if (rec->thread_masks)
3356 for (t = 0; t < nr_threads; t++)
3357 record__thread_mask_free(&rec->thread_masks[t]);
3358
3359 zfree(&rec->thread_masks);
3360}
3361
3362static int record__alloc_thread_masks(struct record *rec, int nr_threads, int nr_bits)
3363{
3364 int t, ret;
3365
3366 rec->thread_masks = zalloc(nr_threads * sizeof(*(rec->thread_masks)));
3367 if (!rec->thread_masks) {
3368 pr_err("Failed to allocate thread masks\n");
3369 return -ENOMEM;
3370 }
3371
3372 for (t = 0; t < nr_threads; t++) {
3373 ret = record__thread_mask_alloc(&rec->thread_masks[t], nr_bits);
3374 if (ret) {
3375 pr_err("Failed to allocate thread masks[%d]\n", t);
3376 goto out_free;
3377 }
3378 }
3379
3380 return 0;
3381
3382out_free:
3383 record__free_thread_masks(rec, nr_threads);
3384
3385 return ret;
3386}
3387
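/*
 * Per-CPU threads layout: create one worker thread per recorded CPU,
 * with both its maps and its affinity mask containing just that CPU.
 */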
06380a84
AB
3388static int record__init_thread_cpu_masks(struct record *rec, struct perf_cpu_map *cpus)
3389{
3390 int t, ret, nr_cpus = perf_cpu_map__nr(cpus);
3391
3392 ret = record__alloc_thread_masks(rec, nr_cpus, cpu__max_cpu().cpu);
3393 if (ret)
3394 return ret;
3395
3396 rec->nr_threads = nr_cpus;
3397 pr_debug("nr_threads: %d\n", rec->nr_threads);
3398
3399 for (t = 0; t < rec->nr_threads; t++) {
3400 set_bit(cpus->map[t].cpu, rec->thread_masks[t].maps.bits);
3401 set_bit(cpus->map[t].cpu, rec->thread_masks[t].affinity.bits);
3402 if (verbose) {
3403 pr_debug("thread_masks[%d]: ", t);
3404 mmap_cpu_mask__scnprintf(&rec->thread_masks[t].maps, "maps");
3405 pr_debug("thread_masks[%d]: ", t);
3406 mmap_cpu_mask__scnprintf(&rec->thread_masks[t].affinity, "affinity");
3407 }
3408 }
3409
3410 return 0;
3411}
3412
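/*
 * Build one thread_mask per user-supplied --threads spec entry: each
 * maps/affinity CPU list is parsed into a bitmap, clamped to the CPUs
 * being recorded, and rejected if it ends up empty or intersects a
 * mask from a previous spec entry.
 */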
static int record__init_thread_masks_spec(struct record *rec, struct perf_cpu_map *cpus,
					  const char **maps_spec, const char **affinity_spec,
					  u32 nr_spec)
{
	u32 s;
	int ret = 0, t = 0;
	struct mmap_cpu_mask cpus_mask;
	struct thread_mask thread_mask, full_mask, *thread_masks;

	ret = record__mmap_cpu_mask_alloc(&cpus_mask, cpu__max_cpu().cpu);
	if (ret) {
		pr_err("Failed to allocate CPUs mask\n");
		return ret;
	}
	record__mmap_cpu_mask_init(&cpus_mask, cpus);

	ret = record__thread_mask_alloc(&full_mask, cpu__max_cpu().cpu);
	if (ret) {
		pr_err("Failed to allocate full mask\n");
		goto out_free_cpu_mask;
	}

	ret = record__thread_mask_alloc(&thread_mask, cpu__max_cpu().cpu);
	if (ret) {
		pr_err("Failed to allocate thread mask\n");
		goto out_free_full_and_cpu_masks;
	}

	for (s = 0; s < nr_spec; s++) {
		ret = record__mmap_cpu_mask_init_spec(&thread_mask.maps, maps_spec[s]);
		if (ret) {
			pr_err("Failed to initialize maps thread mask\n");
			goto out_free;
		}
		ret = record__mmap_cpu_mask_init_spec(&thread_mask.affinity, affinity_spec[s]);
		if (ret) {
			pr_err("Failed to initialize affinity thread mask\n");
			goto out_free;
		}

		/* ignore invalid CPUs but do not allow empty masks */
		if (!bitmap_and(thread_mask.maps.bits, thread_mask.maps.bits,
				cpus_mask.bits, thread_mask.maps.nbits)) {
			pr_err("Empty maps mask: %s\n", maps_spec[s]);
			ret = -EINVAL;
			goto out_free;
		}
		if (!bitmap_and(thread_mask.affinity.bits, thread_mask.affinity.bits,
				cpus_mask.bits, thread_mask.affinity.nbits)) {
			pr_err("Empty affinity mask: %s\n", affinity_spec[s]);
			ret = -EINVAL;
			goto out_free;
		}

		/* do not allow intersection with other masks (full_mask) */
		if (bitmap_intersects(thread_mask.maps.bits, full_mask.maps.bits,
				      thread_mask.maps.nbits)) {
			pr_err("Intersecting maps mask: %s\n", maps_spec[s]);
			ret = -EINVAL;
			goto out_free;
		}
		if (bitmap_intersects(thread_mask.affinity.bits, full_mask.affinity.bits,
				      thread_mask.affinity.nbits)) {
			pr_err("Intersecting affinity mask: %s\n", affinity_spec[s]);
			ret = -EINVAL;
			goto out_free;
		}

		bitmap_or(full_mask.maps.bits, full_mask.maps.bits,
			  thread_mask.maps.bits, full_mask.maps.nbits);
		bitmap_or(full_mask.affinity.bits, full_mask.affinity.bits,
			  thread_mask.affinity.bits, full_mask.maps.nbits);

		thread_masks = realloc(rec->thread_masks, (t + 1) * sizeof(struct thread_mask));
		if (!thread_masks) {
			pr_err("Failed to reallocate thread masks\n");
			ret = -ENOMEM;
			goto out_free;
		}
		rec->thread_masks = thread_masks;
		rec->thread_masks[t] = thread_mask;
		if (verbose) {
			pr_debug("thread_masks[%d]: ", t);
			mmap_cpu_mask__scnprintf(&rec->thread_masks[t].maps, "maps");
			pr_debug("thread_masks[%d]: ", t);
			mmap_cpu_mask__scnprintf(&rec->thread_masks[t].affinity, "affinity");
		}
		t++;
		ret = record__thread_mask_alloc(&thread_mask, cpu__max_cpu().cpu);
		if (ret) {
			pr_err("Failed to allocate thread mask\n");
			goto out_free_full_and_cpu_masks;
		}
	}
	rec->nr_threads = t;
	pr_debug("nr_threads: %d\n", rec->nr_threads);
	if (!rec->nr_threads)
		ret = -EINVAL;

out_free:
	record__thread_mask_free(&thread_mask);
out_free_full_and_cpu_masks:
	record__thread_mask_free(&full_mask);
out_free_cpu_mask:
	record__mmap_cpu_mask_free(&cpus_mask);

	return ret;
}

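/*
 * Worked example (illustrative, not from the original source): given
 * maps_spec = { "0-1", "2-3" } and affinity_spec = { "0-1", "2-3" },
 * record__init_thread_masks_spec() above produces two streaming threads.
 * Each spec is intersected with the requested CPU set (invalid CPUs are
 * dropped, a spec that becomes empty is rejected) and must not overlap a
 * previously accepted spec, so every mmap ends up owned by exactly one
 * thread.
 */
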
static int record__init_thread_core_masks(struct record *rec, struct perf_cpu_map *cpus)
{
	int ret;
	struct cpu_topology *topo;

	topo = cpu_topology__new();
	if (!topo) {
		pr_err("Failed to allocate CPU topology\n");
		return -ENOMEM;
	}

	ret = record__init_thread_masks_spec(rec, cpus, topo->core_cpus_list,
					     topo->core_cpus_list, topo->core_cpus_lists);
	cpu_topology__delete(topo);

	return ret;
}

static int record__init_thread_package_masks(struct record *rec, struct perf_cpu_map *cpus)
{
	int ret;
	struct cpu_topology *topo;

	topo = cpu_topology__new();
	if (!topo) {
		pr_err("Failed to allocate CPU topology\n");
		return -ENOMEM;
	}

	ret = record__init_thread_masks_spec(rec, cpus, topo->package_cpus_list,
					     topo->package_cpus_list, topo->package_cpus_lists);
	cpu_topology__delete(topo);

	return ret;
}

static int record__init_thread_numa_masks(struct record *rec, struct perf_cpu_map *cpus)
{
	u32 s;
	int ret;
	const char **spec;
	struct numa_topology *topo;

	topo = numa_topology__new();
	if (!topo) {
		pr_err("Failed to allocate NUMA topology\n");
		return -ENOMEM;
	}

	spec = zalloc(topo->nr * sizeof(char *));
	if (!spec) {
		pr_err("Failed to allocate NUMA spec\n");
		ret = -ENOMEM;
		goto out_delete_topo;
	}
	for (s = 0; s < topo->nr; s++)
		spec[s] = topo->nodes[s].cpus;

	ret = record__init_thread_masks_spec(rec, cpus, spec, spec, topo->nr);

	zfree(&spec);

out_delete_topo:
	numa_topology__delete(topo);

	return ret;
}

static int record__init_thread_user_masks(struct record *rec, struct perf_cpu_map *cpus)
{
	int t, ret;
	u32 s, nr_spec = 0;
	char **maps_spec = NULL, **affinity_spec = NULL, **tmp_spec;
	char *user_spec, *spec, *spec_ptr, *mask, *mask_ptr, *dup_mask = NULL;

	for (t = 0, user_spec = (char *)rec->opts.threads_user_spec; ; t++, user_spec = NULL) {
		spec = strtok_r(user_spec, ":", &spec_ptr);
		if (spec == NULL)
			break;
		pr_debug2("threads_spec[%d]: %s\n", t, spec);
		mask = strtok_r(spec, "/", &mask_ptr);
		if (mask == NULL)
			break;
		pr_debug2("  maps mask: %s\n", mask);
		tmp_spec = realloc(maps_spec, (nr_spec + 1) * sizeof(char *));
		if (!tmp_spec) {
			pr_err("Failed to reallocate maps spec\n");
			ret = -ENOMEM;
			goto out_free;
		}
		maps_spec = tmp_spec;
		maps_spec[nr_spec] = dup_mask = strdup(mask);
		if (!maps_spec[nr_spec]) {
			pr_err("Failed to allocate maps spec[%d]\n", nr_spec);
			ret = -ENOMEM;
			goto out_free;
		}
		mask = strtok_r(NULL, "/", &mask_ptr);
		if (mask == NULL) {
			pr_err("Invalid thread maps or affinity specs\n");
			ret = -EINVAL;
			goto out_free;
		}
		pr_debug2("  affinity mask: %s\n", mask);
		tmp_spec = realloc(affinity_spec, (nr_spec + 1) * sizeof(char *));
		if (!tmp_spec) {
			pr_err("Failed to reallocate affinity spec\n");
			ret = -ENOMEM;
			goto out_free;
		}
		affinity_spec = tmp_spec;
		affinity_spec[nr_spec] = strdup(mask);
		if (!affinity_spec[nr_spec]) {
			pr_err("Failed to allocate affinity spec[%d]\n", nr_spec);
			ret = -ENOMEM;
			goto out_free;
		}
		dup_mask = NULL;
		nr_spec++;
	}

	ret = record__init_thread_masks_spec(rec, cpus, (const char **)maps_spec,
					     (const char **)affinity_spec, nr_spec);

out_free:
	free(dup_mask);
	for (s = 0; s < nr_spec; s++) {
		if (maps_spec)
			free(maps_spec[s]);
		if (affinity_spec)
			free(affinity_spec[s]);
	}
	free(affinity_spec);
	free(maps_spec);

	return ret;
}

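/*
 * Illustrative parse (not from the original source): a user spec such as
 * "--threads=0-3/0-3:4-7/4-7" is split on ':' into per-thread entries and
 * each entry on '/' into a maps mask and an affinity mask, yielding
 * maps_spec = { "0-3", "4-7" } and affinity_spec = { "0-3", "4-7" } here.
 */
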
static int record__init_thread_default_masks(struct record *rec, struct perf_cpu_map *cpus)
{
	int ret;

	ret = record__alloc_thread_masks(rec, 1, cpu__max_cpu().cpu);
	if (ret)
		return ret;

	record__mmap_cpu_mask_init(&rec->thread_masks->maps, cpus);

	rec->nr_threads = 1;

	return 0;
}

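/*
 * Note (added for clarity): without --threads the default path above keeps a
 * single streaming thread whose maps mask covers all requested CPUs; the
 * affinity mask is not initialized here and is assumed to stay zero-filled
 * from allocation.
 */
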
static int record__init_thread_masks(struct record *rec)
{
	int ret = 0;
	struct perf_cpu_map *cpus = rec->evlist->core.user_requested_cpus;

	if (!record__threads_enabled(rec))
		return record__init_thread_default_masks(rec, cpus);

	switch (rec->opts.threads_spec) {
	case THREAD_SPEC__CPU:
		ret = record__init_thread_cpu_masks(rec, cpus);
		break;
	case THREAD_SPEC__CORE:
		ret = record__init_thread_core_masks(rec, cpus);
		break;
	case THREAD_SPEC__PACKAGE:
		ret = record__init_thread_package_masks(rec, cpus);
		break;
	case THREAD_SPEC__NUMA:
		ret = record__init_thread_numa_masks(rec, cpus);
		break;
	case THREAD_SPEC__USER:
		ret = record__init_thread_user_masks(rec, cpus);
		break;
	default:
		break;
	}

	return ret;
}

int cmd_record(int argc, const char **argv)
{
	int err;
	struct record *rec = &record;
	char errbuf[BUFSIZ];

	setlocale(LC_ALL, "");

#ifndef HAVE_LIBBPF_SUPPORT
# define set_nobuild(s, l, c) set_option_nobuild(record_options, s, l, "NO_LIBBPF=1", c)
	set_nobuild('\0', "clang-path", true);
	set_nobuild('\0', "clang-opt", true);
# undef set_nobuild
#endif

#ifndef HAVE_BPF_PROLOGUE
# if !defined (HAVE_DWARF_SUPPORT)
#  define REASON  "NO_DWARF=1"
# elif !defined (HAVE_LIBBPF_SUPPORT)
#  define REASON  "NO_LIBBPF=1"
# else
#  define REASON  "this architecture doesn't support BPF prologue"
# endif
# define set_nobuild(s, l, c) set_option_nobuild(record_options, s, l, REASON, c)
	set_nobuild('\0', "vmlinux", true);
# undef set_nobuild
# undef REASON
#endif

	rec->opts.affinity = PERF_AFFINITY_SYS;

	rec->evlist = evlist__new();
	if (rec->evlist == NULL)
		return -ENOMEM;

	err = perf_config(perf_record_config, rec);
	if (err)
		return err;

	argc = parse_options(argc, argv, record_options, record_usage,
			    PARSE_OPT_STOP_AT_NON_OPTION);
	if (quiet)
		perf_quiet_option();

	err = symbol__validate_sym_arguments();
	if (err)
		return err;

	perf_debuginfod_setup(&record.debuginfod);

	/* Make system wide (-a) the default target. */
	if (!argc && target__none(&rec->opts.target))
		rec->opts.target.system_wide = true;

	if (nr_cgroups && !rec->opts.target.system_wide) {
		usage_with_options_msg(record_usage, record_options,
			"cgroup monitoring only available in system-wide mode");
	}

	if (rec->buildid_mmap) {
		if (!perf_can_record_build_id()) {
			pr_err("Failed: no support to record build id in mmap events, update your kernel.\n");
			err = -EINVAL;
			goto out_opts;
		}
		pr_debug("Enabling build id in mmap2 events.\n");
		/* Enable mmap build id synthesizing. */
		symbol_conf.buildid_mmap2 = true;
		/* Enable perf_event_attr::build_id bit. */
		rec->opts.build_id = true;
		/* Disable build id cache. */
		rec->no_buildid = true;
	}

	if (rec->opts.record_cgroup && !perf_can_record_cgroup()) {
		pr_err("Kernel has no cgroup sampling support.\n");
		err = -EINVAL;
		goto out_opts;
	}

	if (rec->opts.kcore || record__threads_enabled(rec))
		rec->data.is_dir = true;

	if (record__threads_enabled(rec)) {
		if (rec->opts.affinity != PERF_AFFINITY_SYS) {
			pr_err("--affinity option is mutually exclusive to parallel streaming mode.\n");
			err = -EINVAL;	/* was missing: avoid returning a stale (zero) err */
			goto out_opts;
		}
		if (record__aio_enabled(rec)) {
			pr_err("Asynchronous streaming mode (--aio) is mutually exclusive to parallel streaming mode.\n");
			err = -EINVAL;	/* was missing: avoid returning a stale (zero) err */
			goto out_opts;
		}
	}

	if (rec->opts.comp_level != 0) {
		pr_debug("Compression enabled, disabling build id collection at the end of the session.\n");
		rec->no_buildid = true;
	}

	if (rec->opts.record_switch_events &&
	    !perf_can_record_switch_events()) {
		ui__error("kernel does not support recording context switch events\n");
		parse_options_usage(record_usage, record_options, "switch-events", 0);
		err = -EINVAL;
		goto out_opts;
	}

	if (switch_output_setup(rec)) {
		parse_options_usage(record_usage, record_options, "switch-output", 0);
		err = -EINVAL;
		goto out_opts;
	}

	if (rec->switch_output.time) {
		signal(SIGALRM, alarm_sig_handler);
		alarm(rec->switch_output.time);
	}

	if (rec->switch_output.num_files) {
		rec->switch_output.filenames = calloc(sizeof(char *),
						      rec->switch_output.num_files);
		if (!rec->switch_output.filenames) {
			err = -EINVAL;
			goto out_opts;
		}
	}

	if (rec->timestamp_filename && record__threads_enabled(rec)) {
		rec->timestamp_filename = false;
		pr_warning("WARNING: --timestamp-filename option is not available in parallel streaming mode.\n");
	}

	/*
	 * Allow aliases to facilitate the lookup of symbols for address
	 * filters. Refer to auxtrace_parse_filters().
	 */
	symbol_conf.allow_aliases = true;

	symbol__init(NULL);

	err = record__auxtrace_init(rec);
	if (err)
		goto out;

	if (dry_run)
		goto out;

	err = bpf__setup_stdout(rec->evlist);
	if (err) {
		bpf__strerror_setup_stdout(rec->evlist, err, errbuf, sizeof(errbuf));
		pr_err("ERROR: Setup BPF stdout failed: %s\n",
		       errbuf);
		goto out;
	}

	err = -ENOMEM;

	if (rec->no_buildid_cache || rec->no_buildid) {
		disable_buildid_cache();
	} else if (rec->switch_output.enabled) {
		/*
		 * In 'perf record --switch-output', disable buildid
		 * generation by default to reduce the data file switching
		 * overhead. Still generate buildids if they are explicitly
		 * required using
		 *
		 *  perf record --switch-output --no-no-buildid \
		 *              --no-no-buildid-cache
		 *
		 * The code below is equivalent to:
		 *
		 * if ((rec->no_buildid || !rec->no_buildid_set) &&
		 *     (rec->no_buildid_cache || !rec->no_buildid_cache_set))
		 *         disable_buildid_cache();
		 */
		bool disable = true;

		if (rec->no_buildid_set && !rec->no_buildid)
			disable = false;
		if (rec->no_buildid_cache_set && !rec->no_buildid_cache)
			disable = false;
		if (disable) {
			rec->no_buildid = true;
			rec->no_buildid_cache = true;
			disable_buildid_cache();
		}
	}

	if (record.opts.overwrite)
		record.opts.tail_synthesize = true;

	if (rec->evlist->core.nr_entries == 0) {
		if (perf_pmu__has_hybrid()) {
			err = evlist__add_default_hybrid(rec->evlist,
							 !record.opts.no_samples);
		} else {
			err = __evlist__add_default(rec->evlist,
						    !record.opts.no_samples);
		}

		if (err < 0) {
			pr_err("Not enough memory for event selector list\n");
			goto out;
		}
	}

	if (rec->opts.target.tid && !rec->opts.no_inherit_set)
		rec->opts.no_inherit = true;

	err = target__validate(&rec->opts.target);
	if (err) {
		target__strerror(&rec->opts.target, err, errbuf, BUFSIZ);
		ui__warning("%s\n", errbuf);
	}

	err = target__parse_uid(&rec->opts.target);
	if (err) {
		int saved_errno = errno;

		target__strerror(&rec->opts.target, err, errbuf, BUFSIZ);
		ui__error("%s", errbuf);

		err = -saved_errno;
		goto out;
	}

	/* Enable ignoring missing threads when -u/-p option is defined. */
	rec->opts.ignore_missing_thread = rec->opts.target.uid != UINT_MAX || rec->opts.target.pid;

	if (evlist__fix_hybrid_cpus(rec->evlist, rec->opts.target.cpu_list)) {
		pr_err("failed to use cpu list %s\n",
		       rec->opts.target.cpu_list);
		err = -EINVAL;	/* was missing: avoid returning a stale (zero) err */
		goto out;
	}

	rec->opts.target.hybrid = perf_pmu__has_hybrid();

	if (callchain_param.enabled && callchain_param.record_mode == CALLCHAIN_FP)
		arch__add_leaf_frame_record_opts(&rec->opts);

	err = -ENOMEM;
	if (evlist__create_maps(rec->evlist, &rec->opts.target) < 0)
		usage_with_options(record_usage, record_options);

	err = auxtrace_record__options(rec->itr, rec->evlist, &rec->opts);
	if (err)
		goto out;

	/*
	 * Take all buildids when the file contains AUX area tracing data,
	 * because we do not decode the trace (that would take too long).
	 */
	if (rec->opts.full_auxtrace)
		rec->buildid_all = true;

	if (rec->opts.text_poke) {
		err = record__config_text_poke(rec->evlist);
		if (err) {
			pr_err("record__config_text_poke failed, error %d\n", err);
			goto out;
		}
	}

	if (record_opts__config(&rec->opts)) {
		err = -EINVAL;
		goto out;
	}

	err = record__init_thread_masks(rec);
	if (err) {
		pr_err("Failed to initialize parallel data streaming masks\n");
		goto out;
	}

	if (rec->opts.nr_cblocks > nr_cblocks_max)
		rec->opts.nr_cblocks = nr_cblocks_max;
	pr_debug("nr_cblocks: %d\n", rec->opts.nr_cblocks);

	pr_debug("affinity: %s\n", affinity_tags[rec->opts.affinity]);
	pr_debug("mmap flush: %d\n", rec->opts.mmap_flush);

	if (rec->opts.comp_level > comp_level_max)
		rec->opts.comp_level = comp_level_max;
	pr_debug("comp level: %d\n", rec->opts.comp_level);

	err = __cmd_record(&record, argc, argv);
out:
	evlist__delete(rec->evlist);
	symbol__exit();
	auxtrace_record__free(rec->itr);
out_opts:
	record__free_thread_masks(rec, rec->nr_threads);
	rec->nr_threads = 0;
	evlist__close_control(rec->opts.ctl_fd, rec->opts.ctl_fd_ack, &rec->opts.ctl_fd_close);
	return err;
}

static void snapshot_sig_handler(int sig __maybe_unused)
{
	struct record *rec = &record;

	hit_auxtrace_snapshot_trigger(rec);

	if (switch_output_signal(rec))
		trigger_hit(&switch_output_trigger);
}

static void alarm_sig_handler(int sig __maybe_unused)
{
	struct record *rec = &record;

	if (switch_output_time(rec))
		trigger_hit(&switch_output_trigger);
}