perf evsel: Introduce mmap support
[linux-2.6-block.git] / tools / perf / builtin-record.c
CommitLineData
abaff32a 1/*
bf9e1876
IM
2 * builtin-record.c
3 *
4 * Builtin record command: Record the profile of a workload
5 * (or a CPU, or a PID) into the perf.data output file - for
6 * later analysis via perf report.
abaff32a 7 */
b8f46c5a
XG
8#define _FILE_OFFSET_BITS 64
9
16f762a2 10#include "builtin.h"
bf9e1876
IM
11
12#include "perf.h"
13
6122e4e4 14#include "util/build-id.h"
6eda5838 15#include "util/util.h"
0e9b20b8 16#include "util/parse-options.h"
8ad8db37 17#include "util/parse-events.h"
6eda5838 18
7c6a1c65 19#include "util/header.h"
66e274f3 20#include "util/event.h"
361c99a6 21#include "util/evlist.h"
69aad6f1 22#include "util/evsel.h"
8f28827a 23#include "util/debug.h"
94c744b6 24#include "util/session.h"
8d06367f 25#include "util/symbol.h"
a12b51c4 26#include "util/cpumap.h"
7c6a1c65 27
97124d5e 28#include <unistd.h>
de9ac07b 29#include <sched.h>
a41794cd 30#include <sys/mman.h>
de9ac07b 31
69aad6f1
ACM
/*
 * FD - lvalue for the perf event file descriptor stored in evsel @e at
 * position (@x, @y) of its fd xyarray; callers in this file pass a cpu
 * index as @x and a thread index as @y.
 *
 * The evsel argument is parenthesized so that any pointer expression
 * (not just a plain identifier) can be passed safely.
 */
#define FD(e, x, y) (*(int *)xyarray__entry((e)->fd, x, y))
33
7865e817
FW
/* How the output file is opened when recording starts. */
enum write_mode_t {
	WRITE_FORCE = 0,	/* open the output with O_TRUNC (fresh file) */
	WRITE_APPEND = 1,	/* keep the existing contents and add to them */
};
38
3de29cab
SE
39static u64 user_interval = ULLONG_MAX;
40static u64 default_interval = 0;
640c03ce 41static u64 sample_type;
a21ca2ca 42
60d567e2 43static struct cpu_map *cpus;
de9ac07b 44static unsigned int page_size;
42e59d7d 45static unsigned int mmap_pages = 128;
f9212819 46static unsigned int user_freq = UINT_MAX;
42e59d7d 47static int freq = 1000;
de9ac07b 48static int output;
529870e3 49static int pipe_output = 0;
23ac9cbe 50static const char *output_name = "perf.data";
42e59d7d 51static int group = 0;
1967936d 52static int realtime_prio = 0;
acac03fa 53static bool nodelay = false;
c0555642 54static bool raw_samples = false;
9c90a61c 55static bool sample_id_all_avail = true;
c0555642 56static bool system_wide = false;
42e59d7d 57static pid_t target_pid = -1;
d6d901c2 58static pid_t target_tid = -1;
5c98d466 59static struct thread_map *threads;
42e59d7d 60static pid_t child_pid = -1;
2e6cdf99 61static bool no_inherit = false;
7865e817 62static enum write_mode_t write_mode = WRITE_FORCE;
c0555642
IM
63static bool call_graph = false;
64static bool inherit_stat = false;
65static bool no_samples = false;
66static bool sample_address = false;
9c90a61c 67static bool sample_time = false;
a1ac1d3c 68static bool no_buildid = false;
baa2f6ce 69static bool no_buildid_cache = false;
361c99a6 70static struct perf_evlist *evsel_list;
42e59d7d
IM
71
72static long samples = 0;
42e59d7d 73static u64 bytes_written = 0;
a21ca2ca 74
42e59d7d 75static int file_new = 1;
6122e4e4 76static off_t post_processing_offset;
7c6a1c65 77
94c744b6 78static struct perf_session *session;
c45c6ea2 79static const char *cpu_list;
f5970550 80
de9ac07b 81struct mmap_data {
a21ca2ca
IM
82 void *base;
83 unsigned int mask;
84 unsigned int prev;
de9ac07b
PZ
85};
86
0e2e63dd 87static struct mmap_data mmap_array[MAX_NR_CPUS];
a21ca2ca 88
9d91a6f7 89static unsigned long mmap_read_head(struct mmap_data *md)
de9ac07b 90{
cdd6c482 91 struct perf_event_mmap_page *pc = md->base;
9d91a6f7 92 long head;
de9ac07b
PZ
93
94 head = pc->data_head;
95 rmb();
96
97 return head;
98}
99
9d91a6f7
PZ
100static void mmap_write_tail(struct mmap_data *md, unsigned long tail)
101{
cdd6c482 102 struct perf_event_mmap_page *pc = md->base;
9d91a6f7
PZ
103
104 /*
105 * ensure all reads are done before we write the tail out.
106 */
107 /* mb(); */
108 pc->data_tail = tail;
109}
110
9215545e
TZ
111static void advance_output(size_t size)
112{
113 bytes_written += size;
114}
115
f5970550
PZ
116static void write_output(void *buf, size_t size)
117{
118 while (size) {
119 int ret = write(output, buf, size);
120
121 if (ret < 0)
122 die("failed to write");
123
124 size -= ret;
125 buf += ret;
126
127 bytes_written += ret;
128 }
129}
130
d8f66248 131static int process_synthesized_event(event_t *event,
640c03ce 132 struct sample_data *sample __used,
d8f66248 133 struct perf_session *self __used)
234fbbf5 134{
6122e4e4 135 write_output(event, event->header.size);
234fbbf5
ACM
136 return 0;
137}
138
de9ac07b
PZ
139static void mmap_read(struct mmap_data *md)
140{
141 unsigned int head = mmap_read_head(md);
142 unsigned int old = md->prev;
143 unsigned char *data = md->base + page_size;
144 unsigned long size;
145 void *buf;
146 int diff;
147
de9ac07b
PZ
148 /*
149 * If we're further behind than half the buffer, there's a chance
2debbc83 150 * the writer will bite our tail and mess up the samples under us.
de9ac07b
PZ
151 *
152 * If we somehow ended up ahead of the head, we got messed up.
153 *
154 * In either case, truncate and restart at head.
155 */
156 diff = head - old;
9d91a6f7 157 if (diff < 0) {
ef365cef 158 fprintf(stderr, "WARNING: failed to keep up with mmap data\n");
de9ac07b
PZ
159 /*
160 * head points to a known good entry, start there.
161 */
162 old = head;
163 }
164
de9ac07b 165 if (old != head)
2debbc83 166 samples++;
de9ac07b
PZ
167
168 size = head - old;
169
170 if ((old & md->mask) + size != (head & md->mask)) {
171 buf = &data[old & md->mask];
172 size = md->mask + 1 - (old & md->mask);
173 old += size;
021e9f47 174
6122e4e4 175 write_output(buf, size);
de9ac07b
PZ
176 }
177
178 buf = &data[old & md->mask];
179 size = head - old;
180 old += size;
021e9f47 181
6122e4e4 182 write_output(buf, size);
de9ac07b
PZ
183
184 md->prev = old;
9d91a6f7 185 mmap_write_tail(md, old);
de9ac07b
PZ
186}
187
/* Set from the signal handler; polled by the main loop in __cmd_record(). */
static volatile int done;
/* Number of the signal that was caught, -1 while none has been seen. */
static volatile int signr = -1;

/*
 * Common handler for SIGCHLD, SIGINT and SIGUSR1: remember which
 * signal arrived and ask the record loop to stop.
 */
static void sig_handler(int sig)
{
	done = 1;
	signr = sig;
}
196
197static void sig_atexit(void)
198{
5ffc8881 199 if (child_pid > 0)
933da83a
CW
200 kill(child_pid, SIGTERM);
201
18483b81 202 if (signr == -1 || signr == SIGUSR1)
f7b7c26e
PZ
203 return;
204
205 signal(signr, SIG_DFL);
206 kill(getpid(), signr);
de9ac07b
PZ
207}
208
cdd6c482 209static struct perf_header_attr *get_header_attr(struct perf_event_attr *a, int nr)
7c6a1c65
PZ
210{
211 struct perf_header_attr *h_attr;
212
94c744b6
ACM
213 if (nr < session->header.attrs) {
214 h_attr = session->header.attr[nr];
7c6a1c65
PZ
215 } else {
216 h_attr = perf_header_attr__new(a);
dc79c0fc 217 if (h_attr != NULL)
94c744b6 218 if (perf_header__add_attr(&session->header, h_attr) < 0) {
11deb1f9
ACM
219 perf_header_attr__delete(h_attr);
220 h_attr = NULL;
221 }
7c6a1c65
PZ
222 }
223
224 return h_attr;
225}
226
361c99a6
ACM
227static void create_counter(struct perf_evlist *evlist,
228 struct perf_evsel *evsel, int cpu)
de9ac07b 229{
69aad6f1
ACM
230 char *filter = evsel->filter;
231 struct perf_event_attr *attr = &evsel->attr;
7c6a1c65 232 struct perf_header_attr *h_attr;
d6d901c2 233 int thread_index;
c171b552 234 int ret;
7c6a1c65
PZ
235 struct {
236 u64 count;
237 u64 time_enabled;
238 u64 time_running;
239 u64 id;
240 } read_data;
dd7927f4
ACM
241
242 for (thread_index = 0; thread_index < threads->nr; thread_index++) {
243 h_attr = get_header_attr(attr, evsel->idx);
244 if (h_attr == NULL)
245 die("nomem\n");
246
247 if (!file_new) {
248 if (memcmp(&h_attr->attr, attr, sizeof(*attr))) {
249 fprintf(stderr, "incompatible append\n");
250 exit(-1);
251 }
252 }
253
254 if (read(FD(evsel, cpu, thread_index), &read_data, sizeof(read_data)) == -1) {
255 perror("Unable to read perf file descriptor");
256 exit(-1);
257 }
258
259 if (perf_header_attr__add_id(h_attr, read_data.id) < 0) {
260 pr_warning("Not enough memory to add id\n");
261 exit(-1);
262 }
263
264 assert(FD(evsel, cpu, thread_index) >= 0);
265 fcntl(FD(evsel, cpu, thread_index), F_SETFL, O_NONBLOCK);
266
267 if (evsel->idx || thread_index) {
268 struct perf_evsel *first;
269 first = list_entry(evlist->entries.next, struct perf_evsel, node);
270 ret = ioctl(FD(evsel, cpu, thread_index),
271 PERF_EVENT_IOC_SET_OUTPUT,
272 FD(first, cpu, 0));
273 if (ret) {
274 error("failed to set output: %d (%s)\n", errno,
275 strerror(errno));
276 exit(-1);
277 }
278 } else {
279 mmap_array[cpu].prev = 0;
280 mmap_array[cpu].mask = mmap_pages*page_size - 1;
281 mmap_array[cpu].base = mmap(NULL, (mmap_pages+1)*page_size,
282 PROT_READ | PROT_WRITE, MAP_SHARED, FD(evsel, cpu, thread_index), 0);
283 if (mmap_array[cpu].base == MAP_FAILED) {
284 error("failed to mmap with %d (%s)\n", errno, strerror(errno));
285 exit(-1);
286 }
287
288 evlist->pollfd[evlist->nr_fds].fd = FD(evsel, cpu, thread_index);
289 evlist->pollfd[evlist->nr_fds].events = POLLIN;
290 evlist->nr_fds++;
291 }
292
293 if (filter != NULL) {
294 ret = ioctl(FD(evsel, cpu, thread_index),
295 PERF_EVENT_IOC_SET_FILTER, filter);
296 if (ret) {
297 error("failed to set filter with %d (%s)\n", errno,
298 strerror(errno));
299 exit(-1);
300 }
301 }
302 }
303
304 if (!sample_type)
305 sample_type = attr->sample_type;
306}
307
308static void config_attr(struct perf_evsel *evsel, struct perf_evlist *evlist)
309{
310 struct perf_event_attr *attr = &evsel->attr;
311 int track = !evsel->idx; /* only the first counter needs these */
7c6a1c65
PZ
312
313 attr->read_format = PERF_FORMAT_TOTAL_TIME_ENABLED |
314 PERF_FORMAT_TOTAL_TIME_RUNNING |
315 PERF_FORMAT_ID;
16c8a109 316
3a9f131f 317 attr->sample_type |= PERF_SAMPLE_IP | PERF_SAMPLE_TID;
3efa1cc9 318
361c99a6 319 if (evlist->nr_entries > 1)
8907fd60
EM
320 attr->sample_type |= PERF_SAMPLE_ID;
321
f9212819
FW
322 /*
323 * We default some events to a 1 default interval. But keep
324 * it a weak assumption overridable by the user.
325 */
326 if (!attr->sample_period || (user_freq != UINT_MAX &&
3de29cab 327 user_interval != ULLONG_MAX)) {
f9212819
FW
328 if (freq) {
329 attr->sample_type |= PERF_SAMPLE_PERIOD;
330 attr->freq = 1;
331 attr->sample_freq = freq;
332 } else {
333 attr->sample_period = default_interval;
334 }
1dba15e7 335 }
3efa1cc9 336
649c48a9
PZ
337 if (no_samples)
338 attr->sample_freq = 0;
339
340 if (inherit_stat)
341 attr->inherit_stat = 1;
342
3af9e859 343 if (sample_address) {
4bba828d 344 attr->sample_type |= PERF_SAMPLE_ADDR;
3af9e859
EM
345 attr->mmap_data = track;
346 }
4bba828d 347
3efa1cc9
IM
348 if (call_graph)
349 attr->sample_type |= PERF_SAMPLE_CALLCHAIN;
350
f60f3593
AS
351 if (system_wide)
352 attr->sample_type |= PERF_SAMPLE_CPU;
353
a43d3f08
ACM
354 if (sample_id_all_avail &&
355 (sample_time || system_wide || !no_inherit || cpu_list))
9c90a61c
ACM
356 attr->sample_type |= PERF_SAMPLE_TIME;
357
cd6feeea 358 if (raw_samples) {
6ddf259d 359 attr->sample_type |= PERF_SAMPLE_TIME;
daac07b2 360 attr->sample_type |= PERF_SAMPLE_RAW;
cd6feeea
IM
361 attr->sample_type |= PERF_SAMPLE_CPU;
362 }
f413cdb8 363
acac03fa
KS
364 if (nodelay) {
365 attr->watermark = 0;
366 attr->wakeup_events = 1;
367 }
368
a21ca2ca
IM
369 attr->mmap = track;
370 attr->comm = track;
dd7927f4 371
2e6cdf99 372 if (target_pid == -1 && target_tid == -1 && !system_wide) {
46be604b 373 attr->disabled = 1;
bedbfdea 374 attr->enable_on_exec = 1;
46be604b 375 }
dd7927f4 376}
bedbfdea 377
dd7927f4
ACM
378static void open_counters(struct perf_evlist *evlist)
379{
380 struct perf_evsel *pos;
381 int cpu;
382
383 list_for_each_entry(pos, &evlist->entries, node) {
384 struct perf_event_attr *attr = &pos->attr;
385 /*
386 * Check if parse_single_tracepoint_event has already asked for
387 * PERF_SAMPLE_TIME.
388 *
389 * XXX this is kludgy but short term fix for problems introduced by
390 * eac23d1c that broke 'perf script' by having different sample_types
391 * when using multiple tracepoint events when we use a perf binary
392 * that tries to use sample_id_all on an older kernel.
393 *
394 * We need to move counter creation to perf_session, support
395 * different sample_types, etc.
396 */
397 bool time_needed = attr->sample_type & PERF_SAMPLE_TIME;
d6d901c2 398
dd7927f4
ACM
399 config_attr(pos, evlist);
400retry_sample_id:
401 attr->sample_id_all = sample_id_all_avail ? 1 : 0;
402try_again:
403 if (perf_evsel__open(pos, cpus, threads, group, !no_inherit) < 0) {
d6d901c2
ZY
404 int err = errno;
405
406 if (err == EPERM || err == EACCES)
407 die("Permission error - are you root?\n"
408 "\t Consider tweaking"
409 " /proc/sys/kernel/perf_event_paranoid.\n");
c45c6ea2 410 else if (err == ENODEV && cpu_list) {
d6d901c2
ZY
411 die("No such device - did you specify"
412 " an out-of-range profile CPU?\n");
9c90a61c
ACM
413 } else if (err == EINVAL && sample_id_all_avail) {
414 /*
415 * Old kernel, no attr->sample_id_type_all field
416 */
417 sample_id_all_avail = false;
a43d3f08 418 if (!sample_time && !raw_samples && !time_needed)
eac23d1c
IM
419 attr->sample_type &= ~PERF_SAMPLE_TIME;
420
9c90a61c 421 goto retry_sample_id;
d6d901c2 422 }
3da297a6 423
d6d901c2
ZY
424 /*
425 * If it's cycles then fall back to hrtimer
426 * based cpu-clock-tick sw counter, which
427 * is always available even if no PMU support:
428 */
429 if (attr->type == PERF_TYPE_HARDWARE
430 && attr->config == PERF_COUNT_HW_CPU_CYCLES) {
431
432 if (verbose)
433 warning(" ... trying to fall back to cpu-clock-ticks\n");
434 attr->type = PERF_TYPE_SOFTWARE;
435 attr->config = PERF_COUNT_SW_CPU_CLOCK;
436 goto try_again;
437 }
438 printf("\n");
d9cf837e 439 error("sys_perf_event_open() syscall returned with %d (%s). /bin/dmesg may provide additional information.\n",
dd7927f4 440 err, strerror(err));
bfd45118
SK
441
442#if defined(__i386__) || defined(__x86_64__)
d6d901c2
ZY
443 if (attr->type == PERF_TYPE_HARDWARE && err == EOPNOTSUPP)
444 die("No hardware sampling interrupt available."
445 " No APIC? If so then you can boot the kernel"
446 " with the \"lapic\" boot parameter to"
447 " force-enable it.\n");
bfd45118
SK
448#endif
449
d6d901c2 450 die("No CONFIG_PERF_EVENTS=y kernel support configured?\n");
c171b552
LZ
451 }
452 }
a43d3f08 453
dd7927f4
ACM
454 for (cpu = 0; cpu < cpus->nr; ++cpu) {
455 list_for_each_entry(pos, &evlist->entries, node)
456 create_counter(evlist, pos, cpu);
457 }
16c8a109
PZ
458}
459
6122e4e4
ACM
460static int process_buildids(void)
461{
462 u64 size = lseek(output, 0, SEEK_CUR);
463
9f591fd7
ACM
464 if (size == 0)
465 return 0;
466
6122e4e4
ACM
467 session->fd = output;
468 return __perf_session__process_events(session, post_processing_offset,
469 size - post_processing_offset,
470 size, &build_id__mark_dso_hit_ops);
471}
472
f5970550
PZ
473static void atexit_header(void)
474{
c7929e47
TZ
475 if (!pipe_output) {
476 session->header.data_size += bytes_written;
f5970550 477
baa2f6ce
ACM
478 if (!no_buildid)
479 process_buildids();
361c99a6 480 perf_header__write(&session->header, evsel_list, output, true);
39d17dac 481 perf_session__delete(session);
361c99a6 482 perf_evlist__delete(evsel_list);
d65a458b 483 symbol__exit();
c7929e47 484 }
f5970550
PZ
485}
486
23346f21 487static void event__synthesize_guest_os(struct machine *machine, void *data)
a1645ce1
ZY
488{
489 int err;
23346f21 490 struct perf_session *psession = data;
a1645ce1 491
23346f21 492 if (machine__is_host(machine))
a1645ce1
ZY
493 return;
494
495 /*
496 *As for guest kernel when processing subcommand record&report,
497 *we arrange module mmap prior to guest kernel mmap and trigger
498 *a preload dso because default guest module symbols are loaded
499 *from guest kallsyms instead of /lib/modules/XXX/XXX. This
500 *method is used to avoid symbol missing when the first addr is
501 *in module instead of in guest kernel.
502 */
503 err = event__synthesize_modules(process_synthesized_event,
23346f21 504 psession, machine);
a1645ce1
ZY
505 if (err < 0)
506 pr_err("Couldn't record guest kernel [%d]'s reference"
23346f21 507 " relocation symbol.\n", machine->pid);
a1645ce1 508
a1645ce1
ZY
509 /*
510 * We use _stext for guest kernel because guest kernel's /proc/kallsyms
511 * have no _text sometimes.
512 */
513 err = event__synthesize_kernel_mmap(process_synthesized_event,
23346f21 514 psession, machine, "_text");
a1645ce1
ZY
515 if (err < 0)
516 err = event__synthesize_kernel_mmap(process_synthesized_event,
23346f21 517 psession, machine, "_stext");
a1645ce1
ZY
518 if (err < 0)
519 pr_err("Couldn't record guest kernel [%d]'s reference"
23346f21 520 " relocation symbol.\n", machine->pid);
a1645ce1
ZY
521}
522
98402807
FW
523static struct perf_event_header finished_round_event = {
524 .size = sizeof(struct perf_event_header),
525 .type = PERF_RECORD_FINISHED_ROUND,
526};
527
528static void mmap_read_all(void)
529{
0e2e63dd 530 int i;
98402807 531
dd7927f4 532 for (i = 0; i < cpus->nr; i++) {
0e2e63dd
PZ
533 if (mmap_array[i].base)
534 mmap_read(&mmap_array[i]);
98402807
FW
535 }
536
537 if (perf_header__has_feat(&session->header, HEADER_TRACE_INFO))
538 write_output(&finished_round_event, sizeof(finished_round_event));
539}
540
d4db3f16 541static int __cmd_record(int argc, const char **argv)
16c8a109 542{
69aad6f1 543 int i;
abaff32a 544 struct stat st;
abaff32a 545 int flags;
4dc0a04b 546 int err;
8b412664 547 unsigned long waking = 0;
856e9660 548 int child_ready_pipe[2], go_pipe[2];
46be604b 549 const bool forks = argc > 0;
856e9660 550 char buf;
23346f21 551 struct machine *machine;
de9ac07b
PZ
552
553 page_size = sysconf(_SC_PAGE_SIZE);
de9ac07b 554
f5970550
PZ
555 atexit(sig_atexit);
556 signal(SIGCHLD, sig_handler);
557 signal(SIGINT, sig_handler);
18483b81 558 signal(SIGUSR1, sig_handler);
f5970550 559
d4db3f16 560 if (forks && (pipe(child_ready_pipe) < 0 || pipe(go_pipe) < 0)) {
856e9660
PZ
561 perror("failed to create pipes");
562 exit(-1);
563 }
564
529870e3
TZ
565 if (!strcmp(output_name, "-"))
566 pipe_output = 1;
567 else if (!stat(output_name, &st) && st.st_size) {
7865e817 568 if (write_mode == WRITE_FORCE) {
b38d3464
ACM
569 char oldname[PATH_MAX];
570 snprintf(oldname, sizeof(oldname), "%s.old",
571 output_name);
572 unlink(oldname);
573 rename(output_name, oldname);
266e0e21 574 }
7865e817
FW
575 } else if (write_mode == WRITE_APPEND) {
576 write_mode = WRITE_FORCE;
97124d5e
PZ
577 }
578
f887f301 579 flags = O_CREAT|O_RDWR;
7865e817 580 if (write_mode == WRITE_APPEND)
f5970550 581 file_new = 0;
abaff32a
IM
582 else
583 flags |= O_TRUNC;
584
529870e3
TZ
585 if (pipe_output)
586 output = STDOUT_FILENO;
587 else
588 output = open(output_name, flags, S_IRUSR | S_IWUSR);
de9ac07b
PZ
589 if (output < 0) {
590 perror("failed to create output file");
591 exit(-1);
592 }
593
7865e817 594 session = perf_session__new(output_name, O_WRONLY,
21ef97f0 595 write_mode == WRITE_FORCE, false, NULL);
94c744b6 596 if (session == NULL) {
a9a70bbc
ACM
597 pr_err("Not enough memory for reading perf file header\n");
598 return -1;
599 }
600
baa2f6ce
ACM
601 if (!no_buildid)
602 perf_header__set_feat(&session->header, HEADER_BUILD_ID);
603
4dc0a04b 604 if (!file_new) {
8dc58101 605 err = perf_header__read(session, output);
4dc0a04b 606 if (err < 0)
39d17dac 607 goto out_delete_session;
4dc0a04b
ACM
608 }
609
361c99a6 610 if (have_tracepoints(&evsel_list->entries))
94c744b6 611 perf_header__set_feat(&session->header, HEADER_TRACE_INFO);
03456a15 612
39d17dac
ACM
613 /*
614 * perf_session__delete(session) will be called at atexit_header()
615 */
f5970550
PZ
616 atexit(atexit_header);
617
d4db3f16 618 if (forks) {
46be604b 619 child_pid = fork();
2fb750e8 620 if (child_pid < 0) {
856e9660
PZ
621 perror("failed to fork");
622 exit(-1);
623 }
7c6a1c65 624
46be604b 625 if (!child_pid) {
529870e3
TZ
626 if (pipe_output)
627 dup2(2, 1);
856e9660
PZ
628 close(child_ready_pipe[0]);
629 close(go_pipe[1]);
630 fcntl(go_pipe[0], F_SETFD, FD_CLOEXEC);
631
632 /*
633 * Do a dummy execvp to get the PLT entry resolved,
634 * so we avoid the resolver overhead on the real
635 * execvp call.
636 */
637 execvp("", (char **)argv);
638
639 /*
640 * Tell the parent we're ready to go
641 */
642 close(child_ready_pipe[1]);
643
644 /*
645 * Wait until the parent tells us to go.
646 */
647 if (read(go_pipe[0], &buf, 1) == -1)
648 perror("unable to read pipe");
649
650 execvp(argv[0], (char **)argv);
651
652 perror(argv[0]);
18483b81 653 kill(getppid(), SIGUSR1);
856e9660 654 exit(-1);
0a5ac846 655 }
856e9660 656
d6d901c2 657 if (!system_wide && target_tid == -1 && target_pid == -1)
5c98d466 658 threads->map[0] = child_pid;
d6d901c2 659
856e9660
PZ
660 close(child_ready_pipe[1]);
661 close(go_pipe[0]);
662 /*
663 * wait for child to settle
664 */
665 if (read(child_ready_pipe[0], &buf, 1) == -1) {
666 perror("unable to read pipe");
667 exit(-1);
668 }
669 close(child_ready_pipe[0]);
670 }
671
dd7927f4 672 open_counters(evsel_list);
de9ac07b 673
640c03ce
ACM
674 perf_session__set_sample_type(session, sample_type);
675
529870e3
TZ
676 if (pipe_output) {
677 err = perf_header__write_pipe(output);
678 if (err < 0)
679 return err;
680 } else if (file_new) {
361c99a6
ACM
681 err = perf_header__write(&session->header, evsel_list,
682 output, false);
d5eed904
ACM
683 if (err < 0)
684 return err;
56b03f3c
ACM
685 }
686
6122e4e4
ACM
687 post_processing_offset = lseek(output, 0, SEEK_CUR);
688
9c90a61c
ACM
689 perf_session__set_sample_id_all(session, sample_id_all_avail);
690
2c46dbb5
TZ
691 if (pipe_output) {
692 err = event__synthesize_attrs(&session->header,
693 process_synthesized_event,
694 session);
695 if (err < 0) {
696 pr_err("Couldn't synthesize attrs.\n");
697 return err;
698 }
cd19a035
TZ
699
700 err = event__synthesize_event_types(process_synthesized_event,
701 session);
702 if (err < 0) {
703 pr_err("Couldn't synthesize event_types.\n");
704 return err;
705 }
9215545e 706
361c99a6 707 if (have_tracepoints(&evsel_list->entries)) {
63e0c771
TZ
708 /*
709 * FIXME err <= 0 here actually means that
710 * there were no tracepoints so its not really
711 * an error, just that we don't need to
712 * synthesize anything. We really have to
713 * return this more properly and also
714 * propagate errors that now are calling die()
715 */
361c99a6 716 err = event__synthesize_tracing_data(output, evsel_list,
63e0c771
TZ
717 process_synthesized_event,
718 session);
719 if (err <= 0) {
720 pr_err("Couldn't record tracing data.\n");
721 return err;
722 }
2c9faa06 723 advance_output(err);
63e0c771 724 }
2c46dbb5
TZ
725 }
726
23346f21
ACM
727 machine = perf_session__find_host_machine(session);
728 if (!machine) {
a1645ce1
ZY
729 pr_err("Couldn't find native kernel information.\n");
730 return -1;
731 }
732
56b03f3c 733 err = event__synthesize_kernel_mmap(process_synthesized_event,
23346f21 734 session, machine, "_text");
70162138
ACM
735 if (err < 0)
736 err = event__synthesize_kernel_mmap(process_synthesized_event,
23346f21 737 session, machine, "_stext");
c1a3a4b9
ACM
738 if (err < 0)
739 pr_err("Couldn't record kernel reference relocation symbol\n"
740 "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
741 "Check /proc/kallsyms permission or run as root.\n");
b7cece76 742
a1645ce1 743 err = event__synthesize_modules(process_synthesized_event,
23346f21 744 session, machine);
c1a3a4b9
ACM
745 if (err < 0)
746 pr_err("Couldn't record kernel module information.\n"
747 "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
748 "Check /proc/modules permission or run as root.\n");
749
a1645ce1 750 if (perf_guest)
23346f21 751 perf_session__process_machines(session, event__synthesize_guest_os);
7c6a1c65 752
cf103a14 753 if (!system_wide)
d6d901c2 754 event__synthesize_thread(target_tid, process_synthesized_event,
d8f66248 755 session);
234fbbf5 756 else
d8f66248 757 event__synthesize_threads(process_synthesized_event, session);
7c6a1c65 758
de9ac07b
PZ
759 if (realtime_prio) {
760 struct sched_param param;
761
762 param.sched_priority = realtime_prio;
763 if (sched_setscheduler(0, SCHED_FIFO, &param)) {
6beba7ad 764 pr_err("Could not set realtime priority.\n");
de9ac07b
PZ
765 exit(-1);
766 }
767 }
768
856e9660
PZ
769 /*
770 * Let the child rip
771 */
d4db3f16
ACM
772 if (forks)
773 close(go_pipe[1]);
856e9660 774
649c48a9 775 for (;;) {
2debbc83 776 int hits = samples;
d6d901c2 777 int thread;
de9ac07b 778
98402807 779 mmap_read_all();
de9ac07b 780
649c48a9
PZ
781 if (hits == samples) {
782 if (done)
783 break;
5c581041 784 err = poll(evsel_list->pollfd, evsel_list->nr_fds, -1);
8b412664
PZ
785 waking++;
786 }
787
788 if (done) {
dd7927f4 789 for (i = 0; i < cpus->nr; i++) {
69aad6f1
ACM
790 struct perf_evsel *pos;
791
361c99a6 792 list_for_each_entry(pos, &evsel_list->entries, node) {
d6d901c2 793 for (thread = 0;
5c98d466 794 thread < threads->nr;
d6d901c2 795 thread++)
69aad6f1 796 ioctl(FD(pos, i, thread),
d6d901c2
ZY
797 PERF_EVENT_IOC_DISABLE);
798 }
8b412664 799 }
649c48a9 800 }
de9ac07b
PZ
801 }
802
18483b81 803 if (quiet || signr == SIGUSR1)
b44308f5
ACM
804 return 0;
805
8b412664
PZ
806 fprintf(stderr, "[ perf record: Woken up %ld times to write data ]\n", waking);
807
021e9f47
IM
808 /*
809 * Approximate RIP event size: 24 bytes.
810 */
811 fprintf(stderr,
9486aa38 812 "[ perf record: Captured and wrote %.3f MB %s (~%" PRIu64 " samples) ]\n",
021e9f47
IM
813 (double)bytes_written / 1024.0 / 1024.0,
814 output_name,
815 bytes_written / 24);
addc2785 816
de9ac07b 817 return 0;
39d17dac
ACM
818
819out_delete_session:
820 perf_session__delete(session);
821 return err;
de9ac07b 822}
0e9b20b8 823
/* Usage lines for 'perf record', NULL terminated. */
static const char * const record_usage[] = {
	"perf record [<options>] [<command>]",
	"perf record [<options>] -- <command> [<options>]",
	NULL,
};
829
7865e817
FW
/* Raw -f / -A switches; cmd_record() turns them into write_mode. */
static bool force;
static bool append_file;
831
bca647aa 832const struct option record_options[] = {
361c99a6 833 OPT_CALLBACK('e', "event", &evsel_list, "event",
86847b62
TG
834 "event selector. use 'perf list' to list available events",
835 parse_events),
361c99a6 836 OPT_CALLBACK(0, "filter", &evsel_list, "filter",
c171b552 837 "event filter", parse_filter),
0e9b20b8 838 OPT_INTEGER('p', "pid", &target_pid,
d6d901c2
ZY
839 "record events on existing process id"),
840 OPT_INTEGER('t', "tid", &target_tid,
841 "record events on existing thread id"),
0e9b20b8
IM
842 OPT_INTEGER('r', "realtime", &realtime_prio,
843 "collect data with this RT SCHED_FIFO priority"),
acac03fa
KS
844 OPT_BOOLEAN('D', "no-delay", &nodelay,
845 "collect data without buffering"),
daac07b2
FW
846 OPT_BOOLEAN('R', "raw-samples", &raw_samples,
847 "collect raw sample records from all opened counters"),
0e9b20b8
IM
848 OPT_BOOLEAN('a', "all-cpus", &system_wide,
849 "system-wide collection from all CPUs"),
abaff32a
IM
850 OPT_BOOLEAN('A', "append", &append_file,
851 "append to the output file to do incremental profiling"),
c45c6ea2
SE
852 OPT_STRING('C', "cpu", &cpu_list, "cpu",
853 "list of cpus to monitor"),
97124d5e 854 OPT_BOOLEAN('f', "force", &force,
7865e817 855 "overwrite existing data file (deprecated)"),
3de29cab 856 OPT_U64('c', "count", &user_interval, "event period to sample"),
abaff32a
IM
857 OPT_STRING('o', "output", &output_name, "file",
858 "output file name"),
2e6cdf99
SE
859 OPT_BOOLEAN('i', "no-inherit", &no_inherit,
860 "child tasks do not inherit counters"),
1967936d
ACM
861 OPT_UINTEGER('F', "freq", &user_freq, "profile at this frequency"),
862 OPT_UINTEGER('m', "mmap-pages", &mmap_pages, "number of mmap data pages"),
3efa1cc9
IM
863 OPT_BOOLEAN('g', "call-graph", &call_graph,
864 "do call-graph (stack chain/backtrace) recording"),
c0555642 865 OPT_INCR('v', "verbose", &verbose,
3da297a6 866 "be more verbose (show counter open errors, etc)"),
b44308f5 867 OPT_BOOLEAN('q', "quiet", &quiet, "don't print any message"),
649c48a9
PZ
868 OPT_BOOLEAN('s', "stat", &inherit_stat,
869 "per thread counts"),
4bba828d
AB
870 OPT_BOOLEAN('d', "data", &sample_address,
871 "Sample addresses"),
9c90a61c 872 OPT_BOOLEAN('T', "timestamp", &sample_time, "Sample timestamps"),
649c48a9
PZ
873 OPT_BOOLEAN('n', "no-samples", &no_samples,
874 "don't sample"),
baa2f6ce 875 OPT_BOOLEAN('N', "no-buildid-cache", &no_buildid_cache,
a1ac1d3c 876 "do not update the buildid cache"),
baa2f6ce
ACM
877 OPT_BOOLEAN('B', "no-buildid", &no_buildid,
878 "do not collect buildids in perf.data"),
0e9b20b8
IM
879 OPT_END()
880};
881
f37a291c 882int cmd_record(int argc, const char **argv, const char *prefix __used)
0e9b20b8 883{
69aad6f1
ACM
884 int err = -ENOMEM;
885 struct perf_evsel *pos;
0e9b20b8 886
361c99a6
ACM
887 evsel_list = perf_evlist__new();
888 if (evsel_list == NULL)
889 return -ENOMEM;
890
bca647aa 891 argc = parse_options(argc, argv, record_options, record_usage,
655000e7 892 PARSE_OPT_STOP_AT_NON_OPTION);
d6d901c2 893 if (!argc && target_pid == -1 && target_tid == -1 &&
c45c6ea2 894 !system_wide && !cpu_list)
bca647aa 895 usage_with_options(record_usage, record_options);
0e9b20b8 896
7865e817
FW
897 if (force && append_file) {
898 fprintf(stderr, "Can't overwrite and append at the same time."
899 " You need to choose between -f and -A");
bca647aa 900 usage_with_options(record_usage, record_options);
7865e817
FW
901 } else if (append_file) {
902 write_mode = WRITE_APPEND;
903 } else {
904 write_mode = WRITE_FORCE;
905 }
906
655000e7 907 symbol__init();
baa2f6ce
ACM
908
909 if (no_buildid_cache || no_buildid)
a1ac1d3c 910 disable_buildid_cache();
655000e7 911
361c99a6
ACM
912 if (evsel_list->nr_entries == 0 &&
913 perf_evlist__add_default(evsel_list) < 0) {
69aad6f1
ACM
914 pr_err("Not enough memory for event selector list\n");
915 goto out_symbol_exit;
bbd36e5e 916 }
0e9b20b8 917
5c98d466 918 if (target_pid != -1)
d6d901c2 919 target_tid = target_pid;
d6d901c2 920
5c98d466
ACM
921 threads = thread_map__new(target_pid, target_tid);
922 if (threads == NULL) {
923 pr_err("Problems finding threads of monitor\n");
924 usage_with_options(record_usage, record_options);
d6d901c2
ZY
925 }
926
dd7927f4
ACM
927 if (target_tid != -1)
928 cpus = cpu_map__dummy_new();
929 else
930 cpus = cpu_map__new(cpu_list);
931
932 if (cpus == NULL)
933 usage_with_options(record_usage, record_options);
69aad6f1 934
361c99a6 935 list_for_each_entry(pos, &evsel_list->entries, node) {
5c98d466 936 if (perf_evsel__alloc_fd(pos, cpus->nr, threads->nr) < 0)
69aad6f1 937 goto out_free_fd;
ad7f4e3f
ACM
938 if (perf_header__push_event(pos->attr.config, event_name(pos)))
939 goto out_free_fd;
d6d901c2 940 }
5c581041
ACM
941
942 if (perf_evlist__alloc_pollfd(evsel_list, cpus->nr, threads->nr) < 0)
39d17dac 943 goto out_free_fd;
d6d901c2 944
3de29cab 945 if (user_interval != ULLONG_MAX)
f9212819
FW
946 default_interval = user_interval;
947 if (user_freq != UINT_MAX)
948 freq = user_freq;
949
7e4ff9e3
MG
950 /*
951 * User specified count overrides default frequency.
952 */
953 if (default_interval)
954 freq = 0;
955 else if (freq) {
956 default_interval = freq;
957 } else {
958 fprintf(stderr, "frequency and count are zero, aborting\n");
39d17dac 959 err = -EINVAL;
5c581041 960 goto out_free_fd;
7e4ff9e3
MG
961 }
962
39d17dac
ACM
963 err = __cmd_record(argc, argv);
964
39d17dac 965out_free_fd:
5c98d466
ACM
966 thread_map__delete(threads);
967 threads = NULL;
d65a458b
ACM
968out_symbol_exit:
969 symbol__exit();
39d17dac 970 return err;
0e9b20b8 971}