perf top: Use perf_evsel__open
[linux-2.6-block.git] / tools / perf / builtin-record.c
CommitLineData
abaff32a 1/*
bf9e1876
IM
2 * builtin-record.c
3 *
4 * Builtin record command: Record the profile of a workload
5 * (or a CPU, or a PID) into the perf.data output file - for
6 * later analysis via perf report.
abaff32a 7 */
b8f46c5a
XG
8#define _FILE_OFFSET_BITS 64
9
16f762a2 10#include "builtin.h"
bf9e1876
IM
11
12#include "perf.h"
13
6122e4e4 14#include "util/build-id.h"
6eda5838 15#include "util/util.h"
0e9b20b8 16#include "util/parse-options.h"
8ad8db37 17#include "util/parse-events.h"
6eda5838 18
7c6a1c65 19#include "util/header.h"
66e274f3 20#include "util/event.h"
361c99a6 21#include "util/evlist.h"
69aad6f1 22#include "util/evsel.h"
8f28827a 23#include "util/debug.h"
94c744b6 24#include "util/session.h"
8d06367f 25#include "util/symbol.h"
a12b51c4 26#include "util/cpumap.h"
7c6a1c65 27
97124d5e 28#include <unistd.h>
de9ac07b 29#include <sched.h>
a41794cd 30#include <sys/mman.h>
de9ac07b 31
/*
 * Access, as an int lvalue, the entry at position (x, y) of the fd
 * xyarray hanging off evsel pointer 'e'.  The argument is parenthesized
 * so that expressions such as FD(&evsel, ...) expand correctly.
 */
#define FD(e, x, y) (*(int *)xyarray__entry((e)->fd, x, y))
33
/* How an already existing output file is treated when recording starts. */
enum write_mode_t {
	WRITE_FORCE = 0,	/* rename the old file to "<name>.old" and start afresh */
	WRITE_APPEND = 1,	/* keep the file and append to it */
};
38
3de29cab
SE
39static u64 user_interval = ULLONG_MAX;
40static u64 default_interval = 0;
640c03ce 41static u64 sample_type;
a21ca2ca 42
60d567e2 43static struct cpu_map *cpus;
de9ac07b 44static unsigned int page_size;
42e59d7d 45static unsigned int mmap_pages = 128;
f9212819 46static unsigned int user_freq = UINT_MAX;
42e59d7d 47static int freq = 1000;
de9ac07b 48static int output;
529870e3 49static int pipe_output = 0;
23ac9cbe 50static const char *output_name = "perf.data";
42e59d7d 51static int group = 0;
1967936d 52static int realtime_prio = 0;
acac03fa 53static bool nodelay = false;
c0555642 54static bool raw_samples = false;
9c90a61c 55static bool sample_id_all_avail = true;
c0555642 56static bool system_wide = false;
42e59d7d 57static pid_t target_pid = -1;
d6d901c2 58static pid_t target_tid = -1;
5c98d466 59static struct thread_map *threads;
42e59d7d 60static pid_t child_pid = -1;
2e6cdf99 61static bool no_inherit = false;
7865e817 62static enum write_mode_t write_mode = WRITE_FORCE;
c0555642
IM
63static bool call_graph = false;
64static bool inherit_stat = false;
65static bool no_samples = false;
66static bool sample_address = false;
9c90a61c 67static bool sample_time = false;
a1ac1d3c 68static bool no_buildid = false;
baa2f6ce 69static bool no_buildid_cache = false;
361c99a6 70static struct perf_evlist *evsel_list;
42e59d7d
IM
71
72static long samples = 0;
42e59d7d 73static u64 bytes_written = 0;
a21ca2ca 74
42e59d7d 75static int nr_cpu = 0;
a21ca2ca 76
42e59d7d 77static int file_new = 1;
6122e4e4 78static off_t post_processing_offset;
7c6a1c65 79
94c744b6 80static struct perf_session *session;
c45c6ea2 81static const char *cpu_list;
f5970550 82
/* Reader-side state for one mmap'ed perf event buffer. */
struct mmap_data {
	void		*base;	/* start of the mapping (control page first) */
	unsigned int	mask;	/* data area size minus one, used to wrap offsets */
	unsigned int	prev;	/* position where the previous mmap_read() stopped */
};
88
0e2e63dd 89static struct mmap_data mmap_array[MAX_NR_CPUS];
a21ca2ca 90
9d91a6f7 91static unsigned long mmap_read_head(struct mmap_data *md)
de9ac07b 92{
cdd6c482 93 struct perf_event_mmap_page *pc = md->base;
9d91a6f7 94 long head;
de9ac07b
PZ
95
96 head = pc->data_head;
97 rmb();
98
99 return head;
100}
101
9d91a6f7
PZ
102static void mmap_write_tail(struct mmap_data *md, unsigned long tail)
103{
cdd6c482 104 struct perf_event_mmap_page *pc = md->base;
9d91a6f7
PZ
105
106 /*
107 * ensure all reads are done before we write the tail out.
108 */
109 /* mb(); */
110 pc->data_tail = tail;
111}
112
9215545e
TZ
113static void advance_output(size_t size)
114{
115 bytes_written += size;
116}
117
f5970550
PZ
118static void write_output(void *buf, size_t size)
119{
120 while (size) {
121 int ret = write(output, buf, size);
122
123 if (ret < 0)
124 die("failed to write");
125
126 size -= ret;
127 buf += ret;
128
129 bytes_written += ret;
130 }
131}
132
d8f66248 133static int process_synthesized_event(event_t *event,
640c03ce 134 struct sample_data *sample __used,
d8f66248 135 struct perf_session *self __used)
234fbbf5 136{
6122e4e4 137 write_output(event, event->header.size);
234fbbf5
ACM
138 return 0;
139}
140
de9ac07b
PZ
141static void mmap_read(struct mmap_data *md)
142{
143 unsigned int head = mmap_read_head(md);
144 unsigned int old = md->prev;
145 unsigned char *data = md->base + page_size;
146 unsigned long size;
147 void *buf;
148 int diff;
149
de9ac07b
PZ
150 /*
151 * If we're further behind than half the buffer, there's a chance
2debbc83 152 * the writer will bite our tail and mess up the samples under us.
de9ac07b
PZ
153 *
154 * If we somehow ended up ahead of the head, we got messed up.
155 *
156 * In either case, truncate and restart at head.
157 */
158 diff = head - old;
9d91a6f7 159 if (diff < 0) {
ef365cef 160 fprintf(stderr, "WARNING: failed to keep up with mmap data\n");
de9ac07b
PZ
161 /*
162 * head points to a known good entry, start there.
163 */
164 old = head;
165 }
166
de9ac07b 167 if (old != head)
2debbc83 168 samples++;
de9ac07b
PZ
169
170 size = head - old;
171
172 if ((old & md->mask) + size != (head & md->mask)) {
173 buf = &data[old & md->mask];
174 size = md->mask + 1 - (old & md->mask);
175 old += size;
021e9f47 176
6122e4e4 177 write_output(buf, size);
de9ac07b
PZ
178 }
179
180 buf = &data[old & md->mask];
181 size = head - old;
182 old += size;
021e9f47 183
6122e4e4 184 write_output(buf, size);
de9ac07b
PZ
185
186 md->prev = old;
9d91a6f7 187 mmap_write_tail(md, old);
de9ac07b
PZ
188}
189
/*
 * Flags shared between the signal handler and the main flow.  They are
 * written from signal context, hence volatile sig_atomic_t.
 *
 * done:  becomes 1 once any handled signal has arrived.
 * signr: number of the last handled signal, -1 while none was seen.
 */
static volatile sig_atomic_t done = 0;
static volatile sig_atomic_t signr = -1;

/* Signal handler: note that a signal arrived and which one it was. */
static void sig_handler(int sig)
{
	done = 1;
	signr = sig;
}
198
199static void sig_atexit(void)
200{
5ffc8881 201 if (child_pid > 0)
933da83a
CW
202 kill(child_pid, SIGTERM);
203
18483b81 204 if (signr == -1 || signr == SIGUSR1)
f7b7c26e
PZ
205 return;
206
207 signal(signr, SIG_DFL);
208 kill(getpid(), signr);
de9ac07b
PZ
209}
210
/*
 * fd passed as group leader to sys_perf_event_open(); reset to -1 by
 * open_counters() and set by create_counter() when 'group' is enabled.
 */
static int group_fd;
212
cdd6c482 213static struct perf_header_attr *get_header_attr(struct perf_event_attr *a, int nr)
7c6a1c65
PZ
214{
215 struct perf_header_attr *h_attr;
216
94c744b6
ACM
217 if (nr < session->header.attrs) {
218 h_attr = session->header.attr[nr];
7c6a1c65
PZ
219 } else {
220 h_attr = perf_header_attr__new(a);
dc79c0fc 221 if (h_attr != NULL)
94c744b6 222 if (perf_header__add_attr(&session->header, h_attr) < 0) {
11deb1f9
ACM
223 perf_header_attr__delete(h_attr);
224 h_attr = NULL;
225 }
7c6a1c65
PZ
226 }
227
228 return h_attr;
229}
230
361c99a6
ACM
231static void create_counter(struct perf_evlist *evlist,
232 struct perf_evsel *evsel, int cpu)
de9ac07b 233{
69aad6f1
ACM
234 char *filter = evsel->filter;
235 struct perf_event_attr *attr = &evsel->attr;
7c6a1c65 236 struct perf_header_attr *h_attr;
69aad6f1 237 int track = !evsel->idx; /* only the first counter needs these */
d6d901c2 238 int thread_index;
c171b552 239 int ret;
7c6a1c65
PZ
240 struct {
241 u64 count;
242 u64 time_enabled;
243 u64 time_running;
244 u64 id;
245 } read_data;
a43d3f08
ACM
246 /*
247 * Check if parse_single_tracepoint_event has already asked for
248 * PERF_SAMPLE_TIME.
249 *
250 * XXX this is kludgy but short term fix for problems introduced by
251 * eac23d1c that broke 'perf script' by having different sample_types
252 * when using multiple tracepoint events when we use a perf binary
253 * that tries to use sample_id_all on an older kernel.
254 *
255 * We need to move counter creation to perf_session, support
256 * different sample_types, etc.
257 */
258 bool time_needed = attr->sample_type & PERF_SAMPLE_TIME;
7c6a1c65
PZ
259
260 attr->read_format = PERF_FORMAT_TOTAL_TIME_ENABLED |
261 PERF_FORMAT_TOTAL_TIME_RUNNING |
262 PERF_FORMAT_ID;
16c8a109 263
3a9f131f 264 attr->sample_type |= PERF_SAMPLE_IP | PERF_SAMPLE_TID;
3efa1cc9 265
361c99a6 266 if (evlist->nr_entries > 1)
8907fd60
EM
267 attr->sample_type |= PERF_SAMPLE_ID;
268
f9212819
FW
269 /*
270 * We default some events to a 1 default interval. But keep
271 * it a weak assumption overridable by the user.
272 */
273 if (!attr->sample_period || (user_freq != UINT_MAX &&
3de29cab 274 user_interval != ULLONG_MAX)) {
f9212819
FW
275 if (freq) {
276 attr->sample_type |= PERF_SAMPLE_PERIOD;
277 attr->freq = 1;
278 attr->sample_freq = freq;
279 } else {
280 attr->sample_period = default_interval;
281 }
1dba15e7 282 }
3efa1cc9 283
649c48a9
PZ
284 if (no_samples)
285 attr->sample_freq = 0;
286
287 if (inherit_stat)
288 attr->inherit_stat = 1;
289
3af9e859 290 if (sample_address) {
4bba828d 291 attr->sample_type |= PERF_SAMPLE_ADDR;
3af9e859
EM
292 attr->mmap_data = track;
293 }
4bba828d 294
3efa1cc9
IM
295 if (call_graph)
296 attr->sample_type |= PERF_SAMPLE_CALLCHAIN;
297
f60f3593
AS
298 if (system_wide)
299 attr->sample_type |= PERF_SAMPLE_CPU;
300
a43d3f08
ACM
301 if (sample_id_all_avail &&
302 (sample_time || system_wide || !no_inherit || cpu_list))
9c90a61c
ACM
303 attr->sample_type |= PERF_SAMPLE_TIME;
304
cd6feeea 305 if (raw_samples) {
6ddf259d 306 attr->sample_type |= PERF_SAMPLE_TIME;
daac07b2 307 attr->sample_type |= PERF_SAMPLE_RAW;
cd6feeea
IM
308 attr->sample_type |= PERF_SAMPLE_CPU;
309 }
f413cdb8 310
acac03fa
KS
311 if (nodelay) {
312 attr->watermark = 0;
313 attr->wakeup_events = 1;
314 }
315
a21ca2ca
IM
316 attr->mmap = track;
317 attr->comm = track;
2e6cdf99
SE
318 attr->inherit = !no_inherit;
319 if (target_pid == -1 && target_tid == -1 && !system_wide) {
46be604b 320 attr->disabled = 1;
bedbfdea 321 attr->enable_on_exec = 1;
46be604b 322 }
9c90a61c
ACM
323retry_sample_id:
324 attr->sample_id_all = sample_id_all_avail ? 1 : 0;
bedbfdea 325
5c98d466 326 for (thread_index = 0; thread_index < threads->nr; thread_index++) {
3da297a6 327try_again:
5c98d466 328 FD(evsel, nr_cpu, thread_index) = sys_perf_event_open(attr, threads->map[thread_index], cpu, group_fd, 0);
d6d901c2 329
69aad6f1 330 if (FD(evsel, nr_cpu, thread_index) < 0) {
d6d901c2
ZY
331 int err = errno;
332
333 if (err == EPERM || err == EACCES)
334 die("Permission error - are you root?\n"
335 "\t Consider tweaking"
336 " /proc/sys/kernel/perf_event_paranoid.\n");
c45c6ea2 337 else if (err == ENODEV && cpu_list) {
d6d901c2
ZY
338 die("No such device - did you specify"
339 " an out-of-range profile CPU?\n");
9c90a61c
ACM
340 } else if (err == EINVAL && sample_id_all_avail) {
341 /*
342 * Old kernel, no attr->sample_id_type_all field
343 */
344 sample_id_all_avail = false;
a43d3f08 345 if (!sample_time && !raw_samples && !time_needed)
eac23d1c
IM
346 attr->sample_type &= ~PERF_SAMPLE_TIME;
347
9c90a61c 348 goto retry_sample_id;
d6d901c2 349 }
3da297a6 350
d6d901c2
ZY
351 /*
352 * If it's cycles then fall back to hrtimer
353 * based cpu-clock-tick sw counter, which
354 * is always available even if no PMU support:
355 */
356 if (attr->type == PERF_TYPE_HARDWARE
357 && attr->config == PERF_COUNT_HW_CPU_CYCLES) {
358
359 if (verbose)
360 warning(" ... trying to fall back to cpu-clock-ticks\n");
361 attr->type = PERF_TYPE_SOFTWARE;
362 attr->config = PERF_COUNT_SW_CPU_CLOCK;
363 goto try_again;
364 }
365 printf("\n");
d9cf837e 366 error("sys_perf_event_open() syscall returned with %d (%s). /bin/dmesg may provide additional information.\n",
69aad6f1 367 FD(evsel, nr_cpu, thread_index), strerror(err));
bfd45118
SK
368
369#if defined(__i386__) || defined(__x86_64__)
d6d901c2
ZY
370 if (attr->type == PERF_TYPE_HARDWARE && err == EOPNOTSUPP)
371 die("No hardware sampling interrupt available."
372 " No APIC? If so then you can boot the kernel"
373 " with the \"lapic\" boot parameter to"
374 " force-enable it.\n");
bfd45118
SK
375#endif
376
d6d901c2
ZY
377 die("No CONFIG_PERF_EVENTS=y kernel support configured?\n");
378 exit(-1);
379 }
3da297a6 380
69aad6f1 381 h_attr = get_header_attr(attr, evsel->idx);
d6d901c2
ZY
382 if (h_attr == NULL)
383 die("nomem\n");
7c6a1c65 384
d6d901c2
ZY
385 if (!file_new) {
386 if (memcmp(&h_attr->attr, attr, sizeof(*attr))) {
387 fprintf(stderr, "incompatible append\n");
388 exit(-1);
389 }
7c6a1c65 390 }
7c6a1c65 391
69aad6f1 392 if (read(FD(evsel, nr_cpu, thread_index), &read_data, sizeof(read_data)) == -1) {
0ab7368f 393 perror("Unable to read perf file descriptor");
d6d901c2
ZY
394 exit(-1);
395 }
7c6a1c65 396
d6d901c2
ZY
397 if (perf_header_attr__add_id(h_attr, read_data.id) < 0) {
398 pr_warning("Not enough memory to add id\n");
399 exit(-1);
400 }
7c6a1c65 401
69aad6f1
ACM
402 assert(FD(evsel, nr_cpu, thread_index) >= 0);
403 fcntl(FD(evsel, nr_cpu, thread_index), F_SETFL, O_NONBLOCK);
16c8a109 404
d6d901c2
ZY
405 /*
406 * First counter acts as the group leader:
407 */
408 if (group && group_fd == -1)
69aad6f1
ACM
409 group_fd = FD(evsel, nr_cpu, thread_index);
410
411 if (evsel->idx || thread_index) {
412 struct perf_evsel *first;
361c99a6 413 first = list_entry(evlist->entries.next, struct perf_evsel, node);
69aad6f1
ACM
414 ret = ioctl(FD(evsel, nr_cpu, thread_index),
415 PERF_EVENT_IOC_SET_OUTPUT,
416 FD(first, nr_cpu, 0));
0e2e63dd
PZ
417 if (ret) {
418 error("failed to set output: %d (%s)\n", errno,
419 strerror(errno));
420 exit(-1);
421 }
422 } else {
0e2e63dd
PZ
423 mmap_array[nr_cpu].prev = 0;
424 mmap_array[nr_cpu].mask = mmap_pages*page_size - 1;
425 mmap_array[nr_cpu].base = mmap(NULL, (mmap_pages+1)*page_size,
69aad6f1 426 PROT_READ | PROT_WRITE, MAP_SHARED, FD(evsel, nr_cpu, thread_index), 0);
0e2e63dd
PZ
427 if (mmap_array[nr_cpu].base == MAP_FAILED) {
428 error("failed to mmap with %d (%s)\n", errno, strerror(errno));
429 exit(-1);
430 }
431
5c581041
ACM
432 evlist->pollfd[evlist->nr_fds].fd = FD(evsel, nr_cpu, thread_index);
433 evlist->pollfd[evlist->nr_fds].events = POLLIN;
434 evlist->nr_fds++;
ea57c4f5 435 }
d1302522 436
d6d901c2 437 if (filter != NULL) {
69aad6f1
ACM
438 ret = ioctl(FD(evsel, nr_cpu, thread_index),
439 PERF_EVENT_IOC_SET_FILTER, filter);
d6d901c2
ZY
440 if (ret) {
441 error("failed to set filter with %d (%s)\n", errno,
442 strerror(errno));
443 exit(-1);
444 }
c171b552
LZ
445 }
446 }
a43d3f08
ACM
447
448 if (!sample_type)
449 sample_type = attr->sample_type;
f250c030 450}
f2521b6e 451
361c99a6 452static void open_counters(struct perf_evlist *evlist, int cpu)
f250c030 453{
69aad6f1 454 struct perf_evsel *pos;
16c8a109 455
f250c030 456 group_fd = -1;
69aad6f1 457
361c99a6
ACM
458 list_for_each_entry(pos, &evlist->entries, node)
459 create_counter(evlist, pos, cpu);
f250c030 460
16c8a109
PZ
461 nr_cpu++;
462}
463
6122e4e4
ACM
464static int process_buildids(void)
465{
466 u64 size = lseek(output, 0, SEEK_CUR);
467
9f591fd7
ACM
468 if (size == 0)
469 return 0;
470
6122e4e4
ACM
471 session->fd = output;
472 return __perf_session__process_events(session, post_processing_offset,
473 size - post_processing_offset,
474 size, &build_id__mark_dso_hit_ops);
475}
476
f5970550
PZ
477static void atexit_header(void)
478{
c7929e47
TZ
479 if (!pipe_output) {
480 session->header.data_size += bytes_written;
f5970550 481
baa2f6ce
ACM
482 if (!no_buildid)
483 process_buildids();
361c99a6 484 perf_header__write(&session->header, evsel_list, output, true);
39d17dac 485 perf_session__delete(session);
361c99a6 486 perf_evlist__delete(evsel_list);
d65a458b 487 symbol__exit();
c7929e47 488 }
f5970550
PZ
489}
490
23346f21 491static void event__synthesize_guest_os(struct machine *machine, void *data)
a1645ce1
ZY
492{
493 int err;
23346f21 494 struct perf_session *psession = data;
a1645ce1 495
23346f21 496 if (machine__is_host(machine))
a1645ce1
ZY
497 return;
498
499 /*
500 *As for guest kernel when processing subcommand record&report,
501 *we arrange module mmap prior to guest kernel mmap and trigger
502 *a preload dso because default guest module symbols are loaded
503 *from guest kallsyms instead of /lib/modules/XXX/XXX. This
504 *method is used to avoid symbol missing when the first addr is
505 *in module instead of in guest kernel.
506 */
507 err = event__synthesize_modules(process_synthesized_event,
23346f21 508 psession, machine);
a1645ce1
ZY
509 if (err < 0)
510 pr_err("Couldn't record guest kernel [%d]'s reference"
23346f21 511 " relocation symbol.\n", machine->pid);
a1645ce1 512
a1645ce1
ZY
513 /*
514 * We use _stext for guest kernel because guest kernel's /proc/kallsyms
515 * have no _text sometimes.
516 */
517 err = event__synthesize_kernel_mmap(process_synthesized_event,
23346f21 518 psession, machine, "_text");
a1645ce1
ZY
519 if (err < 0)
520 err = event__synthesize_kernel_mmap(process_synthesized_event,
23346f21 521 psession, machine, "_stext");
a1645ce1
ZY
522 if (err < 0)
523 pr_err("Couldn't record guest kernel [%d]'s reference"
23346f21 524 " relocation symbol.\n", machine->pid);
a1645ce1
ZY
525}
526
98402807
FW
527static struct perf_event_header finished_round_event = {
528 .size = sizeof(struct perf_event_header),
529 .type = PERF_RECORD_FINISHED_ROUND,
530};
531
532static void mmap_read_all(void)
533{
0e2e63dd 534 int i;
98402807
FW
535
536 for (i = 0; i < nr_cpu; i++) {
0e2e63dd
PZ
537 if (mmap_array[i].base)
538 mmap_read(&mmap_array[i]);
98402807
FW
539 }
540
541 if (perf_header__has_feat(&session->header, HEADER_TRACE_INFO))
542 write_output(&finished_round_event, sizeof(finished_round_event));
543}
544
d4db3f16 545static int __cmd_record(int argc, const char **argv)
16c8a109 546{
69aad6f1 547 int i;
abaff32a 548 struct stat st;
abaff32a 549 int flags;
4dc0a04b 550 int err;
8b412664 551 unsigned long waking = 0;
856e9660 552 int child_ready_pipe[2], go_pipe[2];
46be604b 553 const bool forks = argc > 0;
856e9660 554 char buf;
23346f21 555 struct machine *machine;
de9ac07b
PZ
556
557 page_size = sysconf(_SC_PAGE_SIZE);
de9ac07b 558
f5970550
PZ
559 atexit(sig_atexit);
560 signal(SIGCHLD, sig_handler);
561 signal(SIGINT, sig_handler);
18483b81 562 signal(SIGUSR1, sig_handler);
f5970550 563
d4db3f16 564 if (forks && (pipe(child_ready_pipe) < 0 || pipe(go_pipe) < 0)) {
856e9660
PZ
565 perror("failed to create pipes");
566 exit(-1);
567 }
568
529870e3
TZ
569 if (!strcmp(output_name, "-"))
570 pipe_output = 1;
571 else if (!stat(output_name, &st) && st.st_size) {
7865e817 572 if (write_mode == WRITE_FORCE) {
b38d3464
ACM
573 char oldname[PATH_MAX];
574 snprintf(oldname, sizeof(oldname), "%s.old",
575 output_name);
576 unlink(oldname);
577 rename(output_name, oldname);
266e0e21 578 }
7865e817
FW
579 } else if (write_mode == WRITE_APPEND) {
580 write_mode = WRITE_FORCE;
97124d5e
PZ
581 }
582
f887f301 583 flags = O_CREAT|O_RDWR;
7865e817 584 if (write_mode == WRITE_APPEND)
f5970550 585 file_new = 0;
abaff32a
IM
586 else
587 flags |= O_TRUNC;
588
529870e3
TZ
589 if (pipe_output)
590 output = STDOUT_FILENO;
591 else
592 output = open(output_name, flags, S_IRUSR | S_IWUSR);
de9ac07b
PZ
593 if (output < 0) {
594 perror("failed to create output file");
595 exit(-1);
596 }
597
7865e817 598 session = perf_session__new(output_name, O_WRONLY,
21ef97f0 599 write_mode == WRITE_FORCE, false, NULL);
94c744b6 600 if (session == NULL) {
a9a70bbc
ACM
601 pr_err("Not enough memory for reading perf file header\n");
602 return -1;
603 }
604
baa2f6ce
ACM
605 if (!no_buildid)
606 perf_header__set_feat(&session->header, HEADER_BUILD_ID);
607
4dc0a04b 608 if (!file_new) {
8dc58101 609 err = perf_header__read(session, output);
4dc0a04b 610 if (err < 0)
39d17dac 611 goto out_delete_session;
4dc0a04b
ACM
612 }
613
361c99a6 614 if (have_tracepoints(&evsel_list->entries))
94c744b6 615 perf_header__set_feat(&session->header, HEADER_TRACE_INFO);
03456a15 616
39d17dac
ACM
617 /*
618 * perf_session__delete(session) will be called at atexit_header()
619 */
f5970550
PZ
620 atexit(atexit_header);
621
d4db3f16 622 if (forks) {
46be604b 623 child_pid = fork();
2fb750e8 624 if (child_pid < 0) {
856e9660
PZ
625 perror("failed to fork");
626 exit(-1);
627 }
7c6a1c65 628
46be604b 629 if (!child_pid) {
529870e3
TZ
630 if (pipe_output)
631 dup2(2, 1);
856e9660
PZ
632 close(child_ready_pipe[0]);
633 close(go_pipe[1]);
634 fcntl(go_pipe[0], F_SETFD, FD_CLOEXEC);
635
636 /*
637 * Do a dummy execvp to get the PLT entry resolved,
638 * so we avoid the resolver overhead on the real
639 * execvp call.
640 */
641 execvp("", (char **)argv);
642
643 /*
644 * Tell the parent we're ready to go
645 */
646 close(child_ready_pipe[1]);
647
648 /*
649 * Wait until the parent tells us to go.
650 */
651 if (read(go_pipe[0], &buf, 1) == -1)
652 perror("unable to read pipe");
653
654 execvp(argv[0], (char **)argv);
655
656 perror(argv[0]);
18483b81 657 kill(getppid(), SIGUSR1);
856e9660 658 exit(-1);
0a5ac846 659 }
856e9660 660
d6d901c2 661 if (!system_wide && target_tid == -1 && target_pid == -1)
5c98d466 662 threads->map[0] = child_pid;
d6d901c2 663
856e9660
PZ
664 close(child_ready_pipe[1]);
665 close(go_pipe[0]);
666 /*
667 * wait for child to settle
668 */
669 if (read(child_ready_pipe[0], &buf, 1) == -1) {
670 perror("unable to read pipe");
671 exit(-1);
672 }
673 close(child_ready_pipe[0]);
674 }
675
c45c6ea2 676 if (!system_wide && no_inherit && !cpu_list) {
361c99a6 677 open_counters(evsel_list, -1);
856e9660 678 } else {
60d567e2 679 for (i = 0; i < cpus->nr; i++)
361c99a6 680 open_counters(evsel_list, cpus->map[i]);
0a5ac846 681 }
de9ac07b 682
640c03ce
ACM
683 perf_session__set_sample_type(session, sample_type);
684
529870e3
TZ
685 if (pipe_output) {
686 err = perf_header__write_pipe(output);
687 if (err < 0)
688 return err;
689 } else if (file_new) {
361c99a6
ACM
690 err = perf_header__write(&session->header, evsel_list,
691 output, false);
d5eed904
ACM
692 if (err < 0)
693 return err;
56b03f3c
ACM
694 }
695
6122e4e4
ACM
696 post_processing_offset = lseek(output, 0, SEEK_CUR);
697
9c90a61c
ACM
698 perf_session__set_sample_id_all(session, sample_id_all_avail);
699
2c46dbb5
TZ
700 if (pipe_output) {
701 err = event__synthesize_attrs(&session->header,
702 process_synthesized_event,
703 session);
704 if (err < 0) {
705 pr_err("Couldn't synthesize attrs.\n");
706 return err;
707 }
cd19a035
TZ
708
709 err = event__synthesize_event_types(process_synthesized_event,
710 session);
711 if (err < 0) {
712 pr_err("Couldn't synthesize event_types.\n");
713 return err;
714 }
9215545e 715
361c99a6 716 if (have_tracepoints(&evsel_list->entries)) {
63e0c771
TZ
717 /*
718 * FIXME err <= 0 here actually means that
719 * there were no tracepoints so its not really
720 * an error, just that we don't need to
721 * synthesize anything. We really have to
722 * return this more properly and also
723 * propagate errors that now are calling die()
724 */
361c99a6 725 err = event__synthesize_tracing_data(output, evsel_list,
63e0c771
TZ
726 process_synthesized_event,
727 session);
728 if (err <= 0) {
729 pr_err("Couldn't record tracing data.\n");
730 return err;
731 }
2c9faa06 732 advance_output(err);
63e0c771 733 }
2c46dbb5
TZ
734 }
735
23346f21
ACM
736 machine = perf_session__find_host_machine(session);
737 if (!machine) {
a1645ce1
ZY
738 pr_err("Couldn't find native kernel information.\n");
739 return -1;
740 }
741
56b03f3c 742 err = event__synthesize_kernel_mmap(process_synthesized_event,
23346f21 743 session, machine, "_text");
70162138
ACM
744 if (err < 0)
745 err = event__synthesize_kernel_mmap(process_synthesized_event,
23346f21 746 session, machine, "_stext");
c1a3a4b9
ACM
747 if (err < 0)
748 pr_err("Couldn't record kernel reference relocation symbol\n"
749 "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
750 "Check /proc/kallsyms permission or run as root.\n");
b7cece76 751
a1645ce1 752 err = event__synthesize_modules(process_synthesized_event,
23346f21 753 session, machine);
c1a3a4b9
ACM
754 if (err < 0)
755 pr_err("Couldn't record kernel module information.\n"
756 "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
757 "Check /proc/modules permission or run as root.\n");
758
a1645ce1 759 if (perf_guest)
23346f21 760 perf_session__process_machines(session, event__synthesize_guest_os);
7c6a1c65 761
cf103a14 762 if (!system_wide)
d6d901c2 763 event__synthesize_thread(target_tid, process_synthesized_event,
d8f66248 764 session);
234fbbf5 765 else
d8f66248 766 event__synthesize_threads(process_synthesized_event, session);
7c6a1c65 767
de9ac07b
PZ
768 if (realtime_prio) {
769 struct sched_param param;
770
771 param.sched_priority = realtime_prio;
772 if (sched_setscheduler(0, SCHED_FIFO, &param)) {
6beba7ad 773 pr_err("Could not set realtime priority.\n");
de9ac07b
PZ
774 exit(-1);
775 }
776 }
777
856e9660
PZ
778 /*
779 * Let the child rip
780 */
d4db3f16
ACM
781 if (forks)
782 close(go_pipe[1]);
856e9660 783
649c48a9 784 for (;;) {
2debbc83 785 int hits = samples;
d6d901c2 786 int thread;
de9ac07b 787
98402807 788 mmap_read_all();
de9ac07b 789
649c48a9
PZ
790 if (hits == samples) {
791 if (done)
792 break;
5c581041 793 err = poll(evsel_list->pollfd, evsel_list->nr_fds, -1);
8b412664
PZ
794 waking++;
795 }
796
797 if (done) {
798 for (i = 0; i < nr_cpu; i++) {
69aad6f1
ACM
799 struct perf_evsel *pos;
800
361c99a6 801 list_for_each_entry(pos, &evsel_list->entries, node) {
d6d901c2 802 for (thread = 0;
5c98d466 803 thread < threads->nr;
d6d901c2 804 thread++)
69aad6f1 805 ioctl(FD(pos, i, thread),
d6d901c2
ZY
806 PERF_EVENT_IOC_DISABLE);
807 }
8b412664 808 }
649c48a9 809 }
de9ac07b
PZ
810 }
811
18483b81 812 if (quiet || signr == SIGUSR1)
b44308f5
ACM
813 return 0;
814
8b412664
PZ
815 fprintf(stderr, "[ perf record: Woken up %ld times to write data ]\n", waking);
816
021e9f47
IM
817 /*
818 * Approximate RIP event size: 24 bytes.
819 */
820 fprintf(stderr,
9486aa38 821 "[ perf record: Captured and wrote %.3f MB %s (~%" PRIu64 " samples) ]\n",
021e9f47
IM
822 (double)bytes_written / 1024.0 / 1024.0,
823 output_name,
824 bytes_written / 24);
addc2785 825
de9ac07b 826 return 0;
39d17dac
ACM
827
828out_delete_session:
829 perf_session__delete(session);
830 return err;
de9ac07b 831}
0e9b20b8 832
/* NULL-terminated usage lines handed to parse_options()/usage_with_options(). */
static const char * const record_usage[] = {
	"perf record [<options>] [<command>]",
	"perf record [<options>] -- <command> [<options>]",
	NULL,
};
838
7865e817
FW
839static bool force, append_file;
840
bca647aa 841const struct option record_options[] = {
361c99a6 842 OPT_CALLBACK('e', "event", &evsel_list, "event",
86847b62
TG
843 "event selector. use 'perf list' to list available events",
844 parse_events),
361c99a6 845 OPT_CALLBACK(0, "filter", &evsel_list, "filter",
c171b552 846 "event filter", parse_filter),
0e9b20b8 847 OPT_INTEGER('p', "pid", &target_pid,
d6d901c2
ZY
848 "record events on existing process id"),
849 OPT_INTEGER('t', "tid", &target_tid,
850 "record events on existing thread id"),
0e9b20b8
IM
851 OPT_INTEGER('r', "realtime", &realtime_prio,
852 "collect data with this RT SCHED_FIFO priority"),
acac03fa
KS
853 OPT_BOOLEAN('D', "no-delay", &nodelay,
854 "collect data without buffering"),
daac07b2
FW
855 OPT_BOOLEAN('R', "raw-samples", &raw_samples,
856 "collect raw sample records from all opened counters"),
0e9b20b8
IM
857 OPT_BOOLEAN('a', "all-cpus", &system_wide,
858 "system-wide collection from all CPUs"),
abaff32a
IM
859 OPT_BOOLEAN('A', "append", &append_file,
860 "append to the output file to do incremental profiling"),
c45c6ea2
SE
861 OPT_STRING('C', "cpu", &cpu_list, "cpu",
862 "list of cpus to monitor"),
97124d5e 863 OPT_BOOLEAN('f', "force", &force,
7865e817 864 "overwrite existing data file (deprecated)"),
3de29cab 865 OPT_U64('c', "count", &user_interval, "event period to sample"),
abaff32a
IM
866 OPT_STRING('o', "output", &output_name, "file",
867 "output file name"),
2e6cdf99
SE
868 OPT_BOOLEAN('i', "no-inherit", &no_inherit,
869 "child tasks do not inherit counters"),
1967936d
ACM
870 OPT_UINTEGER('F', "freq", &user_freq, "profile at this frequency"),
871 OPT_UINTEGER('m', "mmap-pages", &mmap_pages, "number of mmap data pages"),
3efa1cc9
IM
872 OPT_BOOLEAN('g', "call-graph", &call_graph,
873 "do call-graph (stack chain/backtrace) recording"),
c0555642 874 OPT_INCR('v', "verbose", &verbose,
3da297a6 875 "be more verbose (show counter open errors, etc)"),
b44308f5 876 OPT_BOOLEAN('q', "quiet", &quiet, "don't print any message"),
649c48a9
PZ
877 OPT_BOOLEAN('s', "stat", &inherit_stat,
878 "per thread counts"),
4bba828d
AB
879 OPT_BOOLEAN('d', "data", &sample_address,
880 "Sample addresses"),
9c90a61c 881 OPT_BOOLEAN('T', "timestamp", &sample_time, "Sample timestamps"),
649c48a9
PZ
882 OPT_BOOLEAN('n', "no-samples", &no_samples,
883 "don't sample"),
baa2f6ce 884 OPT_BOOLEAN('N', "no-buildid-cache", &no_buildid_cache,
a1ac1d3c 885 "do not update the buildid cache"),
baa2f6ce
ACM
886 OPT_BOOLEAN('B', "no-buildid", &no_buildid,
887 "do not collect buildids in perf.data"),
0e9b20b8
IM
888 OPT_END()
889};
890
f37a291c 891int cmd_record(int argc, const char **argv, const char *prefix __used)
0e9b20b8 892{
69aad6f1
ACM
893 int err = -ENOMEM;
894 struct perf_evsel *pos;
0e9b20b8 895
361c99a6
ACM
896 evsel_list = perf_evlist__new();
897 if (evsel_list == NULL)
898 return -ENOMEM;
899
bca647aa 900 argc = parse_options(argc, argv, record_options, record_usage,
655000e7 901 PARSE_OPT_STOP_AT_NON_OPTION);
d6d901c2 902 if (!argc && target_pid == -1 && target_tid == -1 &&
c45c6ea2 903 !system_wide && !cpu_list)
bca647aa 904 usage_with_options(record_usage, record_options);
0e9b20b8 905
7865e817
FW
906 if (force && append_file) {
907 fprintf(stderr, "Can't overwrite and append at the same time."
908 " You need to choose between -f and -A");
bca647aa 909 usage_with_options(record_usage, record_options);
7865e817
FW
910 } else if (append_file) {
911 write_mode = WRITE_APPEND;
912 } else {
913 write_mode = WRITE_FORCE;
914 }
915
655000e7 916 symbol__init();
baa2f6ce
ACM
917
918 if (no_buildid_cache || no_buildid)
a1ac1d3c 919 disable_buildid_cache();
655000e7 920
361c99a6
ACM
921 if (evsel_list->nr_entries == 0 &&
922 perf_evlist__add_default(evsel_list) < 0) {
69aad6f1
ACM
923 pr_err("Not enough memory for event selector list\n");
924 goto out_symbol_exit;
bbd36e5e 925 }
0e9b20b8 926
5c98d466 927 if (target_pid != -1)
d6d901c2 928 target_tid = target_pid;
d6d901c2 929
5c98d466
ACM
930 threads = thread_map__new(target_pid, target_tid);
931 if (threads == NULL) {
932 pr_err("Problems finding threads of monitor\n");
933 usage_with_options(record_usage, record_options);
d6d901c2
ZY
934 }
935
60d567e2
ACM
936 cpus = cpu_map__new(cpu_list);
937 if (cpus == NULL) {
938 perror("failed to parse CPUs map");
69aad6f1
ACM
939 return -1;
940 }
941
361c99a6 942 list_for_each_entry(pos, &evsel_list->entries, node) {
5c98d466 943 if (perf_evsel__alloc_fd(pos, cpus->nr, threads->nr) < 0)
69aad6f1 944 goto out_free_fd;
ad7f4e3f
ACM
945 if (perf_header__push_event(pos->attr.config, event_name(pos)))
946 goto out_free_fd;
d6d901c2 947 }
5c581041
ACM
948
949 if (perf_evlist__alloc_pollfd(evsel_list, cpus->nr, threads->nr) < 0)
39d17dac 950 goto out_free_fd;
d6d901c2 951
3de29cab 952 if (user_interval != ULLONG_MAX)
f9212819
FW
953 default_interval = user_interval;
954 if (user_freq != UINT_MAX)
955 freq = user_freq;
956
7e4ff9e3
MG
957 /*
958 * User specified count overrides default frequency.
959 */
960 if (default_interval)
961 freq = 0;
962 else if (freq) {
963 default_interval = freq;
964 } else {
965 fprintf(stderr, "frequency and count are zero, aborting\n");
39d17dac 966 err = -EINVAL;
5c581041 967 goto out_free_fd;
7e4ff9e3
MG
968 }
969
39d17dac
ACM
970 err = __cmd_record(argc, argv);
971
39d17dac 972out_free_fd:
5c98d466
ACM
973 thread_map__delete(threads);
974 threads = NULL;
d65a458b
ACM
975out_symbol_exit:
976 symbol__exit();
39d17dac 977 return err;
0e9b20b8 978}