perf_counter tools: Shorten names for events
[linux-2.6-block.git] / tools / perf / builtin-record.c
CommitLineData
abaff32a 1/*
bf9e1876
IM
2 * builtin-record.c
3 *
4 * Builtin record command: Record the profile of a workload
5 * (or a CPU, or a PID) into the perf.data output file - for
6 * later analysis via perf report.
abaff32a 7 */
16f762a2 8#include "builtin.h"
bf9e1876
IM
9
10#include "perf.h"
11
6eda5838 12#include "util/util.h"
0e9b20b8 13#include "util/parse-options.h"
8ad8db37 14#include "util/parse-events.h"
a0055ae2 15#include "util/string.h"
6eda5838 16
97124d5e 17#include <unistd.h>
de9ac07b 18#include <sched.h>
de9ac07b 19
0e9b20b8
IM
/*
 * ALIGN(x, a): round x up to the next multiple of a.  a must be a power of
 * two, since the rounding is done by masking with (a - 1).  The mask is cast
 * to the type of x so that its complement is computed at x's width.
 *
 * __typeof__ is spelled with underscores so the macro also builds in strict
 * ISO C modes, where the plain typeof keyword is not available.
 */
#define ALIGN(x, a)		__ALIGN_MASK(x, (__typeof__(x))(a)-1)
#define __ALIGN_MASK(x, mask)	(((x)+(mask))&~(mask))
1a853e36 22
de9ac07b 23static int fd[MAX_NR_CPUS][MAX_COUNTERS];
a21ca2ca
IM
24
25static long default_interval = 100000;
26
3cf165fc 27static int nr_cpus = 0;
de9ac07b 28static unsigned int page_size;
3cf165fc 29static unsigned int mmap_pages = 128;
cf1f4574 30static int freq = 0;
de9ac07b 31static int output;
23ac9cbe 32static const char *output_name = "perf.data";
de9ac07b 33static int group = 0;
16c8a109
PZ
34static unsigned int realtime_prio = 0;
35static int system_wide = 0;
1a853e36 36static pid_t target_pid = -1;
16c8a109 37static int inherit = 1;
97124d5e 38static int force = 0;
abaff32a 39static int append_file = 0;
3efa1cc9 40static int call_graph = 0;
3da297a6 41static int verbose = 0;
de9ac07b 42
a21ca2ca
IM
43static long samples;
44static struct timeval last_read;
45static struct timeval this_read;
46
9cffa8d5 47static u64 bytes_written;
a21ca2ca
IM
48
49static struct pollfd event_array[MAX_NR_CPUS * MAX_COUNTERS];
50
51static int nr_poll;
52static int nr_cpu;
53
f5970550
PZ
54static int file_new = 1;
55static struct perf_file_header file_header;
56
a21ca2ca
IM
57struct mmap_event {
58 struct perf_event_header header;
9cffa8d5
PM
59 u32 pid;
60 u32 tid;
61 u64 start;
62 u64 len;
63 u64 pgoff;
a21ca2ca
IM
64 char filename[PATH_MAX];
65};
66
67struct comm_event {
68 struct perf_event_header header;
9cffa8d5
PM
69 u32 pid;
70 u32 tid;
a21ca2ca 71 char comm[16];
de9ac07b
PZ
72};
73
a21ca2ca 74
/*
 * Reader-side bookkeeping for the mmap()ed buffer of one counter.
 */
struct mmap_data {
	int		counter;	/* index of the counter owning this buffer */
	void		*base;		/* start of the mapping: control page, then data */
	unsigned int	mask;		/* size of the data area in bytes, minus one */
	unsigned int	prev;		/* position up to which data was consumed */
};
81
a21ca2ca
IM
82static struct mmap_data mmap_array[MAX_NR_CPUS][MAX_COUNTERS];
83
9d91a6f7 84static unsigned long mmap_read_head(struct mmap_data *md)
de9ac07b
PZ
85{
86 struct perf_counter_mmap_page *pc = md->base;
9d91a6f7 87 long head;
de9ac07b
PZ
88
89 head = pc->data_head;
90 rmb();
91
92 return head;
93}
94
9d91a6f7
PZ
95static void mmap_write_tail(struct mmap_data *md, unsigned long tail)
96{
97 struct perf_counter_mmap_page *pc = md->base;
98
99 /*
100 * ensure all reads are done before we write the tail out.
101 */
102 /* mb(); */
103 pc->data_tail = tail;
104}
105
f5970550
PZ
106static void write_output(void *buf, size_t size)
107{
108 while (size) {
109 int ret = write(output, buf, size);
110
111 if (ret < 0)
112 die("failed to write");
113
114 size -= ret;
115 buf += ret;
116
117 bytes_written += ret;
118 }
119}
120
de9ac07b
PZ
121static void mmap_read(struct mmap_data *md)
122{
123 unsigned int head = mmap_read_head(md);
124 unsigned int old = md->prev;
125 unsigned char *data = md->base + page_size;
126 unsigned long size;
127 void *buf;
128 int diff;
129
130 gettimeofday(&this_read, NULL);
131
132 /*
133 * If we're further behind than half the buffer, there's a chance
2debbc83 134 * the writer will bite our tail and mess up the samples under us.
de9ac07b
PZ
135 *
136 * If we somehow ended up ahead of the head, we got messed up.
137 *
138 * In either case, truncate and restart at head.
139 */
140 diff = head - old;
9d91a6f7 141 if (diff < 0) {
de9ac07b
PZ
142 struct timeval iv;
143 unsigned long msecs;
144
145 timersub(&this_read, &last_read, &iv);
146 msecs = iv.tv_sec*1000 + iv.tv_usec/1000;
147
148 fprintf(stderr, "WARNING: failed to keep up with mmap data."
149 " Last read %lu msecs ago.\n", msecs);
150
151 /*
152 * head points to a known good entry, start there.
153 */
154 old = head;
155 }
156
157 last_read = this_read;
158
159 if (old != head)
2debbc83 160 samples++;
de9ac07b
PZ
161
162 size = head - old;
163
164 if ((old & md->mask) + size != (head & md->mask)) {
165 buf = &data[old & md->mask];
166 size = md->mask + 1 - (old & md->mask);
167 old += size;
021e9f47 168
f5970550 169 write_output(buf, size);
de9ac07b
PZ
170 }
171
172 buf = &data[old & md->mask];
173 size = head - old;
174 old += size;
021e9f47 175
f5970550 176 write_output(buf, size);
de9ac07b
PZ
177
178 md->prev = old;
9d91a6f7 179 mmap_write_tail(md, old);
de9ac07b
PZ
180}
181
/*
 * State shared between the signal handler and normal program flow.
 * volatile sig_atomic_t is the type the C standard allows a signal handler
 * to write.
 *
 *   done   - becomes 1 once a handled signal arrived; ends the record loop
 *   signr  - number of the last handled signal, -1 while none arrived
 */
static volatile sig_atomic_t done = 0;
static volatile sig_atomic_t signr = -1;

/* Signal handler: only records that (and which) signal was delivered. */
static void sig_handler(int sig)
{
	done = 1;
	signr = sig;
}
190
191static void sig_atexit(void)
192{
193 if (signr == -1)
194 return;
195
196 signal(signr, SIG_DFL);
197 kill(getpid(), signr);
de9ac07b
PZ
198}
199
f70e87d7 200static void pid_synthesize_comm_event(pid_t pid, int full)
1a853e36 201{
16f762a2 202 struct comm_event comm_ev;
1a853e36
ACM
203 char filename[PATH_MAX];
204 char bf[BUFSIZ];
f5970550 205 int fd;
1a853e36 206 size_t size;
a0055ae2 207 char *field, *sep;
f70e87d7
PZ
208 DIR *tasks;
209 struct dirent dirent, *next;
1a853e36
ACM
210
211 snprintf(filename, sizeof(filename), "/proc/%d/stat", pid);
212
213 fd = open(filename, O_RDONLY);
214 if (fd < 0) {
613d8602
IM
215 /*
216 * We raced with a task exiting - just return:
217 */
218 if (verbose)
219 fprintf(stderr, "couldn't open %s\n", filename);
220 return;
1a853e36
ACM
221 }
222 if (read(fd, bf, sizeof(bf)) < 0) {
223 fprintf(stderr, "couldn't read %s\n", filename);
224 exit(EXIT_FAILURE);
225 }
226 close(fd);
227
a0055ae2 228 /* 9027 (cat) R 6747 9027 6747 34816 9027 ... */
1a853e36 229 memset(&comm_ev, 0, sizeof(comm_ev));
a0055ae2
ACM
230 field = strchr(bf, '(');
231 if (field == NULL)
232 goto out_failure;
233 sep = strchr(++field, ')');
234 if (sep == NULL)
235 goto out_failure;
236 size = sep - field;
237 memcpy(comm_ev.comm, field, size++);
f70e87d7
PZ
238
239 comm_ev.pid = pid;
1a853e36 240 comm_ev.header.type = PERF_EVENT_COMM;
9cffa8d5 241 size = ALIGN(size, sizeof(u64));
1a853e36 242 comm_ev.header.size = sizeof(comm_ev) - (sizeof(comm_ev.comm) - size);
16f762a2 243
f70e87d7
PZ
244 if (!full) {
245 comm_ev.tid = pid;
246
f5970550 247 write_output(&comm_ev, comm_ev.header.size);
f70e87d7
PZ
248 return;
249 }
250
251 snprintf(filename, sizeof(filename), "/proc/%d/task", pid);
252
253 tasks = opendir(filename);
254 while (!readdir_r(tasks, &dirent, &next) && next) {
255 char *end;
256 pid = strtol(dirent.d_name, &end, 10);
257 if (*end)
258 continue;
259
260 comm_ev.tid = pid;
261
f5970550 262 write_output(&comm_ev, comm_ev.header.size);
1a853e36 263 }
f70e87d7
PZ
264 closedir(tasks);
265 return;
266
a0055ae2
ACM
267out_failure:
268 fprintf(stderr, "couldn't get COMM and pgid, malformed %s\n",
269 filename);
270 exit(EXIT_FAILURE);
1a853e36
ACM
271}
272
2debbc83 273static void pid_synthesize_mmap_samples(pid_t pid)
1a853e36
ACM
274{
275 char filename[PATH_MAX];
276 FILE *fp;
277
278 snprintf(filename, sizeof(filename), "/proc/%d/maps", pid);
279
280 fp = fopen(filename, "r");
281 if (fp == NULL) {
613d8602
IM
282 /*
283 * We raced with a task exiting - just return:
284 */
285 if (verbose)
286 fprintf(stderr, "couldn't open %s\n", filename);
287 return;
1a853e36
ACM
288 }
289 while (1) {
a0055ae2 290 char bf[BUFSIZ], *pbf = bf;
1a853e36
ACM
291 struct mmap_event mmap_ev = {
292 .header.type = PERF_EVENT_MMAP,
293 };
a0055ae2 294 int n;
1a853e36
ACM
295 size_t size;
296 if (fgets(bf, sizeof(bf), fp) == NULL)
297 break;
298
299 /* 00400000-0040c000 r-xp 00000000 fd:01 41038 /bin/cat */
a0055ae2
ACM
300 n = hex2u64(pbf, &mmap_ev.start);
301 if (n < 0)
302 continue;
303 pbf += n + 1;
304 n = hex2u64(pbf, &mmap_ev.len);
305 if (n < 0)
306 continue;
307 pbf += n + 3;
308 if (*pbf == 'x') { /* vm_exec */
76c64c5e 309 char *execname = strchr(bf, '/');
1a853e36 310
76c64c5e 311 if (execname == NULL)
1a853e36
ACM
312 continue;
313
1a853e36
ACM
314 size = strlen(execname);
315 execname[size - 1] = '\0'; /* Remove \n */
316 memcpy(mmap_ev.filename, execname, size);
9cffa8d5 317 size = ALIGN(size, sizeof(u64));
1a853e36
ACM
318 mmap_ev.len -= mmap_ev.start;
319 mmap_ev.header.size = (sizeof(mmap_ev) -
320 (sizeof(mmap_ev.filename) - size));
f70e87d7 321 mmap_ev.pid = pid;
1a853e36
ACM
322 mmap_ev.tid = pid;
323
f5970550 324 write_output(&mmap_ev, mmap_ev.header.size);
1a853e36
ACM
325 }
326 }
327
328 fclose(fp);
329}
330
/*
 * Synthesize comm and mmap records for every task currently listed in
 * /proc, i.e. for every directory entry whose name is entirely numeric.
 *
 * If /proc cannot be opened the failure is reported and nothing is
 * synthesized.
 */
static void synthesize_samples(void)
{
	DIR *proc;
	struct dirent *ent;

	proc = opendir("/proc");
	if (proc == NULL) {
		perror("failed to open /proc");
		return;
	}

	while ((ent = readdir(proc)) != NULL) {
		char *end;
		pid_t pid;

		pid = strtol(ent->d_name, &end, 10);
		if (*end) /* only interested in proper numerical dirents */
			continue;

		pid_synthesize_comm_event(pid, 1);
		pid_synthesize_mmap_samples(pid);
	}

	closedir(proc);
}
352
f250c030
IM
353static int group_fd;
354
355static void create_counter(int counter, int cpu, pid_t pid)
de9ac07b 356{
a21ca2ca 357 struct perf_counter_attr *attr = attrs + counter;
16c8a109 358 int track = 1;
16c8a109 359
ea1900e5 360 attr->sample_type = PERF_SAMPLE_IP | PERF_SAMPLE_TID;
3efa1cc9 361
1dba15e7 362 if (freq) {
ea1900e5 363 attr->sample_type |= PERF_SAMPLE_PERIOD;
a21ca2ca
IM
364 attr->freq = 1;
365 attr->sample_freq = freq;
1dba15e7 366 }
3efa1cc9
IM
367
368 if (call_graph)
369 attr->sample_type |= PERF_SAMPLE_CALLCHAIN;
370
f5970550
PZ
371 if (file_new) {
372 file_header.sample_type = attr->sample_type;
373 } else {
374 if (file_header.sample_type != attr->sample_type) {
375 fprintf(stderr, "incompatible append\n");
376 exit(-1);
377 }
378 }
379
a21ca2ca
IM
380 attr->mmap = track;
381 attr->comm = track;
382 attr->inherit = (cpu < 0) && inherit;
4502d77c 383 attr->disabled = 1;
16c8a109 384
f250c030 385 track = 0; /* only the first counter needs these */
16c8a109 386
3da297a6 387try_again:
a21ca2ca 388 fd[nr_cpu][counter] = sys_perf_counter_open(attr, pid, cpu, group_fd, 0);
16c8a109 389
f250c030
IM
390 if (fd[nr_cpu][counter] < 0) {
391 int err = errno;
16c8a109 392
f250c030 393 if (err == EPERM)
3da297a6
IM
394 die("Permission error - are you root?\n");
395
396 /*
397 * If it's cycles then fall back to hrtimer
398 * based cpu-clock-tick sw counter, which
399 * is always available even if no PMU support:
400 */
401 if (attr->type == PERF_TYPE_HARDWARE
f4dbfa8f 402 && attr->config == PERF_COUNT_HW_CPU_CYCLES) {
3da297a6
IM
403
404 if (verbose)
405 warning(" ... trying to fall back to cpu-clock-ticks\n");
406 attr->type = PERF_TYPE_SOFTWARE;
f4dbfa8f 407 attr->config = PERF_COUNT_SW_CPU_CLOCK;
3da297a6
IM
408 goto try_again;
409 }
30c806a0
IM
410 printf("\n");
411 error("perfcounter syscall returned with %d (%s)\n",
412 fd[nr_cpu][counter], strerror(err));
413 die("No CONFIG_PERF_COUNTERS=y kernel support configured?\n");
f250c030
IM
414 exit(-1);
415 }
3da297a6 416
f250c030
IM
417 assert(fd[nr_cpu][counter] >= 0);
418 fcntl(fd[nr_cpu][counter], F_SETFL, O_NONBLOCK);
16c8a109 419
f250c030
IM
420 /*
421 * First counter acts as the group leader:
422 */
423 if (group && group_fd == -1)
424 group_fd = fd[nr_cpu][counter];
425
426 event_array[nr_poll].fd = fd[nr_cpu][counter];
427 event_array[nr_poll].events = POLLIN;
428 nr_poll++;
429
430 mmap_array[nr_cpu][counter].counter = counter;
431 mmap_array[nr_cpu][counter].prev = 0;
432 mmap_array[nr_cpu][counter].mask = mmap_pages*page_size - 1;
433 mmap_array[nr_cpu][counter].base = mmap(NULL, (mmap_pages+1)*page_size,
9d91a6f7 434 PROT_READ|PROT_WRITE, MAP_SHARED, fd[nr_cpu][counter], 0);
f250c030
IM
435 if (mmap_array[nr_cpu][counter].base == MAP_FAILED) {
436 error("failed to mmap with %d (%s)\n", errno, strerror(errno));
437 exit(-1);
438 }
4502d77c
PZ
439
440 ioctl(fd[nr_cpu][counter], PERF_COUNTER_IOC_ENABLE);
f250c030 441}
f2521b6e 442
f250c030
IM
443static void open_counters(int cpu, pid_t pid)
444{
445 int counter;
16c8a109 446
f250c030
IM
447 if (pid > 0) {
448 pid_synthesize_comm_event(pid, 0);
2debbc83 449 pid_synthesize_mmap_samples(pid);
16c8a109 450 }
f250c030
IM
451
452 group_fd = -1;
453 for (counter = 0; counter < nr_counters; counter++)
454 create_counter(counter, cpu, pid);
455
16c8a109
PZ
456 nr_cpu++;
457}
458
f5970550
PZ
459static void atexit_header(void)
460{
461 file_header.data_size += bytes_written;
462
eadc84cc
FW
463 if (pwrite(output, &file_header, sizeof(file_header), 0) == -1)
464 perror("failed to write on file headers");
f5970550
PZ
465}
466
0e9b20b8 467static int __cmd_record(int argc, const char **argv)
16c8a109
PZ
468{
469 int i, counter;
abaff32a 470 struct stat st;
de9ac07b 471 pid_t pid;
abaff32a 472 int flags;
de9ac07b
PZ
473 int ret;
474
475 page_size = sysconf(_SC_PAGE_SIZE);
de9ac07b
PZ
476 nr_cpus = sysconf(_SC_NPROCESSORS_ONLN);
477 assert(nr_cpus <= MAX_NR_CPUS);
478 assert(nr_cpus >= 0);
479
f5970550
PZ
480 atexit(sig_atexit);
481 signal(SIGCHLD, sig_handler);
482 signal(SIGINT, sig_handler);
483
abaff32a
IM
484 if (!stat(output_name, &st) && !force && !append_file) {
485 fprintf(stderr, "Error, output file %s exists, use -A to append or -f to overwrite.\n",
97124d5e
PZ
486 output_name);
487 exit(-1);
488 }
489
abaff32a
IM
490 flags = O_CREAT|O_RDWR;
491 if (append_file)
f5970550 492 file_new = 0;
abaff32a
IM
493 else
494 flags |= O_TRUNC;
495
496 output = open(output_name, flags, S_IRUSR|S_IWUSR);
de9ac07b
PZ
497 if (output < 0) {
498 perror("failed to create output file");
499 exit(-1);
500 }
501
f5970550 502 if (!file_new) {
eadc84cc
FW
503 if (read(output, &file_header, sizeof(file_header)) == -1) {
504 perror("failed to read file headers");
505 exit(-1);
506 }
507
f5970550
PZ
508 lseek(output, file_header.data_size, SEEK_CUR);
509 }
510
511 atexit(atexit_header);
512
1a853e36 513 if (!system_wide) {
df97992c 514 open_counters(-1, target_pid != -1 ? target_pid : getpid());
1a853e36
ACM
515 } else for (i = 0; i < nr_cpus; i++)
516 open_counters(i, target_pid);
de9ac07b 517
ef65b2a0 518 if (target_pid == -1 && argc) {
1a853e36
ACM
519 pid = fork();
520 if (pid < 0)
521 perror("failed to fork");
de9ac07b 522
1a853e36 523 if (!pid) {
0e9b20b8 524 if (execvp(argv[0], (char **)argv)) {
1a853e36
ACM
525 perror(argv[0]);
526 exit(-1);
527 }
de9ac07b
PZ
528 }
529 }
530
531 if (realtime_prio) {
532 struct sched_param param;
533
534 param.sched_priority = realtime_prio;
535 if (sched_setscheduler(0, SCHED_FIFO, &param)) {
536 printf("Could not set realtime priority.\n");
537 exit(-1);
538 }
539 }
540
f70e87d7 541 if (system_wide)
2debbc83 542 synthesize_samples();
de9ac07b
PZ
543
544 while (!done) {
2debbc83 545 int hits = samples;
de9ac07b 546
16c8a109 547 for (i = 0; i < nr_cpu; i++) {
de9ac07b
PZ
548 for (counter = 0; counter < nr_counters; counter++)
549 mmap_read(&mmap_array[i][counter]);
550 }
551
2debbc83 552 if (hits == samples)
de9ac07b
PZ
553 ret = poll(event_array, nr_poll, 100);
554 }
555
021e9f47
IM
556 /*
557 * Approximate RIP event size: 24 bytes.
558 */
559 fprintf(stderr,
2debbc83 560 "[ perf record: Captured and wrote %.3f MB %s (~%lld samples) ]\n",
021e9f47
IM
561 (double)bytes_written / 1024.0 / 1024.0,
562 output_name,
563 bytes_written / 24);
addc2785 564
de9ac07b
PZ
565 return 0;
566}
0e9b20b8 567
/* Synopsis lines handed to the option parser; the list ends with NULL. */
static const char * const record_usage[] = {
	"perf record [<options>] [<command>]",
	"perf record [<options>] -- <command> [<options>]",
	NULL,
};
573
5242519b 574static const struct option options[] = {
0e9b20b8 575 OPT_CALLBACK('e', "event", NULL, "event",
86847b62
TG
576 "event selector. use 'perf list' to list available events",
577 parse_events),
0e9b20b8
IM
578 OPT_INTEGER('p', "pid", &target_pid,
579 "record events on existing pid"),
580 OPT_INTEGER('r', "realtime", &realtime_prio,
581 "collect data with this RT SCHED_FIFO priority"),
582 OPT_BOOLEAN('a', "all-cpus", &system_wide,
583 "system-wide collection from all CPUs"),
abaff32a
IM
584 OPT_BOOLEAN('A', "append", &append_file,
585 "append to the output file to do incremental profiling"),
97124d5e
PZ
586 OPT_BOOLEAN('f', "force", &force,
587 "overwrite existing data file"),
e61078a0 588 OPT_LONG('c', "count", &default_interval,
abaff32a
IM
589 "event period to sample"),
590 OPT_STRING('o', "output", &output_name, "file",
591 "output file name"),
592 OPT_BOOLEAN('i', "inherit", &inherit,
593 "child tasks inherit counters"),
cf1f4574
IM
594 OPT_INTEGER('F', "freq", &freq,
595 "profile at this frequency"),
abaff32a
IM
596 OPT_INTEGER('m', "mmap-pages", &mmap_pages,
597 "number of mmap data pages"),
3efa1cc9
IM
598 OPT_BOOLEAN('g', "call-graph", &call_graph,
599 "do call-graph (stack chain/backtrace) recording"),
3da297a6
IM
600 OPT_BOOLEAN('v', "verbose", &verbose,
601 "be more verbose (show counter open errors, etc)"),
0e9b20b8
IM
602 OPT_END()
603};
604
605int cmd_record(int argc, const char **argv, const char *prefix)
606{
607 int counter;
608
0e9b20b8 609 argc = parse_options(argc, argv, options, record_usage, 0);
ef65b2a0 610 if (!argc && target_pid == -1 && !system_wide)
0e9b20b8
IM
611 usage_with_options(record_usage, options);
612
bbd36e5e
PZ
613 if (!nr_counters) {
614 nr_counters = 1;
615 attrs[0].type = PERF_TYPE_HARDWARE;
616 attrs[0].config = PERF_COUNT_HW_CPU_CYCLES;
617 }
0e9b20b8
IM
618
619 for (counter = 0; counter < nr_counters; counter++) {
a21ca2ca 620 if (attrs[counter].sample_period)
0e9b20b8
IM
621 continue;
622
a21ca2ca 623 attrs[counter].sample_period = default_interval;
0e9b20b8
IM
624 }
625
626 return __cmd_record(argc, argv);
627}