perf report: Fix column width/alignment of dsos
[linux-2.6-block.git] / Documentation / perf_counter / builtin-record.c
CommitLineData
de9ac07b
PZ
1
2
1a482f38 3#include "perf.h"
16f762a2 4#include "builtin.h"
6eda5838 5#include "util/util.h"
0e9b20b8 6#include "util/parse-options.h"
8ad8db37 7#include "util/parse-events.h"
a0055ae2 8#include "util/string.h"
6eda5838 9
97124d5e 10#include <unistd.h>
de9ac07b 11#include <sched.h>
de9ac07b 12
0e9b20b8
IM
/*
 * ALIGN(x, a) rounds x up to the next multiple of a.  The mask arithmetic
 * only gives that result when a is a power of two.  The alignment is cast
 * to the type of x so the mask has the same width as x.
 *
 * __typeof__ is used instead of the bare typeof keyword so the macro also
 * compiles when the compiler is in strict ISO mode (-std=c99/-std=c11).
 */
#define __ALIGN_MASK(x, mask)	(((x) + (mask)) & ~(mask))
#define ALIGN(x, a)		__ALIGN_MASK(x, (__typeof__(x))(a) - 1)
1a853e36 15
de9ac07b
PZ
16static int default_interval = 100000;
17static int event_count[MAX_COUNTERS];
8ad8db37 18
de9ac07b
PZ
19static int fd[MAX_NR_CPUS][MAX_COUNTERS];
20static int nr_cpus = 0;
21static unsigned int page_size;
22static unsigned int mmap_pages = 16;
23static int output;
23ac9cbe 24static const char *output_name = "perf.data";
de9ac07b 25static int group = 0;
16c8a109
PZ
26static unsigned int realtime_prio = 0;
27static int system_wide = 0;
1a853e36 28static pid_t target_pid = -1;
16c8a109 29static int inherit = 1;
97124d5e 30static int force = 0;
de9ac07b
PZ
31
/*
 * Table of six default counts.  Nothing in the visible part of this file
 * reads it; it has external linkage, so other translation units may.
 */
const unsigned int default_count[] = {
	1000000, 1000000,
	10000, 10000,
	1000000,
	10000,
};
40
de9ac07b
PZ
/* Reader-side state for one counter's mmap()ed event buffer. */
struct mmap_data {
	int		counter;	/* index of the counter this buffer belongs to */
	void		*base;		/* start of the mapping; data begins one page in */
	unsigned int	mask;		/* data area size in bytes, minus one */
	unsigned int	prev;		/* head position consumed by the last read */
};
47
48static unsigned int mmap_read_head(struct mmap_data *md)
49{
50 struct perf_counter_mmap_page *pc = md->base;
51 int head;
52
53 head = pc->data_head;
54 rmb();
55
56 return head;
57}
58
59static long events;
60static struct timeval last_read, this_read;
61
62static void mmap_read(struct mmap_data *md)
63{
64 unsigned int head = mmap_read_head(md);
65 unsigned int old = md->prev;
66 unsigned char *data = md->base + page_size;
67 unsigned long size;
68 void *buf;
69 int diff;
70
71 gettimeofday(&this_read, NULL);
72
73 /*
74 * If we're further behind than half the buffer, there's a chance
75 * the writer will bite our tail and screw up the events under us.
76 *
77 * If we somehow ended up ahead of the head, we got messed up.
78 *
79 * In either case, truncate and restart at head.
80 */
81 diff = head - old;
82 if (diff > md->mask / 2 || diff < 0) {
83 struct timeval iv;
84 unsigned long msecs;
85
86 timersub(&this_read, &last_read, &iv);
87 msecs = iv.tv_sec*1000 + iv.tv_usec/1000;
88
89 fprintf(stderr, "WARNING: failed to keep up with mmap data."
90 " Last read %lu msecs ago.\n", msecs);
91
92 /*
93 * head points to a known good entry, start there.
94 */
95 old = head;
96 }
97
98 last_read = this_read;
99
100 if (old != head)
101 events++;
102
103 size = head - old;
104
105 if ((old & md->mask) + size != (head & md->mask)) {
106 buf = &data[old & md->mask];
107 size = md->mask + 1 - (old & md->mask);
108 old += size;
109 while (size) {
110 int ret = write(output, buf, size);
111 if (ret < 0) {
112 perror("failed to write");
113 exit(-1);
114 }
115 size -= ret;
116 buf += ret;
117 }
118 }
119
120 buf = &data[old & md->mask];
121 size = head - old;
122 old += size;
123 while (size) {
124 int ret = write(output, buf, size);
125 if (ret < 0) {
126 perror("failed to write");
127 exit(-1);
128 }
129 size -= ret;
130 buf += ret;
131 }
132
133 md->prev = old;
134}
135
/*
 * Set to 1 by sig_handler(); the recording loop in __cmd_record() runs
 * until it becomes non-zero.  sig_atomic_t is the integer type the C
 * standard guarantees can be written safely from a signal handler.
 */
static volatile sig_atomic_t done;

/* Signal handler: request that the recording loop stop. */
static void sig_handler(int sig)
{
	(void)sig;	/* the same action is taken for every signal */
	done = 1;
}
142
16c8a109
PZ
143static struct pollfd event_array[MAX_NR_CPUS * MAX_COUNTERS];
144static struct mmap_data mmap_array[MAX_NR_CPUS][MAX_COUNTERS];
145
146static int nr_poll;
147static int nr_cpu;
148
1a853e36 149struct mmap_event {
16f762a2
IM
150 struct perf_event_header header;
151 __u32 pid;
152 __u32 tid;
153 __u64 start;
154 __u64 len;
155 __u64 pgoff;
156 char filename[PATH_MAX];
1a853e36 157};
16f762a2 158
1a853e36 159struct comm_event {
16f762a2
IM
160 struct perf_event_header header;
161 __u32 pid;
162 __u32 tid;
163 char comm[16];
1a853e36
ACM
164};
165
f70e87d7 166static void pid_synthesize_comm_event(pid_t pid, int full)
1a853e36 167{
16f762a2 168 struct comm_event comm_ev;
1a853e36
ACM
169 char filename[PATH_MAX];
170 char bf[BUFSIZ];
a0055ae2 171 int fd, ret;
1a853e36 172 size_t size;
a0055ae2 173 char *field, *sep;
f70e87d7
PZ
174 DIR *tasks;
175 struct dirent dirent, *next;
1a853e36
ACM
176
177 snprintf(filename, sizeof(filename), "/proc/%d/stat", pid);
178
179 fd = open(filename, O_RDONLY);
180 if (fd < 0) {
181 fprintf(stderr, "couldn't open %s\n", filename);
182 exit(EXIT_FAILURE);
183 }
184 if (read(fd, bf, sizeof(bf)) < 0) {
185 fprintf(stderr, "couldn't read %s\n", filename);
186 exit(EXIT_FAILURE);
187 }
188 close(fd);
189
a0055ae2 190 /* 9027 (cat) R 6747 9027 6747 34816 9027 ... */
1a853e36 191 memset(&comm_ev, 0, sizeof(comm_ev));
a0055ae2
ACM
192 field = strchr(bf, '(');
193 if (field == NULL)
194 goto out_failure;
195 sep = strchr(++field, ')');
196 if (sep == NULL)
197 goto out_failure;
198 size = sep - field;
199 memcpy(comm_ev.comm, field, size++);
f70e87d7
PZ
200
201 comm_ev.pid = pid;
1a853e36 202 comm_ev.header.type = PERF_EVENT_COMM;
1a853e36
ACM
203 size = ALIGN(size, sizeof(uint64_t));
204 comm_ev.header.size = sizeof(comm_ev) - (sizeof(comm_ev.comm) - size);
16f762a2 205
f70e87d7
PZ
206 if (!full) {
207 comm_ev.tid = pid;
208
209 ret = write(output, &comm_ev, comm_ev.header.size);
210 if (ret < 0) {
211 perror("failed to write");
212 exit(-1);
213 }
214 return;
215 }
216
217 snprintf(filename, sizeof(filename), "/proc/%d/task", pid);
218
219 tasks = opendir(filename);
220 while (!readdir_r(tasks, &dirent, &next) && next) {
221 char *end;
222 pid = strtol(dirent.d_name, &end, 10);
223 if (*end)
224 continue;
225
226 comm_ev.tid = pid;
227
228 ret = write(output, &comm_ev, comm_ev.header.size);
229 if (ret < 0) {
230 perror("failed to write");
231 exit(-1);
232 }
1a853e36 233 }
f70e87d7
PZ
234 closedir(tasks);
235 return;
236
a0055ae2
ACM
237out_failure:
238 fprintf(stderr, "couldn't get COMM and pgid, malformed %s\n",
239 filename);
240 exit(EXIT_FAILURE);
1a853e36
ACM
241}
242
f70e87d7 243static void pid_synthesize_mmap_events(pid_t pid)
1a853e36
ACM
244{
245 char filename[PATH_MAX];
246 FILE *fp;
247
248 snprintf(filename, sizeof(filename), "/proc/%d/maps", pid);
249
250 fp = fopen(filename, "r");
251 if (fp == NULL) {
252 fprintf(stderr, "couldn't open %s\n", filename);
253 exit(EXIT_FAILURE);
254 }
255 while (1) {
a0055ae2 256 char bf[BUFSIZ], *pbf = bf;
1a853e36
ACM
257 struct mmap_event mmap_ev = {
258 .header.type = PERF_EVENT_MMAP,
259 };
a0055ae2 260 int n;
1a853e36
ACM
261 size_t size;
262 if (fgets(bf, sizeof(bf), fp) == NULL)
263 break;
264
265 /* 00400000-0040c000 r-xp 00000000 fd:01 41038 /bin/cat */
a0055ae2
ACM
266 n = hex2u64(pbf, &mmap_ev.start);
267 if (n < 0)
268 continue;
269 pbf += n + 1;
270 n = hex2u64(pbf, &mmap_ev.len);
271 if (n < 0)
272 continue;
273 pbf += n + 3;
274 if (*pbf == 'x') { /* vm_exec */
1a853e36
ACM
275 char *execname = strrchr(bf, ' ');
276
277 if (execname == NULL || execname[1] != '/')
278 continue;
279
280 execname += 1;
281 size = strlen(execname);
282 execname[size - 1] = '\0'; /* Remove \n */
283 memcpy(mmap_ev.filename, execname, size);
284 size = ALIGN(size, sizeof(uint64_t));
285 mmap_ev.len -= mmap_ev.start;
286 mmap_ev.header.size = (sizeof(mmap_ev) -
287 (sizeof(mmap_ev.filename) - size));
f70e87d7 288 mmap_ev.pid = pid;
1a853e36
ACM
289 mmap_ev.tid = pid;
290
291 if (write(output, &mmap_ev, mmap_ev.header.size) < 0) {
292 perror("failed to write");
293 exit(-1);
294 }
295 }
296 }
297
298 fclose(fp);
299}
300
f70e87d7
PZ
/*
 * Walk /proc and, for every purely numeric entry, write comm records for
 * all of its tasks followed by mmap records for its executable mappings.
 * Exits if /proc cannot be opened.
 */
static void synthesize_events(void)
{
	DIR *proc;
	struct dirent dirent, *next;

	proc = opendir("/proc");
	if (proc == NULL) {
		/* readdir_r() must not be handed a NULL stream */
		fprintf(stderr, "couldn't open /proc\n");
		exit(EXIT_FAILURE);
	}

	while (!readdir_r(proc, &dirent, &next) && next) {
		char *end;
		pid_t pid;

		pid = strtol(dirent.d_name, &end, 10);
		if (*end) /* only interested in proper numerical dirents */
			continue;

		pid_synthesize_comm_event(pid, 1);
		pid_synthesize_mmap_events(pid);
	}

	closedir(proc);
}
322
1a853e36 323static void open_counters(int cpu, pid_t pid)
de9ac07b 324{
c70975bc 325 struct perf_counter_attr attr;
16c8a109
PZ
326 int counter, group_fd;
327 int track = 1;
16c8a109 328
1a853e36 329 if (pid > 0) {
f70e87d7
PZ
330 pid_synthesize_comm_event(pid, 0);
331 pid_synthesize_mmap_events(pid);
1a853e36 332 }
16c8a109
PZ
333
334 group_fd = -1;
335 for (counter = 0; counter < nr_counters; counter++) {
336
c70975bc
PZ
337 memset(&attr, 0, sizeof(attr));
338 attr.config = event_id[counter];
339 attr.sample_period = event_count[counter];
340 attr.sample_type = PERF_SAMPLE_IP | PERF_SAMPLE_TID;
341 attr.mmap = track;
342 attr.comm = track;
343 attr.inherit = (cpu < 0) && inherit;
16c8a109
PZ
344
345 track = 0; // only the first counter needs these
346
347 fd[nr_cpu][counter] =
c70975bc 348 sys_perf_counter_open(&attr, pid, cpu, group_fd, 0);
16c8a109
PZ
349
350 if (fd[nr_cpu][counter] < 0) {
351 int err = errno;
352 printf("kerneltop error: syscall returned with %d (%s)\n",
353 fd[nr_cpu][counter], strerror(err));
354 if (err == EPERM)
355 printf("Are you root?\n");
356 exit(-1);
357 }
358 assert(fd[nr_cpu][counter] >= 0);
359 fcntl(fd[nr_cpu][counter], F_SETFL, O_NONBLOCK);
360
361 /*
362 * First counter acts as the group leader:
363 */
364 if (group && group_fd == -1)
365 group_fd = fd[nr_cpu][counter];
366
367 event_array[nr_poll].fd = fd[nr_cpu][counter];
368 event_array[nr_poll].events = POLLIN;
369 nr_poll++;
370
371 mmap_array[nr_cpu][counter].counter = counter;
372 mmap_array[nr_cpu][counter].prev = 0;
373 mmap_array[nr_cpu][counter].mask = mmap_pages*page_size - 1;
374 mmap_array[nr_cpu][counter].base = mmap(NULL, (mmap_pages+1)*page_size,
375 PROT_READ, MAP_SHARED, fd[nr_cpu][counter], 0);
376 if (mmap_array[nr_cpu][counter].base == MAP_FAILED) {
377 printf("kerneltop error: failed to mmap with %d (%s)\n",
378 errno, strerror(errno));
379 exit(-1);
380 }
381 }
382 nr_cpu++;
383}
384
0e9b20b8 385static int __cmd_record(int argc, const char **argv)
16c8a109
PZ
386{
387 int i, counter;
de9ac07b
PZ
388 pid_t pid;
389 int ret;
97124d5e 390 struct stat st;
de9ac07b
PZ
391
392 page_size = sysconf(_SC_PAGE_SIZE);
de9ac07b
PZ
393 nr_cpus = sysconf(_SC_NPROCESSORS_ONLN);
394 assert(nr_cpus <= MAX_NR_CPUS);
395 assert(nr_cpus >= 0);
396
97124d5e
PZ
397 if (!stat(output_name, &st) && !force) {
398 fprintf(stderr, "Error, output file: %s exists, use -f to overwrite.\n",
399 output_name);
400 exit(-1);
401 }
402
403 output = open(output_name, O_CREAT|O_TRUNC|O_RDWR, S_IRUSR|S_IWUSR);
de9ac07b
PZ
404 if (output < 0) {
405 perror("failed to create output file");
406 exit(-1);
407 }
408
1a853e36
ACM
409 if (!system_wide) {
410 open_counters(-1, target_pid != -1 ? target_pid : 0);
411 } else for (i = 0; i < nr_cpus; i++)
412 open_counters(i, target_pid);
de9ac07b 413
16c8a109
PZ
414 signal(SIGCHLD, sig_handler);
415 signal(SIGINT, sig_handler);
de9ac07b 416
ef65b2a0 417 if (target_pid == -1 && argc) {
1a853e36
ACM
418 pid = fork();
419 if (pid < 0)
420 perror("failed to fork");
de9ac07b 421
1a853e36 422 if (!pid) {
0e9b20b8 423 if (execvp(argv[0], (char **)argv)) {
1a853e36
ACM
424 perror(argv[0]);
425 exit(-1);
426 }
de9ac07b
PZ
427 }
428 }
429
430 if (realtime_prio) {
431 struct sched_param param;
432
433 param.sched_priority = realtime_prio;
434 if (sched_setscheduler(0, SCHED_FIFO, &param)) {
435 printf("Could not set realtime priority.\n");
436 exit(-1);
437 }
438 }
439
f70e87d7
PZ
440 if (system_wide)
441 synthesize_events();
de9ac07b
PZ
442
443 while (!done) {
444 int hits = events;
445
16c8a109 446 for (i = 0; i < nr_cpu; i++) {
de9ac07b
PZ
447 for (counter = 0; counter < nr_counters; counter++)
448 mmap_read(&mmap_array[i][counter]);
449 }
450
451 if (hits == events)
452 ret = poll(event_array, nr_poll, 100);
453 }
454
455 return 0;
456}
0e9b20b8 457
/* NULL-terminated usage lines handed to the option parser. */
static const char * const record_usage[] = {
	"perf record [<options>] [<command>]",
	"perf record [<options>] -- <command> [<options>]",
	NULL
};
463
8ad8db37
IM
464static char events_help_msg[EVENTS_HELP_MAX];
465
5242519b 466static const struct option options[] = {
0e9b20b8 467 OPT_CALLBACK('e', "event", NULL, "event",
8ad8db37 468 events_help_msg, parse_events),
0e9b20b8
IM
469 OPT_INTEGER('c', "count", &default_interval,
470 "event period to sample"),
471 OPT_INTEGER('m', "mmap-pages", &mmap_pages,
472 "number of mmap data pages"),
473 OPT_STRING('o', "output", &output_name, "file",
474 "output file name"),
475 OPT_BOOLEAN('i', "inherit", &inherit,
476 "child tasks inherit counters"),
477 OPT_INTEGER('p', "pid", &target_pid,
478 "record events on existing pid"),
479 OPT_INTEGER('r', "realtime", &realtime_prio,
480 "collect data with this RT SCHED_FIFO priority"),
481 OPT_BOOLEAN('a', "all-cpus", &system_wide,
482 "system-wide collection from all CPUs"),
97124d5e
PZ
483 OPT_BOOLEAN('f', "force", &force,
484 "overwrite existing data file"),
0e9b20b8
IM
485 OPT_END()
486};
487
488int cmd_record(int argc, const char **argv, const char *prefix)
489{
490 int counter;
491
8ad8db37 492 create_events_help(events_help_msg);
0e9b20b8
IM
493
494 argc = parse_options(argc, argv, options, record_usage, 0);
ef65b2a0 495 if (!argc && target_pid == -1 && !system_wide)
0e9b20b8
IM
496 usage_with_options(record_usage, options);
497
498 if (!nr_counters) {
499 nr_counters = 1;
500 event_id[0] = 0;
501 }
502
503 for (counter = 0; counter < nr_counters; counter++) {
504 if (event_count[counter])
505 continue;
506
507 event_count[counter] = default_interval;
508 }
509
510 return __cmd_record(argc, argv);
511}