perf_counter tools: Clean up old kerneltop references
[linux-2.6-block.git] / Documentation / perf_counter / builtin-record.c
CommitLineData
/*
 * builtin-record.c
 *
 * Builtin record command: Record the profile of a workload
 * (or a CPU, or a PID) into the perf.data output file - for
 * later analysis via perf report.
 */
#include "builtin.h"

#include "perf.h"

#include "util/util.h"
#include "util/parse-options.h"
#include "util/parse-events.h"
#include "util/string.h"

#include <errno.h>
#include <sched.h>
#include <signal.h>
#include <unistd.h>
de9ac07b 19
0e9b20b8
IM
/*
 * ALIGN(x, a): round x up to the next multiple of a.  The mask arithmetic
 * only yields a multiple of a when a is a power of two.  The mask is cast
 * to the type of x so the whole computation happens at x's width.
 *
 * __typeof__ is used rather than typeof so the macro also builds in strict
 * ISO C modes, where the bare typeof keyword is not available.
 */
#define ALIGN(x, a)		__ALIGN_MASK(x, (__typeof__(x))(a)-1)
#define __ALIGN_MASK(x, mask)	(((x)+(mask))&~(mask))
1a853e36 22
e61078a0
PZ
23static long default_interval = 100000;
24static long event_count[MAX_COUNTERS];
8ad8db37 25
de9ac07b 26static int fd[MAX_NR_CPUS][MAX_COUNTERS];
3cf165fc 27static int nr_cpus = 0;
de9ac07b 28static unsigned int page_size;
3cf165fc 29static unsigned int mmap_pages = 128;
de9ac07b 30static int output;
23ac9cbe 31static const char *output_name = "perf.data";
de9ac07b 32static int group = 0;
16c8a109
PZ
33static unsigned int realtime_prio = 0;
34static int system_wide = 0;
1a853e36 35static pid_t target_pid = -1;
16c8a109 36static int inherit = 1;
97124d5e 37static int force = 0;
abaff32a 38static int append_file = 0;
de9ac07b
PZ
39
/*
 * Table of default sample counts.
 * NOTE(review): nothing in the visible part of this file reads this table;
 * it has external linkage, so check for outside users before removing it.
 */
const unsigned int default_count[] = {
	1000000,
	1000000,
	  10000,
	  10000,
	1000000,
	  10000,
};
48
de9ac07b
PZ
/*
 * Reader-side bookkeeping for one counter's mmap()ed buffer.
 */
struct mmap_data {
	int		counter;	/* index of the counter this mapping belongs to */
	void		*base;		/* start of the mapping; data begins page_size bytes in */
	unsigned int	mask;		/* data area size minus one, used to wrap offsets */
	unsigned int	prev;		/* head value up to which data was already written out */
};
55
56static unsigned int mmap_read_head(struct mmap_data *md)
57{
58 struct perf_counter_mmap_page *pc = md->base;
59 int head;
60
61 head = pc->data_head;
62 rmb();
63
64 return head;
65}
66
67static long events;
68static struct timeval last_read, this_read;
69
70static void mmap_read(struct mmap_data *md)
71{
72 unsigned int head = mmap_read_head(md);
73 unsigned int old = md->prev;
74 unsigned char *data = md->base + page_size;
75 unsigned long size;
76 void *buf;
77 int diff;
78
79 gettimeofday(&this_read, NULL);
80
81 /*
82 * If we're further behind than half the buffer, there's a chance
83 * the writer will bite our tail and screw up the events under us.
84 *
85 * If we somehow ended up ahead of the head, we got messed up.
86 *
87 * In either case, truncate and restart at head.
88 */
89 diff = head - old;
90 if (diff > md->mask / 2 || diff < 0) {
91 struct timeval iv;
92 unsigned long msecs;
93
94 timersub(&this_read, &last_read, &iv);
95 msecs = iv.tv_sec*1000 + iv.tv_usec/1000;
96
97 fprintf(stderr, "WARNING: failed to keep up with mmap data."
98 " Last read %lu msecs ago.\n", msecs);
99
100 /*
101 * head points to a known good entry, start there.
102 */
103 old = head;
104 }
105
106 last_read = this_read;
107
108 if (old != head)
109 events++;
110
111 size = head - old;
112
113 if ((old & md->mask) + size != (head & md->mask)) {
114 buf = &data[old & md->mask];
115 size = md->mask + 1 - (old & md->mask);
116 old += size;
117 while (size) {
118 int ret = write(output, buf, size);
119 if (ret < 0) {
120 perror("failed to write");
121 exit(-1);
122 }
123 size -= ret;
124 buf += ret;
125 }
126 }
127
128 buf = &data[old & md->mask];
129 size = head - old;
130 old += size;
131 while (size) {
132 int ret = write(output, buf, size);
133 if (ret < 0) {
134 perror("failed to write");
135 exit(-1);
136 }
137 size -= ret;
138 buf += ret;
139 }
140
141 md->prev = old;
142}
143
/*
 * Set by sig_handler() and polled by the record loop.  sig_atomic_t is the
 * type that may safely be written from within a signal handler.
 */
static volatile sig_atomic_t done = 0;

/*
 * Signal handler: only raises the done flag so the record loop can finish.
 */
static void sig_handler(int sig)
{
	(void)sig;	/* every handled signal means the same thing: stop */
	done = 1;
}
150
16c8a109
PZ
151static struct pollfd event_array[MAX_NR_CPUS * MAX_COUNTERS];
152static struct mmap_data mmap_array[MAX_NR_CPUS][MAX_COUNTERS];
153
154static int nr_poll;
155static int nr_cpu;
156
1a853e36 157struct mmap_event {
16f762a2
IM
158 struct perf_event_header header;
159 __u32 pid;
160 __u32 tid;
161 __u64 start;
162 __u64 len;
163 __u64 pgoff;
164 char filename[PATH_MAX];
1a853e36 165};
16f762a2 166
1a853e36 167struct comm_event {
16f762a2
IM
168 struct perf_event_header header;
169 __u32 pid;
170 __u32 tid;
171 char comm[16];
1a853e36
ACM
172};
173
f70e87d7 174static void pid_synthesize_comm_event(pid_t pid, int full)
1a853e36 175{
16f762a2 176 struct comm_event comm_ev;
1a853e36
ACM
177 char filename[PATH_MAX];
178 char bf[BUFSIZ];
a0055ae2 179 int fd, ret;
1a853e36 180 size_t size;
a0055ae2 181 char *field, *sep;
f70e87d7
PZ
182 DIR *tasks;
183 struct dirent dirent, *next;
1a853e36
ACM
184
185 snprintf(filename, sizeof(filename), "/proc/%d/stat", pid);
186
187 fd = open(filename, O_RDONLY);
188 if (fd < 0) {
189 fprintf(stderr, "couldn't open %s\n", filename);
190 exit(EXIT_FAILURE);
191 }
192 if (read(fd, bf, sizeof(bf)) < 0) {
193 fprintf(stderr, "couldn't read %s\n", filename);
194 exit(EXIT_FAILURE);
195 }
196 close(fd);
197
a0055ae2 198 /* 9027 (cat) R 6747 9027 6747 34816 9027 ... */
1a853e36 199 memset(&comm_ev, 0, sizeof(comm_ev));
a0055ae2
ACM
200 field = strchr(bf, '(');
201 if (field == NULL)
202 goto out_failure;
203 sep = strchr(++field, ')');
204 if (sep == NULL)
205 goto out_failure;
206 size = sep - field;
207 memcpy(comm_ev.comm, field, size++);
f70e87d7
PZ
208
209 comm_ev.pid = pid;
1a853e36 210 comm_ev.header.type = PERF_EVENT_COMM;
1a853e36
ACM
211 size = ALIGN(size, sizeof(uint64_t));
212 comm_ev.header.size = sizeof(comm_ev) - (sizeof(comm_ev.comm) - size);
16f762a2 213
f70e87d7
PZ
214 if (!full) {
215 comm_ev.tid = pid;
216
217 ret = write(output, &comm_ev, comm_ev.header.size);
218 if (ret < 0) {
219 perror("failed to write");
220 exit(-1);
221 }
222 return;
223 }
224
225 snprintf(filename, sizeof(filename), "/proc/%d/task", pid);
226
227 tasks = opendir(filename);
228 while (!readdir_r(tasks, &dirent, &next) && next) {
229 char *end;
230 pid = strtol(dirent.d_name, &end, 10);
231 if (*end)
232 continue;
233
234 comm_ev.tid = pid;
235
236 ret = write(output, &comm_ev, comm_ev.header.size);
237 if (ret < 0) {
238 perror("failed to write");
239 exit(-1);
240 }
1a853e36 241 }
f70e87d7
PZ
242 closedir(tasks);
243 return;
244
a0055ae2
ACM
245out_failure:
246 fprintf(stderr, "couldn't get COMM and pgid, malformed %s\n",
247 filename);
248 exit(EXIT_FAILURE);
1a853e36
ACM
249}
250
f70e87d7 251static void pid_synthesize_mmap_events(pid_t pid)
1a853e36
ACM
252{
253 char filename[PATH_MAX];
254 FILE *fp;
255
256 snprintf(filename, sizeof(filename), "/proc/%d/maps", pid);
257
258 fp = fopen(filename, "r");
259 if (fp == NULL) {
260 fprintf(stderr, "couldn't open %s\n", filename);
261 exit(EXIT_FAILURE);
262 }
263 while (1) {
a0055ae2 264 char bf[BUFSIZ], *pbf = bf;
1a853e36
ACM
265 struct mmap_event mmap_ev = {
266 .header.type = PERF_EVENT_MMAP,
267 };
a0055ae2 268 int n;
1a853e36
ACM
269 size_t size;
270 if (fgets(bf, sizeof(bf), fp) == NULL)
271 break;
272
273 /* 00400000-0040c000 r-xp 00000000 fd:01 41038 /bin/cat */
a0055ae2
ACM
274 n = hex2u64(pbf, &mmap_ev.start);
275 if (n < 0)
276 continue;
277 pbf += n + 1;
278 n = hex2u64(pbf, &mmap_ev.len);
279 if (n < 0)
280 continue;
281 pbf += n + 3;
282 if (*pbf == 'x') { /* vm_exec */
1a853e36
ACM
283 char *execname = strrchr(bf, ' ');
284
285 if (execname == NULL || execname[1] != '/')
286 continue;
287
288 execname += 1;
289 size = strlen(execname);
290 execname[size - 1] = '\0'; /* Remove \n */
291 memcpy(mmap_ev.filename, execname, size);
292 size = ALIGN(size, sizeof(uint64_t));
293 mmap_ev.len -= mmap_ev.start;
294 mmap_ev.header.size = (sizeof(mmap_ev) -
295 (sizeof(mmap_ev.filename) - size));
f70e87d7 296 mmap_ev.pid = pid;
1a853e36
ACM
297 mmap_ev.tid = pid;
298
299 if (write(output, &mmap_ev, mmap_ev.header.size) < 0) {
300 perror("failed to write");
301 exit(-1);
302 }
303 }
304 }
305
306 fclose(fp);
307}
308
f70e87d7
PZ
/*
 * Walk /proc and write comm and mmap records for every entry whose name is
 * purely numeric, i.e. for every process listed there.
 *
 * Failure to open /proc terminates the program.
 */
static void synthesize_events(void)
{
	DIR *proc;
	struct dirent dirent, *next;

	proc = opendir("/proc");
	if (proc == NULL) {
		perror("failed to open /proc");
		exit(EXIT_FAILURE);
	}

	while (!readdir_r(proc, &dirent, &next) && next) {
		char *end;
		pid_t pid;

		pid = strtol(dirent.d_name, &end, 10);
		if (*end) /* only interested in proper numerical dirents */
			continue;

		pid_synthesize_comm_event(pid, 1);
		pid_synthesize_mmap_events(pid);
	}

	closedir(proc);
}
330
1a853e36 331static void open_counters(int cpu, pid_t pid)
de9ac07b 332{
c70975bc 333 struct perf_counter_attr attr;
16c8a109
PZ
334 int counter, group_fd;
335 int track = 1;
16c8a109 336
1a853e36 337 if (pid > 0) {
f70e87d7
PZ
338 pid_synthesize_comm_event(pid, 0);
339 pid_synthesize_mmap_events(pid);
1a853e36 340 }
16c8a109
PZ
341
342 group_fd = -1;
343 for (counter = 0; counter < nr_counters; counter++) {
344
c70975bc
PZ
345 memset(&attr, 0, sizeof(attr));
346 attr.config = event_id[counter];
347 attr.sample_period = event_count[counter];
348 attr.sample_type = PERF_SAMPLE_IP | PERF_SAMPLE_TID;
349 attr.mmap = track;
350 attr.comm = track;
351 attr.inherit = (cpu < 0) && inherit;
16c8a109
PZ
352
353 track = 0; // only the first counter needs these
354
355 fd[nr_cpu][counter] =
c70975bc 356 sys_perf_counter_open(&attr, pid, cpu, group_fd, 0);
16c8a109
PZ
357
358 if (fd[nr_cpu][counter] < 0) {
359 int err = errno;
f2521b6e
IM
360
361 error("syscall returned with %d (%s)\n",
16c8a109
PZ
362 fd[nr_cpu][counter], strerror(err));
363 if (err == EPERM)
364 printf("Are you root?\n");
365 exit(-1);
366 }
367 assert(fd[nr_cpu][counter] >= 0);
368 fcntl(fd[nr_cpu][counter], F_SETFL, O_NONBLOCK);
369
370 /*
371 * First counter acts as the group leader:
372 */
373 if (group && group_fd == -1)
374 group_fd = fd[nr_cpu][counter];
375
376 event_array[nr_poll].fd = fd[nr_cpu][counter];
377 event_array[nr_poll].events = POLLIN;
378 nr_poll++;
379
380 mmap_array[nr_cpu][counter].counter = counter;
381 mmap_array[nr_cpu][counter].prev = 0;
382 mmap_array[nr_cpu][counter].mask = mmap_pages*page_size - 1;
383 mmap_array[nr_cpu][counter].base = mmap(NULL, (mmap_pages+1)*page_size,
384 PROT_READ, MAP_SHARED, fd[nr_cpu][counter], 0);
385 if (mmap_array[nr_cpu][counter].base == MAP_FAILED) {
f2521b6e 386 error("failed to mmap with %d (%s)\n", errno, strerror(errno));
16c8a109
PZ
387 exit(-1);
388 }
389 }
390 nr_cpu++;
391}
392
0e9b20b8 393static int __cmd_record(int argc, const char **argv)
16c8a109
PZ
394{
395 int i, counter;
abaff32a 396 struct stat st;
de9ac07b 397 pid_t pid;
abaff32a 398 int flags;
de9ac07b
PZ
399 int ret;
400
401 page_size = sysconf(_SC_PAGE_SIZE);
de9ac07b
PZ
402 nr_cpus = sysconf(_SC_NPROCESSORS_ONLN);
403 assert(nr_cpus <= MAX_NR_CPUS);
404 assert(nr_cpus >= 0);
405
abaff32a
IM
406 if (!stat(output_name, &st) && !force && !append_file) {
407 fprintf(stderr, "Error, output file %s exists, use -A to append or -f to overwrite.\n",
97124d5e
PZ
408 output_name);
409 exit(-1);
410 }
411
abaff32a
IM
412 flags = O_CREAT|O_RDWR;
413 if (append_file)
414 flags |= O_APPEND;
415 else
416 flags |= O_TRUNC;
417
418 output = open(output_name, flags, S_IRUSR|S_IWUSR);
de9ac07b
PZ
419 if (output < 0) {
420 perror("failed to create output file");
421 exit(-1);
422 }
423
1a853e36
ACM
424 if (!system_wide) {
425 open_counters(-1, target_pid != -1 ? target_pid : 0);
426 } else for (i = 0; i < nr_cpus; i++)
427 open_counters(i, target_pid);
de9ac07b 428
16c8a109
PZ
429 signal(SIGCHLD, sig_handler);
430 signal(SIGINT, sig_handler);
de9ac07b 431
ef65b2a0 432 if (target_pid == -1 && argc) {
1a853e36
ACM
433 pid = fork();
434 if (pid < 0)
435 perror("failed to fork");
de9ac07b 436
1a853e36 437 if (!pid) {
0e9b20b8 438 if (execvp(argv[0], (char **)argv)) {
1a853e36
ACM
439 perror(argv[0]);
440 exit(-1);
441 }
de9ac07b
PZ
442 }
443 }
444
445 if (realtime_prio) {
446 struct sched_param param;
447
448 param.sched_priority = realtime_prio;
449 if (sched_setscheduler(0, SCHED_FIFO, &param)) {
450 printf("Could not set realtime priority.\n");
451 exit(-1);
452 }
453 }
454
f70e87d7
PZ
455 if (system_wide)
456 synthesize_events();
de9ac07b
PZ
457
458 while (!done) {
459 int hits = events;
460
16c8a109 461 for (i = 0; i < nr_cpu; i++) {
de9ac07b
PZ
462 for (counter = 0; counter < nr_counters; counter++)
463 mmap_read(&mmap_array[i][counter]);
464 }
465
466 if (hits == events)
467 ret = poll(event_array, nr_poll, 100);
468 }
469
addc2785
IM
470
471 fprintf(stderr, "[ perf record: Captured and wrote %ld events. ]\n", events);
472
de9ac07b
PZ
473 return 0;
474}
0e9b20b8 475
/* NULL-terminated usage lines handed to the option parser. */
static const char * const record_usage[] = {
	"perf record [<options>] [<command>]",
	"perf record [<options>] -- <command> [<options>]",
	NULL
};
481
8ad8db37
IM
482static char events_help_msg[EVENTS_HELP_MAX];
483
5242519b 484static const struct option options[] = {
0e9b20b8 485 OPT_CALLBACK('e', "event", NULL, "event",
8ad8db37 486 events_help_msg, parse_events),
0e9b20b8
IM
487 OPT_INTEGER('p', "pid", &target_pid,
488 "record events on existing pid"),
489 OPT_INTEGER('r', "realtime", &realtime_prio,
490 "collect data with this RT SCHED_FIFO priority"),
491 OPT_BOOLEAN('a', "all-cpus", &system_wide,
492 "system-wide collection from all CPUs"),
abaff32a
IM
493 OPT_BOOLEAN('A', "append", &append_file,
494 "append to the output file to do incremental profiling"),
97124d5e
PZ
495 OPT_BOOLEAN('f', "force", &force,
496 "overwrite existing data file"),
e61078a0 497 OPT_LONG('c', "count", &default_interval,
abaff32a
IM
498 "event period to sample"),
499 OPT_STRING('o', "output", &output_name, "file",
500 "output file name"),
501 OPT_BOOLEAN('i', "inherit", &inherit,
502 "child tasks inherit counters"),
503 OPT_INTEGER('m', "mmap-pages", &mmap_pages,
504 "number of mmap data pages"),
0e9b20b8
IM
505 OPT_END()
506};
507
508int cmd_record(int argc, const char **argv, const char *prefix)
509{
510 int counter;
511
8ad8db37 512 create_events_help(events_help_msg);
0e9b20b8
IM
513
514 argc = parse_options(argc, argv, options, record_usage, 0);
ef65b2a0 515 if (!argc && target_pid == -1 && !system_wide)
0e9b20b8
IM
516 usage_with_options(record_usage, options);
517
518 if (!nr_counters) {
519 nr_counters = 1;
520 event_id[0] = 0;
521 }
522
523 for (counter = 0; counter < nr_counters; counter++) {
524 if (event_count[counter])
525 continue;
526
527 event_count[counter] = default_interval;
528 }
529
530 return __cmd_record(argc, argv);
531}