perf top: Fall back to cpu-clock-tick hrtimer sampling if no cycle counter available
[linux-2.6-block.git] / tools / perf / builtin-record.c
CommitLineData
abaff32a 1/*
bf9e1876
IM
2 * builtin-record.c
3 *
4 * Builtin record command: Record the profile of a workload
5 * (or a CPU, or a PID) into the perf.data output file - for
6 * later analysis via perf report.
abaff32a 7 */
16f762a2 8#include "builtin.h"
bf9e1876
IM
9
10#include "perf.h"
11
6eda5838 12#include "util/util.h"
0e9b20b8 13#include "util/parse-options.h"
8ad8db37 14#include "util/parse-events.h"
a0055ae2 15#include "util/string.h"
6eda5838 16
97124d5e 17#include <unistd.h>
de9ac07b 18#include <sched.h>
de9ac07b 19
0e9b20b8
IM
/*
 * ALIGN(x, a) rounds x up to the next multiple of a; a must be a power of
 * two.  The mask is cast to the type of x so the arithmetic is carried out
 * in that type.
 *
 * __typeof__ is used instead of the bare GNU keyword typeof so the macros
 * also build when GNU keywords are disabled (-std=c99 / -std=c11).
 */
#define __ALIGN_MASK(x, mask)	(((x)+(mask))&~(mask))
#define ALIGN(x, a)		__ALIGN_MASK(x, (__typeof__(x))(a)-1)
1a853e36 22
de9ac07b 23static int fd[MAX_NR_CPUS][MAX_COUNTERS];
a21ca2ca
IM
24
25static long default_interval = 100000;
26
3cf165fc 27static int nr_cpus = 0;
de9ac07b 28static unsigned int page_size;
3cf165fc 29static unsigned int mmap_pages = 128;
cf1f4574 30static int freq = 0;
de9ac07b 31static int output;
23ac9cbe 32static const char *output_name = "perf.data";
de9ac07b 33static int group = 0;
16c8a109
PZ
34static unsigned int realtime_prio = 0;
35static int system_wide = 0;
1a853e36 36static pid_t target_pid = -1;
16c8a109 37static int inherit = 1;
97124d5e 38static int force = 0;
abaff32a 39static int append_file = 0;
de9ac07b 40
a21ca2ca
IM
41static long samples;
42static struct timeval last_read;
43static struct timeval this_read;
44
45static __u64 bytes_written;
46
47static struct pollfd event_array[MAX_NR_CPUS * MAX_COUNTERS];
48
49static int nr_poll;
50static int nr_cpu;
51
52struct mmap_event {
53 struct perf_event_header header;
54 __u32 pid;
55 __u32 tid;
56 __u64 start;
57 __u64 len;
58 __u64 pgoff;
59 char filename[PATH_MAX];
60};
61
62struct comm_event {
63 struct perf_event_header header;
64 __u32 pid;
65 __u32 tid;
66 char comm[16];
de9ac07b
PZ
67};
68
a21ca2ca 69
de9ac07b 70struct mmap_data {
a21ca2ca
IM
71 int counter;
72 void *base;
73 unsigned int mask;
74 unsigned int prev;
de9ac07b
PZ
75};
76
a21ca2ca
IM
77static struct mmap_data mmap_array[MAX_NR_CPUS][MAX_COUNTERS];
78
de9ac07b
PZ
79static unsigned int mmap_read_head(struct mmap_data *md)
80{
81 struct perf_counter_mmap_page *pc = md->base;
82 int head;
83
84 head = pc->data_head;
85 rmb();
86
87 return head;
88}
89
de9ac07b
PZ
90static void mmap_read(struct mmap_data *md)
91{
92 unsigned int head = mmap_read_head(md);
93 unsigned int old = md->prev;
94 unsigned char *data = md->base + page_size;
95 unsigned long size;
96 void *buf;
97 int diff;
98
99 gettimeofday(&this_read, NULL);
100
101 /*
102 * If we're further behind than half the buffer, there's a chance
2debbc83 103 * the writer will bite our tail and mess up the samples under us.
de9ac07b
PZ
104 *
105 * If we somehow ended up ahead of the head, we got messed up.
106 *
107 * In either case, truncate and restart at head.
108 */
109 diff = head - old;
110 if (diff > md->mask / 2 || diff < 0) {
111 struct timeval iv;
112 unsigned long msecs;
113
114 timersub(&this_read, &last_read, &iv);
115 msecs = iv.tv_sec*1000 + iv.tv_usec/1000;
116
117 fprintf(stderr, "WARNING: failed to keep up with mmap data."
118 " Last read %lu msecs ago.\n", msecs);
119
120 /*
121 * head points to a known good entry, start there.
122 */
123 old = head;
124 }
125
126 last_read = this_read;
127
128 if (old != head)
2debbc83 129 samples++;
de9ac07b
PZ
130
131 size = head - old;
132
133 if ((old & md->mask) + size != (head & md->mask)) {
134 buf = &data[old & md->mask];
135 size = md->mask + 1 - (old & md->mask);
136 old += size;
021e9f47 137
de9ac07b
PZ
138 while (size) {
139 int ret = write(output, buf, size);
021e9f47
IM
140
141 if (ret < 0)
142 die("failed to write");
143
de9ac07b
PZ
144 size -= ret;
145 buf += ret;
021e9f47
IM
146
147 bytes_written += ret;
de9ac07b
PZ
148 }
149 }
150
151 buf = &data[old & md->mask];
152 size = head - old;
153 old += size;
021e9f47 154
de9ac07b
PZ
155 while (size) {
156 int ret = write(output, buf, size);
021e9f47
IM
157
158 if (ret < 0)
159 die("failed to write");
160
de9ac07b
PZ
161 size -= ret;
162 buf += ret;
021e9f47
IM
163
164 bytes_written += ret;
de9ac07b
PZ
165 }
166
167 md->prev = old;
168}
169
/*
 * Termination flag for the main record loop.  It is written from a signal
 * handler, hence volatile sig_atomic_t rather than a plain int.
 */
static volatile sig_atomic_t done = 0;

/*
 * Signal handler: request termination of the record loop.  The signal
 * number is irrelevant, every signal this is installed for means "stop".
 */
static void sig_handler(int sig)
{
	(void)sig;
	done = 1;
}
176
f70e87d7 177static void pid_synthesize_comm_event(pid_t pid, int full)
1a853e36 178{
16f762a2 179 struct comm_event comm_ev;
1a853e36
ACM
180 char filename[PATH_MAX];
181 char bf[BUFSIZ];
a0055ae2 182 int fd, ret;
1a853e36 183 size_t size;
a0055ae2 184 char *field, *sep;
f70e87d7
PZ
185 DIR *tasks;
186 struct dirent dirent, *next;
1a853e36
ACM
187
188 snprintf(filename, sizeof(filename), "/proc/%d/stat", pid);
189
190 fd = open(filename, O_RDONLY);
191 if (fd < 0) {
192 fprintf(stderr, "couldn't open %s\n", filename);
193 exit(EXIT_FAILURE);
194 }
195 if (read(fd, bf, sizeof(bf)) < 0) {
196 fprintf(stderr, "couldn't read %s\n", filename);
197 exit(EXIT_FAILURE);
198 }
199 close(fd);
200
a0055ae2 201 /* 9027 (cat) R 6747 9027 6747 34816 9027 ... */
1a853e36 202 memset(&comm_ev, 0, sizeof(comm_ev));
a0055ae2
ACM
203 field = strchr(bf, '(');
204 if (field == NULL)
205 goto out_failure;
206 sep = strchr(++field, ')');
207 if (sep == NULL)
208 goto out_failure;
209 size = sep - field;
210 memcpy(comm_ev.comm, field, size++);
f70e87d7
PZ
211
212 comm_ev.pid = pid;
1a853e36 213 comm_ev.header.type = PERF_EVENT_COMM;
1a853e36
ACM
214 size = ALIGN(size, sizeof(uint64_t));
215 comm_ev.header.size = sizeof(comm_ev) - (sizeof(comm_ev.comm) - size);
16f762a2 216
f70e87d7
PZ
217 if (!full) {
218 comm_ev.tid = pid;
219
220 ret = write(output, &comm_ev, comm_ev.header.size);
221 if (ret < 0) {
222 perror("failed to write");
223 exit(-1);
224 }
225 return;
226 }
227
228 snprintf(filename, sizeof(filename), "/proc/%d/task", pid);
229
230 tasks = opendir(filename);
231 while (!readdir_r(tasks, &dirent, &next) && next) {
232 char *end;
233 pid = strtol(dirent.d_name, &end, 10);
234 if (*end)
235 continue;
236
237 comm_ev.tid = pid;
238
239 ret = write(output, &comm_ev, comm_ev.header.size);
240 if (ret < 0) {
241 perror("failed to write");
242 exit(-1);
243 }
1a853e36 244 }
f70e87d7
PZ
245 closedir(tasks);
246 return;
247
a0055ae2
ACM
248out_failure:
249 fprintf(stderr, "couldn't get COMM and pgid, malformed %s\n",
250 filename);
251 exit(EXIT_FAILURE);
1a853e36
ACM
252}
253
2debbc83 254static void pid_synthesize_mmap_samples(pid_t pid)
1a853e36
ACM
255{
256 char filename[PATH_MAX];
257 FILE *fp;
258
259 snprintf(filename, sizeof(filename), "/proc/%d/maps", pid);
260
261 fp = fopen(filename, "r");
262 if (fp == NULL) {
263 fprintf(stderr, "couldn't open %s\n", filename);
264 exit(EXIT_FAILURE);
265 }
266 while (1) {
a0055ae2 267 char bf[BUFSIZ], *pbf = bf;
1a853e36
ACM
268 struct mmap_event mmap_ev = {
269 .header.type = PERF_EVENT_MMAP,
270 };
a0055ae2 271 int n;
1a853e36
ACM
272 size_t size;
273 if (fgets(bf, sizeof(bf), fp) == NULL)
274 break;
275
276 /* 00400000-0040c000 r-xp 00000000 fd:01 41038 /bin/cat */
a0055ae2
ACM
277 n = hex2u64(pbf, &mmap_ev.start);
278 if (n < 0)
279 continue;
280 pbf += n + 1;
281 n = hex2u64(pbf, &mmap_ev.len);
282 if (n < 0)
283 continue;
284 pbf += n + 3;
285 if (*pbf == 'x') { /* vm_exec */
1a853e36
ACM
286 char *execname = strrchr(bf, ' ');
287
288 if (execname == NULL || execname[1] != '/')
289 continue;
290
291 execname += 1;
292 size = strlen(execname);
293 execname[size - 1] = '\0'; /* Remove \n */
294 memcpy(mmap_ev.filename, execname, size);
295 size = ALIGN(size, sizeof(uint64_t));
296 mmap_ev.len -= mmap_ev.start;
297 mmap_ev.header.size = (sizeof(mmap_ev) -
298 (sizeof(mmap_ev.filename) - size));
f70e87d7 299 mmap_ev.pid = pid;
1a853e36
ACM
300 mmap_ev.tid = pid;
301
302 if (write(output, &mmap_ev, mmap_ev.header.size) < 0) {
303 perror("failed to write");
304 exit(-1);
305 }
306 }
307 }
308
309 fclose(fp);
310}
311
/*
 * Walk /proc and synthesize COMM and MMAP records for every task that is
 * currently listed there (every directory entry with a purely numerical
 * name).  Terminates the program if /proc cannot be opened.
 */
static void synthesize_samples(void)
{
	DIR *proc;
	struct dirent dirent, *next;

	proc = opendir("/proc");
	if (proc == NULL) {
		/* readdir_r() on a NULL stream would crash; bail out cleanly. */
		perror("failed to open /proc");
		exit(EXIT_FAILURE);
	}

	while (!readdir_r(proc, &dirent, &next) && next) {
		char *end;
		pid_t pid;

		pid = strtol(dirent.d_name, &end, 10);
		if (*end) /* only interested in proper numerical dirents */
			continue;

		pid_synthesize_comm_event(pid, 1);
		pid_synthesize_mmap_samples(pid);
	}

	closedir(proc);
}
333
f250c030
IM
334static int group_fd;
335
336static void create_counter(int counter, int cpu, pid_t pid)
de9ac07b 337{
a21ca2ca 338 struct perf_counter_attr *attr = attrs + counter;
16c8a109 339 int track = 1;
16c8a109 340
a21ca2ca 341 attr->sample_type = PERF_SAMPLE_IP | PERF_SAMPLE_TID | PERF_SAMPLE_PERIOD;
1dba15e7 342 if (freq) {
a21ca2ca
IM
343 attr->freq = 1;
344 attr->sample_freq = freq;
1dba15e7 345 }
a21ca2ca
IM
346 attr->mmap = track;
347 attr->comm = track;
348 attr->inherit = (cpu < 0) && inherit;
16c8a109 349
f250c030 350 track = 0; /* only the first counter needs these */
16c8a109 351
a21ca2ca 352 fd[nr_cpu][counter] = sys_perf_counter_open(attr, pid, cpu, group_fd, 0);
16c8a109 353
f250c030
IM
354 if (fd[nr_cpu][counter] < 0) {
355 int err = errno;
16c8a109 356
f250c030
IM
357 error("syscall returned with %d (%s)\n",
358 fd[nr_cpu][counter], strerror(err));
359 if (err == EPERM)
360 printf("Are you root?\n");
361 exit(-1);
362 }
363 assert(fd[nr_cpu][counter] >= 0);
364 fcntl(fd[nr_cpu][counter], F_SETFL, O_NONBLOCK);
16c8a109 365
f250c030
IM
366 /*
367 * First counter acts as the group leader:
368 */
369 if (group && group_fd == -1)
370 group_fd = fd[nr_cpu][counter];
371
372 event_array[nr_poll].fd = fd[nr_cpu][counter];
373 event_array[nr_poll].events = POLLIN;
374 nr_poll++;
375
376 mmap_array[nr_cpu][counter].counter = counter;
377 mmap_array[nr_cpu][counter].prev = 0;
378 mmap_array[nr_cpu][counter].mask = mmap_pages*page_size - 1;
379 mmap_array[nr_cpu][counter].base = mmap(NULL, (mmap_pages+1)*page_size,
380 PROT_READ, MAP_SHARED, fd[nr_cpu][counter], 0);
381 if (mmap_array[nr_cpu][counter].base == MAP_FAILED) {
382 error("failed to mmap with %d (%s)\n", errno, strerror(errno));
383 exit(-1);
384 }
385}
f2521b6e 386
f250c030
IM
387static void open_counters(int cpu, pid_t pid)
388{
389 int counter;
16c8a109 390
f250c030
IM
391 if (pid > 0) {
392 pid_synthesize_comm_event(pid, 0);
2debbc83 393 pid_synthesize_mmap_samples(pid);
16c8a109 394 }
f250c030
IM
395
396 group_fd = -1;
397 for (counter = 0; counter < nr_counters; counter++)
398 create_counter(counter, cpu, pid);
399
16c8a109
PZ
400 nr_cpu++;
401}
402
0e9b20b8 403static int __cmd_record(int argc, const char **argv)
16c8a109
PZ
404{
405 int i, counter;
abaff32a 406 struct stat st;
de9ac07b 407 pid_t pid;
abaff32a 408 int flags;
de9ac07b
PZ
409 int ret;
410
411 page_size = sysconf(_SC_PAGE_SIZE);
de9ac07b
PZ
412 nr_cpus = sysconf(_SC_NPROCESSORS_ONLN);
413 assert(nr_cpus <= MAX_NR_CPUS);
414 assert(nr_cpus >= 0);
415
abaff32a
IM
416 if (!stat(output_name, &st) && !force && !append_file) {
417 fprintf(stderr, "Error, output file %s exists, use -A to append or -f to overwrite.\n",
97124d5e
PZ
418 output_name);
419 exit(-1);
420 }
421
abaff32a
IM
422 flags = O_CREAT|O_RDWR;
423 if (append_file)
424 flags |= O_APPEND;
425 else
426 flags |= O_TRUNC;
427
428 output = open(output_name, flags, S_IRUSR|S_IWUSR);
de9ac07b
PZ
429 if (output < 0) {
430 perror("failed to create output file");
431 exit(-1);
432 }
433
1a853e36 434 if (!system_wide) {
df97992c 435 open_counters(-1, target_pid != -1 ? target_pid : getpid());
1a853e36
ACM
436 } else for (i = 0; i < nr_cpus; i++)
437 open_counters(i, target_pid);
de9ac07b 438
16c8a109
PZ
439 signal(SIGCHLD, sig_handler);
440 signal(SIGINT, sig_handler);
de9ac07b 441
ef65b2a0 442 if (target_pid == -1 && argc) {
1a853e36
ACM
443 pid = fork();
444 if (pid < 0)
445 perror("failed to fork");
de9ac07b 446
1a853e36 447 if (!pid) {
0e9b20b8 448 if (execvp(argv[0], (char **)argv)) {
1a853e36
ACM
449 perror(argv[0]);
450 exit(-1);
451 }
de9ac07b
PZ
452 }
453 }
454
455 if (realtime_prio) {
456 struct sched_param param;
457
458 param.sched_priority = realtime_prio;
459 if (sched_setscheduler(0, SCHED_FIFO, &param)) {
460 printf("Could not set realtime priority.\n");
461 exit(-1);
462 }
463 }
464
f70e87d7 465 if (system_wide)
2debbc83 466 synthesize_samples();
de9ac07b
PZ
467
468 while (!done) {
2debbc83 469 int hits = samples;
de9ac07b 470
16c8a109 471 for (i = 0; i < nr_cpu; i++) {
de9ac07b
PZ
472 for (counter = 0; counter < nr_counters; counter++)
473 mmap_read(&mmap_array[i][counter]);
474 }
475
2debbc83 476 if (hits == samples)
de9ac07b
PZ
477 ret = poll(event_array, nr_poll, 100);
478 }
479
021e9f47
IM
480 /*
481 * Approximate RIP event size: 24 bytes.
482 */
483 fprintf(stderr,
2debbc83 484 "[ perf record: Captured and wrote %.3f MB %s (~%lld samples) ]\n",
021e9f47
IM
485 (double)bytes_written / 1024.0 / 1024.0,
486 output_name,
487 bytes_written / 24);
addc2785 488
de9ac07b
PZ
489 return 0;
490}
0e9b20b8 491
/* NULL-terminated list of synopsis lines shown in the usage message. */
static const char * const record_usage[] = {
	"perf record [<options>] [<command>]",
	"perf record [<options>] -- <command> [<options>]",
	NULL,
};
497
5242519b 498static const struct option options[] = {
0e9b20b8 499 OPT_CALLBACK('e', "event", NULL, "event",
86847b62
TG
500 "event selector. use 'perf list' to list available events",
501 parse_events),
0e9b20b8
IM
502 OPT_INTEGER('p', "pid", &target_pid,
503 "record events on existing pid"),
504 OPT_INTEGER('r', "realtime", &realtime_prio,
505 "collect data with this RT SCHED_FIFO priority"),
506 OPT_BOOLEAN('a', "all-cpus", &system_wide,
507 "system-wide collection from all CPUs"),
abaff32a
IM
508 OPT_BOOLEAN('A', "append", &append_file,
509 "append to the output file to do incremental profiling"),
97124d5e
PZ
510 OPT_BOOLEAN('f', "force", &force,
511 "overwrite existing data file"),
e61078a0 512 OPT_LONG('c', "count", &default_interval,
abaff32a
IM
513 "event period to sample"),
514 OPT_STRING('o', "output", &output_name, "file",
515 "output file name"),
516 OPT_BOOLEAN('i', "inherit", &inherit,
517 "child tasks inherit counters"),
cf1f4574
IM
518 OPT_INTEGER('F', "freq", &freq,
519 "profile at this frequency"),
abaff32a
IM
520 OPT_INTEGER('m', "mmap-pages", &mmap_pages,
521 "number of mmap data pages"),
0e9b20b8
IM
522 OPT_END()
523};
524
525int cmd_record(int argc, const char **argv, const char *prefix)
526{
527 int counter;
528
0e9b20b8 529 argc = parse_options(argc, argv, options, record_usage, 0);
ef65b2a0 530 if (!argc && target_pid == -1 && !system_wide)
0e9b20b8
IM
531 usage_with_options(record_usage, options);
532
a21ca2ca 533 if (!nr_counters)
0e9b20b8 534 nr_counters = 1;
0e9b20b8
IM
535
536 for (counter = 0; counter < nr_counters; counter++) {
a21ca2ca 537 if (attrs[counter].sample_period)
0e9b20b8
IM
538 continue;
539
a21ca2ca 540 attrs[counter].sample_period = default_interval;
0e9b20b8
IM
541 }
542
543 return __cmd_record(argc, argv);
544}