perf report: Show the IP only in --verbose mode
[linux-2.6-block.git] / Documentation / perf_counter / builtin-record.c
CommitLineData
de9ac07b
PZ
1
2
1a482f38 3#include "perf.h"
6eda5838 4#include "util/util.h"
0e9b20b8 5#include "util/parse-options.h"
8ad8db37 6#include "util/parse-events.h"
6eda5838 7
de9ac07b 8#include <sched.h>
de9ac07b 9
0e9b20b8
IM
/*
 * ALIGN(x, a) rounds x up to the next multiple of a; a must be a power
 * of two.  The result has the type of x.
 *
 * __ALIGN_MASK(x, mask) is the underlying operation, where mask is the
 * alignment minus one.
 *
 * __typeof__ is used instead of typeof so the macro also works when the
 * compiler runs in a strict ISO mode (e.g. -std=c99 / -std=c11).
 */
#define ALIGN(x, a)		__ALIGN_MASK(x, (__typeof__(x))(a)-1)
#define __ALIGN_MASK(x, mask)	(((x)+(mask))&~(mask))
1a853e36 12
de9ac07b
PZ
13static int default_interval = 100000;
14static int event_count[MAX_COUNTERS];
8ad8db37 15
de9ac07b
PZ
16static int fd[MAX_NR_CPUS][MAX_COUNTERS];
17static int nr_cpus = 0;
18static unsigned int page_size;
19static unsigned int mmap_pages = 16;
20static int output;
0e9b20b8 21static const char *output_name = "output.perf";
de9ac07b 22static int group = 0;
16c8a109
PZ
23static unsigned int realtime_prio = 0;
24static int system_wide = 0;
1a853e36 25static pid_t target_pid = -1;
16c8a109
PZ
26static int inherit = 1;
27static int nmi = 1;
de9ac07b
PZ
28
29const unsigned int default_count[] = {
30 1000000,
31 1000000,
32 10000,
33 10000,
34 1000000,
35 10000,
36};
37
de9ac07b
PZ
/* Reader-side state for one counter's mmap'ed buffer. */
struct mmap_data {
	int		counter;	/* index of the counter within its slot */
	void		*base;		/* start of the mapping (control page first) */
	unsigned int	mask;		/* data area size in bytes, minus one */
	unsigned int	prev;		/* head position consumed by the last read */
};
44
45static unsigned int mmap_read_head(struct mmap_data *md)
46{
47 struct perf_counter_mmap_page *pc = md->base;
48 int head;
49
50 head = pc->data_head;
51 rmb();
52
53 return head;
54}
55
/* Number of mmap_read() calls that found new data. */
static long events;
/* Timestamp of the previous mmap_read() call. */
static struct timeval last_read;
/* Timestamp of the mmap_read() call in progress. */
static struct timeval this_read;
58
59static void mmap_read(struct mmap_data *md)
60{
61 unsigned int head = mmap_read_head(md);
62 unsigned int old = md->prev;
63 unsigned char *data = md->base + page_size;
64 unsigned long size;
65 void *buf;
66 int diff;
67
68 gettimeofday(&this_read, NULL);
69
70 /*
71 * If we're further behind than half the buffer, there's a chance
72 * the writer will bite our tail and screw up the events under us.
73 *
74 * If we somehow ended up ahead of the head, we got messed up.
75 *
76 * In either case, truncate and restart at head.
77 */
78 diff = head - old;
79 if (diff > md->mask / 2 || diff < 0) {
80 struct timeval iv;
81 unsigned long msecs;
82
83 timersub(&this_read, &last_read, &iv);
84 msecs = iv.tv_sec*1000 + iv.tv_usec/1000;
85
86 fprintf(stderr, "WARNING: failed to keep up with mmap data."
87 " Last read %lu msecs ago.\n", msecs);
88
89 /*
90 * head points to a known good entry, start there.
91 */
92 old = head;
93 }
94
95 last_read = this_read;
96
97 if (old != head)
98 events++;
99
100 size = head - old;
101
102 if ((old & md->mask) + size != (head & md->mask)) {
103 buf = &data[old & md->mask];
104 size = md->mask + 1 - (old & md->mask);
105 old += size;
106 while (size) {
107 int ret = write(output, buf, size);
108 if (ret < 0) {
109 perror("failed to write");
110 exit(-1);
111 }
112 size -= ret;
113 buf += ret;
114 }
115 }
116
117 buf = &data[old & md->mask];
118 size = head - old;
119 old += size;
120 while (size) {
121 int ret = write(output, buf, size);
122 if (ret < 0) {
123 perror("failed to write");
124 exit(-1);
125 }
126 size -= ret;
127 buf += ret;
128 }
129
130 md->prev = old;
131}
132
/*
 * Set by sig_handler() to ask the main loop to stop.  sig_atomic_t is
 * the integer type the C standard allows a signal handler to write.
 */
static volatile sig_atomic_t done = 0;

/* Signal handler: only records that the record loop should end. */
static void sig_handler(int sig)
{
	(void)sig;	/* the same action is taken for every signal */
	done = 1;
}
139
16c8a109
PZ
140static struct pollfd event_array[MAX_NR_CPUS * MAX_COUNTERS];
141static struct mmap_data mmap_array[MAX_NR_CPUS][MAX_COUNTERS];
142
143static int nr_poll;
144static int nr_cpu;
145
1a853e36
ACM
146struct mmap_event {
147 struct perf_event_header header;
148 __u32 pid, tid;
149 __u64 start;
150 __u64 len;
151 __u64 pgoff;
152 char filename[PATH_MAX];
153};
154struct comm_event {
155 struct perf_event_header header;
156 __u32 pid,tid;
157 char comm[16];
158};
159
160static pid_t pid_synthesize_comm_event(pid_t pid)
161{
162 char filename[PATH_MAX];
163 char bf[BUFSIZ];
164 struct comm_event comm_ev;
165 size_t size;
166 int fd;
167
168 snprintf(filename, sizeof(filename), "/proc/%d/stat", pid);
169
170 fd = open(filename, O_RDONLY);
171 if (fd < 0) {
172 fprintf(stderr, "couldn't open %s\n", filename);
173 exit(EXIT_FAILURE);
174 }
175 if (read(fd, bf, sizeof(bf)) < 0) {
176 fprintf(stderr, "couldn't read %s\n", filename);
177 exit(EXIT_FAILURE);
178 }
179 close(fd);
180
181 pid_t spid, ppid;
182 char state;
183 char comm[18];
184
185 memset(&comm_ev, 0, sizeof(comm_ev));
186 int nr = sscanf(bf, "%d %s %c %d %d ",
187 &spid, comm, &state, &ppid, &comm_ev.pid);
188 if (nr != 5) {
189 fprintf(stderr, "couldn't get COMM and pgid, malformed %s\n",
190 filename);
191 exit(EXIT_FAILURE);
192 }
193 comm_ev.header.type = PERF_EVENT_COMM;
194 comm_ev.tid = pid;
195 size = strlen(comm);
196 comm[--size] = '\0'; /* Remove the ')' at the end */
197 --size; /* Remove the '(' at the begin */
198 memcpy(comm_ev.comm, comm + 1, size);
199 size = ALIGN(size, sizeof(uint64_t));
200 comm_ev.header.size = sizeof(comm_ev) - (sizeof(comm_ev.comm) - size);
201 int ret = write(output, &comm_ev, comm_ev.header.size);
202 if (ret < 0) {
203 perror("failed to write");
204 exit(-1);
205 }
206 return comm_ev.pid;
207}
208
209static void pid_synthesize_mmap_events(pid_t pid, pid_t pgid)
210{
211 char filename[PATH_MAX];
212 FILE *fp;
213
214 snprintf(filename, sizeof(filename), "/proc/%d/maps", pid);
215
216 fp = fopen(filename, "r");
217 if (fp == NULL) {
218 fprintf(stderr, "couldn't open %s\n", filename);
219 exit(EXIT_FAILURE);
220 }
221 while (1) {
222 char bf[BUFSIZ];
223 unsigned char vm_read, vm_write, vm_exec, vm_mayshare;
224 struct mmap_event mmap_ev = {
225 .header.type = PERF_EVENT_MMAP,
226 };
227 unsigned long ino;
228 int major, minor;
229 size_t size;
230 if (fgets(bf, sizeof(bf), fp) == NULL)
231 break;
232
233 /* 00400000-0040c000 r-xp 00000000 fd:01 41038 /bin/cat */
234 sscanf(bf, "%llx-%llx %c%c%c%c %llx %x:%x %lu",
235 &mmap_ev.start, &mmap_ev.len,
236 &vm_read, &vm_write, &vm_exec, &vm_mayshare,
237 &mmap_ev.pgoff, &major, &minor, &ino);
238 if (vm_exec == 'x') {
239 char *execname = strrchr(bf, ' ');
240
241 if (execname == NULL || execname[1] != '/')
242 continue;
243
244 execname += 1;
245 size = strlen(execname);
246 execname[size - 1] = '\0'; /* Remove \n */
247 memcpy(mmap_ev.filename, execname, size);
248 size = ALIGN(size, sizeof(uint64_t));
249 mmap_ev.len -= mmap_ev.start;
250 mmap_ev.header.size = (sizeof(mmap_ev) -
251 (sizeof(mmap_ev.filename) - size));
252 mmap_ev.pid = pgid;
253 mmap_ev.tid = pid;
254
255 if (write(output, &mmap_ev, mmap_ev.header.size) < 0) {
256 perror("failed to write");
257 exit(-1);
258 }
259 }
260 }
261
262 fclose(fp);
263}
264
265static void open_counters(int cpu, pid_t pid)
de9ac07b 266{
de9ac07b 267 struct perf_counter_hw_event hw_event;
16c8a109
PZ
268 int counter, group_fd;
269 int track = 1;
16c8a109 270
1a853e36
ACM
271 if (pid > 0) {
272 pid_t pgid = pid_synthesize_comm_event(pid);
273 pid_synthesize_mmap_events(pid, pgid);
274 }
16c8a109
PZ
275
276 group_fd = -1;
277 for (counter = 0; counter < nr_counters; counter++) {
278
279 memset(&hw_event, 0, sizeof(hw_event));
280 hw_event.config = event_id[counter];
281 hw_event.irq_period = event_count[counter];
282 hw_event.record_type = PERF_RECORD_IP | PERF_RECORD_TID;
283 hw_event.nmi = nmi;
284 hw_event.mmap = track;
285 hw_event.comm = track;
286 hw_event.inherit = (cpu < 0) && inherit;
287
288 track = 0; // only the first counter needs these
289
290 fd[nr_cpu][counter] =
291 sys_perf_counter_open(&hw_event, pid, cpu, group_fd, 0);
292
293 if (fd[nr_cpu][counter] < 0) {
294 int err = errno;
295 printf("kerneltop error: syscall returned with %d (%s)\n",
296 fd[nr_cpu][counter], strerror(err));
297 if (err == EPERM)
298 printf("Are you root?\n");
299 exit(-1);
300 }
301 assert(fd[nr_cpu][counter] >= 0);
302 fcntl(fd[nr_cpu][counter], F_SETFL, O_NONBLOCK);
303
304 /*
305 * First counter acts as the group leader:
306 */
307 if (group && group_fd == -1)
308 group_fd = fd[nr_cpu][counter];
309
310 event_array[nr_poll].fd = fd[nr_cpu][counter];
311 event_array[nr_poll].events = POLLIN;
312 nr_poll++;
313
314 mmap_array[nr_cpu][counter].counter = counter;
315 mmap_array[nr_cpu][counter].prev = 0;
316 mmap_array[nr_cpu][counter].mask = mmap_pages*page_size - 1;
317 mmap_array[nr_cpu][counter].base = mmap(NULL, (mmap_pages+1)*page_size,
318 PROT_READ, MAP_SHARED, fd[nr_cpu][counter], 0);
319 if (mmap_array[nr_cpu][counter].base == MAP_FAILED) {
320 printf("kerneltop error: failed to mmap with %d (%s)\n",
321 errno, strerror(errno));
322 exit(-1);
323 }
324 }
325 nr_cpu++;
326}
327
0e9b20b8 328static int __cmd_record(int argc, const char **argv)
16c8a109
PZ
329{
330 int i, counter;
de9ac07b
PZ
331 pid_t pid;
332 int ret;
333
334 page_size = sysconf(_SC_PAGE_SIZE);
de9ac07b
PZ
335 nr_cpus = sysconf(_SC_NPROCESSORS_ONLN);
336 assert(nr_cpus <= MAX_NR_CPUS);
337 assert(nr_cpus >= 0);
338
339 output = open(output_name, O_CREAT|O_RDWR, S_IRWXU);
340 if (output < 0) {
341 perror("failed to create output file");
342 exit(-1);
343 }
344
1a853e36
ACM
345 if (!system_wide) {
346 open_counters(-1, target_pid != -1 ? target_pid : 0);
347 } else for (i = 0; i < nr_cpus; i++)
348 open_counters(i, target_pid);
de9ac07b 349
16c8a109
PZ
350 signal(SIGCHLD, sig_handler);
351 signal(SIGINT, sig_handler);
de9ac07b 352
1a853e36
ACM
353 if (target_pid == -1) {
354 pid = fork();
355 if (pid < 0)
356 perror("failed to fork");
de9ac07b 357
1a853e36 358 if (!pid) {
0e9b20b8 359 if (execvp(argv[0], (char **)argv)) {
1a853e36
ACM
360 perror(argv[0]);
361 exit(-1);
362 }
de9ac07b
PZ
363 }
364 }
365
366 if (realtime_prio) {
367 struct sched_param param;
368
369 param.sched_priority = realtime_prio;
370 if (sched_setscheduler(0, SCHED_FIFO, &param)) {
371 printf("Could not set realtime priority.\n");
372 exit(-1);
373 }
374 }
375
376 /*
377 * TODO: store the current /proc/$/maps information somewhere
378 */
379
380 while (!done) {
381 int hits = events;
382
16c8a109 383 for (i = 0; i < nr_cpu; i++) {
de9ac07b
PZ
384 for (counter = 0; counter < nr_counters; counter++)
385 mmap_read(&mmap_array[i][counter]);
386 }
387
388 if (hits == events)
389 ret = poll(event_array, nr_poll, 100);
390 }
391
392 return 0;
393}
0e9b20b8 394
0e9b20b8
IM
/* NULL-terminated usage lines handed to the option parser. */
static const char * const record_usage[] = {
	"perf record [<options>] <command>",
	NULL,
};
399
8ad8db37
IM
400static char events_help_msg[EVENTS_HELP_MAX];
401
5242519b 402static const struct option options[] = {
0e9b20b8 403 OPT_CALLBACK('e', "event", NULL, "event",
8ad8db37 404 events_help_msg, parse_events),
0e9b20b8
IM
405 OPT_INTEGER('c', "count", &default_interval,
406 "event period to sample"),
407 OPT_INTEGER('m', "mmap-pages", &mmap_pages,
408 "number of mmap data pages"),
409 OPT_STRING('o', "output", &output_name, "file",
410 "output file name"),
411 OPT_BOOLEAN('i', "inherit", &inherit,
412 "child tasks inherit counters"),
413 OPT_INTEGER('p', "pid", &target_pid,
414 "record events on existing pid"),
415 OPT_INTEGER('r', "realtime", &realtime_prio,
416 "collect data with this RT SCHED_FIFO priority"),
417 OPT_BOOLEAN('a', "all-cpus", &system_wide,
418 "system-wide collection from all CPUs"),
419 OPT_END()
420};
421
422int cmd_record(int argc, const char **argv, const char *prefix)
423{
424 int counter;
425
8ad8db37 426 create_events_help(events_help_msg);
0e9b20b8
IM
427
428 argc = parse_options(argc, argv, options, record_usage, 0);
429 if (!argc)
430 usage_with_options(record_usage, options);
431
432 if (!nr_counters) {
433 nr_counters = 1;
434 event_id[0] = 0;
435 }
436
437 for (counter = 0; counter < nr_counters; counter++) {
438 if (event_count[counter])
439 continue;
440
441 event_count[counter] = default_interval;
442 }
443
444 return __cmd_record(argc, argv);
445}