perf_counter tools: Introduce stricter C code checking
[linux-2.6-block.git] / Documentation / perf_counter / builtin-record.c
CommitLineData
de9ac07b
PZ
1
2
1a482f38 3#include "perf.h"
16f762a2 4#include "builtin.h"
6eda5838 5#include "util/util.h"
0e9b20b8 6#include "util/parse-options.h"
8ad8db37 7#include "util/parse-events.h"
6eda5838 8
de9ac07b 9#include <sched.h>
de9ac07b 10
0e9b20b8
IM
/*
 * Round x up to the next multiple of a; a must be a power of two.
 * The mask is cast to the type of x so the arithmetic is done in that type.
 *
 * __typeof__ is spelled out instead of the bare typeof keyword so that the
 * macros keep compiling when the compiler is switched to a strict ISO mode
 * (-std=c99 / -std=c11), where plain typeof is not recognised.
 */
#define ALIGN(x, a)		__ALIGN_MASK(x, (__typeof__(x))(a) - 1)
#define __ALIGN_MASK(x, mask)	(((x) + (mask)) & ~(mask))
1a853e36 13
de9ac07b
PZ
14static int default_interval = 100000;
15static int event_count[MAX_COUNTERS];
8ad8db37 16
de9ac07b
PZ
17static int fd[MAX_NR_CPUS][MAX_COUNTERS];
18static int nr_cpus = 0;
19static unsigned int page_size;
20static unsigned int mmap_pages = 16;
21static int output;
0e9b20b8 22static const char *output_name = "output.perf";
de9ac07b 23static int group = 0;
16c8a109
PZ
24static unsigned int realtime_prio = 0;
25static int system_wide = 0;
1a853e36 26static pid_t target_pid = -1;
16c8a109
PZ
27static int inherit = 1;
28static int nmi = 1;
de9ac07b
PZ
29
30const unsigned int default_count[] = {
31 1000000,
32 1000000,
33 10000,
34 10000,
35 1000000,
36 10000,
37};
38
de9ac07b
PZ
/* Reader-side bookkeeping for one counter's mmap()ed buffer. */
struct mmap_data {
	int		counter;	/* index of the counter this mapping serves */
	void		*base;		/* start of the mapping; data begins one page in */
	unsigned int	mask;		/* size of the data area in bytes, minus one */
	unsigned int	prev;		/* head position up to which data was consumed */
};
45
46static unsigned int mmap_read_head(struct mmap_data *md)
47{
48 struct perf_counter_mmap_page *pc = md->base;
49 int head;
50
51 head = pc->data_head;
52 rmb();
53
54 return head;
55}
56
57static long events;
58static struct timeval last_read, this_read;
59
60static void mmap_read(struct mmap_data *md)
61{
62 unsigned int head = mmap_read_head(md);
63 unsigned int old = md->prev;
64 unsigned char *data = md->base + page_size;
65 unsigned long size;
66 void *buf;
67 int diff;
68
69 gettimeofday(&this_read, NULL);
70
71 /*
72 * If we're further behind than half the buffer, there's a chance
73 * the writer will bite our tail and screw up the events under us.
74 *
75 * If we somehow ended up ahead of the head, we got messed up.
76 *
77 * In either case, truncate and restart at head.
78 */
79 diff = head - old;
80 if (diff > md->mask / 2 || diff < 0) {
81 struct timeval iv;
82 unsigned long msecs;
83
84 timersub(&this_read, &last_read, &iv);
85 msecs = iv.tv_sec*1000 + iv.tv_usec/1000;
86
87 fprintf(stderr, "WARNING: failed to keep up with mmap data."
88 " Last read %lu msecs ago.\n", msecs);
89
90 /*
91 * head points to a known good entry, start there.
92 */
93 old = head;
94 }
95
96 last_read = this_read;
97
98 if (old != head)
99 events++;
100
101 size = head - old;
102
103 if ((old & md->mask) + size != (head & md->mask)) {
104 buf = &data[old & md->mask];
105 size = md->mask + 1 - (old & md->mask);
106 old += size;
107 while (size) {
108 int ret = write(output, buf, size);
109 if (ret < 0) {
110 perror("failed to write");
111 exit(-1);
112 }
113 size -= ret;
114 buf += ret;
115 }
116 }
117
118 buf = &data[old & md->mask];
119 size = head - old;
120 old += size;
121 while (size) {
122 int ret = write(output, buf, size);
123 if (ret < 0) {
124 perror("failed to write");
125 exit(-1);
126 }
127 size -= ret;
128 buf += ret;
129 }
130
131 md->prev = old;
132}
133
/*
 * Stop flag: set by the signal handler, polled by the record loop.
 * volatile sig_atomic_t is the type the C standard guarantees may be
 * written from an asynchronous signal handler.
 */
static volatile sig_atomic_t done;

/* Signal handler: request that the record loop terminate. */
static void sig_handler(int sig)
{
	(void)sig;	/* every signal this is installed for gets the same action */
	done = 1;
}
140
16c8a109
PZ
141static struct pollfd event_array[MAX_NR_CPUS * MAX_COUNTERS];
142static struct mmap_data mmap_array[MAX_NR_CPUS][MAX_COUNTERS];
143
144static int nr_poll;
145static int nr_cpu;
146
1a853e36 147struct mmap_event {
16f762a2
IM
148 struct perf_event_header header;
149 __u32 pid;
150 __u32 tid;
151 __u64 start;
152 __u64 len;
153 __u64 pgoff;
154 char filename[PATH_MAX];
1a853e36 155};
16f762a2 156
1a853e36 157struct comm_event {
16f762a2
IM
158 struct perf_event_header header;
159 __u32 pid;
160 __u32 tid;
161 char comm[16];
1a853e36
ACM
162};
163
164static pid_t pid_synthesize_comm_event(pid_t pid)
165{
16f762a2 166 struct comm_event comm_ev;
1a853e36 167 char filename[PATH_MAX];
16f762a2 168 pid_t spid, ppid;
1a853e36 169 char bf[BUFSIZ];
16f762a2
IM
170 int fd, nr, ret;
171 char comm[18];
1a853e36 172 size_t size;
16f762a2 173 char state;
1a853e36
ACM
174
175 snprintf(filename, sizeof(filename), "/proc/%d/stat", pid);
176
177 fd = open(filename, O_RDONLY);
178 if (fd < 0) {
179 fprintf(stderr, "couldn't open %s\n", filename);
180 exit(EXIT_FAILURE);
181 }
182 if (read(fd, bf, sizeof(bf)) < 0) {
183 fprintf(stderr, "couldn't read %s\n", filename);
184 exit(EXIT_FAILURE);
185 }
186 close(fd);
187
1a853e36 188 memset(&comm_ev, 0, sizeof(comm_ev));
16f762a2 189 nr = sscanf(bf, "%d %s %c %d %d ",
1a853e36
ACM
190 &spid, comm, &state, &ppid, &comm_ev.pid);
191 if (nr != 5) {
192 fprintf(stderr, "couldn't get COMM and pgid, malformed %s\n",
193 filename);
194 exit(EXIT_FAILURE);
195 }
196 comm_ev.header.type = PERF_EVENT_COMM;
197 comm_ev.tid = pid;
198 size = strlen(comm);
199 comm[--size] = '\0'; /* Remove the ')' at the end */
200 --size; /* Remove the '(' at the begin */
201 memcpy(comm_ev.comm, comm + 1, size);
202 size = ALIGN(size, sizeof(uint64_t));
203 comm_ev.header.size = sizeof(comm_ev) - (sizeof(comm_ev.comm) - size);
16f762a2
IM
204
205 ret = write(output, &comm_ev, comm_ev.header.size);
1a853e36
ACM
206 if (ret < 0) {
207 perror("failed to write");
208 exit(-1);
209 }
210 return comm_ev.pid;
211}
212
213static void pid_synthesize_mmap_events(pid_t pid, pid_t pgid)
214{
215 char filename[PATH_MAX];
216 FILE *fp;
217
218 snprintf(filename, sizeof(filename), "/proc/%d/maps", pid);
219
220 fp = fopen(filename, "r");
221 if (fp == NULL) {
222 fprintf(stderr, "couldn't open %s\n", filename);
223 exit(EXIT_FAILURE);
224 }
225 while (1) {
226 char bf[BUFSIZ];
227 unsigned char vm_read, vm_write, vm_exec, vm_mayshare;
228 struct mmap_event mmap_ev = {
229 .header.type = PERF_EVENT_MMAP,
230 };
231 unsigned long ino;
232 int major, minor;
233 size_t size;
234 if (fgets(bf, sizeof(bf), fp) == NULL)
235 break;
236
237 /* 00400000-0040c000 r-xp 00000000 fd:01 41038 /bin/cat */
238 sscanf(bf, "%llx-%llx %c%c%c%c %llx %x:%x %lu",
239 &mmap_ev.start, &mmap_ev.len,
240 &vm_read, &vm_write, &vm_exec, &vm_mayshare,
241 &mmap_ev.pgoff, &major, &minor, &ino);
242 if (vm_exec == 'x') {
243 char *execname = strrchr(bf, ' ');
244
245 if (execname == NULL || execname[1] != '/')
246 continue;
247
248 execname += 1;
249 size = strlen(execname);
250 execname[size - 1] = '\0'; /* Remove \n */
251 memcpy(mmap_ev.filename, execname, size);
252 size = ALIGN(size, sizeof(uint64_t));
253 mmap_ev.len -= mmap_ev.start;
254 mmap_ev.header.size = (sizeof(mmap_ev) -
255 (sizeof(mmap_ev.filename) - size));
256 mmap_ev.pid = pgid;
257 mmap_ev.tid = pid;
258
259 if (write(output, &mmap_ev, mmap_ev.header.size) < 0) {
260 perror("failed to write");
261 exit(-1);
262 }
263 }
264 }
265
266 fclose(fp);
267}
268
269static void open_counters(int cpu, pid_t pid)
de9ac07b 270{
de9ac07b 271 struct perf_counter_hw_event hw_event;
16c8a109
PZ
272 int counter, group_fd;
273 int track = 1;
16c8a109 274
1a853e36
ACM
275 if (pid > 0) {
276 pid_t pgid = pid_synthesize_comm_event(pid);
277 pid_synthesize_mmap_events(pid, pgid);
278 }
16c8a109
PZ
279
280 group_fd = -1;
281 for (counter = 0; counter < nr_counters; counter++) {
282
283 memset(&hw_event, 0, sizeof(hw_event));
284 hw_event.config = event_id[counter];
285 hw_event.irq_period = event_count[counter];
286 hw_event.record_type = PERF_RECORD_IP | PERF_RECORD_TID;
287 hw_event.nmi = nmi;
288 hw_event.mmap = track;
289 hw_event.comm = track;
290 hw_event.inherit = (cpu < 0) && inherit;
291
292 track = 0; // only the first counter needs these
293
294 fd[nr_cpu][counter] =
295 sys_perf_counter_open(&hw_event, pid, cpu, group_fd, 0);
296
297 if (fd[nr_cpu][counter] < 0) {
298 int err = errno;
299 printf("kerneltop error: syscall returned with %d (%s)\n",
300 fd[nr_cpu][counter], strerror(err));
301 if (err == EPERM)
302 printf("Are you root?\n");
303 exit(-1);
304 }
305 assert(fd[nr_cpu][counter] >= 0);
306 fcntl(fd[nr_cpu][counter], F_SETFL, O_NONBLOCK);
307
308 /*
309 * First counter acts as the group leader:
310 */
311 if (group && group_fd == -1)
312 group_fd = fd[nr_cpu][counter];
313
314 event_array[nr_poll].fd = fd[nr_cpu][counter];
315 event_array[nr_poll].events = POLLIN;
316 nr_poll++;
317
318 mmap_array[nr_cpu][counter].counter = counter;
319 mmap_array[nr_cpu][counter].prev = 0;
320 mmap_array[nr_cpu][counter].mask = mmap_pages*page_size - 1;
321 mmap_array[nr_cpu][counter].base = mmap(NULL, (mmap_pages+1)*page_size,
322 PROT_READ, MAP_SHARED, fd[nr_cpu][counter], 0);
323 if (mmap_array[nr_cpu][counter].base == MAP_FAILED) {
324 printf("kerneltop error: failed to mmap with %d (%s)\n",
325 errno, strerror(errno));
326 exit(-1);
327 }
328 }
329 nr_cpu++;
330}
331
0e9b20b8 332static int __cmd_record(int argc, const char **argv)
16c8a109
PZ
333{
334 int i, counter;
de9ac07b
PZ
335 pid_t pid;
336 int ret;
337
338 page_size = sysconf(_SC_PAGE_SIZE);
de9ac07b
PZ
339 nr_cpus = sysconf(_SC_NPROCESSORS_ONLN);
340 assert(nr_cpus <= MAX_NR_CPUS);
341 assert(nr_cpus >= 0);
342
343 output = open(output_name, O_CREAT|O_RDWR, S_IRWXU);
344 if (output < 0) {
345 perror("failed to create output file");
346 exit(-1);
347 }
348
1a853e36
ACM
349 if (!system_wide) {
350 open_counters(-1, target_pid != -1 ? target_pid : 0);
351 } else for (i = 0; i < nr_cpus; i++)
352 open_counters(i, target_pid);
de9ac07b 353
16c8a109
PZ
354 signal(SIGCHLD, sig_handler);
355 signal(SIGINT, sig_handler);
de9ac07b 356
1a853e36
ACM
357 if (target_pid == -1) {
358 pid = fork();
359 if (pid < 0)
360 perror("failed to fork");
de9ac07b 361
1a853e36 362 if (!pid) {
0e9b20b8 363 if (execvp(argv[0], (char **)argv)) {
1a853e36
ACM
364 perror(argv[0]);
365 exit(-1);
366 }
de9ac07b
PZ
367 }
368 }
369
370 if (realtime_prio) {
371 struct sched_param param;
372
373 param.sched_priority = realtime_prio;
374 if (sched_setscheduler(0, SCHED_FIFO, &param)) {
375 printf("Could not set realtime priority.\n");
376 exit(-1);
377 }
378 }
379
380 /*
381 * TODO: store the current /proc/$/maps information somewhere
382 */
383
384 while (!done) {
385 int hits = events;
386
16c8a109 387 for (i = 0; i < nr_cpu; i++) {
de9ac07b
PZ
388 for (counter = 0; counter < nr_counters; counter++)
389 mmap_read(&mmap_array[i][counter]);
390 }
391
392 if (hits == events)
393 ret = poll(event_array, nr_poll, 100);
394 }
395
396 return 0;
397}
0e9b20b8 398
0e9b20b8
IM
/* NULL-terminated usage lines handed to the option parser. */
static const char * const record_usage[] = {
	"perf record [<options>] <command>",
	NULL,
};
403
8ad8db37
IM
404static char events_help_msg[EVENTS_HELP_MAX];
405
5242519b 406static const struct option options[] = {
0e9b20b8 407 OPT_CALLBACK('e', "event", NULL, "event",
8ad8db37 408 events_help_msg, parse_events),
0e9b20b8
IM
409 OPT_INTEGER('c', "count", &default_interval,
410 "event period to sample"),
411 OPT_INTEGER('m', "mmap-pages", &mmap_pages,
412 "number of mmap data pages"),
413 OPT_STRING('o', "output", &output_name, "file",
414 "output file name"),
415 OPT_BOOLEAN('i', "inherit", &inherit,
416 "child tasks inherit counters"),
417 OPT_INTEGER('p', "pid", &target_pid,
418 "record events on existing pid"),
419 OPT_INTEGER('r', "realtime", &realtime_prio,
420 "collect data with this RT SCHED_FIFO priority"),
421 OPT_BOOLEAN('a', "all-cpus", &system_wide,
422 "system-wide collection from all CPUs"),
423 OPT_END()
424};
425
426int cmd_record(int argc, const char **argv, const char *prefix)
427{
428 int counter;
429
8ad8db37 430 create_events_help(events_help_msg);
0e9b20b8
IM
431
432 argc = parse_options(argc, argv, options, record_usage, 0);
433 if (!argc)
434 usage_with_options(record_usage, options);
435
436 if (!nr_counters) {
437 nr_counters = 1;
438 event_id[0] = 0;
439 }
440
441 for (counter = 0; counter < nr_counters; counter++) {
442 if (event_count[counter])
443 continue;
444
445 event_count[counter] = default_interval;
446 }
447
448 return __cmd_record(argc, argv);
449}