Merge branch 'sched-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel...
[linux-2.6-block.git] / tools / perf / builtin-lock.c
1 #include "builtin.h"
2 #include "perf.h"
3
4 #include "util/util.h"
5 #include "util/cache.h"
6 #include "util/symbol.h"
7 #include "util/thread.h"
8 #include "util/header.h"
9
10 #include "util/parse-options.h"
11 #include "util/trace-event.h"
12
13 #include "util/debug.h"
14 #include "util/session.h"
15
16 #include <sys/types.h>
17 #include <sys/prctl.h>
18 #include <semaphore.h>
19 #include <pthread.h>
20 #include <math.h>
21 #include <limits.h>
22
23 #include <linux/list.h>
24 #include <linux/hash.h>
25
26 /* based on kernel/lockdep.c */
27 #define LOCKHASH_BITS           12
28 #define LOCKHASH_SIZE           (1UL << LOCKHASH_BITS)
29
30 static struct list_head lockhash_table[LOCKHASH_SIZE];
31
32 #define __lockhashfn(key)       hash_long((unsigned long)key, LOCKHASH_BITS)
33 #define lockhashentry(key)      (lockhash_table + __lockhashfn((key)))
34
35 #define LOCK_STATE_UNLOCKED     0              /* initial state */
36 #define LOCK_STATE_LOCKED       1
37
38 struct lock_stat {
39         struct list_head        hash_entry;
40         struct rb_node          rb;             /* used for sorting */
41
42         /*
43          * FIXME: raw_field_value() returns unsigned long long,
44          * so address of lockdep_map should be dealed as 64bit.
45          * Is there more better solution?
46          */
47         void                    *addr;          /* address of lockdep_map, used as ID */
48         char                    *name;          /* for strcpy(), we cannot use const */
49
50         int                     state;
51         u64                     prev_event_time; /* timestamp of previous event */
52
53         unsigned int            nr_acquired;
54         unsigned int            nr_acquire;
55         unsigned int            nr_contended;
56         unsigned int            nr_release;
57
58         /* these times are in nano sec. */
59         u64                     wait_time_total;
60         u64                     wait_time_min;
61         u64                     wait_time_max;
62 };
63
64 /* build simple key function one is bigger than two */
65 #define SINGLE_KEY(member)                                              \
66         static int lock_stat_key_ ## member(struct lock_stat *one,      \
67                                          struct lock_stat *two)         \
68         {                                                               \
69                 return one->member > two->member;                       \
70         }
71
72 SINGLE_KEY(nr_acquired)
73 SINGLE_KEY(nr_contended)
74 SINGLE_KEY(wait_time_total)
75 SINGLE_KEY(wait_time_min)
76 SINGLE_KEY(wait_time_max)
77
78 struct lock_key {
79         /*
80          * name: the value for specify by user
81          * this should be simpler than raw name of member
82          * e.g. nr_acquired -> acquired, wait_time_total -> wait_total
83          */
84         const char              *name;
85         int                     (*key)(struct lock_stat*, struct lock_stat*);
86 };
87
88 static const char               *sort_key = "acquired";
89
90 static int                      (*compare)(struct lock_stat *, struct lock_stat *);
91
92 static struct rb_root           result; /* place to store sorted data */
93
94 #define DEF_KEY_LOCK(name, fn_suffix)   \
95         { #name, lock_stat_key_ ## fn_suffix }
96 struct lock_key keys[] = {
97         DEF_KEY_LOCK(acquired, nr_acquired),
98         DEF_KEY_LOCK(contended, nr_contended),
99         DEF_KEY_LOCK(wait_total, wait_time_total),
100         DEF_KEY_LOCK(wait_min, wait_time_min),
101         DEF_KEY_LOCK(wait_max, wait_time_max),
102
103         /* extra comparisons much complicated should be here */
104
105         { NULL, NULL }
106 };
107
108 static void select_key(void)
109 {
110         int i;
111
112         for (i = 0; keys[i].name; i++) {
113                 if (!strcmp(keys[i].name, sort_key)) {
114                         compare = keys[i].key;
115                         return;
116                 }
117         }
118
119         die("Unknown compare key:%s\n", sort_key);
120 }
121
122 static void insert_to_result(struct lock_stat *st,
123                              int (*bigger)(struct lock_stat *, struct lock_stat *))
124 {
125         struct rb_node **rb = &result.rb_node;
126         struct rb_node *parent = NULL;
127         struct lock_stat *p;
128
129         while (*rb) {
130                 p = container_of(*rb, struct lock_stat, rb);
131                 parent = *rb;
132
133                 if (bigger(st, p))
134                         rb = &(*rb)->rb_left;
135                 else
136                         rb = &(*rb)->rb_right;
137         }
138
139         rb_link_node(&st->rb, parent, rb);
140         rb_insert_color(&st->rb, &result);
141 }
142
143 /* returns left most element of result, and erase it */
144 static struct lock_stat *pop_from_result(void)
145 {
146         struct rb_node *node = result.rb_node;
147
148         if (!node)
149                 return NULL;
150
151         while (node->rb_left)
152                 node = node->rb_left;
153
154         rb_erase(node, &result);
155         return container_of(node, struct lock_stat, rb);
156 }
157
158 static struct lock_stat *lock_stat_findnew(void *addr, const char *name)
159 {
160         struct list_head *entry = lockhashentry(addr);
161         struct lock_stat *ret, *new;
162
163         list_for_each_entry(ret, entry, hash_entry) {
164                 if (ret->addr == addr)
165                         return ret;
166         }
167
168         new = zalloc(sizeof(struct lock_stat));
169         if (!new)
170                 goto alloc_failed;
171
172         new->addr = addr;
173         new->name = zalloc(sizeof(char) * strlen(name) + 1);
174         if (!new->name)
175                 goto alloc_failed;
176         strcpy(new->name, name);
177
178         /* LOCK_STATE_UNLOCKED == 0 isn't guaranteed forever */
179         new->state = LOCK_STATE_UNLOCKED;
180         new->wait_time_min = ULLONG_MAX;
181
182         list_add(&new->hash_entry, entry);
183         return new;
184
185 alloc_failed:
186         die("memory allocation failed\n");
187 }
188
189 static char                     const *input_name = "perf.data";
190
191 static int                      profile_cpu = -1;
192
193 struct raw_event_sample {
194         u32                     size;
195         char                    data[0];
196 };
197
198 struct trace_acquire_event {
199         void                    *addr;
200         const char              *name;
201 };
202
203 struct trace_acquired_event {
204         void                    *addr;
205         const char              *name;
206 };
207
208 struct trace_contended_event {
209         void                    *addr;
210         const char              *name;
211 };
212
213 struct trace_release_event {
214         void                    *addr;
215         const char              *name;
216 };
217
218 struct trace_lock_handler {
219         void (*acquire_event)(struct trace_acquire_event *,
220                               struct event *,
221                               int cpu,
222                               u64 timestamp,
223                               struct thread *thread);
224
225         void (*acquired_event)(struct trace_acquired_event *,
226                                struct event *,
227                                int cpu,
228                                u64 timestamp,
229                                struct thread *thread);
230
231         void (*contended_event)(struct trace_contended_event *,
232                                 struct event *,
233                                 int cpu,
234                                 u64 timestamp,
235                                 struct thread *thread);
236
237         void (*release_event)(struct trace_release_event *,
238                               struct event *,
239                               int cpu,
240                               u64 timestamp,
241                               struct thread *thread);
242 };
243
244 static void
245 report_lock_acquire_event(struct trace_acquire_event *acquire_event,
246                         struct event *__event __used,
247                         int cpu __used,
248                         u64 timestamp,
249                         struct thread *thread __used)
250 {
251         struct lock_stat *st;
252
253         st = lock_stat_findnew(acquire_event->addr, acquire_event->name);
254
255         switch (st->state) {
256         case LOCK_STATE_UNLOCKED:
257                 break;
258         case LOCK_STATE_LOCKED:
259                 break;
260         default:
261                 BUG_ON(1);
262                 break;
263         }
264
265         st->prev_event_time = timestamp;
266 }
267
268 static void
269 report_lock_acquired_event(struct trace_acquired_event *acquired_event,
270                          struct event *__event __used,
271                          int cpu __used,
272                          u64 timestamp,
273                          struct thread *thread __used)
274 {
275         struct lock_stat *st;
276
277         st = lock_stat_findnew(acquired_event->addr, acquired_event->name);
278
279         switch (st->state) {
280         case LOCK_STATE_UNLOCKED:
281                 st->state = LOCK_STATE_LOCKED;
282                 st->nr_acquired++;
283                 break;
284         case LOCK_STATE_LOCKED:
285                 break;
286         default:
287                 BUG_ON(1);
288                 break;
289         }
290
291         st->prev_event_time = timestamp;
292 }
293
294 static void
295 report_lock_contended_event(struct trace_contended_event *contended_event,
296                           struct event *__event __used,
297                           int cpu __used,
298                           u64 timestamp,
299                           struct thread *thread __used)
300 {
301         struct lock_stat *st;
302
303         st = lock_stat_findnew(contended_event->addr, contended_event->name);
304
305         switch (st->state) {
306         case LOCK_STATE_UNLOCKED:
307                 break;
308         case LOCK_STATE_LOCKED:
309                 st->nr_contended++;
310                 break;
311         default:
312                 BUG_ON(1);
313                 break;
314         }
315
316         st->prev_event_time = timestamp;
317 }
318
319 static void
320 report_lock_release_event(struct trace_release_event *release_event,
321                         struct event *__event __used,
322                         int cpu __used,
323                         u64 timestamp,
324                         struct thread *thread __used)
325 {
326         struct lock_stat *st;
327         u64 hold_time;
328
329         st = lock_stat_findnew(release_event->addr, release_event->name);
330
331         switch (st->state) {
332         case LOCK_STATE_UNLOCKED:
333                 break;
334         case LOCK_STATE_LOCKED:
335                 st->state = LOCK_STATE_UNLOCKED;
336                 hold_time = timestamp - st->prev_event_time;
337
338                 if (timestamp < st->prev_event_time) {
339                         /* terribly, this can happen... */
340                         goto end;
341                 }
342
343                 if (st->wait_time_min > hold_time)
344                         st->wait_time_min = hold_time;
345                 if (st->wait_time_max < hold_time)
346                         st->wait_time_max = hold_time;
347                 st->wait_time_total += hold_time;
348
349                 st->nr_release++;
350                 break;
351         default:
352                 BUG_ON(1);
353                 break;
354         }
355
356 end:
357         st->prev_event_time = timestamp;
358 }
359
360 /* lock oriented handlers */
361 /* TODO: handlers for CPU oriented, thread oriented */
362 static struct trace_lock_handler report_lock_ops  = {
363         .acquire_event          = report_lock_acquire_event,
364         .acquired_event         = report_lock_acquired_event,
365         .contended_event        = report_lock_contended_event,
366         .release_event          = report_lock_release_event,
367 };
368
369 static struct trace_lock_handler *trace_handler;
370
371 static void
372 process_lock_acquire_event(void *data,
373                            struct event *event __used,
374                            int cpu __used,
375                            u64 timestamp __used,
376                            struct thread *thread __used)
377 {
378         struct trace_acquire_event acquire_event;
379         u64 tmp;                /* this is required for casting... */
380
381         tmp = raw_field_value(event, "lockdep_addr", data);
382         memcpy(&acquire_event.addr, &tmp, sizeof(void *));
383         acquire_event.name = (char *)raw_field_ptr(event, "name", data);
384
385         if (trace_handler->acquire_event)
386                 trace_handler->acquire_event(&acquire_event, event, cpu, timestamp, thread);
387 }
388
389 static void
390 process_lock_acquired_event(void *data,
391                             struct event *event __used,
392                             int cpu __used,
393                             u64 timestamp __used,
394                             struct thread *thread __used)
395 {
396         struct trace_acquired_event acquired_event;
397         u64 tmp;                /* this is required for casting... */
398
399         tmp = raw_field_value(event, "lockdep_addr", data);
400         memcpy(&acquired_event.addr, &tmp, sizeof(void *));
401         acquired_event.name = (char *)raw_field_ptr(event, "name", data);
402
403         if (trace_handler->acquire_event)
404                 trace_handler->acquired_event(&acquired_event, event, cpu, timestamp, thread);
405 }
406
407 static void
408 process_lock_contended_event(void *data,
409                              struct event *event __used,
410                              int cpu __used,
411                              u64 timestamp __used,
412                              struct thread *thread __used)
413 {
414         struct trace_contended_event contended_event;
415         u64 tmp;                /* this is required for casting... */
416
417         tmp = raw_field_value(event, "lockdep_addr", data);
418         memcpy(&contended_event.addr, &tmp, sizeof(void *));
419         contended_event.name = (char *)raw_field_ptr(event, "name", data);
420
421         if (trace_handler->acquire_event)
422                 trace_handler->contended_event(&contended_event, event, cpu, timestamp, thread);
423 }
424
425 static void
426 process_lock_release_event(void *data,
427                            struct event *event __used,
428                            int cpu __used,
429                            u64 timestamp __used,
430                            struct thread *thread __used)
431 {
432         struct trace_release_event release_event;
433         u64 tmp;                /* this is required for casting... */
434
435         tmp = raw_field_value(event, "lockdep_addr", data);
436         memcpy(&release_event.addr, &tmp, sizeof(void *));
437         release_event.name = (char *)raw_field_ptr(event, "name", data);
438
439         if (trace_handler->acquire_event)
440                 trace_handler->release_event(&release_event, event, cpu, timestamp, thread);
441 }
442
443 static void
444 process_raw_event(void *data, int cpu,
445                   u64 timestamp, struct thread *thread)
446 {
447         struct event *event;
448         int type;
449
450         type = trace_parse_common_type(data);
451         event = trace_find_event(type);
452
453         if (!strcmp(event->name, "lock_acquire"))
454                 process_lock_acquire_event(data, event, cpu, timestamp, thread);
455         if (!strcmp(event->name, "lock_acquired"))
456                 process_lock_acquired_event(data, event, cpu, timestamp, thread);
457         if (!strcmp(event->name, "lock_contended"))
458                 process_lock_contended_event(data, event, cpu, timestamp, thread);
459         if (!strcmp(event->name, "lock_release"))
460                 process_lock_release_event(data, event, cpu, timestamp, thread);
461 }
462
463 static int process_sample_event(event_t *event, struct perf_session *session)
464 {
465         struct thread *thread;
466         struct sample_data data;
467
468         bzero(&data, sizeof(struct sample_data));
469         event__parse_sample(event, session->sample_type, &data);
470         thread = perf_session__findnew(session, data.pid);
471
472         if (thread == NULL) {
473                 pr_debug("problem processing %d event, skipping it.\n",
474                          event->header.type);
475                 return -1;
476         }
477
478         dump_printf(" ... thread: %s:%d\n", thread->comm, thread->pid);
479
480         if (profile_cpu != -1 && profile_cpu != (int) data.cpu)
481                 return 0;
482
483         process_raw_event(data.raw_data, data.cpu, data.time, thread);
484
485         return 0;
486 }
487
488 /* TODO: various way to print, coloring, nano or milli sec */
489 static void print_result(void)
490 {
491         struct lock_stat *st;
492         char cut_name[20];
493
494         printf("%18s ", "ID");
495         printf("%20s ", "Name");
496         printf("%10s ", "acquired");
497         printf("%10s ", "contended");
498
499         printf("%15s ", "total wait (ns)");
500         printf("%15s ", "max wait (ns)");
501         printf("%15s ", "min wait (ns)");
502
503         printf("\n\n");
504
505         while ((st = pop_from_result())) {
506                 bzero(cut_name, 20);
507
508                 printf("%p ", st->addr);
509
510                 if (strlen(st->name) < 16) {
511                         /* output raw name */
512                         printf("%20s ", st->name);
513                 } else {
514                         strncpy(cut_name, st->name, 16);
515                         cut_name[16] = '.';
516                         cut_name[17] = '.';
517                         cut_name[18] = '.';
518                         cut_name[19] = '\0';
519                         /* cut off name for saving output style */
520                         printf("%20s ", cut_name);
521                 }
522
523                 printf("%10u ", st->nr_acquired);
524                 printf("%10u ", st->nr_contended);
525
526                 printf("%15llu ", st->wait_time_total);
527                 printf("%15llu ", st->wait_time_max);
528                 printf("%15llu ", st->wait_time_min == ULLONG_MAX ?
529                        0 : st->wait_time_min);
530                 printf("\n");
531         }
532 }
533
534 static void dump_map(void)
535 {
536         unsigned int i;
537         struct lock_stat *st;
538
539         for (i = 0; i < LOCKHASH_SIZE; i++) {
540                 list_for_each_entry(st, &lockhash_table[i], hash_entry) {
541                         printf("%p: %s\n", st->addr, st->name);
542                 }
543         }
544 }
545
546 static struct perf_event_ops eops = {
547         .sample                 = process_sample_event,
548         .comm                   = event__process_comm,
549 };
550
551 static struct perf_session *session;
552
553 static int read_events(void)
554 {
555         session = perf_session__new(input_name, O_RDONLY, 0);
556         if (!session)
557                 die("Initializing perf session failed\n");
558
559         return perf_session__process_events(session, &eops);
560 }
561
562 static void sort_result(void)
563 {
564         unsigned int i;
565         struct lock_stat *st;
566
567         for (i = 0; i < LOCKHASH_SIZE; i++) {
568                 list_for_each_entry(st, &lockhash_table[i], hash_entry) {
569                         insert_to_result(st, compare);
570                 }
571         }
572 }
573
574 static void __cmd_report(void)
575 {
576         setup_pager();
577         select_key();
578         read_events();
579         sort_result();
580         print_result();
581 }
582
583 static const char * const report_usage[] = {
584         "perf lock report [<options>]",
585         NULL
586 };
587
588 static const struct option report_options[] = {
589         OPT_STRING('k', "key", &sort_key, "acquired",
590                     "key for sorting"),
591         /* TODO: type */
592         OPT_END()
593 };
594
595 static const char * const lock_usage[] = {
596         "perf lock [<options>] {record|trace|report}",
597         NULL
598 };
599
600 static const struct option lock_options[] = {
601         OPT_STRING('i', "input", &input_name, "file", "input file name"),
602         OPT_BOOLEAN('v', "verbose", &verbose, "be more verbose (show symbol address, etc)"),
603         OPT_BOOLEAN('D', "dump-raw-trace", &dump_trace, "dump raw trace in ASCII"),
604         OPT_END()
605 };
606
607 static const char *record_args[] = {
608         "record",
609         "-a",
610         "-R",
611         "-M",
612         "-f",
613         "-m", "1024",
614         "-c", "1",
615         "-e", "lock:lock_acquire:r",
616         "-e", "lock:lock_acquired:r",
617         "-e", "lock:lock_contended:r",
618         "-e", "lock:lock_release:r",
619 };
620
621 static int __cmd_record(int argc, const char **argv)
622 {
623         unsigned int rec_argc, i, j;
624         const char **rec_argv;
625
626         rec_argc = ARRAY_SIZE(record_args) + argc - 1;
627         rec_argv = calloc(rec_argc + 1, sizeof(char *));
628
629         for (i = 0; i < ARRAY_SIZE(record_args); i++)
630                 rec_argv[i] = strdup(record_args[i]);
631
632         for (j = 1; j < (unsigned int)argc; j++, i++)
633                 rec_argv[i] = argv[j];
634
635         BUG_ON(i != rec_argc);
636
637         return cmd_record(i, rec_argv, NULL);
638 }
639
640 int cmd_lock(int argc, const char **argv, const char *prefix __used)
641 {
642         unsigned int i;
643
644         symbol__init();
645         for (i = 0; i < LOCKHASH_SIZE; i++)
646                 INIT_LIST_HEAD(lockhash_table + i);
647
648         argc = parse_options(argc, argv, lock_options, lock_usage,
649                              PARSE_OPT_STOP_AT_NON_OPTION);
650         if (!argc)
651                 usage_with_options(lock_usage, lock_options);
652
653         if (!strncmp(argv[0], "rec", 3)) {
654                 return __cmd_record(argc, argv);
655         } else if (!strncmp(argv[0], "report", 6)) {
656                 trace_handler = &report_lock_ops;
657                 if (argc) {
658                         argc = parse_options(argc, argv,
659                                              report_options, report_usage, 0);
660                         if (argc)
661                                 usage_with_options(report_usage, report_options);
662                 }
663                 __cmd_report();
664         } else if (!strcmp(argv[0], "trace")) {
665                 /* Aliased to 'perf trace' */
666                 return cmd_trace(argc, argv, prefix);
667         } else if (!strcmp(argv[0], "map")) {
668                 /* recycling report_lock_ops */
669                 trace_handler = &report_lock_ops;
670                 setup_pager();
671                 read_events();
672                 dump_map();
673         } else {
674                 usage_with_options(lock_usage, lock_options);
675         }
676
677         return 0;
678 }