[PATCH] blktrace: prepare for mmap usage again
[blktrace.git] / blktrace.c
1 /*
2  * block queue tracing application
3  *
4  * Copyright (C) 2005 Jens Axboe <axboe@suse.de>
5  *
6  *  This program is free software; you can redistribute it and/or modify
7  *  it under the terms of the GNU General Public License as published by
8  *  the Free Software Foundation; either version 2 of the License, or
9  *  (at your option) any later version.
10  *
11  *  This program is distributed in the hope that it will be useful,
12  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
13  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14  *  GNU General Public License for more details.
15  *
16  *  You should have received a copy of the GNU General Public License
17  *  along with this program; if not, write to the Free Software
18  *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
19  *
20  */
21 #include <pthread.h>
22 #include <sys/types.h>
23 #include <sys/stat.h>
24 #include <unistd.h>
25 #include <locale.h>
26 #include <signal.h>
27 #include <fcntl.h>
28 #include <string.h>
29 #include <sys/ioctl.h>
30 #include <sys/param.h>
31 #include <sys/statfs.h>
32 #include <stdio.h>
33 #include <stdlib.h>
34 #include <sched.h>
35 #include <ctype.h>
36 #include <getopt.h>
37 #include <sys/mman.h>
38
39 #include "blktrace.h"
40
/* Buffer geometry handed to the kernel in the BLKSTARTTRACE setup. */
#define BUF_SIZE        (128 * 1024)
#define BUF_NR          (4)

/* Value the relay mount's statfs() f_type is compared against. */
#define RELAYFS_TYPE    0xF0B4A981

/* Build one mask_maps[] entry: BLK_TC_<mask>, "<mask>", "BLK_TC_<mask>". */
#define DECLARE_MASK_MAP(mask)          { BLK_TC_##mask, #mask, "BLK_TC_"#mask }

/* Non-zero when str equals either spelling stored in *mmp, ignoring case. */
#define COMPARE_MASK_MAP(mmp, str)                                      \
        (strcasecmp((mmp)->short_form, (str)) == 0 ||                  \
         strcasecmp((mmp)->long_form, (str)) == 0)

/* Non-zero when x is at least 1 and below (1 << BLK_TC_SHIFT). */
#define VALID_SET(x)    (((x) >= 1) && ((x) < (1 << BLK_TC_SHIFT)))
52
/* Ties one action-mask value to the two names it can be selected by. */
struct mask_map {
        int mask;               /* BLK_TC_* value */
        char *short_form;       /* bare name, e.g. "READ" */
        char *long_form;        /* prefixed name, e.g. "BLK_TC_READ" */
};
58
59 struct mask_map mask_maps[] = {
60         DECLARE_MASK_MAP(READ),
61         DECLARE_MASK_MAP(WRITE),
62         DECLARE_MASK_MAP(BARRIER),
63         DECLARE_MASK_MAP(SYNC),
64         DECLARE_MASK_MAP(QUEUE),
65         DECLARE_MASK_MAP(REQUEUE),
66         DECLARE_MASK_MAP(ISSUE),
67         DECLARE_MASK_MAP(COMPLETE),
68         DECLARE_MASK_MAP(FS),
69         DECLARE_MASK_MAP(PC),
70 };
71
/* Short options for getopt_long(); a trailing ':' means an argument follows. */
#define S_OPTS  "d:a:A:r:o:kw:"

/*
 * Long options. Every entry has a NULL flag, so getopt_long() returns the
 * short option letter given as the last member.
 */
static struct option l_opts[] = {
        { "dev",        required_argument,      NULL,   'd' },
        { "act-mask",   required_argument,      NULL,   'a' },
        { "set-mask",   required_argument,      NULL,   'A' },
        { "relay",      required_argument,      NULL,   'r' },
        { "output",     required_argument,      NULL,   'o' },
        { "kill",       no_argument,            NULL,   'k' },
        { "stopwatch",  required_argument,      NULL,   'w' },
        { NULL,         no_argument,            NULL,   0   }
};
123
/* State of one reader thread; there is one per CPU for every traced device. */
struct thread_information {
        int cpu;                        /* CPU index this thread is bound to */
        pthread_t thread;               /* handle returned by pthread_create() */

        int fd;                         /* descriptor of the relay trace file */
        char fn[MAXPATHLEN + 64];       /* path of the relay trace file */
        void *buf;                      /* mapping of fd, or NULL to use read() */
        unsigned long buf_offset;       /* current read position inside buf */
        unsigned int buf_subbuf;        /* index of the sub-buffer being read */
        unsigned int sequence;          /* last sequence accepted from buf */

        pthread_mutex_t *fd_lock;       /* guards ofd when set, else NULL */
        int ofd;                        /* where trace records are written */

        unsigned long events_processed; /* records written so far */
        struct device_information *device;      /* owning device */
};
141
/* State of one traced block device. */
struct device_information {
        int fd;                         /* descriptor of the opened device */
        char *path;                     /* device path from the command line */
        char buts_name[32];             /* name copied back from BLKSTARTTRACE */
        int trace_started;              /* set once BLKSTARTTRACE succeeded */
        struct thread_information *threads;     /* this device's ncpus threads */
};
149
/* thread bookkeeping: ncpus entries per device, laid out back to back */
static int ncpus;
static struct thread_information *thread_information;

/* device bookkeeping: ndevs entries, grown by resize_devices() */
static int ndevs;
static struct device_information *device_information;

/* command line option globals */
static char *relay_path;                /* -r */
static char *output_name;               /* -o */
static int act_mask = ~0U;              /* -a / -A, defaults to all bits set */
static int kill_running_trace;          /* -k */
static int use_mmap;                    /* selects the mmap path in extract() */

/* set by the signal handler; polled by main and by every reader thread */
static volatile int done;
#define is_done()       (*(volatile int *)(&done))

/* serialises writers when every thread shares stdout */
static pthread_mutex_t stdout_mutex = PTHREAD_MUTEX_INITIALIZER;

static void exit_trace(int status);
168
169 static int find_mask_map(char *string)
170 {
171         int i;
172
173         for (i = 0; i < sizeof(mask_maps)/sizeof(mask_maps[0]); i++)
174                 if (COMPARE_MASK_MAP(&mask_maps[i], string))
175                         return mask_maps[i].mask;
176
177         return -1;
178 }
179
180 static int start_trace(struct device_information *dip)
181 {
182         struct blk_user_trace_setup buts;
183
184         memset(&buts, 0, sizeof(buts));
185         buts.buf_size = BUF_SIZE;
186         buts.buf_nr = BUF_NR;
187         buts.act_mask = act_mask;
188
189         if (ioctl(dip->fd, BLKSTARTTRACE, &buts) < 0) {
190                 perror("BLKSTARTTRACE");
191                 return 1;
192         }
193
194         memcpy(dip->buts_name, buts.name, sizeof(dip->buts_name));
195         dip->trace_started = 1;
196         return 0;
197 }
198
199 static void stop_trace(struct device_information *dip)
200 {
201         if (dip->trace_started || kill_running_trace) {
202                 if (ioctl(dip->fd, BLKSTOPTRACE) < 0)
203                         perror("BLKSTOPTRACE");
204                 close(dip->fd);
205                 dip->trace_started = 0;
206         }
207 }
208
209 static void stop_all_traces(void)
210 {
211         struct device_information *dip;
212         int i;
213
214         for (dip = device_information, i = 0; i < ndevs; i++, dip++)
215                 stop_trace(dip);
216 }
217
218 static int get_data_read(struct thread_information *tip, void *buf, int len)
219 {
220         char *p = buf;
221         int ret, bytes_left = len;
222
223         while (!is_done() && bytes_left > 0) {
224                 ret = read(tip->fd, p, bytes_left);
225                 if (ret == len)
226                         break;
227
228                 if (ret < 0) {
229                         perror(tip->fn);
230                         fprintf(stderr,"Thread %d failed read of %s\n",
231                                 tip->cpu, tip->fn);
232                         exit_trace(1);
233                 } else if (ret > 0) {
234                         fprintf(stderr,"Thread %d misread %s %d,%d\n",
235                                 tip->cpu, tip->fn, ret, len);
236                         exit_trace(1);
237                 } else {
238                         p += ret;
239                         bytes_left -= ret;
240                 }
241
242                 usleep(10000);
243         }
244
245         return 0;
246 }
247
248 static int get_data_mmap(struct thread_information *tip, void *buf, int len,
249                          int check_magic)
250 {
251         if (len > (BUF_SIZE * (tip->buf_subbuf + 1)) - tip->buf_offset) {
252                 tip->buf_subbuf++;
253                 if (tip->buf_subbuf == BUF_NR)
254                         tip->buf_subbuf = 0;
255
256                 tip->buf_offset = tip->buf_subbuf * BUF_SIZE;
257         }
258
259         while (!is_done()) {
260                 struct blk_io_trace *t = buf;
261
262                 memcpy(buf, tip->buf + tip->buf_offset, len);
263
264                 if (!check_magic)
265                         break;
266
267                 if (CHECK_MAGIC(t) && t->sequence >= tip->sequence) {
268                         tip->sequence = t->sequence;
269                         break;
270                 }
271
272                 usleep(10000);
273         }
274
275         tip->buf_offset += len;
276         return 0;
277 }
278
279 static int get_data(struct thread_information *tip, void *buf, int len,
280                     int check_magic)
281 {
282         if (tip->buf)
283                 return get_data_mmap(tip, buf, len, check_magic);
284         else
285                 return get_data_read(tip, buf, len);
286 }
287
288 static void *extract_data(struct thread_information *tip, char *ofn, int nb)
289 {
290         unsigned char *buf;
291
292         buf = malloc(nb);
293         if (!get_data(tip, buf, nb, 0))
294                 return buf;
295
296         free(buf);
297         exit_trace(1);
298         return NULL;
299 }
300
301 static inline void tip_fd_unlock(struct thread_information *tip)
302 {
303         if (tip->fd_lock)
304                 pthread_mutex_unlock(tip->fd_lock);
305 }
306
307 static inline void tip_fd_lock(struct thread_information *tip)
308 {
309         if (tip->fd_lock)
310                 pthread_mutex_lock(tip->fd_lock);
311 }
312
313 static void *extract(void *arg)
314 {
315         struct thread_information *tip = arg;
316         int ret, pdu_len;
317         char dp[64], *pdu_data;
318         struct blk_io_trace t;
319         pid_t pid = getpid();
320         cpu_set_t cpu_mask;
321
322         CPU_ZERO(&cpu_mask);
323         CPU_SET((tip->cpu), &cpu_mask);
324
325         if (sched_setaffinity(pid, sizeof(cpu_mask), &cpu_mask) == -1) {
326                 perror("sched_setaffinity");
327                 exit_trace(1);
328         }
329
330         snprintf(tip->fn, sizeof(tip->fn), "%s/block/%s/trace%d",
331                         relay_path, tip->device->buts_name, tip->cpu);
332         tip->fd = open(tip->fn, O_RDONLY);
333         if (tip->fd < 0) {
334                 perror(tip->fn);
335                 fprintf(stderr,"Thread %d failed open of %s\n", tip->cpu,
336                         tip->fn);
337                 exit_trace(1);
338         }
339
340         if (use_mmap) {
341                 tip->buf = mmap(NULL, BUF_SIZE * BUF_NR, PROT_READ,
342                                         MAP_PRIVATE | MAP_POPULATE, tip->fd, 0);
343                 if (tip->buf == MAP_FAILED) {
344                         perror("mmap");
345                         exit_trace(1);
346                 }
347         }
348
349         pdu_data = NULL;
350         while (!is_done()) {
351                 if (get_data(tip, &t, sizeof(t), 1))
352                         break;
353
354                 if (verify_trace(&t))
355                         exit_trace(1);
356
357                 pdu_len = t.pdu_len;
358
359                 trace_to_be(&t);
360
361                 if (pdu_len)
362                         pdu_data = extract_data(tip, dp, pdu_len);
363
364                 /*
365                  * now we have both trace and payload, get a lock on the
366                  * output descriptor and send it off
367                  */
368                 tip_fd_lock(tip);
369
370                 ret = write(tip->ofd, &t, sizeof(t));
371                 if (ret < 0) {
372                         fprintf(stderr,"Thread %d failed write\n", tip->cpu);
373                         tip_fd_unlock(tip);
374                         exit_trace(1);
375                 }
376
377                 if (pdu_data) {
378                         ret = write(tip->ofd, pdu_data, pdu_len);
379                         if (ret != pdu_len) {
380                                 perror("write pdu data");
381                                 tip_fd_unlock(tip);
382                                 exit_trace(1);
383                         }
384
385                         free(pdu_data);
386                         pdu_data = NULL;
387                 }
388
389                 tip_fd_unlock(tip);
390                 tip->events_processed++;
391         }
392
393         return NULL;
394 }
395
396 static int start_threads(struct device_information *dip)
397 {
398         struct thread_information *tip;
399         char op[64];
400         int j, pipeline = output_name && !strcmp(output_name, "-");
401
402         for (tip = dip->threads, j = 0; j < ncpus; j++, tip++) {
403                 tip->cpu = j;
404                 tip->device = dip;
405                 tip->fd_lock = NULL;
406                 tip->events_processed = 0;
407
408                 if (pipeline) {
409                         tip->ofd = dup(STDOUT_FILENO);
410                         tip->fd_lock = &stdout_mutex;
411                 } else {
412                         if (output_name)
413                                 sprintf(op, "%s_%s_out.%d", output_name,
414                                         dip->buts_name, tip->cpu);
415                         else
416                                 sprintf(op, "%s_out.%d",
417                                         dip->buts_name, tip->cpu);
418                         tip->ofd = open(op, O_CREAT|O_TRUNC|O_WRONLY, 0644);
419                 }
420
421                 if (tip->ofd < 0) {
422                         perror(op);
423                         return 1;
424                 }
425
426                 if (pthread_create(&tip->thread, NULL, extract, tip)) {
427                         perror("pthread_create");
428                         close(tip->ofd);
429                         return 1;
430                 }
431         }
432
433         return 0;
434 }
435
436 static void close_thread(struct thread_information *tip)
437 {
438         if (tip->buf)
439                 munmap(tip->buf, BUF_SIZE * BUF_NR);
440
441         if (tip->fd != -1)
442                 close(tip->fd);
443         if (tip->ofd != -1)
444                 close(tip->ofd);
445
446         tip->fd = tip->ofd = -1;
447 }
448
449 static void stop_threads(struct device_information *dip)
450 {
451         struct thread_information *tip;
452         long ret;
453         int j;
454
455         for (tip = dip->threads, j = 0; j < ncpus; j++, tip++) {
456                 if (pthread_join(tip->thread, (void *) &ret))
457                         perror("thread_join");
458                 close_thread(tip);
459         }
460 }
461
462 static void stop_all_threads(void)
463 {
464         struct device_information *dip;
465         int i;
466
467         for (dip = device_information, i = 0; i < ndevs; i++, dip++)
468                 stop_threads(dip);
469 }
470
471 static void stop_all_tracing(void)
472 {
473         struct device_information *dip;
474         struct thread_information *tip;
475         int i, j;
476
477         for (dip = device_information, i = 0; i < ndevs; i++, dip++) {
478                 for (tip = dip->threads, j = 0; j < ncpus; j++, tip++)
479                         close_thread(tip);
480                 stop_trace(dip);
481         }
482 }
483
/* Stop all tracing, then end the process with the given exit status. */
static void exit_trace(int status)
{
        stop_all_tracing();

        exit(status);
}
489
490 static int resize_devices(char *path)
491 {
492         int size = (ndevs + 1) * sizeof(struct device_information);
493
494         device_information = realloc(device_information, size);
495         if (!device_information) {
496                 fprintf(stderr, "Out of memory, device %s (%d)\n", path, size);
497                 return 1;
498         }
499         device_information[ndevs].path = path;
500         ndevs++;
501         return 0;
502 }
503
504 static int open_devices(void)
505 {
506         struct device_information *dip;
507         int i;
508
509         for (dip = device_information, i = 0; i < ndevs; i++, dip++) {
510                 dip->fd = open(dip->path, O_RDONLY);
511                 if (dip->fd < 0) {
512                         perror(dip->path);
513                         return 1;
514                 }
515         }
516         return 0;
517 }
518
519 static int start_devices(void)
520 {
521         struct device_information *dip;
522         int i, j, size;
523
524         size = ncpus * sizeof(struct thread_information);
525         thread_information = malloc(size * ndevs);
526         if (!thread_information) {
527                 fprintf(stderr, "Out of memory, threads (%d)\n", size * ndevs);
528                 return 1;
529         }
530
531         for (dip = device_information, i = 0; i < ndevs; i++, dip++) {
532                 if (start_trace(dip)) {
533                         close(dip->fd);
534                         fprintf(stderr, "Failed to start trace on %s\n",
535                                 dip->path);
536                         break;
537                 }
538         }
539         if (i != ndevs) {
540                 for (dip = device_information, j = 0; j < i; j++, dip++)
541                         stop_trace(dip);
542                 return 1;
543         }
544
545         for (dip = device_information, i = 0; i < ndevs; i++, dip++) {
546                 dip->threads = thread_information + (i * ncpus);
547                 if (start_threads(dip)) {
548                         fprintf(stderr, "Failed to start worker threads\n");
549                         break;
550                 }
551         }
552         if (i != ndevs) {
553                 for (dip = device_information, j = 0; j < i; j++, dip++)
554                         stop_threads(dip);
555                 for (dip = device_information, i = 0; i < ndevs; i++, dip++)
556                         stop_trace(dip);
557                 return 1;
558         }
559
560         return 0;
561 }
562
563 static void show_stats(void)
564 {
565         int i, j;
566         struct device_information *dip;
567         struct thread_information *tip;
568         unsigned long long events_processed;
569   
570         if (output_name && !strcmp(output_name, "-"))
571                 return;
572
573         for (dip = device_information, i = 0; i < ndevs; i++, dip++) {
574                 printf("Device: %s\n", dip->path);
575                 events_processed = 0;
576                 for (tip = dip->threads, j = 0; j < ncpus; j++, tip++) {
577                         printf("  CPU%3d: %20ld events\n",
578                                tip->cpu, tip->events_processed);
579                         events_processed += tip->events_processed;
580                 }
581                 printf("  Total:  %20lld events\n", events_processed);
582         }
583 }
584   
/* Print the one-line usage summary for the given program name to stderr. */
static void show_usage(char *program)
{
        fprintf(stderr,
                "Usage: %s [-d <dev>] [-a <trace> [-a <trace>]] <dev>\n",
                program);
}
591
592 static void handle_sigint(int sig)
593 {
594         done = 1;
595 }
596
597 int main(int argc, char *argv[])
598 {
599         static char default_relay_path[] = "/relay";
600         struct statfs st;
601         int i, c;
602         int stop_watch = 0;
603         int act_mask_tmp = 0;
604
605         while ((c = getopt_long(argc, argv, S_OPTS, l_opts, NULL)) >= 0) {
606                 switch (c) {
607                 case 'a':
608                         i = find_mask_map(optarg);
609                         if (i < 0) {
610                                 fprintf(stderr,"Invalid action mask %s\n", 
611                                         optarg);
612                                 return 1;
613                         }
614                         act_mask_tmp |= i;
615                         break;
616
617                 case 'A':
618                         if ((sscanf(optarg, "%x", &i) != 1) || !VALID_SET(i)) {
619                                 fprintf(stderr,
620                                         "Invalid set action mask %s/0x%x\n", 
621                                         optarg, i);
622                                 return 1;
623                         }
624                         act_mask_tmp = i;
625                         break;
626
627                 case 'd':
628                         if (resize_devices(optarg) != 0)
629                                 return 1;
630                         break;
631
632                 case 'r':
633                         relay_path = optarg;
634                         break;
635
636                 case 'o':
637                         output_name = optarg;
638                         break;
639                 case 'k':
640                         kill_running_trace = 1;
641                         break;
642                 case 'w':
643                         stop_watch = atoi(optarg);
644                         if (stop_watch <= 0) {
645                                 fprintf(stderr,
646                                         "Invalid stopwatch value (%d secs)\n",
647                                         stop_watch);
648                                 return 1;
649                         }
650                         break;
651
652                 default:
653                         show_usage(argv[0]);
654                         return 1;
655                 }
656         }
657
658         while (optind < argc) {
659                 if (resize_devices(argv[optind++]) != 0)
660                         return 1;
661         }
662
663         if (ndevs == 0) {
664                 show_usage(argv[0]);
665                 return 1;
666         }
667
668         if (!relay_path)
669                 relay_path = default_relay_path;
670
671         if (act_mask_tmp != 0)
672                 act_mask = act_mask_tmp;
673
674         if (statfs(relay_path, &st) < 0) {
675                 perror("statfs");
676                 fprintf(stderr,"%s does not appear to be a valid path\n",
677                         relay_path);
678                 return 1;
679         } else if (st.f_type != RELAYFS_TYPE) {
680                 fprintf(stderr,"%s does not appear to be a relay filesystem\n",
681                         relay_path);
682                 return 1;
683         }
684
685         if (open_devices() != 0)
686                 return 1;
687
688         if (kill_running_trace) {
689                 stop_all_traces();
690                 return 0;
691         }
692
693         setlocale(LC_NUMERIC, "en_US");
694
695         ncpus = sysconf(_SC_NPROCESSORS_ONLN);
696         if (ncpus < 0) {
697                 fprintf(stderr, "sysconf(_SC_NPROCESSORS_ONLN) failed\n");
698                 return 1;
699         }
700
701         if (start_devices() != 0)
702                 return 1;
703
704         signal(SIGINT, handle_sigint);
705         signal(SIGHUP, handle_sigint);
706         signal(SIGTERM, handle_sigint);
707         signal(SIGALRM, handle_sigint);
708
709         atexit(stop_all_tracing);
710
711         if (stop_watch)
712                 alarm(stop_watch);
713
714         while (!is_done())
715                 sleep(1);
716
717         stop_all_threads();
718         stop_all_traces();
719         show_stats();
720
721         return 0;
722 }
723