[PATCH] blktrace: fix exit
[blktrace.git] / blktrace.c
1 /*
2  * block queue tracing application
3  *
4  * Copyright (C) 2005 Jens Axboe <axboe@suse.de>
5  *
6  *  This program is free software; you can redistribute it and/or modify
7  *  it under the terms of the GNU General Public License as published by
8  *  the Free Software Foundation; either version 2 of the License, or
9  *  (at your option) any later version.
10  *
11  *  This program is distributed in the hope that it will be useful,
12  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
13  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14  *  GNU General Public License for more details.
15  *
16  *  You should have received a copy of the GNU General Public License
17  *  along with this program; if not, write to the Free Software
18  *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
19  *
20  */
#include <pthread.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <unistd.h>
#include <locale.h>
#include <signal.h>
#include <fcntl.h>
#include <string.h>
#include <sys/ioctl.h>
#include <sys/param.h>
#include <sys/statfs.h>
#include <stdio.h>
#include <stdlib.h>
#include <sched.h>
#include <ctype.h>
#include <errno.h>
#include <getopt.h>
#include <sys/mman.h>

#include "blktrace.h"
40
/*
 * Relay buffer geometry: passed to the kernel in the trace setup request
 * and used as the size of the per-cpu mmap window (BUF_SIZE * BUF_NR).
 */
#define BUF_NR          4
#define BUF_SIZE        (128 * 1024)

/* statfs() f_type value the relay path must report */
#define RELAYFS_TYPE    0xF0B4A981

/* builds one mask_map initializer: { BLK_TC_x, "x", "BLK_TC_x" } */
#define DECLARE_MASK_MAP(mask)          { BLK_TC_##mask, #mask, "BLK_TC_"#mask }

/* true when str matches either spelling of the entry, ignoring case */
#define COMPARE_MASK_MAP(mmp, str)                                      \
        (strcasecmp((mmp)->short_form, (str)) == 0 ||                   \
         strcasecmp((mmp)->long_form, (str)) == 0)

/* range check applied to a numeric mask given with -A */
#define VALID_SET(x)    (((x) >= 1) && ((x) < (1 << BLK_TC_SHIFT)))
52
/*
 * One action-mask name: the mask value plus the two spellings accepted
 * on the command line. Both strings are initialised from string
 * literals, so they are const.
 */
struct mask_map {
        int mask;
        const char *short_form;
        const char *long_form;
};
58
59 struct mask_map mask_maps[] = {
60         DECLARE_MASK_MAP(READ),
61         DECLARE_MASK_MAP(WRITE),
62         DECLARE_MASK_MAP(BARRIER),
63         DECLARE_MASK_MAP(SYNC),
64         DECLARE_MASK_MAP(QUEUE),
65         DECLARE_MASK_MAP(REQUEUE),
66         DECLARE_MASK_MAP(ISSUE),
67         DECLARE_MASK_MAP(COMPLETE),
68         DECLARE_MASK_MAP(FS),
69         DECLARE_MASK_MAP(PC),
70 };
71
/* short options for getopt_long(); a trailing ':' means "takes an argument" */
#define S_OPTS  "d:a:A:r:o:kw:"

/*
 * Long option table: { name, has_arg, flag, val }. Every entry returns
 * the character of its short-option equivalent.
 */
static struct option l_opts[] = {
        { "dev",        required_argument,      NULL,   'd' },
        { "act-mask",   required_argument,      NULL,   'a' },
        { "set-mask",   required_argument,      NULL,   'A' },
        { "relay",      required_argument,      NULL,   'r' },
        { "output",     required_argument,      NULL,   'o' },
        { "kill",       no_argument,            NULL,   'k' },
        { "stopwatch",  required_argument,      NULL,   'w' },
        { NULL,         no_argument,            NULL,   0   }
};
123
/*
 * State of one worker thread; there is one per (device, cpu) pair.
 */
struct thread_information {
        int cpu;                        /* cpu index, also the traceN file suffix */
        pthread_t thread;               /* handle used to join the worker */

        int fd;                         /* relay trace file opened by the worker */
        char fn[MAXPATHLEN + 64];       /* path of that file */
        void *buf;                      /* mmap of the relay file, when mmap is used */
        unsigned long buf_offset;       /* read position inside buf */
        unsigned int buf_subbuf;        /* sub-buffer that buf_offset is in */
        unsigned int sequence;          /* sequence of the last trace accepted */

        pthread_mutex_t *fd_lock;       /* guards ofd when it is shared; may be NULL */
        int ofd;                        /* descriptor the traces are written to */

        unsigned long events_processed; /* traces written so far */
        struct device_information *device;      /* owning device */
};
141
/*
 * One traced block device.
 */
struct device_information {
        int fd;                         /* descriptor of the opened device path */
        char *path;                     /* device path from the command line */
        char buts_name[32];             /* name returned by the trace setup ioctl */
        int trace_started;              /* set once BLKSTARTTRACE succeeded */
        struct thread_information *threads;     /* first of ncpus worker slots */
};
149
/* number of cpus and the worker slots for all devices (ndevs * ncpus) */
static int ncpus;
static struct thread_information *thread_information;
/* devices named on the command line */
static int ndevs;
static struct device_information *device_information;

/* command line option globals */
static char *relay_path;
static char *output_name;
static int act_mask = ~0;               /* default: every action enabled */
static int kill_running_trace;
static int use_mmap;

/*
 * Shutdown flag. It is set from the signal handler, so it has to be a
 * volatile sig_atomic_t; the workers and main() poll it via is_done().
 */
#define is_done()       (done)
static volatile sig_atomic_t done;

/* serialises writers when every worker shares stdout */
static pthread_mutex_t stdout_mutex = PTHREAD_MUTEX_INITIALIZER;

static void exit_trace(int status);
168
169 static int find_mask_map(char *string)
170 {
171         int i;
172
173         for (i = 0; i < sizeof(mask_maps)/sizeof(mask_maps[0]); i++)
174                 if (COMPARE_MASK_MAP(&mask_maps[i], string))
175                         return mask_maps[i].mask;
176
177         return -1;
178 }
179
180 static int start_trace(struct device_information *dip)
181 {
182         struct blk_user_trace_setup buts;
183
184         memset(&buts, 0, sizeof(buts));
185         buts.buf_size = BUF_SIZE;
186         buts.buf_nr = BUF_NR;
187         buts.act_mask = act_mask;
188
189         if (ioctl(dip->fd, BLKSTARTTRACE, &buts) < 0) {
190                 perror("BLKSTARTTRACE");
191                 return 1;
192         }
193
194         memcpy(dip->buts_name, buts.name, sizeof(dip->buts_name));
195         dip->trace_started = 1;
196         return 0;
197 }
198
199 static void stop_trace(struct device_information *dip)
200 {
201         if (dip->trace_started || kill_running_trace) {
202                 if (ioctl(dip->fd, BLKSTOPTRACE) < 0)
203                         perror("BLKSTOPTRACE");
204                 close(dip->fd);
205                 dip->trace_started = 0;
206         }
207 }
208
209 static void stop_all_traces(void)
210 {
211         struct device_information *dip;
212         int i;
213
214         for (dip = device_information, i = 0; i < ndevs; i++, dip++)
215                 stop_trace(dip);
216 }
217
218 static int get_data_read(struct thread_information *tip, void *buf, int len)
219 {
220         char *p = buf;
221         int ret, bytes_left = len;
222
223         while (!is_done() && bytes_left > 0) {
224                 ret = read(tip->fd, p, bytes_left);
225                 if (ret == len)
226                         return 0;
227
228                 if (ret < 0) {
229                         perror(tip->fn);
230                         fprintf(stderr,"Thread %d failed read of %s\n",
231                                 tip->cpu, tip->fn);
232                         exit_trace(1);
233                 } else if (ret > 0) {
234                         fprintf(stderr,"Thread %d misread %s %d,%d\n",
235                                 tip->cpu, tip->fn, ret, len);
236                         exit_trace(1);
237                 } else {
238                         p += ret;
239                         bytes_left -= ret;
240                 }
241
242                 usleep(10000);
243         }
244
245         return -1;
246 }
247
248 static int get_data_mmap(struct thread_information *tip, void *buf, int len,
249                          int check_magic)
250 {
251         if (len > (BUF_SIZE * (tip->buf_subbuf + 1)) - tip->buf_offset) {
252                 tip->buf_subbuf++;
253                 if (tip->buf_subbuf == BUF_NR)
254                         tip->buf_subbuf = 0;
255
256                 tip->buf_offset = tip->buf_subbuf * BUF_SIZE;
257         }
258
259         while (1) {
260                 struct blk_io_trace *t = buf;
261
262                 memcpy(buf, tip->buf + tip->buf_offset, len);
263
264                 if (!check_magic)
265                         break;
266
267                 if (CHECK_MAGIC(t) && t->sequence >= tip->sequence) {
268                         tip->sequence = t->sequence;
269                         break;
270                 }
271         
272                 if (is_done())
273                         return -1;
274
275                 usleep(10000);
276         }
277
278         tip->buf_offset += len;
279         return 0;
280 }
281
282 static int get_data(struct thread_information *tip, void *buf, int len,
283                     int check_magic)
284 {
285         if (tip->buf)
286                 return get_data_mmap(tip, buf, len, check_magic);
287         else
288                 return get_data_read(tip, buf, len);
289 }
290
291 static void *extract_data(struct thread_information *tip, char *ofn, int nb)
292 {
293         unsigned char *buf;
294
295         buf = malloc(nb);
296         if (!get_data(tip, buf, nb, 0))
297                 return buf;
298
299         free(buf);
300         exit_trace(1);
301         return NULL;
302 }
303
304 static inline void tip_fd_unlock(struct thread_information *tip)
305 {
306         if (tip->fd_lock)
307                 pthread_mutex_unlock(tip->fd_lock);
308 }
309
310 static inline void tip_fd_lock(struct thread_information *tip)
311 {
312         if (tip->fd_lock)
313                 pthread_mutex_lock(tip->fd_lock);
314 }
315
316 static void *extract(void *arg)
317 {
318         struct thread_information *tip = arg;
319         int ret, pdu_len;
320         char dp[64], *pdu_data;
321         struct blk_io_trace t;
322         pid_t pid = getpid();
323         cpu_set_t cpu_mask;
324
325         CPU_ZERO(&cpu_mask);
326         CPU_SET((tip->cpu), &cpu_mask);
327
328         if (sched_setaffinity(pid, sizeof(cpu_mask), &cpu_mask) == -1) {
329                 perror("sched_setaffinity");
330                 exit_trace(1);
331         }
332
333         snprintf(tip->fn, sizeof(tip->fn), "%s/block/%s/trace%d",
334                         relay_path, tip->device->buts_name, tip->cpu);
335         tip->fd = open(tip->fn, O_RDONLY);
336         if (tip->fd < 0) {
337                 perror(tip->fn);
338                 fprintf(stderr,"Thread %d failed open of %s\n", tip->cpu,
339                         tip->fn);
340                 exit_trace(1);
341         }
342
343         if (use_mmap) {
344                 tip->buf = mmap(NULL, BUF_SIZE * BUF_NR, PROT_READ,
345                                         MAP_PRIVATE | MAP_POPULATE, tip->fd, 0);
346                 if (tip->buf == MAP_FAILED) {
347                         perror("mmap");
348                         exit_trace(1);
349                 }
350         }
351
352         pdu_data = NULL;
353         while (!is_done()) {
354                 if (get_data(tip, &t, sizeof(t), 1))
355                         break;
356
357                 if (verify_trace(&t))
358                         exit_trace(1);
359
360                 pdu_len = t.pdu_len;
361
362                 trace_to_be(&t);
363
364                 if (pdu_len)
365                         pdu_data = extract_data(tip, dp, pdu_len);
366
367                 /*
368                  * now we have both trace and payload, get a lock on the
369                  * output descriptor and send it off
370                  */
371                 tip_fd_lock(tip);
372
373                 ret = write(tip->ofd, &t, sizeof(t));
374                 if (ret < 0) {
375                         fprintf(stderr,"Thread %d failed write\n", tip->cpu);
376                         tip_fd_unlock(tip);
377                         exit_trace(1);
378                 }
379
380                 if (pdu_data) {
381                         ret = write(tip->ofd, pdu_data, pdu_len);
382                         if (ret != pdu_len) {
383                                 perror("write pdu data");
384                                 tip_fd_unlock(tip);
385                                 exit_trace(1);
386                         }
387
388                         free(pdu_data);
389                         pdu_data = NULL;
390                 }
391
392                 tip_fd_unlock(tip);
393                 tip->events_processed++;
394         }
395
396         return NULL;
397 }
398
399 static int start_threads(struct device_information *dip)
400 {
401         struct thread_information *tip;
402         char op[64];
403         int j, pipeline = output_name && !strcmp(output_name, "-");
404
405         for (tip = dip->threads, j = 0; j < ncpus; j++, tip++) {
406                 tip->cpu = j;
407                 tip->device = dip;
408                 tip->fd_lock = NULL;
409                 tip->events_processed = 0;
410
411                 if (pipeline) {
412                         tip->ofd = dup(STDOUT_FILENO);
413                         tip->fd_lock = &stdout_mutex;
414                 } else {
415                         if (output_name)
416                                 sprintf(op, "%s_%s_out.%d", output_name,
417                                         dip->buts_name, tip->cpu);
418                         else
419                                 sprintf(op, "%s_out.%d",
420                                         dip->buts_name, tip->cpu);
421                         tip->ofd = open(op, O_CREAT|O_TRUNC|O_WRONLY, 0644);
422                 }
423
424                 if (tip->ofd < 0) {
425                         perror(op);
426                         return 1;
427                 }
428
429                 if (pthread_create(&tip->thread, NULL, extract, tip)) {
430                         perror("pthread_create");
431                         close(tip->ofd);
432                         return 1;
433                 }
434         }
435
436         return 0;
437 }
438
439 static void close_thread(struct thread_information *tip)
440 {
441         if (tip->buf)
442                 munmap(tip->buf, BUF_SIZE * BUF_NR);
443
444         if (tip->fd != -1)
445                 close(tip->fd);
446         if (tip->ofd != -1)
447                 close(tip->ofd);
448
449         tip->fd = tip->ofd = -1;
450 }
451
452 static void stop_threads(struct device_information *dip)
453 {
454         struct thread_information *tip;
455         long ret;
456         int j;
457
458         for (tip = dip->threads, j = 0; j < ncpus; j++, tip++) {
459                 if (pthread_join(tip->thread, (void *) &ret))
460                         perror("thread_join");
461                 close_thread(tip);
462         }
463 }
464
465 static void stop_all_threads(void)
466 {
467         struct device_information *dip;
468         int i;
469
470         for (dip = device_information, i = 0; i < ndevs; i++, dip++)
471                 stop_threads(dip);
472 }
473
474 static void stop_all_tracing(void)
475 {
476         struct device_information *dip;
477         struct thread_information *tip;
478         int i, j;
479
480         for (dip = device_information, i = 0; i < ndevs; i++, dip++) {
481                 for (tip = dip->threads, j = 0; j < ncpus; j++, tip++)
482                         close_thread(tip);
483                 stop_trace(dip);
484         }
485 }
486
/*
 * Abort path: tear down all tracing state, then end the process with
 * the given exit status.
 */
static void exit_trace(int status)
{
        stop_all_tracing();

        exit(status);
}
492
493 static int resize_devices(char *path)
494 {
495         int size = (ndevs + 1) * sizeof(struct device_information);
496
497         device_information = realloc(device_information, size);
498         if (!device_information) {
499                 fprintf(stderr, "Out of memory, device %s (%d)\n", path, size);
500                 return 1;
501         }
502         device_information[ndevs].path = path;
503         ndevs++;
504         return 0;
505 }
506
507 static int open_devices(void)
508 {
509         struct device_information *dip;
510         int i;
511
512         for (dip = device_information, i = 0; i < ndevs; i++, dip++) {
513                 dip->fd = open(dip->path, O_RDONLY);
514                 if (dip->fd < 0) {
515                         perror(dip->path);
516                         return 1;
517                 }
518         }
519         return 0;
520 }
521
522 static int start_devices(void)
523 {
524         struct device_information *dip;
525         int i, j, size;
526
527         size = ncpus * sizeof(struct thread_information);
528         thread_information = malloc(size * ndevs);
529         if (!thread_information) {
530                 fprintf(stderr, "Out of memory, threads (%d)\n", size * ndevs);
531                 return 1;
532         }
533
534         for (dip = device_information, i = 0; i < ndevs; i++, dip++) {
535                 if (start_trace(dip)) {
536                         close(dip->fd);
537                         fprintf(stderr, "Failed to start trace on %s\n",
538                                 dip->path);
539                         break;
540                 }
541         }
542         if (i != ndevs) {
543                 for (dip = device_information, j = 0; j < i; j++, dip++)
544                         stop_trace(dip);
545                 return 1;
546         }
547
548         for (dip = device_information, i = 0; i < ndevs; i++, dip++) {
549                 dip->threads = thread_information + (i * ncpus);
550                 if (start_threads(dip)) {
551                         fprintf(stderr, "Failed to start worker threads\n");
552                         break;
553                 }
554         }
555         if (i != ndevs) {
556                 for (dip = device_information, j = 0; j < i; j++, dip++)
557                         stop_threads(dip);
558                 for (dip = device_information, i = 0; i < ndevs; i++, dip++)
559                         stop_trace(dip);
560                 return 1;
561         }
562
563         return 0;
564 }
565
566 static void show_stats(void)
567 {
568         int i, j;
569         struct device_information *dip;
570         struct thread_information *tip;
571         unsigned long long events_processed;
572   
573         if (output_name && !strcmp(output_name, "-"))
574                 return;
575
576         for (dip = device_information, i = 0; i < ndevs; i++, dip++) {
577                 printf("Device: %s\n", dip->path);
578                 events_processed = 0;
579                 for (tip = dip->threads, j = 0; j < ncpus; j++, tip++) {
580                         printf("  CPU%3d: %20ld events\n",
581                                tip->cpu, tip->events_processed);
582                         events_processed += tip->events_processed;
583                 }
584                 printf("  Total:  %20lld events\n", events_processed);
585         }
586 }
587   
/*
 * Print the command line synopsis to stderr. program is the name the
 * tool was invoked as. The text lists every option in S_OPTS.
 */
static void show_usage(char *program)
{
        fprintf(stderr,"Usage: %s [-d <dev>] [-r <relay path>] "
                       "[-o <output>] [-k] [-w <seconds>]\n"
                       "       [-a <trace> [-a <trace>]] "
                       "[-A <hex mask>] <dev>\n",
                program);
}
594
595 static void handle_sigint(int sig)
596 {
597         done = 1;
598 }
599
600 int main(int argc, char *argv[])
601 {
602         static char default_relay_path[] = "/relay";
603         struct statfs st;
604         int i, c;
605         int stop_watch = 0;
606         int act_mask_tmp = 0;
607
608         while ((c = getopt_long(argc, argv, S_OPTS, l_opts, NULL)) >= 0) {
609                 switch (c) {
610                 case 'a':
611                         i = find_mask_map(optarg);
612                         if (i < 0) {
613                                 fprintf(stderr,"Invalid action mask %s\n",
614                                         optarg);
615                                 return 1;
616                         }
617                         act_mask_tmp |= i;
618                         break;
619
620                 case 'A':
621                         if ((sscanf(optarg, "%x", &i) != 1) || !VALID_SET(i)) {
622                                 fprintf(stderr,
623                                         "Invalid set action mask %s/0x%x\n",
624                                         optarg, i);
625                                 return 1;
626                         }
627                         act_mask_tmp = i;
628                         break;
629
630                 case 'd':
631                         if (resize_devices(optarg) != 0)
632                                 return 1;
633                         break;
634
635                 case 'r':
636                         relay_path = optarg;
637                         break;
638
639                 case 'o':
640                         output_name = optarg;
641                         break;
642                 case 'k':
643                         kill_running_trace = 1;
644                         break;
645                 case 'w':
646                         stop_watch = atoi(optarg);
647                         if (stop_watch <= 0) {
648                                 fprintf(stderr,
649                                         "Invalid stopwatch value (%d secs)\n",
650                                         stop_watch);
651                                 return 1;
652                         }
653                         break;
654
655                 default:
656                         show_usage(argv[0]);
657                         return 1;
658                 }
659         }
660
661         while (optind < argc) {
662                 if (resize_devices(argv[optind++]) != 0)
663                         return 1;
664         }
665
666         if (ndevs == 0) {
667                 show_usage(argv[0]);
668                 return 1;
669         }
670
671         if (!relay_path)
672                 relay_path = default_relay_path;
673
674         if (act_mask_tmp != 0)
675                 act_mask = act_mask_tmp;
676
677         if (statfs(relay_path, &st) < 0) {
678                 perror("statfs");
679                 fprintf(stderr,"%s does not appear to be a valid path\n",
680                         relay_path);
681                 return 1;
682         } else if (st.f_type != RELAYFS_TYPE) {
683                 fprintf(stderr,"%s does not appear to be a relay filesystem\n",
684                         relay_path);
685                 return 1;
686         }
687
688         if (open_devices() != 0)
689                 return 1;
690
691         if (kill_running_trace) {
692                 stop_all_traces();
693                 return 0;
694         }
695
696         setlocale(LC_NUMERIC, "en_US");
697
698         ncpus = sysconf(_SC_NPROCESSORS_ONLN);
699         if (ncpus < 0) {
700                 fprintf(stderr, "sysconf(_SC_NPROCESSORS_ONLN) failed\n");
701                 return 1;
702         }
703
704         if (start_devices() != 0)
705                 return 1;
706
707         signal(SIGINT, handle_sigint);
708         signal(SIGHUP, handle_sigint);
709         signal(SIGTERM, handle_sigint);
710         signal(SIGALRM, handle_sigint);
711
712         atexit(stop_all_tracing);
713
714         if (stop_watch)
715                 alarm(stop_watch);
716
717         while (!is_done())
718                 sleep(1);
719
720         stop_all_threads();
721         stop_all_traces();
722         show_stats();
723
724         return 0;
725 }
726