[PATCH] Add -D output/input directory option to blkparse and blktrace
[blktrace.git] / blktrace.c
1 /*
2  * block queue tracing application
3  *
4  * Copyright (C) 2005 Jens Axboe <axboe@suse.de>
5  *
6  *  This program is free software; you can redistribute it and/or modify
7  *  it under the terms of the GNU General Public License as published by
8  *  the Free Software Foundation; either version 2 of the License, or
9  *  (at your option) any later version.
10  *
11  *  This program is distributed in the hope that it will be useful,
12  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
13  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14  *  GNU General Public License for more details.
15  *
16  *  You should have received a copy of the GNU General Public License
17  *  along with this program; if not, write to the Free Software
18  *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
19  *
20  */
21 #include <pthread.h>
22 #include <sys/types.h>
23 #include <sys/stat.h>
24 #include <unistd.h>
25 #include <locale.h>
26 #include <signal.h>
27 #include <fcntl.h>
28 #include <string.h>
29 #include <sys/ioctl.h>
30 #include <sys/param.h>
31 #include <sys/statfs.h>
32 #include <stdio.h>
33 #include <stdlib.h>
34 #include <sched.h>
35 #include <ctype.h>
36 #include <getopt.h>
37
38 #include "blktrace.h"
39
/* Version string printed by -v and in the usage banner. */
static char blktrace_version[] = "0.90";

/* Default relay sub-buffer size (bytes) and default sub-buffer count. */
#define BUF_SIZE        (128 * 1024)
#define BUF_NR          (4)

/* f_type that statfs() must report for the relay mount point. */
#define RELAYFS_TYPE    0xF0B4A981

/* Expands to a mask_map initializer: { BLK_TC_FOO, "FOO", "BLK_TC_FOO" }. */
#define DECLARE_MASK_MAP(mask)  \
        { BLK_TC_##mask, #mask, "BLK_TC_"#mask }

/* Non-zero when str equals either name of the entry, ignoring case. */
#define COMPARE_MASK_MAP(mmp, str)                      \
        (strcasecmp((mmp)->short_form, (str)) == 0 ||   \
         strcasecmp((mmp)->long_form, (str)) == 0)

/* A raw mask given with -A must lie in [1, 1 << BLK_TC_SHIFT). */
#define VALID_SET(x)    (((x) >= 1) && ((x) < (1 << BLK_TC_SHIFT)))
53
54 struct mask_map {
55         int mask;
56         char *short_form;
57         char *long_form;
58 };
59
60 static struct mask_map mask_maps[] = {
61         DECLARE_MASK_MAP(READ),
62         DECLARE_MASK_MAP(WRITE),
63         DECLARE_MASK_MAP(BARRIER),
64         DECLARE_MASK_MAP(SYNC),
65         DECLARE_MASK_MAP(QUEUE),
66         DECLARE_MASK_MAP(REQUEUE),
67         DECLARE_MASK_MAP(ISSUE),
68         DECLARE_MASK_MAP(COMPLETE),
69         DECLARE_MASK_MAP(FS),
70         DECLARE_MASK_MAP(PC),
71 };
72
/* Short option string; every letter here has a matching l_opts entry. */
#define S_OPTS  "d:a:A:r:o:kw:vb:n:D:"

/*
 * Long option table for getopt_long().
 *
 * The table MUST end with an all-zero element: getopt_long() walks it until
 * it finds a NULL name, so without the terminator an unrecognised long
 * option makes it read past the end of the array.
 *
 * The original long names for -b, -n and -D contain spaces, which makes them
 * awkward to type.  They are kept for compatibility and hyphenated aliases
 * that map to the same short option are provided next to them.
 */
static struct option l_opts[] = {
        {
                .name = "dev",
                .has_arg = required_argument,
                .flag = NULL,
                .val = 'd'
        },
        {
                .name = "act-mask",
                .has_arg = required_argument,
                .flag = NULL,
                .val = 'a'
        },
        {
                .name = "set-mask",
                .has_arg = required_argument,
                .flag = NULL,
                .val = 'A'
        },
        {
                .name = "relay",
                .has_arg = required_argument,
                .flag = NULL,
                .val = 'r'
        },
        {
                .name = "output",
                .has_arg = required_argument,
                .flag = NULL,
                .val = 'o'
        },
        {
                .name = "kill",
                .has_arg = no_argument,
                .flag = NULL,
                .val = 'k'
        },
        {
                .name = "stopwatch",
                .has_arg = required_argument,
                .flag = NULL,
                .val = 'w'
        },
        {
                .name = "version",
                .has_arg = no_argument,
                .flag = NULL,
                .val = 'v'
        },
        {
                .name = "buffer size (in KiB)",
                .has_arg = required_argument,
                .flag = NULL,
                .val = 'b'
        },
        {
                .name = "buffer-size",
                .has_arg = required_argument,
                .flag = NULL,
                .val = 'b'
        },
        {
                .name = "nr of sub buffers",
                .has_arg = required_argument,
                .flag = NULL,
                .val = 'n'
        },
        {
                .name = "num-sub-buffers",
                .has_arg = required_argument,
                .flag = NULL,
                .val = 'n'
        },
        {
                .name = "output directory",
                .has_arg = required_argument,
                .flag = NULL,
                .val = 'D'
        },
        {
                .name = "output-dir",
                .has_arg = required_argument,
                .flag = NULL,
                .val = 'D'
        },
        {
                /* terminator required by getopt_long() */
                .name = NULL,
                .has_arg = 0,
                .flag = NULL,
                .val = 0
        },
};
142
/*
 * State of one worker thread.  There is one worker per (device, CPU) pair;
 * it reads that CPU's relay trace file and copies events to the output.
 */
struct thread_information {
        int cpu;                        /* CPU index this worker serves */
        pthread_t thread;               /* handle filled in by pthread_create() */

        int fd;                         /* relay trace file, opened in extract() */
        char fn[MAXPATHLEN + 64];       /* path of that relay trace file */
        void *buf;                      /* not referenced by the code in this file view */
        unsigned long buf_offset;       /* not referenced by the code in this file view */
        unsigned int buf_subbuf;        /* not referenced by the code in this file view */
        unsigned int sequence;          /* not referenced by the code in this file view */

        /*
         * Serialises writes to ofd.  NULL when the worker has its own output
         * file; points at stdout_mutex when all workers share stdout.
         */
        pthread_mutex_t *fd_lock;
        int ofd;                        /* descriptor the events are written to */

        unsigned long events_processed; /* events written so far */
        struct device_information *device;      /* device this worker belongs to */
};
160
/*
 * State of one traced block device given on the command line.
 */
struct device_information {
        int fd;                         /* device node, opened in open_devices(); target of the trace ioctls */
        char *path;                     /* device path as passed by the user (points into argv) */
        char buts_name[32];             /* name copied back from the BLKSTARTTRACE setup; used to build relay paths */
        int trace_started;              /* 1 after a successful start_trace(), reset by stop_trace() */
        struct thread_information *threads;     /* first of this device's ncpus workers */
};
168
169 static int ncpus;
170 static struct thread_information *thread_information;
171 static int ndevs;
172 static struct device_information *device_information;
173
174 /* command line option globals */
175 static char *relay_path;
176 static char *output_name;
177 static char *output_dir;
178 static int act_mask = ~0U;
179 static int kill_running_trace;
180 static unsigned int buf_size = BUF_SIZE;
181 static unsigned int buf_nr = BUF_NR;
182
183 #define is_done()       (*(volatile int *)(&done))
184 static volatile int done;
185
186 static pthread_mutex_t stdout_mutex = PTHREAD_MUTEX_INITIALIZER;
187
188 static void exit_trace(int status);
189
190 static int find_mask_map(char *string)
191 {
192         unsigned int i;
193
194         for (i = 0; i < sizeof(mask_maps)/sizeof(mask_maps[0]); i++)
195                 if (COMPARE_MASK_MAP(&mask_maps[i], string))
196                         return mask_maps[i].mask;
197
198         return -1;
199 }
200
201 static int start_trace(struct device_information *dip)
202 {
203         struct blk_user_trace_setup buts;
204
205         memset(&buts, 0, sizeof(buts));
206         buts.buf_size = buf_size;
207         buts.buf_nr = buf_nr;
208         buts.act_mask = act_mask;
209
210         if (ioctl(dip->fd, BLKSTARTTRACE, &buts) < 0) {
211                 perror("BLKSTARTTRACE");
212                 return 1;
213         }
214
215         memcpy(dip->buts_name, buts.name, sizeof(dip->buts_name));
216         dip->trace_started = 1;
217         return 0;
218 }
219
220 static void stop_trace(struct device_information *dip)
221 {
222         if (dip->trace_started || kill_running_trace) {
223                 if (ioctl(dip->fd, BLKSTOPTRACE) < 0)
224                         perror("BLKSTOPTRACE");
225                 close(dip->fd);
226                 dip->trace_started = 0;
227         }
228 }
229
230 static void stop_all_traces(void)
231 {
232         struct device_information *dip;
233         int i;
234
235         for (dip = device_information, i = 0; i < ndevs; i++, dip++)
236                 stop_trace(dip);
237 }
238
239 static int read_data(struct thread_information *tip, void *buf, int len)
240 {
241         char *p = buf;
242         int ret, bytes_left = len;
243
244         while (!is_done() && bytes_left > 0) {
245                 ret = read(tip->fd, p, bytes_left);
246                 if (ret == bytes_left)
247                         return 0;
248
249                 if (ret < 0) {
250                         perror(tip->fn);
251                         fprintf(stderr,"Thread %d failed read of %s\n",
252                                 tip->cpu, tip->fn);
253                         break;
254                 } else if (ret > 0) {
255                         p += ret;
256                         bytes_left -= ret;
257                 } else
258                         usleep(1000);
259         }
260
261         return -1;
262 }
263
/*
 * Write all buf_len bytes of buf to fd, resuming after short writes.
 * Returns 0 on success (including buf_len == 0) and 1 if write() fails or
 * reports that nothing was written.
 */
static int write_data(int fd, void *buf, unsigned int buf_len)
{
        char *cursor = buf;
        int remaining = buf_len;

        while (remaining > 0) {
                int put = write(fd, cursor, remaining);

                if (put == remaining)
                        return 0;

                if (put < 0) {
                        perror("write");
                        return 1;
                }

                if (put == 0) {
                        fprintf(stderr, "Zero write?\n");
                        return 1;
                }

                cursor += put;
                remaining -= put;
        }

        return 0;
}
289
290 static void *extract_data(struct thread_information *tip, int nb)
291 {
292         unsigned char *buf;
293
294         buf = malloc(nb);
295         if (!read_data(tip, buf, nb))
296                 return buf;
297
298         free(buf);
299         return NULL;
300 }
301
302 /*
303  * trace may start inside 'bit' or may need to be gotten further on
304  */
305 static int get_event_slow(struct thread_information *tip,
306                           struct blk_io_trace *bit)
307 {
308         const int inc = sizeof(__u32);
309         struct blk_io_trace foo;
310         int offset;
311         void *p;
312
313         /*
314          * check is trace is inside
315          */
316         offset = 0;
317         p = bit;
318         while (offset < sizeof(*bit)) {
319                 p += inc;
320                 offset += inc;
321
322                 memcpy(&foo, p, inc);
323
324                 if (CHECK_MAGIC(&foo))
325                         break;
326         }
327
328         /*
329          * part trace found inside, read the rest
330          */
331         if (offset < sizeof(*bit)) {
332                 int good_bytes = sizeof(*bit) - offset;
333
334                 memmove(bit, p, good_bytes);
335                 p = (void *) bit + good_bytes;
336
337                 return read_data(tip, p, offset);
338         }
339
340         /*
341          * nothing found, keep looking for start of trace
342          */
343         do {
344                 if (read_data(tip, bit, sizeof(bit->magic)))
345                         return -1;
346         } while (!CHECK_MAGIC(bit));
347
348         /*
349          * now get the rest of it
350          */
351         p = &bit->sequence;
352         if (!read_data(tip, p, sizeof(*bit) - inc))
353                 return -1;
354
355         return 0;
356 }
357
358 /*
359  * Sometimes relayfs screws us a little, if an event crosses a sub buffer
360  * boundary. So keep looking forward in the trace data until an event
361  * is found
362  */
363 static int get_event(struct thread_information *tip, struct blk_io_trace *bit)
364 {
365         /*
366          * optimize for the common fast case, a full trace read that
367          * succeeds
368          */
369         if (read_data(tip, bit, sizeof(*bit)))
370                 return -1;
371
372         if (CHECK_MAGIC(bit))
373                 return 0;
374
375         /*
376          * ok that didn't work, the event may start somewhere inside the
377          * trace itself
378          */
379         return get_event_slow(tip, bit);
380 }
381
382 static inline void tip_fd_unlock(struct thread_information *tip)
383 {
384         if (tip->fd_lock)
385                 pthread_mutex_unlock(tip->fd_lock);
386 }
387
388 static inline void tip_fd_lock(struct thread_information *tip)
389 {
390         if (tip->fd_lock)
391                 pthread_mutex_lock(tip->fd_lock);
392 }
393
394 static void *extract(void *arg)
395 {
396         struct thread_information *tip = arg;
397         int pdu_len;
398         char *pdu_data;
399         struct blk_io_trace t;
400         pid_t pid = getpid();
401         cpu_set_t cpu_mask;
402
403         CPU_ZERO(&cpu_mask);
404         CPU_SET((tip->cpu), &cpu_mask);
405
406         if (sched_setaffinity(pid, sizeof(cpu_mask), &cpu_mask) == -1) {
407                 perror("sched_setaffinity");
408                 exit_trace(1);
409         }
410
411         snprintf(tip->fn, sizeof(tip->fn), "%s/block/%s/trace%d",
412                         relay_path, tip->device->buts_name, tip->cpu);
413         tip->fd = open(tip->fn, O_RDONLY);
414         if (tip->fd < 0) {
415                 perror(tip->fn);
416                 fprintf(stderr,"Thread %d failed open of %s\n", tip->cpu,
417                         tip->fn);
418                 exit_trace(1);
419         }
420
421         pdu_data = NULL;
422         while (!is_done()) {
423                 if (get_event(tip, &t))
424                         break;
425
426                 if (verify_trace(&t))
427                         break;
428
429                 pdu_len = t.pdu_len;
430
431                 trace_to_be(&t);
432
433                 if (pdu_len) {
434                         pdu_data = extract_data(tip, pdu_len);
435                         if (!pdu_data)
436                                 break;
437                 }
438
439                 /*
440                  * now we have both trace and payload, get a lock on the
441                  * output descriptor and send it off
442                  */
443                 tip_fd_lock(tip);
444
445                 if (write_data(tip->ofd, &t, sizeof(t))) {
446                         tip_fd_unlock(tip);
447                         break;
448                 }
449
450                 if (pdu_data && write_data(tip->ofd, pdu_data, pdu_len)) {
451                         tip_fd_unlock(tip);
452                         break;
453                 }
454
455                 tip_fd_unlock(tip);
456
457                 if (pdu_data) {
458                         free(pdu_data);
459                         pdu_data = NULL;
460                 }
461
462                 tip->events_processed++;
463         }
464
465         exit_trace(1);
466         return NULL;
467 }
468
469 static int start_threads(struct device_information *dip)
470 {
471         struct thread_information *tip;
472         char op[64];
473         int j, pipeline = output_name && !strcmp(output_name, "-");
474         int len;
475
476         for (tip = dip->threads, j = 0; j < ncpus; j++, tip++) {
477                 tip->cpu = j;
478                 tip->device = dip;
479                 tip->fd_lock = NULL;
480                 tip->events_processed = 0;
481
482                 if (pipeline) {
483                         tip->ofd = dup(STDOUT_FILENO);
484                         tip->fd_lock = &stdout_mutex;
485                 } else {
486                         len = 0;
487
488                         if (output_dir)
489                                 len = sprintf(op, "%s/", output_dir);
490
491                         if (output_name) {
492                                 sprintf(op + len, "%s.blktrace.%d", output_name,
493                                         tip->cpu);
494                         } else {
495                                 sprintf(op + len, "%s.blktrace.%d",
496                                         dip->buts_name, tip->cpu);
497                         }
498                         tip->ofd = open(op, O_CREAT|O_TRUNC|O_WRONLY, 0644);
499                 }
500
501                 if (tip->ofd < 0) {
502                         perror(op);
503                         return 1;
504                 }
505
506                 if (pthread_create(&tip->thread, NULL, extract, tip)) {
507                         perror("pthread_create");
508                         close(tip->ofd);
509                         return 1;
510                 }
511         }
512
513         return 0;
514 }
515
516 static void close_thread(struct thread_information *tip)
517 {
518         if (tip->fd != -1)
519                 close(tip->fd);
520         if (tip->ofd != -1)
521                 close(tip->ofd);
522
523         tip->fd = tip->ofd = -1;
524 }
525
526 static void stop_threads(struct device_information *dip)
527 {
528         struct thread_information *tip;
529         long ret;
530         int j;
531
532         for (tip = dip->threads, j = 0; j < ncpus; j++, tip++) {
533                 if (pthread_join(tip->thread, (void *) &ret))
534                         perror("thread_join");
535                 close_thread(tip);
536         }
537 }
538
539 static void stop_all_threads(void)
540 {
541         struct device_information *dip;
542         int i;
543
544         for (dip = device_information, i = 0; i < ndevs; i++, dip++)
545                 stop_threads(dip);
546 }
547
548 static void stop_all_tracing(void)
549 {
550         struct device_information *dip;
551         struct thread_information *tip;
552         int i, j;
553
554         for (dip = device_information, i = 0; i < ndevs; i++, dip++) {
555                 for (tip = dip->threads, j = 0; j < ncpus; j++, tip++)
556                         close_thread(tip);
557                 stop_trace(dip);
558         }
559 }
560
/*
 * Tear down all tracing state and terminate the process with 'status'.
 * Does not return.
 */
static void exit_trace(int status)
{
        stop_all_tracing();
        exit(status);
}
566
567 static int resize_devices(char *path)
568 {
569         int size = (ndevs + 1) * sizeof(struct device_information);
570
571         device_information = realloc(device_information, size);
572         if (!device_information) {
573                 fprintf(stderr, "Out of memory, device %s (%d)\n", path, size);
574                 return 1;
575         }
576         device_information[ndevs].path = path;
577         ndevs++;
578         return 0;
579 }
580
581 static int open_devices(void)
582 {
583         struct device_information *dip;
584         int i;
585
586         for (dip = device_information, i = 0; i < ndevs; i++, dip++) {
587                 dip->fd = open(dip->path, O_RDONLY);
588                 if (dip->fd < 0) {
589                         perror(dip->path);
590                         return 1;
591                 }
592         }
593         return 0;
594 }
595
596 static int start_devices(void)
597 {
598         struct device_information *dip;
599         int i, j, size;
600
601         size = ncpus * sizeof(struct thread_information);
602         thread_information = malloc(size * ndevs);
603         if (!thread_information) {
604                 fprintf(stderr, "Out of memory, threads (%d)\n", size * ndevs);
605                 return 1;
606         }
607
608         for (dip = device_information, i = 0; i < ndevs; i++, dip++) {
609                 if (start_trace(dip)) {
610                         close(dip->fd);
611                         fprintf(stderr, "Failed to start trace on %s\n",
612                                 dip->path);
613                         break;
614                 }
615         }
616         if (i != ndevs) {
617                 for (dip = device_information, j = 0; j < i; j++, dip++)
618                         stop_trace(dip);
619                 return 1;
620         }
621
622         for (dip = device_information, i = 0; i < ndevs; i++, dip++) {
623                 dip->threads = thread_information + (i * ncpus);
624                 if (start_threads(dip)) {
625                         fprintf(stderr, "Failed to start worker threads\n");
626                         break;
627                 }
628         }
629         if (i != ndevs) {
630                 for (dip = device_information, j = 0; j < i; j++, dip++)
631                         stop_threads(dip);
632                 for (dip = device_information, i = 0; i < ndevs; i++, dip++)
633                         stop_trace(dip);
634                 return 1;
635         }
636
637         return 0;
638 }
639
640 static void show_stats(void)
641 {
642         int i, j;
643         struct device_information *dip;
644         struct thread_information *tip;
645         unsigned long long events_processed;
646
647         if (output_name && !strcmp(output_name, "-"))
648                 return;
649
650         for (dip = device_information, i = 0; i < ndevs; i++, dip++) {
651                 printf("Device: %s\n", dip->path);
652                 events_processed = 0;
653                 for (tip = dip->threads, j = 0; j < ncpus; j++, tip++) {
654                         printf("  CPU%3d: %20ld events\n",
655                                tip->cpu, tip->events_processed);
656                         events_processed += tip->events_processed;
657                 }
658                 printf("  Total:  %20lld events\n", events_processed);
659         }
660 }
661
/*
 * Help text printed after the program name and version by show_usage().
 * The synopsis lists every option that is described below it.
 */
static char usage_str[] =
        "-d <dev> [ -r relay path ] [ -o <output> ] [ -D <dir> ] [ -k ]\n"
        "[ -w time ] [ -a action ] [ -A action mask ] [ -b size ]\n"
        "[ -n number ] [ -v ]\n\n"
        "\t-d Use specified device. May also be given last after options\n"
        "\t-r Path to mounted relayfs, defaults to /relay\n"
        "\t-o File(s) to send output to\n"
        "\t-D Directory to prepend to output file names\n"
        "\t-k Kill a running trace\n"
        "\t-w Stop after defined time, in seconds\n"
        "\t-a Only trace specified actions. See documentation\n"
        "\t-A Give trace mask as a single value. See documentation\n"
        "\t-b Sub buffer size in KiB\n"
        "\t-n Number of sub buffers\n"
        "\t-v Print program version info\n\n";
676
677 static void show_usage(char *program)
678 {
679         fprintf(stderr, "Usage: %s %s %s",program, blktrace_version, usage_str);
680 }
681
682 static void handle_sigint(__attribute__((__unused__)) int sig)
683 {
684         done = 1;
685 }
686
687 int main(int argc, char *argv[])
688 {
689         static char default_relay_path[] = "/relay";
690         struct statfs st;
691         int i, c;
692         int stop_watch = 0;
693         int act_mask_tmp = 0;
694
695         while ((c = getopt_long(argc, argv, S_OPTS, l_opts, NULL)) >= 0) {
696                 switch (c) {
697                 case 'a':
698                         i = find_mask_map(optarg);
699                         if (i < 0) {
700                                 fprintf(stderr,"Invalid action mask %s\n",
701                                         optarg);
702                                 return 1;
703                         }
704                         act_mask_tmp |= i;
705                         break;
706
707                 case 'A':
708                         if ((sscanf(optarg, "%x", &i) != 1) || !VALID_SET(i)) {
709                                 fprintf(stderr,
710                                         "Invalid set action mask %s/0x%x\n",
711                                         optarg, i);
712                                 return 1;
713                         }
714                         act_mask_tmp = i;
715                         break;
716
717                 case 'd':
718                         if (resize_devices(optarg) != 0)
719                                 return 1;
720                         break;
721
722                 case 'r':
723                         relay_path = optarg;
724                         break;
725
726                 case 'o':
727                         output_name = optarg;
728                         break;
729                 case 'k':
730                         kill_running_trace = 1;
731                         break;
732                 case 'w':
733                         stop_watch = atoi(optarg);
734                         if (stop_watch <= 0) {
735                                 fprintf(stderr,
736                                         "Invalid stopwatch value (%d secs)\n",
737                                         stop_watch);
738                                 return 1;
739                         }
740                         break;
741                 case 'v':
742                         printf("%s version %s\n", argv[0], blktrace_version);
743                         return 0;
744                 case 'b':
745                         buf_size = atoi(optarg);
746                         if (buf_size <= 0) {
747                                 fprintf(stderr,
748                                         "Invalid buffer size (%d)\n", buf_size);
749                                 return 1;
750                         }
751                         buf_size <<= 10;
752                         break;
753                 case 'n':
754                         buf_nr = atoi(optarg);
755                         if (buf_nr <= 0) {
756                                 fprintf(stderr,
757                                         "Invalid buffer nr (%d)\n", buf_nr);
758                                 return 1;
759                         }
760                         break;
761                 case 'D':
762                         output_dir = optarg;
763                         break;
764                 default:
765                         show_usage(argv[0]);
766                         return 1;
767                 }
768         }
769
770         while (optind < argc) {
771                 if (resize_devices(argv[optind++]) != 0)
772                         return 1;
773         }
774
775         if (ndevs == 0) {
776                 show_usage(argv[0]);
777                 return 1;
778         }
779
780         if (!relay_path)
781                 relay_path = default_relay_path;
782
783         if (act_mask_tmp != 0)
784                 act_mask = act_mask_tmp;
785
786         if (statfs(relay_path, &st) < 0) {
787                 perror("statfs");
788                 fprintf(stderr,"%s does not appear to be a valid path\n",
789                         relay_path);
790                 return 1;
791         } else if (st.f_type != RELAYFS_TYPE) {
792                 fprintf(stderr,"%s does not appear to be a relay filesystem\n",
793                         relay_path);
794                 return 1;
795         }
796
797         if (open_devices() != 0)
798                 return 1;
799
800         if (kill_running_trace) {
801                 stop_all_traces();
802                 return 0;
803         }
804
805         setlocale(LC_NUMERIC, "en_US");
806
807         ncpus = sysconf(_SC_NPROCESSORS_ONLN);
808         if (ncpus < 0) {
809                 fprintf(stderr, "sysconf(_SC_NPROCESSORS_ONLN) failed\n");
810                 return 1;
811         }
812
813         if (start_devices() != 0)
814                 return 1;
815
816         signal(SIGINT, handle_sigint);
817         signal(SIGHUP, handle_sigint);
818         signal(SIGTERM, handle_sigint);
819         signal(SIGALRM, handle_sigint);
820
821         atexit(stop_all_tracing);
822
823         if (stop_watch)
824                 alarm(stop_watch);
825
826         while (!is_done())
827                 sleep(1);
828
829         stop_all_threads();
830         stop_all_traces();
831         show_stats();
832
833         return 0;
834 }
835