[PATCH] Remember to terminate options structure
[blktrace.git] / blktrace.c
1 /*
2  * block queue tracing application
3  *
4  * Copyright (C) 2005 Jens Axboe <axboe@suse.de>
5  *
6  *  This program is free software; you can redistribute it and/or modify
7  *  it under the terms of the GNU General Public License as published by
8  *  the Free Software Foundation; either version 2 of the License, or
9  *  (at your option) any later version.
10  *
11  *  This program is distributed in the hope that it will be useful,
12  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
13  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14  *  GNU General Public License for more details.
15  *
16  *  You should have received a copy of the GNU General Public License
17  *  along with this program; if not, write to the Free Software
18  *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
19  *
20  */
21 #include <pthread.h>
22 #include <sys/types.h>
23 #include <sys/stat.h>
24 #include <unistd.h>
25 #include <locale.h>
26 #include <signal.h>
27 #include <fcntl.h>
28 #include <string.h>
29 #include <sys/ioctl.h>
30 #include <sys/param.h>
31 #include <sys/statfs.h>
32 #include <stdio.h>
33 #include <stdlib.h>
34 #include <sched.h>
35 #include <ctype.h>
36 #include <getopt.h>
37
38 #include "blktrace.h"
39
/* Version string printed by -v and embedded in the usage message */
static char blktrace_version[] = "0.90";

/*
 * Defaults for the relay buffers requested from the kernel: size of one
 * sub buffer in bytes, and the number of sub buffers.
 */
#define BUF_SIZE        (128 * 1024)
#define BUF_NR          (4)

/* statfs() f_type that the relay mount point is required to report */
#define RELAYFS_TYPE    0xF0B4A981
46
/*
 * Option definitions for getopt_long(): S_OPTS is the short option string and
 * l_opts maps each long option onto the same short option character.
 *
 * Long option names must be single words to be usable as "--name" on a
 * command line, so the buffer and output directory options use hyphenated
 * names rather than free-form descriptions.
 */
#define S_OPTS  "d:a:A:r:o:kw:vb:n:D:"
static struct option l_opts[] = {
        {
                .name = "dev",
                .has_arg = required_argument,
                .flag = NULL,
                .val = 'd'
        },
        {
                .name = "act-mask",
                .has_arg = required_argument,
                .flag = NULL,
                .val = 'a'
        },
        {
                .name = "set-mask",
                .has_arg = required_argument,
                .flag = NULL,
                .val = 'A'
        },
        {
                .name = "relay",
                .has_arg = required_argument,
                .flag = NULL,
                .val = 'r'
        },
        {
                .name = "output",
                .has_arg = required_argument,
                .flag = NULL,
                .val = 'o'
        },
        {
                .name = "kill",
                .has_arg = no_argument,
                .flag = NULL,
                .val = 'k'
        },
        {
                .name = "stopwatch",
                .has_arg = required_argument,
                .flag = NULL,
                .val = 'w'
        },
        {
                .name = "version",
                .has_arg = no_argument,
                .flag = NULL,
                .val = 'v'
        },
        {
                /* sub buffer size, given in KiB */
                .name = "buffer-size",
                .has_arg = required_argument,
                .flag = NULL,
                .val = 'b'
        },
        {
                /* number of sub buffers */
                .name = "num-sub-buffers",
                .has_arg = required_argument,
                .flag = NULL,
                .val = 'n'
        },
        {
                /* directory prepended to output file names */
                .name = "output-dir",
                .has_arg = required_argument,
                .flag = NULL,
                .val = 'D'
        },
        {
                /* terminating entry required by getopt_long() */
                .name = NULL,
                .has_arg = 0,
                .flag = NULL,
                .val = 0
        }
};
119
/*
 * State of one reader thread. A thread is started for every (device, cpu)
 * pair; it reads one relay trace file and writes events to one output fd.
 */
struct thread_information {
        int cpu;                        /* cpu index this thread serves */
        pthread_t thread;               /* handle filled by pthread_create() */

        int fd;                         /* relay trace file, opened by the thread */
        char fn[MAXPATHLEN + 64];       /* path of that relay trace file */

        /* The next four members are not referenced by the code visible here */
        void *buf;
        unsigned long buf_offset;
        unsigned int buf_subbuf;
        unsigned int sequence;

        pthread_mutex_t *fd_lock;       /* guards ofd when set, NULL otherwise */
        int ofd;                        /* output descriptor for trace data */

        unsigned long events_processed; /* events written to ofd so far */
        struct device_information *device;      /* device being traced */
};
137
/*
 * State of one traced block device.
 */
struct device_information {
        int fd;                 /* descriptor of the opened device node */
        char *path;             /* device path as given on the command line */
        char buts_name[32];     /* name copied back from BLKSTARTTRACE */
        int trace_started;      /* non-zero once BLKSTARTTRACE succeeded */
        struct thread_information *threads;     /* this device's ncpus threads */
};
145
146 static int ncpus;
147 static struct thread_information *thread_information;
148 static int ndevs;
149 static struct device_information *device_information;
150
151 /* command line option globals */
152 static char *relay_path;
153 static char *output_name;
154 static char *output_dir;
155 static int act_mask = ~0U;
156 static int kill_running_trace;
157 static unsigned int buf_size = BUF_SIZE;
158 static unsigned int buf_nr = BUF_NR;
159
160 #define is_done()       (*(volatile int *)(&done))
161 static volatile int done;
162
163 static pthread_mutex_t stdout_mutex = PTHREAD_MUTEX_INITIALIZER;
164
165 static void exit_trace(int status);
166
167 static int start_trace(struct device_information *dip)
168 {
169         struct blk_user_trace_setup buts;
170
171         memset(&buts, 0, sizeof(buts));
172         buts.buf_size = buf_size;
173         buts.buf_nr = buf_nr;
174         buts.act_mask = act_mask;
175
176         if (ioctl(dip->fd, BLKSTARTTRACE, &buts) < 0) {
177                 perror("BLKSTARTTRACE");
178                 return 1;
179         }
180
181         memcpy(dip->buts_name, buts.name, sizeof(dip->buts_name));
182         dip->trace_started = 1;
183         return 0;
184 }
185
186 static void stop_trace(struct device_information *dip)
187 {
188         if (dip->trace_started || kill_running_trace) {
189                 if (ioctl(dip->fd, BLKSTOPTRACE) < 0)
190                         perror("BLKSTOPTRACE");
191                 close(dip->fd);
192                 dip->trace_started = 0;
193         }
194 }
195
196 static void stop_all_traces(void)
197 {
198         struct device_information *dip;
199         int i;
200
201         for (dip = device_information, i = 0; i < ndevs; i++, dip++)
202                 stop_trace(dip);
203 }
204
205 static int read_data(struct thread_information *tip, void *buf, int len)
206 {
207         char *p = buf;
208         int ret, bytes_left = len;
209
210         while (!is_done() && bytes_left > 0) {
211                 ret = read(tip->fd, p, bytes_left);
212                 if (ret == bytes_left)
213                         return 0;
214
215                 if (ret < 0) {
216                         perror(tip->fn);
217                         fprintf(stderr,"Thread %d failed read of %s\n",
218                                 tip->cpu, tip->fn);
219                         break;
220                 } else if (ret > 0) {
221                         p += ret;
222                         bytes_left -= ret;
223                 } else
224                         usleep(1000);
225         }
226
227         return -1;
228 }
229
/*
 * Write all 'buf_len' bytes of 'buf' to 'fd', continuing after short writes.
 *
 * Returns 0 when everything was written (or buf_len is 0), 1 if write()
 * fails or reports that zero bytes were written.
 */
static int write_data(int fd, void *buf, unsigned int buf_len)
{
        char *src = buf;
        int remaining = buf_len;

        while (remaining > 0) {
                int put = write(fd, src, remaining);

                if (put < 0) {
                        perror("write");
                        return 1;
                }

                if (put == 0) {
                        fprintf(stderr, "Zero write?\n");
                        return 1;
                }

                if (put == remaining)
                        return 0;

                src += put;
                remaining -= put;
        }

        return 0;
}
255
/*
 * Allocate a buffer of 'nb' bytes and fill it from the thread's relay file.
 *
 * Returns the buffer, which the caller must free(), or NULL if the
 * allocation fails or read_data() does not deliver all 'nb' bytes.
 */
static void *extract_data(struct thread_information *tip, int nb)
{
        unsigned char *buf;

        buf = malloc(nb);
        if (!buf) {
                /* do not hand a NULL destination to read() */
                fprintf(stderr, "Out of memory, payload (%d)\n", nb);
                return NULL;
        }

        if (!read_data(tip, buf, nb))
                return buf;

        free(buf);
        return NULL;
}
267
268 /*
269  * trace may start inside 'bit' or may need to be gotten further on
270  */
271 static int get_event_slow(struct thread_information *tip,
272                           struct blk_io_trace *bit)
273 {
274         const int inc = sizeof(__u32);
275         struct blk_io_trace foo;
276         int offset;
277         void *p;
278
279         /*
280          * check is trace is inside
281          */
282         offset = 0;
283         p = bit;
284         while (offset < sizeof(*bit)) {
285                 p += inc;
286                 offset += inc;
287
288                 memcpy(&foo, p, inc);
289
290                 if (CHECK_MAGIC(&foo))
291                         break;
292         }
293
294         /*
295          * part trace found inside, read the rest
296          */
297         if (offset < sizeof(*bit)) {
298                 int good_bytes = sizeof(*bit) - offset;
299
300                 memmove(bit, p, good_bytes);
301                 p = (void *) bit + good_bytes;
302
303                 return read_data(tip, p, offset);
304         }
305
306         /*
307          * nothing found, keep looking for start of trace
308          */
309         do {
310                 if (read_data(tip, bit, sizeof(bit->magic)))
311                         return -1;
312         } while (!CHECK_MAGIC(bit));
313
314         /*
315          * now get the rest of it
316          */
317         p = &bit->sequence;
318         if (!read_data(tip, p, sizeof(*bit) - inc))
319                 return -1;
320
321         return 0;
322 }
323
324 /*
325  * Sometimes relayfs screws us a little, if an event crosses a sub buffer
326  * boundary. So keep looking forward in the trace data until an event
327  * is found
328  */
329 static int get_event(struct thread_information *tip, struct blk_io_trace *bit)
330 {
331         /*
332          * optimize for the common fast case, a full trace read that
333          * succeeds
334          */
335         if (read_data(tip, bit, sizeof(*bit)))
336                 return -1;
337
338         if (CHECK_MAGIC(bit))
339                 return 0;
340
341         /*
342          * ok that didn't work, the event may start somewhere inside the
343          * trace itself
344          */
345         return get_event_slow(tip, bit);
346 }
347
348 static inline void tip_fd_unlock(struct thread_information *tip)
349 {
350         if (tip->fd_lock)
351                 pthread_mutex_unlock(tip->fd_lock);
352 }
353
354 static inline void tip_fd_lock(struct thread_information *tip)
355 {
356         if (tip->fd_lock)
357                 pthread_mutex_lock(tip->fd_lock);
358 }
359
360 static void *extract(void *arg)
361 {
362         struct thread_information *tip = arg;
363         int pdu_len;
364         char *pdu_data;
365         struct blk_io_trace t;
366         pid_t pid = getpid();
367         cpu_set_t cpu_mask;
368
369         CPU_ZERO(&cpu_mask);
370         CPU_SET((tip->cpu), &cpu_mask);
371
372         if (sched_setaffinity(pid, sizeof(cpu_mask), &cpu_mask) == -1) {
373                 perror("sched_setaffinity");
374                 exit_trace(1);
375         }
376
377         snprintf(tip->fn, sizeof(tip->fn), "%s/block/%s/trace%d",
378                         relay_path, tip->device->buts_name, tip->cpu);
379         tip->fd = open(tip->fn, O_RDONLY);
380         if (tip->fd < 0) {
381                 perror(tip->fn);
382                 fprintf(stderr,"Thread %d failed open of %s\n", tip->cpu,
383                         tip->fn);
384                 exit_trace(1);
385         }
386
387         pdu_data = NULL;
388         while (!is_done()) {
389                 if (get_event(tip, &t))
390                         break;
391
392                 if (verify_trace(&t))
393                         break;
394
395                 pdu_len = t.pdu_len;
396
397                 trace_to_be(&t);
398
399                 if (pdu_len) {
400                         pdu_data = extract_data(tip, pdu_len);
401                         if (!pdu_data)
402                                 break;
403                 }
404
405                 /*
406                  * now we have both trace and payload, get a lock on the
407                  * output descriptor and send it off
408                  */
409                 tip_fd_lock(tip);
410
411                 if (write_data(tip->ofd, &t, sizeof(t))) {
412                         tip_fd_unlock(tip);
413                         break;
414                 }
415
416                 if (pdu_data && write_data(tip->ofd, pdu_data, pdu_len)) {
417                         tip_fd_unlock(tip);
418                         break;
419                 }
420
421                 tip_fd_unlock(tip);
422
423                 if (pdu_data) {
424                         free(pdu_data);
425                         pdu_data = NULL;
426                 }
427
428                 tip->events_processed++;
429         }
430
431         exit_trace(1);
432         return NULL;
433 }
434
435 static int start_threads(struct device_information *dip)
436 {
437         struct thread_information *tip;
438         char op[64];
439         int j, pipeline = output_name && !strcmp(output_name, "-");
440         int len;
441
442         for (tip = dip->threads, j = 0; j < ncpus; j++, tip++) {
443                 tip->cpu = j;
444                 tip->device = dip;
445                 tip->fd_lock = NULL;
446                 tip->events_processed = 0;
447
448                 if (pipeline) {
449                         tip->ofd = dup(STDOUT_FILENO);
450                         tip->fd_lock = &stdout_mutex;
451                 } else {
452                         len = 0;
453
454                         if (output_dir)
455                                 len = sprintf(op, "%s/", output_dir);
456
457                         if (output_name) {
458                                 sprintf(op + len, "%s.blktrace.%d", output_name,
459                                         tip->cpu);
460                         } else {
461                                 sprintf(op + len, "%s.blktrace.%d",
462                                         dip->buts_name, tip->cpu);
463                         }
464                         tip->ofd = open(op, O_CREAT|O_TRUNC|O_WRONLY, 0644);
465                 }
466
467                 if (tip->ofd < 0) {
468                         perror(op);
469                         return 1;
470                 }
471
472                 if (pthread_create(&tip->thread, NULL, extract, tip)) {
473                         perror("pthread_create");
474                         close(tip->ofd);
475                         return 1;
476                 }
477         }
478
479         return 0;
480 }
481
482 static void close_thread(struct thread_information *tip)
483 {
484         if (tip->fd != -1)
485                 close(tip->fd);
486         if (tip->ofd != -1)
487                 close(tip->ofd);
488
489         tip->fd = tip->ofd = -1;
490 }
491
492 static void stop_threads(struct device_information *dip)
493 {
494         struct thread_information *tip;
495         long ret;
496         int j;
497
498         for (tip = dip->threads, j = 0; j < ncpus; j++, tip++) {
499                 if (pthread_join(tip->thread, (void *) &ret))
500                         perror("thread_join");
501                 close_thread(tip);
502         }
503 }
504
505 static void stop_all_threads(void)
506 {
507         struct device_information *dip;
508         int i;
509
510         for (dip = device_information, i = 0; i < ndevs; i++, dip++)
511                 stop_threads(dip);
512 }
513
514 static void stop_all_tracing(void)
515 {
516         struct device_information *dip;
517         struct thread_information *tip;
518         int i, j;
519
520         for (dip = device_information, i = 0; i < ndevs; i++, dip++) {
521                 for (tip = dip->threads, j = 0; j < ncpus; j++, tip++)
522                         close_thread(tip);
523                 stop_trace(dip);
524         }
525 }
526
/*
 * Stop all tracing and terminate the process with exit code 'status'.
 * Does not return.
 */
static void exit_trace(int status)
{
        stop_all_tracing();

        exit(status);
}
532
533 static int resize_devices(char *path)
534 {
535         int size = (ndevs + 1) * sizeof(struct device_information);
536
537         device_information = realloc(device_information, size);
538         if (!device_information) {
539                 fprintf(stderr, "Out of memory, device %s (%d)\n", path, size);
540                 return 1;
541         }
542         device_information[ndevs].path = path;
543         ndevs++;
544         return 0;
545 }
546
547 static int open_devices(void)
548 {
549         struct device_information *dip;
550         int i;
551
552         for (dip = device_information, i = 0; i < ndevs; i++, dip++) {
553                 dip->fd = open(dip->path, O_RDONLY);
554                 if (dip->fd < 0) {
555                         perror(dip->path);
556                         return 1;
557                 }
558         }
559         return 0;
560 }
561
562 static int start_devices(void)
563 {
564         struct device_information *dip;
565         int i, j, size;
566
567         size = ncpus * sizeof(struct thread_information);
568         thread_information = malloc(size * ndevs);
569         if (!thread_information) {
570                 fprintf(stderr, "Out of memory, threads (%d)\n", size * ndevs);
571                 return 1;
572         }
573
574         for (dip = device_information, i = 0; i < ndevs; i++, dip++) {
575                 if (start_trace(dip)) {
576                         close(dip->fd);
577                         fprintf(stderr, "Failed to start trace on %s\n",
578                                 dip->path);
579                         break;
580                 }
581         }
582         if (i != ndevs) {
583                 for (dip = device_information, j = 0; j < i; j++, dip++)
584                         stop_trace(dip);
585                 return 1;
586         }
587
588         for (dip = device_information, i = 0; i < ndevs; i++, dip++) {
589                 dip->threads = thread_information + (i * ncpus);
590                 if (start_threads(dip)) {
591                         fprintf(stderr, "Failed to start worker threads\n");
592                         break;
593                 }
594         }
595         if (i != ndevs) {
596                 for (dip = device_information, j = 0; j < i; j++, dip++)
597                         stop_threads(dip);
598                 for (dip = device_information, i = 0; i < ndevs; i++, dip++)
599                         stop_trace(dip);
600                 return 1;
601         }
602
603         return 0;
604 }
605
606 static void show_stats(void)
607 {
608         int i, j;
609         struct device_information *dip;
610         struct thread_information *tip;
611         unsigned long long events_processed;
612
613         if (output_name && !strcmp(output_name, "-"))
614                 return;
615
616         for (dip = device_information, i = 0; i < ndevs; i++, dip++) {
617                 printf("Device: %s\n", dip->path);
618                 events_processed = 0;
619                 for (tip = dip->threads, j = 0; j < ncpus; j++, tip++) {
620                         printf("  CPU%3d: %20ld events\n",
621                                tip->cpu, tip->events_processed);
622                         events_processed += tip->events_processed;
623                 }
624                 printf("  Total:  %20lld events\n", events_processed);
625         }
626 }
627
/* Option summary printed by show_usage() after the program name and version */
static char usage_str[] =
        "-d <dev> [ -r relay path ] [ -o <output> ] [-k ] [ -w time ]\n"
        "[ -a action ] [ -A action mask ] [ -v ]\n\n"
        "\t-d Use specified device. May also be given last after options\n"
        "\t-r Path to mounted relayfs, defaults to /relay\n"
        "\t-o File(s) to send output to\n"
        "\t-D Directory to prepend to output file names\n"
        "\t-k Kill a running trace\n"
        "\t-w Stop after defined time, in seconds\n"
        "\t-a Only trace specified actions. See documentation\n"
        "\t-A Give trace mask as a single value. See documentation\n"
        "\t-b Sub buffer size in KiB\n"
        "\t-n Number of sub buffers\n"
        "\t-v Print program version info\n\n";
642
643 static void show_usage(char *program)
644 {
645         fprintf(stderr, "Usage: %s %s %s",program, blktrace_version, usage_str);
646 }
647
648 static void handle_sigint(__attribute__((__unused__)) int sig)
649 {
650         done = 1;
651 }
652
653 int main(int argc, char *argv[])
654 {
655         static char default_relay_path[] = "/relay";
656         struct statfs st;
657         int i, c;
658         int stop_watch = 0;
659         int act_mask_tmp = 0;
660
661         while ((c = getopt_long(argc, argv, S_OPTS, l_opts, NULL)) >= 0) {
662                 switch (c) {
663                 case 'a':
664                         i = find_mask_map(optarg);
665                         if (i < 0) {
666                                 fprintf(stderr,"Invalid action mask %s\n",
667                                         optarg);
668                                 return 1;
669                         }
670                         act_mask_tmp |= i;
671                         break;
672
673                 case 'A':
674                         if ((sscanf(optarg, "%x", &i) != 1) || 
675                                                         !valid_act_opt(i)) {
676                                 fprintf(stderr,
677                                         "Invalid set action mask %s/0x%x\n",
678                                         optarg, i);
679                                 return 1;
680                         }
681                         act_mask_tmp = i;
682                         break;
683
684                 case 'd':
685                         if (resize_devices(optarg) != 0)
686                                 return 1;
687                         break;
688
689                 case 'r':
690                         relay_path = optarg;
691                         break;
692
693                 case 'o':
694                         output_name = optarg;
695                         break;
696                 case 'k':
697                         kill_running_trace = 1;
698                         break;
699                 case 'w':
700                         stop_watch = atoi(optarg);
701                         if (stop_watch <= 0) {
702                                 fprintf(stderr,
703                                         "Invalid stopwatch value (%d secs)\n",
704                                         stop_watch);
705                                 return 1;
706                         }
707                         break;
708                 case 'v':
709                         printf("%s version %s\n", argv[0], blktrace_version);
710                         return 0;
711                 case 'b':
712                         buf_size = atoi(optarg);
713                         if (buf_size <= 0) {
714                                 fprintf(stderr,
715                                         "Invalid buffer size (%d)\n", buf_size);
716                                 return 1;
717                         }
718                         buf_size <<= 10;
719                         break;
720                 case 'n':
721                         buf_nr = atoi(optarg);
722                         if (buf_nr <= 0) {
723                                 fprintf(stderr,
724                                         "Invalid buffer nr (%d)\n", buf_nr);
725                                 return 1;
726                         }
727                         break;
728                 case 'D':
729                         output_dir = optarg;
730                         break;
731                 default:
732                         show_usage(argv[0]);
733                         return 1;
734                 }
735         }
736
737         while (optind < argc) {
738                 if (resize_devices(argv[optind++]) != 0)
739                         return 1;
740         }
741
742         if (ndevs == 0) {
743                 show_usage(argv[0]);
744                 return 1;
745         }
746
747         if (!relay_path)
748                 relay_path = default_relay_path;
749
750         if (act_mask_tmp != 0)
751                 act_mask = act_mask_tmp;
752
753         if (statfs(relay_path, &st) < 0) {
754                 perror("statfs");
755                 fprintf(stderr,"%s does not appear to be a valid path\n",
756                         relay_path);
757                 return 1;
758         } else if (st.f_type != RELAYFS_TYPE) {
759                 fprintf(stderr,"%s does not appear to be a relay filesystem\n",
760                         relay_path);
761                 return 1;
762         }
763
764         if (open_devices() != 0)
765                 return 1;
766
767         if (kill_running_trace) {
768                 stop_all_traces();
769                 return 0;
770         }
771
772         setlocale(LC_NUMERIC, "en_US");
773
774         ncpus = sysconf(_SC_NPROCESSORS_ONLN);
775         if (ncpus < 0) {
776                 fprintf(stderr, "sysconf(_SC_NPROCESSORS_ONLN) failed\n");
777                 return 1;
778         }
779
780         if (start_devices() != 0)
781                 return 1;
782
783         signal(SIGINT, handle_sigint);
784         signal(SIGHUP, handle_sigint);
785         signal(SIGTERM, handle_sigint);
786         signal(SIGALRM, handle_sigint);
787
788         atexit(stop_all_tracing);
789
790         if (stop_watch)
791                 alarm(stop_watch);
792
793         while (!is_done())
794                 sleep(1);
795
796         stop_all_threads();
797         stop_all_traces();
798         show_stats();
799
800         return 0;
801 }
802