[PATCH] blktrace: fix a few -W warnings
[blktrace.git] / blktrace.c
1 /*
2  * block queue tracing application
3  *
4  * Copyright (C) 2005 Jens Axboe <axboe@suse.de>
5  *
6  *  This program is free software; you can redistribute it and/or modify
7  *  it under the terms of the GNU General Public License as published by
8  *  the Free Software Foundation; either version 2 of the License, or
9  *  (at your option) any later version.
10  *
11  *  This program is distributed in the hope that it will be useful,
12  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
13  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14  *  GNU General Public License for more details.
15  *
16  *  You should have received a copy of the GNU General Public License
17  *  along with this program; if not, write to the Free Software
18  *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
19  *
20  */
21 #include <pthread.h>
22 #include <sys/types.h>
23 #include <sys/stat.h>
24 #include <unistd.h>
25 #include <locale.h>
26 #include <signal.h>
27 #include <fcntl.h>
28 #include <string.h>
29 #include <sys/ioctl.h>
30 #include <sys/param.h>
31 #include <sys/statfs.h>
32 #include <stdio.h>
33 #include <stdlib.h>
34 #include <sched.h>
35 #include <ctype.h>
36 #include <getopt.h>
37
38 #include "blktrace.h"
39
/* Version string reported by the -v option and in the usage text. */
static char blktrace_version[] = "0.90";

/* Default relay sub-buffer size in bytes and default sub-buffer count. */
#define BUF_SIZE	(128 * 1024)
#define BUF_NR		(4)

/* f_type value that statfs() must report for the relay mount point. */
#define RELAYFS_TYPE	0xF0B4A981
46
/* Short option string handed to getopt_long(); a ':' marks a required argument. */
#define S_OPTS	"d:a:A:r:o:kw:vb:n:D:"

/*
 * Long option table handed to getopt_long(). Each entry maps onto the
 * short option character stored in .val.
 *
 * getopt_long() finds the end of the table by looking for an entry filled
 * with zeros, so the final all-zero element is mandatory.
 */
static struct option l_opts[] = {
	{ .name = "dev",
	  .has_arg = required_argument, .flag = NULL, .val = 'd' },
	{ .name = "act-mask",
	  .has_arg = required_argument, .flag = NULL, .val = 'a' },
	{ .name = "set-mask",
	  .has_arg = required_argument, .flag = NULL, .val = 'A' },
	{ .name = "relay",
	  .has_arg = required_argument, .flag = NULL, .val = 'r' },
	{ .name = "output",
	  .has_arg = required_argument, .flag = NULL, .val = 'o' },
	{ .name = "kill",
	  .has_arg = no_argument, .flag = NULL, .val = 'k' },
	{ .name = "stopwatch",
	  .has_arg = required_argument, .flag = NULL, .val = 'w' },
	{ .name = "version",
	  .has_arg = no_argument, .flag = NULL, .val = 'v' },
	{ .name = "buffer size (in KiB)",
	  .has_arg = required_argument, .flag = NULL, .val = 'b' },
	{ .name = "nr of sub buffers",
	  .has_arg = required_argument, .flag = NULL, .val = 'n' },
	{ .name = "output directory",
	  .has_arg = required_argument, .flag = NULL, .val = 'D' },
	/* terminator required by getopt_long() */
	{ .name = NULL, .has_arg = 0, .flag = NULL, .val = 0 },
};
116
/*
 * State of one reader thread. One of these exists per (device, cpu) pair.
 */
struct thread_information {
	int cpu;			/* cpu index; also selects the trace<cpu> relay file */
	pthread_t thread;		/* handle filled in by pthread_create() */

	int fd;				/* relay file being read, -1 once closed */
	char fn[MAXPATHLEN + 64];	/* path of the relay file behind fd */
	void *buf;			/* not referenced in this file */
	unsigned long buf_offset;	/* not referenced in this file */
	unsigned int buf_subbuf;	/* not referenced in this file */
	unsigned int sequence;		/* not referenced in this file */

	pthread_mutex_t *fd_lock;	/* taken around writes to ofd; NULL if ofd is private */
	int ofd;			/* output descriptor, -1 once closed */

	unsigned long events_processed;	/* events written to ofd so far */
	struct device_information *device;	/* device this thread reads for */
};
134
/*
 * State of one traced block device.
 */
struct device_information {
	int fd;					/* descriptor of the opened device node */
	char *path;				/* device path as given on the command line */
	char buts_name[32];			/* name copied back from the BLKSTARTTRACE setup */
	int trace_started;			/* non-zero between start_trace() and stop_trace() */
	struct thread_information *threads;	/* first of the ncpus reader threads of this device */
};
142
143 static int ncpus;
144 static struct thread_information *thread_information;
145 static int ndevs;
146 static struct device_information *device_information;
147
148 /* command line option globals */
149 static char *relay_path;
150 static char *output_name;
151 static char *output_dir;
152 static int act_mask = ~0U;
153 static int kill_running_trace;
154 static unsigned int buf_size = BUF_SIZE;
155 static unsigned int buf_nr = BUF_NR;
156
157 #define is_done()       (*(volatile int *)(&done))
158 static volatile int done;
159
160 static pthread_mutex_t stdout_mutex = PTHREAD_MUTEX_INITIALIZER;
161
162 static void exit_trace(int status);
163
164 static int start_trace(struct device_information *dip)
165 {
166         struct blk_user_trace_setup buts;
167
168         memset(&buts, 0, sizeof(buts));
169         buts.buf_size = buf_size;
170         buts.buf_nr = buf_nr;
171         buts.act_mask = act_mask;
172
173         if (ioctl(dip->fd, BLKSTARTTRACE, &buts) < 0) {
174                 perror("BLKSTARTTRACE");
175                 return 1;
176         }
177
178         memcpy(dip->buts_name, buts.name, sizeof(dip->buts_name));
179         dip->trace_started = 1;
180         return 0;
181 }
182
183 static void stop_trace(struct device_information *dip)
184 {
185         if (dip->trace_started || kill_running_trace) {
186                 if (ioctl(dip->fd, BLKSTOPTRACE) < 0)
187                         perror("BLKSTOPTRACE");
188                 close(dip->fd);
189                 dip->trace_started = 0;
190         }
191 }
192
193 static void stop_all_traces(void)
194 {
195         struct device_information *dip;
196         int i;
197
198         for (dip = device_information, i = 0; i < ndevs; i++, dip++)
199                 stop_trace(dip);
200 }
201
202 static int read_data(struct thread_information *tip, void *buf, int len)
203 {
204         char *p = buf;
205         int ret, bytes_left = len;
206
207         while (!is_done() && bytes_left > 0) {
208                 ret = read(tip->fd, p, bytes_left);
209                 if (ret == bytes_left)
210                         return 0;
211
212                 if (ret < 0) {
213                         perror(tip->fn);
214                         fprintf(stderr,"Thread %d failed read of %s\n",
215                                 tip->cpu, tip->fn);
216                         break;
217                 } else if (ret > 0) {
218                         p += ret;
219                         bytes_left -= ret;
220                 } else
221                         usleep(1000);
222         }
223
224         return -1;
225 }
226
/*
 * Write all @buf_len bytes of @buf to @fd, continuing after short writes.
 *
 * Returns 0 when everything was written (including when @buf_len is 0),
 * 1 if write() fails or reports that zero bytes were written.
 */
static int write_data(int fd, void *buf, unsigned int buf_len)
{
	char *src = buf;
	int remaining = buf_len;

	while (remaining > 0) {
		int put = write(fd, src, remaining);

		if (put < 0) {
			perror("write");
			return 1;
		}
		if (put == 0) {
			fprintf(stderr, "Zero write?\n");
			return 1;
		}

		src += put;
		remaining -= put;
	}

	return 0;
}
252
253 static void *extract_data(struct thread_information *tip, int nb)
254 {
255         unsigned char *buf;
256
257         buf = malloc(nb);
258         if (!read_data(tip, buf, nb))
259                 return buf;
260
261         free(buf);
262         return NULL;
263 }
264
265 /*
266  * trace may start inside 'bit' or may need to be gotten further on
267  */
268 static int get_event_slow(struct thread_information *tip,
269                           struct blk_io_trace *bit)
270 {
271         const int inc = sizeof(__u32);
272         struct blk_io_trace foo;
273         unsigned int offset;
274         void *p;
275
276         /*
277          * check is trace is inside
278          */
279         offset = 0;
280         p = bit;
281         while (offset < sizeof(*bit)) {
282                 p += inc;
283                 offset += inc;
284
285                 memcpy(&foo, p, inc);
286
287                 if (CHECK_MAGIC(&foo))
288                         break;
289         }
290
291         /*
292          * part trace found inside, read the rest
293          */
294         if (offset < sizeof(*bit)) {
295                 int good_bytes = sizeof(*bit) - offset;
296
297                 memmove(bit, p, good_bytes);
298                 p = (void *) bit + good_bytes;
299
300                 return read_data(tip, p, offset);
301         }
302
303         /*
304          * nothing found, keep looking for start of trace
305          */
306         do {
307                 if (read_data(tip, bit, sizeof(bit->magic)))
308                         return -1;
309         } while (!CHECK_MAGIC(bit));
310
311         /*
312          * now get the rest of it
313          */
314         p = &bit->sequence;
315         if (!read_data(tip, p, sizeof(*bit) - inc))
316                 return -1;
317
318         return 0;
319 }
320
321 /*
322  * Sometimes relayfs screws us a little, if an event crosses a sub buffer
323  * boundary. So keep looking forward in the trace data until an event
324  * is found
325  */
326 static int get_event(struct thread_information *tip, struct blk_io_trace *bit)
327 {
328         /*
329          * optimize for the common fast case, a full trace read that
330          * succeeds
331          */
332         if (read_data(tip, bit, sizeof(*bit)))
333                 return -1;
334
335         if (CHECK_MAGIC(bit))
336                 return 0;
337
338         /*
339          * ok that didn't work, the event may start somewhere inside the
340          * trace itself
341          */
342         return get_event_slow(tip, bit);
343 }
344
345 static inline void tip_fd_unlock(struct thread_information *tip)
346 {
347         if (tip->fd_lock)
348                 pthread_mutex_unlock(tip->fd_lock);
349 }
350
351 static inline void tip_fd_lock(struct thread_information *tip)
352 {
353         if (tip->fd_lock)
354                 pthread_mutex_lock(tip->fd_lock);
355 }
356
357 static void *extract(void *arg)
358 {
359         struct thread_information *tip = arg;
360         int pdu_len;
361         char *pdu_data;
362         struct blk_io_trace t;
363         pid_t pid = getpid();
364         cpu_set_t cpu_mask;
365
366         CPU_ZERO(&cpu_mask);
367         CPU_SET((tip->cpu), &cpu_mask);
368
369         if (sched_setaffinity(pid, sizeof(cpu_mask), &cpu_mask) == -1) {
370                 perror("sched_setaffinity");
371                 exit_trace(1);
372         }
373
374         snprintf(tip->fn, sizeof(tip->fn), "%s/block/%s/trace%d",
375                         relay_path, tip->device->buts_name, tip->cpu);
376         tip->fd = open(tip->fn, O_RDONLY);
377         if (tip->fd < 0) {
378                 perror(tip->fn);
379                 fprintf(stderr,"Thread %d failed open of %s\n", tip->cpu,
380                         tip->fn);
381                 exit_trace(1);
382         }
383
384         pdu_data = NULL;
385         while (!is_done()) {
386                 if (get_event(tip, &t))
387                         break;
388
389                 if (verify_trace(&t))
390                         break;
391
392                 pdu_len = t.pdu_len;
393
394                 trace_to_be(&t);
395
396                 if (pdu_len) {
397                         pdu_data = extract_data(tip, pdu_len);
398                         if (!pdu_data)
399                                 break;
400                 }
401
402                 /*
403                  * now we have both trace and payload, get a lock on the
404                  * output descriptor and send it off
405                  */
406                 tip_fd_lock(tip);
407
408                 if (write_data(tip->ofd, &t, sizeof(t))) {
409                         tip_fd_unlock(tip);
410                         break;
411                 }
412
413                 if (pdu_data && write_data(tip->ofd, pdu_data, pdu_len)) {
414                         tip_fd_unlock(tip);
415                         break;
416                 }
417
418                 tip_fd_unlock(tip);
419
420                 if (pdu_data) {
421                         free(pdu_data);
422                         pdu_data = NULL;
423                 }
424
425                 tip->events_processed++;
426         }
427
428         exit_trace(1);
429         return NULL;
430 }
431
432 static int start_threads(struct device_information *dip)
433 {
434         struct thread_information *tip;
435         char op[64];
436         int j, pipeline = output_name && !strcmp(output_name, "-");
437         int len;
438
439         for (tip = dip->threads, j = 0; j < ncpus; j++, tip++) {
440                 tip->cpu = j;
441                 tip->device = dip;
442                 tip->fd_lock = NULL;
443                 tip->events_processed = 0;
444
445                 if (pipeline) {
446                         tip->ofd = dup(STDOUT_FILENO);
447                         tip->fd_lock = &stdout_mutex;
448                 } else {
449                         len = 0;
450
451                         if (output_dir)
452                                 len = sprintf(op, "%s/", output_dir);
453
454                         if (output_name) {
455                                 sprintf(op + len, "%s.blktrace.%d", output_name,
456                                         tip->cpu);
457                         } else {
458                                 sprintf(op + len, "%s.blktrace.%d",
459                                         dip->buts_name, tip->cpu);
460                         }
461                         tip->ofd = open(op, O_CREAT|O_TRUNC|O_WRONLY, 0644);
462                 }
463
464                 if (tip->ofd < 0) {
465                         perror(op);
466                         return 1;
467                 }
468
469                 if (pthread_create(&tip->thread, NULL, extract, tip)) {
470                         perror("pthread_create");
471                         close(tip->ofd);
472                         return 1;
473                 }
474         }
475
476         return 0;
477 }
478
479 static void close_thread(struct thread_information *tip)
480 {
481         if (tip->fd != -1)
482                 close(tip->fd);
483         if (tip->ofd != -1)
484                 close(tip->ofd);
485
486         tip->fd = tip->ofd = -1;
487 }
488
489 static void stop_threads(struct device_information *dip)
490 {
491         struct thread_information *tip;
492         long ret;
493         int j;
494
495         for (tip = dip->threads, j = 0; j < ncpus; j++, tip++) {
496                 if (pthread_join(tip->thread, (void *) &ret))
497                         perror("thread_join");
498                 close_thread(tip);
499         }
500 }
501
502 static void stop_all_threads(void)
503 {
504         struct device_information *dip;
505         int i;
506
507         for (dip = device_information, i = 0; i < ndevs; i++, dip++)
508                 stop_threads(dip);
509 }
510
511 static void stop_all_tracing(void)
512 {
513         struct device_information *dip;
514         struct thread_information *tip;
515         int i, j;
516
517         for (dip = device_information, i = 0; i < ndevs; i++, dip++) {
518                 for (tip = dip->threads, j = 0; j < ncpus; j++, tip++)
519                         close_thread(tip);
520                 stop_trace(dip);
521         }
522 }
523
/*
 * Tear down all tracing state, then terminate the process with @status.
 * Does not return.
 */
static void exit_trace(int status)
{
	stop_all_tracing();

	exit(status);
}
529
530 static int resize_devices(char *path)
531 {
532         int size = (ndevs + 1) * sizeof(struct device_information);
533
534         device_information = realloc(device_information, size);
535         if (!device_information) {
536                 fprintf(stderr, "Out of memory, device %s (%d)\n", path, size);
537                 return 1;
538         }
539         device_information[ndevs].path = path;
540         ndevs++;
541         return 0;
542 }
543
544 static int open_devices(void)
545 {
546         struct device_information *dip;
547         int i;
548
549         for (dip = device_information, i = 0; i < ndevs; i++, dip++) {
550                 dip->fd = open(dip->path, O_RDONLY);
551                 if (dip->fd < 0) {
552                         perror(dip->path);
553                         return 1;
554                 }
555         }
556         return 0;
557 }
558
559 static int start_devices(void)
560 {
561         struct device_information *dip;
562         int i, j, size;
563
564         size = ncpus * sizeof(struct thread_information);
565         thread_information = malloc(size * ndevs);
566         if (!thread_information) {
567                 fprintf(stderr, "Out of memory, threads (%d)\n", size * ndevs);
568                 return 1;
569         }
570
571         for (dip = device_information, i = 0; i < ndevs; i++, dip++) {
572                 if (start_trace(dip)) {
573                         close(dip->fd);
574                         fprintf(stderr, "Failed to start trace on %s\n",
575                                 dip->path);
576                         break;
577                 }
578         }
579         if (i != ndevs) {
580                 for (dip = device_information, j = 0; j < i; j++, dip++)
581                         stop_trace(dip);
582                 return 1;
583         }
584
585         for (dip = device_information, i = 0; i < ndevs; i++, dip++) {
586                 dip->threads = thread_information + (i * ncpus);
587                 if (start_threads(dip)) {
588                         fprintf(stderr, "Failed to start worker threads\n");
589                         break;
590                 }
591         }
592         if (i != ndevs) {
593                 for (dip = device_information, j = 0; j < i; j++, dip++)
594                         stop_threads(dip);
595                 for (dip = device_information, i = 0; i < ndevs; i++, dip++)
596                         stop_trace(dip);
597                 return 1;
598         }
599
600         return 0;
601 }
602
603 static void show_stats(void)
604 {
605         int i, j;
606         struct device_information *dip;
607         struct thread_information *tip;
608         unsigned long long events_processed;
609
610         if (output_name && !strcmp(output_name, "-"))
611                 return;
612
613         for (dip = device_information, i = 0; i < ndevs; i++, dip++) {
614                 printf("Device: %s\n", dip->path);
615                 events_processed = 0;
616                 for (tip = dip->threads, j = 0; j < ncpus; j++, tip++) {
617                         printf("  CPU%3d: %20ld events\n",
618                                tip->cpu, tip->events_processed);
619                         events_processed += tip->events_processed;
620                 }
621                 printf("  Total:  %20lld events\n", events_processed);
622         }
623 }
624
625 static char usage_str[] = \
626         "-d <dev> [ -r relay path ] [ -o <output> ] [-k ] [ -w time ]\n" \
627         "[ -a action ] [ -A action mask ] [ -v ]\n\n" \
628         "\t-d Use specified device. May also be given last after options\n" \
629         "\t-r Path to mounted relayfs, defaults to /relay\n" \
630         "\t-o File(s) to send output to\n" \
631         "\t-D Directory to prepend to output file names\n" \
632         "\t-k Kill a running trace\n" \
633         "\t-w Stop after defined time, in seconds\n" \
634         "\t-a Only trace specified actions. See documentation\n" \
635         "\t-A Give trace mask as a single value. See documentation\n" \
636         "\t-b Sub buffer size in KiB\n" \
637         "\t-n Number of sub buffers\n" \
638         "\t-v Print program version info\n\n";
639
640 static void show_usage(char *program)
641 {
642         fprintf(stderr, "Usage: %s %s %s",program, blktrace_version, usage_str);
643 }
644
645 static void handle_sigint(__attribute__((__unused__)) int sig)
646 {
647         done = 1;
648 }
649
650 int main(int argc, char *argv[])
651 {
652         static char default_relay_path[] = "/relay";
653         struct statfs st;
654         int i, c;
655         int stop_watch = 0;
656         int act_mask_tmp = 0;
657
658         while ((c = getopt_long(argc, argv, S_OPTS, l_opts, NULL)) >= 0) {
659                 switch (c) {
660                 case 'a':
661                         i = find_mask_map(optarg);
662                         if (i < 0) {
663                                 fprintf(stderr,"Invalid action mask %s\n",
664                                         optarg);
665                                 return 1;
666                         }
667                         act_mask_tmp |= i;
668                         break;
669
670                 case 'A':
671                         if ((sscanf(optarg, "%x", &i) != 1) || 
672                                                         !valid_act_opt(i)) {
673                                 fprintf(stderr,
674                                         "Invalid set action mask %s/0x%x\n",
675                                         optarg, i);
676                                 return 1;
677                         }
678                         act_mask_tmp = i;
679                         break;
680
681                 case 'd':
682                         if (resize_devices(optarg) != 0)
683                                 return 1;
684                         break;
685
686                 case 'r':
687                         relay_path = optarg;
688                         break;
689
690                 case 'o':
691                         output_name = optarg;
692                         break;
693                 case 'k':
694                         kill_running_trace = 1;
695                         break;
696                 case 'w':
697                         stop_watch = atoi(optarg);
698                         if (stop_watch <= 0) {
699                                 fprintf(stderr,
700                                         "Invalid stopwatch value (%d secs)\n",
701                                         stop_watch);
702                                 return 1;
703                         }
704                         break;
705                 case 'v':
706                         printf("%s version %s\n", argv[0], blktrace_version);
707                         return 0;
708                 case 'b':
709                         buf_size = atoi(optarg);
710                         if (buf_size <= 0) {
711                                 fprintf(stderr,
712                                         "Invalid buffer size (%d)\n", buf_size);
713                                 return 1;
714                         }
715                         buf_size <<= 10;
716                         break;
717                 case 'n':
718                         buf_nr = atoi(optarg);
719                         if (buf_nr <= 0) {
720                                 fprintf(stderr,
721                                         "Invalid buffer nr (%d)\n", buf_nr);
722                                 return 1;
723                         }
724                         break;
725                 case 'D':
726                         output_dir = optarg;
727                         break;
728                 default:
729                         show_usage(argv[0]);
730                         return 1;
731                 }
732         }
733
734         while (optind < argc) {
735                 if (resize_devices(argv[optind++]) != 0)
736                         return 1;
737         }
738
739         if (ndevs == 0) {
740                 show_usage(argv[0]);
741                 return 1;
742         }
743
744         if (!relay_path)
745                 relay_path = default_relay_path;
746
747         if (act_mask_tmp != 0)
748                 act_mask = act_mask_tmp;
749
750         if (statfs(relay_path, &st) < 0) {
751                 perror("statfs");
752                 fprintf(stderr,"%s does not appear to be a valid path\n",
753                         relay_path);
754                 return 1;
755         } else if (st.f_type != RELAYFS_TYPE) {
756                 fprintf(stderr,"%s does not appear to be a relay filesystem\n",
757                         relay_path);
758                 return 1;
759         }
760
761         if (open_devices() != 0)
762                 return 1;
763
764         if (kill_running_trace) {
765                 stop_all_traces();
766                 return 0;
767         }
768
769         setlocale(LC_NUMERIC, "en_US");
770
771         ncpus = sysconf(_SC_NPROCESSORS_ONLN);
772         if (ncpus < 0) {
773                 fprintf(stderr, "sysconf(_SC_NPROCESSORS_ONLN) failed\n");
774                 return 1;
775         }
776
777         if (start_devices() != 0)
778                 return 1;
779
780         signal(SIGINT, handle_sigint);
781         signal(SIGHUP, handle_sigint);
782         signal(SIGTERM, handle_sigint);
783         signal(SIGALRM, handle_sigint);
784
785         atexit(stop_all_tracing);
786
787         if (stop_watch)
788                 alarm(stop_watch);
789
790         while (!is_done())
791                 sleep(1);
792
793         stop_all_threads();
794         stop_all_traces();
795         show_stats();
796
797         return 0;
798 }
799