[PATCH] Add -v for being more verbose on non-fatal errors
[blktrace.git] / blktrace.c
1 /*
2  * block queue tracing application
3  *
4  * Copyright (C) 2005 Jens Axboe <axboe@suse.de>
5  *
6  *  This program is free software; you can redistribute it and/or modify
7  *  it under the terms of the GNU General Public License as published by
8  *  the Free Software Foundation; either version 2 of the License, or
9  *  (at your option) any later version.
10  *
11  *  This program is distributed in the hope that it will be useful,
12  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
13  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14  *  GNU General Public License for more details.
15  *
16  *  You should have received a copy of the GNU General Public License
17  *  along with this program; if not, write to the Free Software
18  *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
19  *
20  */
21 #include <pthread.h>
22 #include <sys/types.h>
23 #include <sys/stat.h>
24 #include <unistd.h>
25 #include <locale.h>
26 #include <signal.h>
27 #include <fcntl.h>
28 #include <string.h>
29 #include <sys/ioctl.h>
30 #include <sys/param.h>
31 #include <sys/statfs.h>
32 #include <stdio.h>
33 #include <stdlib.h>
34 #include <sched.h>
35 #include <ctype.h>
36 #include <getopt.h>
37
38 #include "blktrace.h"
39
40 static char blktrace_version[] = "0.90";
41
42 #define BUF_SIZE        (128 *1024)
43 #define BUF_NR          (4)
44
45 #define RELAYFS_TYPE    0xF0B4A981
46
47 #define S_OPTS  "d:a:A:r:o:kw:Vb:n:D:"
48 static struct option l_opts[] = {
49         {
50                 .name = "dev",
51                 .has_arg = required_argument,
52                 .flag = NULL,
53                 .val = 'd'
54         },
55         {
56                 .name = "act-mask",
57                 .has_arg = required_argument,
58                 .flag = NULL,
59                 .val = 'a'
60         },
61         {
62                 .name = "set-mask",
63                 .has_arg = required_argument,
64                 .flag = NULL,
65                 .val = 'A'
66         },
67         {
68                 .name = "relay",
69                 .has_arg = required_argument,
70                 .flag = NULL,
71                 .val = 'r'
72         },
73         {
74                 .name = "output",
75                 .has_arg = required_argument,
76                 .flag = NULL,
77                 .val = 'o'
78         },
79         {
80                 .name = "kill",
81                 .has_arg = no_argument,
82                 .flag = NULL,
83                 .val = 'k'
84         },
85         {
86                 .name = "stopwatch",
87                 .has_arg = required_argument,
88                 .flag = NULL,
89                 .val = 'w'
90         },
91         {
92                 .name = "version",
93                 .has_arg = no_argument,
94                 .flag = NULL,
95                 .val = 'V'
96         },
97         {
98                 .name = "buffer-size",
99                 .has_arg = required_argument,
100                 .flag = NULL,
101                 .val = 'b'
102         },
103         {
104                 .name = "num-sub-buffers",
105                 .has_arg = required_argument,
106                 .flag = NULL,
107                 .val = 'n'
108         },
109         {
110                 .name = "output-dir",
111                 .has_arg = required_argument,
112                 .flag = NULL,
113                 .val = 'D'
114         },
115         {
116                 .name = NULL,
117         }
118 };
119
120 struct thread_information {
121         int cpu;
122         pthread_t thread;
123
124         int fd;
125         char fn[MAXPATHLEN + 64];
126         void *buf;
127         unsigned long buf_offset;
128         unsigned int buf_subbuf;
129         unsigned int sequence;
130
131         pthread_mutex_t *fd_lock;
132         int ofd;
133
134         unsigned long events_processed;
135         struct device_information *device;
136 };
137
138 struct device_information {
139         int fd;
140         char *path;
141         char buts_name[32];
142         int trace_started;
143         struct thread_information *threads;
144 };
145
146 static int ncpus;
147 static struct thread_information *thread_information;
148 static int ndevs;
149 static struct device_information *device_information;
150
151 /* command line option globals */
152 static char *relay_path;
153 static char *output_name;
154 static char *output_dir;
155 static int act_mask = ~0U;
156 static int kill_running_trace;
157 static unsigned int buf_size = BUF_SIZE;
158 static unsigned int buf_nr = BUF_NR;
159
160 #define is_done()       (*(volatile int *)(&done))
161 static volatile int done;
162
163 static pthread_mutex_t stdout_mutex = PTHREAD_MUTEX_INITIALIZER;
164
165 static void exit_trace(int status);
166
167 static int start_trace(struct device_information *dip)
168 {
169         struct blk_user_trace_setup buts;
170
171         memset(&buts, 0, sizeof(buts));
172         buts.buf_size = buf_size;
173         buts.buf_nr = buf_nr;
174         buts.act_mask = act_mask;
175
176         if (ioctl(dip->fd, BLKSTARTTRACE, &buts) < 0) {
177                 perror("BLKSTARTTRACE");
178                 return 1;
179         }
180
181         memcpy(dip->buts_name, buts.name, sizeof(dip->buts_name));
182         dip->trace_started = 1;
183         return 0;
184 }
185
186 static void stop_trace(struct device_information *dip)
187 {
188         if (dip->trace_started || kill_running_trace) {
189                 dip->trace_started = 0;
190
191                 if (ioctl(dip->fd, BLKSTOPTRACE) < 0)
192                         perror("BLKSTOPTRACE");
193
194                 close(dip->fd);
195                 dip->fd = -1;
196         }
197 }
198
199 static void stop_all_traces(void)
200 {
201         struct device_information *dip;
202         int i;
203
204         for (dip = device_information, i = 0; i < ndevs; i++, dip++)
205                 stop_trace(dip);
206 }
207
208 static int read_data(struct thread_information *tip, void *buf, int len)
209 {
210         char *p = buf;
211         int ret, bytes_left = len;
212
213         while (!is_done() && bytes_left > 0) {
214                 ret = read(tip->fd, p, bytes_left);
215                 if (ret == bytes_left)
216                         return 0;
217
218                 if (ret < 0) {
219                         perror(tip->fn);
220                         fprintf(stderr,"Thread %d failed read of %s\n",
221                                 tip->cpu, tip->fn);
222                         break;
223                 } else if (ret > 0) {
224                         p += ret;
225                         bytes_left -= ret;
226                 } else
227                         usleep(1000);
228         }
229
230         return -1;
231 }
232
233 static int write_data(int fd, void *buf, unsigned int buf_len)
234 {
235         int ret, bytes_left;
236         char *p = buf;
237
238         bytes_left = buf_len;
239         while (bytes_left > 0) {
240                 ret = write(fd, p, bytes_left);
241                 if (ret == bytes_left)
242                         break;
243
244                 if (ret < 0) {
245                         perror("write");
246                         return 1;
247                 } else if (ret > 0) {
248                         p += ret;
249                         bytes_left -= ret;
250                 } else {
251                         fprintf(stderr, "Zero write?\n");
252                         return 1;
253                 }
254         }
255
256         return 0;
257 }
258
259 static void *extract_data(struct thread_information *tip, int nb)
260 {
261         unsigned char *buf;
262
263         buf = malloc(nb);
264         if (!read_data(tip, buf, nb))
265                 return buf;
266
267         free(buf);
268         return NULL;
269 }
270
271 /*
272  * trace may start inside 'bit' or may need to be gotten further on
273  */
274 static int get_event_slow(struct thread_information *tip,
275                           struct blk_io_trace *bit)
276 {
277         const int inc = sizeof(__u32);
278         struct blk_io_trace foo;
279         unsigned int offset;
280         void *p;
281
282         /*
283          * check is trace is inside
284          */
285         offset = 0;
286         p = bit;
287         while (offset < sizeof(*bit)) {
288                 p += inc;
289                 offset += inc;
290
291                 memcpy(&foo, p, inc);
292
293                 if (CHECK_MAGIC(&foo))
294                         break;
295         }
296
297         /*
298          * part trace found inside, read the rest
299          */
300         if (offset < sizeof(*bit)) {
301                 int good_bytes = sizeof(*bit) - offset;
302
303                 memmove(bit, p, good_bytes);
304                 p = (void *) bit + good_bytes;
305
306                 return read_data(tip, p, offset);
307         }
308
309         /*
310          * nothing found, keep looking for start of trace
311          */
312         do {
313                 if (read_data(tip, bit, sizeof(bit->magic)))
314                         return -1;
315         } while (!CHECK_MAGIC(bit));
316
317         /*
318          * now get the rest of it
319          */
320         p = &bit->sequence;
321         if (!read_data(tip, p, sizeof(*bit) - inc))
322                 return -1;
323
324         return 0;
325 }
326
327 /*
328  * Sometimes relayfs screws us a little, if an event crosses a sub buffer
329  * boundary. So keep looking forward in the trace data until an event
330  * is found
331  */
332 static int get_event(struct thread_information *tip, struct blk_io_trace *bit)
333 {
334         /*
335          * optimize for the common fast case, a full trace read that
336          * succeeds
337          */
338         if (read_data(tip, bit, sizeof(*bit)))
339                 return -1;
340
341         if (CHECK_MAGIC(bit))
342                 return 0;
343
344         /*
345          * ok that didn't work, the event may start somewhere inside the
346          * trace itself
347          */
348         return get_event_slow(tip, bit);
349 }
350
351 static inline void tip_fd_unlock(struct thread_information *tip)
352 {
353         if (tip->fd_lock)
354                 pthread_mutex_unlock(tip->fd_lock);
355 }
356
357 static inline void tip_fd_lock(struct thread_information *tip)
358 {
359         if (tip->fd_lock)
360                 pthread_mutex_lock(tip->fd_lock);
361 }
362
363 static void *extract(void *arg)
364 {
365         struct thread_information *tip = arg;
366         int pdu_len;
367         char *pdu_data;
368         struct blk_io_trace t;
369         pid_t pid = getpid();
370         cpu_set_t cpu_mask;
371
372         CPU_ZERO(&cpu_mask);
373         CPU_SET((tip->cpu), &cpu_mask);
374
375         if (sched_setaffinity(pid, sizeof(cpu_mask), &cpu_mask) == -1) {
376                 perror("sched_setaffinity");
377                 exit_trace(1);
378         }
379
380         snprintf(tip->fn, sizeof(tip->fn), "%s/block/%s/trace%d",
381                         relay_path, tip->device->buts_name, tip->cpu);
382         tip->fd = open(tip->fn, O_RDONLY);
383         if (tip->fd < 0) {
384                 perror(tip->fn);
385                 fprintf(stderr,"Thread %d failed open of %s\n", tip->cpu,
386                         tip->fn);
387                 exit_trace(1);
388         }
389
390         pdu_data = NULL;
391         while (!is_done()) {
392                 if (get_event(tip, &t))
393                         break;
394
395                 if (verify_trace(&t))
396                         break;
397
398                 pdu_len = t.pdu_len;
399
400                 trace_to_be(&t);
401
402                 if (pdu_len) {
403                         pdu_data = extract_data(tip, pdu_len);
404                         if (!pdu_data)
405                                 break;
406                 }
407
408                 /*
409                  * now we have both trace and payload, get a lock on the
410                  * output descriptor and send it off
411                  */
412                 tip_fd_lock(tip);
413
414                 if (write_data(tip->ofd, &t, sizeof(t))) {
415                         tip_fd_unlock(tip);
416                         break;
417                 }
418
419                 if (pdu_data && write_data(tip->ofd, pdu_data, pdu_len)) {
420                         tip_fd_unlock(tip);
421                         break;
422                 }
423
424                 tip_fd_unlock(tip);
425
426                 if (pdu_data) {
427                         free(pdu_data);
428                         pdu_data = NULL;
429                 }
430
431                 tip->events_processed++;
432         }
433
434         exit_trace(1);
435         return NULL;
436 }
437
438 static int start_threads(struct device_information *dip)
439 {
440         struct thread_information *tip;
441         char op[64];
442         int j, pipeline = output_name && !strcmp(output_name, "-");
443         int len;
444
445         for (tip = dip->threads, j = 0; j < ncpus; j++, tip++) {
446                 tip->cpu = j;
447                 tip->device = dip;
448                 tip->fd_lock = NULL;
449                 tip->events_processed = 0;
450
451                 if (pipeline) {
452                         tip->ofd = dup(STDOUT_FILENO);
453                         tip->fd_lock = &stdout_mutex;
454                 } else {
455                         len = 0;
456
457                         if (output_dir)
458                                 len = sprintf(op, "%s/", output_dir);
459
460                         if (output_name) {
461                                 sprintf(op + len, "%s.blktrace.%d", output_name,
462                                         tip->cpu);
463                         } else {
464                                 sprintf(op + len, "%s.blktrace.%d",
465                                         dip->buts_name, tip->cpu);
466                         }
467                         tip->ofd = open(op, O_CREAT|O_TRUNC|O_WRONLY, 0644);
468                 }
469
470                 if (tip->ofd < 0) {
471                         perror(op);
472                         return 1;
473                 }
474
475                 if (pthread_create(&tip->thread, NULL, extract, tip)) {
476                         perror("pthread_create");
477                         close(tip->ofd);
478                         return 1;
479                 }
480         }
481
482         return 0;
483 }
484
485 static void close_thread(struct thread_information *tip)
486 {
487         if (tip->fd != -1)
488                 close(tip->fd);
489         if (tip->ofd != -1)
490                 close(tip->ofd);
491
492         tip->fd = tip->ofd = -1;
493 }
494
495 static void stop_threads(struct device_information *dip)
496 {
497         struct thread_information *tip;
498         long ret;
499         int j;
500
501         for (tip = dip->threads, j = 0; j < ncpus; j++, tip++) {
502                 if (pthread_join(tip->thread, (void *) &ret))
503                         perror("thread_join");
504                 close_thread(tip);
505         }
506 }
507
508 static void stop_all_threads(void)
509 {
510         struct device_information *dip;
511         int i;
512
513         for (dip = device_information, i = 0; i < ndevs; i++, dip++)
514                 stop_threads(dip);
515 }
516
517 static void stop_all_tracing(void)
518 {
519         struct device_information *dip;
520         struct thread_information *tip;
521         int i, j;
522
523         for (dip = device_information, i = 0; i < ndevs; i++, dip++) {
524                 for (tip = dip->threads, j = 0; j < ncpus; j++, tip++)
525                         close_thread(tip);
526                 stop_trace(dip);
527         }
528 }
529
530 static void exit_trace(int status)
531 {
532         stop_all_tracing();
533         exit(status);
534 }
535
536 static int resize_devices(char *path)
537 {
538         int size = (ndevs + 1) * sizeof(struct device_information);
539
540         device_information = realloc(device_information, size);
541         if (!device_information) {
542                 fprintf(stderr, "Out of memory, device %s (%d)\n", path, size);
543                 return 1;
544         }
545         device_information[ndevs].path = path;
546         ndevs++;
547         return 0;
548 }
549
550 static int open_devices(void)
551 {
552         struct device_information *dip;
553         int i;
554
555         for (dip = device_information, i = 0; i < ndevs; i++, dip++) {
556                 dip->fd = open(dip->path, O_RDONLY | O_NONBLOCK);
557                 if (dip->fd < 0) {
558                         perror(dip->path);
559                         return 1;
560                 }
561         }
562         return 0;
563 }
564
565 static int start_devices(void)
566 {
567         struct device_information *dip;
568         int i, j, size;
569
570         size = ncpus * sizeof(struct thread_information);
571         thread_information = malloc(size * ndevs);
572         if (!thread_information) {
573                 fprintf(stderr, "Out of memory, threads (%d)\n", size * ndevs);
574                 return 1;
575         }
576
577         for (dip = device_information, i = 0; i < ndevs; i++, dip++) {
578                 if (start_trace(dip)) {
579                         close(dip->fd);
580                         fprintf(stderr, "Failed to start trace on %s\n",
581                                 dip->path);
582                         break;
583                 }
584         }
585         if (i != ndevs) {
586                 for (dip = device_information, j = 0; j < i; j++, dip++)
587                         stop_trace(dip);
588                 return 1;
589         }
590
591         for (dip = device_information, i = 0; i < ndevs; i++, dip++) {
592                 dip->threads = thread_information + (i * ncpus);
593                 if (start_threads(dip)) {
594                         fprintf(stderr, "Failed to start worker threads\n");
595                         break;
596                 }
597         }
598         if (i != ndevs) {
599                 for (dip = device_information, j = 0; j < i; j++, dip++)
600                         stop_threads(dip);
601                 for (dip = device_information, i = 0; i < ndevs; i++, dip++)
602                         stop_trace(dip);
603                 return 1;
604         }
605
606         return 0;
607 }
608
609 static void show_stats(void)
610 {
611         int i, j;
612         struct device_information *dip;
613         struct thread_information *tip;
614         unsigned long long events_processed;
615
616         if (output_name && !strcmp(output_name, "-"))
617                 return;
618
619         for (dip = device_information, i = 0; i < ndevs; i++, dip++) {
620                 printf("Device: %s\n", dip->path);
621                 events_processed = 0;
622                 for (tip = dip->threads, j = 0; j < ncpus; j++, tip++) {
623                         printf("  CPU%3d: %20ld events\n",
624                                tip->cpu, tip->events_processed);
625                         events_processed += tip->events_processed;
626                 }
627                 printf("  Total:  %20lld events\n", events_processed);
628         }
629 }
630
631 static char usage_str[] = \
632         "-d <dev> [ -r relay path ] [ -o <output> ] [-k ] [ -w time ]\n" \
633         "[ -a action ] [ -A action mask ] [ -v ]\n\n" \
634         "\t-d Use specified device. May also be given last after options\n" \
635         "\t-r Path to mounted relayfs, defaults to /relay\n" \
636         "\t-o File(s) to send output to\n" \
637         "\t-D Directory to prepend to output file names\n" \
638         "\t-k Kill a running trace\n" \
639         "\t-w Stop after defined time, in seconds\n" \
640         "\t-a Only trace specified actions. See documentation\n" \
641         "\t-A Give trace mask as a single value. See documentation\n" \
642         "\t-b Sub buffer size in KiB\n" \
643         "\t-n Number of sub buffers\n" \
644         "\t-v Print program version info\n\n";
645
646 static void show_usage(char *program)
647 {
648         fprintf(stderr, "Usage: %s %s %s",program, blktrace_version, usage_str);
649 }
650
651 static void handle_sigint(__attribute__((__unused__)) int sig)
652 {
653         done = 1;
654 }
655
656 int main(int argc, char *argv[])
657 {
658         static char default_relay_path[] = "/relay";
659         struct statfs st;
660         int i, c;
661         int stop_watch = 0;
662         int act_mask_tmp = 0;
663
664         while ((c = getopt_long(argc, argv, S_OPTS, l_opts, NULL)) >= 0) {
665                 switch (c) {
666                 case 'a':
667                         i = find_mask_map(optarg);
668                         if (i < 0) {
669                                 fprintf(stderr,"Invalid action mask %s\n",
670                                         optarg);
671                                 return 1;
672                         }
673                         act_mask_tmp |= i;
674                         break;
675
676                 case 'A':
677                         if ((sscanf(optarg, "%x", &i) != 1) || 
678                                                         !valid_act_opt(i)) {
679                                 fprintf(stderr,
680                                         "Invalid set action mask %s/0x%x\n",
681                                         optarg, i);
682                                 return 1;
683                         }
684                         act_mask_tmp = i;
685                         break;
686
687                 case 'd':
688                         if (resize_devices(optarg) != 0)
689                                 return 1;
690                         break;
691
692                 case 'r':
693                         relay_path = optarg;
694                         break;
695
696                 case 'o':
697                         output_name = optarg;
698                         break;
699                 case 'k':
700                         kill_running_trace = 1;
701                         break;
702                 case 'w':
703                         stop_watch = atoi(optarg);
704                         if (stop_watch <= 0) {
705                                 fprintf(stderr,
706                                         "Invalid stopwatch value (%d secs)\n",
707                                         stop_watch);
708                                 return 1;
709                         }
710                         break;
711                 case 'V':
712                         printf("%s version %s\n", argv[0], blktrace_version);
713                         return 0;
714                 case 'b':
715                         buf_size = atoi(optarg);
716                         if (buf_size <= 0 || buf_size > 16*1024) {
717                                 fprintf(stderr,
718                                         "Invalid buffer size (%d)\n", buf_size);
719                                 return 1;
720                         }
721                         buf_size <<= 10;
722                         break;
723                 case 'n':
724                         buf_nr = atoi(optarg);
725                         if (buf_nr <= 0) {
726                                 fprintf(stderr,
727                                         "Invalid buffer nr (%d)\n", buf_nr);
728                                 return 1;
729                         }
730                         break;
731                 case 'D':
732                         output_dir = optarg;
733                         break;
734                 default:
735                         show_usage(argv[0]);
736                         return 1;
737                 }
738         }
739
740         while (optind < argc) {
741                 if (resize_devices(argv[optind++]) != 0)
742                         return 1;
743         }
744
745         if (ndevs == 0) {
746                 show_usage(argv[0]);
747                 return 1;
748         }
749
750         if (!relay_path)
751                 relay_path = default_relay_path;
752
753         if (act_mask_tmp != 0)
754                 act_mask = act_mask_tmp;
755
756         if (statfs(relay_path, &st) < 0) {
757                 perror("statfs");
758                 fprintf(stderr,"%s does not appear to be a valid path\n",
759                         relay_path);
760                 return 1;
761         } else if (st.f_type != (long) RELAYFS_TYPE) {
762                 fprintf(stderr,"%s does not appear to be a relay filesystem\n",
763                         relay_path);
764                 return 1;
765         }
766
767         if (open_devices() != 0)
768                 return 1;
769
770         if (kill_running_trace) {
771                 stop_all_traces();
772                 return 0;
773         }
774
775         setlocale(LC_NUMERIC, "en_US");
776
777         ncpus = sysconf(_SC_NPROCESSORS_ONLN);
778         if (ncpus < 0) {
779                 fprintf(stderr, "sysconf(_SC_NPROCESSORS_ONLN) failed\n");
780                 return 1;
781         }
782
783         if (start_devices() != 0)
784                 return 1;
785
786         signal(SIGINT, handle_sigint);
787         signal(SIGHUP, handle_sigint);
788         signal(SIGTERM, handle_sigint);
789         signal(SIGALRM, handle_sigint);
790
791         atexit(stop_all_tracing);
792
793         if (stop_watch)
794                 alarm(stop_watch);
795
796         while (!is_done())
797                 sleep(1);
798
799         stop_all_threads();
800         stop_all_traces();
801         show_stats();
802
803         return 0;
804 }
805