[PATCH] blktrace: remove one user -> kernel copy by mmap'ing output buffers
blktrace.c
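
For file output, each per-CPU reader thread extends its output file ahead of time with ftruncate(), mmap()s the new region, and read()s relay data directly into that mapping, so the data no longer has to be copied into a private buffer and then written out again. Piped output (-o -) keeps the copy path: sub-buffers are read into malloc()ed buffers, queued on a per-CPU FIFO, and flushed to stdout by the main thread. With file output, for example "-o trace", each CPU's data ends up in trace.blktrace.<cpu>.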
/*
 * block queue tracing application
 *
 * Copyright (C) 2005 Jens Axboe <axboe@suse.de>
 *
 *  This program is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation; either version 2 of the License, or
 *  (at your option) any later version.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with this program; if not, write to the Free Software
 *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 *
 */
#include <pthread.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <unistd.h>
#include <locale.h>
#include <signal.h>
#include <fcntl.h>
#include <string.h>
#include <sys/ioctl.h>
#include <sys/param.h>
#include <sys/statfs.h>
#include <sys/poll.h>
#include <sys/mman.h>
#include <stdio.h>
#include <stdlib.h>
#include <sched.h>
#include <ctype.h>
#include <getopt.h>
#include <errno.h>

#include "blktrace.h"
#include "barrier.h"

static char blktrace_version[] = "0.99";

/*
 * You may want to increase this even more, if you are logging at a high
 * rate and see skipped/missed events
 */
#define BUF_SIZE        (512 * 1024)
#define BUF_NR          (4)

#define OFILE_BUF       (128 * 1024)

#define RELAYFS_TYPE    0xF0B4A981

#define RING_INIT_NR    (2)
#define RING_MAX_NR     (16UL)

#define S_OPTS  "d:a:A:r:o:kw:Vb:n:D:"
static struct option l_opts[] = {
        {
                .name = "dev",
                .has_arg = required_argument,
                .flag = NULL,
                .val = 'd'
        },
        {
                .name = "act-mask",
                .has_arg = required_argument,
                .flag = NULL,
                .val = 'a'
        },
        {
                .name = "set-mask",
                .has_arg = required_argument,
                .flag = NULL,
                .val = 'A'
        },
        {
                .name = "relay",
                .has_arg = required_argument,
                .flag = NULL,
                .val = 'r'
        },
        {
                .name = "output",
                .has_arg = required_argument,
                .flag = NULL,
                .val = 'o'
        },
        {
                .name = "kill",
                .has_arg = no_argument,
                .flag = NULL,
                .val = 'k'
        },
        {
                .name = "stopwatch",
                .has_arg = required_argument,
                .flag = NULL,
                .val = 'w'
        },
        {
                .name = "version",
                .has_arg = no_argument,
                .flag = NULL,
                .val = 'V'
        },
        {
                .name = "buffer-size",
                .has_arg = required_argument,
                .flag = NULL,
                .val = 'b'
        },
        {
                .name = "num-sub-buffers",
                .has_arg = required_argument,
                .flag = NULL,
                .val = 'n'
        },
        {
                .name = "output-dir",
                .has_arg = required_argument,
                .flag = NULL,
                .val = 'D'
        },
        {
                .name = NULL,
        }
};

struct tip_subbuf {
        void *buf;
        unsigned int len;
        unsigned int max_len;
};

#define FIFO_SIZE       (1024)  /* should be plenty big! */
#define CL_SIZE         (128)   /* cache line, any bigger? */

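/*
 * Single-producer/single-consumer ring of completed sub-buffers: each
 * per-cpu reader thread queues into its own fifo and only the main thread
 * (via get_and_write_events()) dequeues, so plain loads/stores plus a
 * store barrier are enough.  head and tail live on separate cache lines
 * to avoid bouncing between the two CPUs involved.
 */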
struct tip_subbuf_fifo {
        int tail __attribute__((aligned(CL_SIZE)));
        int head __attribute__((aligned(CL_SIZE)));
        struct tip_subbuf *q[FIFO_SIZE];
};

struct thread_information {
        int cpu;
        pthread_t thread;

        int fd;
        void *fd_buf;
        char fn[MAXPATHLEN + 64];

        FILE *ofile;
        char *ofile_buffer;
        int ofile_stdout;

        unsigned long events_processed;
        unsigned long long data_read;
        struct device_information *device;

        int exited;

        /*
         * piped fifo buffers
         */
        struct tip_subbuf_fifo fifo;
        struct tip_subbuf *leftover_ts;

        /*
         * mmap controlled output files
         */
        unsigned long long fs_size;
        unsigned long long fs_max_size;
        unsigned long fs_off;
        void *fs_buf;
        unsigned long fs_buf_len;
};

struct device_information {
        int fd;
        char *path;
        char buts_name[32];
        volatile int trace_started;
        unsigned long drop_count;
        struct thread_information *threads;
};

static int ncpus;
static struct thread_information *thread_information;
static int ndevs;
static struct device_information *device_information;

/* command line option globals */
static char *relay_path;
static char *output_name;
static char *output_dir;
static int act_mask = ~0U;
static int kill_running_trace;
static unsigned long buf_size = BUF_SIZE;
static unsigned long buf_nr = BUF_NR;
static unsigned int page_size;

#define is_done()       (*(volatile int *)(&done))
static volatile int done;

#define is_trace_stopped()      (*(volatile int *)(&trace_stopped))
static volatile int trace_stopped;

#define is_stat_shown() (*(volatile int *)(&stat_shown))
static volatile int stat_shown;

static void exit_trace(int status);

#define dip_tracing(dip)        (*(volatile int *)(&(dip)->trace_started))
#define dip_set_tracing(dip, v) ((dip)->trace_started = (v))

#define __for_each_dip(__d, __i, __e)   \
        for (__i = 0, __d = device_information; __i < __e; __i++, __d++)

#define for_each_dip(__d, __i)  __for_each_dip(__d, __i, ndevs)
#define for_each_tip(__d, __t, __j)     \
        for (__j = 0, __t = (__d)->threads; __j < ncpus; __j++, __t++)

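/*
 * The kernel exports a per-device "dropped" count under relayfs; it is
 * read when tracing stops so show_stats() can warn about lost events.
 * Kernels without support simply don't have the file, which is treated
 * as zero drops.
 */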
static int get_dropped_count(const char *buts_name)
{
        int fd;
        char tmp[MAXPATHLEN + 64];

        snprintf(tmp, sizeof(tmp), "%s/block/%s/dropped",
                 relay_path, buts_name);

        fd = open(tmp, O_RDONLY);
        if (fd < 0) {
                /*
                 * this may be ok, if the kernel doesn't support dropped counts
                 */
                if (errno == ENOENT)
                        return 0;

                fprintf(stderr, "Couldn't open dropped file %s\n", tmp);
                return -1;
        }

        if (read(fd, tmp, sizeof(tmp)) < 0) {
                perror(tmp);
                close(fd);
                return -1;
        }

        close(fd);

        return atoi(tmp);
}

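/*
 * Hand the requested sub-buffer size/count and action mask to the kernel
 * via BLKSTARTTRACE; the kernel fills in buts.name, which names the
 * directory under <relay>/block/ where the per-cpu trace files appear.
 */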
static int start_trace(struct device_information *dip)
{
        struct blk_user_trace_setup buts;

        memset(&buts, 0, sizeof(buts));
        buts.buf_size = buf_size;
        buts.buf_nr = buf_nr;
        buts.act_mask = act_mask;

        if (ioctl(dip->fd, BLKSTARTTRACE, &buts) < 0) {
                perror("BLKSTARTTRACE");
                return 1;
        }

        memcpy(dip->buts_name, buts.name, sizeof(dip->buts_name));
        dip_set_tracing(dip, 1);
        return 0;
}

static void stop_trace(struct device_information *dip)
{
        if (dip_tracing(dip) || kill_running_trace) {
                dip_set_tracing(dip, 0);

                if (ioctl(dip->fd, BLKSTOPTRACE) < 0)
                        perror("BLKSTOPTRACE");

                close(dip->fd);
                dip->fd = -1;
        }
}

static void stop_all_traces(void)
{
        struct device_information *dip;
        int i;

        for_each_dip(dip, i) {
                dip->drop_count = get_dropped_count(dip->buts_name);
                stop_trace(dip);
        }
}

static void wait_for_data(struct thread_information *tip)
{
        struct pollfd pfd = { .fd = tip->fd, .events = POLLIN };

        do {
                poll(&pfd, 1, 100);
                if (pfd.revents & POLLIN)
                        break;
                if (tip->ofile_stdout)
                        break;
        } while (!is_done());
}

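/*
 * Wait (via poll) for data on the per-cpu relay file and read up to len
 * bytes; zero-byte reads and EAGAIN are retried until data arrives or
 * the trace is being torn down.
 */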
static int read_data(struct thread_information *tip, void *buf, int len)
{
        int ret = 0;

        do {
                wait_for_data(tip);

                ret = read(tip->fd, buf, len);
                if (!ret)
                        continue;
                else if (ret > 0)
                        return ret;
                else {
                        if (errno != EAGAIN) {
                                perror(tip->fn);
                                fprintf(stderr,"Thread %d failed read of %s\n",
                                        tip->cpu, tip->fn);
                                break;
                        }
                        continue;
                }
        } while (!is_done());

        return ret;
}

static inline struct tip_subbuf *subbuf_fifo_dequeue(struct thread_information *tip)
{
        const int head = tip->fifo.head;
        const int next = (head + 1) & (FIFO_SIZE - 1);

        if (head != tip->fifo.tail) {
                struct tip_subbuf *ts = tip->fifo.q[head];

                store_barrier();
                tip->fifo.head = next;
                return ts;
        }

        return NULL;
}

static inline int subbuf_fifo_queue(struct thread_information *tip,
                                    struct tip_subbuf *ts)
{
        const int tail = tip->fifo.tail;
        const int next = (tail + 1) & (FIFO_SIZE - 1);

        if (next != tip->fifo.head) {
                tip->fifo.q[tail] = ts;
                store_barrier();
                tip->fifo.tail = next;
                return 0;
        }

        fprintf(stderr, "fifo too small!\n");
        return 1;
}

/*
 * For file output, truncate and mmap the file appropriately
 */
static int mmap_subbuf(struct thread_information *tip)
{
        int ofd = fileno(tip->ofile);
        int ret;

        /*
         * extend file, if we have to. use chunks of 16 subbuffers.
         */
        if (tip->fs_off + buf_size > tip->fs_buf_len) {
                if (tip->fs_buf) {
                        munmap(tip->fs_buf, tip->fs_buf_len);
                        tip->fs_buf = NULL;
                }

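                /*
                 * Map a new output window: fs_off is the offset of the
                 * current end of data within a page, so the mapping below
                 * starts on a page boundary (as mmap requires) while
                 * writing resumes exactly where the previous window
                 * left off.
                 */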
                tip->fs_off = tip->fs_size & (page_size - 1);
                tip->fs_buf_len = (16 * buf_size) - tip->fs_off;
                tip->fs_max_size += tip->fs_buf_len;

                if (ftruncate(ofd, tip->fs_max_size) < 0) {
                        perror("ftruncate");
                        return -1;
                }

                tip->fs_buf = mmap(NULL, tip->fs_buf_len, PROT_WRITE,
                                   MAP_SHARED, ofd, tip->fs_size - tip->fs_off);
                if (tip->fs_buf == MAP_FAILED) {
                        perror("mmap");
                        return -1;
                }
        }

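        /*
         * This is where the extra copy goes away: relay data is read
         * straight into the mmap'ed output file instead of into a
         * private buffer that would then be written out again.
         */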
        ret = read_data(tip, tip->fs_buf + tip->fs_off, buf_size);
        if (ret >= 0) {
                tip->data_read += ret;
                tip->fs_size += ret;
                tip->fs_off += ret;
                return 0;
        }

        return -1;
}

/*
 * Use the copy approach for pipes
 */
static int get_subbuf(struct thread_information *tip)
{
        struct tip_subbuf *ts;
        int ret;

        ts = malloc(sizeof(*ts));
        ts->buf = malloc(buf_size);
        ts->max_len = buf_size;

        ret = read_data(tip, ts->buf, ts->max_len);
        if (ret > 0) {
                ts->len = ret;
                return subbuf_fifo_queue(tip, ts);
        }

        free(ts->buf);
        free(ts);
        return ret;
}

static void close_thread(struct thread_information *tip)
{
        if (tip->fd != -1)
                close(tip->fd);
        if (tip->ofile)
                fclose(tip->ofile);
        if (tip->ofile_buffer)
                free(tip->ofile_buffer);
        if (tip->fd_buf)
                free(tip->fd_buf);

        tip->fd = -1;
        tip->ofile = NULL;
        tip->ofile_buffer = NULL;
        tip->fd_buf = NULL;
}

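/*
 * Per-cpu reader thread: open this cpu's relay trace file, then either
 * queue complete sub-buffers for the main thread (piped output) or
 * append them to the mmap'ed output file until tracing stops.
 */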
static void *thread_main(void *arg)
{
        struct thread_information *tip = arg;
        pid_t pid = getpid();
        cpu_set_t cpu_mask;

        CPU_ZERO(&cpu_mask);
        CPU_SET((tip->cpu), &cpu_mask);

        if (sched_setaffinity(pid, sizeof(cpu_mask), &cpu_mask) == -1) {
                perror("sched_setaffinity");
                exit_trace(1);
        }

        snprintf(tip->fn, sizeof(tip->fn), "%s/block/%s/trace%d",
                        relay_path, tip->device->buts_name, tip->cpu);
        tip->fd = open(tip->fn, O_RDONLY);
        if (tip->fd < 0) {
                perror(tip->fn);
                fprintf(stderr,"Thread %d failed open of %s\n", tip->cpu,
                        tip->fn);
                exit_trace(1);
        }

        while (!is_done()) {
                if (tip->ofile_stdout) {
                        if (get_subbuf(tip))
                                break;
                } else {
                        if (mmap_subbuf(tip))
                                break;
                }
        }

        /*
         * truncate to right size and cleanup mmap
         */
        if (!tip->ofile_stdout) {
                int ofd = fileno(tip->ofile);

                if (tip->fs_buf)
                        munmap(tip->fs_buf, tip->fs_buf_len);

                ftruncate(ofd, tip->fs_size);
        }

        tip->exited = 1;
        return NULL;
}

static int write_data(struct thread_information *tip,
                      void *buf, unsigned int buf_len)
{
        size_t ret;

        if (!buf_len)
                return 0;

        while (1) {
                ret = fwrite(buf, buf_len, 1, tip->ofile);
                if (ret == 1)
                        break;

                if (ferror(tip->ofile)) {
                        perror("fwrite");
                        return 1;
                }
        }

        if (tip->ofile_stdout)
                fflush(tip->ofile);

        return 0;
}

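/*
 * Walk a queued sub-buffer event by event (trace header plus pdu_len
 * payload bytes), write out everything that is complete, and stash any
 * partial trailing event in leftover_ts so it can be prepended to the
 * next sub-buffer from this cpu.
 */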
static int flush_subbuf(struct thread_information *tip, struct tip_subbuf *ts)
{
        unsigned int offset = 0;
        struct blk_io_trace *t;
        int pdu_len, events = 0;

        /*
         * surplus from last run
         */
        if (tip->leftover_ts) {
                struct tip_subbuf *prev_ts = tip->leftover_ts;

                if (prev_ts->len + ts->len > prev_ts->max_len) {
                        prev_ts->max_len += ts->len;
                        prev_ts->buf = realloc(prev_ts->buf, prev_ts->max_len);
                }

                memcpy(prev_ts->buf + prev_ts->len, ts->buf, ts->len);
                prev_ts->len += ts->len;

                free(ts->buf);
                free(ts);

                ts = prev_ts;
                tip->leftover_ts = NULL;
        }

        while (offset + sizeof(*t) <= ts->len) {
                t = ts->buf + offset;

                if (verify_trace(t)) {
                        write_data(tip, ts->buf, offset);
                        return -1;
                }

                pdu_len = t->pdu_len;

                if (offset + sizeof(*t) + pdu_len > ts->len)
                        break;

                offset += sizeof(*t) + pdu_len;
                tip->events_processed++;
                tip->data_read += sizeof(*t) + pdu_len;
                events++;
        }

        if (write_data(tip, ts->buf, offset))
                return -1;

        /*
         * leftover bytes, save them for next time
         */
        if (offset != ts->len) {
                tip->leftover_ts = ts;
                ts->len -= offset;
                memmove(ts->buf, ts->buf + offset, ts->len);
        } else {
                free(ts->buf);
                free(ts);
        }

        return events;
}

static int write_tip_events(struct thread_information *tip)
{
        struct tip_subbuf *ts = subbuf_fifo_dequeue(tip);

        if (ts)
                return flush_subbuf(tip, ts);

        return 0;
}

/*
 * scans the tips we know and writes out the subbuffers we accumulate
 */
static void get_and_write_events(void)
{
        struct device_information *dip;
        struct thread_information *tip;
        int i, j, events, ret, tips_running;

        while (!is_done()) {
                events = 0;

                for_each_dip(dip, i) {
                        for_each_tip(dip, tip, j) {
                                ret = write_tip_events(tip);
                                if (ret > 0)
                                        events += ret;
                        }
                }

                if (!events)
                        usleep(10);
        }

        /*
         * reap stored events
         */
        do {
                events = 0;
                tips_running = 0;
                for_each_dip(dip, i) {
                        for_each_tip(dip, tip, j) {
                                ret = write_tip_events(tip);
                                if (ret > 0)
                                        events += ret;
                                tips_running += !tip->exited;
                        }
                }
                usleep(10);
        } while (events || tips_running);
}

static void wait_for_threads(void)
{
        /*
         * for piped output, poll and fetch data for writeout. for files,
         * we just wait around for trace threads to exit
         */
        if (output_name && !strcmp(output_name, "-"))
                get_and_write_events();
        else {
                struct device_information *dip;
                struct thread_information *tip;
                int i, j, tips_running;

                do {
                        tips_running = 0;
                        usleep(1000);

                        for_each_dip(dip, i)
                                for_each_tip(dip, tip, j)
                                        tips_running += !tip->exited;
                } while (tips_running);
        }
}

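/*
 * Spawn one reader thread per cpu for this device.  Piped output shares
 * a line-buffered stdout stream; file output goes to a fully buffered
 * <name>.blktrace.<cpu> file per cpu, optionally under -D <dir>.
 */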
static int start_threads(struct device_information *dip)
{
        struct thread_information *tip;
        char op[64];
        int j, pipeline = output_name && !strcmp(output_name, "-");
        int len, mode, vbuf_size;

        for_each_tip(dip, tip, j) {
                tip->cpu = j;
                tip->device = dip;
                tip->events_processed = 0;
                memset(&tip->fifo, 0, sizeof(tip->fifo));
                tip->leftover_ts = NULL;

                if (pipeline) {
                        tip->ofile = fdopen(STDOUT_FILENO, "w");
                        tip->ofile_stdout = 1;
                        mode = _IOLBF;
                        vbuf_size = 512;
                } else {
                        len = 0;

                        if (output_dir)
                                len = sprintf(op, "%s/", output_dir);

                        if (output_name) {
                                sprintf(op + len, "%s.blktrace.%d", output_name,
                                        tip->cpu);
                        } else {
                                sprintf(op + len, "%s.blktrace.%d",
                                        dip->buts_name, tip->cpu);
                        }
                        tip->ofile = fopen(op, "w+");
                        tip->ofile_stdout = 0;
                        mode = _IOFBF;
                        vbuf_size = OFILE_BUF;
                }

                if (tip->ofile == NULL) {
                        perror(op);
                        return 1;
                }

                tip->ofile_buffer = malloc(vbuf_size);
                if (setvbuf(tip->ofile, tip->ofile_buffer, mode, vbuf_size)) {
                        perror("setvbuf");
                        close_thread(tip);
                        return 1;
                }

                if (pthread_create(&tip->thread, NULL, thread_main, tip)) {
                        perror("pthread_create");
                        close_thread(tip);
                        return 1;
                }
        }

        return 0;
}

static void stop_threads(struct device_information *dip)
{
        struct thread_information *tip;
        unsigned long ret;
        int i;

        for_each_tip(dip, tip, i) {
                (void) pthread_join(tip->thread, (void *) &ret);
                close_thread(tip);
        }
}

static void stop_all_threads(void)
{
        struct device_information *dip;
        int i;

        for_each_dip(dip, i)
                stop_threads(dip);
}

static void stop_all_tracing(void)
{
        struct device_information *dip;
        int i;

        for_each_dip(dip, i)
                stop_trace(dip);
}

static void exit_trace(int status)
{
        if (!is_trace_stopped()) {
                trace_stopped = 1;
                stop_all_threads();
                stop_all_tracing();
        }

        exit(status);
}

static int resize_devices(char *path)
{
        int size = (ndevs + 1) * sizeof(struct device_information);

        device_information = realloc(device_information, size);
        if (!device_information) {
                fprintf(stderr, "Out of memory, device %s (%d)\n", path, size);
                return 1;
        }
        device_information[ndevs].path = path;
        ndevs++;
        return 0;
}

static int open_devices(void)
{
        struct device_information *dip;
        int i;

        for_each_dip(dip, i) {
                dip->fd = open(dip->path, O_RDONLY | O_NONBLOCK);
                if (dip->fd < 0) {
                        perror(dip->path);
                        return 1;
                }
        }

        return 0;
}

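/*
 * thread_information is one flat array of ndevs * ncpus entries; each
 * device's dip->threads points at its ncpus-sized slice.  Tracing is
 * started on every device before any worker threads are created, and
 * partial failures are unwound before returning.
 */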
static int start_devices(void)
{
        struct device_information *dip;
        int i, j, size;

        size = ncpus * sizeof(struct thread_information);
        thread_information = malloc(size * ndevs);
        if (!thread_information) {
                fprintf(stderr, "Out of memory, threads (%d)\n", size * ndevs);
                return 1;
        }

        for_each_dip(dip, i) {
                if (start_trace(dip)) {
                        close(dip->fd);
                        fprintf(stderr, "Failed to start trace on %s\n",
                                dip->path);
                        break;
                }
        }

        if (i != ndevs) {
                __for_each_dip(dip, j, i)
                        stop_trace(dip);

                return 1;
        }

        for_each_dip(dip, i) {
                dip->threads = thread_information + (i * ncpus);
                if (start_threads(dip)) {
                        fprintf(stderr, "Failed to start worker threads\n");
                        break;
                }
        }

        if (i != ndevs) {
                __for_each_dip(dip, j, i)
                        stop_threads(dip);
                for_each_dip(dip, i)
                        stop_trace(dip);

                return 1;
        }

        return 0;
}

static void show_stats(void)
{
        struct device_information *dip;
        struct thread_information *tip;
        unsigned long long events_processed, data_read;
        unsigned long total_drops;
        int i, j, no_stdout = 0;

        if (is_stat_shown())
                return;

        if (output_name && !strcmp(output_name, "-"))
                no_stdout = 1;

        stat_shown = 1;

        total_drops = 0;
        for_each_dip(dip, i) {
                if (!no_stdout)
                        printf("Device: %s\n", dip->path);
                events_processed = 0;
                data_read = 0;
                for_each_tip(dip, tip, j) {
                        if (!no_stdout)
                                printf("  CPU%3d: %20lu events, %8llu KiB data\n",
                                        tip->cpu, tip->events_processed,
                                        tip->data_read >> 10);
                        events_processed += tip->events_processed;
                        data_read += tip->data_read;
                }
                total_drops += dip->drop_count;
                if (!no_stdout)
                        printf("  Total:  %20llu events (dropped %lu), %8llu KiB data\n",
                                        events_processed, dip->drop_count,
                                        data_read >> 10);
        }

        if (total_drops)
                fprintf(stderr, "You have dropped events, consider using a larger buffer size (-b)\n");
}

static char usage_str[] = \
        "-d <dev> [ -r relay path ] [ -o <output> ] [ -k ] [ -w time ]\n" \
        "[ -a action ] [ -A action mask ] [ -b size ] [ -n number ] [ -D dir ] [ -V ]\n\n" \
        "\t-d Use specified device. May also be given last after options\n" \
        "\t-r Path to mounted relayfs, defaults to /relay\n" \
        "\t-o File(s) to send output to\n" \
        "\t-D Directory to prepend to output file names\n" \
        "\t-k Kill a running trace\n" \
        "\t-w Stop after defined time, in seconds\n" \
        "\t-a Only trace specified actions. See documentation\n" \
        "\t-A Give trace mask as a single value. See documentation\n" \
        "\t-b Sub buffer size in KiB\n" \
        "\t-n Number of sub buffers\n" \
        "\t-V Print program version info\n\n";

static void show_usage(char *program)
{
        fprintf(stderr, "Usage: %s %s %s", program, blktrace_version, usage_str);
}

static void handle_sigint(__attribute__((__unused__)) int sig)
{
        done = 1;
}

int main(int argc, char *argv[])
{
        static char default_relay_path[] = "/relay";
        struct statfs st;
        int i, c;
        int stop_watch = 0;
        int act_mask_tmp = 0;

        while ((c = getopt_long(argc, argv, S_OPTS, l_opts, NULL)) >= 0) {
                switch (c) {
                case 'a':
                        i = find_mask_map(optarg);
                        if (i < 0) {
                                fprintf(stderr,"Invalid action mask %s\n",
                                        optarg);
                                return 1;
                        }
                        act_mask_tmp |= i;
                        break;

                case 'A':
                        if ((sscanf(optarg, "%x", &i) != 1) ||
                                                        !valid_act_opt(i)) {
                                fprintf(stderr,
                                        "Invalid set action mask %s/0x%x\n",
                                        optarg, i);
                                return 1;
                        }
                        act_mask_tmp = i;
                        break;

                case 'd':
                        if (resize_devices(optarg) != 0)
                                return 1;
                        break;

                case 'r':
                        relay_path = optarg;
                        break;

                case 'o':
                        output_name = optarg;
                        break;
                case 'k':
                        kill_running_trace = 1;
                        break;
                case 'w':
                        stop_watch = atoi(optarg);
                        if (stop_watch <= 0) {
                                fprintf(stderr,
                                        "Invalid stopwatch value (%d secs)\n",
                                        stop_watch);
                                return 1;
                        }
                        break;
                case 'V':
                        printf("%s version %s\n", argv[0], blktrace_version);
                        return 0;
                case 'b':
                        buf_size = strtoul(optarg, NULL, 10);
                        if (buf_size <= 0 || buf_size > 16*1024) {
                                fprintf(stderr,
                                        "Invalid buffer size (%lu)\n",buf_size);
                                return 1;
                        }
                        buf_size <<= 10;
                        break;
                case 'n':
                        buf_nr = strtoul(optarg, NULL, 10);
                        if (buf_nr <= 0) {
                                fprintf(stderr,
                                        "Invalid buffer nr (%lu)\n", buf_nr);
                                return 1;
                        }
                        break;
                case 'D':
                        output_dir = optarg;
                        break;
                default:
                        show_usage(argv[0]);
                        return 1;
                }
        }

        while (optind < argc) {
                if (resize_devices(argv[optind++]) != 0)
                        return 1;
        }

        if (ndevs == 0) {
                show_usage(argv[0]);
                return 1;
        }

        if (!relay_path)
                relay_path = default_relay_path;

        if (act_mask_tmp != 0)
                act_mask = act_mask_tmp;

        if (statfs(relay_path, &st) < 0) {
                perror("statfs");
                fprintf(stderr,"%s does not appear to be a valid path\n",
                        relay_path);
                return 1;
        } else if (st.f_type != (long) RELAYFS_TYPE) {
                fprintf(stderr,"%s does not appear to be a relay filesystem\n",
                        relay_path);
                return 1;
        }

        if (open_devices() != 0)
                return 1;

        if (kill_running_trace) {
                stop_all_traces();
                return 0;
        }

        setlocale(LC_NUMERIC, "en_US");

        ncpus = sysconf(_SC_NPROCESSORS_ONLN);
        if (ncpus < 0) {
                fprintf(stderr, "sysconf(_SC_NPROCESSORS_ONLN) failed\n");
                return 1;
        }

        page_size = getpagesize();

        if (start_devices() != 0)
                return 1;

        signal(SIGINT, handle_sigint);
        signal(SIGHUP, handle_sigint);
        signal(SIGTERM, handle_sigint);
        signal(SIGALRM, handle_sigint);

        atexit(stop_all_tracing);

        if (stop_watch)
                alarm(stop_watch);

        wait_for_threads();

        if (!is_trace_stopped()) {
                trace_stopped = 1;
                stop_all_threads();
                stop_all_traces();
        }

        show_stats();

        return 0;
}