[PATCH] blktrace: remember to initialize mutex
[blktrace.git] / blktrace.c
index 0fc69ebec1fe056832db690373709296aef7d106..992b4eb9af988005aeb6e1b41f9468b0ca42f296 100644 (file)
 #include <sys/ioctl.h>
 #include <sys/param.h>
 #include <sys/statfs.h>
+#include <sys/poll.h>
 #include <stdio.h>
 #include <stdlib.h>
 #include <sched.h>
 #include <ctype.h>
 #include <getopt.h>
-#include <sys/mman.h>
+#include <errno.h>
+#include <assert.h>
 
 #include "blktrace.h"
+#include "list.h"
 
-static char blktrace_version[] = "0.90";
+static char blktrace_version[] = "0.99";
 
-#define BUF_SIZE       (128 *1024)
+/*
+ * You may want to increase this even more (see -b) if you are logging at
+ * a high rate and see skipped/missed events.
+ */
+#define BUF_SIZE       (512 * 1024)
 #define BUF_NR         (4)
 
-#define RELAYFS_TYPE   0xF0B4A981
-
-#define DECLARE_MASK_MAP(mask)          { BLK_TC_##mask, #mask, "BLK_TC_"#mask }
-#define COMPARE_MASK_MAP(mmp, str)                                      \
-        (!strcasecmp((mmp)->short_form, (str)) ||                      \
-         !strcasecmp((mmp)->long_form, (str)))
-
-#define VALID_SET(x)   ((1 <= (x)) && ((x) < (1 << BLK_TC_SHIFT)))
+#define OFILE_BUF      (128 * 1024)
 
-struct mask_map {
-       int mask;
-       char *short_form;
-       char *long_form;
-};
+#define RELAYFS_TYPE   0xF0B4A981
 
-static struct mask_map mask_maps[] = {
-       DECLARE_MASK_MAP(READ),
-       DECLARE_MASK_MAP(WRITE),
-       DECLARE_MASK_MAP(BARRIER),
-       DECLARE_MASK_MAP(SYNC),
-       DECLARE_MASK_MAP(QUEUE),
-       DECLARE_MASK_MAP(REQUEUE),
-       DECLARE_MASK_MAP(ISSUE),
-       DECLARE_MASK_MAP(COMPLETE),
-       DECLARE_MASK_MAP(FS),
-       DECLARE_MASK_MAP(PC),
-};
+#define RING_INIT_NR   (2)
+#define RING_MAX_NR    (16UL)
 
-#define S_OPTS "d:a:A:r:o:kw:vb:n:"
+#define S_OPTS "d:a:A:r:o:kw:Vb:n:D:"
 static struct option l_opts[] = {
        {
                .name = "dev",
@@ -119,20 +105,36 @@ static struct option l_opts[] = {
                .name = "version",
                .has_arg = no_argument,
                .flag = NULL,
-               .val = 'v'
+               .val = 'V'
        },
        {
-               .name = "buffer size",
+               .name = "buffer-size",
                .has_arg = required_argument,
                .flag = NULL,
                .val = 'b'
        },
        {
-               .name = "nr of sub buffers",
+               .name = "num-sub-buffers",
                .has_arg = required_argument,
                .flag = NULL,
                .val = 'n'
        },
+       {
+               .name = "output-dir",
+               .has_arg = required_argument,
+               .flag = NULL,
+               .val = 'D'
+       },
+       {
+               .name = NULL,
+       }
+};
+
+struct tip_subbuf {
+       struct list_head list;
+       void *buf;
+       unsigned int len;
+       unsigned int max_len;
 };
 
 struct thread_information {
@@ -140,24 +142,32 @@ struct thread_information {
        pthread_t thread;
 
        int fd;
+       void *fd_buf;
+       unsigned long fd_off;
+       unsigned long fd_size;
+       unsigned long fd_max_size;
        char fn[MAXPATHLEN + 64];
-       void *buf;
-       unsigned long buf_offset;
-       unsigned int buf_subbuf;
-       unsigned int sequence;
 
-       pthread_mutex_t *fd_lock;
-       int ofd;
+       FILE *ofile;
+       char *ofile_buffer;
+       int ofile_stdout;
 
        unsigned long events_processed;
        struct device_information *device;
+
+       int exited;
+
+       pthread_mutex_t lock;
+       struct list_head subbuf_list;
+       struct tip_subbuf *leftover_ts;
 };
 
 struct device_information {
        int fd;
        char *path;
        char buts_name[32];
-       int trace_started;
+       volatile int trace_started;
+       unsigned long drop_count;
        struct thread_information *threads;
 };
 
@@ -169,28 +179,62 @@ static struct device_information *device_information;
 /* command line option globals */
 static char *relay_path;
 static char *output_name;
+static char *output_dir;
 static int act_mask = ~0U;
 static int kill_running_trace;
-static int use_mmap;
-static int buf_size = BUF_SIZE;
-static int buf_nr = BUF_NR;
+static unsigned long buf_size = BUF_SIZE;
+static unsigned long buf_nr = BUF_NR;
 
 #define is_done()      (*(volatile int *)(&done))
 static volatile int done;
 
-static pthread_mutex_t stdout_mutex = PTHREAD_MUTEX_INITIALIZER;
+#define is_trace_stopped()     (*(volatile int *)(&trace_stopped))
+static volatile int trace_stopped;
+
+#define is_stat_shown()        (*(volatile int *)(&stat_shown))
+static volatile int stat_shown;
 
 static void exit_trace(int status);
 
-static int find_mask_map(char *string)
+#define dip_tracing(dip)       (*(volatile int *)(&(dip)->trace_started))
+#define dip_set_tracing(dip, v)        ((dip)->trace_started = (v))
+
+#define __for_each_dip(__d, __i, __e)  \
+       for (__i = 0, __d = device_information; __i < __e; __i++, __d++)
+
+#define for_each_dip(__d, __i) __for_each_dip(__d, __i, ndevs)
+#define for_each_tip(__d, __t, __j)    \
+       for (__j = 0, __t = (__d)->threads; __j < ncpus; __j++, __t++)
+
+static int get_dropped_count(const char *buts_name)
 {
-       int i;
+       int fd;
+       char tmp[MAXPATHLEN + 64];
 
-       for (i = 0; i < sizeof(mask_maps)/sizeof(mask_maps[0]); i++)
-               if (COMPARE_MASK_MAP(&mask_maps[i], string))
-                       return mask_maps[i].mask;
+       snprintf(tmp, sizeof(tmp), "%s/block/%s/dropped",
+                relay_path, buts_name);
 
-       return -1;
+       fd = open(tmp, O_RDONLY);
+       if (fd < 0) {
+               /*
+                * this may be ok, if the kernel doesn't support dropped counts
+                */
+               if (errno == ENOENT)
+                       return 0;
+
+               fprintf(stderr, "Couldn't open dropped file %s\n", tmp);
+               return -1;
+       }
+
+       if (read(fd, tmp, sizeof(tmp)) < 0) {
+               perror(tmp);
+               close(fd);
+               return -1;
+       }
+
+       close(fd);
+
+       return atoi(tmp);
 }
 
 static int start_trace(struct device_information *dip)
@@ -208,17 +252,20 @@ static int start_trace(struct device_information *dip)
        }
 
        memcpy(dip->buts_name, buts.name, sizeof(dip->buts_name));
-       dip->trace_started = 1;
+       dip_set_tracing(dip, 1);
        return 0;
 }
 
 static void stop_trace(struct device_information *dip)
 {
-       if (dip->trace_started || kill_running_trace) {
+       if (dip_tracing(dip) || kill_running_trace) {
+               dip_set_tracing(dip, 0);
+
                if (ioctl(dip->fd, BLKSTOPTRACE) < 0)
                        perror("BLKSTOPTRACE");
+
                close(dip->fd);
-               dip->trace_started = 0;
+               dip->fd = -1;
        }
 }
 
@@ -227,114 +274,104 @@ static void stop_all_traces(void)
        struct device_information *dip;
        int i;
 
-       for (dip = device_information, i = 0; i < ndevs; i++, dip++)
+       for_each_dip(dip, i) {
+               dip->drop_count = get_dropped_count(dip->buts_name);
                stop_trace(dip);
-}
-
-static int get_data_read(struct thread_information *tip, void *buf, int len)
-{
-       char *p = buf;
-       int ret, bytes_left = len;
-
-       while (!is_done() && bytes_left > 0) {
-               ret = read(tip->fd, p, bytes_left);
-               if (ret == len)
-                       return 0;
-
-               if (ret < 0) {
-                       perror(tip->fn);
-                       fprintf(stderr,"Thread %d failed read of %s\n",
-                               tip->cpu, tip->fn);
-                       exit_trace(1);
-               } else if (ret > 0) {
-                       fprintf(stderr,"Thread %d misread %s %d,%d\n",
-                               tip->cpu, tip->fn, ret, len);
-                       exit_trace(1);
-               } else {
-                       p += ret;
-                       bytes_left -= ret;
-               }
-
-               usleep(10000);
        }
-
-       return -1;
 }
 
-static int get_data_mmap(struct thread_information *tip, void *buf, int len,
-                        int check_magic)
+static void wait_for_data(struct thread_information *tip)
 {
-       if (len > (buf_size * (tip->buf_subbuf + 1)) - tip->buf_offset) {
-               tip->buf_subbuf++;
-               if (tip->buf_subbuf == buf_nr)
-                       tip->buf_subbuf = 0;
+       struct pollfd pfd = { .fd = tip->fd, .events = POLLIN };
 
-               tip->buf_offset = tip->buf_subbuf * buf_size;
-       }
-
-       while (1) {
-               struct blk_io_trace *t = buf;
-
-               memcpy(buf, tip->buf + tip->buf_offset, len);
-
-               if (!check_magic)
+       do {
+               poll(&pfd, 1, 100);
+               if (pfd.revents & POLLIN)
                        break;
-
-               if (CHECK_MAGIC(t) && t->sequence >= tip->sequence) {
-                       tip->sequence = t->sequence;
+               if (tip->ofile_stdout)
                        break;
+       } while (!is_done());
+}
+
+static int read_data(struct thread_information *tip, void *buf, int len)
+{
+       int ret = 0;
+
+       do {
+               wait_for_data(tip);
+
+               ret = read(tip->fd, buf, len);
+               if (!ret)
+                       continue;
+               else if (ret > 0)
+                       return ret;
+               else {
+                       if (errno != EAGAIN) {
+                               perror(tip->fn);
+                               fprintf(stderr,"Thread %d failed read of %s\n",
+                                       tip->cpu, tip->fn);
+                               break;
+                       }
+                       continue;
                }
-       
-               if (is_done())
-                       return -1;
+       } while (!is_done());
 
-               usleep(10000);
-       }
+       return ret;
+}
 
-       tip->buf_offset += len;
-       return 0;
+static inline void tip_fd_unlock(struct thread_information *tip)
+{
+       pthread_mutex_unlock(&tip->lock);
 }
 
-static int get_data(struct thread_information *tip, void *buf, int len,
-                   int check_magic)
+static inline void tip_fd_lock(struct thread_information *tip)
 {
-       if (tip->buf)
-               return get_data_mmap(tip, buf, len, check_magic);
-       else
-               return get_data_read(tip, buf, len);
+       pthread_mutex_lock(&tip->lock);
 }
 
-static void *extract_data(struct thread_information *tip, char *ofn, int nb)
+static int get_subbuf(struct thread_information *tip)
 {
-       unsigned char *buf;
+       struct tip_subbuf *ts;
+       int ret;
 
-       buf = malloc(nb);
-       if (!get_data(tip, buf, nb, 0))
-               return buf;
+       ts = malloc(sizeof(*ts));
+       ts->buf = malloc(buf_size);
+       ts->max_len = buf_size;
 
-       free(buf);
-       exit_trace(1);
-       return NULL;
-}
+       ret = read_data(tip, ts->buf, ts->max_len);
+       if (ret > 0) {
+               ts->len = ret;
+               tip_fd_lock(tip);
+               list_add_tail(&ts->list, &tip->subbuf_list);
+               tip_fd_unlock(tip);
+               return 0;
+       }
 
-static inline void tip_fd_unlock(struct thread_information *tip)
-{
-       if (tip->fd_lock)
-               pthread_mutex_unlock(tip->fd_lock);
+       free(ts->buf);
+       free(ts);
+       return -1;
 }
 
-static inline void tip_fd_lock(struct thread_information *tip)
+static void close_thread(struct thread_information *tip)
 {
-       if (tip->fd_lock)
-               pthread_mutex_lock(tip->fd_lock);
+       if (tip->fd != -1)
+               close(tip->fd);
+       if (tip->ofile)
+               fclose(tip->ofile);
+       if (tip->ofile_buffer)
+               free(tip->ofile_buffer);
+       if (tip->fd_buf)
+               free(tip->fd_buf);
+
+       tip->fd = -1;
+       tip->ofile = NULL;
+       tip->ofile_buffer = NULL;
+       tip->fd_buf = NULL;
 }
 
-static void *extract(void *arg)
+static void *thread_main(void *arg)
 {
        struct thread_information *tip = arg;
-       int ret, pdu_len;
-       char dp[64], *pdu_data;
-       struct blk_io_trace t;
        pid_t pid = getpid();
        cpu_set_t cpu_mask;
 
@@ -356,60 +393,157 @@ static void *extract(void *arg)
                exit_trace(1);
        }
 
-       if (use_mmap) {
-               tip->buf = mmap(NULL, buf_size * buf_nr, PROT_READ,
-                                       MAP_PRIVATE | MAP_POPULATE, tip->fd, 0);
-               if (tip->buf == MAP_FAILED) {
-                       perror("mmap");
-                       exit_trace(1);
+       for (;;) {
+               if (get_subbuf(tip))
+                       break;
+       }
+
+       tip->exited = 1;
+       return NULL;
+}
+
+static int write_data(struct thread_information *tip,
+                     void *buf, unsigned int buf_len)
+{
+       int ret;
+
+       while (1) {
+               ret = fwrite(buf, buf_len, 1, tip->ofile);
+               if (ret == 1)
+                       break;
+
+               if (ret < 0) {
+                       perror("write");
+                       return 1;
                }
        }
 
-       pdu_data = NULL;
-       while (!is_done()) {
-               if (get_data(tip, &t, sizeof(t), 1))
+       if (tip->ofile_stdout)
+               fflush(tip->ofile);
+
+       return 0;
+}
+
+static int flush_subbuf(struct thread_information *tip, struct tip_subbuf *ts)
+{
+       unsigned int offset = 0;
+       struct blk_io_trace *t;
+       int pdu_len, events = 0;
+
+       /*
+        * surplus from last run, append the new subbuffer's data to it
+        */
+       if (tip->leftover_ts) {
+               struct tip_subbuf *prev_ts = tip->leftover_ts;
+
+               if (prev_ts->len + ts->len > prev_ts->max_len) {
+                       prev_ts->max_len += ts->len;
+                       prev_ts->buf = realloc(prev_ts->buf, prev_ts->max_len);
+               }
+
+               memcpy(prev_ts->buf + prev_ts->len, ts->buf, ts->len);
+               prev_ts->len += ts->len;
+
+               free(ts->buf);
+               free(ts);
+
+               ts = prev_ts;
+               tip->leftover_ts = NULL;
+       }
+
+       while (offset + sizeof(*t) <= ts->len) {
+               t = ts->buf + offset;
+
+               if (verify_trace(t))
+                       return -1;
+
+               pdu_len = t->pdu_len;
+
+               if (offset + sizeof(*t) + pdu_len > ts->len)
                        break;
 
-               if (verify_trace(&t))
-                       exit_trace(1);
+               trace_to_be(t);
+
+               if (write_data(tip, t, sizeof(*t) + pdu_len))
+                       return -1;
+
+               offset += sizeof(*t) + pdu_len;
+               tip->events_processed++;
+               events++;
+       }
 
-               pdu_len = t.pdu_len;
+       /*
+        * leftover bytes, save them for next time
+        */
+       if (offset != ts->len) {
+               tip->leftover_ts = ts;
+               ts->len -= offset;
+               memmove(ts->buf, ts->buf + offset, ts->len);
+       } else {
+               free(ts->buf);
+               free(ts);
+       }
 
-               trace_to_be(&t);
+       return events;
+}
 
-               if (pdu_len)
-                       pdu_data = extract_data(tip, dp, pdu_len);
+static int write_tip_events(struct thread_information *tip)
+{
+       struct tip_subbuf *ts = NULL;
 
-               /*
-                * now we have both trace and payload, get a lock on the
-                * output descriptor and send it off
-                */
-               tip_fd_lock(tip);
+       tip_fd_lock(tip);
+       if (!list_empty(&tip->subbuf_list)) {
+               ts = list_entry(tip->subbuf_list.next, struct tip_subbuf, list);
+               list_del(&ts->list);
+       }
+       tip_fd_unlock(tip);
 
-               ret = write(tip->ofd, &t, sizeof(t));
-               if (ret < 0) {
-                       fprintf(stderr,"Thread %d failed write\n", tip->cpu);
-                       tip_fd_unlock(tip);
-                       exit_trace(1);
-               }
+       if (ts)
+               return flush_subbuf(tip, ts);
 
-               if (pdu_data) {
-                       ret = write(tip->ofd, pdu_data, pdu_len);
-                       if (ret != pdu_len) {
-                               perror("write pdu data");
-                               tip_fd_unlock(tip);
-                               exit_trace(1);
-                       }
+       return 0;
+}
+
+/*
+ * scans the tips we know and writes out the subbuffers we accumulate
+ */
+static void get_and_write_events(void)
+{
+       struct device_information *dip;
+       struct thread_information *tip;
+       int i, j, events, ret, tips_running;
+
+       while (!is_done()) {
+               events = 0;
 
-                       free(pdu_data);
-                       pdu_data = NULL;
+               for_each_dip(dip, i) {
+                       for_each_tip(dip, tip, j) {
+                               ret = write_tip_events(tip);
+                               if (ret > 0)
+                                       events += ret;
+                       }
                }
 
-               tip_fd_unlock(tip);
-               tip->events_processed++;
+               if (!events)
+                       usleep(10);
        }
 
-       return NULL;
+       /*
+        * reap stored events, until nothing is queued and all threads exited
+        */
+       do {
+               events = 0;
+               tips_running = 0;
+               for_each_dip(dip, i) {
+                       for_each_tip(dip, tip, j) {
+                               ret = write_tip_events(tip);
+                               if (ret > 0)
+                                       events += ret;
+                               tips_running += !tip->exited;
+                       }
+               }
+               usleep(10);
+       } while (events || tips_running);
 }
 
 static int start_threads(struct device_information *dip)
@@ -417,35 +551,55 @@ static int start_threads(struct device_information *dip)
        struct thread_information *tip;
        char op[64];
        int j, pipeline = output_name && !strcmp(output_name, "-");
+       int len, mode, vbuf_size;
 
-       for (tip = dip->threads, j = 0; j < ncpus; j++, tip++) {
+       for_each_tip(dip, tip, j) {
                tip->cpu = j;
                tip->device = dip;
-               tip->fd_lock = NULL;
                tip->events_processed = 0;
+               pthread_mutex_init(&tip->lock, NULL);
+               INIT_LIST_HEAD(&tip->subbuf_list);
+               tip->leftover_ts = NULL;
 
                if (pipeline) {
-                       tip->ofd = dup(STDOUT_FILENO);
-                       tip->fd_lock = &stdout_mutex;
+                       tip->ofile = fdopen(STDOUT_FILENO, "w");
+                       tip->ofile_stdout = 1;
+                       mode = _IOLBF;
+                       vbuf_size = 512;
                } else {
+                       len = 0;
+
+                       if (output_dir)
+                               len = sprintf(op, "%s/", output_dir);
+
                        if (output_name) {
-                               sprintf(op, "%s.blktrace.%d", output_name,
+                               sprintf(op + len, "%s.blktrace.%d", output_name,
                                        tip->cpu);
                        } else {
-                               sprintf(op, "%s.blktrace.%d",
+                               sprintf(op + len, "%s.blktrace.%d",
                                        dip->buts_name, tip->cpu);
                        }
-                       tip->ofd = open(op, O_CREAT|O_TRUNC|O_WRONLY, 0644);
+                       tip->ofile = fopen(op, "w");
+                       tip->ofile_stdout = 0;
+                       mode = _IOFBF;
+                       vbuf_size = OFILE_BUF;
                }
 
-               if (tip->ofd < 0) {
+               if (tip->ofile == NULL) {
                        perror(op);
                        return 1;
                }
 
-               if (pthread_create(&tip->thread, NULL, extract, tip)) {
+               tip->ofile_buffer = malloc(vbuf_size);
+               if (setvbuf(tip->ofile, tip->ofile_buffer, mode, vbuf_size)) {
+                       perror("setvbuf");
+                       close_thread(tip);
+                       return 1;
+               }
+
+               if (pthread_create(&tip->thread, NULL, thread_main, tip)) {
                        perror("pthread_create");
-                       close(tip->ofd);
+                       close_thread(tip);
                        return 1;
                }
        }
@@ -453,28 +607,14 @@ static int start_threads(struct device_information *dip)
        return 0;
 }
 
-static void close_thread(struct thread_information *tip)
-{
-       if (tip->buf)
-               munmap(tip->buf, buf_size * buf_nr);
-
-       if (tip->fd != -1)
-               close(tip->fd);
-       if (tip->ofd != -1)
-               close(tip->ofd);
-
-       tip->fd = tip->ofd = -1;
-}
-
 static void stop_threads(struct device_information *dip)
 {
        struct thread_information *tip;
-       long ret;
-       int j;
+       unsigned long ret;
+       int i;
 
-       for (tip = dip->threads, j = 0; j < ncpus; j++, tip++) {
-               if (pthread_join(tip->thread, (void *) &ret))
-                       perror("thread_join");
+       for_each_tip(dip, tip, i) {
+               (void) pthread_join(tip->thread, (void *) &ret);
                close_thread(tip);
        }
 }
@@ -484,26 +624,27 @@ static void stop_all_threads(void)
        struct device_information *dip;
        int i;
 
-       for (dip = device_information, i = 0; i < ndevs; i++, dip++)
+       for_each_dip(dip, i)
                stop_threads(dip);
 }
 
 static void stop_all_tracing(void)
 {
        struct device_information *dip;
-       struct thread_information *tip;
-       int i, j;
+       int i;
 
-       for (dip = device_information, i = 0; i < ndevs; i++, dip++) {
-               for (tip = dip->threads, j = 0; j < ncpus; j++, tip++)
-                       close_thread(tip);
+       for_each_dip(dip, i)
                stop_trace(dip);
-       }
 }
 
 static void exit_trace(int status)
 {
-       stop_all_tracing();
+       if (!is_trace_stopped()) {
+               trace_stopped = 1;
+               stop_all_threads();
+               stop_all_tracing();
+       }
+
        exit(status);
 }
 
@@ -526,13 +667,14 @@ static int open_devices(void)
        struct device_information *dip;
        int i;
 
-       for (dip = device_information, i = 0; i < ndevs; i++, dip++) {
-               dip->fd = open(dip->path, O_RDONLY);
+       for_each_dip(dip, i) {
+               dip->fd = open(dip->path, O_RDONLY | O_NONBLOCK);
                if (dip->fd < 0) {
                        perror(dip->path);
                        return 1;
                }
        }
+
        return 0;
 }
 
@@ -548,7 +690,7 @@ static int start_devices(void)
                return 1;
        }
 
-       for (dip = device_information, i = 0; i < ndevs; i++, dip++) {
+       for_each_dip(dip, i) {
                if (start_trace(dip)) {
                        close(dip->fd);
                        fprintf(stderr, "Failed to start trace on %s\n",
@@ -556,24 +698,28 @@ static int start_devices(void)
                        break;
                }
        }
+
        if (i != ndevs) {
-               for (dip = device_information, j = 0; j < i; j++, dip++)
+               __for_each_dip(dip, j, i)
                        stop_trace(dip);
+
                return 1;
        }
 
-       for (dip = device_information, i = 0; i < ndevs; i++, dip++) {
+       for_each_dip(dip, i) {
                dip->threads = thread_information + (i * ncpus);
                if (start_threads(dip)) {
                        fprintf(stderr, "Failed to start worker threads\n");
                        break;
                }
        }
+
        if (i != ndevs) {
-               for (dip = device_information, j = 0; j < i; j++, dip++)
+               __for_each_dip(dip, j, i)
                        stop_threads(dip);
-               for (dip = device_information, i = 0; i < ndevs; i++, dip++)
+               for_each_dip(dip, i)
                        stop_trace(dip);
+
                return 1;
        }
 
@@ -582,24 +728,39 @@ static int start_devices(void)
 
 static void show_stats(void)
 {
-       int i, j;
        struct device_information *dip;
        struct thread_information *tip;
        unsigned long long events_processed;
+       unsigned long total_drops;
+       int i, j, no_stdout = 0;
 
-       if (output_name && !strcmp(output_name, "-"))
+       if (is_stat_shown())
                return;
 
-       for (dip = device_information, i = 0; i < ndevs; i++, dip++) {
-               printf("Device: %s\n", dip->path);
+       if (output_name && !strcmp(output_name, "-"))
+               no_stdout = 1;
+
+       stat_shown = 1;
+
+       total_drops = 0;
+       for_each_dip(dip, i) {
+               if (!no_stdout)
+                       printf("Device: %s\n", dip->path);
                events_processed = 0;
-               for (tip = dip->threads, j = 0; j < ncpus; j++, tip++) {
-                       printf("  CPU%3d: %20ld events\n",
-                              tip->cpu, tip->events_processed);
+               for_each_tip(dip, tip, j) {
+                       if (!no_stdout)
+                               printf("  CPU%3d: %20ld events\n",
+                                       tip->cpu, tip->events_processed);
                        events_processed += tip->events_processed;
                }
-               printf("  Total:  %20lld events\n", events_processed);
+               total_drops += dip->drop_count;
+               if (!no_stdout)
+                       printf("  Total:  %20lld events (dropped %lu)\n",
+                                       events_processed, dip->drop_count);
        }
+
+       if (total_drops)
+               fprintf(stderr, "You have dropped events, consider using a larger buffer size (-b)\n");
 }
 
 static char usage_str[] = \
@@ -608,6 +769,7 @@ static char usage_str[] = \
        "\t-d Use specified device. May also be given last after options\n" \
        "\t-r Path to mounted relayfs, defaults to /relay\n" \
        "\t-o File(s) to send output to\n" \
+       "\t-D Directory to prepend to output file names\n" \
        "\t-k Kill a running trace\n" \
        "\t-w Stop after defined time, in seconds\n" \
        "\t-a Only trace specified actions. See documentation\n" \
@@ -620,8 +782,7 @@ static void show_usage(char *program)
 {
        fprintf(stderr, "Usage: %s %s %s",program, blktrace_version, usage_str);
 }
-
-static void handle_sigint(int sig)
+static void handle_sigint(__attribute__((__unused__)) int sig)
 {
        done = 1;
 }
@@ -647,7 +808,8 @@ int main(int argc, char *argv[])
                        break;
 
                case 'A':
-                       if ((sscanf(optarg, "%x", &i) != 1) || !VALID_SET(i)) {
+                       if ((sscanf(optarg, "%x", &i) != 1) || 
+                                                       !valid_act_opt(i)) {
                                fprintf(stderr,
                                        "Invalid set action mask %s/0x%x\n",
                                        optarg, i);
@@ -680,26 +842,29 @@ int main(int argc, char *argv[])
                                return 1;
                        }
                        break;
-               case 'v':
+               case 'V':
                        printf("%s version %s\n", argv[0], blktrace_version);
                        return 0;
                case 'b':
-                       buf_size = atoi(optarg);
-                       if (buf_size <= 0) {
+                       buf_size = strtoul(optarg, NULL, 10);
+                       if (buf_size <= 0 || buf_size > 16*1024) {
                                fprintf(stderr,
-                                       "Invalid buffer size (%d)\n", buf_size);
+                                       "Invalid buffer size (%lu)\n",buf_size);
                                return 1;
                        }
                        buf_size <<= 10;
                        break;
                case 'n':
-                       buf_nr = atoi(optarg);
+                       buf_nr = strtoul(optarg, NULL, 10);
                        if (buf_nr <= 0) {
                                fprintf(stderr,
-                                       "Invalid buffer nr (%d)\n", buf_nr);
+                                       "Invalid buffer nr (%lu)\n", buf_nr);
                                return 1;
                        }
                        break;
+               case 'D':
+                       output_dir = optarg;
+                       break;
                default:
                        show_usage(argv[0]);
                        return 1;
@@ -727,7 +892,7 @@ int main(int argc, char *argv[])
                fprintf(stderr,"%s does not appear to be a valid path\n",
                        relay_path);
                return 1;
-       } else if (st.f_type != RELAYFS_TYPE) {
+       } else if (st.f_type != (long) RELAYFS_TYPE) {
                fprintf(stderr,"%s does not appear to be a relay filesystem\n",
                        relay_path);
                return 1;
@@ -762,11 +927,14 @@ int main(int argc, char *argv[])
        if (stop_watch)
                alarm(stop_watch);
 
-       while (!is_done())
-               sleep(1);
+       get_and_write_events();
+
+       if (!is_trace_stopped()) {
+               trace_stopped = 1;
+               stop_all_threads();
+               stop_all_traces();
+       }
 
-       stop_all_threads();
-       stop_all_traces();
        show_stats();
 
        return 0;