[PATCH] blktrace: note that the -b option is in KiB
[blktrace.git] / blktrace.c
index 37e088c7c23909c85cc1eaef352b0db6cb80ce78..de0d9d1e0dc3af43550d7b60ff2cb1be2a22e7bc 100644 (file)
 #include <sched.h>
 #include <ctype.h>
 #include <getopt.h>
-#include <sys/mman.h>
 
 #include "blktrace.h"
 
+static char blktrace_version[] = "0.90";
+
 #define BUF_SIZE       (128 *1024)
 #define BUF_NR         (4)
 
@@ -56,7 +57,7 @@ struct mask_map {
        char *long_form;
 };
 
-struct mask_map mask_maps[] = {
+static struct mask_map mask_maps[] = {
        DECLARE_MASK_MAP(READ),
        DECLARE_MASK_MAP(WRITE),
        DECLARE_MASK_MAP(BARRIER),
@@ -69,7 +70,7 @@ struct mask_map mask_maps[] = {
        DECLARE_MASK_MAP(PC),
 };
 
-#define S_OPTS "d:a:A:r:o:kw:"
+#define S_OPTS "d:a:A:r:o:kw:vb:n:"
 static struct option l_opts[] = {
        {
                .name = "dev",
@@ -113,6 +114,24 @@ static struct option l_opts[] = {
                .flag = NULL,
                .val = 'w'
        },
+       {
+               .name = "version",
+               .has_arg = no_argument,
+               .flag = NULL,
+               .val = 'v'
+       },
+       {
+               .name = "buffer size (in KiB)",
+               .has_arg = required_argument,
+               .flag = NULL,
+               .val = 'b'
+       },
+       {
+               .name = "nr of sub buffers",
+               .has_arg = required_argument,
+               .flag = NULL,
+               .val = 'n'
+       },
 };
 
 struct thread_information {
@@ -151,7 +170,8 @@ static char *relay_path;
 static char *output_name;
 static int act_mask = ~0U;
 static int kill_running_trace;
-static int use_mmap;
+static unsigned int buf_size = BUF_SIZE;
+static unsigned int buf_nr = BUF_NR;
 
 #define is_done()      (*(volatile int *)(&done))
 static volatile int done;
@@ -162,7 +182,7 @@ static void exit_trace(int status);
 
 static int find_mask_map(char *string)
 {
-       int i;
+       unsigned int i;
 
        for (i = 0; i < sizeof(mask_maps)/sizeof(mask_maps[0]); i++)
                if (COMPARE_MASK_MAP(&mask_maps[i], string))
@@ -176,8 +196,8 @@ static int start_trace(struct device_information *dip)
        struct blk_user_trace_setup buts;
 
        memset(&buts, 0, sizeof(buts));
-       buts.buf_size = BUF_SIZE;
-       buts.buf_nr = BUF_NR;
+       buts.buf_size = buf_size;
+       buts.buf_nr = buf_nr;
        buts.act_mask = act_mask;
 
        if (ioctl(dip->fd, BLKSTARTTRACE, &buts) < 0) {
@@ -209,92 +229,149 @@ static void stop_all_traces(void)
                stop_trace(dip);
 }
 
-static int get_data_read(struct thread_information *tip, void *buf, int len)
+static int read_data(struct thread_information *tip, void *buf, int len)
 {
        char *p = buf;
        int ret, bytes_left = len;
 
        while (!is_done() && bytes_left > 0) {
                ret = read(tip->fd, p, bytes_left);
-               if (ret == len)
+               if (ret == bytes_left)
                        return 0;
 
                if (ret < 0) {
                        perror(tip->fn);
                        fprintf(stderr,"Thread %d failed read of %s\n",
                                tip->cpu, tip->fn);
-                       exit_trace(1);
+                       break;
                } else if (ret > 0) {
-                       fprintf(stderr,"Thread %d misread %s %d,%d\n",
-                               tip->cpu, tip->fn, ret, len);
-                       exit_trace(1);
-               } else {
                        p += ret;
                        bytes_left -= ret;
-               }
-
-               usleep(10000);
+               } else
+                       usleep(1000);
        }
 
        return -1;
 }
 
-static int get_data_mmap(struct thread_information *tip, void *buf, int len,
-                        int check_magic)
+static int write_data(int fd, void *buf, unsigned int buf_len)
 {
-       if (len > (BUF_SIZE * (tip->buf_subbuf + 1)) - tip->buf_offset) {
-               tip->buf_subbuf++;
-               if (tip->buf_subbuf == BUF_NR)
-                       tip->buf_subbuf = 0;
-
-               tip->buf_offset = tip->buf_subbuf * BUF_SIZE;
-       }
-
-       while (1) {
-               struct blk_io_trace *t = buf;
-
-               memcpy(buf, tip->buf + tip->buf_offset, len);
+       int ret, bytes_left;
+       char *p = buf;
 
-               if (!check_magic)
+       bytes_left = buf_len;
+       while (bytes_left > 0) {
+               ret = write(fd, p, bytes_left);
+               if (ret == bytes_left)
                        break;
 
-               if (CHECK_MAGIC(t) && t->sequence >= tip->sequence) {
-                       tip->sequence = t->sequence;
-                       break;
+               if (ret < 0) {
+                       perror("write");
+                       return 1;
+               } else if (ret > 0) {
+                       p += ret;
+                       bytes_left -= ret;
+               } else {
+                       fprintf(stderr, "Zero write?\n");
+                       return 1;
                }
-       
-               if (is_done())
-                       return -1;
-
-               usleep(10000);
        }
 
-       tip->buf_offset += len;
        return 0;
 }
 
-static int get_data(struct thread_information *tip, void *buf, int len,
-                   int check_magic)
-{
-       if (tip->buf)
-               return get_data_mmap(tip, buf, len, check_magic);
-       else
-               return get_data_read(tip, buf, len);
-}
-
-static void *extract_data(struct thread_information *tip, char *ofn, int nb)
+static void *extract_data(struct thread_information *tip, int nb)
 {
        unsigned char *buf;
 
        buf = malloc(nb);
-       if (!get_data(tip, buf, nb, 0))
+       if (!read_data(tip, buf, nb))
                return buf;
 
        free(buf);
-       exit_trace(1);
        return NULL;
 }
 
+/*
+ * trace may start inside 'bit' or may need to be gotten further on
+ */
+static int get_event_slow(struct thread_information *tip,
+                         struct blk_io_trace *bit)
+{
+       const int inc = sizeof(__u32);
+       struct blk_io_trace foo;
+       int offset;
+       void *p;
+
+       /*
+        * check is trace is inside
+        */
+       offset = 0;
+       p = bit;
+       while (offset < sizeof(*bit)) {
+               p += inc;
+               offset += inc;
+
+               memcpy(&foo, p, inc);
+
+               if (CHECK_MAGIC(&foo))
+                       break;
+       }
+
+       /*
+        * part trace found inside, read the rest
+        */
+       if (offset < sizeof(*bit)) {
+               int good_bytes = sizeof(*bit) - offset;
+
+               memmove(bit, p, good_bytes);
+               p = (void *) bit + good_bytes;
+
+               return read_data(tip, p, offset);
+       }
+
+       /*
+        * nothing found, keep looking for start of trace
+        */
+       do {
+               if (read_data(tip, bit, sizeof(bit->magic)))
+                       return -1;
+       } while (!CHECK_MAGIC(bit));
+
+       /*
+        * now get the rest of it
+        */
+       p = &bit->sequence;
+       if (!read_data(tip, p, sizeof(*bit) - inc))
+               return -1;
+
+       return 0;
+}
+
+/*
+ * Sometimes relayfs screws us a little, if an event crosses a sub buffer
+ * boundary. So keep looking forward in the trace data until an event
+ * is found
+ */
+static int get_event(struct thread_information *tip, struct blk_io_trace *bit)
+{
+       /*
+        * optimize for the common fast case, a full trace read that
+        * succeeds
+        */
+       if (read_data(tip, bit, sizeof(*bit)))
+               return -1;
+
+       if (CHECK_MAGIC(bit))
+               return 0;
+
+       /*
+        * ok that didn't work, the event may start somewhere inside the
+        * trace itself
+        */
+       return get_event_slow(tip, bit);
+}
+
 static inline void tip_fd_unlock(struct thread_information *tip)
 {
        if (tip->fd_lock)
@@ -310,8 +387,8 @@ static inline void tip_fd_lock(struct thread_information *tip)
 static void *extract(void *arg)
 {
        struct thread_information *tip = arg;
-       int ret, pdu_len;
-       char dp[64], *pdu_data;
+       int pdu_len;
+       char *pdu_data;
        struct blk_io_trace t;
        pid_t pid = getpid();
        cpu_set_t cpu_mask;
@@ -334,29 +411,23 @@ static void *extract(void *arg)
                exit_trace(1);
        }
 
-       if (use_mmap) {
-               tip->buf = mmap(NULL, BUF_SIZE * BUF_NR, PROT_READ,
-                                       MAP_PRIVATE | MAP_POPULATE, tip->fd, 0);
-               if (tip->buf == MAP_FAILED) {
-                       perror("mmap");
-                       exit_trace(1);
-               }
-       }
-
        pdu_data = NULL;
        while (!is_done()) {
-               if (get_data(tip, &t, sizeof(t), 1))
+               if (get_event(tip, &t))
                        break;
 
                if (verify_trace(&t))
-                       exit_trace(1);
+                       break;
 
                pdu_len = t.pdu_len;
 
                trace_to_be(&t);
 
-               if (pdu_len)
-                       pdu_data = extract_data(tip, dp, pdu_len);
+               if (pdu_len) {
+                       pdu_data = extract_data(tip, pdu_len);
+                       if (!pdu_data)
+                               break;
+               }
 
                /*
                 * now we have both trace and payload, get a lock on the
@@ -364,29 +435,27 @@ static void *extract(void *arg)
                 */
                tip_fd_lock(tip);
 
-               ret = write(tip->ofd, &t, sizeof(t));
-               if (ret < 0) {
-                       fprintf(stderr,"Thread %d failed write\n", tip->cpu);
+               if (write_data(tip->ofd, &t, sizeof(t))) {
                        tip_fd_unlock(tip);
-                       exit_trace(1);
+                       break;
                }
 
-               if (pdu_data) {
-                       ret = write(tip->ofd, pdu_data, pdu_len);
-                       if (ret != pdu_len) {
-                               perror("write pdu data");
-                               tip_fd_unlock(tip);
-                               exit_trace(1);
-                       }
+               if (pdu_data && write_data(tip->ofd, pdu_data, pdu_len)) {
+                       tip_fd_unlock(tip);
+                       break;
+               }
 
+               tip_fd_unlock(tip);
+
+               if (pdu_data) {
                        free(pdu_data);
                        pdu_data = NULL;
                }
 
-               tip_fd_unlock(tip);
                tip->events_processed++;
        }
 
+       exit_trace(1);
        return NULL;
 }
 
@@ -406,11 +475,13 @@ static int start_threads(struct device_information *dip)
                        tip->ofd = dup(STDOUT_FILENO);
                        tip->fd_lock = &stdout_mutex;
                } else {
-                       if (output_name)
-                               sprintf(op, "%s.%d", output_name, tip->cpu);
-                       else
-                               sprintf(op, "%s_out.%d",
+                       if (output_name) {
+                               sprintf(op, "%s.blktrace.%d", output_name,
+                                       tip->cpu);
+                       } else {
+                               sprintf(op, "%s.blktrace.%d",
                                        dip->buts_name, tip->cpu);
+                       }
                        tip->ofd = open(op, O_CREAT|O_TRUNC|O_WRONLY, 0644);
                }
 
@@ -431,9 +502,6 @@ static int start_threads(struct device_information *dip)
 
 static void close_thread(struct thread_information *tip)
 {
-       if (tip->buf)
-               munmap(tip->buf, BUF_SIZE * BUF_NR);
-
        if (tip->fd != -1)
                close(tip->fd);
        if (tip->ofd != -1)
@@ -577,15 +645,27 @@ static void show_stats(void)
                printf("  Total:  %20lld events\n", events_processed);
        }
 }
-  
+
+static char usage_str[] = \
+       "-d <dev> [ -r relay path ] [ -o <output> ] [-k ] [ -w time ]\n" \
+       "[ -a action ] [ -A action mask ] [ -v ]\n\n" \
+       "\t-d Use specified device. May also be given last after options\n" \
+       "\t-r Path to mounted relayfs, defaults to /relay\n" \
+       "\t-o File(s) to send output to\n" \
+       "\t-k Kill a running trace\n" \
+       "\t-w Stop after defined time, in seconds\n" \
+       "\t-a Only trace specified actions. See documentation\n" \
+       "\t-A Give trace mask as a single value. See documentation\n" \
+       "\t-b Sub buffer size in KiB\n" \
+       "\t-n Number of sub buffers\n" \
+       "\t-v Print program version info\n\n";
+
 static void show_usage(char *program)
 {
-       fprintf(stderr,"Usage: %s [-d <dev>] "
-                      "[-a <trace> [-a <trace>]] <dev>\n",
-               program);
+       fprintf(stderr, "Usage: %s %s %s",program, blktrace_version, usage_str);
 }
 
-static void handle_sigint(int sig)
+static void handle_sigint(__attribute__((__unused__)) int sig)
 {
        done = 1;
 }
@@ -644,7 +724,26 @@ int main(int argc, char *argv[])
                                return 1;
                        }
                        break;
-
+               case 'v':
+                       printf("%s version %s\n", argv[0], blktrace_version);
+                       return 0;
+               case 'b':
+                       buf_size = atoi(optarg);
+                       if (buf_size <= 0) {
+                               fprintf(stderr,
+                                       "Invalid buffer size (%d)\n", buf_size);
+                               return 1;
+                       }
+                       buf_size <<= 10;
+                       break;
+               case 'n':
+                       buf_nr = atoi(optarg);
+                       if (buf_nr <= 0) {
+                               fprintf(stderr,
+                                       "Invalid buffer nr (%d)\n", buf_nr);
+                               return 1;
+                       }
+                       break;
                default:
                        show_usage(argv[0]);
                        return 1;