[PATCH] blktrace: note that the -b option is in KiB
[blktrace.git] / blktrace.c
index 0e12d10e0973e15fa23af3638aec2a384754d359..de0d9d1e0dc3af43550d7b60ff2cb1be2a22e7bc 100644 (file)
@@ -28,6 +28,7 @@
 #include <string.h>
 #include <sys/ioctl.h>
 #include <sys/param.h>
+#include <sys/statfs.h>
 #include <stdio.h>
 #include <stdlib.h>
 #include <sched.h>
 
 #include "blktrace.h"
 
+static char blktrace_version[] = "0.90";
+
 #define BUF_SIZE       (128 *1024)
 #define BUF_NR         (4)
 
+#define RELAYFS_TYPE   0xF0B4A981
+
 #define DECLARE_MASK_MAP(mask)          { BLK_TC_##mask, #mask, "BLK_TC_"#mask }
 #define COMPARE_MASK_MAP(mmp, str)                                      \
         (!strcasecmp((mmp)->short_form, (str)) ||                      \
@@ -52,7 +57,7 @@ struct mask_map {
        char *long_form;
 };
 
-struct mask_map mask_maps[] = {
+static struct mask_map mask_maps[] = {
        DECLARE_MASK_MAP(READ),
        DECLARE_MASK_MAP(WRITE),
        DECLARE_MASK_MAP(BARRIER),
@@ -65,56 +70,75 @@ struct mask_map mask_maps[] = {
        DECLARE_MASK_MAP(PC),
 };
 
-#define S_OPTS "d:a:A:r:o:kw:"
+#define S_OPTS "d:a:A:r:o:kw:vb:n:"
 static struct option l_opts[] = {
        {
                .name = "dev",
-               .has_arg = 1,
+               .has_arg = required_argument,
                .flag = NULL,
                .val = 'd'
        },
        {
                .name = "act-mask",
-               .has_arg = 1,
+               .has_arg = required_argument,
                .flag = NULL,
                .val = 'a'
        },
        {
                .name = "set-mask",
-               .has_arg = 1,
+               .has_arg = required_argument,
                .flag = NULL,
                .val = 'A'
        },
        {
                .name = "relay",
-               .has_arg = 1,
+               .has_arg = required_argument,
                .flag = NULL,
                .val = 'r'
        },
        {
                .name = "output",
-               .has_arg = 1,
+               .has_arg = required_argument,
                .flag = NULL,
                .val = 'o'
        },
        {
                .name = "kill",
-               .has_arg = 0,
+               .has_arg = no_argument,
                .flag = NULL,
                .val = 'k'
        },
        {
                .name = "stopwatch",
-               .has_arg = 1,
+               .has_arg = required_argument,
                .flag = NULL,
                .val = 'w'
        },
+       {
+               .name = "version",
+               .has_arg = no_argument,
+               .flag = NULL,
+               .val = 'v'
+       },
+       {
+               .name = "buffer size (in KiB)",
+               .has_arg = required_argument,
+               .flag = NULL,
+               .val = 'b'
+       },
+       {
+               .name = "nr of sub buffers",
+               .has_arg = required_argument,
+               .flag = NULL,
+               .val = 'n'
+       },
+       /* getopt_long() stops at this all-zero entry; it must stay last */
        {
                .name = NULL,
                .has_arg = 0,
                .flag = NULL,
                .val = 0
        }
 };
 
 struct thread_information {
@@ -123,6 +140,10 @@ struct thread_information {
 
        int fd;
        char fn[MAXPATHLEN + 64];
+       void *buf;
+       unsigned long buf_offset;
+       unsigned int buf_subbuf;
+       unsigned int sequence;
 
        pthread_mutex_t *fd_lock;
        int ofd;
@@ -149,6 +170,8 @@ static char *relay_path;
 static char *output_name;
 static int act_mask = ~0U;
 static int kill_running_trace;
+static unsigned int buf_size = BUF_SIZE;
+static unsigned int buf_nr = BUF_NR;
 
 #define is_done()      (*(volatile int *)(&done))
 static volatile int done;
@@ -159,7 +182,7 @@ static void exit_trace(int status);
 
 static int find_mask_map(char *string)
 {
-       int i;
+       unsigned int i;
 
        for (i = 0; i < sizeof(mask_maps)/sizeof(mask_maps[0]); i++)
                if (COMPARE_MASK_MAP(&mask_maps[i], string))
@@ -173,8 +196,8 @@ static int start_trace(struct device_information *dip)
        struct blk_user_trace_setup buts;
 
        memset(&buts, 0, sizeof(buts));
-       buts.buf_size = BUF_SIZE;
-       buts.buf_nr = BUF_NR;
+       buts.buf_size = buf_size;
+       buts.buf_nr = buf_nr;
        buts.act_mask = act_mask;
 
        if (ioctl(dip->fd, BLKSTARTTRACE, &buts) < 0) {
@@ -206,32 +229,147 @@ static void stop_all_traces(void)
                stop_trace(dip);
 }
 
-static void *extract_data(struct thread_information *tip, char *ofn, int nb)
+static int read_data(struct thread_information *tip, void *buf, int len)
 {
-       int ret, bytes_left;
-       unsigned char *buf, *p;
+       char *p = buf;
+       int ret, bytes_left = len;
 
-       buf = malloc(nb);
-       p = buf;
-       bytes_left = nb;
-       while (bytes_left > 0) {
+       while (!is_done() && bytes_left > 0) {
                ret = read(tip->fd, p, bytes_left);
-               if (!ret)
-                       usleep(1000);
-               else if (ret < 0) {
+               if (ret == bytes_left)
+                       return 0;
+
+               if (ret < 0) {
                        perror(tip->fn);
-                       fprintf(stderr, "Thread %d extract_data %s failed\n",
+                       fprintf(stderr,"Thread %d failed read of %s\n",
                                tip->cpu, tip->fn);
-                       free(buf);
-                       exit_trace(1);
-                       return NULL;
-               } else {
+                       break;
+               } else if (ret > 0) {
                        p += ret;
                        bytes_left -= ret;
+               } else
+                       usleep(1000);
+       }
+
+       return -1;
+}
+
+static int write_data(int fd, void *buf, unsigned int buf_len)
+{
+       int ret, bytes_left;
+       char *p = buf;
+
+       bytes_left = buf_len;
+       while (bytes_left > 0) {
+               ret = write(fd, p, bytes_left);
+               if (ret == bytes_left)
+                       break;
+
+               if (ret < 0) {
+                       perror("write");
+                       return 1;
+               } else if (ret > 0) {
+                       p += ret;
+                       bytes_left -= ret;
+               } else {
+                       fprintf(stderr, "Zero write?\n");
+                       return 1;
                }
        }
 
-       return buf;
+       return 0;
+}
+
+/* read 'nb' bytes into a fresh buffer the caller frees, NULL on failure */
+static void *extract_data(struct thread_information *tip, int nb)
+{
+       unsigned char *buf = malloc(nb);
+
+       /* a failed allocation is reported the same way as a failed read */
+       if (buf && !read_data(tip, buf, nb))
+               return buf;
+       free(buf);
+       return NULL;
+}
+
+/*
+ * Slow path of get_event(): the header read into 'bit' did not start on
+ * a trace. The trace may start inside 'bit' or may need to be gotten
+ * further on in the stream.
+ * Returns 0 once 'bit' holds a full struct blk_io_trace, non-zero if
+ * read_data() fails.
+ */
+static int get_event_slow(struct thread_information *tip,
+                         struct blk_io_trace *bit)
+{
+       const int inc = sizeof(__u32);
+       const int len = sizeof(*bit);
+       struct blk_io_trace foo;
+       char *p = (char *) bit;
+       int offset;
+
+       /*
+        * check if a trace starts inside 'bit'. Offset 0 was already
+        * rejected by the caller, and a probe must never run past the
+        * end of 'bit'. Only the first word of 'foo' is filled in, which
+        * assumes CHECK_MAGIC() looks at nothing else (TODO confirm)
+        */
+       for (offset = inc; offset + inc <= len; offset += inc) {
+               memcpy(&foo, p + offset, inc);
+               if (CHECK_MAGIC(&foo))
+                       break;
+       }
+
+       /*
+        * part trace found inside, move it to the front and read the rest
+        */
+       if (offset + inc <= len) {
+               int good_bytes = len - offset;
+
+               memmove(bit, p + offset, good_bytes);
+               return read_data(tip, p + good_bytes, offset);
+       }
+
+       /*
+        * nothing found, keep looking for start of trace
+        */
+       do {
+               if (read_data(tip, bit, sizeof(bit->magic)))
+                       return -1;
+       } while (!CHECK_MAGIC(bit));
+
+       /*
+        * now get the rest of it. read_data() returns 0 on success, so
+        * only a non-zero result is a failure here
+        */
+       if (read_data(tip, &bit->sequence, len - inc))
+               return -1;
+
+       return 0;
+}
+
+/*
+ * Sometimes relayfs screws us a little, if an event crosses a sub buffer
+ * boundary. So keep looking forward in the trace data until an event
+ * is found
+ */
+static int get_event(struct thread_information *tip, struct blk_io_trace *bit)
+{
+       /*
+        * optimize for the common fast case, a full trace read that
+        * succeeds
+        */
+       if (read_data(tip, bit, sizeof(*bit)))
+               return -1;
+
+       if (CHECK_MAGIC(bit))
+               return 0;
+
+       /*
+        * ok that didn't work, the event may start somewhere inside the
+        * trace itself
+        */
+       return get_event_slow(tip, bit);
 }
 
 static inline void tip_fd_unlock(struct thread_information *tip)
@@ -249,8 +387,8 @@ static inline void tip_fd_lock(struct thread_information *tip)
 static void *extract(void *arg)
 {
        struct thread_information *tip = arg;
-       int ret, pdu_len;
-       char dp[64], *pdu_data;
+       int pdu_len;
+       char *pdu_data;
        struct blk_io_trace t;
        pid_t pid = getpid();
        cpu_set_t cpu_mask;
@@ -275,32 +413,21 @@ static void *extract(void *arg)
 
        pdu_data = NULL;
        while (!is_done()) {
-               ret = read(tip->fd, &t, sizeof(t));
-               if (ret != sizeof(t)) {
-                       if (ret < 0) {
-                               perror(tip->fn);
-                               fprintf(stderr,"Thread %d failed read of %s\n",
-                                       tip->cpu, tip->fn);
-                               exit_trace(1);
-                       } else if (ret > 0) {
-                               fprintf(stderr,"Thread %d misread %s %d,%d\n",
-                                       tip->cpu, tip->fn, ret, (int)sizeof(t));
-                               exit_trace(1);
-                       } else {
-                               usleep(10000);
-                               continue;
-                       }
-               }
+               if (get_event(tip, &t))
+                       break;
 
                if (verify_trace(&t))
-                       exit_trace(1);
+                       break;
 
                pdu_len = t.pdu_len;
 
                trace_to_be(&t);
 
-               if (pdu_len)
-                       pdu_data = extract_data(tip, dp, pdu_len);
+               if (pdu_len) {
+                       pdu_data = extract_data(tip, pdu_len);
+                       if (!pdu_data)
+                               break;
+               }
 
                /*
                 * now we have both trace and payload, get a lock on the
@@ -308,28 +435,27 @@ static void *extract(void *arg)
                 */
                tip_fd_lock(tip);
 
-               ret = write(tip->ofd, &t, sizeof(t));
-               if (ret < 0) {
-                       fprintf(stderr,"Thread %d failed write\n", tip->cpu);
+               if (write_data(tip->ofd, &t, sizeof(t))) {
                        tip_fd_unlock(tip);
-                       exit_trace(1);
+                       break;
                }
 
-               if (pdu_data) {
-                       ret = write(tip->ofd, pdu_data, pdu_len);
-                       if (ret != pdu_len) {
-                               perror("write pdu data");
-                               exit_trace(1);
-                       }
+               if (pdu_data && write_data(tip->ofd, pdu_data, pdu_len)) {
+                       tip_fd_unlock(tip);
+                       break;
+               }
 
+               tip_fd_unlock(tip);
+
+               if (pdu_data) {
                        free(pdu_data);
                        pdu_data = NULL;
                }
 
-               tip_fd_unlock(tip);
                tip->events_processed++;
        }
 
+       exit_trace(1);
        return NULL;
 }
 
@@ -349,12 +475,13 @@ static int start_threads(struct device_information *dip)
                        tip->ofd = dup(STDOUT_FILENO);
                        tip->fd_lock = &stdout_mutex;
                } else {
-                       if (output_name)
-                               sprintf(op, "%s_%s_out.%d", output_name,
-                                       dip->buts_name, tip->cpu);
-                       else
-                               sprintf(op, "%s_out.%d",
+                       if (output_name) {
+                               sprintf(op, "%s.blktrace.%d", output_name,
+                                       tip->cpu);
+                       } else {
+                               sprintf(op, "%s.blktrace.%d",
                                        dip->buts_name, tip->cpu);
+                       }
                        tip->ofd = open(op, O_CREAT|O_TRUNC|O_WRONLY, 0644);
                }
 
@@ -379,6 +506,7 @@ static void close_thread(struct thread_information *tip)
                close(tip->fd);
        if (tip->ofd != -1)
                close(tip->ofd);
+
        tip->fd = tip->ofd = -1;
 }
 
@@ -502,7 +630,7 @@ static void show_stats(void)
        struct device_information *dip;
        struct thread_information *tip;
        unsigned long long events_processed;
-  
+
        if (output_name && !strcmp(output_name, "-"))
                return;
 
@@ -517,15 +645,27 @@ static void show_stats(void)
                printf("  Total:  %20lld events\n", events_processed);
        }
 }
-  
+
+static char usage_str[] = \
+       "-d <dev> [ -r relay path ] [ -o <output> ] [-k ] [ -w time ]\n" \
+       "[ -a action ] [ -A action mask ] [ -b size ] [ -n number ] [ -v ]\n\n" \
+       "\t-d Use specified device. May also be given last after options\n" \
+       "\t-r Path to mounted relayfs, defaults to /relay\n" \
+       "\t-o File(s) to send output to\n" \
+       "\t-k Kill a running trace\n" \
+       "\t-w Stop after defined time, in seconds\n" \
+       "\t-a Only trace specified actions. See documentation\n" \
+       "\t-A Give trace mask as a single value. See documentation\n" \
+       "\t-b Sub buffer size in KiB\n" \
+       "\t-n Number of sub buffers\n" \
+       "\t-v Print program version info\n\n";
+
 static void show_usage(char *program)
 {
-       fprintf(stderr,"Usage: %s [-d <dev>] "
-                      "[-a <trace> [-a <trace>]] <dev>\n",
-               program);
+       fprintf(stderr, "Usage: %s %s %s",program, blktrace_version, usage_str);
 }
 
-static void handle_sigint(int sig)
+static void handle_sigint(__attribute__((__unused__)) int sig)
 {
        done = 1;
 }
@@ -533,7 +673,7 @@ static void handle_sigint(int sig)
 int main(int argc, char *argv[])
 {
        static char default_relay_path[] = "/relay";
-       struct stat st;
+       struct statfs st;
        int i, c;
        int stop_watch = 0;
        int act_mask_tmp = 0;
@@ -543,7 +683,7 @@ int main(int argc, char *argv[])
                case 'a':
                        i = find_mask_map(optarg);
                        if (i < 0) {
-                               fprintf(stderr,"Invalid action mask %s\n", 
+                               fprintf(stderr,"Invalid action mask %s\n",
                                        optarg);
                                return 1;
                        }
@@ -553,7 +693,7 @@ int main(int argc, char *argv[])
                case 'A':
                        if ((sscanf(optarg, "%x", &i) != 1) || !VALID_SET(i)) {
                                fprintf(stderr,
-                                       "Invalid set action mask %s/0x%x\n", 
+                                       "Invalid set action mask %s/0x%x\n",
                                        optarg, i);
                                return 1;
                        }
@@ -584,7 +724,26 @@ int main(int argc, char *argv[])
                                return 1;
                        }
                        break;
-
+               case 'v':
+                       printf("%s version %s\n", argv[0], blktrace_version);
+                       return 0;
+               case 'b':
+                       i = atoi(optarg);       /* signed, so "-5" is caught */
+                       if (i <= 0 || (unsigned int) i > (~0U >> 10)) {
+                               fprintf(stderr,
+                                       "Invalid buffer size (%d)\n", i);
+                               return 1;
+                       }
+                       buf_size = (unsigned int) i << 10;      /* KiB -> bytes */
+                       break;
+               case 'n':
+                       i = atoi(optarg);       /* signed, so "-5" is caught */
+                       if (i <= 0) {
+                               fprintf(stderr, "Invalid buffer nr (%d)\n", i);
+                               return 1;
+                       }
+                       buf_nr = i;
+                       break;
                default:
                        show_usage(argv[0]);
                        return 1;
@@ -607,8 +766,13 @@ int main(int argc, char *argv[])
        if (act_mask_tmp != 0)
                act_mask = act_mask_tmp;
 
-       if (stat(relay_path, &st) < 0) {
-               fprintf(stderr,"%s does not appear to be mounted\n",
+       if (statfs(relay_path, &st) < 0) {
+               perror("statfs");
+               fprintf(stderr,"%s does not appear to be a valid path\n",
+                       relay_path);
+               return 1;
+       } else if (st.f_type != RELAYFS_TYPE) {
+               fprintf(stderr,"%s does not appear to be a relay filesystem\n",
                        relay_path);
                return 1;
        }