Fix io piece logging to not have O(n) runtime
author Jens Axboe <jens.axboe@oracle.com>
Mon, 26 Mar 2007 07:32:22 +0000 (09:32 +0200)
committer Jens Axboe <jens.axboe@oracle.com>
Mon, 26 Mar 2007 07:32:22 +0000 (09:32 +0200)
Use an rbtree for that log instead.

Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
Makefile
Makefile.FreeBSD
Makefile.solaris
fio.c
fio.h
log.c
rbtree.c [new file with mode: 0644]
rbtree.h [new file with mode: 0644]
verify.c

diff --git a/Makefile b/Makefile
index c79d36b..10b4ffc 100644 (file)
--- a/Makefile
+++ b/Makefile
@@ -6,7 +6,8 @@ CFLAGS  = -Wwrite-strings -Wall -D_GNU_SOURCE -D_LARGEFILE_SOURCE -D_FILE_OFFSET_
 PROGS  = fio
 SCRIPTS = fio_generate_plots
 OBJS = gettime.o fio.o ioengines.o init.o stat.o log.o time.o md5.o crc32.o \
-       filesetup.o eta.o verify.o memory.o io_u.o parse.o mutex.o options.o
+       filesetup.o eta.o verify.o memory.o io_u.o parse.o mutex.o options.o \
+       rbtree.o
 
 OBJS += engines/cpu.o
 OBJS += engines/libaio.o
diff --git a/Makefile.FreeBSD b/Makefile.FreeBSD
index d5e675a..334395b 100644 (file)
--- a/Makefile.FreeBSD
+++ b/Makefile.FreeBSD
@@ -3,7 +3,8 @@ CFLAGS  = -Wall -O2 -g -D_GNU_SOURCE -D_LARGEFILE_SOURCE -D_FILE_OFFSET_BITS=64
 PROGS  = fio
 SCRIPTS = fio_generate_plots
 OBJS = gettime.o fio.o ioengines.o init.o stat.o log.o time.o md5.o crc32.o \
-       filesetup.o eta.o verify.o memory.o io_u.o parse.o mutex.o options.o
+       filesetup.o eta.o verify.o memory.o io_u.o parse.o mutex.o options.o \
+       rbtree.o
 
 OBJS += engines/cpu.o
 OBJS += engines/mmap.o
diff --git a/Makefile.solaris b/Makefile.solaris
index bdb8274..f386041 100644 (file)
--- a/Makefile.solaris
+++ b/Makefile.solaris
@@ -3,7 +3,8 @@ CFLAGS  = -Wall -O2 -g -D_GNU_SOURCE -D_LARGEFILE_SOURCE -D_FILE_OFFSET_BITS=64
 PROGS  = fio
 SCRIPTS = fio_generate_plots
 OBJS = gettime.o fio.o ioengines.o init.o stat.o log.o time.o md5.o crc32.o \
-       filesetup.o eta.o verify.o memory.o io_u.o parse.o mutex.o options.o
+       filesetup.o eta.o verify.o memory.o io_u.o parse.o mutex.o options.o \
+       rbtree.o
 
 OBJS += engines/cpu.o
 OBJS += engines/mmap.o
diff --git a/fio.c b/fio.c
index f6c3fc8..504b78d 100644 (file)
--- a/fio.c
+++ b/fio.c
@@ -735,7 +735,6 @@ static void *thread_main(void *data)
        INIT_LIST_HEAD(&td->io_u_freelist);
        INIT_LIST_HEAD(&td->io_u_busylist);
        INIT_LIST_HEAD(&td->io_u_requeues);
-       INIT_LIST_HEAD(&td->io_hist_list);
        INIT_LIST_HEAD(&td->io_log_list);
 
        if (init_io_u(td))
diff --git a/fio.h b/fio.h
index 736fefb..4111bff 100644 (file)
--- a/fio.h
+++ b/fio.h
@@ -14,6 +14,7 @@
 #include <getopt.h>
 
 #include "list.h"
+#include "rbtree.h"
 #include "md5.h"
 #include "crc32.h"
 #include "arch.h"
@@ -78,7 +79,10 @@ struct io_log {
  * When logging io actions, this matches a single sent io_u
  */
 struct io_piece {
-       struct list_head list;
+       union {
+               struct rb_node rb_node;
+               struct list_head list;
+       };
        struct fio_file *file;
        unsigned long long offset;
        unsigned long len;
@@ -510,7 +514,7 @@ struct thread_data {
        /*
         * IO historic logs
         */
-       struct list_head io_hist_list;
+       struct rb_root io_hist_tree;
        struct list_head io_log_list;
 
        /*
diff --git a/log.c b/log.c
index dbca3cc..2b90f45 100644 (file)
--- a/log.c
+++ b/log.c
@@ -29,11 +29,11 @@ int read_iolog_get(struct thread_data *td, struct io_u *io_u)
 void prune_io_piece_log(struct thread_data *td)
 {
        struct io_piece *ipo;
+       struct rb_node *n;
 
-       while (!list_empty(&td->io_hist_list)) {
-               ipo = list_entry(td->io_hist_list.next, struct io_piece, list);
-
-               list_del(&ipo->list);
+       while ((n = rb_first(&td->io_hist_tree)) != NULL) {
+               ipo = rb_entry(n, struct io_piece, rb_node);
+               rb_erase(n, &td->io_hist_tree);
                free(ipo);
        }
 }
@@ -43,36 +43,34 @@ void prune_io_piece_log(struct thread_data *td)
  */
 void log_io_piece(struct thread_data *td, struct io_u *io_u)
 {
-       struct io_piece *ipo = malloc(sizeof(struct io_piece));
-       struct list_head *entry;
+       struct rb_node **p = &td->io_hist_tree.rb_node;
+       struct rb_node *parent = NULL;
+       struct io_piece *ipo, *__ipo;
 
-       INIT_LIST_HEAD(&ipo->list);
+       ipo = malloc(sizeof(struct io_piece));
+       memset(&ipo->rb_node, 0, sizeof(ipo->rb_node));
        ipo->file = io_u->file;
        ipo->offset = io_u->offset;
        ipo->len = io_u->buflen;
 
        /*
-        * for random io where the writes extend the file, it will typically
-        * be laid out with the block scattered as written. it's faster to
-        * read them in in that order again, so don't sort
-        */
-       if (!td_random(td) || !td->o.overwrite) {
-               list_add_tail(&ipo->list, &td->io_hist_list);
-               return;
-       }
-
-       /*
-        * for random io, sort the list so verify will run faster
+        * Sort the entry into the verification tree, ordered by offset.
+        * Entries with an equal offset (same block written again, or the
+        * same offset in another file) go to the right, so the loop always
+        * ends on an empty slot and no existing node is overwritten.
         */
-       entry = &td->io_hist_list;
-       while ((entry = entry->prev) != &td->io_hist_list) {
-               struct io_piece *__ipo = list_entry(entry, struct io_piece, list);
-
-               if (__ipo->offset < ipo->offset)
-                       break;
+       while (*p) {
+               parent = *p;
+
+               __ipo = rb_entry(parent, struct io_piece, rb_node);
+               if (ipo->offset < __ipo->offset)
+                       p = &(*p)->rb_left;
+               else
+                       p = &(*p)->rb_right;
        }
 
-       list_add(&ipo->list, entry);
+       rb_link_node(&ipo->rb_node, parent, p);
+       rb_insert_color(&ipo->rb_node, &td->io_hist_tree);
 }
 
 void write_iolog_close(struct thread_data *td)
diff --git a/rbtree.c b/rbtree.c
new file mode 100644 (file)
index 0000000..cc4093a
--- /dev/null
+++ b/rbtree.c
@@ -0,0 +1,363 @@
+/*
+  Red Black Trees
+  (C) 1999  Andrea Arcangeli <andrea@suse.de>
+  (C) 2002  David Woodhouse <dwmw2@infradead.org>
+  
+  This program is free software; you can redistribute it and/or modify
+  it under the terms of the GNU General Public License as published by
+  the Free Software Foundation; either version 2 of the License, or
+  (at your option) any later version.
+
+  This program is distributed in the hope that it will be useful,
+  but WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+  GNU General Public License for more details.
+
+  You should have received a copy of the GNU General Public License
+  along with this program; if not, write to the Free Software
+  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+
+  linux/lib/rbtree.c
+*/
+
+#include "rbtree.h"
+
+static void __rb_rotate_left(struct rb_node *node, struct rb_root *root)
+{
+       struct rb_node *right = node->rb_right;
+
+       if ((node->rb_right = right->rb_left))
+               right->rb_left->rb_parent = node;
+       right->rb_left = node;
+
+       if ((right->rb_parent = node->rb_parent))
+       {
+               if (node == node->rb_parent->rb_left)
+                       node->rb_parent->rb_left = right;
+               else
+                       node->rb_parent->rb_right = right;
+       }
+       else
+               root->rb_node = right;
+       node->rb_parent = right;
+}
+
+static void __rb_rotate_right(struct rb_node *node, struct rb_root *root)
+{
+       struct rb_node *left = node->rb_left;
+
+       if ((node->rb_left = left->rb_right))
+               left->rb_right->rb_parent = node;
+       left->rb_right = node;
+
+       if ((left->rb_parent = node->rb_parent))
+       {
+               if (node == node->rb_parent->rb_right)
+                       node->rb_parent->rb_right = left;
+               else
+                       node->rb_parent->rb_left = left;
+       }
+       else
+               root->rb_node = left;
+       node->rb_parent = left;
+}
+
+void rb_insert_color(struct rb_node *node, struct rb_root *root)
+{
+       struct rb_node *parent, *gparent;
+
+       while ((parent = node->rb_parent) && parent->rb_color == RB_RED)
+       {
+               gparent = parent->rb_parent;
+
+               if (parent == gparent->rb_left)
+               {
+                       {
+                               register struct rb_node *uncle = gparent->rb_right;
+                               if (uncle && uncle->rb_color == RB_RED)
+                               {
+                                       uncle->rb_color = RB_BLACK;
+                                       parent->rb_color = RB_BLACK;
+                                       gparent->rb_color = RB_RED;
+                                       node = gparent;
+                                       continue;
+                               }
+                       }
+
+                       if (parent->rb_right == node)
+                       {
+                               register struct rb_node *tmp;
+                               __rb_rotate_left(parent, root);
+                               tmp = parent;
+                               parent = node;
+                               node = tmp;
+                       }
+
+                       parent->rb_color = RB_BLACK;
+                       gparent->rb_color = RB_RED;
+                       __rb_rotate_right(gparent, root);
+               } else {
+                       {
+                               register struct rb_node *uncle = gparent->rb_left;
+                               if (uncle && uncle->rb_color == RB_RED)
+                               {
+                                       uncle->rb_color = RB_BLACK;
+                                       parent->rb_color = RB_BLACK;
+                                       gparent->rb_color = RB_RED;
+                                       node = gparent;
+                                       continue;
+                               }
+                       }
+
+                       if (parent->rb_left == node)
+                       {
+                               register struct rb_node *tmp;
+                               __rb_rotate_right(parent, root);
+                               tmp = parent;
+                               parent = node;
+                               node = tmp;
+                       }
+
+                       parent->rb_color = RB_BLACK;
+                       gparent->rb_color = RB_RED;
+                       __rb_rotate_left(gparent, root);
+               }
+       }
+
+       root->rb_node->rb_color = RB_BLACK;
+}
+
+static void __rb_erase_color(struct rb_node *node, struct rb_node *parent,
+                            struct rb_root *root)
+{
+       struct rb_node *other;
+
+       while ((!node || node->rb_color == RB_BLACK) && node != root->rb_node)
+       {
+               if (parent->rb_left == node)
+               {
+                       other = parent->rb_right;
+                       if (other->rb_color == RB_RED)
+                       {
+                               other->rb_color = RB_BLACK;
+                               parent->rb_color = RB_RED;
+                               __rb_rotate_left(parent, root);
+                               other = parent->rb_right;
+                       }
+                       if ((!other->rb_left ||
+                            other->rb_left->rb_color == RB_BLACK)
+                           && (!other->rb_right ||
+                               other->rb_right->rb_color == RB_BLACK))
+                       {
+                               other->rb_color = RB_RED;
+                               node = parent;
+                               parent = node->rb_parent;
+                       }
+                       else
+                       {
+                               if (!other->rb_right ||
+                                   other->rb_right->rb_color == RB_BLACK)
+                               {
+                                       register struct rb_node *o_left;
+                                       if ((o_left = other->rb_left))
+                                               o_left->rb_color = RB_BLACK;
+                                       other->rb_color = RB_RED;
+                                       __rb_rotate_right(other, root);
+                                       other = parent->rb_right;
+                               }
+                               other->rb_color = parent->rb_color;
+                               parent->rb_color = RB_BLACK;
+                               if (other->rb_right)
+                                       other->rb_right->rb_color = RB_BLACK;
+                               __rb_rotate_left(parent, root);
+                               node = root->rb_node;
+                               break;
+                       }
+               }
+               else
+               {
+                       other = parent->rb_left;
+                       if (other->rb_color == RB_RED)
+                       {
+                               other->rb_color = RB_BLACK;
+                               parent->rb_color = RB_RED;
+                               __rb_rotate_right(parent, root);
+                               other = parent->rb_left;
+                       }
+                       if ((!other->rb_left ||
+                            other->rb_left->rb_color == RB_BLACK)
+                           && (!other->rb_right ||
+                               other->rb_right->rb_color == RB_BLACK))
+                       {
+                               other->rb_color = RB_RED;
+                               node = parent;
+                               parent = node->rb_parent;
+                       }
+                       else
+                       {
+                               if (!other->rb_left ||
+                                   other->rb_left->rb_color == RB_BLACK)
+                               {
+                                       register struct rb_node *o_right;
+                                       if ((o_right = other->rb_right))
+                                               o_right->rb_color = RB_BLACK;
+                                       other->rb_color = RB_RED;
+                                       __rb_rotate_left(other, root);
+                                       other = parent->rb_left;
+                               }
+                               other->rb_color = parent->rb_color;
+                               parent->rb_color = RB_BLACK;
+                               if (other->rb_left)
+                                       other->rb_left->rb_color = RB_BLACK;
+                               __rb_rotate_right(parent, root);
+                               node = root->rb_node;
+                               break;
+                       }
+               }
+       }
+       if (node)
+               node->rb_color = RB_BLACK;
+}
+
+void rb_erase(struct rb_node *node, struct rb_root *root)
+{
+       struct rb_node *child, *parent;
+       int color;
+
+       if (!node->rb_left)
+               child = node->rb_right;
+       else if (!node->rb_right)
+               child = node->rb_left;
+       else
+       {
+               struct rb_node *old = node, *left;
+
+               node = node->rb_right;
+               while ((left = node->rb_left) != NULL)
+                       node = left;
+               child = node->rb_right;
+               parent = node->rb_parent;
+               color = node->rb_color;
+
+               if (child)
+                       child->rb_parent = parent;
+               if (parent)
+               {
+                       if (parent->rb_left == node)
+                               parent->rb_left = child;
+                       else
+                               parent->rb_right = child;
+               }
+               else
+                       root->rb_node = child;
+
+               if (node->rb_parent == old)
+                       parent = node;
+               node->rb_parent = old->rb_parent;
+               node->rb_color = old->rb_color;
+               node->rb_right = old->rb_right;
+               node->rb_left = old->rb_left;
+
+               if (old->rb_parent)
+               {
+                       if (old->rb_parent->rb_left == old)
+                               old->rb_parent->rb_left = node;
+                       else
+                               old->rb_parent->rb_right = node;
+               } else
+                       root->rb_node = node;
+
+               old->rb_left->rb_parent = node;
+               if (old->rb_right)
+                       old->rb_right->rb_parent = node;
+               goto color;
+       }
+
+       parent = node->rb_parent;
+       color = node->rb_color;
+
+       if (child)
+               child->rb_parent = parent;
+       if (parent)
+       {
+               if (parent->rb_left == node)
+                       parent->rb_left = child;
+               else
+                       parent->rb_right = child;
+       }
+       else
+               root->rb_node = child;
+
+ color:
+       if (color == RB_BLACK)
+               __rb_erase_color(child, parent, root);
+}
+
+/*
+ * Return the first node (in sort order) of the tree, or NULL if it is empty.
+ */
+struct rb_node *rb_first(struct rb_root *root)
+{
+       struct rb_node  *n;
+
+       n = root->rb_node;
+       if (!n)
+               return NULL;
+       while (n->rb_left)
+               n = n->rb_left;
+       return n;
+}
+
+struct rb_node *rb_last(struct rb_root *root)
+{
+       struct rb_node  *n;
+
+       n = root->rb_node;
+       if (!n)
+               return NULL;
+       while (n->rb_right)
+               n = n->rb_right;
+       return n;
+}
+
+struct rb_node *rb_next(struct rb_node *node)
+{
+       /* If we have a right-hand child, go down and then left as far
+          as we can. */
+       if (node->rb_right) {
+               node = node->rb_right; 
+               while (node->rb_left)
+                       node=node->rb_left;
+               return node;
+       }
+
+       /* No right-hand children.  Everything down and left is
+          smaller than us, so any 'next' node must be in the general
+          direction of our parent. Go up the tree while we are a
+          right-hand child. The first time we are a left-hand child,
+          that parent is our 'next' node; if we run out of parents
+          first, the NULL parent is returned. */
+       while (node->rb_parent && node == node->rb_parent->rb_right)
+               node = node->rb_parent;
+
+       return node->rb_parent;
+}
+
+struct rb_node *rb_prev(struct rb_node *node)
+{
+       /* If we have a left-hand child, go down and then right as far
+          as we can. */
+       if (node->rb_left) {
+               node = node->rb_left; 
+               while (node->rb_right)
+                       node=node->rb_right;
+               return node;
+       }
+
+       /* No left-hand children. Go up while we are a left-hand child;
+          the parent we stop under (NULL at the top) is our 'prev' node. */
+       while (node->rb_parent && node == node->rb_parent->rb_left)
+               node = node->rb_parent;
+
+       return node->rb_parent;
+}
diff --git a/rbtree.h b/rbtree.h
new file mode 100644 (file)
index 0000000..2cb9e37
--- /dev/null
+++ b/rbtree.h
@@ -0,0 +1,147 @@
+/*
+  Red Black Trees
+  (C) 1999  Andrea Arcangeli <andrea@suse.de>
+  
+  This program is free software; you can redistribute it and/or modify
+  it under the terms of the GNU General Public License as published by
+  the Free Software Foundation; either version 2 of the License, or
+  (at your option) any later version.
+
+  This program is distributed in the hope that it will be useful,
+  but WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+  GNU General Public License for more details.
+
+  You should have received a copy of the GNU General Public License
+  along with this program; if not, write to the Free Software
+  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+
+  linux/include/linux/rbtree.h
+
+  To use rbtrees you'll have to implement your own insert and search cores.
+  This avoids callbacks, which would dramatically reduce performance.
+  I know it's not the cleanest way, but in C (unlike C++) that is the price
+  of getting both performance and genericity.
+
+  An example of insert and search follows here. The search is a plain
+  normal search over an ordered tree. The insert, however, must be implemented
+  in two steps: first the code must insert the element in
+  order as a red leaf in the tree, then the support library function
+  rb_insert_color() must be called. That function will do the
+  non-trivial work of rebalancing the rbtree if necessary.
+
+-----------------------------------------------------------------------
+static inline struct page * rb_search_page_cache(struct inode * inode,
+                                                unsigned long offset)
+{
+       struct rb_node * n = inode->i_rb_page_cache.rb_node;
+       struct page * page;
+
+       while (n)
+       {
+               page = rb_entry(n, struct page, rb_page_cache);
+
+               if (offset < page->offset)
+                       n = n->rb_left;
+               else if (offset > page->offset)
+                       n = n->rb_right;
+               else
+                       return page;
+       }
+       return NULL;
+}
+
+static inline struct page * __rb_insert_page_cache(struct inode * inode,
+                                                  unsigned long offset,
+                                                  struct rb_node * node)
+{
+       struct rb_node ** p = &inode->i_rb_page_cache.rb_node;
+       struct rb_node * parent = NULL;
+       struct page * page;
+
+       while (*p)
+       {
+               parent = *p;
+               page = rb_entry(parent, struct page, rb_page_cache);
+
+               if (offset < page->offset)
+                       p = &(*p)->rb_left;
+               else if (offset > page->offset)
+                       p = &(*p)->rb_right;
+               else
+                       return page;
+       }
+
+       rb_link_node(node, parent, p);
+
+       return NULL;
+}
+
+static inline struct page * rb_insert_page_cache(struct inode * inode,
+                                                unsigned long offset,
+                                                struct rb_node * node)
+{
+       struct page * ret;
+       if ((ret = __rb_insert_page_cache(inode, offset, node)))
+               goto out;
+       rb_insert_color(node, &inode->i_rb_page_cache);
+ out:
+       return ret;
+}
+-----------------------------------------------------------------------
+*/
+
+#ifndef        _LINUX_RBTREE_H
+#define        _LINUX_RBTREE_H
+
+#include <stdlib.h>
+
+struct rb_node
+{
+       struct rb_node *rb_parent;
+       int rb_color;
+#define        RB_RED          0
+#define        RB_BLACK        1
+       struct rb_node *rb_right;
+       struct rb_node *rb_left;
+};
+
+struct rb_root
+{
+       struct rb_node *rb_node;
+};
+
+#undef offsetof
+#ifdef __compiler_offsetof
+#define offsetof(TYPE,MEMBER) __compiler_offsetof(TYPE,MEMBER)
+#else
+#define offsetof(TYPE, MEMBER) ((size_t) &((TYPE *)0)->MEMBER)
+#endif
+
+#define container_of(ptr, type, member) ({                      \
+       const typeof( ((type *)0)->member ) *__mptr = (ptr);    \
+       (type *)( (char *)__mptr - offsetof(type,member) );})
+
+#define RB_ROOT        (struct rb_root) { NULL, }
+#define        rb_entry(ptr, type, member) container_of(ptr, type, member)
+
+extern void rb_insert_color(struct rb_node *, struct rb_root *);
+extern void rb_erase(struct rb_node *, struct rb_root *);
+
+/* Find logical next and previous nodes in a tree */
+extern struct rb_node *rb_next(struct rb_node *);
+extern struct rb_node *rb_prev(struct rb_node *);
+extern struct rb_node *rb_first(struct rb_root *);
+extern struct rb_node *rb_last(struct rb_root *);
+
+static inline void rb_link_node(struct rb_node * node, struct rb_node * parent,
+                               struct rb_node ** rb_link)
+{
+       node->rb_parent = parent;
+       node->rb_color = RB_RED;
+       node->rb_left = node->rb_right = NULL;
+
+       *rb_link = node;
+}
+
+#endif /* _LINUX_RBTREE_H */
diff --git a/verify.c b/verify.c
index f748065..7fbb2e6 100644 (file)
--- a/verify.c
+++ b/verify.c
@@ -145,6 +145,7 @@ void populate_verify_io_u(struct thread_data *td, struct io_u *io_u)
 int get_next_verify(struct thread_data *td, struct io_u *io_u)
 {
        struct io_piece *ipo;
+       struct rb_node *n;
 
        /*
         * this io_u is from a requeue, we already filled the offsets
@@ -152,10 +153,11 @@ int get_next_verify(struct thread_data *td, struct io_u *io_u)
        if (io_u->file)
                return 0;
 
-       if (!list_empty(&td->io_hist_list)) {
-               ipo = list_entry(td->io_hist_list.next, struct io_piece, list);
+       n = rb_first(&td->io_hist_tree);
+       if (n) {
+               ipo = rb_entry(n, struct io_piece, rb_node);
 
-               list_del(&ipo->list);
+               rb_erase(n, &td->io_hist_tree);
 
                io_u->offset = ipo->offset;
                io_u->buflen = ipo->len;