graph: switch tooltip lookups to being range based in a prio tree
authorJens Axboe <axboe@kernel.dk>
Wed, 21 Mar 2012 16:17:45 +0000 (17:17 +0100)
committerJens Axboe <axboe@kernel.dk>
Wed, 21 Mar 2012 16:17:45 +0000 (17:17 +0100)
This cuts a lot of the CPU usage from browsing bigger graphs. Even
for the normal graphs, CPU usage is typically cut in half.

Signed-off-by: Jens Axboe <axboe@kernel.dk>
Makefile
graph.c
lib/prio_tree.c [new file with mode: 0644]
lib/prio_tree.h [new file with mode: 0644]

index 063823d102435fb372c1b4feaa9b20b96ba63109..ddf257f3e6b145967774a70f7519e7bb43d23579 100644 (file)
--- a/Makefile
+++ b/Makefile
@@ -18,7 +18,7 @@ SOURCE := gettime.c ioengines.c init.c stat.c log.c time.c filesetup.c \
                lib/num2str.c lib/ieee754.c $(wildcard crc/*.c) engines/cpu.c \
                engines/mmap.c engines/sync.c engines/null.c engines/net.c \
                memalign.c server.c client.c iolog.c backend.c libfio.c flow.c \
-               cconv.c
+               cconv.c lib/prio_tree.c
 
 ifeq ($(UNAME), Linux)
   SOURCE += diskutil.c fifo.c blktrace.c helpers.c cgroup.c trim.c \
diff --git a/graph.c b/graph.c
index d8f1ba45ab9df005a3c6266278fe7d09dd2c0e25..91ddc8937fa4ed53290742f1dc5a563266ec3b33 100644 (file)
--- a/graph.c
+++ b/graph.c
 
 #include "tickmarks.h"
 #include "graph.h"
+#include "flist.h"
+#include "lib/prio_tree.h"
+#include "gettime.h"
+struct thread_data;
+#include "time.h"
+
+/*
+ * Allowable difference to show tooltip
+ */
+#define TOOLTIP_DELTA  1.02
 
 struct xyvalue {
        double x, y;
-       int gx, gy;
 };
 
 struct graph_value {
        struct graph_value *next;
+       struct prio_tree_node node; /* tree linkage; only inserted for values that carry a tooltip */
        char *tooltip;
        void *value;
 };
@@ -48,12 +58,18 @@ struct graph_label {
        struct graph_value *tail;
        struct graph_value *values;
        struct graph_label *next;
+       struct prio_tree_root prio_tree;
        double r, g, b;
        int value_count;
        unsigned int tooltip_count;
        struct graph *parent;
 };
 
+struct tick_value { /* NOTE(review): not referenced anywhere else in this patch - confirm it is needed */
+       unsigned int offset;
+       double value;
+};
+
 struct graph {
        char *title;
        char *xtitle;
@@ -71,6 +87,13 @@ struct graph {
        double right_extra;     
        double top_extra;       
        double bottom_extra;    
+
+       double xtick_zero;
+       double xtick_delta;
+       double xtick_zero_val;
+       double ytick_zero;
+       double ytick_delta;
+       double ytick_zero_val;
 };
 
 void graph_set_size(struct graph *g, unsigned int xdim, unsigned int ydim)
@@ -342,6 +365,15 @@ static void graph_draw_x_ticks(struct graph *g, cairo_t *cr,
        for (i = 0; i < nticks; i++) {
                tx = (((tm[i].value) - minx) / (maxx - minx)) * (x2 - x1) + x1;
 
+               /*
+                * Update tick delta
+                */
+               if (!i) {
+                       g->xtick_zero = tx;
+                       g->xtick_zero_val = tm[0].value;
+               } else if (i == 1)
+                       g->xtick_delta = (tm[1].value - tm[0].value) / (tx - g->xtick_zero);
+
                /* really tx < yx || tx > x2, but protect against rounding */
                if (x1 - tx > 0.01 || tx - x2 > 0.01)
                        continue;
@@ -367,7 +399,6 @@ static void graph_draw_x_ticks(struct graph *g, cairo_t *cr,
                /* draw tickmark label */
                draw_centered_text(g, cr, tx, y2 * 1.04, 12.0, tm[i].string);
                cairo_stroke(cr);
-               
        }
 }
 
@@ -395,6 +426,15 @@ static double graph_draw_y_ticks(struct graph *g, cairo_t *cr,
        for (i = 0; i < nticks; i++) {
                ty = y2 - (((tm[i].value) - miny) / (maxy - miny)) * (y2 - y1);
 
+               /*
+                * Update tick delta
+                */
+               if (!i) {
+                       g->ytick_zero = ty;
+                       g->ytick_zero_val = tm[0].value;
+               } else if (i == 1)
+                       g->ytick_delta = (tm[1].value - tm[0].value) / (ty - g->ytick_zero);
+
                /* really ty < y1 || ty > y2, but protect against rounding */
                if (y1 - ty > 0.01 || ty - y2 > 0.01)
                        continue;
@@ -574,10 +614,9 @@ void line_graph_draw(struct graph *g, cairo_t *cr)
                first = 1;
                if (i->r < 0) /* invisible data */
                        continue;
+
                cairo_set_source_rgb(cr, i->r, i->g, i->b);
                for (j = i->values; j; j = j->next) {
-                       struct xyvalue *xy = j->value;
-
                        tx = ((getx(j) - gminx) / (gmaxx - gminx)) * (x2 - x1) + x1;
                        ty = y2 - ((gety(j) - gminy) / (gmaxy - gminy)) * (y2 - y1);
                        if (first) {
@@ -586,8 +625,6 @@ void line_graph_draw(struct graph *g, cairo_t *cr)
                        } else {
                                cairo_line_to(cr, tx, ty);
                        }
-                       xy->gx = tx;
-                       xy->gy = ty;
                }
                cairo_stroke(cr);
        }
@@ -597,15 +634,9 @@ skip_data:
 
 }
 
-static void gfree(void *f)
-{
-       if (f)
-               free(f);
-}
-
 static void setstring(char **str, const char *value)
 {
-       gfree(*str);
+       free(*str);
        *str = strdup(value);
 }
 
@@ -651,6 +682,7 @@ void graph_add_label(struct graph *bg, const char *label)
        else
                bg->tail->next = i;
        bg->tail = i;
+       INIT_PRIO_TREE_ROOT(&i->prio_tree);
 }
 
 static void graph_label_add_value(struct graph_label *i, void *value,
@@ -672,8 +704,21 @@ static void graph_label_add_value(struct graph_label *i, void *value,
        }
        i->tail = x;
        i->value_count++;
-       if (x->tooltip)
+
+       if (x->tooltip) {
+               double yval = gety(x);
+               double miny = yval / TOOLTIP_DELTA;
+               double maxy = yval * TOOLTIP_DELTA;
+               /* index the value by the y range yval / DELTA .. yval * DELTA */
+               x->node.start = miny;
+               x->node.last = maxy;
+               if (x->node.last == x->node.start)
+                       x->node.last++; /* never store a zero-width range */
+               /* NOTE(review): result ignored; an identical range is not inserted */
+               prio_tree_insert(&i->prio_tree, &x->node);
+               printf("insert (x=%d,y=%d) range %llu-%llu (%s)\n", (int)getx(x), (int)gety(x), (unsigned long long)x->node.start, (unsigned long long)x->node.last, x->tooltip);
+               i->tooltip_count++;
+       }
 
        if (i->parent->per_label_limit != -1 &&
                i->value_count > i->parent->per_label_limit) {
@@ -693,6 +738,7 @@ static void graph_label_add_value(struct graph_label *i, void *value,
                        i->values = i->values->next;
                        if (x->tooltip) {
                                free(x->tooltip);
+                               prio_tree_remove(&i->prio_tree, &x->node);
                                i->tooltip_count--;
                        }
                        free(x->value);
@@ -741,8 +787,8 @@ static void graph_free_values(struct graph_value *values)
 
        for (i = values; i; i = next) {
                next = i->next;
-               gfree(i->value);
-               gfree(i);
+               free(i->value);
+               free(i);
        }       
 }
 
@@ -753,7 +799,7 @@ static void graph_free_labels(struct graph_label *labels)
        for (i = labels; i; i = next) {
                next = i->next;
                graph_free_values(i->values);
-               gfree(i);
+               free(i);
        }       
 }
 
@@ -777,7 +823,7 @@ void graph_set_color(struct graph *gr, const char *label,
                if (g > 1.0)
                        g = 1.0;
                if (b > 1.0)
-                       b =1.0;
+                       b = 1.0;
        }
 
        for (i = gr->labels; i; i = i->next)
@@ -791,9 +837,9 @@ void graph_set_color(struct graph *gr, const char *label,
 
 void graph_free(struct graph *bg)
 {
-       gfree(bg->title);
-       gfree(bg->xtitle);
-       gfree(bg->ytitle);
+       free(bg->title);
+       free(bg->xtitle);
+       free(bg->ytitle);
        graph_free_labels(bg->labels);
 }
 
@@ -846,41 +892,66 @@ int graph_contains_xy(struct graph *g, int x, int y)
        return (x >= first_x && x <= last_x) && (y >= first_y && y <= last_y);
 }
 
-/*
- * Allowable difference to show tooltip
- */
-#define TOOLTIP_XDIFF  10
-#define TOOLTIP_YDIFF  10
-
-static int xy_match(struct xyvalue *xy, int x, int y)
+/*
+ * Map the pixel position (ix, iy) to graph values using the tick scale
+ * recorded while drawing the axes, and return the tooltip of a value whose
+ * y range covers that point and whose x is close to it (NULL if none).
+ */
+const char *graph_find_tooltip(struct graph *g, int ix, int iy)
 {
-       int xdiff = abs(xy->gx - x);
-       int ydiff = abs(xy->gy - y);
+       double x = ix, y = iy;
+       struct prio_tree_iter iter;
+       struct prio_tree_node *n;
+       struct graph_label *i;
+       struct graph_value *best = NULL;
+       double best_delta, maxx, minx;
 
-       return xdiff <= TOOLTIP_XDIFF && ydiff <= TOOLTIP_YDIFF;
-}
+       x -= g->xoffset;
+       y -= g->yoffset;
 
-const char *graph_find_tooltip(struct graph *g, int x, int y)
-{
-       struct graph_label *i;
-       struct graph_value *j;
+       x = g->xtick_zero_val + ((x - g->xtick_zero) * g->xtick_delta);
+       y = g->ytick_zero_val + ((y - g->ytick_zero) * g->ytick_delta);
 
-       for (i = g->labels; i; i = i->next) {
-               for (j = i->values; j; j = j->next) {
-                       struct xyvalue *xy = j->value;
-                       int graphx = x - g->xoffset;
+       maxx = x * TOOLTIP_DELTA;
+       minx = x / TOOLTIP_DELTA;
+       best_delta = UINT_MAX;
+       for (i = g->labels; i; i = i->next) { /* a graph without labels has nothing to match */
+               prio_tree_iter_init(&iter, &i->prio_tree, y, y);
+
+               n = prio_tree_next(&iter);
+               if (!n)
+                       continue;
+
+               do {
+                       struct graph_value *v;
+                       double xval, xdiff;
+
+                       v = container_of(n, struct graph_value, node);
+                       xval = getx(v);
+                       xdiff = xval > x ? xval - x : x - xval;
 
                         /*
-                        * Return match if close enough. Take advantage
-                        * of the X axis being monotonically increasing,
-                        * so we can break out if we exceed it.
+                        * zero delta, or within our match criteria, break
                          */
-                       if (xy_match(xy, graphx, y))
-                               return j->tooltip;
-                       else if (xy->gx - graphx > TOOLTIP_XDIFF)
-                               break;
-               }
-       }
+                       if (xdiff < best_delta) {
+                               best_delta = xdiff;
+                               if (!best_delta ||
+                                   (xval >= minx && xval <= maxx)) {
+                                       best = v;
+                                       break;
+                               }
+                       }
+               } while ((n = prio_tree_next(&iter)) != NULL);
+
+               /*
+                * If we got a match in this label, don't check others.
+                */
+               if (best)
+                       break;
+       }
+
+       if (best)
+               return best->tooltip;
 
        return NULL;
 }
diff --git a/lib/prio_tree.c b/lib/prio_tree.c
new file mode 100644 (file)
index 0000000..b0e935c
--- /dev/null
@@ -0,0 +1,465 @@
+/*
+ * lib/prio_tree.c - priority search tree
+ *
+ * Copyright (C) 2004, Rajesh Venkatasubramanian <vrajesh@umich.edu>
+ *
+ * This file is released under the GPL v2.
+ *
+ * Based on the radix priority search tree proposed by Edward M. McCreight
+ * SIAM Journal of Computing, vol. 14, no.2, pages 257-276, May 1985
+ *
+ * 02Feb2004   Initial version
+ */
+
+#include <stdlib.h>
+#include <limits.h>
+#include "../fio.h"
+#include "prio_tree.h"
+
+/*
+ * A clever mix of heap and radix trees forms a radix priority search tree (PST)
+ * which is useful for storing intervals, e.g, we can consider a vma as a closed
+ * interval of file pages [offset_begin, offset_end], and store all vmas that
+ * map a file in a PST. Then, using the PST, we can answer a stabbing query,
+ * i.e., selecting a set of stored intervals (vmas) that overlap with (map) a
+ * given input interval X (a set of consecutive file pages), in "O(log n + m)"
+ * time where 'log n' is the height of the PST, and 'm' is the number of stored
+ * intervals (vmas) that overlap (map) with the input interval X (the set of
+ * consecutive file pages).
+ *
+ * In our implementation, we store closed intervals of the form [radix_index,
+ * heap_index]. We always assume that radix_index <= heap_index. McCreight's PST
+ * is designed for storing intervals with unique radix indices, i.e., each
+ * interval has a different radix_index. However, this limitation can be easily
+ * overcome by using the size, i.e., heap_index - radix_index, as part of the
+ * index, so we index the tree using [(radix_index,size), heap_index].
+ *
+ * When the above-mentioned indexing scheme is used, theoretically, in a 32 bit
+ * machine, the maximum height of a PST can be 64. We can use a balanced version
+ * of the priority search tree to optimize the tree height, but the balanced
+ * tree proposed by McCreight is too complex and memory-hungry for our purpose.
+ */
+
+static void get_index(const struct prio_tree_node *node,
+                     unsigned long *radix, unsigned long *heap) /* NOTE(review): node fields are uint64_t; they narrow where long is 32 bits */
+{
+       *radix = node->start; /* interval start doubles as the radix index */
+       *heap = node->last; /* interval end doubles as the heap index */
+}
+
+static unsigned long index_bits_to_maxindex[BITS_PER_LONG]; /* entry n - 1 holds the largest value that fits in n bits */
+
+void fio_init prio_tree_init(void) /* NOTE(review): fio_init comes from fio.h; presumably runs this before main() - confirm */
+{
+       unsigned int i;
+
+       for (i = 0; i < ARRAY_SIZE(index_bits_to_maxindex) - 1; i++)
+               index_bits_to_maxindex[i] = (1UL << (i + 1)) - 1;
+       index_bits_to_maxindex[ARRAY_SIZE(index_bits_to_maxindex) - 1] = ~0UL; /* last slot set directly: the loop form would shift by the full width */
+}
+
+/*
+ * Maximum heap_index that can be stored in a PST with 'bits' index bits
+ */
+static inline unsigned long prio_tree_maxindex(unsigned int bits)
+{
+       return index_bits_to_maxindex[bits - 1]; /* table is 0-based, so bits must be >= 1 */
+}
+
+/*
+ * Extend a priority search tree so that it can store @node, whose heap_index
+ * is max_heap_index, and make @node the new top of the tree. In the worst
+ * case this takes O((log n)^2), but it is used rarely and the common case
+ * performance is not bad.
+ */
+static struct prio_tree_node *prio_tree_expand(struct prio_tree_root *root,
+               struct prio_tree_node *node, unsigned long max_heap_index)
+{
+       struct prio_tree_node *first = NULL, *prev, *last = NULL;
+
+       if (max_heap_index > prio_tree_maxindex(root->index_bits))
+               root->index_bits++; /* first widening step moves no nodes */
+
+       while (max_heap_index > prio_tree_maxindex(root->index_bits)) {
+               root->index_bits++;
+
+               if (prio_tree_empty(root))
+                       continue; /* nothing to push down, just keep widening */
+
+               if (first == NULL) {
+                       first = root->prio_tree_node; /* detached top node heads the chain */
+                       prio_tree_remove(root, root->prio_tree_node);
+                       INIT_PRIO_TREE_NODE(first);
+                       last = first;
+               } else {
+                       prev = last;
+                       last = root->prio_tree_node;
+                       prio_tree_remove(root, root->prio_tree_node);
+                       INIT_PRIO_TREE_NODE(last);
+                       prev->left = last; /* each detached top hangs left of the previous one */
+                       last->parent = prev;
+               }
+       }
+
+       INIT_PRIO_TREE_NODE(node);
+
+       if (first) {
+               node->left = first; /* chain of detached nodes goes below the new node */
+               first->parent = node;
+       } else
+               last = node; /* no chain: what is left attaches directly to the new node */
+
+       if (!prio_tree_empty(root)) {
+               last->left = root->prio_tree_node; /* remainder of the old tree goes at the bottom */
+               last->left->parent = last;
+       }
+
+       root->prio_tree_node = node;
+       return node;
+}
+
+/*
+ * Replace a prio_tree_node with a new node and return the old node
+ */
+struct prio_tree_node *prio_tree_replace(struct prio_tree_root *root,
+               struct prio_tree_node *old, struct prio_tree_node *node)
+{
+       INIT_PRIO_TREE_NODE(node); /* start self-linked, i.e. without children */
+
+       if (prio_tree_root(old)) {
+               assert(root->prio_tree_node == old);
+               /*
+                * We can reduce root->index_bits here. However, it is complex
+                * and does not help much to improve performance (IMO).
+                */
+               node->parent = node; /* the top node is its own parent */
+               root->prio_tree_node = node;
+       } else {
+               node->parent = old->parent;
+               if (old->parent->left == old) /* take over whichever child slot old occupied */
+                       old->parent->left = node;
+               else
+                       old->parent->right = node;
+       }
+
+       if (!prio_tree_left_empty(old)) { /* adopt old's left subtree, if any */
+               node->left = old->left;
+               old->left->parent = node;
+       }
+
+       if (!prio_tree_right_empty(old)) { /* adopt old's right subtree, if any */
+               node->right = old->right;
+               old->right->parent = node;
+       }
+
+       return old; /* old's own links are left as they were */
+}
+
+/*
+ * Insert a prio_tree_node @node into a radix priority search tree @root. The
+ * algorithm typically takes O(log n) time where 'log n' is the number of bits
+ * required to represent the maximum heap_index. In the worst case, the algo
+ * can take O((log n)^2) - check prio_tree_expand.
+ *
+ * If a prior node with same radix_index and heap_index is already found in
+ * the tree, then returns the address of the prior node. Otherwise, inserts
+ * @node into the tree and returns @node.
+ */
+struct prio_tree_node *prio_tree_insert(struct prio_tree_root *root,
+               struct prio_tree_node *node)
+{
+       struct prio_tree_node *cur, *res = node;
+       unsigned long radix_index, heap_index;
+       unsigned long r_index, h_index, index, mask;
+       int size_flag = 0; /* set once the radix bits are used up and size bits take over */
+
+       get_index(node, &radix_index, &heap_index);
+
+       if (prio_tree_empty(root) ||
+                       heap_index > prio_tree_maxindex(root->index_bits))
+               return prio_tree_expand(root, node, heap_index); /* @node becomes the new top */
+
+       cur = root->prio_tree_node;
+       mask = 1UL << (root->index_bits - 1); /* start at the highest index bit in use */
+
+       while (mask) {
+               get_index(cur, &r_index, &h_index);
+
+               if (r_index == radix_index && h_index == heap_index)
+                       return cur; /* identical interval already stored; @node is not linked */
+
+                if (h_index < heap_index ||
+                   (h_index == heap_index && r_index > radix_index)) {
+                       struct prio_tree_node *tmp = node; /* node being inserted takes cur's place */
+                       node = prio_tree_replace(root, cur, node); /* displaced node is inserted further down */
+                       cur = tmp;
+                       /* swap indices */
+                       index = r_index;
+                       r_index = radix_index;
+                       radix_index = index;
+                       index = h_index;
+                       h_index = heap_index;
+                       heap_index = index;
+               }
+
+               if (size_flag)
+                       index = heap_index - radix_index; /* branch on the interval size */
+               else
+                       index = radix_index; /* branch on the interval start */
+
+               if (index & mask) { /* bit set: go right, bit clear: go left */
+                       if (prio_tree_right_empty(cur)) {
+                               INIT_PRIO_TREE_NODE(node);
+                               cur->right = node;
+                               node->parent = cur;
+                               return res;
+                       } else
+                               cur = cur->right;
+               } else {
+                       if (prio_tree_left_empty(cur)) {
+                               INIT_PRIO_TREE_NODE(node);
+                               cur->left = node;
+                               node->parent = cur;
+                               return res;
+                       } else
+                               cur = cur->left;
+               }
+
+               mask >>= 1;
+
+               if (!mask) { /* radix bits exhausted: restart from the top bit on the size */
+                       mask = 1UL << (BITS_PER_LONG - 1);
+                       size_flag = 1;
+               }
+       }
+       /* Should not reach here */
+       assert(0);
+       return NULL;
+}
+
+/*
+ * Remove a prio_tree_node @node from a radix priority search tree @root. The
+ * algorithm takes O(log n) time where 'log n' is the number of bits required
+ * to represent the maximum heap_index.
+ */
+void prio_tree_remove(struct prio_tree_root *root, struct prio_tree_node *node)
+{
+       struct prio_tree_node *cur;
+       unsigned long r_index, h_index_right, h_index_left;
+
+       cur = node;
+
+       while (!prio_tree_left_empty(cur) || !prio_tree_right_empty(cur)) { /* walk down from @node to a leaf */
+               if (!prio_tree_left_empty(cur))
+                       get_index(cur->left, &r_index, &h_index_left);
+               else {
+                       cur = cur->right; /* only a right child exists */
+                       continue;
+               }
+
+               if (!prio_tree_right_empty(cur))
+                       get_index(cur->right, &r_index, &h_index_right);
+               else {
+                       cur = cur->left; /* only a left child exists */
+                       continue;
+               }
+
+               /* both h_index_left and h_index_right cannot be 0 */
+               if (h_index_left >= h_index_right) /* follow the child with the larger heap index */
+                       cur = cur->left;
+               else
+                       cur = cur->right;
+       }
+
+       if (prio_tree_root(cur)) { /* the leaf is the top node: @node was the only node */
+               assert(root->prio_tree_node == cur);
+               INIT_PRIO_TREE_ROOT(root);
+               return;
+       }
+
+       if (cur->parent->right == cur) /* unlink the leaf; a self-link marks an empty slot */
+               cur->parent->right = cur->parent;
+       else
+               cur->parent->left = cur->parent;
+
+       while (cur != node) /* move each node on the path up one level until @node's slot is filled */
+               cur = prio_tree_replace(root, cur->parent, cur);
+}
+
+/*
+ * Following functions help to enumerate all prio_tree_nodes in the tree that
+ * overlap with the input interval X [radix_index, heap_index]. The enumeration
+ * takes O(log n + m) time where 'log n' is the height of the tree (which is
+ * proportional to # of bits required to represent the maximum heap_index) and
+ * 'm' is the number of prio_tree_nodes that overlap the interval X.
+ */
+
+static struct prio_tree_node *prio_tree_left(struct prio_tree_iter *iter,
+               unsigned long *r_index, unsigned long *h_index)
+{
+       if (prio_tree_left_empty(iter->cur))
+               return NULL; /* no left child to descend into */
+
+       get_index(iter->cur->left, r_index, h_index);
+
+       if (iter->r_index <= *h_index) { /* child's interval end reaches the query start */
+               iter->cur = iter->cur->left;
+               iter->mask >>= 1; /* one level deeper uses the next lower bit */
+               if (iter->mask) {
+                       if (iter->size_level)
+                               iter->size_level++;
+               } else {
+                       if (iter->size_level) {
+                               assert(prio_tree_left_empty(iter->cur));
+                               assert(prio_tree_right_empty(iter->cur));
+                               iter->size_level++;
+                               iter->mask = ULONG_MAX; /* marker value tested by prio_tree_parent() */
+                       } else {
+                               iter->size_level = 1; /* radix bits used up; deeper levels count as size levels */
+                               iter->mask = 1UL << (BITS_PER_LONG - 1);
+                       }
+               }
+               return iter->cur;
+       }
+
+       return NULL; /* child ends before the query starts; caller's indices now describe that child */
+}
+
+static struct prio_tree_node *prio_tree_right(struct prio_tree_iter *iter,
+               unsigned long *r_index, unsigned long *h_index)
+{
+       unsigned long value;
+
+       if (prio_tree_right_empty(iter->cur))
+               return NULL; /* no right child to descend into */
+
+       if (iter->size_level)
+               value = iter->value;
+       else
+               value = iter->value | iter->mask; /* going right sets the current radix bit */
+
+       if (iter->h_index < value) /* presumably: query ends below every start in the right subtree - confirm */
+               return NULL;
+
+       get_index(iter->cur->right, r_index, h_index);
+
+       if (iter->r_index <= *h_index) { /* child's interval end reaches the query start */
+               iter->cur = iter->cur->right;
+               iter->mask >>= 1;
+               iter->value = value; /* remember the radix prefix chosen so far */
+               if (iter->mask) {
+                       if (iter->size_level)
+                               iter->size_level++;
+               } else {
+                       if (iter->size_level) {
+                               assert(prio_tree_left_empty(iter->cur));
+                               assert(prio_tree_right_empty(iter->cur));
+                               iter->size_level++;
+                               iter->mask = ULONG_MAX; /* marker value tested by prio_tree_parent() */
+                       } else {
+                               iter->size_level = 1; /* radix bits used up; deeper levels count as size levels */
+                               iter->mask = 1UL << (BITS_PER_LONG - 1);
+                       }
+               }
+               return iter->cur;
+       }
+
+       return NULL;
+}
+
+static struct prio_tree_node *prio_tree_parent(struct prio_tree_iter *iter) /* step up one level and undo the descent bookkeeping */
+{
+       iter->cur = iter->cur->parent;
+       if (iter->mask == ULONG_MAX) /* marker set when the descent ran out of size bits */
+               iter->mask = 1UL;
+       else if (iter->size_level == 1) /* leaving the first size level: back to the lowest radix bit */
+               iter->mask = 1UL;
+       else
+               iter->mask <<= 1;
+       if (iter->size_level)
+               iter->size_level--;
+       if (!iter->size_level && (iter->value & iter->mask))
+               iter->value ^= iter->mask; /* clear the radix bit that a right turn had set */
+       return iter->cur;
+}
+
+static inline int overlap(struct prio_tree_iter *iter,
+               unsigned long r_index, unsigned long h_index)
+{
+       return iter->h_index >= r_index && iter->r_index <= h_index; /* closed intervals [r, h] share at least one point */
+}
+
+/*
+ * prio_tree_first:
+ *
+ * Get the first prio_tree_node that overlaps with the interval [radix_index,
+ * heap_index]. Note that always radix_index <= heap_index. We do a pre-order
+ * traversal of the tree.
+ */
+static struct prio_tree_node *prio_tree_first(struct prio_tree_iter *iter)
+{
+       struct prio_tree_root *root;
+       unsigned long r_index, h_index;
+
+       INIT_PRIO_TREE_ITER(iter); /* resets cur, mask, value and size_level; root and query range are kept */
+
+       root = iter->root;
+       if (prio_tree_empty(root))
+               return NULL;
+
+       get_index(root->prio_tree_node, &r_index, &h_index);
+
+       if (iter->r_index > h_index) /* query starts past the top node's end, so nothing is searched */
+               return NULL;
+
+       iter->mask = 1UL << (root->index_bits - 1);
+       iter->cur = root->prio_tree_node;
+
+       while (1) {
+               if (overlap(iter, r_index, h_index))
+                       return iter->cur;
+
+               if (prio_tree_left(iter, &r_index, &h_index)) /* try the left subtree first */
+                       continue;
+
+               if (prio_tree_right(iter, &r_index, &h_index)) /* then the right subtree */
+                       continue;
+
+               break;
+       }
+       return NULL;
+}
+
+/*
+ * prio_tree_next:
+ *
+ * Get the next prio_tree_node that overlaps with the input interval in iter
+ */
+struct prio_tree_node *prio_tree_next(struct prio_tree_iter *iter)
+{
+       unsigned long r_index, h_index;
+
+       if (iter->cur == NULL) /* first call after prio_tree_iter_init() */
+               return prio_tree_first(iter);
+
+repeat:
+       while (prio_tree_left(iter, &r_index, &h_index)) /* keep descending left while it may overlap */
+               if (overlap(iter, r_index, h_index))
+                       return iter->cur;
+
+       while (!prio_tree_right(iter, &r_index, &h_index)) { /* cannot go right here: climb until we can */
+               while (!prio_tree_root(iter->cur) &&
+                               iter->cur->parent->right == iter->cur)
+                       prio_tree_parent(iter); /* leaving a right child: that parent is fully visited */
+
+               if (prio_tree_root(iter->cur))
+                       return NULL; /* back at the top: traversal finished */
+
+               prio_tree_parent(iter); /* leaving a left child: parent's right side is tried next */
+       }
+
+       if (overlap(iter, r_index, h_index))
+               return iter->cur;
+
+       goto repeat;
+}
diff --git a/lib/prio_tree.h b/lib/prio_tree.h
new file mode 100644 (file)
index 0000000..e1491db
--- /dev/null
@@ -0,0 +1,90 @@
+#ifndef _LINUX_PRIO_TREE_H
+#define _LINUX_PRIO_TREE_H
+
+#include <inttypes.h>
+#include "../hash.h"
+
+struct prio_tree_node {
+       struct prio_tree_node   *left; /* points at the node itself when there is no left child */
+       struct prio_tree_node   *right; /* points at the node itself when there is no right child */
+       struct prio_tree_node   *parent; /* points at the node itself for the top node */
+       uint64_t                start; /* first location in interval, used as the radix index */
+       uint64_t                last;   /* last location _in_ interval, used as the heap index */
+};
+
+struct prio_tree_root {
+       struct prio_tree_node   *prio_tree_node; /* top node, NULL while the tree is empty */
+       unsigned short          index_bits; /* index width in bits; starts at 1 and grows in prio_tree_expand() */
+};
+
+struct prio_tree_iter {
+       struct prio_tree_node   *cur; /* current position; NULL until the first prio_tree_next() */
+       unsigned long           mask; /* index bit for the current depth */
+       unsigned long           value; /* radix bits accumulated by right turns */
+       int                     size_level; /* non-zero once the walk is past the radix bits */
+
+       struct prio_tree_root   *root; /* tree being walked */
+       uint64_t                r_index; /* start of the query interval */
+       uint64_t                h_index; /* end of the query interval (inclusive) */
+};
+
+static inline void prio_tree_iter_init(struct prio_tree_iter *iter,
+               struct prio_tree_root *root, uint64_t r_index, uint64_t h_index) /* query is the closed interval [r_index, h_index] */
+{
+       iter->root = root;
+       iter->r_index = r_index;
+       iter->h_index = h_index;
+       iter->cur = NULL; /* makes the next prio_tree_next() start from the top */
+}
+
+#define INIT_PRIO_TREE_ROOT(ptr)       \
+do {                                   \
+       (ptr)->prio_tree_node = NULL;   \
+       (ptr)->index_bits = 1;          \
+} while (0) /* resets to an empty tree with a one-bit index */
+
+#define INIT_PRIO_TREE_NODE(ptr)                               \
+do {                                                           \
+       (ptr)->left = (ptr)->right = (ptr)->parent = (ptr);     \
+} while (0) /* self-links mean "no child" and "no parent"; start/last are untouched */
+
+#define INIT_PRIO_TREE_ITER(ptr)       \
+do {                                   \
+       (ptr)->cur = NULL;              \
+       (ptr)->mask = 0UL;              \
+       (ptr)->value = 0UL;             \
+       (ptr)->size_level = 0;          \
+} while (0) /* clears the walk state only; root, r_index and h_index are kept */
+
+#define prio_tree_entry(ptr, type, member) \
+       ((type *)((char *)(ptr)-(unsigned long)(&((type *)0)->member))) /* enclosing 'type' from a pointer to its 'member' */
+
+static inline int prio_tree_empty(const struct prio_tree_root *root)
+{
+       return root->prio_tree_node == NULL; /* non-zero when the tree holds no nodes */
+}
+
+static inline int prio_tree_root(const struct prio_tree_node *node)
+{
+       return node->parent == node; /* the top node is its own parent */
+}
+
+static inline int prio_tree_left_empty(const struct prio_tree_node *node)
+{
+       return node->left == node; /* a self-link stands for "no left child" */
+}
+
+static inline int prio_tree_right_empty(const struct prio_tree_node *node)
+{
+       return node->right == node; /* a self-link stands for "no right child" */
+}
+
+
+struct prio_tree_node *prio_tree_replace(struct prio_tree_root *root,
+                struct prio_tree_node *old, struct prio_tree_node *node);
+struct prio_tree_node *prio_tree_insert(struct prio_tree_root *root,
+                struct prio_tree_node *node);
+void prio_tree_remove(struct prio_tree_root *root, struct prio_tree_node *node);
+struct prio_tree_node *prio_tree_next(struct prio_tree_iter *iter);
+
+#endif /* _LINUX_PRIO_TREE_H */