Rework file random map
authorJens Axboe <axboe@kernel.dk>
Thu, 22 Nov 2012 12:50:29 +0000 (13:50 +0100)
committerJens Axboe <axboe@kernel.dk>
Thu, 22 Nov 2012 12:50:29 +0000 (13:50 +0100)
Fio slows down at the end of a random IO run, when the random
map is used and it gets fuller. This causes slowdowns in
IOPS. This is largely due to the file random map being an
array of bits, and with random access to single bits of the
array at the time, locality is awful. The effect is observable
throughout a run, though, where it gradually gets slower and
slower. It just becomes more apparent at near the end of the
run, where the last 10% are fairly bad. This is even with
doing a bunch of tricks to reduce that cost.

Implement an N-level bitmap, where layer N uses a single bit
to represent 32/64-bits at layer N-1. The number of layers
depends on the number of blocks.

This has slightly higher overhead initially in theory, in
practice it performs about the same. As a bonus, the throughput
remains constant during the run, and even becomes faster
near the end.

Signed-off-by: Jens Axboe <axboe@kernel.dk>
Makefile
file.h
filesetup.c
io_ddir.h
io_u.c
lib/bitmap.c [new file with mode: 0644]
lib/bitmap.h [new file with mode: 0644]

index b68ab0619cbb8796edff6f8052f9926cf62e50c3..70309147e0d89a220fa4ce4ad1812c193eda19cf 100644 (file)
--- a/Makefile
+++ b/Makefile
@@ -17,7 +17,7 @@ SOURCE := gettime.c fio.c ioengines.c init.c stat.c log.c time.c filesetup.c \
                lib/num2str.c lib/ieee754.c $(wildcard crc/*.c) engines/cpu.c \
                engines/mmap.c engines/sync.c engines/null.c engines/net.c \
                memalign.c server.c client.c iolog.c backend.c libfio.c flow.c \
-               json.c lib/zipf.c gettime-thread.c
+               json.c lib/zipf.c lib/bitmap.c gettime-thread.c
 
 ifeq ($(UNAME), Linux)
   SOURCE += diskutil.c fifo.c blktrace.c helpers.c cgroup.c trim.c \
diff --git a/file.h b/file.h
index 38e9d0d43003c466ae13defebea219db8057b7c1..ce92ff75305dda0438ec041dcfa7f37c6fb5ce83 100644 (file)
--- a/file.h
+++ b/file.h
@@ -6,6 +6,7 @@
 #include "io_ddir.h"
 #include "flist.h"
 #include "lib/zipf.h"
+#include "lib/bitmap.h"
 
 /*
  * The type of object we are working on
@@ -108,10 +109,7 @@ struct fio_file {
        /*
         * block map for random io
         */
-       unsigned long *file_map;
-       unsigned long num_maps;
-       unsigned long last_free_lookup;
-       unsigned failed_rands;
+       struct bitmap *io_bitmap;
 
        /*
         * Used for zipf random distribution
@@ -177,13 +175,11 @@ extern void free_release_files(struct thread_data *);
 
 static inline void fio_file_reset(struct fio_file *f)
 {
-       f->last_free_lookup = 0;
-       f->failed_rands = 0;
        f->last_pos = f->file_offset;
        f->last_start = -1ULL;
        f->file_pos = -1ULL;
-       if (f->file_map)
-               memset(f->file_map, 0, f->num_maps * sizeof(unsigned long));
+       if (f->io_bitmap)
+               bitmap_reset(f->io_bitmap);
 }
 
 #endif
index c488eb471caa8a2d442c8cf58a1c203ff18309e7..c96c2502a46bdec4494d2141cd5a2078a75d2014 100644 (file)
@@ -13,6 +13,7 @@
 #include "filehash.h"
 #include "os/os.h"
 #include "hash.h"
+#include "lib/bitmap.h"
 
 #ifdef FIO_HAVE_LINUX_FALLOCATE
 #include <linux/falloc.h>
@@ -904,7 +905,7 @@ static int init_rand_distribution(struct thread_data *td)
 
 int init_random_map(struct thread_data *td)
 {
-       unsigned long long blocks, num_maps;
+       unsigned long long blocks;
        struct fio_file *f;
        unsigned int i;
 
@@ -916,16 +917,9 @@ int init_random_map(struct thread_data *td)
        for_each_file(td, f, i) {
                blocks = (f->real_file_size + td->o.rw_min_bs - 1) /
                                (unsigned long long) td->o.rw_min_bs;
-               num_maps = (blocks + BLOCKS_PER_MAP - 1) /
-                               (unsigned long long) BLOCKS_PER_MAP;
-               if (num_maps == (unsigned long) num_maps) {
-                       f->file_map = smalloc(num_maps * sizeof(unsigned long));
-                       if (f->file_map) {
-                               f->num_maps = num_maps;
-                               continue;
-                       }
-               } else
-                       f->file_map = NULL;
+               f->io_bitmap = bitmap_new(blocks);
+               if (f->io_bitmap)
+                       continue;
 
                if (!td->o.softrandommap) {
                        log_err("fio: failed allocating random map. If running"
@@ -937,7 +931,6 @@ int init_random_map(struct thread_data *td)
 
                log_info("fio: file %s failed allocating random map. Running "
                         "job without.\n", f->file_name);
-               f->num_maps = 0;
        }
 
        return 0;
@@ -974,8 +967,8 @@ void close_and_free_files(struct thread_data *td)
 
                sfree(f->file_name);
                f->file_name = NULL;
-               sfree(f->file_map);
-               f->file_map = NULL;
+               bitmap_free(f->io_bitmap);
+               f->io_bitmap = NULL;
                sfree(f);
        }
 
index df5abbbc1c45322e0625f0e77bf614ca25f8424d..d13c6b1ed6bbf7c23bf68760b71decd40e323e1e 100644 (file)
--- a/io_ddir.h
+++ b/io_ddir.h
@@ -30,7 +30,7 @@ enum td_ddir {
 #define td_trim(td)            ((td)->o.td_ddir & TD_DDIR_TRIM)
 #define td_rw(td)              (((td)->o.td_ddir & TD_DDIR_RW) == TD_DDIR_RW)
 #define td_random(td)          ((td)->o.td_ddir & TD_DDIR_RAND)
-#define file_randommap(td, f)  (!(td)->o.norandommap && (f)->file_map)
+#define file_randommap(td, f)  (!(td)->o.norandommap && (f)->io_bitmap)
 
 static inline int ddir_sync(enum fio_ddir ddir)
 {
diff --git a/io_u.c b/io_u.c
index d81fefdeefe2269e2be9df49d976cc7c0c801c22..d6816068063e9a4be50998fd21c6fcd64d1ae49e 100644 (file)
--- a/io_u.c
+++ b/io_u.c
@@ -10,6 +10,7 @@
 #include "verify.h"
 #include "trim.h"
 #include "lib/rand.h"
+#include "lib/bitmap.h"
 
 struct io_completion_data {
        int nr;                         /* input */
@@ -20,17 +21,12 @@ struct io_completion_data {
 };
 
 /*
- * The ->file_map[] contains a map of blocks we have or have not done io
+ * The ->io_bitmap contains a map of blocks we have or have not done io
  * to yet. Used to make sure we cover the entire range in a fair fashion.
  */
 static int random_map_free(struct fio_file *f, const unsigned long long block)
 {
-       unsigned int idx = RAND_MAP_IDX(f, block);
-       unsigned int bit = RAND_MAP_BIT(f, block);
-
-       dprint(FD_RANDOM, "free: b=%llu, idx=%u, bit=%u\n", block, idx, bit);
-
-       return (f->file_map[idx] & (1UL << bit)) == 0;
+       return !bitmap_isset(f->io_bitmap, block);
 }
 
 /*
@@ -41,61 +37,15 @@ static void mark_random_map(struct thread_data *td, struct io_u *io_u)
        unsigned int min_bs = td->o.rw_min_bs;
        struct fio_file *f = io_u->file;
        unsigned long long block;
-       unsigned int blocks, nr_blocks;
-       int busy_check;
+       unsigned int nr_blocks;
 
        block = (io_u->offset - f->file_offset) / (unsigned long long) min_bs;
        nr_blocks = (io_u->buflen + min_bs - 1) / min_bs;
-       blocks = 0;
-       busy_check = !(io_u->flags & IO_U_F_BUSY_OK);
-
-       while (nr_blocks) {
-               unsigned int idx, bit;
-               unsigned long mask, this_blocks;
-
-               /*
-                * If we have a mixed random workload, we may
-                * encounter blocks we already did IO to.
-                */
-               if (!busy_check) {
-                       blocks = nr_blocks;
-                       break;
-               }
-               if ((td->o.ddir_seq_nr == 1) && !random_map_free(f, block))
-                       break;
-
-               idx = RAND_MAP_IDX(f, block);
-               bit = RAND_MAP_BIT(f, block);
-
-               fio_assert(td, idx < f->num_maps);
-
-               this_blocks = nr_blocks;
-               if (this_blocks + bit > BLOCKS_PER_MAP)
-                       this_blocks = BLOCKS_PER_MAP - bit;
-
-               do {
-                       if (this_blocks == BLOCKS_PER_MAP)
-                               mask = -1UL;
-                       else
-                               mask = ((1UL << this_blocks) - 1) << bit;
-       
-                       if (!(f->file_map[idx] & mask))
-                               break;
 
-                       this_blocks--;
-               } while (this_blocks);
+       nr_blocks = bitmap_set_nr(f->io_bitmap, block, nr_blocks);
 
-               if (!this_blocks)
-                       break;
-
-               f->file_map[idx] |= mask;
-               nr_blocks -= this_blocks;
-               blocks += this_blocks;
-               block += this_blocks;
-       }
-
-       if ((blocks * min_bs) < io_u->buflen)
-               io_u->buflen = blocks * min_bs;
+       if ((nr_blocks * min_bs) < io_u->buflen)
+               io_u->buflen = nr_blocks * min_bs;
 }
 
 static unsigned long long last_block(struct thread_data *td, struct fio_file *f,
@@ -123,113 +73,46 @@ static unsigned long long last_block(struct thread_data *td, struct fio_file *f,
        return max_blocks;
 }
 
-/*
- * Return the next free block in the map.
- */
-static int get_next_free_block(struct thread_data *td, struct fio_file *f,
-                              enum fio_ddir ddir, unsigned long long *b)
-{
-       unsigned long long block, min_bs = td->o.rw_min_bs, lastb;
-       int i;
-
-       lastb = last_block(td, f, ddir);
-       if (!lastb)
-               return 1;
-
-       i = f->last_free_lookup;
-       block = i * BLOCKS_PER_MAP;
-       while (block * min_bs < f->real_file_size &&
-               block * min_bs < f->io_size) {
-               if (f->file_map[i] != -1UL) {
-                       block += ffz(f->file_map[i]);
-                       if (block > lastb)
-                               break;
-                       f->last_free_lookup = i;
-                       *b = block;
-                       return 0;
-               }
-
-               block += BLOCKS_PER_MAP;
-               i++;
-       }
-
-       dprint(FD_IO, "failed finding a free block\n");
-       return 1;
-}
-
 static int __get_next_rand_offset(struct thread_data *td, struct fio_file *f,
                                  enum fio_ddir ddir, unsigned long long *b)
 {
        unsigned long long rmax, r, lastb;
-       int loops = 5;
 
        lastb = last_block(td, f, ddir);
        if (!lastb)
                return 1;
 
-       if (f->failed_rands >= 200)
-               goto ffz;
-
        rmax = td->o.use_os_rand ? OS_RAND_MAX : FRAND_MAX;
-       do {
-               if (td->o.use_os_rand)
-                       r = os_random_long(&td->random_state);
-               else
-                       r = __rand(&td->__random_state);
-
-               *b = (lastb - 1) * (r / ((unsigned long long) rmax + 1.0));
-
-               dprint(FD_RANDOM, "off rand %llu\n", r);
-
-
-               /*
-                * if we are not maintaining a random map, we are done.
-                */
-               if (!file_randommap(td, f))
-                       goto ret_good;
 
-               /*
-                * calculate map offset and check if it's free
-                */
-               if (random_map_free(f, *b))
-                       goto ret_good;
+       if (td->o.use_os_rand) {
+               rmax = OS_RAND_MAX;
+               r = os_random_long(&td->random_state);
+       } else {
+               rmax = FRAND_MAX;
+               r = __rand(&td->__random_state);
+       }
 
-               dprint(FD_RANDOM, "get_next_rand_offset: offset %llu busy\n",
-                                                                       *b);
-       } while (--loops);
+       *b = (lastb - 1) * (r / ((unsigned long long) rmax + 1.0));
 
-       if (!f->failed_rands++)
-               f->last_free_lookup = 0;
+       dprint(FD_RANDOM, "off rand %llu\n", r);
 
        /*
-        * we get here, if we didn't suceed in looking up a block. generate
-        * a random start offset into the filemap, and find the first free
-        * block from there.
+        * if we are not maintaining a random map, we are done.
         */
-       loops = 10;
-       do {
-               f->last_free_lookup = (f->num_maps - 1) *
-                                       (r / ((unsigned long long) rmax + 1.0));
-               if (!get_next_free_block(td, f, ddir, b))
-                       goto ret;
-
-               if (td->o.use_os_rand)
-                       r = os_random_long(&td->random_state);
-               else
-                       r = __rand(&td->__random_state);
-       } while (--loops);
+       if (!file_randommap(td, f))
+               goto ret;
 
        /*
-        * that didn't work either, try exhaustive search from the start
+        * calculate map offset and check if it's free
         */
-       f->last_free_lookup = 0;
-ffz:
-       if (!get_next_free_block(td, f, ddir, b))
-               return 0;
-       f->last_free_lookup = 0;
-       return get_next_free_block(td, f, ddir, b);
-ret_good:
-       f->failed_rands = 0;
+       if (random_map_free(f, *b))
+               goto ret;
+
+       dprint(FD_RANDOM, "get_next_rand_offset: offset %llu busy\n", *b);
+
+       *b = bitmap_next_free(f->io_bitmap, *b);
+       if (*b == (uint64_t) -1ULL)
+               return 1;
 ret:
        return 0;
 }
@@ -347,7 +230,8 @@ static int get_next_block(struct thread_data *td, struct io_u *io_u,
                else if (b != -1ULL)
                        io_u->offset = b * td->o.ba[ddir];
                else {
-                       log_err("fio: bug in offset generation\n");
+                       log_err("fio: bug in offset generation: offset=%llu, b=%llu\n",
+                                                               offset, b);
                        ret = 1;
                }
        }
diff --git a/lib/bitmap.c b/lib/bitmap.c
new file mode 100644 (file)
index 0000000..a88b680
--- /dev/null
@@ -0,0 +1,382 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <assert.h>
+
+#include "../arch/arch.h"
+#include "bitmap.h"
+#include "../smalloc.h"
+#include "../minmax.h"
+
+#if BITS_PER_LONG == 64
+#define UNIT_SIZE              8
+#define UNIT_SHIFT             6
+#elif BITS_PER_LONG == 32
+#define UNIT_SIZE              4
+#define UNIT_SHIFT             5
+#else
+#error "Number of arch bits unknown"
+#endif
+
+#define BLOCKS_PER_UNIT                (1UL << UNIT_SHIFT)
+#define BLOCKS_PER_UNIT_MASK   (BLOCKS_PER_UNIT - 1)
+
+#define firstfree_valid(b)     ((b)->first_free != (uint64_t) -1)
+
+struct bitmap_level {
+       int level;
+       unsigned long map_size;
+       unsigned long *map;
+};
+
+struct bitmap {
+       unsigned int nr_levels;
+       struct bitmap_level *levels;
+       uint64_t first_free;
+};
+
+static unsigned long ulog64(unsigned long val, unsigned int log)
+{
+       while (log-- && val)
+               val >>= UNIT_SHIFT;
+
+       return val;
+}
+
+void bitmap_reset(struct bitmap *bitmap)
+{
+       int i;
+
+       for (i = 0; i < bitmap->nr_levels; i++) {
+               struct bitmap_level *bl = &bitmap->levels[i];
+
+               memset(bl->map, 0, bl->map_size * UNIT_SIZE);
+       }
+}
+
+void bitmap_free(struct bitmap *bitmap)
+{
+       unsigned int i;
+
+       if (!bitmap)
+               return;
+
+       for (i = 0; i < bitmap->nr_levels; i++) {
+#if 0
+               unsigned long j;
+
+               for (j = 0; j < bitmap->levels[i].map_size; j++) {
+                       if (bitmap->levels[i].map[j] != ~0UL)
+                               printf("L%d: %.5ld=%.16lx\n", i, j, bitmap->levels[i].map[j]);
+               }
+#endif
+
+               sfree(bitmap->levels[i].map);
+       }
+
+       sfree(bitmap->levels);
+       sfree(bitmap);
+}
+
+struct bitmap *bitmap_new(unsigned long nr_bits)
+{
+       struct bitmap *bitmap;
+       unsigned int i, levels;
+
+       bitmap = smalloc(sizeof(*bitmap));
+       if (!bitmap)
+               return NULL;
+
+       levels = 1;
+       i = (nr_bits + BLOCKS_PER_UNIT - 1) >> UNIT_SHIFT;
+       while (i > 1) {
+               i = (i + BLOCKS_PER_UNIT - 1) >> UNIT_SHIFT;
+               levels++;
+       }
+
+       bitmap->nr_levels = levels;
+       bitmap->levels = smalloc(bitmap->nr_levels * sizeof(struct bitmap_level));
+       bitmap->first_free = 0;
+
+       for (i = 0; i < bitmap->nr_levels; i++) {
+               struct bitmap_level *bl = &bitmap->levels[i];
+
+               bl->level = i;
+               bl->map_size = (nr_bits + BLOCKS_PER_UNIT - 1) >> UNIT_SHIFT;
+               bl->map = smalloc(bl->map_size << UNIT_SHIFT);
+               if (!bl->map)
+                       goto err;
+
+               nr_bits = (nr_bits + BLOCKS_PER_UNIT - 1) >> UNIT_SHIFT;
+       }
+
+       bitmap_reset(bitmap);
+       return bitmap;
+err:
+       for (i = 0; i < bitmap->nr_levels; i++)
+               if (bitmap->levels[i].map)
+                       sfree(bitmap->levels[i].map);
+
+       sfree(bitmap->levels);
+       return NULL;
+}
+
+static int bitmap_handler(struct bitmap *bitmap, uint64_t bit_nr,
+                         int (*func)(struct bitmap_level *, unsigned long, unsigned int,
+                         void *), void *data)
+{
+       struct bitmap_level *bl;
+       int i;
+
+       for (i = 0; i < bitmap->nr_levels; i++) {
+               unsigned long index = ulog64(bit_nr, i);
+               unsigned long offset = index >> UNIT_SHIFT;
+               unsigned int bit = index & BLOCKS_PER_UNIT_MASK;
+
+               bl = &bitmap->levels[i];
+
+               if (func(bl, offset, bit, data))
+                       return 1;
+       }
+
+       return 0;
+}
+
+static int bitmap_handler_topdown(struct bitmap *bitmap, uint64_t bit_nr,
+       int (*func)(struct bitmap_level *, unsigned long, unsigned int, void *),
+       void *data)
+{
+       struct bitmap_level *bl;
+       int i, level = bitmap->nr_levels;
+
+       for (i = bitmap->nr_levels - 1; i >= 0; i--) {
+               unsigned long index = ulog64(bit_nr, --level);
+               unsigned long offset = index >> UNIT_SHIFT;
+               unsigned int bit = index & BLOCKS_PER_UNIT_MASK;
+
+               bl = &bitmap->levels[i];
+
+               if (func(bl, offset, bit, data))
+                       return 1;
+       }
+
+       return 0;
+}
+
+static int bitmap_clear_fn(struct bitmap_level *bl, unsigned long offset,
+                          unsigned int bit, void *unused)
+{
+       if (!(bl->map[offset] & (1UL << bit)))
+               return 1;
+
+       bl->map[offset] &= ~(1UL << bit);
+       return 0;
+}
+
+void bitmap_clear(struct bitmap *bitmap, uint64_t bit_nr)
+{
+       bitmap_handler(bitmap, bit_nr, bitmap_clear_fn, NULL);
+}
+
+struct bitmap_set_data {
+       unsigned int nr_bits;
+       unsigned int set_bits;
+       unsigned int fail_ok;
+};
+
+static unsigned long bit_masks[] = {
+       0x0000000000000000, 0x0000000000000001, 0x0000000000000003, 0x0000000000000007,
+       0x000000000000000f, 0x000000000000001f, 0x000000000000003f, 0x000000000000007f,
+       0x00000000000000ff, 0x00000000000001ff, 0x00000000000003ff, 0x00000000000007ff,
+       0x0000000000000fff, 0x0000000000001fff, 0x0000000000003fff, 0x0000000000007fff,
+       0x000000000000ffff, 0x000000000001ffff, 0x000000000003ffff, 0x000000000007ffff,
+       0x00000000000fffff, 0x00000000001fffff, 0x00000000003fffff, 0x00000000007fffff,
+       0x0000000000ffffff, 0x0000000001ffffff, 0x0000000003ffffff, 0x0000000007ffffff,
+       0x000000000fffffff, 0x000000001fffffff, 0x000000003fffffff, 0x000000007fffffff,
+       0x00000000ffffffff, 0x00000001ffffffff, 0x00000003ffffffff, 0x00000007ffffffff,
+       0x0000000fffffffff, 0x0000001fffffffff, 0x0000003fffffffff, 0x0000007fffffffff,
+       0x000000ffffffffff, 0x000001ffffffffff, 0x000003ffffffffff, 0x000007ffffffffff,
+       0x00000fffffffffff, 0x00001fffffffffff, 0x00003fffffffffff, 0x00007fffffffffff,
+       0x0000ffffffffffff, 0x0001ffffffffffff, 0x0003ffffffffffff, 0x0007ffffffffffff,
+       0x000fffffffffffff, 0x001fffffffffffff, 0x003fffffffffffff, 0x007fffffffffffff,
+       0x00ffffffffffffff, 0x01ffffffffffffff, 0x03ffffffffffffff, 0x07ffffffffffffff,
+       0x0fffffffffffffff, 0x1fffffffffffffff, 0x3fffffffffffffff, 0x7fffffffffffffff,
+       0xffffffffffffffff };
+
+static int bitmap_set_fn(struct bitmap_level *bl, unsigned long offset,
+                        unsigned int bit, void *__data)
+{
+       struct bitmap_set_data *data = __data;
+       unsigned long mask, overlap;
+       unsigned int nr_bits;
+
+       nr_bits = min(data->nr_bits, BLOCKS_PER_UNIT - bit);
+
+       mask = bit_masks[nr_bits] << bit;
+
+       /*
+        * Mask off any potential overlap, only sets contig regions
+        */
+       overlap = bl->map[offset] & mask;
+       if (overlap == mask) {
+               assert(data->fail_ok);
+               return 1;
+       }
+
+       while (overlap) {
+               unsigned long clear_mask = ~(1UL << ffz(~overlap));
+
+               mask &= clear_mask;
+               overlap &= clear_mask;
+               nr_bits--;
+       }
+
+       assert(mask);
+       assert(!(bl->map[offset] & mask));
+               
+       bl->map[offset] |= mask;
+
+       if (!bl->level)
+               data->set_bits = nr_bits;
+
+       data->nr_bits = 1;
+       return bl->map[offset] != -1UL;
+}
+
+static void __bitmap_set(struct bitmap *bitmap, uint64_t bit_nr,
+                        struct bitmap_set_data *data)
+{
+       unsigned int set_bits, nr_bits = data->nr_bits;
+
+       if (bitmap->first_free >= bit_nr &&
+           bitmap->first_free < bit_nr + data->nr_bits)
+               bitmap->first_free = -1ULL;
+
+       set_bits = 0;
+       while (nr_bits) {
+               bitmap_handler(bitmap, bit_nr, bitmap_set_fn, data);
+               set_bits += data->set_bits;
+
+               if (data->set_bits != (BLOCKS_PER_UNIT - nr_bits))
+                       break;
+
+               nr_bits -= data->set_bits;
+               bit_nr += data->set_bits;
+
+               data->nr_bits = nr_bits;
+               data->fail_ok = 1;
+       }
+
+       data->set_bits = set_bits;
+}
+
+void bitmap_set(struct bitmap *bitmap, uint64_t bit_nr)
+{
+       struct bitmap_set_data data = { .nr_bits = 1, };
+
+       __bitmap_set(bitmap, bit_nr, &data);
+}
+
+unsigned int bitmap_set_nr(struct bitmap *bitmap, uint64_t bit_nr, unsigned int nr_bits)
+{
+       struct bitmap_set_data data = { .nr_bits = nr_bits, };
+
+       __bitmap_set(bitmap, bit_nr, &data);
+       return data.set_bits;
+}
+
+static int bitmap_isset_fn(struct bitmap_level *bl, unsigned long offset,
+                           unsigned int bit, void *unused)
+{
+       return (bl->map[offset] & (1UL << bit)) != 0;
+}
+
+int bitmap_isset(struct bitmap *bitmap, uint64_t bit_nr)
+{
+       return bitmap_handler_topdown(bitmap, bit_nr, bitmap_isset_fn, NULL);
+}
+
+static uint64_t bitmap_find_first_free(struct bitmap *bitmap, unsigned int level,
+                                      uint64_t index)
+{
+       unsigned long j;
+       int i;
+
+       /*
+        * Start at the bottom, then converge towards first free bit at the top
+        */
+       for (i = level; i >= 0; i--) {
+               struct bitmap_level *bl = &bitmap->levels[i];
+
+               if (index >= bl->map_size) {
+                       index = -1ULL;
+                       break;
+               }
+
+               for (j = index; j < bl->map_size; j++) {
+                       if (bl->map[j] == -1UL)
+                               continue;
+
+                       /*
+                        * First free bit here is our index into the first
+                        * free bit at the next higher level
+                        */
+                       index = (j << UNIT_SHIFT) + ffz(bl->map[j]);
+                       break;
+               }
+       }
+
+       return index;
+}
+
+uint64_t bitmap_first_free(struct bitmap *bitmap)
+{
+       if (firstfree_valid(bitmap))
+               return bitmap->first_free;
+
+       bitmap->first_free = bitmap_find_first_free(bitmap, bitmap->nr_levels - 1, 0);
+       return bitmap->first_free;
+}
+
+struct bitmap_next_free_data {
+       unsigned int level;
+       unsigned long offset;
+       uint64_t bit;
+};
+
+static int bitmap_next_free_fn(struct bitmap_level *bl, unsigned long offset,
+                              unsigned int bit, void *__data)
+{
+       struct bitmap_next_free_data *data = __data;
+       uint64_t mask = ~((1UL << ((data->bit & BLOCKS_PER_UNIT_MASK) + 1)) - 1);
+
+       if (!(mask & bl->map[offset]))
+               return 0;
+
+       if (bl->map[offset] != -1UL) {
+               data->level = bl->level;
+               data->offset = offset;
+               return 1;
+       }
+
+       data->bit = (data->bit + BLOCKS_PER_UNIT - 1) / BLOCKS_PER_UNIT;
+       return 0;
+}
+
+/*
+ * 'bit_nr' is already set. Find the next free bit after this one.
+ */
+uint64_t bitmap_next_free(struct bitmap *bitmap, uint64_t bit_nr)
+{
+       struct bitmap_next_free_data data = { .level = -1U, .bit = bit_nr, };
+
+       if (firstfree_valid(bitmap) && bit_nr < bitmap->first_free)
+               return bitmap->first_free;
+
+       if (!bitmap_handler(bitmap, bit_nr, bitmap_next_free_fn, &data))
+               return bitmap_first_free(bitmap);
+
+       assert(data.level != -1U);
+
+       return bitmap_find_first_free(bitmap, data.level, data.offset);
+}
diff --git a/lib/bitmap.h b/lib/bitmap.h
new file mode 100644 (file)
index 0000000..41c0fd8
--- /dev/null
@@ -0,0 +1,18 @@
+#ifndef FIO_BITMAP_H
+#define FIO_BITMAP_H
+
+#include <inttypes.h>
+
+struct bitmap;
+struct bitmap *bitmap_new(unsigned long nr_bits);
+void bitmap_free(struct bitmap *bm);
+
+void bitmap_clear(struct bitmap *bitmap, uint64_t bit_nr);
+void bitmap_set(struct bitmap *bitmap, uint64_t bit_nr);
+unsigned int bitmap_set_nr(struct bitmap *bitmap, uint64_t bit_nr, unsigned int nr_bits);
+int bitmap_isset(struct bitmap *bitmap, uint64_t bit_nr);
+uint64_t bitmap_first_free(struct bitmap *bitmap);
+uint64_t bitmap_next_free(struct bitmap *bitmap, uint64_t bit_nr);
+void bitmap_reset(struct bitmap *bitmap);
+
+#endif