dm space maps: improve performance with inc/dec on ranges of blocks
author Joe Thornber <ejt@redhat.com>
Tue, 13 Apr 2021 10:03:45 +0000 (11:03 +0100)
committer Mike Snitzer <snitzer@redhat.com>
Fri, 4 Jun 2021 16:07:22 +0000 (12:07 -0400)
When we break sharing on btree nodes we typically need to increment
the reference counts of every value held in the node.  This can
cause a lot of repeated calls to the space maps.  Fix this by changing
the interface of the space map inc/dec methods to take ranges of
adjacent blocks to be operated on.

For installations that use a lot of snapshots this will reduce the CPU
overhead of fundamental operations, such as provisioning a new block or
deleting a snapshot, by as much as 10 times.
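
For example, dm_pool_inc_data_range() below previously looped calling
dm_sm_inc_block() once per block; with the new interface it makes a
single ranged call over the half-open interval [b, e):

        /* before: one space map call per block */
        for (; b != e; b++) {
                r = dm_sm_inc_block(pmd->data_sm, b);
                if (r)
                        break;
        }

        /* after: a single call covering the whole range */
        r = dm_sm_inc_blocks(pmd->data_sm, b, e);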

Signed-off-by: Joe Thornber <ejt@redhat.com>
Signed-off-by: Mike Snitzer <snitzer@redhat.com>
15 files changed:
drivers/md/dm-era-target.c
drivers/md/dm-thin-metadata.c
drivers/md/persistent-data/dm-array.c
drivers/md/persistent-data/dm-btree-internal.h
drivers/md/persistent-data/dm-btree-remove.c
drivers/md/persistent-data/dm-btree-spine.c
drivers/md/persistent-data/dm-btree.c
drivers/md/persistent-data/dm-btree.h
drivers/md/persistent-data/dm-space-map-common.c
drivers/md/persistent-data/dm-space-map-common.h
drivers/md/persistent-data/dm-space-map-disk.c
drivers/md/persistent-data/dm-space-map-metadata.c
drivers/md/persistent-data/dm-space-map.h
drivers/md/persistent-data/dm-transaction-manager.c
drivers/md/persistent-data/dm-transaction-manager.h

diff --git a/drivers/md/dm-era-target.c b/drivers/md/dm-era-target.c
index d9ac7372108c98ea1719600aa219c41bab884838..3b748393fca5d421539dd3e71d62a6d9b45bbffb 100644
@@ -363,28 +363,32 @@ static void ws_unpack(const struct writeset_disk *disk, struct writeset_metadata
        core->root = le64_to_cpu(disk->root);
 }
 
-static void ws_inc(void *context, const void *value)
+static void ws_inc(void *context, const void *value, unsigned count)
 {
        struct era_metadata *md = context;
        struct writeset_disk ws_d;
        dm_block_t b;
+       unsigned i;
 
-       memcpy(&ws_d, value, sizeof(ws_d));
-       b = le64_to_cpu(ws_d.root);
-
-       dm_tm_inc(md->tm, b);
+       for (i = 0; i < count; i++) {
+               memcpy(&ws_d, value + (i * sizeof(ws_d)), sizeof(ws_d));
+               b = le64_to_cpu(ws_d.root);
+               dm_tm_inc(md->tm, b);
+       }
 }
 
-static void ws_dec(void *context, const void *value)
+static void ws_dec(void *context, const void *value, unsigned count)
 {
        struct era_metadata *md = context;
        struct writeset_disk ws_d;
        dm_block_t b;
+       unsigned i;
 
-       memcpy(&ws_d, value, sizeof(ws_d));
-       b = le64_to_cpu(ws_d.root);
-
-       dm_bitset_del(&md->bitset_info, b);
+       for (i = 0; i < count; i++) {
+               memcpy(&ws_d, value + (i * sizeof(ws_d)), sizeof(ws_d));
+               b = le64_to_cpu(ws_d.root);
+               dm_bitset_del(&md->bitset_info, b);
+       }
 }
 
 static int ws_eq(void *context, const void *value1, const void *value2)
diff --git a/drivers/md/dm-thin-metadata.c b/drivers/md/dm-thin-metadata.c
index e75b20480e460eb8e26efe460537d5d652cf06bb..c88ed14d49e65219ab2037fcc1b79ad250957488 100644
@@ -311,28 +311,53 @@ static void unpack_block_time(uint64_t v, dm_block_t *b, uint32_t *t)
        *t = v & ((1 << 24) - 1);
 }
 
-static void data_block_inc(void *context, const void *value_le)
+/*
+ * It's more efficient to call dm_sm_{inc,dec}_blocks as few times as
+ * possible.  'with_runs' scans the values for runs of contiguous blocks
+ * and calls the given sm function once per run.
+ */
+typedef int (*run_fn)(struct dm_space_map *, dm_block_t, dm_block_t);
+
+static void with_runs(struct dm_space_map *sm, const __le64 *value_le, unsigned count, run_fn fn)
 {
-       struct dm_space_map *sm = context;
-       __le64 v_le;
-       uint64_t b;
+       uint64_t b, begin, end;
        uint32_t t;
+       bool in_run = false;
+       unsigned i;
 
-       memcpy(&v_le, value_le, sizeof(v_le));
-       unpack_block_time(le64_to_cpu(v_le), &b, &t);
-       dm_sm_inc_block(sm, b);
+       for (i = 0; i < count; i++, value_le++) {
+               /* We know value_le is 8 byte aligned */
+               unpack_block_time(le64_to_cpu(*value_le), &b, &t);
+
+               if (in_run) {
+                       if (b == end) {
+                               end++;
+                       } else {
+                               fn(sm, begin, end);
+                               begin = b;
+                               end = b + 1;
+                       }
+               } else {
+                       in_run = true;
+                       begin = b;
+                       end = b + 1;
+               }
+       }
+
+       if (in_run)
+               fn(sm, begin, end);
 }
 
-static void data_block_dec(void *context, const void *value_le)
+static void data_block_inc(void *context, const void *value_le, unsigned count)
 {
-       struct dm_space_map *sm = context;
-       __le64 v_le;
-       uint64_t b;
-       uint32_t t;
+       with_runs((struct dm_space_map *) context,
+                 (const __le64 *) value_le, count, dm_sm_inc_blocks);
+}
 
-       memcpy(&v_le, value_le, sizeof(v_le));
-       unpack_block_time(le64_to_cpu(v_le), &b, &t);
-       dm_sm_dec_block(sm, b);
+static void data_block_dec(void *context, const void *value_le, unsigned count)
+{
+       with_runs((struct dm_space_map *) context,
+                 (const __le64 *) value_le, count, dm_sm_dec_blocks);
 }
 
 static int data_block_equal(void *context, const void *value1_le, const void *value2_le)
@@ -349,27 +374,25 @@ static int data_block_equal(void *context, const void *value1_le, const void *va
        return b1 == b2;
 }
 
-static void subtree_inc(void *context, const void *value)
+static void subtree_inc(void *context, const void *value, unsigned count)
 {
        struct dm_btree_info *info = context;
-       __le64 root_le;
-       uint64_t root;
+       const __le64 *root_le = value;
+       unsigned i;
 
-       memcpy(&root_le, value, sizeof(root_le));
-       root = le64_to_cpu(root_le);
-       dm_tm_inc(info->tm, root);
+       for (i = 0; i < count; i++, root_le++)
+               dm_tm_inc(info->tm, le64_to_cpu(*root_le));
 }
 
-static void subtree_dec(void *context, const void *value)
+static void subtree_dec(void *context, const void *value, unsigned count)
 {
        struct dm_btree_info *info = context;
-       __le64 root_le;
-       uint64_t root;
+       const __le64 *root_le = value;
+       unsigned i;
 
-       memcpy(&root_le, value, sizeof(root_le));
-       root = le64_to_cpu(root_le);
-       if (dm_btree_del(info, root))
-               DMERR("btree delete failed");
+       for (i = 0; i < count; i++, root_le++)
+               if (dm_btree_del(info, le64_to_cpu(*root_le)))
+                       DMERR("btree delete failed");
 }
 
 static int subtree_equal(void *context, const void *value1_le, const void *value2_le)
@@ -1761,11 +1784,7 @@ int dm_pool_inc_data_range(struct dm_pool_metadata *pmd, dm_block_t b, dm_block_
        int r = 0;
 
        pmd_write_lock(pmd);
-       for (; b != e; b++) {
-               r = dm_sm_inc_block(pmd->data_sm, b);
-               if (r)
-                       break;
-       }
+       r = dm_sm_inc_blocks(pmd->data_sm, b, e);
        pmd_write_unlock(pmd);
 
        return r;
@@ -1776,11 +1795,7 @@ int dm_pool_dec_data_range(struct dm_pool_metadata *pmd, dm_block_t b, dm_block_
        int r = 0;
 
        pmd_write_lock(pmd);
-       for (; b != e; b++) {
-               r = dm_sm_dec_block(pmd->data_sm, b);
-               if (r)
-                       break;
-       }
+       r = dm_sm_dec_blocks(pmd->data_sm, b, e);
        pmd_write_unlock(pmd);
 
        return r;
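
To illustrate the run coalescing performed by with_runs() above: a leaf
whose values unpack to data blocks 100, 101, 102 and 200 produces just
two space map calls, fn(sm, 100, 103) and fn(sm, 200, 201), instead of
four.  A minimal userspace sketch of the same run-detection loop
(hypothetical print_run() callback; not kernel code):

        #include <stdbool.h>
        #include <stdint.h>
        #include <stdio.h>

        typedef uint64_t dm_block_t;

        static void print_run(dm_block_t b, dm_block_t e)
        {
                printf("run [%llu, %llu)\n",
                       (unsigned long long) b, (unsigned long long) e);
        }

        /* Same shape as with_runs(): coalesce adjacent blocks into runs. */
        static void with_runs(const dm_block_t *blocks, unsigned count,
                              void (*fn)(dm_block_t, dm_block_t))
        {
                dm_block_t b, begin = 0, end = 0;
                bool in_run = false;
                unsigned i;

                for (i = 0; i < count; i++) {
                        b = blocks[i];
                        if (in_run) {
                                if (b == end)
                                        end++;          /* extend the current run */
                                else {
                                        fn(begin, end); /* flush, start a new run */
                                        begin = b;
                                        end = b + 1;
                                }
                        } else {
                                in_run = true;
                                begin = b;
                                end = b + 1;
                        }
                }

                if (in_run)
                        fn(begin, end);                 /* flush the final run */
        }

        int main(void)
        {
                dm_block_t blocks[] = { 100, 101, 102, 200 };

                with_runs(blocks, 4, print_run);        /* prints two runs */
                return 0;
        }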
diff --git a/drivers/md/persistent-data/dm-array.c b/drivers/md/persistent-data/dm-array.c
index 185dc60360b55f8916f0a8f3cc93e0e44104a0fa..3a963d783a865531ba59b49cf1042d69c0b0b450 100644
@@ -108,12 +108,10 @@ static void *element_at(struct dm_array_info *info, struct array_block *ab,
  * in an array block.
  */
 static void on_entries(struct dm_array_info *info, struct array_block *ab,
-                      void (*fn)(void *, const void *))
+                      void (*fn)(void *, const void *, unsigned))
 {
-       unsigned i, nr_entries = le32_to_cpu(ab->nr_entries);
-
-       for (i = 0; i < nr_entries; i++)
-               fn(info->value_type.context, element_at(info, ab, i));
+       unsigned nr_entries = le32_to_cpu(ab->nr_entries);
+       fn(info->value_type.context, element_at(info, ab, 0), nr_entries);
 }
 
 /*
@@ -175,19 +173,18 @@ static int alloc_ablock(struct dm_array_info *info, size_t size_of_block,
 static void fill_ablock(struct dm_array_info *info, struct array_block *ab,
                        const void *value, unsigned new_nr)
 {
-       unsigned i;
-       uint32_t nr_entries;
+       uint32_t nr_entries, delta, i;
        struct dm_btree_value_type *vt = &info->value_type;
 
        BUG_ON(new_nr > le32_to_cpu(ab->max_entries));
        BUG_ON(new_nr < le32_to_cpu(ab->nr_entries));
 
        nr_entries = le32_to_cpu(ab->nr_entries);
-       for (i = nr_entries; i < new_nr; i++) {
-               if (vt->inc)
-                       vt->inc(vt->context, value);
+       delta = new_nr - nr_entries;
+       if (vt->inc)
+               vt->inc(vt->context, value, delta);
+       for (i = nr_entries; i < new_nr; i++)
                memcpy(element_at(info, ab, i), value, vt->size);
-       }
        ab->nr_entries = cpu_to_le32(new_nr);
 }
 
@@ -199,17 +196,16 @@ static void fill_ablock(struct dm_array_info *info, struct array_block *ab,
 static void trim_ablock(struct dm_array_info *info, struct array_block *ab,
                        unsigned new_nr)
 {
-       unsigned i;
-       uint32_t nr_entries;
+       uint32_t nr_entries, delta;
        struct dm_btree_value_type *vt = &info->value_type;
 
        BUG_ON(new_nr > le32_to_cpu(ab->max_entries));
        BUG_ON(new_nr > le32_to_cpu(ab->nr_entries));
 
        nr_entries = le32_to_cpu(ab->nr_entries);
-       for (i = nr_entries; i > new_nr; i--)
-               if (vt->dec)
-                       vt->dec(vt->context, element_at(info, ab, i - 1));
+       delta = nr_entries - new_nr;
+       if (vt->dec)
+               vt->dec(vt->context, element_at(info, ab, new_nr), delta);
        ab->nr_entries = cpu_to_le32(new_nr);
 }
 
@@ -573,16 +569,17 @@ static int grow(struct resize *resize)
  * These are the value_type functions for the btree elements, which point
  * to array blocks.
  */
-static void block_inc(void *context, const void *value)
+static void block_inc(void *context, const void *value, unsigned count)
 {
-       __le64 block_le;
+       const __le64 *block_le = value;
        struct dm_array_info *info = context;
+       unsigned i;
 
-       memcpy(&block_le, value, sizeof(block_le));
-       dm_tm_inc(info->btree_info.tm, le64_to_cpu(block_le));
+       for (i = 0; i < count; i++, block_le++)
+               dm_tm_inc(info->btree_info.tm, le64_to_cpu(*block_le));
 }
 
-static void block_dec(void *context, const void *value)
+static void __block_dec(void *context, const void *value)
 {
        int r;
        uint64_t b;
@@ -621,6 +618,13 @@ static void block_dec(void *context, const void *value)
        dm_tm_dec(info->btree_info.tm, b);
 }
 
+static void block_dec(void *context, const void *value, unsigned count)
+{
+       unsigned i;
+       for (i = 0; i < count; i++, value += sizeof(__le64))
+               __block_dec(context, value);
+}
+
 static int block_equal(void *context, const void *value1, const void *value2)
 {
        return !memcmp(value1, value2, sizeof(__le64));
@@ -711,7 +715,7 @@ static int populate_ablock_with_values(struct dm_array_info *info, struct array_
                        return r;
 
                if (vt->inc)
-                       vt->inc(vt->context, element_at(info, ab, i));
+                       vt->inc(vt->context, element_at(info, ab, i), 1);
        }
 
        ab->nr_entries = cpu_to_le32(new_nr);
@@ -822,9 +826,9 @@ static int array_set_value(struct dm_array_info *info, dm_block_t root,
        old_value = element_at(info, ab, entry);
        if (vt->dec &&
            (!vt->equal || !vt->equal(vt->context, old_value, value))) {
-               vt->dec(vt->context, old_value);
+               vt->dec(vt->context, old_value, 1);
                if (vt->inc)
-                       vt->inc(vt->context, value);
+                       vt->inc(vt->context, value, 1);
        }
 
        memcpy(old_value, value, info->value_type.size);
diff --git a/drivers/md/persistent-data/dm-btree-internal.h b/drivers/md/persistent-data/dm-btree-internal.h
index b1788853a355258738b4d69daa9e5732e34d29eb..893edb426dba1fddf8a6cdfa90e8115098b5640c 100644
@@ -144,4 +144,17 @@ extern struct dm_block_validator btree_node_validator;
 extern void init_le64_type(struct dm_transaction_manager *tm,
                           struct dm_btree_value_type *vt);
 
+/*
+ * This returns a shadowed btree leaf that you may modify.  In practice
+ * this means overwrites only, since an insert could cause a node to
+ * be split.  Useful if you need access to the old value to calculate the
+ * new one.
+ *
+ * This only works with single level btrees.  The given key must be present in
+ * the tree, otherwise -EINVAL will be returned.
+ */
+int btree_get_overwrite_leaf(struct dm_btree_info *info, dm_block_t root,
+                            uint64_t key, int *index,
+                            dm_block_t *new_root, struct dm_block **leaf);
+
 #endif /* DM_BTREE_INTERNAL_H */
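
A sketch of the intended calling pattern for btree_get_overwrite_leaf(),
mirroring how dm-space-map-common.c uses it later in this patch (a
fragment with error paths and context elided; not compilable on its own):

        int r, index;
        struct dm_block *leaf;
        __le32 *v_ptr;

        r = btree_get_overwrite_leaf(&ll->ref_count_info, ll->ref_count_root,
                                     b, &index, &ll->ref_count_root, &leaf);
        if (r < 0)
                return r;

        /* the leaf is already shadowed, so the value can be updated in place */
        v_ptr = value_ptr(dm_block_data(leaf), index);
        *v_ptr = cpu_to_le32(le32_to_cpu(*v_ptr) + 1);

        /* the caller must unlock the leaf once it is finished with it */
        dm_tm_unlock(ll->tm, leaf);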
diff --git a/drivers/md/persistent-data/dm-btree-remove.c b/drivers/md/persistent-data/dm-btree-remove.c
index eff04fa23dfad46d7d43dee24cde0a1fd90f2f68..b34af195bf2a212a8cbbd3098fa8559c56d27be1 100644
@@ -544,7 +544,7 @@ int dm_btree_remove(struct dm_btree_info *info, dm_block_t root,
 
                if (info->value_type.dec)
                        info->value_type.dec(info->value_type.context,
-                                            value_ptr(n, index));
+                                            value_ptr(n, index), 1);
 
                delete_at(n, index);
        }
@@ -653,7 +653,7 @@ static int remove_one(struct dm_btree_info *info, dm_block_t root,
        if (k >= keys[last_level] && k < end_key) {
                if (info->value_type.dec)
                        info->value_type.dec(info->value_type.context,
-                                            value_ptr(n, index));
+                                            value_ptr(n, index), 1);
 
                delete_at(n, index);
                keys[last_level] = k + 1ull;
diff --git a/drivers/md/persistent-data/dm-btree-spine.c b/drivers/md/persistent-data/dm-btree-spine.c
index 2061ab86556770a19b430a4b007f2d047f0e9b86..f5bd76ed8fe6dd228082d89356faec537dbb3996 100644
@@ -236,22 +236,14 @@ dm_block_t shadow_root(struct shadow_spine *s)
        return s->root;
 }
 
-static void le64_inc(void *context, const void *value_le)
+static void le64_inc(void *context, const void *value_le, unsigned count)
 {
-       struct dm_transaction_manager *tm = context;
-       __le64 v_le;
-
-       memcpy(&v_le, value_le, sizeof(v_le));
-       dm_tm_inc(tm, le64_to_cpu(v_le));
+       dm_tm_with_runs(context, value_le, count, dm_tm_inc_range);
 }
 
-static void le64_dec(void *context, const void *value_le)
+static void le64_dec(void *context, const void *value_le, unsigned count)
 {
-       struct dm_transaction_manager *tm = context;
-       __le64 v_le;
-
-       memcpy(&v_le, value_le, sizeof(v_le));
-       dm_tm_dec(tm, le64_to_cpu(v_le));
+       dm_tm_with_runs(context, value_le, count, dm_tm_dec_range);
 }
 
 static int le64_equal(void *context, const void *value1_le, const void *value2_le)
diff --git a/drivers/md/persistent-data/dm-btree.c b/drivers/md/persistent-data/dm-btree.c
index 18282932bedce16dfa6fca0cb250ddd1ffcc2ff1..0703ca7a7d9a43b117691de7d2245dbb7f8e32ce 100644
@@ -71,15 +71,13 @@ static int upper_bound(struct btree_node *n, uint64_t key)
 void inc_children(struct dm_transaction_manager *tm, struct btree_node *n,
                  struct dm_btree_value_type *vt)
 {
-       unsigned i;
        uint32_t nr_entries = le32_to_cpu(n->header.nr_entries);
 
        if (le32_to_cpu(n->header.flags) & INTERNAL_NODE)
-               for (i = 0; i < nr_entries; i++)
-                       dm_tm_inc(tm, value64(n, i));
+               dm_tm_with_runs(tm, value_ptr(n, 0), nr_entries, dm_tm_inc_range);
+
        else if (vt->inc)
-               for (i = 0; i < nr_entries; i++)
-                       vt->inc(vt->context, value_ptr(n, i));
+               vt->inc(vt->context, value_ptr(n, 0), nr_entries);
 }
 
 static int insert_at(size_t value_size, struct btree_node *node, unsigned index,
@@ -318,13 +316,9 @@ int dm_btree_del(struct dm_btree_info *info, dm_block_t root)
                                goto out;
 
                } else {
-                       if (info->value_type.dec) {
-                               unsigned i;
-
-                               for (i = 0; i < f->nr_children; i++)
-                                       info->value_type.dec(info->value_type.context,
-                                                            value_ptr(f->n, i));
-                       }
+                       if (info->value_type.dec)
+                               info->value_type.dec(info->value_type.context,
+                                                    value_ptr(f->n, 0), f->nr_children);
                        pop_frame(s);
                }
        }
@@ -1146,6 +1140,77 @@ static int btree_insert_raw(struct shadow_spine *s, dm_block_t root,
        return 0;
 }
 
+static int __btree_get_overwrite_leaf(struct shadow_spine *s, dm_block_t root,
+                                     uint64_t key, int *index)
+{
+       int r, i = -1;
+       struct btree_node *node;
+
+       *index = 0;
+       for (;;) {
+               r = shadow_step(s, root, &s->info->value_type);
+               if (r < 0)
+                       return r;
+
+               node = dm_block_data(shadow_current(s));
+
+               /*
+                * We have to patch up the parent node, ugly, but I don't
+                * see a way to do this automatically as part of the spine
+                * op.
+                */
+               if (shadow_has_parent(s) && i >= 0) {
+                       __le64 location = cpu_to_le64(dm_block_location(shadow_current(s)));
+
+                       __dm_bless_for_disk(&location);
+                       memcpy_disk(value_ptr(dm_block_data(shadow_parent(s)), i),
+                                   &location, sizeof(__le64));
+               }
+
+               node = dm_block_data(shadow_current(s));
+               i = lower_bound(node, key);
+
+               BUG_ON(i < 0);
+               BUG_ON(i >= le32_to_cpu(node->header.nr_entries));
+
+               if (le32_to_cpu(node->header.flags) & LEAF_NODE) {
+                       if (key != le64_to_cpu(node->keys[i]))
+                               return -EINVAL;
+                       break;
+               }
+
+               root = value64(node, i);
+       }
+
+       *index = i;
+       return 0;
+}
+
+int btree_get_overwrite_leaf(struct dm_btree_info *info, dm_block_t root,
+                            uint64_t key, int *index,
+                            dm_block_t *new_root, struct dm_block **leaf)
+{
+       int r;
+       struct shadow_spine spine;
+
+       BUG_ON(info->levels > 1);
+       init_shadow_spine(&spine, info);
+       r = __btree_get_overwrite_leaf(&spine, root, key, index);
+       if (!r) {
+               *new_root = shadow_root(&spine);
+               *leaf = shadow_current(&spine);
+
+               /*
+                * Decrement the count so exit_shadow_spine() doesn't
+                * unlock the leaf.
+                */
+               spine.count--;
+       }
+       exit_shadow_spine(&spine);
+
+       return r;
+}
+
 static bool need_insert(struct btree_node *node, uint64_t *keys,
                        unsigned level, unsigned index)
 {
@@ -1222,7 +1287,7 @@ static int insert(struct dm_btree_info *info, dm_block_t root,
                             value_ptr(n, index),
                             value))) {
                        info->value_type.dec(info->value_type.context,
-                                            value_ptr(n, index));
+                                            value_ptr(n, index), 1);
                }
                memcpy_disk(value_ptr(n, index),
                            value, info->value_type.size);
diff --git a/drivers/md/persistent-data/dm-btree.h b/drivers/md/persistent-data/dm-btree.h
index 3dc5bb1a4748b51059e3fe728ea858f46db44c29..d2ae5aa4d00b6e9ee4cb3e546998d1037456383f 100644
@@ -51,21 +51,21 @@ struct dm_btree_value_type {
         */
 
        /*
-        * The btree is making a duplicate of the value, for instance
+        * The btree is making a duplicate of a run of values, for instance
         * because previously-shared btree nodes have now diverged.
         * @value argument is the new copy that the copy function may modify.
         * (Probably it just wants to increment a reference count
         * somewhere.) This method is _not_ called for insertion of a new
         * value: It is assumed the ref count is already 1.
         */
-       void (*inc)(void *context, const void *value);
+       void (*inc)(void *context, const void *value, unsigned count);
 
        /*
-        * This value is being deleted.  The btree takes care of freeing
+        * These values are being deleted.  The btree takes care of freeing
         * the memory pointed to by @value.  Often the del function just
-        * needs to decrement a reference count somewhere.
+        * needs to decrement reference counts somewhere.
         */
-       void (*dec)(void *context, const void *value);
+       void (*dec)(void *context, const void *value, unsigned count);
 
        /*
         * A test for equality between two values.  When a value is
diff --git a/drivers/md/persistent-data/dm-space-map-common.c b/drivers/md/persistent-data/dm-space-map-common.c
index a213bf11738fbbb10448d09abfb8d532679f011e..5552941912afd12036047e4f6339b97380945609 100644
@@ -6,6 +6,7 @@
 
 #include "dm-space-map-common.h"
 #include "dm-transaction-manager.h"
+#include "dm-btree-internal.h"
 
 #include <linux/bitops.h>
 #include <linux/device-mapper.h>
@@ -409,12 +410,13 @@ int sm_ll_find_common_free_block(struct ll_disk *old_ll, struct ll_disk *new_ll,
        return r;
 }
 
-static int sm_ll_mutate(struct ll_disk *ll, dm_block_t b,
-                       int (*mutator)(void *context, uint32_t old, uint32_t *new),
-                       void *context, enum allocation_event *ev)
+/*----------------------------------------------------------------*/
+
+int sm_ll_insert(struct ll_disk *ll, dm_block_t b,
+                uint32_t ref_count, int32_t *nr_allocations)
 {
        int r;
-       uint32_t bit, old, ref_count;
+       uint32_t bit, old;
        struct dm_block *nb;
        dm_block_t index = b;
        struct disk_index_entry ie_disk;
@@ -433,10 +435,9 @@ static int sm_ll_mutate(struct ll_disk *ll, dm_block_t b,
                return r;
        }
        ie_disk.blocknr = cpu_to_le64(dm_block_location(nb));
-
        bm_le = dm_bitmap_data(nb);
-       old = sm_lookup_bitmap(bm_le, bit);
 
+       old = sm_lookup_bitmap(bm_le, bit);
        if (old > 2) {
                r = sm_ll_lookup_big_ref_count(ll, b, &old);
                if (r < 0) {
@@ -445,7 +446,6 @@ static int sm_ll_mutate(struct ll_disk *ll, dm_block_t b,
                }
        }
 
-       r = mutator(context, old, &ref_count);
        if (r) {
                dm_tm_unlock(ll->tm, nb);
                return r;
@@ -453,7 +453,6 @@ static int sm_ll_mutate(struct ll_disk *ll, dm_block_t b,
 
        if (ref_count <= 2) {
                sm_set_bitmap(bm_le, bit, ref_count);
-
                dm_tm_unlock(ll->tm, nb);
 
                if (old > 2) {
@@ -480,62 +479,459 @@ static int sm_ll_mutate(struct ll_disk *ll, dm_block_t b,
        }
 
        if (ref_count && !old) {
-               *ev = SM_ALLOC;
+               *nr_allocations = 1;
                ll->nr_allocated++;
                le32_add_cpu(&ie_disk.nr_free, -1);
                if (le32_to_cpu(ie_disk.none_free_before) == bit)
                        ie_disk.none_free_before = cpu_to_le32(bit + 1);
 
        } else if (old && !ref_count) {
-               *ev = SM_FREE;
+               *nr_allocations = -1;
                ll->nr_allocated--;
                le32_add_cpu(&ie_disk.nr_free, 1);
                ie_disk.none_free_before = cpu_to_le32(min(le32_to_cpu(ie_disk.none_free_before), bit));
        } else
-               *ev = SM_NONE;
+               *nr_allocations = 0;
 
        return ll->save_ie(ll, index, &ie_disk);
 }
 
-static int set_ref_count(void *context, uint32_t old, uint32_t *new)
+/*----------------------------------------------------------------*/
+
+/*
+ * Holds useful intermediate results for the range based inc and dec
+ * operations.
+ */
+struct inc_context {
+       struct disk_index_entry ie_disk;
+       struct dm_block *bitmap_block;
+       void *bitmap;
+
+       struct dm_block *overflow_leaf;
+};
+
+static inline void init_inc_context(struct inc_context *ic)
+{
+       ic->bitmap_block = NULL;
+       ic->bitmap = NULL;
+       ic->overflow_leaf = NULL;
+}
+
+static inline void exit_inc_context(struct ll_disk *ll, struct inc_context *ic)
+{
+       if (ic->bitmap_block)
+               dm_tm_unlock(ll->tm, ic->bitmap_block);
+       if (ic->overflow_leaf)
+               dm_tm_unlock(ll->tm, ic->overflow_leaf);
+}
+
+static inline void reset_inc_context(struct ll_disk *ll, struct inc_context *ic)
+{
+       exit_inc_context(ll, ic);
+       init_inc_context(ic);
+}
+
+/*
+ * Confirms a btree node contains a particular key at an index.
+ */
+static bool contains_key(struct btree_node *n, uint64_t key, int index)
+{
+       return index >= 0 &&
+               index < le32_to_cpu(n->header.nr_entries) &&
+               le64_to_cpu(n->keys[index]) == key;
+}
+
+static int __sm_ll_inc_overflow(struct ll_disk *ll, dm_block_t b, struct inc_context *ic)
 {
-       *new = *((uint32_t *) context);
+       int r;
+       int index;
+       struct btree_node *n;
+       __le32 *v_ptr;
+       uint32_t rc;
+
+       /*
+        * bitmap_block needs to be unlocked because getting the
+        * overflow_leaf may need to allocate, and thus use the space map.
+        */
+       reset_inc_context(ll, ic);
+
+       r = btree_get_overwrite_leaf(&ll->ref_count_info, ll->ref_count_root,
+                                    b, &index, &ll->ref_count_root, &ic->overflow_leaf);
+       if (r < 0)
+               return r;
+
+       n = dm_block_data(ic->overflow_leaf);
+
+       if (!contains_key(n, b, index)) {
+               DMERR("overflow btree is missing an entry");
+               return -EINVAL;
+       }
+
+       v_ptr = value_ptr(n, index);
+       rc = le32_to_cpu(*v_ptr) + 1;
+       *v_ptr = cpu_to_le32(rc);
+
        return 0;
 }
 
-int sm_ll_insert(struct ll_disk *ll, dm_block_t b,
-                uint32_t ref_count, enum allocation_event *ev)
+static int sm_ll_inc_overflow(struct ll_disk *ll, dm_block_t b, struct inc_context *ic)
+{
+       int index;
+       struct btree_node *n;
+       __le32 *v_ptr;
+       uint32_t rc;
+
+       /*
+        * Do we already have the correct overflow leaf?
+        */
+       if (ic->overflow_leaf) {
+               n = dm_block_data(ic->overflow_leaf);
+               index = lower_bound(n, b);
+               if (contains_key(n, b, index)) {
+                       v_ptr = value_ptr(n, index);
+                       rc = le32_to_cpu(*v_ptr) + 1;
+                       *v_ptr = cpu_to_le32(rc);
+
+                       return 0;
+               }
+       }
+
+       return __sm_ll_inc_overflow(ll, b, ic);
+}
+
+static inline int shadow_bitmap(struct ll_disk *ll, struct inc_context *ic)
+{
+       int r, inc;
+       r = dm_tm_shadow_block(ll->tm, le64_to_cpu(ic->ie_disk.blocknr),
+                              &dm_sm_bitmap_validator, &ic->bitmap_block, &inc);
+       if (r < 0) {
+               DMERR("dm_tm_shadow_block() failed");
+               return r;
+       }
+       ic->ie_disk.blocknr = cpu_to_le64(dm_block_location(ic->bitmap_block));
+       ic->bitmap = dm_bitmap_data(ic->bitmap_block);
+       return 0;
+}
+
+/*
+ * Once shadow_bitmap has been called, which always happens at the start of inc/dec,
+ * we can reopen the bitmap with a simple write lock, rather than re calling
+ * dm_tm_shadow_block().
+ */
+static inline int ensure_bitmap(struct ll_disk *ll, struct inc_context *ic)
+{
+       if (!ic->bitmap_block) {
+               int r = dm_bm_write_lock(dm_tm_get_bm(ll->tm), le64_to_cpu(ic->ie_disk.blocknr),
+                                        &dm_sm_bitmap_validator, &ic->bitmap_block);
+               if (r) {
+                       DMERR("unable to re-get write lock for bitmap");
+                       return r;
+               }
+               ic->bitmap = dm_bitmap_data(ic->bitmap_block);
+       }
+
+       return 0;
+}
+
+/*
+ * Loops round incrementing entries in a single bitmap.
+ */
+static inline int sm_ll_inc_bitmap(struct ll_disk *ll, dm_block_t b,
+                                  uint32_t bit, uint32_t bit_end,
+                                  int32_t *nr_allocations, dm_block_t *new_b,
+                                  struct inc_context *ic)
+{
+       int r;
+       __le32 le_rc;
+       uint32_t old;
+
+       for (; bit != bit_end; bit++, b++) {
+               /*
+                * We only need to drop the bitmap if we need to find a new btree
+                * leaf for the overflow.  So if it was dropped last iteration,
+                * we now re-get it.
+                */
+               r = ensure_bitmap(ll, ic);
+               if (r)
+                       return r;
+
+               old = sm_lookup_bitmap(ic->bitmap, bit);
+               switch (old) {
+               case 0:
+                       /* inc bitmap, adjust nr_allocated */
+                       sm_set_bitmap(ic->bitmap, bit, 1);
+                       (*nr_allocations)++;
+                       ll->nr_allocated++;
+                       le32_add_cpu(&ic->ie_disk.nr_free, -1);
+                       if (le32_to_cpu(ic->ie_disk.none_free_before) == bit)
+                               ic->ie_disk.none_free_before = cpu_to_le32(bit + 1);
+                       break;
+
+               case 1:
+                       /* inc bitmap */
+                       sm_set_bitmap(ic->bitmap, bit, 2);
+                       break;
+
+               case 2:
+                       /* inc bitmap and insert into overflow */
+                       sm_set_bitmap(ic->bitmap, bit, 3);
+                       reset_inc_context(ll, ic);
+
+                       le_rc = cpu_to_le32(3);
+                       __dm_bless_for_disk(&le_rc);
+                       r = dm_btree_insert(&ll->ref_count_info, ll->ref_count_root,
+                                           &b, &le_rc, &ll->ref_count_root);
+                       if (r < 0) {
+                               DMERR("ref count insert failed");
+                               return r;
+                       }
+                       break;
+
+               default:
+                       /*
+                        * inc within the overflow tree only.
+                        */
+                       r = sm_ll_inc_overflow(ll, b, ic);
+                       if (r < 0)
+                               return r;
+               }
+       }
+
+       *new_b = b;
+       return 0;
+}
+
+/*
+ * Finds a bitmap that contains entries in the block range, and increments
+ * them.
+ */
+static int __sm_ll_inc(struct ll_disk *ll, dm_block_t b, dm_block_t e,
+                      int32_t *nr_allocations, dm_block_t *new_b)
 {
-       return sm_ll_mutate(ll, b, set_ref_count, &ref_count, ev);
+       int r;
+       struct inc_context ic;
+       uint32_t bit, bit_end;
+       dm_block_t index = b;
+
+       init_inc_context(&ic);
+
+       bit = do_div(index, ll->entries_per_block);
+       r = ll->load_ie(ll, index, &ic.ie_disk);
+       if (r < 0)
+               return r;
+
+       r = shadow_bitmap(ll, &ic);
+       if (r)
+               return r;
+
+       bit_end = min(bit + (e - b), (dm_block_t) ll->entries_per_block);
+       r = sm_ll_inc_bitmap(ll, b, bit, bit_end, nr_allocations, new_b, &ic);
+
+       exit_inc_context(ll, &ic);
+
+       if (r)
+               return r;
+
+       return ll->save_ie(ll, index, &ic.ie_disk);
 }
 
-static int inc_ref_count(void *context, uint32_t old, uint32_t *new)
+int sm_ll_inc(struct ll_disk *ll, dm_block_t b, dm_block_t e,
+             int32_t *nr_allocations)
 {
-       *new = old + 1;
+       *nr_allocations = 0;
+       while (b != e) {
+               int r = __sm_ll_inc(ll, b, e, nr_allocations, &b);
+               if (r)
+                       return r;
+       }
+
        return 0;
 }
 
-int sm_ll_inc(struct ll_disk *ll, dm_block_t b, enum allocation_event *ev)
+/*----------------------------------------------------------------*/
+
+static int __sm_ll_del_overflow(struct ll_disk *ll, dm_block_t b,
+                               struct inc_context *ic)
 {
-       return sm_ll_mutate(ll, b, inc_ref_count, NULL, ev);
+       reset_inc_context(ll, ic);
+       return dm_btree_remove(&ll->ref_count_info, ll->ref_count_root,
+                              &b, &ll->ref_count_root);
 }
 
-static int dec_ref_count(void *context, uint32_t old, uint32_t *new)
+static int __sm_ll_dec_overflow(struct ll_disk *ll, dm_block_t b,
+                               struct inc_context *ic, uint32_t *old_rc)
 {
-       if (!old) {
-               DMERR_LIMIT("unable to decrement a reference count below 0");
+       int r;
+       int index = -1;
+       struct btree_node *n;
+       __le32 *v_ptr;
+       uint32_t rc;
+
+       reset_inc_context(ll, ic);
+       r = btree_get_overwrite_leaf(&ll->ref_count_info, ll->ref_count_root,
+                                    b, &index, &ll->ref_count_root, &ic->overflow_leaf);
+       if (r < 0)
+               return r;
+
+       n = dm_block_data(ic->overflow_leaf);
+
+       if (!contains_key(n, b, index)) {
+               DMERR("overflow btree is missing an entry");
                return -EINVAL;
        }
 
-       *new = old - 1;
+       v_ptr = value_ptr(n, index);
+       rc = le32_to_cpu(*v_ptr);
+       *old_rc = rc;
+
+       if (rc == 3) {
+               return __sm_ll_del_overflow(ll, b, ic);
+       } else {
+               rc--;
+               *v_ptr = cpu_to_le32(rc);
+               return 0;
+       }
+}
+
+static int sm_ll_dec_overflow(struct ll_disk *ll, dm_block_t b,
+                             struct inc_context *ic, uint32_t *old_rc)
+{
+       /*
+        * Do we already have the correct overflow leaf?
+        */
+       if (ic->overflow_leaf) {
+               int index;
+               struct btree_node *n;
+               __le32 *v_ptr;
+               uint32_t rc;
+
+               n = dm_block_data(ic->overflow_leaf);
+               index = lower_bound(n, b);
+               if (contains_key(n, b, index)) {
+                       v_ptr = value_ptr(n, index);
+                       rc = le32_to_cpu(*v_ptr);
+                       *old_rc = rc;
+
+                       if (rc > 3) {
+                               rc--;
+                               *v_ptr = cpu_to_le32(rc);
+                               return 0;
+                       } else {
+                               return __sm_ll_del_overflow(ll, b, ic);
+                       }
+
+               }
+       }
+
+       return __sm_ll_dec_overflow(ll, b, ic, old_rc);
+}
+
+/*
+ * Loops round decrementing entries in a single bitmap.
+ */
+static inline int sm_ll_dec_bitmap(struct ll_disk *ll, dm_block_t b,
+                                  uint32_t bit, uint32_t bit_end,
+                                  struct inc_context *ic,
+                                  int32_t *nr_allocations, dm_block_t *new_b)
+{
+       int r;
+       uint32_t old;
+
+       for (; bit != bit_end; bit++, b++) {
+               /*
+                * We only need to drop the bitmap if we need to find a new btree
+                * leaf for the overflow.  So if it was dropped last iteration,
+                * we now re-get it.
+                */
+               r = ensure_bitmap(ll, ic);
+               if (r)
+                       return r;
+
+               old = sm_lookup_bitmap(ic->bitmap, bit);
+               switch (old) {
+               case 0:
+                       DMERR("unable to decrement block");
+                       return -EINVAL;
+
+               case 1:
+                       /* dec bitmap */
+                       sm_set_bitmap(ic->bitmap, bit, 0);
+                       (*nr_allocations)--;
+                       ll->nr_allocated--;
+                       le32_add_cpu(&ic->ie_disk.nr_free, 1);
+                       ic->ie_disk.none_free_before =
+                               cpu_to_le32(min(le32_to_cpu(ic->ie_disk.none_free_before), bit));
+                       break;
+
+               case 2:
+                       /* dec bitmap and insert into overflow */
+                       sm_set_bitmap(ic->bitmap, bit, 1);
+                       break;
+
+               case 3:
+                       r = sm_ll_dec_overflow(ll, b, ic, &old);
+                       if (r < 0)
+                               return r;
+
+                       if (old == 3) {
+                               r = ensure_bitmap(ll, ic);
+                               if (r)
+                                       return r;
+
+                               sm_set_bitmap(ic->bitmap, bit, 2);
+                       }
+                       break;
+               }
+       }
+
+       *new_b = b;
        return 0;
 }
 
-int sm_ll_dec(struct ll_disk *ll, dm_block_t b, enum allocation_event *ev)
+static int __sm_ll_dec(struct ll_disk *ll, dm_block_t b, dm_block_t e,
+                      int32_t *nr_allocations, dm_block_t *new_b)
+{
+       int r;
+       uint32_t bit, bit_end;
+       struct inc_context ic;
+       dm_block_t index = b;
+
+       init_inc_context(&ic);
+
+       bit = do_div(index, ll->entries_per_block);
+       r = ll->load_ie(ll, index, &ic.ie_disk);
+       if (r < 0)
+               return r;
+
+       r = shadow_bitmap(ll, &ic);
+       if (r)
+               return r;
+
+       bit_end = min(bit + (e - b), (dm_block_t) ll->entries_per_block);
+       r = sm_ll_dec_bitmap(ll, b, bit, bit_end, &ic, nr_allocations, new_b);
+       exit_inc_context(ll, &ic);
+
+       if (r)
+               return r;
+
+       return ll->save_ie(ll, index, &ic.ie_disk);
+}
+
+int sm_ll_dec(struct ll_disk *ll, dm_block_t b, dm_block_t e,
+             int32_t *nr_allocations)
 {
-       return sm_ll_mutate(ll, b, dec_ref_count, NULL, ev);
+       *nr_allocations = 0;
+       while (b != e) {
+               int r = __sm_ll_dec(ll, b, e, nr_allocations, &b);
+               if (r)
+                       return r;
+       }
+
+       return 0;
 }
 
+/*----------------------------------------------------------------*/
+
 int sm_ll_commit(struct ll_disk *ll)
 {
        int r = 0;
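
The switch statements above depend on the two-level reference count
encoding used by the space maps: each bitmap entry is two bits, values
0-2 are literal counts, and 3 is a sentinel meaning "3 or more, the
real count lives in the overflow btree".  A sketch of a lookup under
that encoding (hypothetical overflow_lookup() standing in for the
btree query):

        static uint32_t ref_count(uint32_t bitmap_val, dm_block_t b)
        {
                /* counts 0, 1 and 2 are stored directly in the bitmap */
                if (bitmap_val < 3)
                        return bitmap_val;

                /* 3 redirects to the overflow btree (hypothetical helper) */
                return overflow_lookup(b);
        }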
diff --git a/drivers/md/persistent-data/dm-space-map-common.h b/drivers/md/persistent-data/dm-space-map-common.h
index 87e17909ef5210729ff13b5594d0be82137deb36..4a22183e78b73b0826fb5df9c0ba2e2d77acf1ac 100644
@@ -96,12 +96,6 @@ struct disk_bitmap_header {
        __le64 blocknr;
 } __attribute__ ((packed, aligned(8)));
 
-enum allocation_event {
-       SM_NONE,
-       SM_ALLOC,
-       SM_FREE,
-};
-
 /*----------------------------------------------------------------*/
 
 int sm_ll_extend(struct ll_disk *ll, dm_block_t extra_blocks);
@@ -111,9 +105,15 @@ int sm_ll_find_free_block(struct ll_disk *ll, dm_block_t begin,
                          dm_block_t end, dm_block_t *result);
 int sm_ll_find_common_free_block(struct ll_disk *old_ll, struct ll_disk *new_ll,
                                 dm_block_t begin, dm_block_t end, dm_block_t *result);
-int sm_ll_insert(struct ll_disk *ll, dm_block_t b, uint32_t ref_count, enum allocation_event *ev);
-int sm_ll_inc(struct ll_disk *ll, dm_block_t b, enum allocation_event *ev);
-int sm_ll_dec(struct ll_disk *ll, dm_block_t b, enum allocation_event *ev);
+
+/*
+ * The next three functions return (via nr_allocations) the net number of
+ * allocations that were made.  This number may be negative if there were
+ * more frees than allocs.
+ */
+int sm_ll_insert(struct ll_disk *ll, dm_block_t b, uint32_t ref_count, int32_t *nr_allocations);
+int sm_ll_inc(struct ll_disk *ll, dm_block_t b, dm_block_t e, int32_t *nr_allocations);
+int sm_ll_dec(struct ll_disk *ll, dm_block_t b, dm_block_t e, int32_t *nr_allocations);
 int sm_ll_commit(struct ll_disk *ll);
 
 int sm_ll_new_metadata(struct ll_disk *ll, struct dm_transaction_manager *tm);
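
A brief illustration of the nr_allocations contract (counts assumed for
illustration only):

        int32_t nr_allocations;
        int r = sm_ll_inc(ll, 10, 13, &nr_allocations);
        /*
         * If block 10 was free and blocks 11-12 were already allocated,
         * nr_allocations is now 1.  A later sm_ll_dec(ll, 10, 13, ...)
         * that drops all three counts to zero would report -3.
         */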
diff --git a/drivers/md/persistent-data/dm-space-map-disk.c b/drivers/md/persistent-data/dm-space-map-disk.c
index 4f8069bb04816af475651c35bc470505dad3b3ff..d0a8d5e73c28064d787bb2b0d05305ec9dbaedf4 100644
@@ -87,76 +87,39 @@ static int sm_disk_set_count(struct dm_space_map *sm, dm_block_t b,
                             uint32_t count)
 {
        int r;
-       uint32_t old_count;
-       enum allocation_event ev;
+       int32_t nr_allocations;
        struct sm_disk *smd = container_of(sm, struct sm_disk, sm);
 
-       r = sm_ll_insert(&smd->ll, b, count, &ev);
+       r = sm_ll_insert(&smd->ll, b, count, &nr_allocations);
        if (!r) {
-               switch (ev) {
-               case SM_NONE:
-                       break;
-
-               case SM_ALLOC:
-                       /*
-                        * This _must_ be free in the prior transaction
-                        * otherwise we've lost atomicity.
-                        */
-                       smd->nr_allocated_this_transaction++;
-                       break;
-
-               case SM_FREE:
-                       /*
-                        * It's only free if it's also free in the last
-                        * transaction.
-                        */
-                       r = sm_ll_lookup(&smd->old_ll, b, &old_count);
-                       if (r)
-                               return r;
-
-                       if (!old_count)
-                               smd->nr_allocated_this_transaction--;
-                       break;
-               }
+               smd->nr_allocated_this_transaction += nr_allocations;
        }
 
        return r;
 }
 
-static int sm_disk_inc_block(struct dm_space_map *sm, dm_block_t b)
+static int sm_disk_inc_blocks(struct dm_space_map *sm, dm_block_t b, dm_block_t e)
 {
        int r;
-       enum allocation_event ev;
+       int32_t nr_allocations;
        struct sm_disk *smd = container_of(sm, struct sm_disk, sm);
 
-       r = sm_ll_inc(&smd->ll, b, &ev);
-       if (!r && (ev == SM_ALLOC))
-               /*
-                * This _must_ be free in the prior transaction
-                * otherwise we've lost atomicity.
-                */
-               smd->nr_allocated_this_transaction++;
+       r = sm_ll_inc(&smd->ll, b, e, &nr_allocations);
+       if (!r)
+               smd->nr_allocated_this_transaction += nr_allocations;
 
        return r;
 }
 
-static int sm_disk_dec_block(struct dm_space_map *sm, dm_block_t b)
+static int sm_disk_dec_blocks(struct dm_space_map *sm, dm_block_t b, dm_block_t e)
 {
        int r;
-       uint32_t old_count;
-       enum allocation_event ev;
+       int32_t nr_allocations;
        struct sm_disk *smd = container_of(sm, struct sm_disk, sm);
 
-       r = sm_ll_dec(&smd->ll, b, &ev);
-       if (!r && (ev == SM_FREE)) {
-               /*
-                * It's only free if it's also free in the last
-                * transaction.
-                */
-               r = sm_ll_lookup(&smd->old_ll, b, &old_count);
-               if (!r && !old_count)
-                       smd->nr_allocated_this_transaction--;
-       }
+       r = sm_ll_dec(&smd->ll, b, e, &nr_allocations);
+       if (!r)
+               smd->nr_allocated_this_transaction += nr_allocations;
 
        return r;
 }
@@ -164,7 +127,7 @@ static int sm_disk_dec_block(struct dm_space_map *sm, dm_block_t b)
 static int sm_disk_new_block(struct dm_space_map *sm, dm_block_t *b)
 {
        int r;
-       enum allocation_event ev;
+       int32_t nr_allocations;
        struct sm_disk *smd = container_of(sm, struct sm_disk, sm);
 
        /*
@@ -183,10 +146,9 @@ static int sm_disk_new_block(struct dm_space_map *sm, dm_block_t *b)
                return r;
 
        smd->begin = *b + 1;
-       r = sm_ll_inc(&smd->ll, *b, &ev);
+       r = sm_ll_inc(&smd->ll, *b, *b + 1, &nr_allocations);
        if (!r) {
-               BUG_ON(ev != SM_ALLOC);
-               smd->nr_allocated_this_transaction++;
+               smd->nr_allocated_this_transaction += nr_allocations;
        }
 
        return r;
@@ -242,8 +204,8 @@ static struct dm_space_map ops = {
        .get_count = sm_disk_get_count,
        .count_is_more_than_one = sm_disk_count_is_more_than_one,
        .set_count = sm_disk_set_count,
-       .inc_block = sm_disk_inc_block,
-       .dec_block = sm_disk_dec_block,
+       .inc_blocks = sm_disk_inc_blocks,
+       .dec_blocks = sm_disk_dec_blocks,
        .new_block = sm_disk_new_block,
        .commit = sm_disk_commit,
        .root_size = sm_disk_root_size,
diff --git a/drivers/md/persistent-data/dm-space-map-metadata.c b/drivers/md/persistent-data/dm-space-map-metadata.c
index da439ac85796374e1c4582c02b4a884df38564c4..392ae26134a4e6fd4569def99e4020cbaa1d384c 100644
@@ -89,7 +89,8 @@ enum block_op_type {
 
 struct block_op {
        enum block_op_type type;
-       dm_block_t block;
+       dm_block_t b;
+       dm_block_t e;
 };
 
 struct bop_ring_buffer {
@@ -116,7 +117,7 @@ static unsigned brb_next(struct bop_ring_buffer *brb, unsigned old)
 }
 
 static int brb_push(struct bop_ring_buffer *brb,
-                   enum block_op_type type, dm_block_t b)
+                   enum block_op_type type, dm_block_t b, dm_block_t e)
 {
        struct block_op *bop;
        unsigned next = brb_next(brb, brb->end);
@@ -130,7 +131,8 @@ static int brb_push(struct bop_ring_buffer *brb,
 
        bop = brb->bops + brb->end;
        bop->type = type;
-       bop->block = b;
+       bop->b = b;
+       bop->e = e;
 
        brb->end = next;
 
@@ -145,9 +147,7 @@ static int brb_peek(struct bop_ring_buffer *brb, struct block_op *result)
                return -ENODATA;
 
        bop = brb->bops + brb->begin;
-       result->type = bop->type;
-       result->block = bop->block;
-
+       memcpy(result, bop, sizeof(*result));
        return 0;
 }
 
@@ -178,10 +178,9 @@ struct sm_metadata {
        struct threshold threshold;
 };
 
-static int add_bop(struct sm_metadata *smm, enum block_op_type type, dm_block_t b)
+static int add_bop(struct sm_metadata *smm, enum block_op_type type, dm_block_t b, dm_block_t e)
 {
-       int r = brb_push(&smm->uncommitted, type, b);
-
+       int r = brb_push(&smm->uncommitted, type, b, e);
        if (r) {
                DMERR("too many recursive allocations");
                return -ENOMEM;
@@ -193,15 +192,15 @@ static int add_bop(struct sm_metadata *smm, enum block_op_type type, dm_block_t
 static int commit_bop(struct sm_metadata *smm, struct block_op *op)
 {
        int r = 0;
-       enum allocation_event ev;
+       int32_t nr_allocations;
 
        switch (op->type) {
        case BOP_INC:
-               r = sm_ll_inc(&smm->ll, op->block, &ev);
+               r = sm_ll_inc(&smm->ll, op->b, op->e, &nr_allocations);
                break;
 
        case BOP_DEC:
-               r = sm_ll_dec(&smm->ll, op->block, &ev);
+               r = sm_ll_dec(&smm->ll, op->b, op->e, &nr_allocations);
                break;
        }
 
@@ -314,7 +313,7 @@ static int sm_metadata_get_count(struct dm_space_map *sm, dm_block_t b,
             i = brb_next(&smm->uncommitted, i)) {
                struct block_op *op = smm->uncommitted.bops + i;
 
-               if (op->block != b)
+               if (b < op->b || b >= op->e)
                        continue;
 
                switch (op->type) {
@@ -355,7 +354,7 @@ static int sm_metadata_count_is_more_than_one(struct dm_space_map *sm,
 
                struct block_op *op = smm->uncommitted.bops + i;
 
-               if (op->block != b)
+               if (b < op->b || b >= op->e)
                        continue;
 
                switch (op->type) {
@@ -393,7 +392,7 @@ static int sm_metadata_set_count(struct dm_space_map *sm, dm_block_t b,
                                 uint32_t count)
 {
        int r, r2;
-       enum allocation_event ev;
+       int32_t nr_allocations;
        struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm);
 
        if (smm->recursion_count) {
@@ -402,40 +401,42 @@ static int sm_metadata_set_count(struct dm_space_map *sm, dm_block_t b,
        }
 
        in(smm);
-       r = sm_ll_insert(&smm->ll, b, count, &ev);
+       r = sm_ll_insert(&smm->ll, b, count, &nr_allocations);
        r2 = out(smm);
 
        return combine_errors(r, r2);
 }
 
-static int sm_metadata_inc_block(struct dm_space_map *sm, dm_block_t b)
+static int sm_metadata_inc_blocks(struct dm_space_map *sm, dm_block_t b, dm_block_t e)
 {
        int r, r2 = 0;
-       enum allocation_event ev;
+       int32_t nr_allocations;
        struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm);
 
-       if (recursing(smm))
-               r = add_bop(smm, BOP_INC, b);
-       else {
+       if (recursing(smm)) {
+               r = add_bop(smm, BOP_INC, b, e);
+               if (r)
+                       return r;
+       } else {
                in(smm);
-               r = sm_ll_inc(&smm->ll, b, &ev);
+               r = sm_ll_inc(&smm->ll, b, e, &nr_allocations);
                r2 = out(smm);
        }
 
        return combine_errors(r, r2);
 }
 
-static int sm_metadata_dec_block(struct dm_space_map *sm, dm_block_t b)
+static int sm_metadata_dec_blocks(struct dm_space_map *sm, dm_block_t b, dm_block_t e)
 {
        int r, r2 = 0;
-       enum allocation_event ev;
+       int32_t nr_allocations;
        struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm);
 
        if (recursing(smm))
-               r = add_bop(smm, BOP_DEC, b);
+               r = add_bop(smm, BOP_DEC, b, e);
        else {
                in(smm);
-               r = sm_ll_dec(&smm->ll, b, &ev);
+               r = sm_ll_dec(&smm->ll, b, e, &nr_allocations);
                r2 = out(smm);
        }
 
@@ -445,7 +446,7 @@ static int sm_metadata_dec_block(struct dm_space_map *sm, dm_block_t b)
 static int sm_metadata_new_block_(struct dm_space_map *sm, dm_block_t *b)
 {
        int r, r2 = 0;
-       enum allocation_event ev;
+       int32_t nr_allocations;
        struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm);
 
        /*
@@ -466,10 +467,10 @@ static int sm_metadata_new_block_(struct dm_space_map *sm, dm_block_t *b)
        smm->begin = *b + 1;
 
        if (recursing(smm))
-               r = add_bop(smm, BOP_INC, *b);
+               r = add_bop(smm, BOP_INC, *b, *b + 1);
        else {
                in(smm);
-               r = sm_ll_inc(&smm->ll, *b, &ev);
+               r = sm_ll_inc(&smm->ll, *b, *b + 1, &nr_allocations);
                r2 = out(smm);
        }
 
@@ -563,8 +564,8 @@ static const struct dm_space_map ops = {
        .get_count = sm_metadata_get_count,
        .count_is_more_than_one = sm_metadata_count_is_more_than_one,
        .set_count = sm_metadata_set_count,
-       .inc_block = sm_metadata_inc_block,
-       .dec_block = sm_metadata_dec_block,
+       .inc_blocks = sm_metadata_inc_blocks,
+       .dec_blocks = sm_metadata_dec_blocks,
        .new_block = sm_metadata_new_block,
        .commit = sm_metadata_commit,
        .root_size = sm_metadata_root_size,
@@ -648,18 +649,28 @@ static int sm_bootstrap_new_block(struct dm_space_map *sm, dm_block_t *b)
        return 0;
 }
 
-static int sm_bootstrap_inc_block(struct dm_space_map *sm, dm_block_t b)
+static int sm_bootstrap_inc_blocks(struct dm_space_map *sm, dm_block_t b, dm_block_t e)
 {
+       int r;
        struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm);
 
-       return add_bop(smm, BOP_INC, b);
+       r = add_bop(smm, BOP_INC, b, e);
+       if (r)
+               return r;
+
+       return 0;
 }
 
-static int sm_bootstrap_dec_block(struct dm_space_map *sm, dm_block_t b)
+static int sm_bootstrap_dec_blocks(struct dm_space_map *sm, dm_block_t b, dm_block_t e)
 {
+       int r;
        struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm);
 
-       return add_bop(smm, BOP_DEC, b);
+       r = add_bop(smm, BOP_DEC, b, e);
+       if (r)
+               return r;
+
+       return 0;
 }
 
 static int sm_bootstrap_commit(struct dm_space_map *sm)
@@ -690,8 +701,8 @@ static const struct dm_space_map bootstrap_ops = {
        .get_count = sm_bootstrap_get_count,
        .count_is_more_than_one = sm_bootstrap_count_is_more_than_one,
        .set_count = sm_bootstrap_set_count,
-       .inc_block = sm_bootstrap_inc_block,
-       .dec_block = sm_bootstrap_dec_block,
+       .inc_blocks = sm_bootstrap_inc_blocks,
+       .dec_blocks = sm_bootstrap_dec_blocks,
        .new_block = sm_bootstrap_new_block,
        .commit = sm_bootstrap_commit,
        .root_size = sm_bootstrap_root_size,
@@ -703,7 +714,7 @@ static const struct dm_space_map bootstrap_ops = {
 
 static int sm_metadata_extend(struct dm_space_map *sm, dm_block_t extra_blocks)
 {
-       int r, i;
+       int r;
        struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm);
        dm_block_t old_len = smm->ll.nr_blocks;
 
@@ -725,9 +736,7 @@ static int sm_metadata_extend(struct dm_space_map *sm, dm_block_t extra_blocks)
         * allocate any new blocks.
         */
        do {
-               for (i = old_len; !r && i < smm->begin; i++)
-                       r = add_bop(smm, BOP_INC, i);
-
+               r = add_bop(smm, BOP_INC, old_len, smm->begin);
                if (r)
                        goto out;
 
@@ -774,7 +783,6 @@ int dm_sm_metadata_create(struct dm_space_map *sm,
                          dm_block_t superblock)
 {
        int r;
-       dm_block_t i;
        struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm);
 
        smm->begin = superblock + 1;
@@ -799,9 +807,7 @@ int dm_sm_metadata_create(struct dm_space_map *sm,
         * Now we need to update the newly created data structures with the
         * allocated blocks that they were built from.
         */
-       for (i = superblock; !r && i < smm->begin; i++)
-               r = add_bop(smm, BOP_INC, i);
-
+       r = add_bop(smm, BOP_INC, superblock, smm->begin);
        if (r)
                return r;
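
The op-scanning loops above (in sm_metadata_get_count() and
sm_metadata_count_is_more_than_one()) now test half-open interval
membership, since a block_op covers [op->b, op->e).  A standalone
restatement of the test (hypothetical bop_covers() helper):

        /* true iff the op's range [b, e) covers block x; the patch uses
         * the negated form to skip ops:  if (b < op->b || b >= op->e) */
        static bool bop_covers(const struct block_op *op, dm_block_t x)
        {
                return x >= op->b && x < op->e;
        }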
 
diff --git a/drivers/md/persistent-data/dm-space-map.h b/drivers/md/persistent-data/dm-space-map.h
index 3e6d1153b7c4b898b5a1355ad8bcc43cac2de4f4..a015cd11f6e97e1925d9b5cea1f276a69ecf07ce 100644
@@ -46,8 +46,8 @@ struct dm_space_map {
 
        int (*commit)(struct dm_space_map *sm);
 
-       int (*inc_block)(struct dm_space_map *sm, dm_block_t b);
-       int (*dec_block)(struct dm_space_map *sm, dm_block_t b);
+       int (*inc_blocks)(struct dm_space_map *sm, dm_block_t b, dm_block_t e);
+       int (*dec_blocks)(struct dm_space_map *sm, dm_block_t b, dm_block_t e);
 
        /*
         * new_block will increment the returned block.
@@ -117,14 +117,24 @@ static inline int dm_sm_commit(struct dm_space_map *sm)
        return sm->commit(sm);
 }
 
+static inline int dm_sm_inc_blocks(struct dm_space_map *sm, dm_block_t b, dm_block_t e)
+{
+       return sm->inc_blocks(sm, b, e);
+}
+
 static inline int dm_sm_inc_block(struct dm_space_map *sm, dm_block_t b)
 {
-       return sm->inc_block(sm, b);
+       return dm_sm_inc_blocks(sm, b, b + 1);
+}
+
+static inline int dm_sm_dec_blocks(struct dm_space_map *sm, dm_block_t b, dm_block_t e)
+{
+       return sm->dec_blocks(sm, b, e);
 }
 
 static inline int dm_sm_dec_block(struct dm_space_map *sm, dm_block_t b)
 {
-       return sm->dec_block(sm, b);
+       return dm_sm_dec_blocks(sm, b, b + 1);
 }
 
 static inline int dm_sm_new_block(struct dm_space_map *sm, dm_block_t *b)
diff --git a/drivers/md/persistent-data/dm-transaction-manager.c b/drivers/md/persistent-data/dm-transaction-manager.c
index 4353e1146d738866b5071f84e7ff3b7d2f82458c..16643fc974e8495167574717e1113e3881d81795 100644
@@ -359,6 +359,17 @@ void dm_tm_inc(struct dm_transaction_manager *tm, dm_block_t b)
 }
 EXPORT_SYMBOL_GPL(dm_tm_inc);
 
+void dm_tm_inc_range(struct dm_transaction_manager *tm, dm_block_t b, dm_block_t e)
+{
+       /*
+        * The non-blocking clone doesn't support this.
+        */
+       BUG_ON(tm->is_clone);
+
+       dm_sm_inc_blocks(tm->sm, b, e);
+}
+EXPORT_SYMBOL_GPL(dm_tm_inc_range);
+
 void dm_tm_dec(struct dm_transaction_manager *tm, dm_block_t b)
 {
        /*
@@ -370,6 +381,47 @@ void dm_tm_dec(struct dm_transaction_manager *tm, dm_block_t b)
 }
 EXPORT_SYMBOL_GPL(dm_tm_dec);
 
+void dm_tm_dec_range(struct dm_transaction_manager *tm, dm_block_t b, dm_block_t e)
+{
+       /*
+        * The non-blocking clone doesn't support this.
+        */
+       BUG_ON(tm->is_clone);
+
+       dm_sm_dec_blocks(tm->sm, b, e);
+}
+EXPORT_SYMBOL_GPL(dm_tm_dec_range);
+
+void dm_tm_with_runs(struct dm_transaction_manager *tm,
+                    const __le64 *value_le, unsigned count, dm_tm_run_fn fn)
+{
+       uint64_t b, begin, end;
+       bool in_run = false;
+       unsigned i;
+
+       for (i = 0; i < count; i++, value_le++) {
+               b = le64_to_cpu(*value_le);
+
+               if (in_run) {
+                       if (b == end)
+                               end++;
+                       else {
+                               fn(tm, begin, end);
+                               begin = b;
+                               end = b + 1;
+                       }
+               } else {
+                       in_run = true;
+                       begin = b;
+                       end = b + 1;
+               }
+       }
+
+       if (in_run)
+               fn(tm, begin, end);
+}
+EXPORT_SYMBOL_GPL(dm_tm_with_runs);
+
 int dm_tm_ref(struct dm_transaction_manager *tm, dm_block_t b,
              uint32_t *result)
 {
diff --git a/drivers/md/persistent-data/dm-transaction-manager.h b/drivers/md/persistent-data/dm-transaction-manager.h
index 3d75cc59bbb8292c9b98165f7eaff707d8d99b20..906c02ed0365b2e9c35de3dbe34ee1019b3a86f0 100644
@@ -100,8 +100,18 @@ void dm_tm_unlock(struct dm_transaction_manager *tm, struct dm_block *b);
  * Functions for altering the reference count of a block directly.
  */
 void dm_tm_inc(struct dm_transaction_manager *tm, dm_block_t b);
-
+void dm_tm_inc_range(struct dm_transaction_manager *tm, dm_block_t b, dm_block_t e);
 void dm_tm_dec(struct dm_transaction_manager *tm, dm_block_t b);
+void dm_tm_dec_range(struct dm_transaction_manager *tm, dm_block_t b, dm_block_t e);
+
+/*
+ * Builds up runs of adjacent blocks, and then calls the given fn
+ * (typically dm_tm_inc_range or dm_tm_dec_range).  Very useful when you
+ * have to perform the same tm operation on all values in a btree leaf.
+ */
+typedef void (*dm_tm_run_fn)(struct dm_transaction_manager *, dm_block_t, dm_block_t);
+void dm_tm_with_runs(struct dm_transaction_manager *tm,
+                    const __le64 *value_le, unsigned count, dm_tm_run_fn fn);
 
 int dm_tm_ref(struct dm_transaction_manager *tm, dm_block_t b, uint32_t *result);
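
For reference, dm-btree-spine.c earlier in this patch shows the
expected use of dm_tm_with_runs(): the per-value btree callbacks reduce
to one-liners, e.g.

        static void le64_inc(void *context, const void *value_le, unsigned count)
        {
                dm_tm_with_runs(context, value_le, count, dm_tm_inc_range);
        }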