dm-transaction-manager: use red-black trees instead of linear lists
authorMikulas Patocka <mpatocka@redhat.com>
Wed, 15 Jan 2025 17:33:30 +0000 (18:33 +0100)
committerMikulas Patocka <mpatocka@redhat.com>
Fri, 17 Jan 2025 21:05:40 +0000 (22:05 +0100)
There was reported performance degradation when the shadow map contained
too many entries [1]. The shadow map uses 256-bucket hash with linear
lists - when there are too many entries, it has quadratic complexity.

Meir Elisha proposed to add a module parameter that could configure the
size of the hash array - however, this is not ideal because users don't
know that they should increase the parameter when they get bad
performance.

This commit replaces the linear lists with rb-trees (so that there's a
hash of rb-trees), they have logarithmic complexity, so it solves the
performance degradation.

Link: https://patchwork.kernel.org/project/dm-devel/patch/20241014134944.1264991-1-meir.elisha@volumez.com/
Reported-by: Meir Elisha <meir.elisha@volumez.com>
Signed-off-by: Mikulas Patocka <mpatocka@redhat.com>
drivers/md/persistent-data/dm-transaction-manager.c

index c7ba4e6cbbc7e9ed4237ea0345a3ab0c0da6b2df..98c745d90f4873dd2e847e5111241ce59a91dce5 100644 (file)
@@ -13,6 +13,7 @@
 #include <linux/export.h>
 #include <linux/mutex.h>
 #include <linux/hash.h>
+#include <linux/rbtree.h>
 #include <linux/slab.h>
 #include <linux/device-mapper.h>
 
@@ -77,7 +78,7 @@ static void prefetch_issue(struct prefetch_set *p, struct dm_block_manager *bm)
 /*----------------------------------------------------------------*/
 
 struct shadow_info {
-       struct hlist_node hlist;
+       struct rb_node node;
        dm_block_t where;
 };
 
@@ -95,7 +96,7 @@ struct dm_transaction_manager {
        struct dm_space_map *sm;
 
        spinlock_t lock;
-       struct hlist_head buckets[DM_HASH_SIZE];
+       struct rb_root buckets[DM_HASH_SIZE];
 
        struct prefetch_set prefetches;
 };
@@ -106,14 +107,22 @@ static int is_shadow(struct dm_transaction_manager *tm, dm_block_t b)
 {
        int r = 0;
        unsigned int bucket = dm_hash_block(b, DM_HASH_MASK);
-       struct shadow_info *si;
+       struct rb_node **node;
 
        spin_lock(&tm->lock);
-       hlist_for_each_entry(si, tm->buckets + bucket, hlist)
-               if (si->where == b) {
+       node = &tm->buckets[bucket].rb_node;
+       while (*node) {
+               struct shadow_info *si =
+                       rb_entry(*node, struct shadow_info, node);
+               if (b == si->where) {
                        r = 1;
                        break;
                }
+               if (b < si->where)
+                       node = &si->node.rb_left;
+               else
+                       node = &si->node.rb_right;
+       }
        spin_unlock(&tm->lock);
 
        return r;
@@ -130,30 +139,41 @@ static void insert_shadow(struct dm_transaction_manager *tm, dm_block_t b)
 
        si = kmalloc(sizeof(*si), GFP_NOIO);
        if (si) {
+               struct rb_node **node, *parent;
                si->where = b;
                bucket = dm_hash_block(b, DM_HASH_MASK);
+
                spin_lock(&tm->lock);
-               hlist_add_head(&si->hlist, tm->buckets + bucket);
+               node = &tm->buckets[bucket].rb_node;
+               parent = NULL;
+               while (*node) {
+                       struct shadow_info *si =
+                               rb_entry(*node, struct shadow_info, node);
+                       parent = *node;
+                       if (b < si->where)
+                               node = &si->node.rb_left;
+                       else
+                               node = &si->node.rb_right;
+               }
+               rb_link_node(&si->node, parent, node);
+               rb_insert_color(&si->node, &tm->buckets[bucket]);
                spin_unlock(&tm->lock);
        }
 }
 
 static void wipe_shadow_table(struct dm_transaction_manager *tm)
 {
-       struct shadow_info *si;
-       struct hlist_node *tmp;
-       struct hlist_head *bucket;
-       int i;
+       unsigned int i;
 
        spin_lock(&tm->lock);
        for (i = 0; i < DM_HASH_SIZE; i++) {
-               bucket = tm->buckets + i;
-               hlist_for_each_entry_safe(si, tmp, bucket, hlist)
+               while (!RB_EMPTY_ROOT(&tm->buckets[i])) {
+                       struct shadow_info *si =
+                               rb_entry(tm->buckets[i].rb_node, struct shadow_info, node);
+                       rb_erase(&si->node, &tm->buckets[i]);
                        kfree(si);
-
-               INIT_HLIST_HEAD(bucket);
+               }
        }
-
        spin_unlock(&tm->lock);
 }
 
@@ -162,7 +182,7 @@ static void wipe_shadow_table(struct dm_transaction_manager *tm)
 static struct dm_transaction_manager *dm_tm_create(struct dm_block_manager *bm,
                                                   struct dm_space_map *sm)
 {
-       int i;
+       unsigned int i;
        struct dm_transaction_manager *tm;
 
        tm = kmalloc(sizeof(*tm), GFP_KERNEL);
@@ -176,7 +196,7 @@ static struct dm_transaction_manager *dm_tm_create(struct dm_block_manager *bm,
 
        spin_lock_init(&tm->lock);
        for (i = 0; i < DM_HASH_SIZE; i++)
-               INIT_HLIST_HEAD(tm->buckets + i);
+               tm->buckets[i] = RB_ROOT;
 
        prefetch_init(&tm->prefetches);