bcachefs: Improved nocow locking
author: Kent Overstreet <kent.overstreet@linux.dev>
Thu, 15 Dec 2022 01:52:11 +0000 (20:52 -0500)
committer: Kent Overstreet <kent.overstreet@linux.dev>
Sun, 22 Oct 2023 21:09:52 +0000 (17:09 -0400)
This improves the nocow lock table so that hash table entries have
multiple locks, and locks specify which bucket they're for - i.e. we can
now resolve hash collisions.

This is important because the allocator has to skip buckets that are
locked in the nocow lock table, and previously hash collisions would
cause it to spuriously skip unlocked buckets.

Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
fs/bcachefs/alloc_background.h
fs/bcachefs/alloc_foreground.c
fs/bcachefs/bcachefs.h
fs/bcachefs/data_update.c
fs/bcachefs/io.c
fs/bcachefs/nocow_locking.c
fs/bcachefs/nocow_locking.h
fs/bcachefs/nocow_locking_types.h [new file with mode: 0644]
fs/bcachefs/super.c
fs/bcachefs/sysfs.c

index d4957b4557bfb14b1265a1f1849aa4e0825ca754..a0c3c47b49b5970a26ededf4269ecba87e30ddea 100644 (file)
@@ -23,6 +23,16 @@ static inline bool bch2_dev_bucket_exists(struct bch_fs *c, struct bpos pos)
                pos.offset < ca->mi.nbuckets;
 }
 
+static inline u64 bucket_to_u64(struct bpos bucket)
+{
+       return (bucket.inode << 48) | bucket.offset;
+}
+
+static inline struct bpos u64_to_bucket(u64 bucket)
+{
+       return POS(bucket >> 48, bucket & ~(~0ULL << 48));
+}
+
 static inline u8 alloc_gc_gen(struct bch_alloc_v4 a)
 {
        return a.gen - a.oldest_gen;
index f78eaa52c11f8a973276c160d324d0779c310803..9e1c236d57b80e5becd681029b8b702703640a15 100644 (file)
@@ -28,6 +28,7 @@
 #include "io.h"
 #include "journal.h"
 #include "movinggc.h"
+#include "nocow_locking.h"
 #include "trace.h"
 
 #include <linux/math64.h>
index acd4adaf475accf823fceaf2b0bf91dad1ebca8e..6089d9ed6c27445400e1607fe64417dda4d6a941 100644 (file)
 #include "bcachefs_format.h"
 #include "errcode.h"
 #include "fifo.h"
-#include "nocow_locking.h"
+#include "nocow_locking_types.h"
 #include "opts.h"
 #include "util.h"
 
index 190ad03910af1fa6c1a0bf7de144984e22573fbd..eb248968de481f8d9f10c6f6dd46d58804c85c24 100644 (file)
@@ -11,6 +11,7 @@
 #include "io.h"
 #include "keylist.h"
 #include "move.h"
+#include "nocow_locking.h"
 #include "subvolume.h"
 #include "trace.h"
 
index d511bd6649534bf88b9ebdf0860c53111971c08c..fe0c4b58e525f17c7daf41814c763f1b4f086449 100644 (file)
@@ -27,6 +27,7 @@
 #include "journal.h"
 #include "keylist.h"
 #include "move.h"
+#include "nocow_locking.h"
 #include "rebalance.h"
 #include "subvolume.h"
 #include "super.h"
@@ -1469,7 +1470,7 @@ static void bch2_nocow_write(struct bch_write_op *op)
        struct {
                struct bpos     b;
                unsigned        gen;
-               two_state_lock_t *l;
+               struct nocow_lock_bucket *l;
        } buckets[BCH_REPLICAS_MAX];
        unsigned nr_buckets = 0;
        u32 snapshot;
@@ -1516,7 +1517,8 @@ retry:
                        buckets[nr_buckets].b = PTR_BUCKET_POS(c, ptr);
                        buckets[nr_buckets].gen = ptr->gen;
                        buckets[nr_buckets].l =
-                               bucket_nocow_lock(&c->nocow_locks, buckets[nr_buckets].b);
+                               bucket_nocow_lock(&c->nocow_locks,
+                                                 bucket_to_u64(buckets[nr_buckets].b));
 
                        prefetch(buckets[nr_buckets].l);
                        nr_buckets++;
@@ -1538,11 +1540,12 @@ retry:
 
                for (i = 0; i < nr_buckets; i++) {
                        struct bch_dev *ca = bch_dev_bkey_exists(c, buckets[i].b.inode);
-                       two_state_lock_t *l = buckets[i].l;
+                       struct nocow_lock_bucket *l = buckets[i].l;
                        bool stale;
 
-                       if (!bch2_two_state_trylock(l, BUCKET_NOCOW_LOCK_UPDATE))
-                               __bch2_bucket_nocow_lock(&c->nocow_locks, l, BUCKET_NOCOW_LOCK_UPDATE);
+                       __bch2_bucket_nocow_lock(&c->nocow_locks, l,
+                                                bucket_to_u64(buckets[i].b),
+                                                BUCKET_NOCOW_LOCK_UPDATE);
 
                        rcu_read_lock();
                        stale = gen_after(*bucket_gen(ca, buckets[i].b.offset), buckets[i].gen);
@@ -2984,11 +2987,6 @@ void bch2_fs_io_exit(struct bch_fs *c)
 
 int bch2_fs_io_init(struct bch_fs *c)
 {
-       unsigned i;
-
-       for (i = 0; i < ARRAY_SIZE(c->nocow_locks.l); i++)
-               two_state_lock_init(&c->nocow_locks.l[i]);
-
        if (bioset_init(&c->bio_read, 1, offsetof(struct bch_read_bio, bio),
                        BIOSET_NEED_BVECS) ||
            bioset_init(&c->bio_read_split, 1, offsetof(struct bch_read_bio, bio),
index b325fb105322a8393587813d9852bcbd652ebb4f..53e5bc9fd5854545518b24b7a8b813ef47f6b4f5 100644 (file)
 #include "nocow_locking.h"
 #include "util.h"
 
+#include <linux/closure.h>
+
+bool bch2_bucket_nocow_is_locked(struct bucket_nocow_lock_table *t, struct bpos bucket)
+{
+       u64 dev_bucket = bucket_to_u64(bucket);
+       struct nocow_lock_bucket *l = bucket_nocow_lock(t, dev_bucket);
+       unsigned i;
+
+       for (i = 0; i < ARRAY_SIZE(l->b); i++)
+               if (l->b[i] == dev_bucket && atomic_read(&l->l[i]))
+                       return true;
+       return false;
+}
+
+void bch2_bucket_nocow_unlock(struct bucket_nocow_lock_table *t, struct bpos bucket, int flags)
+{
+       u64 dev_bucket = bucket_to_u64(bucket);
+       struct nocow_lock_bucket *l = bucket_nocow_lock(t, dev_bucket);
+       int lock_val = flags ? 1 : -1;
+       unsigned i;
+
+       for (i = 0; i < ARRAY_SIZE(l->b); i++)
+               if (l->b[i] == dev_bucket) {
+                       if (!atomic_sub_return(lock_val, &l->l[i]))
+                               closure_wake_up(&l->wait);
+                       return;
+               }
+
+       BUG();
+}
+
+bool __bch2_bucket_nocow_trylock(struct nocow_lock_bucket *l,
+                                u64 dev_bucket, int flags)
+{
+       int v, lock_val = flags ? 1 : -1;
+       unsigned i;
+
+       spin_lock(&l->lock);
+
+       for (i = 0; i < ARRAY_SIZE(l->b); i++)
+               if (l->b[i] == dev_bucket)
+                       goto got_entry;
+
+       for (i = 0; i < ARRAY_SIZE(l->b); i++)
+               if (!atomic_read(&l->l[i])) {
+                       l->b[i] = dev_bucket;
+                       goto take_lock;
+               }
+fail:
+       spin_unlock(&l->lock);
+       return false;
+got_entry:
+       v = atomic_read(&l->l[i]);
+       if (lock_val > 0 ? v < 0 : v > 0)
+               goto fail;
+take_lock:
+       atomic_add(lock_val, &l->l[i]);
+       spin_unlock(&l->lock);
+       return true;
+}
+
 void __bch2_bucket_nocow_lock(struct bucket_nocow_lock_table *t,
-                             two_state_lock_t *l, int flags)
+                             struct nocow_lock_bucket *l,
+                             u64 dev_bucket, int flags)
+{
+       if (!__bch2_bucket_nocow_trylock(l, dev_bucket, flags)) {
+               struct bch_fs *c = container_of(t, struct bch_fs, nocow_locks);
+               u64 start_time = local_clock();
+
+               __closure_wait_event(&l->wait, __bch2_bucket_nocow_trylock(l, dev_bucket, flags));
+               bch2_time_stats_update(&c->times[BCH_TIME_nocow_lock_contended], start_time);
+       }
+}
+
+void bch2_nocow_locks_to_text(struct printbuf *out, struct bucket_nocow_lock_table *t)
 {
-       struct bch_fs *c = container_of(t, struct bch_fs, nocow_locks);
-       u64 start_time = local_clock();
+       unsigned i, nr_zero = 0;
+       struct nocow_lock_bucket *l;
+
+       for (l = t->l; l < t->l + ARRAY_SIZE(t->l); l++) {
+               unsigned v = 0;
+
+               for (i = 0; i < ARRAY_SIZE(l->l); i++)
+                       v |= atomic_read(&l->l[i]);
+
+               if (!v) {
+                       nr_zero++;
+                       continue;
+               }
+
+               if (nr_zero)
+                       prt_printf(out, "(%u empty entries)\n", nr_zero);
+               nr_zero = 0;
+
+               for (i = 0; i < ARRAY_SIZE(l->l); i++)
+                       if (atomic_read(&l->l[i]))
+                               prt_printf(out, "%llu: %i ", l->b[i], atomic_read(&l->l[i]));
+               prt_newline(out);
+       }
+
+       if (nr_zero)
+               prt_printf(out, "(%u empty entries)\n", nr_zero);
+}
+
+int bch2_fs_nocow_locking_init(struct bch_fs *c)
+{
+       unsigned i;
+
+       for (i = 0; i < ARRAY_SIZE(c->nocow_locks.l); i++)
+               spin_lock_init(&c->nocow_locks.l[i].lock);
 
-       __bch2_two_state_lock(l, flags & BUCKET_NOCOW_LOCK_UPDATE);
-       bch2_time_stats_update(&c->times[BCH_TIME_nocow_lock_contended], start_time);
+       return 0;
 }
index 2a7a9f44e88e9eaf5fcf6dbacbcf97a680df5bbe..ff8e4af52edcd95fefc0b3164870280ff58fd2b7 100644 (file)
@@ -2,54 +2,48 @@
 #ifndef _BCACHEFS_NOCOW_LOCKING_H
 #define _BCACHEFS_NOCOW_LOCKING_H
 
-#include "bcachefs_format.h"
-#include "two_state_shared_lock.h"
+#include "bcachefs.h"
+#include "alloc_background.h"
+#include "nocow_locking_types.h"
 
 #include <linux/hash.h>
 
-#define BUCKET_NOCOW_LOCKS_BITS                10
-#define BUCKET_NOCOW_LOCKS             (1U << BUCKET_NOCOW_LOCKS_BITS)
-
-struct bucket_nocow_lock_table {
-       two_state_lock_t                l[BUCKET_NOCOW_LOCKS];
-};
-
-#define BUCKET_NOCOW_LOCK_UPDATE       (1 << 0)
-
-static inline two_state_lock_t *bucket_nocow_lock(struct bucket_nocow_lock_table *t,
-                                                 struct bpos bucket)
+static inline struct nocow_lock_bucket *bucket_nocow_lock(struct bucket_nocow_lock_table *t,
+                                                         u64 dev_bucket)
 {
-       u64 dev_bucket = bucket.inode << 56 | bucket.offset;
        unsigned h = hash_64(dev_bucket, BUCKET_NOCOW_LOCKS_BITS);
 
        return t->l + (h & (BUCKET_NOCOW_LOCKS - 1));
 }
 
-static inline bool bch2_bucket_nocow_is_locked(struct bucket_nocow_lock_table *t,
-                                              struct bpos bucket)
-{
-       two_state_lock_t *l = bucket_nocow_lock(t, bucket);
+#define BUCKET_NOCOW_LOCK_UPDATE       (1 << 0)
 
-       return atomic_long_read(&l->v) != 0;
-}
+bool bch2_bucket_nocow_is_locked(struct bucket_nocow_lock_table *, struct bpos);
+void bch2_bucket_nocow_unlock(struct bucket_nocow_lock_table *, struct bpos, int);
+bool __bch2_bucket_nocow_trylock(struct nocow_lock_bucket *, u64, int);
+void __bch2_bucket_nocow_lock(struct bucket_nocow_lock_table *,
+                             struct nocow_lock_bucket *, u64, int);
 
-static inline void bch2_bucket_nocow_unlock(struct bucket_nocow_lock_table *t,
-                                           struct bpos bucket, int flags)
+static inline void bch2_bucket_nocow_lock(struct bucket_nocow_lock_table *t,
+                                         struct bpos bucket, int flags)
 {
-       two_state_lock_t *l = bucket_nocow_lock(t, bucket);
+       u64 dev_bucket = bucket_to_u64(bucket);
+       struct nocow_lock_bucket *l = bucket_nocow_lock(t, dev_bucket);
 
-       bch2_two_state_unlock(l, flags & BUCKET_NOCOW_LOCK_UPDATE);
+       __bch2_bucket_nocow_lock(t, l, dev_bucket, flags);
 }
 
-void __bch2_bucket_nocow_lock(struct bucket_nocow_lock_table *, two_state_lock_t *, int);
-
-static inline void bch2_bucket_nocow_lock(struct bucket_nocow_lock_table *t,
+static inline bool bch2_bucket_nocow_trylock(struct bucket_nocow_lock_table *t,
                                          struct bpos bucket, int flags)
 {
-       two_state_lock_t *l = bucket_nocow_lock(t, bucket);
+       u64 dev_bucket = bucket_to_u64(bucket);
+       struct nocow_lock_bucket *l = bucket_nocow_lock(t, dev_bucket);
 
-       if (!bch2_two_state_trylock(l, flags & BUCKET_NOCOW_LOCK_UPDATE))
-               __bch2_bucket_nocow_lock(t, l, flags);
+       return __bch2_bucket_nocow_trylock(l, dev_bucket, flags);
 }
 
+void bch2_nocow_locks_to_text(struct printbuf *, struct bucket_nocow_lock_table *);
+
+int bch2_fs_nocow_locking_init(struct bch_fs *);
+
 #endif /* _BCACHEFS_NOCOW_LOCKING_H */
diff --git a/fs/bcachefs/nocow_locking_types.h b/fs/bcachefs/nocow_locking_types.h
new file mode 100644 (file)
index 0000000..bd12bf6
--- /dev/null
@@ -0,0 +1,20 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _BCACHEFS_NOCOW_LOCKING_TYPES_H
+#define _BCACHEFS_NOCOW_LOCKING_TYPES_H
+
+#define BUCKET_NOCOW_LOCKS_BITS                10
+#define BUCKET_NOCOW_LOCKS             (1U << BUCKET_NOCOW_LOCKS_BITS)
+
+struct nocow_lock_bucket {
+       struct closure_waitlist         wait;
+       spinlock_t                      lock;
+       u64                             b[4];
+       atomic_t                        l[4];
+} __aligned(SMP_CACHE_BYTES);
+
+struct bucket_nocow_lock_table {
+       struct nocow_lock_bucket        l[BUCKET_NOCOW_LOCKS];
+};
+
+#endif /* _BCACHEFS_NOCOW_LOCKING_TYPES_H */
+
index 2fb7e6300ea5ed06fdfc5f46b6374d8de3281c91..e142de2a5527de0b7e573b8a65fc3f1bece1fdb1 100644 (file)
@@ -39,6 +39,7 @@
 #include "move.h"
 #include "migrate.h"
 #include "movinggc.h"
+#include "nocow_locking.h"
 #include "quota.h"
 #include "rebalance.h"
 #include "recovery.h"
@@ -821,6 +822,7 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts opts)
            bch2_fs_btree_write_buffer_init(c) ?:
            bch2_fs_subvolumes_init(c) ?:
            bch2_fs_io_init(c) ?:
+           bch2_fs_nocow_locking_init(c) ?:
            bch2_fs_encryption_init(c) ?:
            bch2_fs_compress_init(c) ?:
            bch2_fs_ec_init(c) ?:
index 5b1f792243cd1d522d73bc79566574b5e1b0c75f..6cbdf70f36bdabc53cfc21667088cd4c1496d57c 100644 (file)
@@ -27,6 +27,7 @@
 #include "journal.h"
 #include "keylist.h"
 #include "move.h"
+#include "nocow_locking.h"
 #include "opts.h"
 #include "rebalance.h"
 #include "replicas.h"
@@ -477,22 +478,8 @@ SHOW(bch2_fs)
                bch2_write_refs_to_text(out, c);
 #endif
 
-       if (attr == &sysfs_nocow_lock_table) {
-               int i, count = 1;
-               long last, curr = 0;
-
-               last = atomic_long_read(&c->nocow_locks.l[0].v);
-               for (i = 1; i < BUCKET_NOCOW_LOCKS; i++) {
-                       curr = atomic_long_read(&c->nocow_locks.l[i].v);
-                       if (last != curr) {
-                               prt_printf(out, "%li: %d\n", last, count);
-                               count = 1;
-                               last = curr;
-                       } else
-                               count++;
-               }
-               prt_printf(out, "%li: %d\n", last, count);
-       }
+       if (attr == &sysfs_nocow_lock_table)
+               bch2_nocow_locks_to_text(out, &c->nocow_locks);
 
        return 0;
 }