bcachefs: btree_locking.c
author Kent Overstreet <kent.overstreet@gmail.com>
Fri, 19 Aug 2022 19:35:34 +0000 (15:35 -0400)
committer Kent Overstreet <kent.overstreet@linux.dev>
Sun, 22 Oct 2023 21:09:39 +0000 (17:09 -0400)
Start to centralize some of the locking code in a new file; more locking
code will move here in the future.

Signed-off-by: Kent Overstreet <kent.overstreet@gmail.com>
fs/bcachefs/Makefile
fs/bcachefs/btree_iter.c
fs/bcachefs/btree_iter.h
fs/bcachefs/btree_locking.c [new file with mode: 0644]
fs/bcachefs/btree_locking.h
fs/bcachefs/btree_types.h
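
A note on the locking model being consolidated here: btree nodes in this tree are guarded by SIX locks (shared/intent/exclusive, see six.h). Shared admits concurrent readers; intent excludes other intent and write holders while still admitting readers, so a path can hold an intent lock across a long traversal and take the write lock only around the actual modification. A minimal sketch of that pattern, assuming the six.h API as it appears in the diffs below (the caller itself is hypothetical):

/*
 * Hypothetical caller, not part of the patch: illustrates the
 * shared/intent/exclusive pattern the functions below implement.
 */
static void example_modify_node(struct btree *b)
{
	/* Intent: excludes other intent/write holders, admits readers: */
	six_lock_intent(&b->c.lock, NULL, NULL);

	/* ... search within the node; readers stay unblocked ... */

	/* Write: held only around the modification itself: */
	six_lock_write(&b->c.lock, NULL, NULL);
	/* ... modify ... */
	six_unlock_write(&b->c.lock);

	six_unlock_intent(&b->c.lock);
}

This is also why __bch2_btree_node_lock_write() below can temporarily drop its own read counts before six_lock_write(): the intent lock it already holds keeps the node from changing underneath it.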

index 2f4bd31c862ffe630b0565482c5c6dc1a99428c2..e23667548e09ef47c161e3ecdffa93c1dcd28403 100644
@@ -13,6 +13,7 @@ bcachefs-y            :=      \
        btree_io.o              \
        btree_iter.o            \
        btree_key_cache.o       \
+       btree_locking.o         \
        btree_update_interior.o \
        btree_update_leaf.o     \
        buckets.o               \
index 95bc71dd87afacb3ff54bd0654f12c1b2ebf140e..488b56a209e3c86bbd02bda884041b0a41c16a20 100644
@@ -118,444 +118,6 @@ static inline bool btree_path_pos_in_node(struct btree_path *path,
                !btree_path_pos_after_node(path, b);
 }
 
-/* Btree node locking: */
-
-void bch2_btree_node_unlock_write(struct btree_trans *trans,
-                       struct btree_path *path, struct btree *b)
-{
-       bch2_btree_node_unlock_write_inlined(trans, path, b);
-}
-
-struct six_lock_count bch2_btree_node_lock_counts(struct btree_trans *trans,
-                                          struct btree_path *skip,
-                                          struct btree *b,
-                                          unsigned level)
-{
-       struct btree_path *path;
-       struct six_lock_count ret = { 0, 0 };
-
-       if (IS_ERR_OR_NULL(b))
-               return ret;
-
-       trans_for_each_path(trans, path)
-               if (path != skip && path->l[level].b == b) {
-                       ret.read += btree_node_read_locked(path, level);
-                       ret.intent += btree_node_intent_locked(path, level);
-               }
-
-       return ret;
-}
-
-static inline void six_lock_readers_add(struct six_lock *lock, int nr)
-{
-       if (!lock->readers)
-               atomic64_add(__SIX_VAL(read_lock, nr), &lock->state.counter);
-       else
-               this_cpu_add(*lock->readers, nr);
-}
-
-void __bch2_btree_node_lock_write(struct btree_trans *trans, struct btree *b)
-{
-       int readers = bch2_btree_node_lock_counts(trans, NULL, b, b->c.level).read;
-
-       /*
-        * Must drop our read locks before calling six_lock_write() -
-        * six_unlock() won't do wakeups until the reader count
-        * goes to 0, and it's safe because we have the node intent
-        * locked:
-        */
-       six_lock_readers_add(&b->c.lock, -readers);
-       six_lock_write(&b->c.lock, NULL, NULL);
-       six_lock_readers_add(&b->c.lock, readers);
-}
-
-bool __bch2_btree_node_relock(struct btree_trans *trans,
-                             struct btree_path *path, unsigned level)
-{
-       struct btree *b = btree_path_node(path, level);
-       int want = __btree_lock_want(path, level);
-
-       if (!is_btree_node(path, level))
-               goto fail;
-
-       if (race_fault())
-               goto fail;
-
-       if (six_relock_type(&b->c.lock, want, path->l[level].lock_seq) ||
-           (btree_node_lock_seq_matches(path, b, level) &&
-            btree_node_lock_increment(trans, b, level, want))) {
-               mark_btree_node_locked(trans, path, level, want);
-               return true;
-       }
-fail:
-       if (b != ERR_PTR(-BCH_ERR_no_btree_node_cached) &&
-           b != ERR_PTR(-BCH_ERR_no_btree_node_init))
-               trace_btree_node_relock_fail(trans, _RET_IP_, path, level);
-       return false;
-}
-
-bool bch2_btree_node_upgrade(struct btree_trans *trans,
-                            struct btree_path *path, unsigned level)
-{
-       struct btree *b = path->l[level].b;
-
-       if (!is_btree_node(path, level))
-               return false;
-
-       switch (btree_lock_want(path, level)) {
-       case BTREE_NODE_UNLOCKED:
-               BUG_ON(btree_node_locked(path, level));
-               return true;
-       case BTREE_NODE_READ_LOCKED:
-               BUG_ON(btree_node_intent_locked(path, level));
-               return bch2_btree_node_relock(trans, path, level);
-       case BTREE_NODE_INTENT_LOCKED:
-               break;
-       }
-
-       if (btree_node_intent_locked(path, level))
-               return true;
-
-       if (race_fault())
-               return false;
-
-       if (btree_node_locked(path, level)
-           ? six_lock_tryupgrade(&b->c.lock)
-           : six_relock_type(&b->c.lock, SIX_LOCK_intent, path->l[level].lock_seq))
-               goto success;
-
-       if (btree_node_lock_seq_matches(path, b, level) &&
-           btree_node_lock_increment(trans, b, level, BTREE_NODE_INTENT_LOCKED)) {
-               btree_node_unlock(trans, path, level);
-               goto success;
-       }
-
-       trace_btree_node_upgrade_fail(trans, _RET_IP_, path, level);
-       return false;
-success:
-       mark_btree_node_intent_locked(trans, path, level);
-       return true;
-}
-
-static inline bool btree_path_get_locks(struct btree_trans *trans,
-                                       struct btree_path *path,
-                                       bool upgrade)
-{
-       unsigned l = path->level;
-       int fail_idx = -1;
-
-       do {
-               if (!btree_path_node(path, l))
-                       break;
-
-               if (!(upgrade
-                     ? bch2_btree_node_upgrade(trans, path, l)
-                     : bch2_btree_node_relock(trans, path, l)))
-                       fail_idx = l;
-
-               l++;
-       } while (l < path->locks_want);
-
-       /*
-        * When we fail to get a lock, we have to ensure that any child nodes
-        * can't be relocked so bch2_btree_path_traverse has to walk back up to
-        * the node that we failed to relock:
-        */
-       if (fail_idx >= 0) {
-               __bch2_btree_path_unlock(trans, path);
-               btree_path_set_dirty(path, BTREE_ITER_NEED_TRAVERSE);
-
-               do {
-                       path->l[fail_idx].b = upgrade
-                               ? ERR_PTR(-BCH_ERR_no_btree_node_upgrade)
-                               : ERR_PTR(-BCH_ERR_no_btree_node_relock);
-                       --fail_idx;
-               } while (fail_idx >= 0);
-       }
-
-       if (path->uptodate == BTREE_ITER_NEED_RELOCK)
-               path->uptodate = BTREE_ITER_UPTODATE;
-
-       bch2_trans_verify_locks(trans);
-
-       return path->uptodate < BTREE_ITER_NEED_RELOCK;
-}
-
-static struct bpos btree_node_pos(struct btree_bkey_cached_common *_b,
-                                 bool cached)
-{
-       return !cached
-               ? container_of(_b, struct btree, c)->key.k.p
-               : container_of(_b, struct bkey_cached, c)->key.pos;
-}
-
-/* Slowpath: */
-int __bch2_btree_node_lock(struct btree_trans *trans,
-                          struct btree_path *path,
-                          struct btree *b,
-                          struct bpos pos, unsigned level,
-                          enum six_lock_type type,
-                          six_lock_should_sleep_fn should_sleep_fn, void *p,
-                          unsigned long ip)
-{
-       struct btree_path *linked;
-       unsigned reason;
-
-       /* Check if it's safe to block: */
-       trans_for_each_path(trans, linked) {
-               if (!linked->nodes_locked)
-                       continue;
-
-               /*
-                * Can't block taking an intent lock if we have _any_ nodes read
-                * locked:
-                *
-                * - Our read lock blocks another thread with an intent lock on
-                *   the same node from getting a write lock, and thus from
-                *   dropping its intent lock
-                *
-                * - And the other thread may have multiple nodes intent locked:
-                *   both the node we want to intent lock, and the node we
-                *   already have read locked - deadlock:
-                */
-               if (type == SIX_LOCK_intent &&
-                   linked->nodes_locked != linked->nodes_intent_locked) {
-                       reason = 1;
-                       goto deadlock;
-               }
-
-               if (linked->btree_id != path->btree_id) {
-                       if (linked->btree_id < path->btree_id)
-                               continue;
-
-                       reason = 3;
-                       goto deadlock;
-               }
-
-               /*
-                * Within the same btree, non-cached paths come before cached
-                * paths:
-                */
-               if (linked->cached != path->cached) {
-                       if (!linked->cached)
-                               continue;
-
-                       reason = 4;
-                       goto deadlock;
-               }
-
-               /*
-                * Interior nodes must be locked before their descendants: if
-                * another path has possible descendants locked of the node
-                * we're about to lock, it must have the ancestors locked too:
-                */
-               if (level > __fls(linked->nodes_locked)) {
-                       reason = 5;
-                       goto deadlock;
-               }
-
-               /* Must lock btree nodes in key order: */
-               if (btree_node_locked(linked, level) &&
-                   bpos_cmp(pos, btree_node_pos((void *) linked->l[level].b,
-                                                linked->cached)) <= 0) {
-                       reason = 7;
-                       goto deadlock;
-               }
-       }
-
-       return btree_node_lock_type(trans, path, b, pos, level,
-                                   type, should_sleep_fn, p);
-deadlock:
-       trace_trans_restart_would_deadlock(trans, ip, reason, linked, path, &pos);
-       return btree_trans_restart(trans, BCH_ERR_transaction_restart_would_deadlock);
-}
-
-/* Btree iterator locking: */
-
-#ifdef CONFIG_BCACHEFS_DEBUG
-
-static void bch2_btree_path_verify_locks(struct btree_path *path)
-{
-       unsigned l;
-
-       if (!path->nodes_locked) {
-               BUG_ON(path->uptodate == BTREE_ITER_UPTODATE &&
-                      btree_path_node(path, path->level));
-               return;
-       }
-
-       for (l = 0; btree_path_node(path, l); l++)
-               BUG_ON(btree_lock_want(path, l) !=
-                      btree_node_locked_type(path, l));
-}
-
-void bch2_trans_verify_locks(struct btree_trans *trans)
-{
-       struct btree_path *path;
-
-       trans_for_each_path(trans, path)
-               bch2_btree_path_verify_locks(path);
-}
-#else
-static inline void bch2_btree_path_verify_locks(struct btree_path *path) {}
-#endif
-
-/* Btree path locking: */
-
-/*
- * Only for btree_cache.c - only relocks intent locks
- */
-int bch2_btree_path_relock_intent(struct btree_trans *trans,
-                                 struct btree_path *path)
-{
-       unsigned l;
-
-       for (l = path->level;
-            l < path->locks_want && btree_path_node(path, l);
-            l++) {
-               if (!bch2_btree_node_relock(trans, path, l)) {
-                       __bch2_btree_path_unlock(trans, path);
-                       btree_path_set_dirty(path, BTREE_ITER_NEED_TRAVERSE);
-                       trace_trans_restart_relock_path_intent(trans, _RET_IP_, path);
-                       return btree_trans_restart(trans, BCH_ERR_transaction_restart_relock_path_intent);
-               }
-       }
-
-       return 0;
-}
-
-__flatten
-static bool bch2_btree_path_relock_norestart(struct btree_trans *trans,
-                       struct btree_path *path, unsigned long trace_ip)
-{
-       return btree_path_get_locks(trans, path, false);
-}
-
-static int __bch2_btree_path_relock(struct btree_trans *trans,
-                       struct btree_path *path, unsigned long trace_ip)
-{
-       if (!bch2_btree_path_relock_norestart(trans, path, trace_ip)) {
-               trace_trans_restart_relock_path(trans, trace_ip, path);
-               return btree_trans_restart(trans, BCH_ERR_transaction_restart_relock_path);
-       }
-
-       return 0;
-}
-
-static inline int bch2_btree_path_relock(struct btree_trans *trans,
-                       struct btree_path *path, unsigned long trace_ip)
-{
-       return btree_node_locked(path, path->level)
-               ? 0
-               : __bch2_btree_path_relock(trans, path, trace_ip);
-}
-
-bool __bch2_btree_path_upgrade(struct btree_trans *trans,
-                              struct btree_path *path,
-                              unsigned new_locks_want)
-{
-       struct btree_path *linked;
-
-       EBUG_ON(path->locks_want >= new_locks_want);
-
-       path->locks_want = new_locks_want;
-
-       if (btree_path_get_locks(trans, path, true))
-               return true;
-
-       /*
-        * XXX: this is ugly - we'd prefer to not be mucking with other
-        * iterators in the btree_trans here.
-        *
-        * On failure to upgrade the iterator, setting iter->locks_want and
-        * calling get_locks() is sufficient to make bch2_btree_path_traverse()
-        * get the locks we want on transaction restart.
-        *
-        * But if this iterator was a clone, on transaction restart what we did
-        * to this iterator isn't going to be preserved.
-        *
-        * Possibly we could add an iterator field for the parent iterator when
-        * an iterator is a copy - for now, we'll just upgrade any other
-        * iterators with the same btree id.
-        *
-        * The code below used to be needed to ensure ancestor nodes get locked
-        * before interior nodes - now that's handled by
-        * bch2_btree_path_traverse_all().
-        */
-       if (!path->cached && !trans->in_traverse_all)
-               trans_for_each_path(trans, linked)
-                       if (linked != path &&
-                           linked->cached == path->cached &&
-                           linked->btree_id == path->btree_id &&
-                           linked->locks_want < new_locks_want) {
-                               linked->locks_want = new_locks_want;
-                               btree_path_get_locks(trans, linked, true);
-                       }
-
-       return false;
-}
-
-void __bch2_btree_path_downgrade(struct btree_trans *trans,
-                                struct btree_path *path,
-                                unsigned new_locks_want)
-{
-       unsigned l;
-
-       EBUG_ON(path->locks_want < new_locks_want);
-
-       path->locks_want = new_locks_want;
-
-       while (path->nodes_locked &&
-              (l = __fls(path->nodes_locked)) >= path->locks_want) {
-               if (l > path->level) {
-                       btree_node_unlock(trans, path, l);
-               } else {
-                       if (btree_node_intent_locked(path, l)) {
-                               six_lock_downgrade(&path->l[l].b->c.lock);
-                               path->nodes_intent_locked ^= 1 << l;
-                       }
-                       break;
-               }
-       }
-
-       bch2_btree_path_verify_locks(path);
-}
-
-void bch2_trans_downgrade(struct btree_trans *trans)
-{
-       struct btree_path *path;
-
-       trans_for_each_path(trans, path)
-               bch2_btree_path_downgrade(trans, path);
-}
-
-/* Btree transaction locking: */
-
-int bch2_trans_relock(struct btree_trans *trans)
-{
-       struct btree_path *path;
-
-       if (unlikely(trans->restarted))
-               return -BCH_ERR_transaction_restart_relock;
-
-       trans_for_each_path(trans, path)
-               if (path->should_be_locked &&
-                   bch2_btree_path_relock(trans, path, _RET_IP_)) {
-                       trace_trans_restart_relock(trans, _RET_IP_, path);
-                       BUG_ON(!trans->restarted);
-                       return -BCH_ERR_transaction_restart_relock;
-               }
-       return 0;
-}
-
-void bch2_trans_unlock(struct btree_trans *trans)
-{
-       struct btree_path *path;
-
-       trans_for_each_path(trans, path)
-               __bch2_btree_path_unlock(trans, path);
-}
-
 /* Btree iterator: */
 
 #ifdef CONFIG_BCACHEFS_DEBUG
@@ -2036,10 +1598,8 @@ struct btree_path *bch2_path_get(struct btree_trans *trans,
         */
 
        locks_want = min(locks_want, BTREE_MAX_DEPTH);
-       if (locks_want > path->locks_want) {
-               path->locks_want = locks_want;
-               btree_path_get_locks(trans, path, true);
-       }
+       if (locks_want > path->locks_want)
+               bch2_btree_path_upgrade_noupgrade_sibs(trans, path, locks_want);
 
        return path;
 }
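
The deadlock-avoidance checks deleted above reappear verbatim in btree_locking.c below; they amount to a total order on node locks, and blocking is only allowed when every lock already held sorts strictly before the one being taken. Restated as a hypothetical predicate (simplified: the intent-vs-read rule, reason 1, is a separate check, and the real position comparison only applies when the held path has the same level locked):

/*
 * Hypothetical restatement of the lock ordering enforced by
 * __bch2_btree_node_lock(): ascending btree_id, then non-cached
 * before cached paths, then descending level (ancestors first),
 * then ascending key position. Not part of the patch.
 */
static bool example_lock_order_ok(enum btree_id held_btree, bool held_cached,
				  unsigned held_top_level, struct bpos held_pos,
				  enum btree_id want_btree, bool want_cached,
				  unsigned want_level, struct bpos want_pos)
{
	if (held_btree != want_btree)
		return held_btree < want_btree;
	if (held_cached != want_cached)
		return want_cached;
	if (want_level > held_top_level)
		return false;
	return bpos_cmp(want_pos, held_pos) > 0;
}
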
index 51beeddcd45e62c951d5d5cef85f8ff43d9d2046..c083e49475d10dffd2bc8bc9cd602248d3416c0e 100644
@@ -181,12 +181,10 @@ struct bkey_i *bch2_btree_journal_peek_slot(struct btree_trans *,
 
 #ifdef CONFIG_BCACHEFS_DEBUG
 void bch2_trans_verify_paths(struct btree_trans *);
-void bch2_trans_verify_locks(struct btree_trans *);
 void bch2_assert_pos_locked(struct btree_trans *, enum btree_id,
                            struct bpos, bool);
 #else
 static inline void bch2_trans_verify_paths(struct btree_trans *trans) {}
-static inline void bch2_trans_verify_locks(struct btree_trans *trans) {}
 static inline void bch2_assert_pos_locked(struct btree_trans *trans, enum btree_id id,
                                          struct bpos pos, bool key_cache) {}
 #endif
@@ -231,20 +229,6 @@ static inline int btree_trans_restart(struct btree_trans *trans, int err)
 bool bch2_btree_node_upgrade(struct btree_trans *,
                             struct btree_path *, unsigned);
 
-bool __bch2_btree_path_upgrade(struct btree_trans *,
-                              struct btree_path *, unsigned);
-
-static inline bool bch2_btree_path_upgrade(struct btree_trans *trans,
-                                          struct btree_path *path,
-                                          unsigned new_locks_want)
-{
-       new_locks_want = min(new_locks_want, BTREE_MAX_DEPTH);
-
-       return path->locks_want < new_locks_want
-               ? __bch2_btree_path_upgrade(trans, path, new_locks_want)
-               : path->uptodate == BTREE_ITER_UPTODATE;
-}
-
 void __bch2_btree_path_downgrade(struct btree_trans *, struct btree_path *, unsigned);
 
 static inline void bch2_btree_path_downgrade(struct btree_trans *trans,
diff --git a/fs/bcachefs/btree_locking.c b/fs/bcachefs/btree_locking.c
new file mode 100644
index 0000000..3f20fbc
--- /dev/null
@@ -0,0 +1,442 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include "bcachefs.h"
+#include "btree_locking.h"
+#include "btree_types.h"
+
+struct lock_class_key bch2_btree_node_lock_key;
+
+/* Btree node locking: */
+
+void bch2_btree_node_unlock_write(struct btree_trans *trans,
+                       struct btree_path *path, struct btree *b)
+{
+       bch2_btree_node_unlock_write_inlined(trans, path, b);
+}
+
+struct six_lock_count bch2_btree_node_lock_counts(struct btree_trans *trans,
+                                                 struct btree_path *skip,
+                                                 struct btree *b,
+                                                 unsigned level)
+{
+       struct btree_path *path;
+       struct six_lock_count ret = { 0, 0 };
+
+       if (IS_ERR_OR_NULL(b))
+               return ret;
+
+       trans_for_each_path(trans, path)
+               if (path != skip && path->l[level].b == b) {
+                       ret.read += btree_node_read_locked(path, level);
+                       ret.intent += btree_node_intent_locked(path, level);
+               }
+
+       return ret;
+}
+
+static inline void six_lock_readers_add(struct six_lock *lock, int nr)
+{
+       if (!lock->readers)
+               atomic64_add(__SIX_VAL(read_lock, nr), &lock->state.counter);
+       else
+               this_cpu_add(*lock->readers, nr);
+}
+
+void __bch2_btree_node_lock_write(struct btree_trans *trans, struct btree *b)
+{
+       int readers = bch2_btree_node_lock_counts(trans, NULL, b, b->c.level).read;
+
+       /*
+        * Must drop our read locks before calling six_lock_write() -
+        * six_unlock() won't do wakeups until the reader count
+        * goes to 0, and it's safe because we have the node intent
+        * locked:
+        */
+       six_lock_readers_add(&b->c.lock, -readers);
+       six_lock_write(&b->c.lock, NULL, NULL);
+       six_lock_readers_add(&b->c.lock, readers);
+}
+
+bool __bch2_btree_node_relock(struct btree_trans *trans,
+                             struct btree_path *path, unsigned level)
+{
+       struct btree *b = btree_path_node(path, level);
+       int want = __btree_lock_want(path, level);
+
+       if (!is_btree_node(path, level))
+               goto fail;
+
+       if (race_fault())
+               goto fail;
+
+       if (six_relock_type(&b->c.lock, want, path->l[level].lock_seq) ||
+           (btree_node_lock_seq_matches(path, b, level) &&
+            btree_node_lock_increment(trans, b, level, want))) {
+               mark_btree_node_locked(trans, path, level, want);
+               return true;
+       }
+fail:
+       if (b != ERR_PTR(-BCH_ERR_no_btree_node_cached) &&
+           b != ERR_PTR(-BCH_ERR_no_btree_node_init))
+               trace_btree_node_relock_fail(trans, _RET_IP_, path, level);
+       return false;
+}
+
+bool bch2_btree_node_upgrade(struct btree_trans *trans,
+                            struct btree_path *path, unsigned level)
+{
+       struct btree *b = path->l[level].b;
+
+       if (!is_btree_node(path, level))
+               return false;
+
+       switch (btree_lock_want(path, level)) {
+       case BTREE_NODE_UNLOCKED:
+               BUG_ON(btree_node_locked(path, level));
+               return true;
+       case BTREE_NODE_READ_LOCKED:
+               BUG_ON(btree_node_intent_locked(path, level));
+               return bch2_btree_node_relock(trans, path, level);
+       case BTREE_NODE_INTENT_LOCKED:
+               break;
+       }
+
+       if (btree_node_intent_locked(path, level))
+               return true;
+
+       if (race_fault())
+               return false;
+
+       if (btree_node_locked(path, level)
+           ? six_lock_tryupgrade(&b->c.lock)
+           : six_relock_type(&b->c.lock, SIX_LOCK_intent, path->l[level].lock_seq))
+               goto success;
+
+       if (btree_node_lock_seq_matches(path, b, level) &&
+           btree_node_lock_increment(trans, b, level, BTREE_NODE_INTENT_LOCKED)) {
+               btree_node_unlock(trans, path, level);
+               goto success;
+       }
+
+       trace_btree_node_upgrade_fail(trans, _RET_IP_, path, level);
+       return false;
+success:
+       mark_btree_node_intent_locked(trans, path, level);
+       return true;
+}
+
+static inline bool btree_path_get_locks(struct btree_trans *trans,
+                                       struct btree_path *path,
+                                       bool upgrade)
+{
+       unsigned l = path->level;
+       int fail_idx = -1;
+
+       do {
+               if (!btree_path_node(path, l))
+                       break;
+
+               if (!(upgrade
+                     ? bch2_btree_node_upgrade(trans, path, l)
+                     : bch2_btree_node_relock(trans, path, l)))
+                       fail_idx = l;
+
+               l++;
+       } while (l < path->locks_want);
+
+       /*
+        * When we fail to get a lock, we have to ensure that any child nodes
+        * can't be relocked so bch2_btree_path_traverse has to walk back up to
+        * the node that we failed to relock:
+        */
+       if (fail_idx >= 0) {
+               __bch2_btree_path_unlock(trans, path);
+               btree_path_set_dirty(path, BTREE_ITER_NEED_TRAVERSE);
+
+               do {
+                       path->l[fail_idx].b = upgrade
+                               ? ERR_PTR(-BCH_ERR_no_btree_node_upgrade)
+                               : ERR_PTR(-BCH_ERR_no_btree_node_relock);
+                       --fail_idx;
+               } while (fail_idx >= 0);
+       }
+
+       if (path->uptodate == BTREE_ITER_NEED_RELOCK)
+               path->uptodate = BTREE_ITER_UPTODATE;
+
+       bch2_trans_verify_locks(trans);
+
+       return path->uptodate < BTREE_ITER_NEED_RELOCK;
+}
+
+/* Slowpath: */
+int __bch2_btree_node_lock(struct btree_trans *trans,
+                          struct btree_path *path,
+                          struct btree *b,
+                          struct bpos pos, unsigned level,
+                          enum six_lock_type type,
+                          six_lock_should_sleep_fn should_sleep_fn, void *p,
+                          unsigned long ip)
+{
+       struct btree_path *linked;
+       unsigned reason;
+
+       /* Check if it's safe to block: */
+       trans_for_each_path(trans, linked) {
+               if (!linked->nodes_locked)
+                       continue;
+
+               /*
+                * Can't block taking an intent lock if we have _any_ nodes read
+                * locked:
+                *
+                * - Our read lock blocks another thread with an intent lock on
+                *   the same node from getting a write lock, and thus from
+                *   dropping its intent lock
+                *
+                * - And the other thread may have multiple nodes intent locked:
+                *   both the node we want to intent lock, and the node we
+                *   already have read locked - deadlock:
+                */
+               if (type == SIX_LOCK_intent &&
+                   linked->nodes_locked != linked->nodes_intent_locked) {
+                       reason = 1;
+                       goto deadlock;
+               }
+
+               if (linked->btree_id != path->btree_id) {
+                       if (linked->btree_id < path->btree_id)
+                               continue;
+
+                       reason = 3;
+                       goto deadlock;
+               }
+
+               /*
+                * Within the same btree, non-cached paths come before cached
+                * paths:
+                */
+               if (linked->cached != path->cached) {
+                       if (!linked->cached)
+                               continue;
+
+                       reason = 4;
+                       goto deadlock;
+               }
+
+               /*
+                * Interior nodes must be locked before their descendants: if
+                * another path has possible descendants locked of the node
+                * we're about to lock, it must have the ancestors locked too:
+                */
+               if (level > __fls(linked->nodes_locked)) {
+                       reason = 5;
+                       goto deadlock;
+               }
+
+               /* Must lock btree nodes in key order: */
+               if (btree_node_locked(linked, level) &&
+                   bpos_cmp(pos, btree_node_pos((void *) linked->l[level].b,
+                                                linked->cached)) <= 0) {
+                       reason = 7;
+                       goto deadlock;
+               }
+       }
+
+       return btree_node_lock_type(trans, path, b, pos, level,
+                                   type, should_sleep_fn, p);
+deadlock:
+       trace_trans_restart_would_deadlock(trans, ip, reason, linked, path, &pos);
+       return btree_trans_restart(trans, BCH_ERR_transaction_restart_would_deadlock);
+}
+
+/* Btree iterator locking: */
+
+#ifdef CONFIG_BCACHEFS_DEBUG
+
+void bch2_btree_path_verify_locks(struct btree_path *path)
+{
+       unsigned l;
+
+       if (!path->nodes_locked) {
+               BUG_ON(path->uptodate == BTREE_ITER_UPTODATE &&
+                      btree_path_node(path, path->level));
+               return;
+       }
+
+       for (l = 0; btree_path_node(path, l); l++)
+               BUG_ON(btree_lock_want(path, l) !=
+                      btree_node_locked_type(path, l));
+}
+
+void bch2_trans_verify_locks(struct btree_trans *trans)
+{
+       struct btree_path *path;
+
+       trans_for_each_path(trans, path)
+               bch2_btree_path_verify_locks(path);
+}
+
+#endif
+
+/* Btree path locking: */
+
+/*
+ * Only for btree_cache.c - only relocks intent locks
+ */
+int bch2_btree_path_relock_intent(struct btree_trans *trans,
+                                 struct btree_path *path)
+{
+       unsigned l;
+
+       for (l = path->level;
+            l < path->locks_want && btree_path_node(path, l);
+            l++) {
+               if (!bch2_btree_node_relock(trans, path, l)) {
+                       __bch2_btree_path_unlock(trans, path);
+                       btree_path_set_dirty(path, BTREE_ITER_NEED_TRAVERSE);
+                       trace_trans_restart_relock_path_intent(trans, _RET_IP_, path);
+                       return btree_trans_restart(trans, BCH_ERR_transaction_restart_relock_path_intent);
+               }
+       }
+
+       return 0;
+}
+
+__flatten
+bool bch2_btree_path_relock_norestart(struct btree_trans *trans,
+                       struct btree_path *path, unsigned long trace_ip)
+{
+       return btree_path_get_locks(trans, path, false);
+}
+
+int __bch2_btree_path_relock(struct btree_trans *trans,
+                       struct btree_path *path, unsigned long trace_ip)
+{
+       if (!bch2_btree_path_relock_norestart(trans, path, trace_ip)) {
+               trace_trans_restart_relock_path(trans, trace_ip, path);
+               return btree_trans_restart(trans, BCH_ERR_transaction_restart_relock_path);
+       }
+
+       return 0;
+}
+
+__flatten
+bool bch2_btree_path_upgrade_norestart(struct btree_trans *trans,
+                       struct btree_path *path, unsigned long trace_ip)
+{
+       return btree_path_get_locks(trans, path, true);
+}
+
+bool bch2_btree_path_upgrade_noupgrade_sibs(struct btree_trans *trans,
+                              struct btree_path *path,
+                              unsigned new_locks_want)
+{
+       EBUG_ON(path->locks_want >= new_locks_want);
+
+       path->locks_want = new_locks_want;
+
+       return btree_path_get_locks(trans, path, true);
+}
+
+bool __bch2_btree_path_upgrade(struct btree_trans *trans,
+                              struct btree_path *path,
+                              unsigned new_locks_want)
+{
+       struct btree_path *linked;
+
+       if (bch2_btree_path_upgrade_noupgrade_sibs(trans, path, new_locks_want))
+               return true;
+
+       /*
+        * XXX: this is ugly - we'd prefer to not be mucking with other
+        * iterators in the btree_trans here.
+        *
+        * On failure to upgrade the iterator, setting iter->locks_want and
+        * calling get_locks() is sufficient to make bch2_btree_path_traverse()
+        * get the locks we want on transaction restart.
+        *
+        * But if this iterator was a clone, on transaction restart what we did
+        * to this iterator isn't going to be preserved.
+        *
+        * Possibly we could add an iterator field for the parent iterator when
+        * an iterator is a copy - for now, we'll just upgrade any other
+        * iterators with the same btree id.
+        *
+        * The code below used to be needed to ensure ancestor nodes get locked
+        * before interior nodes - now that's handled by
+        * bch2_btree_path_traverse_all().
+        */
+       if (!path->cached && !trans->in_traverse_all)
+               trans_for_each_path(trans, linked)
+                       if (linked != path &&
+                           linked->cached == path->cached &&
+                           linked->btree_id == path->btree_id &&
+                           linked->locks_want < new_locks_want) {
+                               linked->locks_want = new_locks_want;
+                               btree_path_get_locks(trans, linked, true);
+                       }
+
+       return false;
+}
+
+void __bch2_btree_path_downgrade(struct btree_trans *trans,
+                                struct btree_path *path,
+                                unsigned new_locks_want)
+{
+       unsigned l;
+
+       EBUG_ON(path->locks_want < new_locks_want);
+
+       path->locks_want = new_locks_want;
+
+       while (path->nodes_locked &&
+              (l = __fls(path->nodes_locked)) >= path->locks_want) {
+               if (l > path->level) {
+                       btree_node_unlock(trans, path, l);
+               } else {
+                       if (btree_node_intent_locked(path, l)) {
+                               six_lock_downgrade(&path->l[l].b->c.lock);
+                               path->nodes_intent_locked ^= 1 << l;
+                       }
+                       break;
+               }
+       }
+
+       bch2_btree_path_verify_locks(path);
+}
+
+void bch2_trans_downgrade(struct btree_trans *trans)
+{
+       struct btree_path *path;
+
+       trans_for_each_path(trans, path)
+               bch2_btree_path_downgrade(trans, path);
+}
+
+/* Btree transaction locking: */
+
+int bch2_trans_relock(struct btree_trans *trans)
+{
+       struct btree_path *path;
+
+       if (unlikely(trans->restarted))
+               return -BCH_ERR_transaction_restart_relock;
+
+       trans_for_each_path(trans, path)
+               if (path->should_be_locked &&
+                   bch2_btree_path_relock(trans, path, _RET_IP_)) {
+                       trace_trans_restart_relock(trans, _RET_IP_, path);
+                       BUG_ON(!trans->restarted);
+                       return -BCH_ERR_transaction_restart_relock;
+               }
+       return 0;
+}
+
+void bch2_trans_unlock(struct btree_trans *trans)
+{
+       struct btree_path *path;
+
+       trans_for_each_path(trans, path)
+               __bch2_btree_path_unlock(trans, path);
+}
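
The transaction-level pair at the end of the new file supports the usual bcachefs discipline of dropping btree locks around anything that might block, then revalidating. A sketch of a hypothetical caller (the surrounding operation is illustrative, not from this patch):

/*
 * Hypothetical caller: drop all btree locks around a blocking
 * operation, then revalidate. bch2_trans_relock() fails with
 * -BCH_ERR_transaction_restart_relock if any should_be_locked path
 * can't be relocked at its old sequence number, forcing the caller
 * to restart the transaction.
 */
static int example_blocking_op(struct btree_trans *trans)
{
	int ret;

	bch2_trans_unlock(trans);

	/* ... sleep, do IO, allocate memory, etc. ... */

	ret = bch2_trans_relock(trans);
	if (ret)
		return ret;	/* transaction restart */

	/* Locks reacquired; btree state is still valid: */
	return 0;
}
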
index acc27c3c05d6881c0bc0253c0bdc64bff06f88b2..5b5fa47844f730e5de321b13a20a425318e7e886 100644
@@ -13,6 +13,8 @@
 #include "btree_iter.h"
 #include "six.h"
 
+extern struct lock_class_key bch2_btree_node_lock_key;
+
 static inline bool is_btree_node(struct btree_path *path, unsigned l)
 {
        return l < BTREE_MAX_DEPTH && !IS_ERR_OR_NULL(path->l[l].b);
@@ -300,6 +302,22 @@ static inline void bch2_btree_node_lock_write(struct btree_trans *trans,
                __bch2_btree_node_lock_write(trans, b);
 }
 
+bool bch2_btree_path_upgrade_noupgrade_sibs(struct btree_trans *,
+                              struct btree_path *, unsigned);
+bool __bch2_btree_path_upgrade(struct btree_trans *,
+                              struct btree_path *, unsigned);
+
+static inline bool bch2_btree_path_upgrade(struct btree_trans *trans,
+                                          struct btree_path *path,
+                                          unsigned new_locks_want)
+{
+       new_locks_want = min(new_locks_want, BTREE_MAX_DEPTH);
+
+       return path->locks_want < new_locks_want
+               ? __bch2_btree_path_upgrade(trans, path, new_locks_want)
+               : path->uptodate == BTREE_ITER_UPTODATE;
+}
+
 static inline void btree_path_set_should_be_locked(struct btree_path *path)
 {
        EBUG_ON(!btree_node_locked(path, path->level));
@@ -326,4 +344,25 @@ static inline void btree_path_set_level_up(struct btree_trans *trans,
 struct six_lock_count bch2_btree_node_lock_counts(struct btree_trans *,
                                struct btree_path *, struct btree *, unsigned);
 
+bool bch2_btree_path_relock_norestart(struct btree_trans *,
+                                     struct btree_path *, unsigned long);
+int __bch2_btree_path_relock(struct btree_trans *,
+                            struct btree_path *, unsigned long);
+
+static inline int bch2_btree_path_relock(struct btree_trans *trans,
+                       struct btree_path *path, unsigned long trace_ip)
+{
+       return btree_node_locked(path, path->level)
+               ? 0
+               : __bch2_btree_path_relock(trans, path, trace_ip);
+}
+
+#ifdef CONFIG_BCACHEFS_DEBUG
+void bch2_btree_path_verify_locks(struct btree_path *);
+void bch2_trans_verify_locks(struct btree_trans *);
+#else
+static inline void bch2_btree_path_verify_locks(struct btree_path *path) {}
+static inline void bch2_trans_verify_locks(struct btree_trans *trans) {}
+#endif
+
 #endif /* _BCACHEFS_BTREE_LOCKING_H */
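
With bch2_btree_path_upgrade() now living in this header, a caller that needs intent locks further up the tree, e.g. before an update that may split a node, might look like the following hypothetical sketch (the error name follows the BCH_ERR_transaction_restart_* convention used above and is an assumption, not taken from this patch):

/*
 * Hypothetical caller: before an update that may split, make sure
 * intent locks are held up through the parent of @level, restarting
 * the transaction on failure:
 */
static int example_prep_split(struct btree_trans *trans,
			      struct btree_path *path, unsigned level)
{
	if (!bch2_btree_path_upgrade(trans, path, level + 2))
		return btree_trans_restart(trans,
				BCH_ERR_transaction_restart_upgrade);

	return 0;
}
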
index 0a5803a3a75d8da2ffc864e2e02ca5758c713d29..73aaa1196fafb1750f2859cc0354c1619c73cb76 100644
@@ -336,6 +336,14 @@ struct bkey_cached {
        struct bkey_i           *k;
 };
 
+static inline struct bpos btree_node_pos(struct btree_bkey_cached_common *b,
+                                        bool cached)
+{
+       return !cached
+               ? container_of(b, struct btree, c)->key.k.p
+               : container_of(b, struct bkey_cached, c)->key.pos;
+}
+
 struct btree_insert_entry {
        unsigned                flags;
        u8                      bkey_type;
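
btree_node_pos() moves into btree_types.h because both struct btree and struct bkey_cached embed a struct btree_bkey_cached_common as member c, so the key position of either kind of node can be recovered from the common header plus the cached flag. A hypothetical user of the helper:

/*
 * Hypothetical user: compare the positions of two nodes, either of
 * which may be a cached key, using only the common header:
 */
static inline int example_node_pos_cmp(struct btree_bkey_cached_common *a,
				       bool a_cached,
				       struct btree_bkey_cached_common *b,
				       bool b_cached)
{
	return bpos_cmp(btree_node_pos(a, a_cached),
			btree_node_pos(b, b_cached));
}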