bcachefs: seqmutex; fix a lockdep splat
authorKent Overstreet <kent.overstreet@linux.dev>
Tue, 20 Jun 2023 01:01:13 +0000 (21:01 -0400)
committerKent Overstreet <kent.overstreet@linux.dev>
Sun, 22 Oct 2023 21:10:04 +0000 (17:10 -0400)
We can't be holding btree_trans_lock while copying to user space, which
might incur a page fault. To fix this, convert it to a seqmutex so we
can unlock/relock.

Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
fs/bcachefs/bcachefs.h
fs/bcachefs/btree_iter.c
fs/bcachefs/debug.c
fs/bcachefs/seqmutex.h [new file with mode: 0644]
fs/bcachefs/sysfs.c

index 0dfa42e297e04b95c90ae167b751ce8bef5f006c..4199b42db64023ff3d433fa38029f9ecf606d306 100644 (file)
 #include "fifo.h"
 #include "nocow_locking_types.h"
 #include "opts.h"
+#include "seqmutex.h"
 #include "util.h"
 
 #ifdef CONFIG_BCACHEFS_DEBUG
@@ -779,7 +780,7 @@ struct bch_fs {
        }                       btree_write_stats[BTREE_WRITE_TYPE_NR];
 
        /* btree_iter.c: */
-       struct mutex            btree_trans_lock;
+       struct seqmutex         btree_trans_lock;
        struct list_head        btree_trans_list;
        mempool_t               btree_paths_pool;
        mempool_t               btree_trans_mem_pool;
index e8fec59dac0263af6210c48d063fefacbc28ee54..8335387d339713a5b2626fc3a16381ed70c38836 100644 (file)
@@ -2991,7 +2991,7 @@ void __bch2_trans_init(struct btree_trans *trans, struct bch_fs *c, unsigned fn_
        if (IS_ENABLED(CONFIG_BCACHEFS_DEBUG_TRANSACTIONS)) {
                struct btree_trans *pos;
 
-               mutex_lock(&c->btree_trans_lock);
+               seqmutex_lock(&c->btree_trans_lock);
                list_for_each_entry(pos, &c->btree_trans_list, list) {
                        /*
                         * We'd much prefer to be stricter here and completely
@@ -3009,7 +3009,7 @@ void __bch2_trans_init(struct btree_trans *trans, struct bch_fs *c, unsigned fn_
                }
                list_add_tail(&trans->list, &c->btree_trans_list);
 list_add_done:
-               mutex_unlock(&c->btree_trans_lock);
+               seqmutex_unlock(&c->btree_trans_lock);
        }
 }
 
@@ -3044,6 +3044,12 @@ void bch2_trans_exit(struct btree_trans *trans)
 
        bch2_trans_unlock(trans);
 
+       if (IS_ENABLED(CONFIG_BCACHEFS_DEBUG_TRANSACTIONS)) {
+               seqmutex_lock(&c->btree_trans_lock);
+               list_del(&trans->list);
+               seqmutex_unlock(&c->btree_trans_lock);
+       }
+
        closure_sync(&trans->ref);
 
        if (s)
@@ -3055,12 +3061,6 @@ void bch2_trans_exit(struct btree_trans *trans)
 
        check_btree_paths_leaked(trans);
 
-       if (IS_ENABLED(CONFIG_BCACHEFS_DEBUG_TRANSACTIONS)) {
-               mutex_lock(&c->btree_trans_lock);
-               list_del(&trans->list);
-               mutex_unlock(&c->btree_trans_lock);
-       }
-
        srcu_read_unlock(&c->btree_trans_barrier, trans->srcu_idx);
 
        bch2_journal_preres_put(&c->journal, &trans->journal_preres);
@@ -3198,7 +3198,7 @@ int bch2_fs_btree_iter_init(struct bch_fs *c)
        }
 
        INIT_LIST_HEAD(&c->btree_trans_list);
-       mutex_init(&c->btree_trans_lock);
+       seqmutex_init(&c->btree_trans_lock);
 
        ret   = mempool_init_kmalloc_pool(&c->btree_paths_pool, 1,
                        sizeof(struct btree_path) * nr +
index 8981acc150989be09b42b0f09af2f4f452ac1cf2..df0e14dc96e6d81228bc23e39d894a030cb94389 100644 (file)
@@ -627,19 +627,26 @@ static ssize_t bch2_btree_transactions_read(struct file *file, char __user *buf,
        struct bch_fs *c = i->c;
        struct btree_trans *trans;
        ssize_t ret = 0;
+       u32 seq;
 
        i->ubuf = buf;
        i->size = size;
        i->ret  = 0;
-
-       mutex_lock(&c->btree_trans_lock);
+restart:
+       seqmutex_lock(&c->btree_trans_lock);
        list_for_each_entry(trans, &c->btree_trans_list, list) {
                if (trans->locking_wait.task->pid <= i->iter)
                        continue;
 
+               closure_get(&trans->ref);
+               seq = seqmutex_seq(&c->btree_trans_lock);
+               seqmutex_unlock(&c->btree_trans_lock);
+
                ret = flush_buf(i);
-               if (ret)
-                       break;
+               if (ret) {
+                       closure_put(&trans->ref);
+                       goto unlocked;
+               }
 
                bch2_btree_trans_to_text(&i->buf, trans);
 
@@ -651,9 +658,14 @@ static ssize_t bch2_btree_transactions_read(struct file *file, char __user *buf,
                prt_newline(&i->buf);
 
                i->iter = trans->locking_wait.task->pid;
-       }
-       mutex_unlock(&c->btree_trans_lock);
 
+               closure_put(&trans->ref);
+
+               if (!seqmutex_relock(&c->btree_trans_lock, seq))
+                       goto restart;
+       }
+       seqmutex_unlock(&c->btree_trans_lock);
+unlocked:
        if (i->buf.allocation_failure)
                ret = -ENOMEM;
 
@@ -815,6 +827,7 @@ static ssize_t bch2_btree_deadlock_read(struct file *file, char __user *buf,
        struct bch_fs *c = i->c;
        struct btree_trans *trans;
        ssize_t ret = 0;
+       u32 seq;
 
        i->ubuf = buf;
        i->size = size;
@@ -822,21 +835,32 @@ static ssize_t bch2_btree_deadlock_read(struct file *file, char __user *buf,
 
        if (i->iter)
                goto out;
-
-       mutex_lock(&c->btree_trans_lock);
+restart:
+       seqmutex_lock(&c->btree_trans_lock);
        list_for_each_entry(trans, &c->btree_trans_list, list) {
                if (trans->locking_wait.task->pid <= i->iter)
                        continue;
 
+               closure_get(&trans->ref);
+               seq = seqmutex_seq(&c->btree_trans_lock);
+               seqmutex_unlock(&c->btree_trans_lock);
+
                ret = flush_buf(i);
-               if (ret)
-                       break;
+               if (ret) {
+                       closure_put(&trans->ref);
+                       goto out;
+               }
 
                bch2_check_for_deadlock(trans, &i->buf);
 
                i->iter = trans->locking_wait.task->pid;
+
+               closure_put(&trans->ref);
+
+               if (!seqmutex_relock(&c->btree_trans_lock, seq))
+                       goto restart;
        }
-       mutex_unlock(&c->btree_trans_lock);
+       seqmutex_unlock(&c->btree_trans_lock);
 out:
        if (i->buf.allocation_failure)
                ret = -ENOMEM;
diff --git a/fs/bcachefs/seqmutex.h b/fs/bcachefs/seqmutex.h
new file mode 100644 (file)
index 0000000..c1860d8
--- /dev/null
@@ -0,0 +1,48 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _BCACHEFS_SEQMUTEX_H
+#define _BCACHEFS_SEQMUTEX_H
+
+#include <linux/mutex.h>
+
+struct seqmutex {
+       struct mutex    lock;
+       u32             seq;
+};
+
+#define seqmutex_init(_lock)   mutex_init(&(_lock)->lock)
+
+static inline bool seqmutex_trylock(struct seqmutex *lock)
+{
+       return mutex_trylock(&lock->lock);
+}
+
+static inline void seqmutex_lock(struct seqmutex *lock)
+{
+       mutex_lock(&lock->lock);
+}
+
+static inline void seqmutex_unlock(struct seqmutex *lock)
+{
+       lock->seq++;
+       mutex_unlock(&lock->lock);
+}
+
+static inline u32 seqmutex_seq(struct seqmutex *lock)
+{
+       return lock->seq;
+}
+
+static inline bool seqmutex_relock(struct seqmutex *lock, u32 seq)
+{
+       if (lock->seq != seq || !mutex_trylock(&lock->lock))
+               return false;
+
+       if (lock->seq != seq) {
+               mutex_unlock(&lock->lock);
+               return false;
+       }
+
+       return true;
+}
+
+#endif /* _BCACHEFS_SEQMUTEX_H */
index 77f92d537af62360919bf5e4649d8cd00f99a54c..54e1071ecfeb4732db54f9db2278f9d0f202b8a0 100644 (file)
@@ -379,7 +379,7 @@ static void bch2_btree_wakeup_all(struct bch_fs *c)
 {
        struct btree_trans *trans;
 
-       mutex_lock(&c->btree_trans_lock);
+       seqmutex_lock(&c->btree_trans_lock);
        list_for_each_entry(trans, &c->btree_trans_list, list) {
                struct btree_bkey_cached_common *b = READ_ONCE(trans->locking);
 
@@ -387,7 +387,7 @@ static void bch2_btree_wakeup_all(struct bch_fs *c)
                        six_lock_wakeup_all(&b->lock);
 
        }
-       mutex_unlock(&c->btree_trans_lock);
+       seqmutex_unlock(&c->btree_trans_lock);
 }
 
 SHOW(bch2_fs)