bcachefs: Print deadlock cycle in debugfs
author     Kent Overstreet <kent.overstreet@linux.dev>
           Tue, 23 Aug 2022 03:12:11 +0000 (23:12 -0400)
committer  Kent Overstreet <kent.overstreet@linux.dev>
           Sun, 22 Oct 2023 21:09:41 +0000 (17:09 -0400)
Since we may not be finished debugging the cycle detector, this adds a
new file to debugfs that shows what the cycle detector finds, if
anything. By comparing this with btree_transactions, which shows the
held locks for every btree transaction, we'll be able to determine
whether it's the cycle detector that's buggy or something else.

Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
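
As a usage illustration (not part of this patch), here is a minimal
userspace sketch that reads the new btree_deadlock file alongside
btree_transactions so the two can be compared; the debugfs mount point
and the per-filesystem directory name used below are assumptions, not
something this patch defines:

/*
 * Minimal sketch: dump btree_deadlock and btree_transactions for one
 * filesystem so the cycle detector's output can be cross-checked
 * against the held locks of every btree transaction.
 */
#include <stdio.h>

static void dump(const char *path)
{
	FILE *f = fopen(path, "r");
	char line[512];

	if (!f) {
		perror(path);
		return;
	}

	while (fgets(line, sizeof(line), f))
		fputs(line, stdout);
	fclose(f);
}

int main(int argc, char **argv)
{
	/* Filesystem directory name under debugfs: assumed, pass it as argv[1] */
	const char *fs = argc > 1 ? argv[1] : "example-fs";
	char path[512];

	/* What the cycle detector found, if anything: */
	snprintf(path, sizeof(path),
		 "/sys/kernel/debug/bcachefs/%s/btree_deadlock", fs);
	dump(path);

	/* Held locks for every btree transaction, for cross-checking: */
	snprintf(path, sizeof(path),
		 "/sys/kernel/debug/bcachefs/%s/btree_transactions", fs);
	dump(path);

	return 0;
}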
fs/bcachefs/btree_iter.c
fs/bcachefs/btree_locking.c
fs/bcachefs/btree_locking.h
fs/bcachefs/debug.c

index 5773b00e69ac1c48fc16e97dee6ff0994fd46f18..ece80d7914b247fcd7c14eaf158a08a227dfda42 100644 (file)
@@ -2992,7 +2992,6 @@ bch2_btree_path_node_to_text(struct printbuf *out,
                   c.n[0], c.n[1], c.n[2], pid);
 }
 
-#ifdef CONFIG_BCACHEFS_DEBUG_TRANSACTIONS
 void bch2_btree_trans_to_text(struct printbuf *out, struct btree_trans *trans)
 {
        struct btree_path *path;
@@ -3041,7 +3040,6 @@ void bch2_btree_trans_to_text(struct printbuf *out, struct btree_trans *trans)
                prt_printf(out, "\n");
        }
 }
-#endif
 
 void bch2_fs_btree_iter_exit(struct bch_fs *c)
 {
index 869f4163a3c6cce68a00051c776d3b7ea88c8318..e270579d362255bc71c32908e7d2c20a2e7caea0 100644 (file)
@@ -76,6 +76,17 @@ static void lock_graph_pop(struct lock_graph *g)
        closure_put(&g->g[--g->nr].trans->ref);
 }
 
+static noinline void print_cycle(struct printbuf *out, struct lock_graph *g)
+{
+       struct trans_waiting_for_lock *i;
+
+       prt_printf(out, "Found lock cycle (%u entries):", g->nr);
+       prt_newline(out);
+
+       for (i = g->g; i < g->g + g->nr; i++)
+               bch2_btree_trans_to_text(out, i->trans);
+}
+
 static int abort_lock(struct lock_graph *g, struct trans_waiting_for_lock *i)
 {
        int ret;
@@ -122,7 +133,8 @@ static noinline int break_cycle(struct lock_graph *g)
        BUG();
 }
 
-static int lock_graph_descend(struct lock_graph *g, struct btree_trans *trans)
+static int lock_graph_descend(struct lock_graph *g, struct btree_trans *trans,
+                             struct printbuf *cycle)
 {
        struct btree_trans *orig_trans = g->g->trans;
        struct trans_waiting_for_lock *i;
@@ -136,7 +148,14 @@ static int lock_graph_descend(struct lock_graph *g, struct btree_trans *trans)
                        }
 
                if (i->trans == trans) {
-                       ret = break_cycle(g);
+                       if (cycle) {
+                               /* Only checking: */
+                               print_cycle(cycle, g);
+                               ret = -1;
+                       } else {
+                               ret = break_cycle(g);
+                       }
+
                        if (ret)
                                goto deadlock;
                        /*
@@ -170,19 +189,6 @@ deadlock:
        return ret;
 }
 
-#if 0
-static void print_cycle(struct printbuf *out, struct lock_graph *g)
-{
-       struct trans_waiting_for_lock *i;
-
-       prt_str(out, "Found lock cycle:");
-       prt_newline(out);
-
-       for (i = g->g; i < g->g + g->nr; i++)
-               bch2_btree_trans_to_text(out, i->trans);
-}
-#endif
-
 static noinline void lock_graph_remove_non_waiters(struct lock_graph *g)
 {
        struct trans_waiting_for_lock *i;
@@ -202,7 +208,7 @@ static bool lock_type_conflicts(enum six_lock_type t1, enum six_lock_type t2)
        return t1 + t2 > 1;
 }
 
-static int check_for_deadlock(struct btree_trans *trans)
+int bch2_check_for_deadlock(struct btree_trans *trans, struct printbuf *cycle)
 {
        struct lock_graph g;
        struct trans_waiting_for_lock *top;
@@ -214,7 +220,7 @@ static int check_for_deadlock(struct btree_trans *trans)
                return btree_trans_restart(trans, BCH_ERR_transaction_restart_would_deadlock);
 
        g.nr = 0;
-       ret = lock_graph_descend(&g, trans);
+       ret = lock_graph_descend(&g, trans, cycle);
        BUG_ON(ret);
 next:
        if (!g.nr)
@@ -265,7 +271,7 @@ next:
                                    !lock_type_conflicts(lock_held, trans->locking_wait.lock_want))
                                        continue;
 
-                               ret = lock_graph_descend(&g, trans);
+                               ret = lock_graph_descend(&g, trans, cycle);
                                raw_spin_unlock(&b->lock.wait_lock);
 
                                if (ret)
@@ -285,7 +291,7 @@ int bch2_six_check_for_deadlock(struct six_lock *lock, void *p)
 {
        struct btree_trans *trans = p;
 
-       return check_for_deadlock(trans);
+       return bch2_check_for_deadlock(trans, NULL);
 }
 
 int __bch2_btree_node_lock_write(struct btree_trans *trans,
index 874dd4428b3af00b30bc95f233ea61f865f79e87..86f68b26cc94b863b31bea4e5e5d5458d10cbb00 100644 (file)
@@ -426,6 +426,7 @@ struct six_lock_count bch2_btree_node_lock_counts(struct btree_trans *,
                                struct btree_bkey_cached_common *b,
                                unsigned);
 
+int bch2_check_for_deadlock(struct btree_trans *, struct printbuf *);
 
 #ifdef CONFIG_BCACHEFS_DEBUG
 void bch2_btree_path_verify_locks(struct btree_path *);
index 6944dfef5bcb154527b5a8f0dc9d0a8d968eb03c..41b2772afef99fc8a0b0681c6ea2cce5e0513273 100644 (file)
@@ -11,6 +11,7 @@
 #include "btree_cache.h"
 #include "btree_io.h"
 #include "btree_iter.h"
+#include "btree_locking.h"
 #include "btree_update.h"
 #include "buckets.h"
 #include "debug.h"
@@ -708,6 +709,45 @@ static const struct file_operations lock_held_stats_op = {
        .read = lock_held_stats_read,
 };
 
+static ssize_t bch2_btree_deadlock_read(struct file *file, char __user *buf,
+                                           size_t size, loff_t *ppos)
+{
+       struct dump_iter *i = file->private_data;
+       struct bch_fs *c = i->c;
+       struct btree_trans *trans;
+       ssize_t ret = 0;
+
+       i->ubuf = buf;
+       i->size = size;
+       i->ret  = 0;
+
+       if (i->iter)
+               goto out;
+
+       mutex_lock(&c->btree_trans_lock);
+       list_for_each_entry(trans, &c->btree_trans_list, list)
+               if (bch2_check_for_deadlock(trans, &i->buf)) {
+                       i->iter = 1;
+                       break;
+               }
+       mutex_unlock(&c->btree_trans_lock);
+out:
+       if (i->buf.allocation_failure)
+               ret = -ENOMEM;
+
+       if (!ret)
+               ret = flush_buf(i);
+
+       return ret ?: i->ret;
+}
+
+static const struct file_operations btree_deadlock_ops = {
+       .owner          = THIS_MODULE,
+       .open           = bch2_dump_open,
+       .release        = bch2_dump_release,
+       .read           = bch2_btree_deadlock_read,
+};
+
 void bch2_fs_debug_exit(struct bch_fs *c)
 {
        if (!IS_ERR_OR_NULL(c->fs_debug_dir))
@@ -741,6 +781,9 @@ void bch2_fs_debug_init(struct bch_fs *c)
        debugfs_create_file("btree_transaction_stats", 0400, c->fs_debug_dir,
                            c, &lock_held_stats_op);
 
+       debugfs_create_file("btree_deadlock", 0400, c->fs_debug_dir,
+                           c->btree_debug, &btree_deadlock_ops);
+
        c->btree_debug_dir = debugfs_create_dir("btrees", c->fs_debug_dir);
        if (IS_ERR_OR_NULL(c->btree_debug_dir))
                return;