bcachefs: Improve tracing for transaction restarts
author Kent Overstreet <kent.overstreet@gmail.com>
Wed, 28 Oct 2020 18:17:46 +0000 (14:17 -0400)
committer Kent Overstreet <kent.overstreet@linux.dev>
Sun, 22 Oct 2023 21:08:45 +0000 (17:08 -0400)
We have a bug where we can get stuck with a process spinning in
transaction restarts - we need more information to diagnose it.

Signed-off-by: Kent Overstreet <kent.overstreet@gmail.com>
Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
fs/bcachefs/btree_cache.c
fs/bcachefs/btree_cache.h
fs/bcachefs/btree_iter.c
fs/bcachefs/btree_key_cache.c
fs/bcachefs/btree_locking.h
fs/bcachefs/trace.h

index c503d76bab3ba3a16906aefc6f45b21f19526a88..02a2f558cf4d1672a90fd05908c859db470dc433 100644 (file)
@@ -706,7 +706,8 @@ static int lock_node_check_fn(struct six_lock *lock, void *p)
  */
 struct btree *bch2_btree_node_get(struct bch_fs *c, struct btree_iter *iter,
                                  const struct bkey_i *k, unsigned level,
-                                 enum six_lock_type lock_type)
+                                 enum six_lock_type lock_type,
+                                 unsigned long trace_ip)
 {
        struct btree_cache *bc = &c->btree_cache;
        struct btree *b;
@@ -768,7 +769,7 @@ lock_node:
                        btree_node_unlock(iter, level + 1);
 
                if (!btree_node_lock(b, k->k.p, level, iter, lock_type,
-                                    lock_node_check_fn, (void *) k)) {
+                                    lock_node_check_fn, (void *) k, trace_ip)) {
                        if (b->hash_val != btree_ptr_hash_val(k))
                                goto retry;
                        return ERR_PTR(-EINTR);
@@ -936,7 +937,7 @@ struct btree *bch2_btree_node_get_sibling(struct bch_fs *c,
        bch2_bkey_unpack(parent, &tmp.k, k);
 
        ret = bch2_btree_node_get(c, iter, &tmp.k, level,
-                                 SIX_LOCK_intent);
+                                 SIX_LOCK_intent, _THIS_IP_);
 
        if (PTR_ERR_OR_ZERO(ret) == -EINTR && !trans->nounlock) {
                struct btree_iter *linked;
@@ -956,7 +957,7 @@ struct btree *bch2_btree_node_get_sibling(struct bch_fs *c,
                        btree_node_unlock(iter, level);
 
                ret = bch2_btree_node_get(c, iter, &tmp.k, level,
-                                         SIX_LOCK_intent);
+                                         SIX_LOCK_intent, _THIS_IP_);
 
                /*
                 * before btree_iter_relock() calls btree_iter_verify_locks():
index d0d3a85bb8be6354c5cb6218e1e3a69116586aa4..8a19e60e9258014816b6d5a8c4ac54e00587a7f2 100644 (file)
@@ -23,7 +23,7 @@ struct btree *bch2_btree_node_mem_alloc(struct bch_fs *);
 
 struct btree *bch2_btree_node_get(struct bch_fs *, struct btree_iter *,
                                  const struct bkey_i *, unsigned,
-                                 enum six_lock_type);
+                                 enum six_lock_type, unsigned long);
 
 struct btree *bch2_btree_node_get_noiter(struct bch_fs *, const struct bkey_i *,
                                         enum btree_id, unsigned);
index b561d0353d77bd08008cf044e95e4aa77ede06cd..a76e13000d11a999a5c58454a64566b9a11d9bea 100644 (file)
@@ -197,13 +197,13 @@ static struct bpos btree_node_pos(struct btree_bkey_cached_common *_b,
 bool __bch2_btree_node_lock(struct btree *b, struct bpos pos,
                            unsigned level, struct btree_iter *iter,
                            enum six_lock_type type,
-                           six_lock_should_sleep_fn should_sleep_fn,
-                           void *p)
+                           six_lock_should_sleep_fn should_sleep_fn, void *p,
+                           unsigned long ip)
 {
        struct btree_trans *trans = iter->trans;
-       struct btree_iter *linked;
+       struct btree_iter *linked, *deadlock_iter = NULL;
        u64 start_time = local_clock();
-       bool ret = true;
+       unsigned reason = 9;
 
        /* Check if it's safe to block: */
        trans_for_each_iter(trans, linked) {
@@ -228,10 +228,13 @@ bool __bch2_btree_node_lock(struct btree *b, struct bpos pos,
                                linked->locks_want = max_t(unsigned,
                                                linked->locks_want,
                                                __fls(linked->nodes_locked) + 1);
-                               if (!btree_iter_get_locks(linked, true, false))
-                                       ret = false;
+                               if (!btree_iter_get_locks(linked, true, false)) {
+                                       deadlock_iter = linked;
+                                       reason = 1;
+                               }
                        } else {
-                               ret = false;
+                               deadlock_iter = linked;
+                               reason = 2;
                        }
                }
 
@@ -247,23 +250,30 @@ bool __bch2_btree_node_lock(struct btree *b, struct bpos pos,
                                        max(level + 1, max_t(unsigned,
                                            linked->locks_want,
                                            iter->locks_want));
-                               if (!btree_iter_get_locks(linked, true, false))
-                                       ret = false;
+                               if (!btree_iter_get_locks(linked, true, false)) {
+                                       deadlock_iter = linked;
+                                       reason = 3;
+                               }
                        } else {
-                               ret = false;
+                               deadlock_iter = linked;
+                               reason = 4;
                        }
                }
 
                /* Must lock btree nodes in key order: */
                if ((cmp_int(iter->btree_id, linked->btree_id) ?:
-                    -cmp_int(btree_iter_type(iter), btree_iter_type(linked))) < 0)
-                       ret = false;
+                    -cmp_int(btree_iter_type(iter), btree_iter_type(linked))) < 0) {
+                       deadlock_iter = linked;
+                       reason = 5;
+               }
 
                if (iter->btree_id == linked->btree_id &&
                    btree_node_locked(linked, level) &&
                    bkey_cmp(pos, btree_node_pos((void *) linked->l[level].b,
-                                                btree_iter_type(linked))) <= 0)
-                       ret = false;
+                                                btree_iter_type(linked))) <= 0) {
+                       deadlock_iter = linked;
+                       reason = 6;
+               }
 
                /*
                 * Recheck if this is a node we already have locked - since one
@@ -277,8 +287,13 @@ bool __bch2_btree_node_lock(struct btree *b, struct bpos pos,
                }
        }
 
-       if (unlikely(!ret)) {
-               trace_trans_restart_would_deadlock(iter->trans->ip);
+       if (unlikely(deadlock_iter)) {
+               trace_trans_restart_would_deadlock(iter->trans->ip, ip,
+                               reason,
+                               deadlock_iter->btree_id,
+                               btree_iter_type(deadlock_iter),
+                               iter->btree_id,
+                               btree_iter_type(iter));
                return false;
        }
 
@@ -945,7 +960,8 @@ static int lock_root_check_fn(struct six_lock *lock, void *p)
 }
 
 static inline int btree_iter_lock_root(struct btree_iter *iter,
-                                      unsigned depth_want)
+                                      unsigned depth_want,
+                                      unsigned long trace_ip)
 {
        struct bch_fs *c = iter->trans->c;
        struct btree *b, **rootp = &c->btree_roots[iter->btree_id].b;
@@ -974,7 +990,8 @@ static inline int btree_iter_lock_root(struct btree_iter *iter,
                lock_type = __btree_lock_want(iter, iter->level);
                if (unlikely(!btree_node_lock(b, POS_MAX, iter->level,
                                              iter, lock_type,
-                                             lock_root_check_fn, rootp)))
+                                             lock_root_check_fn, rootp,
+                                             trace_ip)))
                        return -EINTR;
 
                if (likely(b == READ_ONCE(*rootp) &&
@@ -1046,7 +1063,8 @@ static noinline void btree_node_mem_ptr_set(struct btree_iter *iter,
                btree_node_unlock(iter, plevel);
 }
 
-static __always_inline int btree_iter_down(struct btree_iter *iter)
+static __always_inline int btree_iter_down(struct btree_iter *iter,
+                                          unsigned long trace_ip)
 {
        struct bch_fs *c = iter->trans->c;
        struct btree_iter_level *l = &iter->l[iter->level];
@@ -1060,7 +1078,7 @@ static __always_inline int btree_iter_down(struct btree_iter *iter)
        bch2_bkey_unpack(l->b, &tmp.k,
                         bch2_btree_node_iter_peek(&l->iter, l->b));
 
-       b = bch2_btree_node_get(c, iter, &tmp.k, level, lock_type);
+       b = bch2_btree_node_get(c, iter, &tmp.k, level, lock_type, trace_ip);
        if (unlikely(IS_ERR(b)))
                return PTR_ERR(b);
 
@@ -1084,7 +1102,7 @@ static void btree_iter_up(struct btree_iter *iter)
        btree_node_unlock(iter, iter->level++);
 }
 
-static int btree_iter_traverse_one(struct btree_iter *);
+static int btree_iter_traverse_one(struct btree_iter *, unsigned long);
 
 static int __btree_iter_traverse_all(struct btree_trans *trans, int ret)
 {
@@ -1109,6 +1127,7 @@ retry_all:
        bubble_sort(sorted, nr_sorted, btree_iter_cmp_by_idx);
 #undef btree_iter_cmp_by_idx
        bch2_trans_unlock(trans);
+       cond_resched();
 
        if (unlikely(ret == -ENOMEM)) {
                struct closure cl;
@@ -1139,7 +1158,7 @@ retry_all:
                if (!(trans->iters_linked & (1ULL << idx)))
                        continue;
 
-               ret = btree_iter_traverse_one(&trans->iters[idx]);
+               ret = btree_iter_traverse_one(&trans->iters[idx], _THIS_IP_);
                if (ret)
                        goto retry_all;
        }
@@ -1202,7 +1221,8 @@ static inline unsigned btree_iter_up_until_good_node(struct btree_iter *iter,
  * On error, caller (peek_node()/peek_key()) must return NULL; the error is
  * stashed in the iterator and returned from bch2_trans_exit().
  */
-static int btree_iter_traverse_one(struct btree_iter *iter)
+static int btree_iter_traverse_one(struct btree_iter *iter,
+                                  unsigned long trace_ip)
 {
        unsigned depth_want = iter->level;
 
@@ -1249,8 +1269,8 @@ static int btree_iter_traverse_one(struct btree_iter *iter)
         */
        while (iter->level > depth_want) {
                int ret = btree_iter_node(iter, iter->level)
-                       ? btree_iter_down(iter)
-                       : btree_iter_lock_root(iter, depth_want);
+                       ? btree_iter_down(iter, trace_ip)
+                       : btree_iter_lock_root(iter, depth_want, trace_ip);
                if (unlikely(ret)) {
                        if (ret == 1)
                                return 0;
@@ -1281,7 +1301,7 @@ int __must_check __bch2_btree_iter_traverse(struct btree_iter *iter)
        int ret;
 
        ret =   bch2_trans_cond_resched(trans) ?:
-               btree_iter_traverse_one(iter);
+               btree_iter_traverse_one(iter, _RET_IP_);
        if (unlikely(ret))
                ret = __btree_iter_traverse_all(trans, ret);
 
index 52b657030755b730ccbf6234ca74efcecbdbce97..9a93b6d2687862e89502257c2b167c1efc7dd778 100644 (file)
@@ -242,7 +242,7 @@ retry:
                enum six_lock_type lock_want = __btree_lock_want(iter, 0);
 
                if (!btree_node_lock((void *) ck, iter->pos, 0, iter, lock_want,
-                                    bkey_cached_check_fn, iter)) {
+                                    bkey_cached_check_fn, iter, _THIS_IP_)) {
                        if (ck->key.btree_id != iter->btree_id ||
                            bkey_cmp(ck->key.pos, iter->pos)) {
                                goto retry;
index ffee6f2d7d4b6f116bd291a00f2bfe259d6260d2..cc07ef2938ae04475fe812e5ff25075657b9cd2e 100644 (file)
@@ -175,13 +175,15 @@ static inline bool btree_node_lock_increment(struct btree_trans *trans,
 
 bool __bch2_btree_node_lock(struct btree *, struct bpos, unsigned,
                            struct btree_iter *, enum six_lock_type,
-                           six_lock_should_sleep_fn, void *);
+                           six_lock_should_sleep_fn, void *,
+                           unsigned long);
 
 static inline bool btree_node_lock(struct btree *b,
                        struct bpos pos, unsigned level,
                        struct btree_iter *iter,
                        enum six_lock_type type,
-                       six_lock_should_sleep_fn should_sleep_fn, void *p)
+                       six_lock_should_sleep_fn should_sleep_fn, void *p,
+                       unsigned long ip)
 {
        struct btree_trans *trans = iter->trans;
        bool ret;
@@ -199,7 +201,7 @@ static inline bool btree_node_lock(struct btree *b,
        ret   = likely(six_trylock_type(&b->c.lock, type)) ||
                btree_node_lock_increment(trans, b, level, type) ||
                __bch2_btree_node_lock(b, pos, level, iter, type,
-                                      should_sleep_fn, p);
+                                      should_sleep_fn, p, ip);
 
 #ifdef CONFIG_BCACHEFS_DEBUG
        trans->locking = NULL;
index 5c57b6efaaf3a4f73e3d8f0f9203f74247509037..c30fb4d74a954c664cbab38f9f77f958e0b70513 100644 (file)
@@ -536,9 +536,46 @@ DEFINE_EVENT(transaction_restart,  trans_restart_btree_node_reused,
        TP_ARGS(ip)
 );
 
-DEFINE_EVENT(transaction_restart,      trans_restart_would_deadlock,
-       TP_PROTO(unsigned long ip),
-       TP_ARGS(ip)
+TRACE_EVENT(trans_restart_would_deadlock,
+       TP_PROTO(unsigned long  trans_ip,
+                unsigned long  caller_ip,
+                unsigned       reason,
+                enum btree_id  have_btree_id,
+                unsigned       have_iter_type,
+                enum btree_id  want_btree_id,
+                unsigned       want_iter_type),
+       TP_ARGS(trans_ip, caller_ip, reason,
+               have_btree_id, have_iter_type,
+               want_btree_id, want_iter_type),
+
+       TP_STRUCT__entry(
+               __field(unsigned long,          trans_ip        )
+               __field(unsigned long,          caller_ip       )
+               __field(u8,                     reason          )
+               __field(u8,                     have_btree_id   )
+               __field(u8,                     have_iter_type  )
+               __field(u8,                     want_btree_id   )
+               __field(u8,                     want_iter_type  )
+       ),
+
+       TP_fast_assign(
+               __entry->trans_ip               = trans_ip;
+               __entry->caller_ip              = caller_ip;
+               __entry->reason                 = reason;
+               __entry->have_btree_id          = have_btree_id;
+               __entry->have_iter_type         = have_iter_type;
+               __entry->want_btree_id          = want_btree_id;
+               __entry->want_iter_type         = want_iter_type;
+       ),
+
+       TP_printk("%pF %pF because %u have %u:%u want %u:%u",
+                 (void *) __entry->trans_ip,
+                 (void *) __entry->caller_ip,
+                 __entry->reason,
+                 __entry->have_btree_id,
+                 __entry->have_iter_type,
+                 __entry->want_btree_id,
+                 __entry->want_iter_type)
 );
 
 TRACE_EVENT(trans_restart_iters_realloced,