bcachefs: improved btree locking tracepoints
author Kent Overstreet <kent.overstreet@gmail.com>
Tue, 14 May 2019 18:08:23 +0000 (14:08 -0400)
committer Kent Overstreet <kent.overstreet@linux.dev>
Sun, 22 Oct 2023 21:08:22 +0000 (17:08 -0400)
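Replace the single BTREE_ITER_NOT_END sentinel with distinct
BTREE_ITER_NO_NODE_* values, so that when an iterator level holds no
btree node it also records why it was cleared (failed relock, node
drop, root locking, traversing up or down, init, IO error); a real
node pointer is now distinguished from any sentinel with a single
magnitude check in is_btree_node().

Also add tracepoints for btree node relock/upgrade failures
(node_relock_fail, node_upgrade_fail) and for the transaction restarts
taken on failed iterator upgrades and on -EINTR from
bch2_trans_mark_update() (trans_restart_iter_upgrade,
trans_restart_upgrade, trans_restart_mark).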
Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
fs/bcachefs/btree_iter.c
fs/bcachefs/btree_iter.h
fs/bcachefs/btree_update_interior.c
fs/bcachefs/btree_update_leaf.c
fs/bcachefs/trace.h

fs/bcachefs/btree_iter.c
index e18a88cd972e03f148533bdb090de73692b5d700..4d107d890438fb845c77ec98bfd441a6d2b00c53 100644
@@ -15,13 +15,18 @@ static inline struct bkey_s_c __btree_iter_peek_all(struct btree_iter *,
                                                    struct btree_iter_level *,
                                                    struct bkey *);
 
-#define BTREE_ITER_NOT_END             ((struct btree *) 1)
+#define BTREE_ITER_NO_NODE_GET_LOCKS   ((struct btree *) 1)
+#define BTREE_ITER_NO_NODE_DROP        ((struct btree *) 2)
+#define BTREE_ITER_NO_NODE_LOCK_ROOT   ((struct btree *) 3)
+#define BTREE_ITER_NO_NODE_UP          ((struct btree *) 4)
+#define BTREE_ITER_NO_NODE_DOWN        ((struct btree *) 5)
+#define BTREE_ITER_NO_NODE_INIT        ((struct btree *) 6)
+#define BTREE_ITER_NO_NODE_ERROR       ((struct btree *) 7)
 
 static inline bool is_btree_node(struct btree_iter *iter, unsigned l)
 {
        return l < BTREE_MAX_DEPTH &&
-               iter->l[l].b &&
-               iter->l[l].b != BTREE_ITER_NOT_END;
+               (unsigned long) iter->l[l].b >= 128;
 }
 
 /* Returns < 0 if @k is before iter pos, > 0 if @k is after */
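
[Editor's note: the sentinel defines above pack a small "reason code" into
the pointer slot, and is_btree_node() tells codes and real pointers apart
purely by magnitude, since no valid kernel pointer is below 128 (the
lowest page is never mapped). A minimal userspace sketch of the same
technique follows; names are hypothetical, not code from this patch.]

#include <stdio.h>

struct node;                                 /* opaque, as in the patch */

#define NO_NODE_DROP   ((struct node *) 2)   /* reason codes, patch-style */
#define NO_NODE_ERROR  ((struct node *) 7)

/* real pointers are never this small: the low page is never mapped */
static int is_real_node(const struct node *b)
{
        return (unsigned long) b >= 128;
}

int main(void)
{
        const struct node *slot = NO_NODE_DROP;

        if (!is_real_node(slot))
                printf("no node, reason code %lu\n", (unsigned long) slot);
        return 0;
}
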
@@ -106,19 +111,20 @@ bool __bch2_btree_node_relock(struct btree_iter *iter, unsigned level)
        struct btree *b = btree_iter_node(iter, level);
        int want = __btree_lock_want(iter, level);
 
-       if (!b || b == BTREE_ITER_NOT_END)
+       if (!is_btree_node(iter, level))
                return false;
 
        if (race_fault())
                return false;
 
-       if (!six_relock_type(&b->c.lock, want, iter->l[level].lock_seq) &&
-           !(iter->l[level].lock_seq >> 1 == b->c.lock.state.seq >> 1 &&
-             btree_node_lock_increment(iter, b, level, want)))
+       if (six_relock_type(&b->c.lock, want, iter->l[level].lock_seq) ||
+           (btree_node_lock_seq_matches(iter, b, level) &&
+            btree_node_lock_increment(iter, b, level, want))) {
+               mark_btree_node_locked(iter, level, want);
+               return true;
+       } else {
                return false;
-
-       mark_btree_node_locked(iter, level, want);
-       return true;
+       }
 }
 
 static bool bch2_btree_node_upgrade(struct btree_iter *iter, unsigned level)
@@ -141,7 +147,7 @@ static bool bch2_btree_node_upgrade(struct btree_iter *iter, unsigned level)
            : six_relock_type(&b->c.lock, SIX_LOCK_intent, iter->l[level].lock_seq))
                goto success;
 
-       if (iter->l[level].lock_seq >> 1 == b->c.lock.state.seq >> 1 &&
+       if (btree_node_lock_seq_matches(iter, b, level) &&
            btree_node_lock_increment(iter, b, level, BTREE_NODE_INTENT_LOCKED)) {
                btree_node_unlock(iter, level);
                goto success;
@@ -166,6 +172,23 @@ static inline bool btree_iter_get_locks(struct btree_iter *iter,
                if (!(upgrade
                      ? bch2_btree_node_upgrade(iter, l)
                      : bch2_btree_node_relock(iter, l))) {
+                       if (upgrade)
+                               trace_node_upgrade_fail(l, iter->l[l].lock_seq,
+                                               is_btree_node(iter, l)
+                                               ? 0
+                                               : (unsigned long) iter->l[l].b,
+                                               is_btree_node(iter, l)
+                                               ? iter->l[l].b->c.lock.state.seq
+                                               : 0);
+                       else
+                               trace_node_relock_fail(l, iter->l[l].lock_seq,
+                                               is_btree_node(iter, l)
+                                               ? 0
+                                               : (unsigned long) iter->l[l].b,
+                                               is_btree_node(iter, l)
+                                               ? iter->l[l].b->c.lock.state.seq
+                                               : 0);
+
                        fail_idx = l;
                        btree_iter_set_dirty(iter, BTREE_ITER_NEED_TRAVERSE);
                }
@@ -180,7 +203,7 @@ static inline bool btree_iter_get_locks(struct btree_iter *iter,
         */
        while (fail_idx >= 0) {
                btree_node_unlock(iter, fail_idx);
-               iter->l[fail_idx].b = BTREE_ITER_NOT_END;
+               iter->l[fail_idx].b = BTREE_ITER_NO_NODE_GET_LOCKS;
                --fail_idx;
        }
 
@@ -810,7 +833,7 @@ void bch2_btree_iter_node_drop(struct btree_iter *iter, struct btree *b)
        trans_for_each_iter(iter->trans, linked)
                if (linked->l[level].b == b) {
                        __btree_node_unlock(linked, level);
-                       linked->l[level].b = BTREE_ITER_NOT_END;
+                       linked->l[level].b = BTREE_ITER_NO_NODE_DROP;
                }
 }
 
@@ -848,7 +871,8 @@ static inline int btree_iter_lock_root(struct btree_iter *iter,
                         * that depth
                         */
                        iter->level = depth_want;
-                       iter->l[iter->level].b = NULL;
+                       for (i = iter->level; i < BTREE_MAX_DEPTH; i++)
+                               iter->l[i].b = NULL;
                        return 1;
                }
 
@@ -861,13 +885,14 @@ static inline int btree_iter_lock_root(struct btree_iter *iter,
                           b->c.level == iter->level &&
                           !race_fault())) {
                        for (i = 0; i < iter->level; i++)
-                               iter->l[i].b = BTREE_ITER_NOT_END;
+                               iter->l[i].b = BTREE_ITER_NO_NODE_LOCK_ROOT;
                        iter->l[iter->level].b = b;
+                       for (i = iter->level + 1; i < BTREE_MAX_DEPTH; i++)
+                               iter->l[i].b = NULL;
 
                        mark_btree_node_locked(iter, iter->level, lock_type);
                        btree_iter_node_set(iter, b);
                        return 0;
-
                }
 
                six_unlock_type(&b->c.lock, lock_type);
@@ -973,7 +998,7 @@ retry_all:
        if (unlikely(ret == -EIO)) {
                trans->error = true;
                iter->flags |= BTREE_ITER_ERROR;
-               iter->l[iter->level].b = BTREE_ITER_NOT_END;
+               iter->l[iter->level].b = BTREE_ITER_NO_NODE_ERROR;
                goto out;
        }
 
@@ -1008,12 +1033,12 @@ static unsigned btree_iter_up_until_locked(struct btree_iter *iter,
        unsigned l = iter->level;
 
        while (btree_iter_node(iter, l) &&
-              !(is_btree_node(iter, l) &&
-                bch2_btree_node_relock(iter, l) &&
-                (!check_pos ||
-                 btree_iter_pos_in_node(iter, iter->l[l].b)))) {
+              (!is_btree_node(iter, l) ||
+               !bch2_btree_node_relock(iter, l) ||
+                (check_pos &&
+                 !btree_iter_pos_in_node(iter, iter->l[l].b)))) {
                btree_node_unlock(iter, l);
-               iter->l[l].b = BTREE_ITER_NOT_END;
+               iter->l[l].b = BTREE_ITER_NO_NODE_UP;
                l++;
        }
 
@@ -1069,7 +1094,7 @@ int __must_check __bch2_btree_iter_traverse(struct btree_iter *iter)
                                return 0;
 
                        iter->level = depth_want;
-                       iter->l[iter->level].b = BTREE_ITER_NOT_END;
+                       iter->l[iter->level].b = BTREE_ITER_NO_NODE_DOWN;
                        return ret;
                }
        }
@@ -1590,7 +1615,7 @@ static inline void bch2_btree_iter_init(struct btree_trans *trans,
        iter->nodes_intent_locked       = 0;
        for (i = 0; i < ARRAY_SIZE(iter->l); i++)
                iter->l[i].b            = NULL;
-       iter->l[iter->level].b          = BTREE_ITER_NOT_END;
+       iter->l[iter->level].b          = BTREE_ITER_NO_NODE_INIT;
 
        prefetch(c->btree_roots[btree_id].b);
 }
@@ -1798,7 +1823,7 @@ struct btree_iter *bch2_trans_get_node_iter(struct btree_trans *trans,
 
        for (i = 0; i < ARRAY_SIZE(iter->l); i++)
                iter->l[i].b            = NULL;
-       iter->l[iter->level].b          = BTREE_ITER_NOT_END;
+       iter->l[iter->level].b          = BTREE_ITER_NO_NODE_INIT;
 
        return iter;
 }
fs/bcachefs/btree_iter.h
index ee54378136040bd57b393464c9e9da7979e1d3dd..4dff3bc70fbc7bfb1e58bb39522532918ff1233d 100644
@@ -76,14 +76,12 @@ static inline struct btree_iter *
 __trans_next_iter_with_node(struct btree_trans *trans, struct btree *b,
                            unsigned idx)
 {
-       EBUG_ON(idx < trans->nr_iters && trans->iters[idx].idx != idx);
+       struct btree_iter *iter = __trans_next_iter(trans, idx);
 
-       for (; idx < trans->nr_iters; idx++)
-               if ((trans->iters_linked & (1ULL << idx)) &&
-                   __iter_has_node(&trans->iters[idx], b))
-                       return &trans->iters[idx];
+       while (iter && !__iter_has_node(iter, b))
+               iter = __trans_next_iter(trans, iter->idx + 1);
 
-       return NULL;
+       return iter;
 }
 
 #define trans_for_each_iter_with_node(_trans, _b, _iter)               \
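
[Editor's note: the btree_iter.h hunk is a behavior-preserving refactor:
the open-coded scan over trans->iters_linked becomes "take the plain
next-iterator helper, then skip entries that don't reference @b". A
standalone sketch of that filter-over-successor pattern follows; slot,
next_slot() and friends are hypothetical stand-ins, not patch code.]

#include <stddef.h>

struct slot { unsigned idx; const void *node; };

/* hypothetical plain successor, analogous to __trans_next_iter() */
static struct slot *next_slot(struct slot *slots, unsigned nr, unsigned idx)
{
        return idx < nr ? &slots[idx] : NULL;
}

/* filtered successor, analogous to __trans_next_iter_with_node() */
static struct slot *next_slot_with_node(struct slot *slots, unsigned nr,
                                        unsigned idx, const void *node)
{
        struct slot *s = next_slot(slots, nr, idx);

        while (s && s->node != node)        /* skip non-matching entries */
                s = next_slot(slots, nr, s->idx + 1);
        return s;
}

int main(void)
{
        static const int a, b;
        struct slot slots[] = { { 0, &a }, { 1, &b }, { 2, &a } };
        struct slot *s;
        int hits = 0;

        /* visits slots 0 and 2, skipping slot 1 */
        for (s = next_slot_with_node(slots, 3, 0, &a);
             s;
             s = next_slot_with_node(slots, 3, s->idx + 1, &a))
                hits++;
        return hits == 2 ? 0 : 1;
}
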
fs/bcachefs/btree_update_interior.c
index e3595a8dce6abe4410438ebf3930246e7ad737f6..9ec91c5c1bd8b173f4560d4d8bde77abe576a714 100644
@@ -1586,6 +1586,7 @@ int bch2_btree_split_leaf(struct bch_fs *c, struct btree_iter *iter,
         * instead of locking/reserving all the way to the root:
         */
        if (!bch2_btree_iter_upgrade(iter, U8_MAX)) {
+               trace_trans_restart_iter_upgrade(c, iter->trans->ip);
                ret = -EINTR;
                goto out;
        }
fs/bcachefs/btree_update_leaf.c
index 5366d2a6c268b02d8c35ad165b31c9ebc7cf3ade..d21f6f0351824899ff4015c7c87082a41efe2199 100644
@@ -567,6 +567,8 @@ static inline int do_btree_insert_at(struct btree_trans *trans,
                    update_triggers_transactional(trans, i)) {
                        ret = bch2_trans_mark_update(trans, i,
                                                &trans->fs_usage_deltas);
+                       if (ret == -EINTR)
+                               trace_trans_restart_mark(c, trans->ip);
                        if (ret)
                                return ret;
                }
@@ -714,7 +716,9 @@ int bch2_trans_commit_error(struct btree_trans *trans,
                 * don't care if we got ENOSPC because we told split it
                 * couldn't block:
                 */
-               if (!ret || (flags & BTREE_INSERT_NOUNLOCK)) {
+               if (!ret ||
+                   ret == -EINTR ||
+                   (flags & BTREE_INSERT_NOUNLOCK)) {
                        trans_restart(" (split)");
                        trace_trans_restart_btree_node_split(c, trans->ip);
                        ret = -EINTR;
@@ -806,6 +810,7 @@ static int __bch2_trans_commit(struct btree_trans *trans,
                if (!bch2_btree_iter_upgrade(i->iter, 1)) {
                        trans_restart(" (failed upgrade, locks_want %u uptodate %u)",
                                      old_locks_want, old_uptodate);
+                       trace_trans_restart_upgrade(c, trans->ip);
                        ret = -EINTR;
                        goto err;
                }
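
[Editor's note: the -EINTR returns instrumented above feed bcachefs's
transaction restart convention: -EINTR means the transaction's locks
were dropped and the caller must replay the whole transaction. A
self-contained sketch of that caller-side loop follows, with a
hypothetical commit_once() in place of the real commit path.]

#include <errno.h>
#include <stdio.h>

/* hypothetical commit path that needs one restart before succeeding */
static int commit_once(int *attempts)
{
        return (*attempts)++ ? 0 : -EINTR;
}

int main(void)
{
        int attempts = 0, ret;

        do {
                /* a real caller would also reset transaction state here */
                ret = commit_once(&attempts);
        } while (ret == -EINTR);      /* -EINTR == "replay from the top" */

        printf("committed after %d attempt(s)\n", attempts);
        return ret;
}
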
fs/bcachefs/trace.h
index 22a378d5f64f6f0c42911667e8912206a904db76..86f58206365d53055cd87dd9aec5fa4309205140 100644
@@ -561,6 +561,21 @@ DEFINE_EVENT(transaction_restart,  trans_restart_btree_node_split,
        TP_ARGS(c, ip)
 );
 
+DEFINE_EVENT(transaction_restart,      trans_restart_mark,
+       TP_PROTO(struct bch_fs *c, unsigned long ip),
+       TP_ARGS(c, ip)
+);
+
+DEFINE_EVENT(transaction_restart,      trans_restart_upgrade,
+       TP_PROTO(struct bch_fs *c, unsigned long ip),
+       TP_ARGS(c, ip)
+);
+
+DEFINE_EVENT(transaction_restart,      trans_restart_iter_upgrade,
+       TP_PROTO(struct bch_fs *c, unsigned long ip),
+       TP_ARGS(c, ip)
+);
+
 DEFINE_EVENT(transaction_restart,      trans_restart_traverse,
        TP_PROTO(struct bch_fs *c, unsigned long ip),
        TP_ARGS(c, ip)
@@ -571,6 +586,39 @@ DEFINE_EVENT(transaction_restart,  trans_restart_atomic,
        TP_ARGS(c, ip)
 );
 
+DECLARE_EVENT_CLASS(node_lock_fail,
+       TP_PROTO(unsigned level, u32 iter_seq, unsigned node, u32 node_seq),
+       TP_ARGS(level, iter_seq, node, node_seq),
+
+       TP_STRUCT__entry(
+               __field(u32,            level)
+               __field(u32,            iter_seq)
+               __field(u32,            node)
+               __field(u32,            node_seq)
+       ),
+
+       TP_fast_assign(
+               __entry->level          = level;
+               __entry->iter_seq       = iter_seq;
+               __entry->node           = node;
+               __entry->node_seq       = node_seq;
+       ),
+
+       TP_printk("level %u iter seq %u node %u node seq %u",
+                 __entry->level, __entry->iter_seq,
+                 __entry->node, __entry->node_seq)
+);
+
+DEFINE_EVENT(node_lock_fail, node_upgrade_fail,
+       TP_PROTO(unsigned level, u32 iter_seq, unsigned node, u32 node_seq),
+       TP_ARGS(level, iter_seq, node, node_seq)
+);
+
+DEFINE_EVENT(node_lock_fail, node_relock_fail,
+       TP_PROTO(unsigned level, u32 iter_seq, unsigned node, u32 node_seq),
+       TP_ARGS(level, iter_seq, node, node_seq)
+);
+
 #endif /* _TRACE_BCACHEFS_H */
 
 /* This part must be outside protection */
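
[Editor's note: a hedged usage sketch, not part of the patch. The two new
lock-failure events can be enabled and read through tracefs; this assumes
tracefs is mounted at /sys/kernel/tracing and that the events are
registered under the "bcachefs" trace system.]

#include <fcntl.h>
#include <unistd.h>

static void enable_event(const char *path)
{
        int fd = open(path, O_WRONLY);

        if (fd >= 0) {
                (void) write(fd, "1", 1);
                close(fd);
        }
}

int main(void)
{
        char buf[4096];
        ssize_t n;
        int fd;

        enable_event("/sys/kernel/tracing/events/bcachefs/node_relock_fail/enable");
        enable_event("/sys/kernel/tracing/events/bcachefs/node_upgrade_fail/enable");

        /* each record renders via the TP_printk() format above:
         * "level %u iter seq %u node %u node seq %u" */
        fd = open("/sys/kernel/tracing/trace_pipe", O_RDONLY);
        if (fd < 0)
                return 1;
        while ((n = read(fd, buf, sizeof buf)) > 0)
                (void) write(STDOUT_FILENO, buf, n);
        return 0;
}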