bcachefs: trace bch2_trans_kmalloc()
authorKent Overstreet <kent.overstreet@linux.dev>
Tue, 1 Apr 2025 18:29:31 +0000 (14:29 -0400)
committerKent Overstreet <kent.overstreet@linux.dev>
Thu, 22 May 2025 00:13:27 +0000 (20:13 -0400)
We're occasionally seeing the WARN_ON() for bump allocator usage
exceeding BTREE_TRANS_MEM_MAX; add some tracing so we can see what's
going on.

Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
fs/bcachefs/Kconfig
fs/bcachefs/bcachefs.h
fs/bcachefs/btree_iter.c
fs/bcachefs/btree_iter.h
fs/bcachefs/btree_types.h
fs/bcachefs/btree_update.h
fs/bcachefs/debug.c

index 07709b0d7688fc1a4b308b55ac141d68fe8346a4..a14e4a60b18738f6d0d334fd638089e3629f4357 100644 (file)
@@ -103,6 +103,10 @@ config BCACHEFS_PATH_TRACEPOINTS
        Enable extra tracepoints for debugging btree_path operations; we don't
        normally want these enabled because they happen at very high rates.
 
+config BCACHEFS_TRANS_KMALLOC_TRACE
+       bool "Trace bch2_trans_kmalloc() calls"
+       depends on BCACHEFS_FS
+
 config MEAN_AND_VARIANCE_UNIT_TEST
        tristate "mean_and_variance unit tests" if !KUNIT_ALL_TESTS
        depends on KUNIT
index 75f7408da1736bbe8262cf40428fe3aa1744e033..24eed2b3be4d55bd8d582634d2f34aed6ff7002e 100644 (file)
@@ -650,6 +650,9 @@ struct btree_transaction_stats {
        unsigned                nr_max_paths;
        unsigned                journal_entries_size;
        unsigned                max_mem;
+#ifdef CONFIG_BCACHEFS_TRANS_KMALLOC_TRACE
+       darray_trans_kmalloc_trace trans_kmalloc_trace;
+#endif
        char                    *max_paths_text;
 };
 
index ac5f2046550da3244320376b0d36cf4ac73a2006..cfd6363dfc39adb8bd624acb5fe0a811444634c0 100644 (file)
@@ -3095,7 +3095,19 @@ void bch2_trans_copy_iter(struct btree_trans *trans,
        dst->key_cache_path = 0;
 }
 
-void *__bch2_trans_kmalloc(struct btree_trans *trans, size_t size)
+#ifdef CONFIG_BCACHEFS_TRANS_KMALLOC_TRACE
+void bch2_trans_kmalloc_trace_to_text(struct printbuf *out,
+                                     darray_trans_kmalloc_trace *trace)
+{
+       printbuf_tabstops_reset(out);
+       printbuf_tabstop_push(out, 60);
+
+       darray_for_each(*trace, i)
+               prt_printf(out, "%pS\t%zu\n", (void *) i->ip, i->bytes);
+}
+#endif
+
+void *__bch2_trans_kmalloc(struct btree_trans *trans, size_t size, unsigned long ip)
 {
        struct bch_fs *c = trans->c;
        unsigned new_top = trans->mem_top + size;
@@ -3105,14 +3117,35 @@ void *__bch2_trans_kmalloc(struct btree_trans *trans, size_t size)
        void *new_mem;
        void *p;
 
-       WARN_ON_ONCE(new_bytes > BTREE_TRANS_MEM_MAX);
+       if (WARN_ON_ONCE(new_bytes > BTREE_TRANS_MEM_MAX)) {
+#ifdef CONFIG_BCACHEFS_TRANS_KMALLOC_TRACE
+               struct printbuf buf = PRINTBUF;
+               bch2_trans_kmalloc_trace_to_text(&buf, &trans->trans_kmalloc_trace);
+               bch2_print_string_as_lines(KERN_ERR, buf.buf);
+               printbuf_exit(&buf);
+#endif
+       }
 
        ret = trans_maybe_inject_restart(trans, _RET_IP_);
        if (ret)
                return ERR_PTR(ret);
 
        struct btree_transaction_stats *s = btree_trans_stats(trans);
-       s->max_mem = max(s->max_mem, new_bytes);
+       if (new_bytes > s->max_mem) {
+               mutex_lock(&s->lock);
+#ifdef CONFIG_BCACHEFS_TRANS_KMALLOC_TRACE
+               darray_resize(&s->trans_kmalloc_trace, trans->trans_kmalloc_trace.nr);
+               s->trans_kmalloc_trace.nr = min(s->trans_kmalloc_trace.size,
+                                               trans->trans_kmalloc_trace.nr);
+
+               memcpy(s->trans_kmalloc_trace.data,
+                      trans->trans_kmalloc_trace.data,
+                      sizeof(s->trans_kmalloc_trace.data[0]) *
+                      s->trans_kmalloc_trace.nr);
+#endif
+               s->max_mem = new_bytes;
+               mutex_unlock(&s->lock);
+       }
 
        if (trans->used_mempool) {
                if (trans->mem_bytes >= new_bytes)
@@ -3172,6 +3205,8 @@ out_new_mem:
                                        BCH_ERR_transaction_restart_mem_realloced, _RET_IP_));
        }
 out_change_top:
+       bch2_trans_kmalloc_trace(trans, size, ip);
+
        p = trans->mem + trans->mem_top;
        trans->mem_top += size;
        memset(p, 0, size);
@@ -3285,6 +3320,10 @@ u32 bch2_trans_begin(struct btree_trans *trans)
        }
 #endif
 
+#ifdef CONFIG_BCACHEFS_TRANS_KMALLOC_TRACE
+       trans->trans_kmalloc_trace.nr = 0;
+#endif
+
        trans_set_locked(trans, false);
 
        if (trans->restarted) {
@@ -3454,6 +3493,9 @@ void bch2_trans_put(struct btree_trans *trans)
 #ifdef CONFIG_BCACHEFS_DEBUG
        darray_exit(&trans->last_restarted_trace);
 #endif
+#ifdef CONFIG_BCACHEFS_TRANS_KMALLOC_TRACE
+       darray_exit(&trans->trans_kmalloc_trace);
+#endif
 
        unsigned long *paths_allocated = trans->paths_allocated;
        trans->paths_allocated  = NULL;
@@ -3608,6 +3650,9 @@ void bch2_fs_btree_iter_exit(struct bch_fs *c)
        for (s = c->btree_transaction_stats;
             s < c->btree_transaction_stats + ARRAY_SIZE(c->btree_transaction_stats);
             s++) {
+#ifdef CONFIG_BCACHEFS_TRANS_KMALLOC_TRACE
+               darray_exit(&s->trans_kmalloc_trace);
+#endif
                kfree(s->max_paths_text);
                bch2_time_stats_exit(&s->lock_hold_times);
        }
index 9d2cccf5d21a7a9509bbe2584eb159a6db226622..78a805a89860b064273fca28412d0adf6b4222ab 100644 (file)
@@ -543,43 +543,73 @@ void bch2_trans_copy_iter(struct btree_trans *, struct btree_iter *, struct btre
 
 void bch2_set_btree_iter_dontneed(struct btree_trans *, struct btree_iter *);
 
-void *__bch2_trans_kmalloc(struct btree_trans *, size_t);
+#ifdef CONFIG_BCACHEFS_TRANS_KMALLOC_TRACE
+void bch2_trans_kmalloc_trace_to_text(struct printbuf *,
+                                     darray_trans_kmalloc_trace *);
+#endif
 
-/**
- * bch2_trans_kmalloc - allocate memory for use by the current transaction
- *
- * Must be called after bch2_trans_begin, which on second and further calls
- * frees all memory allocated in this transaction
- */
-static inline void *bch2_trans_kmalloc(struct btree_trans *trans, size_t size)
+void *__bch2_trans_kmalloc(struct btree_trans *, size_t, unsigned long);
+
+static inline void bch2_trans_kmalloc_trace(struct btree_trans *trans, size_t size,
+                                           unsigned long ip)
+{
+#ifdef CONFIG_BCACHEFS_TRANS_KMALLOC_TRACE
+       darray_push(&trans->trans_kmalloc_trace,
+                   ((struct trans_kmalloc_trace) { .ip = ip, .bytes = size }));
+#endif
+}
+
+static __always_inline void *bch2_trans_kmalloc_nomemzero_ip(struct btree_trans *trans, size_t size,
+                                                   unsigned long ip)
 {
        size = roundup(size, 8);
 
+       bch2_trans_kmalloc_trace(trans, size, ip);
+
        if (likely(trans->mem_top + size <= trans->mem_bytes)) {
                void *p = trans->mem + trans->mem_top;
 
                trans->mem_top += size;
-               memset(p, 0, size);
                return p;
        } else {
-               return __bch2_trans_kmalloc(trans, size);
+               return __bch2_trans_kmalloc(trans, size, ip);
        }
 }
 
-static inline void *bch2_trans_kmalloc_nomemzero(struct btree_trans *trans, size_t size)
+static __always_inline void *bch2_trans_kmalloc_ip(struct btree_trans *trans, size_t size,
+                                         unsigned long ip)
 {
-       size = round_up(size, 8);
+       size = roundup(size, 8);
+
+       bch2_trans_kmalloc_trace(trans, size, ip);
 
        if (likely(trans->mem_top + size <= trans->mem_bytes)) {
                void *p = trans->mem + trans->mem_top;
 
                trans->mem_top += size;
+               memset(p, 0, size);
                return p;
        } else {
-               return __bch2_trans_kmalloc(trans, size);
+               return __bch2_trans_kmalloc(trans, size, ip);
        }
 }
 
+/**
+ * bch2_trans_kmalloc - allocate memory for use by the current transaction
+ *
+ * Must be called after bch2_trans_begin, which on second and further calls
+ * frees all memory allocated in this transaction
+ */
+static __always_inline void *bch2_trans_kmalloc(struct btree_trans *trans, size_t size)
+{
+       return bch2_trans_kmalloc_ip(trans, size, _THIS_IP_);
+}
+
+static __always_inline void *bch2_trans_kmalloc_nomemzero(struct btree_trans *trans, size_t size)
+{
+       return bch2_trans_kmalloc_nomemzero_ip(trans, size, _THIS_IP_);
+}
+
 static inline struct bkey_s_c __bch2_bkey_get_iter(struct btree_trans *trans,
                                struct btree_iter *iter,
                                unsigned btree_id, struct bpos pos,
index 023c472dc9ee4ba1097e31ec4c0006de8e4c76b4..81175c1344d27fc587816be12773b51787253ad5 100644 (file)
@@ -477,6 +477,12 @@ struct btree_trans_paths {
        struct btree_path       paths[];
 };
 
+struct trans_kmalloc_trace {
+       unsigned long           ip;
+       size_t                  bytes;
+};
+typedef DARRAY(struct trans_kmalloc_trace) darray_trans_kmalloc_trace;
+
 struct btree_trans {
        struct bch_fs           *c;
 
@@ -488,6 +494,9 @@ struct btree_trans {
        void                    *mem;
        unsigned                mem_top;
        unsigned                mem_bytes;
+#ifdef CONFIG_BCACHEFS_TRANS_KMALLOC_TRACE
+       darray_trans_kmalloc_trace trans_kmalloc_trace;
+#endif
 
        btree_path_idx_t        nr_sorted;
        btree_path_idx_t        nr_paths;
index 568e56c911907c027f9732b684839539724b2bf6..e674419c299e8bd0c10106f67ce065a8963e5f8f 100644 (file)
@@ -222,7 +222,7 @@ static inline void bch2_trans_reset_updates(struct btree_trans *trans)
        trans->extra_disk_res           = 0;
 }
 
-static inline struct bkey_i *__bch2_bkey_make_mut_noupdate(struct btree_trans *trans, struct bkey_s_c k,
+static __always_inline struct bkey_i *__bch2_bkey_make_mut_noupdate(struct btree_trans *trans, struct bkey_s_c k,
                                                  unsigned type, unsigned min_bytes)
 {
        unsigned bytes = max_t(unsigned, min_bytes, bkey_bytes(k.k));
@@ -245,7 +245,7 @@ static inline struct bkey_i *__bch2_bkey_make_mut_noupdate(struct btree_trans *t
        return mut;
 }
 
-static inline struct bkey_i *bch2_bkey_make_mut_noupdate(struct btree_trans *trans, struct bkey_s_c k)
+static __always_inline struct bkey_i *bch2_bkey_make_mut_noupdate(struct btree_trans *trans, struct bkey_s_c k)
 {
        return __bch2_bkey_make_mut_noupdate(trans, k, 0, 0);
 }
index 5a8bc7013512fdf44789b8856f1263aaff842421..2c52a2c6502b18639811606a8709a2c3c08fc988 100644 (file)
@@ -770,6 +770,12 @@ static ssize_t btree_transaction_stats_read(struct file *file, char __user *buf,
                mutex_lock(&s->lock);
 
                prt_printf(&i->buf, "Max mem used: %u\n", s->max_mem);
+#ifdef CONFIG_BCACHEFS_TRANS_KMALLOC_TRACE
+               printbuf_indent_add(&i->buf, 2);
+               bch2_trans_kmalloc_trace_to_text(&i->buf, &s->trans_kmalloc_trace);
+               printbuf_indent_sub(&i->buf, 2);
+#endif
+
                prt_printf(&i->buf, "Transaction duration:\n");
 
                printbuf_indent_add(&i->buf, 2);