Patch series "bcache: Revert min_heap migration due to performance
regression".
This patch series reverts the migration of bcache from its original heap
implementation to the generic min_heap library. While the original change
aimed to simplify the code and improve maintainability, it introduced a
severe performance regression in real-world scenarios.
As reported by Robert, systems using bcache now suffer from periodic
latency spikes, with P100 (max) latency increasing from 600 ms to 2.4
seconds every 5 minutes. This degrades bcache's value as a low-latency
caching layer, and leads to frequent timeouts and application stalls in
production environments.
The primary cause of this regression is the behavior of the generic
min_heap implementation's bottom-up sift_down, which performs up to 2 *
log2(n) comparisons when many elements are equal. The original top-down
variant used by bcache only required O(1) comparisons in such cases. The
issue was further exacerbated by commit
92a8b224b833 ("lib/min_heap:
introduce non-inline versions of min heap API functions"), which
introduced non-inlined versions of the min_heap API, adding function call
overhead to a performance-critical hot path.
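
To illustrate the difference, here is a minimal userspace sketch of the
two sift-down strategies on a min-heap of ints. This is not the kernel
code: the helper names, the plain int keys, and the bare-array layout are
assumptions made only for the example.

#include <stddef.h>

static void swap_int(int *a, int *b)
{
	int t = *a;

	*a = *b;
	*b = t;
}

/*
 * Top-down sift-down in the style of bcache's original heap macros:
 * stop as soon as the parent is not larger than its smaller child.
 * When the keys are equal, the loop exits almost immediately, i.e.
 * O(1) comparisons.
 */
static void top_down_sift(int *heap, size_t nr, size_t pos)
{
	for (;;) {
		size_t child = 2 * pos + 1;

		if (child >= nr)
			break;
		if (child + 1 < nr && heap[child + 1] < heap[child])
			child++;
		if (heap[pos] <= heap[child])	/* heap property holds */
			break;
		swap_int(&heap[pos], &heap[child]);
		pos = child;
	}
}

/*
 * Bottom-up sift-down in the style of the generic min_heap library:
 * follow the path of smaller children all the way to a leaf (~log2(n)
 * comparisons), then backtrack to the slot where the displaced element
 * belongs and rotate it into place. Equal keys never trigger an early
 * exit, so a single call can cost up to ~2 * log2(n) comparisons.
 */
static void bottom_up_sift(int *heap, size_t nr, size_t pos)
{
	size_t a = pos, b, c, d;

	/* Descend along the smaller-child path to the leaves. */
	for (b = a; c = 2 * b + 1, (d = c + 1) < nr;)
		b = heap[c] < heap[d] ? c : d;
	if (d == nr)		/* last parent has a single child */
		b = c;

	/* Backtrack until the displaced element fits. */
	while (b != a && heap[a] < heap[b])
		b = (b - 1) / 2;

	/* Rotate the path: ancestors move up, heap[a] drops into slot b. */
	c = b;
	while (b != a) {
		b = (b - 1) / 2;
		swap_int(&heap[b], &heap[c]);
	}
}

With many equal keys the top-down loop takes its early exit right at the
top, while the bottom-up walk always descends to a leaf before
backtracking, which is where the extra comparisons in this hot path come
from.
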
This patch (of 3):

This reverts commit 3d8a9a1c35227c3f1b0bd132c9f0a80dbda07b65.

Although removing the custom swap function simplified the code, this
change is part of a broader migration to the generic min_heap API that
introduced significant performance regressions in bcache.

As reported by Robert, bcache now suffers from latency spikes, with P100
(max) latency increasing from 600 ms to 2.4 seconds every 5 minutes.
These regressions degrade bcache's effectiveness as a low-latency cache
layer and lead to frequent timeouts and application stalls in production
environments.

This revert is part of a series of changes to restore the previous
performance by undoing the min_heap transition.
Link: https://lkml.kernel.org/r/20250614202353.1632957-1-visitorckw@gmail.com
Link: https://lore.kernel.org/lkml/CAJhEC05+0S69z+3+FB2Cd0hD+pCRyWTKLEOsc8BOmH73p1m+KQ@mail.gmail.com
Link: https://lkml.kernel.org/r/20250614202353.1632957-2-visitorckw@gmail.com
Fixes: 866898efbb25 ("bcache: remove heap-related macros and switch to generic min_heap")
Fixes: 92a8b224b833 ("lib/min_heap: introduce non-inline versions of min heap API functions")
Signed-off-by: Kuan-Wei Chiu <visitorckw@gmail.com>
Reported-by: Robert Pang <robertpang@google.com>
Closes: https://lore.kernel.org/linux-bcache/CAJhEC06F_AtrPgw2-7CvCqZgeStgCtitbD-ryuPpXQA-JG5XXw@mail.gmail.com
Acked-by: Coly Li <colyli@kernel.org>
Cc: Ching-Chun (Jim) Huang <jserv@ccns.ncku.edu.tw>
Cc: Kent Overstreet <kent.overstreet@linux.dev>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
return new_bucket_prio(ca, *lhs) < new_bucket_prio(ca, *rhs);
}
+static inline void new_bucket_swap(void *l, void *r, void __always_unused *args)
+{
+ struct bucket **lhs = l, **rhs = r;
+
+ swap(*lhs, *rhs);
+}
+
static void invalidate_buckets_lru(struct cache *ca)
{
struct bucket *b;
const struct min_heap_callbacks bucket_max_cmp_callback = {
.less = new_bucket_max_cmp,
- .swp = NULL,
+ .swp = new_bucket_swap,
};
const struct min_heap_callbacks bucket_min_cmp_callback = {
.less = new_bucket_min_cmp,
- .swp = NULL,
+ .swp = new_bucket_swap,
};
ca->heap.nr = 0;
return bkey_cmp(_l->k, _r->k) <= 0;
}
+static inline void new_btree_iter_swap(void *iter1, void *iter2, void __always_unused *args)
+{
+ struct btree_iter_set *_iter1 = iter1;
+ struct btree_iter_set *_iter2 = iter2;
+
+ swap(*_iter1, *_iter2);
+}
+
static inline bool btree_iter_end(struct btree_iter *iter)
{
return !iter->heap.nr;
{
const struct min_heap_callbacks callbacks = {
.less = new_btree_iter_cmp,
- .swp = NULL,
+ .swp = new_btree_iter_swap,
};
if (k != end)
struct bkey *ret = NULL;
const struct min_heap_callbacks callbacks = {
.less = cmp,
- .swp = NULL,
+ .swp = new_btree_iter_swap,
};
if (!btree_iter_end(iter)) {
: bch_ptr_invalid;
const struct min_heap_callbacks callbacks = {
.less = b->ops->sort_cmp,
- .swp = NULL,
+ .swp = new_btree_iter_swap,
};
/* Heapify the iterator, using our comparison function */
return !(c ? c > 0 : _l->k < _r->k);
}
+static inline void new_btree_iter_swap(void *iter1, void *iter2, void __always_unused *args)
+{
+ struct btree_iter_set *_iter1 = iter1;
+ struct btree_iter_set *_iter2 = iter2;
+
+ swap(*_iter1, *_iter2);
+}
+
static struct bkey *bch_extent_sort_fixup(struct btree_iter *iter,
struct bkey *tmp)
{
const struct min_heap_callbacks callbacks = {
.less = new_bch_extent_sort_cmp,
- .swp = NULL,
+ .swp = new_btree_iter_swap,
};
while (iter->heap.nr > 1) {
struct btree_iter_set *top = iter->heap.data, *i = top + 1;
return GC_SECTORS_USED(*_l) >= GC_SECTORS_USED(*_r);
}
+static void new_bucket_swap(void *l, void *r, void __always_unused *args)
+{
+ struct bucket **_l = l;
+ struct bucket **_r = r;
+
+ swap(*_l, *_r);
+}
+
static unsigned int bucket_heap_top(struct cache *ca)
{
struct bucket *b;
unsigned long sectors_to_move, reserve_sectors;
const struct min_heap_callbacks callbacks = {
.less = new_bucket_cmp,
- .swp = NULL,
+ .swp = new_bucket_swap,
};
if (!c->copy_gc_enabled)