From 6756e385a5bdf2e048ce2894208af9497062dcb9 Mon Sep 17 00:00:00 2001
From: Kent Overstreet
Date: Fri, 7 Feb 2025 21:31:03 -0500
Subject: [PATCH] bcachefs: bcachefs_metadata_version_stripe_lru

Add a persistent LRU for stripes, ordered by "number of empty blocks",
i.e. order in which we wish to reuse them.

This will replace the in-memory stripes heap, so we can kill off reading
stripes into memory at startup.

Signed-off-by: Kent Overstreet
---
 fs/bcachefs/alloc_background.c |  3 +-
 fs/bcachefs/bcachefs_format.h  |  3 +-
 fs/bcachefs/ec.c               | 59 +++++++++++++++++++++++++++++++++++++++
 fs/bcachefs/ec.h               | 36 ++++++++++++++++++++++++
 fs/bcachefs/lru.c              |  7 +++++
 fs/bcachefs/lru.h              |  9 ++++--
 fs/bcachefs/lru_format.h       |  4 ++-
 7 files changed, 116 insertions(+), 5 deletions(-)

diff --git a/fs/bcachefs/alloc_background.c b/fs/bcachefs/alloc_background.c
index c5c8497a6339..ecad4a78c3f7 100644
--- a/fs/bcachefs/alloc_background.c
+++ b/fs/bcachefs/alloc_background.c
@@ -1757,7 +1757,8 @@ int bch2_check_alloc_to_lru_refs(struct bch_fs *c)
 		for_each_btree_key_commit(trans, iter, BTREE_ID_alloc,
 				POS_MIN, BTREE_ITER_prefetch, k,
 				NULL, NULL, BCH_TRANS_COMMIT_no_enospc,
-			bch2_check_alloc_to_lru_ref(trans, &iter, &last_flushed)));
+			bch2_check_alloc_to_lru_ref(trans, &iter, &last_flushed))) ?:
+		bch2_check_stripe_to_lru_refs(c);
 
 	bch2_bkey_buf_exit(&last_flushed, c);
 	bch_err_fn(c, ret);
diff --git a/fs/bcachefs/bcachefs_format.h b/fs/bcachefs/bcachefs_format.h
index bf3723a2bca4..b4ac311f21a1 100644
--- a/fs/bcachefs/bcachefs_format.h
+++ b/fs/bcachefs/bcachefs_format.h
@@ -688,7 +688,8 @@ struct bch_sb_field_ext {
 	x(autofix_errors,		BCH_VERSION(1, 19))		\
 	x(directory_size,		BCH_VERSION(1, 20))		\
 	x(cached_backpointers,		BCH_VERSION(1, 21))		\
-	x(stripe_backpointers,		BCH_VERSION(1, 22))
+	x(stripe_backpointers,		BCH_VERSION(1, 22))		\
+	x(stripe_lru,			BCH_VERSION(1, 23))
 
 enum bcachefs_metadata_version {
 	bcachefs_metadata_version_min	= 9,
diff --git a/fs/bcachefs/ec.c b/fs/bcachefs/ec.c
index 36590c0ce09f..1090cdb7d5cc 100644
--- a/fs/bcachefs/ec.c
+++ b/fs/bcachefs/ec.c
@@ -20,6 +20,7 @@
 #include "io_read.h"
 #include "io_write.h"
 #include "keylist.h"
+#include "lru.h"
 #include "recovery.h"
 #include "replicas.h"
 #include "super-io.h"
@@ -411,6 +412,15 @@ int bch2_trigger_stripe(struct btree_trans *trans,
 	       (new_s->nr_blocks != old_s->nr_blocks ||
 		new_s->nr_redundant != old_s->nr_redundant));
 
+	if (flags & BTREE_TRIGGER_transactional) {
+		int ret = bch2_lru_change(trans,
+					  BCH_LRU_STRIPE_FRAGMENTATION,
+					  idx,
+					  stripe_lru_pos(old_s),
+					  stripe_lru_pos(new_s));
+		if (ret)
+			return ret;
+	}
 
 	if (flags & (BTREE_TRIGGER_transactional|BTREE_TRIGGER_gc)) {
 		/*
@@ -1175,6 +1185,10 @@ err:
 	return ret;
 }
 
+/*
+ * XXX
+ * can we kill this and delete stripes from the trigger?
+ */
 static void ec_stripe_delete_work(struct work_struct *work)
 {
 	struct bch_fs *c =
@@ -2519,3 +2533,48 @@ int bch2_fs_ec_init(struct bch_fs *c)
 	return bioset_init(&c->ec_bioset, 1, offsetof(struct ec_bio, bio),
 			   BIOSET_NEED_BVECS);
 }
+
+/*
+ * Stripe keys with a nonzero stripe_lru_pos() are passed to
+ * bch2_lru_check_set(); everything else is skipped.
+ */
+static int bch2_check_stripe_to_lru_ref(struct btree_trans *trans,
+					struct bkey_s_c k,
+					struct bkey_buf *last_flushed)
+{
+	if (k.k->type != KEY_TYPE_stripe)
+		return 0;
+
+	struct bkey_s_c_stripe s = bkey_s_c_to_stripe(k);
+
+	u64 lru_idx = stripe_lru_pos(s.v);
+	if (lru_idx) {
+		int ret = bch2_lru_check_set(trans, BCH_LRU_STRIPE_FRAGMENTATION,
+					     k.k->p.offset, lru_idx, k, last_flushed);
+		if (ret)
+			return ret;
+	}
+	return 0;
+}
+
+/*
+ * Run bch2_check_stripe_to_lru_ref() on every key in the stripes btree;
+ * called from bch2_check_alloc_to_lru_refs() after the alloc btree pass.
+ */
+int bch2_check_stripe_to_lru_refs(struct bch_fs *c)
+{
+	struct bkey_buf last_flushed;
+
+	bch2_bkey_buf_init(&last_flushed);
+	bkey_init(&last_flushed.k->k);
+
+	int ret = bch2_trans_run(c,
+		for_each_btree_key_commit(trans, iter, BTREE_ID_stripes,
+				POS_MIN, BTREE_ITER_prefetch, k,
+				NULL, NULL, BCH_TRANS_COMMIT_no_enospc,
+			bch2_check_stripe_to_lru_ref(trans, k, &last_flushed)));
+
+	bch2_bkey_buf_exit(&last_flushed, c);
+	bch_err_fn(c, ret);
+	return ret;
+}
diff --git a/fs/bcachefs/ec.h b/fs/bcachefs/ec.h
index 4c9511887655..cd1c837e4933 100644
--- a/fs/bcachefs/ec.h
+++ b/fs/bcachefs/ec.h
@@ -92,6 +92,40 @@ static inline void stripe_csum_set(struct bch_stripe *s,
 	memcpy(stripe_csum(s, block, csum_idx), &csum, bch_crc_bytes[s->csum_type]);
 }
 
+#define STRIPE_LRU_POS_EMPTY	1
+
+/*
+ * Position of a stripe in the BCH_LRU_STRIPE_FRAGMENTATION lru:
+ *
+ *  0:				not on the lru (NULL stripe, or no empty data blocks)
+ *  STRIPE_LRU_POS_EMPTY:	every data block is empty
+ *  LRU_TIME_MAX - nr_empty:	some data blocks are empty
+ *
+ * Only the nr_blocks - nr_redundant data blocks are counted.
+ * NOTE(review): assumes parity blocks never carry a block count, in which case
+ * counting them would make a full stripe look partially empty - confirm.
+ */
+static inline u64 stripe_lru_pos(const struct bch_stripe *s)
+{
+	if (!s)
+		return 0;
+
+	unsigned nr_data = s->nr_blocks - s->nr_redundant, blocks_empty = 0;
+
+	for (unsigned i = 0; i < nr_data; i++)
+		blocks_empty += !stripe_blockcount_get(s, i);
+
+	/* Will be picked up by the stripe_delete worker */
+	if (blocks_empty == nr_data)
+		return STRIPE_LRU_POS_EMPTY;
+
+	if (!blocks_empty)
+		return 0;
+
+	/* invert: more blocks empty = reuse first */
+	return LRU_TIME_MAX - blocks_empty;
+}
+
 static inline bool __bch2_ptr_matches_stripe(const struct bch_extent_ptr *stripe_ptr,
 					     const struct bch_extent_ptr *data_ptr,
 					     unsigned sectors)
@@ -282,4 +316,6 @@ void bch2_fs_ec_exit(struct bch_fs *);
 void bch2_fs_ec_init_early(struct bch_fs *);
 int bch2_fs_ec_init(struct bch_fs *);
 
+int bch2_check_stripe_to_lru_refs(struct bch_fs *);
+
 #endif /* _BCACHEFS_EC_H */
diff --git a/fs/bcachefs/lru.c b/fs/bcachefs/lru.c
index 98ab8496f29d..a299d9ec8ee4 100644
--- a/fs/bcachefs/lru.c
+++ b/fs/bcachefs/lru.c
@@ -6,6 +6,7 @@
 #include "btree_iter.h"
 #include "btree_update.h"
 #include "btree_write_buffer.h"
+#include "ec.h"
 #include "error.h"
 #include "lru.h"
 #include "recovery.h"
@@ -124,6 +125,8 @@ static struct bbpos lru_pos_to_bp(struct bkey_s_c lru_k)
 	case BCH_LRU_read:
 	case BCH_LRU_fragmentation:
 		return BBPOS(BTREE_ID_alloc, u64_to_bucket(lru_k.k->p.offset));
+	case BCH_LRU_stripes:
+		return BBPOS(BTREE_ID_stripes, POS(0, lru_k.k->p.offset));
 	default:
 		BUG();
 	}
@@ -151,6 +154,10 @@ static u64 bkey_lru_type_idx(struct bch_fs *c,
 		rcu_read_unlock();
 		return idx;
 	}
+	case BCH_LRU_stripes:
+		return k.k->type == KEY_TYPE_stripe
+			? stripe_lru_pos(bkey_s_c_to_stripe(k).v)
+			: 0;
 	default:
 		BUG();
 	}
diff --git a/fs/bcachefs/lru.h b/fs/bcachefs/lru.h
index dea1d75cc9c1..8abd0aa2083a 100644
--- a/fs/bcachefs/lru.h
+++ b/fs/bcachefs/lru.h
@@ -28,9 +28,14 @@ static inline enum bch_lru_type lru_type(struct bkey_s_c l)
 {
 	u16 lru_id = l.k->p.inode >> 48;
 
-	if (lru_id == BCH_LRU_BUCKET_FRAGMENTATION)
+	switch (lru_id) {
+	case BCH_LRU_BUCKET_FRAGMENTATION:
 		return BCH_LRU_fragmentation;
-	return BCH_LRU_read;
+	case BCH_LRU_STRIPE_FRAGMENTATION:
+		return BCH_LRU_stripes;
+	default:
+		return BCH_LRU_read;
+	}
 }
 
 int bch2_lru_validate(struct bch_fs *, struct bkey_s_c, struct bkey_validate_context);
diff --git a/fs/bcachefs/lru_format.h b/fs/bcachefs/lru_format.h
index 353a352d3fb9..b7392ad8e41f 100644
--- a/fs/bcachefs/lru_format.h
+++ b/fs/bcachefs/lru_format.h
@@ -9,7 +9,8 @@ struct bch_lru {
 
 #define BCH_LRU_TYPES()		\
 	x(read)			\
-	x(fragmentation)
+	x(fragmentation)	\
+	x(stripes)
 
 enum bch_lru_type {
 #define x(n) BCH_LRU_##n,
@@ -18,6 +19,7 @@ enum bch_lru_type {
 };
 
 #define BCH_LRU_BUCKET_FRAGMENTATION	((1U << 16) - 1)
+#define BCH_LRU_STRIPE_FRAGMENTATION	((1U << 16) - 2)
 
 #define LRU_TIME_BITS	48
 #define LRU_TIME_MAX	((1ULL << LRU_TIME_BITS) - 1)
-- 
2.25.1