From: Kent Overstreet
Date: Wed, 13 Feb 2019 19:46:32 +0000 (-0500)
Subject: bcachefs: Convert bucket invalidation to key marking path
X-Git-Tag: io_uring-6.7-2023-11-10~119^2~2576
X-Git-Url: https://git.kernel.dk/?a=commitdiff_plain;h=8fe826f90aad4ea314d0acdf7425a9bf2324e17f;p=linux-block.git

bcachefs: Convert bucket invalidation to key marking path

Signed-off-by: Kent Overstreet
---

diff --git a/fs/bcachefs/alloc_background.c b/fs/bcachefs/alloc_background.c
index 5b9d6c77d037..04b75367fcde 100644
--- a/fs/bcachefs/alloc_background.c
+++ b/fs/bcachefs/alloc_background.c
@@ -129,6 +129,34 @@ static inline void put_alloc_field(struct bkey_i_alloc *a, void **p,
 	*p += bytes;
 }
 
+struct bkey_alloc_unpacked bch2_alloc_unpack(const struct bch_alloc *a)
+{
+	struct bkey_alloc_unpacked ret = { .gen = a->gen };
+	const void *d = a->data;
+	unsigned idx = 0;
+
+#define x(_name, _bits)	ret._name = get_alloc_field(a, &d, idx++);
+	BCH_ALLOC_FIELDS()
+#undef x
+	return ret;
+}
+
+static void bch2_alloc_pack(struct bkey_i_alloc *dst,
+			    const struct bkey_alloc_unpacked src)
+{
+	unsigned idx = 0;
+	void *d = dst->v.data;
+
+	dst->v.fields	= 0;
+	dst->v.gen	= src.gen;
+
+#define x(_name, _bits)	put_alloc_field(dst, &d, idx++, src._name);
+	BCH_ALLOC_FIELDS()
+#undef x
+
+	set_bkey_val_bytes(&dst->k, (void *) d - (void *) &dst->v);
+}
+
 static unsigned bch_alloc_val_u64s(const struct bch_alloc *a)
 {
 	unsigned i, bytes = offsetof(struct bch_alloc, data);
@@ -174,16 +202,24 @@ void bch2_alloc_to_text(struct printbuf *out, struct bch_fs *c,
 static void __alloc_read_key(struct bucket *g, const struct bch_alloc *a)
 {
 	const void *d = a->data;
-	unsigned idx = 0;
+	unsigned idx = 0, data_type, dirty_sectors, cached_sectors;
+	struct bucket_mark m;
 
-	g->_mark.gen		= a->gen;
-	g->gen_valid		= 1;
 	g->io_time[READ]	= get_alloc_field(a, &d, idx++);
 	g->io_time[WRITE]	= get_alloc_field(a, &d, idx++);
-	g->_mark.data_type	= get_alloc_field(a, &d, idx++);
-	g->_mark.dirty_sectors	= get_alloc_field(a, &d, idx++);
-	g->_mark.cached_sectors	= get_alloc_field(a, &d, idx++);
+	data_type		= get_alloc_field(a, &d, idx++);
+	dirty_sectors		= get_alloc_field(a, &d, idx++);
+	cached_sectors		= get_alloc_field(a, &d, idx++);
 	g->oldest_gen		= get_alloc_field(a, &d, idx++);
+
+	bucket_cmpxchg(g, m, ({
+		m.gen			= a->gen;
+		m.data_type		= data_type;
+		m.dirty_sectors		= dirty_sectors;
+		m.cached_sectors	= cached_sectors;
+	}));
+
+	g->gen_valid		= 1;
 }
 
 static void __alloc_write_key(struct bkey_i_alloc *a, struct bucket *g,
@@ -318,6 +354,7 @@ static int __bch2_alloc_write_key(struct bch_fs *c, struct bch_dev *ca,
 				    BTREE_INSERT_NOFAIL|
 				    BTREE_INSERT_USE_RESERVE|
 				    BTREE_INSERT_USE_ALLOC_RESERVE|
+				    BTREE_INSERT_NOMARK|
 				    flags,
 				    BTREE_INSERT_ENTRY(iter, &a->k_i));
 	if (ret)
@@ -361,7 +398,8 @@ int bch2_alloc_replay_key(struct bch_fs *c, struct bkey_i *k)
 		? 0
 		: bch2_btree_insert_at(c, NULL, NULL,
 				       BTREE_INSERT_NOFAIL|
-				       BTREE_INSERT_JOURNAL_REPLAY,
+				       BTREE_INSERT_JOURNAL_REPLAY|
+				       BTREE_INSERT_NOMARK,
 				       BTREE_INSERT_ENTRY(&iter, k));
 err:
 	bch2_btree_iter_unlock(&iter);
@@ -827,6 +865,142 @@ static inline long next_alloc_bucket(struct bch_dev *ca)
 	return -1;
 }
 
+/*
+ * returns sequence number of most recent journal entry that updated this
+ * bucket:
+ */
+static u64 bucket_journal_seq(struct bch_fs *c, struct bucket_mark m)
+{
+	if (m.journal_seq_valid) {
+		u64 journal_seq = atomic64_read(&c->journal.seq);
+		u64 bucket_seq	= journal_seq;
+
+		bucket_seq &= ~((u64) U16_MAX);
+		bucket_seq |= m.journal_seq;
+
+		if (bucket_seq > journal_seq)
+			bucket_seq -= 1 << 16;
+
+		return bucket_seq;
+	} else {
+		return 0;
+	}
+}
+
+static int bch2_invalidate_one_bucket2(struct bch_fs *c, struct bch_dev *ca,
+				       struct btree_iter *iter,
+				       u64 *journal_seq, unsigned flags)
+{
+#if 0
+	__BKEY_PADDED(k, BKEY_ALLOC_VAL_U64s_MAX) alloc_key;
+#else
+	/* hack: */
+	__BKEY_PADDED(k, 8) alloc_key;
+#endif
+	struct bkey_i_alloc *a;
+	struct bkey_alloc_unpacked u;
+	struct bucket_mark m;
+	struct bkey_s_c k;
+	bool invalidating_cached_data;
+	size_t b;
+	int ret;
+
+	BUG_ON(!ca->alloc_heap.used ||
+	       !ca->alloc_heap.data[0].nr);
+	b = ca->alloc_heap.data[0].bucket;
+
+	/* first, put on free_inc and mark as owned by allocator: */
+	percpu_down_read(&c->mark_lock);
+	spin_lock(&c->freelist_lock);
+
+	verify_not_on_freelist(c, ca, b);
+
+	BUG_ON(!fifo_push(&ca->free_inc, b));
+
+	bch2_mark_alloc_bucket(c, ca, b, true, gc_pos_alloc(c, NULL), 0);
+	m = bucket(ca, b)->mark;
+
+	spin_unlock(&c->freelist_lock);
+	percpu_up_read(&c->mark_lock);
+
+	bch2_btree_iter_cond_resched(iter);
+
+	BUG_ON(BKEY_ALLOC_VAL_U64s_MAX > 8);
+
+	bch2_btree_iter_set_pos(iter, POS(ca->dev_idx, b));
+retry:
+	k = bch2_btree_iter_peek_slot(iter);
+	ret = btree_iter_err(k);
+	if (ret)
+		return ret;
+
+	if (k.k && k.k->type == KEY_TYPE_alloc)
+		u = bch2_alloc_unpack(bkey_s_c_to_alloc(k).v);
+	else
+		memset(&u, 0, sizeof(u));
+
+	invalidating_cached_data = u.cached_sectors != 0;
+
+	//BUG_ON(u.dirty_sectors);
+	u.data_type	= 0;
+	u.dirty_sectors	= 0;
+	u.cached_sectors = 0;
+	u.read_time	= c->bucket_clock[READ].hand;
+	u.write_time	= c->bucket_clock[WRITE].hand;
+	u.gen++;
+
+	a = bkey_alloc_init(&alloc_key.k);
+	a->k.p = iter->pos;
+	bch2_alloc_pack(a, u);
+
+	ret = bch2_btree_insert_at(c, NULL,
+			invalidating_cached_data ? journal_seq : NULL,
+			BTREE_INSERT_ATOMIC|
+			BTREE_INSERT_NOCHECK_RW|
+			BTREE_INSERT_NOFAIL|
+			BTREE_INSERT_USE_RESERVE|
+			BTREE_INSERT_USE_ALLOC_RESERVE|
+			flags,
+			BTREE_INSERT_ENTRY(iter, &a->k_i));
+	if (ret == -EINTR)
+		goto retry;
+
+	if (!ret) {
+		/* remove from alloc_heap: */
+		struct alloc_heap_entry e, *top = ca->alloc_heap.data;
+
+		top->bucket++;
+		top->nr--;
+
+		if (!top->nr)
+			heap_pop(&ca->alloc_heap, e, bucket_alloc_cmp, NULL);
+
+		/*
+		 * Make sure we flush the last journal entry that updated this
+		 * bucket (i.e. deleting the last reference) before writing to
+		 * this bucket again:
+		 */
+		*journal_seq = max(*journal_seq, bucket_journal_seq(c, m));
+	} else {
+		size_t b2;
+
+		/* remove from free_inc: */
+		percpu_down_read(&c->mark_lock);
+		spin_lock(&c->freelist_lock);
+
+		bch2_mark_alloc_bucket(c, ca, b, false,
+				       gc_pos_alloc(c, NULL), 0);
+
+		BUG_ON(!fifo_pop_back(&ca->free_inc, b2));
+		BUG_ON(b != b2);
+
+		spin_unlock(&c->freelist_lock);
+		percpu_up_read(&c->mark_lock);
+	}
+
+	return ret;
+}
+
 static bool bch2_invalidate_one_bucket(struct bch_fs *c, struct bch_dev *ca,
 				       size_t bucket, u64 *flush_seq)
 {
@@ -847,18 +1021,7 @@ static bool bch2_invalidate_one_bucket(struct bch_fs *c, struct bch_dev *ca,
 
 	percpu_up_read(&c->mark_lock);
 
-	if (m.journal_seq_valid) {
-		u64 journal_seq = atomic64_read(&c->journal.seq);
-		u64 bucket_seq	= journal_seq;
-
-		bucket_seq &= ~((u64) U16_MAX);
-		bucket_seq |= m.journal_seq;
-
-		if (bucket_seq > journal_seq)
-			bucket_seq -= 1 << 16;
-
-		*flush_seq = max(*flush_seq, bucket_seq);
-	}
+	*flush_seq = max(*flush_seq, bucket_journal_seq(c, m));
 
 	return m.cached_sectors != 0;
 }
@@ -871,7 +1034,6 @@ static int bch2_invalidate_buckets(struct bch_fs *c, struct bch_dev *ca)
 	struct btree_iter iter;
 	u64 journal_seq = 0;
 	int ret = 0;
-	long b;
 
 	bch2_btree_iter_init(&iter, c, BTREE_ID_ALLOC, POS(ca->dev_idx, 0),
 			     BTREE_ITER_SLOTS|BTREE_ITER_INTENT);
@@ -879,16 +1041,11 @@ static int bch2_invalidate_buckets(struct bch_fs *c, struct bch_dev *ca)
 
 	/* Only use nowait if we've already invalidated at least one bucket: */
 	while (!ret &&
 	       !fifo_full(&ca->free_inc) &&
-	       (b = next_alloc_bucket(ca)) >= 0) {
-		bool must_flush =
-			bch2_invalidate_one_bucket(c, ca, b, &journal_seq);
-
-		ret = __bch2_alloc_write_key(c, ca, b, &iter,
-					     must_flush ? &journal_seq : NULL,
+	       ca->alloc_heap.used)
+		ret = bch2_invalidate_one_bucket2(c, ca, &iter, &journal_seq,
 					     BTREE_INSERT_GC_LOCK_HELD|
 					     (!fifo_empty(&ca->free_inc)
 					      ? BTREE_INSERT_NOWAIT : 0));
-	}
 
 	bch2_btree_iter_unlock(&iter);
diff --git a/fs/bcachefs/alloc_background.h b/fs/bcachefs/alloc_background.h
index 04f1e9152494..ff6eccf904af 100644
--- a/fs/bcachefs/alloc_background.h
+++ b/fs/bcachefs/alloc_background.h
@@ -6,6 +6,15 @@
 #include "alloc_types.h"
 #include "debug.h"
 
+struct bkey_alloc_unpacked {
+	u8		gen;
+#define x(_name, _bits)	u##_bits _name;
+	BCH_ALLOC_FIELDS()
+#undef x
+};
+
+struct bkey_alloc_unpacked bch2_alloc_unpack(const struct bch_alloc *);
+
 #define ALLOC_SCAN_BATCH(ca)		max_t(size_t, 1, (ca)->mi.nbuckets >> 9)
 
 const char *bch2_alloc_invalid(const struct bch_fs *, struct bkey_s_c);
diff --git a/fs/bcachefs/bcachefs_format.h b/fs/bcachefs/bcachefs_format.h
index c7971e5c7c36..9a3ca6fa30b7 100644
--- a/fs/bcachefs/bcachefs_format.h
+++ b/fs/bcachefs/bcachefs_format.h
@@ -826,12 +826,12 @@ struct bch_alloc {
 } __attribute__((packed, aligned(8)));
 
 #define BCH_ALLOC_FIELDS()			\
-	x(read_time,		2)		\
-	x(write_time,		2)		\
-	x(data_type,		1)		\
-	x(dirty_sectors,	2)		\
-	x(cached_sectors,	2)		\
-	x(oldest_gen,		1)
+	x(read_time,		16)		\
+	x(write_time,		16)		\
+	x(data_type,		8)		\
+	x(dirty_sectors,	16)		\
+	x(cached_sectors,	16)		\
+	x(oldest_gen,		8)
 
 enum {
 #define x(name, bytes) BCH_ALLOC_FIELD_##name,
@@ -841,12 +841,12 @@ enum {
 };
 
 static const unsigned BCH_ALLOC_FIELD_BYTES[] = {
-#define x(name, bytes) [BCH_ALLOC_FIELD_##name] = bytes,
+#define x(name, bits) [BCH_ALLOC_FIELD_##name] = bits / 8,
 	BCH_ALLOC_FIELDS()
 #undef x
 };
 
-#define x(name, bytes) + bytes
+#define x(name, bits) + (bits / 8)
 static const unsigned BKEY_ALLOC_VAL_U64s_MAX =
 	DIV_ROUND_UP(offsetof(struct bch_alloc, data)
 		     BCH_ALLOC_FIELDS(), sizeof(u64));
diff --git a/fs/bcachefs/btree_types.h b/fs/bcachefs/btree_types.h
index 5f0e0009ec5d..7e58e82daec1 100644
--- a/fs/bcachefs/btree_types.h
+++ b/fs/bcachefs/btree_types.h
@@ -456,6 +456,7 @@ static inline bool btree_node_is_extents(struct btree *b)
 static inline bool btree_node_type_needs_gc(enum btree_node_type type)
 {
 	switch (type) {
+	case BKEY_TYPE_ALLOC:
 	case BKEY_TYPE_BTREE:
 	case BKEY_TYPE_EXTENTS:
 	case BKEY_TYPE_INODES:
diff --git a/fs/bcachefs/btree_update.h b/fs/bcachefs/btree_update.h
index 9bcab29bd033..1fd01fb40482 100644
--- a/fs/bcachefs/btree_update.h
+++ b/fs/bcachefs/btree_update.h
@@ -82,6 +82,7 @@ enum {
 	__BTREE_INSERT_USE_RESERVE,
 	__BTREE_INSERT_USE_ALLOC_RESERVE,
 	__BTREE_INSERT_JOURNAL_REPLAY,
+	__BTREE_INSERT_NOMARK,
 	__BTREE_INSERT_NOWAIT,
 	__BTREE_INSERT_GC_LOCK_HELD,
 	__BCH_HASH_SET_MUST_CREATE,
@@ -108,12 +109,12 @@ enum {
 #define BTREE_INSERT_USE_RESERVE	(1 << __BTREE_INSERT_USE_RESERVE)
 #define BTREE_INSERT_USE_ALLOC_RESERVE	(1 << __BTREE_INSERT_USE_ALLOC_RESERVE)
 
-/*
- * Insert is for journal replay: don't get journal reservations, or mark extents
- * (bch_mark_key)
- */
+/* Insert is for journal replay - don't get journal reservations: */
 #define BTREE_INSERT_JOURNAL_REPLAY	(1 << __BTREE_INSERT_JOURNAL_REPLAY)
 
+/* Don't call bch2_mark_key: */
+#define BTREE_INSERT_NOMARK		(1 << __BTREE_INSERT_NOMARK)
+
 /* Don't block on allocation failure (for new btree nodes: */
 #define BTREE_INSERT_NOWAIT		(1 << __BTREE_INSERT_NOWAIT)
 #define BTREE_INSERT_GC_LOCK_HELD	(1 << __BTREE_INSERT_GC_LOCK_HELD)
diff --git a/fs/bcachefs/buckets.c b/fs/bcachefs/buckets.c
index 949541f15e7d..3286ee26f7e2 100644
--- a/fs/bcachefs/buckets.c
+++ b/fs/bcachefs/buckets.c
@@ -476,6 +476,60 @@ void bch2_mark_alloc_bucket(struct bch_fs *c, struct bch_dev *ca,
 			    ca, b, owned_by_allocator);
 }
 
+static int bch2_mark_alloc(struct bch_fs *c, struct bkey_s_c k,
+			   bool inserting,
+			   struct bch_fs_usage *fs_usage,
+			   unsigned journal_seq, unsigned flags,
+			   bool gc)
+{
+	struct bkey_alloc_unpacked u;
+	struct bch_dev *ca;
+	struct bucket *g;
+	struct bucket_mark old, m;
+
+	if (!inserting)
+		return 0;
+
+	/*
+	 * alloc btree is read in by bch2_alloc_read, not gc:
+	 */
+	if (flags & BCH_BUCKET_MARK_GC)
+		return 0;
+
+	u = bch2_alloc_unpack(bkey_s_c_to_alloc(k).v);
+	ca = bch_dev_bkey_exists(c, k.k->p.inode);
+	g = __bucket(ca, k.k->p.offset, gc);
+
+	/*
+	 * this should currently only be getting called from the bucket
+	 * invalidate path:
+	 */
+	BUG_ON(u.dirty_sectors);
+	BUG_ON(u.cached_sectors);
+	BUG_ON(!g->mark.owned_by_allocator);
+
+	old = bucket_data_cmpxchg(c, ca, fs_usage, g, m, ({
+		m.gen			= u.gen;
+		m.data_type		= u.data_type;
+		m.dirty_sectors		= u.dirty_sectors;
+		m.cached_sectors	= u.cached_sectors;
+	}));
+
+	g->io_time[READ]	= u.read_time;
+	g->io_time[WRITE]	= u.write_time;
+	g->oldest_gen		= u.oldest_gen;
+	g->gen_valid		= 1;
+
+	if (old.cached_sectors) {
+		update_cached_sectors(c, fs_usage, ca->dev_idx,
+				      -old.cached_sectors);
+		trace_invalidate(ca, bucket_to_sector(ca, k.k->p.offset),
+				 old.cached_sectors);
+	}
+
+	return 0;
+}
+
 #define checked_add(a, b)					\
 do {								\
 	unsigned _res = (unsigned) (a) + (b);			\
@@ -840,18 +894,21 @@ static int __bch2_mark_key(struct bch_fs *c, struct bkey_s_c k,
 		fs_usage = this_cpu_ptr(c->usage[gc]);
 
 	switch (k.k->type) {
+	case KEY_TYPE_alloc:
+		return bch2_mark_alloc(c, k, inserting,
+				fs_usage, journal_seq, flags, gc);
 	case KEY_TYPE_btree_ptr:
 		return bch2_mark_extent(c, k, inserting
-					?  c->opts.btree_node_size
-					: -c->opts.btree_node_size,
-					BCH_DATA_BTREE,
-					fs_usage, journal_seq, flags, gc);
+				?  c->opts.btree_node_size
+				: -c->opts.btree_node_size,
+				BCH_DATA_BTREE,
+				fs_usage, journal_seq, flags, gc);
 	case KEY_TYPE_extent:
 		return bch2_mark_extent(c, k, sectors, BCH_DATA_USER,
-					fs_usage, journal_seq, flags, gc);
+				fs_usage, journal_seq, flags, gc);
 	case KEY_TYPE_stripe:
 		return bch2_mark_stripe(c, k, inserting,
-					fs_usage, journal_seq, flags, gc);
+				fs_usage, journal_seq, flags, gc);
 	case KEY_TYPE_inode:
 		if (inserting)
 			fs_usage->s.nr_inodes++;
@@ -922,7 +979,7 @@ void bch2_mark_update(struct btree_insert *trans,
 	preempt_disable();
 	fs_usage = bch2_fs_usage_get_scratch(c);
 
-	if (!(trans->flags & BTREE_INSERT_JOURNAL_REPLAY))
+	if (!(trans->flags & BTREE_INSERT_NOMARK))
 		bch2_mark_key_locked(c, bkey_i_to_s_c(insert->k), true,
 			bpos_min(insert->k->k.p, b->key.k.p).offset -
 			bkey_start_offset(&insert->k->k),
diff --git a/fs/bcachefs/buckets.h b/fs/bcachefs/buckets.h
index ffdf176d7ed2..973bf605cbd9 100644
--- a/fs/bcachefs/buckets.h
+++ b/fs/bcachefs/buckets.h
@@ -17,13 +17,14 @@
 
 #define bucket_cmpxchg(g, new, expr)				\
 ({								\
+	struct bucket *_g = g;					\
 	u64 _v = atomic64_read(&(g)->_mark.v);			\
 	struct bucket_mark _old;				\
 								\
 	do {							\
 		(new).v.counter = _old.v.counter = _v;		\
 		expr;						\
-	} while ((_v = atomic64_cmpxchg(&(g)->_mark.v,		\
+	} while ((_v = atomic64_cmpxchg(&(_g)->_mark.v,		\
 				_old.v.counter,			\
 				(new).v.counter)) != _old.v.counter);\
 	_old;							\
diff --git a/fs/bcachefs/fifo.h b/fs/bcachefs/fifo.h
index 0cd5f1931aac..cdb272708a4b 100644
--- a/fs/bcachefs/fifo.h
+++ b/fs/bcachefs/fifo.h
@@ -101,7 +101,7 @@ do {									\
 ({									\
 	bool _r = !fifo_empty((fifo));					\
 	if (_r)								\
-		(i) = (fifo)->data[--(fifo)->back & (fifo)->mask]	\
+		(i) = (fifo)->data[--(fifo)->back & (fifo)->mask];	\
 	_r;								\
 })
 
diff --git a/fs/bcachefs/journal_io.c b/fs/bcachefs/journal_io.c
index bfa1045b0eb5..17eba4269719 100644
--- a/fs/bcachefs/journal_io.c
+++ b/fs/bcachefs/journal_io.c
@@ -854,7 +854,8 @@ int bch2_journal_replay(struct bch_fs *c, struct list_head *list)
 			ret = bch2_btree_insert(c, entry->btree_id, k,
 						&disk_res, NULL,
 						BTREE_INSERT_NOFAIL|
-						BTREE_INSERT_JOURNAL_REPLAY);
+						BTREE_INSERT_JOURNAL_REPLAY|
+						BTREE_INSERT_NOMARK);
 		}
 
 		if (ret) {
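
The bucket_journal_seq() helper the patch factors out stores only the low 16 bits of the journal sequence number in the bucket mark (m.journal_seq) and rebuilds the full 64-bit value by splicing those bits into the current sequence, stepping back one 2^16 window when the spliced value would lie in the future. A minimal standalone C sketch of that arithmetic (illustrative only, not part of the patch; reconstruct_seq() and the sample values are hypothetical):

#include <stdint.h>
#include <stdio.h>

/*
 * Rebuild a full 64-bit sequence number from the current journal
 * sequence plus the low 16 bits saved in a bucket mark:
 */
static uint64_t reconstruct_seq(uint64_t journal_seq, uint16_t bucket_lo16)
{
	/* splice the saved low bits into the current sequence: */
	uint64_t bucket_seq = (journal_seq & ~(uint64_t) UINT16_MAX)
		| bucket_lo16;

	/*
	 * a bucket can't have been updated in the future: if the spliced
	 * value is ahead of the current sequence, the low 16 bits have
	 * wrapped since the mark was written, so step back one window:
	 */
	if (bucket_seq > journal_seq)
		bucket_seq -= 1 << 16;

	return bucket_seq;
}

int main(void)
{
	/*
	 * current seq 0x12345, bucket marked when the low bits were
	 * 0xffff, i.e. before the last wrap -- prints 0xffff rather
	 * than the "future" value 0x1ffff:
	 */
	printf("0x%llx\n",
	       (unsigned long long) reconstruct_seq(0x12345, 0xffff));
	return 0;
}

This is why bch2_invalidate_one_bucket2() needs only the journal_seq_valid bit plus 16 bits in the bucket mark to decide which journal entry must be flushed before the bucket can be reused.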