From 548b3d209fa5c6aaa9db58a69d9f6cf4ce8978b6 Mon Sep 17 00:00:00 2001
From: Kent Overstreet
Date: Fri, 7 Feb 2020 13:38:02 -0500
Subject: [PATCH] bcachefs: btree_ptr_v2

Add a new btree ptr type which contains the sequence number (random 64
bit cookie, actually) for that btree node - this lets us verify that
when we read in a btree node it really is the btree node we wanted.

Signed-off-by: Kent Overstreet
Signed-off-by: Kent Overstreet
---
 fs/bcachefs/bcachefs_format.h       | 22 ++++++++--
 fs/bcachefs/bkey.h                  |  1 +
 fs/bcachefs/btree_cache.h           |  2 +
 fs/bcachefs/btree_io.c              | 30 ++++++++++++--
 fs/bcachefs/btree_update_interior.c | 63 ++++++++++++++++++++---------
 fs/bcachefs/buckets.c               |  2 +
 fs/bcachefs/buckets.h               |  3 +-
 fs/bcachefs/extents.c               |  3 ++
 fs/bcachefs/extents.h               | 15 +++++++
 fs/bcachefs/recovery.c              |  1 +
 fs/bcachefs/replicas.c              |  1 +
 11 files changed, 117 insertions(+), 26 deletions(-)

diff --git a/fs/bcachefs/bcachefs_format.h b/fs/bcachefs/bcachefs_format.h
index dbc9c15514bd..575fb7143cc0 100644
--- a/fs/bcachefs/bcachefs_format.h
+++ b/fs/bcachefs/bcachefs_format.h
@@ -343,7 +343,8 @@ static inline void bkey_init(struct bkey *k)
 	x(stripe,		14)		\
 	x(reflink_p,		15)		\
 	x(reflink_v,		16)		\
-	x(inline_data,		17)
+	x(inline_data,		17)		\
+	x(btree_ptr_v2,		18)
 
 enum bch_bkey_type {
 #define x(name, nr) KEY_TYPE_##name = nr,
@@ -599,6 +600,19 @@ struct bch_btree_ptr {
 	struct bch_extent_ptr	start[];
 } __attribute__((packed, aligned(8)));
 
+struct bch_btree_ptr_v2 {
+	struct bch_val		v;
+
+	__u64			mem_ptr;
+	__le64			seq;
+	__le16			sectors_written;
+	/* In case we ever decide to do variable size btree nodes: */
+	__le16			sectors;
+	struct bpos		min_key;
+	__u64			_data[0];
+	struct bch_extent_ptr	start[];
+} __attribute__((packed, aligned(8)));
+
 struct bch_extent {
 	struct bch_val		v;
 
@@ -630,7 +644,8 @@ struct bch_reservation {
 
 /* Btree pointers don't carry around checksums: */
 #define BKEY_BTREE_PTR_VAL_U64s_MAX				\
-	((sizeof(struct bch_extent_ptr)) / sizeof(u64) * BCH_REPLICAS_MAX)
+	((sizeof(struct bch_btree_ptr_v2) +			\
+	  sizeof(struct bch_extent_ptr) * BCH_REPLICAS_MAX) / sizeof(u64))
 #define BKEY_BTREE_PTR_U64s_MAX					\
 	(BKEY_U64s + BKEY_BTREE_PTR_VAL_U64s_MAX)
 
@@ -1299,7 +1314,8 @@ LE64_BITMASK(BCH_SB_ERASURE_CODE,	struct bch_sb, flags[3],  0, 16);
 	x(new_siphash,			7)	\
 	x(inline_data,			8)	\
 	x(new_extent_overwrite,		9)	\
-	x(incompressible,		10)
+	x(incompressible,		10)	\
+	x(btree_ptr_v2,			11)
 
 enum bch_sb_feature {
 #define x(f, n) BCH_FEATURE_##f,
diff --git a/fs/bcachefs/bkey.h b/fs/bcachefs/bkey.h
index 36e6ecc04514..aa729347e448 100644
--- a/fs/bcachefs/bkey.h
+++ b/fs/bcachefs/bkey.h
@@ -573,6 +573,7 @@ BKEY_VAL_ACCESSORS(stripe);
 BKEY_VAL_ACCESSORS(reflink_p);
 BKEY_VAL_ACCESSORS(reflink_v);
 BKEY_VAL_ACCESSORS(inline_data);
+BKEY_VAL_ACCESSORS(btree_ptr_v2);
 
 /* byte order helpers */
 
diff --git a/fs/bcachefs/btree_cache.h b/fs/bcachefs/btree_cache.h
index 270f7f8fb140..6e7edcaf6675 100644
--- a/fs/bcachefs/btree_cache.h
+++ b/fs/bcachefs/btree_cache.h
@@ -40,6 +40,8 @@ static inline u64 btree_ptr_hash_val(const struct bkey_i *k)
 	switch (k->k.type) {
 	case KEY_TYPE_btree_ptr:
 		return *((u64 *) bkey_i_to_btree_ptr_c(k)->v.start);
+	case KEY_TYPE_btree_ptr_v2:
+		return bkey_i_to_btree_ptr_v2_c(k)->v.seq;
 	default:
 		return 0;
 	}
diff --git a/fs/bcachefs/btree_io.c b/fs/bcachefs/btree_io.c
index 9df8d4f785bf..5fa31698ed67 100644
--- a/fs/bcachefs/btree_io.c
+++ b/fs/bcachefs/btree_io.c
@@ -734,6 +734,15 @@ static int validate_bset(struct bch_fs *c, struct btree *b,
 			bch2_bpos_swab(&b->data->max_key);
 		}
 
+		if (b->key.k.type == KEY_TYPE_btree_ptr_v2) {
+			struct bch_btree_ptr_v2 *bp =
+				&bkey_i_to_btree_ptr_v2(&b->key)->v;
+
+			btree_err_on(bkey_cmp(b->data->min_key, bp->min_key),
+				     BTREE_ERR_MUST_RETRY, c, b, NULL,
+				     "incorrect min_key");
+		}
+
 		btree_err_on(bkey_cmp(b->data->max_key, b->key.k.p),
 			     BTREE_ERR_MUST_RETRY, c, b, i,
 			     "incorrect max key");
@@ -897,6 +906,15 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct btree *b, bool have_retry
 		     BTREE_ERR_MUST_RETRY, c, b, NULL,
 		     "bad btree header");
 
+	if (b->key.k.type == KEY_TYPE_btree_ptr_v2) {
+		struct bch_btree_ptr_v2 *bp =
+			&bkey_i_to_btree_ptr_v2(&b->key)->v;
+
+		btree_err_on(b->data->keys.seq != bp->seq,
+			     BTREE_ERR_MUST_RETRY, c, b, NULL,
+			     "got wrong btree node");
+	}
+
 	while (b->written < c->opts.btree_node_size) {
 		unsigned sectors, whiteout_u64s = 0;
 		struct nonce nonce;
@@ -1004,15 +1022,15 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct btree *b, bool have_retry
 	i = &b->data->keys;
 	for (k = i->start; k != vstruct_last(i);) {
 		struct bkey tmp;
-		struct bkey_s_c u = bkey_disassemble(b, k, &tmp);
-		const char *invalid = bch2_bkey_val_invalid(c, u);
+		struct bkey_s u = __bkey_disassemble(b, k, &tmp);
+		const char *invalid = bch2_bkey_val_invalid(c, u.s_c);
 
 		if (invalid ||
 		    (inject_invalid_keys(c) &&
 		     !bversion_cmp(u.k->version, MAX_VERSION))) {
 			char buf[160];
 
-			bch2_bkey_val_to_text(&PBUF(buf), c, u);
+			bch2_bkey_val_to_text(&PBUF(buf), c, u.s_c);
 			btree_err(BTREE_ERR_FIXABLE, c, b, i,
 				  "invalid bkey %s: %s", buf, invalid);
 
@@ -1025,6 +1043,12 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct btree *b, bool have_retry
 			continue;
 		}
 
+		if (u.k->type == KEY_TYPE_btree_ptr_v2) {
+			struct bkey_s_btree_ptr_v2 bp = bkey_s_to_btree_ptr_v2(u);
+
+			bp.v->mem_ptr = 0;
+		}
+
 		k = bkey_next_skip_noops(k, vstruct_last(i));
 	}
 
diff --git a/fs/bcachefs/btree_update_interior.c b/fs/bcachefs/btree_update_interior.c
index ff8cb37ed9a1..3d8b6218c983 100644
--- a/fs/bcachefs/btree_update_interior.c
+++ b/fs/bcachefs/btree_update_interior.c
@@ -332,7 +332,11 @@ retry:
 		goto retry;
 	}
 
-	bkey_btree_ptr_init(&tmp.k);
+	if (c->sb.features & (1ULL << BCH_FEATURE_btree_ptr_v2))
+		bkey_btree_ptr_v2_init(&tmp.k);
+	else
+		bkey_btree_ptr_init(&tmp.k);
+
 	bch2_alloc_sectors_append_ptrs(c, wp, &tmp.k, c->opts.btree_node_size);
 
 	bch2_open_bucket_get(c, wp, &ob);
@@ -354,14 +358,13 @@ static struct btree *bch2_btree_node_alloc(struct btree_update *as, unsigned lev
 {
 	struct bch_fs *c = as->c;
 	struct btree *b;
+	int ret;
 
 	BUG_ON(level >= BTREE_MAX_DEPTH);
 	BUG_ON(!as->reserve->nr);
 
 	b = as->reserve->b[--as->reserve->nr];
 
-	BUG_ON(bch2_btree_node_hash_insert(&c->btree_cache, b, level, as->btree_id));
-
 	set_btree_node_accessed(b);
 	set_btree_node_dirty(b);
 	set_btree_node_need_write(b);
@@ -372,7 +375,16 @@ static struct btree *bch2_btree_node_alloc(struct btree_update *as, unsigned lev
 	b->data->flags = 0;
 	SET_BTREE_NODE_ID(b->data, as->btree_id);
 	SET_BTREE_NODE_LEVEL(b->data, level);
-	b->data->ptr = bkey_i_to_btree_ptr(&b->key)->v.start[0];
+	b->data->ptr = bch2_bkey_ptrs_c(bkey_i_to_s_c(&b->key)).start->ptr;
+
+	if (b->key.k.type == KEY_TYPE_btree_ptr_v2) {
+		struct bkey_i_btree_ptr_v2 *bp = bkey_i_to_btree_ptr_v2(&b->key);
+
+		bp->v.mem_ptr		= 0;
+		bp->v.seq		= b->data->keys.seq;
+		bp->v.sectors_written	= 0;
+		bp->v.sectors		= cpu_to_le16(c->opts.btree_node_size);
+	}
 
 	if (c->sb.features & (1ULL << BCH_FEATURE_new_extent_overwrite))
 		SET_BTREE_NODE_NEW_EXTENT_OVERWRITE(b->data, true);
@@ -385,10 +397,26 @@ static struct btree *bch2_btree_node_alloc(struct btree_update *as, unsigned lev
 
 	btree_node_will_make_reachable(as, b);
 
+	ret = bch2_btree_node_hash_insert(&c->btree_cache, b, level, as->btree_id);
+	BUG_ON(ret);
+
 	trace_btree_node_alloc(c, b);
 	return b;
 }
 
+static void btree_set_min(struct btree *b, struct bpos pos)
+{
+	if (b->key.k.type == KEY_TYPE_btree_ptr_v2)
+		bkey_i_to_btree_ptr_v2(&b->key)->v.min_key = pos;
+	b->data->min_key = pos;
+}
+
+static void btree_set_max(struct btree *b, struct bpos pos)
+{
+	b->key.k.p = pos;
+	b->data->max_key = pos;
+}
+
 struct btree *__bch2_btree_node_alloc_replacement(struct btree_update *as,
 						  struct btree *b,
 						  struct bkey_format format)
@@ -397,11 +425,12 @@ struct btree *__bch2_btree_node_alloc_replacement(struct btree_update *as,
 
 	n = bch2_btree_node_alloc(as, b->c.level);
 
-	n->data->min_key	= b->data->min_key;
-	n->data->max_key	= b->data->max_key;
-	n->data->format		= format;
 	SET_BTREE_NODE_SEQ(n->data, BTREE_NODE_SEQ(b->data) + 1);
 
+	btree_set_min(n, b->data->min_key);
+	btree_set_max(n, b->data->max_key);
+
+	n->data->format		= format;
 	btree_node_set_format(n, format);
 
 	bch2_btree_sort_into(as->c, n, b);
@@ -431,10 +460,9 @@ static struct btree *__btree_root_alloc(struct btree_update *as, unsigned level)
 {
 	struct btree *b = bch2_btree_node_alloc(as, level);
 
-	b->data->min_key = POS_MIN;
-	b->data->max_key = POS_MAX;
+	btree_set_min(b, POS_MIN);
+	btree_set_max(b, POS_MAX);
 	b->data->format = bch2_btree_calc_format(b);
-	b->key.k.p = POS_MAX;
 	btree_node_set_format(b, b->data->format);
 
 	bch2_btree_build_aux_trees(b);
@@ -1263,10 +1291,8 @@ static struct btree *__btree_split_node(struct btree_update *as,
 
 	BUG_ON(!prev);
 
-	n1->key.k.p = bkey_unpack_pos(n1, prev);
-	n1->data->max_key = n1->key.k.p;
-	n2->data->min_key =
-		btree_type_successor(n1->c.btree_id, n1->key.k.p);
+	btree_set_max(n1, bkey_unpack_pos(n1, prev));
+	btree_set_min(n2, btree_type_successor(n1->c.btree_id, n1->key.k.p));
 
 	set2->u64s = cpu_to_le16((u64 *) vstruct_end(set1) - (u64 *) k);
 	set1->u64s = cpu_to_le16(le16_to_cpu(set1->u64s) - le16_to_cpu(set2->u64s));
@@ -1749,10 +1775,9 @@ retry:
 
 	n = bch2_btree_node_alloc(as, b->c.level);
 
-	n->data->min_key	= prev->data->min_key;
-	n->data->max_key	= next->data->max_key;
+	btree_set_min(n, prev->data->min_key);
+	btree_set_max(n, next->data->max_key);
 	n->data->format		= new_f;
-	n->key.k.p = next->key.k.p;
 
 	btree_node_set_format(n, new_f);
 
@@ -2202,8 +2227,8 @@ void bch2_btree_root_alloc(struct bch_fs *c, enum btree_id id)
 	bch2_btree_build_aux_trees(b);
 
 	b->data->flags = 0;
-	b->data->min_key = POS_MIN;
-	b->data->max_key = POS_MAX;
+	btree_set_min(b, POS_MIN);
+	btree_set_max(b, POS_MAX);
 	b->data->format = bch2_btree_calc_format(b);
 	btree_node_set_format(b, b->data->format);
 
diff --git a/fs/bcachefs/buckets.c b/fs/bcachefs/buckets.c
index 60ad443bb509..9fae7d9fb495 100644
--- a/fs/bcachefs/buckets.c
+++ b/fs/bcachefs/buckets.c
@@ -1194,6 +1194,7 @@ int bch2_mark_key_locked(struct bch_fs *c,
 		ret = bch2_mark_alloc(c, k, fs_usage, journal_seq, flags);
 		break;
 	case KEY_TYPE_btree_ptr:
+	case KEY_TYPE_btree_ptr_v2:
 		sectors = !(flags & BTREE_TRIGGER_OVERWRITE)
 			?  c->opts.btree_node_size
 			: -c->opts.btree_node_size;
@@ -1729,6 +1730,7 @@ int bch2_trans_mark_key(struct btree_trans *trans, struct bkey_s_c k,
 
 	switch (k.k->type) {
 	case KEY_TYPE_btree_ptr:
+	case KEY_TYPE_btree_ptr_v2:
 		sectors = !(flags & BTREE_TRIGGER_OVERWRITE)
 			?  c->opts.btree_node_size
 			: -c->opts.btree_node_size;
diff --git a/fs/bcachefs/buckets.h b/fs/bcachefs/buckets.h
index 2e49f2a8ccd9..4c84787575f5 100644
--- a/fs/bcachefs/buckets.h
+++ b/fs/bcachefs/buckets.h
@@ -97,7 +97,8 @@ static inline struct bucket *PTR_BUCKET(struct bch_dev *ca,
 static inline enum bch_data_type ptr_data_type(const struct bkey *k,
 					       const struct bch_extent_ptr *ptr)
 {
-	if (k->type == KEY_TYPE_btree_ptr)
+	if (k->type == KEY_TYPE_btree_ptr ||
+	    k->type == KEY_TYPE_btree_ptr_v2)
 		return BCH_DATA_BTREE;
 
 	return ptr->cached ? BCH_DATA_CACHED : BCH_DATA_USER;
diff --git a/fs/bcachefs/extents.c b/fs/bcachefs/extents.c
index 10ca544317ba..cff4955d203b 100644
--- a/fs/bcachefs/extents.c
+++ b/fs/bcachefs/extents.c
@@ -748,6 +748,7 @@ void bch2_bkey_append_ptr(struct bkey_i *k,
 
 	switch (k->k.type) {
 	case KEY_TYPE_btree_ptr:
+	case KEY_TYPE_btree_ptr_v2:
 	case KEY_TYPE_extent:
 		EBUG_ON(bkey_val_u64s(&k->k) >= BKEY_EXTENT_VAL_U64s_MAX);
 
@@ -1030,6 +1031,8 @@ const char *bch2_bkey_ptrs_invalid(const struct bch_fs *c, struct bkey_s_c k)
 
 	if (k.k->type == KEY_TYPE_btree_ptr)
 		size_ondisk = c->opts.btree_node_size;
+	if (k.k->type == KEY_TYPE_btree_ptr_v2)
+		size_ondisk = le16_to_cpu(bkey_s_c_to_btree_ptr_v2(k).v->sectors);
 
 	bkey_extent_entry_for_each(ptrs, entry) {
 		if (__extent_entry_type(entry) >= BCH_EXTENT_ENTRY_MAX)
diff --git a/fs/bcachefs/extents.h b/fs/bcachefs/extents.h
index 6e8119a8ad30..70b7d70269dc 100644
--- a/fs/bcachefs/extents.h
+++ b/fs/bcachefs/extents.h
@@ -225,6 +225,13 @@ static inline struct bkey_ptrs_c bch2_bkey_ptrs_c(struct bkey_s_c k)
 			bkey_val_end(r),
 		};
 	}
+	case KEY_TYPE_btree_ptr_v2: {
+		struct bkey_s_c_btree_ptr_v2 e = bkey_s_c_to_btree_ptr_v2(k);
+		return (struct bkey_ptrs_c) {
+			to_entry(&e.v->start[0]),
+			to_entry(extent_entry_last(e))
+		};
+	}
 	default:
 		return (struct bkey_ptrs_c) { NULL, NULL };
 	}
@@ -372,6 +379,13 @@ void bch2_btree_ptr_to_text(struct printbuf *, struct bch_fs *,
 	.swab		= bch2_ptr_swab,		\
 }
 
+#define bch2_bkey_ops_btree_ptr_v2 (struct bkey_ops) {	\
+	.key_invalid	= bch2_btree_ptr_invalid,	\
+	.key_debugcheck	= bch2_btree_ptr_debugcheck,	\
+	.val_to_text	= bch2_btree_ptr_to_text,	\
+	.swab		= bch2_ptr_swab,		\
+}
+
 /* KEY_TYPE_extent: */
 
 const char *bch2_extent_invalid(const struct bch_fs *, struct bkey_s_c);
@@ -416,6 +430,7 @@ static inline bool bkey_extent_is_direct_data(const struct bkey *k)
 {
 	switch (k->type) {
 	case KEY_TYPE_btree_ptr:
+	case KEY_TYPE_btree_ptr_v2:
 	case KEY_TYPE_extent:
 	case KEY_TYPE_reflink_v:
 		return true;
diff --git a/fs/bcachefs/recovery.c b/fs/bcachefs/recovery.c
index 29e6f9f00bad..c9d12f7c180e 100644
--- a/fs/bcachefs/recovery.c
+++ b/fs/bcachefs/recovery.c
@@ -1010,6 +1010,7 @@ int bch2_fs_recovery(struct bch_fs *c)
 		c->disk_sb.sb->version = le16_to_cpu(bcachefs_metadata_version_current);
 		c->disk_sb.sb->features[0] |= 1ULL << BCH_FEATURE_new_siphash;
 		c->disk_sb.sb->features[0] |= 1ULL << BCH_FEATURE_new_extent_overwrite;
+		c->disk_sb.sb->features[0] |= 1ULL << BCH_FEATURE_btree_ptr_v2;
 		write_sb = true;
 	}
 
diff --git a/fs/bcachefs/replicas.c b/fs/bcachefs/replicas.c
index 66787d0c5c63..f4851c8b8f88 100644
--- a/fs/bcachefs/replicas.c
+++ b/fs/bcachefs/replicas.c
@@ -112,6 +112,7 @@ void bch2_bkey_to_replicas(struct bch_replicas_entry *e,
 
 	switch (k.k->type) {
 	case KEY_TYPE_btree_ptr:
+	case KEY_TYPE_btree_ptr_v2:
 		e->data_type = BCH_DATA_BTREE;
 		extent_to_replicas(k, e);
 		break;
-- 
2.25.1
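
The mechanism the commit message describes boils down to one invariant: the
parent's btree_ptr_v2 key carries the same random 64-bit cookie that was
stamped into the child node's header at allocation time (bp->v.seq =
b->data->keys.seq in bch2_btree_node_alloc()), and the read path rejects any
node whose on-disk cookie does not match (the btree_err_on() in
bch2_btree_node_read_done() flagged BTREE_ERR_MUST_RETRY, "got wrong btree
node"). The following is only a rough standalone C sketch of that check; the
types and helpers (node_header, node_ptr, alloc_node, check_node) are invented
for illustration and are not the bcachefs API.

/*
 * Sketch of the seq-cookie check, under the assumptions stated above:
 * the parent pointer remembers the cookie written into the child node
 * at allocation time, and the read path refuses a node whose cookie
 * does not match.
 */
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

struct node_header {		/* stands in for the on-disk node header */
	uint64_t seq;		/* random cookie chosen at allocation */
};

struct node_ptr {		/* stands in for a bch_btree_ptr_v2-style key */
	uint64_t seq;		/* copy of the child's cookie */
	uint64_t offset;	/* where the node is supposed to live */
};

/* Allocation: pick a cookie and stamp it into both the node and the pointer. */
static void alloc_node(struct node_header *hdr, struct node_ptr *ptr,
		       uint64_t offset)
{
	hdr->seq    = ((uint64_t) rand() << 32) | (uint64_t) rand();
	ptr->seq    = hdr->seq;
	ptr->offset = offset;
}

/* Read path: verify we really read the node the pointer referred to. */
static int check_node(const struct node_ptr *ptr,
		      const struct node_header *hdr)
{
	if (hdr->seq != ptr->seq) {
		fprintf(stderr, "got wrong btree node\n");
		return -1;	/* caller would retry, e.g. from another replica */
	}
	return 0;
}

int main(void)
{
	struct node_header on_disk;
	struct node_ptr parent;

	alloc_node(&on_disk, &parent, 4096);
	printf("match: %d\n", check_node(&parent, &on_disk) == 0);

	on_disk.seq ^= 1;	/* simulate reading a stale or misdirected node */
	printf("match: %d\n", check_node(&parent, &on_disk) == 0);
	return 0;
}

The same cookie also serves as the hash for the in-memory btree node cache
(btree_ptr_hash_val() returns v.seq for v2 pointers), which appears to be why
the patch moves bch2_btree_node_hash_insert() in bch2_btree_node_alloc() to
after the point where bp->v.seq has been filled in.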