#include "bcachefs.h"
#include "bkey_methods.h"
#include "btree_gc.h"
+#include "btree_io.h"
#include "btree_iter.h"
#include "buckets.h"
#include "checksum.h"
return bch2_rand_range(l1 + l2) > l1;
}
- if (force_reconstruct_read(c))
+ if (bch2_force_reconstruct_read)
return p1.idx > p2.idx;
return p1.idx < p2.idx;
!bch2_dev_is_readable(ca))
p.idx++;
- if (force_reconstruct_read(c) &&
+ if (bch2_force_reconstruct_read &&
!p.idx && p.has_ec)
p.idx++;
const char *bch2_btree_ptr_invalid(const struct bch_fs *c, struct bkey_s_c k)
{
- if (bkey_val_u64s(k.k) > BKEY_BTREE_PTR_VAL_U64s_MAX)
+ if (bkey_val_u64s(k.k) > BCH_REPLICAS_MAX)
return "value too big";
return bch2_bkey_ptrs_invalid(c, k);
}
-void bch2_btree_ptr_debugcheck(struct bch_fs *c, struct bkey_s_c k)
+void bch2_btree_ptr_to_text(struct printbuf *out, struct bch_fs *c,
+ struct bkey_s_c k)
{
- struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
- const struct bch_extent_ptr *ptr;
- const char *err;
- char buf[160];
- struct bucket_mark mark;
- struct bch_dev *ca;
-
- bch2_fs_bug_on(!test_bit(BCH_FS_REBUILD_REPLICAS, &c->flags) &&
- !bch2_bkey_replicas_marked(c, k, false), c,
- "btree key bad (replicas not marked in superblock):\n%s",
- (bch2_bkey_val_to_text(&PBUF(buf), c, k), buf));
-
- if (!test_bit(BCH_FS_INITIAL_GC_DONE, &c->flags))
- return;
+ bch2_bkey_ptrs_to_text(out, c, k);
+}
- bkey_for_each_ptr(ptrs, ptr) {
- ca = bch_dev_bkey_exists(c, ptr->dev);
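+
+/*
+ * btree_ptr_v2 keys carry fixed fields (seq, sectors_written, min_key)
+ * ahead of the pointer list, so check the value size bounds here before
+ * the common pointer validation:
+ */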
+const char *bch2_btree_ptr_v2_invalid(const struct bch_fs *c, struct bkey_s_c k)
+{
+ struct bkey_s_c_btree_ptr_v2 bp = bkey_s_c_to_btree_ptr_v2(k);
- mark = ptr_bucket_mark(ca, ptr);
+ if (bkey_val_bytes(k.k) <= sizeof(*bp.v))
+ return "value too small";
- err = "stale";
- if (gen_after(mark.gen, ptr->gen))
- goto err;
+ if (bkey_val_u64s(k.k) > BKEY_BTREE_PTR_VAL_U64s_MAX)
+ return "value too big";
- err = "inconsistent";
- if (mark.data_type != BCH_DATA_BTREE ||
- mark.dirty_sectors < c->opts.btree_node_size)
- goto err;
- }
+ if (c->sb.version < bcachefs_metadata_version_snapshot &&
+ bp.v->min_key.snapshot)
+ return "invalid min_key.snapshot";
- return;
-err:
- bch2_bkey_val_to_text(&PBUF(buf), c, k);
- bch2_fs_bug(c, "%s btree pointer %s: bucket %zi gen %i mark %08x",
- err, buf, PTR_BUCKET_NR(ca, ptr),
- mark.gen, (unsigned) mark.v.counter);
+ return bch2_bkey_ptrs_invalid(c, k);
}
-void bch2_btree_ptr_to_text(struct printbuf *out, struct bch_fs *c,
+void bch2_btree_ptr_v2_to_text(struct printbuf *out, struct bch_fs *c,
struct bkey_s_c k)
{
+ struct bkey_s_c_btree_ptr_v2 bp = bkey_s_c_to_btree_ptr_v2(k);
+
+ pr_buf(out, "seq %llx written %u min_key ",
+ le64_to_cpu(bp.v->seq),
+ le16_to_cpu(bp.v->sectors_written));
+
+ bch2_bpos_to_text(out, bp.v->min_key);
+ pr_buf(out, " ");
bch2_bkey_ptrs_to_text(out, c, k);
}
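+
+/*
+ * Format compat for v2 btree pointers: pre-inode_btree_change extent
+ * btrees stored min_key as the predecessor of its current in-memory
+ * value, so translate on read (successor) and on write (predecessor):
+ */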
+void bch2_btree_ptr_v2_compat(enum btree_id btree_id, unsigned version,
+ unsigned big_endian, int write,
+ struct bkey_s k)
+{
+ struct bkey_s_btree_ptr_v2 bp = bkey_s_to_btree_ptr_v2(k);
+
+ compat_bpos(0, btree_id, version, big_endian, write, &bp.v->min_key);
+
+ if (version < bcachefs_metadata_version_inode_btree_change &&
+ btree_node_type_is_extents(btree_id) &&
+ bkey_cmp(bp.v->min_key, POS_MIN))
+ bp.v->min_key = write
+ ? bpos_nosnap_predecessor(bp.v->min_key)
+ : bpos_nosnap_successor(bp.v->min_key);
+}
+
/* KEY_TYPE_extent: */
const char *bch2_extent_invalid(const struct bch_fs *c, struct bkey_s_c k)
return bch2_bkey_ptrs_invalid(c, k);
}
-void bch2_extent_debugcheck(struct bch_fs *c, struct bkey_s_c k)
-{
- struct bkey_s_c_extent e = bkey_s_c_to_extent(k);
- const union bch_extent_entry *entry;
- struct extent_ptr_decoded p;
- char buf[160];
-
- /*
- * XXX: we should be doing most/all of these checks at startup time,
- * where we check bch2_bkey_invalid() in btree_node_read_done()
- *
- * But note that we can't check for stale pointers or incorrect gc marks
- * until after journal replay is done (it might be an extent that's
- * going to get overwritten during replay)
- */
-
- if (percpu_down_read_trylock(&c->mark_lock)) {
- bch2_fs_bug_on(!test_bit(BCH_FS_REBUILD_REPLICAS, &c->flags) &&
- !bch2_bkey_replicas_marked_locked(c, e.s_c, false), c,
- "extent key bad (replicas not marked in superblock):\n%s",
- (bch2_bkey_val_to_text(&PBUF(buf), c, e.s_c), buf));
- percpu_up_read(&c->mark_lock);
- }
- /*
- * If journal replay hasn't finished, we might be seeing keys
- * that will be overwritten by the time journal replay is done:
- */
- if (!test_bit(JOURNAL_REPLAY_DONE, &c->journal.flags))
- return;
-
- extent_for_each_ptr_decode(e, p, entry) {
- struct bch_dev *ca = bch_dev_bkey_exists(c, p.ptr.dev);
- struct bucket_mark mark = ptr_bucket_mark(ca, &p.ptr);
- unsigned stale = gen_after(mark.gen, p.ptr.gen);
- unsigned disk_sectors = ptr_disk_sectors(p);
- unsigned mark_sectors = p.ptr.cached
- ? mark.cached_sectors
- : mark.dirty_sectors;
-
- bch2_fs_bug_on(stale && !p.ptr.cached, c,
- "stale dirty pointer (ptr gen %u bucket %u",
- p.ptr.gen, mark.gen);
-
- bch2_fs_bug_on(stale > 96, c, "key too stale: %i", stale);
-
- bch2_fs_bug_on(!stale &&
- (mark.data_type != BCH_DATA_USER ||
- mark_sectors < disk_sectors), c,
- "extent pointer not marked: %s:\n"
- "type %u sectors %u < %u",
- (bch2_bkey_val_to_text(&PBUF(buf), c, e.s_c), buf),
- mark.data_type,
- mark_sectors, disk_sectors);
- }
-}
-
void bch2_extent_to_text(struct printbuf *out, struct bch_fs *c,
struct bkey_s_c k)
{
if (!bch2_checksum_mergeable(crc_l.csum_type))
return BCH_MERGE_NOMERGE;
- if (crc_l.compression_type)
+ if (crc_is_compressed(crc_l))
return BCH_MERGE_NOMERGE;
if (crc_l.csum_type &&
crc_r.uncompressed_size > c->sb.encoded_extent_max)
return BCH_MERGE_NOMERGE;
- if (crc_l.uncompressed_size + crc_r.uncompressed_size - 1 >
+ if (crc_l.uncompressed_size + crc_r.uncompressed_size >
bch2_crc_field_size_max[extent_entry_type(en_l)])
return BCH_MERGE_NOMERGE;
static inline bool can_narrow_crc(struct bch_extent_crc_unpacked u,
struct bch_extent_crc_unpacked n)
{
- return !u.compression_type &&
+ return !crc_is_compressed(u) &&
u.csum_type &&
u.uncompressed_size > u.live_size &&
bch2_csum_type_is_encryption(u.csum_type) ==
/* Find a checksum entry that covers only live data: */
if (!n.csum_type) {
bkey_for_each_crc(&k->k, ptrs, u, i)
- if (!u.compression_type &&
+ if (!crc_is_compressed(u) &&
u.csum_type &&
u.live_size == u.uncompressed_size) {
n = u;
return false;
}
found:
- BUG_ON(n.compression_type);
+ BUG_ON(crc_is_compressed(n));
BUG_ON(n.offset);
BUG_ON(n.live_size != k->k.size);
enum bch_extent_entry_type type;
if (bch_crc_bytes[new.csum_type] <= 4 &&
- new.uncompressed_size - 1 <= CRC32_SIZE_MAX &&
+ new.uncompressed_size <= CRC32_SIZE_MAX &&
new.nonce <= CRC32_NONCE_MAX)
type = BCH_EXTENT_ENTRY_crc32;
else if (bch_crc_bytes[new.csum_type] <= 10 &&
- new.uncompressed_size - 1 <= CRC64_SIZE_MAX &&
+ new.uncompressed_size <= CRC64_SIZE_MAX &&
new.nonce <= CRC64_NONCE_MAX)
type = BCH_EXTENT_ENTRY_crc64;
else if (bch_crc_bytes[new.csum_type] <= 16 &&
- new.uncompressed_size - 1 <= CRC128_SIZE_MAX &&
+ new.uncompressed_size <= CRC128_SIZE_MAX &&
new.nonce <= CRC128_NONCE_MAX)
type = BCH_EXTENT_ENTRY_crc128;
else
struct extent_ptr_decoded p;
bkey_for_each_ptr_decode(k.k, ptrs, p, entry)
- ret += !p.ptr.cached &&
- p.crc.compression_type == BCH_COMPRESSION_TYPE_none;
+ ret += !p.ptr.cached && !crc_is_compressed(p.crc);
}
return ret;
unsigned ret = 0;
bkey_for_each_ptr_decode(k.k, ptrs, p, entry)
- if (!p.ptr.cached &&
- p.crc.compression_type != BCH_COMPRESSION_TYPE_none)
+ if (!p.ptr.cached && crc_is_compressed(p.crc))
ret += p.crc.compressed_size;
return ret;
}
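+
+/* true if any crc entry has compression type "incompressible": */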
+bool bch2_bkey_is_incompressible(struct bkey_s_c k)
+{
+ struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
+ const union bch_extent_entry *entry;
+ struct bch_extent_crc_unpacked crc;
+
+ bkey_for_each_crc(k.k, ptrs, crc, entry)
+ if (crc.compression_type == BCH_COMPRESSION_TYPE_incompressible)
+ return true;
+ return false;
+}
+
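+/*
+ * Returns true if the range [pos, pos + size) is fully allocated with at
+ * least @nr_replicas replicas; if @compressed is false, compressed data
+ * anywhere in the range also fails the check:
+ */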
bool bch2_check_range_allocated(struct bch_fs *c, struct bpos pos, u64 size,
- unsigned nr_replicas)
+ unsigned nr_replicas, bool compressed)
{
struct btree_trans trans;
struct btree_iter *iter;
bch2_trans_init(&trans, c, 0, 0);
- for_each_btree_key(&trans, iter, BTREE_ID_EXTENTS, pos,
+ for_each_btree_key(&trans, iter, BTREE_ID_extents, pos,
BTREE_ITER_SLOTS, k, err) {
if (bkey_cmp(bkey_start_pos(k.k), end) >= 0)
break;
- if (nr_replicas > bch2_bkey_nr_ptrs_fully_allocated(k)) {
+ if (nr_replicas > bch2_bkey_replicas(c, k) ||
+ (!compressed && bch2_bkey_sectors_compressed(k))) {
ret = false;
break;
}
}
+ bch2_trans_iter_put(&trans, iter);
+
bch2_trans_exit(&trans);
return ret;
}
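+
+/*
+ * Number of replicas for a key: one per non-cached pointer, plus any
+ * erasure coding redundancy:
+ */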
+unsigned bch2_bkey_replicas(struct bch_fs *c, struct bkey_s_c k)
+{
+ struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
+ const union bch_extent_entry *entry;
+ struct extent_ptr_decoded p = { 0 };
+ unsigned replicas = 0;
+
+ bkey_for_each_ptr_decode(k.k, ptrs, p, entry) {
+ if (p.ptr.cached)
+ continue;
+
+ if (p.has_ec)
+ replicas += p.ec.redundancy;
+
+ replicas++;
+ }
+
+ return replicas;
+}
+
static unsigned bch2_extent_ptr_durability(struct bch_fs *c,
struct extent_ptr_decoded p)
{
ca = bch_dev_bkey_exists(c, p.ptr.dev);
- if (ca->mi.state != BCH_MEMBER_STATE_FAILED)
+ if (ca->mi.state != BCH_MEMBER_STATE_failed)
durability = max_t(unsigned, durability, ca->mi.durability);
- if (p.has_ec) {
- struct stripe *s =
- genradix_ptr(&c->stripes[0], p.ec.idx);
-
- if (WARN_ON(!s))
- goto out;
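+ /*
+ * redundancy is stored in the stripe pointer itself now, so the
+ * stripes radix tree lookup is no longer needed:
+ */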
+ if (p.has_ec)
+ durability += p.ec.redundancy;
- durability = max_t(unsigned, durability, s->nr_redundant);
- }
-out:
return durability;
}
}
}
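+
+/*
+ * Remove @entry from the key's list of extent entries: copy the remaining
+ * entries down over it and shrink the value:
+ */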
+void bch2_bkey_extent_entry_drop(struct bkey_i *k, union bch_extent_entry *entry)
+{
+ union bch_extent_entry *end = bkey_val_end(bkey_i_to_s(k));
+ union bch_extent_entry *next = extent_entry_next(entry);
+
+ memmove_u64s(entry, next, (u64 *) end - (u64 *) next);
+ k->k.u64s -= extent_entry_u64s(entry);
+}
+
void bch2_bkey_append_ptr(struct bkey_i *k,
struct bch_extent_ptr ptr)
{
switch (k->k.type) {
case KEY_TYPE_btree_ptr:
+ case KEY_TYPE_btree_ptr_v2:
case KEY_TYPE_extent:
EBUG_ON(bkey_val_u64s(&k->k) >= BKEY_EXTENT_VAL_U64s_MAX);
/* will only happen if all pointers were cached: */
if (!bch2_bkey_nr_ptrs(k.s_c))
- k.k->type = KEY_TYPE_discard;
+ k.k->type = KEY_TYPE_deleted;
- return bkey_whiteout(k.k);
+ return bkey_deleted(k.k);
}
void bch2_bkey_ptrs_to_text(struct printbuf *out, struct bch_fs *c,
const char *bch2_bkey_ptrs_invalid(const struct bch_fs *c, struct bkey_s_c k)
{
struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
+ struct bch_devs_list devs;
const union bch_extent_entry *entry;
struct bch_extent_crc_unpacked crc;
unsigned size_ondisk = k.k->size;
const char *reason;
unsigned nonce = UINT_MAX;
+ unsigned i;
- if (k.k->type == KEY_TYPE_btree_ptr)
+ if (k.k->type == KEY_TYPE_btree_ptr ||
+ k.k->type == KEY_TYPE_btree_ptr_v2)
size_ondisk = c->opts.btree_node_size;
bkey_extent_entry_for_each(ptrs, entry) {
}
}
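+
+ /*
+ * Sort a copy of the device list so duplicate pointers to the same
+ * device end up adjacent:
+ */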
+ devs = bch2_bkey_devs(k);
+ bubble_sort(devs.devs, devs.nr, u8_cmp);
+ for (i = 0; i + 1 < devs.nr; i++)
+ if (devs.devs[i] == devs.devs[i + 1])
+ return "multiple ptrs to same device";
+
return NULL;
}
-void bch2_ptr_swab(const struct bkey_format *f, struct bkey_packed *k)
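+/*
+ * Byte swap only the extent entries; fixed fields preceding them (such as
+ * btree_ptr_v2's min_key) are handled by the compat path instead:
+ */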
+void bch2_ptr_swab(struct bkey_s k)
{
+ struct bkey_ptrs ptrs = bch2_bkey_ptrs(k);
union bch_extent_entry *entry;
- u64 *d = (u64 *) bkeyp_val(f, k);
- unsigned i;
+ u64 *d;
- for (i = 0; i < bkeyp_val_u64s(f, k); i++)
- d[i] = swab64(d[i]);
+ for (d = (u64 *) ptrs.start;
+ d != (u64 *) ptrs.end;
+ d++)
+ *d = swab64(*d);
- for (entry = (union bch_extent_entry *) d;
- entry < (union bch_extent_entry *) (d + bkeyp_val_u64s(f, k));
+ for (entry = ptrs.start;
+ entry < ptrs.end;
entry = extent_entry_next(entry)) {
switch (extent_entry_type(entry)) {
case BCH_EXTENT_ENTRY_ptr:
le64_add_cpu(&p.v->idx, sub);
break;
}
- case KEY_TYPE_inline_data: {
- struct bkey_s_inline_data d = bkey_s_to_inline_data(k);
+ case KEY_TYPE_inline_data:
+ case KEY_TYPE_indirect_inline_data: {
+ void *p = bkey_inline_data_p(k);
+ unsigned bytes = bkey_inline_data_bytes(k.k);
- sub = min_t(u64, sub << 9, bkey_val_bytes(d.k));
+ sub = min_t(u64, sub << 9, bytes);
- memmove(d.v->data,
- d.v->data + sub,
- bkey_val_bytes(d.k) - sub);
+ memmove(p, p + sub, bytes - sub);
new_val_u64s -= sub >> 3;
break;
len = where.offset - bkey_start_offset(k.k);
- k.k->p = where;
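+ /* update only the offset, preserving the rest of the position: */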
+ k.k->p.offset = where.offset;
k.k->size = len;
if (!len) {
switch (k.k->type) {
case KEY_TYPE_inline_data:
- new_val_u64s = min(new_val_u64s, k.k->size << 6);
+ case KEY_TYPE_indirect_inline_data:
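+ /* new value size: header plus surviving data bytes, in u64s: */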
+ new_val_u64s = (bkey_inline_data_offset(k.k) +
+ min(bkey_inline_data_bytes(k.k), k.k->size << 9)) >> 3;
break;
}