bcachefs: Unwritten extents support
author Kent Overstreet <kent.overstreet@linux.dev>
Sun, 13 Nov 2022 23:59:01 +0000 (18:59 -0500)
committer Kent Overstreet <kent.overstreet@linux.dev>
Sun, 22 Oct 2023 21:09:51 +0000 (17:09 -0400)
 - bch2_extent_merge checks unwritten bit
 - read path returns 0s for unwritten extents without actually reading
 - reflink path skips over unwritten extents
 - bch2_bkey_ptrs_invalid() rejects extents that mix written and
   unwritten ptrs, and non-extent keys (stripes, btree ptrs) that have
   unwritten ptrs
 - fiemap checks for unwritten extents and returns
   FIEMAP_EXTENT_UNWRITTEN

Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
fs/bcachefs/bcachefs_format.h
fs/bcachefs/extents.c
fs/bcachefs/extents.h
fs/bcachefs/fs-io.c
fs/bcachefs/fs.c
fs/bcachefs/fsck.c
fs/bcachefs/io.c
fs/bcachefs/reflink.c

index e0e2219fb1cc2d2d0f87641c90f1dfe9ab8bad4c..57327c4dc9b481f53e1a8a6db008ae16b942e35a 100644 (file)
@@ -582,7 +582,7 @@ struct bch_extent_ptr {
        __u64                   type:1,
                                cached:1,
                                unused:1,
-                               reservation:1,
+                               unwritten:1,
                                offset:44, /* 8 petabytes */
                                dev:8,
                                gen:8;
@@ -590,7 +590,7 @@ struct bch_extent_ptr {
        __u64                   gen:8,
                                dev:8,
                                offset:44,
-                               reservation:1,
+                               unwritten:1,
                                unused:1,
                                cached:1,
                                type:1;
index 3d124dc5bbefef29515e99c1161158cc5e479b2b..627edba24900bb4da8ca51bf54acac0593ebbf08 100644 (file)
@@ -116,6 +116,13 @@ int bch2_bkey_pick_read_device(struct bch_fs *c, struct bkey_s_c k,
                return -EIO;
 
        bkey_for_each_ptr_decode(k.k, ptrs, p, entry) {
+               /*
+                * Unwritten extent: no need to actually read, treat it as a
+                * hole and return 0s:
+                */
+               if (p.ptr.unwritten)
+                       return 0;
+
                ca = bch_dev_bkey_exists(c, p.ptr.dev);
 
                /*
@@ -269,6 +276,7 @@ bool bch2_extent_merge(struct bch_fs *c, struct bkey_s l, struct bkey_s_c r)
                    rp.ptr.offset + rp.crc.offset ||
                    lp.ptr.dev                  != rp.ptr.dev ||
                    lp.ptr.gen                  != rp.ptr.gen ||
+                   lp.ptr.unwritten            != rp.ptr.unwritten ||
                    lp.has_ec                   != rp.has_ec)
                        return false;
 
@@ -904,6 +912,9 @@ bool bch2_extents_match(struct bkey_s_c k1, struct bkey_s_c k2)
        const union bch_extent_entry *entry1, *entry2;
        struct extent_ptr_decoded p1, p2;
 
+       if (bkey_extent_is_unwritten(k1) != bkey_extent_is_unwritten(k2))
+               return false;
+
        bkey_for_each_ptr_decode(k1.k, ptrs1, p1, entry1)
                bkey_for_each_ptr_decode(k2.k, ptrs2, p2, entry2)
                        if (p1.ptr.dev          == p2.ptr.dev &&
@@ -981,10 +992,12 @@ void bch2_bkey_ptrs_to_text(struct printbuf *out, struct bch_fs *c,
                                u32 offset;
                                u64 b = sector_to_bucket_and_offset(ca, ptr->offset, &offset);
 
-                               prt_printf(out, "ptr: %u:%llu:%u gen %u%s", ptr->dev,
-                                      b, offset, ptr->gen,
-                                      ptr->cached ? " cached" : "");
-
+                               prt_printf(out, "ptr: %u:%llu:%u gen %u",
+                                          ptr->dev, b, offset, ptr->gen);
+                               if (ptr->cached)
+                                       prt_str(out, " cached");
+                               if (ptr->unwritten)
+                                       prt_str(out, " unwritten");
                                if (ca && ptr_stale(ca, ptr))
                                        prt_printf(out, " stale");
                        }
@@ -1073,6 +1086,7 @@ int bch2_bkey_ptrs_invalid(const struct bch_fs *c, struct bkey_s_c k,
        unsigned size_ondisk = k.k->size;
        unsigned nonce = UINT_MAX;
        unsigned nr_ptrs = 0;
+       bool unwritten = false;
        int ret;
 
        if (bkey_is_btree_ptr(k.k))
@@ -1097,6 +1111,18 @@ int bch2_bkey_ptrs_invalid(const struct bch_fs *c, struct bkey_s_c k,
                                                 false, err);
                        if (ret)
                                return ret;
+
+                       if (nr_ptrs && unwritten != entry->ptr.unwritten) {
+                               prt_printf(err, "extent with unwritten and written ptrs");
+                               return -BCH_ERR_invalid_bkey;
+                       }
+
+                       if (k.k->type != KEY_TYPE_extent && entry->ptr.unwritten) {
+                               prt_printf(err, "has unwritten ptrs");
+                               return -BCH_ERR_invalid_bkey;
+                       }
+
+                       unwritten = entry->ptr.unwritten;
                        nr_ptrs++;
                        break;
                case BCH_EXTENT_ENTRY_crc32:
index f640254004e7819983d97d437d6e6313f828c493..659ab76ea62cd21e5edfd2dc8cffcd72dcb2910c 100644 (file)
@@ -510,6 +510,23 @@ static inline bool bkey_extent_is_allocation(const struct bkey *k)
        }
 }
 
+static inline bool bkey_extent_is_unwritten(struct bkey_s_c k)
+{
+       struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
+       const struct bch_extent_ptr *ptr;
+
+       bkey_for_each_ptr(ptrs, ptr)
+               if (ptr->unwritten)
+                       return true;
+       return false;
+}
+
+static inline bool bkey_extent_is_reservation(struct bkey_s_c k)
+{
+       return k.k->type == KEY_TYPE_reservation ||
+               bkey_extent_is_unwritten(k);
+}
+
 static inline struct bch_devs_list bch2_bkey_devs(struct bkey_s_c k)
 {
        struct bch_devs_list ret = (struct bch_devs_list) { 0 };
index 77037574cb0d5d57054d1751587d6738b5a87002..b5cf0a3218eae97946829c24ce9116b5f249d037 100644 (file)
@@ -341,11 +341,11 @@ static struct bch_page_state *bch2_page_state_create(struct page *page,
        return bch2_page_state(page) ?: __bch2_page_state_create(page, gfp);
 }
 
-static unsigned bkey_to_sector_state(const struct bkey *k)
+static unsigned bkey_to_sector_state(struct bkey_s_c k)
 {
-       if (k->type == KEY_TYPE_reservation)
+       if (bkey_extent_is_reservation(k))
                return SECTOR_RESERVED;
-       if (bkey_extent_is_allocation(k))
+       if (bkey_extent_is_allocation(k.k))
                return SECTOR_ALLOCATED;
        return SECTOR_UNALLOCATED;
 }
@@ -396,7 +396,7 @@ retry:
                           SPOS(inum.inum, offset, snapshot),
                           BTREE_ITER_SLOTS, k, ret) {
                unsigned nr_ptrs = bch2_bkey_nr_ptrs_fully_allocated(k);
-               unsigned state = bkey_to_sector_state(k.k);
+               unsigned state = bkey_to_sector_state(k);
 
                while (pg_idx < nr_pages) {
                        struct page *page = pages[pg_idx];
@@ -436,7 +436,7 @@ static void bch2_bio_page_state_set(struct bio *bio, struct bkey_s_c k)
        struct bio_vec bv;
        unsigned nr_ptrs = k.k->type == KEY_TYPE_reflink_v
                ? 0 : bch2_bkey_nr_ptrs_fully_allocated(k);
-       unsigned state = bkey_to_sector_state(k.k);
+       unsigned state = bkey_to_sector_state(k);
 
        bio_for_each_segment(bv, bio, iter)
                __bch2_page_state_set(bv.bv_page, bv.bv_offset >> 9,
@@ -3093,8 +3093,8 @@ static int __bchfs_fallocate(struct bch_inode_info *inode, int mode,
                        goto bkey_err;
 
                /* already reserved */
-               if (k.k->type == KEY_TYPE_reservation &&
-                   bkey_s_c_to_reservation(k).v->nr_replicas >= opts.data_replicas) {
+               if (bkey_extent_is_reservation(k) &&
+                   bch2_bkey_nr_ptrs_fully_allocated(k) >= opts.data_replicas) {
                        bch2_btree_iter_advance(&iter);
                        continue;
                }
index cc41472a335e00874d3a920a9f86d3096cf7ecaa..15ab77ebb8c626e41012541e5e9152e1243caca5 100644 (file)
@@ -811,6 +811,9 @@ static int bch2_fill_extent(struct bch_fs *c,
                        int flags2 = 0;
                        u64 offset = p.ptr.offset;
 
+                       if (p.ptr.unwritten)
+                               flags2 |= FIEMAP_EXTENT_UNWRITTEN;
+
                        if (p.crc.compression_type)
                                flags2 |= FIEMAP_EXTENT_ENCODED;
                        else
index 24365b9260f6008a1a2ca44d8872700c5f010e69..5887d78190eb971176276ef31dea635ba05baa00 100644 (file)
@@ -1251,8 +1251,8 @@ static int check_extent(struct btree_trans *trans, struct btree_iter *iter,
                        continue;
 
                if (fsck_err_on(!(i->inode.bi_flags & BCH_INODE_I_SIZE_DIRTY) &&
-                               k.k->type != KEY_TYPE_reservation &&
-                               k.k->p.offset > round_up(i->inode.bi_size, block_bytes(c)) >> 9, c,
+                               k.k->p.offset > round_up(i->inode.bi_size, block_bytes(c)) >> 9 &&
+                               !bkey_extent_is_reservation(k), c,
                                "extent type past end of inode %llu:%u, i_size %llu\n  %s",
                                i->inode.bi_inum, i->snapshot, i->inode.bi_size,
                                (bch2_bkey_val_to_text(&buf, c, k), buf.buf))) {
index c51381daf1c51ffd387ebc90143f5a6273fd2b99..1d0ec638f64568857cfa19e7c04522cdb45b01fe 100644 (file)
@@ -1481,6 +1481,9 @@ static inline bool should_promote(struct bch_fs *c, struct bkey_s_c k,
        if (bch2_bkey_has_target(c, k, opts.promote_target))
                return false;
 
+       if (bkey_extent_is_unwritten(k))
+               return false;
+
        if (bch2_target_congested(c, opts.promote_target)) {
                /* XXX trace this */
                return false;
index aae924dc81f7c71049e282dc4b5e1f751a8b79fc..faf75bcf9ee78526bbc9e6a0e855cc462b0076d5 100644 (file)
@@ -251,9 +251,13 @@ static struct bkey_s_c get_next_src(struct btree_iter *iter, struct bpos end)
        struct bkey_s_c k;
        int ret;
 
-       for_each_btree_key_upto_continue_norestart(*iter, end, 0, k, ret)
+       for_each_btree_key_upto_continue_norestart(*iter, end, 0, k, ret) {
+               if (bkey_extent_is_unwritten(k))
+                       continue;
+
                if (bkey_extent_is_data(k.k))
                        return k;
+       }
 
        if (bkey_ge(iter->pos, end))
                bch2_btree_iter_set_pos(iter, end);