bcachefs: Fix read retry path for indirect extents
author: Kent Overstreet <kent.overstreet@gmail.com>
Mon, 15 Mar 2021 01:30:08 +0000 (21:30 -0400)
committer: Kent Overstreet <kent.overstreet@linux.dev>
Sun, 22 Oct 2023 21:08:56 +0000 (17:08 -0400)
In the read path, for retry of indirect extents to work, we need to
differentiate between the location in the btree the read was for and
the location where we found the data. This patch adds that plumbing to
bch_read_bio.

Signed-off-by: Kent Overstreet <kent.overstreet@gmail.com>
Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
fs/bcachefs/fs-io.c
fs/bcachefs/fs.c
fs/bcachefs/io.c
fs/bcachefs/io.h
fs/bcachefs/io_types.h
fs/bcachefs/move.c

index 4ccc9318a924498761b5700f3effa10166096d01..8584b90a3df924bd35c647a3761c9592632b39e0 100644 (file)
@@ -788,6 +788,7 @@ retry:
        while (1) {
                struct bkey_s_c k;
                unsigned bytes, sectors, offset_into_extent;
+               enum btree_id data_btree = BTREE_ID_extents;
 
                bch2_btree_iter_set_pos(iter,
                                POS(inum, rbio->bio.bi_iter.bi_sector));
@@ -803,7 +804,7 @@ retry:
 
                bch2_bkey_buf_reassemble(&sk, c, k);
 
-               ret = bch2_read_indirect_extent(trans,
+               ret = bch2_read_indirect_extent(trans, &data_btree,
                                        &offset_into_extent, &sk);
                if (ret)
                        break;
@@ -827,7 +828,8 @@ retry:
                if (bkey_extent_is_allocation(k.k))
                        bch2_add_page_sectors(&rbio->bio, k);
 
-               bch2_read_extent(trans, rbio, k, offset_into_extent, flags);
+               bch2_read_extent(trans, rbio, iter->pos,
+                                data_btree, k, offset_into_extent, flags);
 
                if (flags & BCH_READ_LAST_FRAGMENT)
                        break;
index ef8505da7391664e7e159d5f602089cf315c4f19..1fafd393912c67a46925ed614dbf4a6b047f8af7 100644 (file)
@@ -908,6 +908,8 @@ retry:
        while ((k = bch2_btree_iter_peek(iter)).k &&
               !(ret = bkey_err(k)) &&
               bkey_cmp(iter->pos, end) < 0) {
+               enum btree_id data_btree = BTREE_ID_extents;
+
                if (!bkey_extent_is_data(k.k) &&
                    k.k->type != KEY_TYPE_reservation) {
                        bch2_btree_iter_next(iter);
@@ -920,7 +922,7 @@ retry:
 
                bch2_bkey_buf_reassemble(&cur, c, k);
 
-               ret = bch2_read_indirect_extent(&trans,
+               ret = bch2_read_indirect_extent(&trans, &data_btree,
                                        &offset_into_extent, &cur);
                if (ret)
                        break;
index b402fc2e51d612ad44dae3c5b3a23bd1b6a1101e..425502f7b1b800fd1beca974ff302997363c1733 100644 (file)
@@ -1627,8 +1627,8 @@ static void bch2_read_retry_nodecode(struct bch_fs *c, struct bch_read_bio *rbio
        bch2_bkey_buf_init(&sk);
        bch2_trans_init(&trans, c, 0, 0);
 
-       iter = bch2_trans_get_iter(&trans, BTREE_ID_extents,
-                                  rbio->pos, BTREE_ITER_SLOTS);
+       iter = bch2_trans_get_iter(&trans, rbio->data_btree,
+                                  rbio->read_pos, BTREE_ITER_SLOTS);
 retry:
        rbio->bio.bi_status = 0;
 
@@ -1642,14 +1642,17 @@ retry:
 
        if (!bch2_bkey_matches_ptr(c, k,
                                   rbio->pick.ptr,
-                                  rbio->pos.offset -
+                                  rbio->data_pos.offset -
                                   rbio->pick.crc.offset)) {
                /* extent we wanted to read no longer exists: */
                rbio->hole = true;
                goto out;
        }
 
-       ret = __bch2_read_extent(&trans, rbio, bvec_iter, k, 0, failed, flags);
+       ret = __bch2_read_extent(&trans, rbio, bvec_iter,
+                                rbio->read_pos,
+                                rbio->data_btree,
+                                k, 0, failed, flags);
        if (ret == READ_RETRY)
                goto retry;
        if (ret)
@@ -1671,7 +1674,7 @@ static void bch2_rbio_retry(struct work_struct *work)
        struct bch_fs *c        = rbio->c;
        struct bvec_iter iter   = rbio->bvec_iter;
        unsigned flags          = rbio->flags;
-       u64 inode               = rbio->pos.inode;
+       u64 inode               = rbio->read_pos.inode;
        struct bch_io_failures failed = { .nr = 0 };
 
        trace_read_retry(&rbio->bio);
@@ -1719,7 +1722,7 @@ static int __bch2_rbio_narrow_crcs(struct btree_trans *trans,
                                   struct bch_read_bio *rbio)
 {
        struct bch_fs *c = rbio->c;
-       u64 data_offset = rbio->pos.offset - rbio->pick.crc.offset;
+       u64 data_offset = rbio->data_pos.offset - rbio->pick.crc.offset;
        struct bch_extent_crc_unpacked new_crc;
        struct btree_iter *iter = NULL;
        struct bkey_i *new;
@@ -1729,7 +1732,7 @@ static int __bch2_rbio_narrow_crcs(struct btree_trans *trans,
        if (crc_is_compressed(rbio->pick.crc))
                return 0;
 
-       iter = bch2_trans_get_iter(trans, BTREE_ID_extents, rbio->pos,
+       iter = bch2_trans_get_iter(trans, rbio->data_btree, rbio->data_pos,
                                   BTREE_ITER_SLOTS|BTREE_ITER_INTENT);
        k = bch2_btree_iter_peek_slot(iter);
        if ((ret = bkey_err(k)))
@@ -1862,14 +1865,14 @@ csum_err:
                return;
        }
 
-       bch2_dev_inum_io_error(ca, rbio->pos.inode, (u64) rbio->bvec_iter.bi_sector,
+       bch2_dev_inum_io_error(ca, rbio->read_pos.inode, (u64) rbio->bvec_iter.bi_sector,
                "data checksum error: expected %0llx:%0llx got %0llx:%0llx (type %u)",
                rbio->pick.crc.csum.hi, rbio->pick.crc.csum.lo,
                csum.hi, csum.lo, crc.csum_type);
        bch2_rbio_error(rbio, READ_RETRY_AVOID, BLK_STS_IOERR);
        return;
 decompression_err:
-       bch_err_inum_ratelimited(c, rbio->pos.inode,
+       bch_err_inum_ratelimited(c, rbio->read_pos.inode,
                                 "decompression error");
        bch2_rbio_error(rbio, READ_ERR, BLK_STS_IOERR);
        return;
@@ -1892,13 +1895,9 @@ static void bch2_read_endio(struct bio *bio)
        if (!rbio->split)
                rbio->bio.bi_end_io = rbio->end_io;
 
-       /*
-        * XXX: rbio->pos is not what we want here when reading from indirect
-        * extents
-        */
        if (bch2_dev_inum_io_err_on(bio->bi_status, ca,
-                                   rbio->pos.inode,
-                                   rbio->pos.offset,
+                                   rbio->read_pos.inode,
+                                   rbio->read_pos.offset,
                                    "data read error: %s",
                               bch2_blk_status_to_str(bio->bi_status))) {
                bch2_rbio_error(rbio, READ_RETRY_AVOID, bio->bi_status);
@@ -1963,7 +1962,8 @@ err:
 }
 
 int __bch2_read_extent(struct btree_trans *trans, struct bch_read_bio *orig,
-                      struct bvec_iter iter, struct bkey_s_c k,
+                      struct bvec_iter iter, struct bpos read_pos,
+                      enum btree_id data_btree, struct bkey_s_c k,
                       unsigned offset_into_extent,
                       struct bch_io_failures *failed, unsigned flags)
 {
@@ -1973,7 +1973,7 @@ int __bch2_read_extent(struct btree_trans *trans, struct bch_read_bio *orig,
        struct bch_dev *ca;
        struct promote_op *promote = NULL;
        bool bounce = false, read_full = false, narrow_crcs = false;
-       struct bpos pos = bkey_start_pos(k.k);
+       struct bpos data_pos = bkey_start_pos(k.k);
        int pick_ret;
 
        if (bkey_extent_is_inline_data(k.k)) {
@@ -2049,7 +2049,7 @@ int __bch2_read_extent(struct btree_trans *trans, struct bch_read_bio *orig,
                         pick.crc.offset ||
                         offset_into_extent));
 
-               pos.offset += offset_into_extent;
+               data_pos.offset += offset_into_extent;
                pick.ptr.offset += pick.crc.offset +
                        offset_into_extent;
                offset_into_extent              = 0;
@@ -2123,7 +2123,9 @@ get_bio:
        /* XXX: only initialize this if needed */
        rbio->devs_have         = bch2_bkey_devs(k);
        rbio->pick              = pick;
-       rbio->pos               = pos;
+       rbio->read_pos          = read_pos;
+       rbio->data_btree        = data_btree;
+       rbio->data_pos          = data_pos;
        rbio->version           = k.k->version;
        rbio->promote           = promote;
        INIT_WORK(&rbio->work, NULL);
@@ -2249,6 +2251,7 @@ retry:
                                   BTREE_ITER_SLOTS);
        while (1) {
                unsigned bytes, sectors, offset_into_extent;
+               enum btree_id data_btree = BTREE_ID_extents;
 
                bch2_btree_iter_set_pos(iter,
                                POS(inode, bvec_iter.bi_sector));
@@ -2264,7 +2267,7 @@ retry:
 
                bch2_bkey_buf_reassemble(&sk, c, k);
 
-               ret = bch2_read_indirect_extent(&trans,
+               ret = bch2_read_indirect_extent(&trans, &data_btree,
                                        &offset_into_extent, &sk);
                if (ret)
                        goto err;
@@ -2289,7 +2292,8 @@ retry:
                if (bvec_iter.bi_size == bytes)
                        flags |= BCH_READ_LAST_FRAGMENT;
 
-               ret = __bch2_read_extent(&trans, rbio, bvec_iter, k,
+               ret = __bch2_read_extent(&trans, rbio, bvec_iter, iter->pos,
+                                        data_btree, k,
                                         offset_into_extent, failed, flags);
                switch (ret) {
                case READ_RETRY:
index 1c0a444ea32503aa09e6db692e5b5349ebd0bfc9..ccbd8c3e6642451048e3a733d6f92d6732b4a0cb 100644 (file)
@@ -121,12 +121,15 @@ int __bch2_read_indirect_extent(struct btree_trans *, unsigned *,
                                struct bkey_buf *);
 
 static inline int bch2_read_indirect_extent(struct btree_trans *trans,
+                                           enum btree_id *data_btree,
                                            unsigned *offset_into_extent,
                                            struct bkey_buf *k)
 {
-       return k->k->k.type == KEY_TYPE_reflink_p
-               ? __bch2_read_indirect_extent(trans, offset_into_extent, k)
-               : 0;
+       if (k->k->k.type != KEY_TYPE_reflink_p)
+               return 0;
+
+       *data_btree = BTREE_ID_reflink;
+       return __bch2_read_indirect_extent(trans, offset_into_extent, k);
 }
 
 enum bch_read_flags {
@@ -143,17 +146,17 @@ enum bch_read_flags {
 };
 
 int __bch2_read_extent(struct btree_trans *, struct bch_read_bio *,
-                      struct bvec_iter, struct bkey_s_c, unsigned,
+                      struct bvec_iter, struct bpos, enum btree_id,
+                      struct bkey_s_c, unsigned,
                       struct bch_io_failures *, unsigned);
 
 static inline void bch2_read_extent(struct btree_trans *trans,
-                                   struct bch_read_bio *rbio,
-                                   struct bkey_s_c k,
-                                   unsigned offset_into_extent,
-                                   unsigned flags)
+                       struct bch_read_bio *rbio, struct bpos read_pos,
+                       enum btree_id data_btree, struct bkey_s_c k,
+                       unsigned offset_into_extent, unsigned flags)
 {
-       __bch2_read_extent(trans, rbio, rbio->bio.bi_iter, k,
-                          offset_into_extent, NULL, flags);
+       __bch2_read_extent(trans, rbio, rbio->bio.bi_iter, read_pos,
+                          data_btree, k, offset_into_extent, NULL, flags);
 }
 
 void __bch2_read(struct bch_fs *, struct bch_read_bio *, struct bvec_iter,
index 65969eeac2532f8bfff28557b05873afac313a63..99b4b4c4a53b7521d979bbd3416f86138f6b0286 100644 (file)
@@ -58,8 +58,18 @@ struct bch_read_bio {
        struct bch_devs_list    devs_have;
 
        struct extent_ptr_decoded pick;
-       /* start pos of data we read (may not be pos of data we want) */
-       struct bpos             pos;
+
+       /*
+        * pos we read from - different from data_pos for indirect extents:
+        */
+       struct bpos             read_pos;
+
+       /*
+        * start pos of data we read (may not be pos of data we want) - for
+        * promote, narrow extents paths:
+        */
+       enum btree_id           data_btree;
+       struct bpos             data_pos;
        struct bversion         version;
 
        struct promote_op       *promote;
index dfe7f05f39e99fdc71073cdea03bea6be7187760..3ff31d25f39630b58a318e129010cf2c496a62ab 100644 (file)
@@ -208,9 +208,9 @@ void bch2_migrate_read_done(struct migrate_write *m, struct bch_read_bio *rbio)
        BUG_ON(!m->op.wbio.bio.bi_vcnt);
 
        m->ptr          = rbio->pick.ptr;
-       m->offset       = rbio->pos.offset - rbio->pick.crc.offset;
+       m->offset       = rbio->data_pos.offset - rbio->pick.crc.offset;
        m->op.devs_have = rbio->devs_have;
-       m->op.pos       = rbio->pos;
+       m->op.pos       = rbio->data_pos;
        m->op.version   = rbio->version;
        m->op.crc       = rbio->pick.crc;
        m->op.wbio.bio.bi_iter.bi_size = m->op.crc.compressed_size << 9;
@@ -492,7 +492,9 @@ static int bch2_move_extent(struct btree_trans *trans,
         * ctxt when doing wakeup
         */
        closure_get(&ctxt->cl);
-       bch2_read_extent(trans, &io->rbio, k, 0,
+       bch2_read_extent(trans, &io->rbio,
+                        bkey_start_pos(k.k),
+                        btree_id, k, 0,
                         BCH_READ_NODECODE|
                         BCH_READ_LAST_FRAGMENT);
        return 0;