bcachefs: new avoid mechanism for io retries
authorKent Overstreet <kent.overstreet@gmail.com>
Thu, 1 Nov 2018 19:28:45 +0000 (15:28 -0400)
committerKent Overstreet <kent.overstreet@linux.dev>
Sun, 22 Oct 2023 21:08:10 +0000 (17:08 -0400)
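Instead of tracking which devices to avoid in a bch_devs_mask bitmask,
the read retry paths now carry a bch_io_failures structure that counts
failed IOs per device. extent_pick_read_device() skips a pointer once
its device's nr_failed reaches nr_retries, bch2_mark_io_failure()
replaces the open-coded __set_bit() calls in the retry paths, and the
XXX about making avoid work correctly for stripe ptrs goes away.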
Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
fs/bcachefs/btree_io.c
fs/bcachefs/extents.c
fs/bcachefs/extents.h
fs/bcachefs/extents_types.h
fs/bcachefs/io.c
fs/bcachefs/io.h

index 8f8e5fab10866e46378a1b79c376fdc65545a4cf..c8809a59a765912323ff341772c7886964280dc9 100644 (file)
@@ -1345,11 +1345,9 @@ static void btree_node_read_work(struct work_struct *work)
        struct bch_dev *ca      = bch_dev_bkey_exists(c, rb->pick.ptr.dev);
        struct btree *b         = rb->bio.bi_private;
        struct bio *bio         = &rb->bio;
-       struct bch_devs_mask avoid;
+       struct bch_io_failures failed = { .nr = 0 };
        bool can_retry;
 
-       memset(&avoid, 0, sizeof(avoid));
-
        goto start;
        while (1) {
                bch_info(c, "retrying read");
@@ -1371,8 +1369,9 @@ start:
                        percpu_ref_put(&ca->io_ref);
                rb->have_ioref = false;
 
-               __set_bit(rb->pick.ptr.dev, avoid.d);
-               can_retry = bch2_btree_pick_ptr(c, b, &avoid, &rb->pick) > 0;
+               bch2_mark_io_failure(&failed, &rb->pick);
+
+               can_retry = bch2_btree_pick_ptr(c, b, &failed, &rb->pick) > 0;
 
                if (!bio->bi_status &&
                    !bch2_btree_node_read_done(c, b, can_retry))
index 4a1ec3bba91bbcb37344afb343fd598d1a0aae18..4a62eefd40cdbc3814cd1f91ba88bb77f92e30ef 100644 (file)
@@ -519,12 +519,45 @@ out:
        return out - buf;
 }
 
-static inline bool dev_latency_better(struct bch_fs *c,
-                             const struct bch_extent_ptr *ptr1,
-                             const struct bch_extent_ptr *ptr2)
+static struct bch_dev_io_failures *dev_io_failures(struct bch_io_failures *f,
+                                                  unsigned dev)
 {
-       struct bch_dev *dev1 = bch_dev_bkey_exists(c, ptr1->dev);
-       struct bch_dev *dev2 = bch_dev_bkey_exists(c, ptr2->dev);
+       struct bch_dev_io_failures *i;
+
+       for (i = f->devs; i < f->devs + f->nr; i++)
+               if (i->dev == dev)
+                       return i;
+
+       return NULL;
+}
+
+void bch2_mark_io_failure(struct bch_io_failures *failed,
+                         struct extent_ptr_decoded *p)
+{
+       struct bch_dev_io_failures *f = dev_io_failures(failed, p->ptr.dev);
+
+       if (!f) {
+               BUG_ON(failed->nr >= ARRAY_SIZE(failed->devs));
+
+               f = &failed->devs[failed->nr++];
+               f->dev          = p->ptr.dev;
+               f->nr_failed    = 1;
+               f->nr_retries   = 0;
+       } else {
+               f->nr_failed++;
+       }
+}
+
+/*
+ * returns true if p1 is better than p2:
+ */
+static inline bool ptr_better(struct bch_fs *c,
+                             const struct extent_ptr_decoded p1,
+                             const struct extent_ptr_decoded p2)
+{
+       struct bch_dev *dev1 = bch_dev_bkey_exists(c, p1.ptr.dev);
+       struct bch_dev *dev2 = bch_dev_bkey_exists(c, p2.ptr.dev);
+
        u64 l1 = atomic64_read(&dev1->cur_latency[READ]);
        u64 l2 = atomic64_read(&dev2->cur_latency[READ]);
 
@@ -535,11 +568,12 @@ static inline bool dev_latency_better(struct bch_fs *c,
 
 static int extent_pick_read_device(struct bch_fs *c,
                                   struct bkey_s_c_extent e,
-                                  struct bch_devs_mask *avoid,
+                                  struct bch_io_failures *failed,
                                   struct extent_ptr_decoded *pick)
 {
        const union bch_extent_entry *entry;
        struct extent_ptr_decoded p;
+       struct bch_dev_io_failures *f;
        struct bch_dev *ca;
        int ret = 0;
 
@@ -549,14 +583,11 @@ static int extent_pick_read_device(struct bch_fs *c,
                if (p.ptr.cached && ptr_stale(ca, &p.ptr))
                        continue;
 
-               /*
-                * XXX: need to make avoid work correctly for stripe ptrs
-                */
-
-               if (avoid && test_bit(p.ptr.dev, avoid->d))
+               f = failed ? dev_io_failures(failed, p.ptr.dev) : NULL;
+               if (f && f->nr_failed >= f->nr_retries)
                        continue;
 
-               if (ret && !dev_latency_better(c, &p.ptr, &pick->ptr))
+               if (ret && !ptr_better(c, p, *pick))
                        continue;
 
                *pick = p;
@@ -685,11 +716,11 @@ int bch2_btree_ptr_to_text(struct bch_fs *c, char *buf,
 }
 
 int bch2_btree_pick_ptr(struct bch_fs *c, const struct btree *b,
-                       struct bch_devs_mask *avoid,
+                       struct bch_io_failures *failed,
                        struct extent_ptr_decoded *pick)
 {
        return extent_pick_read_device(c, bkey_i_to_s_c_extent(&b->key),
-                                      avoid, pick);
+                                      failed, pick);
 }
 
 /* Extents */
@@ -1909,7 +1940,7 @@ void bch2_extent_mark_replicas_cached(struct bch_fs *c,
  * other devices, it will still pick a pointer from avoid.
  */
 int bch2_extent_pick_ptr(struct bch_fs *c, struct bkey_s_c k,
-                        struct bch_devs_mask *avoid,
+                        struct bch_io_failures *failed,
                         struct extent_ptr_decoded *pick)
 {
        int ret;
@@ -1921,7 +1952,7 @@ int bch2_extent_pick_ptr(struct bch_fs *c, struct bkey_s_c k,
        case BCH_EXTENT:
        case BCH_EXTENT_CACHED:
                ret = extent_pick_read_device(c, bkey_s_c_to_extent(k),
-                                             avoid, pick);
+                                             failed, pick);
 
                if (!ret && !bkey_extent_is_cached(k.k))
                        ret = -EIO;
index e2f6caefcb31de62063b58846da879b94dd65a32..8754a940a4761e335ea55d9bcfe4bc049f449b83 100644 (file)
@@ -53,12 +53,13 @@ struct btree_nr_keys bch2_extent_sort_fix_overlapping(struct bch_fs *c,
                                                     struct btree *,
                                                     struct btree_node_iter_large *);
 
+void bch2_mark_io_failure(struct bch_io_failures *,
+                         struct extent_ptr_decoded *);
 int bch2_btree_pick_ptr(struct bch_fs *, const struct btree *,
-                       struct bch_devs_mask *avoid,
+                       struct bch_io_failures *,
                        struct extent_ptr_decoded *);
-
 int bch2_extent_pick_ptr(struct bch_fs *, struct bkey_s_c,
-                        struct bch_devs_mask *,
+                        struct bch_io_failures *,
                         struct extent_ptr_decoded *);
 
 void bch2_extent_trim_atomic(struct bkey_i *, struct btree_iter *);
index 3540e2558c0ff6df7fb5e6ec0bf59dbacdc7fbbe..5738738d7953402f8b77f82f6a7d642f0aa85a8c 100644 (file)
@@ -24,4 +24,13 @@ struct extent_ptr_decoded {
        struct bch_extent_ptr           ptr;
 };
 
+struct bch_io_failures {
+       u8                      nr;
+       struct bch_dev_io_failures {
+               u8              dev;
+               u8              nr_failed;
+               u8              nr_retries;
+       }                       devs[BCH_REPLICAS_MAX];
+};
+
 #endif /* _BCACHEFS_EXTENTS_TYPES_H */
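[Editor's note] For readers following along outside the tree, below is a minimal compilable sketch of the semantics this struct gives the read path. The lookup and mark helpers mirror the extents.c hunk above, but the BCH_REPLICAS_MAX value, the should_avoid() helper and the main() driver are illustrative stand-ins, not kernel code.

#include <assert.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define BCH_REPLICAS_MAX 4      /* stand-in; the real value lives in the bcachefs headers */

struct bch_io_failures {
        uint8_t                 nr;
        struct bch_dev_io_failures {
                uint8_t         dev;
                uint8_t         nr_failed;
                uint8_t         nr_retries;
        }                       devs[BCH_REPLICAS_MAX];
};

/* linear scan: the array is tiny, at most one entry per replica */
static struct bch_dev_io_failures *dev_io_failures(struct bch_io_failures *f,
                                                   unsigned dev)
{
        struct bch_dev_io_failures *i;

        for (i = f->devs; i < f->devs + f->nr; i++)
                if (i->dev == dev)
                        return i;

        return NULL;
}

/* record one failed IO against @dev, adding an entry on first failure */
static void mark_io_failure(struct bch_io_failures *failed, unsigned dev)
{
        struct bch_dev_io_failures *f = dev_io_failures(failed, dev);

        if (!f) {
                assert(failed->nr < BCH_REPLICAS_MAX);

                f = &failed->devs[failed->nr++];
                f->dev          = dev;
                f->nr_failed    = 1;
                f->nr_retries   = 0;
        } else {
                f->nr_failed++;
        }
}

/* the skip test from extent_pick_read_device(): a device is avoided
 * once its failure count reaches its allowed retries */
static bool should_avoid(struct bch_io_failures *failed, unsigned dev)
{
        struct bch_dev_io_failures *f = dev_io_failures(failed, dev);

        return f && f->nr_failed >= f->nr_retries;
}

int main(void)
{
        struct bch_io_failures failed = { .nr = 0 };

        printf("avoid dev 2 before failure: %d\n", should_avoid(&failed, 2));
        mark_io_failure(&failed, 2);
        printf("avoid dev 2 after one failure: %d\n", should_avoid(&failed, 2));

        return 0;
}

Because nr_retries is always left at zero in this patch, a single failure is enough to avoid a device, matching the old bitmask behaviour, while the extra field leaves room for per-device retry budgets later.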
index a4660746be0d938f64b8c0ffd86c1fe9cbe7fee8..133b702299dd8118a7f4537ef104b3e26fbe0b6c 100644 (file)
@@ -1203,7 +1203,8 @@ static void bch2_rbio_done(struct bch_read_bio *rbio)
 
 static void bch2_read_retry_nodecode(struct bch_fs *c, struct bch_read_bio *rbio,
                                     struct bvec_iter bvec_iter, u64 inode,
-                                    struct bch_devs_mask *avoid, unsigned flags)
+                                    struct bch_io_failures *failed,
+                                    unsigned flags)
 {
        struct btree_iter iter;
        BKEY_PADDED(k) tmp;
@@ -1237,7 +1238,7 @@ retry:
                goto out;
        }
 
-       ret = __bch2_read_extent(c, rbio, bvec_iter, k, avoid, flags);
+       ret = __bch2_read_extent(c, rbio, bvec_iter, k, failed, flags);
        if (ret == READ_RETRY)
                goto retry;
        if (ret)
@@ -1251,7 +1252,7 @@ out:
 
 static void bch2_read_retry(struct bch_fs *c, struct bch_read_bio *rbio,
                            struct bvec_iter bvec_iter, u64 inode,
-                           struct bch_devs_mask *avoid, unsigned flags)
+                           struct bch_io_failures *failed, unsigned flags)
 {
        struct btree_iter iter;
        struct bkey_s_c k;
@@ -1274,7 +1275,7 @@ retry:
                              (k.k->p.offset - bvec_iter.bi_sector) << 9);
                swap(bvec_iter.bi_size, bytes);
 
-               ret = __bch2_read_extent(c, rbio, bvec_iter, k, avoid, flags);
+               ret = __bch2_read_extent(c, rbio, bvec_iter, k, failed, flags);
                switch (ret) {
                case READ_RETRY:
                        goto retry;
@@ -1310,14 +1311,12 @@ static void bch2_rbio_retry(struct work_struct *work)
        struct bvec_iter iter   = rbio->bvec_iter;
        unsigned flags          = rbio->flags;
        u64 inode               = rbio->pos.inode;
-       struct bch_devs_mask avoid;
+       struct bch_io_failures failed = { .nr = 0 };
 
        trace_read_retry(&rbio->bio);
 
-       memset(&avoid, 0, sizeof(avoid));
-
        if (rbio->retry == READ_RETRY_AVOID)
-               __set_bit(rbio->pick.ptr.dev, avoid.d);
+               bch2_mark_io_failure(&failed, &rbio->pick);
 
        rbio->bio.bi_status = 0;
 
@@ -1327,9 +1326,9 @@ static void bch2_rbio_retry(struct work_struct *work)
        flags &= ~BCH_READ_MAY_PROMOTE;
 
        if (flags & BCH_READ_NODECODE)
-               bch2_read_retry_nodecode(c, rbio, iter, inode, &avoid, flags);
+               bch2_read_retry_nodecode(c, rbio, iter, inode, &failed, flags);
        else
-               bch2_read_retry(c, rbio, iter, inode, &avoid, flags);
+               bch2_read_retry(c, rbio, iter, inode, &failed, flags);
 }
 
 static void bch2_rbio_error(struct bch_read_bio *rbio, int retry,
@@ -1569,7 +1568,7 @@ static void bch2_read_endio(struct bio *bio)
 
 int __bch2_read_extent(struct bch_fs *c, struct bch_read_bio *orig,
                       struct bvec_iter iter, struct bkey_s_c k,
-                      struct bch_devs_mask *avoid, unsigned flags)
+                      struct bch_io_failures *failed, unsigned flags)
 {
        struct extent_ptr_decoded pick;
        struct bch_read_bio *rbio = NULL;
@@ -1579,7 +1578,7 @@ int __bch2_read_extent(struct bch_fs *c, struct bch_read_bio *orig,
        struct bpos pos = bkey_start_pos(k.k);
        int pick_ret;
 
-       pick_ret = bch2_extent_pick_ptr(c, k, avoid, &pick);
+       pick_ret = bch2_extent_pick_ptr(c, k, failed, &pick);
 
        /* hole or reservation - just zero fill: */
        if (!pick_ret)
@@ -1750,7 +1749,7 @@ noclone:
                rbio = bch2_rbio_free(rbio);
 
                if (ret == READ_RETRY_AVOID) {
-                       __set_bit(pick.ptr.dev, avoid->d);
+                       bch2_mark_io_failure(failed, &pick);
                        ret = READ_RETRY;
                }
 
index c832b72910056b8947b7a18a1f2081c41427c7bc..8a7f246e88235b618bb949c206e2c3caf32a420d 100644 (file)
@@ -102,7 +102,7 @@ struct cache_promote_op;
 struct extent_ptr_decoded;
 
 int __bch2_read_extent(struct bch_fs *, struct bch_read_bio *, struct bvec_iter,
-                      struct bkey_s_c, struct bch_devs_mask *, unsigned);
+                      struct bkey_s_c, struct bch_io_failures *, unsigned);
 void bch2_read(struct bch_fs *, struct bch_read_bio *, u64);
 
 enum bch_read_flags {
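[Editor's note] Taken together, the retry paths touched above (btree_node_read_work(), bch2_rbio_retry(), __bch2_read_extent()) share one loop shape: pick a pointer, attempt the read, mark the device on failure, and re-pick until a read succeeds or no usable device remains and the IO fails with -EIO. Here is a self-contained sketch of that shape; the dev_healthy[] table stands in for real IO, and the latency-based tie-breaking of ptr_better() is omitted.

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define NR_DEVS 3

struct dev_fail {
        uint8_t dev, nr_failed, nr_retries;
};

struct io_failures {
        uint8_t         nr;
        struct dev_fail devs[NR_DEVS];
};

static struct dev_fail *lookup(struct io_failures *f, unsigned dev)
{
        for (unsigned i = 0; i < f->nr; i++)
                if (f->devs[i].dev == dev)
                        return &f->devs[i];

        return NULL;
}

static void mark_io_failure(struct io_failures *f, unsigned dev)
{
        struct dev_fail *d = lookup(f, dev);

        if (d)
                d->nr_failed++;
        else
                f->devs[f->nr++] = (struct dev_fail) { .dev = dev, .nr_failed = 1 };
}

/* analogue of extent_pick_read_device(): first replica whose device
 * hasn't exhausted its retries; -1 when none remain */
static int pick_ptr(struct io_failures *f)
{
        for (unsigned dev = 0; dev < NR_DEVS; dev++) {
                struct dev_fail *d = lookup(f, dev);

                if (d && d->nr_failed >= d->nr_retries)
                        continue;
                return dev;
        }

        return -1;
}

int main(void)
{
        bool dev_healthy[NR_DEVS] = { false, false, true };
        struct io_failures failed = { .nr = 0 };
        int dev;

        while ((dev = pick_ptr(&failed)) >= 0) {
                if (dev_healthy[dev]) {
                        printf("read from dev %d succeeded\n", dev);
                        return 0;
                }

                printf("read from dev %d failed, retrying\n", dev);
                mark_io_failure(&failed, dev);
        }

        printf("no devices left to retry: -EIO\n");
        return 1;
}

Running this reads dev 0 and dev 1, marks each as failed, then succeeds on dev 2; with all three devices unhealthy the loop falls through to the -EIO case, mirroring bch2_read_retry().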