struct bch_dev *ca = bch_dev_bkey_exists(c, rb->pick.ptr.dev);
struct btree *b = rb->bio.bi_private;
struct bio *bio = &rb->bio;
- struct bch_devs_mask avoid;
+ struct bch_io_failures failed = { .nr = 0 };
bool can_retry;
- memset(&avoid, 0, sizeof(avoid));
-
goto start;
while (1) {
bch_info(c, "retrying read");
percpu_ref_put(&ca->io_ref);
rb->have_ioref = false;
- __set_bit(rb->pick.ptr.dev, avoid.d);
- can_retry = bch2_btree_pick_ptr(c, b, &avoid, &rb->pick) > 0;
+ bch2_mark_io_failure(&failed, &rb->pick);
+
+ can_retry = bch2_btree_pick_ptr(c, b, &failed, &rb->pick) > 0;
if (!bio->bi_status &&
!bch2_btree_node_read_done(c, b, can_retry))
return out - buf;
}
-static inline bool dev_latency_better(struct bch_fs *c,
- const struct bch_extent_ptr *ptr1,
- const struct bch_extent_ptr *ptr2)
+/*
+ * Find the failure-tracking entry for device @dev in @f, if one has
+ * been recorded; returns NULL when no failures are recorded for @dev.
+ */
+static struct bch_dev_io_failures *dev_io_failures(struct bch_io_failures *f,
+ unsigned dev)
{
- struct bch_dev *dev1 = bch_dev_bkey_exists(c, ptr1->dev);
- struct bch_dev *dev2 = bch_dev_bkey_exists(c, ptr2->dev);
+ struct bch_dev_io_failures *i;
+
+ for (i = f->devs; i < f->devs + f->nr; i++)
+ if (i->dev == dev)
+ return i;
+
+ return NULL;
+}
+
+/*
+ * Record an IO failure on the device backing @p: bump nr_failed on the
+ * existing entry for that device, or allocate a fresh slot in @failed
+ * (nr_failed = 1, nr_retries = 0).
+ */
+void bch2_mark_io_failure(struct bch_io_failures *failed,
+ struct extent_ptr_decoded *p)
+{
+ struct bch_dev_io_failures *f = dev_io_failures(failed, p->ptr.dev);
+
+ if (!f) {
+ /* one slot per device; devs[] is sized BCH_REPLICAS_MAX */
+ BUG_ON(failed->nr >= ARRAY_SIZE(failed->devs));
+
+ f = &failed->devs[failed->nr++];
+ f->dev = p->ptr.dev;
+ f->nr_failed = 1;
+ f->nr_retries = 0;
+ } else {
+ f->nr_failed++;
+ }
+}
+
+/*
+ * returns true if p1 is better than p2:
+ */
+static inline bool ptr_better(struct bch_fs *c,
+ const struct extent_ptr_decoded p1,
+ const struct extent_ptr_decoded p2)
+{
+ struct bch_dev *dev1 = bch_dev_bkey_exists(c, p1.ptr.dev);
+ struct bch_dev *dev2 = bch_dev_bkey_exists(c, p2.ptr.dev);
+
u64 l1 = atomic64_read(&dev1->cur_latency[READ]);
u64 l2 = atomic64_read(&dev2->cur_latency[READ]);
static int extent_pick_read_device(struct bch_fs *c,
struct bkey_s_c_extent e,
- struct bch_devs_mask *avoid,
+ struct bch_io_failures *failed,
struct extent_ptr_decoded *pick)
{
const union bch_extent_entry *entry;
struct extent_ptr_decoded p;
+ struct bch_dev_io_failures *f;
struct bch_dev *ca;
int ret = 0;
if (p.ptr.cached && ptr_stale(ca, &p.ptr))
continue;
- /*
- * XXX: need to make avoid work correctly for stripe ptrs
- */
-
- if (avoid && test_bit(p.ptr.dev, avoid->d))
+ f = failed ? dev_io_failures(failed, p.ptr.dev) : NULL;
+ if (f && f->nr_failed >= f->nr_retries)
continue;
- if (ret && !dev_latency_better(c, &p.ptr, &pick->ptr))
+ if (ret && !ptr_better(c, p, *pick))
continue;
*pick = p;
}
+/*
+ * Pick a pointer to read btree node @b from; @failed (may be NULL)
+ * lists devices whose earlier read attempts failed.
+ */
int bch2_btree_pick_ptr(struct bch_fs *c, const struct btree *b,
- struct bch_devs_mask *avoid,
+ struct bch_io_failures *failed,
struct extent_ptr_decoded *pick)
{
return extent_pick_read_device(c, bkey_i_to_s_c_extent(&b->key),
- avoid, pick);
+ failed, pick);
}
/* Extents */
- * other devices, it will still pick a pointer from avoid.
+ * other devices, it will still pick a pointer from the failed list.
*/
int bch2_extent_pick_ptr(struct bch_fs *c, struct bkey_s_c k,
- struct bch_devs_mask *avoid,
+ struct bch_io_failures *failed,
struct extent_ptr_decoded *pick)
{
int ret;
case BCH_EXTENT:
case BCH_EXTENT_CACHED:
ret = extent_pick_read_device(c, bkey_s_c_to_extent(k),
- avoid, pick);
+ failed, pick);
if (!ret && !bkey_extent_is_cached(k.k))
ret = -EIO;
struct btree *,
struct btree_node_iter_large *);
+void bch2_mark_io_failure(struct bch_io_failures *,
+ struct extent_ptr_decoded *);
int bch2_btree_pick_ptr(struct bch_fs *, const struct btree *,
- struct bch_devs_mask *avoid,
+ struct bch_io_failures *,
struct extent_ptr_decoded *);
-
int bch2_extent_pick_ptr(struct bch_fs *, struct bkey_s_c,
- struct bch_devs_mask *,
+ struct bch_io_failures *,
struct extent_ptr_decoded *);
void bch2_extent_trim_atomic(struct bkey_i *, struct btree_iter *);
struct bch_extent_ptr ptr;
};
+/*
+ * Per-read tracking of IO failures by device, replacing the old
+ * bch_devs_mask avoid bitmap: records which devices failed and how
+ * many times, so retry paths can skip them.
+ */
+struct bch_io_failures {
+ /* number of valid entries in devs[] */
+ u8 nr;
+ struct bch_dev_io_failures {
+ u8 dev;
+ u8 nr_failed;
+ u8 nr_retries;
+ } devs[BCH_REPLICAS_MAX];
+};
+
#endif /* _BCACHEFS_EXTENTS_TYPES_H */
static void bch2_read_retry_nodecode(struct bch_fs *c, struct bch_read_bio *rbio,
struct bvec_iter bvec_iter, u64 inode,
- struct bch_devs_mask *avoid, unsigned flags)
+ struct bch_io_failures *failed,
+ unsigned flags)
{
struct btree_iter iter;
BKEY_PADDED(k) tmp;
goto out;
}
- ret = __bch2_read_extent(c, rbio, bvec_iter, k, avoid, flags);
+ ret = __bch2_read_extent(c, rbio, bvec_iter, k, failed, flags);
if (ret == READ_RETRY)
goto retry;
if (ret)
static void bch2_read_retry(struct bch_fs *c, struct bch_read_bio *rbio,
struct bvec_iter bvec_iter, u64 inode,
- struct bch_devs_mask *avoid, unsigned flags)
+ struct bch_io_failures *failed, unsigned flags)
{
struct btree_iter iter;
struct bkey_s_c k;
(k.k->p.offset - bvec_iter.bi_sector) << 9);
swap(bvec_iter.bi_size, bytes);
- ret = __bch2_read_extent(c, rbio, bvec_iter, k, avoid, flags);
+ ret = __bch2_read_extent(c, rbio, bvec_iter, k, failed, flags);
switch (ret) {
case READ_RETRY:
goto retry;
struct bvec_iter iter = rbio->bvec_iter;
unsigned flags = rbio->flags;
u64 inode = rbio->pos.inode;
- struct bch_devs_mask avoid;
+ struct bch_io_failures failed = { .nr = 0 };
trace_read_retry(&rbio->bio);
- memset(&avoid, 0, sizeof(avoid));
-
if (rbio->retry == READ_RETRY_AVOID)
- __set_bit(rbio->pick.ptr.dev, avoid.d);
+ bch2_mark_io_failure(&failed, &rbio->pick);
rbio->bio.bi_status = 0;
flags &= ~BCH_READ_MAY_PROMOTE;
if (flags & BCH_READ_NODECODE)
- bch2_read_retry_nodecode(c, rbio, iter, inode, &avoid, flags);
+ bch2_read_retry_nodecode(c, rbio, iter, inode, &failed, flags);
else
- bch2_read_retry(c, rbio, iter, inode, &avoid, flags);
+ bch2_read_retry(c, rbio, iter, inode, &failed, flags);
}
static void bch2_rbio_error(struct bch_read_bio *rbio, int retry,
int __bch2_read_extent(struct bch_fs *c, struct bch_read_bio *orig,
struct bvec_iter iter, struct bkey_s_c k,
- struct bch_devs_mask *avoid, unsigned flags)
+ struct bch_io_failures *failed, unsigned flags)
{
struct extent_ptr_decoded pick;
struct bch_read_bio *rbio = NULL;
struct bpos pos = bkey_start_pos(k.k);
int pick_ret;
- pick_ret = bch2_extent_pick_ptr(c, k, avoid, &pick);
+ pick_ret = bch2_extent_pick_ptr(c, k, failed, &pick);
/* hole or reservation - just zero fill: */
if (!pick_ret)
rbio = bch2_rbio_free(rbio);
if (ret == READ_RETRY_AVOID) {
- __set_bit(pick.ptr.dev, avoid->d);
+ bch2_mark_io_failure(failed, &pick);
ret = READ_RETRY;
}
struct extent_ptr_decoded;
int __bch2_read_extent(struct bch_fs *, struct bch_read_bio *, struct bvec_iter,
- struct bkey_s_c, struct bch_devs_mask *, unsigned);
+ struct bkey_s_c, struct bch_io_failures *, unsigned);
void bch2_read(struct bch_fs *, struct bch_read_bio *, u64);
enum bch_read_flags {