From b31c070407edda710ad087d143353ddd0f2c9499 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Fri, 28 Feb 2025 14:38:47 -0500 Subject: [PATCH] bcachefs: Finish bch2_account_io_completion() conversions More prep work for automatically kicking devices out after too many IO errors. Signed-off-by: Kent Overstreet --- fs/bcachefs/btree_io.c | 38 +++++++++++++++++------------ fs/bcachefs/btree_node_scan.c | 12 ++++++--- fs/bcachefs/ec.c | 15 +++++++----- fs/bcachefs/errcode.h | 1 + fs/bcachefs/error.h | 33 +++++++------------------ fs/bcachefs/io_write.c | 12 +++++---- fs/bcachefs/journal_io.c | 46 +++++++++++++++++++++++------------ fs/bcachefs/journal_types.h | 1 + fs/bcachefs/super-io.c | 12 ++++----- 9 files changed, 95 insertions(+), 75 deletions(-) diff --git a/fs/bcachefs/btree_io.c b/fs/bcachefs/btree_io.c index 18413b4f22a3..cd792fee7ee3 100644 --- a/fs/bcachefs/btree_io.c +++ b/fs/bcachefs/btree_io.c @@ -1329,6 +1329,7 @@ static void btree_node_read_work(struct work_struct *work) bch_info(c, "retrying read"); ca = bch2_dev_get_ioref(c, rb->pick.ptr.dev, READ); rb->have_ioref = ca != NULL; + rb->start_time = local_clock(); bio_reset(bio, NULL, REQ_OP_READ|REQ_SYNC|REQ_META); bio->bi_iter.bi_sector = rb->pick.ptr.offset; bio->bi_iter.bi_size = btree_buf_bytes(b); @@ -1339,12 +1340,17 @@ static void btree_node_read_work(struct work_struct *work) } else { bio->bi_status = BLK_STS_REMOVED; } + + bch2_account_io_completion(ca, BCH_MEMBER_ERROR_read, + rb->start_time, !bio->bi_status); start: printbuf_reset(&buf); bch2_btree_pos_to_text(&buf, c, b); - bch2_dev_io_err_on(ca && bio->bi_status, ca, BCH_MEMBER_ERROR_read, - "btree read error %s for %s", - bch2_blk_status_to_str(bio->bi_status), buf.buf); + + if (ca && bio->bi_status) + bch_err_dev_ratelimited(ca, + "btree read error %s for %s", + bch2_blk_status_to_str(bio->bi_status), buf.buf); if (rb->have_ioref) percpu_ref_put(&ca->io_ref); rb->have_ioref = false; @@ -1401,12 +1407,11 @@ static void btree_node_read_endio(struct bio *bio) struct btree_read_bio *rb = container_of(bio, struct btree_read_bio, bio); struct bch_fs *c = rb->c; + struct bch_dev *ca = rb->have_ioref + ? bch2_dev_have_ref(c, rb->pick.ptr.dev) : NULL; - if (rb->have_ioref) { - struct bch_dev *ca = bch2_dev_have_ref(c, rb->pick.ptr.dev); - - bch2_latency_acct(ca, rb->start_time, READ); - } + bch2_account_io_completion(ca, BCH_MEMBER_ERROR_read, + rb->start_time, !bio->bi_status); queue_work(c->btree_read_complete_wq, &rb->work); } @@ -2126,16 +2131,17 @@ static void btree_node_write_endio(struct bio *bio) struct bch_fs *c = wbio->c; struct btree *b = wbio->bio.bi_private; struct bch_dev *ca = wbio->have_ioref ? bch2_dev_have_ref(c, wbio->dev) : NULL; - unsigned long flags; - if (wbio->have_ioref) - bch2_latency_acct(ca, wbio->submit_time, WRITE); + bch2_account_io_completion(ca, BCH_MEMBER_ERROR_write, + wbio->submit_time, !bio->bi_status); + + if (ca && bio->bi_status) + bch_err_dev_ratelimited(ca, + "btree write error: %s", + bch2_blk_status_to_str(bio->bi_status)); - if (!ca || - bch2_dev_io_err_on(bio->bi_status, ca, BCH_MEMBER_ERROR_write, - "btree write error: %s", - bch2_blk_status_to_str(bio->bi_status)) || - bch2_meta_write_fault("btree")) { + if (bio->bi_status) { + unsigned long flags; spin_lock_irqsave(&c->btree_write_error_lock, flags); bch2_dev_list_add_dev(&orig->failed, wbio->dev); spin_unlock_irqrestore(&c->btree_write_error_lock, flags); diff --git a/fs/bcachefs/btree_node_scan.c b/fs/bcachefs/btree_node_scan.c index a7f06deee13c..fb73ec77c099 100644 --- a/fs/bcachefs/btree_node_scan.c +++ b/fs/bcachefs/btree_node_scan.c @@ -166,11 +166,17 @@ static void try_read_btree_node(struct find_btree_nodes *f, struct bch_dev *ca, bio->bi_iter.bi_sector = offset; bch2_bio_map(bio, bn, PAGE_SIZE); + u64 submit_time = local_clock(); submit_bio_wait(bio); - if (bch2_dev_io_err_on(bio->bi_status, ca, BCH_MEMBER_ERROR_read, - "IO error in try_read_btree_node() at %llu: %s", - offset, bch2_blk_status_to_str(bio->bi_status))) + + bch2_account_io_completion(ca, BCH_MEMBER_ERROR_read, submit_time, !bio->bi_status); + + if (bio->bi_status) { + bch_err_dev_ratelimited(ca, + "IO error in try_read_btree_node() at %llu: %s", + offset, bch2_blk_status_to_str(bio->bi_status)); return; + } if (le64_to_cpu(bn->magic) != bset_magic(c)) return; diff --git a/fs/bcachefs/ec.c b/fs/bcachefs/ec.c index 1090cdb7d5cc..8c7a9addafae 100644 --- a/fs/bcachefs/ec.c +++ b/fs/bcachefs/ec.c @@ -105,6 +105,7 @@ struct ec_bio { struct bch_dev *ca; struct ec_stripe_buf *buf; size_t idx; + u64 submit_time; struct bio bio; }; @@ -748,14 +749,15 @@ static void ec_block_endio(struct bio *bio) struct bch_dev *ca = ec_bio->ca; struct closure *cl = bio->bi_private; - if (bch2_dev_io_err_on(bio->bi_status, ca, - bio_data_dir(bio) - ? BCH_MEMBER_ERROR_write - : BCH_MEMBER_ERROR_read, - "erasure coding %s error: %s", + bch2_account_io_completion(ca, bio_data_dir(bio), + ec_bio->submit_time, !bio->bi_status); + + if (bio->bi_status) { + bch_err_dev_ratelimited(ca, "erasure coding %s error: %s", str_write_read(bio_data_dir(bio)), - bch2_blk_status_to_str(bio->bi_status))) + bch2_blk_status_to_str(bio->bi_status)); clear_bit(ec_bio->idx, ec_bio->buf->valid); + } int stale = dev_ptr_stale(ca, ptr); if (stale) { @@ -818,6 +820,7 @@ static void ec_block_io(struct bch_fs *c, struct ec_stripe_buf *buf, ec_bio->ca = ca; ec_bio->buf = buf; ec_bio->idx = idx; + ec_bio->submit_time = local_clock(); ec_bio->bio.bi_iter.bi_sector = ptr->offset + buf->offset + (offset >> 9); ec_bio->bio.bi_end_io = ec_block_endio; diff --git a/fs/bcachefs/errcode.h b/fs/bcachefs/errcode.h index ed4214e9beba..d45ef03abc91 100644 --- a/fs/bcachefs/errcode.h +++ b/fs/bcachefs/errcode.h @@ -279,6 +279,7 @@ x(EIO, no_encryption_key) \ x(EIO, insufficient_journal_devices) \ x(EIO, device_offline) \ + x(EIO, EIO_fault_injected) \ x(BCH_ERR_btree_node_read_err, btree_node_read_err_fixable) \ x(BCH_ERR_btree_node_read_err, btree_node_read_err_want_retry) \ x(BCH_ERR_btree_node_read_err, btree_node_read_err_must_retry) \ diff --git a/fs/bcachefs/error.h b/fs/bcachefs/error.h index e055b606fb6f..a57b9f18d060 100644 --- a/fs/bcachefs/error.h +++ b/fs/bcachefs/error.h @@ -222,6 +222,14 @@ void bch2_latency_acct(struct bch_dev *, u64, int); static inline void bch2_latency_acct(struct bch_dev *ca, u64 submit_time, int rw) {} #endif +static inline void bch2_account_io_success_fail(struct bch_dev *ca, + enum bch_member_error_type type, + bool success) +{ + if (!success) + bch2_io_error(ca, type); +} + static inline void bch2_account_io_completion(struct bch_dev *ca, enum bch_member_error_type type, u64 submit_time, bool success) @@ -232,32 +240,9 @@ static inline void bch2_account_io_completion(struct bch_dev *ca, if (type != BCH_MEMBER_ERROR_checksum) bch2_latency_acct(ca, submit_time, type); - if (!success) - bch2_io_error(ca, type); + bch2_account_io_success_fail(ca, type, success); } -#define bch2_dev_io_err_on(cond, ca, _type, ...) \ -({ \ - bool _ret = (cond); \ - \ - if (_ret) { \ - bch_err_dev_ratelimited(ca, __VA_ARGS__); \ - bch2_io_error(ca, _type); \ - } \ - _ret; \ -}) - -#define bch2_dev_inum_io_err_on(cond, ca, _type, ...) \ -({ \ - bool _ret = (cond); \ - \ - if (_ret) { \ - bch_err_inum_offset_ratelimited(ca, __VA_ARGS__); \ - bch2_io_error(ca, _type); \ - } \ - _ret; \ -}) - int bch2_inum_offset_err_msg_trans(struct btree_trans *, struct printbuf *, subvol_inum, u64); void bch2_inum_offset_err_msg(struct bch_fs *, struct printbuf *, subvol_inum, u64); diff --git a/fs/bcachefs/io_write.c b/fs/bcachefs/io_write.c index 738bdbfbdb14..dbfcb28f003d 100644 --- a/fs/bcachefs/io_write.c +++ b/fs/bcachefs/io_write.c @@ -716,11 +716,15 @@ static void bch2_write_endio(struct bio *bio) ? bch2_dev_have_ref(c, wbio->dev) : NULL; - if (bch2_dev_inum_io_err_on(bio->bi_status, ca, BCH_MEMBER_ERROR_write, + bch2_account_io_completion(ca, BCH_MEMBER_ERROR_write, + wbio->submit_time, !bio->bi_status); + + if (bio->bi_status) { + bch_err_inum_offset_ratelimited(ca, op->pos.inode, wbio->inode_offset << 9, "data write error: %s", - bch2_blk_status_to_str(bio->bi_status))) { + bch2_blk_status_to_str(bio->bi_status)); set_bit(wbio->dev, op->failed.d); op->flags |= BCH_WRITE_io_error; } @@ -732,10 +736,8 @@ static void bch2_write_endio(struct bio *bio) set_bit(wbio->dev, op->devs_need_flush->d); } - if (wbio->have_ioref) { - bch2_latency_acct(ca, wbio->submit_time, WRITE); + if (wbio->have_ioref) percpu_ref_put(&ca->io_ref); - } if (wbio->bounce) bch2_bio_free_pages_pool(c, bio); diff --git a/fs/bcachefs/journal_io.c b/fs/bcachefs/journal_io.c index 7d59ccc07315..c12d9f9bd536 100644 --- a/fs/bcachefs/journal_io.c +++ b/fs/bcachefs/journal_io.c @@ -1041,13 +1041,19 @@ reread: bio->bi_iter.bi_sector = offset; bch2_bio_map(bio, buf->data, sectors_read << 9); + u64 submit_time = local_clock(); ret = submit_bio_wait(bio); kfree(bio); - if (bch2_dev_io_err_on(ret, ca, BCH_MEMBER_ERROR_read, - "journal read error: sector %llu", - offset) || - bch2_meta_read_fault("journal")) { + if (!ret && bch2_meta_read_fault("journal")) + ret = -BCH_ERR_EIO_fault_injected; + + bch2_account_io_completion(ca, BCH_MEMBER_ERROR_read, + submit_time, !ret); + + if (ret) { + bch_err_dev_ratelimited(ca, + "journal read error: sector %llu", offset); /* * We don't error out of the recovery process * here, since the relevant journal entry may be @@ -1110,13 +1116,16 @@ reread: struct bch_csum csum; csum_good = jset_csum_good(c, j, &csum); - if (bch2_dev_io_err_on(!csum_good, ca, BCH_MEMBER_ERROR_checksum, - "%s", - (printbuf_reset(&err), - prt_str(&err, "journal "), - bch2_csum_err_msg(&err, csum_type, j->csum, csum), - err.buf))) + bch2_account_io_completion(ca, BCH_MEMBER_ERROR_checksum, 0, csum_good); + + if (!csum_good) { + bch_err_dev_ratelimited(ca, "%s", + (printbuf_reset(&err), + prt_str(&err, "journal "), + bch2_csum_err_msg(&err, csum_type, j->csum, csum), + err.buf)); saw_bad = true; + } ret = bch2_encrypt(c, JSET_CSUM_TYPE(j), journal_nonce(j), j->encrypted_start, @@ -1727,13 +1736,16 @@ static void journal_write_endio(struct bio *bio) struct journal *j = &ca->fs->journal; struct journal_buf *w = j->buf + jbio->buf_idx; - if (bch2_dev_io_err_on(bio->bi_status, ca, BCH_MEMBER_ERROR_write, + bch2_account_io_completion(ca, BCH_MEMBER_ERROR_write, + jbio->submit_time, !bio->bi_status); + + if (bio->bi_status) { + bch_err_dev_ratelimited(ca, "error writing journal entry %llu: %s", le64_to_cpu(w->data->seq), - bch2_blk_status_to_str(bio->bi_status)) || - bch2_meta_write_fault("journal")) { - unsigned long flags; + bch2_blk_status_to_str(bio->bi_status)); + unsigned long flags; spin_lock_irqsave(&j->err_lock, flags); bch2_dev_list_drop_dev(&w->devs_written, ca->dev_idx); spin_unlock_irqrestore(&j->err_lock, flags); @@ -1762,7 +1774,11 @@ static CLOSURE_CALLBACK(journal_write_submit) sectors); struct journal_device *ja = &ca->journal; - struct bio *bio = &ja->bio[w->idx]->bio; + struct journal_bio *jbio = ja->bio[w->idx]; + struct bio *bio = &jbio->bio; + + jbio->submit_time = local_clock(); + bio_reset(bio, ca->disk_sb.bdev, REQ_OP_WRITE|REQ_SYNC|REQ_META); bio->bi_iter.bi_sector = ptr->offset; bio->bi_end_io = journal_write_endio; diff --git a/fs/bcachefs/journal_types.h b/fs/bcachefs/journal_types.h index a0b17c6ed83e..fd82f5d80355 100644 --- a/fs/bcachefs/journal_types.h +++ b/fs/bcachefs/journal_types.h @@ -175,6 +175,7 @@ typedef DARRAY(u64) darray_u64; struct journal_bio { struct bch_dev *ca; unsigned buf_idx; + u64 submit_time; struct bio bio; }; diff --git a/fs/bcachefs/super-io.c b/fs/bcachefs/super-io.c index 9a204baa3ab9..2fef285cfc1a 100644 --- a/fs/bcachefs/super-io.c +++ b/fs/bcachefs/super-io.c @@ -911,16 +911,16 @@ static void write_super_endio(struct bio *bio) { struct bch_dev *ca = bio->bi_private; + bch2_account_io_success_fail(ca, bio_data_dir(bio), !bio->bi_status); + /* XXX: return errors directly */ - if (bch2_dev_io_err_on(bio->bi_status, ca, - bio_data_dir(bio) - ? BCH_MEMBER_ERROR_write - : BCH_MEMBER_ERROR_read, - "superblock %s error: %s", + if (bio->bi_status) { + bch_err_dev_ratelimited(ca, "superblock %s error: %s", str_write_read(bio_data_dir(bio)), - bch2_blk_status_to_str(bio->bi_status))) + bch2_blk_status_to_str(bio->bi_status)); ca->sb_write_error = 1; + } closure_put(&ca->fs->sb_write); percpu_ref_put(&ca->io_ref); -- 2.25.1