bcachefs: improve checksum error messages
author Kent Overstreet <kent.overstreet@linux.dev>
Fri, 5 Jan 2024 16:59:03 +0000 (11:59 -0500)
committer Kent Overstreet <kent.overstreet@linux.dev>
Sat, 6 Jan 2024 04:24:21 +0000 (23:24 -0500)
new helpers:
 - bch2_csum_to_text()
 - bch2_csum_err_msg()

standardize our checksum error messages a bit, and print out the
checksums a bit more nicely.

Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
fs/bcachefs/btree_io.c
fs/bcachefs/checksum.h
fs/bcachefs/io_read.c
fs/bcachefs/journal_io.c
fs/bcachefs/super-io.c

index 250a2a59960f68034abe36bc8160e2bb2a60a230..33db48e2153fef61f0c733f97278018f419c2b05 100644 (file)
@@ -1042,8 +1042,8 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct bch_dev *ca,
 
                        nonce = btree_nonce(i, b->written << 9);
 
-                       csum_bad = bch2_crc_cmp(b->data->csum,
-                               csum_vstruct(c, BSET_CSUM_TYPE(i), nonce, b->data));
+                       struct bch_csum csum = csum_vstruct(c, BSET_CSUM_TYPE(i), nonce, b->data);
+                       csum_bad = bch2_crc_cmp(b->data->csum, csum);
                        if (csum_bad)
                                bch2_io_error(ca, BCH_MEMBER_ERROR_checksum);
 
@@ -1051,7 +1051,10 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct bch_dev *ca,
                                     -BCH_ERR_btree_node_read_err_want_retry,
                                     c, ca, b, i,
                                     bset_bad_csum,
-                                    "invalid checksum");
+                                    "%s",
+                                    (printbuf_reset(&buf),
+                                     bch2_csum_err_msg(&buf, BSET_CSUM_TYPE(i), b->data->csum, csum),
+                                     buf.buf));
 
                        ret = bset_encrypt(c, i, b->written << 9);
                        if (bch2_fs_fatal_err_on(ret, c,
@@ -1080,8 +1083,8 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct bch_dev *ca,
                                     "unknown checksum type %llu", BSET_CSUM_TYPE(i));
 
                        nonce = btree_nonce(i, b->written << 9);
-                       csum_bad = bch2_crc_cmp(bne->csum,
-                               csum_vstruct(c, BSET_CSUM_TYPE(i), nonce, bne));
+                       struct bch_csum csum = csum_vstruct(c, BSET_CSUM_TYPE(i), nonce, bne);
+                       csum_bad = bch2_crc_cmp(bne->csum, csum);
                        if (csum_bad)
                                bch2_io_error(ca, BCH_MEMBER_ERROR_checksum);
 
@@ -1089,7 +1092,10 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct bch_dev *ca,
                                     -BCH_ERR_btree_node_read_err_want_retry,
                                     c, ca, b, i,
                                     bset_bad_csum,
-                                    "invalid checksum");
+                                    "%s",
+                                    (printbuf_reset(&buf),
+                                     bch2_csum_err_msg(&buf, BSET_CSUM_TYPE(i), bne->csum, csum),
+                                     buf.buf));
 
                        ret = bset_encrypt(c, i, b->written << 9);
                        if (bch2_fs_fatal_err_on(ret, c,
index 13998388c545c476545b1e6cd418306f67dcf90e..1b8c2c1016dc6347ce12ef3161d4723835dfa56e 100644 (file)
@@ -45,6 +45,29 @@ struct bch_csum bch2_checksum(struct bch_fs *, unsigned, struct nonce,
        bch2_checksum(_c, _type, _nonce, _start, vstruct_end(_i) - _start);\
 })
 
+/*
+ * Append @csum to @out as hex, one prt_hex_byte() call per byte, walking the
+ * struct in memory order.
+ *
+ * The number of bytes printed is bch_crc_bytes[@type] for a @type below
+ * BCH_CSUM_NR; any other @type falls back to 16 bytes.
+ */
+static inline void bch2_csum_to_text(struct printbuf *out,
+                                    enum bch_csum_type type,
+                                    struct bch_csum csum)
+{
+       const u8 *byte = (u8 *) &csum;
+       unsigned nr_bytes = 16;
+
+       if (type < BCH_CSUM_NR)
+               nr_bytes = bch_crc_bytes[type];
+
+       for (const u8 *end = byte + nr_bytes; byte < end; byte++)
+               prt_hex_byte(out, *byte);
+}
+
+/*
+ * Append a standardized checksum mismatch message to @out:
+ *
+ *   checksum error: got <got> should be <expected> type <name>
+ *
+ * Both checksums are rendered with bch2_csum_to_text().
+ *
+ * @type is range checked before it is used to index bch2_csum_types[]:
+ * callers may build this message for a checksum type that has already been
+ * rejected as invalid (the journal read path does), so an out-of-range value
+ * is printed numerically instead of being used as an array index.
+ */
+static inline void bch2_csum_err_msg(struct printbuf *out,
+                                    enum bch_csum_type type,
+                                    struct bch_csum expected,
+                                    struct bch_csum got)
+{
+       prt_printf(out, "checksum error: got ");
+       bch2_csum_to_text(out, type, got);
+       prt_str(out, " should be ");
+       bch2_csum_to_text(out, type, expected);
+
+       if (type < BCH_CSUM_NR)
+               prt_printf(out, " type %s", bch2_csum_types[type]);
+       else
+               prt_printf(out, " type (unknown %u)", (unsigned) type);
+}
+
 int bch2_chacha_encrypt_key(struct bch_key *, struct nonce, void *, size_t);
 int bch2_request_key(struct bch_sb *, struct bch_key *);
 #ifndef __KERNEL__
index 5c2d118eaf6feeba1c759596b76f1d47acb4b419..3c574d8873a1e209dc7f7f48faacf9928f8a1272 100644 (file)
@@ -642,12 +642,17 @@ csum_err:
                goto out;
        }
 
+       /*
+        * The message in buf already starts with "data ", so it is printed
+        * with a bare "%s"; repeating the prefix in the format string would
+        * log "data data checksum error: ...".
+        */
+       struct printbuf buf = PRINTBUF;
+       buf.atomic++;
+       prt_str(&buf, "data ");
+       bch2_csum_err_msg(&buf, crc.csum_type, rbio->pick.crc.csum, csum);
+
        bch_err_inum_offset_ratelimited(ca,
                rbio->read_pos.inode,
                rbio->read_pos.offset << 9,
-               "data checksum error: expected %0llx:%0llx got %0llx:%0llx (type %s)",
-               rbio->pick.crc.csum.hi, rbio->pick.crc.csum.lo,
-               csum.hi, csum.lo, bch2_csum_types[crc.csum_type]);
+               "%s", buf.buf);
+       printbuf_exit(&buf);
+
        bch2_io_error(ca, BCH_MEMBER_ERROR_checksum);
        bch2_rbio_error(rbio, READ_RETRY_AVOID, BLK_STS_IOERR);
        goto out;
index c5bc58247146a2cdbc1eecb987db7ba667e9677f..b0f4dd491e1205d28c6af528fb59696cdbc4dc9c 100644 (file)
@@ -27,11 +27,15 @@ static struct nonce journal_nonce(const struct jset *jset)
        }};
 }
 
-static bool jset_csum_good(struct bch_fs *c, struct jset *j)
+/*
+ * Check the checksum stored in journal entry @j.
+ *
+ * *@csum is always written: it receives the checksum computed over @j, or
+ * all zeroes when bch2_checksum_type_valid() rejects the entry's checksum
+ * type (nothing is computed in that case).
+ *
+ * Returns true only when the type is valid and bch2_crc_cmp() of j->csum
+ * against the computed value returns zero.
+ */
+static bool jset_csum_good(struct bch_fs *c, struct jset *j, struct bch_csum *csum)
 {
-       return bch2_checksum_type_valid(c, JSET_CSUM_TYPE(j)) &&
-               !bch2_crc_cmp(j->csum,
-                             csum_vstruct(c, JSET_CSUM_TYPE(j), journal_nonce(j), j));
+       bool type_ok = bch2_checksum_type_valid(c, JSET_CSUM_TYPE(j));
+
+       if (type_ok)
+               *csum = csum_vstruct(c, JSET_CSUM_TYPE(j), journal_nonce(j), j);
+       else
+               *csum = (struct bch_csum) {};
+
+       return type_ok && !bch2_crc_cmp(j->csum, *csum);
 }
 
 static inline u32 journal_entry_radix_idx(struct bch_fs *c, u64 seq)
@@ -934,6 +938,7 @@ static int journal_read_bucket(struct bch_dev *ca,
        u64 offset = bucket_to_sector(ca, ja->buckets[bucket]),
            end = offset + ca->mi.bucket_size;
        bool saw_bad = false, csum_good;
+       struct printbuf err = PRINTBUF;
        int ret = 0;
 
        pr_debug("reading %u", bucket);
@@ -966,7 +971,7 @@ reread:
                                 * found on a different device, and missing or
                                 * no journal entries will be handled later
                                 */
-                               return 0;
+                               goto out;
                        }
 
                        j = buf->data;
@@ -983,12 +988,12 @@ reread:
                                ret = journal_read_buf_realloc(buf,
                                                        vstruct_bytes(j));
                                if (ret)
-                                       return ret;
+                                       goto err;
                        }
                        goto reread;
                case JOURNAL_ENTRY_NONE:
                        if (!saw_bad)
-                               return 0;
+                               goto out;
                        /*
                         * On checksum error we don't really trust the size
                         * field of the journal entry we read, so try reading
@@ -997,7 +1002,7 @@ reread:
                        sectors = block_sectors(c);
                        goto next_block;
                default:
-                       return ret;
+                       goto err;
                }
 
                /*
@@ -1007,20 +1012,28 @@ reread:
                 * bucket:
                 */
                if (le64_to_cpu(j->seq) < ja->bucket_seq[bucket])
-                       return 0;
+                       goto out;
 
                ja->bucket_seq[bucket] = le64_to_cpu(j->seq);
 
-               csum_good = jset_csum_good(c, j);
+               enum bch_csum_type csum_type = JSET_CSUM_TYPE(j);
+               struct bch_csum csum;
+               csum_good = jset_csum_good(c, j, &csum);
+
                if (bch2_dev_io_err_on(!csum_good, ca, BCH_MEMBER_ERROR_checksum,
-                                      "journal checksum error"))
+                                      "%s",
+                                      (printbuf_reset(&err),
+                                       prt_str(&err, "journal "),
+                                       bch2_csum_err_msg(&err, csum_type, j->csum, csum),
+                                       err.buf)))
                        saw_bad = true;
 
                ret = bch2_encrypt(c, JSET_CSUM_TYPE(j), journal_nonce(j),
                             j->encrypted_start,
                             vstruct_end(j) - (void *) j->encrypted_start);
                bch2_fs_fatal_err_on(ret, c,
-                               "error decrypting journal entry: %i", ret);
+                               "error decrypting journal entry: %s",
+                               bch2_err_str(ret));
 
                mutex_lock(&jlist->lock);
                ret = journal_entry_add(c, ca, (struct journal_ptr) {
@@ -1039,7 +1052,7 @@ reread:
                case JOURNAL_ENTRY_ADD_OUT_OF_RANGE:
                        break;
                default:
-                       return ret;
+                       goto err;
                }
 next_block:
                pr_debug("next");
@@ -1048,7 +1061,11 @@ next_block:
                j = ((void *) j) + (sectors << 9);
        }
 
-       return 0;
+out:
+       ret = 0;
+err:
+       printbuf_exit(&err);
+       return ret;
 }
 
 static CLOSURE_CALLBACK(bch2_journal_read_device)
index b05c5f1fe09d3d6570469ad6ae85792ae5bb048f..c05983fe681557875e75c57d4202de944ed1e4b7 100644 (file)
@@ -612,7 +612,6 @@ int bch2_sb_from_fs(struct bch_fs *c, struct bch_dev *ca)
 
 static int read_one_super(struct bch_sb_handle *sb, u64 offset, struct printbuf *err)
 {
-       struct bch_csum csum;
        size_t bytes;
        int ret;
 reread:
@@ -653,17 +652,16 @@ reread:
                goto reread;
        }
 
-       if (BCH_SB_CSUM_TYPE(sb->sb) >= BCH_CSUM_NR) {
+       enum bch_csum_type csum_type = BCH_SB_CSUM_TYPE(sb->sb);
+       if (csum_type >= BCH_CSUM_NR) {
                prt_printf(err, "unknown checksum type %llu", BCH_SB_CSUM_TYPE(sb->sb));
                return -BCH_ERR_invalid_sb_csum_type;
        }
 
        /* XXX: verify MACs */
-       csum = csum_vstruct(NULL, BCH_SB_CSUM_TYPE(sb->sb),
-                           null_nonce(), sb->sb);
-
+       struct bch_csum csum = csum_vstruct(NULL, csum_type, null_nonce(), sb->sb);
        if (bch2_crc_cmp(csum, sb->sb->csum)) {
-               prt_printf(err, "bad checksum");
+               bch2_csum_err_msg(err, csum_type, sb->sb->csum, csum);
                return -BCH_ERR_invalid_sb_csum;
        }