bcachefs: Fix a "no journal entries found" bug
authorKent Overstreet <kent.overstreet@linux.dev>
Fri, 2 Dec 2022 16:45:58 +0000 (11:45 -0500)
committerKent Overstreet <kent.overstreet@linux.dev>
Sun, 22 Oct 2023 21:09:47 +0000 (17:09 -0400)
On startup, we need to ensure the first journal entry written is a flush
write: after a clean shutdown we generally don't read the journal, which
means we might be overwriting whatever was there previously, and there
must always be at least one flush entry in the journal or recovery will
fail.

Found by fstests generic/388.

Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
fs/bcachefs/journal_io.c
fs/bcachefs/journal_types.h
fs/bcachefs/super.c

index cd48ba11e771aefe07856d99989a844956ae5934..a5c9524aa6e76a846d4793bd019498a48c5e9478 100644 (file)
@@ -1661,20 +1661,42 @@ void bch2_journal_write(struct closure *cl)
        j->write_start_time = local_clock();
 
        spin_lock(&j->lock);
-       if (bch2_journal_error(j) ||
-           w->noflush ||
-           (!w->must_flush &&
-            (jiffies - j->last_flush_write) < msecs_to_jiffies(c->opts.journal_flush_delay) &&
-            test_bit(JOURNAL_MAY_SKIP_FLUSH, &j->flags))) {
+
+       /*
+        * If the journal is in an error state - we did an emergency shutdown -
+        * we prefer to continue doing journal writes. We just mark them as
+        * noflush so they'll never be used, but they'll still be visible by the
+        * list_journal tool - this helps in debugging.
+        *
+        * There's a caveat: the first journal write after marking the
+        * superblock dirty must always be a flush write, because on startup
+        * from a clean shutdown we didn't necessarily read the journal and the
+        * new journal write might overwrite whatever was in the journal
+        * previously - we can't leave the journal without any flush writes in
+        * it.
+        *
+        * So if we're in an error state, and we're still starting up, we don't
+        * write anything at all.
+        */
+       if (!test_bit(JOURNAL_NEED_FLUSH_WRITE, &j->flags) &&
+           (bch2_journal_error(j) ||
+            w->noflush ||
+            (!w->must_flush &&
+             (jiffies - j->last_flush_write) < msecs_to_jiffies(c->opts.journal_flush_delay) &&
+             test_bit(JOURNAL_MAY_SKIP_FLUSH, &j->flags)))) {
                w->noflush = true;
                SET_JSET_NO_FLUSH(jset, true);
                jset->last_seq  = 0;
                w->last_seq     = 0;
 
                j->nr_noflush_writes++;
-       } else {
+       } else if (!bch2_journal_error(j)) {
                j->last_flush_write = jiffies;
                j->nr_flush_writes++;
+               clear_bit(JOURNAL_NEED_FLUSH_WRITE, &j->flags);
+       } else {
+               spin_unlock(&j->lock);
+               goto err;
        }
        spin_unlock(&j->lock);
 
index a41b915b3ac6db6a0fff76c9f3962420d531a9d6..4c3065dceeea98a651f2b726ddf44f6fad05cb72 100644 (file)
@@ -141,10 +141,11 @@ enum journal_space_from {
        journal_space_nr,
 };
 
-enum {
+enum journal_flags {
        JOURNAL_REPLAY_DONE,
        JOURNAL_STARTED,
        JOURNAL_MAY_SKIP_FLUSH,
+       JOURNAL_NEED_FLUSH_WRITE,
 };
 
 #define JOURNAL_WATERMARKS()           \
index 8ee0783a1e78042b1108ec40324d0360197d4352..234dab15fa6358d259b9cc74de5bc602a5d4a476 100644 (file)
@@ -379,6 +379,14 @@ static int __bch2_fs_read_write(struct bch_fs *c, bool early)
 
        clear_bit(BCH_FS_CLEAN_SHUTDOWN, &c->flags);
 
+       /*
+        * First journal write must be a flush write: after a clean shutdown we
+        * don't read the journal, so the first journal write may end up
+        * overwriting whatever was there previously, and there must always be
+        * at least one non-flush write in the journal or recovery will fail:
+        */
+       set_bit(JOURNAL_NEED_FLUSH_WRITE, &c->journal.flags);
+
        for_each_rw_member(ca, c, i)
                bch2_dev_allocator_add(c, ca);
        bch2_recalc_capacity(c);