bcachefs: opts.journal_rewind

author Kent Overstreet <kent.overstreet@linux.dev>

Sat, 7 Jun 2025 23:16:12 +0000 (19:16 -0400)

committer Kent Overstreet <kent.overstreet@linux.dev>

Mon, 16 Jun 2025 23:03:52 +0000 (19:03 -0400)
author Kent Overstreet <kent.overstreet@linux.dev>
Sat, 7 Jun 2025 23:16:12 +0000 (19:16 -0400)
committer Kent Overstreet <kent.overstreet@linux.dev>
Mon, 16 Jun 2025 23:03:52 +0000 (19:03 -0400)
diff --git a/fs/bcachefs/btree_journal_iter.c b/fs/bcachefs/btree_journal_iter.c

index de996c848e43b8996f2ac9a70db2ac74f9d98ee5..a41fabd0633280cdce4e56bc8a876efa6ae968ba 100644 (file)
--- a/fs/bcachefs/btree_journal_iter.c
+++ b/fs/bcachefs/btree_journal_iter.c
@@ -641,10 +641,11 @@ static int journal_sort_key_cmp(const void *_l, const void *_r)
  {
         const struct journal_key *l = _l;
         const struct journal_key *r = _r;
+       int rewind = l->rewind && r->rewind ? -1 : 1;
  
         return  journal_key_cmp(l, r) ?:
-               cmp_int(l->journal_seq, r->journal_seq) ?:
-               cmp_int(l->journal_offset, r->journal_offset);
+               ((cmp_int(l->journal_seq, r->journal_seq) ?:
+                 cmp_int(l->journal_offset, r->journal_offset)) * rewind);
  }
  
  void bch2_journal_keys_put(struct bch_fs *c)
@@ -713,6 +714,8 @@ int bch2_journal_keys_sort(struct bch_fs *c)
         struct journal_keys *keys = &c->journal_keys;
         size_t nr_read = 0;
  
+       u64 rewind_seq = c->opts.journal_rewind ?: U64_MAX;
+
         genradix_for_each(&c->journal_entries, iter, _i) {
                 i = *_i;
  
@@ -721,28 +724,43 @@ int bch2_journal_keys_sort(struct bch_fs *c)
  
                 cond_resched();
  
-               for_each_jset_key(k, entry, &i->j) {
-                       struct journal_key n = (struct journal_key) {
-                               .btree_id       = entry->btree_id,
-                               .level          = entry->level,
-                               .k              = k,
-                               .journal_seq    = le64_to_cpu(i->j.seq),
-                               .journal_offset = k->_data - i->j._data,
-                       };
-
-                       if (darray_push(keys, n)) {
-                               __journal_keys_sort(keys);
-
-                               if (keys->nr * 8 > keys->size * 7) {
-                                       bch_err(c, "Too many journal keys for slowpath; have %zu compacted, buf size %zu, processed %zu keys at seq %llu",
-                                               keys->nr, keys->size, nr_read, le64_to_cpu(i->j.seq));
-                                       return bch_err_throw(c, ENOMEM_journal_keys_sort);
+               vstruct_for_each(&i->j, entry) {
+                       bool rewind = !entry->level &&
+                               !btree_id_is_alloc(entry->btree_id) &&
+                               le64_to_cpu(i->j.seq) >= rewind_seq;
+
+                       if (entry->type != (rewind
+                                           ? BCH_JSET_ENTRY_overwrite
+                                           : BCH_JSET_ENTRY_btree_keys))
+                               continue;
+
+                       if (!rewind && le64_to_cpu(i->j.seq) < c->journal_replay_seq_start)
+                               continue;
+
+                       jset_entry_for_each_key(entry, k) {
+                               struct journal_key n = (struct journal_key) {
+                                       .btree_id       = entry->btree_id,
+                                       .level          = entry->level,
+                                       .rewind         = rewind,
+                                       .k              = k,
+                                       .journal_seq    = le64_to_cpu(i->j.seq),
+                                       .journal_offset = k->_data - i->j._data,
+                               };
+
+                               if (darray_push(keys, n)) {
+                                       __journal_keys_sort(keys);
+
+                                       if (keys->nr * 8 > keys->size * 7) {
+                                               bch_err(c, "Too many journal keys for slowpath; have %zu compacted, buf size %zu, processed %zu keys at seq %llu",
+                                                       keys->nr, keys->size, nr_read, le64_to_cpu(i->j.seq));
+                                               return bch_err_throw(c, ENOMEM_journal_keys_sort);
+                                       }
+
+                                       BUG_ON(darray_push(keys, n));
                                 }
  
-                               BUG_ON(darray_push(keys, n));
+                               nr_read++;
                         }
-
-                       nr_read++;
                 }
         }
  
diff --git a/fs/bcachefs/btree_journal_iter_types.h b/fs/bcachefs/btree_journal_iter_types.h

index 8b773823704f27fc53d41dcd99abc6d9ff43bcf9..86aacb254fb2dd41bb18dc0baf0bd03ae9f3b2ac 100644 (file)
--- a/fs/bcachefs/btree_journal_iter_types.h
+++ b/fs/bcachefs/btree_journal_iter_types.h
@@ -11,8 +11,9 @@ struct journal_key {
         u32                     journal_offset;
         enum btree_id           btree_id:8;
         unsigned                level:8;
-       bool                    allocated;
-       bool                    overwritten;
+       bool                    allocated:1;
+       bool                    overwritten:1;
+       bool                    rewind:1;
         struct journal_key_range_overwritten __rcu *
                                 overwritten_range;
         struct bkey_i           *k;
diff --git a/fs/bcachefs/journal_io.c b/fs/bcachefs/journal_io.c

index 0b15d71a8d2d526dc9f0ddafcdd1a4768bd7060a..afbf12e8f0c56ac0bf61698e6ebff052d86e211a 100644 (file)
--- a/fs/bcachefs/journal_io.c
+++ b/fs/bcachefs/journal_io.c
@@ -160,6 +160,9 @@ static int journal_entry_add(struct bch_fs *c, struct bch_dev *ca,
         struct printbuf buf = PRINTBUF;
         int ret = JOURNAL_ENTRY_ADD_OK;
  
+       if (last_seq && c->opts.journal_rewind)
+               last_seq = min(last_seq, c->opts.journal_rewind);
+
         if (!c->journal.oldest_seq_found_ondisk ||
             le64_to_cpu(j->seq) < c->journal.oldest_seq_found_ondisk)
                 c->journal.oldest_seq_found_ondisk = le64_to_cpu(j->seq);
@@ -1430,11 +1433,21 @@ int bch2_journal_read(struct bch_fs *c,
         printbuf_reset(&buf);
         prt_printf(&buf, "journal read done, replaying entries %llu-%llu",
                    *last_seq, *blacklist_seq - 1);
+
+       /*
+        * Drop blacklisted entries and entries older than last_seq (or start of
+        * journal rewind):
+        */
+       u64 drop_before = *last_seq;
+       if (c->opts.journal_rewind) {
+               drop_before = min(drop_before, c->opts.journal_rewind);
+               prt_printf(&buf, " (rewinding from %llu)", c->opts.journal_rewind);
+       }
+
+       *last_seq = drop_before;
         if (*start_seq != *blacklist_seq)
                 prt_printf(&buf, " (unflushed %llu-%llu)", *blacklist_seq, *start_seq - 1);
         bch_info(c, "%s", buf.buf);
-
-       /* Drop blacklisted entries and entries older than last_seq: */
         genradix_for_each(&c->journal_entries, radix_iter, _i) {
                 i = *_i;
  
@@ -1442,7 +1455,7 @@ int bch2_journal_read(struct bch_fs *c,
                         continue;
  
                 seq = le64_to_cpu(i->j.seq);
-               if (seq < *last_seq) {
+               if (seq < drop_before) {
                         journal_replay_free(c, i, false);
                         continue;
                 }
@@ -1455,7 +1468,7 @@ int bch2_journal_read(struct bch_fs *c,
                 }
         }
  
-       ret = bch2_journal_check_for_missing(c, *last_seq, *blacklist_seq - 1);
+       ret = bch2_journal_check_for_missing(c, drop_before, *blacklist_seq - 1);
         if (ret)
                 goto err;
  
diff --git a/fs/bcachefs/opts.h b/fs/bcachefs/opts.h

index 2a02606254b369c4ee8dad7bd1677ba0d96f06fc..b0a76bd6d6f54975f4fed7a8dbec3d317e5eb434 100644 (file)
--- a/fs/bcachefs/opts.h
+++ b/fs/bcachefs/opts.h
@@ -379,6 +379,11 @@ enum fsck_err_opts {
           OPT_BOOL(),                                                   \
           BCH2_NO_SB_OPT,               false,                          \
           NULL,         "Exit recovery immediately prior to journal replay")\
+       x(journal_rewind,               u64,                            \
+         OPT_FS|OPT_MOUNT,                                             \
+         OPT_UINT(0, U64_MAX),                                         \
+         BCH2_NO_SB_OPT,               0,                              \
+         NULL,         "Rewind journal")                               \
         x(recovery_passes,              u64,                            \
           OPT_FS|OPT_MOUNT,                                             \
           OPT_BITFIELD(bch2_recovery_passes),                           \
diff --git a/fs/bcachefs/recovery.c b/fs/bcachefs/recovery.c

index 820249e9c5ea27ef92ca4bf23309b66b2da827a9..37f2cc1ec2f8e2414c7776b7d4b2c466d5c55c94 100644 (file)
--- a/fs/bcachefs/recovery.c
+++ b/fs/bcachefs/recovery.c
@@ -757,6 +757,11 @@ int bch2_fs_recovery(struct bch_fs *c)
         if (c->opts.nochanges)
                 c->opts.read_only = true;
  
+       if (c->opts.journal_rewind) {
+               bch_info(c, "rewinding journal, fsck required");
+               c->opts.fsck = true;
+       }
+
         mutex_lock(&c->sb_lock);
         struct bch_sb_field_ext *ext = bch2_sb_field_get(c->disk_sb.sb, ext);
         bool write_sb = false;
author	Kent Overstreet <kent.overstreet@linux.dev>
	Sat, 7 Jun 2025 23:16:12 +0000 (19:16 -0400)
committer	Kent Overstreet <kent.overstreet@linux.dev>
	Mon, 16 Jun 2025 23:03:52 +0000 (19:03 -0400)
fs/bcachefs/btree_journal_iter.c		patch \| blob \| blame \| history
fs/bcachefs/btree_journal_iter_types.h		patch \| blob \| blame \| history
fs/bcachefs/journal_io.c		patch \| blob \| blame \| history
fs/bcachefs/opts.h		patch \| blob \| blame \| history
fs/bcachefs/recovery.c		patch \| blob \| blame \| history