bcachefs: Fix a deadlock on journal reclaim
author: Kent Overstreet <kent.overstreet@gmail.com>
Tue, 20 Apr 2021 21:09:25 +0000 (17:09 -0400)
committer: Kent Overstreet <kent.overstreet@linux.dev>
Sun, 22 Oct 2023 21:09:01 +0000 (17:09 -0400)
Flushing the btree key cache needs to use allocation reserves - journal
reclaim depends on flushing the btree key cache for making forward
progress, and the allocator and copygc depend on journal reclaim making
forward progress.

Signed-off-by: Kent Overstreet <kent.overstreet@gmail.com>
Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
fs/bcachefs/btree_key_cache.c
fs/bcachefs/journal_reclaim.c
fs/bcachefs/movinggc.c

index ac844f47b8dde8f3469fc14aa7550ebdf464b09c..0716c3314a36a32bcf80cf388c4935d2967aad10 100644 (file)
@@ -386,12 +386,18 @@ retry:
                goto evict;
        }
 
+       /*
+        * Since journal reclaim depends on us making progress here, and the
+        * allocator/copygc depend on journal reclaim making progress, we need
+        * to be using alloc reserves:
+        * */
        ret   = bch2_btree_iter_traverse(b_iter) ?:
                bch2_trans_update(trans, b_iter, ck->k, BTREE_TRIGGER_NORUN) ?:
                bch2_trans_commit(trans, NULL, NULL,
                                  BTREE_INSERT_NOUNLOCK|
                                  BTREE_INSERT_NOCHECK_RW|
                                  BTREE_INSERT_NOFAIL|
+                                 BTREE_INSERT_USE_RESERVE|
                                  (ck->journal.seq == journal_last_seq(j)
                                   ? BTREE_INSERT_JOURNAL_RESERVED
                                   : 0)|
index e2086c76d1044feabbbd68d017d671a3a872397b..416f8611f0085bc31011dd582f7c17ad4502fd6c 100644 (file)
@@ -634,7 +634,7 @@ static int __bch2_journal_reclaim(struct journal *j, bool direct)
                               msecs_to_jiffies(j->reclaim_delay_ms)))
                        min_nr = 1;
 
-               if (j->prereserved.reserved * 2 > j->prereserved.remaining)
+               if (j->prereserved.reserved * 4 > j->prereserved.remaining)
                        min_nr = 1;
 
                if (fifo_free(&j->pin) <= 32)
index 3d57a72e63e430eb9a68e85ed63039dab4f4e5db..f9146ccd70ef05b7f44823d87eb54780e6e84049 100644 (file)
@@ -87,9 +87,20 @@ static enum data_cmd copygc_pred(struct bch_fs *c, void *arg,
                if (i >= 0 &&
                    p.ptr.offset < h->data[i].offset + ca->mi.bucket_size &&
                    p.ptr.gen == h->data[i].gen) {
+                       /*
+                        * We need to use the journal reserve here, because
+                        *  - journal reclaim depends on btree key cache
+                        *    flushing to make forward progress,
+                        *  - which has to make forward progress when the
+                        *    journal is pre-reservation full,
+                        *  - and depends on allocation - meaning allocator and
+                        *    copygc
+                        */
+
                        data_opts->target               = io_opts->background_target;
                        data_opts->nr_replicas          = 1;
-                       data_opts->btree_insert_flags   = BTREE_INSERT_USE_RESERVE;
+                       data_opts->btree_insert_flags   = BTREE_INSERT_USE_RESERVE|
+                               BTREE_INSERT_JOURNAL_RESERVED;
                        data_opts->rewrite_dev          = p.ptr.dev;
 
                        if (p.has_ec)