bcachefs: Don't block on journal reservation with btree locks held
author Kent Overstreet <kent.overstreet@gmail.com>
Mon, 19 Nov 2018 02:35:59 +0000 (21:35 -0500)
committer Kent Overstreet <kent.overstreet@linux.dev>
Sun, 22 Oct 2023 21:08:11 +0000 (17:08 -0400)
Fixes a deadlock between the allocator thread (when it first starts
up) and journal replay.

Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
fs/bcachefs/btree_update_interior.c
fs/bcachefs/btree_update_leaf.c
fs/bcachefs/journal.c
fs/bcachefs/journal.h
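
This commit also changes the journal reservation API from a
(u64s_min, u64s_max) pair to a fixed size plus a flags argument. A
condensed sketch of the new interface, taken from the journal.h hunk
below (the comments are editorial, not part of the patch):

    #define JOURNAL_RES_GET_NONBLOCK  (1 << 0)  /* return -EAGAIN instead of sleeping */
    #define JOURNAL_RES_GET_CHECK     (1 << 1)  /* wait for space, take no reservation */

    int bch2_journal_res_get(struct journal *j, struct journal_res *res,
                             unsigned u64s, unsigned flags);

Callers that hold btree node locks first try JOURNAL_RES_GET_NONBLOCK;
on -EAGAIN they drop their locks, block with JOURNAL_RES_GET_CHECK,
then relock and retry (see do_btree_insert_at() below).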

diff --git a/fs/bcachefs/btree_update_interior.c b/fs/bcachefs/btree_update_interior.c
index 01e476d72595cb39eee870a74e022274476cd4ef..af31819c88c78e9339156e59714e17d61e2dca33 100644
--- a/fs/bcachefs/btree_update_interior.c
+++ b/fs/bcachefs/btree_update_interior.c
@@ -638,12 +638,12 @@ static void btree_update_wait_on_journal(struct closure *cl)
        int ret;
 
        ret = bch2_journal_open_seq_async(&c->journal, as->journal_seq, cl);
-       if (ret < 0)
-               goto err;
-       if (!ret) {
+       if (ret == -EAGAIN) {
                continue_at(cl, btree_update_wait_on_journal, system_wq);
                return;
        }
+       if (ret < 0)
+               goto err;
 
        bch2_journal_flush_seq_async(&c->journal, as->journal_seq, cl);
 err:
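
Note the ordering in the rewritten btree_update_wait_on_journal():
-EAGAIN is itself a negative errno, so the -EAGAIN case must be tested
before the generic "ret < 0" error path, or a transient "journal entry
not yet open" would be treated as a hard failure. The resulting return
convention for bch2_journal_open_seq_async() (editorial summary, not
part of the patch):

    /*
     * 0        seq is open, or already past: go ahead and flush
     * -EAGAIN  the closure was parked on j->async_wait; continue_at()
     *          re-runs btree_update_wait_on_journal() when it's woken
     * other <0 hard error (e.g. -EROFS): give up
     */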
diff --git a/fs/bcachefs/btree_update_leaf.c b/fs/bcachefs/btree_update_leaf.c
index 093e480977c713eb47c38c917f26c39e716cceea..41691bebf6795cc40035d28ad92e9268d27b6bc3 100644
--- a/fs/bcachefs/btree_update_leaf.c
+++ b/fs/bcachefs/btree_update_leaf.c
@@ -344,19 +344,35 @@ static inline int do_btree_insert_at(struct btree_insert *trans,
        trans_for_each_entry(trans, i)
                BUG_ON(i->iter->uptodate >= BTREE_ITER_NEED_RELOCK);
 
-       u64s = 0;
-       trans_for_each_entry(trans, i)
-               u64s += jset_u64s(i->k->k.u64s);
-
        memset(&trans->journal_res, 0, sizeof(trans->journal_res));
 
-       ret = !(trans->flags & BTREE_INSERT_JOURNAL_REPLAY)
-               ? bch2_journal_res_get(&c->journal,
-                                     &trans->journal_res,
-                                     u64s, u64s)
-               : 0;
-       if (ret)
-               return ret;
+       if (likely(!(trans->flags & BTREE_INSERT_JOURNAL_REPLAY))) {
+               u64s = 0;
+               trans_for_each_entry(trans, i)
+                       u64s += jset_u64s(i->k->k.u64s);
+
+               while ((ret = bch2_journal_res_get(&c->journal,
+                                       &trans->journal_res, u64s,
+                                       JOURNAL_RES_GET_NONBLOCK)) == -EAGAIN) {
+                       struct btree_iter *iter = trans->entries[0].iter;
+
+                       bch2_btree_iter_unlock(iter);
+
+                       ret = bch2_journal_res_get(&c->journal,
+                                       &trans->journal_res, u64s,
+                                       JOURNAL_RES_GET_CHECK);
+                       if (ret)
+                               return ret;
+
+                       if (!bch2_btree_iter_relock(iter)) {
+                               trans_restart(" (iter relock after journal res get blocked)");
+                               return -EINTR;
+                       }
+               }
+
+               if (ret)
+                       return ret;
+       }
 
        multi_lock_write(c, trans);
 
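
The loop above is the heart of the fix: if a non-blocking reservation
fails, drop btree locks before blocking, then relock and retry. A
self-contained sketch of the idiom (the helper name is hypothetical;
the patch open-codes this in do_btree_insert_at()):

    static int journal_res_get_unlocking(struct bch_fs *c,
                                         struct btree_iter *iter,
                                         struct journal_res *res,
                                         unsigned u64s)
    {
            int ret;

            while ((ret = bch2_journal_res_get(&c->journal, res, u64s,
                                    JOURNAL_RES_GET_NONBLOCK)) == -EAGAIN) {
                    /* can't sleep while holding btree node locks: */
                    bch2_btree_iter_unlock(iter);

                    /* wait for space without taking a reservation: */
                    ret = bch2_journal_res_get(&c->journal, res, u64s,
                                    JOURNAL_RES_GET_CHECK);
                    if (ret)
                            return ret;

                    /* the btree may have changed while unlocked: */
                    if (!bch2_btree_iter_relock(iter))
                            return -EINTR;
            }
            return ret;
    }

Because the iterator was unlocked while waiting, relocking can fail;
-EINTR tells the transaction machinery to restart rather than proceed
with stale iterators.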
diff --git a/fs/bcachefs/journal.c b/fs/bcachefs/journal.c
index b4d03766462811e1546e0d0311686f64d1ab94d6..5db0a469ac24ac0de19c75a615bc905fba696fcd 100644
--- a/fs/bcachefs/journal.c
+++ b/fs/bcachefs/journal.c
@@ -335,15 +335,14 @@ u64 bch2_inode_journal_seq(struct journal *j, u64 inode)
 }
 
 static int __journal_res_get(struct journal *j, struct journal_res *res,
-                             unsigned u64s_min, unsigned u64s_max)
+                            unsigned flags)
 {
        struct bch_fs *c = container_of(j, struct bch_fs, journal);
        struct journal_buf *buf;
        int ret;
 retry:
-       ret = journal_res_get_fast(j, res, u64s_min, u64s_max);
-       if (ret)
-               return ret;
+       if (journal_res_get_fast(j, res, flags))
+               return 0;
 
        spin_lock(&j->lock);
        /*
@@ -351,10 +350,9 @@ retry:
         * that just did journal_entry_open() and call journal_entry_close()
         * unnecessarily
         */
-       ret = journal_res_get_fast(j, res, u64s_min, u64s_max);
-       if (ret) {
+       if (journal_res_get_fast(j, res, flags)) {
                spin_unlock(&j->lock);
-               return 1;
+               return 0;
        }
 
        /*
@@ -377,7 +375,12 @@ retry:
                spin_unlock(&j->lock);
                return -EROFS;
        case JOURNAL_ENTRY_INUSE:
-               /* haven't finished writing out the previous one: */
+               /*
+                * The current journal entry is still open, but we failed to get
+                * a journal reservation because there's not enough space in it,
+                * and we can't close it and start another because we haven't
+                * finished writing out the previous entry:
+                */
                spin_unlock(&j->lock);
                trace_journal_entry_full(c);
                goto blocked;
@@ -408,7 +411,7 @@ retry:
 blocked:
        if (!j->res_get_blocked_start)
                j->res_get_blocked_start = local_clock() ?: 1;
-       return 0;
+       return -EAGAIN;
 }
 
 /*
@@ -422,14 +425,14 @@ blocked:
  * btree node write locks.
  */
 int bch2_journal_res_get_slowpath(struct journal *j, struct journal_res *res,
-                                unsigned u64s_min, unsigned u64s_max)
+                                 unsigned flags)
 {
        int ret;
 
        wait_event(j->wait,
-                  (ret = __journal_res_get(j, res, u64s_min,
-                                           u64s_max)));
-       return ret < 0 ? ret : 0;
+                  (ret = __journal_res_get(j, res, flags)) != -EAGAIN ||
+                  (flags & JOURNAL_RES_GET_NONBLOCK));
+       return ret;
 }
 
 u64 bch2_journal_last_unwritten_seq(struct journal *j)
@@ -453,28 +456,55 @@ u64 bch2_journal_last_unwritten_seq(struct journal *j)
  * btree root - every journal entry contains the roots of all the btrees, so it
  * doesn't need to bother with getting a journal reservation
  */
-int bch2_journal_open_seq_async(struct journal *j, u64 seq, struct closure *parent)
+int bch2_journal_open_seq_async(struct journal *j, u64 seq, struct closure *cl)
 {
-       int ret;
-
+       struct bch_fs *c = container_of(j, struct bch_fs, journal);
+       bool need_reclaim = false;
+retry:
        spin_lock(&j->lock);
-       BUG_ON(seq > journal_cur_seq(j));
 
        if (seq < journal_cur_seq(j) ||
            journal_entry_is_open(j)) {
                spin_unlock(&j->lock);
-               return 1;
+               return 0;
+       }
+
+       if (journal_cur_seq(j) < seq) {
+               switch (journal_buf_switch(j, false)) {
+               case JOURNAL_ENTRY_ERROR:
+                       spin_unlock(&j->lock);
+                       return -EROFS;
+               case JOURNAL_ENTRY_INUSE:
+                       /* haven't finished writing out the previous one: */
+                       trace_journal_entry_full(c);
+                       goto blocked;
+               case JOURNAL_ENTRY_CLOSED:
+                       break;
+               case JOURNAL_UNLOCKED:
+                       goto retry;
+               }
+       }
+
+       BUG_ON(journal_cur_seq(j) < seq);
+
+       if (!journal_entry_open(j)) {
+               need_reclaim = true;
+               goto blocked;
        }
 
-       ret = journal_entry_open(j);
-       if (!ret)
-               closure_wait(&j->async_wait, parent);
        spin_unlock(&j->lock);
 
-       if (!ret)
-               bch2_journal_reclaim_work(&j->reclaim_work.work);
+       return 0;
+blocked:
+       if (!j->res_get_blocked_start)
+               j->res_get_blocked_start = local_clock() ?: 1;
 
-       return ret;
+       closure_wait(&j->async_wait, cl);
+       spin_unlock(&j->lock);
+
+       if (need_reclaim)
+               bch2_journal_reclaim_work(&j->reclaim_work.work);
+       return -EAGAIN;
 }
 
 static int journal_seq_error(struct journal *j, u64 seq)
@@ -594,11 +624,10 @@ int bch2_journal_flush_seq(struct journal *j, u64 seq)
 void bch2_journal_meta_async(struct journal *j, struct closure *parent)
 {
        struct journal_res res;
-       unsigned u64s = jset_u64s(0);
 
        memset(&res, 0, sizeof(res));
 
-       bch2_journal_res_get(j, &res, u64s, u64s);
+       bch2_journal_res_get(j, &res, jset_u64s(0), 0);
        bch2_journal_res_put(j, &res);
 
        bch2_journal_flush_seq_async(j, res.seq, parent);
@@ -607,12 +636,11 @@ void bch2_journal_meta_async(struct journal *j, struct closure *parent)
 int bch2_journal_meta(struct journal *j)
 {
        struct journal_res res;
-       unsigned u64s = jset_u64s(0);
        int ret;
 
        memset(&res, 0, sizeof(res));
 
-       ret = bch2_journal_res_get(j, &res, u64s, u64s);
+       ret = bch2_journal_res_get(j, &res, jset_u64s(0), 0);
        if (ret)
                return ret;
 
diff --git a/fs/bcachefs/journal.h b/fs/bcachefs/journal.h
index 77cf39cc64ff57b42a5e152710f6b9083572893b..d9c094ba2ca0b2fc809b8ac98ec21f2f8c682e4e 100644
--- a/fs/bcachefs/journal.h
+++ b/fs/bcachefs/journal.h
@@ -272,12 +272,14 @@ static inline void bch2_journal_res_put(struct journal *j,
 }
 
 int bch2_journal_res_get_slowpath(struct journal *, struct journal_res *,
-                                unsigned, unsigned);
+                                 unsigned);
+
+#define JOURNAL_RES_GET_NONBLOCK       (1 << 0)
+#define JOURNAL_RES_GET_CHECK          (1 << 1)
 
 static inline int journal_res_get_fast(struct journal *j,
                                       struct journal_res *res,
-                                      unsigned u64s_min,
-                                      unsigned u64s_max)
+                                      unsigned flags)
 {
        union journal_res_state old, new;
        u64 v = atomic64_read(&j->reservations.counter);
@@ -289,42 +291,45 @@ static inline int journal_res_get_fast(struct journal *j,
                 * Check if there is still room in the current journal
                 * entry:
                 */
-               if (old.cur_entry_offset + u64s_min > j->cur_entry_u64s)
+               if (new.cur_entry_offset + res->u64s > j->cur_entry_u64s)
                        return 0;
 
-               res->offset     = old.cur_entry_offset;
-               res->u64s       = min(u64s_max, j->cur_entry_u64s -
-                                     old.cur_entry_offset);
+               if (flags & JOURNAL_RES_GET_CHECK)
+                       return 1;
 
-               journal_state_inc(&new);
                new.cur_entry_offset += res->u64s;
+               journal_state_inc(&new);
        } while ((v = atomic64_cmpxchg(&j->reservations.counter,
                                       old.v, new.v)) != old.v);
 
-       res->ref = true;
-       res->idx = new.idx;
-       res->seq = le64_to_cpu(j->buf[res->idx].data->seq);
+       res->ref        = true;
+       res->idx        = old.idx;
+       res->offset     = old.cur_entry_offset;
+       res->seq        = le64_to_cpu(j->buf[old.idx].data->seq);
        return 1;
 }
 
 static inline int bch2_journal_res_get(struct journal *j, struct journal_res *res,
-                                     unsigned u64s_min, unsigned u64s_max)
+                                      unsigned u64s, unsigned flags)
 {
        int ret;
 
        EBUG_ON(res->ref);
-       EBUG_ON(u64s_max < u64s_min);
        EBUG_ON(!test_bit(JOURNAL_STARTED, &j->flags));
 
-       if (journal_res_get_fast(j, res, u64s_min, u64s_max))
+       res->u64s = u64s;
+
+       if (journal_res_get_fast(j, res, flags))
                goto out;
 
-       ret = bch2_journal_res_get_slowpath(j, res, u64s_min, u64s_max);
+       ret = bch2_journal_res_get_slowpath(j, res, flags);
        if (ret)
                return ret;
 out:
-       lock_acquire_shared(&j->res_map, 0, 0, NULL, _THIS_IP_);
-       EBUG_ON(!res->ref);
+       if (!(flags & JOURNAL_RES_GET_CHECK)) {
+               lock_acquire_shared(&j->res_map, 0, 0, NULL, _THIS_IP_);
+               EBUG_ON(!res->ref);
+       }
        return 0;
 }
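
With the reworked journal.h interface there are three usage patterns
(assumed usage, condensed from the hunks above):

    struct journal_res res;
    int ret;

    memset(&res, 0, sizeof(res));

    /* 1) blocking get (flags == 0), as in bch2_journal_meta(): */
    ret = bch2_journal_res_get(j, &res, jset_u64s(0), 0);

    /* 2) non-blocking: returns -EAGAIN instead of sleeping, so it's
     *    safe while holding btree node locks: */
    ret = bch2_journal_res_get(j, &res, u64s, JOURNAL_RES_GET_NONBLOCK);

    /* 3) check only: blocks until u64s of space exist, but takes no
     *    reservation (res.ref stays false), so there is nothing to
     *    put afterwards: */
    ret = bch2_journal_res_get(j, &res, u64s, JOURNAL_RES_GET_CHECK);

Also note the change folded into journal_res_get_fast(): reservations
are now fixed-size (exactly the caller's res->u64s) rather than the
old variable min/max size.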