bcachefs: Assorted journal refactoring
Author:     Kent Overstreet <kent.overstreet@gmail.com>
AuthorDate: Mon, 18 Feb 2019 22:39:42 +0000 (17:39 -0500)
Commit:     Kent Overstreet <kent.overstreet@linux.dev>
CommitDate: Sun, 22 Oct 2023 21:08:16 +0000 (17:08 -0400)
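
This replaces journal_buf_switch() and its four-state result enum with
__journal_entry_close(), which simply returns true once the entry is
closed, so entry close, bch2_journal_halt() and journal_entry_open()
all share the same cmpxchg-loop idiom on journal_res_state.
journal_entry_open() moves to a 0/-ENOSPC/-EAGAIN/-EROFS convention,
bch2_journal_entry_sectors() becomes bch2_journal_space_available()
and publishes its result in j->cur_entry_sectors, per-entry sector
accounting moves from j->prev_buf_sectors/j->cur_buf_sectors into
struct journal_buf, journal_buf->size is renamed to buf_size, and
bch2_journal_buf_put_slowpath() becomes __bch2_journal_buf_put().
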
Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
fs/bcachefs/journal.c
fs/bcachefs/journal.h
fs/bcachefs/journal_io.c
fs/bcachefs/journal_io.h
fs/bcachefs/journal_types.h

diff --git a/fs/bcachefs/journal.c b/fs/bcachefs/journal.c
index cf4729b7a083547e47634d922136375ae8848f53..91d0e5d443ed01f4725c1d74b14c7f0383e67c3e 100644
 #include "super-io.h"
 #include "trace.h"
 
-static bool journal_entry_is_open(struct journal *j)
+static bool __journal_entry_is_open(union journal_res_state state)
 {
-       return j->reservations.cur_entry_offset < JOURNAL_ENTRY_CLOSED_VAL;
+       return state.cur_entry_offset < JOURNAL_ENTRY_CLOSED_VAL;
 }
 
-void bch2_journal_buf_put_slowpath(struct journal *j, bool need_write_just_set)
+static bool journal_entry_is_open(struct journal *j)
 {
-       struct journal_buf *w = journal_prev_buf(j);
-
-       atomic_dec_bug(&journal_seq_pin(j, le64_to_cpu(w->data->seq))->count);
-
-       if (!need_write_just_set &&
-           test_bit(JOURNAL_NEED_WRITE, &j->flags))
-               bch2_time_stats_update(j->delay_time,
-                                      j->need_write_time);
-
-       closure_call(&j->io, bch2_journal_write, system_highpri_wq, NULL);
+       return __journal_entry_is_open(j->reservations);
 }
 
 static void journal_pin_new_entry(struct journal *j, int count)
@@ -77,39 +68,76 @@ static inline bool journal_entry_empty(struct jset *j)
        return true;
 }
 
-static enum {
-       JOURNAL_ENTRY_ERROR,
-       JOURNAL_ENTRY_INUSE,
-       JOURNAL_ENTRY_CLOSED,
-       JOURNAL_UNLOCKED,
-} journal_buf_switch(struct journal *j, bool need_write_just_set)
+void bch2_journal_halt(struct journal *j)
+{
+       union journal_res_state old, new;
+       u64 v = atomic64_read(&j->reservations.counter);
+
+       do {
+               old.v = new.v = v;
+               if (old.cur_entry_offset == JOURNAL_ENTRY_ERROR_VAL)
+                       return;
+
+               new.cur_entry_offset = JOURNAL_ENTRY_ERROR_VAL;
+       } while ((v = atomic64_cmpxchg(&j->reservations.counter,
+                                      old.v, new.v)) != old.v);
+
+       journal_wake(j);
+       closure_wake_up(&journal_cur_buf(j)->wait);
+       closure_wake_up(&journal_prev_buf(j)->wait);
+}
+
+/* journal entry close/open: */
+
+void __bch2_journal_buf_put(struct journal *j, bool need_write_just_set)
+{
+       struct journal_buf *w = journal_prev_buf(j);
+
+       atomic_dec_bug(&journal_seq_pin(j, le64_to_cpu(w->data->seq))->count);
+
+       if (!need_write_just_set &&
+           test_bit(JOURNAL_NEED_WRITE, &j->flags))
+               bch2_time_stats_update(j->delay_time,
+                                      j->need_write_time);
+
+       clear_bit(JOURNAL_NEED_WRITE, &j->flags);
+
+       closure_call(&j->io, bch2_journal_write, system_highpri_wq, NULL);
+}
+
+/*
+ * Returns true if journal entry is now closed:
+ */
+static bool __journal_entry_close(struct journal *j)
 {
        struct bch_fs *c = container_of(j, struct bch_fs, journal);
        struct journal_buf *buf = journal_cur_buf(j);
        union journal_res_state old, new;
        u64 v = atomic64_read(&j->reservations.counter);
+       bool set_need_write = false;
+       unsigned sectors;
 
        lockdep_assert_held(&j->lock);
 
        do {
                old.v = new.v = v;
                if (old.cur_entry_offset == JOURNAL_ENTRY_CLOSED_VAL)
-                       return JOURNAL_ENTRY_CLOSED;
+                       return true;
 
                if (old.cur_entry_offset == JOURNAL_ENTRY_ERROR_VAL) {
                        /* this entry will never be written: */
                        closure_wake_up(&buf->wait);
-                       return JOURNAL_ENTRY_ERROR;
+                       return true;
                }
 
-               if (new.prev_buf_unwritten)
-                       return JOURNAL_ENTRY_INUSE;
+               if (!test_bit(JOURNAL_NEED_WRITE, &j->flags)) {
+                       set_bit(JOURNAL_NEED_WRITE, &j->flags);
+                       j->need_write_time = local_clock();
+                       set_need_write = true;
+               }
 
-               /*
-                * avoid race between setting buf->data->u64s and
-                * journal_res_put starting write:
-                */
-               journal_state_inc(&new);
+               if (new.prev_buf_unwritten)
+                       return false;
 
                new.cur_entry_offset = JOURNAL_ENTRY_CLOSED_VAL;
                new.idx++;
@@ -119,15 +147,12 @@ static enum {
        } while ((v = atomic64_cmpxchg(&j->reservations.counter,
                                       old.v, new.v)) != old.v);
 
-       clear_bit(JOURNAL_NEED_WRITE, &j->flags);
-
        buf->data->u64s         = cpu_to_le32(old.cur_entry_offset);
 
-       j->prev_buf_sectors =
-               vstruct_blocks_plus(buf->data, c->block_bits,
-                                   buf->u64s_reserved) *
-               c->opts.block_size;
-       BUG_ON(j->prev_buf_sectors > j->cur_buf_sectors);
+       sectors = vstruct_blocks_plus(buf->data, c->block_bits,
+                                     buf->u64s_reserved) << c->block_bits;
+       BUG_ON(sectors > buf->sectors);
+       buf->sectors = sectors;
 
        bkey_extent_init(&buf->key);
 
@@ -163,32 +188,22 @@ static enum {
        bch2_journal_buf_init(j);
 
        cancel_delayed_work(&j->write_work);
-       spin_unlock(&j->lock);
 
        /* ugh - might be called from __journal_res_get() under wait_event() */
        __set_current_state(TASK_RUNNING);
-       bch2_journal_buf_put(j, old.idx, need_write_just_set);
-
-       return JOURNAL_UNLOCKED;
+       bch2_journal_buf_put(j, old.idx, set_need_write);
+       return true;
 }
 
-void bch2_journal_halt(struct journal *j)
+static bool journal_entry_close(struct journal *j)
 {
-       union journal_res_state old, new;
-       u64 v = atomic64_read(&j->reservations.counter);
-
-       do {
-               old.v = new.v = v;
-               if (old.cur_entry_offset == JOURNAL_ENTRY_ERROR_VAL)
-                       return;
+       bool ret;
 
-               new.cur_entry_offset = JOURNAL_ENTRY_ERROR_VAL;
-       } while ((v = atomic64_cmpxchg(&j->reservations.counter,
-                                      old.v, new.v)) != old.v);
+       spin_lock(&j->lock);
+       ret = __journal_entry_close(j);
+       spin_unlock(&j->lock);
 
-       journal_wake(j);
-       closure_wake_up(&journal_cur_buf(j)->wait);
-       closure_wake_up(&journal_prev_buf(j)->wait);
+       return ret;
 }
 
 /*
@@ -196,17 +211,16 @@ void bch2_journal_halt(struct journal *j)
  * journal reservation - journal entry is open means journal is dirty:
  *
  * returns:
- * 1:          success
- * 0:          journal currently full (must wait)
- * -EROFS:     insufficient rw devices
- * -EIO:       journal error
+ * 0:          success
+ * -ENOSPC:    journal currently full, must invoke reclaim
+ * -EAGAIN:    journal blocked, must wait
+ * -EROFS:     insufficient rw devices or journal error
  */
 static int journal_entry_open(struct journal *j)
 {
        struct journal_buf *buf = journal_cur_buf(j);
        union journal_res_state old, new;
-       ssize_t u64s;
-       int sectors;
+       int u64s, ret;
        u64 v;
 
        lockdep_assert_held(&j->lock);
@@ -216,29 +230,22 @@ static int journal_entry_open(struct journal *j)
                return -EAGAIN;
 
        if (!fifo_free(&j->pin))
-               return 0;
+               return -ENOSPC;
 
-       sectors = bch2_journal_entry_sectors(j);
-       if (sectors <= 0)
-               return sectors;
+       ret = bch2_journal_space_available(j);
+       if (ret)
+               return ret;
 
-       buf->disk_sectors       = sectors;
        buf->u64s_reserved      = j->entry_u64s_reserved;
+       buf->disk_sectors       = j->cur_entry_sectors;
+       buf->sectors            = min(buf->disk_sectors, buf->buf_size >> 9);
 
-       sectors = min_t(unsigned, sectors, buf->size >> 9);
-       j->cur_buf_sectors      = sectors;
-
-       u64s = (sectors << 9) / sizeof(u64);
-
-       /* Subtract the journal header */
-       u64s -= sizeof(struct jset) / sizeof(u64);
-       u64s -= buf->u64s_reserved;
-       u64s  = max_t(ssize_t, 0L, u64s);
-
-       BUG_ON(u64s >= JOURNAL_ENTRY_CLOSED_VAL);
+       u64s = (int) (buf->sectors << 9) / sizeof(u64) -
+               journal_entry_overhead(j);
+       u64s  = clamp_t(int, u64s, 0, JOURNAL_ENTRY_CLOSED_VAL - 1);
 
        if (u64s <= le32_to_cpu(buf->data->u64s))
-               return 0;
+               return -ENOSPC;
 
        /*
         * Must be set before marking the journal entry as open:
@@ -250,10 +257,11 @@ static int journal_entry_open(struct journal *j)
                old.v = new.v = v;
 
                if (old.cur_entry_offset == JOURNAL_ENTRY_ERROR_VAL)
-                       return -EIO;
+                       return -EROFS;
 
                /* Handle any already added entries */
                new.cur_entry_offset = le32_to_cpu(buf->data->u64s);
+               journal_state_inc(&new);
        } while ((v = atomic64_cmpxchg(&j->reservations.counter,
                                       old.v, new.v)) != old.v);
 
@@ -266,48 +274,16 @@ static int journal_entry_open(struct journal *j)
                         &j->write_work,
                         msecs_to_jiffies(j->write_delay_ms));
        journal_wake(j);
-       return 1;
-}
-
-static bool __journal_entry_close(struct journal *j)
-{
-       bool set_need_write;
-
-       if (!journal_entry_is_open(j)) {
-               spin_unlock(&j->lock);
-               return true;
-       }
-
-       set_need_write = !test_and_set_bit(JOURNAL_NEED_WRITE, &j->flags);
-       if (set_need_write)
-               j->need_write_time = local_clock();
-
-       switch (journal_buf_switch(j, set_need_write)) {
-       case JOURNAL_ENTRY_INUSE:
-               spin_unlock(&j->lock);
-               return false;
-       default:
-               spin_unlock(&j->lock);
-               fallthrough;
-       case JOURNAL_UNLOCKED:
-               return false;
-       }
-}
-
-static bool journal_entry_close(struct journal *j)
-{
-       spin_lock(&j->lock);
-       return __journal_entry_close(j);
+       return 0;
 }
 
 static bool journal_quiesced(struct journal *j)
 {
-       bool ret;
+       union journal_res_state state = READ_ONCE(j->reservations);
+       bool ret = !state.prev_buf_unwritten && !__journal_entry_is_open(state);
 
-       spin_lock(&j->lock);
-       ret = !j->reservations.prev_buf_unwritten &&
-               !journal_entry_is_open(j);
-       __journal_entry_close(j);
+       if (!ret)
+               journal_entry_close(j);
        return ret;
 }
 
@@ -357,7 +333,11 @@ retry:
        if (journal_res_get_fast(j, res, flags))
                return 0;
 
+       if (bch2_journal_error(j))
+               return -EROFS;
+
        spin_lock(&j->lock);
+
        /*
         * Recheck after taking the lock, so we don't race with another thread
         * that just did journal_entry_open() and call journal_entry_close()
@@ -375,56 +355,42 @@ retry:
         */
        buf = journal_cur_buf(j);
        if (journal_entry_is_open(j) &&
-           buf->size >> 9 < buf->disk_sectors &&
-           buf->size < JOURNAL_ENTRY_SIZE_MAX)
-               j->buf_size_want = max(j->buf_size_want, buf->size << 1);
+           buf->buf_size >> 9 < buf->disk_sectors &&
+           buf->buf_size < JOURNAL_ENTRY_SIZE_MAX)
+               j->buf_size_want = max(j->buf_size_want, buf->buf_size << 1);
 
-       /*
-        * Close the current journal entry if necessary, then try to start a new
-        * one:
-        */
-       switch (journal_buf_switch(j, false)) {
-       case JOURNAL_ENTRY_ERROR:
-               spin_unlock(&j->lock);
-               return -EROFS;
-       case JOURNAL_ENTRY_INUSE:
+       if (journal_entry_is_open(j) &&
+           !__journal_entry_close(j)) {
                /*
-                * The current journal entry is still open, but we failed to get
-                * a journal reservation because there's not enough space in it,
-                * and we can't close it and start another because we haven't
-                * finished writing out the previous entry:
+                * We failed to get a reservation on the current open journal
+                * entry because it's full, and we can't close it because
+                * there's still a previous one in flight:
                 */
-               spin_unlock(&j->lock);
                trace_journal_entry_full(c);
-               goto blocked;
-       case JOURNAL_ENTRY_CLOSED:
-               break;
-       case JOURNAL_UNLOCKED:
-               goto retry;
+               ret = -EAGAIN;
+       } else {
+               ret = journal_entry_open(j);
        }
 
-       /* We now have a new, closed journal buf - see if we can open it: */
-       ret = journal_entry_open(j);
+       if ((ret == -EAGAIN || ret == -ENOSPC) &&
+           !j->res_get_blocked_start)
+               j->res_get_blocked_start = local_clock() ?: 1;
+
        spin_unlock(&j->lock);
 
-       if (ret < 0)
-               return ret;
-       if (ret)
+       if (!ret)
                goto retry;
+       if (ret == -ENOSPC) {
+               /*
+                * Journal is full - can't rely on reclaim from work item due to
+                * freezing:
+                */
+               trace_journal_full(c);
+               bch2_journal_reclaim_work(&j->reclaim_work.work);
+               ret = -EAGAIN;
+       }
 
-       /* Journal's full, we have to wait */
-
-       /*
-        * Direct reclaim - can't rely on reclaim from work item
-        * due to freezing..
-        */
-       bch2_journal_reclaim_work(&j->reclaim_work.work);
-
-       trace_journal_full(c);
-blocked:
-       if (!j->res_get_blocked_start)
-               j->res_get_blocked_start = local_clock() ?: 1;
-       return -EAGAIN;
+       return ret;
 }
 
 /*
@@ -461,7 +427,7 @@ void bch2_journal_entry_res_resize(struct journal *j,
 
        j->entry_u64s_reserved += d;
        if (d <= 0)
-               goto out_unlock;
+               goto out;
 
        j->cur_entry_u64s -= d;
        smp_mb();
@@ -474,15 +440,12 @@ void bch2_journal_entry_res_resize(struct journal *j,
                 * Not enough room in current journal entry, have to flush it:
                 */
                __journal_entry_close(j);
-               goto out;
+       } else {
+               journal_cur_buf(j)->u64s_reserved += d;
        }
-
-       journal_cur_buf(j)->u64s_reserved += d;
-out_unlock:
-       spin_unlock(&j->lock);
 out:
+       spin_unlock(&j->lock);
        res->u64s += d;
-       return;
 }
 
 /* journal flushing: */
@@ -512,47 +475,47 @@ int bch2_journal_open_seq_async(struct journal *j, u64 seq, struct closure *cl)
 {
        struct bch_fs *c = container_of(j, struct bch_fs, journal);
        int ret;
-retry:
+
        spin_lock(&j->lock);
 
-       if (seq < journal_cur_seq(j) ||
+       /*
+        * Can't try to open more than one sequence number ahead:
+        */
+       BUG_ON(journal_cur_seq(j) < seq && !journal_entry_is_open(j));
+
+       if (journal_cur_seq(j) > seq ||
            journal_entry_is_open(j)) {
                spin_unlock(&j->lock);
                return 0;
        }
 
-       if (journal_cur_seq(j) < seq) {
-               switch (journal_buf_switch(j, false)) {
-               case JOURNAL_ENTRY_ERROR:
-                       spin_unlock(&j->lock);
-                       return -EROFS;
-               case JOURNAL_ENTRY_INUSE:
-                       /* haven't finished writing out the previous one: */
-                       trace_journal_entry_full(c);
-                       goto blocked;
-               case JOURNAL_ENTRY_CLOSED:
-                       break;
-               case JOURNAL_UNLOCKED:
-                       goto retry;
-               }
-       }
-
-       BUG_ON(journal_cur_seq(j) < seq);
+       if (journal_cur_seq(j) < seq &&
+           !__journal_entry_close(j)) {
+               /* haven't finished writing out the previous one: */
+               trace_journal_entry_full(c);
+               ret = -EAGAIN;
+       } else {
+               BUG_ON(journal_cur_seq(j) != seq);
 
-       ret = journal_entry_open(j);
-       if (ret) {
-               spin_unlock(&j->lock);
-               return ret < 0 ? ret : 0;
+               ret = journal_entry_open(j);
        }
-blocked:
-       if (!j->res_get_blocked_start)
+
+       if ((ret == -EAGAIN || ret == -ENOSPC) &&
+           !j->res_get_blocked_start)
                j->res_get_blocked_start = local_clock() ?: 1;
 
-       closure_wait(&j->async_wait, cl);
+       if (ret == -EAGAIN || ret == -ENOSPC)
+               closure_wait(&j->async_wait, cl);
+
        spin_unlock(&j->lock);
 
-       bch2_journal_reclaim_work(&j->reclaim_work.work);
-       return -EAGAIN;
+       if (ret == -ENOSPC) {
+               trace_journal_full(c);
+               bch2_journal_reclaim_work(&j->reclaim_work.work);
+               ret = -EAGAIN;
+       }
+
+       return ret;
 }
 
 static int journal_seq_error(struct journal *j, u64 seq)
@@ -635,8 +598,7 @@ void bch2_journal_flush_seq_async(struct journal *j, u64 seq,
 
        if (seq == journal_cur_seq(j))
                __journal_entry_close(j);
-       else
-               spin_unlock(&j->lock);
+       spin_unlock(&j->lock);
 }
 
 static int journal_seq_flushed(struct journal *j, u64 seq)
@@ -648,8 +610,7 @@ static int journal_seq_flushed(struct journal *j, u64 seq)
 
        if (seq == journal_cur_seq(j))
                __journal_entry_close(j);
-       else
-               spin_unlock(&j->lock);
+       spin_unlock(&j->lock);
 
        return ret;
 }
@@ -783,7 +744,7 @@ static int __bch2_set_nr_journal_buckets(struct bch_dev *ca, unsigned nr,
                goto err;
 
        journal_buckets = bch2_sb_resize_journal(&ca->disk_sb,
-                               nr + sizeof(*journal_buckets) / sizeof(u64));
+                                                nr + sizeof(*journal_buckets) / sizeof(u64));
        if (!journal_buckets)
                goto err;
 
@@ -846,9 +807,9 @@ static int __bch2_set_nr_journal_buckets(struct bch_dev *ca, unsigned nr,
                ja->nr++;
 
                bch2_mark_metadata_bucket(c, ca, bucket, BCH_DATA_JOURNAL,
-                               ca->mi.bucket_size,
-                               gc_phase(GC_PHASE_SB),
-                               0);
+                                         ca->mi.bucket_size,
+                                         gc_phase(GC_PHASE_SB),
+                                         0);
 
                if (c) {
                        spin_unlock(&c->journal.lock);
@@ -899,7 +860,7 @@ int bch2_set_nr_journal_buckets(struct bch_fs *c, struct bch_dev *ca,
                 */
 
                if (bch2_disk_reservation_get(c, &disk_res,
-                               bucket_to_sector(ca, nr - ja->nr), 1, 0)) {
+                                             bucket_to_sector(ca, nr - ja->nr), 1, 0)) {
                        mutex_unlock(&c->sb_lock);
                        return -ENOSPC;
                }
@@ -996,7 +957,7 @@ void bch2_fs_journal_start(struct journal *j)
                journal_pin_new_entry(j, 0);
 
        /*
-        * journal_buf_switch() only inits the next journal entry when it
+        * __journal_entry_close() only inits the next journal entry when it
         * closes an open journal entry - the very first journal entry gets
         * initialized here:
         */
@@ -1063,8 +1024,8 @@ int bch2_dev_journal_init(struct bch_dev *ca, struct bch_sb *sb)
 
 void bch2_fs_journal_exit(struct journal *j)
 {
-       kvpfree(j->buf[1].data, j->buf[1].size);
-       kvpfree(j->buf[0].data, j->buf[0].size);
+       kvpfree(j->buf[1].data, j->buf[1].buf_size);
+       kvpfree(j->buf[0].data, j->buf[0].buf_size);
        free_fifo(&j->pin);
 }
 
@@ -1088,8 +1049,8 @@ int bch2_fs_journal_init(struct journal *j)
 
        lockdep_init_map(&j->res_map, "journal res", &res_key, 0);
 
-       j->buf[0].size          = JOURNAL_ENTRY_SIZE_MIN;
-       j->buf[1].size          = JOURNAL_ENTRY_SIZE_MIN;
+       j->buf[0].buf_size      = JOURNAL_ENTRY_SIZE_MIN;
+       j->buf[1].buf_size      = JOURNAL_ENTRY_SIZE_MIN;
        j->write_delay_ms       = 1000;
        j->reclaim_delay_ms     = 100;
 
@@ -1102,8 +1063,8 @@ int bch2_fs_journal_init(struct journal *j)
                 { .cur_entry_offset = JOURNAL_ENTRY_CLOSED_VAL }).v);
 
        if (!(init_fifo(&j->pin, JOURNAL_PIN, GFP_KERNEL)) ||
-           !(j->buf[0].data = kvpmalloc(j->buf[0].size, GFP_KERNEL)) ||
-           !(j->buf[1].data = kvpmalloc(j->buf[1].size, GFP_KERNEL))) {
+           !(j->buf[0].data = kvpmalloc(j->buf[0].buf_size, GFP_KERNEL)) ||
+           !(j->buf[1].data = kvpmalloc(j->buf[1].buf_size, GFP_KERNEL))) {
                ret = -ENOMEM;
                goto out;
        }
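
The 0 / -ENOSPC / -EAGAIN / -EROFS convention is what lets
__bch2_journal_res_get() replace the old four-way switch on
journal_buf_switch() with straight-line code. A minimal userspace
model of the contract — struct model_journal and its helpers are
invented for illustration; only the error-code protocol comes from
this patch:

	#include <errno.h>
	#include <stdbool.h>
	#include <stdio.h>

	struct model_journal {
		bool		halted;			/* cur_entry_offset == JOURNAL_ENTRY_ERROR_VAL */
		bool		prev_unwritten;		/* previous buffer still in flight */
		unsigned	cur_entry_sectors;	/* what space_available() computed */
	};

	/* 0: success, -ENOSPC: full, -EAGAIN: blocked, -EROFS: error */
	static int model_entry_open(struct model_journal *j)
	{
		if (j->halted)
			return -EROFS;
		if (!j->cur_entry_sectors)
			return -ENOSPC;
		return 0;
	}

	static int model_res_get(struct model_journal *j)
	{
		int ret;

		if (j->prev_unwritten)
			ret = -EAGAIN;	/* entry full, can't close + reopen yet */
		else
			ret = model_entry_open(j);

		if (ret == -ENOSPC) {
			/*
			 * Journal full - run reclaim directly (the work item
			 * can't be relied on because of freezing), then wait
			 * like any other blocked caller:
			 */
			j->cur_entry_sectors = 8;	/* pretend reclaim freed space */
			ret = -EAGAIN;
		}
		return ret;	/* caller retries on 0, wait_event()s on -EAGAIN */
	}

	int main(void)
	{
		struct model_journal j = { .cur_entry_sectors = 0 };

		printf("%d\n", model_res_get(&j));	/* -EAGAIN, after reclaim ran */
		printf("%d\n", model_res_get(&j));	/* 0 - entry would now open */
		return 0;
	}

The key property is that -ENOSPC is the only code the caller must act
on itself (direct reclaim); after that it degrades to -EAGAIN like any
other transient block.
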
diff --git a/fs/bcachefs/journal.h b/fs/bcachefs/journal.h
index 5290cdeab585c940107c06fad23f7e8fb7d95da0..4acb0f59396dedc6594c3f8dbf60993034dafec4 100644
@@ -179,6 +179,11 @@ static inline unsigned jset_u64s(unsigned u64s)
        return u64s + sizeof(struct jset_entry) / sizeof(u64);
 }
 
+static inline int journal_entry_overhead(struct journal *j)
+{
+       return sizeof(struct jset) / sizeof(u64) + j->entry_u64s_reserved;
+}
+
 static inline struct jset_entry *
 bch2_journal_add_entry_noreservation(struct journal_buf *buf, size_t u64s)
 {
@@ -225,7 +230,7 @@ static inline void bch2_journal_add_keys(struct journal *j, struct journal_res *
                               id, 0, k, k->k.u64s);
 }
 
-void bch2_journal_buf_put_slowpath(struct journal *, bool);
+void __bch2_journal_buf_put(struct journal *, bool);
 
 static inline void bch2_journal_buf_put(struct journal *j, unsigned idx,
                                       bool need_write_just_set)
@@ -236,17 +241,10 @@ static inline void bch2_journal_buf_put(struct journal *j, unsigned idx,
                                    .buf0_count = idx == 0,
                                    .buf1_count = idx == 1,
                                    }).v, &j->reservations.counter);
-
-       EBUG_ON(s.idx != idx && !s.prev_buf_unwritten);
-
-       /*
-        * Do not initiate a journal write if the journal is in an error state
-        * (previous journal entry write may have failed)
-        */
-       if (s.idx != idx &&
-           !journal_state_count(s, idx) &&
-           s.cur_entry_offset != JOURNAL_ENTRY_ERROR_VAL)
-               bch2_journal_buf_put_slowpath(j, need_write_just_set);
+       if (!journal_state_count(s, idx)) {
+               EBUG_ON(s.idx == idx || !s.prev_buf_unwritten);
+               __bch2_journal_buf_put(j, need_write_just_set);
+       }
 }
 
 /*
@@ -333,6 +331,8 @@ out:
        return 0;
 }
 
+/* journal_entry_res: */
+
 void bch2_journal_entry_res_resize(struct journal *,
                                   struct journal_entry_res *,
                                   unsigned);
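
In the header, bch2_journal_buf_put() now keys the slowpath purely off
the buffer's pin count: both buffers' counts live in the same 64-bit
word as the rest of the reservation state, so a put is a single atomic
subtract and "last put" is read out of the returned value. A standalone
sketch of that idea, with the bitfield layout modeled on
journal_res_state (the printf stands in for __bch2_journal_buf_put(),
and the caller is assumed to hold a reference so the count can't
underflow):

	#include <stdatomic.h>
	#include <stdint.h>
	#include <stdio.h>

	union res_state {
		uint64_t v;
		struct {
			uint64_t cur_entry_offset:20;
			uint64_t idx:1;
			uint64_t prev_buf_unwritten:1;
			uint64_t buf0_count:21;
			uint64_t buf1_count:21;
		};
	};

	static _Atomic uint64_t counter;

	static unsigned state_count(union res_state s, unsigned idx)
	{
		return idx == 0 ? s.buf0_count : s.buf1_count;
	}

	static void buf_put(unsigned idx)
	{
		union res_state sub = { .buf0_count = idx == 0,
					.buf1_count = idx == 1 };
		union res_state s;

		/* decrement one packed field with a plain 64 bit subtract: */
		s.v = atomic_fetch_sub(&counter, sub.v) - sub.v;
		if (!state_count(s, idx))
			printf("last put on buf %u: start the write\n", idx);
	}

	int main(void)
	{
		union res_state init = { .buf0_count = 2 };

		atomic_store(&counter, init.v);
		buf_put(0);	/* 2 -> 1: nothing to do */
		buf_put(0);	/* 1 -> 0: kicks off the write */
		return 0;
	}

The new journal_entry_overhead() helper factors out the other half of
the sizing arithmetic: usable u64s for an entry are
(buf->sectors << 9) / sizeof(u64) minus the jset header and minus
j->entry_u64s_reserved.
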
diff --git a/fs/bcachefs/journal_io.c b/fs/bcachefs/journal_io.c
index 17eba4269719b2554f14bab419b0e724c06b5383..e5e50be80126fb798ef4eb53da80e5c4c4cc2eba 100644
@@ -902,13 +902,16 @@ static unsigned journal_dev_buckets_available(struct journal *j,
        return available;
 }
 
-/* returns number of sectors available for next journal entry: */
-int bch2_journal_entry_sectors(struct journal *j)
+int bch2_journal_space_available(struct journal *j)
 {
        struct bch_fs *c = container_of(j, struct bch_fs, journal);
        struct bch_dev *ca;
-       unsigned sectors_available = UINT_MAX;
+       unsigned sectors_next_entry     = UINT_MAX;
        unsigned i, nr_online = 0, nr_devs = 0;
+       unsigned unwritten_sectors = j->reservations.prev_buf_unwritten
+               ? journal_prev_buf(j)->sectors
+               : 0;
+       int ret = 0;
 
        lockdep_assert_held(&j->lock);
 
@@ -921,16 +924,16 @@ int bch2_journal_entry_sectors(struct journal *j)
                if (!ja->nr)
                        continue;
 
+               nr_online++;
+
                buckets_this_device = journal_dev_buckets_available(j, ja);
                sectors_this_device = ja->sectors_free;
 
-               nr_online++;
-
                /*
                 * Note that we don't allocate the space for a journal entry
                 * until we write it out - thus, account for it here:
                 */
-               if (j->prev_buf_sectors >= sectors_this_device) {
+               if (unwritten_sectors >= sectors_this_device) {
                        if (!buckets_this_device)
                                continue;
 
@@ -938,7 +941,7 @@ int bch2_journal_entry_sectors(struct journal *j)
                        sectors_this_device = ca->mi.bucket_size;
                }
 
-               sectors_this_device -= j->prev_buf_sectors;
+               sectors_this_device -= unwritten_sectors;
 
                if (buckets_this_device)
                        sectors_this_device = ca->mi.bucket_size;
@@ -946,19 +949,26 @@ int bch2_journal_entry_sectors(struct journal *j)
                if (!sectors_this_device)
                        continue;
 
-               sectors_available = min(sectors_available,
-                                       sectors_this_device);
+               sectors_next_entry = min(sectors_next_entry,
+                                        sectors_this_device);
+
                nr_devs++;
        }
        rcu_read_unlock();
 
-       if (nr_online < c->opts.metadata_replicas_required)
-               return -EROFS;
+       if (nr_online < c->opts.metadata_replicas_required) {
+               ret = -EROFS;
+               sectors_next_entry = 0;
+       } else if (!sectors_next_entry ||
+                  nr_devs < min_t(unsigned, nr_online,
+                                  c->opts.metadata_replicas)) {
+               ret = -ENOSPC;
+               sectors_next_entry = 0;
+       }
 
-       if (nr_devs < min_t(unsigned, nr_online, c->opts.metadata_replicas))
-               return 0;
+       WRITE_ONCE(j->cur_entry_sectors, sectors_next_entry);
 
-       return sectors_available;
+       return ret;
 }
 
 static void __journal_write_alloc(struct journal *j,
@@ -1059,9 +1069,6 @@ static int journal_write_alloc(struct journal *j, struct journal_buf *w,
        __journal_write_alloc(j, w, &devs_sorted,
                              sectors, &replicas, replicas_want);
 done:
-       if (replicas >= replicas_want)
-               j->prev_buf_sectors = 0;
-
        spin_unlock(&j->lock);
        rcu_read_unlock();
 
@@ -1117,17 +1124,17 @@ static void journal_buf_realloc(struct journal *j, struct journal_buf *buf)
        unsigned new_size = READ_ONCE(j->buf_size_want);
        void *new_buf;
 
-       if (buf->size >= new_size)
+       if (buf->buf_size >= new_size)
                return;
 
        new_buf = kvpmalloc(new_size, GFP_NOIO|__GFP_NOWARN);
        if (!new_buf)
                return;
 
-       memcpy(new_buf, buf->data, buf->size);
-       kvpfree(buf->data, buf->size);
+       memcpy(new_buf, buf->data, buf->buf_size);
+       kvpfree(buf->data, buf->buf_size);
        buf->data       = new_buf;
-       buf->size       = new_size;
+       buf->buf_size   = new_size;
 }
 
 static void journal_write_done(struct closure *cl)
@@ -1227,15 +1234,14 @@ void bch2_journal_write(struct closure *cl)
 
        j->write_start_time = local_clock();
 
-       start   = vstruct_last(w->data);
+       start   = vstruct_last(jset);
        end     = bch2_journal_super_entries_add_common(c, start,
                                                le64_to_cpu(jset->seq));
        u64s    = (u64 *) end - (u64 *) start;
        BUG_ON(u64s > j->entry_u64s_reserved);
 
-       le32_add_cpu(&w->data->u64s, u64s);
-       BUG_ON(vstruct_sectors(jset, c->block_bits) >
-              w->disk_sectors);
+       le32_add_cpu(&jset->u64s, u64s);
+       BUG_ON(vstruct_sectors(jset, c->block_bits) > w->sectors);
 
        journal_write_compact(jset);
 
@@ -1273,10 +1279,10 @@ void bch2_journal_write(struct closure *cl)
                goto err;
 
        sectors = vstruct_sectors(jset, c->block_bits);
-       BUG_ON(sectors > j->prev_buf_sectors);
+       BUG_ON(sectors > w->sectors);
 
-       bytes = vstruct_bytes(w->data);
-       memset((void *) w->data + bytes, 0, (sectors << 9) - bytes);
+       bytes = vstruct_bytes(jset);
+       memset((void *) jset + bytes, 0, (sectors << 9) - bytes);
 
        if (journal_write_alloc(j, w, sectors)) {
                bch2_journal_halt(j);
@@ -1286,6 +1292,12 @@ void bch2_journal_write(struct closure *cl)
                return;
        }
 
+       /*
+        * write is allocated, no longer need to account for it in
+        * bch2_journal_entry_sectors:
+        */
+       w->sectors = 0;
+
        /*
         * XXX: we really should just disable the entire journal in nochanges
         * mode
@@ -1316,7 +1328,7 @@ void bch2_journal_write(struct closure *cl)
                trace_journal_write(bio);
                closure_bio_submit(bio, cl);
 
-               ca->journal.bucket_seq[ca->journal.cur_idx] = le64_to_cpu(w->data->seq);
+               ca->journal.bucket_seq[ca->journal.cur_idx] = le64_to_cpu(jset->seq);
        }
 
        for_each_rw_member(ca, c, i)
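
bch2_journal_space_available() folds the old
bch2_journal_entry_sectors() computation together with its error
reporting, and charges the still-unwritten previous buffer
(journal_prev_buf(j)->sectors) against each device in place of the old
j->prev_buf_sectors. A userspace sketch of the per-device calculation;
the device parameters are made up, only the shape of the loop follows
the patch:

	#include <limits.h>
	#include <stdio.h>

	struct model_dev {
		unsigned bucket_size;
		unsigned buckets_free;
		unsigned sectors_free;	/* in the current, partly used bucket */
	};

	static unsigned next_entry_sectors(struct model_dev *d, unsigned n,
					   unsigned unwritten_sectors)
	{
		unsigned min_sectors = UINT_MAX;

		for (unsigned i = 0; i < n; i++) {
			unsigned buckets = d[i].buckets_free;
			unsigned sectors = d[i].sectors_free;

			/* the unwritten entry isn't allocated yet - account for it: */
			if (unwritten_sectors >= sectors) {
				if (!buckets)
					continue;	/* this device is out of space */
				buckets--;
				sectors = d[i].bucket_size;
			}
			sectors -= unwritten_sectors;

			if (buckets)
				sectors = d[i].bucket_size;
			if (sectors)
				min_sectors = sectors < min_sectors ? sectors : min_sectors;
		}

		return min_sectors != UINT_MAX ? min_sectors : 0;
	}

	int main(void)
	{
		struct model_dev devs[] = {
			{ .bucket_size = 256, .buckets_free = 0, .sectors_free = 96 },
			{ .bucket_size = 256, .buckets_free = 3, .sectors_free = 32 },
		};

		/* 64 sectors of the previous entry are still unwritten: */
		printf("%u\n", next_entry_sectors(devs, 2, 64));	/* prints 32 */
		return 0;
	}

WRITE_ONCE()ing the result into j->cur_entry_sectors is what lets
journal_entry_open() size the next buffer without redoing this walk.
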
diff --git a/fs/bcachefs/journal_io.h b/fs/bcachefs/journal_io.h
index e19e549baf8a7d2c6f3d51d252cbba09f5aa4038..d1409039724dbaeef877c19fb81f411b588b36ea 100644
@@ -40,7 +40,7 @@ int bch2_journal_read(struct bch_fs *, struct list_head *);
 void bch2_journal_entries_free(struct list_head *);
 int bch2_journal_replay(struct bch_fs *, struct list_head *);
 
-int bch2_journal_entry_sectors(struct journal *);
+int bch2_journal_space_available(struct journal *);
 void bch2_journal_write(struct closure *);
 
 #endif /* _BCACHEFS_JOURNAL_IO_H */
diff --git a/fs/bcachefs/journal_types.h b/fs/bcachefs/journal_types.h
index e952eb06eff52739525ad36a60ee46671231452b..3372e87be1245c68145e89b63c72dc83b4bcb094 100644
@@ -22,8 +22,10 @@ struct journal_buf {
 
        struct closure_waitlist wait;
 
-       unsigned                size;
-       unsigned                disk_sectors;
+       unsigned                buf_size;       /* size in bytes of @data */
+       unsigned                sectors;        /* maximum size for current entry */
+       unsigned                disk_sectors;   /* maximum size entry could have been, if
+                                                  buf_size was bigger */
        unsigned                u64s_reserved;
        /* bloom filter: */
        unsigned long           has_inode[1024 / sizeof(unsigned long)];
@@ -129,9 +131,14 @@ struct journal {
        unsigned long           flags;
 
        union journal_res_state reservations;
+
+       /* Max size of current journal entry */
        unsigned                cur_entry_u64s;
-       unsigned                prev_buf_sectors;
-       unsigned                cur_buf_sectors;
+       unsigned                cur_entry_sectors;
+
+       /* Reserved space in journal entry to be used just prior to write */
+       unsigned                entry_u64s_reserved;
+
        unsigned                buf_size_want;
 
        /*
@@ -159,9 +166,6 @@ struct journal {
        u64                     seq_ondisk;
        u64                     last_seq_ondisk;
 
-       /* Reserved space in journal entry to be used just prior to write */
-       unsigned                entry_u64s_reserved;
-
        /*
         * FIFO of journal entries whose btree updates have not yet been
         * written out.
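
The three sizes on journal_buf are easy to conflate. A standalone
restatement of the invariants as maintained by journal_entry_open()
and __journal_entry_close() (the model_ names are illustrative):

	#include <assert.h>

	struct model_buf {
		unsigned buf_size;	/* bytes allocated for @data */
		unsigned sectors;	/* max size of the current entry */
		unsigned disk_sectors;	/* max size, if buf_size were bigger */
	};

	static void on_open(struct model_buf *buf, unsigned cur_entry_sectors)
	{
		buf->disk_sectors = cur_entry_sectors;	/* from space_available() */
		buf->sectors = buf->disk_sectors < (buf->buf_size >> 9)
			? buf->disk_sectors
			: buf->buf_size >> 9;		/* min() of the two */
	}

	static void on_close(struct model_buf *buf, unsigned written_sectors)
	{
		/* a closed entry can only shrink to its final on-disk size: */
		assert(written_sectors <= buf->sectors);
		buf->sectors = written_sectors;
	}

	int main(void)
	{
		struct model_buf buf = { .buf_size = 1 << 16 };	/* 64k = 128 sectors */

		on_open(&buf, 512);		/* devices could take 512 sectors... */
		assert(buf.sectors == 128);	/* ...but buf_size caps the entry */
		on_close(&buf, 24);		/* vstruct size at close: 24 sectors */
		return 0;
	}

disk_sectors exceeding buf_size >> 9 is exactly the condition
__bch2_journal_res_get() uses to grow j->buf_size_want.
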