xfs: attach iclog callbacks in xlog_cil_set_ctx_write_state()
authorDave Chinner <dchinner@redhat.com>
Wed, 11 Aug 2021 01:00:43 +0000 (18:00 -0700)
committerDarrick J. Wong <djwong@kernel.org>
Mon, 16 Aug 2021 19:09:30 +0000 (12:09 -0700)
Now that we have a mechanism to guarantee that the callbacks
attached to an iclog are owned by the context that attaches them
until they drop their reference to the iclog via
xlog_state_release_iclog(), we can attach callbacks to the iclog at
any time we have an active reference to the iclog.

xlog_state_get_iclog_space() always guarantees that the commit
record will fit in the iclog it returns, so we can move this IO
callback setting to xlog_cil_set_ctx_write_state(), record the
commit iclog in the context and remove the need for the commit iclog
to be returned by xlog_write() altogether.

This, in turn, allows us to move the wakeup for ordered commit
record writes up into xlog_cil_set_ctx_write_state(), too, because
we have been guaranteed that this commit record will be physically
located in the iclog before any waiting commit record at a higher
sequence number will be granted iclog space.

This further cleans up the post commit record write processing in
the CIL push code, especially as xlog_state_release_iclog() will now
clean up the context when shutdown errors occur.

Signed-off-by: Dave Chinner <dchinner@redhat.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Darrick J. Wong <djwong@kernel.org>
Signed-off-by: Darrick J. Wong <djwong@kernel.org>
fs/xfs/xfs_log.c
fs/xfs/xfs_log_cil.c
fs/xfs/xfs_log_priv.h

index 6ac5d52f573d47f0474738b2af6b6e0230764b59..cc65b3a13ad2831574c50942ae1698eecb24b5ba 100644 (file)
@@ -933,7 +933,7 @@ xlog_write_unmount_record(
        /* account for space used by record data */
        ticket->t_curr_res -= sizeof(ulf);
 
-       return xlog_write(log, NULL, &vec, ticket, NULL, XLOG_UNMOUNT_TRANS);
+       return xlog_write(log, NULL, &vec, ticket, XLOG_UNMOUNT_TRANS);
 }
 
 /*
@@ -2380,8 +2380,7 @@ xlog_write_copy_finish(
        int                     *data_cnt,
        int                     *partial_copy,
        int                     *partial_copy_len,
-       int                     log_offset,
-       struct xlog_in_core     **commit_iclog)
+       int                     log_offset)
 {
        int                     error;
 
@@ -2400,27 +2399,20 @@ xlog_write_copy_finish(
        *partial_copy = 0;
        *partial_copy_len = 0;
 
-       if (iclog->ic_size - log_offset <= sizeof(xlog_op_header_t)) {
-               /* no more space in this iclog - push it. */
-               spin_lock(&log->l_icloglock);
-               xlog_state_finish_copy(log, iclog, *record_cnt, *data_cnt);
-               *record_cnt = 0;
-               *data_cnt = 0;
-
-               if (iclog->ic_state == XLOG_STATE_ACTIVE)
-                       xlog_state_switch_iclogs(log, iclog, 0);
-               else
-                       ASSERT(iclog->ic_state == XLOG_STATE_WANT_SYNC ||
-                               xlog_is_shutdown(log));
-               if (!commit_iclog)
-                       goto release_iclog;
-               spin_unlock(&log->l_icloglock);
-               ASSERT(flags & XLOG_COMMIT_TRANS);
-               *commit_iclog = iclog;
-       }
+       if (iclog->ic_size - log_offset > sizeof(xlog_op_header_t))
+               return 0;
 
-       return 0;
+       /* no more space in this iclog - push it. */
+       spin_lock(&log->l_icloglock);
+       xlog_state_finish_copy(log, iclog, *record_cnt, *data_cnt);
+       *record_cnt = 0;
+       *data_cnt = 0;
 
+       if (iclog->ic_state == XLOG_STATE_ACTIVE)
+               xlog_state_switch_iclogs(log, iclog, 0);
+       else
+               ASSERT(iclog->ic_state == XLOG_STATE_WANT_SYNC ||
+                       xlog_is_shutdown(log));
 release_iclog:
        error = xlog_state_release_iclog(log, iclog, 0);
        spin_unlock(&log->l_icloglock);
@@ -2473,7 +2465,6 @@ xlog_write(
        struct xfs_cil_ctx      *ctx,
        struct xfs_log_vec      *log_vector,
        struct xlog_ticket      *ticket,
-       struct xlog_in_core     **commit_iclog,
        uint                    optype)
 {
        struct xlog_in_core     *iclog = NULL;
@@ -2602,8 +2593,7 @@ xlog_write(
                                                       &record_cnt, &data_cnt,
                                                       &partial_copy,
                                                       &partial_copy_len,
-                                                      log_offset,
-                                                      commit_iclog);
+                                                      log_offset);
                        if (error)
                                return error;
 
@@ -2641,12 +2631,7 @@ next_lv:
 
        spin_lock(&log->l_icloglock);
        xlog_state_finish_copy(log, iclog, record_cnt, data_cnt);
-       if (commit_iclog) {
-               ASSERT(optype & XLOG_COMMIT_TRANS);
-               *commit_iclog = iclog;
-       } else {
-               error = xlog_state_release_iclog(log, iclog, 0);
-       }
+       error = xlog_state_release_iclog(log, iclog, 0);
        spin_unlock(&log->l_icloglock);
 
        return error;
index 38917c97a5824bec7beb5e2fa6b8a523742c7f84..ae6449fd5cf2fff645f40ca69a4ed983194f233d 100644 (file)
@@ -646,11 +646,41 @@ xlog_cil_set_ctx_write_state(
        xfs_lsn_t               lsn = be64_to_cpu(iclog->ic_header.h_lsn);
 
        ASSERT(!ctx->commit_lsn);
-       spin_lock(&cil->xc_push_lock);
-       if (!ctx->start_lsn)
+       if (!ctx->start_lsn) {
+               spin_lock(&cil->xc_push_lock);
                ctx->start_lsn = lsn;
-       else
-               ctx->commit_lsn = lsn;
+               spin_unlock(&cil->xc_push_lock);
+               return;
+       }
+
+       /*
+        * Take a reference to the iclog for the context so that we still hold
+        * it when xlog_write is done and has released it. This means the
+        * context controls when the iclog is released for IO.
+        */
+       atomic_inc(&iclog->ic_refcnt);
+
+       /*
+        * xlog_state_get_iclog_space() guarantees there is enough space in the
+        * iclog for an entire commit record, so we can attach the context
+        * callbacks now.  This needs to be done before we make the commit_lsn
+        * visible to waiters so that checkpoints with commit records in the
+        * same iclog order their IO completion callbacks in the same order that
+        * the commit records appear in the iclog.
+        */
+       spin_lock(&cil->xc_log->l_icloglock);
+       list_add_tail(&ctx->iclog_entry, &iclog->ic_callbacks);
+       spin_unlock(&cil->xc_log->l_icloglock);
+
+       /*
+        * Now we can record the commit LSN and wake anyone waiting for this
+        * sequence to have the ordered commit record assigned to a physical
+        * location in the log.
+        */
+       spin_lock(&cil->xc_push_lock);
+       ctx->commit_iclog = iclog;
+       ctx->commit_lsn = lsn;
+       wake_up_all(&cil->xc_commit_wait);
        spin_unlock(&cil->xc_push_lock);
 }
 
@@ -707,8 +737,7 @@ restart:
  */
 static int
 xlog_cil_write_commit_record(
-       struct xfs_cil_ctx      *ctx,
-       struct xlog_in_core     **iclog)
+       struct xfs_cil_ctx      *ctx)
 {
        struct xlog             *log = ctx->cil->xc_log;
        struct xfs_log_iovec    reg = {
@@ -725,8 +754,7 @@ xlog_cil_write_commit_record(
        if (xlog_is_shutdown(log))
                return -EIO;
 
-       error = xlog_write(log, ctx, &vec, ctx->ticket, iclog,
-                       XLOG_COMMIT_TRANS);
+       error = xlog_write(log, ctx, &vec, ctx->ticket, XLOG_COMMIT_TRANS);
        if (error)
                xfs_force_shutdown(log->l_mp, SHUTDOWN_LOG_IO_ERROR);
        return error;
@@ -756,7 +784,6 @@ xlog_cil_push_work(
        struct xfs_log_vec      *lv;
        struct xfs_cil_ctx      *ctx;
        struct xfs_cil_ctx      *new_ctx;
-       struct xlog_in_core     *commit_iclog;
        struct xlog_ticket      *tic;
        int                     num_iovecs;
        int                     error = 0;
@@ -945,7 +972,7 @@ xlog_cil_push_work(
         */
        wait_for_completion(&bdev_flush);
 
-       error = xlog_write(log, ctx, &lvhdr, tic, NULL, XLOG_START_TRANS);
+       error = xlog_write(log, ctx, &lvhdr, tic, XLOG_START_TRANS);
        if (error)
                goto out_abort_free_ticket;
 
@@ -953,36 +980,12 @@ xlog_cil_push_work(
        if (error)
                goto out_abort_free_ticket;
 
-       error = xlog_cil_write_commit_record(ctx, &commit_iclog);
+       error = xlog_cil_write_commit_record(ctx);
        if (error)
                goto out_abort_free_ticket;
 
        xfs_log_ticket_ungrant(log, tic);
 
-       /*
-        * Once we attach the ctx to the iclog, it is effectively owned by the
-        * iclog and we can only use it while we still have an active reference
-        * to the iclog. i.e. once we call xlog_state_release_iclog() we can no
-        * longer safely reference the ctx.
-        */
-       spin_lock(&log->l_icloglock);
-       if (xlog_is_shutdown(log)) {
-               spin_unlock(&log->l_icloglock);
-               goto out_abort;
-       }
-       ASSERT_ALWAYS(commit_iclog->ic_state == XLOG_STATE_ACTIVE ||
-                     commit_iclog->ic_state == XLOG_STATE_WANT_SYNC);
-       list_add_tail(&ctx->iclog_entry, &commit_iclog->ic_callbacks);
-
-       /*
-        * now the checkpoint commit is complete and we've attached the
-        * callbacks to the iclog we can assign the commit LSN to the context
-        * and wake up anyone who is waiting for the commit to complete.
-        */
-       spin_lock(&cil->xc_push_lock);
-       wake_up_all(&cil->xc_commit_wait);
-       spin_unlock(&cil->xc_push_lock);
-
        /*
         * If the checkpoint spans multiple iclogs, wait for all previous iclogs
         * to complete before we submit the commit_iclog. We can't use state
@@ -995,17 +998,18 @@ xlog_cil_push_work(
         * iclog header lsn and compare it to the commit lsn to determine if we
         * need to wait on iclogs or not.
         */
+       spin_lock(&log->l_icloglock);
        if (ctx->start_lsn != ctx->commit_lsn) {
                xfs_lsn_t       plsn;
 
-               plsn = be64_to_cpu(commit_iclog->ic_prev->ic_header.h_lsn);
+               plsn = be64_to_cpu(ctx->commit_iclog->ic_prev->ic_header.h_lsn);
                if (plsn && XFS_LSN_CMP(plsn, ctx->commit_lsn) < 0) {
                        /*
                         * Waiting on ic_force_wait orders the completion of
                         * iclogs older than ic_prev. Hence we only need to wait
                         * on the most recent older iclog here.
                         */
-                       xlog_wait_on_iclog(commit_iclog->ic_prev);
+                       xlog_wait_on_iclog(ctx->commit_iclog->ic_prev);
                        spin_lock(&log->l_icloglock);
                }
 
@@ -1013,7 +1017,7 @@ xlog_cil_push_work(
                 * We need to issue a pre-flush so that the ordering for this
                 * checkpoint is correctly preserved down to stable storage.
                 */
-               commit_iclog->ic_flags |= XLOG_ICL_NEED_FLUSH;
+               ctx->commit_iclog->ic_flags |= XLOG_ICL_NEED_FLUSH;
        }
 
        /*
@@ -1021,8 +1025,8 @@ xlog_cil_push_work(
         * journal IO vs metadata writeback IO is correctly ordered on stable
         * storage.
         */
-       commit_iclog->ic_flags |= XLOG_ICL_NEED_FUA;
-       xlog_state_release_iclog(log, commit_iclog, preflush_tail_lsn);
+       ctx->commit_iclog->ic_flags |= XLOG_ICL_NEED_FUA;
+       xlog_state_release_iclog(log, ctx->commit_iclog, preflush_tail_lsn);
 
        /* Not safe to reference ctx now! */
 
@@ -1037,9 +1041,15 @@ out_skip:
 
 out_abort_free_ticket:
        xfs_log_ticket_ungrant(log, tic);
-out_abort:
        ASSERT(xlog_is_shutdown(log));
-       xlog_cil_committed(ctx);
+       if (!ctx->commit_iclog) {
+               xlog_cil_committed(ctx);
+               return;
+       }
+       spin_lock(&log->l_icloglock);
+       xlog_state_release_iclog(log, ctx->commit_iclog, 0);
+       /* Not safe to reference ctx now! */
+       spin_unlock(&log->l_icloglock);
 }
 
 /*
index 09156399b46c05aac8dd4158790ad4d9ce62e5c7..e0934e6aaf8ab3df534bc199ac669769b1aa1055 100644 (file)
@@ -240,6 +240,7 @@ struct xfs_cil_ctx {
        xfs_csn_t               sequence;       /* chkpt sequence # */
        xfs_lsn_t               start_lsn;      /* first LSN of chkpt commit */
        xfs_lsn_t               commit_lsn;     /* chkpt commit record lsn */
+       struct xlog_in_core     *commit_iclog;
        struct xlog_ticket      *ticket;        /* chkpt ticket */
        int                     nvecs;          /* number of regions */
        int                     space_used;     /* aggregate size of regions */
@@ -514,7 +515,7 @@ void        xlog_print_tic_res(struct xfs_mount *mp, struct xlog_ticket *ticket);
 void   xlog_print_trans(struct xfs_trans *);
 int    xlog_write(struct xlog *log, struct xfs_cil_ctx *ctx,
                struct xfs_log_vec *log_vector, struct xlog_ticket *tic,
-               struct xlog_in_core **commit_iclog, uint optype);
+               uint optype);
 void   xfs_log_ticket_ungrant(struct xlog *log, struct xlog_ticket *ticket);
 void   xfs_log_ticket_regrant(struct xlog *log, struct xlog_ticket *ticket);