xfs: commit CoW-based atomic writes atomically

author John Garry <john.g.garry@oracle.com>

Wed, 7 May 2025 21:18:31 +0000 (14:18 -0700)

committer Darrick J. Wong <djwong@kernel.org>

Wed, 7 May 2025 21:25:32 +0000 (14:25 -0700)
author John Garry <john.g.garry@oracle.com>
Wed, 7 May 2025 21:18:31 +0000 (14:18 -0700)
committer Darrick J. Wong <djwong@kernel.org>
Wed, 7 May 2025 21:25:32 +0000 (14:25 -0700)
diff --git a/fs/xfs/libxfs/xfs_log_rlimit.c b/fs/xfs/libxfs/xfs_log_rlimit.c

index d3bd6a86c8fe9b4188a244c8da73cede09135db6..34bba96d30ca7ec2e49228eeaa58a769a6d24002 100644 (file)
--- a/fs/xfs/libxfs/xfs_log_rlimit.c
+++ b/fs/xfs/libxfs/xfs_log_rlimit.c
@@ -91,6 +91,7 @@ xfs_log_calc_trans_resv_for_minlogblocks(
          */
         if (xfs_want_minlogsize_fixes(&mp->m_sb)) {
                 xfs_trans_resv_calc(mp, resv);
+               resv->tr_atomic_ioend = M_RES(mp)->tr_atomic_ioend;
                 return;
         }
  
@@ -107,6 +108,9 @@ xfs_log_calc_trans_resv_for_minlogblocks(
  
         xfs_trans_resv_calc(mp, resv);
  
+       /* Copy the dynamic transaction reservation types from the running fs */
+       resv->tr_atomic_ioend = M_RES(mp)->tr_atomic_ioend;
+
         if (xfs_has_reflink(mp)) {
                 /*
                  * In the early days of reflink, typical log operation counts
diff --git a/fs/xfs/libxfs/xfs_trans_resv.c b/fs/xfs/libxfs/xfs_trans_resv.c

index 580d00ae28573dcf27181db55999d8ac30b72f5c..a841432abf834c271b6021626a9509ef06a3f882 100644 (file)
--- a/fs/xfs/libxfs/xfs_trans_resv.c
+++ b/fs/xfs/libxfs/xfs_trans_resv.c
@@ -1284,6 +1284,15 @@ xfs_calc_namespace_reservations(
         resp->tr_mkdir.tr_logflags |= XFS_TRANS_PERM_LOG_RES;
  }
  
+STATIC void
+xfs_calc_default_atomic_ioend_reservation(
+       struct xfs_mount        *mp,
+       struct xfs_trans_resv   *resp)
+{
+       /* Default to tr_itruncate, which scales reasonably with log size. */
+       resp->tr_atomic_ioend = resp->tr_itruncate;
+}
+
  void
  xfs_trans_resv_calc(
         struct xfs_mount        *mp,
@@ -1378,4 +1387,10 @@ xfs_trans_resv_calc(
         resp->tr_itruncate.tr_logcount += logcount_adj;
         resp->tr_write.tr_logcount += logcount_adj;
         resp->tr_qm_dqalloc.tr_logcount += logcount_adj;
+
+       /*
+        * Now that we've finished computing the static reservations, we can
+        * compute the dynamic reservation for atomic writes.
+        */
+       xfs_calc_default_atomic_ioend_reservation(mp, resp);
  }
diff --git a/fs/xfs/libxfs/xfs_trans_resv.h b/fs/xfs/libxfs/xfs_trans_resv.h

index d9d0032cbbc5d4a8a3e1ccdfaa289b1304a4112a..670045d417a65fd6ac04a0fb449351d30ba2e648 100644 (file)
--- a/fs/xfs/libxfs/xfs_trans_resv.h
+++ b/fs/xfs/libxfs/xfs_trans_resv.h
@@ -48,6 +48,7 @@ struct xfs_trans_resv {
         struct xfs_trans_res    tr_qm_dqalloc;  /* allocate quota on disk */
         struct xfs_trans_res    tr_sb;          /* modify superblock */
         struct xfs_trans_res    tr_fsyncts;     /* update timestamps on fsync */
+       struct xfs_trans_res    tr_atomic_ioend; /* untorn write completion */
  };
  
  /* shorthand way of accessing reservation structure */
diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c

index e8acd6ca8f278656ca688ee3dc50572f1ee8a87a..32883ec8ca2e0e452a821e5885df86bb56c02f18 100644 (file)
--- a/fs/xfs/xfs_file.c
+++ b/fs/xfs/xfs_file.c
@@ -576,7 +576,10 @@ xfs_dio_write_end_io(
         nofs_flag = memalloc_nofs_save();
  
         if (flags & IOMAP_DIO_COW) {
-               error = xfs_reflink_end_cow(ip, offset, size);
+               if (iocb->ki_flags & IOCB_ATOMIC)
+                       error = xfs_reflink_end_atomic_cow(ip, offset, size);
+               else
+                       error = xfs_reflink_end_cow(ip, offset, size);
                 if (error)
                         goto out;
         }
diff --git a/fs/xfs/xfs_reflink.c b/fs/xfs/xfs_reflink.c

index f5d3389160984f316cf0dc1aadb16f40dc1d1d7f..218dee76768b71968eea38673e64654362b9b71d 100644 (file)
--- a/fs/xfs/xfs_reflink.c
+++ b/fs/xfs/xfs_reflink.c
@@ -984,6 +984,62 @@ xfs_reflink_end_cow(
         return error;
  }
  
+/*
+ * Remap the whole range [offset, offset + count) of the file's data fork in a
+ * single transaction, which is the critical part in achieving atomic
+ * behaviour.  The regular CoW end path does not use this function, so as to
+ * keep the block reservation per transaction as low as possible.
+ */
+int
+xfs_reflink_end_atomic_cow(
+       struct xfs_inode                *ip,
+       xfs_off_t                       offset,
+       xfs_off_t                       count)
+{
+       xfs_fileoff_t                   offset_fsb;
+       xfs_fileoff_t                   end_fsb;
+       int                             error = 0;
+       struct xfs_mount                *mp = ip->i_mount;
+       struct xfs_trans                *tp;
+       unsigned int                    resblks;
+
+       trace_xfs_reflink_end_cow(ip, offset, count);
+
+       offset_fsb = XFS_B_TO_FSBT(mp, offset);
+       end_fsb = XFS_B_TO_FSB(mp, offset + count);
+
+       /*
+        * Each remapping operation could cause a btree split, so in the worst
+        * case that's one for each block.
+        */
+       resblks = (end_fsb - offset_fsb) *
+                       XFS_NEXTENTADD_SPACE_RES(mp, 1, XFS_DATA_FORK);
+
+       error = xfs_trans_alloc(mp, &M_RES(mp)->tr_atomic_ioend, resblks, 0,
+                       XFS_TRANS_RESERVE, &tp);
+       if (error)
+               return error;
+
+       xfs_ilock(ip, XFS_ILOCK_EXCL);
+       xfs_trans_ijoin(tp, ip, 0);
+
+       while (end_fsb > offset_fsb && !error) {
+               error = xfs_reflink_end_cow_extent_locked(tp, ip, &offset_fsb,
+                               end_fsb);
+       }
+       if (error) {
+               trace_xfs_reflink_end_cow_error(ip, error, _RET_IP_);
+               goto out_cancel;
+       }
+       error = xfs_trans_commit(tp);
+       xfs_iunlock(ip, XFS_ILOCK_EXCL);
+       return error;
+out_cancel:
+       xfs_trans_cancel(tp);
+       xfs_iunlock(ip, XFS_ILOCK_EXCL);
+       return error;
+}
+
  /*
   * Free all CoW staging blocks that are still referenced by the ondisk refcount
   * metadata.  The ondisk metadata does not track which inode created the
diff --git a/fs/xfs/xfs_reflink.h b/fs/xfs/xfs_reflink.h

index 379619f2424784bb8820c664754e8320878519ea..412e9b6f2082fc4d24118365158a9bd3c91e5921 100644 (file)
--- a/fs/xfs/xfs_reflink.h
+++ b/fs/xfs/xfs_reflink.h
@@ -45,6 +45,8 @@ extern int xfs_reflink_cancel_cow_range(struct xfs_inode *ip, xfs_off_t offset,
                 xfs_off_t count, bool cancel_real);
  extern int xfs_reflink_end_cow(struct xfs_inode *ip, xfs_off_t offset,
                 xfs_off_t count);
+int xfs_reflink_end_atomic_cow(struct xfs_inode *ip, xfs_off_t offset,
+               xfs_off_t count);
  extern int xfs_reflink_recover_cow(struct xfs_mount *mp);
  extern loff_t xfs_reflink_remap_range(struct file *file_in, loff_t pos_in,
                 struct file *file_out, loff_t pos_out, loff_t len,
author	John Garry <john.g.garry@oracle.com>
	Wed, 7 May 2025 21:18:31 +0000 (14:18 -0700)
committer	Darrick J. Wong <djwong@kernel.org>
	Wed, 7 May 2025 21:25:32 +0000 (14:25 -0700)
fs/xfs/libxfs/xfs_log_rlimit.c		patch \| blob \| blame \| history
fs/xfs/libxfs/xfs_trans_resv.c		patch \| blob \| blame \| history
fs/xfs/libxfs/xfs_trans_resv.h		patch \| blob \| blame \| history
fs/xfs/xfs_file.c		patch \| blob \| blame \| history
fs/xfs/xfs_reflink.c		patch \| blob \| blame \| history
fs/xfs/xfs_reflink.h		patch \| blob \| blame \| history