xfs: allow read IO and FICLONE to run concurrently

author Catherine Hoang <catherine.hoang@oracle.com>

Tue, 17 Oct 2023 20:12:08 +0000 (13:12 -0700)

committer Chandan Babu R <chandanbabu@kernel.org>

Mon, 23 Oct 2023 06:32:26 +0000 (12:02 +0530)
author Catherine Hoang <catherine.hoang@oracle.com>
Tue, 17 Oct 2023 20:12:08 +0000 (13:12 -0700)
committer Chandan Babu R <chandanbabu@kernel.org>
Mon, 23 Oct 2023 06:32:26 +0000 (12:02 +0530)
diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c

index 203700278ddbb620a6e9d7c95b68a86f4ea52c63..e33e5e13b95f462ffe33c232739af6c4762758a0 100644 (file)
--- a/fs/xfs/xfs_file.c
+++ b/fs/xfs/xfs_file.c
@@ -214,6 +214,43 @@ xfs_ilock_iocb(
         return 0;
  }
  
+static int
+xfs_ilock_iocb_for_write(
+       struct kiocb            *iocb,
+       unsigned int            *lock_mode)
+{
+       ssize_t                 ret;
+       struct xfs_inode        *ip = XFS_I(file_inode(iocb->ki_filp));
+
+       ret = xfs_ilock_iocb(iocb, *lock_mode);
+       if (ret)
+               return ret;
+
+       if (*lock_mode == XFS_IOLOCK_EXCL)
+               return 0;
+       if (!xfs_iflags_test(ip, XFS_IREMAPPING))
+               return 0;
+
+       xfs_iunlock(ip, *lock_mode);
+       *lock_mode = XFS_IOLOCK_EXCL;
+       return xfs_ilock_iocb(iocb, *lock_mode);
+}
+
+static unsigned int
+xfs_ilock_for_write_fault(
+       struct xfs_inode        *ip)
+{
+       /* get a shared lock if no remapping in progress */
+       xfs_ilock(ip, XFS_MMAPLOCK_SHARED);
+       if (!xfs_iflags_test(ip, XFS_IREMAPPING))
+               return XFS_MMAPLOCK_SHARED;
+
+       /* wait for remapping to complete */
+       xfs_iunlock(ip, XFS_MMAPLOCK_SHARED);
+       xfs_ilock(ip, XFS_MMAPLOCK_EXCL);
+       return XFS_MMAPLOCK_EXCL;
+}
+
  STATIC ssize_t
  xfs_file_dio_read(
         struct kiocb            *iocb,
@@ -551,7 +588,7 @@ xfs_file_dio_write_aligned(
         unsigned int            iolock = XFS_IOLOCK_SHARED;
         ssize_t                 ret;
  
-       ret = xfs_ilock_iocb(iocb, iolock);
+       ret = xfs_ilock_iocb_for_write(iocb, &iolock);
         if (ret)
                 return ret;
         ret = xfs_file_write_checks(iocb, from, &iolock);
@@ -618,7 +655,7 @@ retry_exclusive:
                 flags = IOMAP_DIO_FORCE_WAIT;
         }
  
-       ret = xfs_ilock_iocb(iocb, iolock);
+       ret = xfs_ilock_iocb_for_write(iocb, &iolock);
         if (ret)
                 return ret;
  
@@ -1180,7 +1217,7 @@ xfs_file_remap_range(
         if (xfs_file_sync_writes(file_in) || xfs_file_sync_writes(file_out))
                 xfs_log_force_inode(dest);
  out_unlock:
-       xfs_iunlock2_io_mmap(src, dest);
+       xfs_iunlock2_remapping(src, dest);
         if (ret)
                 trace_xfs_reflink_remap_range_error(dest, ret, _RET_IP_);
         return remapped > 0 ? remapped : ret;
@@ -1328,6 +1365,7 @@ __xfs_filemap_fault(
         struct inode            *inode = file_inode(vmf->vma->vm_file);
         struct xfs_inode        *ip = XFS_I(inode);
         vm_fault_t              ret;
+       unsigned int            lock_mode = 0;
  
         trace_xfs_filemap_fault(ip, order, write_fault);
  
@@ -1336,25 +1374,24 @@ __xfs_filemap_fault(
                 file_update_time(vmf->vma->vm_file);
         }
  
+       if (IS_DAX(inode) || write_fault)
+               lock_mode = xfs_ilock_for_write_fault(XFS_I(inode));
+
         if (IS_DAX(inode)) {
                 pfn_t pfn;
  
-               xfs_ilock(XFS_I(inode), XFS_MMAPLOCK_SHARED);
                 ret = xfs_dax_fault(vmf, order, write_fault, &pfn);
                 if (ret & VM_FAULT_NEEDDSYNC)
                         ret = dax_finish_sync_fault(vmf, order, pfn);
-               xfs_iunlock(XFS_I(inode), XFS_MMAPLOCK_SHARED);
+       } else if (write_fault) {
+               ret = iomap_page_mkwrite(vmf, &xfs_page_mkwrite_iomap_ops);
         } else {
-               if (write_fault) {
-                       xfs_ilock(XFS_I(inode), XFS_MMAPLOCK_SHARED);
-                       ret = iomap_page_mkwrite(vmf,
-                                       &xfs_page_mkwrite_iomap_ops);
-                       xfs_iunlock(XFS_I(inode), XFS_MMAPLOCK_SHARED);
-               } else {
-                       ret = filemap_fault(vmf);
-               }
+               ret = filemap_fault(vmf);
         }
  
+       if (lock_mode)
+               xfs_iunlock(XFS_I(inode), lock_mode);
+
         if (write_fault)
                 sb_end_pagefault(inode->i_sb);
         return ret;
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c

index fb85c5c8174523c60d11884e989f7d2331e6e14b..f9d29acd72b9eea15fd108420d5d86a839696cce 100644 (file)
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -3628,6 +3628,23 @@ xfs_iunlock2_io_mmap(
                 inode_unlock(VFS_I(ip1));
  }
  
+/* Drop the MMAPLOCK and the IOLOCK after a remap completes. */
+void
+xfs_iunlock2_remapping(
+       struct xfs_inode        *ip1,
+       struct xfs_inode        *ip2)
+{
+       xfs_iflags_clear(ip1, XFS_IREMAPPING);
+
+       if (ip1 != ip2)
+               xfs_iunlock(ip1, XFS_MMAPLOCK_SHARED);
+       xfs_iunlock(ip2, XFS_MMAPLOCK_EXCL);
+
+       if (ip1 != ip2)
+               inode_unlock_shared(VFS_I(ip1));
+       inode_unlock(VFS_I(ip2));
+}
+
  /*
   * Reload the incore inode list for this inode.  Caller should ensure that
   * the link count cannot change, either by taking ILOCK_SHARED or otherwise
diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h

index 0c5bdb91152e1cf920cfb5cb76268d0b025640fc..3dc47937da5d17d81e46fd590435f574d11558dc 100644 (file)
--- a/fs/xfs/xfs_inode.h
+++ b/fs/xfs/xfs_inode.h
@@ -347,6 +347,14 @@ static inline bool xfs_inode_has_large_extent_counts(struct xfs_inode *ip)
  /* Quotacheck is running but inode has not been added to quota counts. */
  #define XFS_IQUOTAUNCHECKED    (1 << 14)
  
+/*
+ * Remap in progress. Callers that wish to update file data while
+ * holding a shared IOLOCK or MMAPLOCK must drop the lock and retake
+ * the lock in exclusive mode. Relocking the file will block until
+ * IREMAPPING is cleared.
+ */
+#define XFS_IREMAPPING         (1U << 15)
+
  /* All inode state flags related to inode reclaim. */
  #define XFS_ALL_IRECLAIM_FLAGS (XFS_IRECLAIMABLE | \
                                  XFS_IRECLAIM | \
@@ -595,6 +603,7 @@ void xfs_end_io(struct work_struct *work);
  
  int xfs_ilock2_io_mmap(struct xfs_inode *ip1, struct xfs_inode *ip2);
  void xfs_iunlock2_io_mmap(struct xfs_inode *ip1, struct xfs_inode *ip2);
+void xfs_iunlock2_remapping(struct xfs_inode *ip1, struct xfs_inode *ip2);
  
  static inline bool
  xfs_inode_unlinked_incomplete(
diff --git a/fs/xfs/xfs_reflink.c b/fs/xfs/xfs_reflink.c

index eb9102453affbf34c9378a7b354de22e16f6d7a3..658edee8381dcdca656135a74a9b5a1b6e13a8cb 100644 (file)
--- a/fs/xfs/xfs_reflink.c
+++ b/fs/xfs/xfs_reflink.c
@@ -1540,6 +1540,10 @@ xfs_reflink_remap_prep(
         if (ret)
                 goto out_unlock;
  
+       xfs_iflags_set(src, XFS_IREMAPPING);
+       if (inode_in != inode_out)
+               xfs_ilock_demote(src, XFS_IOLOCK_EXCL | XFS_MMAPLOCK_EXCL);
+
         return 0;
  out_unlock:
         xfs_iunlock2_io_mmap(src, dest);
author	Catherine Hoang <catherine.hoang@oracle.com>
	Tue, 17 Oct 2023 20:12:08 +0000 (13:12 -0700)
committer	Chandan Babu R <chandanbabu@kernel.org>
	Mon, 23 Oct 2023 06:32:26 +0000 (12:02 +0530)
fs/xfs/xfs_file.c		patch \| blob \| blame \| history
fs/xfs/xfs_inode.c		patch \| blob \| blame \| history
fs/xfs/xfs_inode.h		patch \| blob \| blame \| history
fs/xfs/xfs_reflink.c		patch \| blob \| blame \| history