xfs: update atomic write limits
author John Garry <john.g.garry@oracle.com>
Wed, 7 May 2025 21:18:33 +0000 (14:18 -0700)
committer Darrick J. Wong <djwong@kernel.org>
Wed, 7 May 2025 21:25:32 +0000 (14:25 -0700)
Update the limits returned from xfs_get_atomic_write_{min, max, max_opt}().

No reflink support always means no CoW-based atomic writes.

For updating xfs_get_atomic_write_min(), we support blocksize only and that
depends on HW or reflink support.

For updating xfs_get_atomic_write_max(), for no reflink, we are limited to
blocksize but only if there is HW support. Otherwise we are limited to the
combined limit in mp->m_atomic_write_unit_max.

For updating xfs_get_atomic_write_max_opt(), ultimately we are limited by
the bdev atomic write limit. If xfs_get_atomic_write_max() does not report
 > 1x blocksize, then just continue to report 0 as before.

Reviewed-by: Darrick J. Wong <djwong@kernel.org>
[djwong: update comments in the helper functions]
Signed-off-by: Darrick J. Wong <djwong@kernel.org>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: John Garry <john.g.garry@oracle.com>
fs/xfs/xfs_file.c
fs/xfs/xfs_iops.c

index f4a66ff857481377eedb1bce0e1b17b18608bb80..48254a72071bc8213b08e1051a9712a0937cbb11 100644 (file)
@@ -1557,7 +1557,7 @@ xfs_file_open(
        if (xfs_is_shutdown(XFS_M(inode->i_sb)))
                return -EIO;
        file->f_mode |= FMODE_NOWAIT | FMODE_CAN_ODIRECT;
-       if (xfs_inode_can_hw_atomic_write(XFS_I(inode)))
+       if (xfs_get_atomic_write_min(XFS_I(inode)) > 0)
                file->f_mode |= FMODE_CAN_ATOMIC_WRITE;
        return generic_file_open(inode, file);
 }
index 77a0606e9dc9c311cb6a78b6152a61c4c0e16249..8cddbb7c149bec5add19cfe6b3c0ca7afb9a9364 100644 (file)
@@ -605,27 +605,67 @@ unsigned int
 xfs_get_atomic_write_min(
        struct xfs_inode        *ip)
 {
-       if (!xfs_inode_can_hw_atomic_write(ip))
-               return 0;
+       struct xfs_mount        *mp = ip->i_mount;
+
+       /*
+        * If we can complete an atomic write via atomic out of place writes,
+        * then advertise a minimum size of one fsblock.  Without this
+        * mechanism, we can only guarantee atomic writes up to a single LBA.
+        *
+        * If out of place writes are not available, we can guarantee an atomic
+        * write of exactly one single fsblock if the bdev will make that
+        * guarantee for us.
+        */
+       if (xfs_inode_can_hw_atomic_write(ip) || xfs_can_sw_atomic_write(mp))
+               return mp->m_sb.sb_blocksize;
 
-       return ip->i_mount->m_sb.sb_blocksize;
+       return 0;
 }
 
 unsigned int
 xfs_get_atomic_write_max(
        struct xfs_inode        *ip)
 {
-       if (!xfs_inode_can_hw_atomic_write(ip))
+       struct xfs_mount        *mp = ip->i_mount;
+
+       /*
+        * If out of place writes are not available, we can guarantee an atomic
+        * write of exactly one single fsblock if the bdev will make that
+        * guarantee for us.
+        */
+       if (!xfs_can_sw_atomic_write(mp)) {
+               if (xfs_inode_can_hw_atomic_write(ip))
+                       return mp->m_sb.sb_blocksize;
                return 0;
+       }
 
-       return ip->i_mount->m_sb.sb_blocksize;
+       /*
+        * If we can complete an atomic write via atomic out of place writes,
+        * then advertise a maximum size of whatever we can complete through
+        * that means.  Hardware support is reported via max_opt, not here.
+        */
+       if (XFS_IS_REALTIME_INODE(ip))
+               return XFS_FSB_TO_B(mp, mp->m_groups[XG_TYPE_RTG].awu_max);
+       return XFS_FSB_TO_B(mp, mp->m_groups[XG_TYPE_AG].awu_max);
 }
 
 unsigned int
 xfs_get_atomic_write_max_opt(
        struct xfs_inode        *ip)
 {
-       return 0;
+       unsigned int            awu_max = xfs_get_atomic_write_max(ip);
+
+       /* if the max is 1x block, then just keep behaviour that opt is 0 */
+       if (awu_max <= ip->i_mount->m_sb.sb_blocksize)
+               return 0;
+
+       /*
+        * Advertise the maximum size of an atomic write that we can tell the
+        * block device to perform for us.  In general the bdev limit will be
+        * less than our out of place write limit, but we don't want to exceed
+        * the awu_max.
+        */
+       return min(awu_max, xfs_inode_buftarg(ip)->bt_bdev_awu_max);
 }
 
 static void