xfs: fix SEEK_DATA for speculative COW fork preallocation
authorChristoph Hellwig <hch@lst.de>
Mon, 18 Feb 2019 17:38:46 +0000 (09:38 -0800)
committerDarrick J. Wong <darrick.wong@oracle.com>
Thu, 21 Feb 2019 15:55:07 +0000 (07:55 -0800)
We speculatively allocate extents in the COW fork to reduce
fragmentation.  But when we write data into such COW fork blocks that
do now shadow an allocation in the data fork SEEK_DATA will not
correctly report it, as it only looks at the data fork extents.
The only reason why that hasn't been an issue so far is because
we even use these speculative COW fork preallocations over holes in
the data fork at all for buffered writes, and blocks in the COW
fork that are written by direct writes are moved into the data
fork immediately at I/O completion time.

Add a new set of iomap_ops for SEEK_HOLE/SEEK_DATA which looks into
both the COW and data fork, and reports all COW extents as unwritten
to the iomap layer.  While this isn't strictly true for COW fork
extents that were already converted to real extents, the practical
semantics that you can't read data from them until they are moved
into the data fork are very similar, and this will force the iomap
layer into probing the extents for actually present data.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
fs/xfs/xfs_file.c
fs/xfs/xfs_iomap.c
fs/xfs/xfs_iomap.h

index e47425071e654473f4b34e7899015cecce19ef5e..1d07dcfbbff35e70f73672b340740cca93109372 100644 (file)
@@ -1068,10 +1068,10 @@ xfs_file_llseek(
        default:
                return generic_file_llseek(file, offset, whence);
        case SEEK_HOLE:
-               offset = iomap_seek_hole(inode, offset, &xfs_iomap_ops);
+               offset = iomap_seek_hole(inode, offset, &xfs_seek_iomap_ops);
                break;
        case SEEK_DATA:
-               offset = iomap_seek_data(inode, offset, &xfs_iomap_ops);
+               offset = iomap_seek_data(inode, offset, &xfs_seek_iomap_ops);
                break;
        }
 
index 284c5e68f695ff264d3c3f95c116efb1edb63d99..df6eda336f1713264f4cdd4e2b71824ec0dd33b0 100644 (file)
@@ -1068,6 +1068,92 @@ const struct iomap_ops xfs_iomap_ops = {
        .iomap_end              = xfs_file_iomap_end,
 };
 
+static int
+xfs_seek_iomap_begin(
+       struct inode            *inode,
+       loff_t                  offset,
+       loff_t                  length,
+       unsigned                flags,
+       struct iomap            *iomap)
+{
+       struct xfs_inode        *ip = XFS_I(inode);
+       struct xfs_mount        *mp = ip->i_mount;
+       xfs_fileoff_t           offset_fsb = XFS_B_TO_FSBT(mp, offset);
+       xfs_fileoff_t           end_fsb = XFS_B_TO_FSB(mp, offset + length);
+       xfs_fileoff_t           cow_fsb = NULLFILEOFF, data_fsb = NULLFILEOFF;
+       struct xfs_iext_cursor  icur;
+       struct xfs_bmbt_irec    imap, cmap;
+       int                     error = 0;
+       unsigned                lockmode;
+
+       if (XFS_FORCED_SHUTDOWN(mp))
+               return -EIO;
+
+       lockmode = xfs_ilock_data_map_shared(ip);
+       if (!(ip->i_df.if_flags & XFS_IFEXTENTS)) {
+               error = xfs_iread_extents(NULL, ip, XFS_DATA_FORK);
+               if (error)
+                       goto out_unlock;
+       }
+
+       if (xfs_iext_lookup_extent(ip, &ip->i_df, offset_fsb, &icur, &imap)) {
+               /*
+                * If we found a data extent we are done.
+                */
+               if (imap.br_startoff <= offset_fsb)
+                       goto done;
+               data_fsb = imap.br_startoff;
+       } else {
+               /*
+                * Fake a hole until the end of the file.
+                */
+               data_fsb = min(XFS_B_TO_FSB(mp, offset + length),
+                              XFS_B_TO_FSB(mp, mp->m_super->s_maxbytes));
+       }
+
+       /*
+        * If a COW fork extent covers the hole, report it - capped to the next
+        * data fork extent:
+        */
+       if (xfs_inode_has_cow_data(ip) &&
+           xfs_iext_lookup_extent(ip, ip->i_cowfp, offset_fsb, &icur, &cmap))
+               cow_fsb = cmap.br_startoff;
+       if (cow_fsb != NULLFILEOFF && cow_fsb <= offset_fsb) {
+               if (data_fsb < cow_fsb + cmap.br_blockcount)
+                       end_fsb = min(end_fsb, data_fsb);
+               xfs_trim_extent(&cmap, offset_fsb, end_fsb);
+               error = xfs_bmbt_to_iomap(ip, iomap, &cmap, true);
+               /*
+                * This is a COW extent, so we must probe the page cache
+                * because there could be dirty page cache being backed
+                * by this extent.
+                */
+               iomap->type = IOMAP_UNWRITTEN;
+               goto out_unlock;
+       }
+
+       /*
+        * Else report a hole, capped to the next found data or COW extent.
+        */
+       if (cow_fsb != NULLFILEOFF && cow_fsb < data_fsb)
+               imap.br_blockcount = cow_fsb - offset_fsb;
+       else
+               imap.br_blockcount = data_fsb - offset_fsb;
+       imap.br_startoff = offset_fsb;
+       imap.br_startblock = HOLESTARTBLOCK;
+       imap.br_state = XFS_EXT_NORM;
+done:
+       xfs_trim_extent(&imap, offset_fsb, end_fsb);
+       error = xfs_bmbt_to_iomap(ip, iomap, &imap, false);
+out_unlock:
+       xfs_iunlock(ip, lockmode);
+       return error;
+}
+
+const struct iomap_ops xfs_seek_iomap_ops = {
+       .iomap_begin            = xfs_seek_iomap_begin,
+};
+
 static int
 xfs_xattr_iomap_begin(
        struct inode            *inode,
index 37b584c3069bc547bd1afcf9e6ca40fb8719a836..5c2f6aa6d78ffa810bdaeae1ed06cb85f465d1b6 100644 (file)
@@ -40,6 +40,7 @@ xfs_aligned_fsb_count(
 }
 
 extern const struct iomap_ops xfs_iomap_ops;
+extern const struct iomap_ops xfs_seek_iomap_ops;
 extern const struct iomap_ops xfs_xattr_iomap_ops;
 
 #endif /* __XFS_IOMAP_H__*/