9p: Fix DIO read through netfs
author Dominique Martinet <asmadeus@codewreck.org>
Thu, 8 Aug 2024 13:29:38 +0000 (14:29 +0100)
committer Christian Brauner <brauner@kernel.org>
Tue, 13 Aug 2024 11:53:09 +0000 (13:53 +0200)
If a program is watching a file on a 9p mount, it won't see any change in
size if the file being exported by the server is changed directly in the
source filesystem, presumably because 9p doesn't have change notifications,
and because netfs skips the reads if the file is empty.

Fix this by attempting to read the full size specified when a DIO read is
requested (such as when 9p is operating in unbuffered mode) and handling
a short read if the EOF is reached before the end of the requested range.

To make this work, filesystems using netfslib must not set
NETFS_SREQ_CLEAR_TAIL if performing a DIO read where that read hit the EOF.
I don't want to mandatorily clear this flag in netfslib for DIO because,
say, ceph might make a read from an object that is not completely filled,
but does not reside at the end of file - and so we need to clear the
excess.

This can be tested by watching an empty file over 9p within a VM (such as
in the ktest framework):

        while true; do read content; if [ -n "$content" ]; then echo $content; break; fi; done < /host/tmp/foo

then writing something into the empty file.  The watcher should immediately
display the file content and break out of the loop.  Without this fix, it
remains in the loop indefinitely.

Fixes: 80105ed2fd27 ("9p: Use netfslib read/write_iter")
Closes: https://bugzilla.kernel.org/show_bug.cgi?id=218916
Signed-off-by: David Howells <dhowells@redhat.com>
Link: https://lore.kernel.org/r/1229195.1723211769@warthog.procyon.org.uk
cc: Eric Van Hensbergen <ericvh@kernel.org>
cc: Latchesar Ionkov <lucho@ionkov.net>
cc: Christian Schoenebeck <linux_oss@crudebyte.com>
cc: Marc Dionne <marc.dionne@auristor.com>
cc: Ilya Dryomov <idryomov@gmail.com>
cc: Steve French <sfrench@samba.org>
cc: Paulo Alcantara <pc@manguebit.com>
cc: Trond Myklebust <trond.myklebust@hammerspace.com>
cc: v9fs@lists.linux.dev
cc: linux-afs@lists.infradead.org
cc: ceph-devel@vger.kernel.org
cc: linux-cifs@vger.kernel.org
cc: linux-nfs@vger.kernel.org
cc: netfs@lists.linux.dev
cc: linux-fsdevel@vger.kernel.org
Signed-off-by: Dominique Martinet <asmadeus@codewreck.org>
Signed-off-by: Christian Brauner <brauner@kernel.org>
fs/9p/vfs_addr.c
fs/afs/file.c
fs/ceph/addr.c
fs/netfs/io.c
fs/nfs/fscache.c
fs/smb/client/file.c

index a97ceb105cd8dd02060afa539acd8ee6749357cb..24fdc74caeba47b0ba5d782f4a1e68175c00256e 100644 (file)
@@ -75,7 +75,8 @@ static void v9fs_issue_read(struct netfs_io_subrequest *subreq)
 
        /* if we just extended the file size, any portion not in
         * cache won't be on server and is zeroes */
-       __set_bit(NETFS_SREQ_CLEAR_TAIL, &subreq->flags);
+       if (subreq->rreq->origin != NETFS_DIO_READ)
+               __set_bit(NETFS_SREQ_CLEAR_TAIL, &subreq->flags);
 
        netfs_subreq_terminated(subreq, err ?: total, false);
 }
index c3f0c45ae9a9b6f6582accd6e9365862808fb7e1..ec1be0091fdb562d49a84dc14c2330c7265e9a65 100644 (file)
@@ -242,7 +242,8 @@ static void afs_fetch_data_notify(struct afs_operation *op)
 
        req->error = error;
        if (subreq) {
-               __set_bit(NETFS_SREQ_CLEAR_TAIL, &subreq->flags);
+               if (subreq->rreq->origin != NETFS_DIO_READ)
+                       __set_bit(NETFS_SREQ_CLEAR_TAIL, &subreq->flags);
                netfs_subreq_terminated(subreq, error ?: req->actual_len, false);
                req->subreq = NULL;
        } else if (req->done) {
index cc0a2240de98ee35937928a9e49e0ff87b249790..c4744a02db753c761491b5bf4ca12977342db114 100644 (file)
@@ -246,7 +246,8 @@ static void finish_netfs_read(struct ceph_osd_request *req)
        if (err >= 0) {
                if (sparse && err > 0)
                        err = ceph_sparse_ext_map_end(op);
-               if (err < subreq->len)
+               if (err < subreq->len &&
+                   subreq->rreq->origin != NETFS_DIO_READ)
                        __set_bit(NETFS_SREQ_CLEAR_TAIL, &subreq->flags);
                if (IS_ENCRYPTED(inode) && err > 0) {
                        err = ceph_fscrypt_decrypt_extents(inode,
@@ -282,7 +283,8 @@ static bool ceph_netfs_issue_op_inline(struct netfs_io_subrequest *subreq)
        size_t len;
        int mode;
 
-       __set_bit(NETFS_SREQ_CLEAR_TAIL, &subreq->flags);
+       if (rreq->origin != NETFS_DIO_READ)
+               __set_bit(NETFS_SREQ_CLEAR_TAIL, &subreq->flags);
        __clear_bit(NETFS_SREQ_COPY_TO_CACHE, &subreq->flags);
 
        if (subreq->start >= inode->i_size)
index c179a1c73fa703a5849975f44682222625f023c2..5367caf3fa28630cedf8364ac63ec3d1db7699ad 100644 (file)
@@ -530,7 +530,8 @@ incomplete:
 
        if (transferred_or_error == 0) {
                if (__test_and_set_bit(NETFS_SREQ_NO_PROGRESS, &subreq->flags)) {
-                       subreq->error = -ENODATA;
+                       if (rreq->origin != NETFS_DIO_READ)
+                               subreq->error = -ENODATA;
                        goto failed;
                }
        } else {
@@ -601,9 +602,14 @@ netfs_rreq_prepare_read(struct netfs_io_request *rreq,
                        }
                        if (subreq->len > ictx->zero_point - subreq->start)
                                subreq->len = ictx->zero_point - subreq->start;
+
+                       /* We limit buffered reads to the EOF, but let the
+                        * server deal with larger-than-EOF DIO/unbuffered
+                        * reads.
+                        */
+                       if (subreq->len > rreq->i_size - subreq->start)
+                               subreq->len = rreq->i_size - subreq->start;
                }
-               if (subreq->len > rreq->i_size - subreq->start)
-                       subreq->len = rreq->i_size - subreq->start;
                if (rreq->rsize && subreq->len > rreq->rsize)
                        subreq->len = rreq->rsize;
 
@@ -739,11 +745,10 @@ int netfs_begin_read(struct netfs_io_request *rreq, bool sync)
        do {
                _debug("submit %llx + %llx >= %llx",
                       rreq->start, rreq->submitted, rreq->i_size);
-               if (rreq->origin == NETFS_DIO_READ &&
-                   rreq->start + rreq->submitted >= rreq->i_size)
-                       break;
                if (!netfs_rreq_submit_slice(rreq, &io_iter))
                        break;
+               if (test_bit(NETFS_SREQ_NO_PROGRESS, &rreq->flags))
+                       break;
                if (test_bit(NETFS_RREQ_BLOCKED, &rreq->flags) &&
                    test_bit(NETFS_RREQ_NONBLOCK, &rreq->flags))
                        break;
index bf29a65c5027f47ab6f9732c6ab7c011086d38b2..7a558dea75c4092663a7a2230b4430c7a89efea0 100644 (file)
@@ -363,7 +363,8 @@ void nfs_netfs_read_completion(struct nfs_pgio_header *hdr)
                return;
 
        sreq = netfs->sreq;
-       if (test_bit(NFS_IOHDR_EOF, &hdr->flags))
+       if (test_bit(NFS_IOHDR_EOF, &hdr->flags) &&
+           sreq->rreq->origin != NETFS_DIO_READ)
                __set_bit(NETFS_SREQ_CLEAR_TAIL, &sreq->flags);
 
        if (hdr->error)
index b2405dd4d4d4da6e515c554b9c3e9649ec59c4cb..3f3842e7b44a76d07437d8eeb879fc0e697e75c7 100644 (file)
@@ -217,7 +217,8 @@ static void cifs_req_issue_read(struct netfs_io_subrequest *subreq)
                        goto out;
        }
 
-       __set_bit(NETFS_SREQ_CLEAR_TAIL, &subreq->flags);
+       if (subreq->rreq->origin != NETFS_DIO_READ)
+               __set_bit(NETFS_SREQ_CLEAR_TAIL, &subreq->flags);
 
        rc = rdata->server->ops->async_readv(rdata);
 out: