Merge tag 'afs-fixes-20210913' of git://git.kernel.org/pub/scm/linux/kernel/git/dhowells/linux-fs
author Linus Torvalds <torvalds@linux-foundation.org>
Mon, 20 Sep 2021 22:49:02 +0000 (15:49 -0700)
committer Linus Torvalds <torvalds@linux-foundation.org>
Mon, 20 Sep 2021 22:49:02 +0000 (15:49 -0700)
Pull AFS fixes from David Howells:
 "Fixes for AFS problems that can cause data corruption due to
  interaction with another client modifying data cached locally:

   - When d_revalidating a dentry, don't look at the inode to which it
     points. Only check the directory to which the dentry belongs. This
     was confusing things and causing the silly-rename cleanup code to
     remove the file now at the dentry of a file that got deleted.

   - Fix mmap data coherency. When a callback break is received that
     relates to a file that we have cached, the data content may have
     been changed (there are other reasons, such as the user's rights
     having been changed). However, we're checking it lazily, only on
     entry to the kernel, which doesn't happen if we have a writeable
     shared mapped page on that file.

     We make the kernel keep track of mmapped files and clear all PTEs
     mapping to that file as soon as the callback comes in by calling
     unmap_mapping_pages() (we don't necessarily want to zap the
     pagecache). This causes the kernel to be reentered when userspace
     tries to access the mmapped address range again - and at that point
     we can query the server and, if we need to, zap the page cache.

     Ideally, I would check each file at the point of notification, but
     that involves poking the server[*] - which is holding an exclusive
     lock on the vnode it is changing, waiting for all the clients it
     notified to reply. This could then deadlock against the server.
     Further, invalidating the pagecache might call ->launder_page(),
     which would try to write to the file, which would definitely
     deadlock. (AFS doesn't lease file access).

     [*] Checking to see if the file content has changed is a matter of
         comparing the current data version number, but we have to ask
         the server for that. We also need to get a new callback promise
         and we need to poke the server for that too.

   - Add some more points at which the inode is validated, since we're
     doing it lazily, notably in ->read_iter() and ->page_mkwrite(), but
     also when performing some directory operations.

     Ideally, checking in ->read_iter() would be done in some derivation
     of filemap_read(). If we're going to call the server to read the
     file, then we get the file status fetch as part of that.

   - The above is now causing us to make a lot more calls to
     afs_validate() to check the inode - and afs_validate() takes the
     RCU read lock each time to make a quick check (ie.
     afs_check_validity()). This is entirely for the purpose of checking
     cb_s_break to see if the server we're using reinitialised its list
     of callbacks - however this isn't a very common event, so most of
     the time we're taking this needlessly.

     Add a new cell-wide counter to count the number of
     reinitialisations done by any server and check that - and only if
     that changes, take the RCU read lock and check the server list (the
     server list may change, but the cell a file is part of won't).

   - Don't update vnode->cb_s_break and ->cb_v_break inside the validity
     checking loop. The cb_lock is done with read_seqretry, so we might
     go round the loop a second time after resetting those values - and
     that could cause someone else checking validity to miss something
     (I think).

  Also included are patches for fixes for some bugs encountered whilst
  debugging this:

   - Fix a leak of afs_read objects and fix a leak of keys hidden by
     that.

   - Fix a leak of pages that couldn't be added to extend a writeback.

   - Fix the maintenance of i_blocks when i_size is changed by a local
     write or a local dir edit"

Link: https://bugzilla.kernel.org/show_bug.cgi?id=214217
Link: https://lore.kernel.org/r/163111665183.283156.17200205573146438918.stgit@warthog.procyon.org.uk/
Link: https://lore.kernel.org/r/163113612442.352844.11162345591911691150.stgit@warthog.procyon.org.uk/
* tag 'afs-fixes-20210913' of git://git.kernel.org/pub/scm/linux/kernel/git/dhowells/linux-fs:
  afs: Fix updating of i_blocks on file/dir extension
  afs: Fix corruption in reads at fpos 2G-4G from an OpenAFS server
  afs: Try to avoid taking RCU read lock when checking vnode validity
  afs: Fix mmap coherency vs 3rd-party changes
  afs: Fix incorrect triggering of sillyrename on 3rd-party invalidation
  afs: Add missing vnode validation checks
  afs: Fix page leak
  afs: Fix missing put on afs_read objects and missing get on the key therein

17 files changed:
fs/afs/callback.c
fs/afs/cell.c
fs/afs/dir.c
fs/afs/dir_edit.c
fs/afs/file.c
fs/afs/fs_probe.c
fs/afs/fsclient.c
fs/afs/inode.c
fs/afs/internal.h
fs/afs/protocol_afs.h [new file with mode: 0644]
fs/afs/protocol_yfs.h
fs/afs/rotate.c
fs/afs/server.c
fs/afs/super.c
fs/afs/write.c
include/trace/events/afs.h
mm/memory.c

index 7d9b23d981bf1aaac48478b3843e39a5c7f1f9bc..1b4d5809808d0d232d6a66d1c4a2b74d8bd980f7 100644 (file)
 #include <linux/sched.h>
 #include "internal.h"
 
+/*
+ * Handle invalidation of an mmap'd file.  We invalidate all the PTEs referring
+ * to the pages in this file's pagecache, forcing the kernel to go through
+ * ->fault() or ->page_mkwrite() - at which point we can handle invalidation
+ * more fully.
+ */
+void afs_invalidate_mmap_work(struct work_struct *work)
+{
+       struct afs_vnode *vnode = container_of(work, struct afs_vnode, cb_work);
+
+       unmap_mapping_pages(vnode->vfs_inode.i_mapping, 0, 0, false);
+}
+
+void afs_server_init_callback_work(struct work_struct *work)
+{
+       struct afs_server *server = container_of(work, struct afs_server, initcb_work);
+       struct afs_vnode *vnode;
+       struct afs_cell *cell = server->cell;
+
+       down_read(&cell->fs_open_mmaps_lock);
+
+       list_for_each_entry(vnode, &cell->fs_open_mmaps, cb_mmap_link) {
+               if (vnode->cb_server == server) {
+                       clear_bit(AFS_VNODE_CB_PROMISED, &vnode->flags);
+                       queue_work(system_unbound_wq, &vnode->cb_work);
+               }
+       }
+
+       up_read(&cell->fs_open_mmaps_lock);
+}
+
 /*
  * Allow the fileserver to request callback state (re-)initialisation.
  * Unfortunately, UUIDs are not guaranteed unique.
@@ -29,8 +60,11 @@ void afs_init_callback_state(struct afs_server *server)
        rcu_read_lock();
        do {
                server->cb_s_break++;
-               server = rcu_dereference(server->uuid_next);
-       } while (0);
+               atomic_inc(&server->cell->fs_s_break);
+               if (!list_empty(&server->cell->fs_open_mmaps))
+                       queue_work(system_unbound_wq, &server->initcb_work);
+
+       } while ((server = rcu_dereference(server->uuid_next)));
        rcu_read_unlock();
 }
 
@@ -44,11 +78,17 @@ void __afs_break_callback(struct afs_vnode *vnode, enum afs_cb_break_reason reas
        clear_bit(AFS_VNODE_NEW_CONTENT, &vnode->flags);
        if (test_and_clear_bit(AFS_VNODE_CB_PROMISED, &vnode->flags)) {
                vnode->cb_break++;
+               vnode->cb_v_break = vnode->volume->cb_v_break;
                afs_clear_permits(vnode);
 
                if (vnode->lock_state == AFS_VNODE_LOCK_WAITING_FOR_CB)
                        afs_lock_may_be_available(vnode);
 
+               if (reason != afs_cb_break_for_deleted &&
+                   vnode->status.type == AFS_FTYPE_FILE &&
+                   atomic_read(&vnode->cb_nr_mmap))
+                       queue_work(system_unbound_wq, &vnode->cb_work);
+
                trace_afs_cb_break(&vnode->fid, vnode->cb_break, reason, true);
        } else {
                trace_afs_cb_break(&vnode->fid, vnode->cb_break, reason, false);
index 887b673f6223022f2b9699503ef80796736f52be..d88407fb9bc09a8bb0e60ad7c9db320a028cab51 100644 (file)
@@ -166,6 +166,8 @@ static struct afs_cell *afs_alloc_cell(struct afs_net *net,
        seqlock_init(&cell->volume_lock);
        cell->fs_servers = RB_ROOT;
        seqlock_init(&cell->fs_lock);
+       INIT_LIST_HEAD(&cell->fs_open_mmaps);
+       init_rwsem(&cell->fs_open_mmaps_lock);
        rwlock_init(&cell->vl_servers_lock);
        cell->flags = (1 << AFS_CELL_FL_CHECK_ALIAS);
 
index ac829e63c5704cdf4cf421393728fb803a1ad927..4579bbda46346ce4f464a8e6cbeba6329ef1e261 100644 (file)
@@ -1077,9 +1077,9 @@ static struct dentry *afs_lookup(struct inode *dir, struct dentry *dentry,
  */
 static int afs_d_revalidate_rcu(struct dentry *dentry)
 {
-       struct afs_vnode *dvnode, *vnode;
+       struct afs_vnode *dvnode;
        struct dentry *parent;
-       struct inode *dir, *inode;
+       struct inode *dir;
        long dir_version, de_version;
 
        _enter("%p", dentry);
@@ -1109,18 +1109,6 @@ static int afs_d_revalidate_rcu(struct dentry *dentry)
                        return -ECHILD;
        }
 
-       /* Check to see if the vnode referred to by the dentry still
-        * has a callback.
-        */
-       if (d_really_is_positive(dentry)) {
-               inode = d_inode_rcu(dentry);
-               if (inode) {
-                       vnode = AFS_FS_I(inode);
-                       if (!afs_check_validity(vnode))
-                               return -ECHILD;
-               }
-       }
-
        return 1; /* Still valid */
 }
 
@@ -1156,17 +1144,7 @@ static int afs_d_revalidate(struct dentry *dentry, unsigned int flags)
        if (IS_ERR(key))
                key = NULL;
 
-       if (d_really_is_positive(dentry)) {
-               inode = d_inode(dentry);
-               if (inode) {
-                       vnode = AFS_FS_I(inode);
-                       afs_validate(vnode, key);
-                       if (test_bit(AFS_VNODE_DELETED, &vnode->flags))
-                               goto out_bad;
-               }
-       }
-
-       /* lock down the parent dentry so we can peer at it */
+       /* Hold the parent dentry so we can peer at it */
        parent = dget_parent(dentry);
        dir = AFS_FS_I(d_inode(parent));
 
@@ -1175,7 +1153,7 @@ static int afs_d_revalidate(struct dentry *dentry, unsigned int flags)
 
        if (test_bit(AFS_VNODE_DELETED, &dir->flags)) {
                _debug("%pd: parent dir deleted", dentry);
-               goto out_bad_parent;
+               goto not_found;
        }
 
        /* We only need to invalidate a dentry if the server's copy changed
@@ -1201,12 +1179,12 @@ static int afs_d_revalidate(struct dentry *dentry, unsigned int flags)
        case 0:
                /* the filename maps to something */
                if (d_really_is_negative(dentry))
-                       goto out_bad_parent;
+                       goto not_found;
                inode = d_inode(dentry);
                if (is_bad_inode(inode)) {
                        printk("kAFS: afs_d_revalidate: %pd2 has bad inode\n",
                               dentry);
-                       goto out_bad_parent;
+                       goto not_found;
                }
 
                vnode = AFS_FS_I(inode);
@@ -1228,9 +1206,6 @@ static int afs_d_revalidate(struct dentry *dentry, unsigned int flags)
                               dentry, fid.unique,
                               vnode->fid.unique,
                               vnode->vfs_inode.i_generation);
-                       write_seqlock(&vnode->cb_lock);
-                       set_bit(AFS_VNODE_DELETED, &vnode->flags);
-                       write_sequnlock(&vnode->cb_lock);
                        goto not_found;
                }
                goto out_valid;
@@ -1245,7 +1220,7 @@ static int afs_d_revalidate(struct dentry *dentry, unsigned int flags)
        default:
                _debug("failed to iterate dir %pd: %d",
                       parent, ret);
-               goto out_bad_parent;
+               goto not_found;
        }
 
 out_valid:
@@ -1256,16 +1231,9 @@ out_valid_noupdate:
        _leave(" = 1 [valid]");
        return 1;
 
-       /* the dirent, if it exists, now points to a different vnode */
 not_found:
-       spin_lock(&dentry->d_lock);
-       dentry->d_flags |= DCACHE_NFSFS_RENAMED;
-       spin_unlock(&dentry->d_lock);
-
-out_bad_parent:
        _debug("dropping dentry %pd2", dentry);
        dput(parent);
-out_bad:
        key_put(key);
 
        _leave(" = 0 [bad]");
@@ -1792,6 +1760,10 @@ static int afs_link(struct dentry *from, struct inode *dir,
                goto error;
        }
 
+       ret = afs_validate(vnode, op->key);
+       if (ret < 0)
+               goto error_op;
+
        afs_op_set_vnode(op, 0, dvnode);
        afs_op_set_vnode(op, 1, vnode);
        op->file[0].dv_delta = 1;
@@ -1805,6 +1777,8 @@ static int afs_link(struct dentry *from, struct inode *dir,
        op->create.reason       = afs_edit_dir_for_link;
        return afs_do_sync_operation(op);
 
+error_op:
+       afs_put_operation(op);
 error:
        d_drop(dentry);
        _leave(" = %d", ret);
@@ -1989,6 +1963,11 @@ static int afs_rename(struct user_namespace *mnt_userns, struct inode *old_dir,
        if (IS_ERR(op))
                return PTR_ERR(op);
 
+       ret = afs_validate(vnode, op->key);
+       op->error = ret;
+       if (ret < 0)
+               goto error;
+
        afs_op_set_vnode(op, 0, orig_dvnode);
        afs_op_set_vnode(op, 1, new_dvnode); /* May be same as orig_dvnode */
        op->file[0].dv_delta = 1;
index f4600c1353adf79fa002c8864ae574bee9c694ec..540b9fc96824adaa16d2d318d709ca67c75ef30c 100644 (file)
@@ -263,7 +263,7 @@ void afs_edit_dir_add(struct afs_vnode *vnode,
                if (b == nr_blocks) {
                        _debug("init %u", b);
                        afs_edit_init_block(meta, block, b);
-                       i_size_write(&vnode->vfs_inode, (b + 1) * AFS_DIR_BLOCK_SIZE);
+                       afs_set_i_size(vnode, (b + 1) * AFS_DIR_BLOCK_SIZE);
                }
 
                /* Only lower dir pages have a counter in the header. */
@@ -296,7 +296,7 @@ void afs_edit_dir_add(struct afs_vnode *vnode,
 new_directory:
        afs_edit_init_block(meta, meta, 0);
        i_size = AFS_DIR_BLOCK_SIZE;
-       i_size_write(&vnode->vfs_inode, i_size);
+       afs_set_i_size(vnode, i_size);
        slot = AFS_DIR_RESV_BLOCKS0;
        page = page0;
        block = meta;
index db035ae2a13451dc97fc10e97a6aeb50088898a4..e6c447ae91f38ab82a0dab2edc876a6afdb0ae76 100644 (file)
@@ -24,12 +24,16 @@ static void afs_invalidatepage(struct page *page, unsigned int offset,
 static int afs_releasepage(struct page *page, gfp_t gfp_flags);
 
 static void afs_readahead(struct readahead_control *ractl);
+static ssize_t afs_file_read_iter(struct kiocb *iocb, struct iov_iter *iter);
+static void afs_vm_open(struct vm_area_struct *area);
+static void afs_vm_close(struct vm_area_struct *area);
+static vm_fault_t afs_vm_map_pages(struct vm_fault *vmf, pgoff_t start_pgoff, pgoff_t end_pgoff);
 
 const struct file_operations afs_file_operations = {
        .open           = afs_open,
        .release        = afs_release,
        .llseek         = generic_file_llseek,
-       .read_iter      = generic_file_read_iter,
+       .read_iter      = afs_file_read_iter,
        .write_iter     = afs_file_write,
        .mmap           = afs_file_mmap,
        .splice_read    = generic_file_splice_read,
@@ -59,8 +63,10 @@ const struct address_space_operations afs_fs_aops = {
 };
 
 static const struct vm_operations_struct afs_vm_ops = {
+       .open           = afs_vm_open,
+       .close          = afs_vm_close,
        .fault          = filemap_fault,
-       .map_pages      = filemap_map_pages,
+       .map_pages      = afs_vm_map_pages,
        .page_mkwrite   = afs_page_mkwrite,
 };
 
@@ -295,7 +301,7 @@ static void afs_req_issue_op(struct netfs_read_subrequest *subreq)
        fsreq->subreq   = subreq;
        fsreq->pos      = subreq->start + subreq->transferred;
        fsreq->len      = subreq->len   - subreq->transferred;
-       fsreq->key      = subreq->rreq->netfs_priv;
+       fsreq->key      = key_get(subreq->rreq->netfs_priv);
        fsreq->vnode    = vnode;
        fsreq->iter     = &fsreq->def_iter;
 
@@ -304,6 +310,7 @@ static void afs_req_issue_op(struct netfs_read_subrequest *subreq)
                        fsreq->pos, fsreq->len);
 
        afs_fetch_data(fsreq->vnode, fsreq);
+       afs_put_read(fsreq);
 }
 
 static int afs_symlink_readpage(struct page *page)
@@ -490,15 +497,88 @@ static int afs_releasepage(struct page *page, gfp_t gfp_flags)
        return 1;
 }
 
+static void afs_add_open_mmap(struct afs_vnode *vnode)
+{
+       if (atomic_inc_return(&vnode->cb_nr_mmap) == 1) {
+               down_write(&vnode->volume->cell->fs_open_mmaps_lock);
+
+               list_add_tail(&vnode->cb_mmap_link,
+                             &vnode->volume->cell->fs_open_mmaps);
+
+               up_write(&vnode->volume->cell->fs_open_mmaps_lock);
+       }
+}
+
+static void afs_drop_open_mmap(struct afs_vnode *vnode)
+{
+       if (!atomic_dec_and_test(&vnode->cb_nr_mmap))
+               return;
+
+       down_write(&vnode->volume->cell->fs_open_mmaps_lock);
+
+       if (atomic_read(&vnode->cb_nr_mmap) == 0)
+               list_del_init(&vnode->cb_mmap_link);
+
+       up_write(&vnode->volume->cell->fs_open_mmaps_lock);
+       flush_work(&vnode->cb_work);
+}
+
 /*
  * Handle setting up a memory mapping on an AFS file.
  */
 static int afs_file_mmap(struct file *file, struct vm_area_struct *vma)
 {
+       struct afs_vnode *vnode = AFS_FS_I(file_inode(file));
        int ret;
 
+       afs_add_open_mmap(vnode);
+
        ret = generic_file_mmap(file, vma);
        if (ret == 0)
                vma->vm_ops = &afs_vm_ops;
+       else
+               afs_drop_open_mmap(vnode);
        return ret;
 }
+
+static void afs_vm_open(struct vm_area_struct *vma)
+{
+       afs_add_open_mmap(AFS_FS_I(file_inode(vma->vm_file)));
+}
+
+static void afs_vm_close(struct vm_area_struct *vma)
+{
+       afs_drop_open_mmap(AFS_FS_I(file_inode(vma->vm_file)));
+}
+
+static vm_fault_t afs_vm_map_pages(struct vm_fault *vmf, pgoff_t start_pgoff, pgoff_t end_pgoff)
+{
+       struct afs_vnode *vnode = AFS_FS_I(file_inode(vmf->vma->vm_file));
+       struct afs_file *af = vmf->vma->vm_file->private_data;
+
+       switch (afs_validate(vnode, af->key)) {
+       case 0:
+               return filemap_map_pages(vmf, start_pgoff, end_pgoff);
+       case -ENOMEM:
+               return VM_FAULT_OOM;
+       case -EINTR:
+       case -ERESTARTSYS:
+               return VM_FAULT_RETRY;
+       case -ESTALE:
+       default:
+               return VM_FAULT_SIGBUS;
+       }
+}
+
+static ssize_t afs_file_read_iter(struct kiocb *iocb, struct iov_iter *iter)
+{
+       struct afs_vnode *vnode = AFS_FS_I(file_inode(iocb->ki_filp));
+       struct afs_file *af = iocb->ki_filp->private_data;
+       int ret;
+
+       ret = afs_validate(vnode, af->key);
+       if (ret < 0)
+               return ret;
+
+       return generic_file_read_iter(iocb, iter);
+}
index e7e98ad63a91ae8ea126a2ee646814a7ec20ab41..c0031a3ab42f5a921fe94feb7440d4bc1030fac4 100644 (file)
@@ -9,6 +9,7 @@
 #include <linux/slab.h>
 #include "afs_fs.h"
 #include "internal.h"
+#include "protocol_afs.h"
 #include "protocol_yfs.h"
 
 static unsigned int afs_fs_probe_fast_poll_interval = 30 * HZ;
@@ -102,7 +103,7 @@ void afs_fileserver_probe_result(struct afs_call *call)
        struct afs_addr_list *alist = call->alist;
        struct afs_server *server = call->server;
        unsigned int index = call->addr_ix;
-       unsigned int rtt_us = 0;
+       unsigned int rtt_us = 0, cap0;
        int ret = call->error;
 
        _enter("%pU,%u", &server->uuid, index);
@@ -159,6 +160,11 @@ responded:
                        clear_bit(AFS_SERVER_FL_IS_YFS, &server->flags);
                        alist->addrs[index].srx_service = call->service_id;
                }
+               cap0 = ntohl(call->tmp);
+               if (cap0 & AFS3_VICED_CAPABILITY_64BITFILES)
+                       set_bit(AFS_SERVER_FL_HAS_FS64, &server->flags);
+               else
+                       clear_bit(AFS_SERVER_FL_HAS_FS64, &server->flags);
        }
 
        if (rxrpc_kernel_get_srtt(call->net->socket, call->rxcall, &rtt_us) &&
index dd3f45d906d23cf9ea8926b876a51099fd525cae..4943413d9c5f7ff3011ea2fad230e5a84a6e48e5 100644 (file)
@@ -456,9 +456,7 @@ void afs_fs_fetch_data(struct afs_operation *op)
        struct afs_read *req = op->fetch.req;
        __be32 *bp;
 
-       if (upper_32_bits(req->pos) ||
-           upper_32_bits(req->len) ||
-           upper_32_bits(req->pos + req->len))
+       if (test_bit(AFS_SERVER_FL_HAS_FS64, &op->server->flags))
                return afs_fs_fetch_data64(op);
 
        _enter("");
@@ -1113,9 +1111,7 @@ void afs_fs_store_data(struct afs_operation *op)
               (unsigned long long)op->store.pos,
               (unsigned long long)op->store.i_size);
 
-       if (upper_32_bits(op->store.pos) ||
-           upper_32_bits(op->store.size) ||
-           upper_32_bits(op->store.i_size))
+       if (test_bit(AFS_SERVER_FL_HAS_FS64, &op->server->flags))
                return afs_fs_store_data64(op);
 
        call = afs_alloc_flat_call(op->net, &afs_RXFSStoreData,
@@ -1229,7 +1225,7 @@ static void afs_fs_setattr_size(struct afs_operation *op)
               key_serial(op->key), vp->fid.vid, vp->fid.vnode);
 
        ASSERT(attr->ia_valid & ATTR_SIZE);
-       if (upper_32_bits(attr->ia_size))
+       if (test_bit(AFS_SERVER_FL_HAS_FS64, &op->server->flags))
                return afs_fs_setattr_size64(op);
 
        call = afs_alloc_flat_call(op->net, &afs_RXFSStoreData_as_Status,
@@ -1657,20 +1653,33 @@ static int afs_deliver_fs_get_capabilities(struct afs_call *call)
                        return ret;
 
                count = ntohl(call->tmp);
-
                call->count = count;
                call->count2 = count;
-               afs_extract_discard(call, count * sizeof(__be32));
+               if (count == 0) {
+                       call->unmarshall = 4;
+                       call->tmp = 0;
+                       break;
+               }
+
+               /* Extract the first word of the capabilities to call->tmp */
+               afs_extract_to_tmp(call);
                call->unmarshall++;
                fallthrough;
 
-               /* Extract capabilities words */
        case 2:
                ret = afs_extract_data(call, false);
                if (ret < 0)
                        return ret;
 
-               /* TODO: Examine capabilities */
+               afs_extract_discard(call, (count - 1) * sizeof(__be32));
+               call->unmarshall++;
+               fallthrough;
+
+               /* Extract remaining capabilities words */
+       case 3:
+               ret = afs_extract_data(call, false);
+               if (ret < 0)
+                       return ret;
 
                call->unmarshall++;
                break;
index 80b6c8d967d5cf1f34f7f6202294decf109c9165..8fcffea2daf50b2d8ac8a72f013c2159480f2407 100644 (file)
@@ -53,16 +53,6 @@ static noinline void dump_vnode(struct afs_vnode *vnode, struct afs_vnode *paren
                dump_stack();
 }
 
-/*
- * Set the file size and block count.  Estimate the number of 512 bytes blocks
- * used, rounded up to nearest 1K for consistency with other AFS clients.
- */
-static void afs_set_i_size(struct afs_vnode *vnode, u64 size)
-{
-       i_size_write(&vnode->vfs_inode, size);
-       vnode->vfs_inode.i_blocks = ((size + 1023) >> 10) << 1;
-}
-
 /*
  * Initialise an inode from the vnode status.
  */
@@ -587,22 +577,32 @@ static void afs_zap_data(struct afs_vnode *vnode)
 }
 
 /*
- * Get the server reinit counter for a vnode's current server.
+ * Check to see if we have a server currently serving this volume and that it
+ * hasn't been reinitialised or dropped from the list.
  */
-static bool afs_get_s_break_rcu(struct afs_vnode *vnode, unsigned int *_s_break)
+static bool afs_check_server_good(struct afs_vnode *vnode)
 {
-       struct afs_server_list *slist = rcu_dereference(vnode->volume->servers);
+       struct afs_server_list *slist;
        struct afs_server *server;
+       bool good;
        int i;
 
+       if (vnode->cb_fs_s_break == atomic_read(&vnode->volume->cell->fs_s_break))
+               return true;
+
+       rcu_read_lock();
+
+       slist = rcu_dereference(vnode->volume->servers);
        for (i = 0; i < slist->nr_servers; i++) {
                server = slist->servers[i].server;
                if (server == vnode->cb_server) {
-                       *_s_break = READ_ONCE(server->cb_s_break);
-                       return true;
+                       good = (vnode->cb_s_break == server->cb_s_break);
+                       rcu_read_unlock();
+                       return good;
                }
        }
 
+       rcu_read_unlock();
        return false;
 }
 
@@ -611,57 +611,46 @@ static bool afs_get_s_break_rcu(struct afs_vnode *vnode, unsigned int *_s_break)
  */
 bool afs_check_validity(struct afs_vnode *vnode)
 {
-       struct afs_volume *volume = vnode->volume;
        enum afs_cb_break_reason need_clear = afs_cb_break_no_break;
        time64_t now = ktime_get_real_seconds();
-       bool valid;
-       unsigned int cb_break, cb_s_break, cb_v_break;
+       unsigned int cb_break;
        int seq = 0;
 
        do {
                read_seqbegin_or_lock(&vnode->cb_lock, &seq);
-               cb_v_break = READ_ONCE(volume->cb_v_break);
                cb_break = vnode->cb_break;
 
-               if (test_bit(AFS_VNODE_CB_PROMISED, &vnode->flags) &&
-                   afs_get_s_break_rcu(vnode, &cb_s_break)) {
-                       if (vnode->cb_s_break != cb_s_break ||
-                           vnode->cb_v_break != cb_v_break) {
-                               vnode->cb_s_break = cb_s_break;
-                               vnode->cb_v_break = cb_v_break;
-                               need_clear = afs_cb_break_for_vsbreak;
-                               valid = false;
-                       } else if (test_bit(AFS_VNODE_ZAP_DATA, &vnode->flags)) {
+               if (test_bit(AFS_VNODE_CB_PROMISED, &vnode->flags)) {
+                       if (vnode->cb_v_break != vnode->volume->cb_v_break)
+                               need_clear = afs_cb_break_for_v_break;
+                       else if (!afs_check_server_good(vnode))
+                               need_clear = afs_cb_break_for_s_reinit;
+                       else if (test_bit(AFS_VNODE_ZAP_DATA, &vnode->flags))
                                need_clear = afs_cb_break_for_zap;
-                               valid = false;
-                       } else if (vnode->cb_expires_at - 10 <= now) {
+                       else if (vnode->cb_expires_at - 10 <= now)
                                need_clear = afs_cb_break_for_lapsed;
-                               valid = false;
-                       } else {
-                               valid = true;
-                       }
                } else if (test_bit(AFS_VNODE_DELETED, &vnode->flags)) {
-                       valid = true;
+                       ;
                } else {
-                       vnode->cb_v_break = cb_v_break;
-                       valid = false;
+                       need_clear = afs_cb_break_no_promise;
                }
 
        } while (need_seqretry(&vnode->cb_lock, seq));
 
        done_seqretry(&vnode->cb_lock, seq);
 
-       if (need_clear != afs_cb_break_no_break) {
-               write_seqlock(&vnode->cb_lock);
-               if (cb_break == vnode->cb_break)
-                       __afs_break_callback(vnode, need_clear);
-               else
-                       trace_afs_cb_miss(&vnode->fid, need_clear);
-               write_sequnlock(&vnode->cb_lock);
-               valid = false;
-       }
+       if (need_clear == afs_cb_break_no_break)
+               return true;
 
-       return valid;
+       write_seqlock(&vnode->cb_lock);
+       if (need_clear == afs_cb_break_no_promise)
+               vnode->cb_v_break = vnode->volume->cb_v_break;
+       else if (cb_break == vnode->cb_break)
+               __afs_break_callback(vnode, need_clear);
+       else
+               trace_afs_cb_miss(&vnode->fid, need_clear);
+       write_sequnlock(&vnode->cb_lock);
+       return false;
 }
 
 /*
@@ -675,21 +664,20 @@ bool afs_check_validity(struct afs_vnode *vnode)
  */
 int afs_validate(struct afs_vnode *vnode, struct key *key)
 {
-       bool valid;
        int ret;
 
        _enter("{v={%llx:%llu} fl=%lx},%x",
               vnode->fid.vid, vnode->fid.vnode, vnode->flags,
               key_serial(key));
 
-       rcu_read_lock();
-       valid = afs_check_validity(vnode);
-       rcu_read_unlock();
-
-       if (test_bit(AFS_VNODE_DELETED, &vnode->flags))
-               clear_nlink(&vnode->vfs_inode);
+       if (unlikely(test_bit(AFS_VNODE_DELETED, &vnode->flags))) {
+               if (vnode->vfs_inode.i_nlink)
+                       clear_nlink(&vnode->vfs_inode);
+               goto valid;
+       }
 
-       if (valid)
+       if (test_bit(AFS_VNODE_CB_PROMISED, &vnode->flags) &&
+           afs_check_validity(vnode))
                goto valid;
 
        down_write(&vnode->validate_lock);
index 5ed416f4ff335baec6471086032b146b09c5399f..0ad97a8fc0d494d9fb40d02b4efaf7d7d8ba0ff8 100644 (file)
@@ -390,6 +390,9 @@ struct afs_cell {
        /* Active fileserver interaction state. */
        struct rb_root          fs_servers;     /* afs_server (by server UUID) */
        seqlock_t               fs_lock;        /* For fs_servers  */
+       struct rw_semaphore     fs_open_mmaps_lock;
+       struct list_head        fs_open_mmaps;  /* List of vnodes that are mmapped */
+       atomic_t                fs_s_break;     /* Counter of CB.InitCallBackState messages */
 
        /* VL server list. */
        rwlock_t                vl_servers_lock; /* Lock on vl_servers */
@@ -503,6 +506,7 @@ struct afs_server {
        struct hlist_node       addr4_link;     /* Link in net->fs_addresses4 */
        struct hlist_node       addr6_link;     /* Link in net->fs_addresses6 */
        struct hlist_node       proc_link;      /* Link in net->fs_proc */
+       struct work_struct      initcb_work;    /* Work for CB.InitCallBackState* */
        struct afs_server       *gc_next;       /* Next server in manager's list */
        time64_t                unuse_time;     /* Time at which last unused */
        unsigned long           flags;
@@ -516,6 +520,7 @@ struct afs_server {
 #define AFS_SERVER_FL_IS_YFS   16              /* Server is YFS not AFS */
 #define AFS_SERVER_FL_NO_IBULK 17              /* Fileserver doesn't support FS.InlineBulkStatus */
 #define AFS_SERVER_FL_NO_RM2   18              /* Fileserver doesn't support YFS.RemoveFile2 */
+#define AFS_SERVER_FL_HAS_FS64 19              /* Fileserver supports FS.{Fetch,Store}Data64 */
        atomic_t                ref;            /* Object refcount */
        atomic_t                active;         /* Active user count */
        u32                     addr_version;   /* Address list version */
@@ -657,7 +662,11 @@ struct afs_vnode {
        afs_lock_type_t         lock_type : 8;
 
        /* outstanding callback notification on this file */
+       struct work_struct      cb_work;        /* Work for mmap'd files */
+       struct list_head        cb_mmap_link;   /* Link in cell->fs_open_mmaps */
        void                    *cb_server;     /* Server with callback/filelock */
+       atomic_t                cb_nr_mmap;     /* Number of mmaps */
+       unsigned int            cb_fs_s_break;  /* Mass server break counter (cell->fs_s_break) */
        unsigned int            cb_s_break;     /* Mass break counter on ->server */
        unsigned int            cb_v_break;     /* Mass break counter on ->volume */
        unsigned int            cb_break;       /* Break counter on vnode */
@@ -965,6 +974,8 @@ extern struct fscache_cookie_def afs_vnode_cache_index_def;
 /*
  * callback.c
  */
+extern void afs_invalidate_mmap_work(struct work_struct *);
+extern void afs_server_init_callback_work(struct work_struct *work);
 extern void afs_init_callback_state(struct afs_server *);
 extern void __afs_break_callback(struct afs_vnode *, enum afs_cb_break_reason);
 extern void afs_break_callback(struct afs_vnode *, enum afs_cb_break_reason);
@@ -1585,6 +1596,16 @@ static inline void afs_update_dentry_version(struct afs_operation *op,
                        (void *)(unsigned long)dir_vp->scb.status.data_version;
 }
 
+/*
+ * Set the file size and block count.  i_blocks is in 512-byte units, with the
+ * size rounded up to the next 1K first for consistency with other AFS clients.
+ */
+static inline void afs_set_i_size(struct afs_vnode *vnode, u64 size)
+{
+       i_size_write(&vnode->vfs_inode, size);
+       vnode->vfs_inode.i_blocks = ((size + 1023) >> 10) << 1;
+}
+
 /*
  * Check for a conflicting operation on a directory that we just unlinked from.
  * If someone managed to sneak a link or an unlink in on the file we just
diff --git a/fs/afs/protocol_afs.h b/fs/afs/protocol_afs.h
new file mode 100644 (file)
index 0000000..0c39358
--- /dev/null
@@ -0,0 +1,15 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/* AFS protocol bits: fileserver capability set size and flag values
+ *
+ * Copyright (C) 2021 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ */
+
+
+#define AFSCAPABILITIESMAX 196 /* Maximum number of words in a capability set */
+
+/* AFS3 fileserver capability flags found in word 0 of the capability set */
+#define AFS3_VICED_CAPABILITY_ERRORTRANS       0x0001 /* Uses UAE errors */
+#define AFS3_VICED_CAPABILITY_64BITFILES       0x0002 /* FetchData64 & StoreData64 supported */
+#define AFS3_VICED_CAPABILITY_WRITELOCKACL     0x0004 /* Can lock a file even without lock perm */
+#define AFS3_VICED_CAPABILITY_SANEACLS         0x0008 /* ACLs reviewed for sanity - don't use */
index b5bd03b1d3c7f0d271671a134f1cafb697825f42..e4cd89c44c465484f1de243cb9caaf120a0d05e8 100644 (file)
@@ -168,3 +168,9 @@ enum yfs_lock_type {
        yfs_LockMandatoryWrite  = 0x101,
        yfs_LockMandatoryExtend = 0x102,
 };
+
+/* RXYFS Viced Capability Flags */
+#define YFS_VICED_CAPABILITY_ERRORTRANS                0x0001 /* Deprecated v0.195 */
+#define YFS_VICED_CAPABILITY_64BITFILES                0x0002 /* Deprecated v0.195 */
+#define YFS_VICED_CAPABILITY_WRITELOCKACL      0x0004 /* Can lock a file even without lock perm */
+#define YFS_VICED_CAPABILITY_SANEACLS          0x0008 /* Deprecated v0.195 */
index d83f13c44b9213878ddb20ad4c914152994565d5..79e1a5f6701bed9822bb17e324740b6da13b94da 100644 (file)
@@ -374,6 +374,7 @@ selected_server:
        if (vnode->cb_server != server) {
                vnode->cb_server = server;
                vnode->cb_s_break = server->cb_s_break;
+               vnode->cb_fs_s_break = atomic_read(&server->cell->fs_s_break);
                vnode->cb_v_break = vnode->volume->cb_v_break;
                clear_bit(AFS_VNODE_CB_PROMISED, &vnode->flags);
        }
index 684a2b02b9ff70ae056aab6a3807f624bb26c47a..6e5b9a19b234e0db75d3980086251c2825398daf 100644 (file)
@@ -235,6 +235,7 @@ static struct afs_server *afs_alloc_server(struct afs_cell *cell,
        server->addr_version = alist->version;
        server->uuid = *uuid;
        rwlock_init(&server->fs_lock);
+       INIT_WORK(&server->initcb_work, afs_server_init_callback_work);
        init_waitqueue_head(&server->probe_wq);
        INIT_LIST_HEAD(&server->probe_link);
        spin_lock_init(&server->probe_lock);
@@ -467,6 +468,7 @@ static void afs_destroy_server(struct afs_net *net, struct afs_server *server)
        if (test_bit(AFS_SERVER_FL_MAY_HAVE_CB, &server->flags))
                afs_give_up_callbacks(net, server);
 
+       flush_work(&server->initcb_work);
        afs_put_server(net, server, afs_server_trace_destroy);
 }
 
index e38bb1e7a4d227bed688b396efa138a1dec9d7f4..d110def8aa8eb993212c00ed161089eba89a7545 100644 (file)
@@ -698,6 +698,7 @@ static struct inode *afs_alloc_inode(struct super_block *sb)
        vnode->lock_state       = AFS_VNODE_LOCK_NONE;
 
        init_rwsem(&vnode->rmdir_lock);
+       INIT_WORK(&vnode->cb_work, afs_invalidate_mmap_work);
 
        _leave(" = %p", &vnode->vfs_inode);
        return &vnode->vfs_inode;
index c0534697268ef808e42e6a5762c44888e3148a3b..2dfe3b3a53d69ab08ead53808f0ab95c5a063873 100644 (file)
@@ -137,7 +137,7 @@ int afs_write_end(struct file *file, struct address_space *mapping,
                write_seqlock(&vnode->cb_lock);
                i_size = i_size_read(&vnode->vfs_inode);
                if (maybe_i_size > i_size)
-                       i_size_write(&vnode->vfs_inode, maybe_i_size);
+                       afs_set_i_size(vnode, maybe_i_size);
                write_sequnlock(&vnode->cb_lock);
        }
 
@@ -471,13 +471,18 @@ static void afs_extend_writeback(struct address_space *mapping,
                        }
 
                        /* Has the page moved or been split? */
-                       if (unlikely(page != xas_reload(&xas)))
+                       if (unlikely(page != xas_reload(&xas))) {
+                               put_page(page);
                                break;
+                       }
 
-                       if (!trylock_page(page))
+                       if (!trylock_page(page)) {
+                               put_page(page);
                                break;
+                       }
                        if (!PageDirty(page) || PageWriteback(page)) {
                                unlock_page(page);
+                               put_page(page);
                                break;
                        }
 
@@ -487,6 +492,7 @@ static void afs_extend_writeback(struct address_space *mapping,
                        t = afs_page_dirty_to(page, priv);
                        if (f != 0 && !new_content) {
                                unlock_page(page);
+                               put_page(page);
                                break;
                        }
 
@@ -801,6 +807,7 @@ int afs_writepages(struct address_space *mapping,
 ssize_t afs_file_write(struct kiocb *iocb, struct iov_iter *from)
 {
        struct afs_vnode *vnode = AFS_FS_I(file_inode(iocb->ki_filp));
+       struct afs_file *af = iocb->ki_filp->private_data;
        ssize_t result;
        size_t count = iov_iter_count(from);
 
@@ -816,6 +823,10 @@ ssize_t afs_file_write(struct kiocb *iocb, struct iov_iter *from)
        if (!count)
                return 0;
 
+       result = afs_validate(vnode, af->key);
+       if (result < 0)
+               return result;
+
        result = generic_file_write_iter(iocb, from);
 
        _leave(" = %zd", result);
@@ -829,13 +840,18 @@ ssize_t afs_file_write(struct kiocb *iocb, struct iov_iter *from)
  */
 int afs_fsync(struct file *file, loff_t start, loff_t end, int datasync)
 {
-       struct inode *inode = file_inode(file);
-       struct afs_vnode *vnode = AFS_FS_I(inode);
+       struct afs_vnode *vnode = AFS_FS_I(file_inode(file));
+       struct afs_file *af = file->private_data;
+       int ret;
 
        _enter("{%llx:%llu},{n=%pD},%d",
               vnode->fid.vid, vnode->fid.vnode, file,
               datasync);
 
+       ret = afs_validate(vnode, af->key);
+       if (ret < 0)
+               return ret;
+
        return file_write_and_wait_range(file, start, end);
 }
 
@@ -849,11 +865,14 @@ vm_fault_t afs_page_mkwrite(struct vm_fault *vmf)
        struct file *file = vmf->vma->vm_file;
        struct inode *inode = file_inode(file);
        struct afs_vnode *vnode = AFS_FS_I(inode);
+       struct afs_file *af = file->private_data;
        unsigned long priv;
        vm_fault_t ret = VM_FAULT_RETRY;
 
        _enter("{{%llx:%llu}},{%lx}", vnode->fid.vid, vnode->fid.vnode, page->index);
 
+       afs_validate(vnode, af->key);
+
        sb_start_pagefault(inode->i_sb);
 
        /* Wait for the page to be written to the cache before we allow it to
index 9f73ed2cf06116b8fcfee52d701f77bb93af713b..bca73e8c8cdec8db0ab38f9cc4bfa46b3d4b51a2 100644 (file)
@@ -306,11 +306,13 @@ enum afs_flock_operation {
 
 enum afs_cb_break_reason {
        afs_cb_break_no_break,
+       afs_cb_break_no_promise,
        afs_cb_break_for_callback,
        afs_cb_break_for_deleted,
        afs_cb_break_for_lapsed,
+       afs_cb_break_for_s_reinit,
        afs_cb_break_for_unlink,
-       afs_cb_break_for_vsbreak,
+       afs_cb_break_for_v_break,
        afs_cb_break_for_volume_callback,
        afs_cb_break_for_zap,
 };
@@ -602,11 +604,13 @@ enum afs_cb_break_reason {
 
 #define afs_cb_break_reasons                                           \
        EM(afs_cb_break_no_break,               "no-break")             \
+       EM(afs_cb_break_no_promise,             "no-promise")           \
        EM(afs_cb_break_for_callback,           "break-cb")             \
        EM(afs_cb_break_for_deleted,            "break-del")            \
        EM(afs_cb_break_for_lapsed,             "break-lapsed")         \
+       EM(afs_cb_break_for_s_reinit,           "s-reinit")             \
        EM(afs_cb_break_for_unlink,             "break-unlink")         \
-       EM(afs_cb_break_for_vsbreak,            "break-vs")             \
+       EM(afs_cb_break_for_v_break,            "break-v")              \
        EM(afs_cb_break_for_volume_callback,    "break-v-cb")           \
        E_(afs_cb_break_for_zap,                "break-zap")
 
index 25fc46e872142a11692e3b44402cab89ca644fb3..adf9b9ef8277da3dab7500ee26b0067bd6763e71 100644 (file)
@@ -3403,6 +3403,7 @@ void unmap_mapping_pages(struct address_space *mapping, pgoff_t start,
                unmap_mapping_range_tree(&mapping->i_mmap, &details);
        i_mmap_unlock_write(mapping);
 }
+EXPORT_SYMBOL_GPL(unmap_mapping_pages);
 
 /**
  * unmap_mapping_range - unmap the portion of all mmaps in the specified