Merge tag 'nfsd-5.4' of git://linux-nfs.org/~bfields/linux
authorLinus Torvalds <torvalds@linux-foundation.org>
Sat, 28 Sep 2019 00:00:27 +0000 (17:00 -0700)
committerLinus Torvalds <torvalds@linux-foundation.org>
Sat, 28 Sep 2019 00:00:27 +0000 (17:00 -0700)
Pull nfsd updates from Bruce Fields:
 "Highlights:

   - Add a new knfsd file cache, so that we don't have to open and close
     on each (NFSv2/v3) READ or WRITE. This can speed up read and write
     in some cases. It also replaces our readahead cache.

   - Prevent silent data loss on write errors, by treating write errors
     like server reboots for the purposes of write caching, thus forcing
     clients to resend their writes.

   - Tweak the code that allocates sessions to be more forgiving, so
     that NFSv4.1 mounts are less likely to hang when a server already
     has a lot of clients.

   - Eliminate an arbitrary limit on NFSv4 ACL sizes; they should now be
     limited only by the backend filesystem and the maximum RPC size.

   - Allow the server to enforce use of the correct kerberos credentials
     when a client reclaims state after a reboot.

  And some miscellaneous smaller bugfixes and cleanup"

* tag 'nfsd-5.4' of git://linux-nfs.org/~bfields/linux: (34 commits)
  sunrpc: clean up indentation issue
  nfsd: fix nfs read eof detection
  nfsd: Make nfsd_reset_boot_verifier_locked static
  nfsd: degraded slot-count more gracefully as allocation nears exhaustion.
  nfsd: handle drc over-allocation gracefully.
  nfsd: add support for upcall version 2
  nfsd: add a "GetVersion" upcall for nfsdcld
  nfsd: Reset the boot verifier on all write I/O errors
  nfsd: Don't garbage collect files that might contain write errors
  nfsd: Support the server resetting the boot verifier
  nfsd: nfsd_file cache entries should be per net namespace
  nfsd: eliminate an unnecessary acl size limit
  Deprecate nfsd fault injection
  nfsd: remove duplicated include from filecache.c
  nfsd: Fix the documentation for svcxdr_tmpalloc()
  nfsd: Fix up some unused variable warnings
  nfsd: close cached files prior to a REMOVE or RENAME that would replace target
  nfsd: rip out the raparms cache
  nfsd: have nfsd_test_lock use the nfsd_file cache
  nfsd: hook up nfs4_preprocess_stateid_op to the nfsd_file cache
  ...

40 files changed:
fs/file_table.c
fs/locks.c
fs/nfsd/Kconfig
fs/nfsd/Makefile
fs/nfsd/acl.h
fs/nfsd/blocklayout.c
fs/nfsd/export.c
fs/nfsd/filecache.c [new file with mode: 0644]
fs/nfsd/filecache.h [new file with mode: 0644]
fs/nfsd/netns.h
fs/nfsd/nfs3proc.c
fs/nfsd/nfs3xdr.c
fs/nfsd/nfs4callback.c
fs/nfsd/nfs4layouts.c
fs/nfsd/nfs4proc.c
fs/nfsd/nfs4recover.c
fs/nfsd/nfs4state.c
fs/nfsd/nfs4xdr.c
fs/nfsd/nfsctl.c
fs/nfsd/nfsproc.c
fs/nfsd/nfssvc.c
fs/nfsd/state.h
fs/nfsd/trace.h
fs/nfsd/vfs.c
fs/nfsd/vfs.h
fs/nfsd/xdr3.h
fs/nfsd/xdr4.h
fs/notify/fsnotify.h
fs/notify/group.c
fs/notify/mark.c
include/linux/fs.h
include/linux/fsnotify_backend.h
include/linux/sunrpc/cache.h
include/linux/sunrpc/svc_rdma.h
include/uapi/linux/nfsd/cld.h
net/sunrpc/cache.c
net/sunrpc/svc.c
net/sunrpc/xprtrdma/svc_rdma.c
net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
net/sunrpc/xprtrdma/svc_rdma_transport.c

index b07b53f24ff5d8abde224be8847c3f2423d2babd..30d55c9a1744a6244d50e8ae3497a1563e7e14a1 100644 (file)
@@ -327,6 +327,7 @@ void flush_delayed_fput(void)
 {
        delayed_fput(NULL);
 }
+EXPORT_SYMBOL_GPL(flush_delayed_fput);
 
 static DECLARE_DELAYED_WORK(delayed_fput_work, delayed_fput);
 
index a364ebc5cec3b1b974da670c72e9af0a42e07c2a..6970f55daf54341f307a5052eb44b9e2d039ead8 100644 (file)
@@ -212,6 +212,7 @@ struct file_lock_list_struct {
 static DEFINE_PER_CPU(struct file_lock_list_struct, file_lock_list);
 DEFINE_STATIC_PERCPU_RWSEM(file_rwsem);
 
+
 /*
  * The blocked_hash is used to find POSIX lock loops for deadlock detection.
  * It is protected by blocked_lock_lock.
@@ -1991,6 +1992,64 @@ int generic_setlease(struct file *filp, long arg, struct file_lock **flp,
 }
 EXPORT_SYMBOL(generic_setlease);
 
+#if IS_ENABLED(CONFIG_SRCU)
+/*
+ * Kernel subsystems can register to be notified on any attempt to set
+ * a new lease with the lease_notifier_chain. This is used by (e.g.) nfsd
+ * to close files that it may have cached when there is an attempt to set a
+ * conflicting lease.
+ */
+static struct srcu_notifier_head lease_notifier_chain;
+
+static inline void
+lease_notifier_chain_init(void)
+{
+       srcu_init_notifier_head(&lease_notifier_chain);
+}
+
+static inline void
+setlease_notifier(long arg, struct file_lock *lease)
+{
+       if (arg != F_UNLCK)
+               srcu_notifier_call_chain(&lease_notifier_chain, arg, lease);
+}
+
+int lease_register_notifier(struct notifier_block *nb)
+{
+       return srcu_notifier_chain_register(&lease_notifier_chain, nb);
+}
+EXPORT_SYMBOL_GPL(lease_register_notifier);
+
+void lease_unregister_notifier(struct notifier_block *nb)
+{
+       srcu_notifier_chain_unregister(&lease_notifier_chain, nb);
+}
+EXPORT_SYMBOL_GPL(lease_unregister_notifier);
+
+#else /* !IS_ENABLED(CONFIG_SRCU) */
+static inline void
+lease_notifier_chain_init(void)
+{
+}
+
+static inline void
+setlease_notifier(long arg, struct file_lock *lease)
+{
+}
+
+int lease_register_notifier(struct notifier_block *nb)
+{
+       return 0;
+}
+EXPORT_SYMBOL_GPL(lease_register_notifier);
+
+void lease_unregister_notifier(struct notifier_block *nb)
+{
+}
+EXPORT_SYMBOL_GPL(lease_unregister_notifier);
+
+#endif /* IS_ENABLED(CONFIG_SRCU) */
+
 /**
  * vfs_setlease        -       sets a lease on an open file
  * @filp:      file pointer
@@ -2011,6 +2070,8 @@ EXPORT_SYMBOL(generic_setlease);
 int
 vfs_setlease(struct file *filp, long arg, struct file_lock **lease, void **priv)
 {
+       if (lease)
+               setlease_notifier(arg, *lease);
        if (filp->f_op->setlease)
                return filp->f_op->setlease(filp, arg, lease, priv);
        else
@@ -2924,6 +2985,7 @@ static int __init filelock_init(void)
                INIT_HLIST_HEAD(&fll->hlist);
        }
 
+       lease_notifier_chain_init();
        return 0;
 }
 core_initcall(filelock_init);
index d25f6bbe7006594065807f0462571999458ee618..10cefb0c07c742bf42b608d257173ed4438b505b 100644 (file)
@@ -3,6 +3,7 @@ config NFSD
        tristate "NFS server support"
        depends on INET
        depends on FILE_LOCKING
+       depends on FSNOTIFY
        select LOCKD
        select SUNRPC
        select EXPORTFS
@@ -147,7 +148,7 @@ config NFSD_V4_SECURITY_LABEL
 
 config NFSD_FAULT_INJECTION
        bool "NFS server manual fault injection"
-       depends on NFSD_V4 && DEBUG_KERNEL && DEBUG_FS
+       depends on NFSD_V4 && DEBUG_KERNEL && DEBUG_FS && BROKEN
        help
          This option enables support for manually injecting faults
          into the NFS server.  This is intended to be used for
index 2bfb58eefad1c2e015178140fc5c9566cf9348fb..6a40b1afe7031a9fe6f58c485fc122366e337d81 100644 (file)
@@ -11,7 +11,8 @@ obj-$(CONFIG_NFSD)    += nfsd.o
 nfsd-y                 += trace.o
 
 nfsd-y                         += nfssvc.o nfsctl.o nfsproc.o nfsfh.o vfs.o \
-                          export.o auth.o lockd.o nfscache.o nfsxdr.o stats.o
+                          export.o auth.o lockd.o nfscache.o nfsxdr.o \
+                          stats.o filecache.o
 nfsd-$(CONFIG_NFSD_FAULT_INJECTION) += fault_inject.o
 nfsd-$(CONFIG_NFSD_V2_ACL) += nfs2acl.o
 nfsd-$(CONFIG_NFSD_V3) += nfs3proc.o nfs3xdr.o
index 4cd7c69a6cb979c6257f7e87d2576a1fbcdadb8c..ba14d2f4b64f4b71aeb9c3160a2d84db3e5aab48 100644 (file)
@@ -39,14 +39,6 @@ struct nfs4_acl;
 struct svc_fh;
 struct svc_rqst;
 
-/*
- * Maximum ACL we'll accept from a client; chosen (somewhat
- * arbitrarily) so that kmalloc'ing the ACL shouldn't require a
- * high-order allocation.  This allows 204 ACEs on x86_64:
- */
-#define NFS4_ACL_MAX ((PAGE_SIZE - sizeof(struct nfs4_acl)) \
-                       / sizeof(struct nfs4_ace))
-
 int nfs4_acl_bytes(int entries);
 int nfs4_acl_get_whotype(char *, u32);
 __be32 nfs4_acl_write_who(struct xdr_stream *xdr, int who);
index 66d4c55eb48e9f1af826c85573b10f4fb55a8fb9..9bbaa671c0799d70d8bad77b47dd7e9eb83fdf4c 100644 (file)
@@ -15,6 +15,7 @@
 
 #include "blocklayoutxdr.h"
 #include "pnfs.h"
+#include "filecache.h"
 
 #define NFSDDBG_FACILITY       NFSDDBG_PNFS
 
@@ -404,7 +405,7 @@ static void
 nfsd4_scsi_fence_client(struct nfs4_layout_stateid *ls)
 {
        struct nfs4_client *clp = ls->ls_stid.sc_client;
-       struct block_device *bdev = ls->ls_file->f_path.mnt->mnt_sb->s_bdev;
+       struct block_device *bdev = ls->ls_file->nf_file->f_path.mnt->mnt_sb->s_bdev;
 
        bdev->bd_disk->fops->pr_ops->pr_preempt(bdev, NFSD_MDS_PR_KEY,
                        nfsd4_scsi_pr_key(clp), 0, true);
index baa01956a5b3dda259202a0486e99f89a3543376..15422c951fd1603d1fbd56c0dcf5c0dba8dcfb91 100644 (file)
@@ -22,6 +22,7 @@
 #include "nfsfh.h"
 #include "netns.h"
 #include "pnfs.h"
+#include "filecache.h"
 
 #define NFSDDBG_FACILITY       NFSDDBG_EXPORT
 
@@ -232,6 +233,17 @@ static struct cache_head *expkey_alloc(void)
                return NULL;
 }
 
+static void expkey_flush(void)
+{
+       /*
+        * Take the nfsd_mutex here to ensure that the file cache is not
+        * destroyed while we're in the middle of flushing.
+        */
+       mutex_lock(&nfsd_mutex);
+       nfsd_file_cache_purge(current->nsproxy->net_ns);
+       mutex_unlock(&nfsd_mutex);
+}
+
 static const struct cache_detail svc_expkey_cache_template = {
        .owner          = THIS_MODULE,
        .hash_size      = EXPKEY_HASHMAX,
@@ -244,6 +256,7 @@ static const struct cache_detail svc_expkey_cache_template = {
        .init           = expkey_init,
        .update         = expkey_update,
        .alloc          = expkey_alloc,
+       .flush          = expkey_flush,
 };
 
 static int
diff --git a/fs/nfsd/filecache.c b/fs/nfsd/filecache.c
new file mode 100644 (file)
index 0000000..ef55e9b
--- /dev/null
@@ -0,0 +1,934 @@
+/*
+ * Open file cache.
+ *
+ * (c) 2015 - Jeff Layton <jeff.layton@primarydata.com>
+ */
+
+#include <linux/hash.h>
+#include <linux/slab.h>
+#include <linux/file.h>
+#include <linux/sched.h>
+#include <linux/list_lru.h>
+#include <linux/fsnotify_backend.h>
+#include <linux/fsnotify.h>
+#include <linux/seq_file.h>
+
+#include "vfs.h"
+#include "nfsd.h"
+#include "nfsfh.h"
+#include "netns.h"
+#include "filecache.h"
+#include "trace.h"
+
+#define NFSDDBG_FACILITY       NFSDDBG_FH
+
+/* FIXME: dynamically size this for the machine somehow? */
+#define NFSD_FILE_HASH_BITS                   12
+#define NFSD_FILE_HASH_SIZE                  (1 << NFSD_FILE_HASH_BITS)
+#define NFSD_LAUNDRETTE_DELAY               (2 * HZ)
+
+#define NFSD_FILE_LRU_RESCAN                (0)
+#define NFSD_FILE_SHUTDOWN                  (1)
+#define NFSD_FILE_LRU_THRESHOLD                     (4096UL)
+#define NFSD_FILE_LRU_LIMIT                 (NFSD_FILE_LRU_THRESHOLD << 2)
+
+/* We only care about NFSD_MAY_READ/WRITE for this cache */
+#define NFSD_FILE_MAY_MASK     (NFSD_MAY_READ|NFSD_MAY_WRITE)
+
+struct nfsd_fcache_bucket {
+       struct hlist_head       nfb_head;
+       spinlock_t              nfb_lock;
+       unsigned int            nfb_count;
+       unsigned int            nfb_maxcount;
+};
+
+static DEFINE_PER_CPU(unsigned long, nfsd_file_cache_hits);
+
+static struct kmem_cache               *nfsd_file_slab;
+static struct kmem_cache               *nfsd_file_mark_slab;
+static struct nfsd_fcache_bucket       *nfsd_file_hashtbl;
+static struct list_lru                 nfsd_file_lru;
+static long                            nfsd_file_lru_flags;
+static struct fsnotify_group           *nfsd_file_fsnotify_group;
+static atomic_long_t                   nfsd_filecache_count;
+static struct delayed_work             nfsd_filecache_laundrette;
+
+enum nfsd_file_laundrette_ctl {
+       NFSD_FILE_LAUNDRETTE_NOFLUSH = 0,
+       NFSD_FILE_LAUNDRETTE_MAY_FLUSH
+};
+
+static void
+nfsd_file_schedule_laundrette(enum nfsd_file_laundrette_ctl ctl)
+{
+       long count = atomic_long_read(&nfsd_filecache_count);
+
+       if (count == 0 || test_bit(NFSD_FILE_SHUTDOWN, &nfsd_file_lru_flags))
+               return;
+
+       /* Be more aggressive about scanning if over the threshold */
+       if (count > NFSD_FILE_LRU_THRESHOLD)
+               mod_delayed_work(system_wq, &nfsd_filecache_laundrette, 0);
+       else
+               schedule_delayed_work(&nfsd_filecache_laundrette, NFSD_LAUNDRETTE_DELAY);
+
+       if (ctl == NFSD_FILE_LAUNDRETTE_NOFLUSH)
+               return;
+
+       /* ...and don't delay flushing if we're out of control */
+       if (count >= NFSD_FILE_LRU_LIMIT)
+               flush_delayed_work(&nfsd_filecache_laundrette);
+}
+
+static void
+nfsd_file_slab_free(struct rcu_head *rcu)
+{
+       struct nfsd_file *nf = container_of(rcu, struct nfsd_file, nf_rcu);
+
+       put_cred(nf->nf_cred);
+       kmem_cache_free(nfsd_file_slab, nf);
+}
+
+static void
+nfsd_file_mark_free(struct fsnotify_mark *mark)
+{
+       struct nfsd_file_mark *nfm = container_of(mark, struct nfsd_file_mark,
+                                                 nfm_mark);
+
+       kmem_cache_free(nfsd_file_mark_slab, nfm);
+}
+
+static struct nfsd_file_mark *
+nfsd_file_mark_get(struct nfsd_file_mark *nfm)
+{
+       if (!atomic_inc_not_zero(&nfm->nfm_ref))
+               return NULL;
+       return nfm;
+}
+
+static void
+nfsd_file_mark_put(struct nfsd_file_mark *nfm)
+{
+       if (atomic_dec_and_test(&nfm->nfm_ref)) {
+
+               fsnotify_destroy_mark(&nfm->nfm_mark, nfsd_file_fsnotify_group);
+               fsnotify_put_mark(&nfm->nfm_mark);
+       }
+}
+
+static struct nfsd_file_mark *
+nfsd_file_mark_find_or_create(struct nfsd_file *nf)
+{
+       int                     err;
+       struct fsnotify_mark    *mark;
+       struct nfsd_file_mark   *nfm = NULL, *new;
+       struct inode *inode = nf->nf_inode;
+
+       do {
+               mutex_lock(&nfsd_file_fsnotify_group->mark_mutex);
+               mark = fsnotify_find_mark(&inode->i_fsnotify_marks,
+                               nfsd_file_fsnotify_group);
+               if (mark) {
+                       nfm = nfsd_file_mark_get(container_of(mark,
+                                                struct nfsd_file_mark,
+                                                nfm_mark));
+                       mutex_unlock(&nfsd_file_fsnotify_group->mark_mutex);
+                       fsnotify_put_mark(mark);
+                       if (likely(nfm))
+                               break;
+               } else
+                       mutex_unlock(&nfsd_file_fsnotify_group->mark_mutex);
+
+               /* allocate a new nfm */
+               new = kmem_cache_alloc(nfsd_file_mark_slab, GFP_KERNEL);
+               if (!new)
+                       return NULL;
+               fsnotify_init_mark(&new->nfm_mark, nfsd_file_fsnotify_group);
+               new->nfm_mark.mask = FS_ATTRIB|FS_DELETE_SELF;
+               atomic_set(&new->nfm_ref, 1);
+
+               err = fsnotify_add_inode_mark(&new->nfm_mark, inode, 0);
+
+               /*
+                * If the add was successful, then return the object.
+                * Otherwise, we need to put the reference we hold on the
+                * nfm_mark. The fsnotify code will take a reference and put
+                * it on failure, so we can't just free it directly. It's also
+                * not safe to call fsnotify_destroy_mark on it as the
+                * mark->group will be NULL. Thus, we can't let the nfm_ref
+                * counter drive the destruction at this point.
+                */
+               if (likely(!err))
+                       nfm = new;
+               else
+                       fsnotify_put_mark(&new->nfm_mark);
+       } while (unlikely(err == -EEXIST));
+
+       return nfm;
+}
+
+static struct nfsd_file *
+nfsd_file_alloc(struct inode *inode, unsigned int may, unsigned int hashval,
+               struct net *net)
+{
+       struct nfsd_file *nf;
+
+       nf = kmem_cache_alloc(nfsd_file_slab, GFP_KERNEL);
+       if (nf) {
+               INIT_HLIST_NODE(&nf->nf_node);
+               INIT_LIST_HEAD(&nf->nf_lru);
+               nf->nf_file = NULL;
+               nf->nf_cred = get_current_cred();
+               nf->nf_net = net;
+               nf->nf_flags = 0;
+               nf->nf_inode = inode;
+               nf->nf_hashval = hashval;
+               atomic_set(&nf->nf_ref, 1);
+               nf->nf_may = may & NFSD_FILE_MAY_MASK;
+               if (may & NFSD_MAY_NOT_BREAK_LEASE) {
+                       if (may & NFSD_MAY_WRITE)
+                               __set_bit(NFSD_FILE_BREAK_WRITE, &nf->nf_flags);
+                       if (may & NFSD_MAY_READ)
+                               __set_bit(NFSD_FILE_BREAK_READ, &nf->nf_flags);
+               }
+               nf->nf_mark = NULL;
+               trace_nfsd_file_alloc(nf);
+       }
+       return nf;
+}
+
+static bool
+nfsd_file_free(struct nfsd_file *nf)
+{
+       bool flush = false;
+
+       trace_nfsd_file_put_final(nf);
+       if (nf->nf_mark)
+               nfsd_file_mark_put(nf->nf_mark);
+       if (nf->nf_file) {
+               get_file(nf->nf_file);
+               filp_close(nf->nf_file, NULL);
+               fput(nf->nf_file);
+               flush = true;
+       }
+       call_rcu(&nf->nf_rcu, nfsd_file_slab_free);
+       return flush;
+}
+
+static bool
+nfsd_file_check_writeback(struct nfsd_file *nf)
+{
+       struct file *file = nf->nf_file;
+       struct address_space *mapping;
+
+       if (!file || !(file->f_mode & FMODE_WRITE))
+               return false;
+       mapping = file->f_mapping;
+       return mapping_tagged(mapping, PAGECACHE_TAG_DIRTY) ||
+               mapping_tagged(mapping, PAGECACHE_TAG_WRITEBACK);
+}
+
+static int
+nfsd_file_check_write_error(struct nfsd_file *nf)
+{
+       struct file *file = nf->nf_file;
+
+       if (!file || !(file->f_mode & FMODE_WRITE))
+               return 0;
+       return filemap_check_wb_err(file->f_mapping, READ_ONCE(file->f_wb_err));
+}
+
+static bool
+nfsd_file_in_use(struct nfsd_file *nf)
+{
+       return nfsd_file_check_writeback(nf) ||
+                       nfsd_file_check_write_error(nf);
+}
+
+static void
+nfsd_file_do_unhash(struct nfsd_file *nf)
+{
+       lockdep_assert_held(&nfsd_file_hashtbl[nf->nf_hashval].nfb_lock);
+
+       trace_nfsd_file_unhash(nf);
+
+       if (nfsd_file_check_write_error(nf))
+               nfsd_reset_boot_verifier(net_generic(nf->nf_net, nfsd_net_id));
+       --nfsd_file_hashtbl[nf->nf_hashval].nfb_count;
+       hlist_del_rcu(&nf->nf_node);
+       if (!list_empty(&nf->nf_lru))
+               list_lru_del(&nfsd_file_lru, &nf->nf_lru);
+       atomic_long_dec(&nfsd_filecache_count);
+}
+
+static bool
+nfsd_file_unhash(struct nfsd_file *nf)
+{
+       if (test_and_clear_bit(NFSD_FILE_HASHED, &nf->nf_flags)) {
+               nfsd_file_do_unhash(nf);
+               return true;
+       }
+       return false;
+}
+
+/*
+ * Return true if the file was unhashed.
+ */
+static bool
+nfsd_file_unhash_and_release_locked(struct nfsd_file *nf, struct list_head *dispose)
+{
+       lockdep_assert_held(&nfsd_file_hashtbl[nf->nf_hashval].nfb_lock);
+
+       trace_nfsd_file_unhash_and_release_locked(nf);
+       if (!nfsd_file_unhash(nf))
+               return false;
+       /* keep final reference for nfsd_file_lru_dispose */
+       if (atomic_add_unless(&nf->nf_ref, -1, 1))
+               return true;
+
+       list_add(&nf->nf_lru, dispose);
+       return true;
+}
+
+static int
+nfsd_file_put_noref(struct nfsd_file *nf)
+{
+       int count;
+       trace_nfsd_file_put(nf);
+
+       count = atomic_dec_return(&nf->nf_ref);
+       if (!count) {
+               WARN_ON(test_bit(NFSD_FILE_HASHED, &nf->nf_flags));
+               nfsd_file_free(nf);
+       }
+       return count;
+}
+
+void
+nfsd_file_put(struct nfsd_file *nf)
+{
+       bool is_hashed = test_bit(NFSD_FILE_HASHED, &nf->nf_flags) != 0;
+       bool unused = !nfsd_file_in_use(nf);
+
+       set_bit(NFSD_FILE_REFERENCED, &nf->nf_flags);
+       if (nfsd_file_put_noref(nf) == 1 && is_hashed && unused)
+               nfsd_file_schedule_laundrette(NFSD_FILE_LAUNDRETTE_MAY_FLUSH);
+}
+
+struct nfsd_file *
+nfsd_file_get(struct nfsd_file *nf)
+{
+       if (likely(atomic_inc_not_zero(&nf->nf_ref)))
+               return nf;
+       return NULL;
+}
+
+static void
+nfsd_file_dispose_list(struct list_head *dispose)
+{
+       struct nfsd_file *nf;
+
+       while(!list_empty(dispose)) {
+               nf = list_first_entry(dispose, struct nfsd_file, nf_lru);
+               list_del(&nf->nf_lru);
+               nfsd_file_put_noref(nf);
+       }
+}
+
+static void
+nfsd_file_dispose_list_sync(struct list_head *dispose)
+{
+       bool flush = false;
+       struct nfsd_file *nf;
+
+       while(!list_empty(dispose)) {
+               nf = list_first_entry(dispose, struct nfsd_file, nf_lru);
+               list_del(&nf->nf_lru);
+               if (!atomic_dec_and_test(&nf->nf_ref))
+                       continue;
+               if (nfsd_file_free(nf))
+                       flush = true;
+       }
+       if (flush)
+               flush_delayed_fput();
+}
+
+/*
+ * Note this can deadlock with nfsd_file_cache_purge.
+ */
+static enum lru_status
+nfsd_file_lru_cb(struct list_head *item, struct list_lru_one *lru,
+                spinlock_t *lock, void *arg)
+       __releases(lock)
+       __acquires(lock)
+{
+       struct list_head *head = arg;
+       struct nfsd_file *nf = list_entry(item, struct nfsd_file, nf_lru);
+
+       /*
+        * Do a lockless refcount check. The hashtable holds one reference, so
+        * we look to see if anything else has a reference, or if any have
+        * been put since the shrinker last ran. Those don't get unhashed and
+        * released.
+        *
+        * Note that in the put path, we set the flag and then decrement the
+        * counter. Here we check the counter and then test and clear the flag.
+        * That order is deliberate to ensure that we can do this locklessly.
+        */
+       if (atomic_read(&nf->nf_ref) > 1)
+               goto out_skip;
+
+       /*
+        * Don't throw out files that are still undergoing I/O or
+        * that have uncleared errors pending.
+        */
+       if (nfsd_file_check_writeback(nf))
+               goto out_skip;
+
+       if (test_and_clear_bit(NFSD_FILE_REFERENCED, &nf->nf_flags))
+               goto out_rescan;
+
+       if (!test_and_clear_bit(NFSD_FILE_HASHED, &nf->nf_flags))
+               goto out_skip;
+
+       list_lru_isolate_move(lru, &nf->nf_lru, head);
+       return LRU_REMOVED;
+out_rescan:
+       set_bit(NFSD_FILE_LRU_RESCAN, &nfsd_file_lru_flags);
+out_skip:
+       return LRU_SKIP;
+}
+
+static void
+nfsd_file_lru_dispose(struct list_head *head)
+{
+       while(!list_empty(head)) {
+               struct nfsd_file *nf = list_first_entry(head,
+                               struct nfsd_file, nf_lru);
+               list_del_init(&nf->nf_lru);
+               spin_lock(&nfsd_file_hashtbl[nf->nf_hashval].nfb_lock);
+               nfsd_file_do_unhash(nf);
+               spin_unlock(&nfsd_file_hashtbl[nf->nf_hashval].nfb_lock);
+               nfsd_file_put_noref(nf);
+       }
+}
+
+static unsigned long
+nfsd_file_lru_count(struct shrinker *s, struct shrink_control *sc)
+{
+       return list_lru_count(&nfsd_file_lru);
+}
+
+static unsigned long
+nfsd_file_lru_scan(struct shrinker *s, struct shrink_control *sc)
+{
+       LIST_HEAD(head);
+       unsigned long ret;
+
+       ret = list_lru_shrink_walk(&nfsd_file_lru, sc, nfsd_file_lru_cb, &head);
+       nfsd_file_lru_dispose(&head);
+       return ret;
+}
+
+static struct shrinker nfsd_file_shrinker = {
+       .scan_objects = nfsd_file_lru_scan,
+       .count_objects = nfsd_file_lru_count,
+       .seeks = 1,
+};
+
+static void
+__nfsd_file_close_inode(struct inode *inode, unsigned int hashval,
+                       struct list_head *dispose)
+{
+       struct nfsd_file        *nf;
+       struct hlist_node       *tmp;
+
+       spin_lock(&nfsd_file_hashtbl[hashval].nfb_lock);
+       hlist_for_each_entry_safe(nf, tmp, &nfsd_file_hashtbl[hashval].nfb_head, nf_node) {
+               if (inode == nf->nf_inode)
+                       nfsd_file_unhash_and_release_locked(nf, dispose);
+       }
+       spin_unlock(&nfsd_file_hashtbl[hashval].nfb_lock);
+}
+
+/**
+ * nfsd_file_close_inode_sync - attempt to forcibly close a nfsd_file
+ * @inode: inode of the file to attempt to remove
+ *
+ * Walk the whole hash bucket, looking for any files that correspond to "inode".
+ * If any do, then unhash them and put the hashtable reference to them and
+ * destroy any that had their last reference put. Also ensure that any of the
+ * fputs also have their final __fput done as well.
+ */
+void
+nfsd_file_close_inode_sync(struct inode *inode)
+{
+       unsigned int            hashval = (unsigned int)hash_long(inode->i_ino,
+                                               NFSD_FILE_HASH_BITS);
+       LIST_HEAD(dispose);
+
+       __nfsd_file_close_inode(inode, hashval, &dispose);
+       trace_nfsd_file_close_inode_sync(inode, hashval, !list_empty(&dispose));
+       nfsd_file_dispose_list_sync(&dispose);
+}
+
+/**
+ * nfsd_file_close_inode - attempt to forcibly close a nfsd_file
+ * @inode: inode of the file to attempt to remove
+ *
+ * Walk the whole hash bucket, looking for any files that correspond to "inode".
+ * If any do, then unhash them and put the hashtable reference to them and
+ * destroy any that had their last reference put.
+ */
+static void
+nfsd_file_close_inode(struct inode *inode)
+{
+       unsigned int            hashval = (unsigned int)hash_long(inode->i_ino,
+                                               NFSD_FILE_HASH_BITS);
+       LIST_HEAD(dispose);
+
+       __nfsd_file_close_inode(inode, hashval, &dispose);
+       trace_nfsd_file_close_inode(inode, hashval, !list_empty(&dispose));
+       nfsd_file_dispose_list(&dispose);
+}
+
+/**
+ * nfsd_file_delayed_close - close unused nfsd_files
+ * @work: dummy
+ *
+ * Walk the LRU list and close any entries that have not been used since
+ * the last scan.
+ *
+ * Note this can deadlock with nfsd_file_cache_purge.
+ */
+static void
+nfsd_file_delayed_close(struct work_struct *work)
+{
+       LIST_HEAD(head);
+
+       list_lru_walk(&nfsd_file_lru, nfsd_file_lru_cb, &head, LONG_MAX);
+
+       if (test_and_clear_bit(NFSD_FILE_LRU_RESCAN, &nfsd_file_lru_flags))
+               nfsd_file_schedule_laundrette(NFSD_FILE_LAUNDRETTE_NOFLUSH);
+
+       if (!list_empty(&head)) {
+               nfsd_file_lru_dispose(&head);
+               flush_delayed_fput();
+       }
+}
+
+static int
+nfsd_file_lease_notifier_call(struct notifier_block *nb, unsigned long arg,
+                           void *data)
+{
+       struct file_lock *fl = data;
+
+       /* Only close files for F_SETLEASE leases */
+       if (fl->fl_flags & FL_LEASE)
+               nfsd_file_close_inode_sync(file_inode(fl->fl_file));
+       return 0;
+}
+
+static struct notifier_block nfsd_file_lease_notifier = {
+       .notifier_call = nfsd_file_lease_notifier_call,
+};
+
+static int
+nfsd_file_fsnotify_handle_event(struct fsnotify_group *group,
+                               struct inode *inode,
+                               u32 mask, const void *data, int data_type,
+                               const struct qstr *file_name, u32 cookie,
+                               struct fsnotify_iter_info *iter_info)
+{
+       trace_nfsd_file_fsnotify_handle_event(inode, mask);
+
+       /* Should be no marks on non-regular files */
+       if (!S_ISREG(inode->i_mode)) {
+               WARN_ON_ONCE(1);
+               return 0;
+       }
+
+       /* don't close files if this was not the last link */
+       if (mask & FS_ATTRIB) {
+               if (inode->i_nlink)
+                       return 0;
+       }
+
+       nfsd_file_close_inode(inode);
+       return 0;
+}
+
+
+static const struct fsnotify_ops nfsd_file_fsnotify_ops = {
+       .handle_event = nfsd_file_fsnotify_handle_event,
+       .free_mark = nfsd_file_mark_free,
+};
+
+/* Set up the file cache. Returns 0, or a negative errno after unwinding. */
+int
+nfsd_file_cache_init(void)
+{
+       int             ret = -ENOMEM;
+       unsigned int    i;
+
+       clear_bit(NFSD_FILE_SHUTDOWN, &nfsd_file_lru_flags);
+
+       if (nfsd_file_hashtbl)
+               return 0;
+
+       nfsd_file_hashtbl = kcalloc(NFSD_FILE_HASH_SIZE,
+                               sizeof(*nfsd_file_hashtbl), GFP_KERNEL);
+       if (!nfsd_file_hashtbl) {
+               pr_err("nfsd: unable to allocate nfsd_file_hashtbl\n");
+               goto out_err;
+       }
+
+       nfsd_file_slab = kmem_cache_create("nfsd_file",
+                               sizeof(struct nfsd_file), 0, 0, NULL);
+       if (!nfsd_file_slab) {
+               pr_err("nfsd: unable to create nfsd_file_slab\n");
+               goto out_err;
+       }
+
+       nfsd_file_mark_slab = kmem_cache_create("nfsd_file_mark",
+                                       sizeof(struct nfsd_file_mark), 0, 0, NULL);
+       if (!nfsd_file_mark_slab) {
+               pr_err("nfsd: unable to create nfsd_file_mark_slab\n");
+               goto out_err;
+       }
+
+       ret = list_lru_init(&nfsd_file_lru);
+       if (ret) {
+               pr_err("nfsd: failed to init nfsd_file_lru: %d\n", ret);
+               goto out_err;
+       }
+
+       ret = register_shrinker(&nfsd_file_shrinker);
+       if (ret) {
+               pr_err("nfsd: failed to register nfsd_file_shrinker: %d\n", ret);
+               goto out_lru;
+       }
+
+       ret = lease_register_notifier(&nfsd_file_lease_notifier);
+       if (ret) {
+               pr_err("nfsd: unable to register lease notifier: %d\n", ret);
+               goto out_shrinker;
+       }
+
+       nfsd_file_fsnotify_group = fsnotify_alloc_group(&nfsd_file_fsnotify_ops);
+       if (IS_ERR(nfsd_file_fsnotify_group)) {
+               ret = PTR_ERR(nfsd_file_fsnotify_group); /* was 0 until here */
+               pr_err("nfsd: unable to create fsnotify group: %d\n", ret);
+               nfsd_file_fsnotify_group = NULL;
+               goto out_notifier;
+       }
+
+       for (i = 0; i < NFSD_FILE_HASH_SIZE; i++) {
+               INIT_HLIST_HEAD(&nfsd_file_hashtbl[i].nfb_head);
+               spin_lock_init(&nfsd_file_hashtbl[i].nfb_lock);
+       }
+
+       INIT_DELAYED_WORK(&nfsd_filecache_laundrette, nfsd_file_delayed_close);
+out:
+       return ret;
+out_notifier:
+       lease_unregister_notifier(&nfsd_file_lease_notifier);
+out_shrinker:
+       unregister_shrinker(&nfsd_file_shrinker);
+out_lru:
+       list_lru_destroy(&nfsd_file_lru);
+out_err:
+       kmem_cache_destroy(nfsd_file_slab);
+       nfsd_file_slab = NULL;
+       kmem_cache_destroy(nfsd_file_mark_slab);
+       nfsd_file_mark_slab = NULL;
+       kfree(nfsd_file_hashtbl);
+       nfsd_file_hashtbl = NULL;
+       goto out;
+}
+
+/*
+ * Note this can deadlock with nfsd_file_lru_cb.
+ */
+void
+nfsd_file_cache_purge(struct net *net)
+{
+       unsigned int            i;
+       struct nfsd_file        *nf;
+       struct hlist_node       *next;
+       LIST_HEAD(dispose);
+       bool del;
+
+       if (!nfsd_file_hashtbl)
+               return;
+
+       for (i = 0; i < NFSD_FILE_HASH_SIZE; i++) {
+               struct nfsd_fcache_bucket *nfb = &nfsd_file_hashtbl[i];
+
+               spin_lock(&nfb->nfb_lock);
+               hlist_for_each_entry_safe(nf, next, &nfb->nfb_head, nf_node) {
+                       if (net && nf->nf_net != net)
+                               continue;
+                       del = nfsd_file_unhash_and_release_locked(nf, &dispose);
+
+                       /*
+                        * Deadlock detected! Something marked this entry as
+                        * unhashed, but hasn't removed it from the hash list.
+                        */
+                       WARN_ON_ONCE(!del);
+               }
+               spin_unlock(&nfb->nfb_lock);
+               nfsd_file_dispose_list(&dispose);
+       }
+}
+
+/*
+ * Tear down the global nfsd file cache.
+ *
+ * Sets NFSD_FILE_SHUTDOWN in nfsd_file_lru_flags, unregisters the lease
+ * notifier and the shrinker, cancels the laundrette work and waits for it,
+ * then purges every cached entry regardless of net namespace. After that the
+ * LRU, the fsnotify group, both slab caches and the hash table are released,
+ * and each global pointer is reset to NULL once its object is gone.
+ */
+void
+nfsd_file_cache_shutdown(void)
+{
+       set_bit(NFSD_FILE_SHUTDOWN, &nfsd_file_lru_flags);
+
+       lease_unregister_notifier(&nfsd_file_lease_notifier);
+       unregister_shrinker(&nfsd_file_shrinker);
+       /*
+        * make sure all callers of nfsd_file_lru_cb are done before
+        * calling nfsd_file_cache_purge
+        */
+       cancel_delayed_work_sync(&nfsd_filecache_laundrette);
+       nfsd_file_cache_purge(NULL);
+       list_lru_destroy(&nfsd_file_lru);
+       /*
+        * NOTE(review): presumably waits for pending RCU-deferred frees of
+        * nfsd_file objects before their slab is destroyed - confirm.
+        */
+       rcu_barrier();
+       fsnotify_put_group(nfsd_file_fsnotify_group);
+       nfsd_file_fsnotify_group = NULL;
+       kmem_cache_destroy(nfsd_file_slab);
+       nfsd_file_slab = NULL;
+       /* marks must be fully destroyed before their slab cache goes away */
+       fsnotify_wait_marks_destroyed();
+       kmem_cache_destroy(nfsd_file_mark_slab);
+       nfsd_file_mark_slab = NULL;
+       kfree(nfsd_file_hashtbl);
+       nfsd_file_hashtbl = NULL;
+}
+
+/*
+ * Decide whether two credentials describe the same filesystem identity.
+ *
+ * The credentials match when their fsuid and fsgid are equal and their
+ * supplementary group lists hold the same gids in the same order. If either
+ * side has no group_info, they match only when both have none.
+ */
+static bool
+nfsd_match_cred(const struct cred *c1, const struct cred *c2)
+{
+       int n = 0;
+
+       if (!uid_eq(c1->fsuid, c2->fsuid) || !gid_eq(c1->fsgid, c2->fsgid))
+               return false;
+       if (!c1->group_info || !c2->group_info)
+               return c1->group_info == c2->group_info;
+       if (c1->group_info->ngroups != c2->group_info->ngroups)
+               return false;
+       while (n < c1->group_info->ngroups) {
+               if (!gid_eq(c1->group_info->gid[n], c2->group_info->gid[n]))
+                       return false;
+               n++;
+       }
+       return true;
+}
+
+/*
+ * Look up a cached nfsd_file in hash bucket @hashval.
+ *
+ * An entry matches when its nf_may covers every NFSD_FILE_MAY_MASK bit set
+ * in @may_flags, its nf_inode and nf_net equal @inode and @net, and
+ * nfsd_match_cred() accepts its nf_cred against current_cred(). Returns the
+ * first matching entry for which nfsd_file_get() returns non-NULL, or NULL
+ * if there is none.
+ *
+ * The bucket is walked with hlist_for_each_entry_rcu(); nfsd_file_acquire()
+ * calls this either inside rcu_read_lock() or with the bucket's nfb_lock
+ * held.
+ */
+static struct nfsd_file *
+nfsd_file_find_locked(struct inode *inode, unsigned int may_flags,
+                       unsigned int hashval, struct net *net)
+{
+       struct nfsd_file *nf;
+       unsigned char need = may_flags & NFSD_FILE_MAY_MASK;
+
+       hlist_for_each_entry_rcu(nf, &nfsd_file_hashtbl[hashval].nfb_head,
+                                nf_node) {
+               if ((need & nf->nf_may) != need)
+                       continue;
+               if (nf->nf_inode != inode)
+                       continue;
+               if (nf->nf_net != net)
+                       continue;
+               if (!nfsd_match_cred(nf->nf_cred, current_cred()))
+                       continue;
+               if (nfsd_file_get(nf) != NULL)
+                       return nf;
+       }
+       return NULL;
+}
+
+/**
+ * nfsd_file_is_cached - are there any cached open files for this inode?
+ * @inode: inode of the file to check
+ *
+ * Scan the hash bucket for @inode (selected by hashing inode->i_ino) for any
+ * nfsd_file whose nf_inode equals @inode. Only the inode pointer is compared;
+ * nf_net, nf_cred and nf_may are not consulted. The walk runs under
+ * rcu_read_lock() and takes no reference on the entry found. The result is
+ * also reported through the nfsd_file_is_cached tracepoint.
+ *
+ * Returns true if there are any, and false if not.
+ */
+bool
+nfsd_file_is_cached(struct inode *inode)
+{
+       bool                    ret = false;
+       struct nfsd_file        *nf;
+       unsigned int            hashval;
+
+        hashval = (unsigned int)hash_long(inode->i_ino, NFSD_FILE_HASH_BITS);
+
+       rcu_read_lock();
+       hlist_for_each_entry_rcu(nf, &nfsd_file_hashtbl[hashval].nfb_head,
+                                nf_node) {
+               if (inode == nf->nf_inode) {
+                       ret = true;
+                       break;
+               }
+       }
+       rcu_read_unlock();
+       trace_nfsd_file_is_cached(inode, hashval, (int)ret);
+       return ret;
+}
+
+/**
+ * nfsd_file_acquire - find or open a cached nfsd_file for a filehandle
+ * @rqstp: the RPC request being processed
+ * @fhp: filehandle of the file to open
+ * @may_flags: NFSD_MAY_* access flags the caller needs
+ * @pnf: on success, set to the acquired nfsd_file
+ *
+ * Verifies @fhp as a regular file, then looks for a matching entry in the
+ * hash bucket selected by hashing inode->i_ino. If none exists, a new entry
+ * is allocated, inserted with NFSD_FILE_HASHED and NFSD_FILE_PENDING set,
+ * and opened via nfsd_open_verified(). A task that finds an existing entry
+ * waits for NFSD_FILE_PENDING to clear and restarts the lookup if the entry
+ * turned out to be unhashed.
+ *
+ * Returns nfs_ok with *@pnf set; otherwise the error from fh_verify(), from
+ * the lease break, or from the open, or nfserr_jukebox when
+ * nfsd_file_alloc() or nfsd_file_mark_find_or_create() returns NULL.
+ * *@pnf is not written on failure.
+ */
+__be32
+nfsd_file_acquire(struct svc_rqst *rqstp, struct svc_fh *fhp,
+                 unsigned int may_flags, struct nfsd_file **pnf)
+{
+       __be32  status;
+       struct net *net = SVC_NET(rqstp);
+       struct nfsd_file *nf, *new;
+       struct inode *inode;
+       unsigned int hashval;
+
+       /* FIXME: skip this if fh_dentry is already set? */
+       status = fh_verify(rqstp, fhp, S_IFREG,
+                               may_flags|NFSD_MAY_OWNER_OVERRIDE);
+       if (status != nfs_ok)
+               return status;
+
+       inode = d_inode(fhp->fh_dentry);
+       hashval = (unsigned int)hash_long(inode->i_ino, NFSD_FILE_HASH_BITS);
+retry:
+       /* First attempt: look up under RCU without taking the bucket lock */
+       rcu_read_lock();
+       nf = nfsd_file_find_locked(inode, may_flags, hashval, net);
+       rcu_read_unlock();
+       if (nf)
+               goto wait_for_construction;
+
+       new = nfsd_file_alloc(inode, may_flags, hashval, net);
+       if (!new) {
+               trace_nfsd_file_acquire(rqstp, hashval, inode, may_flags,
+                                       NULL, nfserr_jukebox);
+               return nfserr_jukebox;
+       }
+
+       /* Search again under the bucket lock before inserting "new" */
+       spin_lock(&nfsd_file_hashtbl[hashval].nfb_lock);
+       nf = nfsd_file_find_locked(inode, may_flags, hashval, net);
+       if (nf == NULL)
+               goto open_file;
+       spin_unlock(&nfsd_file_hashtbl[hashval].nfb_lock);
+       /* A matching entry appeared meanwhile: discard our unused allocation */
+       nfsd_file_slab_free(&new->nf_rcu);
+
+wait_for_construction:
+       wait_on_bit(&nf->nf_flags, NFSD_FILE_PENDING, TASK_UNINTERRUPTIBLE);
+
+       /* Did construction of this file fail? */
+       if (!test_bit(NFSD_FILE_HASHED, &nf->nf_flags)) {
+               nfsd_file_put_noref(nf);
+               goto retry;
+       }
+
+       this_cpu_inc(nfsd_file_cache_hits);
+
+       /*
+        * status is still nfs_ok from fh_verify() here; it only changes if
+        * the lease break below is attempted and fails.
+        */
+       if (!(may_flags & NFSD_MAY_NOT_BREAK_LEASE)) {
+               bool write = (may_flags & NFSD_MAY_WRITE);
+
+               if (test_bit(NFSD_FILE_BREAK_READ, &nf->nf_flags) ||
+                   (test_bit(NFSD_FILE_BREAK_WRITE, &nf->nf_flags) && write)) {
+                       status = nfserrno(nfsd_open_break_lease(
+                                       file_inode(nf->nf_file), may_flags));
+                       if (status == nfs_ok) {
+                               clear_bit(NFSD_FILE_BREAK_READ, &nf->nf_flags);
+                               if (write)
+                                       clear_bit(NFSD_FILE_BREAK_WRITE,
+                                                 &nf->nf_flags);
+                       }
+               }
+       }
+out:
+       if (status == nfs_ok) {
+               *pnf = nf;
+       } else {
+               nfsd_file_put(nf);
+               nf = NULL;
+       }
+
+       trace_nfsd_file_acquire(rqstp, hashval, inode, may_flags, nf, status);
+       return status;
+open_file:
+       /* Entered with the bucket lock held; it is dropped after insertion */
+       nf = new;
+       /* Take reference for the hashtable */
+       atomic_inc(&nf->nf_ref);
+       __set_bit(NFSD_FILE_HASHED, &nf->nf_flags);
+       __set_bit(NFSD_FILE_PENDING, &nf->nf_flags);
+       list_lru_add(&nfsd_file_lru, &nf->nf_lru);
+       hlist_add_head_rcu(&nf->nf_node, &nfsd_file_hashtbl[hashval].nfb_head);
+       ++nfsd_file_hashtbl[hashval].nfb_count;
+       nfsd_file_hashtbl[hashval].nfb_maxcount = max(nfsd_file_hashtbl[hashval].nfb_maxcount,
+                       nfsd_file_hashtbl[hashval].nfb_count);
+       spin_unlock(&nfsd_file_hashtbl[hashval].nfb_lock);
+       atomic_long_inc(&nfsd_filecache_count);
+
+       nf->nf_mark = nfsd_file_mark_find_or_create(nf);
+       if (nf->nf_mark)
+               status = nfsd_open_verified(rqstp, fhp, S_IFREG,
+                               may_flags, &nf->nf_file);
+       else
+               status = nfserr_jukebox;
+       /*
+        * If construction failed, or we raced with a call to unlink()
+        * then unhash.
+        */
+       if (status != nfs_ok || inode->i_nlink == 0) {
+               bool do_free;
+               spin_lock(&nfsd_file_hashtbl[hashval].nfb_lock);
+               do_free = nfsd_file_unhash(nf);
+               spin_unlock(&nfsd_file_hashtbl[hashval].nfb_lock);
+               if (do_free)
+                       nfsd_file_put_noref(nf);
+       }
+       /* Construction is over, successful or not: release any waiters */
+       clear_bit_unlock(NFSD_FILE_PENDING, &nf->nf_flags);
+       smp_mb__after_atomic();
+       wake_up_bit(&nf->nf_flags, NFSD_FILE_PENDING);
+       goto out;
+}
+
+/*
+ * seq_file show routine that reports file cache statistics: the total number
+ * of hashed entries, the longest bucket chain and the summed per-cpu hit
+ * counters.
+ *
+ * The set and order of fields is not a stable interface; they may be added,
+ * removed or reordered later. Anything parsing this output should key off
+ * the labels rather than the position of a line.
+ */
+static int nfsd_file_cache_stats_show(struct seq_file *m, void *v)
+{
+       unsigned long hit_total = 0;
+       unsigned int entries = 0, max_chain = 0;
+       unsigned int nbuckets, n;
+
+       /*
+        * The bucket counters are read without their spinlocks because an
+        * approximate answer is good enough. nfsd_mutex is held only so that
+        * we do not race with server shutdown.
+        */
+       mutex_lock(&nfsd_mutex);
+       nbuckets = nfsd_file_hashtbl ? NFSD_FILE_HASH_SIZE : 0;
+       for (n = 0; n < nbuckets; n++) {
+               entries += nfsd_file_hashtbl[n].nfb_count;
+               max_chain = max(max_chain, nfsd_file_hashtbl[n].nfb_count);
+       }
+       mutex_unlock(&nfsd_mutex);
+
+       for_each_possible_cpu(n) {
+               hit_total += per_cpu(nfsd_file_cache_hits, n);
+       }
+
+       seq_printf(m, "total entries: %u\n", entries);
+       seq_printf(m, "longest chain: %u\n", max_chain);
+       seq_printf(m, "cache hits:    %lu\n", hit_total);
+       return 0;
+}
+
+/*
+ * open handler for the file cache statistics file. Sets up @file through
+ * single_open() with nfsd_file_cache_stats_show() as the show routine and
+ * NULL as the data argument; @inode is not used. Returns whatever
+ * single_open() returns.
+ */
+int nfsd_file_cache_stats_open(struct inode *inode, struct file *file)
+{
+       return single_open(file, nfsd_file_cache_stats_show, NULL);
+}
diff --git a/fs/nfsd/filecache.h b/fs/nfsd/filecache.h
new file mode 100644 (file)
index 0000000..851d9ab
--- /dev/null
@@ -0,0 +1,61 @@
+#ifndef _FS_NFSD_FILECACHE_H
+#define _FS_NFSD_FILECACHE_H
+
+#include <linux/fsnotify_backend.h>
+
+/*
+ * This is the fsnotify_mark container that nfsd attaches to the files that it
+ * is holding open. Note that we have a separate refcount here aside from the
+ * one in the fsnotify_mark. We only want a single fsnotify_mark attached to
+ * the inode, and for each nfsd_file to hold a reference to it.
+ *
+ * The fsnotify_mark is itself refcounted, but that's not sufficient to tell us
+ * how to put that reference. If there are still outstanding nfsd_files that
+ * reference the mark, then we would want to call fsnotify_put_mark on it.
+ * If there were not, then we'd need to call fsnotify_destroy_mark. Since we
+ * can't really tell the difference, we use nfm_ref to keep track of how
+ * many nfsd_files hold references to the mark. When that counter goes to zero
+ * then we know to call fsnotify_destroy_mark on it.
+ */
+struct nfsd_file_mark {
+       struct fsnotify_mark    nfm_mark;
+       atomic_t                nfm_ref;
+};
+
+/*
+ * A representation of a file that has been opened by knfsd. These are hashed
+ * in the hashtable by inode number: nfsd_file_acquire() and
+ * nfsd_file_is_cached() pick the bucket by hashing inode->i_ino. Note that
+ * this object doesn't hold a reference to the inode by itself, so the
+ * nf_inode pointer should never be dereferenced, only used for comparison.
+ */
+struct nfsd_file {
+       struct hlist_node       nf_node;        /* link in the hash bucket's nfb_head */
+       struct list_head        nf_lru;         /* link on nfsd_file_lru */
+       struct rcu_head         nf_rcu;         /* handed to nfsd_file_slab_free() */
+       struct file             *nf_file;       /* filled in by nfsd_open_verified() */
+       const struct cred       *nf_cred;       /* matched against current_cred() on lookup */
+       struct net              *nf_net;        /* net namespace, matched on lookup and purge */
+#define NFSD_FILE_HASHED       (0)     /* entry is in the hash table */
+#define NFSD_FILE_PENDING      (1)     /* construction in progress; lookups wait on it */
+#define NFSD_FILE_BREAK_READ   (2)     /* break leases before handing out the file */
+#define NFSD_FILE_BREAK_WRITE  (3)     /* as above, but only for NFSD_MAY_WRITE users */
+#define NFSD_FILE_REFERENCED   (4)
+       unsigned long           nf_flags;       /* bitmap of the NFSD_FILE_* bits above */
+       struct inode            *nf_inode;      /* comparison only, see above */
+       unsigned int            nf_hashval;
+       atomic_t                nf_ref;         /* refcount; the hash table holds one */
+       unsigned char           nf_may;         /* access bits, tested against NFSD_FILE_MAY_MASK */
+       struct nfsd_file_mark   *nf_mark;       /* from nfsd_file_mark_find_or_create() */
+};
+
+int nfsd_file_cache_init(void);
+void nfsd_file_cache_purge(struct net *);
+void nfsd_file_cache_shutdown(void);
+void nfsd_file_put(struct nfsd_file *nf);
+struct nfsd_file *nfsd_file_get(struct nfsd_file *nf);
+void nfsd_file_close_inode_sync(struct inode *inode);
+bool nfsd_file_is_cached(struct inode *inode);
+__be32 nfsd_file_acquire(struct svc_rqst *rqstp, struct svc_fh *fhp,
+                 unsigned int may_flags, struct nfsd_file **nfp);
+int    nfsd_file_cache_stats_open(struct inode *, struct file *);
+#endif /* _FS_NFSD_FILECACHE_H */
index bdfe5bcb3dcd0430a6a33f0738a41be75eb157dc..9a4ef815fb8c1a29cc5ba046b3ce170707aefd11 100644 (file)
@@ -104,6 +104,7 @@ struct nfsd_net {
 
        /* Time of server startup */
        struct timespec64 nfssvc_boot;
+       seqlock_t boot_lock;
 
        /*
         * Max number of connections this nfsd container will allow. Defaults
@@ -179,4 +180,7 @@ struct nfsd_net {
 extern void nfsd_netns_free_versions(struct nfsd_net *nn);
 
 extern unsigned int nfsd_net_id;
+
+void nfsd_copy_boot_verifier(__be32 verf[2], struct nfsd_net *nn);
+void nfsd_reset_boot_verifier(struct nfsd_net *nn);
 #endif /* __NFSD_NETNS_H__ */
index 9bc32af4e2daff14817c4306833009c1d9ab92aa..cea68d8411ac520a02f671ef418027ce5064b831 100644 (file)
@@ -172,13 +172,8 @@ nfsd3_proc_read(struct svc_rqst *rqstp)
        nfserr = nfsd_read(rqstp, &resp->fh,
                                  argp->offset,
                                  rqstp->rq_vec, argp->vlen,
-                                 &resp->count);
-       if (nfserr == 0) {
-               struct inode    *inode = d_inode(resp->fh.fh_dentry);
-               resp->eof = nfsd_eof_on_read(cnt, resp->count, argp->offset,
-                                                       inode->i_size);
-       }
-
+                                 &resp->count,
+                                 &resp->eof);
        RETURN_STATUS(nfserr);
 }
 
index fcf31822c74c0b6e04616e45fbe915eacd9fc3ac..86e5658651f1066e359949571deb0c8ee9eb0a77 100644 (file)
@@ -27,6 +27,7 @@ static u32    nfs3_ftypes[] = {
        NF3SOCK, NF3BAD,  NF3LNK, NF3BAD,
 };
 
+
 /*
  * XDR functions for basic NFS types
  */
@@ -751,14 +752,16 @@ nfs3svc_encode_writeres(struct svc_rqst *rqstp, __be32 *p)
 {
        struct nfsd3_writeres *resp = rqstp->rq_resp;
        struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id);
+       __be32 verf[2];
 
        p = encode_wcc_data(rqstp, p, &resp->fh);
        if (resp->status == 0) {
                *p++ = htonl(resp->count);
                *p++ = htonl(resp->committed);
                /* unique identifier, y2038 overflow can be ignored */
-               *p++ = htonl((u32)nn->nfssvc_boot.tv_sec);
-               *p++ = htonl(nn->nfssvc_boot.tv_nsec);
+               nfsd_copy_boot_verifier(verf, nn);
+               *p++ = verf[0];
+               *p++ = verf[1];
        }
        return xdr_ressize_check(rqstp, p);
 }
@@ -1125,13 +1128,15 @@ nfs3svc_encode_commitres(struct svc_rqst *rqstp, __be32 *p)
 {
        struct nfsd3_commitres *resp = rqstp->rq_resp;
        struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id);
+       __be32 verf[2];
 
        p = encode_wcc_data(rqstp, p, &resp->fh);
        /* Write verifier */
        if (resp->status == 0) {
                /* unique identifier, y2038 overflow can be ignored */
-               *p++ = htonl((u32)nn->nfssvc_boot.tv_sec);
-               *p++ = htonl(nn->nfssvc_boot.tv_nsec);
+               nfsd_copy_boot_verifier(verf, nn);
+               *p++ = verf[0];
+               *p++ = verf[1];
        }
        return xdr_ressize_check(rqstp, p);
 }
index 397eb7820929b31f8827adc74c5aa1d8540f32d9..524111420b48c2deb9de7a536bd0eeae43fb88d2 100644 (file)
@@ -512,11 +512,9 @@ static int nfs4_xdr_dec_cb_recall(struct rpc_rqst *rqstp,
        if (unlikely(status))
                return status;
 
-       if (cb != NULL) {
-               status = decode_cb_sequence4res(xdr, cb);
-               if (unlikely(status || cb->cb_seq_status))
-                       return status;
-       }
+       status = decode_cb_sequence4res(xdr, cb);
+       if (unlikely(status || cb->cb_seq_status))
+               return status;
 
        return decode_cb_op_status(xdr, OP_CB_RECALL, &cb->cb_status);
 }
@@ -604,11 +602,10 @@ static int nfs4_xdr_dec_cb_layout(struct rpc_rqst *rqstp,
        if (unlikely(status))
                return status;
 
-       if (cb) {
-               status = decode_cb_sequence4res(xdr, cb);
-               if (unlikely(status || cb->cb_seq_status))
-                       return status;
-       }
+       status = decode_cb_sequence4res(xdr, cb);
+       if (unlikely(status || cb->cb_seq_status))
+               return status;
+
        return decode_cb_op_status(xdr, OP_CB_LAYOUTRECALL, &cb->cb_status);
 }
 #endif /* CONFIG_NFSD_PNFS */
@@ -663,11 +660,10 @@ static int nfs4_xdr_dec_cb_notify_lock(struct rpc_rqst *rqstp,
        if (unlikely(status))
                return status;
 
-       if (cb) {
-               status = decode_cb_sequence4res(xdr, cb);
-               if (unlikely(status || cb->cb_seq_status))
-                       return status;
-       }
+       status = decode_cb_sequence4res(xdr, cb);
+       if (unlikely(status || cb->cb_seq_status))
+               return status;
+
        return decode_cb_op_status(xdr, OP_CB_NOTIFY_LOCK, &cb->cb_status);
 }
 
@@ -759,11 +755,10 @@ static int nfs4_xdr_dec_cb_offload(struct rpc_rqst *rqstp,
        if (unlikely(status))
                return status;
 
-       if (cb) {
-               status = decode_cb_sequence4res(xdr, cb);
-               if (unlikely(status || cb->cb_seq_status))
-                       return status;
-       }
+       status = decode_cb_sequence4res(xdr, cb);
+       if (unlikely(status || cb->cb_seq_status))
+               return status;
+
        return decode_cb_op_status(xdr, OP_CB_OFFLOAD, &cb->cb_status);
 }
 /*
index a79e24b79095a26f68aa9dee1fd91db400ff243f..2681c70283ce24b25166f6edd9d033197001b453 100644 (file)
@@ -169,8 +169,8 @@ nfsd4_free_layout_stateid(struct nfs4_stid *stid)
        spin_unlock(&fp->fi_lock);
 
        if (!nfsd4_layout_ops[ls->ls_layout_type]->disable_recalls)
-               vfs_setlease(ls->ls_file, F_UNLCK, NULL, (void **)&ls);
-       fput(ls->ls_file);
+               vfs_setlease(ls->ls_file->nf_file, F_UNLCK, NULL, (void **)&ls);
+       nfsd_file_put(ls->ls_file);
 
        if (ls->ls_recalled)
                atomic_dec(&ls->ls_stid.sc_file->fi_lo_recalls);
@@ -197,7 +197,7 @@ nfsd4_layout_setlease(struct nfs4_layout_stateid *ls)
        fl->fl_end = OFFSET_MAX;
        fl->fl_owner = ls;
        fl->fl_pid = current->tgid;
-       fl->fl_file = ls->ls_file;
+       fl->fl_file = ls->ls_file->nf_file;
 
        status = vfs_setlease(fl->fl_file, fl->fl_type, &fl, NULL);
        if (status) {
@@ -236,13 +236,13 @@ nfsd4_alloc_layout_stateid(struct nfsd4_compound_state *cstate,
                        NFSPROC4_CLNT_CB_LAYOUT);
 
        if (parent->sc_type == NFS4_DELEG_STID)
-               ls->ls_file = get_file(fp->fi_deleg_file);
+               ls->ls_file = nfsd_file_get(fp->fi_deleg_file);
        else
                ls->ls_file = find_any_file(fp);
        BUG_ON(!ls->ls_file);
 
        if (nfsd4_layout_setlease(ls)) {
-               fput(ls->ls_file);
+               nfsd_file_put(ls->ls_file);
                put_nfs4_file(fp);
                kmem_cache_free(nfs4_layout_stateid_cache, ls);
                return NULL;
@@ -626,7 +626,7 @@ nfsd4_cb_layout_fail(struct nfs4_layout_stateid *ls)
 
        argv[0] = (char *)nfsd_recall_failed;
        argv[1] = addr_str;
-       argv[2] = ls->ls_file->f_path.mnt->mnt_sb->s_id;
+       argv[2] = ls->ls_file->nf_file->f_path.mnt->mnt_sb->s_id;
        argv[3] = NULL;
 
        error = call_usermodehelper(nfsd_recall_failed, argv, envp,
index 8beda999e1346e8917bc3b624f33fed22e710d7b..4e3e77b7641157ea6425535319500eb87dd7546b 100644 (file)
@@ -568,17 +568,11 @@ nfsd4_access(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 
 static void gen_boot_verifier(nfs4_verifier *verifier, struct net *net)
 {
-       __be32 verf[2];
-       struct nfsd_net *nn = net_generic(net, nfsd_net_id);
+       __be32 *verf = (__be32 *)verifier->data;
 
-       /*
-        * This is opaque to client, so no need to byte-swap. Use
-        * __force to keep sparse happy. y2038 time_t overflow is
-        * irrelevant in this usage.
-        */
-       verf[0] = (__force __be32)nn->nfssvc_boot.tv_sec;
-       verf[1] = (__force __be32)nn->nfssvc_boot.tv_nsec;
-       memcpy(verifier->data, verf, sizeof(verifier->data));
+       BUILD_BUG_ON(2*sizeof(*verf) != sizeof(verifier->data));
+
+       nfsd_copy_boot_verifier(verf, net_generic(net, nfsd_net_id));
 }
 
 static __be32
@@ -761,7 +755,7 @@ nfsd4_read(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
        struct nfsd4_read *read = &u->read;
        __be32 status;
 
-       read->rd_filp = NULL;
+       read->rd_nf = NULL;
        if (read->rd_offset >= OFFSET_MAX)
                return nfserr_inval;
 
@@ -782,7 +776,7 @@ nfsd4_read(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
        /* check stateid */
        status = nfs4_preprocess_stateid_op(rqstp, cstate, &cstate->current_fh,
                                        &read->rd_stateid, RD_STATE,
-                                       &read->rd_filp, &read->rd_tmp_file);
+                                       &read->rd_nf);
        if (status) {
                dprintk("NFSD: nfsd4_read: couldn't process stateid!\n");
                goto out;
@@ -798,8 +792,8 @@ out:
 static void
 nfsd4_read_release(union nfsd4_op_u *u)
 {
-       if (u->read.rd_filp)
-               fput(u->read.rd_filp);
+       if (u->read.rd_nf)
+               nfsd_file_put(u->read.rd_nf);
        trace_nfsd_read_done(u->read.rd_rqstp, u->read.rd_fhp,
                             u->read.rd_offset, u->read.rd_length);
 }
@@ -954,7 +948,7 @@ nfsd4_setattr(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
        if (setattr->sa_iattr.ia_valid & ATTR_SIZE) {
                status = nfs4_preprocess_stateid_op(rqstp, cstate,
                                &cstate->current_fh, &setattr->sa_stateid,
-                               WR_STATE, NULL, NULL);
+                               WR_STATE, NULL);
                if (status) {
                        dprintk("NFSD: nfsd4_setattr: couldn't process stateid!\n");
                        return status;
@@ -993,7 +987,7 @@ nfsd4_write(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 {
        struct nfsd4_write *write = &u->write;
        stateid_t *stateid = &write->wr_stateid;
-       struct file *filp = NULL;
+       struct nfsd_file *nf = NULL;
        __be32 status = nfs_ok;
        unsigned long cnt;
        int nvecs;
@@ -1005,7 +999,7 @@ nfsd4_write(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
        trace_nfsd_write_start(rqstp, &cstate->current_fh,
                               write->wr_offset, cnt);
        status = nfs4_preprocess_stateid_op(rqstp, cstate, &cstate->current_fh,
-                                               stateid, WR_STATE, &filp, NULL);
+                                               stateid, WR_STATE, &nf);
        if (status) {
                dprintk("NFSD: nfsd4_write: couldn't process stateid!\n");
                return status;
@@ -1018,10 +1012,10 @@ nfsd4_write(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
                                      &write->wr_head, write->wr_buflen);
        WARN_ON_ONCE(nvecs > ARRAY_SIZE(rqstp->rq_vec));
 
-       status = nfsd_vfs_write(rqstp, &cstate->current_fh, filp,
+       status = nfsd_vfs_write(rqstp, &cstate->current_fh, nf->nf_file,
                                write->wr_offset, rqstp->rq_vec, nvecs, &cnt,
                                write->wr_how_written);
-       fput(filp);
+       nfsd_file_put(nf);
 
        write->wr_bytes_written = cnt;
        trace_nfsd_write_done(rqstp, &cstate->current_fh,
@@ -1031,8 +1025,8 @@ nfsd4_write(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 
 static __be32
 nfsd4_verify_copy(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
-                 stateid_t *src_stateid, struct file **src,
-                 stateid_t *dst_stateid, struct file **dst)
+                 stateid_t *src_stateid, struct nfsd_file **src,
+                 stateid_t *dst_stateid, struct nfsd_file **dst)
 {
        __be32 status;
 
@@ -1040,22 +1034,22 @@ nfsd4_verify_copy(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
                return nfserr_nofilehandle;
 
        status = nfs4_preprocess_stateid_op(rqstp, cstate, &cstate->save_fh,
-                                           src_stateid, RD_STATE, src, NULL);
+                                           src_stateid, RD_STATE, src);
        if (status) {
                dprintk("NFSD: %s: couldn't process src stateid!\n", __func__);
                goto out;
        }
 
        status = nfs4_preprocess_stateid_op(rqstp, cstate, &cstate->current_fh,
-                                           dst_stateid, WR_STATE, dst, NULL);
+                                           dst_stateid, WR_STATE, dst);
        if (status) {
                dprintk("NFSD: %s: couldn't process dst stateid!\n", __func__);
                goto out_put_src;
        }
 
        /* fix up for NFS-specific error code */
-       if (!S_ISREG(file_inode(*src)->i_mode) ||
-           !S_ISREG(file_inode(*dst)->i_mode)) {
+       if (!S_ISREG(file_inode((*src)->nf_file)->i_mode) ||
+           !S_ISREG(file_inode((*dst)->nf_file)->i_mode)) {
                status = nfserr_wrong_type;
                goto out_put_dst;
        }
@@ -1063,9 +1057,9 @@ nfsd4_verify_copy(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 out:
        return status;
 out_put_dst:
-       fput(*dst);
+       nfsd_file_put(*dst);
 out_put_src:
-       fput(*src);
+       nfsd_file_put(*src);
        goto out;
 }
 
@@ -1074,7 +1068,7 @@ nfsd4_clone(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
                union nfsd4_op_u *u)
 {
        struct nfsd4_clone *clone = &u->clone;
-       struct file *src, *dst;
+       struct nfsd_file *src, *dst;
        __be32 status;
 
        status = nfsd4_verify_copy(rqstp, cstate, &clone->cl_src_stateid, &src,
@@ -1082,11 +1076,11 @@ nfsd4_clone(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
        if (status)
                goto out;
 
-       status = nfsd4_clone_file_range(src, clone->cl_src_pos,
-                       dst, clone->cl_dst_pos, clone->cl_count);
+       status = nfsd4_clone_file_range(src->nf_file, clone->cl_src_pos,
+                       dst->nf_file, clone->cl_dst_pos, clone->cl_count);
 
-       fput(dst);
-       fput(src);
+       nfsd_file_put(dst);
+       nfsd_file_put(src);
 out:
        return status;
 }
@@ -1176,8 +1170,9 @@ static ssize_t _nfsd_copy_file_range(struct nfsd4_copy *copy)
        do {
                if (kthread_should_stop())
                        break;
-               bytes_copied = nfsd_copy_file_range(copy->file_src, src_pos,
-                               copy->file_dst, dst_pos, bytes_total);
+               bytes_copied = nfsd_copy_file_range(copy->nf_src->nf_file,
+                               src_pos, copy->nf_dst->nf_file, dst_pos,
+                               bytes_total);
                if (bytes_copied <= 0)
                        break;
                bytes_total -= bytes_copied;
@@ -1204,8 +1199,8 @@ static __be32 nfsd4_do_copy(struct nfsd4_copy *copy, bool sync)
                status = nfs_ok;
        }
 
-       fput(copy->file_src);
-       fput(copy->file_dst);
+       nfsd_file_put(copy->nf_src);
+       nfsd_file_put(copy->nf_dst);
        return status;
 }
 
@@ -1218,16 +1213,16 @@ static void dup_copy_fields(struct nfsd4_copy *src, struct nfsd4_copy *dst)
        memcpy(&dst->cp_res, &src->cp_res, sizeof(src->cp_res));
        memcpy(&dst->fh, &src->fh, sizeof(src->fh));
        dst->cp_clp = src->cp_clp;
-       dst->file_dst = get_file(src->file_dst);
-       dst->file_src = get_file(src->file_src);
+       dst->nf_dst = nfsd_file_get(src->nf_dst);
+       dst->nf_src = nfsd_file_get(src->nf_src);
        memcpy(&dst->cp_stateid, &src->cp_stateid, sizeof(src->cp_stateid));
 }
 
 static void cleanup_async_copy(struct nfsd4_copy *copy)
 {
        nfs4_free_cp_state(copy);
-       fput(copy->file_dst);
-       fput(copy->file_src);
+       nfsd_file_put(copy->nf_dst);
+       nfsd_file_put(copy->nf_src);
        spin_lock(&copy->cp_clp->async_lock);
        list_del(&copy->copies);
        spin_unlock(&copy->cp_clp->async_lock);
@@ -1264,8 +1259,8 @@ nfsd4_copy(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
        struct nfsd4_copy *async_copy = NULL;
 
        status = nfsd4_verify_copy(rqstp, cstate, &copy->cp_src_stateid,
-                                  &copy->file_src, &copy->cp_dst_stateid,
-                                  &copy->file_dst);
+                                  &copy->nf_src, &copy->cp_dst_stateid,
+                                  &copy->nf_dst);
        if (status)
                goto out;
 
@@ -1347,21 +1342,21 @@ nfsd4_fallocate(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
                struct nfsd4_fallocate *fallocate, int flags)
 {
        __be32 status;
-       struct file *file;
+       struct nfsd_file *nf;
 
        status = nfs4_preprocess_stateid_op(rqstp, cstate, &cstate->current_fh,
                                            &fallocate->falloc_stateid,
-                                           WR_STATE, &file, NULL);
+                                           WR_STATE, &nf);
        if (status != nfs_ok) {
                dprintk("NFSD: nfsd4_fallocate: couldn't process stateid!\n");
                return status;
        }
 
-       status = nfsd4_vfs_fallocate(rqstp, &cstate->current_fh, file,
+       status = nfsd4_vfs_fallocate(rqstp, &cstate->current_fh, nf->nf_file,
                                     fallocate->falloc_offset,
                                     fallocate->falloc_length,
                                     flags);
-       fput(file);
+       nfsd_file_put(nf);
        return status;
 }
 static __be32
@@ -1406,11 +1401,11 @@ nfsd4_seek(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
        struct nfsd4_seek *seek = &u->seek;
        int whence;
        __be32 status;
-       struct file *file;
+       struct nfsd_file *nf;
 
        status = nfs4_preprocess_stateid_op(rqstp, cstate, &cstate->current_fh,
                                            &seek->seek_stateid,
-                                           RD_STATE, &file, NULL);
+                                           RD_STATE, &nf);
        if (status) {
                dprintk("NFSD: nfsd4_seek: couldn't process stateid!\n");
                return status;
@@ -1432,14 +1427,14 @@ nfsd4_seek(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
         * Note:  This call does change file->f_pos, but nothing in NFSD
         *        should ever file->f_pos.
         */
-       seek->seek_pos = vfs_llseek(file, seek->seek_offset, whence);
+       seek->seek_pos = vfs_llseek(nf->nf_file, seek->seek_offset, whence);
        if (seek->seek_pos < 0)
                status = nfserrno(seek->seek_pos);
-       else if (seek->seek_pos >= i_size_read(file_inode(file)))
+       else if (seek->seek_pos >= i_size_read(file_inode(nf->nf_file)))
                seek->seek_eof = true;
 
 out:
-       fput(file);
+       nfsd_file_put(nf);
        return status;
 }
 
index 87679557d0d6a1e05aa41ebbe7b1926fb3ae95bf..cdc75ad4438b6442fb8bf76b9687b662f3a44bc6 100644 (file)
@@ -59,8 +59,13 @@ struct nfsd4_client_tracking_ops {
        void (*remove)(struct nfs4_client *);
        int (*check)(struct nfs4_client *);
        void (*grace_done)(struct nfsd_net *);
+       uint8_t version;
+       size_t msglen;
 };
 
+static const struct nfsd4_client_tracking_ops nfsd4_cld_tracking_ops;
+static const struct nfsd4_client_tracking_ops nfsd4_cld_tracking_ops_v2;
+
 /* Globals */
 static char user_recovery_dirname[PATH_MAX] = "/var/lib/nfs/v4recovery";
 
@@ -173,6 +178,7 @@ __nfsd4_create_reclaim_record_grace(struct nfs4_client *clp,
                const char *dname, int len, struct nfsd_net *nn)
 {
        struct xdr_netobj name;
+       struct xdr_netobj princhash = { .len = 0, .data = NULL };
        struct nfs4_client_reclaim *crp;
 
        name.data = kmemdup(dname, len, GFP_KERNEL);
@@ -182,7 +188,7 @@ __nfsd4_create_reclaim_record_grace(struct nfs4_client *clp,
                return;
        }
        name.len = len;
-       crp = nfs4_client_to_reclaim(name, nn);
+       crp = nfs4_client_to_reclaim(name, princhash, nn);
        if (!crp) {
                kfree(name.data);
                return;
@@ -482,6 +488,7 @@ static int
 load_recdir(struct dentry *parent, struct dentry *child, struct nfsd_net *nn)
 {
        struct xdr_netobj name;
+       struct xdr_netobj princhash = { .len = 0, .data = NULL };
 
        if (child->d_name.len != HEXDIR_LEN - 1) {
                printk("%s: illegal name %pd in recovery directory\n",
@@ -496,7 +503,7 @@ load_recdir(struct dentry *parent, struct dentry *child, struct nfsd_net *nn)
                goto out;
        }
        name.len = HEXDIR_LEN;
-       if (!nfs4_client_to_reclaim(name, nn))
+       if (!nfs4_client_to_reclaim(name, princhash, nn))
                kfree(name.data);
 out:
        return 0;
@@ -718,6 +725,8 @@ static const struct nfsd4_client_tracking_ops nfsd4_legacy_tracking_ops = {
        .remove         = nfsd4_remove_clid_dir,
        .check          = nfsd4_check_legacy_client,
        .grace_done     = nfsd4_recdir_purge_old,
+       .version        = 1,
+       .msglen         = 0,
 };
 
 /* Globals */
@@ -731,25 +740,32 @@ struct cld_net {
        struct list_head         cn_list;
        unsigned int             cn_xid;
        bool                     cn_has_legacy;
+       struct crypto_shash     *cn_tfm;
 };
 
 struct cld_upcall {
        struct list_head         cu_list;
        struct cld_net          *cu_net;
        struct completion        cu_done;
-       struct cld_msg           cu_msg;
+       union {
+               struct cld_msg_hdr       cu_hdr;
+               struct cld_msg           cu_msg;
+               struct cld_msg_v2        cu_msg_v2;
+       } cu_u;
 };
 
 static int
-__cld_pipe_upcall(struct rpc_pipe *pipe, struct cld_msg *cmsg)
+__cld_pipe_upcall(struct rpc_pipe *pipe, void *cmsg)
 {
        int ret;
        struct rpc_pipe_msg msg;
-       struct cld_upcall *cup = container_of(cmsg, struct cld_upcall, cu_msg);
+       struct cld_upcall *cup = container_of(cmsg, struct cld_upcall, cu_u);
+       struct nfsd_net *nn = net_generic(pipe->dentry->d_sb->s_fs_info,
+                                         nfsd_net_id);
 
        memset(&msg, 0, sizeof(msg));
        msg.data = cmsg;
-       msg.len = sizeof(*cmsg);
+       msg.len = nn->client_tracking_ops->msglen;
 
        ret = rpc_queue_upcall(pipe, &msg);
        if (ret < 0) {
@@ -765,7 +781,7 @@ out:
 }
 
 static int
-cld_pipe_upcall(struct rpc_pipe *pipe, struct cld_msg *cmsg)
+cld_pipe_upcall(struct rpc_pipe *pipe, void *cmsg)
 {
        int ret;
 
@@ -781,11 +797,11 @@ cld_pipe_upcall(struct rpc_pipe *pipe, struct cld_msg *cmsg)
 }
 
 static ssize_t
-__cld_pipe_inprogress_downcall(const struct cld_msg __user *cmsg,
+__cld_pipe_inprogress_downcall(const struct cld_msg_v2 __user *cmsg,
                struct nfsd_net *nn)
 {
-       uint8_t cmd;
-       struct xdr_netobj name;
+       uint8_t cmd, princhashlen;
+       struct xdr_netobj name, princhash = { .len = 0, .data = NULL };
        uint16_t namelen;
        struct cld_net *cn = nn->cld_net;
 
@@ -794,22 +810,48 @@ __cld_pipe_inprogress_downcall(const struct cld_msg __user *cmsg,
                return -EFAULT;
        }
        if (cmd == Cld_GraceStart) {
-               if (get_user(namelen, &cmsg->cm_u.cm_name.cn_len))
-                       return -EFAULT;
-               name.data = memdup_user(&cmsg->cm_u.cm_name.cn_id, namelen);
-               if (IS_ERR_OR_NULL(name.data))
-                       return -EFAULT;
-               name.len = namelen;
+               if (nn->client_tracking_ops->version >= 2) {
+                       const struct cld_clntinfo __user *ci;
+
+                       ci = &cmsg->cm_u.cm_clntinfo;
+                       if (get_user(namelen, &ci->cc_name.cn_len))
+                               return -EFAULT;
+                       name.data = memdup_user(&ci->cc_name.cn_id, namelen);
+                       if (IS_ERR_OR_NULL(name.data))
+                               return -EFAULT;
+                       name.len = namelen;
+                       get_user(princhashlen, &ci->cc_princhash.cp_len);
+                       if (princhashlen > 0) {
+                               princhash.data = memdup_user(
+                                               &ci->cc_princhash.cp_data,
+                                               princhashlen);
+                               if (IS_ERR_OR_NULL(princhash.data))
+                                       return -EFAULT;
+                               princhash.len = princhashlen;
+                       } else
+                               princhash.len = 0;
+               } else {
+                       const struct cld_name __user *cnm;
+
+                       cnm = &cmsg->cm_u.cm_name;
+                       if (get_user(namelen, &cnm->cn_len))
+                               return -EFAULT;
+                       name.data = memdup_user(&cnm->cn_id, namelen);
+                       if (IS_ERR_OR_NULL(name.data))
+                               return -EFAULT;
+                       name.len = namelen;
+               }
                if (name.len > 5 && memcmp(name.data, "hash:", 5) == 0) {
                        name.len = name.len - 5;
                        memmove(name.data, name.data + 5, name.len);
                        cn->cn_has_legacy = true;
                }
-               if (!nfs4_client_to_reclaim(name, nn)) {
+               if (!nfs4_client_to_reclaim(name, princhash, nn)) {
                        kfree(name.data);
+                       kfree(princhash.data);
                        return -EFAULT;
                }
-               return sizeof(*cmsg);
+               return nn->client_tracking_ops->msglen;
        }
        return -EFAULT;
 }
@@ -818,21 +860,22 @@ static ssize_t
 cld_pipe_downcall(struct file *filp, const char __user *src, size_t mlen)
 {
        struct cld_upcall *tmp, *cup;
-       struct cld_msg __user *cmsg = (struct cld_msg __user *)src;
+       struct cld_msg_hdr __user *hdr = (struct cld_msg_hdr __user *)src;
+       struct cld_msg_v2 __user *cmsg = (struct cld_msg_v2 __user *)src;
        uint32_t xid;
        struct nfsd_net *nn = net_generic(file_inode(filp)->i_sb->s_fs_info,
                                                nfsd_net_id);
        struct cld_net *cn = nn->cld_net;
        int16_t status;
 
-       if (mlen != sizeof(*cmsg)) {
+       if (mlen != nn->client_tracking_ops->msglen) {
                dprintk("%s: got %zu bytes, expected %zu\n", __func__, mlen,
-                       sizeof(*cmsg));
+                       nn->client_tracking_ops->msglen);
                return -EINVAL;
        }
 
        /* copy just the xid so we can try to find that */
-       if (copy_from_user(&xid, &cmsg->cm_xid, sizeof(xid)) != 0) {
+       if (copy_from_user(&xid, &hdr->cm_xid, sizeof(xid)) != 0) {
                dprintk("%s: error when copying xid from userspace", __func__);
                return -EFAULT;
        }
@@ -842,7 +885,7 @@ cld_pipe_downcall(struct file *filp, const char __user *src, size_t mlen)
         * list (for -EINPROGRESS, we just want to make sure the xid is
         * valid, not remove the upcall from the list)
         */
-       if (get_user(status, &cmsg->cm_status)) {
+       if (get_user(status, &hdr->cm_status)) {
                dprintk("%s: error when copying status from userspace", __func__);
                return -EFAULT;
        }
@@ -851,7 +894,7 @@ cld_pipe_downcall(struct file *filp, const char __user *src, size_t mlen)
        cup = NULL;
        spin_lock(&cn->cn_lock);
        list_for_each_entry(tmp, &cn->cn_list, cu_list) {
-               if (get_unaligned(&tmp->cu_msg.cm_xid) == xid) {
+               if (get_unaligned(&tmp->cu_u.cu_hdr.cm_xid) == xid) {
                        cup = tmp;
                        if (status != -EINPROGRESS)
                                list_del_init(&cup->cu_list);
@@ -869,7 +912,7 @@ cld_pipe_downcall(struct file *filp, const char __user *src, size_t mlen)
        if (status == -EINPROGRESS)
                return __cld_pipe_inprogress_downcall(cmsg, nn);
 
-       if (copy_from_user(&cup->cu_msg, src, mlen) != 0)
+       if (copy_from_user(&cup->cu_u.cu_msg_v2, src, mlen) != 0)
                return -EFAULT;
 
        complete(&cup->cu_done);
@@ -881,7 +924,7 @@ cld_pipe_destroy_msg(struct rpc_pipe_msg *msg)
 {
        struct cld_msg *cmsg = msg->data;
        struct cld_upcall *cup = container_of(cmsg, struct cld_upcall,
-                                                cu_msg);
+                                                cu_u.cu_msg);
 
        /* errno >= 0 means we got a downcall */
        if (msg->errno >= 0)
@@ -1007,14 +1050,17 @@ nfsd4_remove_cld_pipe(struct net *net)
 
        nfsd4_cld_unregister_net(net, cn->cn_pipe);
        rpc_destroy_pipe_data(cn->cn_pipe);
+       if (cn->cn_tfm)
+               crypto_free_shash(cn->cn_tfm);
        kfree(nn->cld_net);
        nn->cld_net = NULL;
 }
 
 static struct cld_upcall *
-alloc_cld_upcall(struct cld_net *cn)
+alloc_cld_upcall(struct nfsd_net *nn)
 {
        struct cld_upcall *new, *tmp;
+       struct cld_net *cn = nn->cld_net;
 
        new = kzalloc(sizeof(*new), GFP_KERNEL);
        if (!new)
@@ -1024,20 +1070,20 @@ alloc_cld_upcall(struct cld_net *cn)
 restart_search:
        spin_lock(&cn->cn_lock);
        list_for_each_entry(tmp, &cn->cn_list, cu_list) {
-               if (tmp->cu_msg.cm_xid == cn->cn_xid) {
+               if (tmp->cu_u.cu_msg.cm_xid == cn->cn_xid) {
                        cn->cn_xid++;
                        spin_unlock(&cn->cn_lock);
                        goto restart_search;
                }
        }
        init_completion(&new->cu_done);
-       new->cu_msg.cm_vers = CLD_UPCALL_VERSION;
-       put_unaligned(cn->cn_xid++, &new->cu_msg.cm_xid);
+       new->cu_u.cu_msg.cm_vers = nn->client_tracking_ops->version;
+       put_unaligned(cn->cn_xid++, &new->cu_u.cu_msg.cm_xid);
        new->cu_net = cn;
        list_add(&new->cu_list, &cn->cn_list);
        spin_unlock(&cn->cn_lock);
 
-       dprintk("%s: allocated xid %u\n", __func__, new->cu_msg.cm_xid);
+       dprintk("%s: allocated xid %u\n", __func__, new->cu_u.cu_msg.cm_xid);
 
        return new;
 }
@@ -1066,20 +1112,20 @@ nfsd4_cld_create(struct nfs4_client *clp)
        if (test_bit(NFSD4_CLIENT_STABLE, &clp->cl_flags))
                return;
 
-       cup = alloc_cld_upcall(cn);
+       cup = alloc_cld_upcall(nn);
        if (!cup) {
                ret = -ENOMEM;
                goto out_err;
        }
 
-       cup->cu_msg.cm_cmd = Cld_Create;
-       cup->cu_msg.cm_u.cm_name.cn_len = clp->cl_name.len;
-       memcpy(cup->cu_msg.cm_u.cm_name.cn_id, clp->cl_name.data,
+       cup->cu_u.cu_msg.cm_cmd = Cld_Create;
+       cup->cu_u.cu_msg.cm_u.cm_name.cn_len = clp->cl_name.len;
+       memcpy(cup->cu_u.cu_msg.cm_u.cm_name.cn_id, clp->cl_name.data,
                        clp->cl_name.len);
 
-       ret = cld_pipe_upcall(cn->cn_pipe, &cup->cu_msg);
+       ret = cld_pipe_upcall(cn->cn_pipe, &cup->cu_u.cu_msg);
        if (!ret) {
-               ret = cup->cu_msg.cm_status;
+               ret = cup->cu_u.cu_msg.cm_status;
                set_bit(NFSD4_CLIENT_STABLE, &clp->cl_flags);
        }
 
@@ -1090,6 +1136,76 @@ out_err:
                                 "record on stable storage: %d\n", ret);
 }
 
+/*
+ * Ask daemon to create a new record (upcall version 2).
+ *
+ * Besides the client name, the message carries a digest of the client's
+ * principal computed with cn->cn_tfm.  cr_raw_principal is preferred over
+ * cr_principal; when the client has neither, cp_len is sent as zero.
+ */
+static void
+nfsd4_cld_create_v2(struct nfs4_client *clp)
+{
+       int ret;
+       struct cld_upcall *cup;
+       struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id);
+       struct cld_net *cn = nn->cld_net;
+       struct cld_msg_v2 *cmsg;
+       struct cld_clntinfo *ci;
+       struct crypto_shash *tfm = cn->cn_tfm;
+       char *principal = NULL;
+       SHASH_DESC_ON_STACK(desc, tfm);
+
+       /* Don't upcall if it's already stored */
+       if (test_bit(NFSD4_CLIENT_STABLE, &clp->cl_flags))
+               return;
+
+       cup = alloc_cld_upcall(nn);
+       if (!cup) {
+               ret = -ENOMEM;
+               goto out_err;
+       }
+
+       cmsg = &cup->cu_u.cu_msg_v2;
+       ci = &cmsg->cm_u.cm_clntinfo;
+       cmsg->cm_cmd = Cld_Create;
+       ci->cc_name.cn_len = clp->cl_name.len;
+       memcpy(ci->cc_name.cn_id, clp->cl_name.data, clp->cl_name.len);
+       if (clp->cl_cred.cr_raw_principal)
+               principal = clp->cl_cred.cr_raw_principal;
+       else if (clp->cl_cred.cr_principal)
+               principal = clp->cl_cred.cr_principal;
+       if (principal) {
+               /*
+                * Digest straight into the message rather than through a
+                * temporary heap buffer.  Assumes cp_data can hold
+                * crypto_shash_digestsize(tfm) bytes -- TODO confirm
+                * against the cld_princhash definition.
+                */
+               desc->tfm = tfm;
+               ret = crypto_shash_digest(desc, principal, strlen(principal),
+                                         ci->cc_princhash.cp_data);
+               shash_desc_zero(desc);
+               if (ret)
+                       goto out;
+               ci->cc_princhash.cp_len = crypto_shash_digestsize(tfm);
+       } else
+               ci->cc_princhash.cp_len = 0;
+
+       ret = cld_pipe_upcall(cn->cn_pipe, cmsg);
+       if (!ret) {
+               ret = cmsg->cm_status;
+               set_bit(NFSD4_CLIENT_STABLE, &clp->cl_flags);
+       }
+
+out:
+       free_cld_upcall(cup);
+out_err:
+       if (ret)
+               pr_err("NFSD: Unable to create client record on stable storage: %d\n",
+                               ret);
+}
+
 /* Ask daemon to create a new record */
 static void
 nfsd4_cld_remove(struct nfs4_client *clp)
@@ -1103,20 +1218,20 @@ nfsd4_cld_remove(struct nfs4_client *clp)
        if (!test_bit(NFSD4_CLIENT_STABLE, &clp->cl_flags))
                return;
 
-       cup = alloc_cld_upcall(cn);
+       cup = alloc_cld_upcall(nn);
        if (!cup) {
                ret = -ENOMEM;
                goto out_err;
        }
 
-       cup->cu_msg.cm_cmd = Cld_Remove;
-       cup->cu_msg.cm_u.cm_name.cn_len = clp->cl_name.len;
-       memcpy(cup->cu_msg.cm_u.cm_name.cn_id, clp->cl_name.data,
+       cup->cu_u.cu_msg.cm_cmd = Cld_Remove;
+       cup->cu_u.cu_msg.cm_u.cm_name.cn_len = clp->cl_name.len;
+       memcpy(cup->cu_u.cu_msg.cm_u.cm_name.cn_id, clp->cl_name.data,
                        clp->cl_name.len);
 
-       ret = cld_pipe_upcall(cn->cn_pipe, &cup->cu_msg);
+       ret = cld_pipe_upcall(cn->cn_pipe, &cup->cu_u.cu_msg);
        if (!ret) {
-               ret = cup->cu_msg.cm_status;
+               ret = cup->cu_u.cu_msg.cm_status;
                clear_bit(NFSD4_CLIENT_STABLE, &clp->cl_flags);
        }
 
@@ -1145,21 +1260,21 @@ nfsd4_cld_check_v0(struct nfs4_client *clp)
        if (test_bit(NFSD4_CLIENT_STABLE, &clp->cl_flags))
                return 0;
 
-       cup = alloc_cld_upcall(cn);
+       cup = alloc_cld_upcall(nn);
        if (!cup) {
                printk(KERN_ERR "NFSD: Unable to check client record on "
                                "stable storage: %d\n", -ENOMEM);
                return -ENOMEM;
        }
 
-       cup->cu_msg.cm_cmd = Cld_Check;
-       cup->cu_msg.cm_u.cm_name.cn_len = clp->cl_name.len;
-       memcpy(cup->cu_msg.cm_u.cm_name.cn_id, clp->cl_name.data,
+       cup->cu_u.cu_msg.cm_cmd = Cld_Check;
+       cup->cu_u.cu_msg.cm_u.cm_name.cn_len = clp->cl_name.len;
+       memcpy(cup->cu_u.cu_msg.cm_u.cm_name.cn_id, clp->cl_name.data,
                        clp->cl_name.len);
 
-       ret = cld_pipe_upcall(cn->cn_pipe, &cup->cu_msg);
+       ret = cld_pipe_upcall(cn->cn_pipe, &cup->cu_u.cu_msg);
        if (!ret) {
-               ret = cup->cu_msg.cm_status;
+               ret = cup->cu_u.cu_msg.cm_status;
                set_bit(NFSD4_CLIENT_STABLE, &clp->cl_flags);
        }
 
@@ -1216,6 +1331,92 @@ found:
        return 0;
 }
 
+/*
+ * Reclaim check for upcall version 2.
+ *
+ * Looks the client up in the reclaim hashtable by name and, when
+ * cn_has_legacy is set, by the name from nfs4_make_rec_clidname().
+ * If the matching record carries a principal hash, the client's principal
+ * (cr_raw_principal preferred over cr_principal) must hash to that value.
+ *
+ * Returns 0 and points the record at @clp on success, -ENOENT otherwise.
+ */
+static int
+nfsd4_cld_check_v2(struct nfs4_client *clp)
+{
+       struct nfs4_client_reclaim *crp;
+       struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id);
+       struct cld_net *cn = nn->cld_net;
+       int status;
+       char dname[HEXDIR_LEN];
+       struct xdr_netobj name;
+       struct crypto_shash *tfm = cn->cn_tfm;
+       struct xdr_netobj cksum;
+       char *principal = NULL;
+       SHASH_DESC_ON_STACK(desc, tfm);
+
+       /* did we already find that this client is stable? */
+       if (test_bit(NFSD4_CLIENT_STABLE, &clp->cl_flags))
+               return 0;
+
+       /* look for it in the reclaim hashtable otherwise */
+       crp = nfsd4_find_reclaim_client(clp->cl_name, nn);
+       if (crp)
+               goto found;
+
+       if (cn->cn_has_legacy) {
+               status = nfs4_make_rec_clidname(dname, &clp->cl_name);
+               if (status)
+                       return -ENOENT;
+
+               name.data = kmemdup(dname, HEXDIR_LEN, GFP_KERNEL);
+               if (!name.data) {
+                       dprintk("%s: failed to allocate memory for name.data\n",
+                                       __func__);
+                       return -ENOENT;
+               }
+               name.len = HEXDIR_LEN;
+               crp = nfsd4_find_reclaim_client(name, nn);
+               kfree(name.data);
+               if (crp)
+                       goto found;
+
+       }
+       return -ENOENT;
+found:
+       if (crp->cr_princhash.len) {
+               if (clp->cl_cred.cr_raw_principal)
+                       principal = clp->cl_cred.cr_raw_principal;
+               else if (clp->cl_cred.cr_principal)
+                       principal = clp->cl_cred.cr_principal;
+               if (principal == NULL)
+                       return -ENOENT;
+               desc->tfm = tfm;
+               cksum.len = crypto_shash_digestsize(tfm);
+               /*
+                * The recorded length must never bound a read of our own
+                * digest buffer: a record whose hash is not exactly one
+                * digest long cannot match.
+                */
+               if (crp->cr_princhash.len != cksum.len)
+                       return -ENOENT;
+               cksum.data = kmalloc(cksum.len, GFP_KERNEL);
+               if (cksum.data == NULL)
+                       return -ENOENT;
+               status = crypto_shash_digest(desc, principal, strlen(principal),
+                                            cksum.data);
+               shash_desc_zero(desc);
+               if (!status && memcmp(crp->cr_princhash.data, cksum.data,
+                                     cksum.len))
+                       status = -ENOENT;
+               kfree(cksum.data);
+               if (status)
+                       return -ENOENT;
+       }
+       crp->cr_clp = clp;
+       return 0;
+}
+
 static int
 nfsd4_cld_grace_start(struct nfsd_net *nn)
 {
@@ -1223,16 +1411,16 @@ nfsd4_cld_grace_start(struct nfsd_net *nn)
        struct cld_upcall *cup;
        struct cld_net *cn = nn->cld_net;
 
-       cup = alloc_cld_upcall(cn);
+       cup = alloc_cld_upcall(nn);
        if (!cup) {
                ret = -ENOMEM;
                goto out_err;
        }
 
-       cup->cu_msg.cm_cmd = Cld_GraceStart;
-       ret = cld_pipe_upcall(cn->cn_pipe, &cup->cu_msg);
+       cup->cu_u.cu_msg.cm_cmd = Cld_GraceStart;
+       ret = cld_pipe_upcall(cn->cn_pipe, &cup->cu_u.cu_msg);
        if (!ret)
-               ret = cup->cu_msg.cm_status;
+               ret = cup->cu_u.cu_msg.cm_status;
 
        free_cld_upcall(cup);
 out_err:
@@ -1250,17 +1438,17 @@ nfsd4_cld_grace_done_v0(struct nfsd_net *nn)
        struct cld_upcall *cup;
        struct cld_net *cn = nn->cld_net;
 
-       cup = alloc_cld_upcall(cn);
+       cup = alloc_cld_upcall(nn);
        if (!cup) {
                ret = -ENOMEM;
                goto out_err;
        }
 
-       cup->cu_msg.cm_cmd = Cld_GraceDone;
-       cup->cu_msg.cm_u.cm_gracetime = (int64_t)nn->boot_time;
-       ret = cld_pipe_upcall(cn->cn_pipe, &cup->cu_msg);
+       cup->cu_u.cu_msg.cm_cmd = Cld_GraceDone;
+       cup->cu_u.cu_msg.cm_u.cm_gracetime = (int64_t)nn->boot_time;
+       ret = cld_pipe_upcall(cn->cn_pipe, &cup->cu_u.cu_msg);
        if (!ret)
-               ret = cup->cu_msg.cm_status;
+               ret = cup->cu_u.cu_msg.cm_status;
 
        free_cld_upcall(cup);
 out_err:
@@ -1279,16 +1467,16 @@ nfsd4_cld_grace_done(struct nfsd_net *nn)
        struct cld_upcall *cup;
        struct cld_net *cn = nn->cld_net;
 
-       cup = alloc_cld_upcall(cn);
+       cup = alloc_cld_upcall(nn);
        if (!cup) {
                ret = -ENOMEM;
                goto out_err;
        }
 
-       cup->cu_msg.cm_cmd = Cld_GraceDone;
-       ret = cld_pipe_upcall(cn->cn_pipe, &cup->cu_msg);
+       cup->cu_u.cu_msg.cm_cmd = Cld_GraceDone;
+       ret = cld_pipe_upcall(cn->cn_pipe, &cup->cu_u.cu_msg);
        if (!ret)
-               ret = cup->cu_msg.cm_status;
+               ret = cup->cu_u.cu_msg.cm_status;
 
        free_cld_upcall(cup);
 out_err:
@@ -1336,6 +1524,61 @@ cld_running(struct nfsd_net *nn)
        return pipe->nreaders || pipe->nwriters;
 }
 
+/*
+ * Ask the daemon which upcall version it supports and install the
+ * matching client tracking ops in @nn.  The reported version is clamped
+ * to 1..CLD_UPCALL_VERSION before it is used.
+ *
+ * Returns 0 on success, -ENOMEM if no upcall could be allocated, or
+ * whatever the upcall or the daemon's status reported.
+ */
+static int
+nfsd4_cld_get_version(struct nfsd_net *nn)
+{
+       struct cld_net *cn = nn->cld_net;
+       struct cld_upcall *cup;
+       uint8_t vers;
+       int err;
+
+       cup = alloc_cld_upcall(nn);
+       if (!cup) {
+               err = -ENOMEM;
+               goto out_err;
+       }
+
+       cup->cu_u.cu_msg.cm_cmd = Cld_GetVersion;
+       err = cld_pipe_upcall(cn->cn_pipe, &cup->cu_u.cu_msg);
+       if (err)
+               goto out_free;
+
+       /* The upcall went through; now honour the daemon's own status. */
+       err = cup->cu_u.cu_msg.cm_status;
+       if (err)
+               goto out_free;
+
+       vers = cup->cu_u.cu_msg.cm_u.cm_version;
+       dprintk("%s: userspace returned version %u\n", __func__, vers);
+
+       /* Clamp to the range this kernel understands. */
+       if (vers < 1)
+               vers = 1;
+       else if (vers > CLD_UPCALL_VERSION)
+               vers = CLD_UPCALL_VERSION;
+
+       if (vers == 1)
+               nn->client_tracking_ops = &nfsd4_cld_tracking_ops;
+       else if (vers == 2)
+               nn->client_tracking_ops = &nfsd4_cld_tracking_ops_v2;
+
+out_free:
+       free_cld_upcall(cup);
+out_err:
+       if (err)
+               dprintk("%s: Unable to get version from userspace: %d\n",
+                       __func__, err);
+       return err;
+}
+
 static int
 nfsd4_cld_tracking_init(struct net *net)
 {
@@ -1351,6 +1586,11 @@ nfsd4_cld_tracking_init(struct net *net)
        status = __nfsd4_init_cld_pipe(net);
        if (status)
                goto err_shutdown;
+       nn->cld_net->cn_tfm = crypto_alloc_shash("sha256", 0, 0);
+       if (IS_ERR(nn->cld_net->cn_tfm)) {
+               status = PTR_ERR(nn->cld_net->cn_tfm);
+               goto err_remove;
+       }
 
        /*
         * rpc pipe upcalls take 30 seconds to time out, so we don't want to
@@ -1368,10 +1608,14 @@ nfsd4_cld_tracking_init(struct net *net)
                goto err_remove;
        }
 
+       status = nfsd4_cld_get_version(nn);
+       if (status == -EOPNOTSUPP)
+               pr_warn("NFSD: nfsdcld GetVersion upcall failed. Please upgrade nfsdcld.\n");
+
        status = nfsd4_cld_grace_start(nn);
        if (status) {
                if (status == -EOPNOTSUPP)
-                       printk(KERN_WARNING "NFSD: Please upgrade nfsdcld.\n");
+                       pr_warn("NFSD: nfsdcld GraceStart upcall failed. Please upgrade nfsdcld.\n");
                nfs4_release_reclaim(nn);
                goto err_remove;
        } else
@@ -1403,6 +1647,8 @@ static const struct nfsd4_client_tracking_ops nfsd4_cld_tracking_ops_v0 = {
        .remove         = nfsd4_cld_remove,
        .check          = nfsd4_cld_check_v0,
        .grace_done     = nfsd4_cld_grace_done_v0,
+       .version        = 1,
+       .msglen         = sizeof(struct cld_msg),
 };
 
 /* For newer nfsdcld's */
@@ -1413,6 +1659,20 @@ static const struct nfsd4_client_tracking_ops nfsd4_cld_tracking_ops = {
        .remove         = nfsd4_cld_remove,
        .check          = nfsd4_cld_check,
        .grace_done     = nfsd4_cld_grace_done,
+       .version        = 1,
+       .msglen         = sizeof(struct cld_msg),
+};
+
+/* v2 ops: create sends, and check verifies, a hash of the principal if any */
+static const struct nfsd4_client_tracking_ops nfsd4_cld_tracking_ops_v2 = {
+       .init           = nfsd4_cld_tracking_init,
+       .exit           = nfsd4_cld_tracking_exit,
+       .create         = nfsd4_cld_create_v2,
+       .remove         = nfsd4_cld_remove,
+       .check          = nfsd4_cld_check_v2,
+       .grace_done     = nfsd4_cld_grace_done,
+       .version        = 2,
+       .msglen         = sizeof(struct cld_msg_v2),
 };
 
 /* upcall via usermodehelper */
@@ -1760,6 +2020,8 @@ static const struct nfsd4_client_tracking_ops nfsd4_umh_tracking_ops = {
        .remove         = nfsd4_umh_cltrack_remove,
        .check          = nfsd4_umh_cltrack_check,
        .grace_done     = nfsd4_umh_cltrack_grace_done,
+       .version        = 1,
+       .msglen         = 0,
 };
 
 int
index 7857942c5ca6572b8e7b6c2107bca795e2f6c85c..c65aeaa812d42889c34bc655cadbdbf212c26054 100644 (file)
@@ -50,6 +50,7 @@
 
 #include "netns.h"
 #include "pnfs.h"
+#include "filecache.h"
 
 #define NFSDDBG_FACILITY                NFSDDBG_PROC
 
@@ -429,18 +430,18 @@ put_nfs4_file(struct nfs4_file *fi)
        }
 }
 
-static struct file *
+static struct nfsd_file *
 __nfs4_get_fd(struct nfs4_file *f, int oflag)
 {
        if (f->fi_fds[oflag])
-               return get_file(f->fi_fds[oflag]);
+               return nfsd_file_get(f->fi_fds[oflag]);
        return NULL;
 }
 
-static struct file *
+static struct nfsd_file *
 find_writeable_file_locked(struct nfs4_file *f)
 {
-       struct file *ret;
+       struct nfsd_file *ret;
 
        lockdep_assert_held(&f->fi_lock);
 
@@ -450,10 +451,10 @@ find_writeable_file_locked(struct nfs4_file *f)
        return ret;
 }
 
-static struct file *
+static struct nfsd_file *
 find_writeable_file(struct nfs4_file *f)
 {
-       struct file *ret;
+       struct nfsd_file *ret;
 
        spin_lock(&f->fi_lock);
        ret = find_writeable_file_locked(f);
@@ -462,9 +463,10 @@ find_writeable_file(struct nfs4_file *f)
        return ret;
 }
 
-static struct file *find_readable_file_locked(struct nfs4_file *f)
+static struct nfsd_file *
+find_readable_file_locked(struct nfs4_file *f)
 {
-       struct file *ret;
+       struct nfsd_file *ret;
 
        lockdep_assert_held(&f->fi_lock);
 
@@ -474,10 +476,10 @@ static struct file *find_readable_file_locked(struct nfs4_file *f)
        return ret;
 }
 
-static struct file *
+static struct nfsd_file *
 find_readable_file(struct nfs4_file *f)
 {
-       struct file *ret;
+       struct nfsd_file *ret;
 
        spin_lock(&f->fi_lock);
        ret = find_readable_file_locked(f);
@@ -486,10 +488,10 @@ find_readable_file(struct nfs4_file *f)
        return ret;
 }
 
-struct file *
+struct nfsd_file *
 find_any_file(struct nfs4_file *f)
 {
-       struct file *ret;
+       struct nfsd_file *ret;
 
        spin_lock(&f->fi_lock);
        ret = __nfs4_get_fd(f, O_RDWR);
@@ -590,17 +592,17 @@ static void __nfs4_file_put_access(struct nfs4_file *fp, int oflag)
        might_lock(&fp->fi_lock);
 
        if (atomic_dec_and_lock(&fp->fi_access[oflag], &fp->fi_lock)) {
-               struct file *f1 = NULL;
-               struct file *f2 = NULL;
+               struct nfsd_file *f1 = NULL;
+               struct nfsd_file *f2 = NULL;
 
                swap(f1, fp->fi_fds[oflag]);
                if (atomic_read(&fp->fi_access[1 - oflag]) == 0)
                        swap(f2, fp->fi_fds[O_RDWR]);
                spin_unlock(&fp->fi_lock);
                if (f1)
-                       fput(f1);
+                       nfsd_file_put(f1);
                if (f2)
-                       fput(f2);
+                       nfsd_file_put(f2);
        }
 }
 
@@ -933,25 +935,25 @@ nfs4_inc_and_copy_stateid(stateid_t *dst, struct nfs4_stid *stid)
 
 static void put_deleg_file(struct nfs4_file *fp)
 {
-       struct file *filp = NULL;
+       struct nfsd_file *nf = NULL;
 
        spin_lock(&fp->fi_lock);
        if (--fp->fi_delegees == 0)
-               swap(filp, fp->fi_deleg_file);
+               swap(nf, fp->fi_deleg_file);
        spin_unlock(&fp->fi_lock);
 
-       if (filp)
-               fput(filp);
+       if (nf)
+               nfsd_file_put(nf);
 }
 
 static void nfs4_unlock_deleg_lease(struct nfs4_delegation *dp)
 {
        struct nfs4_file *fp = dp->dl_stid.sc_file;
-       struct file *filp = fp->fi_deleg_file;
+       struct nfsd_file *nf = fp->fi_deleg_file;
 
        WARN_ON_ONCE(!fp->fi_delegees);
 
-       vfs_setlease(filp, F_UNLCK, NULL, (void **)&dp);
+       vfs_setlease(nf->nf_file, F_UNLCK, NULL, (void **)&dp);
        put_deleg_file(fp);
 }
 
@@ -1289,11 +1291,14 @@ static void nfs4_free_lock_stateid(struct nfs4_stid *stid)
 {
        struct nfs4_ol_stateid *stp = openlockstateid(stid);
        struct nfs4_lockowner *lo = lockowner(stp->st_stateowner);
-       struct file *file;
+       struct nfsd_file *nf;
 
-       file = find_any_file(stp->st_stid.sc_file);
-       if (file)
-               filp_close(file, (fl_owner_t)lo);
+       nf = find_any_file(stp->st_stid.sc_file);
+       if (nf) {
+               get_file(nf->nf_file);
+               filp_close(nf->nf_file, (fl_owner_t)lo);
+               nfsd_file_put(nf);
+       }
        nfs4_free_ol_stateid(stid);
 }
 
@@ -1563,21 +1568,39 @@ static inline u32 slot_bytes(struct nfsd4_channel_attrs *ca)
  * re-negotiate active sessions and reduce their slot usage to make
  * room for new connections. For now we just fail the create session.
  */
-static u32 nfsd4_get_drc_mem(struct nfsd4_channel_attrs *ca)
+static u32 nfsd4_get_drc_mem(struct nfsd4_channel_attrs *ca, struct nfsd_net *nn)
 {
        u32 slotsize = slot_bytes(ca);
        u32 num = ca->maxreqs;
        unsigned long avail, total_avail;
+       unsigned int scale_factor;
 
        spin_lock(&nfsd_drc_lock);
-       total_avail = nfsd_drc_max_mem - nfsd_drc_mem_used;
+       if (nfsd_drc_max_mem > nfsd_drc_mem_used)
+               total_avail = nfsd_drc_max_mem - nfsd_drc_mem_used;
+       else
+               /* We have handed out more space than we chose in
+                * set_max_drc() to allow.  That isn't really a
+                * problem as long as that doesn't make us think we
+                * have lots more due to integer overflow.
+                */
+               total_avail = 0;
        avail = min((unsigned long)NFSD_MAX_MEM_PER_SESSION, total_avail);
        /*
-        * Never use more than a third of the remaining memory,
-        * unless it's the only way to give this client a slot:
+        * Never use more than a fraction of the remaining memory,
+        * unless it's the only way to give this client a slot.
+        * The chosen fraction is either 1/8 or 1/number of threads,
+        * whichever is smaller.  This ensures there are adequate
+        * slots to support multiple clients per thread.
+        * Give the client one slot even if that would require
+        * over-allocation--it is better than failure.
         */
-       avail = clamp_t(unsigned long, avail, slotsize, total_avail/3);
+       scale_factor = max_t(unsigned int, 8, nn->nfsd_serv->sv_nrthreads);
+
+       avail = clamp_t(unsigned long, avail, slotsize,
+                       total_avail/scale_factor);
        num = min_t(int, num, avail / slotsize);
+       num = max_t(int, num, 1);
        nfsd_drc_mem_used += num * slotsize;
        spin_unlock(&nfsd_drc_lock);
 
@@ -2323,9 +2346,9 @@ static void states_stop(struct seq_file *s, void *v)
        spin_unlock(&clp->cl_lock);
 }
 
-static void nfs4_show_superblock(struct seq_file *s, struct file *f)
+static void nfs4_show_superblock(struct seq_file *s, struct nfsd_file *f)
 {
-       struct inode *inode = file_inode(f);
+       struct inode *inode = f->nf_inode;
 
        seq_printf(s, "superblock: \"%02x:%02x:%ld\"",
                                        MAJOR(inode->i_sb->s_dev),
@@ -2343,7 +2366,7 @@ static int nfs4_show_open(struct seq_file *s, struct nfs4_stid *st)
 {
        struct nfs4_ol_stateid *ols;
        struct nfs4_file *nf;
-       struct file *file;
+       struct nfsd_file *file;
        struct nfs4_stateowner *oo;
        unsigned int access, deny;
 
@@ -2370,7 +2393,7 @@ static int nfs4_show_open(struct seq_file *s, struct nfs4_stid *st)
        seq_printf(s, ", ");
        nfs4_show_owner(s, oo);
        seq_printf(s, " }\n");
-       fput(file);
+       nfsd_file_put(file);
 
        return 0;
 }
@@ -2379,7 +2402,7 @@ static int nfs4_show_lock(struct seq_file *s, struct nfs4_stid *st)
 {
        struct nfs4_ol_stateid *ols;
        struct nfs4_file *nf;
-       struct file *file;
+       struct nfsd_file *file;
        struct nfs4_stateowner *oo;
 
        ols = openlockstateid(st);
@@ -2401,7 +2424,7 @@ static int nfs4_show_lock(struct seq_file *s, struct nfs4_stid *st)
        seq_printf(s, ", ");
        nfs4_show_owner(s, oo);
        seq_printf(s, " }\n");
-       fput(file);
+       nfsd_file_put(file);
 
        return 0;
 }
@@ -2410,7 +2433,7 @@ static int nfs4_show_deleg(struct seq_file *s, struct nfs4_stid *st)
 {
        struct nfs4_delegation *ds;
        struct nfs4_file *nf;
-       struct file *file;
+       struct nfsd_file *file;
 
        ds = delegstateid(st);
        nf = st->sc_file;
@@ -2433,7 +2456,7 @@ static int nfs4_show_deleg(struct seq_file *s, struct nfs4_stid *st)
 static int nfs4_show_layout(struct seq_file *s, struct nfs4_stid *st)
 {
        struct nfs4_layout_stateid *ls;
-       struct file *file;
+       struct nfsd_file *file;
 
        ls = container_of(st, struct nfs4_layout_stateid, ls_stid);
        file = ls->ls_file;
@@ -3169,10 +3192,10 @@ static __be32 check_forechannel_attrs(struct nfsd4_channel_attrs *ca, struct nfs
         * performance.  When short on memory we therefore prefer to
         * decrease number of slots instead of their size.  Clients that
         * request larger slots than they need will get poor results:
+        * Note that we always allow at least one slot, because our
+        * accounting is soft and provides no guarantees either way.
         */
-       ca->maxreqs = nfsd4_get_drc_mem(ca);
-       if (!ca->maxreqs)
-               return nfserr_jukebox;
+       ca->maxreqs = nfsd4_get_drc_mem(ca, nn);
 
        return nfs_ok;
 }
@@ -4651,7 +4674,7 @@ static __be32 nfs4_get_vfs_file(struct svc_rqst *rqstp, struct nfs4_file *fp,
                struct svc_fh *cur_fh, struct nfs4_ol_stateid *stp,
                struct nfsd4_open *open)
 {
-       struct file *filp = NULL;
+       struct nfsd_file *nf = NULL;
        __be32 status;
        int oflag = nfs4_access_to_omode(open->op_share_access);
        int access = nfs4_access_to_access(open->op_share_access);
@@ -4687,18 +4710,18 @@ static __be32 nfs4_get_vfs_file(struct svc_rqst *rqstp, struct nfs4_file *fp,
 
        if (!fp->fi_fds[oflag]) {
                spin_unlock(&fp->fi_lock);
-               status = nfsd_open(rqstp, cur_fh, S_IFREG, access, &filp);
+               status = nfsd_file_acquire(rqstp, cur_fh, access, &nf);
                if (status)
                        goto out_put_access;
                spin_lock(&fp->fi_lock);
                if (!fp->fi_fds[oflag]) {
-                       fp->fi_fds[oflag] = filp;
-                       filp = NULL;
+                       fp->fi_fds[oflag] = nf;
+                       nf = NULL;
                }
        }
        spin_unlock(&fp->fi_lock);
-       if (filp)
-               fput(filp);
+       if (nf)
+               nfsd_file_put(nf);
 
        status = nfsd4_truncate(rqstp, cur_fh, open);
        if (status)
@@ -4767,7 +4790,7 @@ static struct file_lock *nfs4_alloc_init_lease(struct nfs4_delegation *dp,
        fl->fl_end = OFFSET_MAX;
        fl->fl_owner = (fl_owner_t)dp;
        fl->fl_pid = current->tgid;
-       fl->fl_file = dp->dl_stid.sc_file->fi_deleg_file;
+       fl->fl_file = dp->dl_stid.sc_file->fi_deleg_file->nf_file;
        return fl;
 }
 
@@ -4777,7 +4800,7 @@ nfs4_set_delegation(struct nfs4_client *clp, struct svc_fh *fh,
 {
        int status = 0;
        struct nfs4_delegation *dp;
-       struct file *filp;
+       struct nfsd_file *nf;
        struct file_lock *fl;
 
        /*
@@ -4788,8 +4811,8 @@ nfs4_set_delegation(struct nfs4_client *clp, struct svc_fh *fh,
        if (fp->fi_had_conflict)
                return ERR_PTR(-EAGAIN);
 
-       filp = find_readable_file(fp);
-       if (!filp) {
+       nf = find_readable_file(fp);
+       if (!nf) {
                /* We should always have a readable file here */
                WARN_ON_ONCE(1);
                return ERR_PTR(-EBADF);
@@ -4799,17 +4822,17 @@ nfs4_set_delegation(struct nfs4_client *clp, struct svc_fh *fh,
        if (nfs4_delegation_exists(clp, fp))
                status = -EAGAIN;
        else if (!fp->fi_deleg_file) {
-               fp->fi_deleg_file = filp;
+               fp->fi_deleg_file = nf;
                /* increment early to prevent fi_deleg_file from being
                 * cleared */
                fp->fi_delegees = 1;
-               filp = NULL;
+               nf = NULL;
        } else
                fp->fi_delegees++;
        spin_unlock(&fp->fi_lock);
        spin_unlock(&state_lock);
-       if (filp)
-               fput(filp);
+       if (nf)
+               nfsd_file_put(nf);
        if (status)
                return ERR_PTR(status);
 
@@ -4822,7 +4845,7 @@ nfs4_set_delegation(struct nfs4_client *clp, struct svc_fh *fh,
        if (!fl)
                goto out_clnt_odstate;
 
-       status = vfs_setlease(fp->fi_deleg_file, fl->fl_type, &fl, NULL);
+       status = vfs_setlease(fp->fi_deleg_file->nf_file, fl->fl_type, &fl, NULL);
        if (fl)
                locks_free_lock(fl);
        if (status)
@@ -4842,7 +4865,7 @@ nfs4_set_delegation(struct nfs4_client *clp, struct svc_fh *fh,
 
        return dp;
 out_unlock:
-       vfs_setlease(fp->fi_deleg_file, F_UNLCK, NULL, (void **)&dp);
+       vfs_setlease(fp->fi_deleg_file->nf_file, F_UNLCK, NULL, (void **)&dp);
 out_clnt_odstate:
        put_clnt_odstate(dp->dl_clnt_odstate);
        nfs4_put_stid(&dp->dl_stid);
@@ -5513,7 +5536,7 @@ nfsd4_lookup_stateid(struct nfsd4_compound_state *cstate,
        return nfs_ok;
 }
 
-static struct file *
+static struct nfsd_file *
 nfs4_find_file(struct nfs4_stid *s, int flags)
 {
        if (!s)
@@ -5523,7 +5546,7 @@ nfs4_find_file(struct nfs4_stid *s, int flags)
        case NFS4_DELEG_STID:
                if (WARN_ON_ONCE(!s->sc_file->fi_deleg_file))
                        return NULL;
-               return get_file(s->sc_file->fi_deleg_file);
+               return nfsd_file_get(s->sc_file->fi_deleg_file);
        case NFS4_OPEN_STID:
        case NFS4_LOCK_STID:
                if (flags & RD_STATE)
@@ -5549,32 +5572,28 @@ nfs4_check_olstateid(struct nfs4_ol_stateid *ols, int flags)
 
 static __be32
 nfs4_check_file(struct svc_rqst *rqstp, struct svc_fh *fhp, struct nfs4_stid *s,
-               struct file **filpp, bool *tmp_file, int flags)
+               struct nfsd_file **nfp, int flags)
 {
        int acc = (flags & RD_STATE) ? NFSD_MAY_READ : NFSD_MAY_WRITE;
-       struct file *file;
+       struct nfsd_file *nf;
        __be32 status;
 
-       file = nfs4_find_file(s, flags);
-       if (file) {
+       nf = nfs4_find_file(s, flags);
+       if (nf) {
                status = nfsd_permission(rqstp, fhp->fh_export, fhp->fh_dentry,
                                acc | NFSD_MAY_OWNER_OVERRIDE);
                if (status) {
-                       fput(file);
-                       return status;
+                       nfsd_file_put(nf);
+                       goto out;
                }
-
-               *filpp = file;
        } else {
-               status = nfsd_open(rqstp, fhp, S_IFREG, acc, filpp);
+               status = nfsd_file_acquire(rqstp, fhp, acc, &nf);
                if (status)
                        return status;
-
-               if (tmp_file)
-                       *tmp_file = true;
        }
-
-       return 0;
+       *nfp = nf;
+out:
+       return status;
 }
 
 /*
@@ -5583,7 +5602,7 @@ nfs4_check_file(struct svc_rqst *rqstp, struct svc_fh *fhp, struct nfs4_stid *s,
 __be32
 nfs4_preprocess_stateid_op(struct svc_rqst *rqstp,
                struct nfsd4_compound_state *cstate, struct svc_fh *fhp,
-               stateid_t *stateid, int flags, struct file **filpp, bool *tmp_file)
+               stateid_t *stateid, int flags, struct nfsd_file **nfp)
 {
        struct inode *ino = d_inode(fhp->fh_dentry);
        struct net *net = SVC_NET(rqstp);
@@ -5591,10 +5610,8 @@ nfs4_preprocess_stateid_op(struct svc_rqst *rqstp,
        struct nfs4_stid *s = NULL;
        __be32 status;
 
-       if (filpp)
-               *filpp = NULL;
-       if (tmp_file)
-               *tmp_file = false;
+       if (nfp)
+               *nfp = NULL;
 
        if (grace_disallows_io(net, ino))
                return nfserr_grace;
@@ -5631,8 +5648,8 @@ nfs4_preprocess_stateid_op(struct svc_rqst *rqstp,
        status = nfs4_check_fh(fhp, s);
 
 done:
-       if (!status && filpp)
-               status = nfs4_check_file(rqstp, fhp, s, filpp, tmp_file, flags);
+       if (status == nfs_ok && nfp)
+               status = nfs4_check_file(rqstp, fhp, s, nfp, flags);
 out:
        if (s)
                nfs4_put_stid(s);
@@ -6392,7 +6409,7 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
        struct nfs4_ol_stateid *lock_stp = NULL;
        struct nfs4_ol_stateid *open_stp = NULL;
        struct nfs4_file *fp;
-       struct file *filp = NULL;
+       struct nfsd_file *nf = NULL;
        struct nfsd4_blocked_lock *nbl = NULL;
        struct file_lock *file_lock = NULL;
        struct file_lock *conflock = NULL;
@@ -6474,8 +6491,8 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
                        /* Fallthrough */
                case NFS4_READ_LT:
                        spin_lock(&fp->fi_lock);
-                       filp = find_readable_file_locked(fp);
-                       if (filp)
+                       nf = find_readable_file_locked(fp);
+                       if (nf)
                                get_lock_access(lock_stp, NFS4_SHARE_ACCESS_READ);
                        spin_unlock(&fp->fi_lock);
                        fl_type = F_RDLCK;
@@ -6486,8 +6503,8 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
                        /* Fallthrough */
                case NFS4_WRITE_LT:
                        spin_lock(&fp->fi_lock);
-                       filp = find_writeable_file_locked(fp);
-                       if (filp)
+                       nf = find_writeable_file_locked(fp);
+                       if (nf)
                                get_lock_access(lock_stp, NFS4_SHARE_ACCESS_WRITE);
                        spin_unlock(&fp->fi_lock);
                        fl_type = F_WRLCK;
@@ -6497,7 +6514,7 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
                goto out;
        }
 
-       if (!filp) {
+       if (!nf) {
                status = nfserr_openmode;
                goto out;
        }
@@ -6513,7 +6530,7 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
        file_lock->fl_type = fl_type;
        file_lock->fl_owner = (fl_owner_t)lockowner(nfs4_get_stateowner(&lock_sop->lo_owner));
        file_lock->fl_pid = current->tgid;
-       file_lock->fl_file = filp;
+       file_lock->fl_file = nf->nf_file;
        file_lock->fl_flags = fl_flags;
        file_lock->fl_lmops = &nfsd_posix_mng_ops;
        file_lock->fl_start = lock->lk_offset;
@@ -6535,7 +6552,7 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
                spin_unlock(&nn->blocked_locks_lock);
        }
 
-       err = vfs_lock_file(filp, F_SETLK, file_lock, conflock);
+       err = vfs_lock_file(nf->nf_file, F_SETLK, file_lock, conflock);
        switch (err) {
        case 0: /* success! */
                nfs4_inc_and_copy_stateid(&lock->lk_resp_stateid, &lock_stp->st_stid);
@@ -6570,8 +6587,8 @@ out:
                }
                free_blocked_lock(nbl);
        }
-       if (filp)
-               fput(filp);
+       if (nf)
+               nfsd_file_put(nf);
        if (lock_stp) {
                /* Bump seqid manually if the 4.0 replay owner is openowner */
                if (cstate->replay_owner &&
@@ -6606,11 +6623,11 @@ out:
  */
 static __be32 nfsd_test_lock(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file_lock *lock)
 {
-       struct file *file;
-       __be32 err = nfsd_open(rqstp, fhp, S_IFREG, NFSD_MAY_READ, &file);
+       struct nfsd_file *nf;
+       __be32 err = nfsd_file_acquire(rqstp, fhp, NFSD_MAY_READ, &nf);
        if (!err) {
-               err = nfserrno(vfs_test_lock(file, lock));
-               fput(file);
+               err = nfserrno(vfs_test_lock(nf->nf_file, lock));
+               nfsd_file_put(nf);
        }
        return err;
 }
@@ -6698,7 +6715,7 @@ nfsd4_locku(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 {
        struct nfsd4_locku *locku = &u->locku;
        struct nfs4_ol_stateid *stp;
-       struct file *filp = NULL;
+       struct nfsd_file *nf = NULL;
        struct file_lock *file_lock = NULL;
        __be32 status;
        int err;
@@ -6716,8 +6733,8 @@ nfsd4_locku(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
                                        &stp, nn);
        if (status)
                goto out;
-       filp = find_any_file(stp->st_stid.sc_file);
-       if (!filp) {
+       nf = find_any_file(stp->st_stid.sc_file);
+       if (!nf) {
                status = nfserr_lock_range;
                goto put_stateid;
        }
@@ -6725,13 +6742,13 @@ nfsd4_locku(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
        if (!file_lock) {
                dprintk("NFSD: %s: unable to allocate lock!\n", __func__);
                status = nfserr_jukebox;
-               goto fput;
+               goto put_file;
        }
 
        file_lock->fl_type = F_UNLCK;
        file_lock->fl_owner = (fl_owner_t)lockowner(nfs4_get_stateowner(stp->st_stateowner));
        file_lock->fl_pid = current->tgid;
-       file_lock->fl_file = filp;
+       file_lock->fl_file = nf->nf_file;
        file_lock->fl_flags = FL_POSIX;
        file_lock->fl_lmops = &nfsd_posix_mng_ops;
        file_lock->fl_start = locku->lu_offset;
@@ -6740,14 +6757,14 @@ nfsd4_locku(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
                                                locku->lu_length);
        nfs4_transform_lock_offset(file_lock);
 
-       err = vfs_lock_file(filp, F_SETLK, file_lock, NULL);
+       err = vfs_lock_file(nf->nf_file, F_SETLK, file_lock, NULL);
        if (err) {
                dprintk("NFSD: nfs4_locku: vfs_lock_file failed!\n");
                goto out_nfserr;
        }
        nfs4_inc_and_copy_stateid(&locku->lu_stateid, &stp->st_stid);
-fput:
-       fput(filp);
+put_file:
+       nfsd_file_put(nf);
 put_stateid:
        mutex_unlock(&stp->st_mutex);
        nfs4_put_stid(&stp->st_stid);
@@ -6759,7 +6776,7 @@ out:
 
 out_nfserr:
        status = nfserrno(err);
-       goto fput;
+       goto put_file;
 }
 
 /*
@@ -6772,17 +6789,17 @@ check_for_locks(struct nfs4_file *fp, struct nfs4_lockowner *lowner)
 {
        struct file_lock *fl;
        int status = false;
-       struct file *filp = find_any_file(fp);
+       struct nfsd_file *nf = find_any_file(fp);
        struct inode *inode;
        struct file_lock_context *flctx;
 
-       if (!filp) {
+       if (!nf) {
                /* Any valid lock stateid should have some sort of access */
                WARN_ON_ONCE(1);
                return status;
        }
 
-       inode = locks_inode(filp);
+       inode = locks_inode(nf->nf_file);
        flctx = inode->i_flctx;
 
        if (flctx && !list_empty_careful(&flctx->flc_posix)) {
@@ -6795,7 +6812,7 @@ check_for_locks(struct nfs4_file *fp, struct nfs4_lockowner *lowner)
                }
                spin_unlock(&flctx->flc_lock);
        }
-       fput(filp);
+       nfsd_file_put(nf);
        return status;
 }
 
@@ -6888,7 +6905,8 @@ nfs4_has_reclaimed_state(struct xdr_netobj name, struct nfsd_net *nn)
  * will be freed in nfs4_remove_reclaim_record in the normal case).
  */
 struct nfs4_client_reclaim *
-nfs4_client_to_reclaim(struct xdr_netobj name, struct nfsd_net *nn)
+nfs4_client_to_reclaim(struct xdr_netobj name, struct xdr_netobj princhash,
+               struct nfsd_net *nn)
 {
        unsigned int strhashval;
        struct nfs4_client_reclaim *crp;
@@ -6901,6 +6919,8 @@ nfs4_client_to_reclaim(struct xdr_netobj name, struct nfsd_net *nn)
                list_add(&crp->cr_strhash, &nn->reclaim_str_hashtbl[strhashval]);
                crp->cr_name.data = name.data;
                crp->cr_name.len = name.len;
+               crp->cr_princhash.data = princhash.data;
+               crp->cr_princhash.len = princhash.len;
                crp->cr_clp = NULL;
                nn->reclaim_str_hashtbl_size++;
        }
@@ -6912,6 +6932,7 @@ nfs4_remove_reclaim_record(struct nfs4_client_reclaim *crp, struct nfsd_net *nn)
 {
        list_del(&crp->cr_strhash);
        kfree(crp->cr_name.data);
+       kfree(crp->cr_princhash.data);
        kfree(crp);
        nn->reclaim_str_hashtbl_size--;
 }
index 442811809f3dbf02eadce650678cac140b829ba0..533d0fc3c96b8fa43c2465489d00ab8f326085e0 100644 (file)
@@ -49,6 +49,7 @@
 #include "cache.h"
 #include "netns.h"
 #include "pnfs.h"
+#include "filecache.h"
 
 #ifdef CONFIG_NFSD_V4_SECURITY_LABEL
 #include <linux/security.h>
@@ -203,6 +204,13 @@ static __be32 *read_buf(struct nfsd4_compoundargs *argp, u32 nbytes)
        return p;
 }
 
+static unsigned int compoundargs_bytes_left(struct nfsd4_compoundargs *argp)
+{
+       unsigned int this = (char *)argp->end - (char *)argp->p;
+
+       return this + argp->pagelen;
+}
+
 static int zero_clientid(clientid_t *clid)
 {
        return (clid->cl_boot == 0) && (clid->cl_id == 0);
@@ -211,10 +219,10 @@ static int zero_clientid(clientid_t *clid)
 /**
  * svcxdr_tmpalloc - allocate memory to be freed after compound processing
  * @argp: NFSv4 compound argument structure
- * @p: pointer to be freed (with kfree())
+ * @len: length of buffer to allocate
  *
- * Marks @p to be freed when processing the compound operation
- * described in @argp finishes.
+ * Allocates a buffer of size @len to be freed when processing the compound
+ * operation described in @argp finishes.
  */
 static void *
 svcxdr_tmpalloc(struct nfsd4_compoundargs *argp, u32 len)
@@ -347,7 +355,12 @@ nfsd4_decode_fattr(struct nfsd4_compoundargs *argp, u32 *bmval,
                READ_BUF(4); len += 4;
                nace = be32_to_cpup(p++);
 
-               if (nace > NFS4_ACL_MAX)
+               if (nace > compoundargs_bytes_left(argp)/20)
+                       /*
+                        * Even with 4-byte names there wouldn't be
+                        * space for that many aces; something fishy is
+                        * going on:
+                        */
                        return nfserr_fbig;
 
                *acl = svcxdr_tmpalloc(argp, nfs4_acl_bytes(nace));
@@ -1418,7 +1431,6 @@ nfsd4_decode_create_session(struct nfsd4_compoundargs *argp,
                            struct nfsd4_create_session *sess)
 {
        DECODE_HEAD;
-       u32 dummy;
 
        READ_BUF(16);
        COPYMEM(&sess->clientid, 8);
@@ -1427,7 +1439,7 @@ nfsd4_decode_create_session(struct nfsd4_compoundargs *argp,
 
        /* Fore channel attrs */
        READ_BUF(28);
-       dummy = be32_to_cpup(p++); /* headerpadsz is always 0 */
+       p++; /* headerpadsz is always 0 */
        sess->fore_channel.maxreq_sz = be32_to_cpup(p++);
        sess->fore_channel.maxresp_sz = be32_to_cpup(p++);
        sess->fore_channel.maxresp_cached = be32_to_cpup(p++);
@@ -1444,7 +1456,7 @@ nfsd4_decode_create_session(struct nfsd4_compoundargs *argp,
 
        /* Back channel attrs */
        READ_BUF(28);
-       dummy = be32_to_cpup(p++); /* headerpadsz is always 0 */
+       p++; /* headerpadsz is always 0 */
        sess->back_channel.maxreq_sz = be32_to_cpup(p++);
        sess->back_channel.maxresp_sz = be32_to_cpup(p++);
        sess->back_channel.maxresp_cached = be32_to_cpup(p++);
@@ -1736,7 +1748,6 @@ static __be32
 nfsd4_decode_copy(struct nfsd4_compoundargs *argp, struct nfsd4_copy *copy)
 {
        DECODE_HEAD;
-       unsigned int tmp;
 
        status = nfsd4_decode_stateid(argp, &copy->cp_src_stateid);
        if (status)
@@ -1751,7 +1762,7 @@ nfsd4_decode_copy(struct nfsd4_compoundargs *argp, struct nfsd4_copy *copy)
        p = xdr_decode_hyper(p, &copy->cp_count);
        p++; /* ca_consecutive: we always do consecutive copies */
        copy->cp_synchronous = be32_to_cpup(p++);
-       tmp = be32_to_cpup(p); /* Source server list not supported */
+       /* Source server list not supported: the word at p is not decoded */
 
        DECODE_TAIL;
 }
@@ -3217,9 +3228,8 @@ nfsd4_encode_create(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_
        if (!p)
                return nfserr_resource;
        encode_cinfo(p, &create->cr_cinfo);
-       nfserr = nfsd4_encode_bitmap(xdr, create->cr_bmval[0],
+       return nfsd4_encode_bitmap(xdr, create->cr_bmval[0],
                        create->cr_bmval[1], create->cr_bmval[2]);
-       return 0;
 }
 
 static __be32
@@ -3462,7 +3472,7 @@ static __be32 nfsd4_encode_splice_read(
 
        len = maxcount;
        nfserr = nfsd_splice_read(read->rd_rqstp, read->rd_fhp,
-                                 file, read->rd_offset, &maxcount);
+                                 file, read->rd_offset, &maxcount, &eof);
        read->rd_length = maxcount;
        if (nfserr) {
                /*
@@ -3474,9 +3484,6 @@ static __be32 nfsd4_encode_splice_read(
                return nfserr;
        }
 
-       eof = nfsd_eof_on_read(len, maxcount, read->rd_offset,
-                               d_inode(read->rd_fhp->fh_dentry)->i_size);
-
        *(p++) = htonl(eof);
        *(p++) = htonl(maxcount);
 
@@ -3547,15 +3554,13 @@ static __be32 nfsd4_encode_readv(struct nfsd4_compoundres *resp,
 
        len = maxcount;
        nfserr = nfsd_readv(resp->rqstp, read->rd_fhp, file, read->rd_offset,
-                           resp->rqstp->rq_vec, read->rd_vlen, &maxcount);
+                           resp->rqstp->rq_vec, read->rd_vlen, &maxcount,
+                           &eof);
        read->rd_length = maxcount;
        if (nfserr)
                return nfserr;
        xdr_truncate_encode(xdr, starting_len + 8 + ((maxcount+3)&~3));
 
-       eof = nfsd_eof_on_read(len, maxcount, read->rd_offset,
-                               d_inode(read->rd_fhp->fh_dentry)->i_size);
-
        tmp = htonl(eof);
        write_bytes_to_xdr_buf(xdr->buf, starting_len    , &tmp, 4);
        tmp = htonl(maxcount);
@@ -3574,11 +3579,14 @@ nfsd4_encode_read(struct nfsd4_compoundres *resp, __be32 nfserr,
 {
        unsigned long maxcount;
        struct xdr_stream *xdr = &resp->xdr;
-       struct file *file = read->rd_filp;
+       struct file *file;
        int starting_len = xdr->buf->len;
-       struct raparms *ra = NULL;
        __be32 *p;
 
+       if (nfserr)
+               return nfserr;
+       file = read->rd_nf->nf_file;
+
        p = xdr_reserve_space(xdr, 8); /* eof flag and byte count */
        if (!p) {
                WARN_ON_ONCE(test_bit(RQ_SPLICE_OK, &resp->rqstp->rq_flags));
@@ -3596,18 +3604,12 @@ nfsd4_encode_read(struct nfsd4_compoundres *resp, __be32 nfserr,
                         (xdr->buf->buflen - xdr->buf->len));
        maxcount = min_t(unsigned long, maxcount, read->rd_length);
 
-       if (read->rd_tmp_file)
-               ra = nfsd_init_raparms(file);
-
        if (file->f_op->splice_read &&
            test_bit(RQ_SPLICE_OK, &resp->rqstp->rq_flags))
                nfserr = nfsd4_encode_splice_read(resp, read, file, maxcount);
        else
                nfserr = nfsd4_encode_readv(resp, read, file, maxcount);
 
-       if (ra)
-               nfsd_put_raparams(file, ra);
-
        if (nfserr)
                xdr_truncate_encode(xdr, starting_len);
 
index 2c215171c0eb65584cc461c0cc64c587daa53bc5..11b42c523f045b15875a98c988a737c6107797fc 100644 (file)
@@ -1476,6 +1476,7 @@ static __net_init int nfsd_init_net(struct net *net)
 
        atomic_set(&nn->ntf_refcnt, 0);
        init_waitqueue_head(&nn->ntf_wq);
+       seqlock_init(&nn->boot_lock);
 
        mnt =  vfs_kern_mount(&nfsd_fs_type, SB_KERNMOUNT, "nfsd", NULL);
        if (IS_ERR(mnt)) {
index 0d20fd161225a3d97b7a7a046bb4cc8d53f404f4..c83ddac22f38fe024029c308b078f6ca9f73e5b2 100644 (file)
@@ -172,6 +172,7 @@ nfsd_proc_read(struct svc_rqst *rqstp)
        struct nfsd_readargs *argp = rqstp->rq_argp;
        struct nfsd_readres *resp = rqstp->rq_resp;
        __be32  nfserr;
+       u32 eof;
 
        dprintk("nfsd: READ    %s %d bytes at %d\n",
                SVCFH_fmt(&argp->fh),
@@ -195,7 +196,8 @@ nfsd_proc_read(struct svc_rqst *rqstp)
        nfserr = nfsd_read(rqstp, fh_copy(&resp->fh, &argp->fh),
                                  argp->offset,
                                  rqstp->rq_vec, argp->vlen,
-                                 &resp->count);
+                                 &resp->count,
+                                 &eof);
 
        if (nfserr) return nfserr;
        return fh_getattr(&resp->fh, &resp->stat);
index 18d94ea984ba4add43d1af0ca90ed155fc67c700..fdf7ed4bd5dd22e6014c8bf6d15e8326808fe87d 100644 (file)
@@ -27,6 +27,7 @@
 #include "cache.h"
 #include "vfs.h"
 #include "netns.h"
+#include "filecache.h"
 
 #define NFSDDBG_FACILITY       NFSDDBG_SVC
 
@@ -313,22 +314,17 @@ static int nfsd_startup_generic(int nrservs)
        if (nfsd_users++)
                return 0;
 
-       /*
-        * Readahead param cache - will no-op if it already exists.
-        * (Note therefore results will be suboptimal if number of
-        * threads is modified after nfsd start.)
-        */
-       ret = nfsd_racache_init(2*nrservs);
+       ret = nfsd_file_cache_init();
        if (ret)
                goto dec_users;
 
        ret = nfs4_state_start();
        if (ret)
-               goto out_racache;
+               goto out_file_cache;
        return 0;
 
-out_racache:
-       nfsd_racache_shutdown();
+out_file_cache:
+       nfsd_file_cache_shutdown();
 dec_users:
        nfsd_users--;
        return ret;
@@ -340,7 +336,7 @@ static void nfsd_shutdown_generic(void)
                return;
 
        nfs4_state_shutdown();
-       nfsd_racache_shutdown();
+       nfsd_file_cache_shutdown();
 }
 
 static bool nfsd_needs_lockd(struct nfsd_net *nn)
@@ -348,6 +344,35 @@ static bool nfsd_needs_lockd(struct nfsd_net *nn)
        return nfsd_vers(nn, 2, NFSD_TEST) || nfsd_vers(nn, 3, NFSD_TEST);
 }
 
+void nfsd_copy_boot_verifier(__be32 verf[2], struct nfsd_net *nn)
+{
+       int seq = 0;
+
+       do {
+               read_seqbegin_or_lock(&nn->boot_lock, &seq);
+               /*
+                * This is opaque to the client, so there is no need to
+                * byte-swap. Use __force to keep sparse happy. y2038
+                * time_t overflow is irrelevant in this usage.
+                */
+               verf[0] = (__force __be32)nn->nfssvc_boot.tv_sec;
+               verf[1] = (__force __be32)nn->nfssvc_boot.tv_nsec;
+       } while (need_seqretry(&nn->boot_lock, seq));
+       done_seqretry(&nn->boot_lock, seq);
+}
+
+static void nfsd_reset_boot_verifier_locked(struct nfsd_net *nn)
+{
+       ktime_get_real_ts64(&nn->nfssvc_boot);
+}
+
+void nfsd_reset_boot_verifier(struct nfsd_net *nn)
+{
+       write_seqlock(&nn->boot_lock);
+       nfsd_reset_boot_verifier_locked(nn);
+       write_sequnlock(&nn->boot_lock);
+}
+
 static int nfsd_startup_net(int nrservs, struct net *net, const struct cred *cred)
 {
        struct nfsd_net *nn = net_generic(net, nfsd_net_id);
@@ -391,6 +416,7 @@ static void nfsd_shutdown_net(struct net *net)
 {
        struct nfsd_net *nn = net_generic(net, nfsd_net_id);
 
+       nfsd_file_cache_purge(net);
        nfs4_state_shutdown_net(net);
        if (nn->lockd_up) {
                lockd_down(net);
@@ -599,7 +625,7 @@ int nfsd_create_serv(struct net *net)
 #endif
        }
        atomic_inc(&nn->ntf_refcnt);
-       ktime_get_real_ts64(&nn->nfssvc_boot); /* record boot time */
+       nfsd_reset_boot_verifier(nn);
        return 0;
 }
 
index 5dbd16946e8efa5e6e1a5c00ce201a292de02baf..46f56afb6cb838400425a5ac72b3a86807bce625 100644 (file)
@@ -378,6 +378,7 @@ struct nfs4_client_reclaim {
        struct list_head        cr_strhash;     /* hash by cr_name */
        struct nfs4_client      *cr_clp;        /* pointer to associated clp */
        struct xdr_netobj       cr_name;        /* recovery dir name */
+       struct xdr_netobj       cr_princhash;
 };
 
 /* A reasonable value for REPLAY_ISIZE was estimated as follows:  
@@ -506,7 +507,7 @@ struct nfs4_file {
        };
        struct list_head        fi_clnt_odstate;
        /* One each for O_RDONLY, O_WRONLY, O_RDWR: */
-       struct file *           fi_fds[3];
+       struct nfsd_file        *fi_fds[3];
        /*
         * Each open or lock stateid contributes 0-4 to the counts
         * below depending on which bits are set in st_access_bitmap:
@@ -516,7 +517,7 @@ struct nfs4_file {
         */
        atomic_t                fi_access[2];
        u32                     fi_share_deny;
-       struct file             *fi_deleg_file;
+       struct nfsd_file        *fi_deleg_file;
        int                     fi_delegees;
        struct knfsd_fh         fi_fhandle;
        bool                    fi_had_conflict;
@@ -565,7 +566,7 @@ struct nfs4_layout_stateid {
        spinlock_t                      ls_lock;
        struct list_head                ls_layouts;
        u32                             ls_layout_type;
-       struct file                     *ls_file;
+       struct nfsd_file                *ls_file;
        struct nfsd4_callback           ls_recall;
        stateid_t                       ls_recall_sid;
        bool                            ls_recalled;
@@ -616,7 +617,7 @@ struct nfsd4_copy;
 
 extern __be32 nfs4_preprocess_stateid_op(struct svc_rqst *rqstp,
                struct nfsd4_compound_state *cstate, struct svc_fh *fhp,
-               stateid_t *stateid, int flags, struct file **filp, bool *tmp_file);
+               stateid_t *stateid, int flags, struct nfsd_file **filp);
 __be32 nfsd4_lookup_stateid(struct nfsd4_compound_state *cstate,
                     stateid_t *stateid, unsigned char typemask,
                     struct nfs4_stid **s, struct nfsd_net *nn);
@@ -645,7 +646,7 @@ extern void nfsd4_shutdown_callback(struct nfs4_client *);
 extern void nfsd4_shutdown_copy(struct nfs4_client *clp);
 extern void nfsd4_prepare_cb_recall(struct nfs4_delegation *dp);
 extern struct nfs4_client_reclaim *nfs4_client_to_reclaim(struct xdr_netobj name,
-                                                       struct nfsd_net *nn);
+                               struct xdr_netobj princhash, struct nfsd_net *nn);
 extern bool nfs4_has_reclaimed_state(struct xdr_netobj name, struct nfsd_net *nn);
 
 struct nfs4_file *find_file(struct knfsd_fh *fh);
@@ -657,7 +658,7 @@ static inline void get_nfs4_file(struct nfs4_file *fi)
 {
        refcount_inc(&fi->fi_ref);
 }
-struct file *find_any_file(struct nfs4_file *f);
+struct nfsd_file *find_any_file(struct nfs4_file *f);
 
 /* grace period management */
 void nfsd4_end_grace(struct nfsd_net *nn);
index 80933e4334d84b9d80884527d6b029ee8f6aa867..ffc78a0e28b24e59c2976400e4309f861dddfc8f 100644 (file)
@@ -126,6 +126,8 @@ DEFINE_NFSD_ERR_EVENT(read_err);
 DEFINE_NFSD_ERR_EVENT(write_err);
 
 #include "state.h"
+#include "filecache.h"
+#include "vfs.h"
 
 DECLARE_EVENT_CLASS(nfsd_stateid_class,
        TP_PROTO(stateid_t *stp),
@@ -164,6 +166,144 @@ DEFINE_STATEID_EVENT(layout_recall_done);
 DEFINE_STATEID_EVENT(layout_recall_fail);
 DEFINE_STATEID_EVENT(layout_recall_release);
 
+#define show_nf_flags(val)                                             \
+       __print_flags(val, "|",                                         \
+               { 1 << NFSD_FILE_HASHED,        "HASHED" },             \
+               { 1 << NFSD_FILE_PENDING,       "PENDING" },            \
+               { 1 << NFSD_FILE_BREAK_READ,    "BREAK_READ" },         \
+               { 1 << NFSD_FILE_BREAK_WRITE,   "BREAK_WRITE" },        \
+               { 1 << NFSD_FILE_REFERENCED,    "REFERENCED"})
+
+/* FIXME: This should probably be fleshed out in the future. */
+#define show_nf_may(val)                                               \
+       __print_flags(val, "|",                                         \
+               { NFSD_MAY_READ,                "READ" },               \
+               { NFSD_MAY_WRITE,               "WRITE" },              \
+               { NFSD_MAY_NOT_BREAK_LEASE,     "NOT_BREAK_LEASE" })
+
+DECLARE_EVENT_CLASS(nfsd_file_class,
+       TP_PROTO(struct nfsd_file *nf),
+       TP_ARGS(nf),
+       TP_STRUCT__entry(
+               __field(unsigned int, nf_hashval)
+               __field(void *, nf_inode)
+               __field(int, nf_ref)
+               __field(unsigned long, nf_flags)
+               __field(unsigned char, nf_may)
+               __field(struct file *, nf_file)
+       ),
+       TP_fast_assign(
+               __entry->nf_hashval = nf->nf_hashval;
+               __entry->nf_inode = nf->nf_inode;
+               __entry->nf_ref = atomic_read(&nf->nf_ref);
+               __entry->nf_flags = nf->nf_flags;
+               __entry->nf_may = nf->nf_may;
+               __entry->nf_file = nf->nf_file;
+       ),
+       TP_printk("hash=0x%x inode=0x%p ref=%d flags=%s may=%s file=%p",
+               __entry->nf_hashval,
+               __entry->nf_inode,
+               __entry->nf_ref,
+               show_nf_flags(__entry->nf_flags),
+               show_nf_may(__entry->nf_may),
+               __entry->nf_file)
+)
+
+#define DEFINE_NFSD_FILE_EVENT(name) \
+DEFINE_EVENT(nfsd_file_class, name, \
+       TP_PROTO(struct nfsd_file *nf), \
+       TP_ARGS(nf))
+
+DEFINE_NFSD_FILE_EVENT(nfsd_file_alloc);
+DEFINE_NFSD_FILE_EVENT(nfsd_file_put_final);
+DEFINE_NFSD_FILE_EVENT(nfsd_file_unhash);
+DEFINE_NFSD_FILE_EVENT(nfsd_file_put);
+DEFINE_NFSD_FILE_EVENT(nfsd_file_unhash_and_release_locked);
+
+TRACE_EVENT(nfsd_file_acquire, /* records the inputs to, and result of, an nfsd_file acquire */
+       TP_PROTO(struct svc_rqst *rqstp, unsigned int hash,
+                struct inode *inode, unsigned int may_flags,
+                struct nfsd_file *nf, __be32 status),
+
+       TP_ARGS(rqstp, hash, inode, may_flags, nf, status),
+
+       TP_STRUCT__entry(
+               __field(__be32, xid)
+               __field(unsigned int, hash)
+               __field(void *, inode)
+               __field(unsigned int, may_flags)
+               __field(int, nf_ref)
+               __field(unsigned long, nf_flags)
+               __field(unsigned char, nf_may)
+               __field(struct file *, nf_file)
+               __field(__be32, status)
+       ),
+
+       TP_fast_assign(
+               __entry->xid = rqstp->rq_xid; /* rqstp is dereferenced unconditionally */
+               __entry->hash = hash;
+               __entry->inode = inode; /* pointer value only; inode is not dereferenced */
+               __entry->may_flags = may_flags;
+               __entry->nf_ref = nf ? atomic_read(&nf->nf_ref) : 0; /* nf may be NULL: record 0 */
+               __entry->nf_flags = nf ? nf->nf_flags : 0; /* same NULL fallback */
+               __entry->nf_may = nf ? nf->nf_may : 0; /* same NULL fallback */
+               __entry->nf_file = nf ? nf->nf_file : NULL; /* same NULL fallback */
+               __entry->status = status; /* stored as __be32; converted when printed */
+       ),
+
+       TP_printk("xid=0x%x hash=0x%x inode=0x%p may_flags=%s ref=%d nf_flags=%s nf_may=%s nf_file=0x%p status=%u",
+                       be32_to_cpu(__entry->xid), __entry->hash, __entry->inode,
+                       show_nf_may(__entry->may_flags), __entry->nf_ref,
+                       show_nf_flags(__entry->nf_flags),
+                       show_nf_may(__entry->nf_may), __entry->nf_file,
+                       be32_to_cpu(__entry->status)) /* xid and status are byte-swapped for display */
+);
+
+DECLARE_EVENT_CLASS(nfsd_file_search_class,
+       TP_PROTO(struct inode *inode, unsigned int hash, int found),
+       TP_ARGS(inode, hash, found),
+       TP_STRUCT__entry(
+               __field(struct inode *, inode)
+               __field(unsigned int, hash)
+               __field(int, found)
+       ),
+       TP_fast_assign(
+               __entry->inode = inode;
+               __entry->hash = hash;
+               __entry->found = found;
+       ),
+       TP_printk("hash=0x%x inode=0x%p found=%d", __entry->hash,
+                       __entry->inode, __entry->found)
+);
+
+#define DEFINE_NFSD_FILE_SEARCH_EVENT(name)                            \
+DEFINE_EVENT(nfsd_file_search_class, name,                             \
+       TP_PROTO(struct inode *inode, unsigned int hash, int found),    \
+       TP_ARGS(inode, hash, found))
+
+DEFINE_NFSD_FILE_SEARCH_EVENT(nfsd_file_close_inode_sync);
+DEFINE_NFSD_FILE_SEARCH_EVENT(nfsd_file_close_inode);
+DEFINE_NFSD_FILE_SEARCH_EVENT(nfsd_file_is_cached);
+
+TRACE_EVENT(nfsd_file_fsnotify_handle_event, /* records an inode's link count and mode with an event mask */
+       TP_PROTO(struct inode *inode, u32 mask),
+       TP_ARGS(inode, mask),
+       TP_STRUCT__entry(
+               __field(struct inode *, inode)
+               __field(unsigned int, nlink)
+               __field(umode_t, mode)
+               __field(u32, mask)
+       ),
+       TP_fast_assign(
+               __entry->inode = inode;
+               __entry->nlink = inode->i_nlink; /* inode is dereferenced here with no NULL check */
+               __entry->mode = inode->i_mode;
+               __entry->mask = mask;
+       ),
+       TP_printk("inode=0x%p nlink=%u mode=0%ho mask=0x%x", __entry->inode, /* mode in octal, mask in hex */
+                       __entry->nlink, __entry->mode, __entry->mask)
+);
+
 #endif /* _NFSD_TRACE_H */
 
 #undef TRACE_INCLUDE_PATH
index c85783e536d595de5816584b3dadac4a73fc4b83..bd0a385df3fc6d632be8bed9f0e298a0da24d66f 100644 (file)
 
 #include "nfsd.h"
 #include "vfs.h"
+#include "filecache.h"
 #include "trace.h"
 
 #define NFSDDBG_FACILITY               NFSDDBG_FILEOP
 
-
-/*
- * This is a cache of readahead params that help us choose the proper
- * readahead strategy. Initially, we set all readahead parameters to 0
- * and let the VFS handle things.
- * If you increase the number of cached files very much, you'll need to
- * add a hash table here.
- */
-struct raparms {
-       struct raparms          *p_next;
-       unsigned int            p_count;
-       ino_t                   p_ino;
-       dev_t                   p_dev;
-       int                     p_set;
-       struct file_ra_state    p_ra;
-       unsigned int            p_hindex;
-};
-
-struct raparm_hbucket {
-       struct raparms          *pb_head;
-       spinlock_t              pb_lock;
-} ____cacheline_aligned_in_smp;
-
-#define RAPARM_HASH_BITS       4
-#define RAPARM_HASH_SIZE       (1<<RAPARM_HASH_BITS)
-#define RAPARM_HASH_MASK       (RAPARM_HASH_SIZE-1)
-static struct raparm_hbucket   raparm_hash[RAPARM_HASH_SIZE];
-
 /* 
  * Called from nfsd_lookup and encode_dirent. Check if we have crossed 
  * a mount point.
@@ -699,7 +672,7 @@ nfsd_access(struct svc_rqst *rqstp, struct svc_fh *fhp, u32 *access, u32 *suppor
 }
 #endif /* CONFIG_NFSD_V3 */
 
-static int nfsd_open_break_lease(struct inode *inode, int access)
+int nfsd_open_break_lease(struct inode *inode, int access)
 {
        unsigned int mode;
 
@@ -715,8 +688,8 @@ static int nfsd_open_break_lease(struct inode *inode, int access)
  * and additional flags.
  * N.B. After this call fhp needs an fh_put
  */
-__be32
-nfsd_open(struct svc_rqst *rqstp, struct svc_fh *fhp, umode_t type,
+static __be32
+__nfsd_open(struct svc_rqst *rqstp, struct svc_fh *fhp, umode_t type,
                        int may_flags, struct file **filp)
 {
        struct path     path;
@@ -726,25 +699,6 @@ nfsd_open(struct svc_rqst *rqstp, struct svc_fh *fhp, umode_t type,
        __be32          err;
        int             host_err = 0;
 
-       validate_process_creds();
-
-       /*
-        * If we get here, then the client has already done an "open",
-        * and (hopefully) checked permission - so allow OWNER_OVERRIDE
-        * in case a chmod has now revoked permission.
-        *
-        * Arguably we should also allow the owner override for
-        * directories, but we never have and it doesn't seem to have
-        * caused anyone a problem.  If we were to change this, note
-        * also that our filldir callbacks would need a variant of
-        * lookup_one_len that doesn't check permissions.
-        */
-       if (type == S_IFREG)
-               may_flags |= NFSD_MAY_OWNER_OVERRIDE;
-       err = fh_verify(rqstp, fhp, type, may_flags);
-       if (err)
-               goto out;
-
        path.mnt = fhp->fh_export->ex_path.mnt;
        path.dentry = fhp->fh_dentry;
        inode = d_inode(path.dentry);
@@ -798,67 +752,46 @@ nfsd_open(struct svc_rqst *rqstp, struct svc_fh *fhp, umode_t type,
 out_nfserr:
        err = nfserrno(host_err);
 out:
-       validate_process_creds();
        return err;
 }
 
-struct raparms *
-nfsd_init_raparms(struct file *file)
+__be32
+nfsd_open(struct svc_rqst *rqstp, struct svc_fh *fhp, umode_t type,
+               int may_flags, struct file **filp)
 {
-       struct inode *inode = file_inode(file);
-       dev_t dev = inode->i_sb->s_dev;
-       ino_t ino = inode->i_ino;
-       struct raparms  *ra, **rap, **frap = NULL;
-       int depth = 0;
-       unsigned int hash;
-       struct raparm_hbucket *rab;
-
-       hash = jhash_2words(dev, ino, 0xfeedbeef) & RAPARM_HASH_MASK;
-       rab = &raparm_hash[hash];
-
-       spin_lock(&rab->pb_lock);
-       for (rap = &rab->pb_head; (ra = *rap); rap = &ra->p_next) {
-               if (ra->p_ino == ino && ra->p_dev == dev)
-                       goto found;
-               depth++;
-               if (ra->p_count == 0)
-                       frap = rap;
-       }
-       depth = nfsdstats.ra_size;
-       if (!frap) {    
-               spin_unlock(&rab->pb_lock);
-               return NULL;
-       }
-       rap = frap;
-       ra = *frap;
-       ra->p_dev = dev;
-       ra->p_ino = ino;
-       ra->p_set = 0;
-       ra->p_hindex = hash;
-found:
-       if (rap != &rab->pb_head) {
-               *rap = ra->p_next;
-               ra->p_next   = rab->pb_head;
-               rab->pb_head = ra;
-       }
-       ra->p_count++;
-       nfsdstats.ra_depth[depth*10/nfsdstats.ra_size]++;
-       spin_unlock(&rab->pb_lock);
+       __be32 err;
 
-       if (ra->p_set)
-               file->f_ra = ra->p_ra;
-       return ra;
+       validate_process_creds();
+       /*
+        * If we get here, then the client has already done an "open",
+        * and (hopefully) checked permission - so allow OWNER_OVERRIDE
+        * in case a chmod has now revoked permission.
+        *
+        * Arguably we should also allow the owner override for
+        * directories, but we never have and it doesn't seem to have
+        * caused anyone a problem.  If we were to change this, note
+        * also that our filldir callbacks would need a variant of
+        * lookup_one_len that doesn't check permissions.
+        */
+       if (type == S_IFREG)
+               may_flags |= NFSD_MAY_OWNER_OVERRIDE;
+       err = fh_verify(rqstp, fhp, type, may_flags);
+       if (!err)
+               err = __nfsd_open(rqstp, fhp, type, may_flags, filp);
+       validate_process_creds();
+       return err;
 }
 
-void nfsd_put_raparams(struct file *file, struct raparms *ra)
+__be32
+nfsd_open_verified(struct svc_rqst *rqstp, struct svc_fh *fhp, umode_t type,
+               int may_flags, struct file **filp)
 {
-       struct raparm_hbucket *rab = &raparm_hash[ra->p_hindex];
+       __be32 err;
 
-       spin_lock(&rab->pb_lock);
-       ra->p_ra = file->f_ra;
-       ra->p_set = 1;
-       ra->p_count--;
-       spin_unlock(&rab->pb_lock);
+       validate_process_creds();
+       err = __nfsd_open(rqstp, fhp, type, may_flags, filp);
+       validate_process_creds();
+       return err;
 }
 
 /*
@@ -901,12 +834,23 @@ static int nfsd_direct_splice_actor(struct pipe_inode_info *pipe,
        return __splice_from_pipe(pipe, sd, nfsd_splice_actor);
 }
 
+static u32 nfsd_eof_on_read(struct file *file, loff_t offset, ssize_t len,
+               size_t expected) /* len = bytes actually read, expected = bytes requested */
+{ /* Decide the EOF flag for a finished read: returns 1 for EOF, 0 otherwise. */
+       if (expected != 0 && len == 0) /* a non-empty request that read nothing counts as EOF */
+               return 1;
+       if (offset+len >= i_size_read(file_inode(file))) /* read ended at or beyond the inode size */
+               return 1;
+       return 0; /* neither condition held */
+}
+
 static __be32 nfsd_finish_read(struct svc_rqst *rqstp, struct svc_fh *fhp,
                               struct file *file, loff_t offset,
-                              unsigned long *count, int host_err)
+                              unsigned long *count, u32 *eof, ssize_t host_err)
 {
        if (host_err >= 0) {
                nfsdstats.io_read += host_err;
+               *eof = nfsd_eof_on_read(file, offset, host_err, *count);
                *count = host_err;
                fsnotify_access(file);
                trace_nfsd_read_io_done(rqstp, fhp, offset, *count);
@@ -918,7 +862,8 @@ static __be32 nfsd_finish_read(struct svc_rqst *rqstp, struct svc_fh *fhp,
 }
 
 __be32 nfsd_splice_read(struct svc_rqst *rqstp, struct svc_fh *fhp,
-                       struct file *file, loff_t offset, unsigned long *count)
+                       struct file *file, loff_t offset, unsigned long *count,
+                       u32 *eof)
 {
        struct splice_desc sd = {
                .len            = 0,
@@ -926,25 +871,27 @@ __be32 nfsd_splice_read(struct svc_rqst *rqstp, struct svc_fh *fhp,
                .pos            = offset,
                .u.data         = rqstp,
        };
-       int host_err;
+       ssize_t host_err;
 
        trace_nfsd_read_splice(rqstp, fhp, offset, *count);
        rqstp->rq_next_page = rqstp->rq_respages + 1;
        host_err = splice_direct_to_actor(file, &sd, nfsd_direct_splice_actor);
-       return nfsd_finish_read(rqstp, fhp, file, offset, count, host_err);
+       return nfsd_finish_read(rqstp, fhp, file, offset, count, eof, host_err);
 }
 
 __be32 nfsd_readv(struct svc_rqst *rqstp, struct svc_fh *fhp,
                  struct file *file, loff_t offset,
-                 struct kvec *vec, int vlen, unsigned long *count)
+                 struct kvec *vec, int vlen, unsigned long *count,
+                 u32 *eof)
 {
        struct iov_iter iter;
-       int host_err;
+       loff_t ppos = offset;
+       ssize_t host_err;
 
        trace_nfsd_read_vector(rqstp, fhp, offset, *count);
        iov_iter_kvec(&iter, READ, vec, vlen, *count);
-       host_err = vfs_iter_read(file, &iter, &offset, 0);
-       return nfsd_finish_read(rqstp, fhp, file, offset, count, host_err);
+       host_err = vfs_iter_read(file, &iter, &ppos, 0);
+       return nfsd_finish_read(rqstp, fhp, file, offset, count, eof, host_err);
 }
 
 /*
@@ -1025,8 +972,12 @@ nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file,
        nfsdstats.io_write += *cnt;
        fsnotify_modify(file);
 
-       if (stable && use_wgather)
+       if (stable && use_wgather) {
                host_err = wait_for_concurrent_writes(file);
+               if (host_err < 0)
+                       nfsd_reset_boot_verifier(net_generic(SVC_NET(rqstp),
+                                                nfsd_net_id));
+       }
 
 out_nfserr:
        if (host_err >= 0) {
@@ -1047,27 +998,25 @@ out_nfserr:
  * N.B. After this call fhp needs an fh_put
  */
 __be32 nfsd_read(struct svc_rqst *rqstp, struct svc_fh *fhp,
-       loff_t offset, struct kvec *vec, int vlen, unsigned long *count)
+       loff_t offset, struct kvec *vec, int vlen, unsigned long *count,
+       u32 *eof)
 {
+       struct nfsd_file        *nf;
        struct file *file;
-       struct raparms  *ra;
        __be32 err;
 
        trace_nfsd_read_start(rqstp, fhp, offset, *count);
-       err = nfsd_open(rqstp, fhp, S_IFREG, NFSD_MAY_READ, &file);
+       err = nfsd_file_acquire(rqstp, fhp, NFSD_MAY_READ, &nf);
        if (err)
                return err;
 
-       ra = nfsd_init_raparms(file);
-
+       file = nf->nf_file;
        if (file->f_op->splice_read && test_bit(RQ_SPLICE_OK, &rqstp->rq_flags))
-               err = nfsd_splice_read(rqstp, fhp, file, offset, count);
+               err = nfsd_splice_read(rqstp, fhp, file, offset, count, eof);
        else
-               err = nfsd_readv(rqstp, fhp, file, offset, vec, vlen, count);
+               err = nfsd_readv(rqstp, fhp, file, offset, vec, vlen, count, eof);
 
-       if (ra)
-               nfsd_put_raparams(file, ra);
-       fput(file);
+       nfsd_file_put(nf);
 
        trace_nfsd_read_done(rqstp, fhp, offset, *count);
 
@@ -1083,17 +1032,18 @@ __be32
 nfsd_write(struct svc_rqst *rqstp, struct svc_fh *fhp, loff_t offset,
           struct kvec *vec, int vlen, unsigned long *cnt, int stable)
 {
-       struct file *file = NULL;
-       __be32 err = 0;
+       struct nfsd_file *nf;
+       __be32 err;
 
        trace_nfsd_write_start(rqstp, fhp, offset, *cnt);
 
-       err = nfsd_open(rqstp, fhp, S_IFREG, NFSD_MAY_WRITE, &file);
+       err = nfsd_file_acquire(rqstp, fhp, NFSD_MAY_WRITE, &nf);
        if (err)
                goto out;
 
-       err = nfsd_vfs_write(rqstp, fhp, file, offset, vec, vlen, cnt, stable);
-       fput(file);
+       err = nfsd_vfs_write(rqstp, fhp, nf->nf_file, offset, vec,
+                       vlen, cnt, stable);
+       nfsd_file_put(nf);
 out:
        trace_nfsd_write_done(rqstp, fhp, offset, *cnt);
        return err;
@@ -1113,9 +1063,9 @@ __be32
 nfsd_commit(struct svc_rqst *rqstp, struct svc_fh *fhp,
                loff_t offset, unsigned long count)
 {
-       struct file     *file;
-       loff_t          end = LLONG_MAX;
-       __be32          err = nfserr_inval;
+       struct nfsd_file        *nf;
+       loff_t                  end = LLONG_MAX;
+       __be32                  err = nfserr_inval;
 
        if (offset < 0)
                goto out;
@@ -1125,20 +1075,27 @@ nfsd_commit(struct svc_rqst *rqstp, struct svc_fh *fhp,
                        goto out;
        }
 
-       err = nfsd_open(rqstp, fhp, S_IFREG,
-                       NFSD_MAY_WRITE|NFSD_MAY_NOT_BREAK_LEASE, &file);
+       err = nfsd_file_acquire(rqstp, fhp,
+                       NFSD_MAY_WRITE|NFSD_MAY_NOT_BREAK_LEASE, &nf);
        if (err)
                goto out;
        if (EX_ISSYNC(fhp->fh_export)) {
-               int err2 = vfs_fsync_range(file, offset, end, 0);
+               int err2 = vfs_fsync_range(nf->nf_file, offset, end, 0);
 
-               if (err2 != -EINVAL)
-                       err = nfserrno(err2);
-               else
+               switch (err2) {
+               case 0:
+                       break;
+               case -EINVAL:
                        err = nfserr_notsupp;
+                       break;
+               default:
+                       err = nfserrno(err2);
+                       nfsd_reset_boot_verifier(net_generic(nf->nf_net,
+                                                nfsd_net_id));
+               }
        }
 
-       fput(file);
+       nfsd_file_put(nf);
 out:
        return err;
 }
@@ -1659,6 +1616,26 @@ out_nfserr:
        goto out_unlock;
 }
 
+static void
+nfsd_close_cached_files(struct dentry *dentry) /* sync-close cached files for dentry's inode */
+{
+       struct inode *inode = d_inode(dentry);
+
+       if (inode && S_ISREG(inode->i_mode)) /* no-op when there is no inode or it is not a regular file */
+               nfsd_file_close_inode_sync(inode);
+}
+
+static bool
+nfsd_has_cached_files(struct dentry *dentry) /* result of nfsd_file_is_cached() for dentry's inode */
+{
+       bool            ret = false; /* answer when there is no inode or it is not a regular file */
+       struct inode *inode = d_inode(dentry);
+
+       if (inode && S_ISREG(inode->i_mode)) /* only regular files are looked up */
+               ret = nfsd_file_is_cached(inode);
+       return ret;
+}
+
 /*
  * Rename a file
  * N.B. After this call _both_ ffhp and tfhp need an fh_put
@@ -1671,6 +1648,7 @@ nfsd_rename(struct svc_rqst *rqstp, struct svc_fh *ffhp, char *fname, int flen,
        struct inode    *fdir, *tdir;
        __be32          err;
        int             host_err;
+       bool            has_cached = false;
 
        err = fh_verify(rqstp, ffhp, S_IFDIR, NFSD_MAY_REMOVE);
        if (err)
@@ -1689,6 +1667,7 @@ nfsd_rename(struct svc_rqst *rqstp, struct svc_fh *ffhp, char *fname, int flen,
        if (!flen || isdotent(fname, flen) || !tlen || isdotent(tname, tlen))
                goto out;
 
+retry:
        host_err = fh_want_write(ffhp);
        if (host_err) {
                err = nfserrno(host_err);
@@ -1728,11 +1707,16 @@ nfsd_rename(struct svc_rqst *rqstp, struct svc_fh *ffhp, char *fname, int flen,
        if (ffhp->fh_export->ex_path.dentry != tfhp->fh_export->ex_path.dentry)
                goto out_dput_new;
 
-       host_err = vfs_rename(fdir, odentry, tdir, ndentry, NULL, 0);
-       if (!host_err) {
-               host_err = commit_metadata(tfhp);
-               if (!host_err)
-                       host_err = commit_metadata(ffhp);
+       if (nfsd_has_cached_files(ndentry)) {
+               has_cached = true;
+               goto out_dput_old;
+       } else {
+               host_err = vfs_rename(fdir, odentry, tdir, ndentry, NULL, 0);
+               if (!host_err) {
+                       host_err = commit_metadata(tfhp);
+                       if (!host_err)
+                               host_err = commit_metadata(ffhp);
+               }
        }
  out_dput_new:
        dput(ndentry);
@@ -1745,12 +1729,26 @@ nfsd_rename(struct svc_rqst *rqstp, struct svc_fh *ffhp, char *fname, int flen,
         * as that would do the wrong thing if the two directories
         * were the same, so again we do it by hand.
         */
-       fill_post_wcc(ffhp);
-       fill_post_wcc(tfhp);
+       if (!has_cached) {
+               fill_post_wcc(ffhp);
+               fill_post_wcc(tfhp);
+       }
        unlock_rename(tdentry, fdentry);
        ffhp->fh_locked = tfhp->fh_locked = false;
        fh_drop_write(ffhp);
 
+       /*
+        * If the target dentry has cached open files, then we need to try to
+        * close them prior to doing the rename. Flushing delayed fput
+        * shouldn't be done with locks held however, so we delay it until this
+        * point and then reattempt the whole shebang.
+        */
+       if (has_cached) {
+               has_cached = false;
+               nfsd_close_cached_files(ndentry);
+               dput(ndentry);
+               goto retry;
+       }
 out:
        return err;
 }
@@ -1797,10 +1795,13 @@ nfsd_unlink(struct svc_rqst *rqstp, struct svc_fh *fhp, int type,
        if (!type)
                type = d_inode(rdentry)->i_mode & S_IFMT;
 
-       if (type != S_IFDIR)
+       if (type != S_IFDIR) {
+               nfsd_close_cached_files(rdentry);
                host_err = vfs_unlink(dirp, rdentry, NULL);
-       else
+       } else {
                host_err = vfs_rmdir(dirp, rdentry);
+       }
+
        if (!host_err)
                host_err = commit_metadata(fhp);
        dput(rdentry);
@@ -2074,63 +2075,3 @@ nfsd_permission(struct svc_rqst *rqstp, struct svc_export *exp,
 
        return err? nfserrno(err) : 0;
 }
-
-void
-nfsd_racache_shutdown(void)
-{
-       struct raparms *raparm, *last_raparm;
-       unsigned int i;
-
-       dprintk("nfsd: freeing readahead buffers.\n");
-
-       for (i = 0; i < RAPARM_HASH_SIZE; i++) {
-               raparm = raparm_hash[i].pb_head;
-               while(raparm) {
-                       last_raparm = raparm;
-                       raparm = raparm->p_next;
-                       kfree(last_raparm);
-               }
-               raparm_hash[i].pb_head = NULL;
-       }
-}
-/*
- * Initialize readahead param cache
- */
-int
-nfsd_racache_init(int cache_size)
-{
-       int     i;
-       int     j = 0;
-       int     nperbucket;
-       struct raparms **raparm = NULL;
-
-
-       if (raparm_hash[0].pb_head)
-               return 0;
-       nperbucket = DIV_ROUND_UP(cache_size, RAPARM_HASH_SIZE);
-       nperbucket = max(2, nperbucket);
-       cache_size = nperbucket * RAPARM_HASH_SIZE;
-
-       dprintk("nfsd: allocating %d readahead buffers.\n", cache_size);
-
-       for (i = 0; i < RAPARM_HASH_SIZE; i++) {
-               spin_lock_init(&raparm_hash[i].pb_lock);
-
-               raparm = &raparm_hash[i].pb_head;
-               for (j = 0; j < nperbucket; j++) {
-                       *raparm = kzalloc(sizeof(struct raparms), GFP_KERNEL);
-                       if (!*raparm)
-                               goto out_nomem;
-                       raparm = &(*raparm)->p_next;
-               }
-               *raparm = NULL;
-       }
-
-       nfsdstats.ra_size = cache_size;
-       return 0;
-
-out_nomem:
-       dprintk("nfsd: kmalloc failed, freeing readahead buffers\n");
-       nfsd_racache_shutdown();
-       return -ENOMEM;
-}
index db351247892d05155e9cc2d2517229b071e807a1..a13fd9d7e1f5eebe6a20613d9adf75a715dac0cf 100644 (file)
@@ -40,8 +40,6 @@
 typedef int (*nfsd_filldir_t)(void *, const char *, int, loff_t, u64, unsigned);
 
 /* nfsd/vfs.c */
-int            nfsd_racache_init(int);
-void           nfsd_racache_shutdown(void);
 int            nfsd_cross_mnt(struct svc_rqst *rqstp, struct dentry **dpp,
                                struct svc_export **expp);
 __be32         nfsd_lookup(struct svc_rqst *, struct svc_fh *,
@@ -75,18 +73,23 @@ __be32              do_nfsd_create(struct svc_rqst *, struct svc_fh *,
 __be32         nfsd_commit(struct svc_rqst *, struct svc_fh *,
                                loff_t, unsigned long);
 #endif /* CONFIG_NFSD_V3 */
+int            nfsd_open_break_lease(struct inode *, int);
 __be32         nfsd_open(struct svc_rqst *, struct svc_fh *, umode_t,
                                int, struct file **);
-struct raparms;
+__be32         nfsd_open_verified(struct svc_rqst *, struct svc_fh *, umode_t,
+                               int, struct file **);
 __be32         nfsd_splice_read(struct svc_rqst *rqstp, struct svc_fh *fhp,
                                struct file *file, loff_t offset,
-                               unsigned long *count);
+                               unsigned long *count,
+                               u32 *eof);
 __be32         nfsd_readv(struct svc_rqst *rqstp, struct svc_fh *fhp,
                                struct file *file, loff_t offset,
                                struct kvec *vec, int vlen,
-                               unsigned long *count);
+                               unsigned long *count,
+                               u32 *eof);
 __be32                 nfsd_read(struct svc_rqst *, struct svc_fh *,
-                               loff_t, struct kvec *, int, unsigned long *);
+                               loff_t, struct kvec *, int, unsigned long *,
+                               u32 *eof);
 __be32                 nfsd_write(struct svc_rqst *, struct svc_fh *, loff_t,
                                struct kvec *, int, unsigned long *, int);
 __be32         nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp,
@@ -115,9 +118,6 @@ __be32              nfsd_statfs(struct svc_rqst *, struct svc_fh *,
 __be32         nfsd_permission(struct svc_rqst *, struct svc_export *,
                                struct dentry *, int);
 
-struct raparms *nfsd_init_raparms(struct file *file);
-void           nfsd_put_raparams(struct file *file, struct raparms *ra);
-
 static inline int fh_want_write(struct svc_fh *fh)
 {
        int ret;
@@ -152,23 +152,4 @@ static inline int nfsd_create_is_exclusive(int createmode)
               || createmode == NFS4_CREATE_EXCLUSIVE4_1;
 }
 
-static inline bool nfsd_eof_on_read(long requested, long read,
-                               loff_t offset, loff_t size)
-{
-       /* We assume a short read means eof: */
-       if (requested > read)
-               return true;
-       /*
-        * A non-short read might also reach end of file.  The spec
-        * still requires us to set eof in that case.
-        *
-        * Further operations may have modified the file size since
-        * the read, so the following check is not atomic with the read.
-        * We've only seen that cause a problem for a client in the case
-        * where the read returned a count of 0 without setting eof.
-        * That case was fixed by the addition of the above check.
-        */
-       return (offset + read >= size);
-}
-
 #endif /* LINUX_NFSD_VFS_H */
index 2cb29e961a760f6797abfe9317c4903fba868c93..99ff9f403ff18365b501f8c18fb43c2b538eae59 100644 (file)
@@ -151,7 +151,7 @@ struct nfsd3_readres {
        __be32                  status;
        struct svc_fh           fh;
        unsigned long           count;
-       int                     eof;
+       __u32                   eof;
 };
 
 struct nfsd3_writeres {
index d64c870f998a838514a56d022ad447a34ad06d20..f4737d66ee984d80e61232d4d240286071be6d28 100644 (file)
@@ -273,15 +273,14 @@ struct nfsd4_open_downgrade {
 
 
 struct nfsd4_read {
-       stateid_t       rd_stateid;         /* request */
-       u64             rd_offset;          /* request */
-       u32             rd_length;          /* request */
-       int             rd_vlen;
-       struct file     *rd_filp;
-       bool            rd_tmp_file;
+       stateid_t               rd_stateid;         /* request */
+       u64                     rd_offset;          /* request */
+       u32                     rd_length;          /* request */
+       int                     rd_vlen;
+       struct nfsd_file        *rd_nf;
        
-       struct svc_rqst *rd_rqstp;          /* response */
-       struct svc_fh rd_fhp;             /* response */
+       struct svc_rqst         *rd_rqstp;          /* response */
+       struct svc_fh           *rd_fhp;             /* response */
 };
 
 struct nfsd4_readdir {
@@ -538,8 +537,8 @@ struct nfsd4_copy {
 
        struct nfs4_client      *cp_clp;
 
-       struct file             *file_src;
-       struct file             *file_dst;
+       struct nfsd_file        *nf_src;
+       struct nfsd_file        *nf_dst;
 
        stateid_t               cp_stateid;
 
index 5a00121fb2197881c418c6bd7c8ebbec5eb44911..f3462828a0e2d1aa536235c534dbf63d6acf4828 100644 (file)
@@ -54,8 +54,6 @@ static inline void fsnotify_clear_marks_by_sb(struct super_block *sb)
 {
        fsnotify_destroy_marks(&sb->s_fsnotify_marks);
 }
-/* Wait until all marks queued for destruction are destroyed */
-extern void fsnotify_wait_marks_destroyed(void);
 
 /*
  * update the dentry->d_flags of all of inode's children to indicate if inode cares
index 0391190305cc56b4ff32df43068225eca564822d..133f723aca0703da8d9969f190992370ca5fc27a 100644 (file)
@@ -108,6 +108,7 @@ void fsnotify_put_group(struct fsnotify_group *group)
        if (refcount_dec_and_test(&group->refcnt))
                fsnotify_final_destroy_group(group);
 }
+EXPORT_SYMBOL_GPL(fsnotify_put_group);
 
 /*
  * Create a new fsnotify_group and hold a reference for the group returned.
@@ -137,6 +138,7 @@ struct fsnotify_group *fsnotify_alloc_group(const struct fsnotify_ops *ops)
 
        return group;
 }
+EXPORT_SYMBOL_GPL(fsnotify_alloc_group);
 
 int fsnotify_fasync(int fd, struct file *file, int on)
 {
index 99ddd126f6f0c61cccc3dfc57cdf7bb7e6ef94e6..1d96216dffd196637fb4e8f50182d7a95ee0111d 100644 (file)
@@ -276,6 +276,7 @@ void fsnotify_put_mark(struct fsnotify_mark *mark)
        queue_delayed_work(system_unbound_wq, &reaper_work,
                           FSNOTIFY_REAPER_DELAY);
 }
+EXPORT_SYMBOL_GPL(fsnotify_put_mark);
 
 /*
  * Get mark reference when we found the mark via lockless traversal of object
@@ -430,6 +431,7 @@ void fsnotify_destroy_mark(struct fsnotify_mark *mark,
        mutex_unlock(&group->mark_mutex);
        fsnotify_free_mark(mark);
 }
+EXPORT_SYMBOL_GPL(fsnotify_destroy_mark);
 
 /*
  * Sorting function for lists of fsnotify marks.
@@ -685,6 +687,7 @@ int fsnotify_add_mark(struct fsnotify_mark *mark, fsnotify_connp_t *connp,
        mutex_unlock(&group->mark_mutex);
        return ret;
 }
+EXPORT_SYMBOL_GPL(fsnotify_add_mark);
 
 /*
  * Given a list of marks, find the mark associated with given group. If found
@@ -711,6 +714,7 @@ struct fsnotify_mark *fsnotify_find_mark(fsnotify_connp_t *connp,
        spin_unlock(&conn->lock);
        return NULL;
 }
+EXPORT_SYMBOL_GPL(fsnotify_find_mark);
 
 /* Clear any marks in a group with given type mask */
 void fsnotify_clear_marks_by_group(struct fsnotify_group *group,
@@ -809,6 +813,7 @@ void fsnotify_init_mark(struct fsnotify_mark *mark,
        mark->group = group;
        WRITE_ONCE(mark->connector, NULL);
 }
+EXPORT_SYMBOL_GPL(fsnotify_init_mark);
 
 /*
  * Destroy all marks in destroy_list, waits for SRCU period to finish before
@@ -837,3 +842,4 @@ void fsnotify_wait_marks_destroyed(void)
 {
        flush_delayed_work(&reaper_work);
 }
+EXPORT_SYMBOL_GPL(fsnotify_wait_marks_destroyed);
index b0c6b0d34d0213569c88c9bd83280c46ecaf2be2..e0d909d357634bb26a9adfa65ddbcc9bce5364f6 100644 (file)
@@ -1168,6 +1168,11 @@ extern void lease_get_mtime(struct inode *, struct timespec64 *time);
 extern int generic_setlease(struct file *, long, struct file_lock **, void **priv);
 extern int vfs_setlease(struct file *, long, struct file_lock **, void **);
 extern int lease_modify(struct file_lock *, int, struct list_head *);
+
+struct notifier_block;
+extern int lease_register_notifier(struct notifier_block *);
+extern void lease_unregister_notifier(struct notifier_block *);
+
 struct files_struct;
 extern void show_fd_locks(struct seq_file *f,
                         struct file *filp, struct files_struct *files);
index 2de3b2ddd19ac446fe076d35d9fcdd8b8696e027..1915bdba2fad9a64407850c376e21eca8f0ecf41 100644 (file)
@@ -475,6 +475,8 @@ extern void fsnotify_destroy_mark(struct fsnotify_mark *mark,
 extern void fsnotify_detach_mark(struct fsnotify_mark *mark);
 /* free mark */
 extern void fsnotify_free_mark(struct fsnotify_mark *mark);
+/* Wait until all marks queued for destruction are destroyed */
+extern void fsnotify_wait_marks_destroyed(void);
 /* run all the marks in a group, and clear all of the marks attached to given object type */
 extern void fsnotify_clear_marks_by_group(struct fsnotify_group *group, unsigned int type);
 /* run all the marks in a group, and clear all of the vfsmount marks */
index c7f38e8971747cd0c7633abf981f7fb0f3860a31..f8603724fbee2557e7264a5e3681796f2d6b0ee7 100644 (file)
@@ -87,6 +87,7 @@ struct cache_detail {
                                              int has_died);
 
        struct cache_head *     (*alloc)(void);
+       void                    (*flush)(void);
        int                     (*match)(struct cache_head *orig, struct cache_head *new);
        void                    (*init)(struct cache_head *orig, struct cache_head *new);
        void                    (*update)(struct cache_head *orig, struct cache_head *new);
@@ -107,9 +108,9 @@ struct cache_detail {
        /* fields for communication over channel */
        struct list_head        queue;
 
-       atomic_t                readers;                /* how many time is /chennel open */
-       time_t                  last_close;             /* if no readers, when did last close */
-       time_t                  last_warn;              /* when we last warned about no readers */
+       atomic_t                writers;                /* how many times /channel is open for writing */
+       time_t                  last_close;             /* if no writers, when did last close */
+       time_t                  last_warn;              /* when we last warned about no writers */
 
        union {
                struct proc_dir_entry   *procfs;
index 981f0d726ad4b7eaf5e7ac118edd2e5b7ab06d02..40f65888dd3887ee0f9bc9c083cce61209acff2e 100644 (file)
@@ -42,6 +42,7 @@
 
 #ifndef SVC_RDMA_H
 #define SVC_RDMA_H
+#include <linux/llist.h>
 #include <linux/sunrpc/xdr.h>
 #include <linux/sunrpc/svcsock.h>
 #include <linux/sunrpc/rpc_rdma.h>
@@ -107,8 +108,7 @@ struct svcxprt_rdma {
        struct list_head     sc_read_complete_q;
        struct work_struct   sc_work;
 
-       spinlock_t           sc_recv_lock;
-       struct list_head     sc_recv_ctxts;
+       struct llist_head    sc_recv_ctxts;
 };
 /* sc_flags */
 #define RDMAXPRT_CONN_PENDING  3
@@ -125,6 +125,7 @@ enum {
 #define RPCSVC_MAXPAYLOAD_RDMA RPCSVC_MAXPAYLOAD
 
 struct svc_rdma_recv_ctxt {
+       struct llist_node       rc_node;
        struct list_head        rc_list;
        struct ib_recv_wr       rc_recv_wr;
        struct ib_cqe           rc_cqe;
@@ -200,7 +201,6 @@ extern struct svc_xprt_class svc_rdma_bc_class;
 #endif
 
 /* svc_rdma.c */
-extern struct workqueue_struct *svc_rdma_wq;
 extern int svc_rdma_init(void);
 extern void svc_rdma_cleanup(void);
 
index b1e9de4f07d5d5caea4db005fd666135b127a8a1..a519313af953907a24d77b5e6b9c2cb80ace1076 100644 (file)
 #include <linux/types.h>
 
 /* latest upcall version available */
-#define CLD_UPCALL_VERSION 1
+#define CLD_UPCALL_VERSION 2
 
 /* defined by RFC3530 */
 #define NFS4_OPAQUE_LIMIT 1024
 
+#ifndef SHA256_DIGEST_SIZE
+#define SHA256_DIGEST_SIZE      32
+#endif
+
 enum cld_command {
        Cld_Create,             /* create a record for this cm_id */
        Cld_Remove,             /* remove record of this cm_id */
        Cld_Check,              /* is this cm_id allowed? */
        Cld_GraceDone,          /* grace period is complete */
-       Cld_GraceStart,
+       Cld_GraceStart,         /* grace start (upload client records) */
+       Cld_GetVersion,         /* query max supported upcall version */
 };
 
 /* representation of long-form NFSv4 client ID */
@@ -45,6 +50,17 @@ struct cld_name {
        unsigned char   cn_id[NFS4_OPAQUE_LIMIT];       /* client-provided */
 } __attribute__((packed));
 
+/* sha256 hash of the kerberos principal */
+struct cld_princhash {
+       __u8            cp_len;                         /* length of cp_data */
+       unsigned char   cp_data[SHA256_DIGEST_SIZE];    /* hash of principal */
+} __attribute__((packed));
+
+struct cld_clntinfo {
+       struct cld_name         cc_name;        /* long-form NFSv4 client ID */
+       struct cld_princhash    cc_princhash;   /* sha256 hash of the kerberos principal */
+} __attribute__((packed));
+
 /* message struct for communication with userspace */
 struct cld_msg {
        __u8            cm_vers;                /* upcall version */
@@ -54,7 +70,28 @@ struct cld_msg {
        union {
                __s64           cm_gracetime;   /* grace period start time */
                struct cld_name cm_name;
+               __u8            cm_version;     /* for getting max version */
+       } __attribute__((packed)) cm_u;
+} __attribute__((packed));
+
+/* version 2 message can include hash of kerberos principal */
+struct cld_msg_v2 {
+       __u8            cm_vers;                /* upcall version */
+       __u8            cm_cmd;                 /* upcall command */
+       __s16           cm_status;              /* return code */
+       __u32           cm_xid;                 /* transaction id */
+       union {
+               struct cld_name cm_name;
+               __u8            cm_version;     /* for getting max version */
+               struct cld_clntinfo cm_clntinfo; /* name & princ hash */
        } __attribute__((packed)) cm_u;
 } __attribute__((packed));
 
+struct cld_msg_hdr {           /* same fields that precede cm_u in struct cld_msg_v2 */
+       __u8            cm_vers;                /* upcall version */
+       __u8            cm_cmd;                 /* upcall command */
+       __s16           cm_status;              /* return code */
+       __u32           cm_xid;                 /* transaction id */
+} __attribute__((packed));
+
 #endif /* !_NFSD_CLD_H */
index 6f1528f271eed6ea0b7343c1c81e67702cb31dd5..a349094f6fb7d90c31391bf0fde9bd743e0bd429 100644 (file)
@@ -373,7 +373,7 @@ void sunrpc_init_cache_detail(struct cache_detail *cd)
        spin_lock(&cache_list_lock);
        cd->nextcheck = 0;
        cd->entries = 0;
-       atomic_set(&cd->readers, 0);
+       atomic_set(&cd->writers, 0);
        cd->last_close = 0;
        cd->last_warn = -1;
        list_add(&cd->others, &cache_list);
@@ -1029,11 +1029,13 @@ static int cache_open(struct inode *inode, struct file *filp,
                }
                rp->offset = 0;
                rp->q.reader = 1;
-               atomic_inc(&cd->readers);
+
                spin_lock(&queue_lock);
                list_add(&rp->q.list, &cd->queue);
                spin_unlock(&queue_lock);
        }
+       if (filp->f_mode & FMODE_WRITE)
+               atomic_inc(&cd->writers);
        filp->private_data = rp;
        return 0;
 }
@@ -1062,8 +1064,10 @@ static int cache_release(struct inode *inode, struct file *filp,
                filp->private_data = NULL;
                kfree(rp);
 
+       }
+       if (filp->f_mode & FMODE_WRITE) {
+               atomic_dec(&cd->writers);
                cd->last_close = seconds_since_boot();
-               atomic_dec(&cd->readers);
        }
        module_put(cd->owner);
        return 0;
@@ -1171,7 +1175,7 @@ static void warn_no_listener(struct cache_detail *detail)
 
 static bool cache_listeners_exist(struct cache_detail *detail)
 {
-       if (atomic_read(&detail->readers))
+       if (atomic_read(&detail->writers))
                return true;
        if (detail->last_close == 0)
                /* This cache was never opened */
@@ -1520,6 +1524,9 @@ static ssize_t write_flush(struct file *file, const char __user *buf,
        cd->nextcheck = now;
        cache_flush();
 
+       if (cd->flush)
+               cd->flush();
+
        *ppos += count;
        return count;
 }
index 220b79988000ed11e9ddf2abf10b5acb521e366f..d11b70552c33965206d3bc060d4fa80f53f155e9 100644 (file)
@@ -1233,8 +1233,8 @@ svc_generic_init_request(struct svc_rqst *rqstp,
 
        if (rqstp->rq_vers >= progp->pg_nvers )
                goto err_bad_vers;
-         versp = progp->pg_vers[rqstp->rq_vers];
-         if (!versp)
+       versp = progp->pg_vers[rqstp->rq_vers];
+       if (!versp)
                goto err_bad_vers;
 
        /*
index abdb3004a1e35cb6e6a5c624904938e1b70b3c0f..97bca509a391bdb8d84732294b8b9f7fb6ceee55 100644 (file)
@@ -73,8 +73,6 @@ atomic_t rdma_stat_rq_prod;
 atomic_t rdma_stat_sq_poll;
 atomic_t rdma_stat_sq_prod;
 
-struct workqueue_struct *svc_rdma_wq;
-
 /*
  * This function implements reading and resetting an atomic_t stat
  * variable through read/write to a proc file. Any write to the file
@@ -230,7 +228,6 @@ static struct ctl_table svcrdma_root_table[] = {
 void svc_rdma_cleanup(void)
 {
        dprintk("SVCRDMA Module Removed, deregister RPC RDMA transport\n");
-       destroy_workqueue(svc_rdma_wq);
        if (svcrdma_table_header) {
                unregister_sysctl_table(svcrdma_table_header);
                svcrdma_table_header = NULL;
@@ -246,10 +243,6 @@ int svc_rdma_init(void)
        dprintk("\tmax_bc_requests  : %u\n", svcrdma_max_bc_requests);
        dprintk("\tmax_inline       : %d\n", svcrdma_max_req_size);
 
-       svc_rdma_wq = alloc_workqueue("svc_rdma", 0, 0);
-       if (!svc_rdma_wq)
-               return -ENOMEM;
-
        if (!svcrdma_table_header)
                svcrdma_table_header =
                        register_sysctl_table(svcrdma_root_table);
index 65e2fb9aac656f2daab864a077543f4abb9e8fe2..96bccd39846949c2f7a49643581192b948d278d0 100644 (file)
@@ -172,9 +172,10 @@ static void svc_rdma_recv_ctxt_destroy(struct svcxprt_rdma *rdma,
 void svc_rdma_recv_ctxts_destroy(struct svcxprt_rdma *rdma)
 {
        struct svc_rdma_recv_ctxt *ctxt;
+       struct llist_node *node;
 
-       while ((ctxt = svc_rdma_next_recv_ctxt(&rdma->sc_recv_ctxts))) {
-               list_del(&ctxt->rc_list);
+       while ((node = llist_del_first(&rdma->sc_recv_ctxts))) {
+               ctxt = llist_entry(node, struct svc_rdma_recv_ctxt, rc_node);
                svc_rdma_recv_ctxt_destroy(rdma, ctxt);
        }
 }
@@ -183,21 +184,18 @@ static struct svc_rdma_recv_ctxt *
 svc_rdma_recv_ctxt_get(struct svcxprt_rdma *rdma)
 {
        struct svc_rdma_recv_ctxt *ctxt;
+       struct llist_node *node;
 
-       spin_lock(&rdma->sc_recv_lock);
-       ctxt = svc_rdma_next_recv_ctxt(&rdma->sc_recv_ctxts);
-       if (!ctxt)
+       node = llist_del_first(&rdma->sc_recv_ctxts);
+       if (!node)
                goto out_empty;
-       list_del(&ctxt->rc_list);
-       spin_unlock(&rdma->sc_recv_lock);
+       ctxt = llist_entry(node, struct svc_rdma_recv_ctxt, rc_node);
 
 out:
        ctxt->rc_page_count = 0;
        return ctxt;
 
 out_empty:
-       spin_unlock(&rdma->sc_recv_lock);
-
        ctxt = svc_rdma_recv_ctxt_alloc(rdma);
        if (!ctxt)
                return NULL;
@@ -218,11 +216,9 @@ void svc_rdma_recv_ctxt_put(struct svcxprt_rdma *rdma,
        for (i = 0; i < ctxt->rc_page_count; i++)
                put_page(ctxt->rc_pages[i]);
 
-       if (!ctxt->rc_temp) {
-               spin_lock(&rdma->sc_recv_lock);
-               list_add(&ctxt->rc_list, &rdma->sc_recv_ctxts);
-               spin_unlock(&rdma->sc_recv_lock);
-       } else
+       if (!ctxt->rc_temp)
+               llist_add(&ctxt->rc_node, &rdma->sc_recv_ctxts);
+       else
                svc_rdma_recv_ctxt_destroy(rdma, ctxt);
 }
 
index 4d3db6ee7f09ca7eae3d4ac239e5dbac452851cc..145a3615c319366fbcc9815f76d59f4e271dc6ff 100644 (file)
@@ -140,14 +140,13 @@ static struct svcxprt_rdma *svc_rdma_create_xprt(struct svc_serv *serv,
        INIT_LIST_HEAD(&cma_xprt->sc_rq_dto_q);
        INIT_LIST_HEAD(&cma_xprt->sc_read_complete_q);
        INIT_LIST_HEAD(&cma_xprt->sc_send_ctxts);
-       INIT_LIST_HEAD(&cma_xprt->sc_recv_ctxts);
+       init_llist_head(&cma_xprt->sc_recv_ctxts);
        INIT_LIST_HEAD(&cma_xprt->sc_rw_ctxts);
        init_waitqueue_head(&cma_xprt->sc_send_wait);
 
        spin_lock_init(&cma_xprt->sc_lock);
        spin_lock_init(&cma_xprt->sc_rq_dto_lock);
        spin_lock_init(&cma_xprt->sc_send_lock);
-       spin_lock_init(&cma_xprt->sc_recv_lock);
        spin_lock_init(&cma_xprt->sc_rw_ctxt_lock);
 
        /*
@@ -630,8 +629,9 @@ static void svc_rdma_free(struct svc_xprt *xprt)
 {
        struct svcxprt_rdma *rdma =
                container_of(xprt, struct svcxprt_rdma, sc_xprt);
+
        INIT_WORK(&rdma->sc_work, __svc_rdma_free);
-       queue_work(svc_rdma_wq, &rdma->sc_work);
+       schedule_work(&rdma->sc_work);
 }
 
 static int svc_rdma_has_wspace(struct svc_xprt *xprt)