Merge tag 'nfsd-4.3' of git://linux-nfs.org/~bfields/linux
author     Linus Torvalds <torvalds@linux-foundation.org>
           Sun, 6 Sep 2015 00:26:24 +0000 (17:26 -0700)
committer  Linus Torvalds <torvalds@linux-foundation.org>
           Sun, 6 Sep 2015 00:26:24 +0000 (17:26 -0700)
Pull nfsd updates from Bruce Fields:
 "Nothing major, but:

   - Add Jeff Layton as an nfsd co-maintainer: no change to existing
     practice, just an acknowledgement of the status quo.

   - Two patches ("nfsd: ensure that...") for a race overlooked by the
     state locking rewrite, causing a crash noticed by multiple users.

   - Lots of smaller bugfixes all over from Kinglong Mee.

   - From Jeff, some cleanup of server rpc code in preparation for
     possible shift of nfsd threads to workqueues"

* tag 'nfsd-4.3' of git://linux-nfs.org/~bfields/linux: (52 commits)
  nfsd: deal with DELEGRETURN racing with CB_RECALL
  nfsd: return CLID_INUSE for unexpected SETCLIENTID_CONFIRM case
  nfsd: ensure that delegation stateid hash references are only put once
  nfsd: ensure that the ol stateid hash reference is only put once
  net: sunrpc: fix tracepoint Warning: unknown op '->'
  nfsd: allow more than one laundry job to run at a time
  nfsd: don't WARN/backtrace for invalid container deployment.
  fs: fix fs/locks.c kernel-doc warning
  nfsd: Add Jeff Layton as co-maintainer
  NFSD: Return word2 bitmask if setting security label in OPEN/CREATE
  NFSD: Set the attributes used to store the verifier for EXCLUSIVE4_1
  nfsd: SUPPATTR_EXCLCREAT must be encoded before SECURITY_LABEL.
  nfsd: Fix an FS_LAYOUT_TYPES/LAYOUT_TYPES encode bug
  NFSD: Store parent's stat in a separate value
  nfsd: Fix two typos in comments
  lockd: NLM grace period shouldn't block NFSv4 opens
  nfsd: include linux/nfs4.h in export.h
  sunrpc: Switch to using hash list instead single list
  sunrpc/nfsd: Remove redundant code by exports seq_operations functions
  sunrpc: Store cache_detail in seq_file's private directly
  ...

35 files changed:
Documentation/filesystems/nfs/nfs-rdma.txt
fs/lockd/svc.c
fs/locks.c
fs/nfs/callback.c
fs/nfs_common/grace.c
fs/nfsd/export.c
fs/nfsd/export.h
fs/nfsd/idmap.h
fs/nfsd/netns.h
fs/nfsd/nfs2acl.c
fs/nfsd/nfs3acl.c
fs/nfsd/nfs4acl.c
fs/nfsd/nfs4callback.c
fs/nfsd/nfs4idmap.c
fs/nfsd/nfs4proc.c
fs/nfsd/nfs4recover.c
fs/nfsd/nfs4state.c
fs/nfsd/nfs4xdr.c
fs/nfsd/nfssvc.c
fs/nfsd/state.h
fs/nfsd/vfs.c
fs/nfsd/vfs.h
include/linux/fs.h
include/linux/sunrpc/cache.h
include/linux/sunrpc/svc.h
include/linux/sunrpc/svc_rdma.h
include/linux/sunrpc/svc_xprt.h
include/trace/events/sunrpc.h
include/uapi/linux/nfsacl.h
net/sunrpc/cache.c
net/sunrpc/svc.c
net/sunrpc/svc_xprt.c
net/sunrpc/xprtrdma/svc_rdma_sendto.c
net/sunrpc/xprtrdma/svc_rdma_transport.c
net/sunrpc/xprtrdma/xprt_rdma.h

diff --git a/Documentation/filesystems/nfs/nfs-rdma.txt b/Documentation/filesystems/nfs/nfs-rdma.txt
index 95c13aa575ff32eebeb4938f435376ca0595a439..906b6c233f62f63cc2b80e572324a73d367b43d7 100644
@@ -138,9 +138,9 @@ Installation
   - Build, install, reboot
 
     The NFS/RDMA code will be enabled automatically if NFS and RDMA
-    are turned on. The NFS/RDMA client and server are configured via the
-    SUNRPC_XPRT_RDMA_CLIENT and SUNRPC_XPRT_RDMA_SERVER config options that both
-    depend on SUNRPC and INFINIBAND. The default value of both options will be:
+    are turned on. The NFS/RDMA client and server are configured via the hidden
+    SUNRPC_XPRT_RDMA config option that depends on SUNRPC and INFINIBAND. The
+    value of SUNRPC_XPRT_RDMA will be:
 
      - N if either SUNRPC or INFINIBAND are N, in this case the NFS/RDMA client
        and server will not be built
@@ -238,9 +238,8 @@ NFS/RDMA Setup
 
   - Start the NFS server
 
-    If the NFS/RDMA server was built as a module
-    (CONFIG_SUNRPC_XPRT_RDMA_SERVER=m in kernel config), load the RDMA
-    transport module:
+    If the NFS/RDMA server was built as a module (CONFIG_SUNRPC_XPRT_RDMA=m in
+    kernel config), load the RDMA transport module:
 
     $ modprobe svcrdma
 
@@ -259,9 +258,8 @@ NFS/RDMA Setup
 
   - On the client system
 
-    If the NFS/RDMA client was built as a module
-    (CONFIG_SUNRPC_XPRT_RDMA_CLIENT=m in kernel config), load the RDMA client
-    module:
+    If the NFS/RDMA client was built as a module (CONFIG_SUNRPC_XPRT_RDMA=m in
+    kernel config), load the RDMA client module:
 
     $ modprobe xprtrdma.ko
 
diff --git a/fs/lockd/svc.c b/fs/lockd/svc.c
index 55505cbe11afa165ec90ec934301c15a1b9a4314..d678bcc3cbcb440e90045e578c755f6972079852 100644
@@ -322,6 +322,11 @@ out_rqst:
        return error;
 }
 
+static struct svc_serv_ops lockd_sv_ops = {
+       .svo_shutdown           = svc_rpcb_cleanup,
+       .svo_enqueue_xprt       = svc_xprt_do_enqueue,
+};
+
 static struct svc_serv *lockd_create_svc(void)
 {
        struct svc_serv *serv;
@@ -350,7 +355,7 @@ static struct svc_serv *lockd_create_svc(void)
                nlm_timeout = LOCKD_DFLT_TIMEO;
        nlmsvc_timeout = nlm_timeout * HZ;
 
-       serv = svc_create(&nlmsvc_program, LOCKD_BUFSIZE, svc_rpcb_cleanup);
+       serv = svc_create(&nlmsvc_program, LOCKD_BUFSIZE, &lockd_sv_ops);
        if (!serv) {
                printk(KERN_WARNING "lockd_up: create service failed\n");
                return ERR_PTR(-ENOMEM);
@@ -586,6 +591,7 @@ static int lockd_init_net(struct net *net)
 
        INIT_DELAYED_WORK(&ln->grace_period_end, grace_ender);
        INIT_LIST_HEAD(&ln->lockd_manager.list);
+       ln->lockd_manager.block_opens = false;
        spin_lock_init(&ln->nsm_clnt_lock);
        return 0;
 }
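
The hunks above are the consumer side of Jeff Layton's svc_serv_ops cleanup:
per-service callbacks that svc_create() used to take as individual arguments
now travel in a single ops table. A sketch of the table's approximate shape,
with field signatures inferred from the initializers in this merge (lockd
here, the NFSv4 callback service and nfsd below) rather than copied verbatim
from include/linux/sunrpc/svc.h:

    struct svc_serv_ops {
            /* callback to run when the last thread exits */
            void    (*svo_shutdown)(struct svc_serv *, struct net *);
            /* function each service thread runs */
            int     (*svo_function)(void *);
            /* queue a transport up for servicing */
            void    (*svo_enqueue_xprt)(struct svc_xprt *);
            /* set up thread (or other) execution contexts */
            int     (*svo_setup)(struct svc_serv *, struct svc_pool *, int);
    };

Collecting these in one structure is what makes the "possible shift of nfsd
threads to workqueues" mentioned in the pull message practical: a workqueue
backend would mostly need to supply a different svo_setup/svo_function pair.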
diff --git a/fs/locks.c b/fs/locks.c
index d3d558ba4da7966de9699aeb98b2630bd7c6854e..2a54c800a22391cbd78af02f2d9a70804519c815 100644
@@ -1568,6 +1568,7 @@ int fcntl_getlease(struct file *filp)
  *                         desired lease.
  * @dentry:    dentry to check
  * @arg:       type of lease that we're trying to acquire
+ * @flags:     current lock flags
  *
  * Check to see if there's an existing open fd on this file that would
  * conflict with the lease we're trying to set.
diff --git a/fs/nfs/callback.c b/fs/nfs/callback.c
index 682529c009966b85f986955c04d2b48fb645e981..2c4a0b565d28e6eba828668a01123d8dce9f05e1 100644
@@ -308,6 +308,10 @@ err_bind:
        return ret;
 }
 
+static struct svc_serv_ops nfs_cb_sv_ops = {
+       .svo_enqueue_xprt       = svc_xprt_do_enqueue,
+};
+
 static struct svc_serv *nfs_callback_create_svc(int minorversion)
 {
        struct nfs_callback_data *cb_info = &nfs_callback_info[minorversion];
@@ -333,7 +337,7 @@ static struct svc_serv *nfs_callback_create_svc(int minorversion)
                printk(KERN_WARNING "nfs_callback_create_svc: no kthread, %d users??\n",
                        cb_info->users);
 
-       serv = svc_create(&nfs4_callback_program, NFS4_CALLBACK_BUFSIZE, NULL);
+       serv = svc_create(&nfs4_callback_program, NFS4_CALLBACK_BUFSIZE, &nfs_cb_sv_ops);
        if (!serv) {
                printk(KERN_ERR "nfs_callback_create_svc: create service failed\n");
                return ERR_PTR(-ENOMEM);
diff --git a/fs/nfs_common/grace.c b/fs/nfs_common/grace.c
index ae6e58ea4de51b832138268f8cbdf9ed5cdefa56..fd8c9a5bcac44c74101d1b2ba4001c8785875254 100644
@@ -63,14 +63,33 @@ EXPORT_SYMBOL_GPL(locks_end_grace);
  * lock reclaims.
  */
 int
-locks_in_grace(struct net *net)
+__state_in_grace(struct net *net, bool open)
 {
        struct list_head *grace_list = net_generic(net, grace_net_id);
+       struct lock_manager *lm;
 
-       return !list_empty(grace_list);
+       if (!open)
+               return !list_empty(grace_list);
+
+       list_for_each_entry(lm, grace_list, list) {
+               if (lm->block_opens)
+                       return true;
+       }
+       return false;
+}
+
+int locks_in_grace(struct net *net)
+{
+       return __state_in_grace(net, 0);
 }
 EXPORT_SYMBOL_GPL(locks_in_grace);
 
+int opens_in_grace(struct net *net)
+{
+       return __state_in_grace(net, 1);
+}
+EXPORT_SYMBOL_GPL(opens_in_grace);
+
 static int __net_init
 grace_init_net(struct net *net)
 {
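
After this split, locks_in_grace() stays true while any lock manager is in
its grace period, but opens_in_grace() is true only if some manager has set
block_opens. That is the whole mechanism behind "lockd: NLM grace period
shouldn't block NFSv4 opens": lockd leaves block_opens false, and nfsd sets
it (see the nfs4_state_start_net() hunk further down). A minimal sketch of a
manager opting in:

    struct lock_manager mgr = {
            .block_opens = true,    /* fence OPENs, not just lock reclaim */
    };

    locks_start_grace(net, &mgr);   /* opens_in_grace(net) now returns true */
    /* ... let clients reclaim their state ... */
    locks_end_grace(&mgr);          /* opens may proceed again */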
diff --git a/fs/nfsd/export.c b/fs/nfsd/export.c
index f79521a597471ed6286fd63c5e483860bfcfbe80..b4d84b579f20cd5da76866586dfa283d64c6669a 100644
@@ -1075,73 +1075,6 @@ exp_pseudoroot(struct svc_rqst *rqstp, struct svc_fh *fhp)
        return rv;
 }
 
-/* Iterator */
-
-static void *e_start(struct seq_file *m, loff_t *pos)
-       __acquires(((struct cache_detail *)m->private)->hash_lock)
-{
-       loff_t n = *pos;
-       unsigned hash, export;
-       struct cache_head *ch;
-       struct cache_detail *cd = m->private;
-       struct cache_head **export_table = cd->hash_table;
-
-       read_lock(&cd->hash_lock);
-       if (!n--)
-               return SEQ_START_TOKEN;
-       hash = n >> 32;
-       export = n & ((1LL<<32) - 1);
-
-       
-       for (ch=export_table[hash]; ch; ch=ch->next)
-               if (!export--)
-                       return ch;
-       n &= ~((1LL<<32) - 1);
-       do {
-               hash++;
-               n += 1LL<<32;
-       } while(hash < EXPORT_HASHMAX && export_table[hash]==NULL);
-       if (hash >= EXPORT_HASHMAX)
-               return NULL;
-       *pos = n+1;
-       return export_table[hash];
-}
-
-static void *e_next(struct seq_file *m, void *p, loff_t *pos)
-{
-       struct cache_head *ch = p;
-       int hash = (*pos >> 32);
-       struct cache_detail *cd = m->private;
-       struct cache_head **export_table = cd->hash_table;
-
-       if (p == SEQ_START_TOKEN)
-               hash = 0;
-       else if (ch->next == NULL) {
-               hash++;
-               *pos += 1LL<<32;
-       } else {
-               ++*pos;
-               return ch->next;
-       }
-       *pos &= ~((1LL<<32) - 1);
-       while (hash < EXPORT_HASHMAX && export_table[hash] == NULL) {
-               hash++;
-               *pos += 1LL<<32;
-       }
-       if (hash >= EXPORT_HASHMAX)
-               return NULL;
-       ++*pos;
-       return export_table[hash];
-}
-
-static void e_stop(struct seq_file *m, void *p)
-       __releases(((struct cache_detail *)m->private)->hash_lock)
-{
-       struct cache_detail *cd = m->private;
-
-       read_unlock(&cd->hash_lock);
-}
-
 static struct flags {
        int flag;
        char *name[2];
@@ -1270,9 +1203,9 @@ static int e_show(struct seq_file *m, void *p)
 }
 
 const struct seq_operations nfs_exports_op = {
-       .start  = e_start,
-       .next   = e_next,
-       .stop   = e_stop,
+       .start  = cache_seq_start,
+       .next   = cache_seq_next,
+       .stop   = cache_seq_stop,
        .show   = e_show,
 };
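
The hand-rolled hash-table walk deleted above is replaced by generic
iterators that the sunrpc cache code now exports (net/sunrpc/cache.c and
include/linux/sunrpc/cache.h are both in the changed-file list). Their
assumed declarations, matching the seq_operations slots they fill here:

    void *cache_seq_start(struct seq_file *file, loff_t *pos);
    void *cache_seq_next(struct seq_file *file, void *p, loff_t *pos);
    void cache_seq_stop(struct seq_file *file, void *p);

Any cache that stores its cache_detail in the seq_file's private pointer can
reuse them instead of open-coding the two-level position encoding.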
 
diff --git a/fs/nfsd/export.h b/fs/nfsd/export.h
index 1f52bfcc436f210c16e148ea90f2b9b0bab6a5c6..2e315072bf3fb83e62d0c469a2d43b2d3683392f 100644
@@ -6,6 +6,7 @@
 
 #include <linux/sunrpc/cache.h>
 #include <uapi/linux/nfsd/export.h>
+#include <linux/nfs4.h>
 
 struct knfsd_fh;
 struct svc_fh;
diff --git a/fs/nfsd/idmap.h b/fs/nfsd/idmap.h
index a3f34900091f492d28132c6cf1c90a4c836b7efe..23cc85d1efdd63d91e4d9e7aa4dfce387b22acdf 100644
@@ -37,9 +37,7 @@
 
 #include <linux/in.h>
 #include <linux/sunrpc/svc.h>
-
-/* XXX from linux/nfs_idmap.h */
-#define IDMAP_NAMESZ 128
+#include <linux/nfs_idmap.h>
 
 #ifdef CONFIG_NFSD_V4
 int nfsd_idmap_init(struct net *);
diff --git a/fs/nfsd/netns.h b/fs/nfsd/netns.h
index ea6749a327602de97a5bbbea81058acad1522bbd..d8b16c2568f30f906dc49eb77597fabbd718ee4b 100644
@@ -110,6 +110,7 @@ struct nfsd_net {
        unsigned int max_connections;
 
        u32 clientid_counter;
+       u32 clverifier_counter;
 
        struct svc_serv *nfsd_serv;
 };
diff --git a/fs/nfsd/nfs2acl.c b/fs/nfsd/nfs2acl.c
index d54701f6dc7873440b2c11d375b96323484ba259..1580ea6fd64df15da96b832eeef9f3dc2560e32b 100644
@@ -44,13 +44,13 @@ static __be32 nfsacld_proc_getacl(struct svc_rqst * rqstp,
 
        inode = d_inode(fh->fh_dentry);
 
-       if (argp->mask & ~(NFS_ACL|NFS_ACLCNT|NFS_DFACL|NFS_DFACLCNT))
+       if (argp->mask & ~NFS_ACL_MASK)
                RETURN_STATUS(nfserr_inval);
        resp->mask = argp->mask;
 
        nfserr = fh_getattr(fh, &resp->stat);
        if (nfserr)
-               goto fail;
+               RETURN_STATUS(nfserr);
 
        if (resp->mask & (NFS_ACL|NFS_ACLCNT)) {
                acl = get_acl(inode, ACL_TYPE_ACCESS);
@@ -202,7 +202,7 @@ static int nfsaclsvc_decode_setaclargs(struct svc_rqst *rqstp, __be32 *p,
        if (!p)
                return 0;
        argp->mask = ntohl(*p++);
-       if (argp->mask & ~(NFS_ACL|NFS_ACLCNT|NFS_DFACL|NFS_DFACLCNT) ||
+       if (argp->mask & ~NFS_ACL_MASK ||
            !xdr_argsize_check(rqstp, p))
                return 0;
 
@@ -293,9 +293,7 @@ static int nfsaclsvc_encode_getaclres(struct svc_rqst *rqstp, __be32 *p,
                                  resp->acl_default,
                                  resp->mask & NFS_DFACL,
                                  NFS_ACL_DEFAULT);
-       if (n <= 0)
-               return 0;
-       return 1;
+       return (n > 0);
 }
 
 static int nfsaclsvc_encode_attrstatres(struct svc_rqst *rqstp, __be32 *p,
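
The same NFS_ACL_MASK substitution repeats in the v3 code below. The macro
itself is not shown in this diff; since include/uapi/linux/nfsacl.h is in
the changed-file list, it presumably lands there as the expression it
replaces, along these lines:

    /* presumed definition, inferred from the replaced expression */
    #define NFS_ACL_MASK    (NFS_ACL | NFS_ACLCNT | NFS_DFACL | NFS_DFACLCNT)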
diff --git a/fs/nfsd/nfs3acl.c b/fs/nfsd/nfs3acl.c
index 882b1a14bc3e85961fbd628009c7618b13263a88..01df4cd7c753fe12cee4c6d37a5ba9400b6f8f41 100644
@@ -41,7 +41,7 @@ static __be32 nfsd3_proc_getacl(struct svc_rqst * rqstp,
 
        inode = d_inode(fh->fh_dentry);
 
-       if (argp->mask & ~(NFS_ACL|NFS_ACLCNT|NFS_DFACL|NFS_DFACLCNT))
+       if (argp->mask & ~NFS_ACL_MASK)
                RETURN_STATUS(nfserr_inval);
        resp->mask = argp->mask;
 
@@ -148,7 +148,7 @@ static int nfs3svc_decode_setaclargs(struct svc_rqst *rqstp, __be32 *p,
        if (!p)
                return 0;
        args->mask = ntohl(*p++);
-       if (args->mask & ~(NFS_ACL|NFS_ACLCNT|NFS_DFACL|NFS_DFACLCNT) ||
+       if (args->mask & ~NFS_ACL_MASK ||
            !xdr_argsize_check(rqstp, p))
                return 0;
 
diff --git a/fs/nfsd/nfs4acl.c b/fs/nfsd/nfs4acl.c
index eb5accf1b37f5a1eb38506e551a1d1a83a8d9245..6adabd6049b7199ad72e01e361e3637a88b016ff 100644
  *  SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  */
 
+#include <linux/fs.h>
 #include <linux/slab.h>
-#include <linux/nfs_fs.h>
+#include <linux/posix_acl.h>
+
 #include "nfsfh.h"
 #include "nfsd.h"
 #include "acl.h"
@@ -100,7 +102,7 @@ deny_mask_from_posix(unsigned short perm, u32 flags)
 /* We only map from NFSv4 to POSIX ACLs when setting ACLs, when we err on the
  * side of being more restrictive, so the mode bit mapping below is
  * pessimistic.  An optimistic version would be needed to handle DENY's,
- * but we espect to coalesce all ALLOWs and DENYs before mapping to mode
+ * but we expect to coalesce all ALLOWs and DENYs before mapping to mode
  * bits. */
 
 static void
@@ -458,7 +460,7 @@ init_state(struct posix_acl_state *state, int cnt)
        state->empty = 1;
        /*
         * In the worst case, each individual acl could be for a distinct
-        * named user or group, but we don't no which, so we allocate
+        * named user or group, but we don't know which, so we allocate
         * enough space for either:
         */
        alloc = sizeof(struct posix_ace_state_array)
diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c
index a49201835a970b77a6c767cab55b8f755d9ffa21..e7f50c4081d60c404a642fa18d8a17be97bfe83f 100644
@@ -435,12 +435,12 @@ static int decode_cb_sequence4resok(struct xdr_stream *xdr,
         */
        status = 0;
 out:
-       if (status)
-               nfsd4_mark_cb_fault(cb->cb_clp, status);
+       cb->cb_seq_status = status;
        return status;
 out_overflow:
        print_overflow_msg(__func__, xdr);
-       return -EIO;
+       status = -EIO;
+       goto out;
 }
 
 static int decode_cb_sequence4res(struct xdr_stream *xdr,
@@ -451,11 +451,10 @@ static int decode_cb_sequence4res(struct xdr_stream *xdr,
        if (cb->cb_minorversion == 0)
                return 0;
 
-       status = decode_cb_op_status(xdr, OP_CB_SEQUENCE, &cb->cb_status);
-       if (unlikely(status || cb->cb_status))
+       status = decode_cb_op_status(xdr, OP_CB_SEQUENCE, &cb->cb_seq_status);
+       if (unlikely(status || cb->cb_seq_status))
                return status;
 
-       cb->cb_update_seq_nr = true;
        return decode_cb_sequence4resok(xdr, cb);
 }
 
@@ -527,7 +526,7 @@ static int nfs4_xdr_dec_cb_recall(struct rpc_rqst *rqstp,
 
        if (cb != NULL) {
                status = decode_cb_sequence4res(xdr, cb);
-               if (unlikely(status || cb->cb_status))
+               if (unlikely(status || cb->cb_seq_status))
                        return status;
        }
 
@@ -617,7 +616,7 @@ static int nfs4_xdr_dec_cb_layout(struct rpc_rqst *rqstp,
 
        if (cb) {
                status = decode_cb_sequence4res(xdr, cb);
-               if (unlikely(status || cb->cb_status))
+               if (unlikely(status || cb->cb_seq_status))
                        return status;
        }
        return decode_cb_op_status(xdr, OP_CB_LAYOUTRECALL, &cb->cb_status);
@@ -876,7 +875,11 @@ static void nfsd4_cb_prepare(struct rpc_task *task, void *calldata)
        u32 minorversion = clp->cl_minorversion;
 
        cb->cb_minorversion = minorversion;
-       cb->cb_update_seq_nr = false;
+       /*
+        * cb_seq_status is only set in decode_cb_sequence4res,
+        * and so will remain 1 if an rpc level failure occurs.
+        */
+       cb->cb_seq_status = 1;
        cb->cb_status = 0;
        if (minorversion) {
                if (!nfsd41_cb_get_slot(clp, task))
@@ -885,15 +888,30 @@ static void nfsd4_cb_prepare(struct rpc_task *task, void *calldata)
        rpc_call_start(task);
 }
 
-static void nfsd4_cb_done(struct rpc_task *task, void *calldata)
+static bool nfsd4_cb_sequence_done(struct rpc_task *task, struct nfsd4_callback *cb)
 {
-       struct nfsd4_callback *cb = calldata;
        struct nfs4_client *clp = cb->cb_clp;
+       struct nfsd4_session *session = clp->cl_cb_session;
+       bool ret = true;
 
-       dprintk("%s: minorversion=%d\n", __func__,
-               clp->cl_minorversion);
+       if (!clp->cl_minorversion) {
+               /*
+                * If the backchannel connection was shut down while this
+                * task was queued, we need to resubmit it after setting up
+                * a new backchannel connection.
+                *
+                * Note that if we lost our callback connection permanently
+                * the submission code will error out, so we don't need to
+                * handle that case here.
+                */
+               if (task->tk_flags & RPC_TASK_KILLED)
+                       goto need_restart;
+
+               return true;
+       }
 
-       if (clp->cl_minorversion) {
+       switch (cb->cb_seq_status) {
+       case 0:
                /*
                 * No need for lock, access serialized in nfsd4_cb_prepare
                 *
@@ -901,29 +919,63 @@ static void nfsd4_cb_done(struct rpc_task *task, void *calldata)
                 * If CB_SEQUENCE returns an error, then the state of the slot
                 * (sequence ID, cached reply) MUST NOT change.
                 */
-               if (cb->cb_update_seq_nr)
-                       ++clp->cl_cb_session->se_cb_seq_nr;
-
-               clear_bit(0, &clp->cl_cb_slot_busy);
-               rpc_wake_up_next(&clp->cl_cb_waitq);
-               dprintk("%s: freed slot, new seqid=%d\n", __func__,
-                       clp->cl_cb_session->se_cb_seq_nr);
+               ++session->se_cb_seq_nr;
+               break;
+       case -ESERVERFAULT:
+               ++session->se_cb_seq_nr;
+       case 1:
+       case -NFS4ERR_BADSESSION:
+               nfsd4_mark_cb_fault(cb->cb_clp, cb->cb_seq_status);
+               ret = false;
+               break;
+       case -NFS4ERR_DELAY:
+               if (!rpc_restart_call(task))
+                       goto out;
+
+               rpc_delay(task, 2 * HZ);
+               return false;
+       case -NFS4ERR_BADSLOT:
+               goto retry_nowait;
+       case -NFS4ERR_SEQ_MISORDERED:
+               if (session->se_cb_seq_nr != 1) {
+                       session->se_cb_seq_nr = 1;
+                       goto retry_nowait;
+               }
+               break;
+       default:
+               dprintk("%s: unprocessed error %d\n", __func__,
+                       cb->cb_seq_status);
        }
 
-       /*
-        * If the backchannel connection was shut down while this
-        * task was queued, we need to resubmit it after setting up
-        * a new backchannel connection.
-        *
-        * Note that if we lost our callback connection permanently
-        * the submission code will error out, so we don't need to
-        * handle that case here.
-        */
-       if (task->tk_flags & RPC_TASK_KILLED) {
-               task->tk_status = 0;
-               cb->cb_need_restart = true;
+       clear_bit(0, &clp->cl_cb_slot_busy);
+       rpc_wake_up_next(&clp->cl_cb_waitq);
+       dprintk("%s: freed slot, new seqid=%d\n", __func__,
+               clp->cl_cb_session->se_cb_seq_nr);
+
+       if (task->tk_flags & RPC_TASK_KILLED)
+               goto need_restart;
+out:
+       return ret;
+retry_nowait:
+       if (rpc_restart_call_prepare(task))
+               ret = false;
+       goto out;
+need_restart:
+       task->tk_status = 0;
+       cb->cb_need_restart = true;
+       return false;
+}
+
+static void nfsd4_cb_done(struct rpc_task *task, void *calldata)
+{
+       struct nfsd4_callback *cb = calldata;
+       struct nfs4_client *clp = cb->cb_clp;
+
+       dprintk("%s: minorversion=%d\n", __func__,
+               clp->cl_minorversion);
+
+       if (!nfsd4_cb_sequence_done(task, cb))
                return;
-       }
 
        if (cb->cb_status) {
                WARN_ON_ONCE(task->tk_status);
@@ -1099,8 +1151,8 @@ void nfsd4_init_cb(struct nfsd4_callback *cb, struct nfs4_client *clp,
        cb->cb_msg.rpc_resp = cb;
        cb->cb_ops = ops;
        INIT_WORK(&cb->cb_work, nfsd4_run_cb_work);
+       cb->cb_seq_status = 1;
        cb->cb_status = 0;
-       cb->cb_update_seq_nr = false;
        cb->cb_need_restart = false;
 }
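
Two conventions in the rewritten callback completion path are worth noting.
First, cb_seq_status is primed to 1, which is neither a valid NFSv4 status
nor a negative errno, so nfsd4_cb_sequence_done() can distinguish "the
CB_SEQUENCE reply was never decoded" (an rpc-level failure) from any real
decoded result. Second, the NFS4ERR_DELAY arm uses the standard sunrpc
retry idiom, shown here in isolation:

    /* rpc_restart_call() rewinds the task so ->rpc_call_prepare runs
     * again; rpc_delay() puts it to sleep first so the retry backs off. */
    if (rpc_restart_call(task))
            rpc_delay(task, 2 * HZ);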
 
diff --git a/fs/nfsd/nfs4idmap.c b/fs/nfsd/nfs4idmap.c
index e1b3d3d472da70d08f4392b01e206a0224d54767..5b20577dcdd233162d8030003758274d7619d038 100644
@@ -59,9 +59,6 @@ MODULE_PARM_DESC(nfs4_disable_idmapping,
  * that.
  */
 
-#define IDMAP_TYPE_USER  0
-#define IDMAP_TYPE_GROUP 1
-
 struct ent {
        struct cache_head h;
        int               type;                /* User / Group */
diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
index 90cfda75313c447cb8668141a624023ef509590d..4ce6b97b31adec12b47deafecf28e2e672753b8f 100644
@@ -276,13 +276,13 @@ do_open_lookup(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, stru
                        nfsd4_security_inode_setsecctx(*resfh, &open->op_label, open->op_bmval);
 
                /*
-                * Following rfc 3530 14.2.16, use the returned bitmask
-                * to indicate which attributes we used to store the
-                * verifier:
+                * Following rfc 3530 14.2.16, and rfc 5661 18.16.4
+                * use the returned bitmask to indicate which attributes
+                * we used to store the verifier:
                 */
-               if (open->op_createmode == NFS4_CREATE_EXCLUSIVE && status == 0)
-                       open->op_bmval[1] = (FATTR4_WORD1_TIME_ACCESS |
-                                                       FATTR4_WORD1_TIME_MODIFY);
+               if (nfsd_create_is_exclusive(open->op_createmode) && status == 0)
+                       open->op_bmval[1] |= (FATTR4_WORD1_TIME_ACCESS |
+                                               FATTR4_WORD1_TIME_MODIFY);
        } else
                /*
                 * Note this may exit with the parent still locked.
@@ -362,7 +362,6 @@ nfsd4_open(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 {
        __be32 status;
        struct svc_fh *resfh = NULL;
-       struct nfsd4_compoundres *resp;
        struct net *net = SVC_NET(rqstp);
        struct nfsd_net *nn = net_generic(net, nfsd_net_id);
 
@@ -389,8 +388,7 @@ nfsd4_open(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
                copy_clientid(&open->op_clientid, cstate->session);
 
        /* check seqid for replay. set nfs4_owner */
-       resp = rqstp->rq_resp;
-       status = nfsd4_process_open1(&resp->cstate, open, nn);
+       status = nfsd4_process_open1(cstate, open, nn);
        if (status == nfserr_replay_me) {
                struct nfs4_replay *rp = &open->op_openowner->oo_owner.so_replay;
                fh_put(&cstate->current_fh);
@@ -417,10 +415,10 @@ nfsd4_open(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
        /* Openowner is now set, so sequence id will get bumped.  Now we need
         * these checks before we do any creates: */
        status = nfserr_grace;
-       if (locks_in_grace(net) && open->op_claim_type != NFS4_OPEN_CLAIM_PREVIOUS)
+       if (opens_in_grace(net) && open->op_claim_type != NFS4_OPEN_CLAIM_PREVIOUS)
                goto out;
        status = nfserr_no_grace;
-       if (!locks_in_grace(net) && open->op_claim_type == NFS4_OPEN_CLAIM_PREVIOUS)
+       if (!opens_in_grace(net) && open->op_claim_type == NFS4_OPEN_CLAIM_PREVIOUS)
                goto out;
 
        switch (open->op_claim_type) {
@@ -829,7 +827,7 @@ nfsd4_remove(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 {
        __be32 status;
 
-       if (locks_in_grace(SVC_NET(rqstp)))
+       if (opens_in_grace(SVC_NET(rqstp)))
                return nfserr_grace;
        status = nfsd_unlink(rqstp, &cstate->current_fh, 0,
                             remove->rm_name, remove->rm_namelen);
@@ -848,7 +846,7 @@ nfsd4_rename(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 
        if (!cstate->save_fh.fh_dentry)
                return status;
-       if (locks_in_grace(SVC_NET(rqstp)) &&
+       if (opens_in_grace(SVC_NET(rqstp)) &&
                !(cstate->save_fh.fh_export->ex_flags & NFSEXP_NOSUBTREECHECK))
                return nfserr_grace;
        status = nfsd_rename(rqstp, &cstate->save_fh, rename->rn_sname,
@@ -1364,10 +1362,6 @@ nfsd4_layoutcommit(struct svc_rqst *rqstp,
                goto out;
        }
 
-       nfserr = ops->proc_layoutcommit(inode, lcp);
-       if (nfserr)
-               goto out_put_stid;
-
        if (new_size > i_size_read(inode)) {
                lcp->lc_size_chg = 1;
                lcp->lc_newsize = new_size;
@@ -1375,7 +1369,7 @@ nfsd4_layoutcommit(struct svc_rqst *rqstp,
                lcp->lc_size_chg = 0;
        }
 
-out_put_stid:
+       nfserr = ops->proc_layoutcommit(inode, lcp);
        nfs4_put_stid(&ls->ls_stid);
 out:
        return nfserr;
diff --git a/fs/nfsd/nfs4recover.c b/fs/nfsd/nfs4recover.c
index d88ea7b9a85cbf4557d7689872150d08d33e4e64..e3d47091b191db40ccbbdf64c8123fed75b8bab3 100644
@@ -272,6 +272,7 @@ nfsd4_list_rec_dir(recdir_func *f, struct nfsd_net *nn)
                .ctx.actor = nfsd4_build_namelist,
                .names = LIST_HEAD_INIT(ctx.names)
        };
+       struct name_list *entry, *tmp;
        int status;
 
        status = nfs4_save_creds(&original_cred);
@@ -286,9 +287,8 @@ nfsd4_list_rec_dir(recdir_func *f, struct nfsd_net *nn)
 
        status = iterate_dir(nn->rec_file, &ctx.ctx);
        mutex_lock_nested(&d_inode(dir)->i_mutex, I_MUTEX_PARENT);
-       while (!list_empty(&ctx.names)) {
-               struct name_list *entry;
-               entry = list_entry(ctx.names.next, struct name_list, list);
+
+       list_for_each_entry_safe(entry, tmp, &ctx.names, list) {
                if (!status) {
                        struct dentry *dentry;
                        dentry = lookup_one_len(entry->name, dir, HEXDIR_LEN-1);
@@ -304,6 +304,12 @@ nfsd4_list_rec_dir(recdir_func *f, struct nfsd_net *nn)
        }
        mutex_unlock(&d_inode(dir)->i_mutex);
        nfs4_reset_creds(original_cred);
+
+       list_for_each_entry_safe(entry, tmp, &ctx.names, list) {
+               dprintk("NFSD: %s. Left entry %s\n", __func__, entry->name);
+               list_del(&entry->list);
+               kfree(entry);
+       }
        return status;
 }
 
@@ -541,8 +547,7 @@ nfsd4_legacy_tracking_init(struct net *net)
 
        /* XXX: The legacy code won't work in a container */
        if (net != &init_net) {
-               WARN(1, KERN_ERR "NFSD: attempt to initialize legacy client "
-                       "tracking in a container!\n");
+               pr_warn("NFSD: attempt to initialize legacy client tracking in a container ignored.\n");
                return -EINVAL;
        }
 
@@ -1254,8 +1259,7 @@ nfsd4_umh_cltrack_init(struct net *net)
 
        /* XXX: The usermode helper s not working in container yet. */
        if (net != &init_net) {
-               WARN(1, KERN_ERR "NFSD: attempt to initialize umh client "
-                       "tracking in a container!\n");
+               pr_warn("NFSD: attempt to initialize umh client tracking in a container ignored.\n");
                return -EINVAL;
        }
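
The conversion above also shows why the _safe list iterator is required:
entries are freed while the list is still being walked, so the successor
has to be cached before each kfree(). The shape of the final cleanup loop,
in isolation:

    struct name_list *entry, *tmp;

    list_for_each_entry_safe(entry, tmp, &ctx.names, list) {
            list_del(&entry->list);
            kfree(entry);           /* safe: tmp already holds the successor */
    }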
 
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 95202719a1fd26bd27ea71a2fe85ec1c248e8d13..0f1d5691b795751553d02003c32c4ceb90a1ee5c 100644
@@ -777,13 +777,16 @@ hash_delegation_locked(struct nfs4_delegation *dp, struct nfs4_file *fp)
        list_add(&dp->dl_perclnt, &dp->dl_stid.sc_client->cl_delegations);
 }
 
-static void
+static bool
 unhash_delegation_locked(struct nfs4_delegation *dp)
 {
        struct nfs4_file *fp = dp->dl_stid.sc_file;
 
        lockdep_assert_held(&state_lock);
 
+       if (list_empty(&dp->dl_perfile))
+               return false;
+
        dp->dl_stid.sc_type = NFS4_CLOSED_DELEG_STID;
        /* Ensure that deleg break won't try to requeue it */
        ++dp->dl_time;
@@ -792,16 +795,21 @@ unhash_delegation_locked(struct nfs4_delegation *dp)
        list_del_init(&dp->dl_recall_lru);
        list_del_init(&dp->dl_perfile);
        spin_unlock(&fp->fi_lock);
+       return true;
 }
 
 static void destroy_delegation(struct nfs4_delegation *dp)
 {
+       bool unhashed;
+
        spin_lock(&state_lock);
-       unhash_delegation_locked(dp);
+       unhashed = unhash_delegation_locked(dp);
        spin_unlock(&state_lock);
-       put_clnt_odstate(dp->dl_clnt_odstate);
-       nfs4_put_deleg_lease(dp->dl_stid.sc_file);
-       nfs4_put_stid(&dp->dl_stid);
+       if (unhashed) {
+               put_clnt_odstate(dp->dl_clnt_odstate);
+               nfs4_put_deleg_lease(dp->dl_stid.sc_file);
+               nfs4_put_stid(&dp->dl_stid);
+       }
 }
 
 static void revoke_delegation(struct nfs4_delegation *dp)
@@ -990,6 +998,12 @@ release_all_access(struct nfs4_ol_stateid *stp)
        }
 }
 
+static inline void nfs4_free_stateowner(struct nfs4_stateowner *sop)
+{
+       kfree(sop->so_owner.data);
+       sop->so_ops->so_free(sop);
+}
+
 static void nfs4_put_stateowner(struct nfs4_stateowner *sop)
 {
        struct nfs4_client *clp = sop->so_client;
@@ -1000,20 +1014,23 @@ static void nfs4_put_stateowner(struct nfs4_stateowner *sop)
                return;
        sop->so_ops->so_unhash(sop);
        spin_unlock(&clp->cl_lock);
-       kfree(sop->so_owner.data);
-       sop->so_ops->so_free(sop);
+       nfs4_free_stateowner(sop);
 }
 
-static void unhash_ol_stateid(struct nfs4_ol_stateid *stp)
+static bool unhash_ol_stateid(struct nfs4_ol_stateid *stp)
 {
        struct nfs4_file *fp = stp->st_stid.sc_file;
 
        lockdep_assert_held(&stp->st_stateowner->so_client->cl_lock);
 
+       if (list_empty(&stp->st_perfile))
+               return false;
+
        spin_lock(&fp->fi_lock);
-       list_del(&stp->st_perfile);
+       list_del_init(&stp->st_perfile);
        spin_unlock(&fp->fi_lock);
        list_del(&stp->st_perstateowner);
+       return true;
 }
 
 static void nfs4_free_ol_stateid(struct nfs4_stid *stid)
@@ -1063,25 +1080,27 @@ static void put_ol_stateid_locked(struct nfs4_ol_stateid *stp,
        list_add(&stp->st_locks, reaplist);
 }
 
-static void unhash_lock_stateid(struct nfs4_ol_stateid *stp)
+static bool unhash_lock_stateid(struct nfs4_ol_stateid *stp)
 {
        struct nfs4_openowner *oo = openowner(stp->st_openstp->st_stateowner);
 
        lockdep_assert_held(&oo->oo_owner.so_client->cl_lock);
 
        list_del_init(&stp->st_locks);
-       unhash_ol_stateid(stp);
        nfs4_unhash_stid(&stp->st_stid);
+       return unhash_ol_stateid(stp);
 }
 
 static void release_lock_stateid(struct nfs4_ol_stateid *stp)
 {
        struct nfs4_openowner *oo = openowner(stp->st_openstp->st_stateowner);
+       bool unhashed;
 
        spin_lock(&oo->oo_owner.so_client->cl_lock);
-       unhash_lock_stateid(stp);
+       unhashed = unhash_lock_stateid(stp);
        spin_unlock(&oo->oo_owner.so_client->cl_lock);
-       nfs4_put_stid(&stp->st_stid);
+       if (unhashed)
+               nfs4_put_stid(&stp->st_stid);
 }
 
 static void unhash_lockowner_locked(struct nfs4_lockowner *lo)
@@ -1129,7 +1148,7 @@ static void release_lockowner(struct nfs4_lockowner *lo)
        while (!list_empty(&lo->lo_owner.so_stateids)) {
                stp = list_first_entry(&lo->lo_owner.so_stateids,
                                struct nfs4_ol_stateid, st_perstateowner);
-               unhash_lock_stateid(stp);
+               WARN_ON(!unhash_lock_stateid(stp));
                put_ol_stateid_locked(stp, &reaplist);
        }
        spin_unlock(&clp->cl_lock);
@@ -1142,21 +1161,26 @@ static void release_open_stateid_locks(struct nfs4_ol_stateid *open_stp,
 {
        struct nfs4_ol_stateid *stp;
 
+       lockdep_assert_held(&open_stp->st_stid.sc_client->cl_lock);
+
        while (!list_empty(&open_stp->st_locks)) {
                stp = list_entry(open_stp->st_locks.next,
                                struct nfs4_ol_stateid, st_locks);
-               unhash_lock_stateid(stp);
+               WARN_ON(!unhash_lock_stateid(stp));
                put_ol_stateid_locked(stp, reaplist);
        }
 }
 
-static void unhash_open_stateid(struct nfs4_ol_stateid *stp,
+static bool unhash_open_stateid(struct nfs4_ol_stateid *stp,
                                struct list_head *reaplist)
 {
+       bool unhashed;
+
        lockdep_assert_held(&stp->st_stid.sc_client->cl_lock);
 
-       unhash_ol_stateid(stp);
+       unhashed = unhash_ol_stateid(stp);
        release_open_stateid_locks(stp, reaplist);
+       return unhashed;
 }
 
 static void release_open_stateid(struct nfs4_ol_stateid *stp)
@@ -1164,8 +1188,8 @@ static void release_open_stateid(struct nfs4_ol_stateid *stp)
        LIST_HEAD(reaplist);
 
        spin_lock(&stp->st_stid.sc_client->cl_lock);
-       unhash_open_stateid(stp, &reaplist);
-       put_ol_stateid_locked(stp, &reaplist);
+       if (unhash_open_stateid(stp, &reaplist))
+               put_ol_stateid_locked(stp, &reaplist);
        spin_unlock(&stp->st_stid.sc_client->cl_lock);
        free_ol_stateid_reaplist(&reaplist);
 }
@@ -1210,8 +1234,8 @@ static void release_openowner(struct nfs4_openowner *oo)
        while (!list_empty(&oo->oo_owner.so_stateids)) {
                stp = list_first_entry(&oo->oo_owner.so_stateids,
                                struct nfs4_ol_stateid, st_perstateowner);
-               unhash_open_stateid(stp, &reaplist);
-               put_ol_stateid_locked(stp, &reaplist);
+               if (unhash_open_stateid(stp, &reaplist))
+                       put_ol_stateid_locked(stp, &reaplist);
        }
        spin_unlock(&clp->cl_lock);
        free_ol_stateid_reaplist(&reaplist);
@@ -1714,7 +1738,7 @@ __destroy_client(struct nfs4_client *clp)
        spin_lock(&state_lock);
        while (!list_empty(&clp->cl_delegations)) {
                dp = list_entry(clp->cl_delegations.next, struct nfs4_delegation, dl_perclnt);
-               unhash_delegation_locked(dp);
+               WARN_ON(!unhash_delegation_locked(dp));
                list_add(&dp->dl_recall_lru, &reaplist);
        }
        spin_unlock(&state_lock);
@@ -1894,7 +1918,7 @@ static void gen_confirm(struct nfs4_client *clp, struct nfsd_net *nn)
         * __force to keep sparse happy
         */
        verf[0] = (__force __be32)get_seconds();
-       verf[1] = (__force __be32)nn->clientid_counter;
+       verf[1] = (__force __be32)nn->clverifier_counter++;
        memcpy(clp->cl_confirm.data, verf, sizeof(clp->cl_confirm.data));
 }
 
@@ -2241,6 +2265,9 @@ static bool client_has_state(struct nfs4_client *clp)
         * Also note we should probably be using this in 4.0 case too.
         */
        return !list_empty(&clp->cl_openowners)
+#ifdef CONFIG_NFSD_PNFS
+               || !list_empty(&clp->cl_lo_states)
+#endif
                || !list_empty(&clp->cl_delegations)
                || !list_empty(&clp->cl_sessions);
 }
@@ -2547,11 +2574,9 @@ nfsd4_create_session(struct svc_rqst *rqstp,
                        goto out_free_conn;
                cs_slot = &conf->cl_cs_slot;
                status = check_slot_seqid(cr_ses->seqid, cs_slot->sl_seqid, 0);
-               if (status == nfserr_replay_cache) {
-                       status = nfsd4_replay_create_session(cr_ses, cs_slot);
-                       goto out_free_conn;
-               } else if (cr_ses->seqid != cs_slot->sl_seqid + 1) {
-                       status = nfserr_seq_misordered;
+               if (status) {
+                       if (status == nfserr_replay_cache)
+                               status = nfsd4_replay_create_session(cr_ses, cs_slot);
                        goto out_free_conn;
                }
        } else if (unconf) {
@@ -3041,10 +3066,11 @@ nfsd4_setclientid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
        unconf = find_unconfirmed_client_by_name(&clname, nn);
        if (unconf)
                unhash_client_locked(unconf);
-       if (conf && same_verf(&conf->cl_verifier, &clverifier))
+       if (conf && same_verf(&conf->cl_verifier, &clverifier)) {
                /* case 1: probable callback update */
                copy_clid(new, conf);
-       else /* case 4 (new client) or cases 2, 3 (client reboot): */
+               gen_confirm(new, nn);
+       } else /* case 4 (new client) or cases 2, 3 (client reboot): */
                gen_clid(new, nn);
        new->cl_minorversion = 0;
        gen_callback(new, setclid, rqstp);
@@ -3085,10 +3111,11 @@ nfsd4_setclientid_confirm(struct svc_rqst *rqstp,
        /*
         * We try hard to give out unique clientid's, so if we get an
         * attempt to confirm the same clientid with a different cred,
-        * there's a bug somewhere.  Let's charitably assume it's our
-        * bug.
+        * the client may be buggy; this should never happen.
+        *
+        * Nevertheless, RFC 7530 recommends INUSE for this case:
         */
-       status = nfserr_serverfault;
+       status = nfserr_clid_inuse;
        if (unconf && !same_creds(&unconf->cl_cred, &rqstp->rq_cred))
                goto out;
        if (conf && !same_creds(&conf->cl_cred, &rqstp->rq_cred))
@@ -3315,7 +3342,8 @@ alloc_init_open_stateowner(unsigned int strhashval, struct nfsd4_open *open,
                hash_openowner(oo, clp, strhashval);
                ret = oo;
        } else
-               nfs4_free_openowner(&oo->oo_owner);
+               nfs4_free_stateowner(&oo->oo_owner);
+
        spin_unlock(&clp->cl_lock);
        return ret;
 }
@@ -3482,6 +3510,9 @@ static int nfsd4_cb_recall_done(struct nfsd4_callback *cb,
 {
        struct nfs4_delegation *dp = cb_to_delegation(cb);
 
+       if (dp->dl_stid.sc_type == NFS4_CLOSED_DELEG_STID)
+               return 1;
+
        switch (task->tk_status) {
        case 0:
                return 1;
@@ -3885,12 +3916,6 @@ nfs4_upgrade_open(struct svc_rqst *rqstp, struct nfs4_file *fp, struct svc_fh *c
        return status;
 }
 
-static void
-nfs4_set_claim_prev(struct nfsd4_open *open, bool has_session)
-{
-       open->op_openowner->oo_flags |= NFS4_OO_CONFIRMED;
-}
-
 /* Should we give out recallable state?: */
 static bool nfsd4_cb_channel_good(struct nfs4_client *clp)
 {
@@ -3923,7 +3948,7 @@ static struct file_lock *nfs4_alloc_init_lease(struct nfs4_file *fp, int flag)
 static int nfs4_setlease(struct nfs4_delegation *dp)
 {
        struct nfs4_file *fp = dp->dl_stid.sc_file;
-       struct file_lock *fl, *ret;
+       struct file_lock *fl;
        struct file *filp;
        int status = 0;
 
@@ -3934,10 +3959,10 @@ static int nfs4_setlease(struct nfs4_delegation *dp)
        if (!filp) {
                /* We should always have a readable file here */
                WARN_ON_ONCE(1);
+               locks_free_lock(fl);
                return -EBADF;
        }
        fl->fl_file = filp;
-       ret = fl;
        status = vfs_setlease(filp, fl->fl_type, &fl, NULL);
        if (fl)
                locks_free_lock(fl);
@@ -4063,7 +4088,8 @@ nfs4_open_delegation(struct svc_fh *fh, struct nfsd4_open *open,
                case NFS4_OPEN_CLAIM_FH:
                        /*
                         * Let's not give out any delegations till everyone's
-                        * had the chance to reclaim theirs....
+                        * had the chance to reclaim theirs, *and* until
+                        * NLM locks have all been reclaimed:
                         */
                        if (locks_in_grace(clp->net))
                                goto out_no_deleg;
@@ -4209,7 +4235,7 @@ out:
        if (fp)
                put_nfs4_file(fp);
        if (status == 0 && open->op_claim_type == NFS4_OPEN_CLAIM_PREVIOUS)
-               nfs4_set_claim_prev(open, nfsd4_has_session(&resp->cstate));
+               open->op_openowner->oo_flags |= NFS4_OO_CONFIRMED;
        /*
        * To finish the open response, we just need to set the rflags.
        */
@@ -4338,14 +4364,12 @@ nfs4_laundromat(struct nfsd_net *nn)
        spin_lock(&state_lock);
        list_for_each_safe(pos, next, &nn->del_recall_lru) {
                dp = list_entry (pos, struct nfs4_delegation, dl_recall_lru);
-               if (net_generic(dp->dl_stid.sc_client->net, nfsd_net_id) != nn)
-                       continue;
                if (time_after((unsigned long)dp->dl_time, (unsigned long)cutoff)) {
                        t = dp->dl_time - cutoff;
                        new_timeo = min(new_timeo, t);
                        break;
                }
-               unhash_delegation_locked(dp);
+               WARN_ON(!unhash_delegation_locked(dp));
                list_add(&dp->dl_recall_lru, &reaplist);
        }
        spin_unlock(&state_lock);
@@ -4440,7 +4464,7 @@ check_special_stateids(struct net *net, svc_fh *current_fh, stateid_t *stateid,
 {
        if (ONE_STATEID(stateid) && (flags & RD_STATE))
                return nfs_ok;
-       else if (locks_in_grace(net)) {
+       else if (opens_in_grace(net)) {
                /* Answer in remaining cases depends on existence of
                 * conflicting state; so we must wait out the grace period. */
                return nfserr_grace;
@@ -4459,7 +4483,7 @@ check_special_stateids(struct net *net, svc_fh *current_fh, stateid_t *stateid,
 static inline int
 grace_disallows_io(struct net *net, struct inode *inode)
 {
-       return locks_in_grace(net) && mandatory_lock(inode);
+       return opens_in_grace(net) && mandatory_lock(inode);
 }
 
 /* Returns true iff a is later than b: */
@@ -4751,7 +4775,7 @@ nfsd4_free_stateid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
                if (check_for_locks(stp->st_stid.sc_file,
                                    lockowner(stp->st_stateowner)))
                        break;
-               unhash_lock_stateid(stp);
+               WARN_ON(!unhash_lock_stateid(stp));
                spin_unlock(&cl->cl_lock);
                nfs4_put_stid(s);
                ret = nfs_ok;
@@ -4967,20 +4991,23 @@ out:
 static void nfsd4_close_open_stateid(struct nfs4_ol_stateid *s)
 {
        struct nfs4_client *clp = s->st_stid.sc_client;
+       bool unhashed;
        LIST_HEAD(reaplist);
 
        s->st_stid.sc_type = NFS4_CLOSED_STID;
        spin_lock(&clp->cl_lock);
-       unhash_open_stateid(s, &reaplist);
+       unhashed = unhash_open_stateid(s, &reaplist);
 
        if (clp->cl_minorversion) {
-               put_ol_stateid_locked(s, &reaplist);
+               if (unhashed)
+                       put_ol_stateid_locked(s, &reaplist);
                spin_unlock(&clp->cl_lock);
                free_ol_stateid_reaplist(&reaplist);
        } else {
                spin_unlock(&clp->cl_lock);
                free_ol_stateid_reaplist(&reaplist);
-               move_to_close_lru(s, clp->net);
+               if (unhashed)
+                       move_to_close_lru(s, clp->net);
        }
 }
 
@@ -5045,9 +5072,6 @@ out:
        return status;
 }
 
-
-#define LOFF_OVERFLOW(start, len)      ((u64)(len) > ~(u64)(start))
-
 static inline u64
 end_offset(u64 start, u64 len)
 {
@@ -5139,8 +5163,7 @@ nevermind:
 }
 
 static struct nfs4_lockowner *
-find_lockowner_str_locked(clientid_t *clid, struct xdr_netobj *owner,
-               struct nfs4_client *clp)
+find_lockowner_str_locked(struct nfs4_client *clp, struct xdr_netobj *owner)
 {
        unsigned int strhashval = ownerstr_hashval(owner);
        struct nfs4_stateowner *so;
@@ -5158,13 +5181,12 @@ find_lockowner_str_locked(clientid_t *clid, struct xdr_netobj *owner,
 }
 
 static struct nfs4_lockowner *
-find_lockowner_str(clientid_t *clid, struct xdr_netobj *owner,
-               struct nfs4_client *clp)
+find_lockowner_str(struct nfs4_client *clp, struct xdr_netobj *owner)
 {
        struct nfs4_lockowner *lo;
 
        spin_lock(&clp->cl_lock);
-       lo = find_lockowner_str_locked(clid, owner, clp);
+       lo = find_lockowner_str_locked(clp, owner);
        spin_unlock(&clp->cl_lock);
        return lo;
 }
@@ -5208,14 +5230,14 @@ alloc_init_lock_stateowner(unsigned int strhashval, struct nfs4_client *clp,
        lo->lo_owner.so_seqid = lock->lk_new_lock_seqid;
        lo->lo_owner.so_ops = &lockowner_ops;
        spin_lock(&clp->cl_lock);
-       ret = find_lockowner_str_locked(&clp->cl_clientid,
-                       &lock->lk_new_owner, clp);
+       ret = find_lockowner_str_locked(clp, &lock->lk_new_owner);
        if (ret == NULL) {
                list_add(&lo->lo_owner.so_strhash,
                         &clp->cl_ownerstr_hashtbl[strhashval]);
                ret = lo;
        } else
-               nfs4_free_lockowner(&lo->lo_owner);
+               nfs4_free_stateowner(&lo->lo_owner);
+
        spin_unlock(&clp->cl_lock);
        return ret;
 }
@@ -5298,8 +5320,8 @@ find_or_create_lock_stateid(struct nfs4_lockowner *lo, struct nfs4_file *fi,
 static int
 check_lock_length(u64 offset, u64 length)
 {
-       return ((length == 0)  || ((length != NFS4_MAX_UINT64) &&
-            LOFF_OVERFLOW(offset, length)));
+       return ((length == 0) || ((length != NFS4_MAX_UINT64) &&
+               (length > ~offset)));
 }
 
 static void get_lock_access(struct nfs4_ol_stateid *lock_stp, u32 access)
@@ -5328,9 +5350,9 @@ lookup_or_create_lock_state(struct nfsd4_compound_state *cstate,
        struct nfs4_lockowner *lo;
        unsigned int strhashval;
 
-       lo = find_lockowner_str(&cl->cl_clientid, &lock->v.new.owner, cl);
+       lo = find_lockowner_str(cl, &lock->lk_new_owner);
        if (!lo) {
-               strhashval = ownerstr_hashval(&lock->v.new.owner);
+               strhashval = ownerstr_hashval(&lock->lk_new_owner);
                lo = alloc_init_lock_stateowner(strhashval, cl, ost, lock);
                if (lo == NULL)
                        return nfserr_jukebox;
@@ -5391,7 +5413,7 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
        if (lock->lk_is_new) {
                if (nfsd4_has_session(cstate))
                        /* See rfc 5661 18.10.3: given clientid is ignored: */
-                       memcpy(&lock->v.new.clientid,
+                       memcpy(&lock->lk_new_clientid,
                                &cstate->session->se_client->cl_clientid,
                                sizeof(clientid_t));
 
@@ -5409,7 +5431,7 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
                open_sop = openowner(open_stp->st_stateowner);
                status = nfserr_bad_stateid;
                if (!same_clid(&open_sop->oo_owner.so_client->cl_clientid,
-                                               &lock->v.new.clientid))
+                                               &lock->lk_new_clientid))
                        goto out;
                status = lookup_or_create_lock_state(cstate, open_stp, lock,
                                                        &lock_stp, &new);
@@ -5603,8 +5625,7 @@ nfsd4_lockt(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
                goto out;
        }
 
-       lo = find_lockowner_str(&lockt->lt_clientid, &lockt->lt_owner,
-                               cstate->clp);
+       lo = find_lockowner_str(cstate->clp, &lockt->lt_owner);
        if (lo)
                file_lock->fl_owner = (fl_owner_t)lo;
        file_lock->fl_pid = current->tgid;
@@ -6019,7 +6040,7 @@ nfsd_inject_add_lock_to_list(struct nfs4_ol_stateid *lst,
 
 static u64 nfsd_foreach_client_lock(struct nfs4_client *clp, u64 max,
                                    struct list_head *collect,
-                                   void (*func)(struct nfs4_ol_stateid *))
+                                   bool (*func)(struct nfs4_ol_stateid *))
 {
        struct nfs4_openowner *oop;
        struct nfs4_ol_stateid *stp, *st_next;
@@ -6033,9 +6054,9 @@ static u64 nfsd_foreach_client_lock(struct nfs4_client *clp, u64 max,
                        list_for_each_entry_safe(lst, lst_next,
                                        &stp->st_locks, st_locks) {
                                if (func) {
-                                       func(lst);
-                                       nfsd_inject_add_lock_to_list(lst,
-                                                               collect);
+                                       if (func(lst))
+                                               nfsd_inject_add_lock_to_list(lst,
+                                                                       collect);
                                }
                                ++count;
                                /*
@@ -6305,7 +6326,7 @@ static u64 nfsd_find_all_delegations(struct nfs4_client *clp, u64 max,
                                continue;
 
                        atomic_inc(&clp->cl_refcount);
-                       unhash_delegation_locked(dp);
+                       WARN_ON(!unhash_delegation_locked(dp));
                        list_add(&dp->dl_recall_lru, victims);
                }
                ++count;
@@ -6584,6 +6605,7 @@ nfs4_state_start_net(struct net *net)
                return ret;
        nn->boot_time = get_seconds();
        nn->grace_ended = false;
+       nn->nfsd4_manager.block_opens = true;
        locks_start_grace(net, &nn->nfsd4_manager);
        nfsd4_client_tracking_init(net);
        printk(KERN_INFO "NFSD: starting %ld-second grace period (net %p)\n",
@@ -6602,7 +6624,7 @@ nfs4_state_start(void)
        ret = set_callback_cred();
        if (ret)
                return -ENOMEM;
-       laundry_wq = create_singlethread_workqueue("nfsd4");
+       laundry_wq = alloc_workqueue("%s", WQ_UNBOUND, 0, "nfsd4");
        if (laundry_wq == NULL) {
                ret = -ENOMEM;
                goto out_recovery;
@@ -6635,7 +6657,7 @@ nfs4_state_shutdown_net(struct net *net)
        spin_lock(&state_lock);
        list_for_each_safe(pos, next, &nn->del_recall_lru) {
                dp = list_entry (pos, struct nfs4_delegation, dl_recall_lru);
-               unhash_delegation_locked(dp);
+               WARN_ON(!unhash_delegation_locked(dp));
                list_add(&dp->dl_recall_lru, &reaplist);
        }
        spin_unlock(&state_lock);
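
The change that repeats through these hunks, and that fixes the DELEGRETURN
vs CB_RECALL crash called out in the pull message, is a single pattern:
every unhash_* helper now reports whether it actually removed the object,
and only the caller whose unhash won drops the hash reference. A generic
sketch of the pattern (names hypothetical, not from the tree):

    /* Idempotent teardown: two racing callers may both get here, but
     * only the one whose unhash succeeds puts the table's reference. */
    static bool unhash_obj_locked(struct obj *o)
    {
            lockdep_assert_held(&table_lock);
            if (list_empty(&o->hash_link))
                    return false;           /* already unhashed: lost the race */
            list_del_init(&o->hash_link);   /* _init keeps later checks safe */
            return true;
    }

    static void destroy_obj(struct obj *o)
    {
            bool unhashed;

            spin_lock(&table_lock);
            unhashed = unhash_obj_locked(o);
            spin_unlock(&table_lock);
            if (unhashed)
                    put_obj(o);             /* drop exactly one reference */
    }

Callers that hold the only path to the object (__destroy_client, the
laundromat) instead assert WARN_ON(!unhash_...()), since losing the race
there would indicate a different bug.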
diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index 75e0563c09d1911d927501ee52b53a3bd988940e..51c9e9ca39a4d7c5e0537a2cafed4ec9caeb789d 100644
@@ -2140,6 +2140,27 @@ nfsd4_encode_aclname(struct xdr_stream *xdr, struct svc_rqst *rqstp,
                return nfsd4_encode_user(xdr, rqstp, ace->who_uid);
 }
 
+static inline __be32
+nfsd4_encode_layout_type(struct xdr_stream *xdr, enum pnfs_layouttype layout_type)
+{
+       __be32 *p;
+
+       if (layout_type) {
+               p = xdr_reserve_space(xdr, 8);
+               if (!p)
+                       return nfserr_resource;
+               *p++ = cpu_to_be32(1);
+               *p++ = cpu_to_be32(layout_type);
+       } else {
+               p = xdr_reserve_space(xdr, 4);
+               if (!p)
+                       return nfserr_resource;
+               *p++ = cpu_to_be32(0);
+       }
+
+       return 0;
+}
+
 #define WORD0_ABSENT_FS_ATTRS (FATTR4_WORD0_FS_LOCATIONS | FATTR4_WORD0_FSID | \
                              FATTR4_WORD0_RDATTR_ERROR)
 #define WORD1_ABSENT_FS_ATTRS FATTR4_WORD1_MOUNTED_ON_FILEID
@@ -2205,6 +2226,39 @@ static int get_parent_attributes(struct svc_export *exp, struct kstat *stat)
        return err;
 }
 
+static __be32
+nfsd4_encode_bitmap(struct xdr_stream *xdr, u32 bmval0, u32 bmval1, u32 bmval2)
+{
+       __be32 *p;
+
+       if (bmval2) {
+               p = xdr_reserve_space(xdr, 16);
+               if (!p)
+                       goto out_resource;
+               *p++ = cpu_to_be32(3);
+               *p++ = cpu_to_be32(bmval0);
+               *p++ = cpu_to_be32(bmval1);
+               *p++ = cpu_to_be32(bmval2);
+       } else if (bmval1) {
+               p = xdr_reserve_space(xdr, 12);
+               if (!p)
+                       goto out_resource;
+               *p++ = cpu_to_be32(2);
+               *p++ = cpu_to_be32(bmval0);
+               *p++ = cpu_to_be32(bmval1);
+       } else {
+               p = xdr_reserve_space(xdr, 8);
+               if (!p)
+                       goto out_resource;
+               *p++ = cpu_to_be32(1);
+               *p++ = cpu_to_be32(bmval0);
+       }
+
+       return 0;
+out_resource:
+       return nfserr_resource;
+}
+
 /*
  * Note: @fhp can be NULL; in this case, we might have to compose the filehandle
  * ourselves.
@@ -2301,28 +2355,9 @@ nfsd4_encode_fattr(struct xdr_stream *xdr, struct svc_fh *fhp,
        }
 #endif /* CONFIG_NFSD_V4_SECURITY_LABEL */
 
-       if (bmval2) {
-               p = xdr_reserve_space(xdr, 16);
-               if (!p)
-                       goto out_resource;
-               *p++ = cpu_to_be32(3);
-               *p++ = cpu_to_be32(bmval0);
-               *p++ = cpu_to_be32(bmval1);
-               *p++ = cpu_to_be32(bmval2);
-       } else if (bmval1) {
-               p = xdr_reserve_space(xdr, 12);
-               if (!p)
-                       goto out_resource;
-               *p++ = cpu_to_be32(2);
-               *p++ = cpu_to_be32(bmval0);
-               *p++ = cpu_to_be32(bmval1);
-       } else {
-               p = xdr_reserve_space(xdr, 8);
-               if (!p)
-                       goto out_resource;
-               *p++ = cpu_to_be32(1);
-               *p++ = cpu_to_be32(bmval0);
-       }
+       status = nfsd4_encode_bitmap(xdr, bmval0, bmval1, bmval2);
+       if (status)
+               goto out;
 
        attrlen_offset = xdr->buf->len;
        p = xdr_reserve_space(xdr, 4);
@@ -2675,6 +2710,9 @@ out_acl:
                *p++ = cpu_to_be32(stat.mtime.tv_nsec);
        }
        if (bmval1 & FATTR4_WORD1_MOUNTED_ON_FILEID) {
+               struct kstat parent_stat;
+               u64 ino = stat.ino;
+
                p = xdr_reserve_space(xdr, 8);
                if (!p)
                        goto out_resource;
@@ -2683,25 +2721,25 @@ out_acl:
                 * and this is the root of a cross-mounted filesystem.
                 */
                if (ignore_crossmnt == 0 &&
-                   dentry == exp->ex_path.mnt->mnt_root)
-                       get_parent_attributes(exp, &stat);
-               p = xdr_encode_hyper(p, stat.ino);
+                   dentry == exp->ex_path.mnt->mnt_root) {
+                       err = get_parent_attributes(exp, &parent_stat);
+                       if (err)
+                               goto out_nfserr;
+                       ino = parent_stat.ino;
+               }
+               p = xdr_encode_hyper(p, ino);
        }
 #ifdef CONFIG_NFSD_PNFS
-       if ((bmval1 & FATTR4_WORD1_FS_LAYOUT_TYPES) ||
-           (bmval2 & FATTR4_WORD2_LAYOUT_TYPES)) {
-               if (exp->ex_layout_type) {
-                       p = xdr_reserve_space(xdr, 8);
-                       if (!p)
-                               goto out_resource;
-                       *p++ = cpu_to_be32(1);
-                       *p++ = cpu_to_be32(exp->ex_layout_type);
-               } else {
-                       p = xdr_reserve_space(xdr, 4);
-                       if (!p)
-                               goto out_resource;
-                       *p++ = cpu_to_be32(0);
-               }
+       if (bmval1 & FATTR4_WORD1_FS_LAYOUT_TYPES) {
+               status = nfsd4_encode_layout_type(xdr, exp->ex_layout_type);
+               if (status)
+                       goto out;
+       }
+
+       if (bmval2 & FATTR4_WORD2_LAYOUT_TYPES) {
+               status = nfsd4_encode_layout_type(xdr, exp->ex_layout_type);
+               if (status)
+                       goto out;
        }
 
        if (bmval2 & FATTR4_WORD2_LAYOUT_BLKSIZE) {
@@ -2711,21 +2749,20 @@ out_acl:
                *p++ = cpu_to_be32(stat.blksize);
        }
 #endif /* CONFIG_NFSD_PNFS */
+       if (bmval2 & FATTR4_WORD2_SUPPATTR_EXCLCREAT) {
+               status = nfsd4_encode_bitmap(xdr, NFSD_SUPPATTR_EXCLCREAT_WORD0,
+                                                 NFSD_SUPPATTR_EXCLCREAT_WORD1,
+                                                 NFSD_SUPPATTR_EXCLCREAT_WORD2);
+               if (status)
+                       goto out;
+       }
+
        if (bmval2 & FATTR4_WORD2_SECURITY_LABEL) {
                status = nfsd4_encode_security_label(xdr, rqstp, context,
                                                                contextlen);
                if (status)
                        goto out;
        }
-       if (bmval2 & FATTR4_WORD2_SUPPATTR_EXCLCREAT) {
-               p = xdr_reserve_space(xdr, 16);
-               if (!p)
-                       goto out_resource;
-               *p++ = cpu_to_be32(3);
-               *p++ = cpu_to_be32(NFSD_SUPPATTR_EXCLCREAT_WORD0);
-               *p++ = cpu_to_be32(NFSD_SUPPATTR_EXCLCREAT_WORD1);
-               *p++ = cpu_to_be32(NFSD_SUPPATTR_EXCLCREAT_WORD2);
-       }
 
        attrlen = htonl(xdr->buf->len - attrlen_offset - 4);
        write_bytes_to_xdr_buf(xdr->buf, attrlen_offset, &attrlen, 4);
@@ -3044,13 +3081,12 @@ nfsd4_encode_create(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_
        __be32 *p;
 
        if (!nfserr) {
-               p = xdr_reserve_space(xdr, 32);
+               p = xdr_reserve_space(xdr, 20);
                if (!p)
                        return nfserr_resource;
-               p = encode_cinfo(p, &create->cr_cinfo);
-               *p++ = cpu_to_be32(2);
-               *p++ = cpu_to_be32(create->cr_bmval[0]);
-               *p++ = cpu_to_be32(create->cr_bmval[1]);
+               encode_cinfo(p, &create->cr_cinfo);
+               nfserr = nfsd4_encode_bitmap(xdr, create->cr_bmval[0],
+                               create->cr_bmval[1], create->cr_bmval[2]);
        }
        return nfserr;
 }
@@ -3190,16 +3226,22 @@ nfsd4_encode_open(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_op
        nfserr = nfsd4_encode_stateid(xdr, &open->op_stateid);
        if (nfserr)
                goto out;
-       p = xdr_reserve_space(xdr, 40);
+       p = xdr_reserve_space(xdr, 24);
        if (!p)
                return nfserr_resource;
        p = encode_cinfo(p, &open->op_cinfo);
        *p++ = cpu_to_be32(open->op_rflags);
-       *p++ = cpu_to_be32(2);
-       *p++ = cpu_to_be32(open->op_bmval[0]);
-       *p++ = cpu_to_be32(open->op_bmval[1]);
-       *p++ = cpu_to_be32(open->op_delegate_type);
 
+       nfserr = nfsd4_encode_bitmap(xdr, open->op_bmval[0], open->op_bmval[1],
+                                       open->op_bmval[2]);
+       if (nfserr)
+               goto out;
+
+       p = xdr_reserve_space(xdr, 4);
+       if (!p)
+               return nfserr_resource;
+
+       *p++ = cpu_to_be32(open->op_delegate_type);
        switch (open->op_delegate_type) {
        case NFS4_OPEN_DELEGATE_NONE:
                break;
index 9277cc91c21b051011c4dbfe4d677b32d37ad37f..ad4e2377dd636cf893e7a02e61565730207045c5 100644 (file)
@@ -391,6 +391,14 @@ static int nfsd_get_default_max_blksize(void)
        return ret;
 }
 
+static struct svc_serv_ops nfsd_thread_sv_ops = {
+       .svo_shutdown           = nfsd_last_thread,
+       .svo_function           = nfsd,
+       .svo_enqueue_xprt       = svc_xprt_do_enqueue,
+       .svo_setup              = svc_set_num_threads,
+       .svo_module             = THIS_MODULE,
+};
+
 int nfsd_create_serv(struct net *net)
 {
        int error;
@@ -405,7 +413,7 @@ int nfsd_create_serv(struct net *net)
                nfsd_max_blksize = nfsd_get_default_max_blksize();
        nfsd_reset_versions();
        nn->nfsd_serv = svc_create_pooled(&nfsd_program, nfsd_max_blksize,
-                                     nfsd_last_thread, nfsd, THIS_MODULE);
+                                               &nfsd_thread_sv_ops);
        if (nn->nfsd_serv == NULL)
                return -ENOMEM;
 
@@ -500,8 +508,8 @@ int nfsd_set_nrthreads(int n, int *nthreads, struct net *net)
        /* apply the new numbers */
        svc_get(nn->nfsd_serv);
        for (i = 0; i < n; i++) {
-               err = svc_set_num_threads(nn->nfsd_serv, &nn->nfsd_serv->sv_pools[i],
-                                         nthreads[i]);
+               err = nn->nfsd_serv->sv_ops->svo_setup(nn->nfsd_serv,
+                               &nn->nfsd_serv->sv_pools[i], nthreads[i]);
                if (err)
                        break;
        }
@@ -540,7 +548,8 @@ nfsd_svc(int nrservs, struct net *net)
        error = nfsd_startup_net(nrservs, net);
        if (error)
                goto out_destroy;
-       error = svc_set_num_threads(nn->nfsd_serv, NULL, nrservs);
+       error = nn->nfsd_serv->sv_ops->svo_setup(nn->nfsd_serv,
+                       NULL, nrservs);
        if (error)
                goto out_shutdown;
        /* We are holding a reference to nn->nfsd_serv which
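With the callbacks bundled into one ops structure, any other RPC service converts the same way. A minimal sketch, where example_program, example_last_thread and example_thread_fn are placeholders rather than real symbols:

    static struct svc_serv_ops example_sv_ops = {
            .svo_shutdown           = example_last_thread,  /* last thread exits */
            .svo_function           = example_thread_fn,    /* per-thread main loop */
            .svo_enqueue_xprt       = svc_xprt_do_enqueue,  /* default enqueueing */
            .svo_setup              = svc_set_num_threads,  /* kthread-based setup */
            .svo_module             = THIS_MODULE,
    };

    static int example_create_serv(struct net *net)
    {
            struct svc_serv *serv;

            serv = svc_create_pooled(&example_program, 0, &example_sv_ops);
            if (!serv)
                    return -ENOMEM;
            /* thread counts are now adjusted through the indirection: */
            return serv->sv_ops->svo_setup(serv, NULL, 4);
    }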
index 4874ce515fc10024a5bda53e16511b43a8a5f603..583ffc13cae27d165be052609f37dc7428b54dcc 100644 (file)
@@ -67,8 +67,8 @@ struct nfsd4_callback {
        struct rpc_message cb_msg;
        struct nfsd4_callback_ops *cb_ops;
        struct work_struct cb_work;
+       int cb_seq_status;
        int cb_status;
-       bool cb_update_seq_nr;
        bool cb_need_restart;
 };
 
index b5e077a6e7d4d33d3da25bde15842d68199f6ae8..45c04979e7b3c1fed3d14c387c50de9ce95a729a 100644 (file)
@@ -1249,12 +1249,6 @@ out_nfserr:
 
 #ifdef CONFIG_NFSD_V3
 
-static inline int nfsd_create_is_exclusive(int createmode)
-{
-       return createmode == NFS3_CREATE_EXCLUSIVE
-              || createmode == NFS4_CREATE_EXCLUSIVE4_1;
-}
-
 /*
  * NFSv3 and NFSv4 version of nfsd_create
  */
index 5be875e3e638c733d67bc3efff584e2022c4b18f..fee2451ae24824830481e1d4005922e63f39dabe 100644 (file)
@@ -131,4 +131,10 @@ static inline __be32 fh_getattr(struct svc_fh *fh, struct kstat *stat)
        return nfserrno(vfs_getattr(&p, stat));
 }
 
+static inline int nfsd_create_is_exclusive(int createmode)
+{
+       return createmode == NFS3_CREATE_EXCLUSIVE
+              || createmode == NFS4_CREATE_EXCLUSIVE4_1;
+}
+
 #endif /* LINUX_NFSD_VFS_H */
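Moving the helper into the shared header lets the NFSv4 XDR and open paths make the same test as the create path in vfs.c. A hypothetical call site (createmode and the comment are illustrative only):

    if (nfsd_create_is_exclusive(createmode)) {
            /* EXCLUSIVE create: the client's verifier is carried in the
             * new file's atime/mtime, so don't let a SETATTR clobber it. */
    }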
index 864203c10dbcb3972a88cec15a1db52be674b439..dc634a55163be60a95379970403c998d2b663e7c 100644 (file)
@@ -943,12 +943,18 @@ struct lock_manager_operations {
 
 struct lock_manager {
        struct list_head list;
+       /*
+        * NFSv4 and up also want opens blocked during the grace period;
+        * NLM doesn't care:
+        */
+       bool block_opens;
 };
 
 struct net;
 void locks_start_grace(struct net *, struct lock_manager *);
 void locks_end_grace(struct lock_manager *);
 int locks_in_grace(struct net *);
+int opens_in_grace(struct net *);
 
 /* that will die - we need it for nfs_lock_info */
 #include <linux/nfs_fs_i.h>
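One plausible implementation of opens_in_grace(), assuming the per-net grace list that fs/nfs_common/grace.c already keeps (a sketch, not the patch body):

    static bool __state_in_grace(struct net *net, bool open)
    {
            struct list_head *grace_list = net_generic(net, grace_net_id);
            struct lock_manager *lm;

            if (!open)
                    return !list_empty(grace_list);

            list_for_each_entry(lm, grace_list, list)
                    if (lm->block_opens)
                            return true;
            return false;
    }

    int opens_in_grace(struct net *net)
    {
            return __state_in_grace(net, true);
    }

With this split, a lock manager that leaves block_opens false (NLM) keeps its grace period out of the way of NFSv4 opens, while one that sets it (the v4 server) still blocks them.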
index 437ddb6c4aefbcf4d1b5abbacd6cfcd1ff5a9b3b..03d3b4c92d9f1171134e4dcea78080d4f42ccafe 100644 (file)
@@ -46,7 +46,7 @@
  * 
  */
 struct cache_head {
-       struct cache_head * next;
+       struct hlist_node       cache_list;
       time_t          expiry_time;    /* After this time, don't use the data */
        time_t          last_refresh;   /* If CACHE_PENDING, this is when upcall 
                                         * was sent, else this is when update was received
@@ -73,7 +73,7 @@ struct cache_detail_pipefs {
 struct cache_detail {
        struct module *         owner;
        int                     hash_size;
-       struct cache_head **    hash_table;
+       struct hlist_head *     hash_table;
        rwlock_t                hash_lock;
 
        atomic_t                inuse; /* active user-space update or lookup */
@@ -224,6 +224,11 @@ extern int sunrpc_cache_register_pipefs(struct dentry *parent, const char *,
                                        umode_t, struct cache_detail *);
 extern void sunrpc_cache_unregister_pipefs(struct cache_detail *);
 
+/* Must store cache_detail in seq_file->private if using next three functions */
+extern void *cache_seq_start(struct seq_file *file, loff_t *pos);
+extern void *cache_seq_next(struct seq_file *file, void *p, loff_t *pos);
+extern void cache_seq_stop(struct seq_file *file, void *p);
+
 extern void qword_add(char **bpp, int *lp, char *str);
 extern void qword_addhex(char **bpp, int *lp, char *buf, int blen);
 extern int qword_get(char **bpp, char *dest, int bufsize);
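A cache that exposes its contents through a seq_file can now reuse these exported iterators instead of carrying private copies, as long as it stores its cache_detail in seq_file->private. A sketch, with example_show and example_cd as stand-ins:

    static const struct seq_operations example_cache_seq_ops = {
            .start  = cache_seq_start,
            .next   = cache_seq_next,
            .stop   = cache_seq_stop,
            .show   = example_show,         /* cache-specific formatting */
    };

    static int example_cache_open(struct inode *inode, struct file *file)
    {
            int err = seq_open(file, &example_cache_seq_ops);

            if (!err)
                    ((struct seq_file *)file->private_data)->private = example_cd;
            return err;
    }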
index fae6fb947fc8671e238d4db1d9acd6ff9b59098a..cc0fc712bb8238e27446dc92899906e55cff4de7 100644 (file)
 #include <linux/wait.h>
 #include <linux/mm.h>
 
-/*
- * This is the RPC server thread function prototype
- */
-typedef int            (*svc_thread_fn)(void *);
-
 /* statistics for svc_pool structures */
 struct svc_pool_stats {
        atomic_long_t   packets;
@@ -54,6 +49,25 @@ struct svc_pool {
        unsigned long           sp_flags;
 } ____cacheline_aligned_in_smp;
 
+struct svc_serv;
+
+struct svc_serv_ops {
+       /* Callback to use when last thread exits. */
+       void            (*svo_shutdown)(struct svc_serv *, struct net *);
+
+       /* function for service threads to run */
+       int             (*svo_function)(void *);
+
+       /* queue up a transport for servicing */
+       void            (*svo_enqueue_xprt)(struct svc_xprt *);
+
+       /* set up thread (or whatever) execution context */
+       int             (*svo_setup)(struct svc_serv *, struct svc_pool *, int);
+
+       /* optional module to count when adding threads (pooled svcs only) */
+       struct module   *svo_module;
+};
+
 /*
  * RPC service.
  *
@@ -85,16 +99,7 @@ struct svc_serv {
 
        unsigned int            sv_nrpools;     /* number of thread pools */
        struct svc_pool *       sv_pools;       /* array of thread pools */
-
-       void                    (*sv_shutdown)(struct svc_serv *serv,
-                                              struct net *net);
-                                               /* Callback to use when last thread
-                                                * exits.
-                                                */
-
-       struct module *         sv_module;      /* optional module to count when
-                                                * adding threads */
-       svc_thread_fn           sv_function;    /* main function for threads */
+       struct svc_serv_ops     *sv_ops;        /* server operations */
 #if defined(CONFIG_SUNRPC_BACKCHANNEL)
        struct list_head        sv_cb_list;     /* queue for callback requests
                                                 * that arrive over the same
@@ -422,6 +427,29 @@ struct svc_procedure {
        unsigned int            pc_xdrressize;  /* maximum size of XDR reply */
 };
 
+/*
+ * Mode for mapping cpus to pools.
+ */
+enum {
+       SVC_POOL_AUTO = -1,     /* choose one of the others */
+       SVC_POOL_GLOBAL,        /* no mapping, just a single global pool
+                                * (legacy & UP mode) */
+       SVC_POOL_PERCPU,        /* one pool per cpu */
+       SVC_POOL_PERNODE        /* one pool per numa node */
+};
+
+struct svc_pool_map {
+       int count;                      /* How many svc_servs use us */
+       int mode;                       /* Note: int not enum to avoid
+                                        * warnings about "enumeration value
+                                        * not handled in switch" */
+       unsigned int npools;
+       unsigned int *pool_to;          /* maps pool id to cpu or node */
+       unsigned int *to_pool;          /* maps cpu or node to pool id */
+};
+
+extern struct svc_pool_map svc_pool_map;
+
 /*
  * Function prototypes.
  */
@@ -429,13 +457,17 @@ int svc_rpcb_setup(struct svc_serv *serv, struct net *net);
 void svc_rpcb_cleanup(struct svc_serv *serv, struct net *net);
 int svc_bind(struct svc_serv *serv, struct net *net);
 struct svc_serv *svc_create(struct svc_program *, unsigned int,
-                           void (*shutdown)(struct svc_serv *, struct net *net));
+                           struct svc_serv_ops *);
+struct svc_rqst *svc_rqst_alloc(struct svc_serv *serv,
+                                       struct svc_pool *pool, int node);
 struct svc_rqst *svc_prepare_thread(struct svc_serv *serv,
                                        struct svc_pool *pool, int node);
+void              svc_rqst_free(struct svc_rqst *);
 void              svc_exit_thread(struct svc_rqst *);
+unsigned int      svc_pool_map_get(void);
+void              svc_pool_map_put(void);
 struct svc_serv *  svc_create_pooled(struct svc_program *, unsigned int,
-                       void (*shutdown)(struct svc_serv *, struct net *net),
-                       svc_thread_fn, struct module *);
+                       struct svc_serv_ops *);
 int               svc_set_num_threads(struct svc_serv *, struct svc_pool *, int);
 int               svc_pool_stats_open(struct svc_serv *serv, struct file *file);
 void              svc_destroy(struct svc_serv *);
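Exporting svc_pool_map and its reference helpers lets code outside svc.c size per-pool state. A minimal sketch (the function name is an assumption):

    static unsigned int example_npools(void)
    {
            /* takes a reference and initialises the map on first use */
            unsigned int npools = svc_pool_map_get();

            /* ... allocate or inspect per-pool state here ... */

            svc_pool_map_put();     /* drop the reference again */
            return npools;
    }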
index cb94ee4181d4937bd99e125891fe633fa6763719..d5ee6d8b7c5809f7d71e3966a2961d8d1b7ac021 100644 (file)
@@ -172,13 +172,6 @@ struct svcxprt_rdma {
 #define RDMAXPRT_SQ_PENDING    2
 #define RDMAXPRT_CONN_PENDING  3
 
-#define RPCRDMA_MAX_SVC_SEGS   (64)    /* server max scatter/gather */
-#if RPCSVC_MAXPAYLOAD < (RPCRDMA_MAX_SVC_SEGS << PAGE_SHIFT)
-#define RPCRDMA_MAXPAYLOAD     RPCSVC_MAXPAYLOAD
-#else
-#define RPCRDMA_MAXPAYLOAD     (RPCRDMA_MAX_SVC_SEGS << PAGE_SHIFT)
-#endif
-
 #define RPCRDMA_LISTEN_BACKLOG  10
 /* The default ORD value is based on two outstanding full-size writes with a
  * page size of 4k, or 32k * 2 ops / 4k = 16 outstanding RDMA_READ.  */
@@ -187,6 +180,8 @@ struct svcxprt_rdma {
 #define RPCRDMA_MAX_REQUESTS    32
 #define RPCRDMA_MAX_REQ_SIZE    4096
 
+#define RPCSVC_MAXPAYLOAD_RDMA RPCSVC_MAXPAYLOAD
+
 /* svc_rdma_marshal.c */
 extern int svc_rdma_xdr_decode_req(struct rpcrdma_msg **, struct svc_rqst *);
 extern int svc_rdma_xdr_encode_error(struct svcxprt_rdma *,
@@ -213,6 +208,8 @@ extern int rdma_read_chunk_frmr(struct svcxprt_rdma *, struct svc_rqst *,
 
 /* svc_rdma_sendto.c */
 extern int svc_rdma_sendto(struct svc_rqst *);
+extern struct rpcrdma_read_chunk *
+       svc_rdma_get_read_chunk(struct rpcrdma_msg *);
 
 /* svc_rdma_transport.c */
 extern int svc_rdma_send(struct svcxprt_rdma *, struct ib_send_wr *);
@@ -225,7 +222,6 @@ extern void svc_rdma_put_context(struct svc_rdma_op_ctxt *, int);
 extern void svc_rdma_unmap_dma(struct svc_rdma_op_ctxt *ctxt);
 extern struct svc_rdma_req_map *svc_rdma_get_req_map(void);
 extern void svc_rdma_put_req_map(struct svc_rdma_req_map *);
-extern int svc_rdma_fastreg(struct svcxprt_rdma *, struct svc_rdma_fastreg_mr *);
 extern struct svc_rdma_fastreg_mr *svc_rdma_get_frmr(struct svcxprt_rdma *);
 extern void svc_rdma_put_frmr(struct svcxprt_rdma *,
                              struct svc_rdma_fastreg_mr *);
@@ -238,83 +234,4 @@ extern void svc_rdma_prep_reply_hdr(struct svc_rqst *);
 extern int svc_rdma_init(void);
 extern void svc_rdma_cleanup(void);
 
-/*
- * Returns the address of the first read chunk or <nul> if no read chunk is
- * present
- */
-static inline struct rpcrdma_read_chunk *
-svc_rdma_get_read_chunk(struct rpcrdma_msg *rmsgp)
-{
-       struct rpcrdma_read_chunk *ch =
-               (struct rpcrdma_read_chunk *)&rmsgp->rm_body.rm_chunks[0];
-
-       if (ch->rc_discrim == 0)
-               return NULL;
-
-       return ch;
-}
-
-/*
- * Returns the address of the first read write array element or <nul> if no
- * write array list is present
- */
-static inline struct rpcrdma_write_array *
-svc_rdma_get_write_array(struct rpcrdma_msg *rmsgp)
-{
-       if (rmsgp->rm_body.rm_chunks[0] != 0
-           || rmsgp->rm_body.rm_chunks[1] == 0)
-               return NULL;
-
-       return (struct rpcrdma_write_array *)&rmsgp->rm_body.rm_chunks[1];
-}
-
-/*
- * Returns the address of the first reply array element or <nul> if no
- * reply array is present
- */
-static inline struct rpcrdma_write_array *
-svc_rdma_get_reply_array(struct rpcrdma_msg *rmsgp)
-{
-       struct rpcrdma_read_chunk *rch;
-       struct rpcrdma_write_array *wr_ary;
-       struct rpcrdma_write_array *rp_ary;
-
-       /* XXX: Need to fix when reply list may occur with read-list and/or
-        * write list */
-       if (rmsgp->rm_body.rm_chunks[0] != 0 ||
-           rmsgp->rm_body.rm_chunks[1] != 0)
-               return NULL;
-
-       rch = svc_rdma_get_read_chunk(rmsgp);
-       if (rch) {
-               while (rch->rc_discrim)
-                       rch++;
-
-               /* The reply list follows an empty write array located
-                * at 'rc_position' here. The reply array is at rc_target.
-                */
-               rp_ary = (struct rpcrdma_write_array *)&rch->rc_target;
-
-               goto found_it;
-       }
-
-       wr_ary = svc_rdma_get_write_array(rmsgp);
-       if (wr_ary) {
-               rp_ary = (struct rpcrdma_write_array *)
-                       &wr_ary->
-                       wc_array[ntohl(wr_ary->wc_nchunks)].wc_target.rs_length;
-
-               goto found_it;
-       }
-
-       /* No read list, no write list */
-       rp_ary = (struct rpcrdma_write_array *)
-               &rmsgp->rm_body.rm_chunks[2];
-
- found_it:
-       if (rp_ary->wc_discrim == 0)
-               return NULL;
-
-       return rp_ary;
-}
 #endif
index 79f6f8f3dc0a2af7c8f51c43f086837144d6c34c..78512cfe1fe687f251e111e7d70be56540249a17 100644 (file)
@@ -116,6 +116,7 @@ void        svc_xprt_init(struct net *, struct svc_xprt_class *, struct svc_xprt *,
                      struct svc_serv *);
 int    svc_create_xprt(struct svc_serv *, const char *, struct net *,
                        const int, const unsigned short, int);
+void   svc_xprt_do_enqueue(struct svc_xprt *xprt);
 void   svc_xprt_enqueue(struct svc_xprt *xprt);
 void   svc_xprt_put(struct svc_xprt *xprt);
 void   svc_xprt_copy_addrs(struct svc_rqst *rqstp, struct svc_xprt *xprt);
index fd1a02cb3c8235d08cd6f7f579a96c91d7c21c39..003dca933803901da37efb168833b67a3a1f06ce 100644 (file)
@@ -529,18 +529,21 @@ TRACE_EVENT(svc_xprt_do_enqueue,
 
        TP_STRUCT__entry(
                __field(struct svc_xprt *, xprt)
-               __field(struct svc_rqst *, rqst)
+               __field_struct(struct sockaddr_storage, ss)
+               __field(int, pid)
+               __field(unsigned long, flags)
        ),
 
        TP_fast_assign(
                __entry->xprt = xprt;
-               __entry->rqst = rqst;
+               xprt ? memcpy(&__entry->ss, &xprt->xpt_remote, sizeof(__entry->ss)) : memset(&__entry->ss, 0, sizeof(__entry->ss));
+               __entry->pid = rqst ? rqst->rq_task->pid : 0;
+               __entry->flags = xprt ? xprt->xpt_flags : 0;
        ),
 
        TP_printk("xprt=0x%p addr=%pIScp pid=%d flags=%s", __entry->xprt,
-               (struct sockaddr *)&__entry->xprt->xpt_remote,
-               __entry->rqst ? __entry->rqst->rq_task->pid : 0,
-               show_svc_xprt_flags(__entry->xprt->xpt_flags))
+               (struct sockaddr *)&__entry->ss,
+               __entry->pid, show_svc_xprt_flags(__entry->flags))
 );
 
 TRACE_EVENT(svc_xprt_dequeue,
@@ -589,16 +592,20 @@ TRACE_EVENT(svc_handle_xprt,
        TP_STRUCT__entry(
                __field(struct svc_xprt *, xprt)
                __field(int, len)
+               __field_struct(struct sockaddr_storage, ss)
+               __field(unsigned long, flags)
        ),
 
        TP_fast_assign(
                __entry->xprt = xprt;
+               xprt ? memcpy(&__entry->ss, &xprt->xpt_remote, sizeof(__entry->ss)) : memset(&__entry->ss, 0, sizeof(__entry->ss));
                __entry->len = len;
+               __entry->flags = xprt ? xprt->xpt_flags : 0;
        ),
 
        TP_printk("xprt=0x%p addr=%pIScp len=%d flags=%s", __entry->xprt,
-               (struct sockaddr *)&__entry->xprt->xpt_remote, __entry->len,
-               show_svc_xprt_flags(__entry->xprt->xpt_flags))
+               (struct sockaddr *)&__entry->ss,
+               __entry->len, show_svc_xprt_flags(__entry->flags))
 );
 #endif /* _TRACE_SUNRPC_H */
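Both fixes follow the same rule: everything TP_printk() consumes must be copied into the ring-buffer entry at TP_fast_assign() time, never dereferenced later. The pattern in isolation, as a hypothetical tracepoint:

    TRACE_EVENT(example_xprt_event,
            TP_PROTO(struct svc_xprt *xprt),

            TP_ARGS(xprt),

            TP_STRUCT__entry(
                    __field(struct svc_xprt *, xprt)
                    __field_struct(struct sockaddr_storage, ss)
            ),

            TP_fast_assign(
                    __entry->xprt = xprt;
                    /* snapshot the address now; xprt may be gone at read time */
                    memcpy(&__entry->ss, &xprt->xpt_remote, sizeof(__entry->ss));
            ),

            TP_printk("xprt=0x%p addr=%pIScp", __entry->xprt,
                    (struct sockaddr *)&__entry->ss)
    );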
 
index 9bb9771a107f22eb66c3045acc573420e32ba0e5..5527266311624df58f08ae8fec7d531972db2405 100644 (file)
@@ -22,6 +22,7 @@
 #define NFS_ACLCNT             0x0002
 #define NFS_DFACL              0x0004
 #define NFS_DFACLCNT           0x0008
+#define NFS_ACL_MASK           0x000f
 
 /* Flag for Default ACL entries */
 #define NFS_ACL_DEFAULT                0x1000
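NFS_ACL_MASK collects the four request bits above, so a handler can reject unknown bits in a single test. A sketch (argp and the error value are illustrative):

    if (argp->mask & ~NFS_ACL_MASK)
            return nfserr_inval;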
index 2928afffbb81ffdfea820fd1c2b87c3ae8b02a73..4a2340a5440115dd4b758d4490793306b2bb4641 100644 (file)
@@ -44,7 +44,7 @@ static void cache_revisit_request(struct cache_head *item);
 static void cache_init(struct cache_head *h)
 {
        time_t now = seconds_since_boot();
-       h->next = NULL;
+       INIT_HLIST_NODE(&h->cache_list);
        h->flags = 0;
        kref_init(&h->ref);
        h->expiry_time = now + CACHE_NEW_EXPIRY;
@@ -54,15 +54,14 @@ static void cache_init(struct cache_head *h)
 struct cache_head *sunrpc_cache_lookup(struct cache_detail *detail,
                                       struct cache_head *key, int hash)
 {
-       struct cache_head **head,  **hp;
-       struct cache_head *new = NULL, *freeme = NULL;
+       struct cache_head *new = NULL, *freeme = NULL, *tmp = NULL;
+       struct hlist_head *head;
 
        head = &detail->hash_table[hash];
 
        read_lock(&detail->hash_lock);
 
-       for (hp=head; *hp != NULL ; hp = &(*hp)->next) {
-               struct cache_head *tmp = *hp;
+       hlist_for_each_entry(tmp, head, cache_list) {
                if (detail->match(tmp, key)) {
                        if (cache_is_expired(detail, tmp))
                                /* This entry is expired, we will discard it. */
@@ -88,12 +87,10 @@ struct cache_head *sunrpc_cache_lookup(struct cache_detail *detail,
        write_lock(&detail->hash_lock);
 
        /* check if entry appeared while we slept */
-       for (hp=head; *hp != NULL ; hp = &(*hp)->next) {
-               struct cache_head *tmp = *hp;
+       hlist_for_each_entry(tmp, head, cache_list) {
                if (detail->match(tmp, key)) {
                        if (cache_is_expired(detail, tmp)) {
-                               *hp = tmp->next;
-                               tmp->next = NULL;
+                               hlist_del_init(&tmp->cache_list);
                               detail->entries--;
                                freeme = tmp;
                                break;
@@ -104,8 +101,8 @@ struct cache_head *sunrpc_cache_lookup(struct cache_detail *detail,
                        return tmp;
                }
        }
-       new->next = *head;
-       *head = new;
+
+       hlist_add_head(&new->cache_list, head);
        detail->entries++;
        cache_get(new);
        write_unlock(&detail->hash_lock);
@@ -143,7 +140,6 @@ struct cache_head *sunrpc_cache_update(struct cache_detail *detail,
         * If 'old' is not VALID, we update it directly,
         * otherwise we need to replace it
         */
-       struct cache_head **head;
        struct cache_head *tmp;
 
        if (!test_bit(CACHE_VALID, &old->flags)) {
@@ -168,15 +164,13 @@ struct cache_head *sunrpc_cache_update(struct cache_detail *detail,
        }
        cache_init(tmp);
        detail->init(tmp, old);
-       head = &detail->hash_table[hash];
 
        write_lock(&detail->hash_lock);
        if (test_bit(CACHE_NEGATIVE, &new->flags))
                set_bit(CACHE_NEGATIVE, &tmp->flags);
        else
                detail->update(tmp, new);
-       tmp->next = *head;
-       *head = tmp;
+       hlist_add_head(&tmp->cache_list, &detail->hash_table[hash]);
        detail->entries++;
        cache_get(tmp);
        cache_fresh_locked(tmp, new->expiry_time);
@@ -416,28 +410,29 @@ static int cache_clean(void)
        /* find a non-empty bucket in the table */
        while (current_detail &&
               current_index < current_detail->hash_size &&
-              current_detail->hash_table[current_index] == NULL)
+              hlist_empty(&current_detail->hash_table[current_index]))
                current_index++;
 
        /* find a cleanable entry in the bucket and clean it, or set to next bucket */
 
        if (current_detail && current_index < current_detail->hash_size) {
-               struct cache_head *ch, **cp;
+               struct cache_head *ch = NULL;
                struct cache_detail *d;
+               struct hlist_head *head;
+               struct hlist_node *tmp;
 
                write_lock(&current_detail->hash_lock);
 
                /* Ok, now to clean this strand */
 
-               cp = & current_detail->hash_table[current_index];
-               for (ch = *cp ; ch ; cp = & ch->next, ch = *cp) {
+               head = &current_detail->hash_table[current_index];
+               hlist_for_each_entry_safe(ch, tmp, head, cache_list) {
                        if (current_detail->nextcheck > ch->expiry_time)
                                current_detail->nextcheck = ch->expiry_time+1;
                        if (!cache_is_expired(current_detail, ch))
                                continue;
 
-                       *cp = ch->next;
-                       ch->next = NULL;
+                       hlist_del_init(&ch->cache_list);
                        current_detail->entries--;
                        rv = 1;
                        break;
@@ -1270,18 +1265,13 @@ EXPORT_SYMBOL_GPL(qword_get);
  * get a header, then pass each real item in the cache
  */
 
-struct handle {
-       struct cache_detail *cd;
-};
-
-static void *c_start(struct seq_file *m, loff_t *pos)
+void *cache_seq_start(struct seq_file *m, loff_t *pos)
        __acquires(cd->hash_lock)
 {
        loff_t n = *pos;
        unsigned int hash, entry;
        struct cache_head *ch;
-       struct cache_detail *cd = ((struct handle*)m->private)->cd;
-
+       struct cache_detail *cd = m->private;
 
        read_lock(&cd->hash_lock);
        if (!n--)
@@ -1289,7 +1279,7 @@ static void *c_start(struct seq_file *m, loff_t *pos)
        hash = n >> 32;
        entry = n & ((1LL<<32) - 1);
 
-       for (ch=cd->hash_table[hash]; ch; ch=ch->next)
+       hlist_for_each_entry(ch, &cd->hash_table[hash], cache_list)
                if (!entry--)
                        return ch;
        n &= ~((1LL<<32) - 1);
@@ -1297,51 +1287,57 @@ static void *c_start(struct seq_file *m, loff_t *pos)
                hash++;
                n += 1LL<<32;
        } while(hash < cd->hash_size &&
-               cd->hash_table[hash]==NULL);
+               hlist_empty(&cd->hash_table[hash]));
        if (hash >= cd->hash_size)
                return NULL;
        *pos = n+1;
-       return cd->hash_table[hash];
+       return hlist_entry_safe(cd->hash_table[hash].first,
+                               struct cache_head, cache_list);
 }
+EXPORT_SYMBOL_GPL(cache_seq_start);
 
-static void *c_next(struct seq_file *m, void *p, loff_t *pos)
+void *cache_seq_next(struct seq_file *m, void *p, loff_t *pos)
 {
        struct cache_head *ch = p;
        int hash = (*pos >> 32);
-       struct cache_detail *cd = ((struct handle*)m->private)->cd;
+       struct cache_detail *cd = m->private;
 
        if (p == SEQ_START_TOKEN)
                hash = 0;
-       else if (ch->next == NULL) {
+       else if (ch->cache_list.next == NULL) {
                hash++;
                *pos += 1LL<<32;
        } else {
                ++*pos;
-               return ch->next;
+               return hlist_entry_safe(ch->cache_list.next,
+                                       struct cache_head, cache_list);
        }
        *pos &= ~((1LL<<32) - 1);
        while (hash < cd->hash_size &&
-              cd->hash_table[hash] == NULL) {
+              hlist_empty(&cd->hash_table[hash])) {
                hash++;
                *pos += 1LL<<32;
        }
        if (hash >= cd->hash_size)
                return NULL;
        ++*pos;
-       return cd->hash_table[hash];
+       return hlist_entry_safe(cd->hash_table[hash].first,
+                               struct cache_head, cache_list);
 }
+EXPORT_SYMBOL_GPL(cache_seq_next);
 
-static void c_stop(struct seq_file *m, void *p)
+void cache_seq_stop(struct seq_file *m, void *p)
        __releases(cd->hash_lock)
 {
-       struct cache_detail *cd = ((struct handle*)m->private)->cd;
+       struct cache_detail *cd = m->private;
        read_unlock(&cd->hash_lock);
 }
+EXPORT_SYMBOL_GPL(cache_seq_stop);
 
 static int c_show(struct seq_file *m, void *p)
 {
        struct cache_head *cp = p;
-       struct cache_detail *cd = ((struct handle*)m->private)->cd;
+       struct cache_detail *cd = m->private;
 
        if (p == SEQ_START_TOKEN)
                return cd->cache_show(m, cd, NULL);
@@ -1364,33 +1360,36 @@ static int c_show(struct seq_file *m, void *p)
 }
 
 static const struct seq_operations cache_content_op = {
-       .start  = c_start,
-       .next   = c_next,
-       .stop   = c_stop,
+       .start  = cache_seq_start,
+       .next   = cache_seq_next,
+       .stop   = cache_seq_stop,
        .show   = c_show,
 };
 
 static int content_open(struct inode *inode, struct file *file,
                        struct cache_detail *cd)
 {
-       struct handle *han;
+       struct seq_file *seq;
+       int err;
 
        if (!cd || !try_module_get(cd->owner))
                return -EACCES;
-       han = __seq_open_private(file, &cache_content_op, sizeof(*han));
-       if (han == NULL) {
+
+       err = seq_open(file, &cache_content_op);
+       if (err) {
                module_put(cd->owner);
-               return -ENOMEM;
+               return err;
        }
 
-       han->cd = cd;
+       seq = file->private_data;
+       seq->private = cd;
        return 0;
 }
 
 static int content_release(struct inode *inode, struct file *file,
                struct cache_detail *cd)
 {
-       int ret = seq_release_private(inode, file);
+       int ret = seq_release(inode, file);
        module_put(cd->owner);
        return ret;
 }
@@ -1665,17 +1664,21 @@ EXPORT_SYMBOL_GPL(cache_unregister_net);
 struct cache_detail *cache_create_net(struct cache_detail *tmpl, struct net *net)
 {
        struct cache_detail *cd;
+       int i;
 
        cd = kmemdup(tmpl, sizeof(struct cache_detail), GFP_KERNEL);
        if (cd == NULL)
                return ERR_PTR(-ENOMEM);
 
-       cd->hash_table = kzalloc(cd->hash_size * sizeof(struct cache_head *),
+       cd->hash_table = kzalloc(cd->hash_size * sizeof(struct hlist_head),
                                 GFP_KERNEL);
        if (cd->hash_table == NULL) {
                kfree(cd);
                return ERR_PTR(-ENOMEM);
        }
+
+       for (i = 0; i < cd->hash_size; i++)
+               INIT_HLIST_HEAD(&cd->hash_table[i]);
        cd->net = net;
        return cd;
 }
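The iterator above can test cache_list.next against NULL because an hlist chain, unlike a circular list_head, terminates in NULL. The two traversal idioms the conversion leans on, shown in isolation (function names are placeholders):

    static int example_bucket_size(struct hlist_head *bucket)
    {
            struct cache_head *ch;
            int n = 0;

            /* read-only walk, as in sunrpc_cache_lookup() */
            hlist_for_each_entry(ch, bucket, cache_list)
                    n++;
            return n;
    }

    static void example_flush_bucket(struct hlist_head *bucket)
    {
            struct cache_head *ch;
            struct hlist_node *tmp;

            /* deletion-safe walk, as in cache_clean() */
            hlist_for_each_entry_safe(ch, tmp, bucket, cache_list)
                    hlist_del_init(&ch->cache_list);
    }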
index 5a16d8d8c831c4ad2805f5958b9ccef63449af82..a8f579df14d83597b1c11e465894c28f982ed774 100644 (file)
 
 static void svc_unregister(const struct svc_serv *serv, struct net *net);
 
-#define svc_serv_is_pooled(serv)    ((serv)->sv_function)
+#define svc_serv_is_pooled(serv)    ((serv)->sv_ops->svo_function)
 
-/*
- * Mode for mapping cpus to pools.
- */
-enum {
-       SVC_POOL_AUTO = -1,     /* choose one of the others */
-       SVC_POOL_GLOBAL,        /* no mapping, just a single global pool
-                                * (legacy & UP mode) */
-       SVC_POOL_PERCPU,        /* one pool per cpu */
-       SVC_POOL_PERNODE        /* one pool per numa node */
-};
 #define SVC_POOL_DEFAULT       SVC_POOL_GLOBAL
 
 /*
  * Structure for mapping cpus to pools and vice versa.
  * Setup once during sunrpc initialisation.
  */
-static struct svc_pool_map {
-       int count;                      /* How many svc_servs use us */
-       int mode;                       /* Note: int not enum to avoid
-                                        * warnings about "enumeration value
-                                        * not handled in switch" */
-       unsigned int npools;
-       unsigned int *pool_to;          /* maps pool id to cpu or node */
-       unsigned int *to_pool;          /* maps cpu or node to pool id */
-} svc_pool_map = {
-       .count = 0,
+struct svc_pool_map svc_pool_map = {
        .mode = SVC_POOL_DEFAULT
 };
+EXPORT_SYMBOL_GPL(svc_pool_map);
+
 static DEFINE_MUTEX(svc_pool_map_mutex);/* protects svc_pool_map.count only */
 
 static int
@@ -236,7 +219,7 @@ svc_pool_map_init_pernode(struct svc_pool_map *m)
  * vice versa).  Initialise the map if we're the first user.
  * Returns the number of pools.
  */
-static unsigned int
+unsigned int
 svc_pool_map_get(void)
 {
        struct svc_pool_map *m = &svc_pool_map;
@@ -271,7 +254,7 @@ svc_pool_map_get(void)
        mutex_unlock(&svc_pool_map_mutex);
        return m->npools;
 }
-
+EXPORT_SYMBOL_GPL(svc_pool_map_get);
 
 /*
  * Drop a reference to the global map of cpus to pools.
@@ -280,7 +263,7 @@ svc_pool_map_get(void)
  * mode using the pool_mode module option without
  * rebooting or re-loading sunrpc.ko.
  */
-static void
+void
 svc_pool_map_put(void)
 {
        struct svc_pool_map *m = &svc_pool_map;
@@ -297,7 +280,7 @@ svc_pool_map_put(void)
 
        mutex_unlock(&svc_pool_map_mutex);
 }
-
+EXPORT_SYMBOL_GPL(svc_pool_map_put);
 
 static int svc_pool_map_get_node(unsigned int pidx)
 {
@@ -423,7 +406,7 @@ EXPORT_SYMBOL_GPL(svc_bind);
  */
 static struct svc_serv *
 __svc_create(struct svc_program *prog, unsigned int bufsize, int npools,
-            void (*shutdown)(struct svc_serv *serv, struct net *net))
+            struct svc_serv_ops *ops)
 {
        struct svc_serv *serv;
        unsigned int vers;
@@ -440,7 +423,7 @@ __svc_create(struct svc_program *prog, unsigned int bufsize, int npools,
                bufsize = RPCSVC_MAXPAYLOAD;
        serv->sv_max_payload = bufsize? bufsize : 4096;
        serv->sv_max_mesg  = roundup(serv->sv_max_payload + PAGE_SIZE, PAGE_SIZE);
-       serv->sv_shutdown  = shutdown;
+       serv->sv_ops = ops;
        xdrsize = 0;
        while (prog) {
                prog->pg_lovers = prog->pg_nvers-1;
@@ -486,26 +469,22 @@ __svc_create(struct svc_program *prog, unsigned int bufsize, int npools,
 
 struct svc_serv *
 svc_create(struct svc_program *prog, unsigned int bufsize,
-          void (*shutdown)(struct svc_serv *serv, struct net *net))
+          struct svc_serv_ops *ops)
 {
-       return __svc_create(prog, bufsize, /*npools*/1, shutdown);
+       return __svc_create(prog, bufsize, /*npools*/1, ops);
 }
 EXPORT_SYMBOL_GPL(svc_create);
 
 struct svc_serv *
 svc_create_pooled(struct svc_program *prog, unsigned int bufsize,
-                 void (*shutdown)(struct svc_serv *serv, struct net *net),
-                 svc_thread_fn func, struct module *mod)
+                 struct svc_serv_ops *ops)
 {
        struct svc_serv *serv;
        unsigned int npools = svc_pool_map_get();
 
-       serv = __svc_create(prog, bufsize, npools, shutdown);
+       serv = __svc_create(prog, bufsize, npools, ops);
        if (!serv)
                goto out_err;
-
-       serv->sv_function = func;
-       serv->sv_module = mod;
        return serv;
 out_err:
        svc_pool_map_put();
@@ -517,8 +496,8 @@ void svc_shutdown_net(struct svc_serv *serv, struct net *net)
 {
        svc_close_net(serv, net);
 
-       if (serv->sv_shutdown)
-               serv->sv_shutdown(serv, net);
+       if (serv->sv_ops->svo_shutdown)
+               serv->sv_ops->svo_shutdown(serv, net);
 }
 EXPORT_SYMBOL_GPL(svc_shutdown_net);
 
@@ -604,40 +583,52 @@ svc_release_buffer(struct svc_rqst *rqstp)
 }
 
 struct svc_rqst *
-svc_prepare_thread(struct svc_serv *serv, struct svc_pool *pool, int node)
+svc_rqst_alloc(struct svc_serv *serv, struct svc_pool *pool, int node)
 {
        struct svc_rqst *rqstp;
 
        rqstp = kzalloc_node(sizeof(*rqstp), GFP_KERNEL, node);
        if (!rqstp)
-               goto out_enomem;
+               return rqstp;
 
-       serv->sv_nrthreads++;
        __set_bit(RQ_BUSY, &rqstp->rq_flags);
        spin_lock_init(&rqstp->rq_lock);
        rqstp->rq_server = serv;
        rqstp->rq_pool = pool;
-       spin_lock_bh(&pool->sp_lock);
-       pool->sp_nrthreads++;
-       list_add_rcu(&rqstp->rq_all, &pool->sp_all_threads);
-       spin_unlock_bh(&pool->sp_lock);
 
        rqstp->rq_argp = kmalloc_node(serv->sv_xdrsize, GFP_KERNEL, node);
        if (!rqstp->rq_argp)
-               goto out_thread;
+               goto out_enomem;
 
        rqstp->rq_resp = kmalloc_node(serv->sv_xdrsize, GFP_KERNEL, node);
        if (!rqstp->rq_resp)
-               goto out_thread;
+               goto out_enomem;
 
        if (!svc_init_buffer(rqstp, serv->sv_max_mesg, node))
-               goto out_thread;
+               goto out_enomem;
 
        return rqstp;
-out_thread:
-       svc_exit_thread(rqstp);
 out_enomem:
-       return ERR_PTR(-ENOMEM);
+       svc_rqst_free(rqstp);
+       return NULL;
+}
+EXPORT_SYMBOL_GPL(svc_rqst_alloc);
+
+struct svc_rqst *
+svc_prepare_thread(struct svc_serv *serv, struct svc_pool *pool, int node)
+{
+       struct svc_rqst *rqstp;
+
+       rqstp = svc_rqst_alloc(serv, pool, node);
+       if (!rqstp)
+               return ERR_PTR(-ENOMEM);
+
+       serv->sv_nrthreads++;
+       spin_lock_bh(&pool->sp_lock);
+       pool->sp_nrthreads++;
+       list_add_rcu(&rqstp->rq_all, &pool->sp_all_threads);
+       spin_unlock_bh(&pool->sp_lock);
+       return rqstp;
 }
 EXPORT_SYMBOL_GPL(svc_prepare_thread);
 
@@ -739,12 +730,12 @@ svc_set_num_threads(struct svc_serv *serv, struct svc_pool *pool, int nrservs)
                        break;
                }
 
-               __module_get(serv->sv_module);
-               task = kthread_create_on_node(serv->sv_function, rqstp,
+               __module_get(serv->sv_ops->svo_module);
+               task = kthread_create_on_node(serv->sv_ops->svo_function, rqstp,
                                              node, "%s", serv->sv_name);
                if (IS_ERR(task)) {
                        error = PTR_ERR(task);
-                       module_put(serv->sv_module);
+                       module_put(serv->sv_ops->svo_module);
                        svc_exit_thread(rqstp);
                        break;
                }
@@ -772,15 +763,21 @@ EXPORT_SYMBOL_GPL(svc_set_num_threads);
  * mutex" for the service.
  */
 void
-svc_exit_thread(struct svc_rqst *rqstp)
+svc_rqst_free(struct svc_rqst *rqstp)
 {
-       struct svc_serv *serv = rqstp->rq_server;
-       struct svc_pool *pool = rqstp->rq_pool;
-
        svc_release_buffer(rqstp);
        kfree(rqstp->rq_resp);
        kfree(rqstp->rq_argp);
        kfree(rqstp->rq_auth_data);
+       kfree_rcu(rqstp, rq_rcu_head);
+}
+EXPORT_SYMBOL_GPL(svc_rqst_free);
+
+void
+svc_exit_thread(struct svc_rqst *rqstp)
+{
+       struct svc_serv *serv = rqstp->rq_server;
+       struct svc_pool *pool = rqstp->rq_pool;
 
        spin_lock_bh(&pool->sp_lock);
        pool->sp_nrthreads--;
@@ -788,7 +785,7 @@ svc_exit_thread(struct svc_rqst *rqstp)
                list_del_rcu(&rqstp->rq_all);
        spin_unlock_bh(&pool->sp_lock);
 
-       kfree_rcu(rqstp, rq_rcu_head);
+       svc_rqst_free(rqstp);
 
        /* Release the server */
        if (serv)
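Splitting allocation out of svc_prepare_thread() means a caller that does not use kthreads can build and tear down request contexts without touching the thread counters. A sketch of such a caller (purely hypothetical):

    static int example_alloc_ctx(struct svc_serv *serv, struct svc_pool *pool)
    {
            struct svc_rqst *rqstp;

            rqstp = svc_rqst_alloc(serv, pool, NUMA_NO_NODE);
            if (!rqstp)
                    return -ENOMEM;

            /* no sv_nrthreads/sp_nrthreads accounting was taken, so the
             * context can be freed without svc_exit_thread(): */
            svc_rqst_free(rqstp);
            return 0;
    }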
index 163ac45c36394f0ce144235ad09a5a3227be173a..a6cbb2104667d22e6b64ffa34718f8ed1f50ef0e 100644 (file)
@@ -24,7 +24,6 @@ static int svc_deferred_recv(struct svc_rqst *rqstp);
 static struct cache_deferred_req *svc_defer(struct cache_req *req);
 static void svc_age_temp_xprts(unsigned long closure);
 static void svc_delete_xprt(struct svc_xprt *xprt);
-static void svc_xprt_do_enqueue(struct svc_xprt *xprt);
 
 /* apparently the "standard" is that clients close
  * idle connections after 5 minutes, servers after
@@ -225,12 +224,12 @@ static void svc_xprt_received(struct svc_xprt *xprt)
        }
 
        /* As soon as we clear busy, the xprt could be closed and
-        * 'put', so we need a reference to call svc_xprt_do_enqueue with:
+        * 'put', so we need a reference to call svc_enqueue_xprt with:
         */
        svc_xprt_get(xprt);
        smp_mb__before_atomic();
        clear_bit(XPT_BUSY, &xprt->xpt_flags);
-       svc_xprt_do_enqueue(xprt);
+       xprt->xpt_server->sv_ops->svo_enqueue_xprt(xprt);
        svc_xprt_put(xprt);
 }
 
@@ -320,7 +319,7 @@ static bool svc_xprt_has_something_to_do(struct svc_xprt *xprt)
        return false;
 }
 
-static void svc_xprt_do_enqueue(struct svc_xprt *xprt)
+void svc_xprt_do_enqueue(struct svc_xprt *xprt)
 {
        struct svc_pool *pool;
        struct svc_rqst *rqstp = NULL;
@@ -402,6 +401,7 @@ redo_search:
 out:
        trace_svc_xprt_do_enqueue(xprt, rqstp);
 }
+EXPORT_SYMBOL_GPL(svc_xprt_do_enqueue);
 
 /*
  * Queue up a transport with data pending. If there are idle nfsd
@@ -412,7 +412,7 @@ void svc_xprt_enqueue(struct svc_xprt *xprt)
 {
        if (test_bit(XPT_BUSY, &xprt->xpt_flags))
                return;
-       svc_xprt_do_enqueue(xprt);
+       xprt->xpt_server->sv_ops->svo_enqueue_xprt(xprt);
 }
 EXPORT_SYMBOL_GPL(svc_xprt_enqueue);
 
index d25cd430f9ffbebc6fdb2a35e662a0ddf51c3cae..1dfae83170650ec26d53973e6627acdaa78dcddf 100644 (file)
@@ -136,6 +136,79 @@ static dma_addr_t dma_map_xdr(struct svcxprt_rdma *xprt,
        return dma_addr;
 }
 
+/* Returns the address of the first read chunk or <nul> if no read chunk
+ * is present
+ */
+struct rpcrdma_read_chunk *
+svc_rdma_get_read_chunk(struct rpcrdma_msg *rmsgp)
+{
+       struct rpcrdma_read_chunk *ch =
+               (struct rpcrdma_read_chunk *)&rmsgp->rm_body.rm_chunks[0];
+
+       if (ch->rc_discrim == xdr_zero)
+               return NULL;
+       return ch;
+}
+
+/* Returns the address of the first write array element or <nul>
+ * if no write array list is present
+ */
+static struct rpcrdma_write_array *
+svc_rdma_get_write_array(struct rpcrdma_msg *rmsgp)
+{
+       if (rmsgp->rm_body.rm_chunks[0] != xdr_zero ||
+           rmsgp->rm_body.rm_chunks[1] == xdr_zero)
+               return NULL;
+       return (struct rpcrdma_write_array *)&rmsgp->rm_body.rm_chunks[1];
+}
+
+/* Returns the address of the first reply array element or <nul> if no
+ * reply array is present
+ */
+static struct rpcrdma_write_array *
+svc_rdma_get_reply_array(struct rpcrdma_msg *rmsgp)
+{
+       struct rpcrdma_read_chunk *rch;
+       struct rpcrdma_write_array *wr_ary;
+       struct rpcrdma_write_array *rp_ary;
+
+       /* XXX: Need to fix when reply chunk may occur with read list
+        *      and/or write list.
+        */
+       if (rmsgp->rm_body.rm_chunks[0] != xdr_zero ||
+           rmsgp->rm_body.rm_chunks[1] != xdr_zero)
+               return NULL;
+
+       rch = svc_rdma_get_read_chunk(rmsgp);
+       if (rch) {
+               while (rch->rc_discrim != xdr_zero)
+                       rch++;
+
+               /* The reply chunk follows an empty write array located
+                * at 'rc_position' here. The reply array is at rc_target.
+                */
+               rp_ary = (struct rpcrdma_write_array *)&rch->rc_target;
+               goto found_it;
+       }
+
+       wr_ary = svc_rdma_get_write_array(rmsgp);
+       if (wr_ary) {
+               int chunk = be32_to_cpu(wr_ary->wc_nchunks);
+
+               rp_ary = (struct rpcrdma_write_array *)
+                        &wr_ary->wc_array[chunk].wc_target.rs_length;
+               goto found_it;
+       }
+
+       /* No read list, no write list */
+       rp_ary = (struct rpcrdma_write_array *)&rmsgp->rm_body.rm_chunks[2];
+
+ found_it:
+       if (rp_ary->wc_discrim == xdr_zero)
+               return NULL;
+       return rp_ary;
+}
+
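The rewritten getters compare discriminators against xdr_zero rather than a bare 0, which keeps the __be32 fields clean under sparse; on the wire the two are identical. For reference, the constant is defined in include/linux/sunrpc/xdr.h as:

    #define xdr_zero        cpu_to_be32(0)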
 /* Assumptions:
  * - The specified write_len can be represented in sc_max_sge * PAGE_SIZE
  */
@@ -384,6 +457,7 @@ static int send_reply(struct svcxprt_rdma *rdma,
                      int byte_count)
 {
        struct ib_send_wr send_wr;
+       u32 xdr_off;
        int sge_no;
        int sge_bytes;
        int page_no;
@@ -418,8 +492,8 @@ static int send_reply(struct svcxprt_rdma *rdma,
        ctxt->direction = DMA_TO_DEVICE;
 
        /* Map the payload indicated by 'byte_count' */
+       xdr_off = 0;
        for (sge_no = 1; byte_count && sge_no < vec->count; sge_no++) {
-               int xdr_off = 0;
                sge_bytes = min_t(size_t, vec->sge[sge_no].iov_len, byte_count);
                byte_count -= sge_bytes;
                ctxt->sge[sge_no].addr =
@@ -457,6 +531,13 @@ static int send_reply(struct svcxprt_rdma *rdma,
        }
        rqstp->rq_next_page = rqstp->rq_respages + 1;
 
+       /* The loop above bumps sc_dma_used for each sge. The
+        * xdr_buf.tail gets a separate sge, but resides in the
+        * same page as xdr_buf.head. Don't count it twice.
+        */
+       if (sge_no > ctxt->count)
+               atomic_dec(&rdma->sc_dma_used);
+
        if (sge_no > rdma->sc_max_sge) {
                pr_err("svcrdma: Too many sges (%d)\n", sge_no);
                goto err;
index 6b36279e428850ee6fc96e49e2997a2a5679dc3c..21e40365042ca2c4756e87f5b8dfa09912adc072 100644 (file)
@@ -91,7 +91,7 @@ struct svc_xprt_class svc_rdma_class = {
        .xcl_name = "rdma",
        .xcl_owner = THIS_MODULE,
        .xcl_ops = &svc_rdma_ops,
-       .xcl_max_payload = RPCRDMA_MAXPAYLOAD,
+       .xcl_max_payload = RPCSVC_MAXPAYLOAD_RDMA,
        .xcl_ident = XPRT_TRANSPORT_RDMA,
 };
 
@@ -659,6 +659,7 @@ static int rdma_cma_handler(struct rdma_cm_id *cma_id,
                if (xprt) {
                        set_bit(XPT_CLOSE, &xprt->xpt_flags);
                        svc_xprt_enqueue(xprt);
+                       svc_xprt_put(xprt);
                }
                break;
        default:
@@ -1201,40 +1202,6 @@ static int svc_rdma_secure_port(struct svc_rqst *rqstp)
        return 1;
 }
 
-/*
- * Attempt to register the kvec representing the RPC memory with the
- * device.
- *
- * Returns:
- *  NULL : The device does not support fastreg or there were no more
- *         fastreg mr.
- *  frmr : The kvec register request was successfully posted.
- *    <0 : An error was encountered attempting to register the kvec.
- */
-int svc_rdma_fastreg(struct svcxprt_rdma *xprt,
-                    struct svc_rdma_fastreg_mr *frmr)
-{
-       struct ib_send_wr fastreg_wr;
-       u8 key;
-
-       /* Bump the key */
-       key = (u8)(frmr->mr->lkey & 0x000000FF);
-       ib_update_fast_reg_key(frmr->mr, ++key);
-
-       /* Prepare FASTREG WR */
-       memset(&fastreg_wr, 0, sizeof fastreg_wr);
-       fastreg_wr.opcode = IB_WR_FAST_REG_MR;
-       fastreg_wr.send_flags = IB_SEND_SIGNALED;
-       fastreg_wr.wr.fast_reg.iova_start = (unsigned long)frmr->kva;
-       fastreg_wr.wr.fast_reg.page_list = frmr->page_list;
-       fastreg_wr.wr.fast_reg.page_list_len = frmr->page_list_len;
-       fastreg_wr.wr.fast_reg.page_shift = PAGE_SHIFT;
-       fastreg_wr.wr.fast_reg.length = frmr->map_len;
-       fastreg_wr.wr.fast_reg.access_flags = frmr->access_flags;
-       fastreg_wr.wr.fast_reg.rkey = frmr->mr->lkey;
-       return svc_rdma_send(xprt, &fastreg_wr);
-}
-
 int svc_rdma_send(struct svcxprt_rdma *xprt, struct ib_send_wr *wr)
 {
        struct ib_send_wr *bad_wr, *n_wr;
index f49dd8b381221dceaef4847e4ae28d397ebcdf27..e718d0959af34207082211b5a3b0c69bc5d779eb 100644 (file)
@@ -51,7 +51,6 @@
 #include <linux/sunrpc/clnt.h>                 /* rpc_xprt */
 #include <linux/sunrpc/rpc_rdma.h>     /* RPC/RDMA protocol */
 #include <linux/sunrpc/xprtrdma.h>     /* xprt parameters */
-#include <linux/sunrpc/svc.h>          /* RPCSVC_MAXPAYLOAD */
 
 #define RDMA_RESOLVE_TIMEOUT   (5000)  /* 5 seconds */
 #define RDMA_CONNECT_RETRY_MAX (2)     /* retries if no listener backlog */