Merge tag 'nfs-for-5.2-1' of git://git.linux-nfs.org/projects/anna/linux-nfs
author Linus Torvalds <torvalds@linux-foundation.org>
Thu, 9 May 2019 21:33:15 +0000 (14:33 -0700)
committer Linus Torvalds <torvalds@linux-foundation.org>
Thu, 9 May 2019 21:33:15 +0000 (14:33 -0700)
Pull NFS client updates from Anna Schumaker:
 "Highlights include:

  Stable bugfixes:
   - Fall back to MDS if no deviceid is found rather than aborting   # v4.11+
   - NFS4: Fix v4.0 client state corruption when mounting

  Features:
   - Much improved handling of soft mounts with NFS v4.0:
       - Reduce risk of false positive timeouts
       - Faster failover of reads and writes after a timeout
       - Added a "softerr" mount option to return ETIMEDOUT instead of
         EIO to the application after a timeout
   - Increase number of xprtrdma backchannel requests
   - Add additional xprtrdma tracepoints
   - Improved send completion batching for xprtrdma

  Other bugfixes and cleanups:
   - Return -EINVAL when NFS v4.2 is passed an invalid dedup mode
   - Reduce usage of GFP_ATOMIC pages in SUNRPC
   - Various minor NFS over RDMA cleanups and bugfixes
   - Use the correct container namespace for upcalls
   - Don't share superblocks between user namespaces
   - Various other container fixes
   - Make nfs_match_client() killable to prevent soft lockups
   - Don't mark all open state for recovery when handling recallable
     state revoked flag"

* tag 'nfs-for-5.2-1' of git://git.linux-nfs.org/projects/anna/linux-nfs: (69 commits)
  SUNRPC: Rebalance a kref in auth_gss.c
  NFS: Fix a double unlock from nfs_match,get_client
  nfs: pass the correct prototype to read_cache_page
  NFSv4: don't mark all open state for recovery when handling recallable state revoked flag
  SUNRPC: Fix an error code in gss_alloc_msg()
  SUNRPC: task should be exit if encode return EKEYEXPIRED more times
  NFS4: Fix v4.0 client state corruption when mount
  PNFS fallback to MDS if no deviceid found
  NFS: make nfs_match_client killable
  lockd: Store the lockd client credential in struct nlm_host
  NFS: When mounting, don't share filesystems between different user namespaces
  NFS: Convert NFSv2 to use the container user namespace
  NFSv4: Convert the NFS client idmapper to use the container user namespace
  NFS: Convert NFSv3 to use the container user namespace
  SUNRPC: Use namespace of listening daemon in the client AUTH_GSS upcall
  SUNRPC: Use the client user namespace when encoding creds
  NFS: Store the credential of the mount process in the nfs_server
  SUNRPC: Cache cred of process creating the rpc_client
  xprtrdma: Remove stale comment
  xprtrdma: Update comments that reference ib_drain_qp
  ...

1  2 
fs/nfs/inode.c
fs/nfs/internal.h
fs/nfs/super.c
fs/nfsd/nfs4callback.c
include/trace/events/sunrpc.h

diff --combined fs/nfs/inode.c
index f61af8307dc88fdb0d06db04df0008649865f8e7,efc45f22c5817339a22175e35fa1aa10275a596c..3bc2550cfe4ea5d33753750e0fe7154aec49a1a1
@@@ -885,10 -885,14 +885,14 @@@ struct nfs_lock_context *nfs_get_lock_c
                spin_lock(&inode->i_lock);
                res = __nfs_find_lock_context(ctx);
                if (res == NULL) {
-                       list_add_tail_rcu(&new->list, &ctx->lock_context.list);
-                       new->open_context = ctx;
-                       res = new;
-                       new = NULL;
+                       new->open_context = get_nfs_open_context(ctx);
+                       if (new->open_context) {
+                               list_add_tail_rcu(&new->list,
+                                               &ctx->lock_context.list);
+                               res = new;
+                               new = NULL;
+                       } else
+                               res = ERR_PTR(-EBADF);
                }
                spin_unlock(&inode->i_lock);
                kfree(new);
@@@ -906,6 -910,7 +910,7 @@@ void nfs_put_lock_context(struct nfs_lo
                return;
        list_del_rcu(&l_ctx->list);
        spin_unlock(&inode->i_lock);
+       put_nfs_open_context(ctx);
        kfree_rcu(l_ctx, rcu_head);
  }
  EXPORT_SYMBOL_GPL(nfs_put_lock_context);
@@@ -2055,11 -2060,17 +2060,11 @@@ struct inode *nfs_alloc_inode(struct su
  }
  EXPORT_SYMBOL_GPL(nfs_alloc_inode);
  
 -static void nfs_i_callback(struct rcu_head *head)
 +void nfs_free_inode(struct inode *inode)
  {
 -      struct inode *inode = container_of(head, struct inode, i_rcu);
        kmem_cache_free(nfs_inode_cachep, NFS_I(inode));
  }
 -
 -void nfs_destroy_inode(struct inode *inode)
 -{
 -      call_rcu(&inode->i_rcu, nfs_i_callback);
 -}
 -EXPORT_SYMBOL_GPL(nfs_destroy_inode);
 +EXPORT_SYMBOL_GPL(nfs_free_inode);
  
  static inline void nfs4_init_once(struct nfs_inode *nfsi)
  {
diff --combined fs/nfs/internal.h
index 331a0504eaf8407e82e38655cc2324fa470de80f,22232e76df4704ee8a0ce2cba4eef651ee179133..498fab72f70bc8b4d3033126bbe1eaa2635e6db1
@@@ -84,6 -84,7 +84,7 @@@ struct nfs_client_initdata 
        u32 minorversion;
        struct net *net;
        const struct rpc_timeout *timeparms;
+       const struct cred *cred;
  };
  
  /*
@@@ -381,7 -382,7 +382,7 @@@ int nfs_check_flags(int)
  /* inode.c */
  extern struct workqueue_struct *nfsiod_workqueue;
  extern struct inode *nfs_alloc_inode(struct super_block *sb);
 -extern void nfs_destroy_inode(struct inode *);
 +extern void nfs_free_inode(struct inode *);
  extern int nfs_write_inode(struct inode *, struct writeback_control *);
  extern int nfs_drop_inode(struct inode *);
  extern void nfs_clear_inode(struct inode *);
@@@ -766,15 -767,10 +767,10 @@@ static inline bool nfs_error_is_fatal(i
        case -ESTALE:
        case -E2BIG:
        case -ENOMEM:
+       case -ETIMEDOUT:
                return true;
        default:
                return false;
        }
  }
  
- static inline void nfs_context_set_write_error(struct nfs_open_context *ctx, int error)
- {
-       ctx->error = error;
-       smp_wmb();
-       set_bit(NFS_CONTEXT_ERROR_WRITE, &ctx->flags);
- }
diff --combined fs/nfs/super.c
index 450ae77d19bff847364bf2c1634571309eadb48f,4f014c4c7bc1edd1b2bdbe349292aa22fa341ce3..d6c687419a818d61ee49420ccc248803ee14d0a8
@@@ -78,7 -78,7 +78,7 @@@
  
  enum {
        /* Mount options that take no arguments */
-       Opt_soft, Opt_hard,
+       Opt_soft, Opt_softerr, Opt_hard,
        Opt_posix, Opt_noposix,
        Opt_cto, Opt_nocto,
        Opt_ac, Opt_noac,
@@@ -125,6 -125,7 +125,7 @@@ static const match_table_t nfs_mount_op
        { Opt_sloppy, "sloppy" },
  
        { Opt_soft, "soft" },
+       { Opt_softerr, "softerr" },
        { Opt_hard, "hard" },
        { Opt_deprecated, "intr" },
        { Opt_deprecated, "nointr" },
@@@ -309,7 -310,7 +310,7 @@@ struct file_system_type nfs_xdev_fs_typ
  
  const struct super_operations nfs_sops = {
        .alloc_inode    = nfs_alloc_inode,
 -      .destroy_inode  = nfs_destroy_inode,
 +      .free_inode     = nfs_free_inode,
        .write_inode    = nfs_write_inode,
        .drop_inode     = nfs_drop_inode,
        .statfs         = nfs_statfs,
@@@ -628,7 -629,8 +629,8 @@@ static void nfs_show_mount_options(stru
                const char *str;
                const char *nostr;
        } nfs_info[] = {
-               { NFS_MOUNT_SOFT, ",soft", ",hard" },
+               { NFS_MOUNT_SOFT, ",soft", "" },
+               { NFS_MOUNT_SOFTERR, ",softerr", "" },
                { NFS_MOUNT_POSIX, ",posix", "" },
                { NFS_MOUNT_NOCTO, ",nocto", "" },
                { NFS_MOUNT_NOAC, ",noac", "" },
                seq_printf(m, ",acdirmin=%u", nfss->acdirmin/HZ);
        if (nfss->acdirmax != NFS_DEF_ACDIRMAX*HZ || showdefaults)
                seq_printf(m, ",acdirmax=%u", nfss->acdirmax/HZ);
+       if (!(nfss->flags & (NFS_MOUNT_SOFT|NFS_MOUNT_SOFTERR)))
+                       seq_puts(m, ",hard");
        for (nfs_infop = nfs_info; nfs_infop->flag; nfs_infop++) {
                if (nfss->flags & nfs_infop->flag)
                        seq_puts(m, nfs_infop->str);
@@@ -1239,10 -1243,15 +1243,15 @@@ static int nfs_parse_mount_options(cha
                 */
                case Opt_soft:
                        mnt->flags |= NFS_MOUNT_SOFT;
+                       mnt->flags &= ~NFS_MOUNT_SOFTERR;
                        break;
-               case Opt_hard:
+               case Opt_softerr:
+                       mnt->flags |= NFS_MOUNT_SOFTERR;
                        mnt->flags &= ~NFS_MOUNT_SOFT;
                        break;
+               case Opt_hard:
+                       mnt->flags &= ~(NFS_MOUNT_SOFT|NFS_MOUNT_SOFTERR);
+                       break;
                case Opt_posix:
                        mnt->flags |= NFS_MOUNT_POSIX;
                        break;
@@@ -2476,6 -2485,21 +2485,21 @@@ static int nfs_compare_super_address(st
        return 1;
  }
  
+ static int nfs_compare_userns(const struct nfs_server *old,
+               const struct nfs_server *new)
+ {
+       const struct user_namespace *oldns = &init_user_ns;
+       const struct user_namespace *newns = &init_user_ns;
+       if (old->client && old->client->cl_cred)
+               oldns = old->client->cl_cred->user_ns;
+       if (new->client && new->client->cl_cred)
+               newns = new->client->cl_cred->user_ns;
+       if (oldns != newns)
+               return 0;
+       return 1;
+ }
  static int nfs_compare_super(struct super_block *sb, void *data)
  {
        struct nfs_sb_mountdata *sb_mntdata = data;
                return 0;
        if (memcmp(&old->fsid, &server->fsid, sizeof(old->fsid)) != 0)
                return 0;
+       if (!nfs_compare_userns(old, server))
+               return 0;
        return nfs_compare_mount_options(sb, server, mntflags);
  }
  
diff --combined fs/nfsd/nfs4callback.c
index 7caa3801ce72b70de75802f0a5c1b78b1087ebb5,3a10399a0ef1ac4f4ad9e23d52449ec01fcc023d..9b93e7a9a26df59fb31a9a3ad2a71a62b283fc07
@@@ -868,6 -868,7 +868,7 @@@ static int setup_callback_client(struc
                .program        = &cb_program,
                .version        = 1,
                .flags          = (RPC_CLNT_CREATE_NOPING | RPC_CLNT_CREATE_QUIET),
+               .cred           = current_cred(),
        };
        struct rpc_clnt *client;
        const struct cred *cred;
@@@ -1010,9 -1011,8 +1011,9 @@@ static void nfsd4_cb_prepare(struct rpc
        cb->cb_seq_status = 1;
        cb->cb_status = 0;
        if (minorversion) {
 -              if (!nfsd41_cb_get_slot(clp, task))
 +              if (!cb->cb_holds_slot && !nfsd41_cb_get_slot(clp, task))
                        return;
 +              cb->cb_holds_slot = true;
        }
        rpc_call_start(task);
  }
@@@ -1033,15 -1033,12 +1034,15 @@@ static bool nfsd4_cb_sequence_done(stru
                 * the submission code will error out, so we don't need to
                 * handle that case here.
                 */
-               if (task->tk_flags & RPC_TASK_KILLED)
+               if (RPC_SIGNALLED(task))
                        goto need_restart;
  
                return true;
        }
  
 +      if (!cb->cb_holds_slot)
 +              goto need_restart;
 +
        switch (cb->cb_seq_status) {
        case 0:
                /*
                        cb->cb_seq_status);
        }
  
 +      cb->cb_holds_slot = false;
        clear_bit(0, &clp->cl_cb_slot_busy);
        rpc_wake_up_next(&clp->cl_cb_waitq);
        dprintk("%s: freed slot, new seqid=%d\n", __func__,
                clp->cl_cb_session->se_cb_seq_nr);
  
-       if (task->tk_flags & RPC_TASK_KILLED)
+       if (RPC_SIGNALLED(task))
                goto need_restart;
  out:
        return ret;
@@@ -1288,7 -1284,6 +1289,7 @@@ void nfsd4_init_cb(struct nfsd4_callbac
        cb->cb_seq_status = 1;
        cb->cb_status = 0;
        cb->cb_need_restart = false;
 +      cb->cb_holds_slot = false;
  }
  
  void nfsd4_run_cb(struct nfsd4_callback *cb)
diff --combined include/trace/events/sunrpc.h
index f0a6f0c5549cc15b793fac1e534c5c32b8569b23,dd301db645212e30f081b5de37d4d04f2ae4d930..ffa3c51dbb1a08ac3748211a94b497f672b320c3
@@@ -82,7 -82,6 +82,6 @@@ TRACE_DEFINE_ENUM(RPC_TASK_SWAPPER)
  TRACE_DEFINE_ENUM(RPC_CALL_MAJORSEEN);
  TRACE_DEFINE_ENUM(RPC_TASK_ROOTCREDS);
  TRACE_DEFINE_ENUM(RPC_TASK_DYNAMIC);
- TRACE_DEFINE_ENUM(RPC_TASK_KILLED);
  TRACE_DEFINE_ENUM(RPC_TASK_SOFT);
  TRACE_DEFINE_ENUM(RPC_TASK_SOFTCONN);
  TRACE_DEFINE_ENUM(RPC_TASK_SENT);
@@@ -97,7 -96,6 +96,6 @@@ TRACE_DEFINE_ENUM(RPC_TASK_NO_RETRANS_T
                { RPC_CALL_MAJORSEEN, "MAJORSEEN" },                    \
                { RPC_TASK_ROOTCREDS, "ROOTCREDS" },                    \
                { RPC_TASK_DYNAMIC, "DYNAMIC" },                        \
-               { RPC_TASK_KILLED, "KILLED" },                          \
                { RPC_TASK_SOFT, "SOFT" },                              \
                { RPC_TASK_SOFTCONN, "SOFTCONN" },                      \
                { RPC_TASK_SENT, "SENT" },                              \
@@@ -111,6 -109,7 +109,7 @@@ TRACE_DEFINE_ENUM(RPC_TASK_ACTIVE)
  TRACE_DEFINE_ENUM(RPC_TASK_NEED_XMIT);
  TRACE_DEFINE_ENUM(RPC_TASK_NEED_RECV);
  TRACE_DEFINE_ENUM(RPC_TASK_MSG_PIN_WAIT);
+ TRACE_DEFINE_ENUM(RPC_TASK_SIGNALLED);
  
  #define rpc_show_runstate(flags)                                      \
        __print_flags(flags, "|",                                       \
                { (1UL << RPC_TASK_ACTIVE), "ACTIVE" },                 \
                { (1UL << RPC_TASK_NEED_XMIT), "NEED_XMIT" },           \
                { (1UL << RPC_TASK_NEED_RECV), "NEED_RECV" },           \
-               { (1UL << RPC_TASK_MSG_PIN_WAIT), "MSG_PIN_WAIT" })
+               { (1UL << RPC_TASK_MSG_PIN_WAIT), "MSG_PIN_WAIT" },     \
+               { (1UL << RPC_TASK_SIGNALLED), "SIGNALLED" })
  
  DECLARE_EVENT_CLASS(rpc_task_running,
  
                __entry->flags = task->tk_flags;
                ),
  
 -      TP_printk("task:%u@%d flags=%s runstate=%s status=%d action=%pf",
 +      TP_printk("task:%u@%d flags=%s runstate=%s status=%d action=%ps",
                __entry->task_id, __entry->client_id,
                rpc_show_task_flags(__entry->flags),
                rpc_show_runstate(__entry->runstate),
@@@ -186,7 -186,7 +186,7 @@@ DECLARE_EVENT_CLASS(rpc_task_queued
                __entry->client_id = task->tk_client ?
                                     task->tk_client->cl_clid : -1;
                __entry->task_id = task->tk_pid;
-               __entry->timeout = task->tk_timeout;
+               __entry->timeout = rpc_task_timeout(task);
                __entry->runstate = task->tk_runstate;
                __entry->status = task->tk_status;
                __entry->flags = task->tk_flags;