Merge tag 'nfs-for-4.8-1' of git://git.linux-nfs.org/projects/trondmy/linux-nfs
author Linus Torvalds <torvalds@linux-foundation.org>
Sat, 30 Jul 2016 23:33:25 +0000 (16:33 -0700)
committer Linus Torvalds <torvalds@linux-foundation.org>
Sat, 30 Jul 2016 23:33:25 +0000 (16:33 -0700)
Pull NFS client updates from Trond Myklebust:
 "Highlights include:

  Stable bugfixes:
   - nfs: don't create zero-length requests

   - several LAYOUTGET bugfixes

  Features:
   - several performance related features

   - more aggressive caching when we can rely on close-to-open
     cache consistency

   - remove serialisation of O_DIRECT reads and writes

   - optimise several code paths to not flush to disk unnecessarily.

     However allow for the idiosyncrasies of pNFS for those layout
     types that need to issue a LAYOUTCOMMIT before the metadata can
     be updated on the server.

   - SUNRPC updates to the client data receive path

   - pNFS/SCSI support RH/Fedora dm-mpath device nodes

   - pNFS files/flexfiles can now use unprivileged ports when
     the generic NFS mount options allow it.

  Bugfixes:
   - Don't use RDMA direct data placement together with data
     integrity or privacy security flavours

   - Remove the RDMA ALLPHYSICAL memory registration mode as
     it has potential security holes.

   - Several layout recall fixes to improve NFSv4.1 protocol
     compliance.

   - Fix an Oops in the pNFS files and flexfiles connection
     setup to the DS

   - Allow retry of operations that used a returned delegation
     stateid

   - Don't mark the inode as revalidated if a LAYOUTCOMMIT is
     outstanding

   - Fix writeback races in nfs4_copy_range() and
     nfs42_proc_deallocate()"

* tag 'nfs-for-4.8-1' of git://git.linux-nfs.org/projects/trondmy/linux-nfs: (104 commits)
  pNFS: Actively set attributes as invalid if LAYOUTCOMMIT is outstanding
  NFSv4: Clean up lookup of SECINFO_NO_NAME
  NFSv4.2: Fix warning "variable ‘stateids’ set but not used"
  NFSv4: Fix warning "no previous prototype for ‘nfs4_listxattr’"
  SUNRPC: Fix a compiler warning in fs/nfs/clnt.c
  pNFS: Remove redundant smp_mb() from pnfs_init_lseg()
  pNFS: Cleanup - do layout segment initialisation in one place
  pNFS: Remove redundant stateid invalidation
  pNFS: Remove redundant pnfs_mark_layout_returned_if_empty()
  pNFS: Clear the layout metadata if the server changed the layout stateid
  pNFS: Cleanup - don't open code pnfs_mark_layout_stateid_invalid()
  NFS: pnfs_mark_matching_lsegs_return() should match the layout sequence id
  pNFS: Do not set plh_return_seq for non-callback related layoutreturns
  pNFS: Ensure layoutreturn acts as a completion for layout callbacks
  pNFS: Fix CB_LAYOUTRECALL stateid verification
  pNFS: Always update the layout barrier seqid on LAYOUTGET
  pNFS: Always update the layout stateid if NFS_LAYOUT_INVALID_STID is set
  pNFS: Clear the layout return tracking on layout reinitialisation
  pNFS: LAYOUTRETURN should only update the stateid if the layout is valid
  nfs: don't create zero-length requests
  ...

1  2 
fs/nfs/client.c
fs/nfs/dir.c
fs/nfs/direct.c
fs/nfs/internal.h
fs/nfs/nfstrace.h
fs/nfs/write.c

diff --combined fs/nfs/client.c
index 487c5607d52f4c5c853a1f13cc8fbb545e9e79a2,4849d0f778dc5ab1cc097ff6e1a08d12ecba652d..003ebce4bbc49fa0e5508816027ae65798119e4f
@@@ -367,8 -367,6 +367,6 @@@ nfs_found_client(const struct nfs_clien
   */
  struct nfs_client *
  nfs_get_client(const struct nfs_client_initdata *cl_init,
-              const struct rpc_timeout *timeparms,
-              const char *ip_addr,
               rpc_authflavor_t authflavour)
  {
        struct nfs_client *clp, *new = NULL;
                                        &nn->nfs_client_list);
                        spin_unlock(&nn->nfs_client_lock);
                        new->cl_flags = cl_init->init_flags;
-                       return rpc_ops->init_client(new, timeparms, ip_addr);
+                       return rpc_ops->init_client(new, cl_init);
                }
  
                spin_unlock(&nn->nfs_client_lock);
@@@ -470,7 -468,7 +468,7 @@@ EXPORT_SYMBOL_GPL(nfs_init_timeout_valu
   * Create an RPC client handle
   */
  int nfs_create_rpc_client(struct nfs_client *clp,
-                         const struct rpc_timeout *timeparms,
+                         const struct nfs_client_initdata *cl_init,
                          rpc_authflavor_t flavor)
  {
        struct rpc_clnt         *clnt = NULL;
                .protocol       = clp->cl_proto,
                .address        = (struct sockaddr *)&clp->cl_addr,
                .addrsize       = clp->cl_addrlen,
-               .timeout        = timeparms,
+               .timeout        = cl_init->timeparms,
                .servername     = clp->cl_hostname,
+               .nodename       = cl_init->nodename,
                .program        = &nfs_program,
                .version        = clp->rpc_ops->version,
                .authflavor     = flavor,
@@@ -591,14 -590,12 +590,12 @@@ EXPORT_SYMBOL_GPL(nfs_init_server_rpccl
   * nfs_init_client - Initialise an NFS2 or NFS3 client
   *
   * @clp: nfs_client to initialise
-  * @timeparms: timeout parameters for underlying RPC transport
-  * @ip_addr: IP presentation address (not used)
+  * @cl_init: Initialisation parameters
   *
   * Returns pointer to an NFS client, or an ERR_PTR value.
   */
  struct nfs_client *nfs_init_client(struct nfs_client *clp,
-                   const struct rpc_timeout *timeparms,
-                   const char *ip_addr)
+                                  const struct nfs_client_initdata *cl_init)
  {
        int error;
  
         * Create a client RPC handle for doing FSSTAT with UNIX auth only
         * - RFC 2623, sec 2.3.2
         */
-       error = nfs_create_rpc_client(clp, timeparms, RPC_AUTH_UNIX);
+       error = nfs_create_rpc_client(clp, cl_init, RPC_AUTH_UNIX);
        if (error < 0)
                goto error;
        nfs_mark_client_ready(clp, NFS_CS_READY);
@@@ -633,6 -630,7 +630,7 @@@ static int nfs_init_server(struct nfs_s
                           const struct nfs_parsed_mount_data *data,
                           struct nfs_subversion *nfs_mod)
  {
+       struct rpc_timeout timeparms;
        struct nfs_client_initdata cl_init = {
                .hostname = data->nfs_server.hostname,
                .addr = (const struct sockaddr *)&data->nfs_server.address,
                .nfs_mod = nfs_mod,
                .proto = data->nfs_server.protocol,
                .net = data->net,
+               .timeparms = &timeparms,
        };
-       struct rpc_timeout timeparms;
        struct nfs_client *clp;
        int error;
  
                set_bit(NFS_CS_NORESVPORT, &cl_init.init_flags);
  
        /* Allocate or find a client reference we can use */
-       clp = nfs_get_client(&cl_init, &timeparms, NULL, RPC_AUTH_UNIX);
+       clp = nfs_get_client(&cl_init, RPC_AUTH_UNIX);
        if (IS_ERR(clp)) {
                dprintk("<-- nfs_init_server() = error %ld\n", PTR_ERR(clp));
                return PTR_ERR(clp);
@@@ -1102,6 -1100,7 +1100,6 @@@ static const struct file_operations nfs
        .read           = seq_read,
        .llseek         = seq_lseek,
        .release        = seq_release_net,
 -      .owner          = THIS_MODULE,
  };
  
  static int nfs_volume_list_open(struct inode *inode, struct file *file);
@@@ -1122,6 -1121,7 +1120,6 @@@ static const struct file_operations nfs
        .read           = seq_read,
        .llseek         = seq_lseek,
        .release        = seq_release_net,
 -      .owner          = THIS_MODULE,
  };
  
  /*
diff --combined fs/nfs/dir.c
index baaa38859899eb6b97e234f2f94f92d83fee1520,e2d606abc9e8f985e01a518d18fc9fd5903faa10..177fefb26c18d344a30dfb85f75db61c7a6e44ec
@@@ -232,7 -232,7 +232,7 @@@ int nfs_readdir_make_qstr(struct qstr *
         * in a page cache page which kmemleak does not scan.
         */
        kmemleak_not_leak(string->name);
 -      string->hash = full_name_hash(name, len);
 +      string->hash = full_name_hash(NULL, name, len);
        return 0;
  }
  
@@@ -502,7 -502,7 +502,7 @@@ void nfs_prime_dcache(struct dentry *pa
                if (filename.len == 2 && filename.name[1] == '.')
                        return;
        }
 -      filename.hash = full_name_hash(filename.name, filename.len);
 +      filename.hash = full_name_hash(parent, filename.name, filename.len);
  
        dentry = d_lookup(parent, &filename);
  again:
@@@ -734,7 -734,7 +734,7 @@@ struct page *get_cache_page(nfs_readdir
        struct page *page;
  
        for (;;) {
 -              page = read_cache_page(file_inode(desc->file)->i_mapping,
 +              page = read_cache_page(desc->file->f_mapping,
                        desc->page_index, (filler_t *)nfs_readdir_filler, desc);
                if (IS_ERR(page) || grab_page(page))
                        break;
@@@ -1397,18 -1397,19 +1397,18 @@@ struct dentry *nfs_lookup(struct inode 
        if (IS_ERR(label))
                goto out;
  
 -      /* Protect against concurrent sillydeletes */
        trace_nfs_lookup_enter(dir, dentry, flags);
        error = NFS_PROTO(dir)->lookup(dir, &dentry->d_name, fhandle, fattr, label);
        if (error == -ENOENT)
                goto no_entry;
        if (error < 0) {
                res = ERR_PTR(error);
 -              goto out_unblock_sillyrename;
 +              goto out_label;
        }
        inode = nfs_fhget(dentry->d_sb, fhandle, fattr, label);
        res = ERR_CAST(inode);
        if (IS_ERR(res))
 -              goto out_unblock_sillyrename;
 +              goto out_label;
  
        /* Success: notify readdir to use READDIRPLUS */
        nfs_advise_use_readdirplus(dir);
@@@ -1417,11 -1418,11 +1417,11 @@@ no_entry
        res = d_splice_alias(inode, dentry);
        if (res != NULL) {
                if (IS_ERR(res))
 -                      goto out_unblock_sillyrename;
 +                      goto out_label;
                dentry = res;
        }
        nfs_set_verifier(dentry, nfs_save_change_attribute(dir));
 -out_unblock_sillyrename:
 +out_label:
        trace_nfs_lookup_exit(dir, dentry, flags, error);
        nfs4_label_free(label);
  out:
@@@ -1484,13 -1485,11 +1484,13 @@@ int nfs_atomic_open(struct inode *dir, 
                    struct file *file, unsigned open_flags,
                    umode_t mode, int *opened)
  {
 +      DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wq);
        struct nfs_open_context *ctx;
        struct dentry *res;
        struct iattr attr = { .ia_valid = ATTR_OPEN };
        struct inode *inode;
        unsigned int lookup_flags = 0;
 +      bool switched = false;
        int err;
  
        /* Expect a negative dentry */
  
        /* NFS only supports OPEN on regular files */
        if ((open_flags & O_DIRECTORY)) {
 -              if (!d_unhashed(dentry)) {
 +              if (!d_in_lookup(dentry)) {
                        /*
                         * Hashed negative dentry with O_DIRECTORY: dentry was
                         * revalidated and is fine, no need to perform lookup
                attr.ia_size = 0;
        }
  
 +      if (!(open_flags & O_CREAT) && !d_in_lookup(dentry)) {
 +              d_drop(dentry);
 +              switched = true;
 +              dentry = d_alloc_parallel(dentry->d_parent,
 +                                        &dentry->d_name, &wq);
 +              if (IS_ERR(dentry))
 +                      return PTR_ERR(dentry);
 +              if (unlikely(!d_in_lookup(dentry)))
 +                      return finish_no_open(file, dentry);
 +      }
 +
        ctx = create_nfs_open_context(dentry, open_flags);
        err = PTR_ERR(ctx);
        if (IS_ERR(ctx))
        trace_nfs_atomic_open_exit(dir, ctx, open_flags, err);
        put_nfs_open_context(ctx);
  out:
 +      if (unlikely(switched)) {
 +              d_lookup_done(dentry);
 +              dput(dentry);
 +      }
        return err;
  
  no_open:
        res = nfs_lookup(dir, dentry, lookup_flags);
 -      err = PTR_ERR(res);
 +      if (switched) {
 +              d_lookup_done(dentry);
 +              if (!res)
 +                      res = dentry;
 +              else
 +                      dput(dentry);
 +      }
        if (IS_ERR(res))
 -              goto out;
 -
 +              return PTR_ERR(res);
        return finish_no_open(file, res);
  }
  EXPORT_SYMBOL_GPL(nfs_atomic_open);
@@@ -2252,21 -2231,37 +2252,37 @@@ static struct nfs_access_entry *nfs_acc
        return NULL;
  }
  
- static int nfs_access_get_cached(struct inode *inode, struct rpc_cred *cred, struct nfs_access_entry *res)
+ static int nfs_access_get_cached(struct inode *inode, struct rpc_cred *cred, struct nfs_access_entry *res, bool may_block)
  {
        struct nfs_inode *nfsi = NFS_I(inode);
        struct nfs_access_entry *cache;
-       int err = -ENOENT;
+       bool retry = true;
+       int err;
  
        spin_lock(&inode->i_lock);
-       if (nfsi->cache_validity & NFS_INO_INVALID_ACCESS)
-               goto out_zap;
-       cache = nfs_access_search_rbtree(inode, cred);
-       if (cache == NULL)
-               goto out;
-       if (!nfs_have_delegated_attributes(inode) &&
-           !time_in_range_open(jiffies, cache->jiffies, cache->jiffies + nfsi->attrtimeo))
-               goto out_stale;
+       for(;;) {
+               if (nfsi->cache_validity & NFS_INO_INVALID_ACCESS)
+                       goto out_zap;
+               cache = nfs_access_search_rbtree(inode, cred);
+               err = -ENOENT;
+               if (cache == NULL)
+                       goto out;
+               /* Found an entry, is our attribute cache valid? */
+               if (!nfs_attribute_cache_expired(inode) &&
+                   !(nfsi->cache_validity & NFS_INO_INVALID_ATTR))
+                       break;
+               err = -ECHILD;
+               if (!may_block)
+                       goto out;
+               if (!retry)
+                       goto out_zap;
+               spin_unlock(&inode->i_lock);
+               err = __nfs_revalidate_inode(NFS_SERVER(inode), inode);
+               if (err)
+                       return err;
+               spin_lock(&inode->i_lock);
+               retry = false;
+       }
        res->jiffies = cache->jiffies;
        res->cred = cache->cred;
        res->mask = cache->mask;
  out:
        spin_unlock(&inode->i_lock);
        return err;
- out_stale:
-       rb_erase(&cache->rb_node, &nfsi->access_cache);
-       list_del(&cache->lru);
-       spin_unlock(&inode->i_lock);
-       nfs_access_free_entry(cache);
-       return -ENOENT;
  out_zap:
        spin_unlock(&inode->i_lock);
        nfs_access_zap_cache(inode);
@@@ -2307,13 -2296,12 +2317,12 @@@ static int nfs_access_get_cached_rcu(st
                cache = NULL;
        if (cache == NULL)
                goto out;
-       if (!nfs_have_delegated_attributes(inode) &&
-           !time_in_range_open(jiffies, cache->jiffies, cache->jiffies + nfsi->attrtimeo))
+       err = nfs_revalidate_inode_rcu(NFS_SERVER(inode), inode);
+       if (err)
                goto out;
        res->jiffies = cache->jiffies;
        res->cred = cache->cred;
        res->mask = cache->mask;
-       err = 0;
  out:
        rcu_read_unlock();
        return err;
@@@ -2402,18 -2390,19 +2411,19 @@@ EXPORT_SYMBOL_GPL(nfs_access_set_mask)
  static int nfs_do_access(struct inode *inode, struct rpc_cred *cred, int mask)
  {
        struct nfs_access_entry cache;
+       bool may_block = (mask & MAY_NOT_BLOCK) == 0;
        int status;
  
        trace_nfs_access_enter(inode);
  
        status = nfs_access_get_cached_rcu(inode, cred, &cache);
        if (status != 0)
-               status = nfs_access_get_cached(inode, cred, &cache);
+               status = nfs_access_get_cached(inode, cred, &cache, may_block);
        if (status == 0)
                goto out_cached;
  
        status = -ECHILD;
-       if (mask & MAY_NOT_BLOCK)
+       if (!may_block)
                goto out;
  
        /* Be clever: ask server to check for all possible rights */
diff --combined fs/nfs/direct.c
index e6210ead71d06d941d7f7083899eec321b8cc7ae,f0f4b8d3d83f8ed0b0c3faf06357e4d47542d096..72b7d13ee3c6a14e489f6a2b94c2caf66ef8598f
@@@ -196,6 -196,12 +196,12 @@@ static void nfs_direct_set_hdr_verf(str
        WARN_ON_ONCE(verfp->committed < 0);
  }
  
+ static int nfs_direct_cmp_verf(const struct nfs_writeverf *v1,
+               const struct nfs_writeverf *v2)
+ {
+       return nfs_write_verifier_cmp(&v1->verifier, &v2->verifier);
+ }
  /*
   * nfs_direct_cmp_hdr_verf - compare verifier for pgio header
   * @dreq - direct request possibly spanning multiple servers
@@@ -215,7 -221,7 +221,7 @@@ static int nfs_direct_set_or_cmp_hdr_ve
                nfs_direct_set_hdr_verf(dreq, hdr);
                return 0;
        }
-       return memcmp(verfp, &hdr->verf, sizeof(struct nfs_writeverf));
+       return nfs_direct_cmp_verf(verfp, &hdr->verf);
  }
  
  /*
@@@ -238,13 -244,15 +244,13 @@@ static int nfs_direct_cmp_commit_data_v
        if (verfp->committed < 0)
                return 1;
  
-       return memcmp(verfp, &data->verf, sizeof(struct nfs_writeverf));
+       return nfs_direct_cmp_verf(verfp, &data->verf);
  }
  
  /**
   * nfs_direct_IO - NFS address space operation for direct I/O
   * @iocb: target I/O control block
 - * @iov: array of vectors that define I/O buffer
 - * @pos: offset in file to begin the operation
 - * @nr_segs: size of iovec array
 + * @iter: I/O buffer
   *
   * The presence of this routine in the address space ops vector means
   * the NFS client supports direct I/O. However, for most direct IO, we
@@@ -366,22 -374,10 +372,10 @@@ out
   * Synchronous I/O uses a stack-allocated iocb.  Thus we can't trust
   * the iocb is still valid here if this is a synchronous request.
   */
- static void nfs_direct_complete(struct nfs_direct_req *dreq, bool write)
+ static void nfs_direct_complete(struct nfs_direct_req *dreq)
  {
        struct inode *inode = dreq->inode;
  
-       if (dreq->iocb && write) {
-               loff_t pos = dreq->iocb->ki_pos + dreq->count;
-               spin_lock(&inode->i_lock);
-               if (i_size_read(inode) < pos)
-                       i_size_write(inode, pos);
-               spin_unlock(&inode->i_lock);
-       }
-       if (write)
-               nfs_zap_mapping(inode, inode->i_mapping);
        inode_dio_end(inode);
  
        if (dreq->iocb) {
@@@ -436,7 -432,7 +430,7 @@@ static void nfs_direct_read_completion(
        }
  out_put:
        if (put_dreq(dreq))
-               nfs_direct_complete(dreq, false);
+               nfs_direct_complete(dreq);
        hdr->release(hdr);
  }
  
@@@ -542,7 -538,7 +536,7 @@@ static ssize_t nfs_direct_read_schedule
        }
  
        if (put_dreq(dreq))
-               nfs_direct_complete(dreq, false);
+               nfs_direct_complete(dreq);
        return 0;
  }
  
@@@ -583,17 -579,12 +577,12 @@@ ssize_t nfs_file_direct_read(struct kio
        if (!count)
                goto out;
  
-       inode_lock(inode);
-       result = nfs_sync_mapping(mapping);
-       if (result)
-               goto out_unlock;
        task_io_account_read(count);
  
        result = -ENOMEM;
        dreq = nfs_direct_req_alloc();
        if (dreq == NULL)
-               goto out_unlock;
+               goto out;
  
        dreq->inode = inode;
        dreq->bytes_left = dreq->max_count = count;
        if (!is_sync_kiocb(iocb))
                dreq->iocb = iocb;
  
+       nfs_start_io_direct(inode);
        NFS_I(inode)->read_io += count;
        result = nfs_direct_read_schedule_iovec(dreq, iter, iocb->ki_pos);
  
-       inode_unlock(inode);
+       nfs_end_io_direct(inode);
  
        if (!result) {
                result = nfs_direct_wait(dreq);
                        iocb->ki_pos += result;
        }
  
-       nfs_direct_req_release(dreq);
-       return result;
  out_release:
        nfs_direct_req_release(dreq);
- out_unlock:
-       inode_unlock(inode);
  out:
        return result;
  }
@@@ -657,6 -645,8 +643,8 @@@ static void nfs_direct_write_reschedule
        nfs_direct_write_scan_commit_list(dreq->inode, &reqs, &cinfo);
  
        dreq->count = 0;
+       dreq->verf.committed = NFS_INVALID_STABLE_HOW;
+       nfs_clear_pnfs_ds_commit_verifiers(&dreq->ds_cinfo);
        for (i = 0; i < dreq->mirror_count; i++)
                dreq->mirrors[i].count = 0;
        get_dreq(dreq);
@@@ -775,7 -765,8 +763,8 @@@ static void nfs_direct_write_schedule_w
                        nfs_direct_write_reschedule(dreq);
                        break;
                default:
-                       nfs_direct_complete(dreq, true);
+                       nfs_zap_mapping(dreq->inode, dreq->inode->i_mapping);
+                       nfs_direct_complete(dreq);
        }
  }
  
@@@ -991,6 -982,7 +980,7 @@@ static ssize_t nfs_direct_write_schedul
  ssize_t nfs_file_direct_write(struct kiocb *iocb, struct iov_iter *iter)
  {
        ssize_t result = -EINVAL;
+       size_t count;
        struct file *file = iocb->ki_filp;
        struct address_space *mapping = file->f_mapping;
        struct inode *inode = mapping->host;
        dfprintk(FILE, "NFS: direct write(%pD2, %zd@%Ld)\n",
                file, iov_iter_count(iter), (long long) iocb->ki_pos);
  
-       nfs_add_stats(mapping->host, NFSIOS_DIRECTWRITTENBYTES,
-                     iov_iter_count(iter));
+       result = generic_write_checks(iocb, iter);
+       if (result <= 0)
+               return result;
+       count = result;
+       nfs_add_stats(mapping->host, NFSIOS_DIRECTWRITTENBYTES, count);
  
        pos = iocb->ki_pos;
        end = (pos + iov_iter_count(iter) - 1) >> PAGE_SHIFT;
  
-       inode_lock(inode);
-       result = nfs_sync_mapping(mapping);
-       if (result)
-               goto out_unlock;
-       if (mapping->nrpages) {
-               result = invalidate_inode_pages2_range(mapping,
-                                       pos >> PAGE_SHIFT, end);
-               if (result)
-                       goto out_unlock;
-       }
-       task_io_account_write(iov_iter_count(iter));
+       task_io_account_write(count);
  
        result = -ENOMEM;
        dreq = nfs_direct_req_alloc();
        if (!dreq)
-               goto out_unlock;
+               goto out;
  
        dreq->inode = inode;
-       dreq->bytes_left = dreq->max_count = iov_iter_count(iter);
+       dreq->bytes_left = dreq->max_count = count;
        dreq->io_start = pos;
        dreq->ctx = get_nfs_open_context(nfs_file_open_context(iocb->ki_filp));
        l_ctx = nfs_get_lock_context(dreq->ctx);
        if (!is_sync_kiocb(iocb))
                dreq->iocb = iocb;
  
+       nfs_start_io_direct(inode);
        result = nfs_direct_write_schedule_iovec(dreq, iter, pos);
  
        if (mapping->nrpages) {
                                              pos >> PAGE_SHIFT, end);
        }
  
-       inode_unlock(inode);
+       nfs_end_io_direct(inode);
  
        if (!result) {
                result = nfs_direct_wait(dreq);
                if (result > 0) {
-                       struct inode *inode = mapping->host;
                        iocb->ki_pos = pos + result;
-                       spin_lock(&inode->i_lock);
-                       if (i_size_read(inode) < iocb->ki_pos)
-                               i_size_write(inode, iocb->ki_pos);
-                       spin_unlock(&inode->i_lock);
                        /* XXX: should check the generic_write_sync retval */
                        generic_write_sync(iocb, result);
                }
        }
-       nfs_direct_req_release(dreq);
-       return result;
  out_release:
        nfs_direct_req_release(dreq);
- out_unlock:
-       inode_unlock(inode);
+ out:
        return result;
  }
  
diff --combined fs/nfs/internal.h
index 5ea04d87fc653db7bf578853b1e97b165812864e,8de509b65e8db8513aa20698169e09c105207372..7ce5e023c3c3cd36056d1272cc16c9ebb1d1198a
@@@ -66,13 -66,16 +66,16 @@@ struct nfs_clone_mount 
  
  struct nfs_client_initdata {
        unsigned long init_flags;
-       const char *hostname;
-       const struct sockaddr *addr;
+       const char *hostname;                   /* Hostname of the server */
+       const struct sockaddr *addr;            /* Address of the server */
+       const char *nodename;                   /* Hostname of the client */
+       const char *ip_addr;                    /* IP address of the client */
        size_t addrlen;
        struct nfs_subversion *nfs_mod;
        int proto;
        u32 minorversion;
        struct net *net;
+       const struct rpc_timeout *timeparms;
  };
  
  /*
@@@ -147,9 -150,8 +150,8 @@@ extern void nfs_umount(const struct nfs
  extern const struct rpc_program nfs_program;
  extern void nfs_clients_init(struct net *net);
  extern struct nfs_client *nfs_alloc_client(const struct nfs_client_initdata *);
- int nfs_create_rpc_client(struct nfs_client *, const struct rpc_timeout *, rpc_authflavor_t);
+ int nfs_create_rpc_client(struct nfs_client *, const struct nfs_client_initdata *, rpc_authflavor_t);
  struct nfs_client *nfs_get_client(const struct nfs_client_initdata *,
-                                 const struct rpc_timeout *, const char *,
                                  rpc_authflavor_t);
  int nfs_probe_fsinfo(struct nfs_server *server, struct nfs_fh *, struct nfs_fattr *);
  void nfs_server_insert_lists(struct nfs_server *);
@@@ -184,7 -186,7 +186,7 @@@ extern struct nfs_server *nfs_clone_ser
                                           rpc_authflavor_t);
  extern int nfs_wait_client_init_complete(const struct nfs_client *clp);
  extern void nfs_mark_client_ready(struct nfs_client *clp, int state);
- extern struct nfs_client *nfs4_set_ds_client(struct nfs_client* mds_clp,
+ extern struct nfs_client *nfs4_set_ds_client(struct nfs_server *mds_srv,
                                             const struct sockaddr *ds_addr,
                                             int ds_addrlen, int ds_proto,
                                             unsigned int ds_timeo,
                                             rpc_authflavor_t au_flavor);
  extern struct rpc_clnt *nfs4_find_or_create_ds_client(struct nfs_client *,
                                                struct inode *);
- extern struct nfs_client *nfs3_set_ds_client(struct nfs_client *mds_clp,
+ extern struct nfs_client *nfs3_set_ds_client(struct nfs_server *mds_srv,
                        const struct sockaddr *ds_addr, int ds_addrlen,
                        int ds_proto, unsigned int ds_timeo,
                        unsigned int ds_retrans, rpc_authflavor_t au_flavor);
@@@ -338,8 -340,7 +340,7 @@@ nfs4_label_copy(struct nfs4_label *dst
  /* proc.c */
  void nfs_close_context(struct nfs_open_context *ctx, int is_sync);
  extern struct nfs_client *nfs_init_client(struct nfs_client *clp,
-                          const struct rpc_timeout *timeparms,
-                          const char *ip_addr);
+                          const struct nfs_client_initdata *);
  
  /* dir.c */
  extern void nfs_force_use_readdirplus(struct inode *dir);
@@@ -411,6 -412,19 +412,19 @@@ extern void __exit unregister_nfs_fs(vo
  extern bool nfs_sb_active(struct super_block *sb);
  extern void nfs_sb_deactive(struct super_block *sb);
  
+ /* io.c */
+ extern void nfs_start_io_read(struct inode *inode);
+ extern void nfs_end_io_read(struct inode *inode);
+ extern void nfs_start_io_write(struct inode *inode);
+ extern void nfs_end_io_write(struct inode *inode);
+ extern void nfs_start_io_direct(struct inode *inode);
+ extern void nfs_end_io_direct(struct inode *inode);
+ static inline bool nfs_file_io_is_buffered(struct nfs_inode *nfsi)
+ {
+       return test_bit(NFS_INO_ODIRECT, &nfsi->flags) == 0;
+ }
  /* namespace.c */
  #define NFS_PATH_CANONICAL 1
  extern char *nfs_path(char **p, struct dentry *dentry,
@@@ -496,9 -510,29 +510,29 @@@ void nfs_init_cinfo(struct nfs_commit_i
                    struct inode *inode,
                    struct nfs_direct_req *dreq);
  int nfs_key_timeout_notify(struct file *filp, struct inode *inode);
- bool nfs_ctx_key_to_expire(struct nfs_open_context *ctx);
+ bool nfs_ctx_key_to_expire(struct nfs_open_context *ctx, struct inode *inode);
  void nfs_pageio_stop_mirroring(struct nfs_pageio_descriptor *pgio);
  
+ int nfs_filemap_write_and_wait_range(struct address_space *mapping,
+               loff_t lstart, loff_t lend);
+ #ifdef CONFIG_NFS_V4_1
+ static inline
+ void nfs_clear_pnfs_ds_commit_verifiers(struct pnfs_ds_commit_info *cinfo)
+ {
+       int i;
+       for (i = 0; i < cinfo->nbuckets; i++)
+               cinfo->buckets[i].direct_verf.committed = NFS_INVALID_STABLE_HOW;
+ }
+ #else
+ static inline
+ void nfs_clear_pnfs_ds_commit_verifiers(struct pnfs_ds_commit_info *cinfo)
+ {
+ }
+ #endif
  #ifdef CONFIG_MIGRATION
  extern int nfs_migrate_page(struct address_space *,
                struct page *, struct page *, enum migrate_mode);
  #define nfs_migrate_page NULL
  #endif
  
+ static inline int
+ nfs_write_verifier_cmp(const struct nfs_write_verifier *v1,
+               const struct nfs_write_verifier *v2)
+ {
+       return memcmp(v1->data, v2->data, sizeof(v1->data));
+ }
  /* unlink.c */
  extern struct rpc_task *
  nfs_async_rename(struct inode *old_dir, struct inode *new_dir,
@@@ -521,8 -562,7 +562,7 @@@ extern ssize_t nfs_dreq_bytes_left(stru
  /* nfs4proc.c */
  extern void __nfs4_read_done_cb(struct nfs_pgio_header *);
  extern struct nfs_client *nfs4_init_client(struct nfs_client *clp,
-                           const struct rpc_timeout *timeparms,
-                           const char *ip_addr);
+                           const struct nfs_client_initdata *);
  extern int nfs40_walk_client_list(struct nfs_client *clp,
                                struct nfs_client **result,
                                struct rpc_cred *cred);
@@@ -623,7 -663,7 +663,7 @@@ void nfs_mark_page_unstable(struct pag
        if (!cinfo->dreq) {
                struct inode *inode = page_file_mapping(page)->host;
  
 -              inc_zone_page_state(page, NR_UNSTABLE_NFS);
 +              inc_node_page_state(page, NR_UNSTABLE_NFS);
                inc_wb_stat(&inode_to_bdi(inode)->wb, WB_RECLAIMABLE);
                __mark_inode_dirty(inode, I_DIRTY_DATASYNC);
        }
diff --combined fs/nfs/nfstrace.h
index 31c7763b94d58803c832e08f2d046f27de7c9c2f,fe80a1c26340f3c24b4bbc06292e3e5e77af4b79..2ca9167bc97d0468d7dddc284efbbae56ebd8bbf
@@@ -37,7 -37,6 +37,6 @@@
                        { 1 << NFS_INO_ADVISE_RDPLUS, "ADVISE_RDPLUS" }, \
                        { 1 << NFS_INO_STALE, "STALE" }, \
                        { 1 << NFS_INO_INVALIDATING, "INVALIDATING" }, \
-                       { 1 << NFS_INO_FLUSHING, "FLUSHING" }, \
                        { 1 << NFS_INO_FSCACHE, "FSCACHE" }, \
                        { 1 << NFS_INO_LAYOUTCOMMIT, "NEED_LAYOUTCOMMIT" }, \
                        { 1 << NFS_INO_LAYOUTCOMMITTING, "LAYOUTCOMMIT" })
@@@ -707,9 -706,9 +706,9 @@@ TRACE_EVENT(nfs_sillyrename_unlink
                        __entry->dev = dir->i_sb->s_dev;
                        __entry->dir = NFS_FILEID(dir);
                        __entry->error = error;
 -                      memcpy(__get_dynamic_array(name),
 +                      memcpy(__get_str(name),
                                data->args.name.name, len);
 -                      ((char *)__get_dynamic_array(name))[len] = 0;
 +                      __get_str(name)[len] = 0;
                ),
  
                TP_printk(
diff --combined fs/nfs/write.c
index 593fa21a02c07a9dca3f45ce0ef8f87edad61a5f,510c5befdf9bcc5e5a8bd2434f6af741327e976e..3a6724c6eb5ffbd6e83e45354cb2d4d068577527
@@@ -625,7 -625,7 +625,7 @@@ static int nfs_writepage_locked(struct 
        int err;
  
        nfs_inc_stats(inode, NFSIOS_VFSWRITEPAGE);
-       nfs_pageio_init_write(&pgio, inode, wb_priority(wbc),
+       nfs_pageio_init_write(&pgio, inode, 0,
                                false, &nfs_async_write_completion_ops);
        err = nfs_do_writepage(page, wbc, &pgio, launder);
        nfs_pageio_complete(&pgio);
@@@ -657,16 -657,9 +657,9 @@@ static int nfs_writepages_callback(stru
  int nfs_writepages(struct address_space *mapping, struct writeback_control *wbc)
  {
        struct inode *inode = mapping->host;
-       unsigned long *bitlock = &NFS_I(inode)->flags;
        struct nfs_pageio_descriptor pgio;
        int err;
  
-       /* Stop dirtying of new pages while we sync */
-       err = wait_on_bit_lock_action(bitlock, NFS_INO_FLUSHING,
-                       nfs_wait_bit_killable, TASK_KILLABLE);
-       if (err)
-               goto out_err;
        nfs_inc_stats(inode, NFSIOS_VFSWRITEPAGES);
  
        nfs_pageio_init_write(&pgio, inode, wb_priority(wbc), false,
        err = write_cache_pages(mapping, wbc, nfs_writepages_callback, &pgio);
        nfs_pageio_complete(&pgio);
  
-       clear_bit_unlock(NFS_INO_FLUSHING, bitlock);
-       smp_mb__after_atomic();
-       wake_up_bit(bitlock, NFS_INO_FLUSHING);
        if (err < 0)
                goto out_err;
        err = pgio.pg_error;
@@@ -898,7 -887,7 +887,7 @@@ nfs_mark_request_commit(struct nfs_pag
  static void
  nfs_clear_page_commit(struct page *page)
  {
 -      dec_zone_page_state(page, NR_UNSTABLE_NFS);
 +      dec_node_page_state(page, NR_UNSTABLE_NFS);
        dec_wb_stat(&inode_to_bdi(page_file_mapping(page)->host)->wb,
                    WB_RECLAIMABLE);
  }
@@@ -1195,9 -1184,11 +1184,11 @@@ nfs_key_timeout_notify(struct file *fil
  /*
   * Test if the open context credential key is marked to expire soon.
   */
- bool nfs_ctx_key_to_expire(struct nfs_open_context *ctx)
+ bool nfs_ctx_key_to_expire(struct nfs_open_context *ctx, struct inode *inode)
  {
-       return rpcauth_cred_key_to_expire(ctx->cred);
+       struct rpc_auth *auth = NFS_SERVER(inode)->client->cl_auth;
+       return rpcauth_cred_key_to_expire(auth, ctx->cred);
  }
  
  /*
@@@ -1289,6 -1280,9 +1280,9 @@@ int nfs_updatepage(struct file *file, s
        dprintk("NFS:       nfs_updatepage(%pD2 %d@%lld)\n",
                file, count, (long long)(page_file_offset(page) + offset));
  
+       if (!count)
+               goto out;
        if (nfs_can_extend_write(file, page, inode)) {
                count = max(count + offset, nfs_page_length(page));
                offset = 0;
                nfs_set_pageerror(page);
        else
                __set_page_dirty_nobuffers(page);
+ out:
        dprintk("NFS:       nfs_updatepage returns %d (isize %lld)\n",
                        status, (long long)i_size_read(inode));
        return status;
@@@ -1800,7 -1794,7 +1794,7 @@@ static void nfs_commit_release_pages(st
  
                /* Okay, COMMIT succeeded, apparently. Check the verifier
                 * returned by the server against all stored verfs. */
-               if (!memcmp(&req->wb_verf, &data->verf.verifier, sizeof(req->wb_verf))) {
+               if (!nfs_write_verifier_cmp(&req->wb_verf, &data->verf.verifier)) {
                        /* We have a match */
                        nfs_inode_remove_request(req);
                        dprintk(" OK\n");
@@@ -1923,6 -1917,24 +1917,24 @@@ out_mark_dirty
  }
  EXPORT_SYMBOL_GPL(nfs_write_inode);
  
+ /*
+  * Wrapper for filemap_write_and_wait_range()
+  *
+  * Needed for pNFS in order to ensure data becomes visible to the
+  * client.
+  */
+ int nfs_filemap_write_and_wait_range(struct address_space *mapping,
+               loff_t lstart, loff_t lend)
+ {
+       int ret;
+       ret = filemap_write_and_wait_range(mapping, lstart, lend);
+       if (ret == 0)
+               ret = pnfs_sync_inode(mapping->host, true);
+       return ret;
+ }
+ EXPORT_SYMBOL_GPL(nfs_filemap_write_and_wait_range);
  /*
   * flush the inode to disk.
   */