NFS: Remove pNFS bloat from the generic write path

[linux-2.6-block.git] / fs / nfs / nfs4proc.c
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c

index a64aa56e140a905cfd73ba9ce4b6723589d27ad6..3b1080118452ba8d3b5fc860d9d99db8b900d8ad 100644 (file)
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -363,9 +363,8 @@ static void renew_lease(const struct nfs_server *server, unsigned long timestamp
   * Must be called while holding tbl->slot_tbl_lock
   */
  static void
-nfs4_free_slot(struct nfs4_slot_table *tbl, struct nfs4_slot *free_slot)
+nfs4_free_slot(struct nfs4_slot_table *tbl, u8 free_slotid)
  {
-       int free_slotid = free_slot - tbl->slots;
         int slotid = free_slotid;
  
         BUG_ON(slotid < 0 || slotid >= NFS4_MAX_SLOT_TABLE);
@@ -430,7 +429,7 @@ static void nfs41_sequence_free_slot(struct nfs4_sequence_res *res)
         }
  
         spin_lock(&tbl->slot_tbl_lock);
-       nfs4_free_slot(tbl, res->sr_slot);
+       nfs4_free_slot(tbl, res->sr_slot - tbl->slots);
         nfs4_check_drain_fc_complete(res->sr_session);
         spin_unlock(&tbl->slot_tbl_lock);
         res->sr_slot = NULL;
@@ -553,13 +552,10 @@ int nfs41_setup_sequence(struct nfs4_session *session,
         spin_lock(&tbl->slot_tbl_lock);
         if (test_bit(NFS4_SESSION_DRAINING, &session->session_state) &&
             !rpc_task_has_priority(task, RPC_PRIORITY_PRIVILEGED)) {
-               /*
-                * The state manager will wait until the slot table is empty.
-                * Schedule the reset thread
-                */
+               /* The state manager will wait until the slot table is empty */
                 rpc_sleep_on(&tbl->slot_tbl_waitq, task, NULL);
                 spin_unlock(&tbl->slot_tbl_lock);
-               dprintk("%s Schedule Session Reset\n", __func__);
+               dprintk("%s session is draining\n", __func__);
                 return -EAGAIN;
         }
  
@@ -3430,19 +3426,6 @@ static inline int nfs4_server_supports_acls(struct nfs_server *server)
   */
  #define NFS4ACL_MAXPAGES (XATTR_SIZE_MAX >> PAGE_CACHE_SHIFT)
  
-static void buf_to_pages(const void *buf, size_t buflen,
-               struct page **pages, unsigned int *pgbase)
-{
-       const void *p = buf;
-
-       *pgbase = offset_in_page(buf);
-       p -= *pgbase;
-       while (p < buf + buflen) {
-               *(pages++) = virt_to_page(p);
-               p += PAGE_CACHE_SIZE;
-       }
-}
-
  static int buf_to_pages_noslab(const void *buf, size_t buflen,
                 struct page **pages, unsigned int *pgbase)
  {
@@ -3539,9 +3522,19 @@ out:
         nfs4_set_cached_acl(inode, acl);
  }
  
+/*
+ * The getxattr API returns the required buffer length when called with a
+ * NULL buf. The NFSv4 acl tool then calls getxattr again after allocating
+ * the required buf.  On a NULL buf, we send a page of data to the server
+ * guessing that the ACL request can be serviced by a page. If so, we cache
+ * up to the page of ACL data, and the 2nd call to getxattr is serviced by
+ * the cache. If not, we throw away the page and cache only the required
+ * length. The next getxattr call will then produce another round trip to
+ * the server, this time with the input buf of the required size.
+ */
  static ssize_t __nfs4_get_acl_uncached(struct inode *inode, void *buf, size_t buflen)
  {
-       struct page *pages[NFS4ACL_MAXPAGES];
+       struct page *pages[NFS4ACL_MAXPAGES] = {NULL, };
         struct nfs_getaclargs args = {
                 .fh = NFS_FH(inode),
                 .acl_pages = pages,
@@ -3556,41 +3549,60 @@ static ssize_t __nfs4_get_acl_uncached(struct inode *inode, void *buf, size_t bu
                 .rpc_argp = &args,
                 .rpc_resp = &res,
         };
-       struct page *localpage = NULL;
-       int ret;
+       int ret = -ENOMEM, npages, i, acl_len = 0;
  
-       if (buflen < PAGE_SIZE) {
-               /* As long as we're doing a round trip to the server anyway,
-                * let's be prepared for a page of acl data. */
-               localpage = alloc_page(GFP_KERNEL);
-               resp_buf = page_address(localpage);
-               if (localpage == NULL)
-                       return -ENOMEM;
-               args.acl_pages[0] = localpage;
-               args.acl_pgbase = 0;
-               args.acl_len = PAGE_SIZE;
-       } else {
-               resp_buf = buf;
-               buf_to_pages(buf, buflen, args.acl_pages, &args.acl_pgbase);
+       npages = (buflen + PAGE_SIZE - 1) >> PAGE_SHIFT;
+       /* As long as we're doing a round trip to the server anyway,
+        * let's be prepared for a page of acl data. */
+       if (npages == 0)
+               npages = 1;
+
+       for (i = 0; i < npages; i++) {
+               pages[i] = alloc_page(GFP_KERNEL);
+               if (!pages[i])
+                       goto out_free;
+       }
+       if (npages > 1) {
+               /* for decoding across pages */
+               args.acl_scratch = alloc_page(GFP_KERNEL);
+               if (!args.acl_scratch)
+                       goto out_free;
         }
-       ret = nfs4_call_sync(NFS_SERVER(inode)->client, NFS_SERVER(inode), &msg, &args.seq_args, &res.seq_res, 0);
+       args.acl_len = npages * PAGE_SIZE;
+       args.acl_pgbase = 0;
+       /* Let decode_getacl know not to fail if the ACL data is larger than
+        * the page we send as a guess */
+       if (buf == NULL)
+               res.acl_flags |= NFS4_ACL_LEN_REQUEST;
+       resp_buf = page_address(pages[0]);
+
+       dprintk("%s  buf %p buflen %ld npages %d args.acl_len %ld\n",
+               __func__, buf, buflen, npages, args.acl_len);
+       ret = nfs4_call_sync(NFS_SERVER(inode)->client, NFS_SERVER(inode),
+                            &msg, &args.seq_args, &res.seq_res, 0);
         if (ret)
                 goto out_free;
-       if (res.acl_len > args.acl_len)
-               nfs4_write_cached_acl(inode, NULL, res.acl_len);
+
+       acl_len = res.acl_len - res.acl_data_offset;
+       if (acl_len > args.acl_len)
+               nfs4_write_cached_acl(inode, NULL, acl_len);
         else
-               nfs4_write_cached_acl(inode, resp_buf, res.acl_len);
+               nfs4_write_cached_acl(inode, resp_buf + res.acl_data_offset,
+                                     acl_len);
         if (buf) {
                 ret = -ERANGE;
-               if (res.acl_len > buflen)
+               if (acl_len > buflen)
                         goto out_free;
-               if (localpage)
-                       memcpy(buf, resp_buf, res.acl_len);
+               _copy_from_pages(buf, pages, res.acl_data_offset,
+                               res.acl_len);
         }
-       ret = res.acl_len;
+       ret = acl_len;
  out_free:
-       if (localpage)
-               __free_page(localpage);
+       for (i = 0; i < npages; i++)
+               if (pages[i])
+                       __free_page(pages[i]);
+       if (args.acl_scratch)
+               __free_page(args.acl_scratch);
         return ret;
  }
  
@@ -3621,6 +3633,8 @@ static ssize_t nfs4_proc_get_acl(struct inode *inode, void *buf, size_t buflen)
                 nfs_zap_acl_cache(inode);
         ret = nfs4_read_cached_acl(inode, buf, buflen);
         if (ret != -ENOENT)
+               /* -ENOENT is returned if there is no ACL, or if there is an
+                * ACL but only its length is cached (no acl data) */
                 return ret;
         return nfs4_get_acl_uncached(inode, buf, buflen);
  }