Merge branch 'for-2.6.31' of git://fieldses.org/git/linux-nfsd

author Linus Torvalds <torvalds@linux-foundation.org>

Mon, 22 Jun 2009 19:55:50 +0000 (12:55 -0700)

committer Linus Torvalds <torvalds@linux-foundation.org>

Mon, 22 Jun 2009 19:55:50 +0000 (12:55 -0700)
author Linus Torvalds <torvalds@linux-foundation.org>
Mon, 22 Jun 2009 19:55:50 +0000 (12:55 -0700)
committer Linus Torvalds <torvalds@linux-foundation.org>
Mon, 22 Jun 2009 19:55:50 +0000 (12:55 -0700)
diff --combined fs/Kconfig

index d78e950402c1e2a4c91bc5ae58dc8011edc885bb,44ab328ceb2a03e08e2834145b85db951efeb705..a97263be6a91d4927bef9dd8c3e834c7c251a472
--- 1/fs/Kconfig
--- 2/fs/Kconfig
+++ b/fs/Kconfig
@@@ -39,13 -39,6 +39,13 @@@ config FS_POSIX_AC
         bool
         default n
   
+ +source "fs/xfs/Kconfig"
+ +source "fs/gfs2/Kconfig"
+ +source "fs/ocfs2/Kconfig"
+ +source "fs/btrfs/Kconfig"
+ +
+ +endif # BLOCK
+ +
   config FILE_LOCKING
         bool "Enable POSIX file locking API" if EMBEDDED
         default y
@@@ -54,6 -47,13 +54,6 @@@
             for filesystems like NFS and for the flock() system
             call. Disabling this option saves about 11k.
   
- -source "fs/xfs/Kconfig"
- -source "fs/gfs2/Kconfig"
- -source "fs/ocfs2/Kconfig"
- -source "fs/btrfs/Kconfig"
- -
- -endif # BLOCK
- -
   source "fs/notify/Kconfig"
   
   source "fs/quota/Kconfig"
@@@ -62,16 -62,6 +62,16 @@@ source "fs/autofs/Kconfig
   source "fs/autofs4/Kconfig"
   source "fs/fuse/Kconfig"
   
+ +config CUSE
+ +      tristate "Character device in Userpace support"
+ +      depends on FUSE_FS
+ +      help
+ +        This FUSE extension allows character devices to be
+ +        implemented in userspace.
+ +
+ +        If you want to develop or use userspace character device
+ +        based on CUSE, answer Y or M.
+ +
   config GENERIC_ACL
         bool
         select FS_POSIX_ACL
@@@ -134,7 -124,7 +134,7 @@@ config TMPFS_POSIX_AC
   config HUGETLBFS
         bool "HugeTLB file system support"
         depends on X86 || IA64 || PPC64 || SPARC64 || (SUPERH && MMU) || \
- -                 (S390 && 64BIT) || BROKEN
+ +                 (S390 && 64BIT) || SYS_SUPPORTS_HUGETLBFS || BROKEN
         help
           hugetlbfs is a filesystem backing for HugeTLB pages, based on
           ramfs. For architectures that support it, say Y here and read
@@@ -236,10 -226,12 +236,12 @@@ source "fs/nfsd/Kconfig
   
   config LOCKD
         tristate
+       depends on FILE_LOCKING
   
   config LOCKD_V4
         bool
         depends on NFSD_V3 || NFS_V3
+       depends on FILE_LOCKING
         default y
   
   config EXPORTFS
diff --combined fs/nfs/Kconfig

index 5d6d6f4159357c1f11dd29d93343daa43e0c8135,7dbb8f27b9d6c914fa9b5147372071abe61f53c8..2a77bc25d5afc81d9510c9f1825d842deb727b80
--- 1/fs/nfs/Kconfig
--- 2/fs/nfs/Kconfig
+++ b/fs/nfs/Kconfig
@@@ -1,6 -1,6 +1,6 @@@
   config NFS_FS
         tristate "NFS client support"
-       depends on INET
+       depends on INET && FILE_LOCKING
         select LOCKD
         select SUNRPC
         select NFS_ACL_SUPPORT if NFS_V3_ACL
@@@ -74,15 -74,6 +74,15 @@@ config NFS_V
   
           If unsure, say N.
   
+ +config NFS_V4_1
+ +      bool "NFS client support for NFSv4.1 (DEVELOPER ONLY)"
+ +      depends on NFS_V4 && EXPERIMENTAL
+ +      help
+ +        This option enables support for minor version 1 of the NFSv4 protocol
+ +        (draft-ietf-nfsv4-minorversion1) in the kernel's NFS client.
+ +
+ +        Unless you're an NFS developer, say N.
+ +
   config ROOT_NFS
         bool "Root file system on NFS"
         depends on NFS_FS=y && IP_PNP
diff --combined fs/nfsd/export.c

index 8b1f8efb4690e2f9271adac75ad71a54c4198fe3,6eb918153fd41d49529d350c424619df23bc0c3a..b92a27629fb79ae8efb4ec508212512321c0a401
--- 1/fs/nfsd/export.c
--- 2/fs/nfsd/export.c
+++ b/fs/nfsd/export.c
@@@ -464,16 -464,11 +464,11 @@@ static int secinfo_parse(char **mesg, c
                 if (err)
                         return err;
                 /*
-                * Just a quick sanity check; we could also try to check
-                * whether this pseudoflavor is supported, but at worst
-                * an unsupported pseudoflavor on the export would just
-                * be a pseudoflavor that won't match the flavor of any
-                * authenticated request.  The administrator will
-                * probably discover the problem when someone fails to
-                * authenticate.
+                * XXX: It would be nice to also check whether this
+                * pseudoflavor is supported, so we can discover the
+                * problem at export time instead of when a client fails
+                * to authenticate.
                  */
-               if (f->pseudoflavor < 0)
-                       return -EINVAL;
                 err = get_int(mesg, &f->flags);
                 if (err)
                         return err;
@@@ -847,8 -842,9 +842,8 @@@ exp_get_fsid_key(svc_client *clp, int f
         return exp_find_key(clp, FSID_NUM, fsidv, NULL);
   }
   
- -static svc_export *exp_get_by_name(svc_client *clp, struct vfsmount *mnt,
- -                                 struct dentry *dentry,
- -                                 struct cache_req *reqp)
+ +static svc_export *exp_get_by_name(svc_client *clp, const struct path *path,
+ +                                   struct cache_req *reqp)
   {
         struct svc_export *exp, key;
         int err;
@@@ -857,7 -853,8 +852,7 @@@
                 return ERR_PTR(-ENOENT);
   
         key.ex_client = clp;
- -      key.ex_path.mnt = mnt;
- -      key.ex_path.dentry = dentry;
+ +      key.ex_path = *path;
   
         exp = svc_export_lookup(&key);
         if (exp == NULL)
@@@ -871,19 -868,24 +866,19 @@@
   /*
    * Find the export entry for a given dentry.
    */
- -static struct svc_export *exp_parent(svc_client *clp, struct vfsmount *mnt,
- -                                   struct dentry *dentry,
- -                                   struct cache_req *reqp)
+ +static struct svc_export *exp_parent(svc_client *clp, struct path *path)
   {
- -      svc_export *exp;
- -
- -      dget(dentry);
- -      exp = exp_get_by_name(clp, mnt, dentry, reqp);
- -
- -      while (PTR_ERR(exp) == -ENOENT && !IS_ROOT(dentry)) {
- -              struct dentry *parent;
- -
- -              parent = dget_parent(dentry);
- -              dput(dentry);
- -              dentry = parent;
- -              exp = exp_get_by_name(clp, mnt, dentry, reqp);
+ +      struct dentry *saved = dget(path->dentry);
+ +      svc_export *exp = exp_get_by_name(clp, path, NULL);
+ +
+ +      while (PTR_ERR(exp) == -ENOENT && !IS_ROOT(path->dentry)) {
+ +              struct dentry *parent = dget_parent(path->dentry);
+ +              dput(path->dentry);
+ +              path->dentry = parent;
+ +              exp = exp_get_by_name(clp, path, NULL);
         }
- -      dput(dentry);
+ +      dput(path->dentry);
+ +      path->dentry = saved;
         return exp;
   }
   
@@@ -1011,7 -1013,7 +1006,7 @@@ exp_export(struct nfsctl_export *nxp
                 goto out_put_clp;
         err = -EINVAL;
   
- -      exp = exp_get_by_name(clp, path.mnt, path.dentry, NULL);
+ +      exp = exp_get_by_name(clp, &path, NULL);
   
         memset(&new, 0, sizeof(new));
   
@@@ -1128,7 -1130,7 +1123,7 @@@ exp_unexport(struct nfsctl_export *nxp
                 goto out_domain;
   
         err = -EINVAL;
- -      exp = exp_get_by_name(dom, path.mnt, path.dentry, NULL);
+ +      exp = exp_get_by_name(dom, &path, NULL);
         path_put(&path);
         if (IS_ERR(exp))
                 goto out_domain;
@@@ -1170,7 -1172,7 +1165,7 @@@ exp_rootfh(svc_client *clp, char *name
         dprintk("nfsd: exp_rootfh(%s [%p] %s:%s/%ld)\n",
                  name, path.dentry, clp->name,
                  inode->i_sb->s_id, inode->i_ino);
- -      exp = exp_parent(clp, path.mnt, path.dentry, NULL);
+ +      exp = exp_parent(clp, &path);
         if (IS_ERR(exp)) {
                 err = PTR_ERR(exp);
                 goto out;
@@@ -1200,7 -1202,7 +1195,7 @@@ static struct svc_export *exp_find(stru
         if (IS_ERR(ek))
                 return ERR_CAST(ek);
   
- -      exp = exp_get_by_name(clp, ek->ek_path.mnt, ek->ek_path.dentry, reqp);
+ +      exp = exp_get_by_name(clp, &ek->ek_path, reqp);
         cache_put(&ek->h, &svc_expkey_cache);
   
         if (IS_ERR(exp))
@@@ -1240,7 -1242,8 +1235,7 @@@ __be32 check_nfsd_access(struct svc_exp
    * use exp_get_by_name() or exp_find().
    */
   struct svc_export *
- -rqst_exp_get_by_name(struct svc_rqst *rqstp, struct vfsmount *mnt,
- -              struct dentry *dentry)
+ +rqst_exp_get_by_name(struct svc_rqst *rqstp, struct path *path)
   {
         struct svc_export *gssexp, *exp = ERR_PTR(-ENOENT);
   
@@@ -1248,7 -1251,8 +1243,7 @@@
                 goto gss;
   
         /* First try the auth_unix client: */
- -      exp = exp_get_by_name(rqstp->rq_client, mnt, dentry,
- -                                              &rqstp->rq_chandle);
+ +      exp = exp_get_by_name(rqstp->rq_client, path, &rqstp->rq_chandle);
         if (PTR_ERR(exp) == -ENOENT)
                 goto gss;
         if (IS_ERR(exp))
@@@ -1260,7 -1264,8 +1255,7 @@@ gss
         /* Otherwise, try falling back on gss client */
         if (rqstp->rq_gssclient == NULL)
                 return exp;
- -      gssexp = exp_get_by_name(rqstp->rq_gssclient, mnt, dentry,
- -                                              &rqstp->rq_chandle);
+ +      gssexp = exp_get_by_name(rqstp->rq_gssclient, path, &rqstp->rq_chandle);
         if (PTR_ERR(gssexp) == -ENOENT)
                 return exp;
         if (!IS_ERR(exp))
@@@ -1299,19 -1304,23 +1294,19 @@@ gss
   }
   
   struct svc_export *
- -rqst_exp_parent(struct svc_rqst *rqstp, struct vfsmount *mnt,
- -              struct dentry *dentry)
+ +rqst_exp_parent(struct svc_rqst *rqstp, struct path *path)
   {
- -      struct svc_export *exp;
- -
- -      dget(dentry);
- -      exp = rqst_exp_get_by_name(rqstp, mnt, dentry);
- -
- -      while (PTR_ERR(exp) == -ENOENT && !IS_ROOT(dentry)) {
- -              struct dentry *parent;
- -
- -              parent = dget_parent(dentry);
- -              dput(dentry);
- -              dentry = parent;
- -              exp = rqst_exp_get_by_name(rqstp, mnt, dentry);
+ +      struct dentry *saved = dget(path->dentry);
+ +      struct svc_export *exp = rqst_exp_get_by_name(rqstp, path);
+ +
+ +      while (PTR_ERR(exp) == -ENOENT && !IS_ROOT(path->dentry)) {
+ +              struct dentry *parent = dget_parent(path->dentry);
+ +              dput(path->dentry);
+ +              path->dentry = parent;
+ +              exp = rqst_exp_get_by_name(rqstp, path);
         }
- -      dput(dentry);
+ +      dput(path->dentry);
+ +      path->dentry = saved;
         return exp;
   }
   
diff --combined fs/nfsd/vfs.c

index 99f835753596914a419a53a824c912a830f6a0df,1cf70616a11ebd57bec1f0e372e54fe2b386c68b..4145083dcf8817ed883652939cac1faa4dfd7cac
--- 1/fs/nfsd/vfs.c
--- 2/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@@ -55,7 -55,6 +55,7 @@@
   #include <linux/security.h>
   #endif /* CONFIG_NFSD_V4 */
   #include <linux/jhash.h>
+ +#include <linux/ima.h>
   
   #include <asm/uaccess.h>
   
@@@ -101,35 -100,36 +101,35 @@@ nfsd_cross_mnt(struct svc_rqst *rqstp, 
   {
         struct svc_export *exp = *expp, *exp2 = NULL;
         struct dentry *dentry = *dpp;
- -      struct vfsmount *mnt = mntget(exp->ex_path.mnt);
- -      struct dentry *mounts = dget(dentry);
+ +      struct path path = {.mnt = mntget(exp->ex_path.mnt),
+ +                          .dentry = dget(dentry)};
         int err = 0;
   
- -      while (follow_down(&mnt,&mounts)&&d_mountpoint(mounts));
+ +      while (d_mountpoint(path.dentry) && follow_down(&path))
+ +              ;
   
- -      exp2 = rqst_exp_get_by_name(rqstp, mnt, mounts);
+ +      exp2 = rqst_exp_get_by_name(rqstp, &path);
         if (IS_ERR(exp2)) {
                 if (PTR_ERR(exp2) != -ENOENT)
                         err = PTR_ERR(exp2);
- -              dput(mounts);
- -              mntput(mnt);
+ +              path_put(&path);
                 goto out;
         }
         if ((exp->ex_flags & NFSEXP_CROSSMOUNT) || EX_NOHIDE(exp2)) {
                 /* successfully crossed mount point */
                 /*
- -               * This is subtle: dentry is *not* under mnt at this point.
- -               * The only reason we are safe is that original mnt is pinned
- -               * down by exp, so we should dput before putting exp.
+ +               * This is subtle: path.dentry is *not* on path.mnt
+ +               * at this point.  The only reason we are safe is that
+ +               * original mnt is pinned down by exp, so we should
+ +               * put path *before* putting exp
                  */
- -              dput(dentry);
- -              *dpp = mounts;
- -              exp_put(exp);
+ +              *dpp = path.dentry;
+ +              path.dentry = dentry;
                 *expp = exp2;
- -      } else {
- -              exp_put(exp2);
- -              dput(mounts);
+ +              exp2 = exp;
         }
- -      mntput(mnt);
+ +      path_put(&path);
+ +      exp_put(exp2);
   out:
         return err;
   }
@@@ -168,29 -168,28 +168,29 @@@ nfsd_lookup_dentry(struct svc_rqst *rqs
                         /* checking mountpoint crossing is very different when stepping up */
                         struct svc_export *exp2 = NULL;
                         struct dentry *dp;
- -                      struct vfsmount *mnt = mntget(exp->ex_path.mnt);
- -                      dentry = dget(dparent);
- -                      while(dentry == mnt->mnt_root && follow_up(&mnt, &dentry))
+ +                      struct path path = {.mnt = mntget(exp->ex_path.mnt),
+ +                                          .dentry = dget(dparent)};
+ +
+ +                      while (path.dentry == path.mnt->mnt_root &&
+ +                             follow_up(&path))
                                 ;
- -                      dp = dget_parent(dentry);
- -                      dput(dentry);
- -                      dentry = dp;
+ +                      dp = dget_parent(path.dentry);
+ +                      dput(path.dentry);
+ +                      path.dentry = dp;
   
- -                      exp2 = rqst_exp_parent(rqstp, mnt, dentry);
+ +                      exp2 = rqst_exp_parent(rqstp, &path);
                         if (PTR_ERR(exp2) == -ENOENT) {
- -                              dput(dentry);
                                 dentry = dget(dparent);
                         } else if (IS_ERR(exp2)) {
                                 host_err = PTR_ERR(exp2);
- -                              dput(dentry);
- -                              mntput(mnt);
+ +                              path_put(&path);
                                 goto out_nfserr;
                         } else {
+ +                              dentry = dget(path.dentry);
                                 exp_put(exp);
                                 exp = exp2;
                         }
- -                      mntput(mnt);
+ +                      path_put(&path);
                 }
         } else {
                 fh_lock(fhp);
@@@ -736,8 -735,6 +736,8 @@@ nfsd_open(struct svc_rqst *rqstp, struc
                             flags, cred);
         if (IS_ERR(*filp))
                 host_err = PTR_ERR(*filp);
+ +      else
+ +              ima_counts_get(*filp);
   out_nfserr:
         err = nfserrno(host_err);
   out:
@@@ -966,6 -963,43 +966,43 @@@ static void kill_suid(struct dentry *de
         mutex_unlock(&dentry->d_inode->i_mutex);
   }
   
+ /*
+  * Gathered writes: If another process is currently writing to the file,
+  * there's a high chance this is another nfsd (triggered by a bulk write
+  * from a client's biod). Rather than syncing the file with each write
+  * request, we sleep for 10 msec.
+  *
+  * I don't know if this roughly approximates C. Juszak's idea of
+  * gathered writes, but it's a nice and simple solution (IMHO), and it
+  * seems to work:-)
+  *
+  * Note: we do this only in the NFSv2 case, since v3 and higher have a
+  * better tool (separate unstable writes and commits) for solving this
+  * problem.
+  */
+ static int wait_for_concurrent_writes(struct file *file)
+ {
+       struct inode *inode = file->f_path.dentry->d_inode;
+       static ino_t last_ino;
+       static dev_t last_dev;
+       int err = 0;
+ 
+       if (atomic_read(&inode->i_writecount) > 1
+           || (last_ino == inode->i_ino && last_dev == inode->i_sb->s_dev)) {
+               dprintk("nfsd: write defer %d\n", task_pid_nr(current));
+               msleep(10);
+               dprintk("nfsd: write resume %d\n", task_pid_nr(current));
+       }
+ 
+       if (inode->i_state & I_DIRTY) {
+               dprintk("nfsd: write sync %d\n", task_pid_nr(current));
+               err = nfsd_sync(file);
+       }
+       last_ino = inode->i_ino;
+       last_dev = inode->i_sb->s_dev;
+       return err;
+ }
+ 
   static __be32
   nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file,
                                 loff_t offset, struct kvec *vec, int vlen,
@@@ -978,6 -1012,7 +1015,7 @@@
         __be32                  err = 0;
         int                     host_err;
         int                     stable = *stablep;
+       int                     use_wgather;
   
   #ifdef MSNFS
         err = nfserr_perm;
@@@ -996,9 -1031,10 +1034,10 @@@
          *  -   the sync export option has been set, or
          *  -   the client requested O_SYNC behavior (NFSv3 feature).
          *  -   The file system doesn't support fsync().
-        * When gathered writes have been configured for this volume,
+        * When NFSv2 gathered writes have been configured for this volume,
          * flushing the data to disk is handled separately below.
          */
+       use_wgather = (rqstp->rq_vers == 2) && EX_WGATHER(exp);
   
         if (!file->f_op->fsync) {/* COMMIT3 cannot work */
                stable = 2;
@@@ -1007,7 -1043,7 +1046,7 @@@
   
         if (!EX_ISSYNC(exp))
                 stable = 0;
-       if (stable && !EX_WGATHER(exp)) {
+       if (stable && !use_wgather) {
                 spin_lock(&file->f_lock);
                 file->f_flags |= O_SYNC;
                 spin_unlock(&file->f_lock);
@@@ -1017,52 -1053,20 +1056,20 @@@
         oldfs = get_fs(); set_fs(KERNEL_DS);
         host_err = vfs_writev(file, (struct iovec __user *)vec, vlen, &offset);
         set_fs(oldfs);
-       if (host_err >= 0) {
-               *cnt = host_err;
-               nfsdstats.io_write += host_err;
-               fsnotify_modify(file->f_path.dentry);
-       }
+       if (host_err < 0)
+               goto out_nfserr;
+       *cnt = host_err;
+       nfsdstats.io_write += host_err;
+       fsnotify_modify(file->f_path.dentry);
   
         /* clear setuid/setgid flag after write */
-       if (host_err >= 0 && (inode->i_mode & (S_ISUID | S_ISGID)))
+       if (inode->i_mode & (S_ISUID | S_ISGID))
                 kill_suid(dentry);
   
-       if (host_err >= 0 && stable) {
-               static ino_t    last_ino;
-               static dev_t    last_dev;
- 
-               /*
-                * Gathered writes: If another process is currently
-                * writing to the file, there's a high chance
-                * this is another nfsd (triggered by a bulk write
-                * from a client's biod). Rather than syncing the
-                * file with each write request, we sleep for 10 msec.
-                *
-                * I don't know if this roughly approximates
-                * C. Juszak's idea of gathered writes, but it's a
-                * nice and simple solution (IMHO), and it seems to
-                * work:-)
-                */
-               if (EX_WGATHER(exp)) {
-                       if (atomic_read(&inode->i_writecount) > 1
-                           || (last_ino == inode->i_ino && last_dev == inode->i_sb->s_dev)) {
-                               dprintk("nfsd: write defer %d\n", task_pid_nr(current));
-                               msleep(10);
-                               dprintk("nfsd: write resume %d\n", task_pid_nr(current));
-                       }
- 
-                       if (inode->i_state & I_DIRTY) {
-                               dprintk("nfsd: write sync %d\n", task_pid_nr(current));
-                               host_err=nfsd_sync(file);
-                       }
- #if 0
-                       wake_up(&inode->i_wait);
- #endif
-               }
-               last_ino = inode->i_ino;
-               last_dev = inode->i_sb->s_dev;
-       }
+       if (stable && use_wgather)
+               host_err = wait_for_concurrent_writes(file);
   
+ out_nfserr:
         dprintk("nfsd: write complete host_err=%d\n", host_err);
         if (host_err >= 0)
                 err = 0;
@@@ -2027,7 -2031,6 +2034,7 @@@ nfsd_permission(struct svc_rqst *rqstp
                                         struct dentry *dentry, int acc)
   {
         struct inode    *inode = dentry->d_inode;
+ +      struct path     path;
         int             err;
   
         if (acc == NFSD_MAY_NOP)
@@@ -2100,17 -2103,7 +2107,17 @@@
         if (err == -EACCES && S_ISREG(inode->i_mode) &&
             acc == (NFSD_MAY_READ | NFSD_MAY_OWNER_OVERRIDE))
                 err = inode_permission(inode, MAY_EXEC);
+ +      if (err)
+ +              goto nfsd_out;
   
+ +      /* Do integrity (permission) checking now, but defer incrementing
+ +       * IMA counts to the actual file open.
+ +       */
+ +      path.mnt = exp->ex_path.mnt;
+ +      path.dentry = dentry;
+ +      err = ima_path_check(&path, acc & (MAY_READ | MAY_WRITE | MAY_EXEC),
+ +                           IMA_COUNT_LEAVE);
+ +nfsd_out:
         return err? nfserrno(err) : 0;
   }
   
diff --combined include/linux/fs.h

index 74a57938c8801dafadeea3e0519f4849f01afd91,58e843b26b98a4a25dd40be34682a47055b27f9c..1ff5e4e019524124ece3f762f36b841b0d2e1d73
--- 1/include/linux/fs.h
--- 2/include/linux/fs.h
+++ b/include/linux/fs.h
@@@ -729,8 -729,8 +729,8 @@@ struct inode 
         struct timespec         i_atime;
         struct timespec         i_mtime;
         struct timespec         i_ctime;
- -      unsigned int            i_blkbits;
         blkcnt_t                i_blocks;
+ +      unsigned int            i_blkbits;
         unsigned short          i_bytes;
         umode_t                 i_mode;
         spinlock_t              i_lock; /* i_blocks, i_bytes, maybe i_size */
@@@ -751,12 -751,13 +751,12 @@@
                 struct block_device     *i_bdev;
                 struct cdev             *i_cdev;
         };
- -      int                     i_cindex;
   
         __u32                   i_generation;
   
- -#ifdef CONFIG_DNOTIFY
- -      unsigned long           i_dnotify_mask; /* Directory notify events */
- -      struct dnotify_struct   *i_dnotify; /* for directory notifications */
+ +#ifdef CONFIG_FSNOTIFY
+ +      __u32                   i_fsnotify_mask; /* all events this inode cares about */
+ +      struct hlist_head       i_fsnotify_mark_entries; /* fsnotify mark entries */
   #endif
   
   #ifdef CONFIG_INOTIFY
@@@ -879,7 -880,7 +879,7 @@@ struct file_ra_state 
                                            there are only # of pages ahead */
   
         unsigned int ra_pages;          /* Maximum readahead window */
- -      int mmap_miss;                  /* Cache miss stat for mmap accesses */
+ +      unsigned int mmap_miss;         /* Cache miss stat for mmap accesses */
         loff_t prev_pos;                /* Cache last read() position */
   };
   
@@@ -1107,6 -1108,7 +1107,7 @@@ extern void locks_copy_lock(struct file
   extern void __locks_copy_lock(struct file_lock *, const struct file_lock *);
   extern void locks_remove_posix(struct file *, fl_owner_t);
   extern void locks_remove_flock(struct file *);
+ extern void locks_release_private(struct file_lock *);
   extern void posix_test_lock(struct file *, struct file_lock *);
   extern int posix_lock_file(struct file *, struct file_lock *, struct file_lock *);
   extern int posix_lock_file_wait(struct file *, struct file_lock *);
@@@ -1320,7 -1322,7 +1321,7 @@@ struct super_block 
         struct rw_semaphore     s_umount;
         struct mutex            s_lock;
         int                     s_count;
- -      int                     s_need_sync_fs;
+ +      int                     s_need_sync;
         atomic_t                s_active;
   #ifdef CONFIG_SECURITY
         void                    *s_security;
@@@ -1371,6 -1373,11 +1372,6 @@@
          * generic_show_options()
          */
         char *s_options;
- -
- -      /*
- -       * storage for asynchronous operations
- -       */
- -      struct list_head s_async_list;
   };
   
   extern struct timespec current_fs_time(struct super_block *sb);
@@@ -1794,7 -1801,7 +1795,7 @@@ extern struct vfsmount *kern_mount_data
   extern int may_umount_tree(struct vfsmount *);
   extern int may_umount(struct vfsmount *);
   extern long do_mount(char *, char *, char *, unsigned long, void *);
- -extern struct vfsmount *collect_mounts(struct vfsmount *, struct dentry *);
+ +extern struct vfsmount *collect_mounts(struct path *);
   extern void drop_collected_mounts(struct vfsmount *);
   
   extern int vfs_statfs(struct dentry *, struct kstatfs *);
@@@ -1919,9 -1926,8 +1920,9 @@@ extern void __init vfs_caches_init(unsi
   
   extern struct kmem_cache *names_cachep;
   
- -#define __getname()   kmem_cache_alloc(names_cachep, GFP_KERNEL)
- -#define __putname(name) kmem_cache_free(names_cachep, (void *)(name))
+ +#define __getname_gfp(gfp)    kmem_cache_alloc(names_cachep, (gfp))
+ +#define __getname()           __getname_gfp(GFP_KERNEL)
+ +#define __putname(name)               kmem_cache_free(names_cachep, (void *)(name))
   #ifndef CONFIG_AUDITSYSCALL
   #define putname(name)   __putname(name)
   #else
@@@ -1942,6 -1948,8 +1943,6 @@@ extern struct super_block *freeze_bdev(
   extern void emergency_thaw_all(void);
   extern int thaw_bdev(struct block_device *bdev, struct super_block *sb);
   extern int fsync_bdev(struct block_device *);
- -extern int fsync_super(struct super_block *);
- -extern int fsync_no_super(struct block_device *);
   #else
   static inline void bd_forget(struct inode *inode) {}
   static inline int sync_blockdev(struct block_device *bdev) { return 0; }
@@@ -1957,7 -1965,6 +1958,7 @@@ static inline int thaw_bdev(struct bloc
         return 0;
   }
   #endif
+ +extern int sync_filesystem(struct super_block *);
   extern const struct file_operations def_blk_fops;
   extern const struct file_operations def_chr_fops;
   extern const struct file_operations bad_sock_fops;
@@@ -2037,6 -2044,9 +2038,6 @@@ extern int __invalidate_device(struct b
   extern int invalidate_partition(struct gendisk *, int);
   #endif
   extern int invalidate_inodes(struct super_block *);
- -unsigned long __invalidate_mapping_pages(struct address_space *mapping,
- -                                      pgoff_t start, pgoff_t end,
- -                                      bool be_atomic);
   unsigned long invalidate_mapping_pages(struct address_space *mapping,
                                         pgoff_t start, pgoff_t end);
   
@@@ -2073,8 -2083,12 +2074,8 @@@ extern int filemap_fdatawrite_range(str
   
   extern int vfs_fsync(struct file *file, struct dentry *dentry, int datasync);
   extern void sync_supers(void);
- -extern void sync_filesystems(int wait);
- -extern void __fsync_super(struct super_block *sb);
   extern void emergency_sync(void);
   extern void emergency_remount(void);
- -extern int do_remount_sb(struct super_block *sb, int flags,
- -                       void *data, int force);
   #ifdef CONFIG_BLOCK
   extern sector_t bmap(struct inode *, sector_t);
   #endif
@@@ -2192,8 -2206,6 +2193,8 @@@ extern int generic_segment_checks(cons
   /* fs/splice.c */
   extern ssize_t generic_file_splice_read(struct file *, loff_t *,
                 struct pipe_inode_info *, size_t, unsigned int);
+ +extern ssize_t default_file_splice_read(struct file *, loff_t *,
+ +              struct pipe_inode_info *, size_t, unsigned int);
   extern ssize_t generic_file_splice_write(struct pipe_inode_info *,
                 struct file *, loff_t *, size_t, unsigned int);
   extern ssize_t generic_splice_sendpage(struct pipe_inode_info *pipe,
@@@ -2343,8 -2355,6 +2344,8 @@@ extern void simple_release_fs(struct vf
   extern ssize_t simple_read_from_buffer(void __user *to, size_t count,
                         loff_t *ppos, const void *from, size_t available);
   
+ +extern int simple_fsync(struct file *, struct dentry *, int);
+ +
   #ifdef CONFIG_MIGRATION
   extern int buffer_migrate_page(struct address_space *,
                                 struct page *, struct page *);
diff --combined include/linux/nfsd/state.h

index 7ef4b7ad1214e503a86035cf6073aa632f0315ee,f5a95fd343121d854fc77aa168054afbeaecc0d9..57ab2ed08459cc31576f3b486eb864cbfd6df202
--- 1/include/linux/nfsd/state.h
--- 2/include/linux/nfsd/state.h
+++ b/include/linux/nfsd/state.h
@@@ -41,6 -41,7 +41,6 @@@
   #include <linux/kref.h>
   #include <linux/sunrpc/clnt.h>
   
- -#define NFS4_OPAQUE_LIMIT 1024
   typedef struct {
         u32             cl_boot;
         u32             cl_id;
@@@ -60,15 -61,6 +60,6 @@@ typedef struct 
   #define si_stateownerid   si_opaque.so_stateownerid
   #define si_fileid         si_opaque.so_fileid
   
- 
- struct nfs4_cb_recall {
-       u32                     cbr_ident;
-       int                     cbr_trunc;
-       stateid_t               cbr_stateid;
-       struct knfsd_fh         cbr_fh;
-       struct nfs4_delegation  *cbr_dp;
- };
- 
   struct nfs4_delegation {
         struct list_head        dl_perfile;
         struct list_head        dl_perclnt;
@@@ -80,22 -72,25 +71,25 @@@
         struct file             *dl_vfs_file;
         u32                     dl_type;
         time_t                  dl_time;
-       struct nfs4_cb_recall   dl_recall;
+ /* For recall: */
+       u32                     dl_ident;
+       stateid_t               dl_stateid;
+       struct knfsd_fh         dl_fh;
+       int                     dl_retries;
   };
   
- #define dl_stateid      dl_recall.cbr_stateid
- #define dl_fh           dl_recall.cbr_fh
- 
   /* client delegation callback info */
- struct nfs4_callback {
+ struct nfs4_cb_conn {
         /* SETCLIENTID info */
         u32                     cb_addr;
         unsigned short          cb_port;
         u32                     cb_prog;
-       u32                     cb_ident;
+       u32                     cb_minorversion;
+       u32                     cb_ident;       /* minorversion 0 only */
         /* RPC client info */
         atomic_t                cb_set;     /* successful CB_NULL call */
         struct rpc_clnt *       cb_client;
+       struct rpc_cred *       cb_cred;
   };
   
   /* Maximum number of slots per session. 128 is useful for long haul TCP */
@@@ -121,6 -116,17 +115,17 @@@ struct nfsd4_slot 
         struct nfsd4_cache_entry        sl_cache_entry;
   };
   
+ struct nfsd4_channel_attrs {
+       u32             headerpadsz;
+       u32             maxreq_sz;
+       u32             maxresp_sz;
+       u32             maxresp_cached;
+       u32             maxops;
+       u32             maxreqs;
+       u32             nr_rdma_attrs;
+       u32             rdma_attrs;
+ };
+ 
   struct nfsd4_session {
         struct kref             se_ref;
         struct list_head        se_hash;        /* hash by sessionid */
@@@ -128,11 -134,8 +133,8 @@@
         u32                     se_flags;
         struct nfs4_client      *se_client;     /* for expire_client */
         struct nfs4_sessionid   se_sessionid;
-       u32                     se_fmaxreq_sz;
-       u32                     se_fmaxresp_sz;
-       u32                     se_fmaxresp_cached;
-       u32                     se_fmaxops;
-       u32                     se_fnumslots;
+       struct nfsd4_channel_attrs se_fchannel;
+       struct nfsd4_channel_attrs se_bchannel;
         struct nfsd4_slot       se_slots[];     /* forward channel slots */
   };
   
@@@ -184,7 -187,7 +186,7 @@@ struct nfs4_client 
         struct svc_cred         cl_cred;        /* setclientid principal */
         clientid_t              cl_clientid;    /* generated by server */
         nfs4_verifier           cl_confirm;     /* generated by server */
-       struct nfs4_callback    cl_callback;    /* callback info */
+       struct nfs4_cb_conn     cl_cb_conn;     /* callback info */
         atomic_t                cl_count;       /* ref count */
         u32                     cl_firststate;  /* recovery dir creation */
   
diff --combined include/linux/sunrpc/svcsock.h

index 6bb1ec4ae3108dc963d597658b7332f0c0854aa9,8271631389491c690be8f01596bf0ce484833dfd..04dba23c59f2c4aead01ab313127368a63b93a05
--- 1/include/linux/sunrpc/svcsock.h
--- 2/include/linux/sunrpc/svcsock.h
+++ b/include/linux/sunrpc/svcsock.h
@@@ -38,12 -38,13 +38,15 @@@ int                svc_recv(struct svc_rqst *, long)
   int           svc_send(struct svc_rqst *);
   void          svc_drop(struct svc_rqst *);
   void          svc_sock_update_bufs(struct svc_serv *serv);
- int           svc_sock_names(char *buf, struct svc_serv *serv, char *toclose);
- int           svc_addsock(struct svc_serv *serv, int fd, char *name_return);
+ int           svc_sock_names(struct svc_serv *serv, char *buf,
+                                       const size_t buflen,
+                                       const char *toclose);
+ int           svc_addsock(struct svc_serv *serv, const int fd,
+                                       char *name_return, const size_t len);
   void          svc_init_xprt_sock(void);
   void          svc_cleanup_xprt_sock(void);
+ +struct svc_xprt *svc_sock_create(struct svc_serv *serv, int prot);
+ +void          svc_sock_destroy(struct svc_xprt *);
   
   /*
    * svc_makesock socket characteristics
diff --combined net/sunrpc/svcsock.c

index a2a03e500533a80f31a2c457bc198feb7448935c,b09c80c56ee3a452aff9c908affb4affbfa1fdec..23128ee191ae707973345e3438ccf9e055009bed
--- 1/net/sunrpc/svcsock.c
--- 2/net/sunrpc/svcsock.c
+++ b/net/sunrpc/svcsock.c
@@@ -240,42 -240,76 +240,76 @@@ out
   /*
    * Report socket names for nfsdfs
    */
- static int one_sock_name(char *buf, struct svc_sock *svsk)
+ static int svc_one_sock_name(struct svc_sock *svsk, char *buf, int remaining)
   {
+       const struct sock *sk = svsk->sk_sk;
+       const char *proto_name = sk->sk_protocol == IPPROTO_UDP ?
+                                                       "udp" : "tcp";
         int len;
   
-       switch(svsk->sk_sk->sk_family) {
-       case AF_INET:
-               len = sprintf(buf, "ipv4 %s %pI4 %d\n",
-                             svsk->sk_sk->sk_protocol == IPPROTO_UDP ?
-                             "udp" : "tcp",
-                             &inet_sk(svsk->sk_sk)->rcv_saddr,
-                             inet_sk(svsk->sk_sk)->num);
+       switch (sk->sk_family) {
+       case PF_INET:
+               len = snprintf(buf, remaining, "ipv4 %s %pI4 %d\n",
+                               proto_name,
+                               &inet_sk(sk)->rcv_saddr,
+                               inet_sk(sk)->num);
+               break;
+       case PF_INET6:
+               len = snprintf(buf, remaining, "ipv6 %s %pI6 %d\n",
+                               proto_name,
+                               &inet6_sk(sk)->rcv_saddr,
+                               inet_sk(sk)->num);
                 break;
         default:
-               len = sprintf(buf, "*unknown-%d*\n",
-                              svsk->sk_sk->sk_family);
+               len = snprintf(buf, remaining, "*unknown-%d*\n",
+                               sk->sk_family);
+       }
+ 
+       if (len >= remaining) {
+               *buf = '\0';
+               return -ENAMETOOLONG;
         }
         return len;
   }
   
- int
- svc_sock_names(char *buf, struct svc_serv *serv, char *toclose)
+ /**
+  * svc_sock_names - construct a list of listener names in a string
+  * @serv: pointer to RPC service
+  * @buf: pointer to a buffer to fill in with socket names
+  * @buflen: size of the buffer to be filled
+  * @toclose: pointer to '\0'-terminated C string containing the name
+  *            of a listener to be closed
+  *
+  * Fills in @buf with a '\n'-separated list of names of listener
+  * sockets.  If @toclose is not NULL, the socket named by @toclose
+  * is closed, and is not included in the output list.
+  *
+  * Returns positive length of the socket name string, or a negative
+  * errno value on error.
+  */
+ int svc_sock_names(struct svc_serv *serv, char *buf, const size_t buflen,
+                  const char *toclose)
   {
         struct svc_sock *svsk, *closesk = NULL;
         int len = 0;
   
         if (!serv)
                 return 0;
+ 
         spin_lock_bh(&serv->sv_lock);
         list_for_each_entry(svsk, &serv->sv_permsocks, sk_xprt.xpt_list) {
-               int onelen = one_sock_name(buf+len, svsk);
-               if (toclose && strcmp(toclose, buf+len) == 0)
+               int onelen = svc_one_sock_name(svsk, buf + len, buflen - len);
+               if (onelen < 0) {
+                       len = onelen;
+                       break;
+               }
+               if (toclose && strcmp(toclose, buf + len) == 0)
                         closesk = svsk;
                 else
                         len += onelen;
         }
         spin_unlock_bh(&serv->sv_lock);
+ 
         if (closesk)
                 /* Should unregister with portmap, but you cannot
                  * unregister just one protocol...
@@@ -346,6 -380,7 +380,7 @@@ static void svc_sock_setbufsize(struct 
         sock->sk->sk_sndbuf = snd * 2;
         sock->sk->sk_rcvbuf = rcv * 2;
         sock->sk->sk_userlocks |= SOCK_SNDBUF_LOCK|SOCK_RCVBUF_LOCK;
+       sock->sk->sk_write_space(sock->sk);
         release_sock(sock->sk);
   #endif
   }
@@@ -387,6 -422,15 +422,15 @@@ static void svc_write_space(struct soc
         }
   }
   
+ static void svc_tcp_write_space(struct sock *sk)
+ {
+       struct socket *sock = sk->sk_socket;
+ 
+       if (sk_stream_wspace(sk) >= sk_stream_min_wspace(sk) && sock)
+               clear_bit(SOCK_NOSPACE, &sock->flags);
+       svc_write_space(sk);
+ }
+ 
   /*
    * Copy the UDP datagram's destination address to the rqstp structure.
    * The 'destination' address in this case is the address to which the
@@@ -427,13 -471,14 +471,14 @@@ static int svc_udp_recvfrom(struct svc_
                 long            all[SVC_PKTINFO_SPACE / sizeof(long)];
         } buffer;
         struct cmsghdr *cmh = &buffer.hdr;
-       int             err, len;
         struct msghdr msg = {
                 .msg_name = svc_addr(rqstp),
                 .msg_control = cmh,
                 .msg_controllen = sizeof(buffer),
                 .msg_flags = MSG_DONTWAIT,
         };
+       size_t len;
+       int err;
   
         if (test_and_clear_bit(XPT_CHNGBUF, &svsk->sk_xprt.xpt_flags))
             /* udp sockets need large rcvbuf as all pending
@@@ -465,8 -510,8 +510,8 @@@
                 return -EAGAIN;
         }
         len = svc_addr_len(svc_addr(rqstp));
-       if (len < 0)
-               return len;
+       if (len == 0)
+               return -EAFNOSUPPORT;
         rqstp->rq_addrlen = len;
         if (skb->tstamp.tv64 == 0) {
                 skb->tstamp = ktime_get_real();
@@@ -980,25 -1025,16 +1025,16 @@@ static void svc_tcp_prep_reply_hdr(stru
   static int svc_tcp_has_wspace(struct svc_xprt *xprt)
   {
         struct svc_sock *svsk = container_of(xprt, struct svc_sock, sk_xprt);
-       struct svc_serv *serv = svsk->sk_xprt.xpt_server;
+       struct svc_serv *serv = svsk->sk_xprt.xpt_server;
         int required;
-       int wspace;
   
-       /*
-        * Set the SOCK_NOSPACE flag before checking the available
-        * sock space.
-        */
+       if (test_bit(XPT_LISTENER, &xprt->xpt_flags))
+               return 1;
+       required = atomic_read(&xprt->xpt_reserved) + serv->sv_max_mesg;
+       if (sk_stream_wspace(svsk->sk_sk) >= required)
+               return 1;
         set_bit(SOCK_NOSPACE, &svsk->sk_sock->flags);
-       required = atomic_read(&svsk->sk_xprt.xpt_reserved) + serv->sv_max_mesg;
-       wspace = sk_stream_wspace(svsk->sk_sk);
- 
-       if (wspace < sk_stream_min_wspace(svsk->sk_sk))
-               return 0;
-       if (required * 2 > wspace)
-               return 0;
- 
-       clear_bit(SOCK_NOSPACE, &svsk->sk_sock->flags);
-       return 1;
+       return 0;
   }
   
   static struct svc_xprt *svc_tcp_create(struct svc_serv *serv,
@@@ -1054,7 -1090,7 +1090,7 @@@ static void svc_tcp_init(struct svc_soc
                 dprintk("setting up TCP socket for reading\n");
                 sk->sk_state_change = svc_tcp_state_change;
                 sk->sk_data_ready = svc_tcp_data_ready;
-               sk->sk_write_space = svc_write_space;
+               sk->sk_write_space = svc_tcp_write_space;
   
                 svsk->sk_reclen = 0;
                 svsk->sk_tcplen = 0;
@@@ -1148,9 -1184,19 +1184,19 @@@ static struct svc_sock *svc_setup_socke
         return svsk;
   }
   
- int svc_addsock(struct svc_serv *serv,
-               int fd,
-               char *name_return)
+ /**
+  * svc_addsock - add a listener socket to an RPC service
+  * @serv: pointer to RPC service to which to add a new listener
+  * @fd: file descriptor of the new listener
+  * @name_return: pointer to buffer to fill in with name of listener
+  * @len: size of the buffer
+  *
+  * Fills in socket name and returns positive length of name if successful.
+  * Name is terminated with '\n'.  On error, returns a negative errno
+  * value.
+  */
+ int svc_addsock(struct svc_serv *serv, const int fd, char *name_return,
+               const size_t len)
   {
         int err = 0;
         struct socket *so = sockfd_lookup(fd, &err);
@@@ -1190,7 -1236,7 +1236,7 @@@
                 sockfd_put(so);
                 return err;
         }
-       return one_sock_name(name_return, svsk);
+       return svc_one_sock_name(svsk, name_return, len);
   }
   EXPORT_SYMBOL_GPL(svc_addsock);
   
@@@ -1327,42 -1373,3 +1373,42 @@@ static void svc_sock_free(struct svc_xp
                 sock_release(svsk->sk_sock);
         kfree(svsk);
   }
+ +
+ +/*
+ + * Create a svc_xprt.
+ + *
+ + * For internal use only (e.g. nfsv4.1 backchannel).
+ + * Callers should typically use the xpo_create() method.
+ + */
+ +struct svc_xprt *svc_sock_create(struct svc_serv *serv, int prot)
+ +{
+ +      struct svc_sock *svsk;
+ +      struct svc_xprt *xprt = NULL;
+ +
+ +      dprintk("svc: %s\n", __func__);
+ +      svsk = kzalloc(sizeof(*svsk), GFP_KERNEL);
+ +      if (!svsk)
+ +              goto out;
+ +
+ +      xprt = &svsk->sk_xprt;
+ +      if (prot == IPPROTO_TCP)
+ +              svc_xprt_init(&svc_tcp_class, xprt, serv);
+ +      else if (prot == IPPROTO_UDP)
+ +              svc_xprt_init(&svc_udp_class, xprt, serv);
+ +      else
+ +              BUG();
+ +out:
+ +      dprintk("svc: %s return %p\n", __func__, xprt);
+ +      return xprt;
+ +}
+ +EXPORT_SYMBOL_GPL(svc_sock_create);
+ +
+ +/*
+ + * Destroy a svc_sock.
+ + */
+ +void svc_sock_destroy(struct svc_xprt *xprt)
+ +{
+ +      if (xprt)
+ +              kfree(container_of(xprt, struct svc_sock, sk_xprt));
+ +}
+ +EXPORT_SYMBOL_GPL(svc_sock_destroy);
author	Linus Torvalds <torvalds@linux-foundation.org>
	Mon, 22 Jun 2009 19:55:50 +0000 (12:55 -0700)
committer	Linus Torvalds <torvalds@linux-foundation.org>
	Mon, 22 Jun 2009 19:55:50 +0000 (12:55 -0700)
		1	2
fs/Kconfig	patch \|	diff1 \|	diff2 \|	blob \| history
fs/nfs/Kconfig	patch \|	diff1 \|	diff2 \|	blob \| history
fs/nfsd/export.c	patch \|	diff1 \|	diff2 \|	blob \| history
fs/nfsd/vfs.c	patch \|	diff1 \|	diff2 \|	blob \| history
include/linux/fs.h	patch \|	diff1 \|	diff2 \|	blob \| history
include/linux/nfsd/state.h	patch \|	diff1 \|	diff2 \|	blob \| history
include/linux/sunrpc/svcsock.h	patch \|	diff1 \|	diff2 \|	blob \| history
net/sunrpc/svcsock.c	patch \|	diff1 \|	diff2 \|	blob \| history