NFS: Ensure the client submounts, when it crosses a server mountpoint.
author Trond Myklebust <Trond.Myklebust@netapp.com>
Fri, 9 Jun 2006 13:34:19 +0000 (09:34 -0400)
committer Trond Myklebust <Trond.Myklebust@netapp.com>
Fri, 9 Jun 2006 13:34:19 +0000 (09:34 -0400)
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
fs/nfs/Makefile
fs/nfs/dir.c
fs/nfs/inode.c
fs/nfs/namespace.c [new file with mode: 0644]
fs/nfs/nfs4_fs.h
fs/nfs/nfs4proc.c
include/linux/nfs_fs.h

index ec61fd56a1a93b693e7379ec0bbb80db02632a82..d9d494cee388446eab235c9711396c1af4279516 100644 (file)
@@ -5,7 +5,8 @@
 obj-$(CONFIG_NFS_FS) += nfs.o
 
 nfs-y                  := dir.o file.o inode.o nfs2xdr.o pagelist.o \
-                          proc.o read.o symlink.o unlink.o write.o
+                          proc.o read.o symlink.o unlink.o write.o \
+                          namespace.o
 nfs-$(CONFIG_ROOT_NFS) += nfsroot.o mount_clnt.o      
 nfs-$(CONFIG_NFS_V3)   += nfs3proc.o nfs3xdr.o
 nfs-$(CONFIG_NFS_V3_ACL)       += nfs3acl.o
index 1d3d8922a66393fd1f9af82afe66154ec1fc22fb..3ddda6f7ecc2dfad9c9c2b000b9a406349a7f65d 100644 (file)
@@ -868,6 +868,17 @@ int nfs_is_exclusive_create(struct inode *dir, struct nameidata *nd)
        return (nd->intent.open.flags & O_EXCL) != 0;
 }
 
+/* Revalidate the root directory's inode when @fattr reports a different fsid */
+static inline int nfs_reval_fsid(struct inode *dir,
+               struct nfs_fh *fh, struct nfs_fattr *fattr)
+{
+       struct nfs_server *srv = NFS_SERVER(dir);
+
+       if (nfs_fsid_equal(&srv->fsid, &fattr->fsid))
+               return 0;
+       return __nfs_revalidate_inode(srv, dir->i_sb->s_root->d_inode);
+}
+
 static struct dentry *nfs_lookup(struct inode *dir, struct dentry * dentry, struct nameidata *nd)
 {
        struct dentry *res;
@@ -900,6 +911,11 @@ static struct dentry *nfs_lookup(struct inode *dir, struct dentry * dentry, stru
                res = ERR_PTR(error);
                goto out_unlock;
        }
+       error = nfs_reval_fsid(dir, &fhandle, &fattr);
+       if (error < 0) {
+               res = ERR_PTR(error);
+               goto out_unlock;
+       }
        inode = nfs_fhget(dentry->d_sb, &fhandle, &fattr);
        res = (struct dentry *)inode;
        if (IS_ERR(res))
index 1a809f6f898960fda96adfbcc53241ef6a951ea8..47167ab64f5ba4595320118420125e4fe1968f9b 100644 (file)
@@ -221,6 +221,14 @@ nfs_block_size(unsigned long bsize, unsigned char *nrbitsp)
        return nfs_block_bits(bsize, nrbitsp);
 }
 
+/* Set the superblock's maximum file size, falling back to MAX_LFS_FILESIZE if out of range */
+static inline void nfs_super_set_maxbytes(struct super_block *sb, __u64 maxfilesize)
+{
+       sb->s_maxbytes = (loff_t)maxfilesize;
+       if (sb->s_maxbytes <= 0 || sb->s_maxbytes > MAX_LFS_FILESIZE)
+               sb->s_maxbytes = MAX_LFS_FILESIZE;
+}
+
 /*
  * Obtain the root inode of the file system.
  */
@@ -331,9 +339,7 @@ nfs_sb_init(struct super_block *sb, rpc_authflavor_t authflavor)
        }
        server->backing_dev_info.ra_pages = server->rpages * NFS_MAX_READAHEAD;
 
-       sb->s_maxbytes = fsinfo.maxfilesize;
-       if (sb->s_maxbytes > MAX_LFS_FILESIZE) 
-               sb->s_maxbytes = MAX_LFS_FILESIZE; 
+       nfs_super_set_maxbytes(sb, fsinfo.maxfilesize);
 
        server->client->cl_intr = (server->flags & NFS_MOUNT_INTR) ? 1 : 0;
        server->client->cl_softrtry = (server->flags & NFS_MOUNT_SOFT) ? 1 : 0;
@@ -877,6 +883,11 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr)
                        if (nfs_server_capable(inode, NFS_CAP_READDIRPLUS)
                            && fattr->size <= NFS_LIMIT_READDIRPLUS)
                                set_bit(NFS_INO_ADVISE_RDPLUS, &NFS_FLAGS(inode));
+                       /* Deal with crossing mountpoints */
+                       if (!nfs_fsid_equal(&NFS_SB(sb)->fsid, &fattr->fsid)) {
+                               inode->i_op = &nfs_mountpoint_inode_operations;
+                               inode->i_fop = NULL;
+                       }
                } else if (S_ISLNK(inode->i_mode))
                        inode->i_op = &nfs_symlink_inode_operations;
                else
@@ -1650,6 +1661,141 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
  * File system information
  */
 
+/*
+ * nfs_path - reconstruct the path given an arbitrary dentry
+ * @base - arbitrary string to prepend to the path
+ * @dentry - pointer to dentry
+ * @buffer - result buffer
+ * @buflen - length of buffer
+ *
+ * Constructs the path from the root dentry to an arbitrary hashed dentry at
+ * the end of @buffer. Returns its start, or ERR_PTR(-ENAMETOOLONG). Mainly for
+ * figuring out the server-side path when automounting on an existing partition.
+ */
+static char *nfs_path(const char *base, const struct dentry *dentry,
+                     char *buffer, ssize_t buflen)
+{
+       char *end = buffer+buflen;
+       int namelen;
+
+       *--end = '\0';          /* the string is built backwards from the buffer's end */
+       buflen--;
+       spin_lock(&dcache_lock);
+       while (!IS_ROOT(dentry)) {
+               namelen = dentry->d_name.len;
+               buflen -= namelen + 1;          /* the name plus its leading '/' */
+               if (buflen < 0)
+                       goto Elong_unlock;      /* must not return with dcache_lock held */
+               end -= namelen;
+               memcpy(end, dentry->d_name.name, namelen);
+               *--end = '/';
+               dentry = dentry->d_parent;
+       }
+       spin_unlock(&dcache_lock);
+       namelen = strlen(base);
+       /* Strip off excess slashes in base string */
+       while (namelen > 0 && base[namelen - 1] == '/')
+               namelen--;
+       buflen -= namelen;
+       if (buflen < 0)
+               goto Elong;
+       end -= namelen;
+       memcpy(end, base, namelen);
+       return end;
+Elong_unlock:
+       spin_unlock(&dcache_lock);
+Elong:
+       return ERR_PTR(-ENAMETOOLONG);
+}
+
+struct nfs_clone_mount {
+       const struct super_block *sb;   /* superblock of the parent mount */
+       const struct dentry *dentry;    /* dentry the submount is created on */
+       struct nfs_fh *fh;              /* filehandle for the new root */
+       struct nfs_fattr *fattr;        /* attributes for the new root inode */
+};
+
+/* Clone the parent's nfs_server and set up a superblock for the submount; returns sb or ERR_PTR */
+static struct super_block *nfs_clone_generic_sb(struct nfs_clone_mount *data,
+               struct super_block *(*clone_client)(struct nfs_server *, struct nfs_clone_mount *))
+{
+       struct nfs_server *server;
+       struct nfs_server *parent = NFS_SB(data->sb);
+       struct super_block *sb = ERR_PTR(-EINVAL);
+       void *err = ERR_PTR(-ENOMEM);
+       struct inode *root_inode;
+       struct nfs_fsinfo fsinfo;
+       int len;
+
+       server = kmalloc(sizeof(struct nfs_server), GFP_KERNEL);
+       if (server == NULL)
+               goto out_err;
+       memcpy(server, parent, sizeof(*server));
+       len = strlen(parent->hostname) + 1;
+       server->hostname = kmalloc(len, GFP_KERNEL);
+       if (server->hostname == NULL)
+               goto free_server;
+       memcpy(server->hostname, parent->hostname, len);
+       server->fsid = data->fattr->fsid;
+       nfs_copy_fh(&server->fh, data->fh);
+       if (rpciod_up() != 0)
+               goto free_hostname;
+
+       sb = clone_client(server, data);
+       if (IS_ERR((err = sb)) || sb->s_root)
+               goto kill_rpciod;       /* error, or sb already has a root: discard our server copy */
+
+       sb->s_op = data->sb->s_op;
+       sb->s_blocksize = data->sb->s_blocksize;
+       sb->s_blocksize_bits = data->sb->s_blocksize_bits;
+       sb->s_maxbytes = data->sb->s_maxbytes;
+       /* NOTE(review): server->client is still the parent's pointer (from the memcpy) until cloned below */
+       server->client_sys = server->client_acl = ERR_PTR(-EINVAL);
+       err = ERR_PTR(-ENOMEM);
+       server->io_stats = nfs_alloc_iostats();
+       if (server->io_stats == NULL)
+               goto out_deactivate;
+       server->client = rpc_clone_client(parent->client);
+       if (IS_ERR((err = server->client)))
+               goto out_deactivate;
+       if (!IS_ERR(parent->client_sys)) {
+               server->client_sys = rpc_clone_client(parent->client_sys);
+               if (IS_ERR((err = server->client_sys)))
+                       goto out_deactivate;
+       }
+       if (!IS_ERR(parent->client_acl)) {
+               server->client_acl = rpc_clone_client(parent->client_acl);
+               if (IS_ERR((err = server->client_acl)))
+                       goto out_deactivate;
+       }
+       root_inode = nfs_fhget(sb, data->fh, data->fattr);
+       if (IS_ERR((err = root_inode)))         /* nfs_fhget() fails with an ERR_PTR, not NULL */
+               goto out_deactivate;
+       err = ERR_PTR(-ENOMEM);                 /* err held a valid pointer; reset it before the next failure exit */
+       if ((sb->s_root = d_alloc_root(root_inode)) == NULL)
+               goto out_put_root;
+       fsinfo.fattr = data->fattr;
+       if (NFS_PROTO(root_inode)->fsinfo(server, data->fh, &fsinfo) == 0)
+               nfs_super_set_maxbytes(sb, fsinfo.maxfilesize);
+       sb->s_root->d_op = server->rpc_ops->dentry_ops;
+       sb->s_flags |= MS_ACTIVE;
+       return sb;
+out_put_root:
+       iput(root_inode);
+out_deactivate:
+       up_write(&sb->s_umount);
+       deactivate_super(sb);
+       return (struct super_block *)err;
+kill_rpciod:
+       rpciod_down();
+free_hostname:
+       kfree(server->hostname);
+free_server:
+       kfree(server);
+out_err:
+       return (struct super_block *)err;
+}
+
 static int nfs_set_super(struct super_block *s, void *data)
 {
        s->s_fs_info = data;
@@ -1807,6 +1953,31 @@ static struct file_system_type nfs_fs_type = {
        .fs_flags       = FS_ODD_RENAME|FS_REVAL_DOT|FS_BINARY_MOUNTDATA,
 };
 
+/* Find or create the superblock for @server; start lockd when s_root is NULL and NONLM is not set */
+static struct super_block *nfs_clone_client(struct nfs_server *server, struct nfs_clone_mount *data)
+{
+       struct super_block *sb = sget(&nfs_fs_type, nfs_compare_super, nfs_set_super, server);
+
+       if (!IS_ERR(sb) && !sb->s_root && (server->flags & NFS_MOUNT_NONLM) == 0)
+               lockd_up();
+       return sb;
+}
+
+/* get_sb hook of clone_nfs_fs_type; @raw_data is the struct nfs_clone_mount to clone from */
+static struct super_block *nfs_clone_nfs_sb(struct file_system_type *fs_type,
+               int flags, const char *dev_name, void *raw_data)
+{
+       return nfs_clone_generic_sb(raw_data, nfs_clone_client);
+}
+
+static struct file_system_type clone_nfs_fs_type = {
+       .owner          = THIS_MODULE,
+       .name           = "nfs",
+       .get_sb         = nfs_clone_nfs_sb,     /* raw_data is a struct nfs_clone_mount */
+       .kill_sb        = nfs_kill_super,
+       .fs_flags       = FS_ODD_RENAME|FS_REVAL_DOT|FS_BINARY_MOUNTDATA,
+};
+
 #ifdef CONFIG_NFS_V4
 
 static void nfs4_clear_inode(struct inode *);
@@ -2156,6 +2327,75 @@ static int param_set_idmap_timeout(const char *val, struct kernel_param *kp)
 module_param_call(idmap_cache_timeout, param_set_idmap_timeout, param_get_int,
                 &nfs_idmap_cache_timeout, 0644);
 
+/* Constructs the SERVER-side path: the superblock's mnt_path followed by the path to @dentry */
+static inline char *nfs4_path(const struct dentry *dentry, char *buffer, ssize_t buflen)
+{
+       return nfs_path(NFS_SB(dentry->d_sb)->mnt_path, dentry, buffer, buflen);
+}
+
+/* Return a kmalloc'ed copy of @dentry's server-side path, or an ERR_PTR on failure */
+static inline char *nfs4_dup_path(const struct dentry *dentry)
+{
+       char *page = (char *) __get_free_page(GFP_USER);
+       char *path = ERR_PTR(-ENOMEM);
+
+       if (page == NULL)       /* nfs4_path() would otherwise write through a NULL buffer */
+               return path;
+       path = nfs4_path(dentry, page, PAGE_SIZE);
+       if (!IS_ERR(path)) {
+               int len = PAGE_SIZE + page - path;      /* up to the buffer's end, trailing NUL included */
+               char *tmp = path;
+
+               path = kmalloc(len, GFP_KERNEL);
+               path = path ? memcpy(path, tmp, len) : ERR_PTR(-ENOMEM);
+       }
+       free_page((unsigned long)page);
+       return path;
+}
+
+/* Duplicate the server-side path into @server, then find or create its superblock */
+static struct super_block *nfs4_clone_client(struct nfs_server *server, struct nfs_clone_mount *data)
+{
+       struct nfs4_client *clp = server->nfs4_state;
+       struct super_block *sb;
+
+       server->mnt_path = nfs4_dup_path(data->dentry);
+       if (IS_ERR(server->mnt_path)) {
+               sb = (struct super_block *)server->mnt_path;
+               server->mnt_path = NULL;
+               return sb;
+       }
+       sb = sget(&nfs4_fs_type, nfs4_compare_super, nfs_set_super, server);
+       if (IS_ERR(sb) || sb->s_root != NULL) {
+               /* sget failed or sb already has a root: drop our path copy */
+               kfree(server->mnt_path);
+               server->mnt_path = NULL;
+               return sb;
+       }
+       nfs4_server_capabilities(server, &server->fh);
+
+       down_write(&clp->cl_sem);
+       atomic_inc(&clp->cl_count);
+       list_add_tail(&server->nfs4_siblings, &clp->cl_superblocks);
+       up_write(&clp->cl_sem);
+       return sb;
+}
+
+/* get_sb hook of clone_nfs4_fs_type; @raw_data is the struct nfs_clone_mount to clone from */
+static struct super_block *nfs_clone_nfs4_sb(struct file_system_type *fs_type,
+               int flags, const char *dev_name, void *raw_data)
+{
+       return nfs_clone_generic_sb(raw_data, nfs4_clone_client);
+}
+
+static struct file_system_type clone_nfs4_fs_type = {
+       .owner          = THIS_MODULE,
+       .name           = "nfs",        /* NOTE(review): same name as the v2/v3 clone type - confirm this is intended */
+       .get_sb         = nfs_clone_nfs4_sb,
+       .kill_sb        = nfs4_kill_super,
+       .fs_flags       = FS_ODD_RENAME|FS_REVAL_DOT|FS_BINARY_MOUNTDATA,
+};
+
 #define nfs4_init_once(nfsi) \
        do { \
                INIT_LIST_HEAD(&(nfsi)->open_states); \
@@ -2183,12 +2423,69 @@ static inline void unregister_nfs4fs(void)
        nfs_unregister_sysctl();
 }
 #else
+#define nfs4_clone_client(a,b) ERR_PTR(-EINVAL)
 #define nfs4_init_once(nfsi) \
        do { } while (0)
 #define register_nfs4fs() (0)
 #define unregister_nfs4fs()
 #endif
 
+static inline char *nfs_devname(const struct vfsmount *mnt_parent,
+                        const struct dentry *dentry,
+                        char *buffer, ssize_t buflen)
+{
+       return nfs_path(mnt_parent->mnt_devname, dentry, buffer, buflen);       /* parent's device name + path to dentry */
+}
+
+/**
+ * nfs_do_submount - set up mountpoint when crossing a filesystem boundary
+ * @mnt_parent - mountpoint of parent directory
+ * @dentry - dentry of the mountpoint being crossed
+ * @fh - filehandle for new root dentry
+ * @fattr - attributes for new root inode
+ * Returns the result of vfs_kern_mount(), or an ERR_PTR if the device name could not be built.
+ */
+struct vfsmount *nfs_do_submount(const struct vfsmount *mnt_parent,
+               const struct dentry *dentry, struct nfs_fh *fh,
+               struct nfs_fattr *fattr)
+{
+       struct nfs_clone_mount mountdata = {
+               .sb = mnt_parent->mnt_sb,
+               .dentry = dentry,
+               .fh = fh,
+               .fattr = fattr,
+       };
+       struct vfsmount *mnt = ERR_PTR(-ENOMEM);
+       char *page = (char *) __get_free_page(GFP_USER);        /* scratch buffer for the device name */
+       char *devname;
+
+       dprintk("%s: submounting on %s/%s\n", __FUNCTION__,
+                       dentry->d_parent->d_name.name,
+                       dentry->d_name.name);
+       if (page == NULL)
+               goto out;
+       devname = nfs_devname(mnt_parent, dentry, page, PAGE_SIZE);
+       mnt = (struct vfsmount *)devname;       /* carries the error out if devname is an ERR_PTR */
+       if (IS_ERR(devname))
+               goto free_page;
+       switch (NFS_SB(mnt_parent->mnt_sb)->rpc_ops->version) { /* clone fs type depends on the NFS version */
+               case 2:
+               case 3:
+                       mnt = vfs_kern_mount(&clone_nfs_fs_type, 0, devname, &mountdata);
+                       break;
+               case 4:
+                       mnt = vfs_kern_mount(&clone_nfs4_fs_type, 0, devname, &mountdata);
+                       break;
+               default:
+                       BUG();
+       }
+free_page:
+       free_page((unsigned long)page);
+out:
+       dprintk("%s: done\n", __FUNCTION__);
+       return mnt;
+}
+
 extern int nfs_init_nfspagecache(void);
 extern void nfs_destroy_nfspagecache(void);
 extern int nfs_init_readpagecache(void);
diff --git a/fs/nfs/namespace.c b/fs/nfs/namespace.c
new file mode 100644 (file)
index 0000000..a155505
--- /dev/null
@@ -0,0 +1,89 @@
+/*
+ * linux/fs/nfs/namespace.c
+ *
+ * Copyright (C) 2005 Trond Myklebust <Trond.Myklebust@netapp.com>
+ *
+ * NFS namespace
+ */
+
+#include <linux/config.h>
+
+#include <linux/dcache.h>
+#include <linux/mount.h>
+#include <linux/namei.h>
+#include <linux/nfs_fs.h>
+#include <linux/string.h>
+#include <linux/sunrpc/clnt.h>
+#include <linux/vfs.h>
+
+#define NFSDBG_FACILITY                NFSDBG_VFS
+
+/*
+ * nfs_follow_mountpoint - handle crossing a mountpoint on the server
+ * @dentry - dentry of mountpoint
+ * @nd - nameidata info
+ *
+ * When we encounter a mountpoint on the server, we want to set up
+ * a mountpoint on the client too, to prevent inode numbers from
+ * colliding, and to allow "df" to work properly.
+ * On NFSv4, we also want to allow for the fact that different
+ * filesystems may be migrated to different servers in a failover
+ * situation, and that different filesystems may want to use
+ * different security flavours.
+ */
+static void * nfs_follow_mountpoint(struct dentry *dentry, struct nameidata *nd)
+{
+       struct vfsmount *mnt;
+       struct nfs_server *server = NFS_SERVER(dentry->d_inode);
+       struct dentry *parent;
+       struct nfs_fh fh;
+       struct nfs_fattr fattr;
+       int err;
+
+       BUG_ON(IS_ROOT(dentry));
+       dprintk("%s: enter\n", __FUNCTION__);
+       dput(nd->dentry);
+       nd->dentry = dget(dentry);      /* nd now references the mountpoint dentry */
+       if (d_mountpoint(nd->dentry))
+               goto out_follow;        /* already mounted on: just step down into it */
+       /* Look it up again */
+       parent = dget_parent(nd->dentry);
+       err = server->rpc_ops->lookup(parent->d_inode, &nd->dentry->d_name, &fh, &fattr);
+       dput(parent);
+       if (err != 0)
+               goto out_err;
+
+       mnt = nfs_do_submount(nd->mnt, nd->dentry, &fh, &fattr);
+       err = PTR_ERR(mnt);
+       if (IS_ERR(mnt))
+               goto out_err;
+
+       mntget(mnt);    /* extra reference; dropped again below if do_add_mount() fails */
+       err = do_add_mount(mnt, nd, nd->mnt->mnt_flags, NULL);
+       if (err < 0) {
+               mntput(mnt);
+               if (err == -EBUSY)      /* follow whatever is mounted here instead of failing */
+                       goto out_follow;
+               goto out_err;
+       }
+       mntput(nd->mnt);
+       dput(nd->dentry);
+       nd->mnt = mnt;  /* switch nd over to the root of the new mount */
+       nd->dentry = dget(mnt->mnt_root);
+out:
+       dprintk("%s: done, returned %d\n", __FUNCTION__, err);
+       return ERR_PTR(err);
+out_err:
+       path_release(nd);
+       goto out;
+out_follow:
+       while(d_mountpoint(nd->dentry) && follow_down(&nd->mnt, &nd->dentry))
+               ;
+       err = 0;
+       goto out;
+}
+
+struct inode_operations nfs_mountpoint_inode_operations = {
+       .follow_link    = nfs_follow_mountpoint,        /* sets up the submount and crosses into it */
+       .getattr        = nfs_getattr,
+};
index 0f5e4e7cddecdd14bfc69d0d8215b6b787972fc2..307832fd1a49e0056756d836c2b36e7971de8f6b 100644 (file)
@@ -217,6 +217,7 @@ extern int nfs4_proc_renew(struct nfs4_client *, struct rpc_cred *);
 extern int nfs4_do_close(struct inode *inode, struct nfs4_state *state);
 extern struct dentry *nfs4_atomic_open(struct inode *, struct dentry *, struct nameidata *);
 extern int nfs4_open_revalidate(struct inode *, struct dentry *, int, struct nameidata *);
+extern int nfs4_server_capabilities(struct nfs_server *server, struct nfs_fh *fhandle);
 
 extern struct nfs4_state_recovery_ops nfs4_reboot_recovery_ops;
 extern struct nfs4_state_recovery_ops nfs4_network_partition_recovery_ops;
index ef4c6cccf958f8ca4365e610a11acda10d06d610..308407205e6c2f8d09b0b38c3d9ec040ec21441b 100644 (file)
@@ -1331,7 +1331,7 @@ static int _nfs4_server_capabilities(struct nfs_server *server, struct nfs_fh *f
        return status;
 }
 
-static int nfs4_server_capabilities(struct nfs_server *server, struct nfs_fh *fhandle)
+int nfs4_server_capabilities(struct nfs_server *server, struct nfs_fh *fhandle)
 {
        struct nfs4_exception exception = { };
        int err;
index 6763a0089ee4f119513e0e73f32d3ed88ead7867..0ce8704732c217e43b8d95ff6da926f2c76fa9b2 100644 (file)
@@ -313,6 +313,10 @@ extern void nfs_end_data_update(struct inode *);
 extern struct nfs_open_context *get_nfs_open_context(struct nfs_open_context *ctx);
 extern void put_nfs_open_context(struct nfs_open_context *ctx);
 extern struct nfs_open_context *nfs_find_open_context(struct inode *inode, struct rpc_cred *cred, int mode);
+extern struct vfsmount *nfs_do_submount(const struct vfsmount *mnt_parent,
+                                       const struct dentry *dentry,
+                                       struct nfs_fh *fh,
+                                       struct nfs_fattr *fattr);
 
 /* linux/net/ipv4/ipconfig.c: trims ip addr off front of name, too. */
 extern u32 root_nfs_parse_addr(char *name); /*__init*/
@@ -398,6 +402,11 @@ extern void nfs_unregister_sysctl(void);
 #define nfs_unregister_sysctl() do { } while(0)
 #endif
 
+/*
+ * linux/fs/nfs/namespace.c
+ */
+extern struct inode_operations nfs_mountpoint_inode_operations;
+
 /*
  * linux/fs/nfs/unlink.c
  */