From 884be175351e73c515303118150f195dd611787c Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 28 Apr 2016 23:56:31 -0400 Subject: [PATCH] nfs: per-name sillyunlink exclusion use d_alloc_parallel() for sillyunlink/lookup exclusion and explicit rwsem (nfs_rmdir() being a writer and nfs_call_unlink() - a reader) for rmdir/sillyunlink one. That ought to make lookup/readdir/!O_CREAT atomic_open really parallel on NFS. Signed-off-by: Al Viro --- fs/nfs/dir.c | 9 +- fs/nfs/inode.c | 4 +- fs/nfs/nfs4proc.c | 2 +- fs/nfs/nfstrace.h | 2 +- fs/nfs/unlink.c | 193 +++++++++++----------------------------- include/linux/nfs_fs.h | 11 +-- include/linux/nfs_xdr.h | 4 +- 7 files changed, 61 insertions(+), 164 deletions(-) diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index cd3c754e3f89..aaf7bd0cbae2 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -909,7 +909,6 @@ static int nfs_readdir(struct file *file, struct dir_context *ctx) desc->decode = NFS_PROTO(inode)->decode_dirent; desc->plus = nfs_use_readdirplus(inode, ctx) ? 1 : 0; - nfs_block_sillyrename(dentry); if (ctx->pos == 0 || nfs_dir_mapping_need_revalidate(inode)) res = nfs_revalidate_mapping(inode, file->f_mapping); if (res < 0) @@ -945,7 +944,6 @@ static int nfs_readdir(struct file *file, struct dir_context *ctx) break; } while (!desc->eof); out: - nfs_unblock_sillyrename(dentry); if (res > 0) res = 0; dfprintk(FILE, "NFS: readdir(%pD2) returns %d\n", file, res); @@ -1398,7 +1396,6 @@ struct dentry *nfs_lookup(struct inode *dir, struct dentry * dentry, unsigned in parent = dentry->d_parent; /* Protect against concurrent sillydeletes */ trace_nfs_lookup_enter(dir, dentry, flags); - nfs_block_sillyrename(parent); error = NFS_PROTO(dir)->lookup(dir, &dentry->d_name, fhandle, fattr, label); if (error == -ENOENT) goto no_entry; @@ -1423,7 +1420,6 @@ no_entry: } nfs_set_verifier(dentry, nfs_save_change_attribute(dir)); out_unblock_sillyrename: - nfs_unblock_sillyrename(parent); trace_nfs_lookup_exit(dir, dentry, flags, error); nfs4_label_free(label); out: @@ -1535,9 +1531,7 @@ int nfs_atomic_open(struct inode *dir, struct dentry *dentry, goto out; trace_nfs_atomic_open_enter(dir, ctx, open_flags); - nfs_block_sillyrename(dentry->d_parent); inode = NFS_PROTO(dir)->open_context(dir, ctx, open_flags, &attr, opened); - nfs_unblock_sillyrename(dentry->d_parent); if (IS_ERR(inode)) { err = PTR_ERR(inode); trace_nfs_atomic_open_exit(dir, ctx, open_flags, err); @@ -1781,7 +1775,7 @@ int nfs_rmdir(struct inode *dir, struct dentry *dentry) trace_nfs_rmdir_enter(dir, dentry); if (d_really_is_positive(dentry)) { - nfs_wait_on_sillyrename(dentry); + down_write(&NFS_I(d_inode(dentry))->rmdir_sem); error = NFS_PROTO(dir)->rmdir(dir, &dentry->d_name); /* Ensure the VFS deletes this inode */ switch (error) { @@ -1791,6 +1785,7 @@ int nfs_rmdir(struct inode *dir, struct dentry *dentry) case -ENOENT: nfs_dentry_handle_enoent(dentry); } + up_write(&NFS_I(d_inode(dentry))->rmdir_sem); } else error = NFS_PROTO(dir)->rmdir(dir, &dentry->d_name); trace_nfs_rmdir_exit(dir, dentry, error); diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 738c84a42eb0..52e7d6869e3b 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -1958,9 +1958,7 @@ static void init_once(void *foo) nfsi->nrequests = 0; nfsi->commit_info.ncommit = 0; atomic_set(&nfsi->commit_info.rpcs_out, 0); - atomic_set(&nfsi->silly_count, 1); - INIT_HLIST_HEAD(&nfsi->silly_list); - init_waitqueue_head(&nfsi->waitqueue); + init_rwsem(&nfsi->rmdir_sem); nfs4_init_once(nfsi); } diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 7a7ac1dafa02..084e8570da18 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -3777,7 +3777,7 @@ static void nfs4_proc_unlink_setup(struct rpc_message *msg, struct inode *dir) static void nfs4_proc_unlink_rpc_prepare(struct rpc_task *task, struct nfs_unlinkdata *data) { - nfs4_setup_sequence(NFS_SERVER(data->dir), + nfs4_setup_sequence(NFS_SB(data->dentry->d_sb), &data->args.seq_args, &data->res.seq_res, task); diff --git a/fs/nfs/nfstrace.h b/fs/nfs/nfstrace.h index 9f80a086b612..0b9e5cc9a747 100644 --- a/fs/nfs/nfstrace.h +++ b/fs/nfs/nfstrace.h @@ -702,7 +702,7 @@ TRACE_EVENT(nfs_sillyrename_unlink, ), TP_fast_assign( - struct inode *dir = data->dir; + struct inode *dir = d_inode(data->dentry->d_parent); size_t len = data->args.name.len; __entry->dev = dir->i_sb->s_dev; __entry->dir = NFS_FILEID(dir); diff --git a/fs/nfs/unlink.c b/fs/nfs/unlink.c index 7d6deaccf89b..1868246f56e6 100644 --- a/fs/nfs/unlink.c +++ b/fs/nfs/unlink.c @@ -30,45 +30,11 @@ static void nfs_free_unlinkdata(struct nfs_unlinkdata *data) { - iput(data->dir); put_rpccred(data->cred); kfree(data->args.name.name); kfree(data); } -#define NAME_ALLOC_LEN(len) ((len+16) & ~15) -/** - * nfs_copy_dname - copy dentry name to data structure - * @dentry: pointer to dentry - * @data: nfs_unlinkdata - */ -static int nfs_copy_dname(struct dentry *dentry, struct nfs_unlinkdata *data) -{ - char *str; - int len = dentry->d_name.len; - - str = kmemdup(dentry->d_name.name, NAME_ALLOC_LEN(len), GFP_KERNEL); - if (!str) - return -ENOMEM; - data->args.name.len = len; - data->args.name.name = str; - return 0; -} - -static void nfs_free_dname(struct nfs_unlinkdata *data) -{ - kfree(data->args.name.name); - data->args.name.name = NULL; - data->args.name.len = 0; -} - -static void nfs_dec_sillycount(struct inode *dir) -{ - struct nfs_inode *nfsi = NFS_I(dir); - if (atomic_dec_return(&nfsi->silly_count) == 1) - wake_up(&nfsi->waitqueue); -} - /** * nfs_async_unlink_done - Sillydelete post-processing * @task: rpc_task of the sillydelete @@ -78,7 +44,7 @@ static void nfs_dec_sillycount(struct inode *dir) static void nfs_async_unlink_done(struct rpc_task *task, void *calldata) { struct nfs_unlinkdata *data = calldata; - struct inode *dir = data->dir; + struct inode *dir = d_inode(data->dentry->d_parent); trace_nfs_sillyrename_unlink(data, task->tk_status); if (!NFS_PROTO(dir)->unlink_done(task, dir)) @@ -95,17 +61,21 @@ static void nfs_async_unlink_done(struct rpc_task *task, void *calldata) static void nfs_async_unlink_release(void *calldata) { struct nfs_unlinkdata *data = calldata; - struct super_block *sb = data->dir->i_sb; + struct dentry *dentry = data->dentry; + struct super_block *sb = dentry->d_sb; - nfs_dec_sillycount(data->dir); + up_read_non_owner(&NFS_I(d_inode(dentry->d_parent))->rmdir_sem); + d_lookup_done(dentry); nfs_free_unlinkdata(data); + dput(dentry); nfs_sb_deactive(sb); } static void nfs_unlink_prepare(struct rpc_task *task, void *calldata) { struct nfs_unlinkdata *data = calldata; - NFS_PROTO(data->dir)->unlink_rpc_prepare(task, data); + struct inode *dir = d_inode(data->dentry->d_parent); + NFS_PROTO(dir)->unlink_rpc_prepare(task, data); } static const struct rpc_call_ops nfs_unlink_ops = { @@ -114,7 +84,7 @@ static const struct rpc_call_ops nfs_unlink_ops = { .rpc_call_prepare = nfs_unlink_prepare, }; -static int nfs_do_call_unlink(struct dentry *parent, struct inode *dir, struct nfs_unlinkdata *data) +static void nfs_do_call_unlink(struct nfs_unlinkdata *data) { struct rpc_message msg = { .rpc_argp = &data->args, @@ -129,10 +99,31 @@ static int nfs_do_call_unlink(struct dentry *parent, struct inode *dir, struct n .flags = RPC_TASK_ASYNC, }; struct rpc_task *task; + struct inode *dir = d_inode(data->dentry->d_parent); + nfs_sb_active(dir->i_sb); + data->args.fh = NFS_FH(dir); + nfs_fattr_init(data->res.dir_attr); + + NFS_PROTO(dir)->unlink_setup(&msg, dir); + + task_setup_data.rpc_client = NFS_CLIENT(dir); + task = rpc_run_task(&task_setup_data); + if (!IS_ERR(task)) + rpc_put_task_async(task); +} + +static int nfs_call_unlink(struct dentry *dentry, struct nfs_unlinkdata *data) +{ + struct inode *dir = d_inode(dentry->d_parent); struct dentry *alias; - alias = d_lookup(parent, &data->args.name); - if (alias != NULL) { + down_read_non_owner(&NFS_I(dir)->rmdir_sem); + alias = d_alloc_parallel(dentry->d_parent, &data->args.name, &data->wq); + if (IS_ERR(alias)) { + up_read_non_owner(&NFS_I(dir)->rmdir_sem); + return 0; + } + if (!d_in_lookup(alias)) { int ret; void *devname_garbage = NULL; @@ -140,10 +131,8 @@ static int nfs_do_call_unlink(struct dentry *parent, struct inode *dir, struct n * Hey, we raced with lookup... See if we need to transfer * the sillyrename information to the aliased dentry. */ - nfs_free_dname(data); - ret = nfs_copy_dname(alias, data); spin_lock(&alias->d_lock); - if (ret == 0 && d_really_is_positive(alias) && + if (d_really_is_positive(alias) && !(alias->d_flags & DCACHE_NFSFS_RENAMED)) { devname_garbage = alias->d_fsdata; alias->d_fsdata = data; @@ -152,8 +141,8 @@ static int nfs_do_call_unlink(struct dentry *parent, struct inode *dir, struct n } else ret = 0; spin_unlock(&alias->d_lock); - nfs_dec_sillycount(dir); dput(alias); + up_read_non_owner(&NFS_I(dir)->rmdir_sem); /* * If we'd displaced old cached devname, free it. At that * point dentry is definitely not a root, so we won't need @@ -162,95 +151,18 @@ static int nfs_do_call_unlink(struct dentry *parent, struct inode *dir, struct n kfree(devname_garbage); return ret; } - data->dir = igrab(dir); - if (!data->dir) { - nfs_dec_sillycount(dir); - return 0; - } - nfs_sb_active(dir->i_sb); - data->args.fh = NFS_FH(dir); - nfs_fattr_init(data->res.dir_attr); - - NFS_PROTO(dir)->unlink_setup(&msg, dir); - - task_setup_data.rpc_client = NFS_CLIENT(dir); - task = rpc_run_task(&task_setup_data); - if (!IS_ERR(task)) - rpc_put_task_async(task); + data->dentry = alias; + nfs_do_call_unlink(data); return 1; } -static int nfs_call_unlink(struct dentry *dentry, struct nfs_unlinkdata *data) -{ - struct dentry *parent; - struct inode *dir; - int ret = 0; - - - parent = dget_parent(dentry); - if (parent == NULL) - goto out_free; - dir = d_inode(parent); - /* Non-exclusive lock protects against concurrent lookup() calls */ - spin_lock(&dir->i_lock); - if (atomic_inc_not_zero(&NFS_I(dir)->silly_count) == 0) { - /* Deferred delete */ - hlist_add_head(&data->list, &NFS_I(dir)->silly_list); - spin_unlock(&dir->i_lock); - ret = 1; - goto out_dput; - } - spin_unlock(&dir->i_lock); - ret = nfs_do_call_unlink(parent, dir, data); -out_dput: - dput(parent); -out_free: - return ret; -} - -void nfs_wait_on_sillyrename(struct dentry *dentry) -{ - struct nfs_inode *nfsi = NFS_I(d_inode(dentry)); - - wait_event(nfsi->waitqueue, atomic_read(&nfsi->silly_count) <= 1); -} - -void nfs_block_sillyrename(struct dentry *dentry) -{ - struct nfs_inode *nfsi = NFS_I(d_inode(dentry)); - - wait_event(nfsi->waitqueue, atomic_cmpxchg(&nfsi->silly_count, 1, 0) == 1); -} - -void nfs_unblock_sillyrename(struct dentry *dentry) -{ - struct inode *dir = d_inode(dentry); - struct nfs_inode *nfsi = NFS_I(dir); - struct nfs_unlinkdata *data; - - atomic_inc(&nfsi->silly_count); - wake_up(&nfsi->waitqueue); - spin_lock(&dir->i_lock); - while (!hlist_empty(&nfsi->silly_list)) { - if (!atomic_inc_not_zero(&nfsi->silly_count)) - break; - data = hlist_entry(nfsi->silly_list.first, struct nfs_unlinkdata, list); - hlist_del(&data->list); - spin_unlock(&dir->i_lock); - if (nfs_do_call_unlink(dentry, dir, data) == 0) - nfs_free_unlinkdata(data); - spin_lock(&dir->i_lock); - } - spin_unlock(&dir->i_lock); -} - /** * nfs_async_unlink - asynchronous unlinking of a file * @dir: parent directory of dentry * @dentry: dentry to unlink */ static int -nfs_async_unlink(struct inode *dir, struct dentry *dentry) +nfs_async_unlink(struct dentry *dentry, struct qstr *name) { struct nfs_unlinkdata *data; int status = -ENOMEM; @@ -259,13 +171,18 @@ nfs_async_unlink(struct inode *dir, struct dentry *dentry) data = kzalloc(sizeof(*data), GFP_KERNEL); if (data == NULL) goto out; + data->args.name.name = kstrdup(name->name, GFP_KERNEL); + if (!data->args.name.name) + goto out_free; + data->args.name.len = name->len; data->cred = rpc_lookup_cred(); if (IS_ERR(data->cred)) { status = PTR_ERR(data->cred); - goto out_free; + goto out_free_name; } data->res.dir_attr = &data->dir_attr; + init_waitqueue_head(&data->wq); status = -EBUSY; spin_lock(&dentry->d_lock); @@ -285,6 +202,8 @@ nfs_async_unlink(struct inode *dir, struct dentry *dentry) out_unlock: spin_unlock(&dentry->d_lock); put_rpccred(data->cred); +out_free_name: + kfree(data->args.name.name); out_free: kfree(data); out: @@ -303,17 +222,15 @@ out: void nfs_complete_unlink(struct dentry *dentry, struct inode *inode) { - struct nfs_unlinkdata *data = NULL; + struct nfs_unlinkdata *data; spin_lock(&dentry->d_lock); - if (dentry->d_flags & DCACHE_NFSFS_RENAMED) { - dentry->d_flags &= ~DCACHE_NFSFS_RENAMED; - data = dentry->d_fsdata; - dentry->d_fsdata = NULL; - } + dentry->d_flags &= ~DCACHE_NFSFS_RENAMED; + data = dentry->d_fsdata; + dentry->d_fsdata = NULL; spin_unlock(&dentry->d_lock); - if (data != NULL && (NFS_STALE(inode) || !nfs_call_unlink(dentry, data))) + if (NFS_STALE(inode) || !nfs_call_unlink(dentry, data)) nfs_free_unlinkdata(data); } @@ -560,18 +477,10 @@ nfs_sillyrename(struct inode *dir, struct dentry *dentry) /* queue unlink first. Can't do this from rpc_release as it * has to allocate memory */ - error = nfs_async_unlink(dir, dentry); + error = nfs_async_unlink(dentry, &sdentry->d_name); if (error) goto out_dput; - /* populate unlinkdata with the right dname */ - error = nfs_copy_dname(sdentry, - (struct nfs_unlinkdata *)dentry->d_fsdata); - if (error) { - nfs_cancel_async_unlink(dentry); - goto out_dput; - } - /* run the rename task, undo unlink if it fails */ task = nfs_async_rename(dir, dir, dentry, sdentry, nfs_complete_sillyrename); diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index 67300f8e5f2f..fa167f25465d 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -163,11 +163,9 @@ struct nfs_inode { /* Open contexts for shared mmap writes */ struct list_head open_files; - /* Number of in-flight sillydelete RPC calls */ - atomic_t silly_count; - /* List of deferred sillydelete requests */ - struct hlist_head silly_list; - wait_queue_head_t waitqueue; + /* Readers: in-flight sillydelete RPC calls */ + /* Writers: rmdir */ + struct rw_semaphore rmdir_sem; #if IS_ENABLED(CONFIG_NFS_V4) struct nfs4_cached_acl *nfs4_acl; @@ -492,9 +490,6 @@ extern void nfs_release_automount_timer(void); * linux/fs/nfs/unlink.c */ extern void nfs_complete_unlink(struct dentry *dentry, struct inode *); -extern void nfs_wait_on_sillyrename(struct dentry *dentry); -extern void nfs_block_sillyrename(struct dentry *dentry); -extern void nfs_unblock_sillyrename(struct dentry *dentry); /* * linux/fs/nfs/write.c diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index d320906cf13e..ee8491dadbf3 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -1468,10 +1468,10 @@ struct nfs_pgio_completion_ops { }; struct nfs_unlinkdata { - struct hlist_node list; struct nfs_removeargs args; struct nfs_removeres res; - struct inode *dir; + struct dentry *dentry; + wait_queue_head_t wq; struct rpc_cred *cred; struct nfs_fattr dir_attr; long timeout; -- 2.25.1