nfs: per-name sillyunlink exclusion
use d_alloc_parallel() for sillyunlink/lookup exclusion and explicit rwsem (nfs_rmdir() being a writer and nfs_call_unlink() - a reader) for rmdir/sillyunlink one. That ought to make lookup/readdir/!O_CREAT atomic_open really parallel on NFS. Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
This commit is contained in:
parent
9ac3d3e846
commit
884be17535
7 changed files with 71 additions and 174 deletions
|
@ -909,7 +909,6 @@ static int nfs_readdir(struct file *file, struct dir_context *ctx)
|
|||
desc->decode = NFS_PROTO(inode)->decode_dirent;
|
||||
desc->plus = nfs_use_readdirplus(inode, ctx) ? 1 : 0;
|
||||
|
||||
nfs_block_sillyrename(dentry);
|
||||
if (ctx->pos == 0 || nfs_dir_mapping_need_revalidate(inode))
|
||||
res = nfs_revalidate_mapping(inode, file->f_mapping);
|
||||
if (res < 0)
|
||||
|
@ -945,7 +944,6 @@ static int nfs_readdir(struct file *file, struct dir_context *ctx)
|
|||
break;
|
||||
} while (!desc->eof);
|
||||
out:
|
||||
nfs_unblock_sillyrename(dentry);
|
||||
if (res > 0)
|
||||
res = 0;
|
||||
dfprintk(FILE, "NFS: readdir(%pD2) returns %d\n", file, res);
|
||||
|
@ -1398,7 +1396,6 @@ struct dentry *nfs_lookup(struct inode *dir, struct dentry * dentry, unsigned in
|
|||
parent = dentry->d_parent;
|
||||
/* Protect against concurrent sillydeletes */
|
||||
trace_nfs_lookup_enter(dir, dentry, flags);
|
||||
nfs_block_sillyrename(parent);
|
||||
error = NFS_PROTO(dir)->lookup(dir, &dentry->d_name, fhandle, fattr, label);
|
||||
if (error == -ENOENT)
|
||||
goto no_entry;
|
||||
|
@ -1423,7 +1420,6 @@ no_entry:
|
|||
}
|
||||
nfs_set_verifier(dentry, nfs_save_change_attribute(dir));
|
||||
out_unblock_sillyrename:
|
||||
nfs_unblock_sillyrename(parent);
|
||||
trace_nfs_lookup_exit(dir, dentry, flags, error);
|
||||
nfs4_label_free(label);
|
||||
out:
|
||||
|
@ -1535,9 +1531,7 @@ int nfs_atomic_open(struct inode *dir, struct dentry *dentry,
|
|||
goto out;
|
||||
|
||||
trace_nfs_atomic_open_enter(dir, ctx, open_flags);
|
||||
nfs_block_sillyrename(dentry->d_parent);
|
||||
inode = NFS_PROTO(dir)->open_context(dir, ctx, open_flags, &attr, opened);
|
||||
nfs_unblock_sillyrename(dentry->d_parent);
|
||||
if (IS_ERR(inode)) {
|
||||
err = PTR_ERR(inode);
|
||||
trace_nfs_atomic_open_exit(dir, ctx, open_flags, err);
|
||||
|
@ -1781,7 +1775,7 @@ int nfs_rmdir(struct inode *dir, struct dentry *dentry)
|
|||
|
||||
trace_nfs_rmdir_enter(dir, dentry);
|
||||
if (d_really_is_positive(dentry)) {
|
||||
nfs_wait_on_sillyrename(dentry);
|
||||
down_write(&NFS_I(d_inode(dentry))->rmdir_sem);
|
||||
error = NFS_PROTO(dir)->rmdir(dir, &dentry->d_name);
|
||||
/* Ensure the VFS deletes this inode */
|
||||
switch (error) {
|
||||
|
@ -1791,6 +1785,7 @@ int nfs_rmdir(struct inode *dir, struct dentry *dentry)
|
|||
case -ENOENT:
|
||||
nfs_dentry_handle_enoent(dentry);
|
||||
}
|
||||
up_write(&NFS_I(d_inode(dentry))->rmdir_sem);
|
||||
} else
|
||||
error = NFS_PROTO(dir)->rmdir(dir, &dentry->d_name);
|
||||
trace_nfs_rmdir_exit(dir, dentry, error);
|
||||
|
|
|
@ -1958,9 +1958,7 @@ static void init_once(void *foo)
|
|||
nfsi->nrequests = 0;
|
||||
nfsi->commit_info.ncommit = 0;
|
||||
atomic_set(&nfsi->commit_info.rpcs_out, 0);
|
||||
atomic_set(&nfsi->silly_count, 1);
|
||||
INIT_HLIST_HEAD(&nfsi->silly_list);
|
||||
init_waitqueue_head(&nfsi->waitqueue);
|
||||
init_rwsem(&nfsi->rmdir_sem);
|
||||
nfs4_init_once(nfsi);
|
||||
}
|
||||
|
||||
|
|
|
@ -3777,7 +3777,7 @@ static void nfs4_proc_unlink_setup(struct rpc_message *msg, struct inode *dir)
|
|||
|
||||
static void nfs4_proc_unlink_rpc_prepare(struct rpc_task *task, struct nfs_unlinkdata *data)
|
||||
{
|
||||
nfs4_setup_sequence(NFS_SERVER(data->dir),
|
||||
nfs4_setup_sequence(NFS_SB(data->dentry->d_sb),
|
||||
&data->args.seq_args,
|
||||
&data->res.seq_res,
|
||||
task);
|
||||
|
|
|
@ -702,7 +702,7 @@ TRACE_EVENT(nfs_sillyrename_unlink,
|
|||
),
|
||||
|
||||
TP_fast_assign(
|
||||
struct inode *dir = data->dir;
|
||||
struct inode *dir = d_inode(data->dentry->d_parent);
|
||||
size_t len = data->args.name.len;
|
||||
__entry->dev = dir->i_sb->s_dev;
|
||||
__entry->dir = NFS_FILEID(dir);
|
||||
|
|
207
fs/nfs/unlink.c
207
fs/nfs/unlink.c
|
@ -30,45 +30,11 @@
|
|||
static void
|
||||
nfs_free_unlinkdata(struct nfs_unlinkdata *data)
|
||||
{
|
||||
iput(data->dir);
|
||||
put_rpccred(data->cred);
|
||||
kfree(data->args.name.name);
|
||||
kfree(data);
|
||||
}
|
||||
|
||||
#define NAME_ALLOC_LEN(len) ((len+16) & ~15)
|
||||
/**
|
||||
* nfs_copy_dname - copy dentry name to data structure
|
||||
* @dentry: pointer to dentry
|
||||
* @data: nfs_unlinkdata
|
||||
*/
|
||||
static int nfs_copy_dname(struct dentry *dentry, struct nfs_unlinkdata *data)
|
||||
{
|
||||
char *str;
|
||||
int len = dentry->d_name.len;
|
||||
|
||||
str = kmemdup(dentry->d_name.name, NAME_ALLOC_LEN(len), GFP_KERNEL);
|
||||
if (!str)
|
||||
return -ENOMEM;
|
||||
data->args.name.len = len;
|
||||
data->args.name.name = str;
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void nfs_free_dname(struct nfs_unlinkdata *data)
|
||||
{
|
||||
kfree(data->args.name.name);
|
||||
data->args.name.name = NULL;
|
||||
data->args.name.len = 0;
|
||||
}
|
||||
|
||||
static void nfs_dec_sillycount(struct inode *dir)
|
||||
{
|
||||
struct nfs_inode *nfsi = NFS_I(dir);
|
||||
if (atomic_dec_return(&nfsi->silly_count) == 1)
|
||||
wake_up(&nfsi->waitqueue);
|
||||
}
|
||||
|
||||
/**
|
||||
* nfs_async_unlink_done - Sillydelete post-processing
|
||||
* @task: rpc_task of the sillydelete
|
||||
|
@ -78,7 +44,7 @@ static void nfs_dec_sillycount(struct inode *dir)
|
|||
static void nfs_async_unlink_done(struct rpc_task *task, void *calldata)
|
||||
{
|
||||
struct nfs_unlinkdata *data = calldata;
|
||||
struct inode *dir = data->dir;
|
||||
struct inode *dir = d_inode(data->dentry->d_parent);
|
||||
|
||||
trace_nfs_sillyrename_unlink(data, task->tk_status);
|
||||
if (!NFS_PROTO(dir)->unlink_done(task, dir))
|
||||
|
@ -95,17 +61,21 @@ static void nfs_async_unlink_done(struct rpc_task *task, void *calldata)
|
|||
static void nfs_async_unlink_release(void *calldata)
|
||||
{
|
||||
struct nfs_unlinkdata *data = calldata;
|
||||
struct super_block *sb = data->dir->i_sb;
|
||||
struct dentry *dentry = data->dentry;
|
||||
struct super_block *sb = dentry->d_sb;
|
||||
|
||||
nfs_dec_sillycount(data->dir);
|
||||
up_read_non_owner(&NFS_I(d_inode(dentry->d_parent))->rmdir_sem);
|
||||
d_lookup_done(dentry);
|
||||
nfs_free_unlinkdata(data);
|
||||
dput(dentry);
|
||||
nfs_sb_deactive(sb);
|
||||
}
|
||||
|
||||
static void nfs_unlink_prepare(struct rpc_task *task, void *calldata)
|
||||
{
|
||||
struct nfs_unlinkdata *data = calldata;
|
||||
NFS_PROTO(data->dir)->unlink_rpc_prepare(task, data);
|
||||
struct inode *dir = d_inode(data->dentry->d_parent);
|
||||
NFS_PROTO(dir)->unlink_rpc_prepare(task, data);
|
||||
}
|
||||
|
||||
static const struct rpc_call_ops nfs_unlink_ops = {
|
||||
|
@ -114,7 +84,7 @@ static const struct rpc_call_ops nfs_unlink_ops = {
|
|||
.rpc_call_prepare = nfs_unlink_prepare,
|
||||
};
|
||||
|
||||
static int nfs_do_call_unlink(struct dentry *parent, struct inode *dir, struct nfs_unlinkdata *data)
|
||||
static void nfs_do_call_unlink(struct nfs_unlinkdata *data)
|
||||
{
|
||||
struct rpc_message msg = {
|
||||
.rpc_argp = &data->args,
|
||||
|
@ -129,44 +99,7 @@ static int nfs_do_call_unlink(struct dentry *parent, struct inode *dir, struct n
|
|||
.flags = RPC_TASK_ASYNC,
|
||||
};
|
||||
struct rpc_task *task;
|
||||
struct dentry *alias;
|
||||
|
||||
alias = d_lookup(parent, &data->args.name);
|
||||
if (alias != NULL) {
|
||||
int ret;
|
||||
void *devname_garbage = NULL;
|
||||
|
||||
/*
|
||||
* Hey, we raced with lookup... See if we need to transfer
|
||||
* the sillyrename information to the aliased dentry.
|
||||
*/
|
||||
nfs_free_dname(data);
|
||||
ret = nfs_copy_dname(alias, data);
|
||||
spin_lock(&alias->d_lock);
|
||||
if (ret == 0 && d_really_is_positive(alias) &&
|
||||
!(alias->d_flags & DCACHE_NFSFS_RENAMED)) {
|
||||
devname_garbage = alias->d_fsdata;
|
||||
alias->d_fsdata = data;
|
||||
alias->d_flags |= DCACHE_NFSFS_RENAMED;
|
||||
ret = 1;
|
||||
} else
|
||||
ret = 0;
|
||||
spin_unlock(&alias->d_lock);
|
||||
nfs_dec_sillycount(dir);
|
||||
dput(alias);
|
||||
/*
|
||||
* If we'd displaced old cached devname, free it. At that
|
||||
* point dentry is definitely not a root, so we won't need
|
||||
* that anymore.
|
||||
*/
|
||||
kfree(devname_garbage);
|
||||
return ret;
|
||||
}
|
||||
data->dir = igrab(dir);
|
||||
if (!data->dir) {
|
||||
nfs_dec_sillycount(dir);
|
||||
return 0;
|
||||
}
|
||||
struct inode *dir = d_inode(data->dentry->d_parent);
|
||||
nfs_sb_active(dir->i_sb);
|
||||
data->args.fh = NFS_FH(dir);
|
||||
nfs_fattr_init(data->res.dir_attr);
|
||||
|
@ -177,71 +110,50 @@ static int nfs_do_call_unlink(struct dentry *parent, struct inode *dir, struct n
|
|||
task = rpc_run_task(&task_setup_data);
|
||||
if (!IS_ERR(task))
|
||||
rpc_put_task_async(task);
|
||||
return 1;
|
||||
}
|
||||
|
||||
static int nfs_call_unlink(struct dentry *dentry, struct nfs_unlinkdata *data)
|
||||
{
|
||||
struct dentry *parent;
|
||||
struct inode *dir;
|
||||
int ret = 0;
|
||||
struct inode *dir = d_inode(dentry->d_parent);
|
||||
struct dentry *alias;
|
||||
|
||||
|
||||
parent = dget_parent(dentry);
|
||||
if (parent == NULL)
|
||||
goto out_free;
|
||||
dir = d_inode(parent);
|
||||
/* Non-exclusive lock protects against concurrent lookup() calls */
|
||||
spin_lock(&dir->i_lock);
|
||||
if (atomic_inc_not_zero(&NFS_I(dir)->silly_count) == 0) {
|
||||
/* Deferred delete */
|
||||
hlist_add_head(&data->list, &NFS_I(dir)->silly_list);
|
||||
spin_unlock(&dir->i_lock);
|
||||
ret = 1;
|
||||
goto out_dput;
|
||||
down_read_non_owner(&NFS_I(dir)->rmdir_sem);
|
||||
alias = d_alloc_parallel(dentry->d_parent, &data->args.name, &data->wq);
|
||||
if (IS_ERR(alias)) {
|
||||
up_read_non_owner(&NFS_I(dir)->rmdir_sem);
|
||||
return 0;
|
||||
}
|
||||
spin_unlock(&dir->i_lock);
|
||||
ret = nfs_do_call_unlink(parent, dir, data);
|
||||
out_dput:
|
||||
dput(parent);
|
||||
out_free:
|
||||
if (!d_in_lookup(alias)) {
|
||||
int ret;
|
||||
void *devname_garbage = NULL;
|
||||
|
||||
/*
|
||||
* Hey, we raced with lookup... See if we need to transfer
|
||||
* the sillyrename information to the aliased dentry.
|
||||
*/
|
||||
spin_lock(&alias->d_lock);
|
||||
if (d_really_is_positive(alias) &&
|
||||
!(alias->d_flags & DCACHE_NFSFS_RENAMED)) {
|
||||
devname_garbage = alias->d_fsdata;
|
||||
alias->d_fsdata = data;
|
||||
alias->d_flags |= DCACHE_NFSFS_RENAMED;
|
||||
ret = 1;
|
||||
} else
|
||||
ret = 0;
|
||||
spin_unlock(&alias->d_lock);
|
||||
dput(alias);
|
||||
up_read_non_owner(&NFS_I(dir)->rmdir_sem);
|
||||
/*
|
||||
* If we'd displaced old cached devname, free it. At that
|
||||
* point dentry is definitely not a root, so we won't need
|
||||
* that anymore.
|
||||
*/
|
||||
kfree(devname_garbage);
|
||||
return ret;
|
||||
}
|
||||
|
||||
void nfs_wait_on_sillyrename(struct dentry *dentry)
|
||||
{
|
||||
struct nfs_inode *nfsi = NFS_I(d_inode(dentry));
|
||||
|
||||
wait_event(nfsi->waitqueue, atomic_read(&nfsi->silly_count) <= 1);
|
||||
}
|
||||
|
||||
void nfs_block_sillyrename(struct dentry *dentry)
|
||||
{
|
||||
struct nfs_inode *nfsi = NFS_I(d_inode(dentry));
|
||||
|
||||
wait_event(nfsi->waitqueue, atomic_cmpxchg(&nfsi->silly_count, 1, 0) == 1);
|
||||
}
|
||||
|
||||
void nfs_unblock_sillyrename(struct dentry *dentry)
|
||||
{
|
||||
struct inode *dir = d_inode(dentry);
|
||||
struct nfs_inode *nfsi = NFS_I(dir);
|
||||
struct nfs_unlinkdata *data;
|
||||
|
||||
atomic_inc(&nfsi->silly_count);
|
||||
wake_up(&nfsi->waitqueue);
|
||||
spin_lock(&dir->i_lock);
|
||||
while (!hlist_empty(&nfsi->silly_list)) {
|
||||
if (!atomic_inc_not_zero(&nfsi->silly_count))
|
||||
break;
|
||||
data = hlist_entry(nfsi->silly_list.first, struct nfs_unlinkdata, list);
|
||||
hlist_del(&data->list);
|
||||
spin_unlock(&dir->i_lock);
|
||||
if (nfs_do_call_unlink(dentry, dir, data) == 0)
|
||||
nfs_free_unlinkdata(data);
|
||||
spin_lock(&dir->i_lock);
|
||||
}
|
||||
spin_unlock(&dir->i_lock);
|
||||
data->dentry = alias;
|
||||
nfs_do_call_unlink(data);
|
||||
return 1;
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -250,7 +162,7 @@ void nfs_unblock_sillyrename(struct dentry *dentry)
|
|||
* @dentry: dentry to unlink
|
||||
*/
|
||||
static int
|
||||
nfs_async_unlink(struct inode *dir, struct dentry *dentry)
|
||||
nfs_async_unlink(struct dentry *dentry, struct qstr *name)
|
||||
{
|
||||
struct nfs_unlinkdata *data;
|
||||
int status = -ENOMEM;
|
||||
|
@ -259,13 +171,18 @@ nfs_async_unlink(struct inode *dir, struct dentry *dentry)
|
|||
data = kzalloc(sizeof(*data), GFP_KERNEL);
|
||||
if (data == NULL)
|
||||
goto out;
|
||||
data->args.name.name = kstrdup(name->name, GFP_KERNEL);
|
||||
if (!data->args.name.name)
|
||||
goto out_free;
|
||||
data->args.name.len = name->len;
|
||||
|
||||
data->cred = rpc_lookup_cred();
|
||||
if (IS_ERR(data->cred)) {
|
||||
status = PTR_ERR(data->cred);
|
||||
goto out_free;
|
||||
goto out_free_name;
|
||||
}
|
||||
data->res.dir_attr = &data->dir_attr;
|
||||
init_waitqueue_head(&data->wq);
|
||||
|
||||
status = -EBUSY;
|
||||
spin_lock(&dentry->d_lock);
|
||||
|
@ -285,6 +202,8 @@ nfs_async_unlink(struct inode *dir, struct dentry *dentry)
|
|||
out_unlock:
|
||||
spin_unlock(&dentry->d_lock);
|
||||
put_rpccred(data->cred);
|
||||
out_free_name:
|
||||
kfree(data->args.name.name);
|
||||
out_free:
|
||||
kfree(data);
|
||||
out:
|
||||
|
@ -303,17 +222,15 @@ out:
|
|||
void
|
||||
nfs_complete_unlink(struct dentry *dentry, struct inode *inode)
|
||||
{
|
||||
struct nfs_unlinkdata *data = NULL;
|
||||
struct nfs_unlinkdata *data;
|
||||
|
||||
spin_lock(&dentry->d_lock);
|
||||
if (dentry->d_flags & DCACHE_NFSFS_RENAMED) {
|
||||
dentry->d_flags &= ~DCACHE_NFSFS_RENAMED;
|
||||
data = dentry->d_fsdata;
|
||||
dentry->d_fsdata = NULL;
|
||||
}
|
||||
spin_unlock(&dentry->d_lock);
|
||||
|
||||
if (data != NULL && (NFS_STALE(inode) || !nfs_call_unlink(dentry, data)))
|
||||
if (NFS_STALE(inode) || !nfs_call_unlink(dentry, data))
|
||||
nfs_free_unlinkdata(data);
|
||||
}
|
||||
|
||||
|
@ -560,18 +477,10 @@ nfs_sillyrename(struct inode *dir, struct dentry *dentry)
|
|||
/* queue unlink first. Can't do this from rpc_release as it
|
||||
* has to allocate memory
|
||||
*/
|
||||
error = nfs_async_unlink(dir, dentry);
|
||||
error = nfs_async_unlink(dentry, &sdentry->d_name);
|
||||
if (error)
|
||||
goto out_dput;
|
||||
|
||||
/* populate unlinkdata with the right dname */
|
||||
error = nfs_copy_dname(sdentry,
|
||||
(struct nfs_unlinkdata *)dentry->d_fsdata);
|
||||
if (error) {
|
||||
nfs_cancel_async_unlink(dentry);
|
||||
goto out_dput;
|
||||
}
|
||||
|
||||
/* run the rename task, undo unlink if it fails */
|
||||
task = nfs_async_rename(dir, dir, dentry, sdentry,
|
||||
nfs_complete_sillyrename);
|
||||
|
|
|
@ -163,11 +163,9 @@ struct nfs_inode {
|
|||
/* Open contexts for shared mmap writes */
|
||||
struct list_head open_files;
|
||||
|
||||
/* Number of in-flight sillydelete RPC calls */
|
||||
atomic_t silly_count;
|
||||
/* List of deferred sillydelete requests */
|
||||
struct hlist_head silly_list;
|
||||
wait_queue_head_t waitqueue;
|
||||
/* Readers: in-flight sillydelete RPC calls */
|
||||
/* Writers: rmdir */
|
||||
struct rw_semaphore rmdir_sem;
|
||||
|
||||
#if IS_ENABLED(CONFIG_NFS_V4)
|
||||
struct nfs4_cached_acl *nfs4_acl;
|
||||
|
@ -492,9 +490,6 @@ extern void nfs_release_automount_timer(void);
|
|||
* linux/fs/nfs/unlink.c
|
||||
*/
|
||||
extern void nfs_complete_unlink(struct dentry *dentry, struct inode *);
|
||||
extern void nfs_wait_on_sillyrename(struct dentry *dentry);
|
||||
extern void nfs_block_sillyrename(struct dentry *dentry);
|
||||
extern void nfs_unblock_sillyrename(struct dentry *dentry);
|
||||
|
||||
/*
|
||||
* linux/fs/nfs/write.c
|
||||
|
|
|
@ -1468,10 +1468,10 @@ struct nfs_pgio_completion_ops {
|
|||
};
|
||||
|
||||
struct nfs_unlinkdata {
|
||||
struct hlist_node list;
|
||||
struct nfs_removeargs args;
|
||||
struct nfs_removeres res;
|
||||
struct inode *dir;
|
||||
struct dentry *dentry;
|
||||
wait_queue_head_t wq;
|
||||
struct rpc_cred *cred;
|
||||
struct nfs_fattr dir_attr;
|
||||
long timeout;
|
||||
|
|
Loading…
Reference in a new issue