fs/file.c: don't acquire files->file_lock in fd_install()
Mateusz Guzik reported : Currently obtaining a new file descriptor results in locking fdtable twice - once in order to reserve a slot and second time to fill it. Holding the spinlock in __fd_install() is needed in case a resize is done, or to prevent a resize. Mateusz provided an RFC patch and a micro benchmark : http://people.redhat.com/~mguzik/pipebench.c A resize is an unlikely operation in a process lifetime, as table size is at least doubled at every resize. We can use RCU instead of the spinlock. __fd_install() must wait if a resize is in progress. The resize must block new __fd_install() callers from starting, and wait that ongoing install are finished (synchronize_sched()) resize should be attempted by a single thread to not waste resources. rcu_sched variant is used, as __fd_install() and expand_fdtable() run from process context. It gives us a ~30% speedup using pipebench on a dual Intel(R) Xeon(R) CPU E5-2696 v2 @ 2.50GHz Signed-off-by: Eric Dumazet <edumazet@google.com> Reported-by: Mateusz Guzik <mguzik@redhat.com> Acked-by: Mateusz Guzik <mguzik@redhat.com> Tested-by: Mateusz Guzik <mguzik@redhat.com> Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
This commit is contained in:
parent
1af95de6f0
commit
8a81252b77
3 changed files with 55 additions and 19 deletions
|
@ -500,3 +500,7 @@ in your dentry operations instead.
|
|||
dentry, it does not get nameidata at all and it gets called only when cookie
|
||||
is non-NULL. Note that link body isn't available anymore, so if you need it,
|
||||
store it as cookie.
|
||||
--
|
||||
[mandatory]
|
||||
__fd_install() & fd_install() can now sleep. Callers should not
|
||||
hold a spinlock or other resources that do not allow a schedule.
|
||||
|
|
67
fs/file.c
67
fs/file.c
|
@ -147,6 +147,13 @@ static int expand_fdtable(struct files_struct *files, int nr)
|
|||
|
||||
spin_unlock(&files->file_lock);
|
||||
new_fdt = alloc_fdtable(nr);
|
||||
|
||||
/* make sure all __fd_install() have seen resize_in_progress
|
||||
* or have finished their rcu_read_lock_sched() section.
|
||||
*/
|
||||
if (atomic_read(&files->count) > 1)
|
||||
synchronize_sched();
|
||||
|
||||
spin_lock(&files->file_lock);
|
||||
if (!new_fdt)
|
||||
return -ENOMEM;
|
||||
|
@ -158,21 +165,14 @@ static int expand_fdtable(struct files_struct *files, int nr)
|
|||
__free_fdtable(new_fdt);
|
||||
return -EMFILE;
|
||||
}
|
||||
/*
|
||||
* Check again since another task may have expanded the fd table while
|
||||
* we dropped the lock
|
||||
*/
|
||||
cur_fdt = files_fdtable(files);
|
||||
if (nr >= cur_fdt->max_fds) {
|
||||
/* Continue as planned */
|
||||
copy_fdtable(new_fdt, cur_fdt);
|
||||
rcu_assign_pointer(files->fdt, new_fdt);
|
||||
if (cur_fdt != &files->fdtab)
|
||||
call_rcu(&cur_fdt->rcu, free_fdtable_rcu);
|
||||
} else {
|
||||
/* Somebody else expanded, so undo our attempt */
|
||||
__free_fdtable(new_fdt);
|
||||
}
|
||||
BUG_ON(nr < cur_fdt->max_fds);
|
||||
copy_fdtable(new_fdt, cur_fdt);
|
||||
rcu_assign_pointer(files->fdt, new_fdt);
|
||||
if (cur_fdt != &files->fdtab)
|
||||
call_rcu(&cur_fdt->rcu, free_fdtable_rcu);
|
||||
/* coupled with smp_rmb() in __fd_install() */
|
||||
smp_wmb();
|
||||
return 1;
|
||||
}
|
||||
|
||||
|
@ -185,21 +185,38 @@ static int expand_fdtable(struct files_struct *files, int nr)
|
|||
* The files->file_lock should be held on entry, and will be held on exit.
|
||||
*/
|
||||
static int expand_files(struct files_struct *files, int nr)
|
||||
__releases(files->file_lock)
|
||||
__acquires(files->file_lock)
|
||||
{
|
||||
struct fdtable *fdt;
|
||||
int expanded = 0;
|
||||
|
||||
repeat:
|
||||
fdt = files_fdtable(files);
|
||||
|
||||
/* Do we need to expand? */
|
||||
if (nr < fdt->max_fds)
|
||||
return 0;
|
||||
return expanded;
|
||||
|
||||
/* Can we expand? */
|
||||
if (nr >= sysctl_nr_open)
|
||||
return -EMFILE;
|
||||
|
||||
if (unlikely(files->resize_in_progress)) {
|
||||
spin_unlock(&files->file_lock);
|
||||
expanded = 1;
|
||||
wait_event(files->resize_wait, !files->resize_in_progress);
|
||||
spin_lock(&files->file_lock);
|
||||
goto repeat;
|
||||
}
|
||||
|
||||
/* All good, so we try */
|
||||
return expand_fdtable(files, nr);
|
||||
files->resize_in_progress = true;
|
||||
expanded = expand_fdtable(files, nr);
|
||||
files->resize_in_progress = false;
|
||||
|
||||
wake_up_all(&files->resize_wait);
|
||||
return expanded;
|
||||
}
|
||||
|
||||
static inline void __set_close_on_exec(int fd, struct fdtable *fdt)
|
||||
|
@ -256,6 +273,8 @@ struct files_struct *dup_fd(struct files_struct *oldf, int *errorp)
|
|||
atomic_set(&newf->count, 1);
|
||||
|
||||
spin_lock_init(&newf->file_lock);
|
||||
newf->resize_in_progress = false;
|
||||
init_waitqueue_head(&newf->resize_wait);
|
||||
newf->next_fd = 0;
|
||||
new_fdt = &newf->fdtab;
|
||||
new_fdt->max_fds = NR_OPEN_DEFAULT;
|
||||
|
@ -553,11 +572,21 @@ void __fd_install(struct files_struct *files, unsigned int fd,
|
|||
struct file *file)
|
||||
{
|
||||
struct fdtable *fdt;
|
||||
spin_lock(&files->file_lock);
|
||||
fdt = files_fdtable(files);
|
||||
|
||||
might_sleep();
|
||||
rcu_read_lock_sched();
|
||||
|
||||
while (unlikely(files->resize_in_progress)) {
|
||||
rcu_read_unlock_sched();
|
||||
wait_event(files->resize_wait, !files->resize_in_progress);
|
||||
rcu_read_lock_sched();
|
||||
}
|
||||
/* coupled with smp_wmb() in expand_fdtable() */
|
||||
smp_rmb();
|
||||
fdt = rcu_dereference_sched(files->fdt);
|
||||
BUG_ON(fdt->fd[fd] != NULL);
|
||||
rcu_assign_pointer(fdt->fd[fd], file);
|
||||
spin_unlock(&files->file_lock);
|
||||
rcu_read_unlock_sched();
|
||||
}
|
||||
|
||||
void fd_install(unsigned int fd, struct file *file)
|
||||
|
|
|
@ -47,6 +47,9 @@ struct files_struct {
|
|||
* read mostly part
|
||||
*/
|
||||
atomic_t count;
|
||||
bool resize_in_progress;
|
||||
wait_queue_head_t resize_wait;
|
||||
|
||||
struct fdtable __rcu *fdt;
|
||||
struct fdtable fdtab;
|
||||
/*
|
||||
|
|
Loading…
Reference in a new issue