Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs

Pull vfs fixes from Al Viro:
 "dcache fixes + kvfree() (uninlined, exported by mm/util.c) + posix_acl
  bugfix from hch"

The dcache fixes are for a subtle LRU list corruption bug reported by
Miklos Szeredi, where people inside IBM saw list corruptions with the
LTP/host01 test.

* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs:
  nick kvfree() from apparmor
  posix_acl: handle NULL ACL in posix_acl_equiv_mode
  dcache: don't need rcu in shrink_dentry_list()
  more graceful recovery in umount_collect()
  don't remove from shrink list in select_collect()
  dentry_kill(): don't try to remove from shrink list
  expand the call of dentry_lru_del() in dentry_kill()
  new helper: dentry_free()
  fold try_prune_one_dentry()
  fold d_kill() and d_free()
  fix races between __d_instantiate() and checks of dentry flags
This commit is contained in:
Linus Torvalds 2014-05-06 12:22:20 -07:00
commit 8169d3005e
8 changed files with 125 additions and 234 deletions

View file

@ -246,16 +246,8 @@ static void __d_free(struct rcu_head *head)
kmem_cache_free(dentry_cache, dentry);
}
/*
* no locks, please.
*/
static void d_free(struct dentry *dentry)
static void dentry_free(struct dentry *dentry)
{
BUG_ON((int)dentry->d_lockref.count > 0);
this_cpu_dec(nr_dentry);
if (dentry->d_op && dentry->d_op->d_release)
dentry->d_op->d_release(dentry);
/* if dentry was never visible to RCU, immediate free is OK */
if (!(dentry->d_flags & DCACHE_RCUACCESS))
__d_free(&dentry->d_u.d_rcu);
@ -403,56 +395,6 @@ static void dentry_lru_add(struct dentry *dentry)
d_lru_add(dentry);
}
/*
* Remove a dentry with references from the LRU.
*
* If we are on the shrink list, then we can get to try_prune_one_dentry() and
* lose our last reference through the parent walk. In this case, we need to
* remove ourselves from the shrink list, not the LRU.
*
* Nothing is done unless DCACHE_LRU_LIST is set in dentry->d_flags.
*/
static void dentry_lru_del(struct dentry *dentry)
{
if (dentry->d_flags & DCACHE_LRU_LIST) {
/* on a shrink list: take it off that list and leave the LRU alone */
if (dentry->d_flags & DCACHE_SHRINK_LIST)
return d_shrink_del(dentry);
d_lru_del(dentry);
}
}
/**
* d_kill - kill dentry and return parent
* @dentry: dentry to kill
* @parent: parent dentry
*
* The dentry must already be unhashed and removed from the LRU.
*
* If this is the root of the dentry tree, return NULL.
*
* dentry->d_lock and parent->d_lock must be held by caller, and are dropped by
* d_kill.
*
* Returns @parent exactly as it was passed in.
*/
static struct dentry *d_kill(struct dentry *dentry, struct dentry *parent)
__releases(dentry->d_lock)
__releases(parent->d_lock)
__releases(dentry->d_inode->i_lock)
{
/* unlink from the d_child list before flagging the dentry as killed */
list_del(&dentry->d_u.d_child);
/*
* Inform d_walk() that we are no longer attached to the
* dentry tree
*/
dentry->d_flags |= DCACHE_DENTRY_KILLED;
/* @parent may be NULL (see above); only unlock a parent that exists */
if (parent)
spin_unlock(&parent->d_lock);
dentry_iput(dentry);
/*
* dentry_iput drops the locks, at which point nobody (except
* transient RCU lookups) can reach this dentry.
*/
d_free(dentry);
return parent;
}
/**
* d_drop - drop a dentry
* @dentry: dentry to drop
@ -510,7 +452,14 @@ dentry_kill(struct dentry *dentry, int unlock_on_failure)
__releases(dentry->d_lock)
{
struct inode *inode;
struct dentry *parent;
struct dentry *parent = NULL;
bool can_free = true;
if (unlikely(dentry->d_flags & DCACHE_DENTRY_KILLED)) {
can_free = dentry->d_flags & DCACHE_MAY_FREE;
spin_unlock(&dentry->d_lock);
goto out;
}
inode = dentry->d_inode;
if (inode && !spin_trylock(&inode->i_lock)) {
@ -521,9 +470,7 @@ relock:
}
return dentry; /* try again with same dentry */
}
if (IS_ROOT(dentry))
parent = NULL;
else
if (!IS_ROOT(dentry))
parent = dentry->d_parent;
if (parent && !spin_trylock(&parent->d_lock)) {
if (inode)
@ -543,10 +490,40 @@ relock:
if ((dentry->d_flags & DCACHE_OP_PRUNE) && !d_unhashed(dentry))
dentry->d_op->d_prune(dentry);
dentry_lru_del(dentry);
if (dentry->d_flags & DCACHE_LRU_LIST) {
if (!(dentry->d_flags & DCACHE_SHRINK_LIST))
d_lru_del(dentry);
}
/* if it was on the hash then remove it */
__d_drop(dentry);
return d_kill(dentry, parent);
list_del(&dentry->d_u.d_child);
/*
* Inform d_walk() that we are no longer attached to the
* dentry tree
*/
dentry->d_flags |= DCACHE_DENTRY_KILLED;
if (parent)
spin_unlock(&parent->d_lock);
dentry_iput(dentry);
/*
* dentry_iput drops the locks, at which point nobody (except
* transient RCU lookups) can reach this dentry.
*/
BUG_ON((int)dentry->d_lockref.count > 0);
this_cpu_dec(nr_dentry);
if (dentry->d_op && dentry->d_op->d_release)
dentry->d_op->d_release(dentry);
spin_lock(&dentry->d_lock);
if (dentry->d_flags & DCACHE_SHRINK_LIST) {
dentry->d_flags |= DCACHE_MAY_FREE;
can_free = false;
}
spin_unlock(&dentry->d_lock);
out:
if (likely(can_free))
dentry_free(dentry);
return parent;
}
/*
@ -815,65 +792,13 @@ restart:
}
EXPORT_SYMBOL(d_prune_aliases);
/*
* Try to throw away a dentry - free the inode, dput the parent.
* Requires dentry->d_lock is held, and dentry->d_count == 0.
* Releases dentry->d_lock.
*
* This may fail if locks cannot be acquired; that is no problem, just try
* again.
*
* Returns the same dentry when dentry_kill() handed it back (trylocks
* failed), and NULL in every other case, including after the ancestors
* have been pruned.
*/
static struct dentry * try_prune_one_dentry(struct dentry *dentry)
__releases(dentry->d_lock)
{
struct dentry *parent;
parent = dentry_kill(dentry, 0);
/*
* If dentry_kill returns NULL, we have nothing more to do.
* if it returns the same dentry, trylocks failed. In either
* case, just loop again.
*
* Otherwise, we need to prune ancestors too. This is necessary
* to prevent quadratic behavior of shrink_dcache_parent(), but
* is also expected to be beneficial in reducing dentry cache
* fragmentation.
*/
if (!parent)
return NULL;
if (parent == dentry)
return dentry;
/* Prune ancestors. */
dentry = parent;
while (dentry) {
/* stop climbing as soon as lockref_put_or_lock() returns nonzero */
if (lockref_put_or_lock(&dentry->d_lockref))
return NULL;
dentry = dentry_kill(dentry, 1);
}
return NULL;
}
static void shrink_dentry_list(struct list_head *list)
{
struct dentry *dentry;
struct dentry *dentry, *parent;
rcu_read_lock();
for (;;) {
dentry = list_entry_rcu(list->prev, struct dentry, d_lru);
if (&dentry->d_lru == list)
break; /* empty */
/*
* Get the dentry lock, and re-verify that the dentry is
* still on the shrinking list. If it is, we know that
* DCACHE_SHRINK_LIST and DCACHE_LRU_LIST are set.
*/
while (!list_empty(list)) {
dentry = list_entry(list->prev, struct dentry, d_lru);
spin_lock(&dentry->d_lock);
if (dentry != list_entry(list->prev, struct dentry, d_lru)) {
spin_unlock(&dentry->d_lock);
continue;
}
/*
* The dispose list is isolated and dentries are not accounted
* to the LRU here, so we can simply remove it from the list
@ -885,30 +810,38 @@ static void shrink_dentry_list(struct list_head *list)
* We found an inuse dentry which was not removed from
* the LRU because of laziness during lookup. Do not free it.
*/
if (dentry->d_lockref.count) {
if ((int)dentry->d_lockref.count > 0) {
spin_unlock(&dentry->d_lock);
continue;
}
rcu_read_unlock();
parent = dentry_kill(dentry, 0);
/*
* If 'try_to_prune()' returns a dentry, it will
* be the same one we passed in, and d_lock will
* have been held the whole time, so it will not
* have been added to any other lists. We failed
* to get the inode lock.
*
* We just add it back to the shrink list.
* If dentry_kill returns NULL, we have nothing more to do.
*/
dentry = try_prune_one_dentry(dentry);
if (!parent)
continue;
rcu_read_lock();
if (dentry) {
if (unlikely(parent == dentry)) {
/*
* trylocks have failed and d_lock has been held the
* whole time, so it could not have been added to any
* other lists. Just add it back to the shrink list.
*/
d_shrink_add(dentry, list);
spin_unlock(&dentry->d_lock);
continue;
}
/*
* We need to prune ancestors too. This is necessary to prevent
* quadratic behavior of shrink_dcache_parent(), but is also
* expected to be beneficial in reducing dentry cache
* fragmentation.
*/
dentry = parent;
while (dentry && !lockref_put_or_lock(&dentry->d_lockref))
dentry = dentry_kill(dentry, 1);
}
rcu_read_unlock();
}
static enum lru_status
@ -1261,34 +1194,23 @@ static enum d_walk_ret select_collect(void *_data, struct dentry *dentry)
if (data->start == dentry)
goto out;
/*
* move only zero ref count dentries to the dispose list.
*
* Those which are presently on the shrink list, being processed
* by shrink_dentry_list(), shouldn't be moved. Otherwise the
* loop in shrink_dcache_parent() might not make any progress
* and loop forever.
*/
if (dentry->d_lockref.count) {
dentry_lru_del(dentry);
} else if (!(dentry->d_flags & DCACHE_SHRINK_LIST)) {
/*
* We can't use d_lru_shrink_move() because we
* need to get the global LRU lock and do the
* LRU accounting.
*/
d_lru_del(dentry);
d_shrink_add(dentry, &data->dispose);
if (dentry->d_flags & DCACHE_SHRINK_LIST) {
data->found++;
ret = D_WALK_NORETRY;
} else {
if (dentry->d_flags & DCACHE_LRU_LIST)
d_lru_del(dentry);
if (!dentry->d_lockref.count) {
d_shrink_add(dentry, &data->dispose);
data->found++;
}
}
/*
* We can return to the caller if we have found some (this
* ensures forward progress). We'll be coming back to find
* the rest.
*/
if (data->found && need_resched())
ret = D_WALK_QUIT;
if (!list_empty(&data->dispose))
ret = need_resched() ? D_WALK_QUIT : D_WALK_NORETRY;
out:
return ret;
}
@ -1318,45 +1240,35 @@ void shrink_dcache_parent(struct dentry *parent)
}
EXPORT_SYMBOL(shrink_dcache_parent);
static enum d_walk_ret umount_collect(void *_data, struct dentry *dentry)
static enum d_walk_ret umount_check(void *_data, struct dentry *dentry)
{
struct select_data *data = _data;
enum d_walk_ret ret = D_WALK_CONTINUE;
/* it has busy descendents; complain about those instead */
if (!list_empty(&dentry->d_subdirs))
return D_WALK_CONTINUE;
if (dentry->d_lockref.count) {
dentry_lru_del(dentry);
if (likely(!list_empty(&dentry->d_subdirs)))
goto out;
if (dentry == data->start && dentry->d_lockref.count == 1)
goto out;
printk(KERN_ERR
"BUG: Dentry %p{i=%lx,n=%s}"
" still in use (%d)"
" [unmount of %s %s]\n",
/* root with refcount 1 is fine */
if (dentry == _data && dentry->d_lockref.count == 1)
return D_WALK_CONTINUE;
printk(KERN_ERR "BUG: Dentry %p{i=%lx,n=%pd} "
" still in use (%d) [unmount of %s %s]\n",
dentry,
dentry->d_inode ?
dentry->d_inode->i_ino : 0UL,
dentry->d_name.name,
dentry,
dentry->d_lockref.count,
dentry->d_sb->s_type->name,
dentry->d_sb->s_id);
BUG();
} else if (!(dentry->d_flags & DCACHE_SHRINK_LIST)) {
/*
* We can't use d_lru_shrink_move() because we
* need to get the global LRU lock and do the
* LRU accounting.
*/
if (dentry->d_flags & DCACHE_LRU_LIST)
d_lru_del(dentry);
d_shrink_add(dentry, &data->dispose);
data->found++;
ret = D_WALK_NORETRY;
}
out:
if (data->found && need_resched())
ret = D_WALK_QUIT;
return ret;
WARN_ON(1);
return D_WALK_CONTINUE;
}
/*
* Dispose of one dentry tree rooted at @dentry.
*
* Runs shrink_dcache_parent() on the root, then walks the tree with
* d_walk() using umount_check() as the callback, and finally hands the
* root to d_drop() and dput().
*/
static void do_one_tree(struct dentry *dentry)
{
shrink_dcache_parent(dentry);
/* the root doubles as the callback data that umount_check() compares against */
d_walk(dentry, dentry, umount_check, NULL);
d_drop(dentry);
dput(dentry);
}
/*
@ -1366,40 +1278,15 @@ void shrink_dcache_for_umount(struct super_block *sb)
{
struct dentry *dentry;
if (down_read_trylock(&sb->s_umount))
BUG();
WARN(down_read_trylock(&sb->s_umount), "s_umount should've been locked");
dentry = sb->s_root;
sb->s_root = NULL;
for (;;) {
struct select_data data;
INIT_LIST_HEAD(&data.dispose);
data.start = dentry;
data.found = 0;
d_walk(dentry, &data, umount_collect, NULL);
if (!data.found)
break;
shrink_dentry_list(&data.dispose);
cond_resched();
}
d_drop(dentry);
dput(dentry);
do_one_tree(dentry);
while (!hlist_bl_empty(&sb->s_anon)) {
struct select_data data;
dentry = hlist_bl_entry(hlist_bl_first(&sb->s_anon), struct dentry, d_hash);
INIT_LIST_HEAD(&data.dispose);
data.start = NULL;
data.found = 0;
d_walk(dentry, &data, umount_collect, NULL);
if (data.found)
shrink_dentry_list(&data.dispose);
cond_resched();
dentry = dget(hlist_bl_entry(hlist_bl_first(&sb->s_anon), struct dentry, d_hash));
do_one_tree(dentry);
}
}
@ -1647,8 +1534,7 @@ static void __d_instantiate(struct dentry *dentry, struct inode *inode)
unsigned add_flags = d_flags_for_inode(inode);
spin_lock(&dentry->d_lock);
dentry->d_flags &= ~DCACHE_ENTRY_TYPE;
dentry->d_flags |= add_flags;
__d_set_type(dentry, add_flags);
if (inode)
hlist_add_head(&dentry->d_alias, &inode->i_dentry);
dentry->d_inode = inode;

View file

@ -1542,7 +1542,7 @@ static inline int walk_component(struct nameidata *nd, struct path *path,
inode = path->dentry->d_inode;
}
err = -ENOENT;
if (!inode)
if (!inode || d_is_negative(path->dentry))
goto out_path_put;
if (should_follow_link(path->dentry, follow)) {
@ -2249,7 +2249,7 @@ mountpoint_last(struct nameidata *nd, struct path *path)
mutex_unlock(&dir->d_inode->i_mutex);
done:
if (!dentry->d_inode) {
if (!dentry->d_inode || d_is_negative(dentry)) {
error = -ENOENT;
dput(dentry);
goto out;
@ -2994,7 +2994,7 @@ retry_lookup:
finish_lookup:
/* we _can_ be in RCU mode here */
error = -ENOENT;
if (d_is_negative(path->dentry)) {
if (!inode || d_is_negative(path->dentry)) {
path_to_nameidata(path, nd);
goto out;
}

View file

@ -246,6 +246,12 @@ posix_acl_equiv_mode(const struct posix_acl *acl, umode_t *mode_p)
umode_t mode = 0;
int not_equiv = 0;
/*
* A null ACL can always be presented as mode bits.
*/
if (!acl)
return 0;
FOREACH_ACL_ENTRY(pa, acl, pe) {
switch (pa->e_tag) {
case ACL_USER_OBJ:

View file

@ -221,6 +221,8 @@ struct dentry_operations {
#define DCACHE_SYMLINK_TYPE 0x00300000 /* Symlink */
#define DCACHE_FILE_TYPE 0x00400000 /* Other file type */
#define DCACHE_MAY_FREE 0x00800000
extern seqlock_t rename_lock;
static inline int dname_external(const struct dentry *dentry)

View file

@ -370,6 +370,8 @@ static inline int is_vmalloc_or_module_addr(const void *x)
}
#endif
extern void kvfree(const void *addr);
static inline void compound_lock(struct page *page)
{
#ifdef CONFIG_TRANSPARENT_HUGEPAGE

View file

@ -10,6 +10,7 @@
#include <linux/swapops.h>
#include <linux/mman.h>
#include <linux/hugetlb.h>
#include <linux/vmalloc.h>
#include <asm/uaccess.h>
@ -387,6 +388,15 @@ unsigned long vm_mmap(struct file *file, unsigned long addr,
}
EXPORT_SYMBOL(vm_mmap);
/**
* kvfree - free a buffer with vfree() or kfree(), chosen by its address
* @addr: address of the buffer to free
*
* The address is tested with is_vmalloc_addr(); when that test succeeds the
* buffer is passed to vfree(), otherwise it is passed to kfree().
*/
void kvfree(const void *addr)
{
if (is_vmalloc_addr(addr))
vfree(addr);
else
kfree(addr);
}
EXPORT_SYMBOL(kvfree);
struct address_space *page_mapping(struct page *page)
{
struct address_space *mapping = page->mapping;

View file

@ -66,7 +66,6 @@ extern int apparmor_initialized __initdata;
char *aa_split_fqname(char *args, char **ns_name);
void aa_info_message(const char *str);
void *__aa_kvmalloc(size_t size, gfp_t flags);
void kvfree(void *buffer);
static inline void *kvmalloc(size_t size)
{

View file

@ -104,17 +104,3 @@ void *__aa_kvmalloc(size_t size, gfp_t flags)
}
return buffer;
}
/**
* kvfree - free an allocation done by kvmalloc
* @buffer: buffer to free (MAYBE_NULL)
*
* Free a buffer allocated by kvmalloc. The buffer goes to vfree() when
* is_vmalloc_addr() reports true for it, and to kfree() otherwise.
*/
void kvfree(void *buffer)
{
if (is_vmalloc_addr(buffer))
vfree(buffer);
else
kfree(buffer);
}