Merge branch 'for-linus-4.1' of git://git.kernel.org/pub/scm/linux/kernel/git/mason/linux-btrfs
Pull btrfs fixes from Chris Mason:
 "A few more btrfs fixes.

  These range from corners Filipe found in the new free space cache
  writeback to a grab bag of fixes from the list"

* 'for-linus-4.1' of git://git.kernel.org/pub/scm/linux/kernel/git/mason/linux-btrfs:
  Btrfs: btrfs_release_extent_buffer_page didn't free pages of dummy extent
  Btrfs: fill ->last_trans for delayed inode in btrfs_fill_inode.
  btrfs: unlock i_mutex after attempting to delete subvolume during send
  btrfs: check io_ctl_prepare_pages return in __btrfs_write_out_cache
  btrfs: fix race on ENOMEM in alloc_extent_buffer
  btrfs: handle ENOMEM in btrfs_alloc_tree_block
  Btrfs: fix find_free_dev_extent() malfunction in case device tree has hole
  Btrfs: don't check for delalloc_bytes in cache_save_setup
  Btrfs: fix deadlock when starting writeback of bg caches
  Btrfs: fix race between start dirty bg cache writeout and bg deletion
commit 64887b6882
7 changed files with 119 additions and 78 deletions
fs/btrfs/delayed-inode.c
@@ -1802,6 +1802,8 @@ int btrfs_fill_inode(struct inode *inode, u32 *rdev)
        set_nlink(inode, btrfs_stack_inode_nlink(inode_item));
        inode_set_bytes(inode, btrfs_stack_inode_nbytes(inode_item));
        BTRFS_I(inode)->generation = btrfs_stack_inode_generation(inode_item);
+       BTRFS_I(inode)->last_trans = btrfs_stack_inode_transid(inode_item);
+
        inode->i_version = btrfs_stack_inode_sequence(inode_item);
        inode->i_rdev = 0;
        *rdev = btrfs_stack_inode_rdev(inode_item);
fs/btrfs/extent-tree.c
@@ -3178,8 +3178,8 @@ static int write_one_cache_group(struct btrfs_trans_handle *trans,
        bi = btrfs_item_ptr_offset(leaf, path->slots[0]);
        write_extent_buffer(leaf, &cache->item, bi, sizeof(cache->item));
        btrfs_mark_buffer_dirty(leaf);
-       btrfs_release_path(path);
 fail:
+       btrfs_release_path(path);
        if (ret)
                btrfs_abort_transaction(trans, root, ret);
        return ret;
@@ -3305,8 +3305,7 @@ again:
 
        spin_lock(&block_group->lock);
        if (block_group->cached != BTRFS_CACHE_FINISHED ||
-           !btrfs_test_opt(root, SPACE_CACHE) ||
-           block_group->delalloc_bytes) {
+           !btrfs_test_opt(root, SPACE_CACHE)) {
                /*
                 * don't bother trying to write stuff out _if_
                 * a) we're not cached,
@@ -3408,17 +3407,14 @@ int btrfs_start_dirty_block_groups(struct btrfs_trans_handle *trans,
        int loops = 0;
 
        spin_lock(&cur_trans->dirty_bgs_lock);
-       if (!list_empty(&cur_trans->dirty_bgs)) {
-               list_splice_init(&cur_trans->dirty_bgs, &dirty);
+       if (list_empty(&cur_trans->dirty_bgs)) {
+               spin_unlock(&cur_trans->dirty_bgs_lock);
+               return 0;
        }
+       list_splice_init(&cur_trans->dirty_bgs, &dirty);
        spin_unlock(&cur_trans->dirty_bgs_lock);
 
 again:
-       if (list_empty(&dirty)) {
-               btrfs_free_path(path);
-               return 0;
-       }
-
        /*
         * make sure all the block groups on our dirty list actually
         * exist
@@ -3431,18 +3427,16 @@ again:
                return -ENOMEM;
        }
 
+       /*
+        * cache_write_mutex is here only to save us from balance or automatic
+        * removal of empty block groups deleting this block group while we are
+        * writing out the cache
+        */
+       mutex_lock(&trans->transaction->cache_write_mutex);
        while (!list_empty(&dirty)) {
                cache = list_first_entry(&dirty,
                                         struct btrfs_block_group_cache,
                                         dirty_list);
-
-               /*
-                * cache_write_mutex is here only to save us from balance
-                * deleting this block group while we are writing out the
-                * cache
-                */
-               mutex_lock(&trans->transaction->cache_write_mutex);
-
                /*
                 * this can happen if something re-dirties a block
                 * group that is already under IO.  Just wait for it to
@@ -3495,7 +3489,6 @@ again:
                }
                if (!ret)
                        ret = write_one_cache_group(trans, root, path, cache);
-               mutex_unlock(&trans->transaction->cache_write_mutex);
 
                /* if its not on the io list, we need to put the block group */
                if (should_put)
@@ -3503,7 +3496,16 @@ again:
 
                if (ret)
                        break;
+
+               /*
+                * Avoid blocking other tasks for too long. It might even save
+                * us from writing caches for block groups that are going to be
+                * removed.
+                */
+               mutex_unlock(&trans->transaction->cache_write_mutex);
+               mutex_lock(&trans->transaction->cache_write_mutex);
        }
+       mutex_unlock(&trans->transaction->cache_write_mutex);
 
        /*
         * go through delayed refs for all the stuff we've just kicked off
@@ -3514,8 +3516,15 @@ again:
                loops++;
                spin_lock(&cur_trans->dirty_bgs_lock);
                list_splice_init(&cur_trans->dirty_bgs, &dirty);
+               /*
+                * dirty_bgs_lock protects us from concurrent block group
+                * deletes too (not just cache_write_mutex).
+                */
+               if (!list_empty(&dirty)) {
+                       spin_unlock(&cur_trans->dirty_bgs_lock);
+                       goto again;
+               }
                spin_unlock(&cur_trans->dirty_bgs_lock);
-               goto again;
        }
 
        btrfs_free_path(path);
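The three hunks above share one pattern: splice the shared list into a private batch under the spinlock, process the batch, then recheck the shared list under the same lock before exiting, so anything re-dirtied during processing is not left behind. A minimal userspace sketch of that pattern, with a pthread mutex standing in for dirty_bgs_lock; the node list and process() helper are hypothetical:

#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>

struct node {
        int val;
        struct node *next;
};

static pthread_mutex_t dirty_lock = PTHREAD_MUTEX_INITIALIZER;
static struct node *dirty_list;         /* shared; protected by dirty_lock */

static void process(struct node *n)
{
        printf("processing %d\n", n->val);
        free(n);
}

static void write_out_dirty(void)
{
        struct node *batch;

again:
        /* Splice the shared list into a private batch under the lock. */
        pthread_mutex_lock(&dirty_lock);
        batch = dirty_list;
        dirty_list = NULL;
        pthread_mutex_unlock(&dirty_lock);

        while (batch) {
                struct node *n = batch;

                batch = batch->next;
                process(n);     /* other tasks may re-dirty items meanwhile */
        }

        /*
         * Recheck under the lock before returning: if anything was added
         * while we were processing, go around again instead of leaving it
         * behind -- the window the patch above closes.
         */
        pthread_mutex_lock(&dirty_lock);
        if (dirty_list) {
                pthread_mutex_unlock(&dirty_lock);
                goto again;
        }
        pthread_mutex_unlock(&dirty_lock);
}

int main(void)
{
        for (int i = 0; i < 3; i++) {
                struct node *n = malloc(sizeof(*n));

                n->val = i;
                n->next = dirty_list;
                dirty_list = n;
        }
        write_out_dirty();
        return 0;
}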
@@ -7537,7 +7546,7 @@ static void unuse_block_rsv(struct btrfs_fs_info *fs_info,
  * returns the key for the extent through ins, and a tree buffer for
  * the first block of the extent through buf.
  *
- * returns the tree buffer or NULL.
+ * returns the tree buffer or an ERR_PTR on error.
  */
 struct extent_buffer *btrfs_alloc_tree_block(struct btrfs_trans_handle *trans,
                                        struct btrfs_root *root,
@@ -7548,6 +7557,7 @@ struct extent_buffer *btrfs_alloc_tree_block(struct btrfs_trans_handle *trans,
        struct btrfs_key ins;
        struct btrfs_block_rsv *block_rsv;
        struct extent_buffer *buf;
+       struct btrfs_delayed_extent_op *extent_op;
        u64 flags = 0;
        int ret;
        u32 blocksize = root->nodesize;
|
@ -7568,13 +7578,14 @@ struct extent_buffer *btrfs_alloc_tree_block(struct btrfs_trans_handle *trans,
|
|||
|
||||
ret = btrfs_reserve_extent(root, blocksize, blocksize,
|
||||
empty_size, hint, &ins, 0, 0);
|
||||
if (ret) {
|
||||
unuse_block_rsv(root->fs_info, block_rsv, blocksize);
|
||||
return ERR_PTR(ret);
|
||||
}
|
||||
if (ret)
|
||||
goto out_unuse;
|
||||
|
||||
buf = btrfs_init_new_buffer(trans, root, ins.objectid, level);
|
||||
BUG_ON(IS_ERR(buf)); /* -ENOMEM */
|
||||
if (IS_ERR(buf)) {
|
||||
ret = PTR_ERR(buf);
|
||||
goto out_free_reserved;
|
||||
}
|
||||
|
||||
if (root_objectid == BTRFS_TREE_RELOC_OBJECTID) {
|
||||
if (parent == 0)
|
||||
|
@@ -7584,9 +7595,11 @@ struct extent_buffer *btrfs_alloc_tree_block(struct btrfs_trans_handle *trans,
        BUG_ON(parent > 0);
 
        if (root_objectid != BTRFS_TREE_LOG_OBJECTID) {
-               struct btrfs_delayed_extent_op *extent_op;
                extent_op = btrfs_alloc_delayed_extent_op();
-               BUG_ON(!extent_op); /* -ENOMEM */
+               if (!extent_op) {
+                       ret = -ENOMEM;
+                       goto out_free_buf;
+               }
                if (key)
                        memcpy(&extent_op->key, key, sizeof(extent_op->key));
                else
@@ -7601,13 +7614,24 @@ struct extent_buffer *btrfs_alloc_tree_block(struct btrfs_trans_handle *trans,
                extent_op->level = level;
 
                ret = btrfs_add_delayed_tree_ref(root->fs_info, trans,
-                                               ins.objectid,
-                                               ins.offset, parent, root_objectid,
-                                               level, BTRFS_ADD_DELAYED_EXTENT,
-                                               extent_op, 0);
-               BUG_ON(ret); /* -ENOMEM */
+                                                ins.objectid, ins.offset,
+                                                parent, root_objectid, level,
+                                                BTRFS_ADD_DELAYED_EXTENT,
+                                                extent_op, 0);
+               if (ret)
+                       goto out_free_delayed;
        }
        return buf;
+
+out_free_delayed:
+       btrfs_free_delayed_extent_op(extent_op);
+out_free_buf:
+       free_extent_buffer(buf);
+out_free_reserved:
+       btrfs_free_reserved_extent(root, ins.objectid, ins.offset, 0);
+out_unuse:
+       unuse_block_rsv(root->fs_info, block_rsv, blocksize);
+       return ERR_PTR(ret);
 }
 
 struct walk_control {
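The four hunks above convert btrfs_alloc_tree_block() from BUG_ON() on allocation failure to a staged unwind: each out_* label releases exactly the resources acquired before the failure, in reverse order, and every label falls through to the ones below it. A minimal sketch of the same goto-ladder pattern in plain C; the malloc() stages are hypothetical stand-ins for the reservation, buffer and delayed-op allocations:

#include <stdio.h>
#include <stdlib.h>

static int setup_all(void)
{
        char *reservation, *buffer, *op;
        int ret;

        reservation = malloc(64);       /* stage 1 */
        if (!reservation)
                return -1;

        buffer = malloc(64);            /* stage 2 */
        if (!buffer) {
                ret = -1;
                goto out_free_reservation;
        }

        op = malloc(64);                /* stage 3 */
        if (!op) {
                ret = -1;
                goto out_free_buffer;   /* unwinds stages 2, then 1 */
        }

        /* success: hand everything back (here, just release it) */
        free(op);
        free(buffer);
        free(reservation);
        return 0;

out_free_buffer:
        free(buffer);
out_free_reservation:
        free(reservation);
        return ret;
}

int main(void)
{
        return setup_all() ? EXIT_FAILURE : EXIT_SUCCESS;
}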
fs/btrfs/extent_io.c
@@ -4560,36 +4560,37 @@ static void btrfs_release_extent_buffer_page(struct extent_buffer *eb)
        do {
                index--;
                page = eb->pages[index];
-               if (page && mapped) {
+               if (!page)
+                       continue;
+               if (mapped)
                        spin_lock(&page->mapping->private_lock);
+               /*
+                * We do this since we'll remove the pages after we've
+                * removed the eb from the radix tree, so we could race
+                * and have this page now attached to the new eb.  So
+                * only clear page_private if it's still connected to
+                * this eb.
+                */
+               if (PagePrivate(page) &&
+                   page->private == (unsigned long)eb) {
+                       BUG_ON(test_bit(EXTENT_BUFFER_DIRTY, &eb->bflags));
+                       BUG_ON(PageDirty(page));
+                       BUG_ON(PageWriteback(page));
                        /*
-                        * We do this since we'll remove the pages after we've
-                        * removed the eb from the radix tree, so we could race
-                        * and have this page now attached to the new eb.  So
-                        * only clear page_private if it's still connected to
-                        * this eb.
+                        * We need to make sure we haven't be attached
+                        * to a new eb.
                         */
-                       if (PagePrivate(page) &&
-                           page->private == (unsigned long)eb) {
-                               BUG_ON(test_bit(EXTENT_BUFFER_DIRTY, &eb->bflags));
-                               BUG_ON(PageDirty(page));
-                               BUG_ON(PageWriteback(page));
-                               /*
-                                * We need to make sure we haven't be attached
-                                * to a new eb.
-                                */
-                               ClearPagePrivate(page);
-                               set_page_private(page, 0);
-                               /* One for the page private */
-                               page_cache_release(page);
-                       }
-                       spin_unlock(&page->mapping->private_lock);
-
-               }
-               if (page) {
-                       /* One for when we alloced the page */
+                       ClearPagePrivate(page);
+                       set_page_private(page, 0);
+                       /* One for the page private */
                        page_cache_release(page);
                }
+
+               if (mapped)
+                       spin_unlock(&page->mapping->private_lock);
+
+               /* One for when we alloced the page */
+               page_cache_release(page);
        } while (index != 0);
 }
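The rewrite above replaces the nested "if (page && mapped)" / "if (page)" pair with an early continue, so every non-NULL page reaches the final page_cache_release() -- including the pages of dummy extent buffers, which have no mapping and previously never hit the release. A minimal sketch of that early-continue loop shape; the string array is a hypothetical stand-in for eb->pages:

#include <stdio.h>

/* Hypothetical page table; NULL entries are slots never allocated. */
static const char *pages[] = { "p0", NULL, "p2" };

int main(void)
{
        int index = sizeof(pages) / sizeof(pages[0]);

        do {
                index--;
                const char *page = pages[index];

                /*
                 * Early continue keeps the common path unindented and
                 * guarantees every live page reaches the release below.
                 */
                if (!page)
                        continue;

                printf("release %s\n", page);
        } while (index != 0);

        return 0;
}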
@@ -4870,6 +4871,7 @@ struct extent_buffer *alloc_extent_buffer(struct btrfs_fs_info *fs_info,
                        mark_extent_buffer_accessed(exists, p);
                        goto free_eb;
                }
+               exists = NULL;
 
                /*
                 * Do this so attach doesn't complain and we need to
@@ -4933,12 +4935,12 @@ again:
        return eb;
 
 free_eb:
+       WARN_ON(!atomic_dec_and_test(&eb->refs));
        for (i = 0; i < num_pages; i++) {
                if (eb->pages[i])
                        unlock_page(eb->pages[i]);
        }
 
-       WARN_ON(!atomic_dec_and_test(&eb->refs));
        btrfs_release_extent_buffer(eb);
        return exists;
 }
fs/btrfs/free-space-cache.c
@@ -1218,7 +1218,7 @@ out:
  *
  * This function writes out a free space cache struct to disk for quick recovery
  * on mount.  This will return 0 if it was successfull in writing the cache out,
- * and -1 if it was not.
+ * or an errno if it was not.
  */
 static int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode,
                              struct btrfs_free_space_ctl *ctl,
@@ -1235,12 +1235,12 @@ static int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode,
        int must_iput = 0;
 
        if (!i_size_read(inode))
-               return -1;
+               return -EIO;
 
        WARN_ON(io_ctl->pages);
        ret = io_ctl_init(io_ctl, inode, root, 1);
        if (ret)
-               return -1;
+               return ret;
 
        if (block_group && (block_group->flags & BTRFS_BLOCK_GROUP_DATA)) {
                down_write(&block_group->data_rwsem);
@@ -1258,7 +1258,9 @@ static int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode,
        }
 
        /* Lock all pages first so we can lock the extent safely. */
-       io_ctl_prepare_pages(io_ctl, inode, 0);
+       ret = io_ctl_prepare_pages(io_ctl, inode, 0);
+       if (ret)
+               goto out;
 
        lock_extent_bits(&BTRFS_I(inode)->io_tree, 0, i_size_read(inode) - 1,
                         0, &cached_state);
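The hunks above stop collapsing failures to a bare -1: the real negative errno is propagated to callers, and the prepare-pages call is now checked before the extent lock is taken. A minimal sketch of that return-code convention; prepare_pages() is a hypothetical stand-in for io_ctl_prepare_pages():

#include <errno.h>
#include <stdio.h>
#include <string.h>

/* Return 0 on success or a negative errno, never a bare -1. */
static int prepare_pages(int should_fail)
{
        return should_fail ? -ENOMEM : 0;
}

static int write_out_cache(int should_fail)
{
        int ret;

        ret = prepare_pages(should_fail);
        if (ret)
                return ret;     /* propagate the real errno to callers */

        return 0;
}

int main(void)
{
        int ret = write_out_cache(1);

        if (ret)
                fprintf(stderr, "cache write failed: %s\n", strerror(-ret));
        return 0;
}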
fs/btrfs/inode.c
@@ -3632,16 +3632,6 @@ static void btrfs_read_locked_inode(struct inode *inode)
        BTRFS_I(inode)->generation = btrfs_inode_generation(leaf, inode_item);
        BTRFS_I(inode)->last_trans = btrfs_inode_transid(leaf, inode_item);
 
-       /*
-        * If we were modified in the current generation and evicted from memory
-        * and then re-read we need to do a full sync since we don't have any
-        * idea about which extents were modified before we were evicted from
-        * cache.
-        */
-       if (BTRFS_I(inode)->last_trans == root->fs_info->generation)
-               set_bit(BTRFS_INODE_NEEDS_FULL_SYNC,
-                       &BTRFS_I(inode)->runtime_flags);
-
        inode->i_version = btrfs_inode_sequence(leaf, inode_item);
        inode->i_generation = BTRFS_I(inode)->generation;
        inode->i_rdev = 0;
@@ -3651,6 +3641,19 @@ static void btrfs_read_locked_inode(struct inode *inode)
        BTRFS_I(inode)->flags = btrfs_inode_flags(leaf, inode_item);
 
 cache_index:
+       /*
+        * If we were modified in the current generation and evicted from memory
+        * and then re-read we need to do a full sync since we don't have any
+        * idea about which extents were modified before we were evicted from
+        * cache.
+        *
+        * This is required for both inode re-read from disk and delayed inode
+        * in delayed_nodes_tree.
+        */
+       if (BTRFS_I(inode)->last_trans == root->fs_info->generation)
+               set_bit(BTRFS_INODE_NEEDS_FULL_SYNC,
+                       &BTRFS_I(inode)->runtime_flags);
+
        path->slots[0]++;
        if (inode->i_nlink != 1 ||
            path->slots[0] >= btrfs_header_nritems(leaf))
fs/btrfs/ioctl.c
@@ -2410,7 +2410,7 @@ static noinline int btrfs_ioctl_snap_destroy(struct file *file,
                        "Attempt to delete subvolume %llu during send",
                        dest->root_key.objectid);
                err = -EPERM;
-               goto out_dput;
+               goto out_unlock_inode;
        }
 
        d_invalidate(dentry);
@@ -2505,6 +2505,7 @@ out_up_write:
                              root_flags & ~BTRFS_ROOT_SUBVOL_DEAD);
                spin_unlock(&dest->root_item_lock);
        }
+out_unlock_inode:
        mutex_unlock(&inode->i_mutex);
        if (!err) {
                shrink_dcache_sb(root->fs_info->sb);
fs/btrfs/volumes.c
@@ -1058,6 +1058,7 @@ static int contains_pending_extent(struct btrfs_trans_handle *trans,
        struct extent_map *em;
        struct list_head *search_list = &trans->transaction->pending_chunks;
        int ret = 0;
+       u64 physical_start = *start;
 
 again:
        list_for_each_entry(em, search_list, list) {
@@ -1068,9 +1069,9 @@ again:
                for (i = 0; i < map->num_stripes; i++) {
                        if (map->stripes[i].dev != device)
                                continue;
-                       if (map->stripes[i].physical >= *start + len ||
+                       if (map->stripes[i].physical >= physical_start + len ||
                            map->stripes[i].physical + em->orig_block_len <=
-                           *start)
+                           physical_start)
                                continue;
                        *start = map->stripes[i].physical +
                                em->orig_block_len;
@@ -1193,8 +1194,14 @@ again:
                         */
                        if (contains_pending_extent(trans, device,
                                                    &search_start,
-                                                   hole_size))
-                               hole_size = 0;
+                                                   hole_size)) {
+                               if (key.offset >= search_start) {
+                                       hole_size = key.offset - search_start;
+                               } else {
+                                       WARN_ON_ONCE(1);
+                                       hole_size = 0;
+                               }
+                       }
 
                        if (hole_size > max_hole_size) {
                                max_hole_start = search_start;
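The last hunk recomputes the hole instead of discarding it: when a pending chunk overlaps only the front of the hole, contains_pending_extent() advances search_start, and the remainder up to key.offset is still usable. A minimal sketch of that recompute; the helper name and types are hypothetical:

#include <stdio.h>

typedef unsigned long long u64;

/*
 * If a pending chunk pushed search_start forward, the remaining hole
 * is whatever is left before key_offset -- or nothing if we overshot
 * (the kernel warns once in that case).
 */
static u64 remaining_hole(u64 key_offset, u64 search_start)
{
        if (key_offset >= search_start)
                return key_offset - search_start;
        return 0;
}

int main(void)
{
        printf("%llu\n", remaining_hole(100, 40));      /* 60 */
        printf("%llu\n", remaining_hole(100, 120));     /* 0  */
        return 0;
}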