Btrfs: fix race between scrub and block group deletion
Scrub can race with the cleaner kthread deleting block groups that are
unused (and with relocation too), leading to a failure with error -EINVAL
that gets returned to user space.

The following diagram illustrates how it happens:

              CPU 1                                 CPU 2

 cleaner kthread
   btrfs_delete_unused_bgs()

      gets block group X from
      fs_info->unused_bgs

      sets block group to RO

        btrfs_remove_chunk(bg X)
           deletes device extents

                                         scrub_enumerate_chunks()
                                            searches device tree using
                                            its commit root

                                            finds device extent for
                                            block group X

                                            gets block group X from the tree
                                            fs_info->block_group_cache_tree
                                            (via btrfs_lookup_block_group())

                                            sets bg X to RO (again)

           btrfs_remove_block_group(bg X)

              deletes block group from
              fs_info->block_group_cache_tree

              removes extent map from
              fs_info->mapping_tree

                                            scrub_chunk(offset X)

                                               searches fs_info->mapping_tree
                                               for extent map starting at
                                               offset X

                                                  --> doesn't find any such
                                                      extent map
                                                  --> returns -EINVAL and scrub
                                                      errors out to userspace
                                                      with -EINVAL

Fix this by dealing with an extent map lookup failure as an indicator of
block group deletion.

Issue reproduced with fstest btrfs/071.

Signed-off-by: Filipe Manana <fdmanana@suse.com>
Signed-off-by: Chris Mason <clm@fb.com>
This commit is contained in:
parent
31388ab2ed
commit
020d5b7366
1 changed files with 16 additions and 4 deletions
|
@ -3432,7 +3432,9 @@ out:
|
|||
static noinline_for_stack int scrub_chunk(struct scrub_ctx *sctx,
|
||||
struct btrfs_device *scrub_dev,
|
||||
u64 chunk_offset, u64 length,
|
||||
u64 dev_offset, int is_dev_replace)
|
||||
u64 dev_offset,
|
||||
struct btrfs_block_group_cache *cache,
|
||||
int is_dev_replace)
|
||||
{
|
||||
struct btrfs_mapping_tree *map_tree =
|
||||
&sctx->dev_root->fs_info->mapping_tree;
|
||||
|
@ -3445,8 +3447,18 @@ static noinline_for_stack int scrub_chunk(struct scrub_ctx *sctx,
|
|||
em = lookup_extent_mapping(&map_tree->map_tree, chunk_offset, 1);
|
||||
read_unlock(&map_tree->map_tree.lock);
|
||||
|
||||
if (!em)
|
||||
return -EINVAL;
|
||||
if (!em) {
|
||||
/*
|
||||
* Might have been an unused block group deleted by the cleaner
|
||||
* kthread or relocation.
|
||||
*/
|
||||
spin_lock(&cache->lock);
|
||||
if (!cache->removed)
|
||||
ret = -EINVAL;
|
||||
spin_unlock(&cache->lock);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
map = (struct map_lookup *)em->bdev;
|
||||
if (em->start != chunk_offset)
|
||||
|
@ -3592,7 +3604,7 @@ int scrub_enumerate_chunks(struct scrub_ctx *sctx,
|
|||
dev_replace->cursor_left = found_key.offset;
|
||||
dev_replace->item_needs_writeback = 1;
|
||||
ret = scrub_chunk(sctx, scrub_dev, chunk_offset, length,
|
||||
found_key.offset, is_dev_replace);
|
||||
found_key.offset, cache, is_dev_replace);
|
||||
|
||||
/*
|
||||
* flush, submit all pending read and write bios, afterwards
|
||||
|
|
Loading…
Reference in a new issue