Merge git://git.kernel.org/pub/scm/linux/kernel/git/mason/btrfs-unstable

* git://git.kernel.org/pub/scm/linux/kernel/git/mason/btrfs-unstable:
  Btrfs: fix fiemap bugs with delalloc
  Btrfs: set FMODE_EXCL in btrfs_device->mode
  Btrfs: make btrfs_rm_device() fail gracefully
  Btrfs: Avoid accessing unmapped kernel address
  Btrfs: Fix BTRFS_IOC_SUBVOL_SETFLAGS ioctl
  Btrfs: allow balance to explicitly allocate chunks as it relocates
  Btrfs: put ENOSPC debugging under a mount option
This commit is contained in:
Linus Torvalds 2011-02-25 14:03:39 -08:00
commit 4660ba63f1
10 changed files with 282 additions and 57 deletions

View file

@ -1254,6 +1254,7 @@ struct btrfs_root {
#define BTRFS_MOUNT_SPACE_CACHE (1 << 12)
#define BTRFS_MOUNT_CLEAR_CACHE (1 << 13)
#define BTRFS_MOUNT_USER_SUBVOL_RM_ALLOWED (1 << 14)
#define BTRFS_MOUNT_ENOSPC_DEBUG (1 << 15)
#define btrfs_clear_opt(o, opt) ((o) &= ~BTRFS_MOUNT_##opt)
#define btrfs_set_opt(o, opt) ((o) |= BTRFS_MOUNT_##opt)
@ -2218,6 +2219,8 @@ int btrfs_error_unpin_extent_range(struct btrfs_root *root,
u64 start, u64 end);
int btrfs_error_discard_extent(struct btrfs_root *root, u64 bytenr,
u64 num_bytes);
int btrfs_force_chunk_alloc(struct btrfs_trans_handle *trans,
struct btrfs_root *root, u64 type);
/* ctree.c */
int btrfs_bin_search(struct extent_buffer *eb, struct btrfs_key *key,

View file

@ -5376,7 +5376,7 @@ again:
num_bytes, data, 1);
goto again;
}
if (ret == -ENOSPC) {
if (ret == -ENOSPC && btrfs_test_opt(root, ENOSPC_DEBUG)) {
struct btrfs_space_info *sinfo;
sinfo = __find_space_info(root->fs_info, data);
@ -8065,6 +8065,13 @@ out:
return ret;
}
/*
 * Ask the chunk allocator for a new chunk of the given block group type.
 *
 * 'type' is first translated through get_alloc_profile() and the result is
 * handed to do_chunk_alloc() together with a fixed 2MB size argument.
 * The trailing 1 is presumably the "force" flag of do_chunk_alloc() --
 * confirm against its definition.
 *
 * Returns whatever do_chunk_alloc() returns.
 */
int btrfs_force_chunk_alloc(struct btrfs_trans_handle *trans,
			    struct btrfs_root *root, u64 type)
{
	return do_chunk_alloc(trans, root, 2 * 1024 * 1024,
			      get_alloc_profile(root, type), 1);
}
/*
* helper to account the unused space of all the readonly block groups in the
* list. takes mirrors into account.

View file

@ -1433,12 +1433,13 @@ int extent_clear_unlock_delalloc(struct inode *inode,
*/
u64 count_range_bits(struct extent_io_tree *tree,
u64 *start, u64 search_end, u64 max_bytes,
unsigned long bits)
unsigned long bits, int contig)
{
struct rb_node *node;
struct extent_state *state;
u64 cur_start = *start;
u64 total_bytes = 0;
u64 last = 0;
int found = 0;
if (search_end <= cur_start) {
@ -1463,7 +1464,9 @@ u64 count_range_bits(struct extent_io_tree *tree,
state = rb_entry(node, struct extent_state, rb_node);
if (state->start > search_end)
break;
if (state->end >= cur_start && (state->state & bits)) {
if (contig && found && state->start > last + 1)
break;
if (state->end >= cur_start && (state->state & bits) == bits) {
total_bytes += min(search_end, state->end) + 1 -
max(cur_start, state->start);
if (total_bytes >= max_bytes)
@ -1472,6 +1475,9 @@ u64 count_range_bits(struct extent_io_tree *tree,
*start = state->start;
found = 1;
}
last = state->end;
} else if (contig && found) {
break;
}
node = rb_next(node);
if (!node)
@ -2912,6 +2918,46 @@ out:
return sector;
}
/*
 * fiemap helper: fiemap does not want to be handed holes, so keep looking
 * up extent maps from 'offset' onwards until a real mapping turns up or
 * 'last' is reached.
 *
 * Returns the first extent map that is neither flagged EXTENT_FLAG_VACANCY
 * nor starts at EXTENT_MAP_HOLE, NULL when nothing but holes lies before
 * 'last' (or offset >= last on entry), or the NULL/ERR_PTR value produced
 * by the get_extent callback.  Every hole map that is skipped is released
 * with free_extent_map().
 */
static struct extent_map *get_extent_skip_holes(struct inode *inode,
						u64 offset,
						u64 last,
						get_extent_t *get_extent)
{
	const u64 blocksize = BTRFS_I(inode)->root->sectorsize;
	struct extent_map *map;
	u64 span;

	while (offset < last) {
		/*
		 * remaining distance to 'last', rounded up with mask
		 * arithmetic (assumes sectorsize is a power of two)
		 */
		span = (last - offset + blocksize - 1) & ~(blocksize - 1);

		map = get_extent(inode, NULL, 0, offset, span, 0);
		if (!map || IS_ERR(map))
			return map;

		/* a mapping that is not a hole is what the caller wants */
		if (!test_bit(EXTENT_FLAG_VACANCY, &map->flags) &&
		    map->block_start != EXTENT_MAP_HOLE)
			return map;

		/* hole: drop our reference and resume right after it */
		offset = extent_map_end(map);
		free_extent_map(map);
	}
	return NULL;
}
int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
__u64 start, __u64 len, get_extent_t *get_extent)
{
@ -2921,16 +2967,19 @@ int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
u32 flags = 0;
u32 found_type;
u64 last;
u64 last_for_get_extent = 0;
u64 disko = 0;
u64 isize = i_size_read(inode);
struct btrfs_key found_key;
struct extent_map *em = NULL;
struct extent_state *cached_state = NULL;
struct btrfs_path *path;
struct btrfs_file_extent_item *item;
int end = 0;
u64 em_start = 0, em_len = 0;
u64 em_start = 0;
u64 em_len = 0;
u64 em_end = 0;
unsigned long emflags;
int hole = 0;
if (len == 0)
return -EINVAL;
@ -2940,6 +2989,10 @@ int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
return -ENOMEM;
path->leave_spinning = 1;
/*
* lookup the last file extent. We're not using i_size here
* because there might be preallocation past i_size
*/
ret = btrfs_lookup_file_extent(NULL, BTRFS_I(inode)->root,
path, inode->i_ino, -1, 0);
if (ret < 0) {
@ -2953,18 +3006,38 @@ int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
btrfs_item_key_to_cpu(path->nodes[0], &found_key, path->slots[0]);
found_type = btrfs_key_type(&found_key);
/* No extents, just return */
/* No extents, but there might be delalloc bits */
if (found_key.objectid != inode->i_ino ||
found_type != BTRFS_EXTENT_DATA_KEY) {
btrfs_free_path(path);
return 0;
/* have to trust i_size as the end */
last = (u64)-1;
last_for_get_extent = isize;
} else {
/*
* remember the start of the last extent. There are a
* bunch of different factors that go into the length of the
* extent, so it's much less complex to remember where it started
*/
last = found_key.offset;
last_for_get_extent = last + 1;
}
last = found_key.offset;
btrfs_free_path(path);
/*
* we might have some extents allocated but more delalloc past those
* extents. so, we trust isize unless the start of the last extent is
* beyond isize
*/
if (last < isize) {
last = (u64)-1;
last_for_get_extent = isize;
}
lock_extent_bits(&BTRFS_I(inode)->io_tree, start, start + len, 0,
&cached_state, GFP_NOFS);
em = get_extent(inode, NULL, 0, off, max - off, 0);
em = get_extent_skip_holes(inode, off, last_for_get_extent,
get_extent);
if (!em)
goto out;
if (IS_ERR(em)) {
@ -2973,19 +3046,14 @@ int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
}
while (!end) {
hole = 0;
off = em->start + em->len;
off = extent_map_end(em);
if (off >= max)
end = 1;
if (em->block_start == EXTENT_MAP_HOLE) {
hole = 1;
goto next;
}
em_start = em->start;
em_len = em->len;
em_end = extent_map_end(em);
emflags = em->flags;
disko = 0;
flags = 0;
@ -3004,37 +3072,29 @@ int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags))
flags |= FIEMAP_EXTENT_ENCODED;
next:
emflags = em->flags;
free_extent_map(em);
em = NULL;
if (!end) {
em = get_extent(inode, NULL, 0, off, max - off, 0);
if (!em)
goto out;
if (IS_ERR(em)) {
ret = PTR_ERR(em);
goto out;
}
emflags = em->flags;
}
if (test_bit(EXTENT_FLAG_VACANCY, &emflags)) {
if ((em_start >= last) || em_len == (u64)-1 ||
(last == (u64)-1 && isize <= em_end)) {
flags |= FIEMAP_EXTENT_LAST;
end = 1;
}
if (em_start == last) {
/* now scan forward to see if this is really the last extent. */
em = get_extent_skip_holes(inode, off, last_for_get_extent,
get_extent);
if (IS_ERR(em)) {
ret = PTR_ERR(em);
goto out;
}
if (!em) {
flags |= FIEMAP_EXTENT_LAST;
end = 1;
}
if (!hole) {
ret = fiemap_fill_next_extent(fieinfo, em_start, disko,
em_len, flags);
if (ret)
goto out_free;
}
ret = fiemap_fill_next_extent(fieinfo, em_start, disko,
em_len, flags);
if (ret)
goto out_free;
}
out_free:
free_extent_map(em);

View file

@ -191,7 +191,7 @@ void extent_io_exit(void);
u64 count_range_bits(struct extent_io_tree *tree,
u64 *start, u64 search_end,
u64 max_bytes, unsigned long bits);
u64 max_bytes, unsigned long bits, int contig);
void free_extent_state(struct extent_state *state);
int test_range_bit(struct extent_io_tree *tree, u64 start, u64 end,

View file

@ -1913,7 +1913,7 @@ static int btrfs_clean_io_failures(struct inode *inode, u64 start)
private = 0;
if (count_range_bits(&BTRFS_I(inode)->io_failure_tree, &private,
(u64)-1, 1, EXTENT_DIRTY)) {
(u64)-1, 1, EXTENT_DIRTY, 0)) {
ret = get_state_private(&BTRFS_I(inode)->io_failure_tree,
start, &private_failure);
if (ret == 0) {
@ -5280,6 +5280,128 @@ out:
return em;
}
/*
 * get_extent callback used for fiemap: like btrfs_get_extent(), but when
 * the lookup yields a hole (or nothing at all) the inode's io_tree is
 * also searched for EXTENT_DELALLOC ranges, so that delalloc bytes are
 * reported instead of being hidden behind the hole.
 *
 * Arguments are passed straight through to btrfs_get_extent(); start/len
 * additionally bound the delalloc search.
 *
 * Returns one of:
 *  - the map from btrfs_get_extent() when it is an error pointer or is
 *    not a hole;
 *  - the original hole map (possibly NULL) when no delalloc overlaps
 *    the requested range;
 *  - a newly allocated map describing either the part of the hole that
 *    precedes the delalloc range, or the delalloc range itself with
 *    block_start set to EXTENT_MAP_DELALLOC;
 *  - ERR_PTR(-ENOMEM) when the new map cannot be allocated.
 */
struct extent_map *btrfs_get_extent_fiemap(struct inode *inode, struct page *page,
size_t pg_offset, u64 start, u64 len,
int create)
{
struct extent_map *em;
struct extent_map *hole_em = NULL;
u64 range_start = start;
u64 end;
u64 found;
u64 found_end;
int err = 0;
em = btrfs_get_extent(inode, page, pg_offset, start, len, create);
if (IS_ERR(em))
return em;
if (em) {
/*
* if our em maps to a hole, there might
* actually be delalloc bytes behind it
*/
if (em->block_start != EXTENT_MAP_HOLE)
return em;
else
hole_em = em;
}
/*
 * compute the inclusive end of the requested range;
 * check to see if we've wrapped (len == -1 or similar)
 */
end = start + len;
if (end < start)
end = (u64)-1;
else
end -= 1;
em = NULL;
/*
 * ok, we didn't find anything, let's look for delalloc.  The last
 * argument (1) requests a contiguous run; count_range_bits() may
 * update range_start through the pointer it is given.
 */
found = count_range_bits(&BTRFS_I(inode)->io_tree, &range_start,
end, len, EXTENT_DELALLOC, 1);
found_end = range_start + found;
/* clamp if the end of the delalloc run wrapped past u64 */
if (found_end < range_start)
found_end = (u64)-1;
/*
* we didn't find anything useful, return
* the original results from get_extent()
*/
if (range_start > end || found_end <= start) {
em = hole_em;
/* hole_em is handed to the caller, don't free it at 'out' */
hole_em = NULL;
goto out;
}
/* adjust the range_start to make sure it doesn't
* go backwards from the start they passed in
*/
range_start = max(start,range_start);
found = found_end - range_start;
if (found > 0) {
u64 hole_start = start;
u64 hole_len = len;
em = alloc_extent_map(GFP_NOFS);
if (!em) {
err = -ENOMEM;
goto out;
}
/*
* when btrfs_get_extent can't find anything it
* returns one huge hole
*
* make sure what it found really fits our range, and
* adjust to make sure it is based on the start from
* the caller
*/
if (hole_em) {
u64 calc_end = extent_map_end(hole_em);
if (calc_end <= start || (hole_em->start > end)) {
/* the hole lies entirely outside [start, end]: ignore it */
free_extent_map(hole_em);
hole_em = NULL;
} else {
hole_start = max(hole_em->start, start);
hole_len = calc_end - hole_start;
}
}
em->bdev = NULL;
if (hole_em && range_start > hole_start) {
/* our hole starts before our delalloc, so we
* have to return just the parts of the hole
* that go until the delalloc starts
*/
em->len = min(hole_len,
range_start - hole_start);
em->start = hole_start;
em->orig_start = hole_start;
/*
* don't adjust block start at all,
* it is fixed at EXTENT_MAP_HOLE
*/
em->block_start = hole_em->block_start;
em->block_len = hole_len;
} else {
/* report the delalloc range itself */
em->start = range_start;
em->len = found;
em->orig_start = range_start;
em->block_start = EXTENT_MAP_DELALLOC;
em->block_len = found;
}
} else if (hole_em) {
/* no delalloc bytes left after clamping: return the hole as-is */
return hole_em;
}
out:
/*
 * hole_em and em may be NULL here; NOTE(review): relies on
 * free_extent_map() accepting NULL -- confirm.
 */
free_extent_map(hole_em);
if (err) {
free_extent_map(em);
return ERR_PTR(err);
}
return em;
}
static struct extent_map *btrfs_new_extent_direct(struct inode *inode,
u64 start, u64 len)
{
@ -6102,7 +6224,7 @@ out:
static int btrfs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
__u64 start, __u64 len)
{
return extent_fiemap(inode, fieinfo, start, len, btrfs_get_extent);
return extent_fiemap(inode, fieinfo, start, len, btrfs_get_extent_fiemap);
}
int btrfs_readpage(struct file *file, struct page *page)

View file

@ -1071,12 +1071,15 @@ static noinline int btrfs_ioctl_subvol_setflags(struct file *file,
if (copy_from_user(&flags, arg, sizeof(flags)))
return -EFAULT;
if (flags & ~BTRFS_SUBVOL_CREATE_ASYNC)
if (flags & BTRFS_SUBVOL_CREATE_ASYNC)
return -EINVAL;
if (flags & ~BTRFS_SUBVOL_RDONLY)
return -EOPNOTSUPP;
if (!is_owner_or_cap(inode))
return -EACCES;
down_write(&root->fs_info->subvol_sem);
/* nothing to do */
@ -1097,7 +1100,7 @@ static noinline int btrfs_ioctl_subvol_setflags(struct file *file,
goto out_reset;
}
ret = btrfs_update_root(trans, root,
ret = btrfs_update_root(trans, root->fs_info->tree_root,
&root->root_key, &root->root_item);
btrfs_commit_transaction(trans, root);

View file

@ -280,6 +280,7 @@ static int lzo_decompress_biovec(struct list_head *ws,
unsigned long tot_out;
unsigned long tot_len;
char *buf;
bool may_late_unmap, need_unmap;
data_in = kmap(pages_in[0]);
tot_len = read_compress_length(data_in);
@ -300,11 +301,13 @@ static int lzo_decompress_biovec(struct list_head *ws,
tot_in += in_len;
working_bytes = in_len;
may_late_unmap = need_unmap = false;
/* fast path: avoid using the working buffer */
if (in_page_bytes_left >= in_len) {
buf = data_in + in_offset;
bytes = in_len;
may_late_unmap = true;
goto cont;
}
@ -329,14 +332,17 @@ cont:
if (working_bytes == 0 && tot_in >= tot_len)
break;
kunmap(pages_in[page_in_index]);
page_in_index++;
if (page_in_index >= total_pages_in) {
if (page_in_index + 1 >= total_pages_in) {
ret = -1;
data_in = NULL;
goto done;
}
data_in = kmap(pages_in[page_in_index]);
if (may_late_unmap)
need_unmap = true;
else
kunmap(pages_in[page_in_index]);
data_in = kmap(pages_in[++page_in_index]);
in_page_bytes_left = PAGE_CACHE_SIZE;
in_offset = 0;
@ -346,6 +352,8 @@ cont:
out_len = lzo1x_worst_compress(PAGE_CACHE_SIZE);
ret = lzo1x_decompress_safe(buf, in_len, workspace->buf,
&out_len);
if (need_unmap)
kunmap(pages_in[page_in_index - 1]);
if (ret != LZO_E_OK) {
printk(KERN_WARNING "btrfs decompress failed\n");
ret = -1;
@ -363,8 +371,7 @@ cont:
break;
}
done:
if (data_in)
kunmap(pages_in[page_in_index]);
kunmap(pages_in[page_in_index]);
return ret;
}

View file

@ -3654,6 +3654,7 @@ static noinline_for_stack int relocate_block_group(struct reloc_control *rc)
u32 item_size;
int ret;
int err = 0;
int progress = 0;
path = btrfs_alloc_path();
if (!path)
@ -3666,9 +3667,10 @@ static noinline_for_stack int relocate_block_group(struct reloc_control *rc)
}
while (1) {
progress++;
trans = btrfs_start_transaction(rc->extent_root, 0);
BUG_ON(IS_ERR(trans));
restart:
if (update_backref_cache(trans, &rc->backref_cache)) {
btrfs_end_transaction(trans, rc->extent_root);
continue;
@ -3781,6 +3783,15 @@ static noinline_for_stack int relocate_block_group(struct reloc_control *rc)
}
}
}
if (trans && progress && err == -ENOSPC) {
ret = btrfs_force_chunk_alloc(trans, rc->extent_root,
rc->block_group->flags);
if (ret == 0) {
err = 0;
progress = 0;
goto restart;
}
}
btrfs_release_path(rc->extent_root, path);
clear_extent_bits(&rc->processed_blocks, 0, (u64)-1, EXTENT_DIRTY,

View file

@ -155,7 +155,8 @@ enum {
Opt_nossd, Opt_ssd_spread, Opt_thread_pool, Opt_noacl, Opt_compress,
Opt_compress_type, Opt_compress_force, Opt_compress_force_type,
Opt_notreelog, Opt_ratio, Opt_flushoncommit, Opt_discard,
Opt_space_cache, Opt_clear_cache, Opt_user_subvol_rm_allowed, Opt_err,
Opt_space_cache, Opt_clear_cache, Opt_user_subvol_rm_allowed,
Opt_enospc_debug, Opt_err,
};
static match_table_t tokens = {
@ -184,6 +185,7 @@ static match_table_t tokens = {
{Opt_space_cache, "space_cache"},
{Opt_clear_cache, "clear_cache"},
{Opt_user_subvol_rm_allowed, "user_subvol_rm_allowed"},
{Opt_enospc_debug, "enospc_debug"},
{Opt_err, NULL},
};
@ -358,6 +360,9 @@ int btrfs_parse_options(struct btrfs_root *root, char *options)
case Opt_user_subvol_rm_allowed:
btrfs_set_opt(info->mount_opt, USER_SUBVOL_RM_ALLOWED);
break;
case Opt_enospc_debug:
btrfs_set_opt(info->mount_opt, ENOSPC_DEBUG);
break;
case Opt_err:
printk(KERN_INFO "btrfs: unrecognized mount option "
"'%s'\n", p);

View file

@ -1338,11 +1338,11 @@ int btrfs_rm_device(struct btrfs_root *root, char *device_path)
ret = btrfs_shrink_device(device, 0);
if (ret)
goto error_brelse;
goto error_undo;
ret = btrfs_rm_dev_item(root->fs_info->chunk_root, device);
if (ret)
goto error_brelse;
goto error_undo;
device->in_fs_metadata = 0;
@ -1416,6 +1416,13 @@ out:
mutex_unlock(&root->fs_info->volume_mutex);
mutex_unlock(&uuid_mutex);
return ret;
error_undo:
if (device->writeable) {
list_add(&device->dev_alloc_list,
&root->fs_info->fs_devices->alloc_list);
root->fs_info->fs_devices->rw_devices++;
}
goto error_brelse;
}
/*
@ -1633,7 +1640,7 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path)
device->dev_root = root->fs_info->dev_root;
device->bdev = bdev;
device->in_fs_metadata = 1;
device->mode = 0;
device->mode = FMODE_EXCL;
set_blocksize(device->bdev, 4096);
if (seeding_dev) {