6e80d4f8c4
block_group removal is a little tricky. It can race with the extent
allocator, the cleaner thread, and balancing. The current path is for a
block_group to be added to the unused_bgs list. Then, when the cleaner
thread comes around, it starts a transaction and then proceeds with
removing the block_group. Extents that are pinned are subsequently
removed from the pinned trees and then eventually a discard is issued
for the entire block_group.

Async discard introduces another player into the game, the discard
workqueue. While it has none of the racing issues, the new problem is
ensuring we don't leave free space untrimmed prior to forgetting the
block_group. This is handled by placing fully free block_groups on a
separate discard queue. This is necessary to maintain discarding order
as in the future we will slowly trim even fully free block_groups. The
ordering helps us make progress on the same block_group rather than,
say, the last fully freed block_group, or having to search through the
fully freed block_groups at the beginning of a list and insert after.

The new order of events is: a fully freed block_group gets placed on
the unused discard queue first. Once it's processed, it will be placed
on the unused_bgs list and then the original sequence of events will
happen, just without the final whole block_group discard.

The mount flags can change when processing unused_bgs, so when flipping
from DISCARD to DISCARD_ASYNC, the unused_bgs must be punted to the
discard_list to be trimmed. If we flip off DISCARD_ASYNC, we punt free
block_groups on the discard_list to the unused_bg queue, which will do
the final discard for us.

Reviewed-by: Josef Bacik <josef@toxicpanda.com>
Signed-off-by: Dennis Zhou <dennis@kernel.org>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
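A minimal sketch of the punt when DISCARD_ASYNC gets flipped on,
assuming the helper btrfs_discard_queue_work() and the
unused_bgs_lock/unused_bgs/bg_list names; it illustrates the behavior
described above rather than reproducing the patch itself:

	/*
	 * Sketch: once DISCARD_ASYNC is enabled, everything sitting on
	 * unused_bgs is handed to the async discard machinery instead
	 * of being removed with a final synchronous discard.
	 */
	void sketch_punt_unused_bgs(struct btrfs_fs_info *fs_info)
	{
		struct btrfs_block_group *block_group, *next;

		spin_lock(&fs_info->unused_bgs_lock);
		list_for_each_entry_safe(block_group, next,
					 &fs_info->unused_bgs, bg_list) {
			list_del_init(&block_group->bg_list);
			/* The discard workqueue now owns trimming this bg. */
			btrfs_discard_queue_work(&fs_info->discard_ctl,
						 block_group);
		}
		spin_unlock(&fs_info->unused_bgs_lock);
	}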
/* SPDX-License-Identifier: GPL-2.0 */
/*
 * Copyright (C) 2009 Oracle. All rights reserved.
 */

#ifndef BTRFS_FREE_SPACE_CACHE_H
#define BTRFS_FREE_SPACE_CACHE_H

/*
 * This is the trim state of an extent or bitmap.
 *
 * BTRFS_TRIM_STATE_TRIMMING is special and used to maintain the state of a
 * bitmap as we may need several trims to fully trim a single bitmap entry.
 * This is reset should any free space other than trimmed space be added to
 * the bitmap.
 */
enum btrfs_trim_state {
	BTRFS_TRIM_STATE_UNTRIMMED,
	BTRFS_TRIM_STATE_TRIMMED,
	BTRFS_TRIM_STATE_TRIMMING,
};

struct btrfs_free_space {
	struct rb_node offset_index;
	u64 offset;
	u64 bytes;
	u64 max_extent_size;
	unsigned long *bitmap;
	struct list_head list;
	enum btrfs_trim_state trim_state;
};

static inline bool btrfs_free_space_trimmed(struct btrfs_free_space *info)
{
	return (info->trim_state == BTRFS_TRIM_STATE_TRIMMED);
}

static inline bool btrfs_free_space_trimming_bitmap(
					    struct btrfs_free_space *info)
{
	return (info->trim_state == BTRFS_TRIM_STATE_TRIMMING);
}
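
/*
 * Illustrative sketch, not part of this header: the reset rule from the
 * comment above enum btrfs_trim_state. The helper name is an assumption;
 * the actual logic lives in free-space-cache.c.
 */
static inline void sketch_bitmap_merge_trim_state(
					struct btrfs_free_space *bitmap_info,
					enum btrfs_trim_state added_state)
{
	/*
	 * Adding anything other than already trimmed space invalidates
	 * both TRIMMED and an in-progress TRIMMING state.
	 */
	if (added_state != BTRFS_TRIM_STATE_TRIMMED)
		bitmap_info->trim_state = BTRFS_TRIM_STATE_UNTRIMMED;
}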

struct btrfs_free_space_ctl {
	spinlock_t tree_lock;
	struct rb_root free_space_offset;
	u64 free_space;
	int extents_thresh;
	int free_extents;
	int total_bitmaps;
	int unit;
	u64 start;
	const struct btrfs_free_space_op *op;
	void *private;
	struct mutex cache_writeout_mutex;
	struct list_head trimming_ranges;
};

struct btrfs_free_space_op {
	void (*recalc_thresholds)(struct btrfs_free_space_ctl *ctl);
	bool (*use_bitmap)(struct btrfs_free_space_ctl *ctl,
			   struct btrfs_free_space *info);
};

struct btrfs_io_ctl {
	void *cur, *orig;
	struct page *page;
	struct page **pages;
	struct btrfs_fs_info *fs_info;
	struct inode *inode;
	unsigned long size;
	int index;
	int num_pages;
	int entries;
	int bitmaps;
	unsigned check_crcs:1;
};

struct inode *lookup_free_space_inode(struct btrfs_block_group *block_group,
				      struct btrfs_path *path);
int create_free_space_inode(struct btrfs_trans_handle *trans,
			    struct btrfs_block_group *block_group,
			    struct btrfs_path *path);

int btrfs_check_trunc_cache_free_space(struct btrfs_fs_info *fs_info,
				       struct btrfs_block_rsv *rsv);
int btrfs_truncate_free_space_cache(struct btrfs_trans_handle *trans,
				    struct btrfs_block_group *block_group,
				    struct inode *inode);
int load_free_space_cache(struct btrfs_block_group *block_group);
int btrfs_wait_cache_io(struct btrfs_trans_handle *trans,
			struct btrfs_block_group *block_group,
			struct btrfs_path *path);
int btrfs_write_out_cache(struct btrfs_trans_handle *trans,
			  struct btrfs_block_group *block_group,
			  struct btrfs_path *path);
struct inode *lookup_free_ino_inode(struct btrfs_root *root,
				    struct btrfs_path *path);
int create_free_ino_inode(struct btrfs_root *root,
			  struct btrfs_trans_handle *trans,
			  struct btrfs_path *path);
int load_free_ino_cache(struct btrfs_fs_info *fs_info,
			struct btrfs_root *root);
int btrfs_write_out_ino_cache(struct btrfs_root *root,
			      struct btrfs_trans_handle *trans,
			      struct btrfs_path *path,
			      struct inode *inode);

void btrfs_init_free_space_ctl(struct btrfs_block_group *block_group);
int __btrfs_add_free_space(struct btrfs_fs_info *fs_info,
			   struct btrfs_free_space_ctl *ctl,
			   u64 bytenr, u64 size,
			   enum btrfs_trim_state trim_state);
int btrfs_add_free_space(struct btrfs_block_group *block_group,
			 u64 bytenr, u64 size);
int btrfs_add_free_space_async_trimmed(struct btrfs_block_group *block_group,
				       u64 bytenr, u64 size);
int btrfs_remove_free_space(struct btrfs_block_group *block_group,
			    u64 bytenr, u64 size);
void __btrfs_remove_free_space_cache(struct btrfs_free_space_ctl *ctl);
void btrfs_remove_free_space_cache(struct btrfs_block_group *block_group);
bool btrfs_is_free_space_trimmed(struct btrfs_block_group *block_group);
u64 btrfs_find_space_for_alloc(struct btrfs_block_group *block_group,
			       u64 offset, u64 bytes, u64 empty_size,
			       u64 *max_extent_size);
u64 btrfs_find_ino_for_alloc(struct btrfs_root *fs_root);
void btrfs_dump_free_space(struct btrfs_block_group *block_group,
			   u64 bytes);
int btrfs_find_space_cluster(struct btrfs_block_group *block_group,
			     struct btrfs_free_cluster *cluster,
			     u64 offset, u64 bytes, u64 empty_size);
void btrfs_init_free_cluster(struct btrfs_free_cluster *cluster);
u64 btrfs_alloc_from_cluster(struct btrfs_block_group *block_group,
			     struct btrfs_free_cluster *cluster, u64 bytes,
			     u64 min_start, u64 *max_extent_size);
int btrfs_return_cluster_to_free_space(
			       struct btrfs_block_group *block_group,
			       struct btrfs_free_cluster *cluster);
int btrfs_trim_block_group(struct btrfs_block_group *block_group,
			   u64 *trimmed, u64 start, u64 end, u64 minlen);
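
/*
 * Illustrative sketch, not part of this header: how a caller returning
 * freed extents might choose between the two add_free_space variants
 * above. Assumes btrfs_test_opt() and struct btrfs_fs_info are visible
 * (ctree.h); the function name and call site are made up for
 * illustration.
 */
static inline int sketch_return_free_space(struct btrfs_fs_info *fs_info,
					   struct btrfs_block_group *cache,
					   u64 start, u64 len)
{
	/*
	 * With async discard, add the space as untrimmed and let the
	 * discard workqueue trim it later; otherwise add it as usual.
	 */
	if (btrfs_test_opt(fs_info, DISCARD_ASYNC))
		return btrfs_add_free_space_async_trimmed(cache, start, len);
	return btrfs_add_free_space(cache, start, len);
}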

/* Support functions for running our sanity tests */
#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
int test_add_free_space_entry(struct btrfs_block_group *cache,
			      u64 offset, u64 bytes, bool bitmap);
int test_check_exists(struct btrfs_block_group *cache, u64 offset, u64 bytes);
#endif

#endif