Merge branch 'linux-next' of git://git.kernel.org/pub/scm/linux/kernel/git/jlbec/ocfs2
* 'linux-next' of git://git.kernel.org/pub/scm/linux/kernel/git/jlbec/ocfs2: (28 commits) Ocfs2: Teach local-mounted ocfs2 to handle unwritten_extents correctly. ocfs2/dlm: Do not migrate resource to a node that is leaving the domain ocfs2/dlm: Add new dlm message DLM_BEGIN_EXIT_DOMAIN_MSG Ocfs2/move_extents: Set several trivial constraints for threshold. Ocfs2/move_extents: Let defrag handle partial extent moving. Ocfs2/move_extents: move/defrag extents within a certain range. Ocfs2/move_extents: helper to calculate the defraging length in one run. Ocfs2/move_extents: move entire/partial extent. Ocfs2/move_extents: helpers to update the group descriptor and global bitmap inode. Ocfs2/move_extents: helper to probe a proper region to move in an alloc group. Ocfs2/move_extents: helper to validate and adjust moving goal. Ocfs2/move_extents: find the victim alloc group, where the given #blk fits. Ocfs2/move_extents: defrag a range of extent. Ocfs2/move_extents: move a range of extent. Ocfs2/move_extents: lock allocators and reserve metadata blocks and data clusters for extents moving. Ocfs2/move_extents: Add basic framework and source files for extent moving. Ocfs2/move_extents: Adding new ioctl code 'OCFS2_IOC_MOVE_EXT' to ocfs2. Ocfs2/refcounttree: Publicize couple of funcs from refcounttree.c Ocfs2: Add a new code 'OCFS2_INFO_FREEFRAG' for o2info ioctl. Ocfs2: Add a new code 'OCFS2_INFO_FREEINODE' for o2info ioctl. ...
This commit is contained in:
commit
a74b81b0af
22 changed files with 2147 additions and 263 deletions
|
@ -1,11 +1,10 @@
|
|||
What: /sys/o2cb symlink
|
||||
Date: Dec 2005
|
||||
KernelVersion: 2.6.16
|
||||
Date: May 2011
|
||||
KernelVersion: 2.6.40
|
||||
Contact: ocfs2-devel@oss.oracle.com
|
||||
Description: This is a symlink: /sys/o2cb to /sys/fs/o2cb. The symlink will
|
||||
be removed when new versions of ocfs2-tools which know to look
|
||||
Description: This is a symlink: /sys/o2cb to /sys/fs/o2cb. The symlink is
|
||||
removed when new versions of ocfs2-tools which know to look
|
||||
in /sys/fs/o2cb are sufficiently prevalent. Don't code new
|
||||
software to look here, it should try /sys/fs/o2cb instead.
|
||||
See Documentation/ABI/stable/o2cb for more information on usage.
|
||||
Users: ocfs2-tools. It's sufficient to mail proposed changes to
|
||||
ocfs2-devel@oss.oracle.com.
|
|
@ -262,16 +262,6 @@ Who: Michael Buesch <mb@bu3sch.de>
|
|||
|
||||
---------------------------
|
||||
|
||||
What: /sys/o2cb symlink
|
||||
When: January 2010
|
||||
Why: /sys/fs/o2cb is the proper location for this information - /sys/o2cb
|
||||
exists as a symlink for backwards compatibility for old versions of
|
||||
ocfs2-tools. 2 years should be sufficient time to phase in new versions
|
||||
which know to look in /sys/fs/o2cb.
|
||||
Who: ocfs2-devel@oss.oracle.com
|
||||
|
||||
---------------------------
|
||||
|
||||
What: Ability for non root users to shm_get hugetlb pages based on mlock
|
||||
resource limits
|
||||
When: 2.6.31
|
||||
|
|
|
@ -46,9 +46,15 @@ errors=panic Panic and halt the machine if an error occurs.
|
|||
intr (*) Allow signals to interrupt cluster operations.
|
||||
nointr Do not allow signals to interrupt cluster
|
||||
operations.
|
||||
noatime Do not update access time.
|
||||
relatime(*) Update atime if the previous atime is older than
|
||||
mtime or ctime
|
||||
strictatime Always update atime, but the minimum update interval
|
||||
is specified by atime_quantum.
|
||||
atime_quantum=60(*) OCFS2 will not update atime unless this number
|
||||
of seconds has passed since the last update.
|
||||
Set to zero to always update atime.
|
||||
Set to zero to always update atime. This option needs
|
||||
to be used together with strictatime.
|
||||
data=ordered (*) All data are forced directly out to the main file
|
||||
system prior to its metadata being committed to the
|
||||
journal.
|
||||
|
|
|
@ -30,6 +30,7 @@ ocfs2-objs := \
|
|||
namei.o \
|
||||
refcounttree.o \
|
||||
reservations.o \
|
||||
move_extents.o \
|
||||
resize.o \
|
||||
slot_map.o \
|
||||
suballoc.o \
|
||||
|
|
166
fs/ocfs2/alloc.c
166
fs/ocfs2/alloc.c
|
@ -29,6 +29,7 @@
|
|||
#include <linux/highmem.h>
|
||||
#include <linux/swap.h>
|
||||
#include <linux/quotaops.h>
|
||||
#include <linux/blkdev.h>
|
||||
|
||||
#include <cluster/masklog.h>
|
||||
|
||||
|
@ -7184,3 +7185,168 @@ out_commit:
|
|||
out:
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
 * Issue a discard for one run of free clusters inside a cluster group.
 *
 * @sb:    super block of the volume being trimmed
 * @gd:    descriptor of the cluster group that contains the run
 * @start: first bit (cluster) of the run, relative to the group's bitmap
 * @count: length of the run in bits (clusters)
 *
 * Returns the status of sb_issue_discard().
 */
static int ocfs2_trim_extent(struct super_block *sb,
			     struct ocfs2_group_desc *gd,
			     u32 start, u32 count)
{
	u64 discard, bcount;
	u64 group = le64_to_cpu(gd->bg_blkno);

	bcount = ocfs2_clusters_to_blocks(sb, count);
	discard = ocfs2_clusters_to_blocks(sb, start);

	/*
	 * Bits of the first cluster group are counted from cluster 0 (see
	 * how ocfs2_trim_fs() computes first_bit for it), yet its descriptor
	 * block number is not 0.  Adding bg_blkno for that group would shift
	 * the discard past the intended range, so only add the group's base
	 * block for the other groups.
	 */
	if (group != OCFS2_SB(sb)->first_cluster_group_blkno)
		discard += group;

	trace_ocfs2_trim_extent(sb, (unsigned long long)discard, bcount);

	return sb_issue_discard(sb, discard, bcount, GFP_NOFS, 0);
}
|
||||
|
||||
/*
 * Walk the bitmap of one cluster group between bits @start and @max and
 * discard every run of clear bits that is at least @minbits long.
 *
 * Returns the number of bits handed to ocfs2_trim_extent(), a negative
 * status from ocfs2_trim_extent(), or -ERESTARTSYS when a fatal signal is
 * pending.  Groups whose free bit count is below @minbits are skipped and
 * yield 0.
 */
static int ocfs2_trim_group(struct super_block *sb,
			    struct ocfs2_group_desc *gd,
			    u32 start, u32 max, u32 minbits)
{
	void *map = gd->bg_bitmap;
	int trimmed = 0;
	int status;
	int end;
	u32 run;

	if (le16_to_cpu(gd->bg_free_bits_count) < minbits)
		return 0;

	trace_ocfs2_trim_group((unsigned long long)le64_to_cpu(gd->bg_blkno),
			       start, max, minbits);

	while (start < max) {
		/* locate the next run of clear bits: [start, end) */
		start = ocfs2_find_next_zero_bit(map, max, start);
		if (start >= max)
			break;
		end = ocfs2_find_next_bit(map, max, start);
		run = end - start;

		if (run >= minbits) {
			status = ocfs2_trim_extent(sb, gd, start, run);
			if (status < 0) {
				mlog_errno(status);
				return status;
			}
			trimmed += run;
		}

		/* bit 'end' is set (or is max), so resume right after it */
		start = end + 1;

		if (fatal_signal_pending(current))
			return -ERESTARTSYS;

		/* the free bits left cannot form another long enough run */
		if ((le16_to_cpu(gd->bg_free_bits_count) - trimmed) < minbits)
			break;
	}

	return trimmed;
}
|
||||
|
||||
int ocfs2_trim_fs(struct super_block *sb, struct fstrim_range *range)
|
||||
{
|
||||
struct ocfs2_super *osb = OCFS2_SB(sb);
|
||||
u64 start, len, trimmed, first_group, last_group, group;
|
||||
int ret, cnt;
|
||||
u32 first_bit, last_bit, minlen;
|
||||
struct buffer_head *main_bm_bh = NULL;
|
||||
struct inode *main_bm_inode = NULL;
|
||||
struct buffer_head *gd_bh = NULL;
|
||||
struct ocfs2_dinode *main_bm;
|
||||
struct ocfs2_group_desc *gd = NULL;
|
||||
|
||||
start = range->start >> osb->s_clustersize_bits;
|
||||
len = range->len >> osb->s_clustersize_bits;
|
||||
minlen = range->minlen >> osb->s_clustersize_bits;
|
||||
trimmed = 0;
|
||||
|
||||
if (!len) {
|
||||
range->len = 0;
|
||||
return 0;
|
||||
}
|
||||
|
||||
if (minlen >= osb->bitmap_cpg)
|
||||
return -EINVAL;
|
||||
|
||||
main_bm_inode = ocfs2_get_system_file_inode(osb,
|
||||
GLOBAL_BITMAP_SYSTEM_INODE,
|
||||
OCFS2_INVALID_SLOT);
|
||||
if (!main_bm_inode) {
|
||||
ret = -EIO;
|
||||
mlog_errno(ret);
|
||||
goto out;
|
||||
}
|
||||
|
||||
mutex_lock(&main_bm_inode->i_mutex);
|
||||
|
||||
ret = ocfs2_inode_lock(main_bm_inode, &main_bm_bh, 0);
|
||||
if (ret < 0) {
|
||||
mlog_errno(ret);
|
||||
goto out_mutex;
|
||||
}
|
||||
main_bm = (struct ocfs2_dinode *)main_bm_bh->b_data;
|
||||
|
||||
if (start >= le32_to_cpu(main_bm->i_clusters)) {
|
||||
ret = -EINVAL;
|
||||
goto out_unlock;
|
||||
}
|
||||
|
||||
if (start + len > le32_to_cpu(main_bm->i_clusters))
|
||||
len = le32_to_cpu(main_bm->i_clusters) - start;
|
||||
|
||||
trace_ocfs2_trim_fs(start, len, minlen);
|
||||
|
||||
/* Determine first and last group to examine based on start and len */
|
||||
first_group = ocfs2_which_cluster_group(main_bm_inode, start);
|
||||
if (first_group == osb->first_cluster_group_blkno)
|
||||
first_bit = start;
|
||||
else
|
||||
first_bit = start - ocfs2_blocks_to_clusters(sb, first_group);
|
||||
last_group = ocfs2_which_cluster_group(main_bm_inode, start + len - 1);
|
||||
last_bit = osb->bitmap_cpg;
|
||||
|
||||
for (group = first_group; group <= last_group;) {
|
||||
if (first_bit + len >= osb->bitmap_cpg)
|
||||
last_bit = osb->bitmap_cpg;
|
||||
else
|
||||
last_bit = first_bit + len;
|
||||
|
||||
ret = ocfs2_read_group_descriptor(main_bm_inode,
|
||||
main_bm, group,
|
||||
&gd_bh);
|
||||
if (ret < 0) {
|
||||
mlog_errno(ret);
|
||||
break;
|
||||
}
|
||||
|
||||
gd = (struct ocfs2_group_desc *)gd_bh->b_data;
|
||||
cnt = ocfs2_trim_group(sb, gd, first_bit, last_bit, minlen);
|
||||
brelse(gd_bh);
|
||||
gd_bh = NULL;
|
||||
if (cnt < 0) {
|
||||
ret = cnt;
|
||||
mlog_errno(ret);
|
||||
break;
|
||||
}
|
||||
|
||||
trimmed += cnt;
|
||||
len -= osb->bitmap_cpg - first_bit;
|
||||
first_bit = 0;
|
||||
if (group == osb->first_cluster_group_blkno)
|
||||
group = ocfs2_clusters_to_blocks(sb, osb->bitmap_cpg);
|
||||
else
|
||||
group += ocfs2_clusters_to_blocks(sb, osb->bitmap_cpg);
|
||||
}
|
||||
range->len = trimmed * sb->s_blocksize;
|
||||
out_unlock:
|
||||
ocfs2_inode_unlock(main_bm_inode, 0);
|
||||
brelse(main_bm_bh);
|
||||
out_mutex:
|
||||
mutex_unlock(&main_bm_inode->i_mutex);
|
||||
iput(main_bm_inode);
|
||||
out:
|
||||
return ret;
|
||||
}
|
||||
|
|
|
@ -239,6 +239,7 @@ int ocfs2_find_leaf(struct ocfs2_caching_info *ci,
|
|||
struct buffer_head **leaf_bh);
|
||||
int ocfs2_search_extent_list(struct ocfs2_extent_list *el, u32 v_cluster);
|
||||
|
||||
int ocfs2_trim_fs(struct super_block *sb, struct fstrim_range *range);
|
||||
/*
|
||||
* Helper function to look at the # of clusters in an extent record.
|
||||
*/
|
||||
|
|
|
@ -57,7 +57,6 @@ static struct kset *o2cb_kset;
|
|||
void o2cb_sys_shutdown(void)
|
||||
{
|
||||
mlog_sys_shutdown();
|
||||
sysfs_remove_link(NULL, "o2cb");
|
||||
kset_unregister(o2cb_kset);
|
||||
}
|
||||
|
||||
|
@ -69,14 +68,6 @@ int o2cb_sys_init(void)
|
|||
if (!o2cb_kset)
|
||||
return -ENOMEM;
|
||||
|
||||
/*
|
||||
* Create this symlink for backwards compatibility with old
|
||||
* versions of ocfs2-tools which look for things in /sys/o2cb.
|
||||
*/
|
||||
ret = sysfs_create_link(NULL, &o2cb_kset->kobj, "o2cb");
|
||||
if (ret)
|
||||
goto error;
|
||||
|
||||
ret = sysfs_create_group(&o2cb_kset->kobj, &o2cb_attr_group);
|
||||
if (ret)
|
||||
goto error;
|
||||
|
|
|
@ -144,6 +144,7 @@ struct dlm_ctxt
|
|||
wait_queue_head_t dlm_join_events;
|
||||
unsigned long live_nodes_map[BITS_TO_LONGS(O2NM_MAX_NODES)];
|
||||
unsigned long domain_map[BITS_TO_LONGS(O2NM_MAX_NODES)];
|
||||
unsigned long exit_domain_map[BITS_TO_LONGS(O2NM_MAX_NODES)];
|
||||
unsigned long recovery_map[BITS_TO_LONGS(O2NM_MAX_NODES)];
|
||||
struct dlm_recovery_ctxt reco;
|
||||
spinlock_t master_lock;
|
||||
|
@ -401,6 +402,18 @@ static inline int dlm_lvb_is_empty(char *lvb)
|
|||
return 1;
|
||||
}
|
||||
|
||||
static inline char *dlm_list_in_text(enum dlm_lockres_list idx)
|
||||
{
|
||||
if (idx == DLM_GRANTED_LIST)
|
||||
return "granted";
|
||||
else if (idx == DLM_CONVERTING_LIST)
|
||||
return "converting";
|
||||
else if (idx == DLM_BLOCKED_LIST)
|
||||
return "blocked";
|
||||
else
|
||||
return "unknown";
|
||||
}
|
||||
|
||||
static inline struct list_head *
|
||||
dlm_list_idx_to_ptr(struct dlm_lock_resource *res, enum dlm_lockres_list idx)
|
||||
{
|
||||
|
@ -448,6 +461,7 @@ enum {
|
|||
DLM_FINALIZE_RECO_MSG = 518,
|
||||
DLM_QUERY_REGION = 519,
|
||||
DLM_QUERY_NODEINFO = 520,
|
||||
DLM_BEGIN_EXIT_DOMAIN_MSG = 521,
|
||||
};
|
||||
|
||||
struct dlm_reco_node_data
|
||||
|
|
|
@ -756,6 +756,12 @@ static int debug_state_print(struct dlm_ctxt *dlm, char *buf, int len)
|
|||
buf + out, len - out);
|
||||
out += snprintf(buf + out, len - out, "\n");
|
||||
|
||||
/* Exit Domain Map: xx xx xx */
|
||||
out += snprintf(buf + out, len - out, "Exit Domain Map: ");
|
||||
out += stringify_nodemap(dlm->exit_domain_map, O2NM_MAX_NODES,
|
||||
buf + out, len - out);
|
||||
out += snprintf(buf + out, len - out, "\n");
|
||||
|
||||
/* Live Map: xx xx xx */
|
||||
out += snprintf(buf + out, len - out, "Live Map: ");
|
||||
out += stringify_nodemap(dlm->live_nodes_map, O2NM_MAX_NODES,
|
||||
|
|
|
@ -132,10 +132,12 @@ static DECLARE_WAIT_QUEUE_HEAD(dlm_domain_events);
|
|||
* New in version 1.1:
|
||||
* - Message DLM_QUERY_REGION added to support global heartbeat
|
||||
* - Message DLM_QUERY_NODEINFO added to allow online node removes
|
||||
* New in version 1.2:
|
||||
* - Message DLM_BEGIN_EXIT_DOMAIN_MSG added to mark start of exit domain
|
||||
*/
|
||||
static const struct dlm_protocol_version dlm_protocol = {
|
||||
.pv_major = 1,
|
||||
.pv_minor = 1,
|
||||
.pv_minor = 2,
|
||||
};
|
||||
|
||||
#define DLM_DOMAIN_BACKOFF_MS 200
|
||||
|
@ -449,14 +451,18 @@ redo_bucket:
|
|||
dropped = dlm_empty_lockres(dlm, res);
|
||||
|
||||
spin_lock(&res->spinlock);
|
||||
__dlm_lockres_calc_usage(dlm, res);
|
||||
iter = res->hash_node.next;
|
||||
if (dropped)
|
||||
__dlm_lockres_calc_usage(dlm, res);
|
||||
else
|
||||
iter = res->hash_node.next;
|
||||
spin_unlock(&res->spinlock);
|
||||
|
||||
dlm_lockres_put(res);
|
||||
|
||||
if (dropped)
|
||||
if (dropped) {
|
||||
cond_resched_lock(&dlm->spinlock);
|
||||
goto redo_bucket;
|
||||
}
|
||||
}
|
||||
cond_resched_lock(&dlm->spinlock);
|
||||
num += n;
|
||||
|
@ -486,6 +492,28 @@ static int dlm_no_joining_node(struct dlm_ctxt *dlm)
|
|||
return ret;
|
||||
}
|
||||
|
||||
/*
 * Network handler for DLM_BEGIN_EXIT_DOMAIN_MSG.
 *
 * Records the sending node in dlm->exit_domain_map under dlm->spinlock.
 * Nothing is done if a reference on the domain cannot be taken.  Always
 * returns 0.
 */
static int dlm_begin_exit_domain_handler(struct o2net_msg *msg, u32 len,
					 void *data, void **ret_data)
{
	struct dlm_exit_domain *req = (struct dlm_exit_domain *) msg->buf;
	struct dlm_ctxt *dlm = data;
	unsigned int leaving;

	if (dlm_grab(dlm)) {
		leaving = req->node_idx;
		mlog(0, "%s: Node %u sent a begin exit domain message\n", dlm->name, leaving);

		spin_lock(&dlm->spinlock);
		set_bit(leaving, dlm->exit_domain_map);
		spin_unlock(&dlm->spinlock);

		dlm_put(dlm);
	}

	return 0;
}
|
||||
|
||||
static void dlm_mark_domain_leaving(struct dlm_ctxt *dlm)
|
||||
{
|
||||
/* Yikes, a double spinlock! I need domain_lock for the dlm
|
||||
|
@ -542,6 +570,7 @@ static int dlm_exit_domain_handler(struct o2net_msg *msg, u32 len, void *data,
|
|||
|
||||
spin_lock(&dlm->spinlock);
|
||||
clear_bit(node, dlm->domain_map);
|
||||
clear_bit(node, dlm->exit_domain_map);
|
||||
__dlm_print_nodes(dlm);
|
||||
|
||||
/* notify anything attached to the heartbeat events */
|
||||
|
@ -554,29 +583,56 @@ static int dlm_exit_domain_handler(struct o2net_msg *msg, u32 len, void *data,
|
|||
return 0;
|
||||
}
|
||||
|
||||
static int dlm_send_one_domain_exit(struct dlm_ctxt *dlm,
|
||||
static int dlm_send_one_domain_exit(struct dlm_ctxt *dlm, u32 msg_type,
|
||||
unsigned int node)
|
||||
{
|
||||
int status;
|
||||
struct dlm_exit_domain leave_msg;
|
||||
|
||||
mlog(0, "Asking node %u if we can leave the domain %s me = %u\n",
|
||||
node, dlm->name, dlm->node_num);
|
||||
mlog(0, "%s: Sending domain exit message %u to node %u\n", dlm->name,
|
||||
msg_type, node);
|
||||
|
||||
memset(&leave_msg, 0, sizeof(leave_msg));
|
||||
leave_msg.node_idx = dlm->node_num;
|
||||
|
||||
status = o2net_send_message(DLM_EXIT_DOMAIN_MSG, dlm->key,
|
||||
&leave_msg, sizeof(leave_msg), node,
|
||||
NULL);
|
||||
status = o2net_send_message(msg_type, dlm->key, &leave_msg,
|
||||
sizeof(leave_msg), node, NULL);
|
||||
if (status < 0)
|
||||
mlog(ML_ERROR, "Error %d when sending message %u (key 0x%x) to "
|
||||
"node %u\n", status, DLM_EXIT_DOMAIN_MSG, dlm->key, node);
|
||||
mlog(0, "status return %d from o2net_send_message\n", status);
|
||||
mlog(ML_ERROR, "Error %d sending domain exit message %u "
|
||||
"to node %u on domain %s\n", status, msg_type, node,
|
||||
dlm->name);
|
||||
|
||||
return status;
|
||||
}
|
||||
|
||||
static void dlm_begin_exit_domain(struct dlm_ctxt *dlm)
|
||||
{
|
||||
int node = -1;
|
||||
|
||||
/* Support for begin exit domain was added in 1.2 */
|
||||
if (dlm->dlm_locking_proto.pv_major == 1 &&
|
||||
dlm->dlm_locking_proto.pv_minor < 2)
|
||||
return;
|
||||
|
||||
/*
|
||||
* Unlike DLM_EXIT_DOMAIN_MSG, DLM_BEGIN_EXIT_DOMAIN_MSG is purely
|
||||
* informational. Meaning if a node does not receive the message,
|
||||
* so be it.
|
||||
*/
|
||||
spin_lock(&dlm->spinlock);
|
||||
while (1) {
|
||||
node = find_next_bit(dlm->domain_map, O2NM_MAX_NODES, node + 1);
|
||||
if (node >= O2NM_MAX_NODES)
|
||||
break;
|
||||
if (node == dlm->node_num)
|
||||
continue;
|
||||
|
||||
spin_unlock(&dlm->spinlock);
|
||||
dlm_send_one_domain_exit(dlm, DLM_BEGIN_EXIT_DOMAIN_MSG, node);
|
||||
spin_lock(&dlm->spinlock);
|
||||
}
|
||||
spin_unlock(&dlm->spinlock);
|
||||
}
|
||||
|
||||
static void dlm_leave_domain(struct dlm_ctxt *dlm)
|
||||
{
|
||||
|
@ -602,7 +658,8 @@ static void dlm_leave_domain(struct dlm_ctxt *dlm)
|
|||
|
||||
clear_node = 1;
|
||||
|
||||
status = dlm_send_one_domain_exit(dlm, node);
|
||||
status = dlm_send_one_domain_exit(dlm, DLM_EXIT_DOMAIN_MSG,
|
||||
node);
|
||||
if (status < 0 &&
|
||||
status != -ENOPROTOOPT &&
|
||||
status != -ENOTCONN) {
|
||||
|
@ -677,6 +734,7 @@ void dlm_unregister_domain(struct dlm_ctxt *dlm)
|
|||
|
||||
if (leave) {
|
||||
mlog(0, "shutting down domain %s\n", dlm->name);
|
||||
dlm_begin_exit_domain(dlm);
|
||||
|
||||
/* We changed dlm state, notify the thread */
|
||||
dlm_kick_thread(dlm, NULL);
|
||||
|
@ -909,6 +967,7 @@ static int dlm_assert_joined_handler(struct o2net_msg *msg, u32 len, void *data,
|
|||
* leftover join state. */
|
||||
BUG_ON(dlm->joining_node != assert->node_idx);
|
||||
set_bit(assert->node_idx, dlm->domain_map);
|
||||
clear_bit(assert->node_idx, dlm->exit_domain_map);
|
||||
__dlm_set_joining_node(dlm, DLM_LOCK_RES_OWNER_UNKNOWN);
|
||||
|
||||
printk(KERN_NOTICE "o2dlm: Node %u joins domain %s\n",
|
||||
|
@ -1793,6 +1852,13 @@ static int dlm_register_domain_handlers(struct dlm_ctxt *dlm)
|
|||
if (status)
|
||||
goto bail;
|
||||
|
||||
status = o2net_register_handler(DLM_BEGIN_EXIT_DOMAIN_MSG, dlm->key,
|
||||
sizeof(struct dlm_exit_domain),
|
||||
dlm_begin_exit_domain_handler,
|
||||
dlm, NULL, &dlm->dlm_domain_handlers);
|
||||
if (status)
|
||||
goto bail;
|
||||
|
||||
bail:
|
||||
if (status)
|
||||
dlm_unregister_domain_handlers(dlm);
|
||||
|
|
|
@ -2339,65 +2339,55 @@ static void dlm_deref_lockres_worker(struct dlm_work_item *item, void *data)
|
|||
dlm_lockres_put(res);
|
||||
}
|
||||
|
||||
/* Checks whether the lockres can be migrated. Returns 0 if yes, < 0
|
||||
* if not. If 0, numlocks is set to the number of locks in the lockres.
|
||||
/*
|
||||
* A migrateable resource is one that is :
|
||||
* 1. locally mastered, and,
|
||||
* 2. zero local locks, and,
|
||||
* 3. one or more non-local locks, or, one or more references
|
||||
* Returns 1 if yes, 0 if not.
|
||||
*/
|
||||
static int dlm_is_lockres_migrateable(struct dlm_ctxt *dlm,
|
||||
struct dlm_lock_resource *res,
|
||||
int *numlocks,
|
||||
int *hasrefs)
|
||||
struct dlm_lock_resource *res)
|
||||
{
|
||||
int ret;
|
||||
int i;
|
||||
int count = 0;
|
||||
enum dlm_lockres_list idx;
|
||||
int nonlocal = 0, node_ref;
|
||||
struct list_head *queue;
|
||||
struct dlm_lock *lock;
|
||||
u64 cookie;
|
||||
|
||||
assert_spin_locked(&res->spinlock);
|
||||
|
||||
*numlocks = 0;
|
||||
*hasrefs = 0;
|
||||
if (res->owner != dlm->node_num)
|
||||
return 0;
|
||||
|
||||
ret = -EINVAL;
|
||||
if (res->owner == DLM_LOCK_RES_OWNER_UNKNOWN) {
|
||||
mlog(0, "cannot migrate lockres with unknown owner!\n");
|
||||
goto leave;
|
||||
}
|
||||
|
||||
if (res->owner != dlm->node_num) {
|
||||
mlog(0, "cannot migrate lockres this node doesn't own!\n");
|
||||
goto leave;
|
||||
}
|
||||
|
||||
ret = 0;
|
||||
queue = &res->granted;
|
||||
for (i = 0; i < 3; i++) {
|
||||
for (idx = DLM_GRANTED_LIST; idx <= DLM_BLOCKED_LIST; idx++) {
|
||||
queue = dlm_list_idx_to_ptr(res, idx);
|
||||
list_for_each_entry(lock, queue, list) {
|
||||
++count;
|
||||
if (lock->ml.node == dlm->node_num) {
|
||||
mlog(0, "found a lock owned by this node still "
|
||||
"on the %s queue! will not migrate this "
|
||||
"lockres\n", (i == 0 ? "granted" :
|
||||
(i == 1 ? "converting" :
|
||||
"blocked")));
|
||||
ret = -ENOTEMPTY;
|
||||
goto leave;
|
||||
if (lock->ml.node != dlm->node_num) {
|
||||
nonlocal++;
|
||||
continue;
|
||||
}
|
||||
cookie = be64_to_cpu(lock->ml.cookie);
|
||||
mlog(0, "%s: Not migrateable res %.*s, lock %u:%llu on "
|
||||
"%s list\n", dlm->name, res->lockname.len,
|
||||
res->lockname.name,
|
||||
dlm_get_lock_cookie_node(cookie),
|
||||
dlm_get_lock_cookie_seq(cookie),
|
||||
dlm_list_in_text(idx));
|
||||
return 0;
|
||||
}
|
||||
queue++;
|
||||
}
|
||||
|
||||
*numlocks = count;
|
||||
if (!nonlocal) {
|
||||
node_ref = find_next_bit(res->refmap, O2NM_MAX_NODES, 0);
|
||||
if (node_ref >= O2NM_MAX_NODES)
|
||||
return 0;
|
||||
}
|
||||
|
||||
count = find_next_bit(res->refmap, O2NM_MAX_NODES, 0);
|
||||
if (count < O2NM_MAX_NODES)
|
||||
*hasrefs = 1;
|
||||
mlog(0, "%s: res %.*s, Migrateable\n", dlm->name, res->lockname.len,
|
||||
res->lockname.name);
|
||||
|
||||
mlog(0, "%s: res %.*s, Migrateable, locks %d, refs %d\n", dlm->name,
|
||||
res->lockname.len, res->lockname.name, *numlocks, *hasrefs);
|
||||
|
||||
leave:
|
||||
return ret;
|
||||
return 1;
|
||||
}
|
||||
|
||||
/*
|
||||
|
@ -2406,8 +2396,7 @@ leave:
|
|||
|
||||
|
||||
static int dlm_migrate_lockres(struct dlm_ctxt *dlm,
|
||||
struct dlm_lock_resource *res,
|
||||
u8 target)
|
||||
struct dlm_lock_resource *res, u8 target)
|
||||
{
|
||||
struct dlm_master_list_entry *mle = NULL;
|
||||
struct dlm_master_list_entry *oldmle = NULL;
|
||||
|
@ -2416,37 +2405,20 @@ static int dlm_migrate_lockres(struct dlm_ctxt *dlm,
|
|||
const char *name;
|
||||
unsigned int namelen;
|
||||
int mle_added = 0;
|
||||
int numlocks, hasrefs;
|
||||
int wake = 0;
|
||||
|
||||
if (!dlm_grab(dlm))
|
||||
return -EINVAL;
|
||||
|
||||
BUG_ON(target == O2NM_MAX_NODES);
|
||||
|
||||
name = res->lockname.name;
|
||||
namelen = res->lockname.len;
|
||||
|
||||
mlog(0, "%s: Migrating %.*s to %u\n", dlm->name, namelen, name, target);
|
||||
|
||||
/*
|
||||
* ensure this lockres is a proper candidate for migration
|
||||
*/
|
||||
spin_lock(&res->spinlock);
|
||||
ret = dlm_is_lockres_migrateable(dlm, res, &numlocks, &hasrefs);
|
||||
if (ret < 0) {
|
||||
spin_unlock(&res->spinlock);
|
||||
goto leave;
|
||||
}
|
||||
spin_unlock(&res->spinlock);
|
||||
|
||||
/* no work to do */
|
||||
if (numlocks == 0 && !hasrefs)
|
||||
goto leave;
|
||||
|
||||
/*
|
||||
* preallocate up front
|
||||
* if this fails, abort
|
||||
*/
|
||||
mlog(0, "%s: Migrating %.*s to node %u\n", dlm->name, namelen, name,
|
||||
target);
|
||||
|
||||
/* preallocate up front. if this fails, abort */
|
||||
ret = -ENOMEM;
|
||||
mres = (struct dlm_migratable_lockres *) __get_free_page(GFP_NOFS);
|
||||
if (!mres) {
|
||||
|
@ -2461,36 +2433,11 @@ static int dlm_migrate_lockres(struct dlm_ctxt *dlm,
|
|||
}
|
||||
ret = 0;
|
||||
|
||||
/*
|
||||
* find a node to migrate the lockres to
|
||||
*/
|
||||
|
||||
spin_lock(&dlm->spinlock);
|
||||
/* pick a new node */
|
||||
if (!test_bit(target, dlm->domain_map) ||
|
||||
target >= O2NM_MAX_NODES) {
|
||||
target = dlm_pick_migration_target(dlm, res);
|
||||
}
|
||||
mlog(0, "%s: res %.*s, Node %u chosen for migration\n", dlm->name,
|
||||
namelen, name, target);
|
||||
|
||||
if (target >= O2NM_MAX_NODES ||
|
||||
!test_bit(target, dlm->domain_map)) {
|
||||
/* target chosen is not alive */
|
||||
ret = -EINVAL;
|
||||
}
|
||||
|
||||
if (ret) {
|
||||
spin_unlock(&dlm->spinlock);
|
||||
goto fail;
|
||||
}
|
||||
|
||||
mlog(0, "continuing with target = %u\n", target);
|
||||
|
||||
/*
|
||||
* clear any existing master requests and
|
||||
* add the migration mle to the list
|
||||
*/
|
||||
spin_lock(&dlm->spinlock);
|
||||
spin_lock(&dlm->master_lock);
|
||||
ret = dlm_add_migration_mle(dlm, res, mle, &oldmle, name,
|
||||
namelen, target, dlm->node_num);
|
||||
|
@ -2531,6 +2478,7 @@ fail:
|
|||
dlm_put_mle(mle);
|
||||
} else if (mle) {
|
||||
kmem_cache_free(dlm_mle_cache, mle);
|
||||
mle = NULL;
|
||||
}
|
||||
goto leave;
|
||||
}
|
||||
|
@ -2652,69 +2600,52 @@ leave:
|
|||
if (wake)
|
||||
wake_up(&res->wq);
|
||||
|
||||
/* TODO: cleanup */
|
||||
if (mres)
|
||||
free_page((unsigned long)mres);
|
||||
|
||||
dlm_put(dlm);
|
||||
|
||||
mlog(0, "returning %d\n", ret);
|
||||
mlog(0, "%s: Migrating %.*s to %u, returns %d\n", dlm->name, namelen,
|
||||
name, target, ret);
|
||||
return ret;
|
||||
}
|
||||
|
||||
#define DLM_MIGRATION_RETRY_MS 100
|
||||
|
||||
/* Should be called only after beginning the domain leave process.
|
||||
/*
|
||||
* Should be called only after beginning the domain leave process.
|
||||
* There should not be any remaining locks on nonlocal lock resources,
|
||||
* and there should be no local locks left on locally mastered resources.
|
||||
*
|
||||
* Called with the dlm spinlock held, may drop it to do migration, but
|
||||
* will re-acquire before exit.
|
||||
*
|
||||
* Returns: 1 if dlm->spinlock was dropped/retaken, 0 if never dropped */
|
||||
* Returns: 1 if dlm->spinlock was dropped/retaken, 0 if never dropped
|
||||
*/
|
||||
int dlm_empty_lockres(struct dlm_ctxt *dlm, struct dlm_lock_resource *res)
|
||||
{
|
||||
int ret;
|
||||
int lock_dropped = 0;
|
||||
int numlocks, hasrefs;
|
||||
u8 target = O2NM_MAX_NODES;
|
||||
|
||||
assert_spin_locked(&dlm->spinlock);
|
||||
|
||||
spin_lock(&res->spinlock);
|
||||
if (res->owner != dlm->node_num) {
|
||||
if (!__dlm_lockres_unused(res)) {
|
||||
mlog(ML_ERROR, "%s:%.*s: this node is not master, "
|
||||
"trying to free this but locks remain\n",
|
||||
dlm->name, res->lockname.len, res->lockname.name);
|
||||
}
|
||||
spin_unlock(&res->spinlock);
|
||||
goto leave;
|
||||
}
|
||||
|
||||
/* No need to migrate a lockres having no locks */
|
||||
ret = dlm_is_lockres_migrateable(dlm, res, &numlocks, &hasrefs);
|
||||
if (ret >= 0 && numlocks == 0 && !hasrefs) {
|
||||
spin_unlock(&res->spinlock);
|
||||
goto leave;
|
||||
}
|
||||
if (dlm_is_lockres_migrateable(dlm, res))
|
||||
target = dlm_pick_migration_target(dlm, res);
|
||||
spin_unlock(&res->spinlock);
|
||||
|
||||
if (target == O2NM_MAX_NODES)
|
||||
goto leave;
|
||||
|
||||
/* Wheee! Migrate lockres here! Will sleep so drop spinlock. */
|
||||
spin_unlock(&dlm->spinlock);
|
||||
lock_dropped = 1;
|
||||
while (1) {
|
||||
ret = dlm_migrate_lockres(dlm, res, O2NM_MAX_NODES);
|
||||
if (ret >= 0)
|
||||
break;
|
||||
if (ret == -ENOTEMPTY) {
|
||||
mlog(ML_ERROR, "lockres %.*s still has local locks!\n",
|
||||
res->lockname.len, res->lockname.name);
|
||||
BUG();
|
||||
}
|
||||
|
||||
mlog(0, "lockres %.*s: migrate failed, "
|
||||
"retrying\n", res->lockname.len,
|
||||
res->lockname.name);
|
||||
msleep(DLM_MIGRATION_RETRY_MS);
|
||||
}
|
||||
ret = dlm_migrate_lockres(dlm, res, target);
|
||||
if (ret)
|
||||
mlog(0, "%s: res %.*s, Migrate to node %u failed with %d\n",
|
||||
dlm->name, res->lockname.len, res->lockname.name,
|
||||
target, ret);
|
||||
spin_lock(&dlm->spinlock);
|
||||
leave:
|
||||
return lock_dropped;
|
||||
|
@ -2898,61 +2829,55 @@ static void dlm_remove_nonlocal_locks(struct dlm_ctxt *dlm,
|
|||
}
|
||||
}
|
||||
|
||||
/* for now this is not too intelligent. we will
|
||||
* need stats to make this do the right thing.
|
||||
* this just finds the first lock on one of the
|
||||
* queues and uses that node as the target. */
|
||||
/*
|
||||
* Pick a node to migrate the lock resource to. This function selects a
|
||||
* potential target based first on the locks and then on refmap. It skips
|
||||
* nodes that are in the process of exiting the domain.
|
||||
*/
|
||||
static u8 dlm_pick_migration_target(struct dlm_ctxt *dlm,
|
||||
struct dlm_lock_resource *res)
|
||||
{
|
||||
int i;
|
||||
enum dlm_lockres_list idx;
|
||||
struct list_head *queue = &res->granted;
|
||||
struct dlm_lock *lock;
|
||||
int nodenum;
|
||||
int noderef;
|
||||
u8 nodenum = O2NM_MAX_NODES;
|
||||
|
||||
assert_spin_locked(&dlm->spinlock);
|
||||
assert_spin_locked(&res->spinlock);
|
||||
|
||||
spin_lock(&res->spinlock);
|
||||
for (i=0; i<3; i++) {
|
||||
/* Go through all the locks */
|
||||
for (idx = DLM_GRANTED_LIST; idx <= DLM_BLOCKED_LIST; idx++) {
|
||||
queue = dlm_list_idx_to_ptr(res, idx);
|
||||
list_for_each_entry(lock, queue, list) {
|
||||
/* up to the caller to make sure this node
|
||||
* is alive */
|
||||
if (lock->ml.node != dlm->node_num) {
|
||||
spin_unlock(&res->spinlock);
|
||||
return lock->ml.node;
|
||||
}
|
||||
if (lock->ml.node == dlm->node_num)
|
||||
continue;
|
||||
if (test_bit(lock->ml.node, dlm->exit_domain_map))
|
||||
continue;
|
||||
nodenum = lock->ml.node;
|
||||
goto bail;
|
||||
}
|
||||
queue++;
|
||||
}
|
||||
|
||||
nodenum = find_next_bit(res->refmap, O2NM_MAX_NODES, 0);
|
||||
if (nodenum < O2NM_MAX_NODES) {
|
||||
spin_unlock(&res->spinlock);
|
||||
return nodenum;
|
||||
}
|
||||
spin_unlock(&res->spinlock);
|
||||
mlog(0, "have not found a suitable target yet! checking domain map\n");
|
||||
|
||||
/* ok now we're getting desperate. pick anyone alive. */
|
||||
nodenum = -1;
|
||||
/* Go thru the refmap */
|
||||
noderef = -1;
|
||||
while (1) {
|
||||
nodenum = find_next_bit(dlm->domain_map,
|
||||
O2NM_MAX_NODES, nodenum+1);
|
||||
mlog(0, "found %d in domain map\n", nodenum);
|
||||
if (nodenum >= O2NM_MAX_NODES)
|
||||
noderef = find_next_bit(res->refmap, O2NM_MAX_NODES,
|
||||
noderef + 1);
|
||||
if (noderef >= O2NM_MAX_NODES)
|
||||
break;
|
||||
if (nodenum != dlm->node_num) {
|
||||
mlog(0, "picking %d\n", nodenum);
|
||||
return nodenum;
|
||||
}
|
||||
if (noderef == dlm->node_num)
|
||||
continue;
|
||||
if (test_bit(noderef, dlm->exit_domain_map))
|
||||
continue;
|
||||
nodenum = noderef;
|
||||
goto bail;
|
||||
}
|
||||
|
||||
mlog(0, "giving up. no master to migrate to\n");
|
||||
return DLM_LOCK_RES_OWNER_UNKNOWN;
|
||||
bail:
|
||||
return nodenum;
|
||||
}
|
||||
|
||||
|
||||
|
||||
/* this is called by the new master once all lockres
|
||||
* data has been received */
|
||||
static int dlm_do_migrate_request(struct dlm_ctxt *dlm,
|
||||
|
|
|
@ -2393,6 +2393,7 @@ static void __dlm_hb_node_down(struct dlm_ctxt *dlm, int idx)
|
|||
|
||||
mlog(0, "node %u being removed from domain map!\n", idx);
|
||||
clear_bit(idx, dlm->domain_map);
|
||||
clear_bit(idx, dlm->exit_domain_map);
|
||||
/* wake up migration waiters if a node goes down.
|
||||
* perhaps later we can genericize this for other waiters. */
|
||||
wake_up(&dlm->migration_wq);
|
||||
|
|
|
@ -88,7 +88,7 @@ struct workqueue_struct *user_dlm_worker;
|
|||
* signifies a bast fired on the lock.
|
||||
*/
|
||||
#define DLMFS_CAPABILITIES "bast stackglue"
|
||||
extern int param_set_dlmfs_capabilities(const char *val,
|
||||
static int param_set_dlmfs_capabilities(const char *val,
|
||||
struct kernel_param *kp)
|
||||
{
|
||||
printk(KERN_ERR "%s: readonly parameter\n", kp->name);
|
||||
|
|
|
@ -2670,6 +2670,7 @@ const struct file_operations ocfs2_fops_no_plocks = {
|
|||
.flock = ocfs2_flock,
|
||||
.splice_read = ocfs2_file_splice_read,
|
||||
.splice_write = ocfs2_file_splice_write,
|
||||
.fallocate = ocfs2_fallocate,
|
||||
};
|
||||
|
||||
const struct file_operations ocfs2_dops_no_plocks = {
|
||||
|
|
492
fs/ocfs2/ioctl.c
492
fs/ocfs2/ioctl.c
|
@ -22,6 +22,11 @@
|
|||
#include "ioctl.h"
|
||||
#include "resize.h"
|
||||
#include "refcounttree.h"
|
||||
#include "sysfile.h"
|
||||
#include "dir.h"
|
||||
#include "buffer_head_io.h"
|
||||
#include "suballoc.h"
|
||||
#include "move_extents.h"
|
||||
|
||||
#include <linux/ext2_fs.h>
|
||||
|
||||
|
@ -35,31 +40,27 @@
|
|||
* be -EFAULT. The error will be returned from the ioctl(2) call. It's
|
||||
* just a best-effort to tell userspace that this request caused the error.
|
||||
*/
|
||||
static inline void __o2info_set_request_error(struct ocfs2_info_request *kreq,
|
||||
static inline void o2info_set_request_error(struct ocfs2_info_request *kreq,
|
||||
struct ocfs2_info_request __user *req)
|
||||
{
|
||||
kreq->ir_flags |= OCFS2_INFO_FL_ERROR;
|
||||
(void)put_user(kreq->ir_flags, (__u32 __user *)&(req->ir_flags));
|
||||
}
|
||||
|
||||
#define o2info_set_request_error(a, b) \
|
||||
__o2info_set_request_error((struct ocfs2_info_request *)&(a), b)
|
||||
|
||||
static inline void __o2info_set_request_filled(struct ocfs2_info_request *req)
|
||||
static inline void o2info_set_request_filled(struct ocfs2_info_request *req)
|
||||
{
|
||||
req->ir_flags |= OCFS2_INFO_FL_FILLED;
|
||||
}
|
||||
|
||||
#define o2info_set_request_filled(a) \
|
||||
__o2info_set_request_filled((struct ocfs2_info_request *)&(a))
|
||||
|
||||
static inline void __o2info_clear_request_filled(struct ocfs2_info_request *req)
|
||||
static inline void o2info_clear_request_filled(struct ocfs2_info_request *req)
|
||||
{
|
||||
req->ir_flags &= ~OCFS2_INFO_FL_FILLED;
|
||||
}
|
||||
|
||||
#define o2info_clear_request_filled(a) \
|
||||
__o2info_clear_request_filled((struct ocfs2_info_request *)&(a))
|
||||
static inline int o2info_coherent(struct ocfs2_info_request *req)
|
||||
{
|
||||
return (!(req->ir_flags & OCFS2_INFO_FL_NON_COHERENT));
|
||||
}
|
||||
|
||||
static int ocfs2_get_inode_attr(struct inode *inode, unsigned *flags)
|
||||
{
|
||||
|
@ -153,7 +154,7 @@ int ocfs2_info_handle_blocksize(struct inode *inode,
|
|||
|
||||
oib.ib_blocksize = inode->i_sb->s_blocksize;
|
||||
|
||||
o2info_set_request_filled(oib);
|
||||
o2info_set_request_filled(&oib.ib_req);
|
||||
|
||||
if (o2info_to_user(oib, req))
|
||||
goto bail;
|
||||
|
@ -161,7 +162,7 @@ int ocfs2_info_handle_blocksize(struct inode *inode,
|
|||
status = 0;
|
||||
bail:
|
||||
if (status)
|
||||
o2info_set_request_error(oib, req);
|
||||
o2info_set_request_error(&oib.ib_req, req);
|
||||
|
||||
return status;
|
||||
}
|
||||
|
@ -178,7 +179,7 @@ int ocfs2_info_handle_clustersize(struct inode *inode,
|
|||
|
||||
oic.ic_clustersize = osb->s_clustersize;
|
||||
|
||||
o2info_set_request_filled(oic);
|
||||
o2info_set_request_filled(&oic.ic_req);
|
||||
|
||||
if (o2info_to_user(oic, req))
|
||||
goto bail;
|
||||
|
@ -186,7 +187,7 @@ int ocfs2_info_handle_clustersize(struct inode *inode,
|
|||
status = 0;
|
||||
bail:
|
||||
if (status)
|
||||
o2info_set_request_error(oic, req);
|
||||
o2info_set_request_error(&oic.ic_req, req);
|
||||
|
||||
return status;
|
||||
}
|
||||
|
@ -203,7 +204,7 @@ int ocfs2_info_handle_maxslots(struct inode *inode,
|
|||
|
||||
oim.im_max_slots = osb->max_slots;
|
||||
|
||||
o2info_set_request_filled(oim);
|
||||
o2info_set_request_filled(&oim.im_req);
|
||||
|
||||
if (o2info_to_user(oim, req))
|
||||
goto bail;
|
||||
|
@ -211,7 +212,7 @@ int ocfs2_info_handle_maxslots(struct inode *inode,
|
|||
status = 0;
|
||||
bail:
|
||||
if (status)
|
||||
o2info_set_request_error(oim, req);
|
||||
o2info_set_request_error(&oim.im_req, req);
|
||||
|
||||
return status;
|
||||
}
|
||||
|
@ -228,7 +229,7 @@ int ocfs2_info_handle_label(struct inode *inode,
|
|||
|
||||
memcpy(oil.il_label, osb->vol_label, OCFS2_MAX_VOL_LABEL_LEN);
|
||||
|
||||
o2info_set_request_filled(oil);
|
||||
o2info_set_request_filled(&oil.il_req);
|
||||
|
||||
if (o2info_to_user(oil, req))
|
||||
goto bail;
|
||||
|
@ -236,7 +237,7 @@ int ocfs2_info_handle_label(struct inode *inode,
|
|||
status = 0;
|
||||
bail:
|
||||
if (status)
|
||||
o2info_set_request_error(oil, req);
|
||||
o2info_set_request_error(&oil.il_req, req);
|
||||
|
||||
return status;
|
||||
}
|
||||
|
@ -253,7 +254,7 @@ int ocfs2_info_handle_uuid(struct inode *inode,
|
|||
|
||||
memcpy(oiu.iu_uuid_str, osb->uuid_str, OCFS2_TEXT_UUID_LEN + 1);
|
||||
|
||||
o2info_set_request_filled(oiu);
|
||||
o2info_set_request_filled(&oiu.iu_req);
|
||||
|
||||
if (o2info_to_user(oiu, req))
|
||||
goto bail;
|
||||
|
@ -261,7 +262,7 @@ int ocfs2_info_handle_uuid(struct inode *inode,
|
|||
status = 0;
|
||||
bail:
|
||||
if (status)
|
||||
o2info_set_request_error(oiu, req);
|
||||
o2info_set_request_error(&oiu.iu_req, req);
|
||||
|
||||
return status;
|
||||
}
|
||||
|
@ -280,7 +281,7 @@ int ocfs2_info_handle_fs_features(struct inode *inode,
|
|||
oif.if_incompat_features = osb->s_feature_incompat;
|
||||
oif.if_ro_compat_features = osb->s_feature_ro_compat;
|
||||
|
||||
o2info_set_request_filled(oif);
|
||||
o2info_set_request_filled(&oif.if_req);
|
||||
|
||||
if (o2info_to_user(oif, req))
|
||||
goto bail;
|
||||
|
@ -288,7 +289,7 @@ int ocfs2_info_handle_fs_features(struct inode *inode,
|
|||
status = 0;
|
||||
bail:
|
||||
if (status)
|
||||
o2info_set_request_error(oif, req);
|
||||
o2info_set_request_error(&oif.if_req, req);
|
||||
|
||||
return status;
|
||||
}
|
||||
|
@ -305,7 +306,7 @@ int ocfs2_info_handle_journal_size(struct inode *inode,
|
|||
|
||||
oij.ij_journal_size = osb->journal->j_inode->i_size;
|
||||
|
||||
o2info_set_request_filled(oij);
|
||||
o2info_set_request_filled(&oij.ij_req);
|
||||
|
||||
if (o2info_to_user(oij, req))
|
||||
goto bail;
|
||||
|
@ -313,7 +314,408 @@ int ocfs2_info_handle_journal_size(struct inode *inode,
|
|||
status = 0;
|
||||
bail:
|
||||
if (status)
|
||||
o2info_set_request_error(oij, req);
|
||||
o2info_set_request_error(&oij.ij_req, req);
|
||||
|
||||
return status;
|
||||
}
|
||||
|
||||
/*
 * Record the total and free counts of one inode allocator in
 * fi->ifi_stat[slot].
 *
 * For a coherent request the allocator dinode is obtained through
 * ocfs2_inode_lock() on @inode_alloc; otherwise the block at @blkno is read
 * with ocfs2_read_blocks_sync().  When @inode_alloc is non-NULL its i_mutex
 * is held for the duration of the call.
 *
 * Returns 0 on success or the negative status of the failed lock/read.
 */
int ocfs2_info_scan_inode_alloc(struct ocfs2_super *osb,
				struct inode *inode_alloc, u64 blkno,
				struct ocfs2_info_freeinode *fi, u32 slot)
{
	struct ocfs2_dinode *di = NULL;
	struct buffer_head *di_bh = NULL;
	int cluster_locked = 0;
	int ret = 0;
	u32 total, in_use;

	if (inode_alloc)
		mutex_lock(&inode_alloc->i_mutex);

	if (!o2info_coherent(&fi->ifi_req)) {
		ret = ocfs2_read_blocks_sync(osb, blkno, 1, &di_bh);
		if (ret < 0) {
			mlog_errno(ret);
			goto bail;
		}
	} else {
		ret = ocfs2_inode_lock(inode_alloc, &di_bh, 0);
		if (ret < 0) {
			mlog_errno(ret);
			goto bail;
		}
		cluster_locked = 1;
	}

	di = (struct ocfs2_dinode *)di_bh->b_data;

	total = le32_to_cpu(di->id1.bitmap1.i_total);
	in_use = le32_to_cpu(di->id1.bitmap1.i_used);

	fi->ifi_stat[slot].lfi_total = total;
	/* 32-bit subtraction, widened on assignment. */
	fi->ifi_stat[slot].lfi_free = total - in_use;

bail:
	if (cluster_locked)
		ocfs2_inode_unlock(inode_alloc, 0);

	if (inode_alloc)
		mutex_unlock(&inode_alloc->i_mutex);

	brelse(di_bh);

	return ret;
}
|
||||
|
||||
int ocfs2_info_handle_freeinode(struct inode *inode,
|
||||
struct ocfs2_info_request __user *req)
|
||||
{
|
||||
u32 i;
|
||||
u64 blkno = -1;
|
||||
char namebuf[40];
|
||||
int status = -EFAULT, type = INODE_ALLOC_SYSTEM_INODE;
|
||||
struct ocfs2_info_freeinode *oifi = NULL;
|
||||
struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
|
||||
struct inode *inode_alloc = NULL;
|
||||
|
||||
oifi = kzalloc(sizeof(struct ocfs2_info_freeinode), GFP_KERNEL);
|
||||
if (!oifi) {
|
||||
status = -ENOMEM;
|
||||
mlog_errno(status);
|
||||
goto bail;
|
||||
}
|
||||
|
||||
if (o2info_from_user(*oifi, req))
|
||||
goto bail;
|
||||
|
||||
oifi->ifi_slotnum = osb->max_slots;
|
||||
|
||||
for (i = 0; i < oifi->ifi_slotnum; i++) {
|
||||
if (o2info_coherent(&oifi->ifi_req)) {
|
||||
inode_alloc = ocfs2_get_system_file_inode(osb, type, i);
|
||||
if (!inode_alloc) {
|
||||
mlog(ML_ERROR, "unable to get alloc inode in "
|
||||
"slot %u\n", i);
|
||||
status = -EIO;
|
||||
goto bail;
|
||||
}
|
||||
} else {
|
||||
ocfs2_sprintf_system_inode_name(namebuf,
|
||||
sizeof(namebuf),
|
||||
type, i);
|
||||
status = ocfs2_lookup_ino_from_name(osb->sys_root_inode,
|
||||
namebuf,
|
||||
strlen(namebuf),
|
||||
&blkno);
|
||||
if (status < 0) {
|
||||
status = -ENOENT;
|
||||
goto bail;
|
||||
}
|
||||
}
|
||||
|
||||
status = ocfs2_info_scan_inode_alloc(osb, inode_alloc, blkno, oifi, i);
|
||||
if (status < 0)
|
||||
goto bail;
|
||||
|
||||
iput(inode_alloc);
|
||||
inode_alloc = NULL;
|
||||
}
|
||||
|
||||
o2info_set_request_filled(&oifi->ifi_req);
|
||||
|
||||
if (o2info_to_user(*oifi, req))
|
||||
goto bail;
|
||||
|
||||
status = 0;
|
||||
bail:
|
||||
if (status)
|
||||
o2info_set_request_error(&oifi->ifi_req, req);
|
||||
|
||||
kfree(oifi);
|
||||
|
||||
return status;
|
||||
}
|
||||
|
||||
static void o2ffg_update_histogram(struct ocfs2_info_free_chunk_list *hist,
|
||||
unsigned int chunksize)
|
||||
{
|
||||
int index;
|
||||
|
||||
index = __ilog2_u32(chunksize);
|
||||
if (index >= OCFS2_INFO_MAX_HIST)
|
||||
index = OCFS2_INFO_MAX_HIST - 1;
|
||||
|
||||
hist->fc_chunks[index]++;
|
||||
hist->fc_clusters[index] += chunksize;
|
||||
}
|
||||
|
||||
static void o2ffg_update_stats(struct ocfs2_info_freefrag_stats *stats,
|
||||
unsigned int chunksize)
|
||||
{
|
||||
if (chunksize > stats->ffs_max)
|
||||
stats->ffs_max = chunksize;
|
||||
|
||||
if (chunksize < stats->ffs_min)
|
||||
stats->ffs_min = chunksize;
|
||||
|
||||
stats->ffs_avg += chunksize;
|
||||
stats->ffs_free_chunks_real++;
|
||||
}
|
||||
|
||||
void ocfs2_info_update_ffg(struct ocfs2_info_freefrag *ffg,
|
||||
unsigned int chunksize)
|
||||
{
|
||||
o2ffg_update_histogram(&(ffg->iff_ffs.ffs_fc_hist), chunksize);
|
||||
o2ffg_update_stats(&(ffg->iff_ffs), chunksize);
|
||||
}
|
||||
|
||||
int ocfs2_info_freefrag_scan_chain(struct ocfs2_super *osb,
|
||||
struct inode *gb_inode,
|
||||
struct ocfs2_dinode *gb_dinode,
|
||||
struct ocfs2_chain_rec *rec,
|
||||
struct ocfs2_info_freefrag *ffg,
|
||||
u32 chunks_in_group)
|
||||
{
|
||||
int status = 0, used;
|
||||
u64 blkno;
|
||||
|
||||
struct buffer_head *bh = NULL;
|
||||
struct ocfs2_group_desc *bg = NULL;
|
||||
|
||||
unsigned int max_bits, num_clusters;
|
||||
unsigned int offset = 0, cluster, chunk;
|
||||
unsigned int chunk_free, last_chunksize = 0;
|
||||
|
||||
if (!le32_to_cpu(rec->c_free))
|
||||
goto bail;
|
||||
|
||||
do {
|
||||
if (!bg)
|
||||
blkno = le64_to_cpu(rec->c_blkno);
|
||||
else
|
||||
blkno = le64_to_cpu(bg->bg_next_group);
|
||||
|
||||
if (bh) {
|
||||
brelse(bh);
|
||||
bh = NULL;
|
||||
}
|
||||
|
||||
if (o2info_coherent(&ffg->iff_req))
|
||||
status = ocfs2_read_group_descriptor(gb_inode,
|
||||
gb_dinode,
|
||||
blkno, &bh);
|
||||
else
|
||||
status = ocfs2_read_blocks_sync(osb, blkno, 1, &bh);
|
||||
|
||||
if (status < 0) {
|
||||
mlog(ML_ERROR, "Can't read the group descriptor # "
|
||||
"%llu from device.", (unsigned long long)blkno);
|
||||
status = -EIO;
|
||||
goto bail;
|
||||
}
|
||||
|
||||
bg = (struct ocfs2_group_desc *)bh->b_data;
|
||||
|
||||
if (!le16_to_cpu(bg->bg_free_bits_count))
|
||||
continue;
|
||||
|
||||
max_bits = le16_to_cpu(bg->bg_bits);
|
||||
offset = 0;
|
||||
|
||||
for (chunk = 0; chunk < chunks_in_group; chunk++) {
|
||||
/*
|
||||
* last chunk may be not an entire one.
|
||||
*/
|
||||
if ((offset + ffg->iff_chunksize) > max_bits)
|
||||
num_clusters = max_bits - offset;
|
||||
else
|
||||
num_clusters = ffg->iff_chunksize;
|
||||
|
||||
chunk_free = 0;
|
||||
for (cluster = 0; cluster < num_clusters; cluster++) {
|
||||
used = ocfs2_test_bit(offset,
|
||||
(unsigned long *)bg->bg_bitmap);
|
||||
/*
|
||||
* - chunk_free counts free clusters in #N chunk.
|
||||
* - last_chunksize records the size(in) clusters
|
||||
* for the last real free chunk being counted.
|
||||
*/
|
||||
if (!used) {
|
||||
last_chunksize++;
|
||||
chunk_free++;
|
||||
}
|
||||
|
||||
if (used && last_chunksize) {
|
||||
ocfs2_info_update_ffg(ffg,
|
||||
last_chunksize);
|
||||
last_chunksize = 0;
|
||||
}
|
||||
|
||||
offset++;
|
||||
}
|
||||
|
||||
if (chunk_free == ffg->iff_chunksize)
|
||||
ffg->iff_ffs.ffs_free_chunks++;
|
||||
}
|
||||
|
||||
/*
|
||||
* need to update the info for last free chunk.
|
||||
*/
|
||||
if (last_chunksize)
|
||||
ocfs2_info_update_ffg(ffg, last_chunksize);
|
||||
|
||||
} while (le64_to_cpu(bg->bg_next_group));
|
||||
|
||||
bail:
|
||||
brelse(bh);
|
||||
|
||||
return status;
|
||||
}
|
||||
|
||||
/*
 * Scan every chain of the global bitmap and fill ffg->iff_ffs with the
 * free-space fragmentation statistics.
 *
 * @osb:      superblock info, used for the non-coherent block read.
 * @gb_inode: global bitmap inode, or NULL for a non-coherent request.  The
 *            reference is dropped with iput() before returning, on every
 *            path.
 * @blkno:    block of the global bitmap dinode, read when the request is
 *            non-coherent.
 * @ffg:      request being filled; iff_chunksize is supplied by the caller.
 *
 * Returns 0 on success, -EINVAL when iff_chunksize is zero or larger than
 * the clusters-per-group of the bitmap, or the negative status of a failed
 * lock, read or chain scan.
 */
int ocfs2_info_freefrag_scan_bitmap(struct ocfs2_super *osb,
				    struct inode *gb_inode, u64 blkno,
				    struct ocfs2_info_freefrag *ffg)
{
	u32 chunks_in_group;
	int status = 0, unlock = 0, i;

	struct buffer_head *bh = NULL;
	struct ocfs2_chain_list *cl = NULL;
	struct ocfs2_chain_rec *rec = NULL;
	struct ocfs2_dinode *gb_dinode = NULL;

	if (gb_inode)
		mutex_lock(&gb_inode->i_mutex);

	if (o2info_coherent(&ffg->iff_req)) {
		status = ocfs2_inode_lock(gb_inode, &bh, 0);
		if (status < 0) {
			mlog_errno(status);
			goto bail;
		}
		unlock = 1;
	} else {
		status = ocfs2_read_blocks_sync(osb, blkno, 1, &bh);
		if (status < 0) {
			mlog_errno(status);
			goto bail;
		}
	}

	gb_dinode = (struct ocfs2_dinode *)bh->b_data;
	cl = &(gb_dinode->id2.i_chain);

	/*
	 * The chunksize (in clusters) from userspace must be non-zero, since
	 * it is used as a divisor below, and must not exceed the number of
	 * clusters in a group.
	 */
	if (!ffg->iff_chunksize ||
	    ffg->iff_chunksize > le16_to_cpu(cl->cl_cpg)) {
		status = -EINVAL;
		goto bail;
	}

	memset(&ffg->iff_ffs, 0, sizeof(struct ocfs2_info_freefrag_stats));

	/* Start the minimum at the largest value so any real chunk lowers it. */
	ffg->iff_ffs.ffs_min = ~0U;
	ffg->iff_ffs.ffs_clusters =
			le32_to_cpu(gb_dinode->id1.bitmap1.i_total);
	ffg->iff_ffs.ffs_free_clusters = ffg->iff_ffs.ffs_clusters -
			le32_to_cpu(gb_dinode->id1.bitmap1.i_used);

	/* One extra chunk covers a possible partial chunk at the group end. */
	chunks_in_group = le16_to_cpu(cl->cl_cpg) / ffg->iff_chunksize + 1;

	for (i = 0; i < le16_to_cpu(cl->cl_next_free_rec); i++) {
		rec = &(cl->cl_recs[i]);
		status = ocfs2_info_freefrag_scan_chain(osb, gb_inode,
							gb_dinode,
							rec, ffg,
							chunks_in_group);
		if (status)
			goto bail;
	}

	/* ffs_avg holds the sum of chunk sizes so far; turn it into the mean. */
	if (ffg->iff_ffs.ffs_free_chunks_real)
		ffg->iff_ffs.ffs_avg = (ffg->iff_ffs.ffs_avg /
					ffg->iff_ffs.ffs_free_chunks_real);
bail:
	if (unlock)
		ocfs2_inode_unlock(gb_inode, 0);

	if (gb_inode) {
		mutex_unlock(&gb_inode->i_mutex);
		iput(gb_inode);
	}

	brelse(bh);

	return status;
}
|
||||
|
||||
int ocfs2_info_handle_freefrag(struct inode *inode,
|
||||
struct ocfs2_info_request __user *req)
|
||||
{
|
||||
u64 blkno = -1;
|
||||
char namebuf[40];
|
||||
int status = -EFAULT, type = GLOBAL_BITMAP_SYSTEM_INODE;
|
||||
|
||||
struct ocfs2_info_freefrag *oiff;
|
||||
struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
|
||||
struct inode *gb_inode = NULL;
|
||||
|
||||
oiff = kzalloc(sizeof(struct ocfs2_info_freefrag), GFP_KERNEL);
|
||||
if (!oiff) {
|
||||
status = -ENOMEM;
|
||||
mlog_errno(status);
|
||||
goto bail;
|
||||
}
|
||||
|
||||
if (o2info_from_user(*oiff, req))
|
||||
goto bail;
|
||||
/*
|
||||
* chunksize from userspace should be power of 2.
|
||||
*/
|
||||
if ((oiff->iff_chunksize & (oiff->iff_chunksize - 1)) ||
|
||||
(!oiff->iff_chunksize)) {
|
||||
status = -EINVAL;
|
||||
goto bail;
|
||||
}
|
||||
|
||||
if (o2info_coherent(&oiff->iff_req)) {
|
||||
gb_inode = ocfs2_get_system_file_inode(osb, type,
|
||||
OCFS2_INVALID_SLOT);
|
||||
if (!gb_inode) {
|
||||
mlog(ML_ERROR, "unable to get global_bitmap inode\n");
|
||||
status = -EIO;
|
||||
goto bail;
|
||||
}
|
||||
} else {
|
||||
ocfs2_sprintf_system_inode_name(namebuf, sizeof(namebuf), type,
|
||||
OCFS2_INVALID_SLOT);
|
||||
status = ocfs2_lookup_ino_from_name(osb->sys_root_inode,
|
||||
namebuf,
|
||||
strlen(namebuf),
|
||||
&blkno);
|
||||
if (status < 0) {
|
||||
status = -ENOENT;
|
||||
goto bail;
|
||||
}
|
||||
}
|
||||
|
||||
status = ocfs2_info_freefrag_scan_bitmap(osb, gb_inode, blkno, oiff);
|
||||
if (status < 0)
|
||||
goto bail;
|
||||
|
||||
o2info_set_request_filled(&oiff->iff_req);
|
||||
|
||||
if (o2info_to_user(*oiff, req))
|
||||
goto bail;
|
||||
|
||||
status = 0;
|
||||
bail:
|
||||
if (status)
|
||||
o2info_set_request_error(&oiff->iff_req, req);
|
||||
|
||||
kfree(oiff);
|
||||
|
||||
return status;
|
||||
}
|
||||
|
@ -327,7 +729,7 @@ int ocfs2_info_handle_unknown(struct inode *inode,
|
|||
if (o2info_from_user(oir, req))
|
||||
goto bail;
|
||||
|
||||
o2info_clear_request_filled(oir);
|
||||
o2info_clear_request_filled(&oir);
|
||||
|
||||
if (o2info_to_user(oir, req))
|
||||
goto bail;
|
||||
|
@ -335,7 +737,7 @@ int ocfs2_info_handle_unknown(struct inode *inode,
|
|||
status = 0;
|
||||
bail:
|
||||
if (status)
|
||||
o2info_set_request_error(oir, req);
|
||||
o2info_set_request_error(&oir, req);
|
||||
|
||||
return status;
|
||||
}
|
||||
|
@ -389,6 +791,14 @@ int ocfs2_info_handle_request(struct inode *inode,
|
|||
if (oir.ir_size == sizeof(struct ocfs2_info_journal_size))
|
||||
status = ocfs2_info_handle_journal_size(inode, req);
|
||||
break;
|
||||
case OCFS2_INFO_FREEINODE:
|
||||
if (oir.ir_size == sizeof(struct ocfs2_info_freeinode))
|
||||
status = ocfs2_info_handle_freeinode(inode, req);
|
||||
break;
|
||||
case OCFS2_INFO_FREEFRAG:
|
||||
if (oir.ir_size == sizeof(struct ocfs2_info_freefrag))
|
||||
status = ocfs2_info_handle_freefrag(inode, req);
|
||||
break;
|
||||
default:
|
||||
status = ocfs2_info_handle_unknown(inode, req);
|
||||
break;
|
||||
|
@ -542,6 +952,31 @@ long ocfs2_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
|
|||
return -EFAULT;
|
||||
|
||||
return ocfs2_info_handle(inode, &info, 0);
|
||||
case FITRIM:
|
||||
{
|
||||
struct super_block *sb = inode->i_sb;
|
||||
struct fstrim_range range;
|
||||
int ret = 0;
|
||||
|
||||
if (!capable(CAP_SYS_ADMIN))
|
||||
return -EPERM;
|
||||
|
||||
if (copy_from_user(&range, (struct fstrim_range *)arg,
|
||||
sizeof(range)))
|
||||
return -EFAULT;
|
||||
|
||||
ret = ocfs2_trim_fs(sb, &range);
|
||||
if (ret < 0)
|
||||
return ret;
|
||||
|
||||
if (copy_to_user((struct fstrim_range *)arg, &range,
|
||||
sizeof(range)))
|
||||
return -EFAULT;
|
||||
|
||||
return 0;
|
||||
}
|
||||
case OCFS2_IOC_MOVE_EXT:
|
||||
return ocfs2_ioctl_move_extents(filp, (void __user *)arg);
|
||||
default:
|
||||
return -ENOTTY;
|
||||
}
|
||||
|
@ -569,6 +1004,7 @@ long ocfs2_compat_ioctl(struct file *file, unsigned cmd, unsigned long arg)
|
|||
case OCFS2_IOC_GROUP_EXTEND:
|
||||
case OCFS2_IOC_GROUP_ADD:
|
||||
case OCFS2_IOC_GROUP_ADD64:
|
||||
case FITRIM:
|
||||
break;
|
||||
case OCFS2_IOC_REFLINK:
|
||||
if (copy_from_user(&args, (struct reflink_arguments *)arg,
|
||||
|
@ -584,6 +1020,8 @@ long ocfs2_compat_ioctl(struct file *file, unsigned cmd, unsigned long arg)
|
|||
return -EFAULT;
|
||||
|
||||
return ocfs2_info_handle(inode, &info, 1);
|
||||
case OCFS2_IOC_MOVE_EXT:
|
||||
break;
|
||||
default:
|
||||
return -ENOIOCTLCMD;
|
||||
}
|
||||
|
|
1153
fs/ocfs2/move_extents.c
Normal file
1153
fs/ocfs2/move_extents.c
Normal file
File diff suppressed because it is too large
Load diff
22
fs/ocfs2/move_extents.h
Normal file
22
fs/ocfs2/move_extents.h
Normal file
|
@ -0,0 +1,22 @@
|
|||
/* -*- mode: c; c-basic-offset: 8; -*-
|
||||
* vim: noexpandtab sw=8 ts=8 sts=0:
|
||||
*
|
||||
* move_extents.h
|
||||
*
|
||||
* Copyright (C) 2011 Oracle. All rights reserved.
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU General Public
|
||||
* License version 2 as published by the Free Software Foundation.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* General Public License for more details.
|
||||
*/
|
||||
#ifndef OCFS2_MOVE_EXTENTS_H
|
||||
#define OCFS2_MOVE_EXTENTS_H
|
||||
|
||||
int ocfs2_ioctl_move_extents(struct file *filp, void __user *argp);
|
||||
|
||||
#endif /* OCFS2_MOVE_EXTENTS_H */
|
|
@ -142,6 +142,38 @@ struct ocfs2_info_journal_size {
|
|||
__u64 ij_journal_size;
|
||||
};
|
||||
|
||||
struct ocfs2_info_freeinode {
|
||||
struct ocfs2_info_request ifi_req;
|
||||
struct ocfs2_info_local_freeinode {
|
||||
__u64 lfi_total;
|
||||
__u64 lfi_free;
|
||||
} ifi_stat[OCFS2_MAX_SLOTS];
|
||||
__u32 ifi_slotnum; /* out */
|
||||
__u32 ifi_pad;
|
||||
};
|
||||
|
||||
#define OCFS2_INFO_MAX_HIST (32)
|
||||
|
||||
struct ocfs2_info_freefrag {
|
||||
struct ocfs2_info_request iff_req;
|
||||
struct ocfs2_info_freefrag_stats { /* (out) */
|
||||
struct ocfs2_info_free_chunk_list {
|
||||
__u32 fc_chunks[OCFS2_INFO_MAX_HIST];
|
||||
__u32 fc_clusters[OCFS2_INFO_MAX_HIST];
|
||||
} ffs_fc_hist;
|
||||
__u32 ffs_clusters;
|
||||
__u32 ffs_free_clusters;
|
||||
__u32 ffs_free_chunks;
|
||||
__u32 ffs_free_chunks_real;
|
||||
__u32 ffs_min; /* Minimum free chunksize in clusters */
|
||||
__u32 ffs_max;
|
||||
__u32 ffs_avg;
|
||||
__u32 ffs_pad;
|
||||
} iff_ffs;
|
||||
__u32 iff_chunksize; /* chunksize in clusters(in) */
|
||||
__u32 iff_pad;
|
||||
};
|
||||
|
||||
/* Codes for ocfs2_info_request */
|
||||
enum ocfs2_info_type {
|
||||
OCFS2_INFO_CLUSTERSIZE = 1,
|
||||
|
@ -151,6 +183,8 @@ enum ocfs2_info_type {
|
|||
OCFS2_INFO_UUID,
|
||||
OCFS2_INFO_FS_FEATURES,
|
||||
OCFS2_INFO_JOURNAL_SIZE,
|
||||
OCFS2_INFO_FREEINODE,
|
||||
OCFS2_INFO_FREEFRAG,
|
||||
OCFS2_INFO_NUM_TYPES
|
||||
};
|
||||
|
||||
|
@ -171,4 +205,38 @@ enum ocfs2_info_type {
|
|||
|
||||
#define OCFS2_IOC_INFO _IOR('o', 5, struct ocfs2_info)
|
||||
|
||||
struct ocfs2_move_extents {
|
||||
/* All values are in bytes */
|
||||
/* in */
|
||||
__u64 me_start; /* Virtual start in the file to move */
|
||||
__u64 me_len; /* Length of the extents to be moved */
|
||||
__u64 me_goal; /* Physical offset of the goal,
|
||||
it's in block unit */
|
||||
__u64 me_threshold; /* Maximum distance from goal or threshold
|
||||
for auto defragmentation */
|
||||
__u64 me_flags; /* Flags for the operation:
|
||||
* - auto defragmentation.
|
||||
* - refcount,xattr cases.
|
||||
*/
|
||||
/* out */
|
||||
__u64 me_moved_len; /* Moved/defraged length */
|
||||
__u64 me_new_offset; /* Resulting physical location */
|
||||
__u32 me_reserved[2]; /* Reserved for future use */
|
||||
};
|
||||
|
||||
#define OCFS2_MOVE_EXT_FL_AUTO_DEFRAG (0x00000001) /* Kernel manages to
|
||||
claim new clusters
|
||||
as the goal place
|
||||
for extents moving */
|
||||
#define OCFS2_MOVE_EXT_FL_PART_DEFRAG (0x00000002) /* Allow partial extent
|
||||
moving, is to make
|
||||
movement less likely
|
||||
to fail, may make fs
|
||||
even more fragmented */
|
||||
#define OCFS2_MOVE_EXT_FL_COMPLETE (0x00000004) /* Move or defragmentation
|
||||
completely gets done.
|
||||
*/
|
||||
|
||||
#define OCFS2_IOC_MOVE_EXT _IOW('o', 6, struct ocfs2_move_extents)
|
||||
|
||||
#endif /* OCFS2_IOCTL_H */
|
||||
|
|
|
@ -688,6 +688,31 @@ TRACE_EVENT(ocfs2_cache_block_dealloc,
|
|||
__entry->blkno, __entry->bit)
|
||||
);
|
||||
|
||||
TRACE_EVENT(ocfs2_trim_extent,
|
||||
TP_PROTO(struct super_block *sb, unsigned long long blk,
|
||||
unsigned long long count),
|
||||
TP_ARGS(sb, blk, count),
|
||||
TP_STRUCT__entry(
|
||||
__field(int, dev_major)
|
||||
__field(int, dev_minor)
|
||||
__field(unsigned long long, blk)
|
||||
__field(__u64, count)
|
||||
),
|
||||
TP_fast_assign(
|
||||
__entry->dev_major = MAJOR(sb->s_dev);
|
||||
__entry->dev_minor = MINOR(sb->s_dev);
|
||||
__entry->blk = blk;
|
||||
__entry->count = count;
|
||||
),
|
||||
TP_printk("%d %d %llu %llu",
|
||||
__entry->dev_major, __entry->dev_minor,
|
||||
__entry->blk, __entry->count)
|
||||
);
|
||||
|
||||
DEFINE_OCFS2_ULL_UINT_UINT_UINT_EVENT(ocfs2_trim_group);
|
||||
|
||||
DEFINE_OCFS2_ULL_ULL_ULL_EVENT(ocfs2_trim_fs);
|
||||
|
||||
/* End of trace events for fs/ocfs2/alloc.c. */
|
||||
|
||||
/* Trace events for fs/ocfs2/localalloc.c. */
|
||||
|
|
|
@ -66,7 +66,7 @@ struct ocfs2_cow_context {
|
|||
u32 *num_clusters,
|
||||
unsigned int *extent_flags);
|
||||
int (*cow_duplicate_clusters)(handle_t *handle,
|
||||
struct ocfs2_cow_context *context,
|
||||
struct file *file,
|
||||
u32 cpos, u32 old_cluster,
|
||||
u32 new_cluster, u32 new_len);
|
||||
};
|
||||
|
@ -2921,20 +2921,21 @@ static int ocfs2_clear_cow_buffer(handle_t *handle, struct buffer_head *bh)
|
|||
return 0;
|
||||
}
|
||||
|
||||
static int ocfs2_duplicate_clusters_by_page(handle_t *handle,
|
||||
struct ocfs2_cow_context *context,
|
||||
u32 cpos, u32 old_cluster,
|
||||
u32 new_cluster, u32 new_len)
|
||||
int ocfs2_duplicate_clusters_by_page(handle_t *handle,
|
||||
struct file *file,
|
||||
u32 cpos, u32 old_cluster,
|
||||
u32 new_cluster, u32 new_len)
|
||||
{
|
||||
int ret = 0, partial;
|
||||
struct ocfs2_caching_info *ci = context->data_et.et_ci;
|
||||
struct inode *inode = file->f_path.dentry->d_inode;
|
||||
struct ocfs2_caching_info *ci = INODE_CACHE(inode);
|
||||
struct super_block *sb = ocfs2_metadata_cache_get_super(ci);
|
||||
u64 new_block = ocfs2_clusters_to_blocks(sb, new_cluster);
|
||||
struct page *page;
|
||||
pgoff_t page_index;
|
||||
unsigned int from, to, readahead_pages;
|
||||
loff_t offset, end, map_end;
|
||||
struct address_space *mapping = context->inode->i_mapping;
|
||||
struct address_space *mapping = inode->i_mapping;
|
||||
|
||||
trace_ocfs2_duplicate_clusters_by_page(cpos, old_cluster,
|
||||
new_cluster, new_len);
|
||||
|
@ -2948,8 +2949,8 @@ static int ocfs2_duplicate_clusters_by_page(handle_t *handle,
|
|||
* We only duplicate pages until we reach the page contains i_size - 1.
|
||||
* So trim 'end' to i_size.
|
||||
*/
|
||||
if (end > i_size_read(context->inode))
|
||||
end = i_size_read(context->inode);
|
||||
if (end > i_size_read(inode))
|
||||
end = i_size_read(inode);
|
||||
|
||||
while (offset < end) {
|
||||
page_index = offset >> PAGE_CACHE_SHIFT;
|
||||
|
@ -2972,10 +2973,9 @@ static int ocfs2_duplicate_clusters_by_page(handle_t *handle,
|
|||
if (PAGE_CACHE_SIZE <= OCFS2_SB(sb)->s_clustersize)
|
||||
BUG_ON(PageDirty(page));
|
||||
|
||||
if (PageReadahead(page) && context->file) {
|
||||
if (PageReadahead(page)) {
|
||||
page_cache_async_readahead(mapping,
|
||||
&context->file->f_ra,
|
||||
context->file,
|
||||
&file->f_ra, file,
|
||||
page, page_index,
|
||||
readahead_pages);
|
||||
}
|
||||
|
@ -2999,8 +2999,7 @@ static int ocfs2_duplicate_clusters_by_page(handle_t *handle,
|
|||
}
|
||||
}
|
||||
|
||||
ocfs2_map_and_dirty_page(context->inode,
|
||||
handle, from, to,
|
||||
ocfs2_map_and_dirty_page(inode, handle, from, to,
|
||||
page, 0, &new_block);
|
||||
mark_page_accessed(page);
|
||||
unlock:
|
||||
|
@ -3015,14 +3014,15 @@ unlock:
|
|||
return ret;
|
||||
}
|
||||
|
||||
static int ocfs2_duplicate_clusters_by_jbd(handle_t *handle,
|
||||
struct ocfs2_cow_context *context,
|
||||
u32 cpos, u32 old_cluster,
|
||||
u32 new_cluster, u32 new_len)
|
||||
int ocfs2_duplicate_clusters_by_jbd(handle_t *handle,
|
||||
struct file *file,
|
||||
u32 cpos, u32 old_cluster,
|
||||
u32 new_cluster, u32 new_len)
|
||||
{
|
||||
int ret = 0;
|
||||
struct super_block *sb = context->inode->i_sb;
|
||||
struct ocfs2_caching_info *ci = context->data_et.et_ci;
|
||||
struct inode *inode = file->f_path.dentry->d_inode;
|
||||
struct super_block *sb = inode->i_sb;
|
||||
struct ocfs2_caching_info *ci = INODE_CACHE(inode);
|
||||
int i, blocks = ocfs2_clusters_to_blocks(sb, new_len);
|
||||
u64 old_block = ocfs2_clusters_to_blocks(sb, old_cluster);
|
||||
u64 new_block = ocfs2_clusters_to_blocks(sb, new_cluster);
|
||||
|
@ -3145,8 +3145,8 @@ static int ocfs2_replace_clusters(handle_t *handle,
|
|||
|
||||
/*If the old clusters is unwritten, no need to duplicate. */
|
||||
if (!(ext_flags & OCFS2_EXT_UNWRITTEN)) {
|
||||
ret = context->cow_duplicate_clusters(handle, context, cpos,
|
||||
old, new, len);
|
||||
ret = context->cow_duplicate_clusters(handle, context->file,
|
||||
cpos, old, new, len);
|
||||
if (ret) {
|
||||
mlog_errno(ret);
|
||||
goto out;
|
||||
|
@ -3162,22 +3162,22 @@ out:
|
|||
return ret;
|
||||
}
|
||||
|
||||
static int ocfs2_cow_sync_writeback(struct super_block *sb,
|
||||
struct ocfs2_cow_context *context,
|
||||
u32 cpos, u32 num_clusters)
|
||||
int ocfs2_cow_sync_writeback(struct super_block *sb,
|
||||
struct inode *inode,
|
||||
u32 cpos, u32 num_clusters)
|
||||
{
|
||||
int ret = 0;
|
||||
loff_t offset, end, map_end;
|
||||
pgoff_t page_index;
|
||||
struct page *page;
|
||||
|
||||
if (ocfs2_should_order_data(context->inode))
|
||||
if (ocfs2_should_order_data(inode))
|
||||
return 0;
|
||||
|
||||
offset = ((loff_t)cpos) << OCFS2_SB(sb)->s_clustersize_bits;
|
||||
end = offset + (num_clusters << OCFS2_SB(sb)->s_clustersize_bits);
|
||||
|
||||
ret = filemap_fdatawrite_range(context->inode->i_mapping,
|
||||
ret = filemap_fdatawrite_range(inode->i_mapping,
|
||||
offset, end - 1);
|
||||
if (ret < 0) {
|
||||
mlog_errno(ret);
|
||||
|
@ -3190,7 +3190,7 @@ static int ocfs2_cow_sync_writeback(struct super_block *sb,
|
|||
if (map_end > end)
|
||||
map_end = end;
|
||||
|
||||
page = find_or_create_page(context->inode->i_mapping,
|
||||
page = find_or_create_page(inode->i_mapping,
|
||||
page_index, GFP_NOFS);
|
||||
BUG_ON(!page);
|
||||
|
||||
|
@ -3349,7 +3349,7 @@ static int ocfs2_make_clusters_writable(struct super_block *sb,
|
|||
* in write-back mode.
|
||||
*/
|
||||
if (context->get_clusters == ocfs2_di_get_clusters) {
|
||||
ret = ocfs2_cow_sync_writeback(sb, context, cpos,
|
||||
ret = ocfs2_cow_sync_writeback(sb, context->inode, cpos,
|
||||
orig_num_clusters);
|
||||
if (ret)
|
||||
mlog_errno(ret);
|
||||
|
|
|
@ -84,6 +84,17 @@ int ocfs2_refcount_cow_xattr(struct inode *inode,
|
|||
struct buffer_head *ref_root_bh,
|
||||
u32 cpos, u32 write_len,
|
||||
struct ocfs2_post_refcount *post);
|
||||
int ocfs2_duplicate_clusters_by_page(handle_t *handle,
|
||||
struct file *file,
|
||||
u32 cpos, u32 old_cluster,
|
||||
u32 new_cluster, u32 new_len);
|
||||
int ocfs2_duplicate_clusters_by_jbd(handle_t *handle,
|
||||
struct file *file,
|
||||
u32 cpos, u32 old_cluster,
|
||||
u32 new_cluster, u32 new_len);
|
||||
int ocfs2_cow_sync_writeback(struct super_block *sb,
|
||||
struct inode *inode,
|
||||
u32 cpos, u32 num_clusters);
|
||||
int ocfs2_add_refcount_flag(struct inode *inode,
|
||||
struct ocfs2_extent_tree *data_et,
|
||||
struct ocfs2_caching_info *ref_ci,
|
||||
|
|
|
@ -1567,7 +1567,7 @@ static int ocfs2_show_options(struct seq_file *s, struct vfsmount *mnt)
|
|||
if (osb->preferred_slot != OCFS2_INVALID_SLOT)
|
||||
seq_printf(s, ",preferred_slot=%d", osb->preferred_slot);
|
||||
|
||||
if (osb->s_atime_quantum != OCFS2_DEFAULT_ATIME_QUANTUM)
|
||||
if (!(mnt->mnt_flags & MNT_NOATIME) && !(mnt->mnt_flags & MNT_RELATIME))
|
||||
seq_printf(s, ",atime_quantum=%u", osb->s_atime_quantum);
|
||||
|
||||
if (osb->osb_commit_interval)
|
||||
|
|
Loading…
Reference in a new issue