hugetlb: handle memory hot-plug events
Register per node hstate attributes only for nodes with memory, as suggested by David Rientjes. With Memory Hotplug, memory can be added to a memoryless node and a node with memory can become memoryless. Therefore, add a memory on/off-line notifier callback to [un]register a node's attributes on transition to/from memoryless state. N.B.: only build, boot, and libhugetlbfs regression testing has been done; i.e., no memory hotplug testing. Signed-off-by: Lee Schermerhorn <lee.schermerhorn@hp.com> Reviewed-by: Andi Kleen <andi@firstfloor.org> Acked-by: David Rientjes <rientjes@google.com> Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com> Cc: Lee Schermerhorn <lee.schermerhorn@hp.com> Cc: Mel Gorman <mel@csn.ul.ie> Cc: Randy Dunlap <randy.dunlap@oracle.com> Cc: Nishanth Aravamudan <nacc@us.ibm.com> Cc: Adam Litke <agl@us.ibm.com> Cc: Andy Whitcroft <apw@canonical.com> Cc: Eric Whitney <eric.whitney@hp.com> Cc: Christoph Lameter <cl@linux-foundation.org> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
This commit is contained in:
parent
8fe23e0571
commit
4faf8d950e
2 changed files with 50 additions and 6 deletions
|
@ -231,7 +231,8 @@ resulting effect on persistent huge page allocation is as follows:
|
||||||
Per Node Hugepages Attributes
|
Per Node Hugepages Attributes
|
||||||
|
|
||||||
A subset of the contents of the root huge page control directory in sysfs,
|
A subset of the contents of the root huge page control directory in sysfs,
|
||||||
described above, has been replicated under each "node" system device in:
|
described above, will be replicated under the system device of each
|
||||||
|
NUMA node with memory in:
|
||||||
|
|
||||||
/sys/devices/system/node/node[0-9]*/hugepages/
|
/sys/devices/system/node/node[0-9]*/hugepages/
|
||||||
|
|
||||||
|
|
|
@ -177,8 +177,8 @@ static SYSDEV_ATTR(distance, S_IRUGO, node_read_distance, NULL);
|
||||||
/*
|
/*
|
||||||
* hugetlbfs per node attributes registration interface:
|
* hugetlbfs per node attributes registration interface:
|
||||||
* When/if hugetlb[fs] subsystem initializes [sometime after this module],
|
* When/if hugetlb[fs] subsystem initializes [sometime after this module],
|
||||||
* it will register its per node attributes for all nodes online at that
|
* it will register its per node attributes for all online nodes with
|
||||||
* time. It will also call register_hugetlbfs_with_node(), below, to
|
* memory. It will also call register_hugetlbfs_with_node(), below, to
|
||||||
* register its attribute registration functions with this node driver.
|
* register its attribute registration functions with this node driver.
|
||||||
* Once these hooks have been initialized, the node driver will call into
|
* Once these hooks have been initialized, the node driver will call into
|
||||||
* the hugetlb module to [un]register attributes for hot-plugged nodes.
|
* the hugetlb module to [un]register attributes for hot-plugged nodes.
|
||||||
|
@ -188,7 +188,8 @@ static node_registration_func_t __hugetlb_unregister_node;
|
||||||
|
|
||||||
static inline void hugetlb_register_node(struct node *node)
|
static inline void hugetlb_register_node(struct node *node)
|
||||||
{
|
{
|
||||||
if (__hugetlb_register_node)
|
if (__hugetlb_register_node &&
|
||||||
|
node_state(node->sysdev.id, N_HIGH_MEMORY))
|
||||||
__hugetlb_register_node(node);
|
__hugetlb_register_node(node);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -233,6 +234,7 @@ int register_node(struct node *node, int num, struct node *parent)
|
||||||
sysdev_create_file(&node->sysdev, &attr_distance);
|
sysdev_create_file(&node->sysdev, &attr_distance);
|
||||||
|
|
||||||
scan_unevictable_register_node(node);
|
scan_unevictable_register_node(node);
|
||||||
|
|
||||||
hugetlb_register_node(node);
|
hugetlb_register_node(node);
|
||||||
}
|
}
|
||||||
return error;
|
return error;
|
||||||
|
@ -254,7 +256,7 @@ void unregister_node(struct node *node)
|
||||||
sysdev_remove_file(&node->sysdev, &attr_distance);
|
sysdev_remove_file(&node->sysdev, &attr_distance);
|
||||||
|
|
||||||
scan_unevictable_unregister_node(node);
|
scan_unevictable_unregister_node(node);
|
||||||
hugetlb_unregister_node(node);
|
hugetlb_unregister_node(node); /* no-op, if memoryless node */
|
||||||
|
|
||||||
sysdev_unregister(&node->sysdev);
|
sysdev_unregister(&node->sysdev);
|
||||||
}
|
}
|
||||||
|
@ -384,8 +386,45 @@ static int link_mem_sections(int nid)
|
||||||
}
|
}
|
||||||
return err;
|
return err;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Handle per node hstate attribute [un]registration on transitions
|
||||||
|
* to/from memoryless state.
|
||||||
|
*/
|
||||||
|
|
||||||
|
static int node_memory_callback(struct notifier_block *self,
|
||||||
|
unsigned long action, void *arg)
|
||||||
|
{
|
||||||
|
struct memory_notify *mnb = arg;
|
||||||
|
int nid = mnb->status_change_nid;
|
||||||
|
|
||||||
|
switch (action) {
|
||||||
|
case MEM_ONLINE: /* memory successfully brought online */
|
||||||
|
if (nid != NUMA_NO_NODE)
|
||||||
|
hugetlb_register_node(&node_devices[nid]);
|
||||||
|
break;
|
||||||
|
case MEM_OFFLINE: /* or offline */
|
||||||
|
if (nid != NUMA_NO_NODE)
|
||||||
|
hugetlb_unregister_node(&node_devices[nid]);
|
||||||
|
break;
|
||||||
|
case MEM_GOING_ONLINE:
|
||||||
|
case MEM_GOING_OFFLINE:
|
||||||
|
case MEM_CANCEL_ONLINE:
|
||||||
|
case MEM_CANCEL_OFFLINE:
|
||||||
|
default:
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
return NOTIFY_OK;
|
||||||
|
}
|
||||||
#else
|
#else
|
||||||
static int link_mem_sections(int nid) { return 0; }
|
static int link_mem_sections(int nid) { return 0; }
|
||||||
|
|
||||||
|
static inline int node_memory_callback(struct notifier_block *self,
|
||||||
|
unsigned long action, void *arg)
|
||||||
|
{
|
||||||
|
return NOTIFY_OK;
|
||||||
|
}
|
||||||
#endif /* CONFIG_MEMORY_HOTPLUG_SPARSE */
|
#endif /* CONFIG_MEMORY_HOTPLUG_SPARSE */
|
||||||
|
|
||||||
int register_one_node(int nid)
|
int register_one_node(int nid)
|
||||||
|
@ -499,13 +538,17 @@ static int node_states_init(void)
|
||||||
return err;
|
return err;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#define NODE_CALLBACK_PRI 2 /* lower than SLAB */
|
||||||
static int __init register_node_type(void)
|
static int __init register_node_type(void)
|
||||||
{
|
{
|
||||||
int ret;
|
int ret;
|
||||||
|
|
||||||
ret = sysdev_class_register(&node_class);
|
ret = sysdev_class_register(&node_class);
|
||||||
if (!ret)
|
if (!ret) {
|
||||||
ret = node_states_init();
|
ret = node_states_init();
|
||||||
|
hotplug_memory_notifier(node_memory_callback,
|
||||||
|
NODE_CALLBACK_PRI);
|
||||||
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Note: we're not going to unregister the node class if we fail
|
* Note: we're not going to unregister the node class if we fail
|
||||||
|
|
Loading…
Reference in a new issue