[PATCH] sparsemem base: simple NUMA remap space allocator
Introduce a simple allocator for the NUMA remap space. This space is very scarce and is used for structures which are best allocated node-local. This mechanism is also used on non-NUMA ia64 systems with a vmem_map to keep the pgdat->node_mem_map initialized in a consistent place for all architectures. Issues: alloc_remap() takes a node ID where we might expect a pgdat. This was intended to allow us to allocate the pgdats themselves using this mechanism, which we do not yet do. We could have alloc_remap_node() and alloc_remap_nid() for this purpose. Signed-off-by: Andy Whitcroft <apw@shadowen.org> Signed-off-by: Dave Hansen <haveblue@us.ibm.com> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org>
This commit is contained in:
parent
c2ebaa425e
commit
6f167ec721
4 changed files with 50 additions and 29 deletions
|
@ -803,6 +803,11 @@ config NEED_NODE_MEMMAP_SIZE
|
|||
depends on DISCONTIGMEM
|
||||
default y
|
||||
|
||||
config HAVE_ARCH_ALLOC_REMAP
|
||||
bool
|
||||
depends on NUMA
|
||||
default y
|
||||
|
||||
config HIGHPTE
|
||||
bool "Allocate 3rd-level pagetables from highmem"
|
||||
depends on HIGHMEM4G || HIGHMEM64G
|
||||
|
|
|
@ -108,6 +108,9 @@ unsigned long node_remap_offset[MAX_NUMNODES];
|
|||
void *node_remap_start_vaddr[MAX_NUMNODES];
|
||||
void set_pmd_pfn(unsigned long vaddr, unsigned long pfn, pgprot_t flags);
|
||||
|
||||
void *node_remap_end_vaddr[MAX_NUMNODES];
|
||||
void *node_remap_alloc_vaddr[MAX_NUMNODES];
|
||||
|
||||
/*
|
||||
* FLAT - support for basic PC memory model with discontig enabled, essentially
|
||||
* a single node with all available processors in it with a flat
|
||||
|
@ -178,6 +181,21 @@ static void __init allocate_pgdat(int nid)
|
|||
}
|
||||
}
|
||||
|
||||
void *alloc_remap(int nid, unsigned long size)
|
||||
{
|
||||
void *allocation = node_remap_alloc_vaddr[nid];
|
||||
|
||||
size = ALIGN(size, L1_CACHE_BYTES);
|
||||
|
||||
if (!allocation || (allocation + size) >= node_remap_end_vaddr[nid])
|
||||
return 0;
|
||||
|
||||
node_remap_alloc_vaddr[nid] += size;
|
||||
memset(allocation, 0, size);
|
||||
|
||||
return allocation;
|
||||
}
|
||||
|
||||
void __init remap_numa_kva(void)
|
||||
{
|
||||
void *vaddr;
|
||||
|
@ -185,8 +203,6 @@ void __init remap_numa_kva(void)
|
|||
int node;
|
||||
|
||||
for_each_online_node(node) {
|
||||
if (node == 0)
|
||||
continue;
|
||||
for (pfn=0; pfn < node_remap_size[node]; pfn += PTRS_PER_PTE) {
|
||||
vaddr = node_remap_start_vaddr[node]+(pfn<<PAGE_SHIFT);
|
||||
set_pmd_pfn((ulong) vaddr,
|
||||
|
@ -202,11 +218,6 @@ static unsigned long calculate_numa_remap_pages(void)
|
|||
unsigned long size, reserve_pages = 0;
|
||||
|
||||
for_each_online_node(nid) {
|
||||
if (nid == 0)
|
||||
continue;
|
||||
if (!node_remap_size[nid])
|
||||
continue;
|
||||
|
||||
/*
|
||||
* The acpi/srat node info can show hot-add memory zones
|
||||
* where memory could be added but not currently present.
|
||||
|
@ -226,8 +237,8 @@ static unsigned long calculate_numa_remap_pages(void)
|
|||
printk("Reserving %ld pages of KVA for lmem_map of node %d\n",
|
||||
size, nid);
|
||||
node_remap_size[nid] = size;
|
||||
reserve_pages += size;
|
||||
node_remap_offset[nid] = reserve_pages;
|
||||
reserve_pages += size;
|
||||
printk("Shrinking node %d from %ld pages to %ld pages\n",
|
||||
nid, node_end_pfn[nid], node_end_pfn[nid] - size);
|
||||
node_end_pfn[nid] -= size;
|
||||
|
@ -280,12 +291,18 @@ unsigned long __init setup_memory(void)
|
|||
(ulong) pfn_to_kaddr(max_low_pfn));
|
||||
for_each_online_node(nid) {
|
||||
node_remap_start_vaddr[nid] = pfn_to_kaddr(
|
||||
(highstart_pfn + reserve_pages) - node_remap_offset[nid]);
|
||||
highstart_pfn + node_remap_offset[nid]);
|
||||
/* Init the node remap allocator */
|
||||
node_remap_end_vaddr[nid] = node_remap_start_vaddr[nid] +
|
||||
(node_remap_size[nid] * PAGE_SIZE);
|
||||
node_remap_alloc_vaddr[nid] = node_remap_start_vaddr[nid] +
|
||||
ALIGN(sizeof(pg_data_t), PAGE_SIZE);
|
||||
|
||||
allocate_pgdat(nid);
|
||||
printk ("node %d will remap to vaddr %08lx - %08lx\n", nid,
|
||||
(ulong) node_remap_start_vaddr[nid],
|
||||
(ulong) pfn_to_kaddr(highstart_pfn + reserve_pages
|
||||
- node_remap_offset[nid] + node_remap_size[nid]));
|
||||
(ulong) pfn_to_kaddr(highstart_pfn
|
||||
+ node_remap_offset[nid] + node_remap_size[nid]));
|
||||
}
|
||||
printk("High memory starts at vaddr %08lx\n",
|
||||
(ulong) pfn_to_kaddr(highstart_pfn));
|
||||
|
@ -348,23 +365,9 @@ void __init zone_sizes_init(void)
|
|||
}
|
||||
|
||||
zholes_size = get_zholes_size(nid);
|
||||
/*
|
||||
* We let the lmem_map for node 0 be allocated from the
|
||||
* normal bootmem allocator, but other nodes come from the
|
||||
* remapped KVA area - mbligh
|
||||
*/
|
||||
if (!nid)
|
||||
free_area_init_node(nid, NODE_DATA(nid),
|
||||
zones_size, start, zholes_size);
|
||||
else {
|
||||
unsigned long lmem_map;
|
||||
lmem_map = (unsigned long)node_remap_start_vaddr[nid];
|
||||
lmem_map += sizeof(pg_data_t) + PAGE_SIZE - 1;
|
||||
lmem_map &= PAGE_MASK;
|
||||
NODE_DATA(nid)->node_mem_map = (struct page *)lmem_map;
|
||||
free_area_init_node(nid, NODE_DATA(nid), zones_size,
|
||||
start, zholes_size);
|
||||
}
|
||||
|
||||
free_area_init_node(nid, NODE_DATA(nid), zones_size, start,
|
||||
zholes_size);
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
|
|
@ -67,6 +67,15 @@ extern void * __init __alloc_bootmem_node (pg_data_t *pgdat, unsigned long size,
|
|||
__alloc_bootmem_node((pgdat), (x), PAGE_SIZE, 0)
|
||||
#endif /* !CONFIG_HAVE_ARCH_BOOTMEM_NODE */
|
||||
|
||||
#ifdef CONFIG_HAVE_ARCH_ALLOC_REMAP
|
||||
extern void *alloc_remap(int nid, unsigned long size);
|
||||
#else
|
||||
/*
 * Stub used when CONFIG_HAVE_ARCH_ALLOC_REMAP is not defined: there is no
 * remap space to allocate from, so every request fails with NULL and
 * @nid and @size are ignored.
 */
static inline void *alloc_remap(int nid, unsigned long size)
|
||||
{
|
||||
return NULL;
|
||||
}
|
||||
#endif
|
||||
|
||||
extern unsigned long __initdata nr_kernel_pages;
|
||||
extern unsigned long __initdata nr_all_pages;
|
||||
|
||||
|
|
|
@ -1936,6 +1936,7 @@ static void __init free_area_init_core(struct pglist_data *pgdat,
|
|||
static void __init alloc_node_mem_map(struct pglist_data *pgdat)
|
||||
{
|
||||
unsigned long size;
|
||||
struct page *map;
|
||||
|
||||
/* Skip empty nodes */
|
||||
if (!pgdat->node_spanned_pages)
|
||||
|
@ -1944,7 +1945,10 @@ static void __init alloc_node_mem_map(struct pglist_data *pgdat)
|
|||
/* ia64 gets its own node_mem_map, before this, without bootmem */
|
||||
if (!pgdat->node_mem_map) {
|
||||
size = (pgdat->node_spanned_pages + 1) * sizeof(struct page);
|
||||
pgdat->node_mem_map = alloc_bootmem_node(pgdat, size);
|
||||
map = alloc_remap(pgdat->node_id, size);
|
||||
if (!map)
|
||||
map = alloc_bootmem_node(pgdat, size);
|
||||
pgdat->node_mem_map = map;
|
||||
}
|
||||
#ifndef CONFIG_DISCONTIGMEM
|
||||
/*
|
||||
|
|
Loading…
Reference in a new issue