From: Dave Hansen Introduce a simple allocator for the NUMA remap space. This space is very scarce, used for structures which are best allocated node local. This mechanism is also used on non-NUMA ia64 systems with a vmem_map to keep the pgdat->node_mem_map initialized in a consistent place for all architectures. Issues: o alloc_remap takes a node_id where we might expect a pgdat. This was intended to allow us to allocate the pgdats themselves using this mechanism, which we do not yet do. Could have alloc_remap_node() and alloc_remap_nid() for this purpose. Signed-off-by: Andy Whitcroft Signed-off-by: Dave Hansen Signed-off-by: Andrew Morton --- arch/i386/Kconfig | 5 +++ arch/i386/mm/discontig.c | 59 ++++++++++++++++++++++++----------------------- include/linux/bootmem.h | 9 +++++++ mm/page_alloc.c | 6 +++- 4 files changed, 50 insertions(+), 29 deletions(-) diff -puN arch/i386/Kconfig~resubmit-sparsemem-base-simple-numa-remap-space-allocator arch/i386/Kconfig --- 25/arch/i386/Kconfig~resubmit-sparsemem-base-simple-numa-remap-space-allocator 2005-05-09 20:09:11.000000000 -0700 +++ 25-akpm/arch/i386/Kconfig 2005-05-09 20:09:11.000000000 -0700 @@ -803,6 +803,11 @@ config NEED_NODE_MEMMAP_SIZE depends on DISCONTIGMEM default y +config HAVE_ARCH_ALLOC_REMAP + bool + depends on NUMA + default y + config HIGHPTE bool "Allocate 3rd-level pagetables from highmem" depends on HIGHMEM4G || HIGHMEM64G diff -puN arch/i386/mm/discontig.c~resubmit-sparsemem-base-simple-numa-remap-space-allocator arch/i386/mm/discontig.c --- 25/arch/i386/mm/discontig.c~resubmit-sparsemem-base-simple-numa-remap-space-allocator 2005-05-09 20:09:11.000000000 -0700 +++ 25-akpm/arch/i386/mm/discontig.c 2005-05-09 20:09:11.000000000 -0700 @@ -108,6 +108,9 @@ unsigned long node_remap_offset[MAX_NUMN void *node_remap_start_vaddr[MAX_NUMNODES]; void set_pmd_pfn(unsigned long vaddr, unsigned long pfn, pgprot_t flags); +void *node_remap_end_vaddr[MAX_NUMNODES]; +void *node_remap_alloc_vaddr[MAX_NUMNODES]; + /* * FLAT - 
support for basic PC memory model with discontig enabled, essentially * a single node with all available processors in it with a flat @@ -178,6 +181,21 @@ static void __init allocate_pgdat(int ni } } +void *alloc_remap(int nid, unsigned long size) +{ + void *allocation = node_remap_alloc_vaddr[nid]; + + size = ALIGN(size, L1_CACHE_BYTES); + + if (!allocation || (allocation + size) >= node_remap_end_vaddr[nid]) + return 0; + + node_remap_alloc_vaddr[nid] += size; + memset(allocation, 0, size); + + return allocation; +} + void __init remap_numa_kva(void) { void *vaddr; @@ -185,8 +203,6 @@ void __init remap_numa_kva(void) int node; for_each_online_node(node) { - if (node == 0) - continue; for (pfn=0; pfn < node_remap_size[node]; pfn += PTRS_PER_PTE) { vaddr = node_remap_start_vaddr[node]+(pfn<node_mem_map = (struct page *)lmem_map; - free_area_init_node(nid, NODE_DATA(nid), zones_size, - start, zholes_size); - } + + free_area_init_node(nid, NODE_DATA(nid), zones_size, start, + zholes_size); } return; } diff -puN include/linux/bootmem.h~resubmit-sparsemem-base-simple-numa-remap-space-allocator include/linux/bootmem.h --- 25/include/linux/bootmem.h~resubmit-sparsemem-base-simple-numa-remap-space-allocator 2005-05-09 20:09:11.000000000 -0700 +++ 25-akpm/include/linux/bootmem.h 2005-05-09 20:09:11.000000000 -0700 @@ -67,6 +67,15 @@ extern void * __init __alloc_bootmem_nod __alloc_bootmem_node((pgdat), (x), PAGE_SIZE, 0) #endif /* !CONFIG_HAVE_ARCH_BOOTMEM_NODE */ +#ifdef CONFIG_HAVE_ARCH_ALLOC_REMAP +extern void *alloc_remap(int nid, unsigned long size); +#else +static inline void *alloc_remap(int nid, unsigned long size) +{ + return NULL; +} +#endif + extern unsigned long __initdata nr_kernel_pages; extern unsigned long __initdata nr_all_pages; diff -puN mm/page_alloc.c~resubmit-sparsemem-base-simple-numa-remap-space-allocator mm/page_alloc.c --- 25/mm/page_alloc.c~resubmit-sparsemem-base-simple-numa-remap-space-allocator 2005-05-09 20:09:11.000000000 -0700 +++ 
25-akpm/mm/page_alloc.c 2005-05-09 20:09:11.000000000 -0700 @@ -1749,6 +1749,7 @@ static void __init free_area_init_core(s static void __init alloc_node_mem_map(struct pglist_data *pgdat) { unsigned long size; + struct page *map; /* Skip empty nodes */ if (!pgdat->node_spanned_pages) @@ -1757,7 +1758,10 @@ static void __init alloc_node_mem_map(st /* ia64 gets its own node_mem_map, before this, without bootmem */ if (!pgdat->node_mem_map) { size = (pgdat->node_spanned_pages + 1) * sizeof(struct page); - pgdat->node_mem_map = alloc_bootmem_node(pgdat, size); + map = alloc_remap(pgdat->node_id, size); + if (!map) + map = alloc_bootmem_node(pgdat, size); + pgdat->node_mem_map = map; } #ifndef CONFIG_DISCONTIGMEM /* _