From: Andrew Morton

- Move ifdef out of init/main.c

- Coding style fixups

- Fit it into 80-col xterm

- Return -ENOMEM on failure, not -ENOBUFS

- Factor out common code into zone_batchsize().  (This fixes a bug: the
  "clamp to 2^n-1" logic was missing for NUMA).

- Fix missing conversion in zoneinfo_show().

Cc: Christoph Lameter
Signed-off-by: Andrew Morton
---

 include/linux/mm.h |    5 ++
 init/main.c        |    2 -
 mm/page_alloc.c    |   89 +++++++++++++++++++++++++++--------------------------
 base/node.c        |    0 
 linux/mmzone.h     |    0 
 mempolicy.c        |    0 
 6 files changed, 52 insertions(+), 44 deletions(-)

diff -puN drivers/base/node.c~node-local-per-cpu-pages-tidy drivers/base/node.c
diff -puN include/linux/mm.h~node-local-per-cpu-pages-tidy include/linux/mm.h
--- 25/include/linux/mm.h~node-local-per-cpu-pages-tidy	2005-05-09 20:09:16.000000000 -0700
+++ 25-akpm/include/linux/mm.h	2005-05-09 20:09:16.000000000 -0700
@@ -783,7 +783,12 @@ extern void mem_init(void);
 extern void show_mem(void);
 extern void si_meminfo(struct sysinfo * val);
 extern void si_meminfo_node(struct sysinfo *val, int nid);
+
+#ifdef CONFIG_NUMA
 extern void setup_per_cpu_pageset(void);
+#else
+static inline void setup_per_cpu_pageset(void) {}
+#endif
 
 /* prio_tree.c */
 void vma_prio_tree_add(struct vm_area_struct *, struct vm_area_struct *old);
diff -puN include/linux/mmzone.h~node-local-per-cpu-pages-tidy include/linux/mmzone.h
diff -puN init/main.c~node-local-per-cpu-pages-tidy init/main.c
--- 25/init/main.c~node-local-per-cpu-pages-tidy	2005-05-09 20:09:16.000000000 -0700
+++ 25-akpm/init/main.c	2005-05-09 20:09:16.000000000 -0700
@@ -490,9 +490,7 @@ asmlinkage void __init start_kernel(void
 	vfs_caches_init_early();
 	mem_init();
 	kmem_cache_init();
-#ifdef CONFIG_NUMA
 	setup_per_cpu_pageset();
-#endif
 	numa_policy_init();
 	if (late_time_init)
 		late_time_init();
diff -puN mm/mempolicy.c~node-local-per-cpu-pages-tidy mm/mempolicy.c
diff -puN mm/page_alloc.c~node-local-per-cpu-pages-tidy mm/page_alloc.c
--- 25/mm/page_alloc.c~node-local-per-cpu-pages-tidy	2005-05-09 20:09:16.000000000 -0700
+++ 25-akpm/mm/page_alloc.c	2005-05-09 20:09:16.000000000 -0700
@@ -1728,12 +1728,44 @@ void zonetable_add(struct zone *zone, in
 	memmap_init_zone((size), (nid), (zone), (start_pfn))
 #endif
 
+static int __devinit zone_batchsize(struct zone *zone)
+{
+	int batch;
+
+	/*
+	 * The per-cpu-pages pools are set to around 1000th of the
+	 * size of the zone.  But no more than 1/4 of a meg - there's
+	 * no point in going beyond the size of L2 cache.
+	 *
+	 * OK, so we don't know how big the cache is.  So guess.
+	 */
+	batch = zone->present_pages / 1024;
+	if (batch * PAGE_SIZE > 256 * 1024)
+		batch = (256 * 1024) / PAGE_SIZE;
+	batch /= 4;		/* We effectively *= 4 below */
+	if (batch < 1)
+		batch = 1;
+
+	/*
+	 * Clamp the batch to a 2^n - 1 value. Having a power
+	 * of 2 value was found to be more likely to have
+	 * suboptimal cache aliasing properties in some cases.
+	 *
+	 * For example if 2 tasks are alternately allocating
+	 * batches of pages, one task can end up with a lot
+	 * of pages of one half of the possible page colors
+	 * and the other with pages of the other colors.
+	 */
+	batch = (1 << fls(batch + batch/2)) - 1;
+	return batch;
+}
+
 #ifdef CONFIG_NUMA
 /*
  * Dynamicaly allocate memory for the
  * per cpu pageset array in struct zone.
  */
-static inline int __devinit process_zones(int cpu)
+static int __devinit process_zones(int cpu)
 {
 	struct zone *zone, *dzone;
 	int i;
@@ -1743,29 +1775,26 @@ static inline int __devinit process_zone
 		npageset = kmalloc_node(sizeof(struct per_cpu_pageset),
 					 GFP_KERNEL, cpu_to_node(cpu));
-		if(!npageset) {
+		if (!npageset) {
 			zone->pageset[cpu] = NULL;
 			goto bad;
 		}
 
-		if(zone->pageset[cpu]) {
-			memcpy(npageset, zone->pageset[cpu], sizeof(struct per_cpu_pageset));
+		if (zone->pageset[cpu]) {
+			memcpy(npageset, zone->pageset[cpu],
+					sizeof(struct per_cpu_pageset));
 
 			/* Relocate lists */
-			for(i = 0; i<2; i++) {
+			for (i = 0; i < 2; i++) {
 				INIT_LIST_HEAD(&npageset->pcp[i].list);
-				list_splice(&zone->pageset[cpu]->pcp[i].list, &npageset->pcp[i].list);
+				list_splice(&zone->pageset[cpu]->pcp[i].list,
+					&npageset->pcp[i].list);
 			}
 		} else {
 			struct per_cpu_pages *pcp;
 			unsigned long batch;
 
-			batch = zone->present_pages / 1024;
-			if (batch * PAGE_SIZE > 256 * 1024)
-				batch = (256 * 1024) / PAGE_SIZE;
-			batch /= 4;		/* We effectively *= 4 below */
-			if (batch < 1)
-				batch = 1;
+			batch = zone_batchsize(zone);
 
 			pcp = &npageset->pcp[0];	/* hot */
 			pcp->count = 0;
@@ -1787,12 +1816,12 @@ static inline int __devinit process_zone
 	return 0;
 bad:
 	for_each_zone(dzone) {
-		if(dzone == zone)
+		if (dzone == zone)
 			break;
 		kfree(dzone->pageset[cpu]);
 		dzone->pageset[cpu] = NULL;
 	}
-	return -ENOBUFS;
+	return -ENOMEM;
 }
 
 static int __devinit pageset_cpuup_callback(struct notifier_block *nfb,
@@ -1801,9 +1830,9 @@ static int __devinit pageset_cpuup_callb
 {
 	int cpu = (long)hcpu;
 
-	switch(action) {
+	switch (action) {
 	case CPU_UP_PREPARE:
-		if(process_zones(cpu))
+		if (process_zones(cpu))
 			goto bad;
 		break;
 #ifdef CONFIG_HOTPLUG_CPU
@@ -1886,31 +1915,7 @@ static void __init free_area_init_core(s
 
 		zone->temp_priority = zone->prev_priority = DEF_PRIORITY;
 
-		/*
-		 * The per-cpu-pages pools are set to around 1000th of the
-		 * size of the zone.  But no more than 1/4 of a meg - there's
-		 * no point in going beyond the size of L2 cache.
-		 *
-		 * OK, so we don't know how big the cache is.  So guess.
-		 */
-		batch = zone->present_pages / 1024;
-		if (batch * PAGE_SIZE > 256 * 1024)
-			batch = (256 * 1024) / PAGE_SIZE;
-		batch /= 4;		/* We effectively *= 4 below */
-		if (batch < 1)
-			batch = 1;
-
-		/*
-		 * Clamp the batch to a 2^n - 1 value. Having a power
-		 * of 2 value was found to be more likely to have
-		 * suboptimal cache aliasing properties in some cases.
-		 *
-		 * For example if 2 tasks are alternately allocating
-		 * batches of pages, one task can end up with a lot
-		 * of pages of one half of the possible page colors
-		 * and the other with pages of the other colors.
-		 */
-		batch = (1 << fls(batch + batch/2)) - 1;
+		batch = zone_batchsize(zone);
 
 		for (cpu = 0; cpu < NR_CPUS; cpu++) {
 			struct per_cpu_pages *pcp;
@@ -2144,7 +2149,7 @@ static int zoneinfo_show(struct seq_file
 		struct per_cpu_pageset *pageset;
 		int j;
 
-		pageset = &zone->pageset[i];
+		pageset = zone_pcp(zone, i);
 		for (j = 0; j < ARRAY_SIZE(pageset->pcp); j++) {
 			if (pageset->pcp[j].count)
 				break;
_
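
Not part of the patch, but for reference: a minimal userspace sketch of how the
zone_batchsize() heuristic behaves for a few hypothetical zone sizes.  It
assumes 4KB pages and stands in for the kernel's fls() with the GCC
__builtin_clz() builtin; the arithmetic mirrors the hunk above.

/*
 * Standalone sketch (not kernel code): mimic the zone_batchsize()
 * heuristic in userspace.  PAGE_SIZE is assumed to be 4096 and the
 * kernel's fls() is emulated with __builtin_clz().
 */
#include <stdio.h>

#define PAGE_SIZE	4096UL

/* fls(x): index of the most significant set bit; fls(1) == 1, fls(0) == 0 */
static int fls_emul(unsigned int x)
{
	return x ? 32 - __builtin_clz(x) : 0;
}

static int zone_batchsize_sketch(unsigned long present_pages)
{
	int batch;

	/* ~1/1000th of the zone, but no more than 256KB worth of pages */
	batch = present_pages / 1024;
	if (batch * PAGE_SIZE > 256 * 1024)
		batch = (256 * 1024) / PAGE_SIZE;
	batch /= 4;
	if (batch < 1)
		batch = 1;

	/* clamp to a 2^n - 1 value, as the comment in the patch explains */
	batch = (1 << fls_emul(batch + batch / 2)) - 1;
	return batch;
}

int main(void)
{
	unsigned long sizes[] = { 4096, 16384, 65536, 262144 };
	unsigned int i;

	for (i = 0; i < sizeof(sizes) / sizeof(sizes[0]); i++)
		printf("present_pages=%7lu -> batch=%d\n",
		       sizes[i], zone_batchsize_sketch(sizes[i]));
	return 0;
}

For a 1GB zone (262144 4KB pages) this works out to batch = 31, while tiny
zones bottom out at 1; every result lands on a 2^n - 1 value, which is the
cache-aliasing property the comment in zone_batchsize() describes.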