[patch 12/14] mm: variable pcp size

From: Nick Piggin
Date: Sun Nov 06 2005 - 03:25:44 EST


12/14

--
SUSE Labs, Novell Inc.

The previous increase in per-cpu pageset (pcp) list size will probably be too
much for huge NUMA machines, despite advances in keeping remote pagesets in
check. Make the pcp lists for remote zones much smaller (high = 4 * batch,
slightly smaller than before the increase), and take advantage of this to
increase the local pcp list size again (high = 32 * batch).

Index: linux-2.6/mm/page_alloc.c
===================================================================
--- linux-2.6.orig/mm/page_alloc.c
+++ linux-2.6/mm/page_alloc.c
@@ -1779,13 +1779,14 @@ static int __devinit zone_batchsize(stru
return batch;
}

-inline void setup_pageset(struct per_cpu_pageset *p, unsigned long batch)
+static inline void setup_pageset(struct per_cpu_pageset *p,
+ unsigned long size, unsigned long batch)
{
memset(p, 0, sizeof(*p));
p->count = 0;
p->cold_count = 0;
- p->high = 16 * batch;
- p->batch = max(1UL, 1 * batch);
+ p->high = max(1UL, size);
+ p->batch = max(1UL, batch);
INIT_LIST_HEAD(&p->list);
}

@@ -1819,13 +1820,19 @@ static int __devinit process_zones(int c
struct zone *zone, *dzone;

for_each_zone(zone) {
+ unsigned long size, batch;

zone->pageset[cpu] = kmalloc_node(sizeof(struct per_cpu_pageset),
GFP_KERNEL, cpu_to_node(cpu));
if (!zone->pageset[cpu])
goto bad;

- setup_pageset(zone->pageset[cpu], zone_batchsize(zone));
+ batch = zone_batchsize(zone);
+ if (cpu_to_node(cpu) == zone->zone_pgdat->node_id)
+ size = batch * 32;
+ else
+ size = batch * 4;
+ setup_pageset(zone->pageset[cpu], size, batch);
}

return 0;
@@ -1923,9 +1930,9 @@ static __devinit void zone_pcp_init(stru
#ifdef CONFIG_NUMA
/* Early boot. Slab allocator not functional yet */
zone->pageset[cpu] = &boot_pageset[cpu];
- setup_pageset(&boot_pageset[cpu],0);
+ setup_pageset(&boot_pageset[cpu], 0, 0);
#else
- setup_pageset(zone_pcp(zone,cpu), batch);
+ setup_pageset(zone_pcp(zone, cpu), batch * 32, batch);
#endif
}
printk(KERN_DEBUG " %s zone: %lu pages, LIFO batch:%lu\n",