limiting node (NUMA) memory

From: Rajesham Gajjela
Date: Wed Dec 27 2006 - 07:07:47 EST


PS: Please CC me; I am not on the list.

On a 2.6.7 kernel on the x86_64 arch, I am trying to limit
each node's memory. I have 4 CPUs on my system and
therefore 4 nodes will be set up. Total RAM is 8G, and
each node will have 2G of memory. But at times, I may want
to limit each node's memory to 1G - something like
memnode="1G,1G,2G,2G" on the GRUB kernel command line. I have
written the patch below, which is not working. With this patch,
the machine keeps rebooting after the initrd stage:


Booting 'Red Hat Enterprise Linux AS
(2.6.7-aruba_0.0.0.49309)'

kernel /vmlinuz-2.6.7-aruba_0.0.0.49309
root=/dev/system/rootvol console=ttyS0,
9600n8 SPEED=9600 CONSOLE=serial ide=nodma
ramdisk_size=40000 quiet hangcheck_t
imer.hangcheck_reboot=1 memnode="1g,1g,1g,2M" gdb=0
[Linux-bzImage, setup=0x1400, size=0x1e387c]
initrd /initrd-2.6.7-aruba_0.0.0.49309.img
[Linux-initrd @ 0x37a38000, 0x5b7016 bytes]

Any pointers on my mistake(s) are appreciated.

Rajesham

__________________________________________________
Do You Yahoo!?
Tired of spam? Yahoo! Mail has the best spam protection around
http://mail.yahoo.com

diff -urNp -X kernel-2.6.7-dontdiff kernel-2.6.7-orig/arch/x86_64/kernel/e820.c kernel-2.6.7-modif/arch/x86_64/kernel/e820.c
--- kernel-2.6.7-orig/arch/x86_64/kernel/e820.c
+++ kernel-2.6.7-modif/arch/x86_64/kernel/e820.c
@@ -763,7 +763,7 @@ void __init parse_memopt(char *p, char *
*
* -AK
*/
- end_user_pfn = memparse(p, from) + HIGH_MEMORY;
+ end_user_pfn = memparse(p, from);
end_user_pfn >>= PAGE_SHIFT;
}

diff -urNp -X kernel-2.6.7-dontdiff kernel-2.6.7-orig/arch/x86_64/kernel/setup.c kernel-2.6.7-modif/arch/x86_64/kernel/setup.c
--- kernel-2.6.7-orig/arch/x86_64/kernel/setup.c
+++ kernel-2.6.7-modif/arch/x86_64/kernel/setup.c
@@ -335,6 +335,10 @@ static __init void parse_cmdline_early (
if (ptr) stag_HBAs = memparse(ptr+1, &ptr);
}

+ if (!memcmp(from, "memnode=", 8)) {
+ parse_memnode(from+8);
+ }
+
next_char:
c = *(from++);
if (!c)
diff -urNp -X kernel-2.6.7-dontdiff kernel-2.6.7-orig/arch/x86_64/mm/k8topology.c kernel-2.6.7-modif/arch/x86_64/mm/k8topology.c
--- kernel-2.6.7-orig/arch/x86_64/mm/k8topology.c
+++ kernel-2.6.7-modif/arch/x86_64/mm/k8topology.c
@@ -41,6 +41,22 @@ static __init int find_northbridge(void)
return -1;
}

+unsigned long memnode[MAXNODE];
+
+void __init
+parse_memnode(char *str)
+{
+ int i = 0;
+ char *p = str;
+ char *p1;
+
+ ++str; /* skip "=" */
+ while (i < 8 && ((p = strsep(&str, ",")) != NULL)) {
+ memnode[i++] = memparse(p, &p1);
+ }
+}
+
+
int __init k8_scan_nodes(unsigned long start, unsigned long end)
{
unsigned long prevbase;
@@ -59,6 +75,7 @@ int __init k8_scan_nodes(unsigned long s
numnodes = ((reg >> 4) & 7) + 1;

printk(KERN_INFO "Number of nodes %d (%x)\n", numnodes, reg);
+ printk(KERN_INFO "DEBUG0: start=%lu, end=%lu\n", start, end);

memset(&nodes,0,sizeof(nodes));
prevbase = 0;
@@ -69,6 +86,10 @@ int __init k8_scan_nodes(unsigned long s
limit = read_pci_config(0, nb, 1, 0x44 + i*8);

nodeid = limit & 7;
+
+ printk(KERN_INFO "DEBUG1: from pci config space - base=%lu, "
+ "limit=%lu, nodeid=%d\n", base, limit, nodeid);
+
if ((base & 3) == 0) {
if (i < numnodes)
printk("Skipping disabled node %d\n", i);
@@ -151,11 +172,18 @@ int __init k8_scan_nodes(unsigned long s
printk(KERN_INFO "Using node hash shift of %d\n", memnode_shift);

for (i = 0; i < MAXNODE; i++) {
- if (nodes[i].start != nodes[i].end) {
- /* assume 1:1 NODE:CPU */
- cpu_to_node[i] = i;
+ if (nodes[i].start != nodes[i].end) {
+ /* assume 1:1 NODE:CPU */
+ cpu_to_node[i] = i;
+
+ if (nodes[i].end - nodes[i].start > memnode[i])
+ nodes[i].end = memnode[i];
+
+ printk(KERN_INFO "DEBUG3: Node %d, memnode:%lu, start:%llu, "
+ "end:%llu\n", i, memnode[i], nodes[i].start, nodes[i].end);
+
setup_node_bootmem(i, nodes[i].start, nodes[i].end);
- }
+ }
}

numa_init_array();
diff -urNp -X kernel-2.6.7-dontdiff kernel-2.6.7-orig/include/asm-x86_64/mmzone.h kernel-2.6.7-modif/include/asm-x86_64/mmzone.h
--- kernel-2.6.7-orig/include/asm-x86_64/mmzone.h
+++ kernel-2.6.7-modif/include/asm-x86_64/mmzone.h
@@ -60,6 +60,7 @@ static inline __attribute__((pure)) int
/* AK: !DISCONTIGMEM just forces it to 1. Can't we too? */
#define pfn_valid(pfn) ((pfn) < num_physpages)

+extern void parse_memnode(char *);

#endif
#endif