Re: [PATCH 0/2] Relax restrictions on setting CONFIG_NUMA on x86

From: Mel Gorman
Date: Wed Jan 23 2008 - 05:25:54 EST


On (23/01/08 11:04), KOSAKI Motohiro didst pronounce:
> Hi mel
>
> > Hi
> >
> > > A fix[1] was merged to the x86.git tree that allowed NUMA kernels to boot
> > > on normal x86 machines (and not just NUMA-Q, Summit etc.). I took a look
> > > at the restrictions on setting NUMA on x86 to see if they could be lifted.
> >
> > Interesting!
> >
> > I will test tomorrow.
>
> Hmm...
> It doesn't works on my machine.
>
> panic at booting at __free_pages_ok() with blow call trace.
>
> [<hex number>] free_all_bootmem_core
> [<hex number>] mem_init
> [<hex number>] alloc_large_system_hash
> [<hex number>] inode_init_early
> [<hex number>] start_kernel
> [<hex number>] unknown_bootoption
>
> my machine spec
> CPU: Pentium4 with HT
> MEM: 512M
>
> I will try more investigate.
> but I have no time for a while, sorry ;-)
>
>
> BTW:
> when config sparse mem turn on instead discontig mem.
> panic at booting at get_pageblock_flags_group() with below call stack.
>
> free_initrd
> free_init_pages
> free_hot_cold_page
>
>

And the actual patch :/

diff -rup -X /usr/src/patchset-0.6/bin//dontdiff linux-2.6.24-rc8-010_any32bit_x86/arch/x86/mm/discontig_32.c linux-2.6.24-rc8-015_remap_discontigmem/arch/x86/mm/discontig_32.c
--- linux-2.6.24-rc8-010_any32bit_x86/arch/x86/mm/discontig_32.c 2008-01-17 15:40:21.000000000 +0000
+++ linux-2.6.24-rc8-015_remap_discontigmem/arch/x86/mm/discontig_32.c 2008-01-19 15:50:47.000000000 +0000
@@ -32,6 +32,7 @@
#include <linux/kexec.h>
#include <linux/pfn.h>
#include <linux/swap.h>
+#include <linux/acpi.h>

#include <asm/e820.h>
#include <asm/setup.h>
@@ -103,14 +104,10 @@ extern unsigned long highend_pfn, highst

#define LARGE_PAGE_BYTES (PTRS_PER_PTE * PAGE_SIZE)

-static unsigned long node_remap_start_pfn[MAX_NUMNODES];
unsigned long node_remap_size[MAX_NUMNODES];
-static unsigned long node_remap_offset[MAX_NUMNODES];
static void *node_remap_start_vaddr[MAX_NUMNODES];
void set_pmd_pfn(unsigned long vaddr, unsigned long pfn, pgprot_t flags);

-static void *node_remap_end_vaddr[MAX_NUMNODES];
-static void *node_remap_alloc_vaddr[MAX_NUMNODES];
static unsigned long kva_start_pfn;
static unsigned long kva_pages;
/*
@@ -167,6 +164,22 @@ static void __init allocate_pgdat(int ni
}
}

+#ifdef CONFIG_DISCONTIGMEM
+/*
+ * In the discontig memory model, a portion of the kernel virtual area (KVA)
+ * is reserved and portions of nodes are mapped using it. This is to allow
+ * node-local memory to be allocated for structures that would normally require
+ * ZONE_NORMAL. The memory is allocated with alloc_remap() and callers
+ * should be prepared to allocate from the bootmem allocator instead. This KVA
+ * mechanism is incompatible with SPARSEMEM as it makes assumptions about the
+ * layout of memory that are broken if alloc_remap() succeeds for some of the
+ * map and fails for others
+ */
+static unsigned long node_remap_start_pfn[MAX_NUMNODES];
+static void *node_remap_end_vaddr[MAX_NUMNODES];
+static void *node_remap_alloc_vaddr[MAX_NUMNODES];
+static unsigned long node_remap_offset[MAX_NUMNODES];
+
void *alloc_remap(int nid, unsigned long size)
{
void *allocation = node_remap_alloc_vaddr[nid];
@@ -263,6 +276,40 @@ static unsigned long calculate_numa_rema
return reserve_pages;
}

+static void init_remap_allocator(int nid)
+{
+ node_remap_start_vaddr[nid] = pfn_to_kaddr(
+ kva_start_pfn + node_remap_offset[nid]);
+ node_remap_end_vaddr[nid] = node_remap_start_vaddr[nid] +
+ (node_remap_size[nid] * PAGE_SIZE);
+ node_remap_alloc_vaddr[nid] = node_remap_start_vaddr[nid] +
+ ALIGN(sizeof(pg_data_t), PAGE_SIZE);
+
+ printk ("node %d will remap to vaddr %08lx - %08lx\n", nid,
+ (ulong) node_remap_start_vaddr[nid],
+ (ulong) pfn_to_kaddr(highstart_pfn
+ + node_remap_offset[nid] + node_remap_size[nid]));
+}
+#else
+void *alloc_remap(int nid, unsigned long size)
+{
+ return NULL;
+}
+
+static unsigned long calculate_numa_remap_pages(void)
+{
+ return 0;
+}
+
+static void init_remap_allocator(int nid)
+{
+}
+
+void __init remap_numa_kva(void)
+{
+}
+#endif /* CONFIG_DISCONTIGMEM */
+
extern void setup_bootmem_allocator(void);
unsigned long __init setup_memory(void)
{
@@ -326,19 +373,9 @@ unsigned long __init setup_memory(void)
printk("Low memory ends at vaddr %08lx\n",
(ulong) pfn_to_kaddr(max_low_pfn));
for_each_online_node(nid) {
- node_remap_start_vaddr[nid] = pfn_to_kaddr(
- kva_start_pfn + node_remap_offset[nid]);
- /* Init the node remap allocator */
- node_remap_end_vaddr[nid] = node_remap_start_vaddr[nid] +
- (node_remap_size[nid] * PAGE_SIZE);
- node_remap_alloc_vaddr[nid] = node_remap_start_vaddr[nid] +
- ALIGN(sizeof(pg_data_t), PAGE_SIZE);
+ init_remap_allocator(nid);

allocate_pgdat(nid);
- printk ("node %d will remap to vaddr %08lx - %08lx\n", nid,
- (ulong) node_remap_start_vaddr[nid],
- (ulong) pfn_to_kaddr(highstart_pfn
- + node_remap_offset[nid] + node_remap_size[nid]));
}
printk("High memory starts at vaddr %08lx\n",
(ulong) pfn_to_kaddr(highstart_pfn));
@@ -439,3 +476,29 @@ int memory_add_physaddr_to_nid(u64 addr)

EXPORT_SYMBOL_GPL(memory_add_physaddr_to_nid);
#endif
+
+#ifndef CONFIG_HAVE_ARCH_PARSE_SRAT
+/*
+ * XXX FIXME: Make SLIT table parsing available to 32-bit NUMA
+ *
+ * These stub functions are needed to compile 32-bit NUMA when SRAT is
+ * not set. There are functions in srat_64.c for parsing this table
+ * and it may be possible to make them common functions.
+ */
+void acpi_numa_slit_init (struct acpi_table_slit *slit)
+{
+ printk(KERN_INFO "ACPI: No support for parsing SLIT table\n");
+}
+
+void acpi_numa_processor_affinity_init (struct acpi_srat_cpu_affinity *pa)
+{
+}
+
+void acpi_numa_memory_affinity_init (struct acpi_srat_mem_affinity *ma)
+{
+}
+
+void acpi_numa_arch_fixup(void)
+{
+}
+#endif /* CONFIG_HAVE_ARCH_PARSE_SRAT */

--
Mel Gorman
Part-time Phd Student Linux Technology Center
University of Limerick IBM Dublin Software Lab
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/