[patch 045/104] x86: Hibernate: Fix breakage on x86_32 withCONFIG_NUMA set

From: Greg KH
Date: Wed Dec 03 2008 - 15:08:16 EST


2.6.27-stable review patch. If anyone has any objections, please let us know.

------------------
From: Rafael J. Wysocki <rjw@xxxxxxx>

backport of commit 97a70e548bd97d5a46ae9d44f24aafcc013fd701 to the 2.6.27 kernel.

The NUMA code on x86_32 creates special memory mapping that allows
each node's pgdat to be located in this node's memory. For this
purpose it allocates a memory area at the end of each node's memory
and maps this area so that it is accessible with virtual addresses
belonging to low memory. As a result, if there is high memory,
these NUMA-allocated areas are physically located in high memory,
although they are mapped to low memory addresses.

Our hibernation code does not take that into account and for this
reason hibernation fails on all x86_32 systems with CONFIG_NUMA=y and
with high memory present. Fix this by adding a special mapping for
the NUMA-allocated memory areas to the temporary page tables created
during the last phase of resume.

Signed-off-by: Rafael J. Wysocki <rjw@xxxxxxx>
Acked-by: Ingo Molnar <mingo@xxxxxxx>
Signed-off-by: Greg Kroah-Hartman <gregkh@xxxxxxx>

---
arch/x86/mm/discontig_32.c | 35 +++++++++++++++++++++++++++++++++++
arch/x86/power/hibernate_32.c | 4 ++++
include/asm-x86/mmzone_32.h | 4 ++++
3 files changed, 43 insertions(+)

--- a/arch/x86/mm/discontig_32.c
+++ b/arch/x86/mm/discontig_32.c
@@ -222,6 +222,41 @@ static void __init remap_numa_kva(void)
}
}

+#ifdef CONFIG_HIBERNATION
+/**
+ * resume_map_numa_kva - add KVA mapping to the temporary page tables created
+ * during resume from hibernation
+ * @pgd_base - temporary resume page directory
+ */
+void resume_map_numa_kva(pgd_t *pgd_base)
+{
+ int node;
+
+ for_each_online_node(node) {
+ unsigned long start_va, start_pfn, size, pfn;
+
+ start_va = (unsigned long)node_remap_start_vaddr[node];
+ start_pfn = node_remap_start_pfn[node];
+ size = node_remap_size[node];
+
+ printk(KERN_DEBUG "%s: node %d\n", __FUNCTION__, node);
+
+ for (pfn = 0; pfn < size; pfn += PTRS_PER_PTE) {
+ unsigned long vaddr = start_va + (pfn << PAGE_SHIFT);
+ pgd_t *pgd = pgd_base + pgd_index(vaddr);
+ pud_t *pud = pud_offset(pgd, vaddr);
+ pmd_t *pmd = pmd_offset(pud, vaddr);
+
+ set_pmd(pmd, pfn_pmd(start_pfn + pfn,
+ PAGE_KERNEL_LARGE_EXEC));
+
+ printk(KERN_DEBUG "%s: %08lx -> pfn %08lx\n",
+ __FUNCTION__, vaddr, start_pfn + pfn);
+ }
+ }
+}
+#endif
+
static unsigned long calculate_numa_remap_pages(void)
{
int nid;
--- a/arch/x86/power/hibernate_32.c
+++ b/arch/x86/power/hibernate_32.c
@@ -12,6 +12,7 @@
#include <asm/system.h>
#include <asm/page.h>
#include <asm/pgtable.h>
+#include <asm/mmzone.h>

/* Defined in hibernate_asm_32.S */
extern int restore_image(void);
@@ -127,6 +128,9 @@ static int resume_physical_mapping_init(
}
}
}
+
+ resume_map_numa_kva(pgd_base);
+
return 0;
}

--- a/include/asm-x86/mmzone_32.h
+++ b/include/asm-x86/mmzone_32.h
@@ -34,10 +34,14 @@ static inline void get_memcfg_numa(void)

extern int early_pfn_to_nid(unsigned long pfn);

+extern void resume_map_numa_kva(pgd_t *pgd);
+
#else /* !CONFIG_NUMA */

#define get_memcfg_numa get_memcfg_numa_flat

+static inline void resume_map_numa_kva(pgd_t *pgd) {}
+
#endif /* CONFIG_NUMA */

#ifdef CONFIG_DISCONTIGMEM

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/