Re: [RFC] Reverting NUMA-affine page table allocation

From: Tejun Heo
Date: Wed Mar 02 2011 - 13:22:53 EST


Hello,

On Wed, Mar 02, 2011 at 07:08:27PM +0100, Ingo Molnar wrote:
> > I tried to clean up the page table allocation code but the necessary
> > changes felt a bit too large at this stage, so IMO that's best left to
> > the next cycle.
>
> Do you plan to implement it more cleanly?

Yeah, that's the plan. I want the page table allocation code cleaned
up before doing this.

I also want to take a dumber/simpler approach, at the expense of some
disadvantage for machines with interleaved NUMA nodes which can't use
1GiB mappings. If that scenario is a real concern - I'm doubtful, but
it could be - we can do the callback-walking thing; I'd just want to
know first that it's an actual problem we need to address.
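
To illustrate the difference, here's a minimal userspace sketch (not
kernel code - the two-node layout and the map_range() helper are
made-up stand-ins for init_memory_mapping()). The dumb approach makes
one mapping call covering everything above 4GiB, so all the page
tables come from wherever the allocator happens to be pointing, while
the callback approach makes one call per node-local region so each
node's page tables can be allocated locally.

/* userspace sketch only - contrasts the two mapping strategies */
#include <stdio.h>

struct range { unsigned long long start, end; };	/* phys addrs */

/* hypothetical interleaved layout: two nodes above 4GiB */
static const struct range nodes[] = {
	{ 0x100000000ULL, 0x180000000ULL },	/* node 0: 4G-6G */
	{ 0x180000000ULL, 0x200000000ULL },	/* node 1: 6G-8G */
};

/* stand-in for init_memory_mapping() */
static void map_range(unsigned long long s, unsigned long long e)
{
	printf("map [%#llx, %#llx)\n", s, e);
}

int main(void)
{
	const unsigned long long gb4 = 0x100000000ULL;
	const unsigned long long max_addr = 0x200000000ULL;
	size_t i;

	/* dumb approach: one call for everything above 4GiB; page
	 * tables all come from one node */
	map_range(gb4, max_addr);

	/* callback-walking approach: one call per node-local region,
	 * so each region's page tables can live on its own node */
	for (i = 0; i < sizeof(nodes) / sizeof(nodes[0]); i++)
		map_range(nodes[i].start, nodes[i].end);
	return 0;
}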

> > To me, it seems complicated without a good enough reason. I'll defer
> > the decision to the x86 maintainers. Ingo, hpa, Thomas, what do you
> > guys think?
>
> Would be nice to see an actual patch that does the revert.

Here it is.

Thanks.

diff --git a/arch/x86/include/asm/page_types.h b/arch/x86/include/asm/page_types.h
index 97e6007..bce688d 100644
--- a/arch/x86/include/asm/page_types.h
+++ b/arch/x86/include/asm/page_types.h
@@ -54,8 +54,6 @@ static inline phys_addr_t get_max_mapped(void)
 extern unsigned long init_memory_mapping(unsigned long start,
					 unsigned long end);
 
-void init_memory_mapping_high(void);
-
 extern void initmem_init(void);
 extern void free_initmem(void);

diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index 46e684f..c3a606c 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -963,6 +963,14 @@ void __init setup_arch(char **cmdline_p)
 	max_low_pfn_mapped = init_memory_mapping(0, max_low_pfn<<PAGE_SHIFT);
 	max_pfn_mapped = max_low_pfn_mapped;
 
+#ifdef CONFIG_X86_64
+	if (max_pfn > max_low_pfn) {
+		max_pfn_mapped = init_memory_mapping(1UL<<32,
+						     max_pfn<<PAGE_SHIFT);
+		/* can we preserve max_low_pfn ? */
+		max_low_pfn = max_pfn;
+	}
+#endif
 	memblock.current_limit = get_max_mapped();

/*
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index 470cc47..c8813aa 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -606,63 +606,9 @@ kernel_physical_mapping_init(unsigned long start,
 void __init initmem_init(void)
 {
 	memblock_x86_register_active_regions(0, 0, max_pfn);
-	init_memory_mapping_high();
 }
 #endif
 
-struct mapping_work_data {
-	unsigned long start;
-	unsigned long end;
-	unsigned long pfn_mapped;
-};
-
-static int __init_refok
-mapping_work_fn(unsigned long start_pfn, unsigned long end_pfn, void *datax)
-{
-	struct mapping_work_data *data = datax;
-	unsigned long pfn_mapped;
-	unsigned long final_start, final_end;
-
-	final_start = max_t(unsigned long, start_pfn<<PAGE_SHIFT, data->start);
-	final_end = min_t(unsigned long, end_pfn<<PAGE_SHIFT, data->end);
-
-	if (final_end <= final_start)
-		return 0;
-
-	pfn_mapped = init_memory_mapping(final_start, final_end);
-
-	if (pfn_mapped > data->pfn_mapped)
-		data->pfn_mapped = pfn_mapped;
-
-	return 0;
-}
-
-static unsigned long __init_refok
-init_memory_mapping_active_regions(unsigned long start, unsigned long end)
-{
-	struct mapping_work_data data;
-
-	data.start = start;
-	data.end = end;
-	data.pfn_mapped = 0;
-
-	work_with_active_regions(MAX_NUMNODES, mapping_work_fn, &data);
-
-	return data.pfn_mapped;
-}
-
-void __init_refok init_memory_mapping_high(void)
-{
-	if (max_pfn > max_low_pfn) {
-		max_pfn_mapped = init_memory_mapping_active_regions(1UL<<32,
-							max_pfn<<PAGE_SHIFT);
-		/* can we preserve max_low_pfn ? */
-		max_low_pfn = max_pfn;
-
-		memblock.current_limit = get_max_mapped();
-	}
-}
-
 void __init paging_init(void)
 {
 	unsigned long max_zone_pfns[MAX_NR_ZONES];
diff --git a/arch/x86/mm/numa_64.c b/arch/x86/mm/numa_64.c
index 74064e8..86491ba 100644
--- a/arch/x86/mm/numa_64.c
+++ b/arch/x86/mm/numa_64.c
@@ -543,8 +543,6 @@ static int __init numa_register_memblks(struct numa_meminfo *mi)
 	if (!numa_meminfo_cover_memory(mi))
 		return -EINVAL;
 
-	init_memory_mapping_high();
-
 	/* Finally register nodes. */
 	for_each_node_mask(nid, node_possible_map) {
 		u64 start = (u64)max_pfn << PAGE_SHIFT;

--
tejun