Re: [patch] tip/x86_64: fix e820 merge issue which broke max_pfn_mapped

From: Yinghai Lu
Date: Wed Jul 09 2008 - 14:06:00 EST


On Wed, Jul 9, 2008 at 10:56 AM, Suresh Siddha
<suresh.b.siddha@xxxxxxxxx> wrote:
> On Tue, Jul 08, 2008 at 06:56:38PM -0700, Yinghai Lu wrote:
>> On Tue, Jul 8, 2008 at 5:59 PM, Yinghai Lu <yhlu.kernel@xxxxxxxxx> wrote:
>> > On Tue, Jul 8, 2008 at 4:08 PM, Suresh Siddha <suresh.b.siddha@xxxxxxxxx> wrote:
>> >> Without this, 64bit tip/master doesn't boot using ACPI on my system.
>> >> ---
>> >>
>> >> max_pfn_mapped should include all e820 entries.
>> >> The direct mapping extends to max_pfn_mapped, so that we can directly access
>> >> apertures, ACPI and other tables without having to play with fixmaps.
>> >>
>> >> With this, my system with 1GB memory boots fine with ACPI enabled.
>> >
>> > so without this patch, your system doesn't boot?
>>
>> how about attached patch?
>>
>> [PATCH] x86: make max_pfn cover acpi table below 4g
>>
>> When a system has less than 4G of RAM installed and the ACPI tables sit
>> near the end of RAM, make max_pfn cover them too,
>> so the 64bit kernel doesn't need to mess with the fixmap.
>
> Now the latest 64bit x86 tip/master (latest commit d1f7cb8) doesn't boot
> on any of my test systems :( It gets a very early exception..

fix one panic on Ingo's machine

>
> I can't even revert your max_pfn patch, to see if this early exception is
> caused by this patch.. There seems to be more changes on top of it
> already overnight...
>
> BTW, please explain the need for your patch which has more changes, instead
> of my simple patch which was test booted on 3 different systems with both
> 32bit and 64bit kernels...

I was trying to reduce the #ifdef CONFIG_X86_64/32 blocks and keep the 32-bit and 64-bit code on the same page.

This could be a regression from Jeremy's early_ioremap unification.

please check attached revert patch.

YH


YH
Revert the early_ioremap unification.

Also remove the extra coverage, so that e820_end() returns the end of the highest RAM-type entry on 64-bit too.


diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c
index 66fd5bd..9836a07 100644
--- a/arch/x86/kernel/e820.c
+++ b/arch/x86/kernel/e820.c
@@ -1066,10 +1066,8 @@ unsigned long __init e820_end(void)
struct e820entry *ei = &e820.map[i];
unsigned long end_pfn;

-#ifdef CONFIG_X86_32
if (ei->type != E820_RAM)
continue;
-#endif

end_pfn = (ei->addr + ei->size) >> PAGE_SHIFT;
if (end_pfn > last_pfn)
diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S
index 2240f82..db3280a 100644
--- a/arch/x86/kernel/head_64.S
+++ b/arch/x86/kernel/head_64.S
@@ -362,6 +362,12 @@ NEXT_PAGE(level3_kernel_pgt)
.quad level2_fixmap_pgt - __START_KERNEL_map + _PAGE_TABLE

NEXT_PAGE(level2_fixmap_pgt)
+ .fill 506,8,0
+ .quad level1_fixmap_pgt - __START_KERNEL_map + _PAGE_TABLE
+ /* 8MB reserved for vsyscalls + a 2MB hole = 4 + 1 entries */
+ .fill 5,8,0
+
+NEXT_PAGE(level1_fixmap_pgt)
.fill 512,8,0

NEXT_PAGE(level2_ident_pgt)
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index d418794..149ff9a 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -598,12 +598,11 @@ void __init setup_arch(char **cmdline_p)
memcpy(&boot_cpu_data, &new_cpu_data, sizeof(new_cpu_data));
pre_setup_arch_hook();
early_cpu_init();
+ early_ioremap_init();
#else
printk(KERN_INFO "Command line: %s\n", boot_command_line);
#endif

- early_ioremap_init();
-
ROOT_DEV = old_decode_dev(boot_params.hdr.root_dev);
screen_info = boot_params.screen_info;
edid_info = boot_params.edid_info;
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index 246a2b2..70187fa 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -339,6 +339,58 @@ phys_pte_update(pmd_t *pmd, unsigned long address, unsigned long end)
phys_pte_init(pte, address, end);
}

+/* Must run before zap_low_mappings */
+__meminit void *early_ioremap(unsigned long addr, unsigned long size)
+{
+ pmd_t *pmd, *last_pmd;
+ unsigned long vaddr;
+ int i, pmds;
+
+ pmds = ((addr & ~PMD_MASK) + size + ~PMD_MASK) / PMD_SIZE;
+ vaddr = __START_KERNEL_map;
+ pmd = level2_kernel_pgt;
+ last_pmd = level2_kernel_pgt + PTRS_PER_PMD - 1;
+
+ for (; pmd <= last_pmd; pmd++, vaddr += PMD_SIZE) {
+ for (i = 0; i < pmds; i++) {
+ if (pmd_present(pmd[i]))
+ goto continue_outer_loop;
+ }
+ vaddr += addr & ~PMD_MASK;
+ addr &= PMD_MASK;
+
+ for (i = 0; i < pmds; i++, addr += PMD_SIZE)
+ set_pmd(pmd+i, __pmd(addr | __PAGE_KERNEL_LARGE_EXEC));
+ __flush_tlb_all();
+
+ return (void *)vaddr;
+continue_outer_loop:
+ ;
+ }
+ printk(KERN_ERR "early_ioremap(0x%lx, %lu) failed\n", addr, size);
+
+ return NULL;
+}
+
+/*
+ * To avoid virtual aliases later:
+ */
+__meminit void early_iounmap(void *addr, unsigned long size)
+{
+ unsigned long vaddr;
+ pmd_t *pmd;
+ int i, pmds;
+
+ vaddr = (unsigned long)addr;
+ pmds = ((vaddr & ~PMD_MASK) + size + ~PMD_MASK) / PMD_SIZE;
+ pmd = level2_kernel_pgt + pmd_index(vaddr);
+
+ for (i = 0; i < pmds; i++)
+ pmd_clear(pmd + i);
+
+ __flush_tlb_all();
+}
+
static unsigned long __meminit
phys_pmd_init(pmd_t *pmd_page, unsigned long address, unsigned long end,
unsigned long page_size_mask)
diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c
index 6a05a33..47719ec 100644
--- a/arch/x86/mm/ioremap.c
+++ b/arch/x86/mm/ioremap.c
@@ -409,6 +409,8 @@ void unxlate_dev_mem_ptr(unsigned long phys, void *addr)
return;
}

+#ifdef CONFIG_X86_32
+
int __initdata early_ioremap_debug;

static int __init early_ioremap_debug_setup(char *str)
@@ -509,7 +511,6 @@ static void __init __early_set_fixmap(enum fixed_addresses idx,
return;
}
pte = early_ioremap_pte(addr);
-
if (pgprot_val(flags))
set_pte(pte, pfn_pte(phys >> PAGE_SHIFT, flags));
else
@@ -649,3 +650,5 @@ void __this_fixmap_does_not_exist(void)
{
WARN_ON(1);
}
+
+#endif /* CONFIG_X86_32 */
diff --git a/include/asm-x86/fixmap_64.h b/include/asm-x86/fixmap_64.h
index 1a0b61e..7594346 100644
--- a/include/asm-x86/fixmap_64.h
+++ b/include/asm-x86/fixmap_64.h
@@ -52,19 +52,6 @@ enum fixed_addresses {
#ifdef CONFIG_PROVIDE_OHCI1394_DMA_INIT
FIX_OHCI1394_BASE,
#endif
- __end_of_permanent_fixed_addresses,
- /*
- * 256 temporary boot-time mappings, used by early_ioremap(),
- * before ioremap() is functional.
- *
- * We round it up to the next 512 pages boundary so that we
- * can have a single pgd entry and a single pte table:
- */
-#define NR_FIX_BTMAPS 64
-#define FIX_BTMAPS_NESTING 4
- FIX_BTMAP_END = __end_of_permanent_fixed_addresses + 512 -
- (__end_of_permanent_fixed_addresses & 511),
- FIX_BTMAP_BEGIN = FIX_BTMAP_END + NR_FIX_BTMAPS*FIX_BTMAPS_NESTING - 1,
__end_of_fixed_addresses
};

diff --git a/include/asm-x86/io.h b/include/asm-x86/io.h
index bf5d629..00e5f1e 100644
--- a/include/asm-x86/io.h
+++ b/include/asm-x86/io.h
@@ -86,17 +86,4 @@ extern int ioremap_change_attr(unsigned long vaddr, unsigned long size,
unsigned long prot_val);
extern void __iomem *ioremap_wc(unsigned long offset, unsigned long size);

-/*
- * early_ioremap() and early_iounmap() are for temporary early boot-time
- * mappings, before the real ioremap() is functional.
- * A boot-time mapping is currently limited to at most 16 pages.
- */
-extern void early_ioremap_init(void);
-extern void early_ioremap_clear(void);
-extern void early_ioremap_reset(void);
-extern void *early_ioremap(unsigned long offset, unsigned long size);
-extern void early_iounmap(void *addr, unsigned long size);
-extern void __iomem *fix_ioremap(unsigned idx, unsigned long phys);
-
-
#endif /* _ASM_X86_IO_H */
diff --git a/include/asm-x86/io_32.h b/include/asm-x86/io_32.h
index 4df44ed..d71be8d 100644
--- a/include/asm-x86/io_32.h
+++ b/include/asm-x86/io_32.h
@@ -122,6 +122,18 @@ static inline void __iomem *ioremap(resource_size_t offset, unsigned long size)
extern void iounmap(volatile void __iomem *addr);

/*
+ * early_ioremap() and early_iounmap() are for temporary early boot-time
+ * mappings, before the real ioremap() is functional.
+ * A boot-time mapping is currently limited to at most 16 pages.
+ */
+extern void early_ioremap_init(void);
+extern void early_ioremap_clear(void);
+extern void early_ioremap_reset(void);
+extern void *early_ioremap(unsigned long offset, unsigned long size);
+extern void early_iounmap(void *addr, unsigned long size);
+extern void __iomem *fix_ioremap(unsigned idx, unsigned long phys);
+
+/*
* ISA I/O bus memory addresses are 1:1 with the physical address.
*/
#define isa_virt_to_bus virt_to_phys