Re: linux-next: reservetop fix disables mem=

From: Yinghai Lu
Date: Sun Sep 06 2009 - 02:56:31 EST


On Mon, Aug 24, 2009 at 11:27 AM, Ingo Molnar<mingo@xxxxxxx> wrote:
>
> * Yinghai Lu <yinghai@xxxxxxxxxx> wrote:
>
>> Hugh Dickins wrote:
>> > I find the "mem=" boot parameter disabled in today's linux-next:
>> > reverting the tip commit below fixes that.
>> >
>> > Hugh
>> >
>> > From: Xiao Guangrong <xiaoguangrong@xxxxxxxxxxxxxx>
>> > Date: Thu, 20 Aug 2009 12:23:11 +0000 (+0800)
>> > Subject: x86: Fix system crash when loading with "reservetop" parameter
>> > X-Git-Url: http://git.kernel.org/?p=linux%2Fkernel%2Fgit%2Fmingo%2Flinux-2.6-x86.git;a=commitdiff_plain;h=8126dec32738421afa362114337331337b4be17f
>> >
>> > x86: Fix system crash when loading with "reservetop" parameter
>> >
>> > The system will die if the kernel is booted with "reservetop"
>> > parameter, in present code, parse "reservetop" parameter after
>> > early_ioremap_init(), and some function still use
>> > early_ioremap() after it.
>> >
>> > The problem is, "reservetop" parameter can modify
>> > 'FIXADDR_TOP', then the virtual address got by early_ioremap()
>> > is base on old 'FIXADDR_TOP', but the page mapping is base on
>> > new 'FIXADDR_TOP', it will occur page fault, and the IDT is not
>> > prepare yet, so, the system is dead.
>> >
>> > So, put parse_early_param() in the front of
>> > early_ioremap_init() in this patch.
>> >
>> > Signed-off-by: Xiao Guangrong <xiaoguangrong@xxxxxxxxxxxxxx>
>> > Cc: yinghai@xxxxxxxxxx
>> > Cc: Andrew Morton <akpm@xxxxxxxxxxxxxxxxxxxx>
>> > LKML-Reference: <4A8D402F.4080805@xxxxxxxxxxxxxx>
>> > Signed-off-by: Ingo Molnar <mingo@xxxxxxx>
>> > ---
>> >
>> > diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
>> > index 63f32d2..02643cc 100644
>> > --- a/arch/x86/kernel/setup.c
>> > +++ b/arch/x86/kernel/setup.c
>> > @@ -711,6 +711,11 @@ void __init setup_arch(char **cmdline_p)
>> >     printk(KERN_INFO "Command line: %s\n", boot_command_line);
>> >  #endif
>> >
>> > +   strlcpy(command_line, boot_command_line, COMMAND_LINE_SIZE);
>> > +   *cmdline_p = command_line;
>> > +
>> > +   parse_early_param();
>> > +
>> >     /* VMI may relocate the fixmap; do this before touching ioremap area */
>> >     vmi_init();
>> >
>> > @@ -793,11 +798,6 @@ void __init setup_arch(char **cmdline_p)
>> >  #endif
>> >  #endif
>> >
>> > -   strlcpy(command_line, boot_command_line, COMMAND_LINE_SIZE);
>> > -   *cmdline_p = command_line;
>> > -
>> > -   parse_early_param();
>> > -
>> >  #ifdef CONFIG_X86_64
>> >     check_efer();
>> >  #endif
>>
>> yes, that patch will break other built-in command too.
>>
>> need drop that patch.
>
> done. Was nervous about the patch already:
>
>  http://lkml.org/lkml/2009/8/21/127
>
>> also the problem was caused by vmi patch, and that commit should
>> be reverted.
>>
>> commit ae8d04e2ecbb233926860e9ce145eac19c7835dc
>> Author: Zachary Amsden <zach@xxxxxxxxxx>
>> Date:   Sat Dec 13 12:36:58 2008 -0800
>>
>>     x86 Fix VMI crash on boot in 2.6.28-rc8
>>
>>     VMI initialiation can relocate the fixmap, causing early_ioremap to
>>     malfunction if it is initialized before the relocation.  To fix this,
>>     VMI activation is split into two phases; the detection, which must
>>     happen before setting up ioremap, and the activation, which must happen
>>     after parsing early boot parameters.
>>
>>     This fixes a crash on boot when VMI is enabled under VMware.
>>
>>     Signed-off-by: Zachary Amsden <zach@xxxxxxxxxx>
>>     Signed-off-by: Linus Torvalds <torvalds@xxxxxxxxxxxxxxxxxxxx>
>>
>> diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
>> index 9d5674f..bdec76e 100644
>> --- a/arch/x86/kernel/setup.c
>> +++ b/arch/x86/kernel/setup.c
>> @@ -794,6 +794,9 @@ void __init setup_arch(char **cmdline_p)
>>         printk(KERN_INFO "Command line: %s\n", boot_command_line);
>>  #endif
>>
>> +       /* VMI may relocate the fixmap; do this before touching ioremap area */
>> +       vmi_init();
>> +
>>         early_cpu_init();
>>         early_ioremap_init();
>>
>> @@ -880,13 +883,8 @@ void __init setup_arch(char **cmdline_p)
>>         check_efer();
>>  #endif
>>
>> -#if defined(CONFIG_VMI) && defined(CONFIG_X86_32)
>> -       /*
>> -        * Must be before kernel pagetables are setup
>> -        * or fixmap area is touched.
>> -        */
>> -       vmi_init();
>> -#endif
>> +       /* Must be before kernel pagetables are setup */
>> +       vmi_activate();
>>
>>         /* after early param, so could get panic from serial */
>>         reserve_early_setup_data();
>>
>>
>> and according to
>> http://lkml.org/lkml/2008/12/10/388
>> http://lkml.org/lkml/2008/12/10/456
>>
>> Zachary should split reserve_top_address() to two functions...
>> before sending that patch to Linus
>
> mind looking at this yourself if interested? Zachary has not been
> active for quite some time.
>

please check attached patch...

Still open: do we need to do something for early_dbgp and early_console, which would use early_ioremap?

YH
[PATCH] x86: fix early_param handling that was moved earlier

The patch that moved parse_early_param() earlier, to make reservetop work again,
is reported to break mem=...

On closer inspection, it breaks more than that:
1. some CPU feature checks in the early stage, like cpu_has_x2apic
2. the built-in command line
3. the other memmap= and mem= options
4. early_dbgp and early_console, which will use early_ioremap to access MMIO (?)

So try to:
1. move early_cpu_init() earlier
2. move the built-in command line copying earlier
3. add userdef_mem_size to remember mem=, and add e820_user to remember
   memmap=, then apply userdef_mem_size and e820_user in finish_e820_parsing()
4. ?

Signed-off-by: Yinghai Lu <yinghai@xxxxxxxxxx>

---
arch/x86/kernel/e820.c | 52 ++++++++++++++++++++++++++++++++----------------
arch/x86/kernel/setup.c | 39 ++++++++++++++++++------------------
2 files changed, 55 insertions(+), 36 deletions(-)

Index: linux-2.6/arch/x86/kernel/e820.c
===================================================================
--- linux-2.6.orig/arch/x86/kernel/e820.c
+++ linux-2.6/arch/x86/kernel/e820.c
@@ -44,6 +44,7 @@
*/
struct e820map e820;
struct e820map e820_saved;
+static struct e820map e820_user __initdata;

/* For PCI or other memory-mapped resources */
unsigned long pci_mem_start = 0xaeedbabe;
@@ -1227,11 +1228,11 @@ static void early_panic(char *msg)
}

static int userdef __initdata;
+static u64 userdef_mem_size __initdata;

/* "mem=nopentium" disables the 4MB page tables. */
static int __init parse_memopt(char *p)
{
- u64 mem_size;

if (!p)
return -EINVAL;
@@ -1244,13 +1245,13 @@ static int __init parse_memopt(char *p)
#endif

userdef = 1;
- mem_size = memparse(p, &p);
- e820_remove_range(mem_size, ULLONG_MAX - mem_size, E820_RAM, 1);
+ userdef_mem_size = memparse(p, &p);

return 0;
}
early_param("mem", parse_memopt);

+static int userdef_exactmap __initdata;
static int __init parse_memmap_opt(char *p)
{
char *oldp;
@@ -1260,16 +1261,9 @@ static int __init parse_memmap_opt(char
return -EINVAL;

if (!strncmp(p, "exactmap", 8)) {
-#ifdef CONFIG_CRASH_DUMP
- /*
- * If we are doing a crash dump, we still need to know
- * the real mem size before original memory map is
- * reset.
- */
- saved_max_pfn = e820_end_of_ram_pfn();
-#endif
- e820.nr_map = 0;
+ e820_user.nr_map = 0;
userdef = 1;
+ userdef_exactmap = 1;
return 0;
}

@@ -1281,15 +1275,16 @@ static int __init parse_memmap_opt(char
userdef = 1;
if (*p == '@') {
start_at = memparse(p+1, &p);
- e820_add_region(start_at, mem_size, E820_RAM);
+ __e820_add_region(&e820_user, start_at, mem_size, E820_RAM);
} else if (*p == '#') {
start_at = memparse(p+1, &p);
- e820_add_region(start_at, mem_size, E820_ACPI);
+ __e820_add_region(&e820_user, start_at, mem_size, E820_ACPI);
} else if (*p == '$') {
start_at = memparse(p+1, &p);
- e820_add_region(start_at, mem_size, E820_RESERVED);
+ __e820_add_region(&e820_user, start_at, mem_size,
+ E820_RESERVED);
} else
- e820_remove_range(mem_size, ULLONG_MAX - mem_size, E820_RAM, 1);
+ userdef_mem_size = mem_size;

return *p == '\0' ? 0 : -EINVAL;
}
@@ -1298,7 +1293,30 @@ early_param("memmap", parse_memmap_opt);
void __init finish_e820_parsing(void)
{
if (userdef) {
- u32 nr = e820.nr_map;
+ u32 nr;
+
+ if (userdef_exactmap) {
+#ifdef CONFIG_CRASH_DUMP
+ /*
+ * If we are doing a crash dump, we still need to know
+ * the real mem size before original memory map is
+ * reset.
+ */
+ saved_max_pfn = e820_end_of_ram_pfn();
+#endif
+ /* good, just use e820_user instead */
+ memcpy(&e820, &e820_user, sizeof(struct e820map));
+ } else {
+ /* ok, need to append e820_user to e820 */
+ __append_e820_map(e820_user.map, e820_user.nr_map);
+ }
+
+ nr = e820.nr_map;
+
+ if (userdef_mem_size)
+ e820_remove_range(userdef_mem_size,
+ ULLONG_MAX - userdef_mem_size,
+ E820_RAM, 1);

if (sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &nr) < 0)
early_panic("Invalid user supplied memory map");
Index: linux-2.6/arch/x86/kernel/setup.c
===================================================================
--- linux-2.6.orig/arch/x86/kernel/setup.c
+++ linux-2.6/arch/x86/kernel/setup.c
@@ -693,13 +693,28 @@ void __init setup_arch(char **cmdline_p)
#ifdef CONFIG_X86_32
memcpy(&boot_cpu_data, &new_cpu_data, sizeof(new_cpu_data));
visws_early_detect();
+#endif
+
+#ifdef CONFIG_CMDLINE_BOOL
+#ifdef CONFIG_CMDLINE_OVERRIDE
+ strlcpy(boot_command_line, builtin_cmdline, COMMAND_LINE_SIZE);
#else
- printk(KERN_INFO "Command line: %s\n", boot_command_line);
+ if (builtin_cmdline[0]) {
+ /* append boot loader cmdline to builtin */
+ strlcat(builtin_cmdline, " ", COMMAND_LINE_SIZE);
+ strlcat(builtin_cmdline, boot_command_line, COMMAND_LINE_SIZE);
+ strlcpy(boot_command_line, builtin_cmdline, COMMAND_LINE_SIZE);
+ }
+#endif
#endif

+ printk(KERN_INFO "Command line: %s\n", boot_command_line);
+
strlcpy(command_line, boot_command_line, COMMAND_LINE_SIZE);
*cmdline_p = command_line;

+ early_cpu_init();
+
#ifdef CONFIG_X86_64
/*
* Must call this twice: Once just to detect whether hardware doesn't
@@ -712,10 +727,13 @@ void __init setup_arch(char **cmdline_p)

parse_early_param();

+#ifdef CONFIG_X86_64
+ check_efer();
+#endif
+
/* VMI may relocate the fixmap; do this before touching ioremap area */
vmi_init();

- early_cpu_init();
early_ioremap_init();

ROOT_DEV = old_decode_dev(boot_params.hdr.root_dev);
@@ -781,23 +799,6 @@ void __init setup_arch(char **cmdline_p)
bss_resource.start = virt_to_phys(&__bss_start);
bss_resource.end = virt_to_phys(&__bss_stop)-1;

-#ifdef CONFIG_CMDLINE_BOOL
-#ifdef CONFIG_CMDLINE_OVERRIDE
- strlcpy(boot_command_line, builtin_cmdline, COMMAND_LINE_SIZE);
-#else
- if (builtin_cmdline[0]) {
- /* append boot loader cmdline to builtin */
- strlcat(builtin_cmdline, " ", COMMAND_LINE_SIZE);
- strlcat(builtin_cmdline, boot_command_line, COMMAND_LINE_SIZE);
- strlcpy(boot_command_line, builtin_cmdline, COMMAND_LINE_SIZE);
- }
-#endif
-#endif
-
-#ifdef CONFIG_X86_64
- check_efer();
-#endif
-
/* Must be before kernel pagetables are setup */
vmi_activate();