Re: Early boot oops with 2.5.67-bk(current) on a dual Athlon-MP [Asus A7M266-D] machine

From: Andrew Morton (akpm@digeo.com)
Date: Sun Apr 13 2003 - 18:52:59 EST


Nicholas Wourms <nwourms@myrealbox.com> wrote:
>
> Hi,
>
> Attached is the oops (which results in a panic) when I
> attempt to boot the latest bk current on my machine.

The MCE code is using the workqueue infrastructure far earlier than it
is allowed to. It looks like the timer went off before the workqueue
initialisation had been run.

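This should fix it: instead of starting the non-fatal MCE polling timer
from the per-CPU k7/p4 mcheck init functions, the patch registers
init_nonfatal_mce_checker() via module_init() so that it is started later
in boot.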

 arch/i386/kernel/cpu/mcheck/k7.c | 4 ----
 arch/i386/kernel/cpu/mcheck/mce.h | 2 --
 arch/i386/kernel/cpu/mcheck/non-fatal.c | 5 ++++-
 arch/i386/kernel/cpu/mcheck/p4.c | 3 ---
 4 files changed, 4 insertions(+), 10 deletions(-)

diff -puN arch/i386/kernel/cpu/mcheck/k7.c~mce-workqueue-startup-fix arch/i386/kernel/cpu/mcheck/k7.c
--- 25/arch/i386/kernel/cpu/mcheck/k7.c~mce-workqueue-startup-fix 2003-04-13 16:41:36.000000000 -0700
+++ 25-akpm/arch/i386/kernel/cpu/mcheck/k7.c 2003-04-13 16:42:24.000000000 -0700
@@ -92,8 +92,4 @@ void __init amd_mcheck_init(struct cpuin
         set_in_cr4 (X86_CR4_MCE);
         printk (KERN_INFO "Intel machine check reporting enabled on CPU#%d.\n",
                 smp_processor_id());
-
-#ifdef CONFIG_X86_MCE_NONFATAL
- init_nonfatal_mce_checker();
-#endif
 }
diff -puN arch/i386/kernel/cpu/mcheck/mce.h~mce-workqueue-startup-fix arch/i386/kernel/cpu/mcheck/mce.h
--- 25/arch/i386/kernel/cpu/mcheck/mce.h~mce-workqueue-startup-fix 2003-04-13 16:41:43.000000000 -0700
+++ 25-akpm/arch/i386/kernel/cpu/mcheck/mce.h 2003-04-13 16:42:27.000000000 -0700
@@ -6,8 +6,6 @@ void intel_p5_mcheck_init(struct cpuinfo
 void intel_p6_mcheck_init(struct cpuinfo_x86 *c);
 void winchip_mcheck_init(struct cpuinfo_x86 *c);
 
-void init_nonfatal_mce_checker(void);
-
 /* Call the installed machine check handler for this CPU setup. */
 extern void (*machine_check_vector)(struct pt_regs *, long error_code);
 
diff -puN arch/i386/kernel/cpu/mcheck/non-fatal.c~mce-workqueue-startup-fix arch/i386/kernel/cpu/mcheck/non-fatal.c
--- 25/arch/i386/kernel/cpu/mcheck/non-fatal.c~mce-workqueue-startup-fix 2003-04-13 16:41:50.000000000 -0700
+++ 25-akpm/arch/i386/kernel/cpu/mcheck/non-fatal.c 2003-04-13 16:45:01.000000000 -0700
@@ -11,6 +11,7 @@
 #include <linux/workqueue.h>
 #include <linux/interrupt.h>
 #include <linux/smp.h>
+#include <linux/module.h>
 
 #include <asm/processor.h>
 #include <asm/system.h>
@@ -65,7 +66,7 @@ static void mce_timerfunc (unsigned long
         add_timer (&mce_timer);
 }
 
-void init_nonfatal_mce_checker()
+static int init_nonfatal_mce_checker()
 {
         if (timerset == 0) {
                 /* Set the timer to check for non-fatal
@@ -78,4 +79,6 @@ void init_nonfatal_mce_checker()
                 timerset = 1;
                 printk(KERN_INFO "Machine check exception polling timer started.\n");
         }
+ return 0;
 }
+module_init(init_nonfatal_mce_checker);
diff -puN arch/i386/kernel/cpu/mcheck/p4.c~mce-workqueue-startup-fix arch/i386/kernel/cpu/mcheck/p4.c
--- 25/arch/i386/kernel/cpu/mcheck/p4.c~mce-workqueue-startup-fix 2003-04-13 16:42:03.000000000 -0700
+++ 25-akpm/arch/i386/kernel/cpu/mcheck/p4.c 2003-04-13 16:44:22.000000000 -0700
@@ -255,7 +255,4 @@ void __init intel_p4_mcheck_init(struct
                 intel_init_thermal(c);
 #endif
         }
-#ifdef CONFIG_X86_MCE_NONFATAL
- init_nonfatal_mce_checker();
-#endif
 }

_

-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/



This archive was generated by hypermail 2b29 : Tue Apr 15 2003 - 22:00:29 EST