Re: 2.6.18-mm2 boot failure on x86-64

From: Andi Kleen
Date: Thu Oct 05 2006 - 14:27:33 EST


On Thursday 05 October 2006 19:57, Steve Fox wrote:
> On Thu, 2006-10-05 at 17:40 +0200, Andi Kleen wrote:
>
> > Please don't snip the Code: line. It is fairly important.
>
> Sorry about that. The remote console I was using appears to overwrite
> some text after I force the reboot. Here's a clean one.
>
> global ffffffffffffffff

OK, that definitely shouldn't be in there.

I guess we need to track down when it gets corrupted. Can you send the full
boot log with this patch applied?


-Andi

Index: linux-2.6.19-rc1-hack/init/main.c
===================================================================
--- linux-2.6.19-rc1-hack.orig/init/main.c
+++ linux-2.6.19-rc1-hack/init/main.c
@@ -75,6 +75,9 @@

static int init(void *);

+extern void bugcheck(char *, int);
+#define CHECK bugcheck(__FILE__, __LINE__)
+
extern void init_IRQ(void);
extern void fork_init(unsigned long);
extern void mca_init(void);
@@ -480,6 +483,8 @@ asmlinkage void __init start_kernel(void
char * command_line;
extern struct kernel_param __start___param[], __stop___param[];

+ CHECK;
+
smp_setup_processor_id();

/*
@@ -502,7 +507,9 @@ asmlinkage void __init start_kernel(void
page_address_init();
printk(KERN_NOTICE);
printk(linux_banner);
+ CHECK;
setup_arch(&command_line);
+ CHECK;
setup_per_cpu_areas();
smp_prepare_boot_cpu(); /* arch-specific boot-cpu hooks */

@@ -517,6 +524,7 @@ asmlinkage void __init start_kernel(void
* fragile until we cpu_idle() for the first time.
*/
preempt_disable();
+ CHECK;
build_all_zonelists();
page_alloc_init();
printk(KERN_NOTICE "Kernel command line: %s\n", saved_command_line);
@@ -525,6 +533,7 @@ asmlinkage void __init start_kernel(void
__stop___param - __start___param,
&unknown_bootoption);
sort_main_extable();
+ CHECK;
trap_init();
rcu_init();
init_IRQ();
@@ -533,8 +542,10 @@ asmlinkage void __init start_kernel(void
hrtimers_init();
softirq_init();
timekeeping_init();
+ CHECK;
time_init();
profile_init();
+ CHECK;
if (!irqs_disabled())
printk("start_kernel(): bug: interrupts were enabled early\n");
early_boot_irqs_on();
@@ -568,7 +579,9 @@ asmlinkage void __init start_kernel(void
#endif
vfs_caches_init_early();
cpuset_init_early();
+ CHECK;
mem_init();
+ CHECK;
kmem_cache_init();
setup_per_cpu_pageset();
numa_policy_init();
@@ -577,6 +590,7 @@ asmlinkage void __init start_kernel(void
calibrate_delay();
pidmap_init();
pgtable_cache_init();
+ CHECK;
prio_tree_init();
anon_vma_init();
#ifdef CONFIG_X86
@@ -586,12 +600,14 @@ asmlinkage void __init start_kernel(void
fork_init(num_physpages);
proc_caches_init();
buffer_init();
+ CHECK;
unnamed_dev_init();
key_init();
security_init();
vfs_caches_init(num_physpages);
radix_tree_init();
signals_init();
+ CHECK;
/* rootfs populating might need page-writeback */
page_writeback_init();
#ifdef CONFIG_PROC_FS
@@ -599,6 +615,7 @@ asmlinkage void __init start_kernel(void
#endif
cpuset_init();
taskstats_init_early();
+ CHECK;
delayacct_init();

check_bugs();
@@ -609,7 +626,7 @@ asmlinkage void __init start_kernel(void
rest_init();
}

-static int __initdata initcall_debug;
+static int __initdata initcall_debug = 1;

static int __init initcall_debug_setup(char *str)
{
@@ -639,7 +656,11 @@ static void __init do_initcalls(void)
printk("\n");
}

+ CHECK;
+
result = (*call)();
+
+ CHECK;

if (result && result != -ENODEV && initcall_debug) {
sprintf(msgbuf, "error code %d", result);
@@ -725,21 +746,32 @@ static int init(void * unused)

smp_prepare_cpus(max_cpus);

+ CHECK;
+
do_pre_smp_initcalls();

smp_init();
+
+ CHECK;
+
sched_init_smp();

cpuset_init_smp();

+ CHECK;
+
/*
* Do this before initcalls, because some drivers want to access
* firmware files.
*/
populate_rootfs();

+ CHECK;
+
do_basic_setup();

+ CHECK;
+
/*
* check if there is an early userspace init. If yes, let it do all
* the work
Index: linux-2.6.19-rc1-hack/net/xfrm/xfrm_policy.c
===================================================================
--- linux-2.6.19-rc1-hack.orig/net/xfrm/xfrm_policy.c
+++ linux-2.6.19-rc1-hack/net/xfrm/xfrm_policy.c
@@ -39,6 +39,16 @@ EXPORT_SYMBOL(xfrm_policy_count);
static DEFINE_RWLOCK(xfrm_policy_afinfo_lock);
static struct xfrm_policy_afinfo *xfrm_policy_afinfo[NPROTO];

+void bugcheck(char *where, int line) /* debug aid: log the call site (where:line) if any xfrm_policy_afinfo[] entry equals (void *)-1UL */
+{
+ int i;
+ for (i = 0; i < NPROTO; i++) /* visit every slot of the NPROTO-sized table */
+ if (xfrm_policy_afinfo[i] == (void *)-1UL) { /* -1UL converted to a pointer, i.e. all bits set */
+ printk("afinfo corrupted at %s:%d\n",where,line);
+ return; /* report only the first matching slot per call */
+ }
+}
+
static kmem_cache_t *xfrm_dst_cache __read_mostly;

static struct work_struct xfrm_policy_gc_work;
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/