Re: 2.6.26-rc: SPARC: Sun Ultra 10 can not boot

From: David Miller
Date: Wed Aug 13 2008 - 23:53:47 EST


From: "Alexander Beregalov" <a.beregalov@xxxxxxxxx>
Date: Fri, 8 Aug 2008 15:52:53 +0400

> 2008/8/8 David Miller <davem@xxxxxxxxxxxxx>:
> > This will allow you to see the crash message.
> Yes, I saw it.
> There were few WARNINGS at lib/list_debug.c:__list_add
> That messages went fast, I can not see it now.
> Now I see call trace:
> __free_pages_ok
> __free_pages
> __free_pages_bootmem
> free_all_bootmem_core
> free_all_bootmem
> mem_init
> start_kernel
> tlb_fixup_done
>
> Can it be helpful?

Mikulas Patocka is seeing the same bug (see thread "Re: console
handover badness"). I just posted the following patch there, which
can help track this down.

Please try it out on your machine too.

BTW, how much ram is in your system?

Thanks.

diff --git a/arch/sparc64/mm/init.c b/arch/sparc64/mm/init.c
index 217de3e..26b018f 100644
--- a/arch/sparc64/mm/init.c
+++ b/arch/sparc64/mm/init.c
@@ -1643,6 +1643,8 @@ void __init setup_per_cpu_areas(void)
{
}

+extern void sparse_validate_usemap(const char *file, int line);
+
void __init paging_init(void)
{
unsigned long end_pfn, shift, phys_base;
@@ -1788,7 +1790,9 @@ void __init paging_init(void)
#ifndef CONFIG_NEED_MULTIPLE_NODES
max_mapnr = last_valid_pfn;
#endif
+ sparse_validate_usemap(__FILE__, __LINE__);
kernel_physical_mapping_init();
+ sparse_validate_usemap(__FILE__, __LINE__);

{
unsigned long max_zone_pfns[MAX_NR_ZONES];
@@ -1798,12 +1802,15 @@ void __init paging_init(void)
max_zone_pfns[ZONE_NORMAL] = end_pfn;

free_area_init_nodes(max_zone_pfns);
+ sparse_validate_usemap(__FILE__, __LINE__);
}

printk("Booting Linux...\n");

central_probe();
+ sparse_validate_usemap(__FILE__, __LINE__);
cpu_probe();
+ sparse_validate_usemap(__FILE__, __LINE__);
}

int __init page_in_phys_avail(unsigned long paddr)
diff --git a/init/main.c b/init/main.c
index 0bc7e16..80771f5 100644
--- a/init/main.c
+++ b/init/main.c
@@ -536,6 +536,8 @@ void __init __weak thread_info_cache_init(void)
{
}

+extern void sparse_validate_usemap(const char *file, int line);
+
asmlinkage void __init start_kernel(void)
{
char * command_line;
@@ -567,12 +569,19 @@ asmlinkage void __init start_kernel(void)
printk(KERN_NOTICE);
printk(linux_banner);
setup_arch(&command_line);
+ sparse_validate_usemap(__FILE__, __LINE__);
mm_init_owner(&init_mm, &init_task);
+ sparse_validate_usemap(__FILE__, __LINE__);
setup_command_line(command_line);
+ sparse_validate_usemap(__FILE__, __LINE__);
unwind_setup();
+ sparse_validate_usemap(__FILE__, __LINE__);
setup_per_cpu_areas();
+ sparse_validate_usemap(__FILE__, __LINE__);
setup_nr_cpu_ids();
+ sparse_validate_usemap(__FILE__, __LINE__);
smp_prepare_boot_cpu(); /* arch-specific boot-cpu hooks */
+ sparse_validate_usemap(__FILE__, __LINE__);

/*
* Set up the scheduler prior starting any interrupts (such as the
@@ -580,35 +589,52 @@ asmlinkage void __init start_kernel(void)
* time - but meanwhile we still have a functioning scheduler.
*/
sched_init();
+ sparse_validate_usemap(__FILE__, __LINE__);
/*
* Disable preemption - early bootup scheduling is extremely
* fragile until we cpu_idle() for the first time.
*/
preempt_disable();
build_all_zonelists();
+ sparse_validate_usemap(__FILE__, __LINE__);
page_alloc_init();
+ sparse_validate_usemap(__FILE__, __LINE__);
printk(KERN_NOTICE "Kernel command line: %s\n", boot_command_line);
parse_early_param();
+ sparse_validate_usemap(__FILE__, __LINE__);
parse_args("Booting kernel", static_command_line, __start___param,
__stop___param - __start___param,
&unknown_bootoption);
+ sparse_validate_usemap(__FILE__, __LINE__);
if (!irqs_disabled()) {
printk(KERN_WARNING "start_kernel(): bug: interrupts were "
"enabled *very* early, fixing it\n");
local_irq_disable();
}
sort_main_extable();
+ sparse_validate_usemap(__FILE__, __LINE__);
trap_init();
+ sparse_validate_usemap(__FILE__, __LINE__);
rcu_init();
+ sparse_validate_usemap(__FILE__, __LINE__);
init_IRQ();
+ sparse_validate_usemap(__FILE__, __LINE__);
pidhash_init();
+ sparse_validate_usemap(__FILE__, __LINE__);
init_timers();
+ sparse_validate_usemap(__FILE__, __LINE__);
hrtimers_init();
+ sparse_validate_usemap(__FILE__, __LINE__);
softirq_init();
+ sparse_validate_usemap(__FILE__, __LINE__);
timekeeping_init();
+ sparse_validate_usemap(__FILE__, __LINE__);
time_init();
+ sparse_validate_usemap(__FILE__, __LINE__);
sched_clock_init();
+ sparse_validate_usemap(__FILE__, __LINE__);
profile_init();
+ sparse_validate_usemap(__FILE__, __LINE__);
if (!irqs_disabled())
printk("start_kernel(): bug: interrupts were enabled early\n");
early_boot_irqs_on();
@@ -620,10 +646,12 @@ asmlinkage void __init start_kernel(void)
* this. But we do want output early, in case something goes wrong.
*/
console_init();
+ sparse_validate_usemap(__FILE__, __LINE__);
if (panic_later)
panic(panic_later, panic_param);

lockdep_info();
+ sparse_validate_usemap(__FILE__, __LINE__);

/*
* Need to run this when irqs are enabled, because it wants
@@ -631,6 +659,7 @@ asmlinkage void __init start_kernel(void)
* too:
*/
locking_selftest();
+ sparse_validate_usemap(__FILE__, __LINE__);

#ifdef CONFIG_BLK_DEV_INITRD
if (initrd_start && !initrd_below_start_ok &&
@@ -643,7 +672,9 @@ asmlinkage void __init start_kernel(void)
}
#endif
vfs_caches_init_early();
+ sparse_validate_usemap(__FILE__, __LINE__);
cpuset_init_early();
+ sparse_validate_usemap(__FILE__, __LINE__);
mem_init();
enable_debug_pagealloc();
cpu_hotplug_init();
diff --git a/mm/sparse.c b/mm/sparse.c
index 5d9dbbb..116559c 100644
--- a/mm/sparse.c
+++ b/mm/sparse.c
@@ -262,6 +262,52 @@ unsigned long usemap_size(void)
return size_bytes;
}

+#if 1
+static int check_one_blockval(unsigned long *bitmap, unsigned long off, unsigned long nbits)
+{ /* Validate one nbits-wide field of bitmap starting at bit index off; returns 1 (after logging) if it is bogus, 0 otherwise. */
+ unsigned long i, value = 1, flags = 0;
+ /* Assemble the field: bit (off + i) of bitmap becomes bit i of flags. */
+ for (i = 0; i < nbits; i++, value <<= 1)
+ if (test_bit(off + i, bitmap))
+ flags |= value;
+ /* Any assembled value at or above MIGRATE_TYPES is treated as corruption. */
+ if (flags >= MIGRATE_TYPES) {
+ printk(KERN_ERR "BUG: Bogus migrate type %lu\n", flags);
+ return 1;
+ }
+ return 0;
+}
+
+void sparse_validate_usemap(const char *file, int line)
+{ /* Debug aid: scan the pageblock_flags bitmap of every present section in 3-bit steps and log a corrupted section together with the caller, file and line. */
+ void *caller = __builtin_return_address(0);
+ unsigned long size = usemap_size();
+ unsigned long pnum;
+ static int reported = 0;
+ /* Once any corruption has been reported, every later call returns immediately. */
+ if (reported)
+ return;
+ /* Walk all section numbers; sections that are not present are skipped. */
+ for (pnum = 0; pnum < NR_MEM_SECTIONS; pnum++) {
+ struct mem_section *ms;
+ unsigned long *bitmap;
+ unsigned long off;
+ /* NOTE(review): off below is a bit index, but size comes from usemap_size(), which returns size_bytes -- looks like only the first `size` bits are scanned; confirm. */
+ if (!present_section_nr(pnum))
+ continue;
+ ms = __nr_to_section(pnum);
+ bitmap = ms->pageblock_flags;
+ for (off = 0; off < size; off += 3) {
+ if (check_one_blockval(bitmap, off, 3)) {
+ printk(KERN_ERR "BUG: Usemap for section %lu corrupted at %pS[%s:%d]\n",
+ pnum, caller, file, line);
+ reported = 1;
+ break; /* leaves only the inner loop: remaining sections are still scanned in this call */
+ }
+ }
+ }
+}
+#endif
#ifdef CONFIG_MEMORY_HOTPLUG
static unsigned long *__kmalloc_section_usemap(void)
{
@@ -445,10 +491,16 @@ void __init sparse_init(void)
sparse_init_one_section(__nr_to_section(pnum), pnum, map,
usemap);
}
+#if 1
+ sparse_validate_usemap(__FILE__, __LINE__);
+#endif

vmemmap_populate_print_last();

free_bootmem(__pa(usemap_map), size);
+#if 1
+ sparse_validate_usemap(__FILE__, __LINE__);
+#endif
}

#ifdef CONFIG_MEMORY_HOTPLUG
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/