[SMP patch] FIXED, lockup-debugger, NMI-oopser-2.1.98-C

MOLNAR Ingo (mingo@chiara.csoma.elte.hu)
Mon, 27 Apr 1998 00:10:35 +0200 (CEST)


Several people have reported that the previous NMI-oopser was very
unstable. I think I have now found the bug, and since then I have not had
any problems with this version.

This version now increments the NMI counter in /proc/interrupts, so one
can easily see whether the watchdog is really running. ObPlug: this version
also tries to be production quality. I think it's very useful for SMP
systems (and good for the morale of SMP users ;). If there are no major
problems with it, I plan to submit it to Linus (in the hope that it's
low-impact and high-prio enough to still be included).

Any comments on the release quality of this patch?

-- mingo

--- linux/drivers/char/Config.in.orig Sat May 2 07:55:51 1998
+++ linux/drivers/char/Config.in Sat May 2 08:07:38 1998
@@ -105,6 +105,12 @@
tristate ' Software Watchdog' CONFIG_SOFT_WATCHDOG
tristate ' Berkshire Products PC Watchdog' CONFIG_PCWATCHDOG
tristate ' Acquire SBC Watchdog Timer' CONFIG_ACQUIRE_WDT
+ if [ "$SMP" = "1" ]; then
+ bool ' SMP-IOAPIC NMI Software Watchdog' CONFIG_NMI_WATCHDOG
+ if [ "$CONFIG_NMI_WATCHDOG" = "y" ]; then
+ int ' watchdog source IRQ' CONFIG_NMI_WATCHDOG_IRQ 0
+ fi
+ fi
fi
bool 'Enhanced Real Time Clock Support' CONFIG_RTC
if [ "$CONFIG_ALPHA_BOOK1" = "y" ]; then
--- linux/arch/i386/kernel/traps.c.orig Tue Apr 21 22:39:27 1998
+++ linux/arch/i386/kernel/traps.c Sat May 2 08:55:17 1998
@@ -2,6 +2,8 @@
* linux/arch/i386/traps.c
*
* Copyright (C) 1991, 1992 Linus Torvalds
+ *
+ * 1998, Ingo Molnar, added NMI-Watchdog driver
*/

/*
@@ -21,6 +23,7 @@
#include <linux/smp_lock.h>
#include <linux/init.h>
#include <linux/delay.h>
+#include <linux/kernel_stat.h>

#include <asm/system.h>
#include <asm/uaccess.h>
@@ -32,10 +35,17 @@
asmlinkage void lcall7(void);
struct desc_struct default_ldt = { 0, 0 };

+extern int console_loglevel;
+
+static inline void console_silent(void)
+{
+ console_loglevel = 0;
+}
+
static inline void console_verbose(void)
{
- extern int console_loglevel;
- console_loglevel = 15;
+ if (console_loglevel)
+ console_loglevel = 15;
}

#define DO_ERROR(trapnr, signr, str, name, tsk) \
@@ -237,12 +247,15 @@
unlock_kernel();
}

+#ifndef CONFIG_NMI_WATCHDOG
static void mem_parity_error(unsigned char reason, struct pt_regs * regs)
{
printk("Uhhuh. NMI received. Dazed and confused, but trying to continue\n");
printk("You probably have a hardware problem with your RAM chips\n");
-}
+}
+#endif

+#ifndef CONFIG_NMI_WATCHDOG
static void io_check_error(unsigned char reason, struct pt_regs * regs)
{
unsigned long i;
@@ -258,18 +271,23 @@
reason &= ~8;
outb(reason, 0x61);
}
+#endif

+#ifndef CONFIG_NMI_WATCHDOG
static void unknown_nmi_error(unsigned char reason, struct pt_regs * regs)
{
printk("Uhhuh. NMI received for unknown reason %02x.\n", reason);
printk("Dazed and confused, but trying to continue\n");
printk("Do you have a strange power saving mode enabled?\n");
}
+#endif

+extern atomic_t nmi_counter;
+
+#ifndef CONFIG_NMI_WATCHDOG
asmlinkage void do_nmi(struct pt_regs * regs, long error_code)
{
unsigned char reason = inb(0x61);
- extern atomic_t nmi_counter;

atomic_inc(&nmi_counter);
if (reason & 0x80)
@@ -279,6 +297,69 @@
if (!(reason & 0xc0))
unknown_nmi_error(reason, regs);
}
+#else
+
+/*
+ * FIXME: we assume here that the NMI came from the IO-APIC. It's a quite safe
+ * assumption in most cases, but if anyone knows a way to distinguish between
+ * NMI reasons, please speak up ... [i doubt that the IO-APIC does IO port 0x61
+ * correctly]
+ */
+
+extern atomic_t apic_timer_irqs [NR_CPUS];
+extern spinlock_t console_lock;
+static spinlock_t nmi_print_lock = SPIN_LOCK_UNLOCKED;
+
+asmlinkage void do_nmi(struct pt_regs * regs, long error_code)
+{
+ /*
+ * the best way to detect whether a CPU has a 'hard lockup' problem
+ * is to check its local APIC timer IRQ counts. If they are not
+ * changing then that CPU has some problem.
+ *
+ * as these watchdog NMI IRQs are broadcasted to every CPU, here
+ * we only have to check the current processor.
+ *
+ * since NMIs dont listen to _any_ locks, we have to be extremely
+ * careful not to rely on unsafe variables. The printk might lock
+ * up though, so we have to break up console_lock first ...
+ * [when there will be more tty-related locks, break them up
+ * here too!]
+ */
+
+ static atomic_t last_irq_sums [NR_CPUS] = { ATOMIC_INIT(0), };
+ static atomic_t alert_counter [NR_CPUS] = { ATOMIC_INIT(0), };
+
+ /*
+ * Since current-> is always on the stack, and we always switch
+ * the stack NMI-atomically, it's safe to use smp_processor_id().
+ */
+ int sum, cpu = smp_processor_id();
+
+ atomic_inc(&nmi_counter);
+ sum = atomic_read(apic_timer_irqs+cpu);
+
+ if (atomic_read(last_irq_sums+cpu) == sum) {
+ /*
+ * Ayiee, looks like this CPU is stuck ...
+ * wait a few IRQs (5 seconds) before doing the oops ...
+ */
+ atomic_inc(alert_counter+cpu);
+ if (atomic_read(alert_counter+cpu) == 5*HZ) {
+ spin_lock(&nmi_print_lock);
+ printk("NMI Watchdog detected LOCKUP on CPU%d, registers:\n", cpu);
+ show_registers(regs);
+ printk("console shuts up ...\n");
+ console_silent();
+ spin_unlock(&nmi_print_lock);
+ do_exit(SIGSEGV);
+ }
+ } else {
+ atomic_set(last_irq_sums+cpu,sum);
+ atomic_set(alert_counter+cpu,0);
+ }
+}
+#endif

asmlinkage void do_debug(struct pt_regs * regs, long error_code)
{
--- linux/arch/i386/kernel/io_apic.c.orig Tue Apr 21 22:39:49 1998
+++ linux/arch/i386/kernel/io_apic.c Sat May 2 08:07:38 1998
@@ -35,6 +35,17 @@
#define IO_APIC_BASE ((volatile int *)0xfec00000)

/*
+ * We want to avoid #ifdef CONFIG_'s in the main code whenever possible:
+ */
+#ifdef CONFIG_NMI_WATCHDOG
+ int nmi_pin = -1;
+ const int nmi_irq = CONFIG_NMI_WATCHDOG_IRQ;
+#else
+ int nmi_pin = 0;
+ const int nmi_irq = -1;
+#endif
+
+/*
* The structure of the IO-APIC:
*/
struct IO_APIC_reg_00 {
@@ -60,6 +71,7 @@
__u32 vector : 8,
delivery_mode : 3, /* 000: FIXED
* 001: lowest prio
+ * 100: NMI
* 111: ExtInt
*/
dest_mode : 1, /* 0: physical, 1: logical */
@@ -273,6 +285,19 @@

entry.vector = IO_APIC_VECTOR(irq);

+ if (mp_irqs[i].mpc_irqtype)
+ continue;
+
+ if (irq == nmi_irq) {
+ entry.delivery_mode = 4; /* broadcast NMI */
+ make_8259A_irq(irq);
+ /*
+ * Remember which register has the NMI IRQ entry,
+ * so we can turn it off in case there is some
+ * incompatibility
+ */
+ nmi_pin = i;
+ }
/*
* Determine IRQ line polarity (high active or low active):
*/
@@ -710,12 +735,20 @@

setup_IO_APIC_irqs ();

+ if (nmi_pin == -1)
+ printk(".. NMI watchdog has invalid source IRQ.\n");
+ else if (nmi_irq != -1)
+ printk("NMI Watchdog activated on source IRQ %d\n", nmi_irq);
+
if (!timer_irq_works ()) {
make_8259A_irq(0);
if (!timer_irq_works ())
panic("IO-APIC + timer doesnt work!");
printk("..MP-BIOS bug: i8254 timer not connected to IO-APIC\n");
printk("..falling back to 8259A-based timer interrupt\n");
+ if ((nmi_pin != -1) && (nmi_irq == 0))
+ printk("NMI Watchdog disabled (source IRQ was 0)!\n");
+
}

printk("nr of MP irq sources: %d.\n", mp_irq_entries);
--- linux/arch/i386/kernel/smp.c.orig Tue Apr 21 22:39:49 1998
+++ linux/arch/i386/kernel/smp.c Sat May 2 09:19:04 1998
@@ -1432,17 +1432,25 @@
* [ if a single-CPU system runs an SMP kernel then we call the local
* interrupt as well. Thus we cannot inline the local irq ... ]
*/
+#ifdef CONFIG_NMI_WATCHDOG
+atomic_t apic_timer_irqs [NR_CPUS] = { ATOMIC_INIT(0), };
+#endif
void smp_apic_timer_interrupt(struct pt_regs * regs)
{
+#ifdef CONFIG_NMI_WATCHDOG
+ /*
+ * the only thing that can lock an NMI is an unACK-ed APIC ...
+ */
+ atomic_inc(apic_timer_irqs+smp_processor_id());
+#endif
+
/*
* NOTE! We'd better ACK the irq immediately,
* because timer handling can be slow, and we
* want to be able to accept NMI tlb invalidates
* during this time.
*/
- spin_lock(&irq_controller_lock);
ack_APIC_irq ();
- spin_unlock(&irq_controller_lock);

smp_local_timer_interrupt(regs);
}
--- linux/arch/i386/kernel/process.c.orig Sat May 2 07:55:40 1998
+++ linux/arch/i386/kernel/process.c Sat May 2 08:07:38 1998
@@ -153,7 +153,7 @@
{
if(current_cpu_data.hlt_works_ok &&
!hlt_counter && !need_resched)
- __asm("hlt");
+ udelay(10);
/*
* tq_scheduler currently assumes we're running in a process
* context (ie that we hold the kernel lock..)
--- linux/arch/i386/kernel/entry.S.orig Sat May 2 09:43:55 1998
+++ linux/arch/i386/kernel/entry.S Sat May 2 09:56:16 1998
@@ -291,9 +291,14 @@
jmp error_code

ENTRY(nmi)
+ pushl %eax
+ SAVE_ALL
pushl $0
- pushl $ SYMBOL_NAME(do_nmi)
- jmp error_code
+ movl %esp,%edx
+ pushl %edx
+ call SYMBOL_NAME(do_nmi)
+ addl $8,%esp
+ RESTORE_ALL

ENTRY(int3)
pushl $0
--- linux/arch/i386/defconfig.orig Tue Apr 21 22:39:26 1998
+++ linux/arch/i386/defconfig Sat May 2 08:07:38 1998
@@ -264,7 +264,14 @@
# CONFIG_UMISC is not set
# CONFIG_QIC02_TAPE is not set
# CONFIG_APM is not set
-# CONFIG_WATCHDOG is not set
+CONFIG_WATCHDOG=y
+# CONFIG_WATCHDOG_NOWAYOUT is not set
+# CONFIG_WDT is not set
+# CONFIG_SOFT_WATCHDOG is not set
+# CONFIG_PCWATCHDOG is not set
+# CONFIG_ACQUIRE_WDT is not set
+CONFIG_NMI_WATCHDOG=y
+CONFIG_NMI_WATCHDOG_IRQ=0
# CONFIG_RTC is not set
# CONFIG_VIDEO_DEV is not set
# CONFIG_NVRAM is not set

-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@vger.rutgers.edu