Re: [PATCH -v2 7/7] x86, NMI, Remove do_nmi_callback logic

From: Robert Richter
Date: Mon Sep 27 2010 - 06:45:21 EST


On 26.09.10 20:57:06, Huang Ying wrote:
> do_nmi_callback related logic is removed, because it is useless, just
> adds code complexity.
>
> unknown_nmi_panic sysctl is reserved to keep kernel ABI unchanged.

"unknown_nmi_panic" and "panic_on_unrecovered_nmi" are different. See
below and also Documentation/sysctl/kernel.txt.

>
> Signed-off-by: Huang Ying <ying.huang@xxxxxxxxx>
> ---
> arch/x86/include/asm/nmi.h | 10 +++++++++-
> arch/x86/kernel/apic/hw_nmi.c | 1 -
> arch/x86/kernel/apic/nmi.c | 29 +----------------------------
> arch/x86/kernel/traps.c | 16 ++++++++++------
> 4 files changed, 20 insertions(+), 36 deletions(-)
>
> --- a/arch/x86/include/asm/nmi.h
> +++ b/arch/x86/include/asm/nmi.h
> @@ -30,9 +30,17 @@ extern void setup_apic_nmi_watchdog(void
> extern void stop_apic_nmi_watchdog(void *);
> extern void disable_timer_nmi_watchdog(void);
> extern void enable_timer_nmi_watchdog(void);
> -extern int nmi_watchdog_tick(struct pt_regs *regs, unsigned reason);
> extern void cpu_nmi_set_wd_enabled(void);
>
> +#if defined(CONFIG_X86_LOCAL_APIC) && !defined(CONFIG_LOCKUP_DETECTOR)
> +extern int nmi_watchdog_tick(struct pt_regs *regs);
> +#else
> +static inline int nmi_watchdog_tick(struct pt_regs *regs)
> +{
> + return 0;
> +}
> +#endif
> +
> extern atomic_t nmi_active;
> extern unsigned int nmi_watchdog;
> #define NMI_NONE 0
> --- a/arch/x86/kernel/apic/hw_nmi.c
> +++ b/arch/x86/kernel/apic/hw_nmi.c
> @@ -100,7 +100,6 @@ void acpi_nmi_disable(void) { return; }
> #endif
> atomic_t nmi_active = ATOMIC_INIT(0); /* oprofile uses this */
> EXPORT_SYMBOL(nmi_active);
> -int unknown_nmi_panic;
> void cpu_nmi_set_wd_enabled(void) { return; }
> void stop_apic_nmi_watchdog(void *unused) { return; }
> void setup_apic_nmi_watchdog(void *unused) { return; }
> --- a/arch/x86/kernel/apic/nmi.c
> +++ b/arch/x86/kernel/apic/nmi.c
> @@ -37,7 +37,6 @@
>
> #include <asm/mach_traps.h>
>
> -int unknown_nmi_panic;
> int nmi_watchdog_enabled;
>
> /* For reliability, we're prepared to waste bits here. */
> @@ -389,7 +388,7 @@ void touch_nmi_watchdog(void)
> EXPORT_SYMBOL(touch_nmi_watchdog);
>
> notrace __kprobes int
> -nmi_watchdog_tick(struct pt_regs *regs, unsigned reason)
> +nmi_watchdog_tick(struct pt_regs *regs)
> {
> /*
> * Since current_thread_info()-> is always on the stack, and we
> @@ -483,23 +482,6 @@ static void disable_ioapic_nmi_watchdog(
> on_each_cpu(stop_apic_nmi_watchdog, NULL, 1);
> }
>
> -static int __init setup_unknown_nmi_panic(char *str)
> -{
> - unknown_nmi_panic = 1;
> - return 1;
> -}
> -__setup("unknown_nmi_panic", setup_unknown_nmi_panic);
> -
> -static int unknown_nmi_panic_callback(struct pt_regs *regs, int cpu)
> -{
> - unsigned char reason = get_nmi_reason();
> - char buf[64];
> -
> - sprintf(buf, "NMI received for unknown reason %02x\n", reason);
> - die_nmi(buf, regs, 1); /* Always panic here */
> - return 0;

You are dropping this code that is different to panic().

> -}
> -
> /*
> * proc handler for /proc/sys/kernel/nmi
> */
> @@ -540,15 +522,6 @@ int proc_nmi_enabled(struct ctl_table *t
>
> #endif /* CONFIG_SYSCTL */
>
> -int do_nmi_callback(struct pt_regs *regs, int cpu)
> -{
> -#ifdef CONFIG_SYSCTL
> - if (unknown_nmi_panic)
> - return unknown_nmi_panic_callback(regs, cpu);
> -#endif
> - return 0;
> -}
> -
> void arch_trigger_all_cpu_backtrace(void)
> {
> int i;
> --- a/arch/x86/kernel/traps.c
> +++ b/arch/x86/kernel/traps.c
> @@ -84,6 +84,7 @@ EXPORT_SYMBOL_GPL(used_vectors);
> static int ignore_nmis;
>
> int unknown_nmi_for_hwerr;
> +int unknown_nmi_panic;
>
> /*
> * Prevent NMI reason port (0x61) being accessed simultaneously, can
> @@ -308,6 +309,13 @@ gp_in_kernel:
> die("general protection fault", regs, error_code);
> }
>
> +static int __init setup_unknown_nmi_panic(char *str)
> +{
> + unknown_nmi_panic = 1;

We should register a handler here for this case instead of using
global variables.

> + return 1;
> +}
> +__setup("unknown_nmi_panic", setup_unknown_nmi_panic);
> +
> static notrace __kprobes void
> pci_serr_error(unsigned char reason, struct pt_regs *regs)
> {
> @@ -385,7 +393,7 @@ unknown_nmi_error(unsigned char reason,
> reason, smp_processor_id());
>
> printk(KERN_EMERG "Do you have a strange power saving mode enabled?\n");
> - if (panic_on_unrecovered_nmi)
> + if (unknown_nmi_panic || panic_on_unrecovered_nmi)
> panic("NMI: Not continuing");

There is different behavior, we should rather keep this code and add a
handler in the unknown_nmi_panic case with:

die_nmi(...)
...

-Robert

>
> printk(KERN_EMERG "Dazed and confused, but trying to continue\n");
> @@ -432,12 +440,8 @@ static notrace __kprobes void default_do
> }
> raw_spin_unlock(&nmi_reason_lock);
>
> -#if defined(CONFIG_X86_LOCAL_APIC) && !defined(CONFIG_LOCKUP_DETECTOR)
> - if (nmi_watchdog_tick(regs, reason))
> - return;
> - if (do_nmi_callback(regs, smp_processor_id()))
> + if (nmi_watchdog_tick(regs))
> return;
> -#endif
>
> if (notify_die(DIE_NMIUNKNOWN, "nmi_unknown", regs, reason, 2, SIGINT)
> == NOTIFY_STOP)
> --
> To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
> the body of a message to majordomo@xxxxxxxxxxxxxxx
> More majordomo info at http://vger.kernel.org/majordomo-info.html
> Please read the FAQ at http://www.tux.org/lkml/
>

--
Advanced Micro Devices, Inc.
Operating System Research Center

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/