Re: [PATCH -v3 3/6] x86, NMI, Rewrite NMI handler

From: Huang Ying
Date: Tue Nov 02 2010 - 14:16:27 EST


Hi, Don,

On Tue, 2010-11-02 at 10:50 -0700, Don Zickus wrote:
> On Wed, Oct 27, 2010 at 07:08:44PM +0200, Peter Zijlstra wrote:
> > On Wed, 2010-10-27 at 12:45 -0400, Don Zickus wrote:
> > > I assume this is sort of what Peter was looking for.
> >
> > Yeah close, except the prio field of the various notification blocks
> > need to get adjusted to preserve semantics.
>
> Here is my next crack at it. I added some global defines to make it more
> clear what the priorities are.
>
> Again the intent was to roll something like this patch into Huang's
> original patch. This would help simplify the notifier chain and add
> priorities to maintain the original relationships.
>
> Cheers,
> Don
>
>
> diff --git a/arch/x86/include/asm/kdebug.h b/arch/x86/include/asm/kdebug.h
> index 5bdfca8..e28ec43 100644
> --- a/arch/x86/include/asm/kdebug.h
> +++ b/arch/x86/include/asm/kdebug.h
> @@ -18,7 +18,6 @@ enum die_val {
> DIE_TRAP,
> DIE_GPF,
> DIE_CALL,
> - DIE_NMI_IPI,
> DIE_PAGE_FAULT,
> DIE_NMIUNKNOWN,
> };
> diff --git a/arch/x86/include/asm/nmi.h b/arch/x86/include/asm/nmi.h
> index 932f0f8..cfb6156 100644
> --- a/arch/x86/include/asm/nmi.h
> +++ b/arch/x86/include/asm/nmi.h
> @@ -68,6 +68,26 @@ static inline int nmi_watchdog_active(void)
> }
> #endif
>
> +/*
> + * Define some priorities for the nmi notifier call chain.
> + *
> + * Create a local nmi bit that has a higher priority than
> + * external nmis, because the local ones are more frequent.
> + *
> + * Also set up some default high/normal/low settings for
> + * subsystems to register with. Using 4 bits to separate
> + * the priorities. This can go a lot higher if need be.
> + */
> +
> +#define NMI_LOCAL_SHIFT 16 /* randomly picked */
> +#define NMI_LOCAL_BIT (1ULL << NMI_LOCAL_SHIFT)
> +#define NMI_HIGH_PRIOR (1ULL << 8)
> +#define NMI_NORMAL_PRIOR (1ULL << 4)
> +#define NMI_LOW_PRIOR (1ULL << 0)
> +#define NMI_LOCAL_HIGH_PRIOR (NMI_LOCAL_BIT | NMI_HIGH_PRIOR)
> +#define NMI_LOCAL_NORMAL_PRIOR (NMI_LOCAL_BIT | NMI_NORMAL_PRIOR)
> +#define NMI_LOCAL_LOW_PRIOR (NMI_LOCAL_BIT | NMI_LOW_PRIOR)
> +
> void lapic_watchdog_stop(void);
> int lapic_watchdog_init(unsigned nmi_hz);
> int lapic_wd_event(unsigned nmi_hz);
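
A side note for thread readers: the die notifier chain calls entries in
descending .priority order, so the NMI_LOCAL_* values above make the
local-NMI users run before plain external-NMI users. As a minimal,
hypothetical sketch (not part of Don's patch; the handler name and the
DIE_NMIUNKNOWN check are only illustrative), an external-NMI consumer
could pick one of these values like this:

	#include <linux/init.h>
	#include <linux/kdebug.h>
	#include <linux/notifier.h>
	#include <asm/nmi.h>

	static int example_ext_nmi_notify(struct notifier_block *self,
					  unsigned long cmd, void *data)
	{
		if (cmd != DIE_NMIUNKNOWN)
			return NOTIFY_DONE;	/* only care about unclaimed NMIs */

		/* ... check our hardware and claim the NMI if it is ours ... */
		return NOTIFY_STOP;
	}

	static struct notifier_block example_ext_nmi_nb = {
		.notifier_call	= example_ext_nmi_notify,
		.priority	= NMI_LOW_PRIOR,	/* external, so no NMI_LOCAL_BIT */
	};

	static int __init example_ext_nmi_init(void)
	{
		return register_die_notifier(&example_ext_nmi_nb);
	}
	late_initcall(example_ext_nmi_init);
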
> diff --git a/arch/x86/kernel/apic/hw_nmi.c b/arch/x86/kernel/apic/hw_nmi.c
> index 07a837d..44ff8c9 100644
> --- a/arch/x86/kernel/apic/hw_nmi.c
> +++ b/arch/x86/kernel/apic/hw_nmi.c
> @@ -67,7 +67,6 @@ arch_trigger_all_cpu_backtrace_handler(struct notifier_block *self,
>
> switch (cmd) {
> case DIE_NMI:
> - case DIE_NMI_IPI:
> break;
>
> default:
> @@ -95,7 +94,7 @@ arch_trigger_all_cpu_backtrace_handler(struct notifier_block *self,
> static __read_mostly struct notifier_block backtrace_notifier = {
> .notifier_call = arch_trigger_all_cpu_backtrace_handler,
> .next = NULL,
> - .priority = 1
> + .priority = NMI_LOCAL_LOW_PRIOR,
> };
>
> static int __init register_trigger_all_cpu_backtrace(void)
> diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c
> index be6f9c4..e6c6294 100644
> --- a/arch/x86/kernel/apic/x2apic_uv_x.c
> +++ b/arch/x86/kernel/apic/x2apic_uv_x.c
> @@ -605,7 +605,7 @@ void __cpuinit uv_cpu_init(void)
> */
> int uv_handle_nmi(struct notifier_block *self, unsigned long reason, void *data)
> {
> - if (reason != DIE_NMI_IPI)
> + if (reason != DIE_NMIUNKNOWN)
> return NOTIFY_OK;
>
> if (in_crash_kexec)
> diff --git a/arch/x86/kernel/cpu/mcheck/mce-inject.c b/arch/x86/kernel/cpu/mcheck/mce-inject.c
> index e7dbde7..a779719 100644
> --- a/arch/x86/kernel/cpu/mcheck/mce-inject.c
> +++ b/arch/x86/kernel/cpu/mcheck/mce-inject.c
> @@ -25,6 +25,7 @@
> #include <linux/gfp.h>
> #include <asm/mce.h>
> #include <asm/apic.h>
> +#include <asm/nmi.h>
>
> /* Update fake mce registers on current CPU. */
> static void inject_mce(struct mce *m)
> @@ -83,7 +84,7 @@ static int mce_raise_notify(struct notifier_block *self,
> struct die_args *args = (struct die_args *)data;
> int cpu = smp_processor_id();
> struct mce *m = &__get_cpu_var(injectm);
> - if (val != DIE_NMI_IPI || !cpumask_test_cpu(cpu, mce_inject_cpumask))
> + if (val != DIE_NMI || !cpumask_test_cpu(cpu, mce_inject_cpumask))
> return NOTIFY_DONE;
> cpumask_clear_cpu(cpu, mce_inject_cpumask);
> if (m->inject_flags & MCJ_EXCEPTION)
> @@ -95,7 +96,7 @@ static int mce_raise_notify(struct notifier_block *self,
>
> static struct notifier_block mce_raise_nb = {
> .notifier_call = mce_raise_notify,
> - .priority = 1000,
> + .priority = NMI_LOCAL_NORMAL_PRIOR,
> };
>
> /* Inject mce on current CPU */
> diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
> index da98b6d..55a3797 100644
> --- a/arch/x86/kernel/cpu/perf_event.c
> +++ b/arch/x86/kernel/cpu/perf_event.c
> @@ -1226,7 +1226,7 @@ perf_event_nmi_handler(struct notifier_block *self,
> return NOTIFY_DONE;
>
> switch (cmd) {
> - case DIE_NMI_IPI:
> + case DIE_NMI:
> break;
> case DIE_NMIUNKNOWN:
> this_nmi = percpu_read(irq_stat.__nmi_count);
> @@ -1276,7 +1276,7 @@ perf_event_nmi_handler(struct notifier_block *self,
> static __read_mostly struct notifier_block perf_event_nmi_notifier = {
> .notifier_call = perf_event_nmi_handler,
> .next = NULL,
> - .priority = 1
> + .priority = NMI_LOCAL_LOW_PRIOR,
> };
>
> static struct event_constraint unconstrained;
> diff --git a/arch/x86/kernel/kgdb.c b/arch/x86/kernel/kgdb.c
> index 852b819..020d052 100644
> --- a/arch/x86/kernel/kgdb.c
> +++ b/arch/x86/kernel/kgdb.c
> @@ -523,10 +523,6 @@ static int __kgdb_notify(struct die_args *args, unsigned long cmd)
> }
> return NOTIFY_DONE;
>
> - case DIE_NMI_IPI:
> - /* Just ignore, we will handle the roundup on DIE_NMI. */
> - return NOTIFY_DONE;
> -
> case DIE_NMIUNKNOWN:
> if (was_in_debug_nmi[raw_smp_processor_id()]) {
> was_in_debug_nmi[raw_smp_processor_id()] = 0;
> @@ -604,7 +600,7 @@ static struct notifier_block kgdb_notifier = {
> /*
> * Lowest-prio notifier priority, we want to be notified last:
> */
> - .priority = -INT_MAX,
> + .priority = NMI_LOCAL_LOW_PRIOR,
> };
>
> /**
> diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c
> index e3af342..eabbde6 100644
> --- a/arch/x86/kernel/reboot.c
> +++ b/arch/x86/kernel/reboot.c
> @@ -18,6 +18,7 @@
> #include <asm/pci_x86.h>
> #include <asm/virtext.h>
> #include <asm/cpu.h>
> +#include <asm/nmi.h>
>
> #ifdef CONFIG_X86_32
> # include <linux/ctype.h>
> @@ -753,7 +754,7 @@ static int crash_nmi_callback(struct notifier_block *self,
> {
> int cpu;
>
> - if (val != DIE_NMI_IPI)
> + if (val != DIE_NMI)
> return NOTIFY_OK;
>
> cpu = raw_smp_processor_id();
> @@ -784,6 +785,8 @@ static void smp_send_nmi_allbutself(void)
>
> static struct notifier_block crash_nmi_nb = {
> .notifier_call = crash_nmi_callback,
> + /* we want to be the first one called */
> + .priority = NMI_LOCAL_HIGH_PRIOR+1,
> };
>
> /* Halt all other CPUs, calling the specified function on each of them
> diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
> index d8acab3..9e56e3d 100644
> --- a/arch/x86/kernel/traps.c
> +++ b/arch/x86/kernel/traps.c
> @@ -395,8 +395,7 @@ static notrace __kprobes void default_do_nmi(struct pt_regs *regs)
> * CPU-specific NMI: send to specific CPU or NMI sources must
> * be processed on specific CPU
> */
> - if (notify_die(DIE_NMI_IPI, "nmi_ipi", regs, 0, 2, SIGINT)
> - == NOTIFY_STOP)
> + if (notify_die(DIE_NMI, "nmi_ipi", regs, 0, 2, SIGINT) == NOTIFY_STOP)
> return;
>
> /* Non-CPU-specific NMI: NMI sources can be processed on any CPU */
> @@ -405,9 +404,6 @@ static notrace __kprobes void default_do_nmi(struct pt_regs *regs)
> if (!cpu) {
> reason = get_nmi_reason();
> if (reason & NMI_REASON_MASK) {
> - if (notify_die(DIE_NMI, "nmi", regs, reason, 2, SIGINT)
> - == NOTIFY_STOP)
> - return;
> if (reason & NMI_REASON_SERR)
> pci_serr_error(reason, regs);
> else if (reason & NMI_REASON_IOCHK)
> @@ -423,9 +419,6 @@ static notrace __kprobes void default_do_nmi(struct pt_regs *regs)
> }
> }
>
> - if (notify_die(DIE_NMI, "nmi", regs, 0, 2, SIGINT) == NOTIFY_STOP)
> - return;
> -
> #if defined(CONFIG_X86_LOCAL_APIC) && !defined(CONFIG_LOCKUP_DETECTOR)
> if (nmi_watchdog_tick(regs, reason))
> return;
> diff --git a/arch/x86/oprofile/nmi_int.c b/arch/x86/oprofile/nmi_int.c
> index 57f01bb..43b4f35 100644
> --- a/arch/x86/oprofile/nmi_int.c
> +++ b/arch/x86/oprofile/nmi_int.c
> @@ -64,7 +64,7 @@ static int profile_exceptions_notify(struct notifier_block *self,
> int ret = NOTIFY_DONE;
>
> switch (val) {
> - case DIE_NMI_IPI:
> + case DIE_NMI:
> if (ctr_running)
> model->check_ctrs(args->regs, &__get_cpu_var(cpu_msrs));
> else if (!nmi_enabled)
> @@ -360,7 +360,7 @@ static void nmi_cpu_setup(void *dummy)
> static struct notifier_block profile_exceptions_nb = {
> .notifier_call = profile_exceptions_notify,
> .next = NULL,
> - .priority = 2
> + .priority = NMI_LOCAL_LOW_PRIOR,
> };
>
> static void nmi_cpu_restore_registers(struct op_msrs *msrs)
> diff --git a/arch/x86/oprofile/nmi_timer_int.c b/arch/x86/oprofile/nmi_timer_int.c
> index ab72a21..0197aa1 100644
> --- a/arch/x86/oprofile/nmi_timer_int.c
> +++ b/arch/x86/oprofile/nmi_timer_int.c
> @@ -25,7 +25,7 @@ static int profile_timer_exceptions_notify(struct notifier_block *self,
> int ret = NOTIFY_DONE;
>
> switch (val) {
> - case DIE_NMI_IPI:
> + case DIE_NMI:
> oprofile_add_sample(args->regs, 0);
> ret = NOTIFY_STOP;
> break;
> @@ -38,7 +38,7 @@ static int profile_timer_exceptions_notify(struct notifier_block *self,
> static struct notifier_block profile_timer_exceptions_nb = {
> .notifier_call = profile_timer_exceptions_notify,
> .next = NULL,
> - .priority = 0
> + .priority = NMI_EXT_LOW_PRIOR,

I can not find the definition of NMI_EXT_LOW_PRIOR. Did you forget to add it?
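
Perhaps the simplest fix, consistent with the other macros above, would be
either to reuse NMI_LOW_PRIOR directly here, or to add something like (just
my guess at what was intended):

	#define NMI_EXT_LOW_PRIOR	NMI_LOW_PRIOR

next to the NMI_LOCAL_* definitions.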

BTW: I have attached a patch that I used to test external NMI. Hope it is
useful to you.

Best Regards,
Huang Ying