Re: [PATCH -tip] kgdb, x86: Pull up NMI notifier handler priority

From: Cyrill Gorcunov
Date: Thu Mar 24 2011 - 01:24:34 EST


If Jason is ok with such splitting -- I dont mind either ;)

/sorry for toppost/

On Thursday, March 24, 2011, Dongdong Deng <libfetion@xxxxxxxxx> wrote:
> On Thu, Mar 24, 2011 at 4:32 AM, Cyrill Gorcunov <gorcunov@xxxxxxxxxx> wrote:
>> kgdb needs IPI to be sent and handled before perf
>> or anything else NMI, otherwise kgdb hangs with bootup
>> self-tests (found on P4 HT SMP machine). Raise its priority
>> so that we're called first in a notifier chain.
>>
>> Reported-by: Don Zickus <dzickus@xxxxxxxxxx>
>> Tested-by: Lin Ming <ming.m.lin@xxxxxxxxx>
>> CC: Jason Wessel <jason.wessel@xxxxxxxxxxxxx>
>> Signed-off-by: Cyrill Gorcunov <gorcunov@xxxxxxxxxx>
>> ---
>>
>> Don, Jason, take a look please.
>>
>>  arch/x86/kernel/kgdb.c |    7 +++++--
>>  1 file changed, 5 insertions(+), 2 deletions(-)
>>
>> Index: linux-2.6.git/arch/x86/kernel/kgdb.c
>> =====================================================================
>> --- linux-2.6.git.orig/arch/x86/kernel/kgdb.c
>> +++ linux-2.6.git/arch/x86/kernel/kgdb.c
>> @@ -592,9 +592,12 @@ static struct notifier_block kgdb_notifi
>>        .notifier_call  = kgdb_notify,
>>
>>        /*
>> -        * Lowest-prio notifier priority, we want to be notified last:
>> +        * We might need to send an IPI and
>> +        * do cpu roundup before anything else
>> +        * in notifier chain so high priority
>> +        * is needed.
>>         */
>> -       .priority       = NMI_LOCAL_LOW_PRIOR,
>> +       .priority       = NMI_LOCAL_HIGH_PRIOR,
>>  };
>
> CC: kgdb maillist.
>
> I quote Jason's early email to explain why debugger tends to
> set the low level of die_notifier.
>
> "The original thinking was that if you are using a low level debugger
> that you would want to stop on such a event or breakpoint because there
> is nothing else handling it and your system is about to print an oops
> message."
>
> For keeping above purpose, I have a "ugly" proposal that splitting
> kgdb die handling to two parts.
>
> 1: The first part with HIGH priority and just handle NMI event,
>  if the NMI event is not belong kgdb, it return NOTIFY_DONE and pass to others.
>
> 2: The second part with low priority to handling others.
>
> Due to above handling logic could let kgdb source get complexly, I
> couldn't make sure
> it is a suitable method here, if it is OK, I can send a formal patch. :-)
>
>
> $git diff
> -----------
> arch/x86/kernel/kgdb.c |   67 +++++++++++++++++++++++++++++++++++------------
>  1 files changed, 50 insertions(+), 17 deletions(-)
>
> diff --git a/arch/x86/kernel/kgdb.c b/arch/x86/kernel/kgdb.c
> index dba0b36..afd18db 100644
> --- a/arch/x86/kernel/kgdb.c
> +++ b/arch/x86/kernel/kgdb.c
> @@ -516,23 +516,6 @@ static int __kgdb_notify(struct die_args *args,
> unsigned long cmd)
>        struct pt_regs *regs = args->regs;
>
>        switch (cmd) {
> -       case DIE_NMI:
> -               if (atomic_read(&kgdb_active) != -1) {
> -                       /* KGDB CPU roundup */
> -                       kgdb_nmicallback(raw_smp_processor_id(), regs);
> -                       was_in_debug_nmi[raw_smp_processor_id()] = 1;
> -                       touch_nmi_watchdog();
> -                       return NOTIFY_STOP;
> -               }
> -               return NOTIFY_DONE;
> -
> -       case DIE_NMIUNKNOWN:
> -               if (was_in_debug_nmi[raw_smp_processor_id()]) {
> -                       was_in_debug_nmi[raw_smp_processor_id()] = 0;
> -                       return NOTIFY_STOP;
> -               }
> -               return NOTIFY_DONE;
> -
>        case DIE_DEBUG:
>                if (atomic_read(&kgdb_cpu_doing_single_step) != -1) {
>                        if (user_mode(regs))
> @@ -588,6 +571,52 @@ kgdb_notify(struct notifier_block *self, unsigned
> long cmd, void *ptr)
>        return ret;
>  }
>
> +static int
> +kgdb_nmi_notify(struct notifier_block *self, unsigned long cmd, void *ptr)
> +{
> +       unsigned long flags;
> +       struct pt_regs *regs = ((struct die_args*)ptr)->regs;
> +       int ret = NOTIFY_DONE;
> +
> +       local_irq_save(flags);
> +
> +       switch (cmd) {
> +       case DIE_NMI:
> +               if (atomic_read(&kgdb_active) != -1) {
> +                       /* KGDB CPU roundup */
> +                       kgdb_nmicallback(raw_smp_processor_id(), regs);
> +                       was_in_debug_nmi[raw_smp_processor_id()] = 1;
> +                       touch_nmi_watchdog();
> +                       ret = NOTIFY_STOP;
> +               }
> +               break;
> +
> +       case DIE_NMIUNKNOWN:
> +               if (was_in_debug_nmi[raw_smp_processor_id()]) {
> +                       was_in_debug_nmi[raw_smp_processor_id()] = 0;
> +                       ret = NOTIFY_STOP;
> +               }
> +               break;
> +       default:
> +               break;
> +       }
> +
> +       local_irq_restore(flags);
> +       return ret;
> +}
> +
> +static struct notifier_block kgdb_nmi_notifier = {
> +       .notifier_call  = kgdb_nmi_notify,
> +
> +       /*
> +        * We might need to send an IPI and
> +        * do cpu roundup before anything else
> +        * in notifier chain so high priority
> +        * is needed.
> +        */
> +       .priority       = NMI_LOCAL_HIGH_PRIOR,
> +};
> +
>  static struct notifier_block kgdb_notifier = {
>        .notifier_call  = kgdb_notify,
>
> @@ -605,6 +634,9 @@ static struct notifier_block kgdb_notifier = {
>  */
>  int kgdb_arch_init(void)
>  {
> +       int err = register_die_notifier(&kgdb_nmi_notifier);
> +       if (err)
> +               return err;
>        return register_die_notifier(&kgdb_notifier);
>  }
>
> @@ -673,6 +705,7 @@ void kgdb_arch_exit(void)
>                        breakinfo[i].pev = NULL;
>                }
>        }
> +       unregister_die_notifier(&kgdb_nmi_notifier);
>        unregister_die_notifier(&kgdb_notifier);
>  }
>
> --
> 1.6.0.4
>
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/