Re: [PATCH v3 4/8] arm64: Fix interrupt tracing in the presence of NMIs

From: Marc Zyngier
Date: Fri Jun 07 2019 - 12:02:44 EST


On 06/06/2019 10:31, Julien Thierry wrote:
> In the presence of any form of instrumentation, nmi_enter() should be
> done before calling any traceable code and any instrumentation code.
>
> Currently, nmi_enter() is done in handle_domain_nmi(), which is much
> too late as instrumentation code might get called before. Move the
> nmi_enter/exit() calls to the arch IRQ vector handler.
>
> On arm64, it is not possible to know if the IRQ vector handler was
> called because of an NMI before acknowledging the interrupt. However, It
> is possible to know whether normal interrupts could be taken in the
> interrupted context (i.e. if taking an NMI in that context could
> introduce a potential race condition).
>
> When interrupting a context with IRQs disabled, call nmi_enter() as soon
> as possible. In contexts with IRQs enabled, defer this to the interrupt
> controller, which is in a better position to know if an interrupt taken
> is an NMI.
>
> Fixes: bc3c03ccb ("arm64: Enable the support of pseudo-NMIs")
> Signed-off-by: Julien Thierry <julien.thierry@xxxxxxx>
> Cc: Catalin Marinas <catalin.marinas@xxxxxxx>
> Cc: Will Deacon <will.deacon@xxxxxxx>
> Cc: Thomas Gleixner <tglx@xxxxxxxxxxxxx>
> Cc: Jason Cooper <jason@xxxxxxxxxxxxxx>
> Cc: Marc Zyngier <marc.zyngier@xxxxxxx>
> Cc: Mark Rutland <mark.rutland@xxxxxxx>
> ---
> arch/arm64/kernel/entry.S | 44 +++++++++++++++++++++++++++++++++-----------
> arch/arm64/kernel/irq.c | 17 +++++++++++++++++
> drivers/irqchip/irq-gic-v3.c | 6 ++++++
> kernel/irq/irqdesc.c | 8 ++++++--
> 4 files changed, 62 insertions(+), 13 deletions(-)
>
> diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S
> index 89ab6bd..46358a3 100644
> --- a/arch/arm64/kernel/entry.S
> +++ b/arch/arm64/kernel/entry.S
> @@ -435,6 +435,20 @@ tsk .req x28 // current thread_info
> irq_stack_exit
> .endm
>
> +#ifdef CONFIG_ARM64_PSEUDO_NMI
> + /*
> + * Set res to 0 if irqs were masked in interrupted context.

Is that comment right? This macro seems to return 0 if PMR is equal to
GIC_PRIO_IRQON, meaning that irqs are unmasked...

> + * Otherwise set res to non-0 value.
> + */
> + .macro test_irqs_unmasked res:req, pmr:req
> +alternative_if ARM64_HAS_IRQ_PRIO_MASKING
> + sub \res, \pmr, #GIC_PRIO_IRQON
> +alternative_else
> + mov \res, xzr
> +alternative_endif
> + .endm
> +#endif
> +
> .text
>
> /*
> @@ -631,19 +645,19 @@ ENDPROC(el1_sync)
> el1_irq:
> kernel_entry 1
> enable_da_f
> -#ifdef CONFIG_TRACE_IRQFLAGS
> +
> #ifdef CONFIG_ARM64_PSEUDO_NMI
> alternative_if ARM64_HAS_IRQ_PRIO_MASKING
> ldr x20, [sp, #S_PMR_SAVE]
> -alternative_else
> - mov x20, #GIC_PRIO_IRQON
> -alternative_endif
> - cmp x20, #GIC_PRIO_IRQOFF
> - /* Irqs were disabled, don't trace */
> - b.ls 1f
> +alternative_else_nop_endif
> + test_irqs_unmasked res=x0, pmr=x20
> + cbz x0, 1f
> + bl asm_nmi_enter
> +1:
> #endif
> +
> +#ifdef CONFIG_TRACE_IRQFLAGS
> bl trace_hardirqs_off
> -1:
> #endif
>
> irq_handler
> @@ -662,14 +676,22 @@ alternative_else_nop_endif
> bl preempt_schedule_irq // irq en/disable is done inside
> 1:
> #endif
> -#ifdef CONFIG_TRACE_IRQFLAGS
> +
> #ifdef CONFIG_ARM64_PSEUDO_NMI
> /*
> * if IRQs were disabled when we received the interrupt, we have an NMI
> * and we are not re-enabling interrupt upon eret. Skip tracing.
> */
> - cmp x20, #GIC_PRIO_IRQOFF
> - b.ls 1f
> + test_irqs_unmasked res=x0, pmr=x20
> + cbz x0, 1f
> + bl asm_nmi_exit
> +1:
> +#endif
> +
> +#ifdef CONFIG_TRACE_IRQFLAGS
> +#ifdef CONFIG_ARM64_PSEUDO_NMI
> + test_irqs_unmasked res=x0, pmr=x20
> + cbnz x0, 1f
> #endif
> bl trace_hardirqs_on
> 1:
> diff --git a/arch/arm64/kernel/irq.c b/arch/arm64/kernel/irq.c
> index 92fa817..fdd9cb2 100644
> --- a/arch/arm64/kernel/irq.c
> +++ b/arch/arm64/kernel/irq.c
> @@ -27,8 +27,10 @@
> #include <linux/smp.h>
> #include <linux/init.h>
> #include <linux/irqchip.h>
> +#include <linux/kprobes.h>
> #include <linux/seq_file.h>
> #include <linux/vmalloc.h>
> +#include <asm/daifflags.h>
> #include <asm/vmap_stack.h>
>
> unsigned long irq_err_count;
> @@ -76,3 +78,18 @@ void __init init_IRQ(void)
> if (!handle_arch_irq)
> panic("No interrupt controller found.");
> }
> +
> +/*
> + * Stubs to make nmi_enter/exit() code callable from ASM
> + */
> +asmlinkage void notrace asm_nmi_enter(void)
> +{
> + nmi_enter();
> +}
> +NOKPROBE_SYMBOL(asm_nmi_enter);
> +
> +asmlinkage void notrace asm_nmi_exit(void)
> +{
> + nmi_exit();
> +}
> +NOKPROBE_SYMBOL(asm_nmi_exit);
> diff --git a/drivers/irqchip/irq-gic-v3.c b/drivers/irqchip/irq-gic-v3.c
> index f44cd89..0bf0fc4 100644
> --- a/drivers/irqchip/irq-gic-v3.c
> +++ b/drivers/irqchip/irq-gic-v3.c
> @@ -495,7 +495,13 @@ static asmlinkage void __exception_irq_entry gic_handle_irq(struct pt_regs *regs
>
> if (gic_supports_nmi() &&
> unlikely(gic_read_rpr() == GICD_INT_NMI_PRI)) {
> + if (interrupts_enabled(regs))
> + nmi_enter();
> +
> gic_handle_nmi(irqnr, regs);
> +
> + if (interrupts_enabled(regs))
> + nmi_exit();

Just to be on the safe side, I'd rather sample interrupts_enabled(regs)
and use the same value to decide whether to do nmi_exit or not.

> return;
> }
>
> diff --git a/kernel/irq/irqdesc.c b/kernel/irq/irqdesc.c
> index c52b737..a92b335 100644
> --- a/kernel/irq/irqdesc.c
> +++ b/kernel/irq/irqdesc.c
> @@ -680,6 +680,8 @@ int __handle_domain_irq(struct irq_domain *domain, unsigned int hwirq,
> * @hwirq: The HW irq number to convert to a logical one
> * @regs: Register file coming from the low-level handling code
> *
> + * This function must be called from an NMI context.
> + *
> * Returns: 0 on success, or -EINVAL if conversion has failed
> */
> int handle_domain_nmi(struct irq_domain *domain, unsigned int hwirq,
> @@ -689,7 +691,10 @@ int handle_domain_nmi(struct irq_domain *domain, unsigned int hwirq,
> unsigned int irq;
> int ret = 0;
>
> - nmi_enter();
> + /*
> + * NMI context needs to be setup earlier in order to deal with tracing.
> + */
> + WARN_ON(!in_nmi());
>
> irq = irq_find_mapping(domain, hwirq);
>
> @@ -702,7 +707,6 @@ int handle_domain_nmi(struct irq_domain *domain, unsigned int hwirq,
> else
> ret = -EINVAL;
>
> - nmi_exit();
> set_irq_regs(old_regs);
> return ret;
> }
> --
> 1.9.1
>

Thanks,

M.
--
Jazz is not dead. It just smells funny...