Re: [PATCH v4 02/27] hardirq/nmi: Allow nested nmi_enter()

From: Frederic Weisbecker
Date: Mon Feb 24 2020 - 22:09:11 EST


On Mon, Feb 24, 2020 at 05:13:18PM +0100, Peter Zijlstra wrote:
> Damn, true. That also means I need to fix the arm64 bits, and that's a
> little more tricky.
>
> Something like so perhaps.. hmm?
>
> ---
> --- a/arch/arm64/include/asm/hardirq.h
> +++ b/arch/arm64/include/asm/hardirq.h
> @@ -32,30 +32,52 @@ u64 smp_irq_stat_cpu(unsigned int cpu);
>
> struct nmi_ctx {
> u64 hcr;
> + unsigned int cnt;
> };
>
> DECLARE_PER_CPU(struct nmi_ctx, nmi_contexts);
>
> -#define arch_nmi_enter() \
> - do { \
> - if (is_kernel_in_hyp_mode() && !in_nmi()) { \
> - struct nmi_ctx *nmi_ctx = this_cpu_ptr(&nmi_contexts); \
> - nmi_ctx->hcr = read_sysreg(hcr_el2); \
> - if (!(nmi_ctx->hcr & HCR_TGE)) { \
> - write_sysreg(nmi_ctx->hcr | HCR_TGE, hcr_el2); \
> - isb(); \
> - } \
> - } \
> - } while (0)
> +#define arch_nmi_enter() \
> +do { \
> + struct nmi_ctx *___ctx; \
> + unsigned int ___cnt; \
> + \
> + if (!is_kernel_in_hyp_mode() || in_nmi()) \
> + break; \
> + \
> + ___ctx = this_cpu_ptr(&nmi_contexts); \
> + ___cnt = ___ctx->cnt; \
> + if (!(___cnt & 1) && __cnt) { \
> + ___ctx->cnt += 2; \
> + break; \
> + } \
> + \
> + ___ctx->cnt |= 1; \
> + barrier(); \
> + nmi_ctx->hcr = read_sysreg(hcr_el2); \
> + if (!(nmi_ctx->hcr & HCR_TGE)) { \
> + write_sysreg(nmi_ctx->hcr | HCR_TGE, hcr_el2); \
> + isb(); \
> + } \
> + barrier(); \

Suppose the first NMI is interrupted here. nmi_ctx->hcr has HCR_TGE unset.
The new NMI is going to overwrite nmi_ctx->hcr with HCR_TGE set. Then the
first NMI will not restore the correct value upon arch_nmi_exit().

So perhaps the below, but I bet I overlooked something obvious.

#define arch_nmi_enter() \
do { \
struct nmi_ctx *___ctx; \
u64 ___hcr; \
\
if (!is_kernel_in_hyp_mode()) \
break; \
\
___ctx = this_cpu_ptr(&nmi_contexts); \
if (___ctx->cnt) { \
___ctx->cnt++; \
break; \
} \
\
___hcr = read_sysreg(hcr_el2); \
if (!(___hcr & HCR_TGE)) { \
write_sysreg(___hcr | HCR_TGE, hcr_el2); \
isb(); \
} \
___ctx->cnt = 1; \
barrier(); \
___ctx->hcr = ___hcr; \
} while (0)

/*
 * arch_nmi_exit() - NMI exit hook paired with arch_nmi_enter().
 *
 * Does nothing unless is_kernel_in_hyp_mode() is true. Drops this CPU's
 * nesting count and, when the outermost NMI is leaving and the saved
 * hcr_el2 value had HCR_TGE clear (i.e. the entry path had to set it),
 * writes the saved value back to hcr_el2.
 */
#define arch_nmi_exit() \
do { \
	struct nmi_ctx *___ctx; \
	u64 ___hcr; \
	\
	if (!is_kernel_in_hyp_mode()) \
		break; \
	\
	___ctx = this_cpu_ptr(&nmi_contexts); \
	/* Read through ___ctx: no 'nmi_ctx' variable exists in this macro. */ \
	___hcr = ___ctx->hcr; \
	/* \
	 * Fetch the saved value before dropping the count: once cnt reaches \
	 * zero a new arch_nmi_enter() is allowed to store to ___ctx->hcr. \
	 */ \
	barrier(); \
	--___ctx->cnt; \
	/* \
	 * Drop the count before the register is restored, so that an NMI \
	 * arriving after the restore sees cnt == 0 and sets TGE itself \
	 * instead of taking the increment-only path. \
	 */ \
	barrier(); \
	if (!___ctx->cnt && !(___hcr & HCR_TGE)) \
		write_sysreg(___hcr, hcr_el2); \
} while (0)