Re: [PATCH v3 5/8] arm64: Fix incorrect irqflag restore for priority masking

From: Marc Zyngier
Date: Fri Jun 07 2019 - 12:33:51 EST


On 06/06/2019 10:31, Julien Thierry wrote:
> When using IRQ priority masking to disable interrupts, in order to deal
> with the PSR.I state, local_irq_save() would convert the I bit into a
> PMR value (GIC_PRIO_IRQOFF). This resulted in local_irq_restore()
> potentially modifying the value of PMR in undesired location due to the
> state of PSR.I upon flag saving [1].
>
> In an attempt to solve this issue in a less hackish manner, introduce
> a bit (GIC_PRIO_IGNORE_PMR) for the PMR values that can represent
> whether PSR.I is being used to disable interrupts, in which case it
> takes precedence of the status of interrupt masking via PMR.
>
> GIC_PRIO_IGNORE_PMR is chosen such that (<pmr_value> |
> GIC_PRIO_IGNORE_PMR) does not mask more interrupts than <pmr_value> as
> some sections (e.g. arch_cpu_idle(), interrupt acknowledge path)
> requires PMR not to mask interrupts that could be signaled to the
> CPU when using only PSR.I.
>

s/GIC_PRIO_IGNORE_PMR/GIC_PRIO_PSR_I_SET/

> [1] https://www.spinics.net/lists/arm-kernel/msg716956.html
>
> Fixes: commit 4a503217ce37 ("arm64: irqflags: Use ICC_PMR_EL1 for interrupt masking")
> Signed-off-by: Julien Thierry <julien.thierry@xxxxxxx>
> Reported-by: Zenghui Yu <yuzenghui@xxxxxxxxxx>
> Cc: Steven Rostedt <rostedt@xxxxxxxxxxx>
> Cc: Wei Li <liwei391@xxxxxxxxxx>
> Cc: Catalin Marinas <catalin.marinas@xxxxxxx>
> Cc: Will Deacon <will.deacon@xxxxxxx>
> Cc: Christoffer Dall <christoffer.dall@xxxxxxx>
> Cc: Marc Zyngier <marc.zyngier@xxxxxxx>
> Cc: James Morse <james.morse@xxxxxxx>
> Cc: Suzuki K Pouloze <suzuki.poulose@xxxxxxx>
> Cc: Oleg Nesterov <oleg@xxxxxxxxxx>
> ---
> arch/arm64/include/asm/arch_gicv3.h | 4 ++-
> arch/arm64/include/asm/daifflags.h | 68 ++++++++++++++++++++++---------------
> arch/arm64/include/asm/irqflags.h | 67 +++++++++++++++---------------------
> arch/arm64/include/asm/kvm_host.h | 7 ++--
> arch/arm64/include/asm/ptrace.h | 10 ++++--
> arch/arm64/kernel/entry.S | 38 ++++++++++++++++++---
> arch/arm64/kernel/process.c | 2 +-
> arch/arm64/kernel/smp.c | 8 +++--
> arch/arm64/kvm/hyp/switch.c | 2 +-
> 9 files changed, 123 insertions(+), 83 deletions(-)
>
> diff --git a/arch/arm64/include/asm/arch_gicv3.h b/arch/arm64/include/asm/arch_gicv3.h
> index 14b41dd..9e991b6 100644
> --- a/arch/arm64/include/asm/arch_gicv3.h
> +++ b/arch/arm64/include/asm/arch_gicv3.h
> @@ -163,7 +163,9 @@ static inline bool gic_prio_masking_enabled(void)
>
> static inline void gic_pmr_mask_irqs(void)
> {
> - BUILD_BUG_ON(GICD_INT_DEF_PRI <= GIC_PRIO_IRQOFF);
> + BUILD_BUG_ON(GICD_INT_DEF_PRI < (GIC_PRIO_IRQOFF |
> + GIC_PRIO_PSR_I_SET));
> + BUILD_BUG_ON(GICD_INT_DEF_PRI >= GIC_PRIO_IRQON);
> gic_write_pmr(GIC_PRIO_IRQOFF);
> }
>
> diff --git a/arch/arm64/include/asm/daifflags.h b/arch/arm64/include/asm/daifflags.h
> index db452aa..f93204f 100644
> --- a/arch/arm64/include/asm/daifflags.h
> +++ b/arch/arm64/include/asm/daifflags.h
> @@ -18,6 +18,7 @@
>
> #include <linux/irqflags.h>
>
> +#include <asm/arch_gicv3.h>
> #include <asm/cpufeature.h>
>
> #define DAIF_PROCCTX 0
> @@ -32,6 +33,11 @@ static inline void local_daif_mask(void)
> :
> :
> : "memory");
> +
> + /* Don't really care for a dsb here, we don't intend to enable IRQs */
> + if (system_uses_irq_prio_masking())
> + gic_write_pmr(GIC_PRIO_IRQON | GIC_PRIO_PSR_I_SET);
> +
> trace_hardirqs_off();
> }
>
> @@ -43,7 +49,7 @@ static inline unsigned long local_daif_save(void)
>
> if (system_uses_irq_prio_masking()) {
> /* If IRQs are masked with PMR, reflect it in the flags */
> - if (read_sysreg_s(SYS_ICC_PMR_EL1) <= GIC_PRIO_IRQOFF)
> + if (read_sysreg_s(SYS_ICC_PMR_EL1) != GIC_PRIO_IRQON)
> flags |= PSR_I_BIT;
> }
>
> @@ -59,36 +65,44 @@ static inline void local_daif_restore(unsigned long flags)
> if (!irq_disabled) {
> trace_hardirqs_on();
>
> - if (system_uses_irq_prio_masking())
> - arch_local_irq_enable();
> - } else if (!(flags & PSR_A_BIT)) {
> - /*
> - * If interrupts are disabled but we can take
> - * asynchronous errors, we can take NMIs
> - */
> if (system_uses_irq_prio_masking()) {
> - flags &= ~PSR_I_BIT;
> + gic_write_pmr(GIC_PRIO_IRQON);
> + dsb(sy);
> + }
> + } else if (system_uses_irq_prio_masking()) {
> + u64 pmr;
> +
> + if (!(flags & PSR_A_BIT)) {
> /*
> - * There has been concern that the write to daif
> - * might be reordered before this write to PMR.
> - * From the ARM ARM DDI 0487D.a, section D1.7.1
> - * "Accessing PSTATE fields":
> - * Writes to the PSTATE fields have side-effects on
> - * various aspects of the PE operation. All of these
> - * side-effects are guaranteed:
> - * - Not to be visible to earlier instructions in
> - * the execution stream.
> - * - To be visible to later instructions in the
> - * execution stream
> - *
> - * Also, writes to PMR are self-synchronizing, so no
> - * interrupts with a lower priority than PMR is signaled
> - * to the PE after the write.
> - *
> - * So we don't need additional synchronization here.
> + * If interrupts are disabled but we can take
> + * asynchronous errors, we can take NMIs
> */
> - arch_local_irq_disable();
> + flags &= ~PSR_I_BIT;
> + pmr = GIC_PRIO_IRQOFF;
> + } else {
> + pmr = GIC_PRIO_IRQON | GIC_PRIO_PSR_I_SET;
> }
> +
> + /*
> + * There has been concern that the write to daif
> + * might be reordered before this write to PMR.
> + * From the ARM ARM DDI 0487D.a, section D1.7.1
> + * "Accessing PSTATE fields":
> + * Writes to the PSTATE fields have side-effects on
> + * various aspects of the PE operation. All of these
> + * side-effects are guaranteed:
> + * - Not to be visible to earlier instructions in
> + * the execution stream.
> + * - To be visible to later instructions in the
> + * execution stream
> + *
> + * Also, writes to PMR are self-synchronizing, so no
> + * interrupts with a lower priority than PMR is signaled
> + * to the PE after the write.
> + *
> + * So we don't need additional synchronization here.
> + */
> + gic_write_pmr(pmr);
> }
>
> write_sysreg(flags, daif);
> diff --git a/arch/arm64/include/asm/irqflags.h b/arch/arm64/include/asm/irqflags.h
> index fbe1aba..b6f757f 100644
> --- a/arch/arm64/include/asm/irqflags.h
> +++ b/arch/arm64/include/asm/irqflags.h
> @@ -67,43 +67,46 @@ static inline void arch_local_irq_disable(void)
> */
> static inline unsigned long arch_local_save_flags(void)
> {
> - unsigned long daif_bits;
> unsigned long flags;
>
> - daif_bits = read_sysreg(daif);
> -
> - /*
> - * The asm is logically equivalent to:
> - *
> - * if (system_uses_irq_prio_masking())
> - * flags = (daif_bits & PSR_I_BIT) ?
> - * GIC_PRIO_IRQOFF :
> - * read_sysreg_s(SYS_ICC_PMR_EL1);
> - * else
> - * flags = daif_bits;
> - */
> asm volatile(ALTERNATIVE(
> - "mov %0, %1\n"
> - "nop\n"
> - "nop",
> - __mrs_s("%0", SYS_ICC_PMR_EL1)
> - "ands %1, %1, " __stringify(PSR_I_BIT) "\n"
> - "csel %0, %0, %2, eq",
> - ARM64_HAS_IRQ_PRIO_MASKING)
> - : "=&r" (flags), "+r" (daif_bits)
> - : "r" ((unsigned long) GIC_PRIO_IRQOFF)
> - : "cc", "memory");
> + "mrs %0, daif",
> + __mrs_s("%0", SYS_ICC_PMR_EL1),
> + ARM64_HAS_IRQ_PRIO_MASKING)
> + : "=&r" (flags)
> + :
> + : "memory");

I think this is worth a comment here, as you're changing the semantics
of arch_local_save_flags(). Maybe just indicating that the only thing
this should be used for is to carry the interrupt state, and nothing else.

>
> return flags;
> }
>
> +static inline int arch_irqs_disabled_flags(unsigned long flags)
> +{
> + int res;
> +
> + asm volatile(ALTERNATIVE(
> + "and %w0, %w1, #" __stringify(PSR_I_BIT),
> + "eor %w0, %w1, #" __stringify(GIC_PRIO_IRQOFF),
> + ARM64_HAS_IRQ_PRIO_MASKING)
> + : "=&r" (res)
> + : "r" ((int) flags)
> + : "memory");
> +
> + return res;
> +}
> +
> static inline unsigned long arch_local_irq_save(void)
> {
> unsigned long flags;
>
> flags = arch_local_save_flags();
>
> - arch_local_irq_disable();
> + /*
> + * There are too many states with IRQs disabled, just keep the current
> + * state if interrupts are already disabled/masked.
> + */
> + if (!arch_irqs_disabled_flags(flags))
> + arch_local_irq_disable();
>
> return flags;
> }
> @@ -124,21 +127,5 @@ static inline void arch_local_irq_restore(unsigned long flags)
> : "memory");
> }
>
> -static inline int arch_irqs_disabled_flags(unsigned long flags)
> -{
> - int res;
> -
> - asm volatile(ALTERNATIVE(
> - "and %w0, %w1, #" __stringify(PSR_I_BIT) "\n"
> - "nop",
> - "cmp %w1, #" __stringify(GIC_PRIO_IRQOFF) "\n"
> - "cset %w0, ls",
> - ARM64_HAS_IRQ_PRIO_MASKING)
> - : "=&r" (res)
> - : "r" ((int) flags)
> - : "cc", "memory");
> -
> - return res;
> -}
> #endif
> #endif
> diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
> index 4bcd9c1..fdc0e1c 100644
> --- a/arch/arm64/include/asm/kvm_host.h
> +++ b/arch/arm64/include/asm/kvm_host.h
> @@ -608,11 +608,12 @@ static inline void kvm_arm_vhe_guest_enter(void)
> * will not signal the CPU of interrupts of lower priority, and the
> * only way to get out will be via guest exceptions.
> * Naturally, we want to avoid this.
> + *
> + * local_daif_mask() already sets IGNORE_PMR, we just need a

GIC_PRIO_PSR_I_SET?

> + * dsb to ensure the redistributor is forwards EL2 IRQs to the CPU.
> */
> - if (system_uses_irq_prio_masking()) {
> - gic_write_pmr(GIC_PRIO_IRQON);
> + if (system_uses_irq_prio_masking())
> dsb(sy);
> - }
> }
>
> static inline void kvm_arm_vhe_guest_exit(void)
> diff --git a/arch/arm64/include/asm/ptrace.h b/arch/arm64/include/asm/ptrace.h
> index b2de329..da22422 100644
> --- a/arch/arm64/include/asm/ptrace.h
> +++ b/arch/arm64/include/asm/ptrace.h
> @@ -35,9 +35,15 @@
> * means masking more IRQs (or at least that the same IRQs remain masked).
> *
> * To mask interrupts, we clear the most significant bit of PMR.
> + *
> + * Some code sections either automatically switch back to PSR.I or explicitly
> + * require to not use priority masking. If bit GIC_PRIO_PSR_I_SET is included
> + * in the the priority mask, it indicates that PSR.I should be set and
> + * interrupt disabling temporarily does not rely on IRQ priorities.
> */
> -#define GIC_PRIO_IRQON 0xf0
> -#define GIC_PRIO_IRQOFF (GIC_PRIO_IRQON & ~0x80)
> +#define GIC_PRIO_IRQON 0xc0
> +#define GIC_PRIO_IRQOFF (GIC_PRIO_IRQON & ~0x80)
> +#define GIC_PRIO_PSR_I_SET (1 << 4)
>
> /* Additional SPSR bits not exposed in the UABI */
> #define PSR_IL_BIT (1 << 20)
> diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S
> index 46358a3..7f92e4b 100644
> --- a/arch/arm64/kernel/entry.S
> +++ b/arch/arm64/kernel/entry.S
> @@ -258,6 +258,7 @@ alternative_else_nop_endif
> /*
> * Registers that may be useful after this macro is invoked:
> *
> + * x20 - ICC_PMR_EL1
> * x21 - aborted SP
> * x22 - aborted PC
> * x23 - aborted PSTATE
> @@ -449,6 +450,24 @@ alternative_endif
> .endm
> #endif
>
> + .macro gic_prio_kentry_setup, tmp:req
> +#ifdef CONFIG_ARM64_PSEUDO_NMI
> + alternative_if ARM64_HAS_IRQ_PRIO_MASKING
> + mov x20, #(GIC_PRIO_PSR_I_SET | GIC_PRIO_IRQON)
> + msr_s SYS_ICC_PMR_EL1, x20

I find the implicit use of x20 quite dangerous, but hey. I guess that
the context makes that fairly explicit.

> + alternative_else_nop_endif
> +#endif
> + .endm
> +
> + .macro gic_prio_irq_setup, pmr:req, tmp:req
> +#ifdef CONFIG_ARM64_PSEUDO_NMI
> + alternative_if ARM64_HAS_IRQ_PRIO_MASKING
> + orr \tmp, \pmr, #GIC_PRIO_PSR_I_SET
> + msr_s SYS_ICC_PMR_EL1, \tmp
> + alternative_else_nop_endif
> +#endif
> + .endm
> +
> .text
>
> /*
> @@ -627,6 +646,7 @@ el1_dbg:
> cmp x24, #ESR_ELx_EC_BRK64 // if BRK64
> cinc x24, x24, eq // set bit '0'
> tbz x24, #0, el1_inv // EL1 only
> + gic_prio_kentry_setup tmp=x3
> mrs x0, far_el1
> mov x2, sp // struct pt_regs
> bl do_debug_exception
> @@ -644,12 +664,10 @@ ENDPROC(el1_sync)
> .align 6
> el1_irq:
> kernel_entry 1
> + gic_prio_irq_setup pmr=x20, tmp=x1
> enable_da_f
>
> #ifdef CONFIG_ARM64_PSEUDO_NMI
> -alternative_if ARM64_HAS_IRQ_PRIO_MASKING
> - ldr x20, [sp, #S_PMR_SAVE]
> -alternative_else_nop_endif
> test_irqs_unmasked res=x0, pmr=x20
> cbz x0, 1f
> bl asm_nmi_enter
> @@ -679,8 +697,9 @@ alternative_else_nop_endif
>
> #ifdef CONFIG_ARM64_PSEUDO_NMI
> /*
> - * if IRQs were disabled when we received the interrupt, we have an NMI
> - * and we are not re-enabling interrupt upon eret. Skip tracing.
> + * When using IRQ priority masking, we can get spurious interrupts while
> + * PMR is set to GIC_PRIO_IRQOFF. An NMI might also have occurred in a
> + * section with interrupts disabled. Skip tracing in those cases.
> */
> test_irqs_unmasked res=x0, pmr=x20
> cbz x0, 1f
> @@ -809,6 +828,7 @@ el0_ia:
> * Instruction abort handling
> */
> mrs x26, far_el1
> + gic_prio_kentry_setup tmp=x0
> enable_da_f
> #ifdef CONFIG_TRACE_IRQFLAGS
> bl trace_hardirqs_off
> @@ -854,6 +874,7 @@ el0_sp_pc:
> * Stack or PC alignment exception handling
> */
> mrs x26, far_el1
> + gic_prio_kentry_setup tmp=x0
> enable_da_f
> #ifdef CONFIG_TRACE_IRQFLAGS
> bl trace_hardirqs_off
> @@ -888,6 +909,7 @@ el0_dbg:
> * Debug exception handling
> */
> tbnz x24, #0, el0_inv // EL0 only
> + gic_prio_kentry_setup tmp=x3
> mrs x0, far_el1
> mov x1, x25
> mov x2, sp
> @@ -909,7 +931,9 @@ ENDPROC(el0_sync)
> el0_irq:
> kernel_entry 0
> el0_irq_naked:
> + gic_prio_irq_setup pmr=x20, tmp=x0
> enable_da_f
> +
> #ifdef CONFIG_TRACE_IRQFLAGS
> bl trace_hardirqs_off
> #endif
> @@ -931,6 +955,7 @@ ENDPROC(el0_irq)
> el1_error:
> kernel_entry 1
> mrs x1, esr_el1
> + gic_prio_kentry_setup tmp=x2
> enable_dbg
> mov x0, sp
> bl do_serror
> @@ -941,6 +966,7 @@ el0_error:
> kernel_entry 0
> el0_error_naked:
> mrs x1, esr_el1
> + gic_prio_kentry_setup tmp=x2
> enable_dbg
> mov x0, sp
> bl do_serror
> @@ -965,6 +991,7 @@ work_pending:
> */
> ret_to_user:
> disable_daif
> + gic_prio_kentry_setup tmp=x3
> ldr x1, [tsk, #TSK_TI_FLAGS]
> and x2, x1, #_TIF_WORK_MASK
> cbnz x2, work_pending
> @@ -981,6 +1008,7 @@ ENDPROC(ret_to_user)
> */
> .align 6
> el0_svc:
> + gic_prio_kentry_setup tmp=x1
> mov x0, sp
> bl el0_svc_handler
> b ret_to_user
> diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c
> index 3767fb2..58efc37 100644
> --- a/arch/arm64/kernel/process.c
> +++ b/arch/arm64/kernel/process.c
> @@ -94,7 +94,7 @@ static void __cpu_do_idle_irqprio(void)
> * be raised.
> */
> pmr = gic_read_pmr();
> - gic_write_pmr(GIC_PRIO_IRQON);
> + gic_write_pmr(GIC_PRIO_IRQON | GIC_PRIO_PSR_I_SET);
>
> __cpu_do_idle();
>
> diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c
> index bb4b3f0..4deaee3 100644
> --- a/arch/arm64/kernel/smp.c
> +++ b/arch/arm64/kernel/smp.c
> @@ -192,11 +192,13 @@ static void init_gic_priority_masking(void)
>
> WARN_ON(!(cpuflags & PSR_I_BIT));
>
> - gic_write_pmr(GIC_PRIO_IRQOFF);
> -
> /* We can only unmask PSR.I if we can take aborts */
> - if (!(cpuflags & PSR_A_BIT))
> + if (!(cpuflags & PSR_A_BIT)) {
> + gic_write_pmr(GIC_PRIO_IRQOFF);
> write_sysreg(cpuflags & ~PSR_I_BIT, daif);
> + } else {
> + gic_write_pmr(GIC_PRIO_IRQON | GIC_PRIO_PSR_I_SET);
> + }
> }
>
> /*
> diff --git a/arch/arm64/kvm/hyp/switch.c b/arch/arm64/kvm/hyp/switch.c
> index 8799e0c..b89fcf0 100644
> --- a/arch/arm64/kvm/hyp/switch.c
> +++ b/arch/arm64/kvm/hyp/switch.c
> @@ -615,7 +615,7 @@ int __hyp_text __kvm_vcpu_run_nvhe(struct kvm_vcpu *vcpu)
> * Naturally, we want to avoid this.
> */
> if (system_uses_irq_prio_masking()) {
> - gic_write_pmr(GIC_PRIO_IRQON);
> + gic_write_pmr(GIC_PRIO_IRQON | GIC_PRIO_PSR_I_SET);
> dsb(sy);
> }
>
> --
> 1.9.1
>

Thanks,

M.
--
Jazz is not dead. It just smells funny...