Re: [PATCH v9 12/26] arm64: irqflags: Use ICC_PMR_EL1 for interrupt masking

From: Julien Thierry
Date: Wed Jan 23 2019 - 05:44:38 EST




On 22/01/2019 15:21, Catalin Marinas wrote:
> On Mon, Jan 21, 2019 at 03:33:31PM +0000, Julien Thierry wrote:
>> diff --git a/arch/arm64/include/asm/irqflags.h b/arch/arm64/include/asm/irqflags.h
>> index 24692ed..7e82a92 100644
>> --- a/arch/arm64/include/asm/irqflags.h
>> +++ b/arch/arm64/include/asm/irqflags.h
>> @@ -18,7 +18,9 @@
>>
>> #ifdef __KERNEL__
>>
>> +#include <asm/alternative.h>
>> #include <asm/ptrace.h>
>> +#include <asm/sysreg.h>
>>
>> /*
>> * Aarch64 has flags for masking: Debug, Asynchronous (serror), Interrupts and
>> @@ -36,33 +38,31 @@
>> /*
>> * CPU interrupt mask handling.
>> */
>> -static inline unsigned long arch_local_irq_save(void)
>> -{
>> - unsigned long flags;
>> - asm volatile(
>> - "mrs %0, daif // arch_local_irq_save\n"
>> - "msr daifset, #2"
>> - : "=r" (flags)
>> - :
>> - : "memory");
>> - return flags;
>> -}
>> -
>> static inline void arch_local_irq_enable(void)
>> {
>> - asm volatile(
>> - "msr daifclr, #2 // arch_local_irq_enable"
>> - :
>> + unsigned long unmasked = GIC_PRIO_IRQON;
>> +
>> + asm volatile(ALTERNATIVE(
>> + "msr daifclr, #2 // arch_local_irq_enable\n"
>> + "nop",
>> + "msr_s " __stringify(SYS_ICC_PMR_EL1) ",%0\n"
>> + "dsb sy",
>> + ARM64_HAS_IRQ_PRIO_MASKING)
>> :
>> + : "r" (unmasked)
>> : "memory");
>> }
>>
>> static inline void arch_local_irq_disable(void)
>> {
>> - asm volatile(
>> - "msr daifset, #2 // arch_local_irq_disable"
>> - :
>> + unsigned long masked = GIC_PRIO_IRQOFF;
>> +
>> + asm volatile(ALTERNATIVE(
>> + "msr daifset, #2 // arch_local_irq_disable",
>> + "msr_s " __stringify(SYS_ICC_PMR_EL1) ", %0",
>> + ARM64_HAS_IRQ_PRIO_MASKING)
>> :
>> + : "r" (masked)
>> : "memory");
>> }
>
> Nitpicks: you could drop masked/unmasked variables here (it's up to you,
> it wouldn't make any difference on the generated asm).
>

Good point, I'll do that.

>> @@ -71,12 +71,44 @@ static inline void arch_local_irq_disable(void)
>> */
>> static inline unsigned long arch_local_save_flags(void)
>> {
>> + unsigned long daif_bits;
>> unsigned long flags;
>> - asm volatile(
>> - "mrs %0, daif // arch_local_save_flags"
>> - : "=r" (flags)
>> - :
>> +
>> + daif_bits = read_sysreg(daif);
>> +
>> + /*
>> + * The asm is logically equivalent to:
>> + *
>> + * if (system_uses_irq_prio_masking())
>> + * flags = (daif_bits & PSR_I_BIT) ?
>> + * GIC_PRIO_IRQOFF :
>> + * read_sysreg_s(SYS_ICC_PMR_EL1);
>> + * else
>> + * flags = daif_bits;
>> + */
>> + asm volatile(ALTERNATIVE(
>> + "mov %0, %1\n"
>> + "nop\n"
>> + "nop",
>> + "mrs_s %0, " __stringify(SYS_ICC_PMR_EL1) "\n"
>> + "ands %1, %1, " __stringify(PSR_I_BIT) "\n"
>> + "csel %0, %0, %2, eq",
>> + ARM64_HAS_IRQ_PRIO_MASKING)
>> + : "=&r" (flags), "+r" (daif_bits)
>> + : "r" (GIC_PRIO_IRQOFF)
>> : "memory");
>> +
>> + return flags;
>> +}
>
> BTW, how's the code generated from the C version? It will have a branch
> but may not be too bad. Either way is fine by me.
>

It's a bit hard to talk about the code generated from the C version as
it can lie within several layers of inline, so the instructions for that
section are a bit more scattered.

However, it seems like the compiler is more clever (maybe the asm
volatile prevents some optimizations regarding register allocation or
instruction ordering) and the C version seems to perform slightly better
(although it could be within the noise) despite the branch.

So, I'll just switch to the C version.

> Reviewed-by: Catalin Marinas <catalin.marinas@xxxxxxx>
>

--
Julien Thierry