Re: [PATCH V2] irqchip/gic-v3: Workaround Marvell erratum 38545 when reading IAR
From: Marc Zyngier
Date: Fri Mar 04 2022 - 02:43:40 EST
On Fri, 04 Mar 2022 01:43:01 +0000,
Linu Cherian <lcherian@xxxxxxxxxxx> wrote:
>
> When a IAR register read races with a GIC interrupt RELEASE event,
> GIC-CPU interface could wrongly return a valid INTID to the CPU
> for an interrupt that is already released(non activated) instead of 0x3ff.
>
> As a side effect, an interrupt handler could run twice, once with
> interrupt priority and then with idle priority.
>
> As a workaround, gic_read_iar is updated so that it will return a
> valid interrupt ID only if there is a change in the active priority list
> after the IAR read on all the affected Silicons.
>
> Since there are silicon variants where both 23154 and 38545 are applicable,
> workaround for erratum 23154 has been extended to address both of them.
>
> Signed-off-by: Linu Cherian <lcherian@xxxxxxxxxxx>
> ---
> Changes since V2:
> - IIDR based quirk management done for 23154 has been reverted
> - Extended existing 23154 errata to address 38545 as well,
> so that existing static keys are reused.
> - Added MIDR based support macros to cover all the affected parts
> - Changed the unlikely construct to likely construct in the workaround
> function.
>
>
>
> Documentation/arm64/silicon-errata.rst | 2 +-
> arch/arm64/Kconfig | 8 ++++++--
> arch/arm64/include/asm/arch_gicv3.h | 22 ++++++++++++++++++++--
> arch/arm64/include/asm/cputype.h | 2 ++
> arch/arm64/kernel/cpu_errata.c | 25 ++++++++++++++++++++++---
> 5 files changed, 51 insertions(+), 8 deletions(-)
>
> diff --git a/Documentation/arm64/silicon-errata.rst b/Documentation/arm64/silicon-errata.rst
> index ea281dd75517..466cb9e89047 100644
> --- a/Documentation/arm64/silicon-errata.rst
> +++ b/Documentation/arm64/silicon-errata.rst
> @@ -136,7 +136,7 @@ stable kernels.
> +----------------+-----------------+-----------------+-----------------------------+
> | Cavium | ThunderX ITS | #23144 | CAVIUM_ERRATUM_23144 |
> +----------------+-----------------+-----------------+-----------------------------+
> -| Cavium | ThunderX GICv3 | #23154 | CAVIUM_ERRATUM_23154 |
> +| Cavium | ThunderX GICv3 | #23154,38545 | CAVIUM_ERRATUM_23154 |
> +----------------+-----------------+-----------------+-----------------------------+
> | Cavium | ThunderX GICv3 | #38539 | N/A |
> +----------------+-----------------+-----------------+-----------------------------+
> diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
> index 09b885cc4db5..778cc2e22c21 100644
> --- a/arch/arm64/Kconfig
> +++ b/arch/arm64/Kconfig
> @@ -891,13 +891,17 @@ config CAVIUM_ERRATUM_23144
> If unsure, say Y.
>
> config CAVIUM_ERRATUM_23154
> - bool "Cavium erratum 23154: Access to ICC_IAR1_EL1 is not sync'ed"
> + bool "Cavium errata 23154 and 38545: GICv3 lacks HW synchronisation"
> default y
> help
> - The gicv3 of ThunderX requires a modified version for
> + The ThunderX GICv3 implementation requires a modified version for
> reading the IAR status to ensure data synchronization
> (access to icc_iar1_el1 is not sync'ed before and after).
>
> + It also suffers from erratum 38545 (also present on Marvell's
> + OcteonTX and OcteonTX2), resulting in deactivated interrupts being
> + spuriously presented to the CPU interface.
> +
> If unsure, say Y.
>
> config CAVIUM_ERRATUM_27456
> diff --git a/arch/arm64/include/asm/arch_gicv3.h b/arch/arm64/include/asm/arch_gicv3.h
> index 4ad22c3135db..b8fd7b1f9944 100644
> --- a/arch/arm64/include/asm/arch_gicv3.h
> +++ b/arch/arm64/include/asm/arch_gicv3.h
> @@ -53,17 +53,35 @@ static inline u64 gic_read_iar_common(void)
> * The gicv3 of ThunderX requires a modified version for reading the
> * IAR status to ensure data synchronization (access to icc_iar1_el1
> * is not sync'ed before and after).
> + *
> + * Erratum 38545
> + *
> + * When a IAR register read races with a GIC interrupt RELEASE event,
> + * GIC-CPU interface could wrongly return a valid INTID to the CPU
> + * for an interrupt that is already released(non activated) instead of 0x3ff.
> + *
> + * To workaround this, return a valid interrupt ID only if there is a change
> + * in the active priority list after the IAR read.
> + *
> + * Common function used for both the workarounds since,
> + * 1. On Thunderx 88xx 1.x both erratas are applicable.
> + * 2. Having extra nops doesn't add any side effects for Silicons where
> + * erratum 23154 is not applicable.
> */
> static inline u64 gic_read_iar_cavium_thunderx(void)
> {
> - u64 irqstat;
> + u64 irqstat, apr;
>
> + apr = read_sysreg_s(SYS_ICC_AP1R0_EL1);
Why only AP1R0? Does the HW only support 5 bits of priority? If it
supports more, you need to check all the registers that may contain an
active priority (0xa0 for a standard interrupt, 0x20 for a pNMI).
> nops(8);
> irqstat = read_sysreg_s(SYS_ICC_IAR1_EL1);
> nops(4);
> mb();
>
> - return irqstat;
> + if (likely(apr != read_sysreg_s(SYS_ICC_AP1R0_EL1)))
> + return irqstat;
> +
> + return 0x3ff;
This should be ICC_IAR1_EL1_SPURIOUS.
> }
>
> static inline void gic_write_ctlr(u32 val)
> diff --git a/arch/arm64/include/asm/cputype.h b/arch/arm64/include/asm/cputype.h
> index 999b9149f856..9407c5074a4f 100644
> --- a/arch/arm64/include/asm/cputype.h
> +++ b/arch/arm64/include/asm/cputype.h
> @@ -80,6 +80,7 @@
>
> #define APM_CPU_PART_POTENZA 0x000
>
> +#define CAVIUM_CPU_PART_THUNDERX_OTX_GEN 0x0A0
Is this an actual part number? What does 'GEN' stand for?
> #define CAVIUM_CPU_PART_THUNDERX 0x0A1
> #define CAVIUM_CPU_PART_THUNDERX_81XX 0x0A2
> #define CAVIUM_CPU_PART_THUNDERX_83XX 0x0A3
> @@ -121,6 +122,7 @@
> #define MIDR_CORTEX_A710 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A710)
> #define MIDR_CORTEX_X2 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_X2)
> #define MIDR_NEOVERSE_N2 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_NEOVERSE_N2)
> +#define MIDR_THUNDERX_OTX_GEN MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_THUNDERX_OTX_GEN)
> #define MIDR_THUNDERX MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_THUNDERX)
> #define MIDR_THUNDERX_81XX MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_THUNDERX_81XX)
> #define MIDR_THUNDERX_83XX MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_THUNDERX_83XX)
> diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c
> index b217941713a8..82ed09b363d6 100644
> --- a/arch/arm64/kernel/cpu_errata.c
> +++ b/arch/arm64/kernel/cpu_errata.c
> @@ -41,6 +41,25 @@ is_affected_midr_range_list(const struct arm64_cpu_capabilities *entry,
> return is_midr_in_range_list(read_cpuid_id(), entry->midr_range_list);
> }
>
> +static bool __maybe_unused
> +is_marvell_thunderx_otx_family(const struct arm64_cpu_capabilities *entry,
> + int scope)
> +{
> + u32 model;
> +
> + WARN_ON(scope != SCOPE_LOCAL_CPU || preemptible());
> +
> + model = read_cpuid_id();
> + /* 0xe8 mask will cover 0xA1 - 0xA3 and 0xB1 - 0xB6 series with
> + * 0xAF and 0xB8 as exceptions
> + */
> + model &= MIDR_IMPLEMENTOR_MASK | (0x0e8 << MIDR_PARTNUM_SHIFT) |
> + MIDR_ARCHITECTURE_MASK;
> +
> + /* This includes Thunderx, OcteonTx, OcteonTx2 family of processors */
> + return model == MIDR_THUNDERX_OTX_GEN;
> +}
> +
No, please. This is a version of the Kryo hack, only worse. We
*really* want to see an actual list of models and revisions, not an
obfuscated mask that covers HW that may or may not exist. All the
infrastructure is there to describe these constraints as data, just
make use of it.
> static bool __maybe_unused
> is_kryo_midr(const struct arm64_cpu_capabilities *entry, int scope)
> {
> @@ -425,10 +444,10 @@ const struct arm64_cpu_capabilities arm64_errata[] = {
> #endif
> #ifdef CONFIG_CAVIUM_ERRATUM_23154
> {
> - /* Cavium ThunderX, pass 1.x */
> - .desc = "Cavium erratum 23154",
> + .desc = "Cavium errata 23154 and 38545",
> .capability = ARM64_WORKAROUND_CAVIUM_23154,
> - ERRATA_MIDR_REV_RANGE(MIDR_THUNDERX, 0, 0, 1),
> + .type = ARM64_CPUCAP_LOCAL_CPU_ERRATUM,
> + .matches = is_marvell_thunderx_otx_family,
> },
> #endif
> #ifdef CONFIG_CAVIUM_ERRATUM_27456
Thanks,
M.
--
Without deviation from the norm, progress is not possible.