Re: [RFT PATCH v3 01/27] arm64: Cope with CPUs stuck in VHE mode

From: Marc Zyngier
Date: Wed Mar 24 2021 - 16:01:04 EST


On Wed, 24 Mar 2021 18:05:46 +0000,
Will Deacon <will@xxxxxxxxxx> wrote:
>
> On Fri, Mar 05, 2021 at 06:38:36AM +0900, Hector Martin wrote:
> > From: Marc Zyngier <maz@xxxxxxxxxx>
> >
> > It seems that the CPU known as Apple M1 has the terrible habit
> > of being stuck with HCR_EL2.E2H==1, in violation of the architecture.
> >
> > Try and work around this deplorable state of affairs by detecting
> > the stuck bit early and short-circuit the nVHE dance. It is still
> > unknown whether there are many more such nuggets to be found...
> >
> > Reported-by: Hector Martin <marcan@xxxxxxxxx>
> > Signed-off-by: Marc Zyngier <maz@xxxxxxxxxx>
> > ---
> > arch/arm64/kernel/head.S | 33 ++++++++++++++++++++++++++++++---
> > arch/arm64/kernel/hyp-stub.S | 28 ++++++++++++++++++++++++----
> > 2 files changed, 54 insertions(+), 7 deletions(-)
> >
> > diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S
> > index 66b0e0b66e31..673002b11865 100644
> > --- a/arch/arm64/kernel/head.S
> > +++ b/arch/arm64/kernel/head.S
> > @@ -477,14 +477,13 @@ EXPORT_SYMBOL(kimage_vaddr)
> > * booted in EL1 or EL2 respectively.
> > */
> > SYM_FUNC_START(init_kernel_el)
> > - mov_q x0, INIT_SCTLR_EL1_MMU_OFF
> > - msr sctlr_el1, x0
> > -
> > mrs x0, CurrentEL
> > cmp x0, #CurrentEL_EL2
> > b.eq init_el2
> >
> > SYM_INNER_LABEL(init_el1, SYM_L_LOCAL)
> > + mov_q x0, INIT_SCTLR_EL1_MMU_OFF
> > + msr sctlr_el1, x0
> > isb
> > mov_q x0, INIT_PSTATE_EL1
> > msr spsr_el1, x0
> > @@ -504,6 +503,34 @@ SYM_INNER_LABEL(init_el2, SYM_L_LOCAL)
> > msr vbar_el2, x0
> > isb
> >
> > + /*
> > + * Fruity CPUs seem to have HCR_EL2.E2H set to RES1,
> > + * making it impossible to start in nVHE mode. Is that
> > + * compliant with the architecture? Absolutely not!
> > + */
> > + mrs x0, hcr_el2
> > + and x0, x0, #HCR_E2H
> > + cbz x0, 1f
> > +
> > + /* Switching to VHE requires a sane SCTLR_EL1 as a start */
> > + mov_q x0, INIT_SCTLR_EL1_MMU_OFF
> > + msr_s SYS_SCTLR_EL12, x0
> > +
> > + /*
> > + * Force an eret into a helper "function", and let it return
> > + * to our original caller... This makes sure that we have
> > + * initialised the basic PSTATE state.
> > + */
> > + mov x0, #INIT_PSTATE_EL2
> > + msr spsr_el1, x0
> > + adr_l x0, stick_to_vhe
> > + msr elr_el1, x0
> > + eret
> > +
> > +1:
> > + mov_q x0, INIT_SCTLR_EL1_MMU_OFF
> > + msr sctlr_el1, x0
> > +
> > msr elr_el2, lr
> > mov w0, #BOOT_CPU_MODE_EL2
> > eret
> > diff --git a/arch/arm64/kernel/hyp-stub.S b/arch/arm64/kernel/hyp-stub.S
> > index 5eccbd62fec8..c7601030ee82 100644
> > --- a/arch/arm64/kernel/hyp-stub.S
> > +++ b/arch/arm64/kernel/hyp-stub.S
> > @@ -27,12 +27,12 @@ SYM_CODE_START(__hyp_stub_vectors)
> > ventry el2_fiq_invalid // FIQ EL2t
> > ventry el2_error_invalid // Error EL2t
> >
> > - ventry el2_sync_invalid // Synchronous EL2h
> > + ventry elx_sync // Synchronous EL2h
> > ventry el2_irq_invalid // IRQ EL2h
> > ventry el2_fiq_invalid // FIQ EL2h
> > ventry el2_error_invalid // Error EL2h
> >
> > - ventry el1_sync // Synchronous 64-bit EL1
> > + ventry elx_sync // Synchronous 64-bit EL1
> > ventry el1_irq_invalid // IRQ 64-bit EL1
> > ventry el1_fiq_invalid // FIQ 64-bit EL1
> > ventry el1_error_invalid // Error 64-bit EL1
> > @@ -45,7 +45,7 @@ SYM_CODE_END(__hyp_stub_vectors)
> >
> > .align 11
> >
> > -SYM_CODE_START_LOCAL(el1_sync)
> > +SYM_CODE_START_LOCAL(elx_sync)
> > cmp x0, #HVC_SET_VECTORS
> > b.ne 1f
> > msr vbar_el2, x1
> > @@ -71,7 +71,7 @@ SYM_CODE_START_LOCAL(el1_sync)
> >
> > 9: mov x0, xzr
> > eret
> > -SYM_CODE_END(el1_sync)
> > +SYM_CODE_END(elx_sync)
> >
> > // nVHE? No way! Give me the real thing!
> > SYM_CODE_START_LOCAL(mutate_to_vhe)
> > @@ -243,3 +243,23 @@ SYM_FUNC_START(switch_to_vhe)
> > #endif
> > ret
> > SYM_FUNC_END(switch_to_vhe)
> > +
> > +SYM_FUNC_START(stick_to_vhe)
> > + /*
> > + * Make sure the switch to VHE cannot fail, by overriding the
> > + * override. This is hilarious.
> > + */
> > + adr_l x1, id_aa64mmfr1_override
> > + add x1, x1, #FTR_OVR_MASK_OFFSET
> > + dc civac, x1
> > + dsb sy
> > + isb
>
> Why do we need an ISB here?

Hmmm. Probably me being paranoid and trying to come up with something
for Hector to test before I had access to the HW. The DSB is more than
enough to order CMO and load/store.

> > + ldr x0, [x1]
> > + bic x0, x0, #(0xf << ID_AA64MMFR1_VHE_SHIFT)
> > + str x0, [x1]
>
> I find it a bit bizarre doing this here, as for the primary CPU we're still
> a way away from parsing the early paramaters and for secondary CPUs this
> doesn't need to be done for each one. Furthermore, this same code is run
> on the resume path, which can probably then race with itself.

Agreed, this isn't great.

> Is it possible to do it later on the boot CPU only, e.g. in
> init_feature_override()? We should probably also log a warning that we're
> ignoring the option because nVHE is not available.

I've come up with this on top of the original patch, spitting a
warning when the right conditions are met. It's pretty ugly, but hey,
so is the HW this runs on.

M.

diff --git a/arch/arm64/kernel/hyp-stub.S b/arch/arm64/kernel/hyp-stub.S
index c7601030ee82..e6fa5cdd790a 100644
--- a/arch/arm64/kernel/hyp-stub.S
+++ b/arch/arm64/kernel/hyp-stub.S
@@ -245,19 +245,6 @@ SYM_FUNC_START(switch_to_vhe)
SYM_FUNC_END(switch_to_vhe)

SYM_FUNC_START(stick_to_vhe)
- /*
- * Make sure the switch to VHE cannot fail, by overriding the
- * override. This is hilarious.
- */
- adr_l x1, id_aa64mmfr1_override
- add x1, x1, #FTR_OVR_MASK_OFFSET
- dc civac, x1
- dsb sy
- isb
- ldr x0, [x1]
- bic x0, x0, #(0xf << ID_AA64MMFR1_VHE_SHIFT)
- str x0, [x1]
-
mov x0, #HVC_VHE_RESTART
hvc #0
mov x0, #BOOT_CPU_MODE_EL2
diff --git a/arch/arm64/kernel/idreg-override.c b/arch/arm64/kernel/idreg-override.c
index 83f1c4b92095..ec07e150cf5c 100644
--- a/arch/arm64/kernel/idreg-override.c
+++ b/arch/arm64/kernel/idreg-override.c
@@ -195,6 +195,8 @@ static __init void parse_cmdline(void)
__parse_cmdline(prop, true);
}

+static bool nvhe_impaired __initdata;
+
/* Keep checkers quiet */
void init_feature_override(void);

@@ -211,9 +213,32 @@ asmlinkage void __init init_feature_override(void)

parse_cmdline();

+ /*
+ * If we ever reach this point while running VHE, we're
+ * guaranteed to be on one of these funky, VHE-stuck CPUs. If
+ * the user was trying to force nVHE on us, proceed with
+ * attitude adjustment.
+ */
+ if (is_kernel_in_hyp_mode()) {
+ u64 mask = 0xfUL << ID_AA64MMFR1_VHE_SHIFT;
+
+ if ((id_aa64mmfr1_override.mask & mask) &&
+ !(id_aa64mmfr1_override.val & mask)) {
+ nvhe_impaired = true;
+ id_aa64mmfr1_override.mask &= ~mask;
+ }
+ }
+
for (i = 0; i < ARRAY_SIZE(regs); i++) {
if (regs[i]->override)
__flush_dcache_area(regs[i]->override,
sizeof(*regs[i]->override));
}
}
+
+static int __init check_feature_override(void)
+{
+ WARN_ON(nvhe_impaired);
+ return 0;
+}
+core_initcall(check_feature_override);

--
Without deviation from the norm, progress is not possible.