[PATCH 02/16] clocksource/drivers/arm_arch_timer: Default to EL2 virtual timer when running VHE
From: Marc Zyngier
Date: Thu May 07 2026 - 08:57:58 EST
When running with at EL2 with VHE enabled, the architecture provides
two EL2 timer/counters, dubbed physical and virtual. Apart from their
names, they are strictly identical.
However, they don't get virtualised the same way, specially when
it comes to adding arbitrary offsets to the timers. When running as
a guest, the host CNTVOFF_EL2 does apply to the guest's view of
CNTHV*_El2. This is not true for CNTPOFF_EL2 and CNTHP*_EL2, as
the architecture is broken past the first level of virtualisation
(it lacks some essential mechanisms to be usable, despite what
the ARM ARM pretends).
This means that when running as a L2 guest hypervisor, using the
physical timer results in traps to L0, which are then forwarded to
L1 in order to emulate the offset, leading to even worse performance
due to massive trap amplification (the combination of register and
ERET trapping is absolutely lethal).
Switch the arch timer code to using the virtual timer when running
in VHE by default, only using the physical timer if the interrupt
is not correctly described in the firmware tables (which seems
to be an unfortunately common case). This comes as no impact on
bare-metal, and slightly improves the situation in the virtualised
case.
Signed-off-by: Marc Zyngier <maz@xxxxxxxxxx>
---
drivers/clocksource/arm_arch_timer.c | 44 ++++++++++++++++------------
1 file changed, 25 insertions(+), 19 deletions(-)
diff --git a/drivers/clocksource/arm_arch_timer.c b/drivers/clocksource/arm_arch_timer.c
index 90aeff44a2764..eaf276a9b9d28 100644
--- a/drivers/clocksource/arm_arch_timer.c
+++ b/drivers/clocksource/arm_arch_timer.c
@@ -688,6 +688,7 @@ static void __arch_timer_setup(struct clock_event_device *clk)
clk->irq = arch_timer_ppi[arch_timer_uses_ppi];
switch (arch_timer_uses_ppi) {
case ARCH_TIMER_VIRT_PPI:
+ case ARCH_TIMER_HYP_VIRT_PPI:
clk->set_state_shutdown = arch_timer_shutdown_virt;
clk->set_state_oneshot_stopped = arch_timer_shutdown_virt;
sne = erratum_handler(set_next_event_virt);
@@ -879,7 +880,7 @@ static void __init arch_timer_banner(void)
pr_info("cp15 timer running at %lu.%02luMHz (%s).\n",
(unsigned long)arch_timer_rate / 1000000,
(unsigned long)(arch_timer_rate / 10000) % 100,
- (arch_timer_uses_ppi == ARCH_TIMER_VIRT_PPI) ? "virt" : "phys");
+ arch_timer_ppi_names[arch_timer_uses_ppi]);
}
u32 arch_timer_get_rate(void)
@@ -1023,6 +1024,7 @@ static int __init arch_timer_register(void)
ppi = arch_timer_ppi[arch_timer_uses_ppi];
switch (arch_timer_uses_ppi) {
case ARCH_TIMER_VIRT_PPI:
+ case ARCH_TIMER_HYP_VIRT_PPI:
err = request_percpu_irq(ppi, arch_timer_handler_virt,
"arch_timer", arch_timer_evt);
break;
@@ -1090,25 +1092,34 @@ static int __init arch_timer_common_init(void)
/**
* arch_timer_select_ppi() - Select suitable PPI for the current system.
*
- * If HYP mode is available, we know that the physical timer
- * has been configured to be accessible from PL1. Use it, so
- * that a guest can use the virtual timer instead.
+ * On AArch32, if HYP mode is available, we know that the physical
+ * timer has been configured to be accessible from PL1. Use it, so
+ * that a guest can use the virtual timer instead (though KVM host
+ * support has long been removed).
*
- * On ARMv8.1 with VH extensions, the kernel runs in HYP. VHE
- * accesses to CNTP_*_EL1 registers are silently redirected to
- * their CNTHP_*_EL2 counterparts, and use a different PPI
- * number.
+ * On ARMv8.1 with FEAT_VHE, the kernel runs in EL2. Accesses to
+ * CNTV_*_EL1 registers are silently redirected to their CNTHV_*_EL2
+ * counterparts, and the timer uses a different PPI number. Similar
+ * thing happen when using the EL2 physical timer. Note that a bunch
+ * of DTs out there omit the virtual EL2 timer, so fallback gracefully
+ * on the physical timer.
+ *
+ * Without VHE, if no interrupt provided for virtual timer, we'll have
+ * to stick to the physical timer. It'd better be accessible...
*
- * If no interrupt provided for virtual timer, we'll have to
- * stick to the physical timer. It'd better be accessible...
* For arm64 we never use the secure interrupt.
*
* Return: a suitable PPI type for the current system.
*/
static enum arch_timer_ppi_nr __init arch_timer_select_ppi(void)
{
- if (is_kernel_in_hyp_mode())
+ if (is_kernel_in_hyp_mode()) {
+ if (arch_timer_ppi[ARCH_TIMER_HYP_VIRT_PPI])
+ return ARCH_TIMER_HYP_VIRT_PPI;
+
+ pr_warn_once("VHE without EL2 virtual timer interrupt, broken firmware\n");
return ARCH_TIMER_HYP_PPI;
+ }
if (!is_hyp_mode_available() && arch_timer_ppi[ARCH_TIMER_VIRT_PPI])
return ARCH_TIMER_VIRT_PPI;
@@ -1200,14 +1211,9 @@ static int __init arch_timer_acpi_init(struct acpi_table_header *table)
if (ret)
return ret;
- arch_timer_ppi[ARCH_TIMER_PHYS_NONSECURE_PPI] =
- acpi_gtdt_map_ppi(ARCH_TIMER_PHYS_NONSECURE_PPI);
-
- arch_timer_ppi[ARCH_TIMER_VIRT_PPI] =
- acpi_gtdt_map_ppi(ARCH_TIMER_VIRT_PPI);
-
- arch_timer_ppi[ARCH_TIMER_HYP_PPI] =
- acpi_gtdt_map_ppi(ARCH_TIMER_HYP_PPI);
+ /* The GTDT parser can't be bothered with the secure timer */
+ for (int i = ARCH_TIMER_PHYS_NONSECURE_PPI; i < ARCH_TIMER_MAX_TIMER_PPI; i++)
+ arch_timer_ppi[i] = acpi_gtdt_map_ppi(i);
arch_timer_populate_kvm_info();
--
2.47.3