Re: [PATCH v2 2/2] KVM: arm64: timers: Save restore CVAL of a ptimer across guest entry and exits

From: Ganapatrao Kulkarni
Date: Fri Sep 15 2023 - 06:00:06 EST



Hi Marc,

On 10-09-2023 05:15 pm, Marc Zyngier wrote:
On Mon, 04 Sep 2023 12:42:18 +0100,
Ganapatrao Kulkarni <gankulkarni@xxxxxxxxxxxxxxxxxxxxxx> wrote:

As per FEAT_ECV, when HCR_EL2.{E2H, TGE} == {1, 1}, Enhanced Counter
Virtualization functionality is disabled and CNTPOFF_EL2 value is treated
as zero. On VHE host, E2H and TGE are set, hence it is required
to save Guest's CVAL to memory and increment it by CNTPOFF_EL2 at
guest exit to avoid false physical timer interrupts and also
restore back the CVAL with saved value before the guest entry.

Signed-off-by: Ganapatrao Kulkarni <gankulkarni@xxxxxxxxxxxxxxxxxxxxxx>
---
arch/arm64/kvm/arch_timer.c | 60 ++++++++++++++++++++++++++++++---
arch/arm64/kvm/hyp/vhe/switch.c | 13 +++++++
include/kvm/arm_arch_timer.h | 1 +
3 files changed, 69 insertions(+), 5 deletions(-)

diff --git a/arch/arm64/kvm/arch_timer.c b/arch/arm64/kvm/arch_timer.c
index 98b0e8ac02ae..9fe3fa6ed98a 100644
--- a/arch/arm64/kvm/arch_timer.c
+++ b/arch/arm64/kvm/arch_timer.c
@@ -514,6 +514,59 @@ static void set_cntpoff(u64 cntpoff)
write_sysreg_s(cntpoff, SYS_CNTPOFF_EL2);
}
+static void ptimer_cval_save(struct arch_timer_context *ctx, u64 offset)
+{
+ unsigned long flags;
+ u64 cval;
+
+ local_irq_save(flags);
+ cval = read_sysreg_el0(SYS_CNTP_CVAL);
+ timer_set_cval(ctx, cval);
+ cval += offset;
+ write_sysreg_el0(cval, SYS_CNTP_CVAL);
+ isb();
+ local_irq_restore(flags);
+}
+
+static void ptimer_cval_restore(struct arch_timer_context *ctx, u64 offset)
+{
+ unsigned long flags;
+ u64 cval;
+
+ local_irq_save(flags);
+ cval = timer_get_cval(ctx);
+ write_sysreg_el0(cval, SYS_CNTP_CVAL);
+ isb();
+ local_irq_restore(flags);
+}
+
+void kvm_ptimer_cval_save_restore(struct kvm_vcpu *vcpu, bool save)
+{
+ struct timer_map map;
+ struct arch_timer_cpu *timer;
+ struct arch_timer_context *ctxp;
+ u64 offset;
+
+ get_timer_map(vcpu, &map);
+ ctxp = map.direct_ptimer;
+
+ if (unlikely(ctxp == NULL))
+ return;
+
+ offset = timer_get_offset(ctxp);
+ if (!offset)
+ return;
+
+ timer = vcpu_timer(ctxp->vcpu);
+ if (!timer->enabled || !ctxp->loaded)
+ return;
+
+ if (save)
+ ptimer_cval_save(ctxp, offset);
+ else
+ ptimer_cval_restore(ctxp, offset);
+}
+

I really don't want to see any of this code in the arch_timer
backend. There is nothing here that executes in the context of the
world-switch until this, and I think this is the wrong level of
abstraction.

You end-up doing things that make no sense in the expected execution
context (timer not loaded?, not enabled?, disabling interrupts?), and
this makes the whole thing pointlessly complex.

My take on this problem is still the same (vaguely amended compared to
the previous version). If this doesn't work for you, please explain
what is wrong with it. But it has the shape of what I really want to
see.

Thanks,

M.

From 2516a1410c0d45a7d3ba0523847be339bfcb1e50 Mon Sep 17 00:00:00 2001
From: Marc Zyngier <maz@xxxxxxxxxx>
Date: Tue, 22 Aug 2023 13:18:10 +0100
Subject: [PATCH] KVM: arm64: timers: Correctly handle TGE flip with
CNTPOFF_EL2

Contrary to common belief, HCR_EL2.TGE has a direct and immediate
effect on the way the EL0 physical counter is offset. Flipping
TGE from 1 to 0 while at EL2 immediately changes the way the counter
compared to the CVAL limit.

This means that we cannot directly save/restore the guest's view of
CVAL, but that we instead must treat it as if CNTPOFF didn't exist.
Only in the world switch, once we figure out that we do have CNTPOFF,
can we must the offset back and forth depending on the polarity of
TGE.

Fixes: 2b4825a86940 ("KVM: arm64: timers: Use CNTPOFF_EL2 to offset the physical timer")
Reported-by: Ganapatrao Kulkarni <gankulkarni@xxxxxxxxxxxxxxxxxxxxxx>
Signed-off-by: Marc Zyngier <maz@xxxxxxxxxx>
---
arch/arm64/kvm/arch_timer.c | 13 +++-------
arch/arm64/kvm/hyp/vhe/switch.c | 45 +++++++++++++++++++++++++++++++++
include/kvm/arm_arch_timer.h | 7 +++++
3 files changed, 55 insertions(+), 10 deletions(-)

diff --git a/arch/arm64/kvm/arch_timer.c b/arch/arm64/kvm/arch_timer.c
index 6dcdae4d38cb..a1e24228aaaa 100644
--- a/arch/arm64/kvm/arch_timer.c
+++ b/arch/arm64/kvm/arch_timer.c
@@ -55,11 +55,6 @@ static struct irq_ops arch_timer_irq_ops = {
.get_input_level = kvm_arch_timer_get_input_level,
};
-static bool has_cntpoff(void)
-{
- return (has_vhe() && cpus_have_final_cap(ARM64_HAS_ECV_CNTPOFF));
-}
-
static int nr_timers(struct kvm_vcpu *vcpu)
{
if (!vcpu_has_nv(vcpu))
@@ -180,7 +175,7 @@ u64 kvm_phys_timer_read(void)
return timecounter->cc->read(timecounter->cc);
}
-static void get_timer_map(struct kvm_vcpu *vcpu, struct timer_map *map)
+void get_timer_map(struct kvm_vcpu *vcpu, struct timer_map *map)
{
if (vcpu_has_nv(vcpu)) {
if (is_hyp_ctxt(vcpu)) {
@@ -548,8 +543,7 @@ static void timer_save_state(struct arch_timer_context *ctx)
timer_set_ctl(ctx, read_sysreg_el0(SYS_CNTP_CTL));
cval = read_sysreg_el0(SYS_CNTP_CVAL);
- if (!has_cntpoff())
- cval -= timer_get_offset(ctx);
+ cval -= timer_get_offset(ctx);
timer_set_cval(ctx, cval);
@@ -636,8 +630,7 @@ static void timer_restore_state(struct arch_timer_context *ctx)
cval = timer_get_cval(ctx);
offset = timer_get_offset(ctx);
set_cntpoff(offset);
- if (!has_cntpoff())
- cval += offset;
+ cval += offset;
write_sysreg_el0(cval, SYS_CNTP_CVAL);
isb();
write_sysreg_el0(timer_get_ctl(ctx), SYS_CNTP_CTL);
diff --git a/arch/arm64/kvm/hyp/vhe/switch.c b/arch/arm64/kvm/hyp/vhe/switch.c
index 6537f58b1a8c..5dcbde57f466 100644
--- a/arch/arm64/kvm/hyp/vhe/switch.c
+++ b/arch/arm64/kvm/hyp/vhe/switch.c
@@ -39,6 +39,26 @@ static void __activate_traps(struct kvm_vcpu *vcpu)
___activate_traps(vcpu);
+ if (has_cntpoff()) {
+ struct timer_map map;
+
+ get_timer_map(vcpu, &map);
+
+ /*
+ * We're entrering the guest. Reload the correct
+ * values from memory now that TGE is clear.
+ */
+ if (map.direct_ptimer == vcpu_ptimer(vcpu))
+ val = __vcpu_sys_reg(vcpu, CNTP_CVAL_EL0);
+ if (map.direct_ptimer == vcpu_hptimer(vcpu))
+ val = __vcpu_sys_reg(vcpu, CNTHP_CVAL_EL2);
+
+ if (map.direct_ptimer) {
+ write_sysreg_s(val, SYS_CNTP_CVAL_EL0);
+ isb();
+ }
+ }
+
val = read_sysreg(cpacr_el1);
val |= CPACR_ELx_TTA;
val &= ~(CPACR_EL1_ZEN_EL0EN | CPACR_EL1_ZEN_EL1EN |
@@ -77,6 +97,31 @@ static void __deactivate_traps(struct kvm_vcpu *vcpu)
write_sysreg(HCR_HOST_VHE_FLAGS, hcr_el2);
+ if (has_cntpoff()) {
+ struct timer_map map;
+ u64 val, offset;
+
+ get_timer_map(vcpu, &map);
+
+ /*
+ * We're exiting the guest. Save the latest CVAL value
+ * to memory and apply the offset now that TGE is set.
+ */
+ val = read_sysreg_s(SYS_CNTP_CVAL_EL0);
+ if (map.direct_ptimer == vcpu_ptimer(vcpu))
+ __vcpu_sys_reg(vcpu, CNTP_CVAL_EL0) = val;
+ if (map.direct_ptimer == vcpu_hptimer(vcpu))
+ __vcpu_sys_reg(vcpu, CNTHP_CVAL_EL2) = val;
+
+ offset = read_sysreg_s(SYS_CNTPOFF_EL2);
+
+ if (map.direct_ptimer && offset) {
+ offset = read_sysreg_s(SYS_CNTPOFF_EL2);
+ write_sysreg_s(val + offset, SYS_CNTP_CVAL_EL0);
+ isb();
+ }
+ }
+
/*
* ARM errata 1165522 and 1530923 require the actual execution of the
* above before we can switch to the EL2/EL0 translation regime used by
diff --git a/include/kvm/arm_arch_timer.h b/include/kvm/arm_arch_timer.h
index bb3cb005873e..e748bc957d83 100644
--- a/include/kvm/arm_arch_timer.h
+++ b/include/kvm/arm_arch_timer.h
@@ -82,6 +82,8 @@ struct timer_map {
struct arch_timer_context *emul_ptimer;
};
+void get_timer_map(struct kvm_vcpu *vcpu, struct timer_map *map);
+
struct arch_timer_cpu {
struct arch_timer_context timers[NR_KVM_TIMERS];
@@ -145,4 +147,9 @@ u64 timer_get_cval(struct arch_timer_context *ctxt);
void kvm_timer_cpu_up(void);
void kvm_timer_cpu_down(void);
+static inline bool has_cntpoff(void)
+{
+ return (has_vhe() && cpus_have_final_cap(ARM64_HAS_ECV_CNTPOFF));
+}
+
#endif

This patch did not work.
After adding changes as in below diff, it is started working.

diff --git a/arch/arm64/kvm/hyp/vhe/switch.c b/arch/arm64/kvm/hyp/vhe/switch.c
index b0b07658f77d..91d2cfb03e26 100644
--- a/arch/arm64/kvm/hyp/vhe/switch.c
+++ b/arch/arm64/kvm/hyp/vhe/switch.c
@@ -117,7 +117,7 @@ static void __activate_traps(struct kvm_vcpu *vcpu)
val = __vcpu_sys_reg(vcpu, CNTHP_CVAL_EL2);

if (map.direct_ptimer) {
- write_sysreg_s(val, SYS_CNTP_CVAL_EL0);
+ write_sysreg_el0(val, SYS_CNTP_CVAL);
isb();
}
}
@@ -161,8 +161,6 @@ static void __deactivate_traps(struct kvm_vcpu *vcpu)

___deactivate_traps(vcpu);

- write_sysreg(HCR_HOST_VHE_FLAGS, hcr_el2);
-
if (has_cntpoff()) {
struct timer_map map;
u64 val, offset;
@@ -173,7 +171,7 @@ static void __deactivate_traps(struct kvm_vcpu *vcpu)
* We're exiting the guest. Save the latest CVAL value
* to memory and apply the offset now that TGE is set.
*/
- val = read_sysreg_s(SYS_CNTP_CVAL_EL0);
+ val = read_sysreg_el0(SYS_CNTP_CVAL);
if (map.direct_ptimer == vcpu_ptimer(vcpu))
__vcpu_sys_reg(vcpu, CNTP_CVAL_EL0) = val;
if (map.direct_ptimer == vcpu_hptimer(vcpu))
@@ -182,12 +180,13 @@ static void __deactivate_traps(struct kvm_vcpu *vcpu)
offset = read_sysreg_s(SYS_CNTPOFF_EL2);

if (map.direct_ptimer && offset) {
- offset = read_sysreg_s(SYS_CNTPOFF_EL2);
- write_sysreg_s(val + offset, SYS_CNTP_CVAL_EL0);
+ write_sysreg_el0(val + offset, SYS_CNTP_CVAL);
isb();
}
}

+ write_sysreg(HCR_HOST_VHE_FLAGS, hcr_el2);
+
/*
* ARM errata 1165522 and 1530923 require the actual execution of the
* above before we can switch to the EL2/EL0 translation regime used by
[root@eng06sys-r120 arm-platforms]#



Thanks,
Ganapat