[PATCH RFC 7/7] kvm: guest: reprogram guest timer

From: Quan Xu
Date: Fri Dec 08 2017 - 03:41:30 EST


From: Ben Luo <bn0418@xxxxxxxxx>

In general, a KVM guest programs its tsc-deadline timestamp into the
MSR_IA32_TSC_DEADLINE MSR.

When pvtimer is enabled, we introduce a new mechanism to
reprogram the KVM guest timer. A periodically running kthread
on a dedicated CPU scans the shared page and synchronizes the
timer settings for the guest. The next expected wakeup of this
kthread serves as a threshold for deciding whether to program
the tsc-deadline timestamp into the MSR_IA32_TSC_DEADLINE MSR
directly, or to publish it via the shared page instead.

Signed-off-by: Yang Zhang <yang.zhang.wz@xxxxxxxxx>
Signed-off-by: Quan Xu <quan.xu0@xxxxxxxxx>
Signed-off-by: Ben Luo <bn0418@xxxxxxxxx>
---
arch/x86/include/asm/kvm_para.h | 9 +++++++++
arch/x86/kernel/apic/apic.c | 9 ++++++---
arch/x86/kernel/kvm.c | 38 ++++++++++++++++++++++++++++++++++++++
3 files changed, 53 insertions(+), 3 deletions(-)

diff --git a/arch/x86/include/asm/kvm_para.h b/arch/x86/include/asm/kvm_para.h
index c373e44..109e706 100644
--- a/arch/x86/include/asm/kvm_para.h
+++ b/arch/x86/include/asm/kvm_para.h
@@ -5,6 +5,7 @@
#include <asm/processor.h>
#include <asm/alternative.h>
#include <uapi/asm/kvm_para.h>
+#include <linux/hrtimer.h>

extern void kvmclock_init(void);
extern int kvm_register_clock(char *txt);
@@ -92,6 +93,8 @@ static inline long kvm_hypercall4(unsigned int nr, unsigned long p1,
void kvm_async_pf_task_wait(u32 token, int interrupt_kernel);
void kvm_async_pf_task_wake(u32 token);
u32 kvm_read_and_reset_pf_reason(void);
+int kvm_pv_timer_next_event(unsigned long tsc,
+ struct clock_event_device *evt);
extern void kvm_disable_steal_time(void);

#ifdef CONFIG_PARAVIRT_SPINLOCKS
@@ -126,6 +129,12 @@ static inline void kvm_disable_steal_time(void)
{
return;
}
+
+static inline int kvm_pv_timer_next_event(unsigned long tsc,
+ struct clock_event_device *evt)
+{
+ return 0;
+}
#endif

#endif /* _ASM_X86_KVM_PARA_H */
diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
index ff89177..286c1b3 100644
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -471,10 +471,13 @@ static int lapic_next_event(unsigned long delta,
static int lapic_next_deadline(unsigned long delta,
struct clock_event_device *evt)
{
- u64 tsc;
+ u64 tsc = rdtsc() + (((u64) delta) * TSC_DIVISOR);

- tsc = rdtsc();
- wrmsrl(MSR_IA32_TSC_DEADLINE, tsc + (((u64) delta) * TSC_DIVISOR));
+ /* TODO: direct call into KVM guest code from generic APIC code is a
+ * layering violation; this should be routed through a paravirt hook. */
+ if (kvm_pv_timer_next_event(tsc, evt))
+ return 0;
+
+ wrmsrl(MSR_IA32_TSC_DEADLINE, tsc);
return 0;
}

diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c
index 8bb9594..ec7aff1 100644
--- a/arch/x86/kernel/kvm.c
+++ b/arch/x86/kernel/kvm.c
@@ -328,6 +328,35 @@ static notrace void kvm_guest_apic_eoi_write(u32 reg, u32 val)
apic->native_eoi_write(APIC_EOI, APIC_EOI_ACK);
}

+static DEFINE_PER_CPU(int, pvtimer_enabled);
+static DEFINE_PER_CPU(struct pvtimer_vcpu_event_info,
+		      pvtimer_shared_buf) = {0};
+
+/* Minimum headroom, in TSC cycles, below which handing the deadline to the
+ * host-side pvtimer kthread is not worthwhile and the MSR is used instead. */
+#define PVTIMER_PADDING 25000
+
+/*
+ * kvm_pv_timer_next_event - try to hand a tsc-deadline off to the pv timer
+ * @tsc: absolute tsc-deadline value the guest wants to program
+ * @evt: clock event device (currently unused; kept for the call signature)
+ *
+ * Publish @tsc in this CPU's shared page so the host's pvtimer kthread can
+ * pick it up on its next scan.
+ *
+ * Returns 1 if the deadline was handed off via the shared page (the caller
+ * must NOT write MSR_IA32_TSC_DEADLINE), 0 if the caller should fall back
+ * to programming the MSR directly.
+ */
+int kvm_pv_timer_next_event(unsigned long tsc,
+	struct clock_event_device *evt)
+{
+	struct pvtimer_vcpu_event_info *src;
+	u64 now;
+
+	if (!this_cpu_read(pvtimer_enabled))
+		return 0;
+
+	src = this_cpu_ptr(&pvtimer_shared_buf);
+
+	/* xchg() implies a full memory barrier on x86, so the store to
+	 * expire_tsc is globally visible before next_sync_tsc is read;
+	 * no extra barrier() is needed. */
+	xchg((u64 *)&src->expire_tsc, tsc);
+
+	/* Deadline earlier than the kthread's next scan: the kthread would
+	 * only notice it after it has already expired, so use the MSR. */
+	if (tsc < src->next_sync_tsc)
+		return 0;
+
+	/* rdtscll() is deprecated; use rdtsc() as apic.c already does. */
+	now = rdtsc();
+
+	/* Deadlines in the past or too imminent also go to the MSR. */
+	if (tsc < now || tsc - now < PVTIMER_PADDING)
+		return 0;
+
+	return 1;
+}
+EXPORT_SYMBOL_GPL(kvm_pv_timer_next_event);
+
static void kvm_guest_cpu_init(void)
{
if (!kvm_para_available())
@@ -362,6 +391,15 @@ static void kvm_guest_cpu_init(void)

if (has_steal_clock)
kvm_register_steal_time();
+
+ if (kvm_para_has_feature(KVM_FEATURE_PV_TIMER)) {
+ unsigned long data;
+
+ data = slow_virt_to_phys(this_cpu_ptr(&pvtimer_shared_buf))
+ | KVM_MSR_ENABLED;
+ wrmsrl(MSR_KVM_PV_TIMER_EN, data);
+ this_cpu_write(pvtimer_enabled, 1);
+ }
}

static void kvm_pv_disable_apf(void)
--
1.7.1