Re: [PATCH RFC V2 2/2] KVM: x86: Support using the VMX preemption timer for APIC Timer periodic/oneshot mode

From: Radim KrÄmÃÅ
Date: Wed Oct 12 2016 - 13:49:31 EST


2016-10-12 14:52+0800, Wanpeng Li:
> From: Wanpeng Li <wanpeng.li@xxxxxxxxxxx>
>
> Most windows guests still utilize APIC Timer periodic/oneshot mode
> instead of tsc-deadline mode, and the APIC Timer periodic/oneshot
> mode are still emulated by high overhead hrtimer on host. This patch
> converts the expected expire time of the periodic/oneshot mode to
> guest deadline tsc in order to leverage VMX preemption timer logic
> for APIC Timer tsc-deadline mode.
>
> Cc: Paolo Bonzini <pbonzini@xxxxxxxxxx>
> Cc: Radim KrÄmÃÅ <rkrcmar@xxxxxxxxxx>
> Cc: Yunhong Jiang <yunhong.jiang@xxxxxxxxx>
> Signed-off-by: Wanpeng Li <wanpeng.li@xxxxxxxxxxx>
> ---
> diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
> @@ -1101,7 +1101,14 @@ static u32 apic_get_tmcct(struct kvm_lapic *apic)
> apic->lapic_timer.period == 0)
> return 0;
>
> - remaining = hrtimer_get_remaining(&apic->lapic_timer.timer);
> + if (kvm_lapic_hv_timer_in_use(apic->vcpu)) {

apic->lapic_timer.expired_period is set unconditionally, so the
condition seems superfluous.

> + ktime_t now;
> +
> + now = apic->lapic_timer.timer.base->get_time();
> + remaining = ktime_sub(apic->lapic_timer.expired_period, now);

The name expired_period is weird for something that can be in the future
... maybe target_ns?

> + } else
> + remaining = hrtimer_get_remaining(&apic->lapic_timer.timer);
> +
> if (ktime_to_ns(remaining) < 0)
> remaining = ktime_set(0, 0);
>
> @@ -1353,8 +1360,6 @@ static void start_sw_period(struct kvm_lapic *apic)
>
> /* lapic timer in oneshot or periodic mode */
> now = apic->lapic_timer.timer.base->get_time();
> - apic->lapic_timer.period = (u64)kvm_lapic_get_reg(apic, APIC_TMICT)
> - * APIC_BUS_CYCLE_NS * apic->divide_count;

I would gut start_sw_period() a bit more: the checking of minimal period
is a bit misplaced, because it uses now() when the timer switched to
hrtimers, but that could have been long after the timer was set, so we'd
delay for no good reason.

> if (!apic->lapic_timer.period)
> return;

Can this happen? (WARN_ONCE() if it is only a sanity check.)

> @@ -1400,52 +1405,71 @@ bool kvm_lapic_hv_timer_in_use(struct kvm_vcpu *vcpu)
> +void kvm_lapic_expired_hv_timer(struct kvm_vcpu *vcpu)
> +{
> + struct kvm_lapic *apic = vcpu->arch.apic;
> +
> + WARN_ON(!apic->lapic_timer.hv_timer_in_use);
> + WARN_ON(swait_active(&vcpu->wq));
> + cancel_hv_timer(apic);
> + apic_timer_expired(apic);
> +
> + if (apic_lvtt_period(apic)) {
> + ktime_t now;
> + u64 tscl = rdtsc();
> +
> + apic->lapic_timer.period = (u64)kvm_lapic_get_reg(apic, APIC_TMICT)
> + * APIC_BUS_CYCLE_NS * apic->divide_count;
> +
> + if (!apic->lapic_timer.period)
> + return;
> +
> + apic->lapic_timer.tscdeadline = kvm_read_l1_tsc(apic->vcpu, tscl) +
> + nsec_to_cycles(apic->vcpu, apic->lapic_timer.period);
> + now = apic->lapic_timer.timer.base->get_time();
> + apic->lapic_timer.expired_period = ktime_add_ns(now, apic->lapic_timer.period);
> +
> + start_hv_timer(apic);
> + }
> +}
> +EXPORT_SYMBOL_GPL(kvm_lapic_expired_hv_timer);
> +
> @@ -1470,10 +1497,25 @@ static void start_apic_timer(struct kvm_lapic *apic)
> {
> atomic_set(&apic->lapic_timer.pending, 0);
>
> - if (apic_lvtt_period(apic) || apic_lvtt_oneshot(apic))
> - start_sw_period(apic);
> - else if (apic_lvtt_tscdeadline(apic)) {
> - if (!(kvm_x86_ops->set_hv_timer && start_hv_tscdeadline(apic)))
> + if (apic_lvtt_period(apic) || apic_lvtt_oneshot(apic)) {
> + ktime_t now;
> + u64 tscl = rdtsc();
> +
> + apic->lapic_timer.period = (u64)kvm_lapic_get_reg(apic, APIC_TMICT)
> + * APIC_BUS_CYCLE_NS * apic->divide_count;
> +
> + if (!apic->lapic_timer.period)
> + return;
> + apic->lapic_timer.tscdeadline = kvm_read_l1_tsc(apic->vcpu, tscl) +
> + nsec_to_cycles(apic->vcpu, apic->lapic_timer.period);
> + now = apic->lapic_timer.timer.base->get_time();
> + apic->lapic_timer.expired_period = ktime_add_ns(now, apic->lapic_timer.period);

This is the same code as in kvm_lapic_expired_hv_timer(), a helper
function is in order.

> +
> + if (!(kvm_x86_ops->set_hv_timer && start_hv_timer(apic)))
> + start_sw_period(apic);
> + } else if (apic_lvtt_tscdeadline(apic)) {
> + if (!(kvm_x86_ops->set_hv_timer && start_hv_timer(apic)))
> start_sw_tscdeadline(apic);
> }
> }
> @@ -1711,8 +1753,7 @@ u64 kvm_get_lapic_tscdeadline_msr(struct kvm_vcpu *vcpu)
> {
> struct kvm_lapic *apic = vcpu->arch.apic;
>
> - if (!lapic_in_kernel(vcpu) || apic_lvtt_oneshot(apic) ||
> - apic_lvtt_period(apic))

rdmsr MSR_IA32_TSCDEADLINE has to return 0 when the timer is not in tsc
deadline mode, so the condition should stay. (and check for
apic_lvtt_tscdeadline(), but that is a unrelated cleanup.)

> + if (!lapic_in_kernel(vcpu))
> return 0;
>
> return apic->lapic_timer.tscdeadline;

Thanks.