Re: [PATCH RFC 2/2] KVM: x86: Support using the VMX preemption timer for APIC Timer periodic/oneshot mode

From: Wanpeng Li
Date: Wed Oct 12 2016 - 04:35:36 EST


2016-10-11 23:06 GMT+08:00 Radim KrÄmÃÅ <rkrcmar@xxxxxxxxxx>:
> 2016-10-11 20:17+0800, Wanpeng Li:
>> From: Wanpeng Li <wanpeng.li@xxxxxxxxxxx>
>>
>> Most windows guests still utilize APIC Timer periodic/oneshot mode
>> instead of tsc-deadline mode, and the APIC Timer periodic/oneshot
>> mode are still emulated by high overhead hrtimer on host. This patch
>> converts the expected expire time of the periodic/oneshot mode to
>> guest deadline tsc in order to leverage VMX preemption timer logic
>> for APIC Timer tsc-deadline mode.
>>
>> Cc: Paolo Bonzini <pbonzini@xxxxxxxxxx>
>> Cc: Radim KrÄmÃÅ <rkrcmar@xxxxxxxxxx>
>> Cc: Yunhong Jiang <yunhong.jiang@xxxxxxxxx>
>> Signed-off-by: Wanpeng Li <wanpeng.li@xxxxxxxxxxx>
>> ---
>> diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
>> @@ -1101,13 +1101,20 @@ static u32 apic_get_tmcct(struct kvm_lapic *apic)
>> apic->lapic_timer.period == 0)
>> return 0;
>>
>> + if (kvm_lapic_hv_timer_in_use(apic->vcpu)) {
>> + u64 tscl = rdtsc();
>> +
>> + tmcct = apic->lapic_timer.tscdeadline -
>> + kvm_read_l1_tsc(apic->vcpu, tscl);
>
> Yes, this won't work. The easiest way to return a less bogus TMCCT
> would be remember the timeout when setting up the timer and replace
> hrtimer_get_remaining() with it -- our deliver method shouldn't change
> the expiration time.

Agreed.

>
>> + } else {
>> + remaining = hrtimer_get_remaining(&apic->lapic_timer.timer);
>> + if (ktime_to_ns(remaining) < 0)
>> + remaining = ktime_set(0, 0);
>> +
>> + ns = mod_64(ktime_to_ns(remaining), apic->lapic_timer.period);
>> + tmcct = div64_u64(ns,
>> + (APIC_BUS_CYCLE_NS * apic->divide_count));
>> + }
>>
>> return tmcct;
>> }
>> @@ -1400,52 +1407,65 @@ bool kvm_lapic_hv_timer_in_use(struct kvm_vcpu *vcpu)
>> }
>> EXPORT_SYMBOL_GPL(kvm_lapic_hv_timer_in_use);
>>
>> -static void cancel_hv_tscdeadline(struct kvm_lapic *apic)
>> +static void cancel_hv_timer(struct kvm_lapic *apic)
>> {
>> kvm_x86_ops->cancel_hv_timer(apic->vcpu);
>> apic->lapic_timer.hv_timer_in_use = false;
>> }
>>
>> +static bool start_hv_timer(struct kvm_lapic *apic)
>> {
>> + u64 tscdeadline;
>>
>> + if (apic_lvtt_period(apic) || apic_lvtt_oneshot(apic)) {
>> + u64 tscl = rdtsc();
>>
>> + apic->lapic_timer.period = (u64)kvm_lapic_get_reg(apic, APIC_TMICT)
>> + * APIC_BUS_CYCLE_NS * apic->divide_count;
>> + apic->lapic_timer.tscdeadline = kvm_read_l1_tsc(apic->vcpu, tscl) +
>> + nsec_to_cycles(apic->vcpu, apic->lapic_timer.period);
>
> start_hv_timer() is called from kvm_lapic_switch_to_hv_timer(), which
> can happen mid-period. The worst case is that the timer will never
> fire, because we always convert back and forth. You have to compute the
> equivalent deadline only once, and carry it around.

Agreed. Thanks for your review. :) Please see RFC V2.

Regards,
Wanpeng Li

>
>> + }
>> +
>> + tscdeadline = apic->lapic_timer.tscdeadline;
>>
>> if (atomic_read(&apic->lapic_timer.pending) ||
>> kvm_x86_ops->set_hv_timer(apic->vcpu, tscdeadline)) {
>> if (apic->lapic_timer.hv_timer_in_use)
>> + cancel_hv_timer(apic);
>> } else {
>> apic->lapic_timer.hv_timer_in_use = true;
>> hrtimer_cancel(&apic->lapic_timer.timer);
>>
>> /* In case the sw timer triggered in the window */
>> if (atomic_read(&apic->lapic_timer.pending))
>> + cancel_hv_timer(apic);
>> }
>> trace_kvm_hv_timer_state(apic->vcpu->vcpu_id,
>> apic->lapic_timer.hv_timer_in_use);
>> return apic->lapic_timer.hv_timer_in_use;
>> }
>>
>> +void kvm_lapic_expired_hv_timer(struct kvm_vcpu *vcpu)
>> +{
>> + struct kvm_lapic *apic = vcpu->arch.apic;
>> +
>> + WARN_ON(!apic->lapic_timer.hv_timer_in_use);
>> + WARN_ON(swait_active(&vcpu->wq));
>> + cancel_hv_timer(apic);
>> + apic_timer_expired(apic);
>> +
>> + if (apic_lvtt_period(apic))
>> + start_hv_timer(apic);
>> +}
>> +EXPORT_SYMBOL_GPL(kvm_lapic_expired_hv_timer);
>> +
>> void kvm_lapic_switch_to_hv_timer(struct kvm_vcpu *vcpu)
>> {
>> struct kvm_lapic *apic = vcpu->arch.apic;
>>
>> WARN_ON(apic->lapic_timer.hv_timer_in_use);
>>
>> - if (apic_lvtt_tscdeadline(apic))
>> - start_hv_tscdeadline(apic);
>> + start_hv_timer(apic);
>> }
>> EXPORT_SYMBOL_GPL(kvm_lapic_switch_to_hv_timer);
>>