Re: [PATCH RFC V3 6/6] KVM: LAPIC: Add APIC Timer periodic/oneshot mode VMX preemption timer support

From: Wanpeng Li
Date: Thu Oct 13 2016 - 21:26:42 EST


2016-10-13 20:35 GMT+08:00 Paolo Bonzini <pbonzini@xxxxxxxxxx>:
>
>
> On 13/10/2016 13:34, Wanpeng Li wrote:
>> From: Wanpeng Li <wanpeng.li@xxxxxxxxxxx>
>>
>> Most Windows guests still use the APIC Timer periodic/oneshot mode
>> instead of tsc-deadline mode, and the APIC Timer periodic/oneshot
>> mode is still emulated by a high-overhead hrtimer on the host. This patch
>> converts the expected expiration time of the periodic/oneshot mode to
>> a guest deadline tsc in order to leverage the VMX preemption timer logic
>> used for APIC Timer tsc-deadline mode. After each preemption timer vmexit,
>> the preemption timer is restarted to emulate the LVTT current-count
>> register being automatically reloaded from the initial-count register
>> when the count reaches 0.
>>
>> Cc: Paolo Bonzini <pbonzini@xxxxxxxxxx>
>> Cc: Radim Krčmář <rkrcmar@xxxxxxxxxx>
>> Cc: Yunhong Jiang <yunhong.jiang@xxxxxxxxx>
>> Signed-off-by: Wanpeng Li <wanpeng.li@xxxxxxxxxxx>
>> ---
>> arch/x86/kvm/lapic.c | 100 ++++++++++++++++++++-------------------------------
>> 1 file changed, 39 insertions(+), 61 deletions(-)
>>
>> diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
>> index e93e549..7663246 100644
>> --- a/arch/x86/kvm/lapic.c
>> +++ b/arch/x86/kvm/lapic.c
>> @@ -1090,7 +1090,7 @@ static void apic_send_ipi(struct kvm_lapic *apic)
>>
>> static u32 apic_get_tmcct(struct kvm_lapic *apic)
>> {
>> - ktime_t remaining;
>> + ktime_t remaining, now;
>> s64 ns;
>> u32 tmcct;
>>
>> @@ -1101,7 +1101,8 @@ static u32 apic_get_tmcct(struct kvm_lapic *apic)
>> apic->lapic_timer.period == 0)
>> return 0;
>>
>> - remaining = hrtimer_get_remaining(&apic->lapic_timer.timer);
>> + now = apic->lapic_timer.timer.base->get_time();
>> + remaining = ktime_sub(apic->lapic_timer.target_expiration, now);
>> if (ktime_to_ns(remaining) < 0)
>> remaining = ktime_set(0, 0);
>>
>> @@ -1349,46 +1350,9 @@ static void start_sw_tscdeadline(struct kvm_lapic *apic)
>>
>> static void start_sw_period(struct kvm_lapic *apic)
>> {
>> - ktime_t now;
>> -
>> - /* lapic timer in oneshot or periodic mode */
>> - now = apic->lapic_timer.timer.base->get_time();
>> - apic->lapic_timer.period = (u64)kvm_lapic_get_reg(apic, APIC_TMICT)
>> - * APIC_BUS_CYCLE_NS * apic->divide_count;
>> -
>> - if (!apic->lapic_timer.period)
>> - return;
>> - /*
>> - * Do not allow the guest to program periodic timers with small
>> - * interval, since the hrtimers are not throttled by the host
>> - * scheduler.
>> - */
>> - if (apic_lvtt_period(apic)) {
>> - s64 min_period = min_timer_period_us * 1000LL;
>> -
>> - if (apic->lapic_timer.period < min_period) {
>> - pr_info_ratelimited(
>> - "kvm: vcpu %i: requested %lld ns "
>> - "lapic timer period limited to %lld ns\n",
>> - apic->vcpu->vcpu_id,
>> - apic->lapic_timer.period, min_period);
>> - apic->lapic_timer.period = min_period;
>> - }
>> - }
>> -
>> hrtimer_start(&apic->lapic_timer.timer,
>> - ktime_add_ns(now, apic->lapic_timer.period),
>> - HRTIMER_MODE_ABS_PINNED);
>> -
>> - apic_debug("%s: bus cycle is %" PRId64 "ns, now 0x%016"
>> - PRIx64 ", "
>> - "timer initial count 0x%x, period %lldns, "
>> - "expire @ 0x%016" PRIx64 ".\n", __func__,
>> - APIC_BUS_CYCLE_NS, ktime_to_ns(now),
>> - kvm_lapic_get_reg(apic, APIC_TMICT),
>> - apic->lapic_timer.period,
>> - ktime_to_ns(ktime_add_ns(now,
>> - apic->lapic_timer.period)));
>> + apic->lapic_timer.target_expiration,
>> + HRTIMER_MODE_ABS_PINNED);
>> }
>>
>> static bool set_target_expiration(struct kvm_lapic *apic)
>> @@ -1453,22 +1417,12 @@ static void cancel_hv_timer(struct kvm_lapic *apic)
>> apic->lapic_timer.hv_timer_in_use = false;
>> }
>>
>> -void kvm_lapic_expired_hv_timer(struct kvm_vcpu *vcpu)
>> -{
>> - struct kvm_lapic *apic = vcpu->arch.apic;
>> -
>> - WARN_ON(!apic->lapic_timer.hv_timer_in_use);
>> - WARN_ON(swait_active(&vcpu->wq));
>> - cancel_hv_timer(apic);
>> - apic_timer_expired(apic);
>> -}
>> -EXPORT_SYMBOL_GPL(kvm_lapic_expired_hv_timer);
>> -
>> static bool start_hv_timer(struct kvm_lapic *apic)
>> {
>> u64 tscdeadline = apic->lapic_timer.tscdeadline;
>
> I think things would be simpler if you change this to:
>
> if (!kvm_x86_ops->set_hv_timer)
> return false;
>
> if (apic_lvtt_period(apic) || apic_lvtt_oneshot(apic)) {
> if (!set_target_expiration(apic))
> return true;
> }

Calling set_target_expiration() from start_hv_timer() is not correct, as
pointed out by Radim: https://lkml.org/lkml/2016/10/12/93

Regards,
Wanpeng Li

>
> tscdeadline = apic->lapic_timer.tscdeadline;
>
> You can also add a corresponding
>
> static void start_sw_timer(struct kvm_lapic *apic)
> {
> if (apic_lvtt_period(apic) || apic_lvtt_oneshot(apic))
> start_sw_period(apic);
> else
> start_sw_tscdeadline(apic);
> }
>
> so that the caller can be just this:
>
> if (!start_hv_timer(apic))
> start_sw_timer(apic);
>
> or in the case of kvm_lapic_expired_hv_timer:
>
> if (apic_lvtt_period(apic) && !start_hv_timer(apic))
> start_sw_period(apic);
>
> Independent of this, patch 3 should be squashed into this one.
>
> Thanks,
>
> Paolo
>
>> - if (atomic_read(&apic->lapic_timer.pending) ||
>> + if ((atomic_read(&apic->lapic_timer.pending) &&
>> + !apic_lvtt_period(apic)) ||
>> kvm_x86_ops->set_hv_timer(apic->vcpu, tscdeadline)) {
>> if (apic->lapic_timer.hv_timer_in_use)
>> cancel_hv_timer(apic);
>> @@ -1477,7 +1431,8 @@ static bool start_hv_timer(struct kvm_lapic *apic)
>> hrtimer_cancel(&apic->lapic_timer.timer);
>>
>> /* In case the sw timer triggered in the window */
>> - if (atomic_read(&apic->lapic_timer.pending))
>> + if (atomic_read(&apic->lapic_timer.pending) &&
>> + !apic_lvtt_period(apic))
>> cancel_hv_timer(apic);
>> }
>> trace_kvm_hv_timer_state(apic->vcpu->vcpu_id,
>> @@ -1485,14 +1440,29 @@ static bool start_hv_timer(struct kvm_lapic *apic)
>> return apic->lapic_timer.hv_timer_in_use;
>> }
>>
>> +void kvm_lapic_expired_hv_timer(struct kvm_vcpu *vcpu)
>> +{
>> + struct kvm_lapic *apic = vcpu->arch.apic;
>> +
>> + WARN_ON(!apic->lapic_timer.hv_timer_in_use);
>> + WARN_ON(swait_active(&vcpu->wq));
>> + cancel_hv_timer(apic);
>> + apic_timer_expired(apic);
>> +
>> + if (apic_lvtt_period(apic) &&
>> + set_target_expiration(apic) &&
>> + !start_hv_timer(apic))
>> + start_sw_period(apic);
>> +}
>> +EXPORT_SYMBOL_GPL(kvm_lapic_expired_hv_timer);
>> +
>> void kvm_lapic_switch_to_hv_timer(struct kvm_vcpu *vcpu)
>> {
>> struct kvm_lapic *apic = vcpu->arch.apic;
>>
>> WARN_ON(apic->lapic_timer.hv_timer_in_use);
>>
>> - if (apic_lvtt_tscdeadline(apic))
>> - start_hv_timer(apic);
>> + start_hv_timer(apic);
>> }
>> EXPORT_SYMBOL_GPL(kvm_lapic_switch_to_hv_timer);
>>
>> @@ -1509,7 +1479,10 @@ void kvm_lapic_switch_to_sw_timer(struct kvm_vcpu *vcpu)
>> if (atomic_read(&apic->lapic_timer.pending))
>> return;
>>
>> - start_sw_tscdeadline(apic);
>> + if (apic_lvtt_period(apic) || apic_lvtt_oneshot(apic))
>> + start_sw_period(apic);
>> + else if (apic_lvtt_tscdeadline(apic))
>> + start_sw_tscdeadline(apic);
>> }
>> EXPORT_SYMBOL_GPL(kvm_lapic_switch_to_sw_timer);
>>
>> @@ -1517,9 +1490,11 @@ static void start_apic_timer(struct kvm_lapic *apic)
>> {
>> atomic_set(&apic->lapic_timer.pending, 0);
>>
>> - if (apic_lvtt_period(apic) || apic_lvtt_oneshot(apic))
>> - start_sw_period(apic);
>> - else if (apic_lvtt_tscdeadline(apic)) {
>> + if (apic_lvtt_period(apic) || apic_lvtt_oneshot(apic)) {
>> + if (set_target_expiration(apic) &&
>> + !(kvm_x86_ops->set_hv_timer && start_hv_timer(apic)))
>> + start_sw_period(apic);
>> + } else if (apic_lvtt_tscdeadline(apic)) {
>> if (!(kvm_x86_ops->set_hv_timer && start_hv_timer(apic)))
>> start_sw_tscdeadline(apic);
>> }
>> @@ -2052,8 +2027,11 @@ void kvm_inject_apic_timer_irqs(struct kvm_vcpu *vcpu)
>>
>> if (atomic_read(&apic->lapic_timer.pending) > 0) {
>> kvm_apic_local_deliver(apic, APIC_LVTT);
>> - if (apic_lvtt_tscdeadline(apic))
>> + if (!(apic_lvtt_period(apic) &&
>> + kvm_lapic_hv_timer_in_use(vcpu))) {
>> apic->lapic_timer.tscdeadline = 0;
>> + apic->lapic_timer.target_expiration = ktime_set(0, 0);
>> + }
>> atomic_set(&apic->lapic_timer.pending, 0);
>> }
>> }
>>