Re: [PATCH RFC V3 6/6] KVM: LAPIC: Add APIC Timer periodic/oneshot mode VMX preemption timer support

From: Paolo Bonzini
Date: Thu Oct 13 2016 - 08:36:27 EST




On 13/10/2016 13:34, Wanpeng Li wrote:
> From: Wanpeng Li <wanpeng.li@xxxxxxxxxxx>
>
> Most Windows guests still use the APIC Timer periodic/oneshot modes
> instead of tsc-deadline mode, and the APIC Timer periodic/oneshot
> modes are still emulated by a high-overhead hrtimer on the host. This
> patch converts the expected expiration time of the periodic/oneshot
> mode to a guest deadline tsc in order to leverage the VMX preemption
> timer logic for APIC Timer tsc-deadline mode. After each preemption
> timer vmexit, the preemption timer is restarted to emulate the LVTT
> current-count register being automatically reloaded from the
> initial-count register when the count reaches 0.
>
> Cc: Paolo Bonzini <pbonzini@xxxxxxxxxx>
> Cc: Radim Krčmář <rkrcmar@xxxxxxxxxx>
> Cc: Yunhong Jiang <yunhong.jiang@xxxxxxxxx>
> Signed-off-by: Wanpeng Li <wanpeng.li@xxxxxxxxxxx>
> ---
> arch/x86/kvm/lapic.c | 100 ++++++++++++++++++++-------------------------------
> 1 file changed, 39 insertions(+), 61 deletions(-)
>
> diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
> index e93e549..7663246 100644
> --- a/arch/x86/kvm/lapic.c
> +++ b/arch/x86/kvm/lapic.c
> @@ -1090,7 +1090,7 @@ static void apic_send_ipi(struct kvm_lapic *apic)
>
> static u32 apic_get_tmcct(struct kvm_lapic *apic)
> {
> - ktime_t remaining;
> + ktime_t remaining, now;
> s64 ns;
> u32 tmcct;
>
> @@ -1101,7 +1101,8 @@ static u32 apic_get_tmcct(struct kvm_lapic *apic)
> apic->lapic_timer.period == 0)
> return 0;
>
> - remaining = hrtimer_get_remaining(&apic->lapic_timer.timer);
> + now = apic->lapic_timer.timer.base->get_time();
> + remaining = ktime_sub(apic->lapic_timer.target_expiration, now);
> if (ktime_to_ns(remaining) < 0)
> remaining = ktime_set(0, 0);
>
> @@ -1349,46 +1350,9 @@ static void start_sw_tscdeadline(struct kvm_lapic *apic)
>
> static void start_sw_period(struct kvm_lapic *apic)
> {
> - ktime_t now;
> -
> - /* lapic timer in oneshot or periodic mode */
> - now = apic->lapic_timer.timer.base->get_time();
> - apic->lapic_timer.period = (u64)kvm_lapic_get_reg(apic, APIC_TMICT)
> - * APIC_BUS_CYCLE_NS * apic->divide_count;
> -
> - if (!apic->lapic_timer.period)
> - return;
> - /*
> - * Do not allow the guest to program periodic timers with small
> - * interval, since the hrtimers are not throttled by the host
> - * scheduler.
> - */
> - if (apic_lvtt_period(apic)) {
> - s64 min_period = min_timer_period_us * 1000LL;
> -
> - if (apic->lapic_timer.period < min_period) {
> - pr_info_ratelimited(
> - "kvm: vcpu %i: requested %lld ns "
> - "lapic timer period limited to %lld ns\n",
> - apic->vcpu->vcpu_id,
> - apic->lapic_timer.period, min_period);
> - apic->lapic_timer.period = min_period;
> - }
> - }
> -
> hrtimer_start(&apic->lapic_timer.timer,
> - ktime_add_ns(now, apic->lapic_timer.period),
> - HRTIMER_MODE_ABS_PINNED);
> -
> - apic_debug("%s: bus cycle is %" PRId64 "ns, now 0x%016"
> - PRIx64 ", "
> - "timer initial count 0x%x, period %lldns, "
> - "expire @ 0x%016" PRIx64 ".\n", __func__,
> - APIC_BUS_CYCLE_NS, ktime_to_ns(now),
> - kvm_lapic_get_reg(apic, APIC_TMICT),
> - apic->lapic_timer.period,
> - ktime_to_ns(ktime_add_ns(now,
> - apic->lapic_timer.period)));
> + apic->lapic_timer.target_expiration,
> + HRTIMER_MODE_ABS_PINNED);
> }
>
> static bool set_target_expiration(struct kvm_lapic *apic)
> @@ -1453,22 +1417,12 @@ static void cancel_hv_timer(struct kvm_lapic *apic)
> apic->lapic_timer.hv_timer_in_use = false;
> }
>
> -void kvm_lapic_expired_hv_timer(struct kvm_vcpu *vcpu)
> -{
> - struct kvm_lapic *apic = vcpu->arch.apic;
> -
> - WARN_ON(!apic->lapic_timer.hv_timer_in_use);
> - WARN_ON(swait_active(&vcpu->wq));
> - cancel_hv_timer(apic);
> - apic_timer_expired(apic);
> -}
> -EXPORT_SYMBOL_GPL(kvm_lapic_expired_hv_timer);
> -
> static bool start_hv_timer(struct kvm_lapic *apic)
> {
> u64 tscdeadline = apic->lapic_timer.tscdeadline;

I think things would be simpler if you change this to:

if (!kvm_x86_ops->set_hv_timer)
return false;

if (apic_lvtt_period(apic) || apic_lvtt_oneshot(apic)) {
if (!set_target_expiration(apic))
return true;
}

tscdeadline = apic->lapic_timer.tscdeadline;

You can also add a corresponding

static void start_sw_timer(struct kvm_lapic *apic)
{
if (apic_lvtt_period(apic) || apic_lvtt_oneshot(apic))
start_sw_period(apic);
else
start_sw_tscdeadline(apic);
}

so that the caller can be just this:

if (!start_hv_timer(apic))
start_sw_timer(apic);

or in the case of kvm_lapic_expired_hv_timer:

if (apic_lvtt_period(apic) && !start_hv_timer(apic))
start_sw_period(apic);

Independent of this, patch 3 should be squashed into this one.

Thanks,

Paolo

> - if (atomic_read(&apic->lapic_timer.pending) ||
> + if ((atomic_read(&apic->lapic_timer.pending) &&
> + !apic_lvtt_period(apic)) ||
> kvm_x86_ops->set_hv_timer(apic->vcpu, tscdeadline)) {
> if (apic->lapic_timer.hv_timer_in_use)
> cancel_hv_timer(apic);
> @@ -1477,7 +1431,8 @@ static bool start_hv_timer(struct kvm_lapic *apic)
> hrtimer_cancel(&apic->lapic_timer.timer);
>
> /* In case the sw timer triggered in the window */
> - if (atomic_read(&apic->lapic_timer.pending))
> + if (atomic_read(&apic->lapic_timer.pending) &&
> + !apic_lvtt_period(apic))
> cancel_hv_timer(apic);
> }
> trace_kvm_hv_timer_state(apic->vcpu->vcpu_id,
> @@ -1485,14 +1440,29 @@ static bool start_hv_timer(struct kvm_lapic *apic)
> return apic->lapic_timer.hv_timer_in_use;
> }
>
> +void kvm_lapic_expired_hv_timer(struct kvm_vcpu *vcpu)
> +{
> + struct kvm_lapic *apic = vcpu->arch.apic;
> +
> + WARN_ON(!apic->lapic_timer.hv_timer_in_use);
> + WARN_ON(swait_active(&vcpu->wq));
> + cancel_hv_timer(apic);
> + apic_timer_expired(apic);
> +
> + if (apic_lvtt_period(apic) &&
> + set_target_expiration(apic) &&
> + !start_hv_timer(apic))
> + start_sw_period(apic);
> +}
> +EXPORT_SYMBOL_GPL(kvm_lapic_expired_hv_timer);
> +
> void kvm_lapic_switch_to_hv_timer(struct kvm_vcpu *vcpu)
> {
> struct kvm_lapic *apic = vcpu->arch.apic;
>
> WARN_ON(apic->lapic_timer.hv_timer_in_use);
>
> - if (apic_lvtt_tscdeadline(apic))
> - start_hv_timer(apic);
> + start_hv_timer(apic);
> }
> EXPORT_SYMBOL_GPL(kvm_lapic_switch_to_hv_timer);
>
> @@ -1509,7 +1479,10 @@ void kvm_lapic_switch_to_sw_timer(struct kvm_vcpu *vcpu)
> if (atomic_read(&apic->lapic_timer.pending))
> return;
>
> - start_sw_tscdeadline(apic);
> + if (apic_lvtt_period(apic) || apic_lvtt_oneshot(apic))
> + start_sw_period(apic);
> + else if (apic_lvtt_tscdeadline(apic))
> + start_sw_tscdeadline(apic);
> }
> EXPORT_SYMBOL_GPL(kvm_lapic_switch_to_sw_timer);
>
> @@ -1517,9 +1490,11 @@ static void start_apic_timer(struct kvm_lapic *apic)
> {
> atomic_set(&apic->lapic_timer.pending, 0);
>
> - if (apic_lvtt_period(apic) || apic_lvtt_oneshot(apic))
> - start_sw_period(apic);
> - else if (apic_lvtt_tscdeadline(apic)) {
> + if (apic_lvtt_period(apic) || apic_lvtt_oneshot(apic)) {
> + if (set_target_expiration(apic) &&
> + !(kvm_x86_ops->set_hv_timer && start_hv_timer(apic)))
> + start_sw_period(apic);
> + } else if (apic_lvtt_tscdeadline(apic)) {
> if (!(kvm_x86_ops->set_hv_timer && start_hv_timer(apic)))
> start_sw_tscdeadline(apic);
> }
> @@ -2052,8 +2027,11 @@ void kvm_inject_apic_timer_irqs(struct kvm_vcpu *vcpu)
>
> if (atomic_read(&apic->lapic_timer.pending) > 0) {
> kvm_apic_local_deliver(apic, APIC_LVTT);
> - if (apic_lvtt_tscdeadline(apic))
> + if (!(apic_lvtt_period(apic) &&
> + kvm_lapic_hv_timer_in_use(vcpu))) {
> apic->lapic_timer.tscdeadline = 0;
> + apic->lapic_timer.target_expiration = ktime_set(0, 0);
> + }
> atomic_set(&apic->lapic_timer.pending, 0);
> }
> }
>