Re: [PATCH v3] KVM: LAPIC: Fix lapic timer injection delay
From: Wanpeng Li
Date: Wed Jun 28 2017 - 23:42:32 EST
2017-06-28 22:30 GMT+08:00 Paolo Bonzini <pbonzini@xxxxxxxxxx>:
>
>
> On 28/06/2017 16:27, Wanpeng Li wrote:
>> 2017-06-28 20:10 GMT+08:00 Paolo Bonzini <pbonzini@xxxxxxxxxx>:
>>>
>>>
>>> On 28/06/2017 03:29, Wanpeng Li wrote:
>>>> u64 tscdeadline = apic->lapic_timer.tscdeadline;
>>>> + int ret = 0;
>>>>
>>>> if ((atomic_read(&apic->lapic_timer.pending) &&
>>>> !apic_lvtt_period(apic)) ||
>>>> - kvm_x86_ops->set_hv_timer(apic->vcpu, tscdeadline)) {
>>>> + (ret = kvm_x86_ops->set_hv_timer(apic->vcpu, tscdeadline))) {
>>>> if (apic->lapic_timer.hv_timer_in_use)
>>>> cancel_hv_timer(apic);
>>>> + if (ret == 1) {
>>>> + apic_timer_expired(apic);
>>>> + return true;
>>>> + }
>>>
>>> The preemption timer can also be used for modes other than TSC deadline.
>>>
>>> In periodic mode, your patch would miss a call to
>>> advance_periodic_target_expiration, which is only called by
>>> kvm_lapic_expired_hv_timer.
>>>
>>> You could use something like this:
>>>
>>> diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
>>> index d24c8742d9b0..15b751aa7625 100644
>>> --- a/arch/x86/kvm/lapic.c
>>> +++ b/arch/x86/kvm/lapic.c
>>> @@ -1504,21 +1504,26 @@ static void cancel_hv_timer(struct kvm_lapic *apic)
>>> static bool start_hv_timer(struct kvm_lapic *apic)
>>> {
>>> u64 tscdeadline = apic->lapic_timer.tscdeadline;
>>> + bool need_cancel = apic->lapic_timer.hv_timer_in_use;
>>> + if (!atomic_read(&apic->lapic_timer.pending) || apic_lvtt_period(apic)) {
>>> + int r = kvm_x86_ops->set_hv_timer(apic->vcpu, tscdeadline);
>>> + if (r >= 0) {
>>> + need_cancel = false;
>>> + apic->lapic_timer.hv_timer_in_use = true;
>>> + hrtimer_cancel(&apic->lapic_timer.timer);
>>>
>>> - if ((atomic_read(&apic->lapic_timer.pending) &&
>>> - !apic_lvtt_period(apic)) ||
>>> - kvm_x86_ops->set_hv_timer(apic->vcpu, tscdeadline)) {
>>> - if (apic->lapic_timer.hv_timer_in_use)
>>> - cancel_hv_timer(apic);
>>> - } else {
>>> - apic->lapic_timer.hv_timer_in_use = true;
>>> - hrtimer_cancel(&apic->lapic_timer.timer);
>>> -
>>> - /* In case the sw timer triggered in the window */
>>> - if (atomic_read(&apic->lapic_timer.pending) &&
>>> - !apic_lvtt_period(apic))
>>> - cancel_hv_timer(apic);
>>> + /* In case the sw timer triggered in the window */
>>> + if (atomic_read(&apic->lapic_timer.pending) &&
>>> + !apic_lvtt_period(apic))
>>> + need_cancel = true;
>>> + else if (r)
>>> + kvm_lapic_expired_hv_timer(vcpu);
>>> + }
>>> }
>>> +
>>> + if (need_cancel)
>>> + cancel_hv_timer(apic);
>>> +
>>> trace_kvm_hv_timer_state(apic->vcpu->vcpu_id,
>>> apic->lapic_timer.hv_timer_in_use);
>>> return apic->lapic_timer.hv_timer_in_use;
>>>
>>> but I'm afraid of introducing a mutual recursion between
>>> start_hv_timer and kvm_lapic_expired_hv_timer.
>>
>> We can just handle the apic timer oneshot/tscdeadline mode instead of
>> periodic mode just like which is emulated by hrtimer to avoid the
>> mutual recusion, what do you think?
>
> In that case, set_hv_timer should probably always enable the preemption
> timer. You can then cancel it if it returns 1 _and_ the APIC timer's
> mode is oneshot/tscdeadline.
>
> Paolo
So how about something like this?
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index d24c874..900ce86 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -1504,21 +1504,26 @@ static void cancel_hv_timer(struct kvm_lapic *apic)
static bool start_hv_timer(struct kvm_lapic *apic)
{
u64 tscdeadline = apic->lapic_timer.tscdeadline;
+ bool need_cancel = apic->lapic_timer.hv_timer_in_use;
+ if (!atomic_read(&apic->lapic_timer.pending) || apic_lvtt_period(apic)) {
+ int r = kvm_x86_ops->set_hv_timer(apic->vcpu, tscdeadline);
+ if (r >= 0) {
+ need_cancel = false;
+ apic->lapic_timer.hv_timer_in_use = true;
+ hrtimer_cancel(&apic->lapic_timer.timer);
+
+ /* In case the sw timer triggered in the window */
+ if (atomic_read(&apic->lapic_timer.pending) &&
+ !apic_lvtt_period(apic))
+ need_cancel = true;
+ else if (r && (apic_lvtt_oneshot(apic) ||
apic_lvtt_tscdeadline(apic)))
+ apic_timer_expired(apic);
+ }
+ }
- if ((atomic_read(&apic->lapic_timer.pending) &&
- !apic_lvtt_period(apic)) ||
- kvm_x86_ops->set_hv_timer(apic->vcpu, tscdeadline)) {
- if (apic->lapic_timer.hv_timer_in_use)
- cancel_hv_timer(apic);
- } else {
- apic->lapic_timer.hv_timer_in_use = true;
- hrtimer_cancel(&apic->lapic_timer.timer);
+ if (need_cancel)
+ cancel_hv_timer(apic);
- /* In case the sw timer triggered in the window */
- if (atomic_read(&apic->lapic_timer.pending) &&
- !apic_lvtt_period(apic))
- cancel_hv_timer(apic);
- }
trace_kvm_hv_timer_state(apic->vcpu->vcpu_id,
apic->lapic_timer.hv_timer_in_use);
return apic->lapic_timer.hv_timer_in_use;
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 4f616db..0a0e696 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -11125,6 +11125,9 @@ static int vmx_set_hv_timer(struct kvm_vcpu
*vcpu, u64 guest_deadline_tsc)
u64 guest_tscl = kvm_read_l1_tsc(vcpu, tscl);
u64 delta_tsc = max(guest_deadline_tsc, guest_tscl) - guest_tscl;
+ if (delta_tsc == 0)
+ return 1;
+
/* Convert to host delta tsc if tsc scaling is enabled */
if (vcpu->arch.tsc_scaling_ratio != kvm_default_tsc_scaling_ratio &&
u64_shl_div_u64(delta_tsc,