Re: [PATCH] KVM: Boost vCPU candidate in user mode which is delivering interrupt

From: Wanpeng Li
Date: Tue Apr 20 2021 - 02:09:09 EST


On Tue, 20 Apr 2021 at 14:02, Wanpeng Li <kernellwp@xxxxxxxxx> wrote:
>
> On Tue, 20 Apr 2021 at 00:59, Paolo Bonzini <pbonzini@xxxxxxxxxx> wrote:
> >
> > On 19/04/21 18:32, Sean Christopherson wrote:
> > > If false positives are a big concern, what about adding another pass to the loop
> > > and only yielding to usermode vCPUs with interrupts in the second full pass?
> > > I.e. give vCPUs that are already in kernel mode priority, and only yield to
> > > handle an interrupt if there are no vCPUs in kernel mode.
> > >
> > > kvm_arch_dy_runnable() pulls in pv_unhalted, which seems like a good thing.
> >
> > pv_unhalted won't help if you're waiting for a kernel spinlock though,
> > would it? Doing two passes (or looking for a "best" candidate that
> > prefers kernel mode vCPUs to user mode vCPUs waiting for an interrupt)
> > seems like the best choice overall.
>
> How about something like this:

Sorry, it should be the code below:

diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 6b4dd95..9bc5f87 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -325,10 +325,12 @@ struct kvm_vcpu {
* Cpu relax intercept or pause loop exit optimization
* in_spin_loop: set when a vcpu does a pause loop exit
* or cpu relax intercepted.
+ * pending_interrupt: set when a vcpu is waiting for an interrupt
* dy_eligible: indicates whether vcpu is eligible for directed yield.
*/
struct {
bool in_spin_loop;
+ bool pending_interrupt;
bool dy_eligible;
} spin_loop;
#endif
@@ -1427,6 +1429,12 @@ static inline void
kvm_vcpu_set_in_spin_loop(struct kvm_vcpu *vcpu, bool val)
{
vcpu->spin_loop.in_spin_loop = val;
}
+
+static inline void kvm_vcpu_set_pending_interrupt(struct kvm_vcpu
*vcpu, bool val)
+{
+ vcpu->spin_loop.pending_interrupt = val;
+}
+
static inline void kvm_vcpu_set_dy_eligible(struct kvm_vcpu *vcpu, bool val)
{
vcpu->spin_loop.dy_eligible = val;
@@ -1438,6 +1446,10 @@ static inline void
kvm_vcpu_set_in_spin_loop(struct kvm_vcpu *vcpu, bool val)
{
}

+static inline void kvm_vcpu_set_pending_interrupt(struct kvm_vcpu
*vcpu, bool val)
+{
+}
+
static inline void kvm_vcpu_set_dy_eligible(struct kvm_vcpu *vcpu, bool val)
{
}
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 529cff1..bf6f1ec 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -410,6 +410,7 @@ static void kvm_vcpu_init(struct kvm_vcpu *vcpu,
struct kvm *kvm, unsigned id)
INIT_LIST_HEAD(&vcpu->blocked_vcpu_list);

kvm_vcpu_set_in_spin_loop(vcpu, false);
+ kvm_vcpu_set_pending_interrupt(vcpu, false);
kvm_vcpu_set_dy_eligible(vcpu, false);
vcpu->preempted = false;
vcpu->ready = false;
@@ -3079,14 +3080,17 @@ EXPORT_SYMBOL_GPL(kvm_vcpu_yield_to);
* Helper that checks whether a VCPU is eligible for directed yield.
* Most eligible candidate to yield is decided by following heuristics:
*
- * (a) VCPU which has not done pl-exit or cpu relax intercepted recently
- * (preempted lock holder), indicated by @in_spin_loop.
- * Set at the beginning and cleared at the end of interception/PLE handler.
+ * (a) VCPU which has not done pl-exit or cpu relax intercepted and is not
+ * waiting for an interrupt recently (preempted lock holder). The former
+ * one is indicated by @in_spin_loop, set at the beginning and cleared at
+ * the end of interception/PLE handler. The latter one is indicated by
+ * @pending_interrupt, set when interrupt is delivering and cleared at
+ * the end of directed yield.
*
- * (b) VCPU which has done pl-exit/ cpu relax intercepted but did not get
- * chance last time (mostly it has become eligible now since we have probably
- * yielded to lockholder in last iteration. This is done by toggling
- * @dy_eligible each time a VCPU checked for eligibility.)
+ * (b) VCPU which has done pl-exit/ cpu relax intercepted or is waiting for
+ * interrupt but did not get chance last time (mostly it has become eligible
+ * now since we have probably yielded to lockholder in last iteration. This
+ * is done by toggling @dy_eligible each time a VCPU checked for eligibility.)
*
* Yielding to a recently pl-exited/cpu relax intercepted VCPU before yielding
* to preempted lock-holder could result in wrong VCPU selection and CPU
@@ -3102,10 +3106,10 @@ static bool
kvm_vcpu_eligible_for_directed_yield(struct kvm_vcpu *vcpu)
#ifdef CONFIG_HAVE_KVM_CPU_RELAX_INTERCEPT
bool eligible;

- eligible = !vcpu->spin_loop.in_spin_loop ||
+ eligible = !(vcpu->spin_loop.in_spin_loop ||
vcpu->spin_loop.pending_interrupt) ||
vcpu->spin_loop.dy_eligible;

- if (vcpu->spin_loop.in_spin_loop)
+ if (vcpu->spin_loop.in_spin_loop || vcpu->spin_loop.pending_interrupt)
kvm_vcpu_set_dy_eligible(vcpu, !vcpu->spin_loop.dy_eligible);

return eligible;
@@ -3137,6 +3141,16 @@ static bool vcpu_dy_runnable(struct kvm_vcpu *vcpu)
return false;
}

+static bool kvm_has_interrupt_delivery(struct kvm_vcpu *vcpu)
+{
+ if (vcpu_dy_runnable(vcpu)) {
+ kvm_vcpu_set_pending_interrupt(vcpu, true);
+ return true;
+ }
+
+ return false;
+}
+
void kvm_vcpu_on_spin(struct kvm_vcpu *me, bool yield_to_kernel_mode)
{
struct kvm *kvm = me->kvm;
@@ -3170,6 +3184,7 @@ void kvm_vcpu_on_spin(struct kvm_vcpu *me, bool
yield_to_kernel_mode)
!vcpu_dy_runnable(vcpu))
continue;
if (READ_ONCE(vcpu->preempted) && yield_to_kernel_mode &&
+ !kvm_has_interrupt_delivery(vcpu) &&
!kvm_arch_vcpu_in_kernel(vcpu))
continue;
if (!kvm_vcpu_eligible_for_directed_yield(vcpu))
@@ -3177,6 +3192,7 @@ void kvm_vcpu_on_spin(struct kvm_vcpu *me, bool
yield_to_kernel_mode)

yielded = kvm_vcpu_yield_to(vcpu);
if (yielded > 0) {
+ kvm_vcpu_set_pending_interrupt(vcpu, false);
kvm->last_boosted_vcpu = i;
break;
} else if (yielded < 0) {