[patch -rt 1/2] KVM: use simple waitqueue for vcpu->wq

From: Marcelo Tosatti
Date: Wed Jan 14 2015 - 12:16:27 EST


The problem:

On -RT, an emulated LAPIC timer instances has the following path:

1) hard interrupt
2) ksoftirqd is scheduled
3) ksoftirqd wakes up vcpu thread
4) vcpu thread is scheduled

This extra context switch introduces unnecessary latency in the
LAPIC path for a KVM guest.

The solution:

Allow waking up vcpu thread from hardirq context,
thus avoiding the need for ksoftirqd to be scheduled.

Normal waitqueues make use of spinlocks, which on -RT
are sleepable locks. Therefore, waking up a waitqueue
waiter involves locking a sleeping lock, which
is not allowed from hard interrupt context.

cyclictest command line:
# cyclictest -m -n -q -p99 -l 1000000 -h60 -D 1m

This patch reduces the average latency in my tests from 14us to 11us.

Signed-off-by: Marcelo Tosatti <mtosatti@xxxxxxxxxx>

---
arch/arm/kvm/arm.c | 4 ++--
arch/arm/kvm/psci.c | 4 ++--
arch/mips/kvm/kvm_mips.c | 8 ++++----
arch/powerpc/include/asm/kvm_host.h | 4 ++--
arch/powerpc/kvm/book3s_hv.c | 20 ++++++++++----------
arch/s390/include/asm/kvm_host.h | 2 +-
arch/s390/kvm/interrupt.c | 22 ++++++++++------------
arch/s390/kvm/sigp.c | 16 ++++++++--------
arch/x86/kvm/lapic.c | 6 +++---
include/linux/kvm_host.h | 4 ++--
virt/kvm/async_pf.c | 4 ++--
virt/kvm/kvm_main.c | 16 ++++++++--------
12 files changed, 54 insertions(+), 56 deletions(-)

Index: linux-stable-rt/arch/arm/kvm/arm.c
===================================================================
--- linux-stable-rt.orig/arch/arm/kvm/arm.c 2014-11-25 14:13:39.188899952 -0200
+++ linux-stable-rt/arch/arm/kvm/arm.c 2014-11-25 14:14:38.620810092 -0200
@@ -495,9 +495,9 @@

static void vcpu_pause(struct kvm_vcpu *vcpu)
{
- wait_queue_head_t *wq = kvm_arch_vcpu_wq(vcpu);
+ struct swait_head *wq = kvm_arch_vcpu_wq(vcpu);

- wait_event_interruptible(*wq, !vcpu->arch.pause);
+ swait_event_interruptible(*wq, !vcpu->arch.pause);
}

static int kvm_vcpu_initialized(struct kvm_vcpu *vcpu)
Index: linux-stable-rt/arch/arm/kvm/psci.c
===================================================================
--- linux-stable-rt.orig/arch/arm/kvm/psci.c 2014-11-25 14:13:39.189899951 -0200
+++ linux-stable-rt/arch/arm/kvm/psci.c 2014-11-25 14:14:38.620810092 -0200
@@ -36,7 +36,7 @@
{
struct kvm *kvm = source_vcpu->kvm;
struct kvm_vcpu *vcpu = NULL, *tmp;
- wait_queue_head_t *wq;
+ struct swait_head *wq;
unsigned long cpu_id;
unsigned long mpidr;
phys_addr_t target_pc;
@@ -80,7 +80,7 @@
smp_mb(); /* Make sure the above is visible */

wq = kvm_arch_vcpu_wq(vcpu);
- wake_up_interruptible(wq);
+ swait_wake_interruptible(wq);

return KVM_PSCI_RET_SUCCESS;
}
Index: linux-stable-rt/arch/mips/kvm/kvm_mips.c
===================================================================
--- linux-stable-rt.orig/arch/mips/kvm/kvm_mips.c 2014-11-25 14:13:39.191899948 -0200
+++ linux-stable-rt/arch/mips/kvm/kvm_mips.c 2014-11-25 14:14:38.621810091 -0200
@@ -464,8 +464,8 @@

dvcpu->arch.wait = 0;

- if (waitqueue_active(&dvcpu->wq)) {
- wake_up_interruptible(&dvcpu->wq);
+ if (swaitqueue_active(&dvcpu->wq)) {
+ swait_wake_interruptible(&dvcpu->wq);
}

return 0;
@@ -971,8 +971,8 @@
kvm_mips_callbacks->queue_timer_int(vcpu);

vcpu->arch.wait = 0;
- if (waitqueue_active(&vcpu->wq)) {
- wake_up_interruptible(&vcpu->wq);
+ if (swaitqueue_active(&vcpu->wq)) {
+ swait_wake_nterruptible(&vcpu->wq);
}
}

Index: linux-stable-rt/arch/powerpc/include/asm/kvm_host.h
===================================================================
--- linux-stable-rt.orig/arch/powerpc/include/asm/kvm_host.h 2014-11-25 14:13:39.193899944 -0200
+++ linux-stable-rt/arch/powerpc/include/asm/kvm_host.h 2014-11-25 14:14:38.621810091 -0200
@@ -295,7 +295,7 @@
u8 in_guest;
struct list_head runnable_threads;
spinlock_t lock;
- wait_queue_head_t wq;
+ struct swait_head wq;
u64 stolen_tb;
u64 preempt_tb;
struct kvm_vcpu *runner;
@@ -612,7 +612,7 @@
u8 prodded;
u32 last_inst;

- wait_queue_head_t *wqp;
+ struct swait_head *wqp;
struct kvmppc_vcore *vcore;
int ret;
int trap;
Index: linux-stable-rt/arch/powerpc/kvm/book3s_hv.c
===================================================================
--- linux-stable-rt.orig/arch/powerpc/kvm/book3s_hv.c 2014-11-25 14:13:39.195899942 -0200
+++ linux-stable-rt/arch/powerpc/kvm/book3s_hv.c 2014-11-25 14:14:38.625810085 -0200
@@ -74,11 +74,11 @@
{
int me;
int cpu = vcpu->cpu;
- wait_queue_head_t *wqp;
+ struct swait_head *wqp;

wqp = kvm_arch_vcpu_wq(vcpu);
- if (waitqueue_active(wqp)) {
- wake_up_interruptible(wqp);
+ if (swaitqueue_active(wqp)) {
+ swait_wake_interruptible(wqp);
++vcpu->stat.halt_wakeup;
}

@@ -583,8 +583,8 @@
tvcpu->arch.prodded = 1;
smp_mb();
if (vcpu->arch.ceded) {
- if (waitqueue_active(&vcpu->wq)) {
- wake_up_interruptible(&vcpu->wq);
+ if (swaitqueue_active(&vcpu->wq)) {
+ swait_wake_interruptible(&vcpu->wq);
vcpu->stat.halt_wakeup++;
}
}
@@ -1199,7 +1199,7 @@
if (vcore) {
INIT_LIST_HEAD(&vcore->runnable_threads);
spin_lock_init(&vcore->lock);
- init_waitqueue_head(&vcore->wq);
+ init_swait_head(&vcore->wq);
vcore->preempt_tb = TB_NIL;
vcore->lpcr = kvm->arch.lpcr;
vcore->first_vcpuid = core * threads_per_core;
@@ -1566,13 +1566,13 @@
*/
static void kvmppc_vcore_blocked(struct kvmppc_vcore *vc)
{
- DEFINE_WAIT(wait);
+ DEFINE_SWAITER(wait);

- prepare_to_wait(&vc->wq, &wait, TASK_INTERRUPTIBLE);
+ swait_prepare(&vc->wq, &wait, TASK_INTERRUPTIBLE);
vc->vcore_state = VCORE_SLEEPING;
spin_unlock(&vc->lock);
schedule();
- finish_wait(&vc->wq, &wait);
+ swait_finish(&vc->wq, &wait);
spin_lock(&vc->lock);
vc->vcore_state = VCORE_INACTIVE;
}
@@ -1613,7 +1613,7 @@
kvmppc_create_dtl_entry(vcpu, vc);
kvmppc_start_thread(vcpu);
} else if (vc->vcore_state == VCORE_SLEEPING) {
- wake_up(&vc->wq);
+ swait_wake(&vc->wq);
}

}
Index: linux-stable-rt/arch/s390/include/asm/kvm_host.h
===================================================================
--- linux-stable-rt.orig/arch/s390/include/asm/kvm_host.h 2014-11-25 14:13:39.196899940 -0200
+++ linux-stable-rt/arch/s390/include/asm/kvm_host.h 2014-11-25 14:14:38.627810082 -0200
@@ -233,7 +233,7 @@
atomic_t active;
struct kvm_s390_float_interrupt *float_int;
int timer_due; /* event indicator for waitqueue below */
- wait_queue_head_t *wq;
+ struct swait_head *wq;
atomic_t *cpuflags;
unsigned int action_bits;
};
Index: linux-stable-rt/arch/s390/kvm/interrupt.c
===================================================================
--- linux-stable-rt.orig/arch/s390/kvm/interrupt.c 2014-11-25 14:13:39.197899939 -0200
+++ linux-stable-rt/arch/s390/kvm/interrupt.c 2014-11-25 14:14:38.630810077 -0200
@@ -403,7 +403,7 @@
int kvm_s390_handle_wait(struct kvm_vcpu *vcpu)
{
u64 now, sltime;
- DECLARE_WAITQUEUE(wait, current);
+ DECLARE_SWAITER(wait);

vcpu->stat.exit_wait_state++;
if (kvm_cpu_has_interrupt(vcpu))
@@ -440,12 +440,11 @@
srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
spin_lock(&vcpu->arch.local_int.float_int->lock);
spin_lock_bh(&vcpu->arch.local_int.lock);
- add_wait_queue(&vcpu->wq, &wait);
+ swait_prepare(&vcpu->wq, &wait, TASK_INTERRUPTIBLE);
while (list_empty(&vcpu->arch.local_int.list) &&
list_empty(&vcpu->arch.local_int.float_int->list) &&
(!vcpu->arch.local_int.timer_due) &&
!signal_pending(current)) {
- set_current_state(TASK_INTERRUPTIBLE);
spin_unlock_bh(&vcpu->arch.local_int.lock);
spin_unlock(&vcpu->arch.local_int.float_int->lock);
schedule();
@@ -453,8 +452,7 @@
spin_lock_bh(&vcpu->arch.local_int.lock);
}
__unset_cpu_idle(vcpu);
- __set_current_state(TASK_RUNNING);
- remove_wait_queue(&vcpu->wq, &wait);
+ swait_finish(&vcpu->wq, &wait);
spin_unlock_bh(&vcpu->arch.local_int.lock);
spin_unlock(&vcpu->arch.local_int.float_int->lock);
vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
@@ -469,8 +467,8 @@

spin_lock(&vcpu->arch.local_int.lock);
vcpu->arch.local_int.timer_due = 1;
- if (waitqueue_active(&vcpu->wq))
- wake_up_interruptible(&vcpu->wq);
+ if (swaitqueue_active(&vcpu->wq))
+ swait_wake_interruptible(&vcpu->wq);
spin_unlock(&vcpu->arch.local_int.lock);
}

@@ -617,7 +615,7 @@
spin_lock_bh(&li->lock);
list_add(&inti->list, &li->list);
atomic_set(&li->active, 1);
- BUG_ON(waitqueue_active(li->wq));
+ BUG_ON(swaitqueue_active(li->wq));
spin_unlock_bh(&li->lock);
return 0;
}
@@ -750,8 +748,8 @@
li = fi->local_int[sigcpu];
spin_lock_bh(&li->lock);
atomic_set_mask(CPUSTAT_EXT_INT, li->cpuflags);
- if (waitqueue_active(li->wq))
- wake_up_interruptible(li->wq);
+ if (swaitqueue_active(li->wq))
+ swait_wake_interruptible(li->wq);
spin_unlock_bh(&li->lock);
spin_unlock(&fi->lock);
mutex_unlock(&kvm->lock);
@@ -836,8 +834,8 @@
if (inti->type == KVM_S390_SIGP_STOP)
li->action_bits |= ACTION_STOP_ON_STOP;
atomic_set_mask(CPUSTAT_EXT_INT, li->cpuflags);
- if (waitqueue_active(&vcpu->wq))
- wake_up_interruptible(&vcpu->wq);
+ if (swaitqueue_active(&vcpu->wq))
+ swait_wake_interruptible(&vcpu->wq);
spin_unlock_bh(&li->lock);
mutex_unlock(&vcpu->kvm->lock);
return 0;
Index: linux-stable-rt/arch/s390/kvm/sigp.c
===================================================================
--- linux-stable-rt.orig/arch/s390/kvm/sigp.c 2014-11-25 14:13:39.198899937 -0200
+++ linux-stable-rt/arch/s390/kvm/sigp.c 2014-11-25 14:14:38.633810073 -0200
@@ -79,8 +79,8 @@
list_add_tail(&inti->list, &li->list);
atomic_set(&li->active, 1);
atomic_set_mask(CPUSTAT_EXT_INT, li->cpuflags);
- if (waitqueue_active(li->wq))
- wake_up_interruptible(li->wq);
+ if (swaitqueue_active(li->wq))
+ swait_wake_interruptible(li->wq);
spin_unlock_bh(&li->lock);
rc = SIGP_CC_ORDER_CODE_ACCEPTED;
VCPU_EVENT(vcpu, 4, "sent sigp emerg to cpu %x", cpu_addr);
@@ -148,8 +148,8 @@
list_add_tail(&inti->list, &li->list);
atomic_set(&li->active, 1);
atomic_set_mask(CPUSTAT_EXT_INT, li->cpuflags);
- if (waitqueue_active(li->wq))
- wake_up_interruptible(li->wq);
+ if (swaitqueue_active(li->wq))
+ swait_wake_interruptible(li->wq);
spin_unlock_bh(&li->lock);
rc = SIGP_CC_ORDER_CODE_ACCEPTED;
VCPU_EVENT(vcpu, 4, "sent sigp ext call to cpu %x", cpu_addr);
@@ -179,8 +179,8 @@
atomic_set(&li->active, 1);
atomic_set_mask(CPUSTAT_STOP_INT, li->cpuflags);
li->action_bits |= action;
- if (waitqueue_active(li->wq))
- wake_up_interruptible(li->wq);
+ if (swaitqueue_active(li->wq))
+ swait_wake_interruptible(li->wq);
out:
spin_unlock_bh(&li->lock);

@@ -288,8 +288,8 @@

list_add_tail(&inti->list, &li->list);
atomic_set(&li->active, 1);
- if (waitqueue_active(li->wq))
- wake_up_interruptible(li->wq);
+ if (swaitqueue_active(li->wq))
+ swait_wake_interruptible(li->wq);
rc = SIGP_CC_ORDER_CODE_ACCEPTED;

VCPU_EVENT(vcpu, 4, "set prefix of cpu %02x to %x", cpu_addr, address);
Index: linux-stable-rt/arch/x86/kvm/lapic.c
===================================================================
--- linux-stable-rt.orig/arch/x86/kvm/lapic.c 2014-11-25 14:13:39.199899935 -0200
+++ linux-stable-rt/arch/x86/kvm/lapic.c 2015-01-14 15:02:41.135771065 -0200
@@ -1533,7 +1533,7 @@
struct kvm_timer *ktimer = container_of(data, struct kvm_timer, timer);
struct kvm_lapic *apic = container_of(ktimer, struct kvm_lapic, lapic_timer);
struct kvm_vcpu *vcpu = apic->vcpu;
- wait_queue_head_t *q = &vcpu->wq;
+ struct swait_head *q = &vcpu->wq;

/*
* There is a race window between reading and incrementing, but we do
@@ -1547,8 +1547,8 @@
kvm_make_request(KVM_REQ_PENDING_TIMER, vcpu);
}

- if (waitqueue_active(q))
- wake_up_interruptible(q);
+ if (swaitqueue_active(q))
+ swait_wake_interruptible(q);

if (lapic_is_periodic(apic)) {
hrtimer_add_expires_ns(&ktimer->timer, ktimer->period);
Index: linux-stable-rt/include/linux/kvm_host.h
===================================================================
--- linux-stable-rt.orig/include/linux/kvm_host.h 2014-11-25 14:13:39.201899932 -0200
+++ linux-stable-rt/include/linux/kvm_host.h 2014-11-25 14:14:38.638810065 -0200
@@ -231,7 +231,7 @@

int fpu_active;
int guest_fpu_loaded, guest_xcr0_loaded;
- wait_queue_head_t wq;
+ struct swait_head wq;
struct pid *pid;
int sigset_active;
sigset_t sigset;
@@ -677,7 +677,7 @@
}
#endif

-static inline wait_queue_head_t *kvm_arch_vcpu_wq(struct kvm_vcpu *vcpu)
+static inline struct swait_head *kvm_arch_vcpu_wq(struct kvm_vcpu *vcpu)
{
#ifdef __KVM_HAVE_ARCH_WQP
return vcpu->arch.wqp;
Index: linux-stable-rt/virt/kvm/async_pf.c
===================================================================
--- linux-stable-rt.orig/virt/kvm/async_pf.c 2014-11-25 14:13:39.202899931 -0200
+++ linux-stable-rt/virt/kvm/async_pf.c 2014-11-25 14:14:38.639810063 -0200
@@ -82,8 +82,8 @@

trace_kvm_async_pf_completed(addr, gva);

- if (waitqueue_active(&vcpu->wq))
- wake_up_interruptible(&vcpu->wq);
+ if (swaitqueue_active(&vcpu->wq))
+ swait_wake_interruptible(&vcpu->wq);

mmput(mm);
kvm_put_kvm(vcpu->kvm);
Index: linux-stable-rt/virt/kvm/kvm_main.c
===================================================================
--- linux-stable-rt.orig/virt/kvm/kvm_main.c 2014-11-25 14:13:39.204899928 -0200
+++ linux-stable-rt/virt/kvm/kvm_main.c 2014-11-25 14:14:38.641810060 -0200
@@ -219,7 +219,7 @@
vcpu->kvm = kvm;
vcpu->vcpu_id = id;
vcpu->pid = NULL;
- init_waitqueue_head(&vcpu->wq);
+ init_swait_head(&vcpu->wq);
kvm_async_pf_vcpu_init(vcpu);

page = alloc_page(GFP_KERNEL | __GFP_ZERO);
@@ -1675,10 +1675,10 @@
*/
void kvm_vcpu_block(struct kvm_vcpu *vcpu)
{
- DEFINE_WAIT(wait);
+ DEFINE_SWAITER(wait);

for (;;) {
- prepare_to_wait(&vcpu->wq, &wait, TASK_INTERRUPTIBLE);
+ swait_prepare(&vcpu->wq, &wait, TASK_INTERRUPTIBLE);

if (kvm_arch_vcpu_runnable(vcpu)) {
kvm_make_request(KVM_REQ_UNHALT, vcpu);
@@ -1692,7 +1692,7 @@
schedule();
}

- finish_wait(&vcpu->wq, &wait);
+ swait_finish(&vcpu->wq, &wait);
}
EXPORT_SYMBOL_GPL(kvm_vcpu_block);

@@ -1704,11 +1704,11 @@
{
int me;
int cpu = vcpu->cpu;
- wait_queue_head_t *wqp;
+ struct swait_head *wqp;

wqp = kvm_arch_vcpu_wq(vcpu);
- if (waitqueue_active(wqp)) {
- wake_up_interruptible(wqp);
+ if (swaitqueue_active(wqp)) {
+ swait_wake_interruptible(wqp);
++vcpu->stat.halt_wakeup;
}

@@ -1814,7 +1814,7 @@
continue;
if (vcpu == me)
continue;
- if (waitqueue_active(&vcpu->wq))
+ if (swaitqueue_active(&vcpu->wq))
continue;
if (!kvm_vcpu_eligible_for_directed_yield(vcpu))
continue;


--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/