[PATCH 4/5] kvm: Replace vcpu->swait with rcuwait

From: Davidlohr Bueso
Date: Wed Apr 22 2020 - 00:12:06 EST


The use of any sort of waitqueue (simple or regular) for
wait/waking vcpus has always been an overkill and semantically
wrong. Because this is per-vcpu (which is blocked) there is
only ever a single waiting vcpu, thus no need for any sort of
queue.

As such, make use of the rcuwait primitive, with the following
considerations:

- rcuwait already provides the proper barriers that serialize
concurrent waiter and waker.

- Task wakeup is done in rcu read critical region, with a
stable task pointer.

- Because there is no concurrency among waiters, we need
not worry about rcuwait_wait_event() calls corrupting
the wait->task. As a consequence, this saves the locking
done in swait when modifying the queue. This also applies
to per-vcore wait for powerpc kvm-hv.

The x86 tscdeadline_latency test mentioned in 8577370fb0cb
("KVM: Use simple waitqueue for vcpu->wq") shows that, on avg,
latency is reduced by around 15-20% with this change.

Cc: Paul Mackerras <paulus@xxxxxxxxxx>
Cc: kvmarm@xxxxxxxxxxxxxxxxxxxxx
Cc: linux-mips@xxxxxxxxxxxxxxx
Signed-off-by: Davidlohr Bueso <dbueso@xxxxxxx>
---
arch/mips/kvm/mips.c | 6 ++----
arch/powerpc/include/asm/kvm_book3s.h | 2 +-
arch/powerpc/include/asm/kvm_host.h | 2 +-
arch/powerpc/kvm/book3s_hv.c | 22 ++++++++--------------
arch/powerpc/kvm/powerpc.c | 2 +-
arch/x86/kvm/lapic.c | 2 +-
include/linux/kvm_host.h | 10 +++++-----
virt/kvm/arm/arch_timer.c | 2 +-
virt/kvm/arm/arm.c | 9 +++++----
virt/kvm/async_pf.c | 3 +--
virt/kvm/kvm_main.c | 19 +++++++++----------
11 files changed, 35 insertions(+), 44 deletions(-)

diff --git a/arch/mips/kvm/mips.c b/arch/mips/kvm/mips.c
index 8f05dd0a0f4e..fad6acce46e4 100644
--- a/arch/mips/kvm/mips.c
+++ b/arch/mips/kvm/mips.c
@@ -284,8 +284,7 @@ static enum hrtimer_restart kvm_mips_comparecount_wakeup(struct hrtimer *timer)
kvm_mips_callbacks->queue_timer_int(vcpu);

vcpu->arch.wait = 0;
- if (swq_has_sleeper(&vcpu->wq))
- swake_up_one(&vcpu->wq);
+ rcuwait_wake_up(&vcpu->wait);

return kvm_mips_count_timeout(vcpu);
}
@@ -511,8 +510,7 @@ int kvm_vcpu_ioctl_interrupt(struct kvm_vcpu *vcpu,

dvcpu->arch.wait = 0;

- if (swq_has_sleeper(&dvcpu->wq))
- swake_up_one(&dvcpu->wq);
+ rcuwait_wake_up(&dvcpu->wait);

return 0;
}
diff --git a/arch/powerpc/include/asm/kvm_book3s.h b/arch/powerpc/include/asm/kvm_book3s.h
index 506e4df2d730..6e5d85ba588d 100644
--- a/arch/powerpc/include/asm/kvm_book3s.h
+++ b/arch/powerpc/include/asm/kvm_book3s.h
@@ -78,7 +78,7 @@ struct kvmppc_vcore {
struct kvm_vcpu *runnable_threads[MAX_SMT_THREADS];
struct list_head preempt_list;
spinlock_t lock;
- struct swait_queue_head wq;
+ struct rcuwait wait;
spinlock_t stoltb_lock; /* protects stolen_tb and preempt_tb */
u64 stolen_tb;
u64 preempt_tb;
diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h
index 1dc63101ffe1..337047ba4a56 100644
--- a/arch/powerpc/include/asm/kvm_host.h
+++ b/arch/powerpc/include/asm/kvm_host.h
@@ -751,7 +751,7 @@ struct kvm_vcpu_arch {
u8 irq_pending; /* Used by XIVE to signal pending guest irqs */
u32 last_inst;

- struct swait_queue_head *wqp;
+ struct rcuwait *waitp;
struct kvmppc_vcore *vcore;
int ret;
int trap;
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index 93493f0cbfe8..b8d42f523ca7 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -230,13 +230,11 @@ static bool kvmppc_ipi_thread(int cpu)
static void kvmppc_fast_vcpu_kick_hv(struct kvm_vcpu *vcpu)
{
int cpu;
- struct swait_queue_head *wqp;
+ struct rcuwait *wait;

- wqp = kvm_arch_vcpu_wq(vcpu);
- if (swq_has_sleeper(wqp)) {
- swake_up_one(wqp);
+ wait = kvm_arch_vcpu_get_wait(vcpu);
+ if (rcuwait_wake_up(wait))
++vcpu->stat.halt_wakeup;
- }

cpu = READ_ONCE(vcpu->arch.thread_cpu);
if (cpu >= 0 && kvmppc_ipi_thread(cpu))
@@ -2125,7 +2123,7 @@ static struct kvmppc_vcore *kvmppc_vcore_create(struct kvm *kvm, int id)

spin_lock_init(&vcore->lock);
spin_lock_init(&vcore->stoltb_lock);
- init_swait_queue_head(&vcore->wq);
+ rcuwait_init(&vcore->wait);
vcore->preempt_tb = TB_NIL;
vcore->lpcr = kvm->arch.lpcr;
vcore->first_vcpuid = id;
@@ -3784,7 +3782,6 @@ static void kvmppc_vcore_blocked(struct kvmppc_vcore *vc)
ktime_t cur, start_poll, start_wait;
int do_sleep = 1;
u64 block_ns;
- DECLARE_SWAITQUEUE(wait);

/* Poll for pending exceptions and ceded state */
cur = start_poll = ktime_get();
@@ -3812,10 +3809,7 @@ static void kvmppc_vcore_blocked(struct kvmppc_vcore *vc)
}
}

- prepare_to_swait_exclusive(&vc->wq, &wait, TASK_INTERRUPTIBLE);
-
if (kvmppc_vcore_check_block(vc)) {
- finish_swait(&vc->wq, &wait);
do_sleep = 0;
/* If we polled, count this as a successful poll */
if (vc->halt_poll_ns)
@@ -3828,8 +3822,8 @@ static void kvmppc_vcore_blocked(struct kvmppc_vcore *vc)
vc->vcore_state = VCORE_SLEEPING;
trace_kvmppc_vcore_blocked(vc, 0);
spin_unlock(&vc->lock);
- schedule();
- finish_swait(&vc->wq, &wait);
+ rcuwait_wait_event(&vc->wait,
+ kvmppc_vcore_check_block(vc), TASK_INTERRUPTIBLE);
spin_lock(&vc->lock);
vc->vcore_state = VCORE_INACTIVE;
trace_kvmppc_vcore_blocked(vc, 1);
@@ -3940,7 +3934,7 @@ static int kvmppc_run_vcpu(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
kvmppc_start_thread(vcpu, vc);
trace_kvm_guest_enter(vcpu);
} else if (vc->vcore_state == VCORE_SLEEPING) {
- swake_up_one(&vc->wq);
+ rcuwait_wake_up(&vc->wait);
}

}
@@ -4279,7 +4273,7 @@ static int kvmppc_vcpu_run_hv(struct kvm_run *run, struct kvm_vcpu *vcpu)
}
user_vrsave = mfspr(SPRN_VRSAVE);

- vcpu->arch.wqp = &vcpu->arch.vcore->wq;
+ vcpu->arch.waitp = &vcpu->arch.vcore->wait;
vcpu->arch.pgdir = kvm->mm->pgd;
vcpu->arch.state = KVMPPC_VCPU_BUSY_IN_HOST;

diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c
index e15166b0a16d..4a074b587520 100644
--- a/arch/powerpc/kvm/powerpc.c
+++ b/arch/powerpc/kvm/powerpc.c
@@ -751,7 +751,7 @@ int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu)
if (err)
goto out_vcpu_uninit;

- vcpu->arch.wqp = &vcpu->wq;
+ vcpu->arch.waitp = &vcpu->wait;
kvmppc_create_vcpu_debugfs(vcpu, vcpu->vcpu_id);
return 0;

diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index 9af25c97612a..fb2f56ba171d 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -1833,7 +1833,7 @@ void kvm_lapic_expired_hv_timer(struct kvm_vcpu *vcpu)
/* If the preempt notifier has already run, it also called apic_timer_expired */
if (!apic->lapic_timer.hv_timer_in_use)
goto out;
- WARN_ON(swait_active(&vcpu->wq));
+ WARN_ON(rcu_dereference(vcpu->wait.task));
cancel_hv_timer(apic);
apic_timer_expired(apic);

diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 6d58beb65454..fc34021546bd 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -23,7 +23,7 @@
#include <linux/irqflags.h>
#include <linux/context_tracking.h>
#include <linux/irqbypass.h>
-#include <linux/swait.h>
+#include <linux/rcuwait.h>
#include <linux/refcount.h>
#include <linux/nospec.h>
#include <asm/signal.h>
@@ -277,7 +277,7 @@ struct kvm_vcpu {
struct mutex mutex;
struct kvm_run *run;

- struct swait_queue_head wq;
+ struct rcuwait wait;
struct pid __rcu *pid;
int sigset_active;
sigset_t sigset;
@@ -956,12 +956,12 @@ static inline bool kvm_arch_has_assigned_device(struct kvm *kvm)
}
#endif

-static inline struct swait_queue_head *kvm_arch_vcpu_wq(struct kvm_vcpu *vcpu)
+static inline struct rcuwait *kvm_arch_vcpu_get_wait(struct kvm_vcpu *vcpu)
{
#ifdef __KVM_HAVE_ARCH_WQP
- return vcpu->arch.wqp;
+ return vcpu->arch.waitp;
#else
- return &vcpu->wq;
+ return &vcpu->wait;
#endif
}

diff --git a/virt/kvm/arm/arch_timer.c b/virt/kvm/arm/arch_timer.c
index 93bd59b46848..b2805105bbe5 100644
--- a/virt/kvm/arm/arch_timer.c
+++ b/virt/kvm/arm/arch_timer.c
@@ -593,7 +593,7 @@ void kvm_timer_vcpu_put(struct kvm_vcpu *vcpu)
if (map.emul_ptimer)
soft_timer_cancel(&map.emul_ptimer->hrtimer);

- if (swait_active(kvm_arch_vcpu_wq(vcpu)))
+ if (rcu_dereference(kvm_arch_vpu_get_wait(vcpu)) != NULL)
kvm_timer_blocking(vcpu);

/*
diff --git a/virt/kvm/arm/arm.c b/virt/kvm/arm/arm.c
index 48d0ec44ad77..f94a10bb1251 100644
--- a/virt/kvm/arm/arm.c
+++ b/virt/kvm/arm/arm.c
@@ -579,16 +579,17 @@ void kvm_arm_resume_guest(struct kvm *kvm)

kvm_for_each_vcpu(i, vcpu, kvm) {
vcpu->arch.pause = false;
- swake_up_one(kvm_arch_vcpu_wq(vcpu));
+ rcuwait_wake_up(kvm_arch_vcpu_get_wait(vcpu));
}
}

static void vcpu_req_sleep(struct kvm_vcpu *vcpu)
{
- struct swait_queue_head *wq = kvm_arch_vcpu_wq(vcpu);
+ struct rcuwait *wait = kvm_arch_vcpu_get_wait(vcpu);

- swait_event_interruptible_exclusive(*wq, ((!vcpu->arch.power_off) &&
- (!vcpu->arch.pause)));
+ rcuwait_wait_event(*wait,
+ (!vcpu->arch.power_off) &&(!vcpu->arch.pause),
+ TASK_INTERRUPTIBLE);

if (vcpu->arch.power_off || vcpu->arch.pause) {
/* Awaken to handle a signal, request we sleep again later. */
diff --git a/virt/kvm/async_pf.c b/virt/kvm/async_pf.c
index 15e5b037f92d..10b533f641a6 100644
--- a/virt/kvm/async_pf.c
+++ b/virt/kvm/async_pf.c
@@ -80,8 +80,7 @@ static void async_pf_execute(struct work_struct *work)

trace_kvm_async_pf_completed(addr, cr2_or_gpa);

- if (swq_has_sleeper(&vcpu->wq))
- swake_up_one(&vcpu->wq);
+ rcuwait_wake_up(&vcpu->wait);

mmput(mm);
kvm_put_kvm(vcpu->kvm);
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 74bdb7bf3295..835fb109badf 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -341,7 +341,7 @@ static void kvm_vcpu_init(struct kvm_vcpu *vcpu, struct kvm *kvm, unsigned id)
vcpu->kvm = kvm;
vcpu->vcpu_id = id;
vcpu->pid = NULL;
- init_swait_queue_head(&vcpu->wq);
+ rcuwait_init(&vcpu->wait);
kvm_async_pf_vcpu_init(vcpu);

vcpu->pre_pcpu = -1;
@@ -2671,7 +2671,6 @@ static int kvm_vcpu_check_block(struct kvm_vcpu *vcpu)
void kvm_vcpu_block(struct kvm_vcpu *vcpu)
{
ktime_t start, cur;
- DECLARE_SWAITQUEUE(wait);
bool waited = false;
u64 block_ns;

@@ -2697,8 +2696,9 @@ void kvm_vcpu_block(struct kvm_vcpu *vcpu)
} while (single_task_running() && ktime_before(cur, stop));
}

+ prepare_to_rcuwait(&vcpu->wait);
for (;;) {
- prepare_to_swait_exclusive(&vcpu->wq, &wait, TASK_INTERRUPTIBLE);
+ set_current_state(TASK_INTERRUPTIBLE);

if (kvm_vcpu_check_block(vcpu) < 0)
break;
@@ -2706,8 +2706,7 @@ void kvm_vcpu_block(struct kvm_vcpu *vcpu)
waited = true;
schedule();
}
-
- finish_swait(&vcpu->wq, &wait);
+ finish_rcuwait(&vcpu->wait);
cur = ktime_get();
out:
kvm_arch_vcpu_unblocking(vcpu);
@@ -2738,11 +2737,10 @@ EXPORT_SYMBOL_GPL(kvm_vcpu_block);

bool kvm_vcpu_wake_up(struct kvm_vcpu *vcpu)
{
- struct swait_queue_head *wqp;
+ struct rcuwait *wait;

- wqp = kvm_arch_vcpu_wq(vcpu);
- if (swq_has_sleeper(wqp)) {
- swake_up_one(wqp);
+ wait = kvm_arch_vcpu_get_wait(vcpu);
+ if (rcuwait_wake_up(wait)) {
WRITE_ONCE(vcpu->ready, true);
++vcpu->stat.halt_wakeup;
return true;
@@ -2884,7 +2882,8 @@ void kvm_vcpu_on_spin(struct kvm_vcpu *me, bool yield_to_kernel_mode)
continue;
if (vcpu == me)
continue;
- if (swait_active(&vcpu->wq) && !vcpu_dy_runnable(vcpu))
+ if (rcu_dereference(vcpu->wait.task) &&
+ !vcpu_dy_runnable(vcpu))
continue;
if (READ_ONCE(vcpu->preempted) && yield_to_kernel_mode &&
!kvm_arch_vcpu_in_kernel(vcpu))
--
2.16.4