[PATCH] KVM: x86: use TPAUSE to replace PAUSE in halt polling

From: Dapeng Mi
Date: Wed Aug 24 2022 - 05:06:56 EST


TPAUSE is a new instruction on Intel processors that instructs the
processor to enter a power/performance-optimized state. Halt polling
uses the PAUSE instruction while waiting for a vCPU to be woken up.
The polling time can be long and cause extra power consumption in some cases.

Use TPAUSE instead of the PAUSE instruction in halt polling to get
better power savings and performance.

Signed-off-by: Dapeng Mi <dapeng1.mi@xxxxxxxxx>
---
drivers/cpuidle/poll_state.c | 3 ++-
include/linux/kvm_host.h | 20 ++++++++++++++++++++
virt/kvm/kvm_main.c | 2 +-
3 files changed, 23 insertions(+), 2 deletions(-)

diff --git a/drivers/cpuidle/poll_state.c b/drivers/cpuidle/poll_state.c
index f7e83613ae94..51ec333cbf80 100644
--- a/drivers/cpuidle/poll_state.c
+++ b/drivers/cpuidle/poll_state.c
@@ -7,6 +7,7 @@
#include <linux/sched.h>
#include <linux/sched/clock.h>
#include <linux/sched/idle.h>
+#include <linux/kvm_host.h>

#define POLL_IDLE_RELAX_COUNT 200

@@ -25,7 +26,7 @@ static int __cpuidle poll_idle(struct cpuidle_device *dev,
limit = cpuidle_poll_time(drv, dev);

while (!need_resched()) {
- cpu_relax();
+ kvm_cpu_poll_pause(limit);
if (loop_count++ < POLL_IDLE_RELAX_COUNT)
continue;

diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index f4519d3689e1..810e749949b7 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -35,6 +35,7 @@
#include <linux/interval_tree.h>
#include <linux/rbtree.h>
#include <linux/xarray.h>
+#include <linux/delay.h>
#include <asm/signal.h>

#include <linux/kvm.h>
@@ -2247,6 +2248,25 @@ static inline void kvm_handle_signal_exit(struct kvm_vcpu *vcpu)
}
#endif /* CONFIG_KVM_XFER_TO_GUEST_WORK */

+/*
+ * Replacement for cpu_relax() in halt-polling loops. On x86 CPUs that
+ * report WAITPKG, and when @timeout_ns is above 1000 ns, wait with
+ * udelay(1) (intended to use TPAUSE instead of PAUSE for better power
+ * saving and performance); otherwise fall back to cpu_relax(). The
+ * 1 us delay is a compromise between scheduling latency and power saving.
+ */
+static inline void kvm_cpu_poll_pause(u64 timeout_ns)
+{
+#ifdef CONFIG_X86
+ if (static_cpu_has(X86_FEATURE_WAITPKG) && timeout_ns > 1000)
+ udelay(1);
+ else
+ cpu_relax();
+#else
+ cpu_relax();
+#endif
+}
+
/*
* This defines how many reserved entries we want to keep before we
* kick the vcpu to the userspace to avoid dirty ring full. This
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 584a5bab3af3..4afa776d21bd 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -3510,7 +3510,7 @@ void kvm_vcpu_halt(struct kvm_vcpu *vcpu)
*/
if (kvm_vcpu_check_block(vcpu) < 0)
goto out;
- cpu_relax();
+ kvm_cpu_poll_pause(vcpu->halt_poll_ns);
poll_end = cur = ktime_get();
} while (kvm_vcpu_can_poll(cur, stop));
}
--
2.34.1