[PATCH v3] ACPI/processor_idle: Remove dummy wait if kernel is in guest mode

From: Yin Fengwei
Date: Wed Oct 23 2019 - 03:49:50 EST


In function acpi_idle_do_entry(), an ioport access is used as a dummy
wait to guarantee hardware behavior. But it could trigger an unnecessary
vmexit if the kernel is running as a guest in a virtualization environment.

If the kernel is running in a virtualization environment, the operation
that enters the deeper C state (inb()) will trap to the hypervisor. There
is no need to do a dummy wait after the inb() call, so we remove the dummy
io port access to avoid an unnecessary VMexit.

We keep the dummy io port access to maintain timing in the native environment.

Signed-off-by: Yin Fengwei <fengwei.yin@xxxxxxxxx>
---
ChangeLog:
v2 -> v3:
- Remove dummy io port access totally for virtualization env.

v1 -> v2:
- Use ndelay instead of dead loop for dummy delay.

drivers/acpi/processor_idle.c | 36 ++++++++++++++++++++++++++++++++---
1 file changed, 33 insertions(+), 3 deletions(-)

diff --git a/drivers/acpi/processor_idle.c b/drivers/acpi/processor_idle.c
index ed56c6d20b08..0c4a97dd6917 100644
--- a/drivers/acpi/processor_idle.c
+++ b/drivers/acpi/processor_idle.c
@@ -58,6 +58,17 @@ struct cpuidle_driver acpi_idle_driver = {
static
DEFINE_PER_CPU(struct acpi_processor_cx * [CPUIDLE_STATE_MAX], acpi_cstate);

+static void (*dummy_wait)(u64 address);
+
+static void default_dummy_wait(u64 address)
+{
+ inl(address); /* port read at address; the value read is discarded */
+}
+
+static void default_noop_wait(u64 address)
+{ /* intentionally empty: address is ignored and no port access is made */
+}
+
static int disabled_by_idle_boot_param(void)
{
return boot_option_idle_override == IDLE_POLL ||
@@ -660,8 +671,13 @@ static void __cpuidle acpi_idle_do_entry(struct acpi_processor_cx *cx)
inb(cx->address);
/* Dummy wait op - must do something useless after P_LVL2 read
because chipsets cannot guarantee that STPCLK# signal
- gets asserted in time to freeze execution properly. */
- inl(acpi_gbl_FADT.xpm_timer_block.address);
+ gets asserted in time to freeze execution properly.
+
+ This dummy wait is only needed in a native environment. If we are
+ running as a guest of a hypervisor, no wait op is needed here, so
+ dummy_wait has different implementations for native/virtual env. */
+
+ dummy_wait(acpi_gbl_FADT.xpm_timer_block.address);
}
}

@@ -683,7 +699,7 @@ static int acpi_idle_play_dead(struct cpuidle_device *dev, int index)
else if (cx->entry_method == ACPI_CSTATE_SYSTEMIO) {
inb(cx->address);
/* See comment in acpi_idle_do_entry() */
- inl(acpi_gbl_FADT.xpm_timer_block.address);
+ dummy_wait(acpi_gbl_FADT.xpm_timer_block.address);
} else
return -ENODEV;
}
@@ -912,6 +928,20 @@ static inline void acpi_processor_cstate_first_run_checks(void)
max_cstate);
first_run++;

+ dummy_wait = default_dummy_wait;
+
+#ifdef CONFIG_X86
+ /* On x86, if we are running as a guest, we don't need the extra
+ * ioport access as a dummy wait.
+ */
+ if (boot_cpu_has(X86_FEATURE_HYPERVISOR)) {
+ pr_err("We are in virtual env");
+ dummy_wait = default_noop_wait;
+ } else {
+ pr_err("We are not in virtual env");
+ }
+#endif
+
if (acpi_gbl_FADT.cst_control && !nocst) {
status = acpi_os_write_port(acpi_gbl_FADT.smi_command,
acpi_gbl_FADT.cst_control, 8);
--
2.19.1