[RFC v2-fix-v3 1/1] x86/tdx: Skip WBINVD instruction for TDX guest

From: Kuppuswamy Sathyanarayanan
Date: Tue Jun 08 2021 - 17:35:46 EST


Current TDX spec does not have support to emulate the WBINVD
instruction. So, add support to skip WBINVD instruction in
drivers that are currently enabled in the TDX guest.

Functionally only devices outside the CPU (such as DMA devices,
or persistent memory for flushing) can notice the external side
effects from WBINVD's cache flushing for write back mappings.
One exception here is MKTME, but that is not visible outside
the TDX module and not possible inside a TDX guest.

Currently TDX does not support DMA, because DMA typically needs
uncached access for MMIO, and the current TDX module always
sets the IgnorePAT bit, which prevents that.
   
Persistent memory is also currently not supported. Another code
path that uses WBINVD is the MTRR driver, but EPT/virtualization
always disables MTRRs so those are not needed. This all implies
WBINVD is not needed with current TDX.

So, most drivers/code-paths that use wbinvd instructions are
already disabled for TDX guest platforms via config-option/BIOS.
Following are the list of drivers that use wbinvd instruction
and are still enabled for TDX guests.
   
drivers/acpi/sleep.c
drivers/acpi/acpica/hwsleep.c
   
Since cache is always coherent in TDX guests, making wbinvd as
noop should not cause any issues. This behavior is the same as
KVM guest.
   
Also, hwsleep shouldn't happen for TDX guest because the TDX
BIOS won't enable it, but it's better to disable it anyways

Signed-off-by: Kuppuswamy Sathyanarayanan <sathyanarayanan.kuppuswamy@xxxxxxxxxxxxxxx>
---

Changes since RFC v2-fix-v2:
* Instead of handling WBINVD #VE exception as nop, we skip its
usage in currently enabled drivers.
* Adapted commit log for above change.

arch/x86/kernel/tdx.c | 1 +
drivers/acpi/acpica/hwsleep.c | 12 +++++++++---
drivers/acpi/sleep.c | 26 +++++++++++++++++++++++---
include/linux/protected_guest.h | 2 ++
4 files changed, 35 insertions(+), 6 deletions(-)

diff --git a/arch/x86/kernel/tdx.c b/arch/x86/kernel/tdx.c
index 1caf9fa5bb30..e33928131e6a 100644
--- a/arch/x86/kernel/tdx.c
+++ b/arch/x86/kernel/tdx.c
@@ -100,6 +100,7 @@ bool tdx_protected_guest_has(unsigned long flag)
case PR_GUEST_MEM_ENCRYPT_ACTIVE:
case PR_GUEST_UNROLL_STRING_IO:
case PR_GUEST_SHARED_MAPPING_INIT:
+ case PR_GUEST_DISABLE_WBINVD:
return true;
}

diff --git a/drivers/acpi/acpica/hwsleep.c b/drivers/acpi/acpica/hwsleep.c
index 14baa13bf848..9d40df1b8a74 100644
--- a/drivers/acpi/acpica/hwsleep.c
+++ b/drivers/acpi/acpica/hwsleep.c
@@ -9,6 +9,7 @@
*****************************************************************************/

#include <acpi/acpi.h>
+#include <linux/protected_guest.h>
#include "accommon.h"

#define _COMPONENT ACPI_HARDWARE
@@ -108,9 +109,14 @@ acpi_status acpi_hw_legacy_sleep(u8 sleep_state)
pm1a_control |= sleep_enable_reg_info->access_bit_mask;
pm1b_control |= sleep_enable_reg_info->access_bit_mask;

- /* Flush caches, as per ACPI specification */
-
- ACPI_FLUSH_CPU_CACHE();
+ /*
+ * WBINVD instruction is not supported in TDX
+ * guest. Since ACPI_FLUSH_CPU_CACHE() uses
+ * WBINVD, skip cache flushes for TDX guests.
+ */
+ if (prot_guest_has(PR_GUEST_DISABLE_WBINVD))
+ /* Flush caches, as per ACPI specification */
+ ACPI_FLUSH_CPU_CACHE();

status = acpi_os_enter_sleep(sleep_state, pm1a_control, pm1b_control);
if (status == AE_CTRL_TERMINATE) {
diff --git a/drivers/acpi/sleep.c b/drivers/acpi/sleep.c
index df386571da98..3d6c213481f0 100644
--- a/drivers/acpi/sleep.c
+++ b/drivers/acpi/sleep.c
@@ -18,6 +18,7 @@
#include <linux/acpi.h>
#include <linux/module.h>
#include <linux/syscore_ops.h>
+#include <linux/protected_guest.h>
#include <asm/io.h>
#include <trace/events/power.h>

@@ -71,7 +72,14 @@ static int acpi_sleep_prepare(u32 acpi_state)
acpi_set_waking_vector(acpi_wakeup_address);

}
- ACPI_FLUSH_CPU_CACHE();
+
+ /*
+ * WBINVD instruction is not supported in TDX
+ * guest. Since ACPI_FLUSH_CPU_CACHE() uses
+ * WBINVD, skip cache flushes for TDX guests.
+ */
+ if (prot_guest_has(PR_GUEST_DISABLE_WBINVD))
+ ACPI_FLUSH_CPU_CACHE();
#endif
printk(KERN_INFO PREFIX "Preparing to enter system sleep state S%d\n",
acpi_state);
@@ -566,7 +574,13 @@ static int acpi_suspend_enter(suspend_state_t pm_state)
u32 acpi_state = acpi_target_sleep_state;
int error;

- ACPI_FLUSH_CPU_CACHE();
+ /*
+ * WBINVD instruction is not supported in TDX
+ * guest. Since ACPI_FLUSH_CPU_CACHE() uses
+ * WBINVD, skip cache flushes for TDX guests.
+ */
+ if (prot_guest_has(PR_GUEST_DISABLE_WBINVD))
+ ACPI_FLUSH_CPU_CACHE();

trace_suspend_resume(TPS("acpi_suspend"), acpi_state, true);
switch (acpi_state) {
@@ -899,7 +913,13 @@ static int acpi_hibernation_enter(void)
{
acpi_status status = AE_OK;

- ACPI_FLUSH_CPU_CACHE();
+ /*
+ * WBINVD instruction is not supported in TDX
+ * guest. Since ACPI_FLUSH_CPU_CACHE() uses
+ * WBINVD, skip cache flushes for TDX guests.
+ */
+ if (prot_guest_has(PR_GUEST_DISABLE_WBINVD))
+ ACPI_FLUSH_CPU_CACHE();

/* This shouldn't return. If it returns, we have a problem */
status = acpi_enter_sleep_state(ACPI_STATE_S4);
diff --git a/include/linux/protected_guest.h b/include/linux/protected_guest.h
index adfa62e2615e..0ec4dab86f67 100644
--- a/include/linux/protected_guest.h
+++ b/include/linux/protected_guest.h
@@ -18,6 +18,8 @@
#define PR_GUEST_HOST_MEM_ENCRYPT 0x103
/* Support for shared mapping initialization (after early init) */
#define PR_GUEST_SHARED_MAPPING_INIT 0x104
+/* Support to disable WBINVD */
+#define PR_GUEST_DISABLE_WBINVD 0x105

#if defined(CONFIG_INTEL_TDX_GUEST) || defined(CONFIG_AMD_MEM_ENCRYPT)

--
2.25.1