[PATCHv5 26/30] x86/mm/cpa: Add support for TDX shared memory
From: Kirill A. Shutemov
Date: Wed Mar 02 2022 - 09:30:19 EST
Intel TDX protects guest memory from VMM access. Any memory that is
required for communication with the VMM must be explicitly shared.
It is a two-step process: the guest sets the shared bit in the page
table entry and notifies the VMM about the change. The notification
happens using the MapGPA hypercall.
Conversion back to private memory requires clearing the shared bit,
notifying the VMM with the MapGPA hypercall, and then accepting the
memory with the AcceptPage TDX module call.
Provide a TDX version of the x86_platform.guest.* callbacks. This makes
__set_memory_enc_pgtable() work correctly in a TDX guest.
Signed-off-by: Kirill A. Shutemov <kirill.shutemov@xxxxxxxxxxxxxxx>
---
arch/x86/coco/core.c | 1 +
arch/x86/coco/tdx.c | 101 ++++++++++++++++++++++++++++++++++++++++
arch/x86/kernel/traps.c | 2 +-
3 files changed, 103 insertions(+), 1 deletion(-)
diff --git a/arch/x86/coco/core.c b/arch/x86/coco/core.c
index 54344122e2fe..9778cf4c6901 100644
--- a/arch/x86/coco/core.c
+++ b/arch/x86/coco/core.c
@@ -21,6 +21,7 @@ static bool intel_cc_platform_has(enum cc_attr attr)
switch (attr) {
case CC_ATTR_GUEST_UNROLL_STRING_IO:
case CC_ATTR_HOTPLUG_DISABLED:
+ case CC_ATTR_GUEST_MEM_ENCRYPT:
return true;
default:
return false;
diff --git a/arch/x86/coco/tdx.c b/arch/x86/coco/tdx.c
index c82e8eda8c8b..2168ee25a52c 100644
--- a/arch/x86/coco/tdx.c
+++ b/arch/x86/coco/tdx.c
@@ -10,10 +10,15 @@
#include <asm/vmx.h>
#include <asm/insn.h>
#include <asm/insn-eval.h>
+#include <asm/x86_init.h>
/* TDX module Call Leaf IDs */
#define TDX_GET_INFO 1
#define TDX_GET_VEINFO 3
+#define TDX_ACCEPT_PAGE 6
+
+/* TDX hypercall Leaf IDs */
+#define TDVMCALL_MAP_GPA 0x10001
/* MMIO direction */
#define EPT_READ 0
@@ -495,6 +500,98 @@ bool tdx_handle_virt_exception(struct pt_regs *regs, struct ve_info *ve)
return ret;
}
+static bool tdx_tlb_flush_required(bool enc)
+{
+ /*
+ * TDX guest is responsible for flushing the TLB on private->shared
+ * transition (!enc). VMM is responsible for flushing on shared->private.
+ */
+ return !enc;
+}
+
+static bool tdx_cache_flush_required(void)
+{
+ return true; /* unconditional: this callback always asks for a cache flush */
+}
+
+/* Accept one pending, private page of the given size; true on success. */
+static bool accept_page(phys_addr_t gpa, enum pg_level pg_level)
+{
+	phys_addr_t size_code;
+
+	/*
+	 * TDX_ACCEPT_PAGE takes the physical address of the pending, private
+	 * page. Bits 2:0 of that GPA carry the page size code:
+	 * 0 - 4K, 1 - 2M, 2 - 1G. Any other level is rejected up front.
+	 */
+	if (pg_level == PG_LEVEL_4K)
+		size_code = 0;
+	else if (pg_level == PG_LEVEL_2M)
+		size_code = 1;
+	else if (pg_level == PG_LEVEL_1G)
+		size_code = 2;
+	else
+		return false;
+
+	/* Success is reported only when the module call returns zero. */
+	return __tdx_module_call(TDX_ACCEPT_PAGE, gpa | size_code,
+				 0, 0, 0, NULL) == 0;
+}
+
+/*
+ * Inform the VMM of the guest's intent for a range of physical pages: shared
+ * with the VMM or private to the guest. The VMM is expected to change its
+ * mapping of the pages in response.
+ *
+ * @vaddr:    kernel virtual address of the first page; translated with __pa()
+ * @numpages: number of PAGE_SIZE pages in the range
+ * @enc:      true to convert the range to private, false to convert to shared
+ *
+ * Returns true on success, false if the MapGPA hypercall reports an error or
+ * a page in the range cannot be accepted.
+ */
+static bool tdx_enc_status_changed(unsigned long vaddr, int numpages, bool enc)
+{
+ phys_addr_t start = __pa(vaddr);
+ phys_addr_t end = __pa(vaddr + numpages * PAGE_SIZE);
+
+ if (!enc) { /* presumably cc_mkdec() sets the shared bit in both bounds - confirm */
+ start |= cc_mkdec(0);
+ end |= cc_mkdec(0);
+ }
+
+ /*
+ * Notify the VMM about page mapping conversion. The range is passed as
+ * a start GPA and a size in bytes; a non-zero return value is treated
+ * as failure. More info about the ABI can be found in the TDX
+ * Guest-Host-Communication Interface (GHCI), section
+ * "TDG.VP.VMCALL<MapGPA>".
+ */
+ if (_tdx_hypercall(TDVMCALL_MAP_GPA, start, end - start, 0, 0))
+ return false;
+
+ /* private->shared conversion requires only the MapGPA call */
+ if (!enc)
+ return true;
+
+ /*
+ * For shared->private conversion, accept every page in the range using
+ * the TDX_ACCEPT_PAGE TDX module call. Each iteration tries the largest
+ * page size that the alignment of start and the remaining length allow;
+ * if a large-page accept fails, the next smaller size is tried.
+ */
+ while (start < end) {
+ /* Try a 1G accept when start is 1G-aligned and at least 1G remains */
+ if (!(start & ~PUD_MASK) && end - start >= PUD_SIZE &&
+ accept_page(start, PG_LEVEL_1G)) {
+ start += PUD_SIZE;
+ continue;
+ }
+
+ /* Try a 2M accept when start is 2M-aligned and at least 2M remains */
+ if (!(start & ~PMD_MASK) && end - start >= PMD_SIZE &&
+ accept_page(start, PG_LEVEL_2M)) {
+ start += PMD_SIZE;
+ continue;
+ }
+
+ if (!accept_page(start, PG_LEVEL_4K))
+ return false;
+ start += PAGE_SIZE;
+ }
+
+ return true;
+}
+
void __init tdx_early_init(void)
{
unsigned int gpa_width;
@@ -526,5 +623,9 @@ void __init tdx_early_init(void)
*/
cc_set_mask(BIT_ULL(gpa_width - 1));
+ x86_platform.guest.enc_cache_flush_required = tdx_cache_flush_required;
+ x86_platform.guest.enc_tlb_flush_required = tdx_tlb_flush_required;
+ x86_platform.guest.enc_status_change_finish = tdx_enc_status_changed;
+
pr_info("Guest detected\n");
}
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index 1c3cb952fa2a..080f21171b27 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -1308,7 +1308,7 @@ static void ve_raise_fault(struct pt_regs *regs, long error_code)
*
* In the settings that Linux will run in, virtualization exceptions are
* never generated on accesses to normal, TD-private memory that has been
- * accepted.
+ * accepted (by BIOS or with tdx_enc_status_changed()).
*
* Syscall entry code has a critical window where the kernel stack is not
* yet set up. Any exception in this window leads to hard to debug issues
--
2.34.1