[PATCH v1 4/4] x86/tdx: Implement movs for MMIO

From: Alexey Gladkov (Intel)
Date: Tue Jul 30 2024 - 13:37:13 EST


Adapt AMD's implementation of the MOVS instruction. Since the
implementations are similar, it is possible to reuse the code.

MOVS emulation consists of dividing it into a series of read and write
operations, which in turn will be validated separately.

Signed-off-by: Alexey Gladkov (Intel) <legion@xxxxxxxxxx>
---


I don't really understand the reasoning behind AMD's approach of returning to
userspace after every read/write operation in vc_handle_mmio_movs(). I didn't
change this so as not to break their implementation.

But if this can be changed then the whole vc_handle_mmio_movs() could be used as
a common helper.


arch/x86/coco/sev/core.c | 133 ++++----------------------------------
arch/x86/coco/tdx/tdx.c | 56 ++++++++++++++--
arch/x86/include/asm/io.h | 3 +
arch/x86/lib/iomem.c | 132 +++++++++++++++++++++++++++++++++++++
4 files changed, 199 insertions(+), 125 deletions(-)

diff --git a/arch/x86/coco/sev/core.c b/arch/x86/coco/sev/core.c
index 082d61d85dfc..3135c89802e9 100644
--- a/arch/x86/coco/sev/core.c
+++ b/arch/x86/coco/sev/core.c
@@ -369,72 +369,17 @@ static enum es_result vc_decode_insn(struct es_em_ctxt *ctxt)
static enum es_result vc_write_mem(struct es_em_ctxt *ctxt,
char *dst, char *buf, size_t size)
{
- unsigned long error_code = X86_PF_PROT | X86_PF_WRITE;
+ unsigned long error_code;
+ int ret = __put_iomem(dst, buf, size);

- /*
- * This function uses __put_user() independent of whether kernel or user
- * memory is accessed. This works fine because __put_user() does no
- * sanity checks of the pointer being accessed. All that it does is
- * to report when the access failed.
- *
- * Also, this function runs in atomic context, so __put_user() is not
- * allowed to sleep. The page-fault handler detects that it is running
- * in atomic context and will not try to take mmap_sem and handle the
- * fault, so additional pagefault_enable()/disable() calls are not
- * needed.
- *
- * The access can't be done via copy_to_user() here because
- * vc_write_mem() must not use string instructions to access unsafe
- * memory. The reason is that MOVS is emulated by the #VC handler by
- * splitting the move up into a read and a write and taking a nested #VC
- * exception on whatever of them is the MMIO access. Using string
- * instructions here would cause infinite nesting.
- */
- switch (size) {
- case 1: {
- u8 d1;
- u8 __user *target = (u8 __user *)dst;
-
- memcpy(&d1, buf, 1);
- if (__put_user(d1, target))
- goto fault;
- break;
- }
- case 2: {
- u16 d2;
- u16 __user *target = (u16 __user *)dst;
-
- memcpy(&d2, buf, 2);
- if (__put_user(d2, target))
- goto fault;
- break;
- }
- case 4: {
- u32 d4;
- u32 __user *target = (u32 __user *)dst;
-
- memcpy(&d4, buf, 4);
- if (__put_user(d4, target))
- goto fault;
- break;
- }
- case 8: {
- u64 d8;
- u64 __user *target = (u64 __user *)dst;
+ if (!ret)
+ return ES_OK;

- memcpy(&d8, buf, 8);
- if (__put_user(d8, target))
- goto fault;
- break;
- }
- default:
- WARN_ONCE(1, "%s: Invalid size: %zu\n", __func__, size);
+ if (ret == -EIO)
return ES_UNSUPPORTED;
- }

- return ES_OK;
+ error_code = X86_PF_PROT | X86_PF_WRITE;

-fault:
if (user_mode(ctxt->regs))
error_code |= X86_PF_USER;

@@ -448,71 +393,17 @@ static enum es_result vc_write_mem(struct es_em_ctxt *ctxt,
static enum es_result vc_read_mem(struct es_em_ctxt *ctxt,
char *src, char *buf, size_t size)
{
- unsigned long error_code = X86_PF_PROT;
+ unsigned long error_code;
+ int ret = __get_iomem(src, buf, size);

- /*
- * This function uses __get_user() independent of whether kernel or user
- * memory is accessed. This works fine because __get_user() does no
- * sanity checks of the pointer being accessed. All that it does is
- * to report when the access failed.
- *
- * Also, this function runs in atomic context, so __get_user() is not
- * allowed to sleep. The page-fault handler detects that it is running
- * in atomic context and will not try to take mmap_sem and handle the
- * fault, so additional pagefault_enable()/disable() calls are not
- * needed.
- *
- * The access can't be done via copy_from_user() here because
- * vc_read_mem() must not use string instructions to access unsafe
- * memory. The reason is that MOVS is emulated by the #VC handler by
- * splitting the move up into a read and a write and taking a nested #VC
- * exception on whatever of them is the MMIO access. Using string
- * instructions here would cause infinite nesting.
- */
- switch (size) {
- case 1: {
- u8 d1;
- u8 __user *s = (u8 __user *)src;
-
- if (__get_user(d1, s))
- goto fault;
- memcpy(buf, &d1, 1);
- break;
- }
- case 2: {
- u16 d2;
- u16 __user *s = (u16 __user *)src;
-
- if (__get_user(d2, s))
- goto fault;
- memcpy(buf, &d2, 2);
- break;
- }
- case 4: {
- u32 d4;
- u32 __user *s = (u32 __user *)src;
+ if (!ret)
+ return ES_OK;

- if (__get_user(d4, s))
- goto fault;
- memcpy(buf, &d4, 4);
- break;
- }
- case 8: {
- u64 d8;
- u64 __user *s = (u64 __user *)src;
- if (__get_user(d8, s))
- goto fault;
- memcpy(buf, &d8, 8);
- break;
- }
- default:
- WARN_ONCE(1, "%s: Invalid size: %zu\n", __func__, size);
+ if (ret == -EIO)
return ES_UNSUPPORTED;
- }

- return ES_OK;
+ error_code = X86_PF_PROT;

-fault:
if (user_mode(ctxt->regs))
error_code |= X86_PF_USER;

diff --git a/arch/x86/coco/tdx/tdx.c b/arch/x86/coco/tdx/tdx.c
index 26b2e52457be..cf209381d63b 100644
--- a/arch/x86/coco/tdx/tdx.c
+++ b/arch/x86/coco/tdx/tdx.c
@@ -499,6 +499,53 @@ static int decode_insn_struct(struct insn *insn, struct pt_regs *regs)
return 0;
}

+static int handle_mmio_movs(struct insn *insn, struct pt_regs *regs, int size, struct ve_info *ve)
+{
+ unsigned long ds_base, es_base;
+ unsigned char *src, *dst;
+ unsigned char buffer[8];
+ int off, ret;
+ bool rep;
+
+ /*
+ * The in-kernel code must use a special API that does not use MOVS.
+ * If the MOVS instruction is received from in-kernel, then something
+ * is broken.
+ */
+ WARN_ON_ONCE(!user_mode(regs));
+
+ ds_base = insn_get_seg_base(regs, INAT_SEG_REG_DS);
+ es_base = insn_get_seg_base(regs, INAT_SEG_REG_ES);
+
+ if (ds_base == -1L || es_base == -1L)
+ return -EINVAL;
+
+ rep = insn_has_rep_prefix(insn);
+
+ do {
+ src = ds_base + (unsigned char *) regs->si;
+ dst = es_base + (unsigned char *) regs->di;
+
+ ret = __get_iomem(src, buffer, size);
+ if (ret)
+ return ret;
+
+ ret = __put_iomem(dst, buffer, size);
+ if (ret)
+ return ret;
+
+ off = (regs->flags & X86_EFLAGS_DF) ? -size : size;
+
+ regs->si += off;
+ regs->di += off;
+
+ if (rep)
+ regs->cx -= 1;
+ } while (rep || regs->cx > 0);
+
+ return insn->length;
+}
+
static int handle_mmio_write(struct insn *insn, enum insn_mmio_type mmio, int size,
struct pt_regs *regs, struct ve_info *ve)
{
@@ -520,9 +567,8 @@ static int handle_mmio_write(struct insn *insn, enum insn_mmio_type mmio, int si
return insn->length;
case INSN_MMIO_MOVS:
/*
- * MMIO was accessed with an instruction that could not be
- * decoded or handled properly. It was likely not using io.h
- * helpers or accessed MMIO accidentally.
+ * MOVS is processed through higher level emulation which breaks
+ * this instruction into a sequence of reads and writes.
*/
return -EINVAL;
default:
@@ -591,6 +637,9 @@ static int handle_mmio(struct pt_regs *regs, struct ve_info *ve)
if (WARN_ON_ONCE(mmio == INSN_MMIO_DECODE_FAILED))
return -EINVAL;

+ if (mmio == INSN_MMIO_MOVS)
+ return handle_mmio_movs(&insn, regs, size, ve);
+
vaddr = (unsigned long)insn_get_addr_ref(&insn, regs);

if (user_mode(regs)) {
@@ -619,7 +668,6 @@ static int handle_mmio(struct pt_regs *regs, struct ve_info *ve)
switch (mmio) {
case INSN_MMIO_WRITE:
case INSN_MMIO_WRITE_IMM:
- case INSN_MMIO_MOVS:
ret = handle_mmio_write(&insn, mmio, size, regs, ve);
break;
case INSN_MMIO_READ:
diff --git a/arch/x86/include/asm/io.h b/arch/x86/include/asm/io.h
index 1d60427379c9..ac01d53466cb 100644
--- a/arch/x86/include/asm/io.h
+++ b/arch/x86/include/asm/io.h
@@ -402,4 +402,7 @@ static inline void iosubmit_cmds512(void __iomem *dst, const void *src,
}
}

+int __get_iomem(char *src, char *buf, size_t size);
+int __put_iomem(char *src, char *buf, size_t size);
+
#endif /* _ASM_X86_IO_H */
diff --git a/arch/x86/lib/iomem.c b/arch/x86/lib/iomem.c
index 5eecb45d05d5..e4919ad22206 100644
--- a/arch/x86/lib/iomem.c
+++ b/arch/x86/lib/iomem.c
@@ -2,6 +2,7 @@
#include <linux/module.h>
#include <linux/io.h>
#include <linux/kmsan-checks.h>
+#include <asm/uaccess.h>

#define movs(type,to,from) \
asm volatile("movs" type:"=&D" (to), "=&S" (from):"0" (to), "1" (from):"memory")
@@ -124,3 +125,134 @@ void memset_io(volatile void __iomem *a, int b, size_t c)
}
}
EXPORT_SYMBOL(memset_io);
+
+int __get_iomem(char *src, char *buf, size_t size)
+{
+ /*
+ * This function uses __get_user() independent of whether kernel or user
+ * memory is accessed. This works fine because __get_user() does no
+ * sanity checks of the pointer being accessed. All that it does is
+ * to report when the access failed.
+ *
+ * Also, this function runs in atomic context, so __get_user() is not
+ * allowed to sleep. The page-fault handler detects that it is running
+ * in atomic context and will not try to take mmap_sem and handle the
+ * fault, so additional pagefault_enable()/disable() calls are not
+ * needed.
+ *
+ * The access can't be done via copy_from_user() here because
+ * mmio_read_mem() must not use string instructions to access unsafe
+ * memory. The reason is that MOVS is emulated by the #VC handler by
+ * splitting the move up into a read and a write and taking a nested #VC
+ * exception on whatever of them is the MMIO access. Using string
+ * instructions here would cause infinite nesting.
+ */
+ switch (size) {
+ case 1: {
+ u8 d1;
+ u8 __user *s = (u8 __user *)src;
+
+ if (__get_user(d1, s))
+ return -EFAULT;
+ memcpy(buf, &d1, 1);
+ break;
+ }
+ case 2: {
+ u16 d2;
+ u16 __user *s = (u16 __user *)src;
+
+ if (__get_user(d2, s))
+ return -EFAULT;
+ memcpy(buf, &d2, 2);
+ break;
+ }
+ case 4: {
+ u32 d4;
+ u32 __user *s = (u32 __user *)src;
+
+ if (__get_user(d4, s))
+ return -EFAULT;
+ memcpy(buf, &d4, 4);
+ break;
+ }
+ case 8: {
+ u64 d8;
+ u64 __user *s = (u64 __user *)src;
+ if (__get_user(d8, s))
+ return -EFAULT;
+ memcpy(buf, &d8, 8);
+ break;
+ }
+ default:
+ WARN_ONCE(1, "%s: Invalid size: %zu\n", __func__, size);
+ return -EIO;
+ }
+
+ return 0;
+}
+
+int __put_iomem(char *dst, char *buf, size_t size)
+{
+ /*
+ * This function uses __put_user() independent of whether kernel or user
+ * memory is accessed. This works fine because __put_user() does no
+ * sanity checks of the pointer being accessed. All that it does is
+ * to report when the access failed.
+ *
+ * Also, this function runs in atomic context, so __put_user() is not
+ * allowed to sleep. The page-fault handler detects that it is running
+ * in atomic context and will not try to take mmap_sem and handle the
+ * fault, so additional pagefault_enable()/disable() calls are not
+ * needed.
+ *
+ * The access can't be done via copy_to_user() here because
+ * put_iomem() must not use string instructions to access unsafe
+ * memory. The reason is that MOVS is emulated by the #VC handler by
+ * splitting the move up into a read and a write and taking a nested #VC
+ * exception on whatever of them is the MMIO access. Using string
+ * instructions here would cause infinite nesting.
+ */
+ switch (size) {
+ case 1: {
+ u8 d1;
+ u8 __user *target = (u8 __user *)dst;
+
+ memcpy(&d1, buf, 1);
+ if (__put_user(d1, target))
+ return -EFAULT;
+ break;
+ }
+ case 2: {
+ u16 d2;
+ u16 __user *target = (u16 __user *)dst;
+
+ memcpy(&d2, buf, 2);
+ if (__put_user(d2, target))
+ return -EFAULT;
+ break;
+ }
+ case 4: {
+ u32 d4;
+ u32 __user *target = (u32 __user *)dst;
+
+ memcpy(&d4, buf, 4);
+ if (__put_user(d4, target))
+ return -EFAULT;
+ break;
+ }
+ case 8: {
+ u64 d8;
+ u64 __user *target = (u64 __user *)dst;
+
+ memcpy(&d8, buf, 8);
+ if (__put_user(d8, target))
+ return -EFAULT;
+ break;
+ }
+ default:
+ WARN_ONCE(1, "%s: Invalid size: %zu\n", __func__, size);
+ return -EIO;
+ }
+
+ return 0;
+}
--
2.45.2