Re: [PATCH 1/2] x86/tdx: Add prctl to allow userlevel TDX hypercalls

From: Kirill A . Shutemov
Date: Mon Jul 08 2024 - 08:20:15 EST


On Wed, Jul 03, 2024 at 11:36:00PM +0000, Tim Merrifield wrote:
> Add a new prctl option to enable/disable user-level hypercalls when
> running in a confidential VM. Add support for checking this flag on
> VMCALL #VE for TDX and transfer control to a hypervisor
> vendor-specific handler.
>
> Signed-off-by: Tim Merrifield <tim.merrifield@xxxxxxxxxxxx>
> ---
> arch/x86/coco/tdx/tdx.c | 18 ++++++++++++++++++
> arch/x86/include/asm/thread_info.h | 2 ++
> arch/x86/include/asm/x86_init.h | 1 +
> arch/x86/include/uapi/asm/prctl.h | 3 +++
> arch/x86/kernel/process.c | 20 ++++++++++++++++++++
> 5 files changed, 44 insertions(+)
>
> diff --git a/arch/x86/coco/tdx/tdx.c b/arch/x86/coco/tdx/tdx.c
> index ef8ec2425998..23111e4c1f91 100644
> --- a/arch/x86/coco/tdx/tdx.c
> +++ b/arch/x86/coco/tdx/tdx.c
> @@ -239,6 +239,7 @@ static int ve_instr_len(struct ve_info *ve)
> case EXIT_REASON_MSR_WRITE:
> case EXIT_REASON_CPUID:
> case EXIT_REASON_IO_INSTRUCTION:
> + case EXIT_REASON_VMCALL:
> /* It is safe to use ve->instr_len for #VE due instructions */
> return ve->instr_len;
> case EXIT_REASON_EPT_VIOLATION:
> @@ -635,6 +636,21 @@ void tdx_get_ve_info(struct ve_info *ve)
> ve->instr_info = upper_32_bits(args.r10);
> }
>
> +/*
> + * Handle user-initiated, hypervisor-specific VMCALLs.
> + */
> +static int handle_user_vmcall(struct pt_regs *regs, struct ve_info *ve)
> +{
> + if (x86_platform.hyper.tdx_hcall &&
> + test_thread_flag(TIF_COCO_USER_HCALL)) {
> + if (!x86_platform.hyper.tdx_hcall(regs))
> + return -EIO;
> + return ve_instr_len(ve);
> + } else {
> + return -EOPNOTSUPP;
> + }

Maybe something like this would be more readable:

	if (!x86_platform.hyper.tdx_hcall)
		return -EOPNOTSUPP;

	if (!test_thread_flag(TIF_COCO_USER_HCALL))
		return -EOPNOTSUPP;

	if (!x86_platform.hyper.tdx_hcall(regs))
		return -EIO;

	return ve_instr_len(ve);

BTW, do we want tdx_hcall() to return errno instead of bool?

> +}
> +
> /*
> * Handle the user initiated #VE.
> *
> @@ -646,6 +662,8 @@ static int virt_exception_user(struct pt_regs *regs, struct ve_info *ve)
> switch (ve->exit_reason) {
> case EXIT_REASON_CPUID:
> return handle_cpuid(regs, ve);
> + case EXIT_REASON_VMCALL:
> + return handle_user_vmcall(regs, ve);
> default:
> pr_warn("Unexpected #VE: %lld\n", ve->exit_reason);
> return -EIO;
> diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h
> index 12da7dfd5ef1..9f69a26a5e68 100644
> --- a/arch/x86/include/asm/thread_info.h
> +++ b/arch/x86/include/asm/thread_info.h
> @@ -106,6 +106,7 @@ struct thread_info {
> #define TIF_BLOCKSTEP 25 /* set when we want DEBUGCTLMSR_BTF */
> #define TIF_LAZY_MMU_UPDATES 27 /* task is updating the mmu lazily */
> #define TIF_ADDR32 29 /* 32-bit address space on 64 bits */
> +#define TIF_COCO_USER_HCALL 30 /* Userland hypercalls allowed in CoCo */

Tabs instead of spaces for alignment, please.

> #define _TIF_NOTIFY_RESUME (1 << TIF_NOTIFY_RESUME)
> #define _TIF_SIGPENDING (1 << TIF_SIGPENDING)
> @@ -128,6 +129,7 @@ struct thread_info {
> #define _TIF_BLOCKSTEP (1 << TIF_BLOCKSTEP)
> #define _TIF_LAZY_MMU_UPDATES (1 << TIF_LAZY_MMU_UPDATES)
> #define _TIF_ADDR32 (1 << TIF_ADDR32)
> +#define _TIF_COCO_USER_HCALL (1 << TIF_COCO_USER_HCALL)

Ditto: tabs instead of spaces for alignment, please.

>
> /* flags to check in __switch_to() */
> #define _TIF_WORK_CTXSW_BASE \
> diff --git a/arch/x86/include/asm/x86_init.h b/arch/x86/include/asm/x86_init.h
> index 213cf5379a5a..52975bedd33e 100644
> --- a/arch/x86/include/asm/x86_init.h
> +++ b/arch/x86/include/asm/x86_init.h
> @@ -282,6 +282,7 @@ struct x86_hyper_runtime {
> void (*sev_es_hcall_prepare)(struct ghcb *ghcb, struct pt_regs *regs);
> bool (*sev_es_hcall_finish)(struct ghcb *ghcb, struct pt_regs *regs);
> bool (*is_private_mmio)(u64 addr);
> + bool (*tdx_hcall)(struct pt_regs *regs);
> };
>
> /**
> diff --git a/arch/x86/include/uapi/asm/prctl.h b/arch/x86/include/uapi/asm/prctl.h
> index 384e2cc6ac19..7fa289a1815b 100644
> --- a/arch/x86/include/uapi/asm/prctl.h
> +++ b/arch/x86/include/uapi/asm/prctl.h
> @@ -16,6 +16,9 @@
> #define ARCH_GET_XCOMP_GUEST_PERM 0x1024
> #define ARCH_REQ_XCOMP_GUEST_PERM 0x1025
>
> +#define ARCH_GET_COCO_USER_HCALL 0x1030
> +#define ARCH_SET_COCO_USER_HCALL 0x1031
> +

Ditto: tabs instead of spaces for alignment, please.

> #define ARCH_XCOMP_TILECFG 17
> #define ARCH_XCOMP_TILEDATA 18
>
> diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
> index 1b3d417cd6c4..16f8ab6cde2e 100644
> --- a/arch/x86/kernel/process.c
> +++ b/arch/x86/kernel/process.c
> @@ -1039,6 +1039,21 @@ unsigned long __get_wchan(struct task_struct *p)
> return addr;
> }
>
> +static int get_coco_user_hcall_mode(void)
> +{
> + return !test_thread_flag(TIF_COCO_USER_HCALL);
> +}
> +
> +static int set_coco_user_hcall_mode(unsigned long enabled)
> +{
> + if (enabled)
> + set_thread_flag(TIF_COCO_USER_HCALL);
> + else
> + clear_thread_flag(TIF_COCO_USER_HCALL);
> +
> + return 0;
> +}
> +

Hm. A per-thread flag is odd. I think it should be per-process.

> long do_arch_prctl_common(int option, unsigned long arg2)
> {
> switch (option) {
> @@ -1052,6 +1067,11 @@ long do_arch_prctl_common(int option, unsigned long arg2)
> case ARCH_GET_XCOMP_GUEST_PERM:
> case ARCH_REQ_XCOMP_GUEST_PERM:
> return fpu_xstate_prctl(option, arg2);
> + case ARCH_GET_COCO_USER_HCALL:
> + return get_coco_user_hcall_mode();
> + case ARCH_SET_COCO_USER_HCALL:
> + return set_coco_user_hcall_mode(arg2);
> +
> }
>
> return -EINVAL;
> --
> 2.40.1
>

--
Kiryl Shutsemau / Kirill A. Shutemov