Re: [PATCHv4 7/8] x86/tdx: Unaccepted memory support

From: Dave Hansen
Date: Fri Apr 08 2022 - 14:28:34 EST


On 4/5/22 16:43, Kirill A. Shutemov wrote:
> All preparations are complete. Hookup TDX-specific code to accept memory.
>
> There are two tdx_accept_memory() implementations: one in main kernel
> and one in the decompresser.
>
> The implementation in core kernel uses tdx_enc_status_changed().
> The helper is not available in the decompresser, self-contained
> implementation added there instead.

Why isn't it available?

> diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
> index 7021ec725dd3..e4c31dbea6d7 100644
> --- a/arch/x86/Kconfig
> +++ b/arch/x86/Kconfig
> @@ -885,6 +885,7 @@ config INTEL_TDX_GUEST
> select ARCH_HAS_CC_PLATFORM
> select X86_MEM_ENCRYPT
> select X86_MCE
> + select UNACCEPTED_MEMORY
> help
> Support running as a guest under Intel TDX. Without this support,
> the guest kernel can not boot or run under TDX.

Ahh, there we go. Nice.

> diff --git a/arch/x86/boot/compressed/tdx.c b/arch/x86/boot/compressed/tdx.c
> index 918a7606f53c..a0bd1426d235 100644
> --- a/arch/x86/boot/compressed/tdx.c
> +++ b/arch/x86/boot/compressed/tdx.c
> @@ -9,6 +9,7 @@
> #include <uapi/asm/vmx.h>
>
> #include <asm/shared/tdx.h>
> +#include <asm/page_types.h>
>
> /* Called from __tdx_hypercall() for unrecoverable failure */
> void __tdx_hypercall_failed(void)
> @@ -75,3 +76,43 @@ void early_tdx_detect(void)
> pio_ops.f_outb = tdx_outb;
> pio_ops.f_outw = tdx_outw;
> }
> +
> +#define TDACCEPTPAGE 6
> +#define TDVMCALL_MAP_GPA 0x10001

That seems like unnecessary duplication. Can't a #define be trivially
shared?

> +/*
> + * Wrapper for standard use of __tdx_hypercall with no output aside from
> + * return code.
> + */
> +static inline u64 _tdx_hypercall(u64 fn, u64 r12, u64 r13, u64 r14, u64 r15)
> +{
> + struct tdx_hypercall_args args = {
> + .r10 = TDX_HYPERCALL_STANDARD,
> + .r11 = fn,
> + .r12 = r12,
> + .r13 = r13,
> + .r14 = r14,
> + .r15 = r15,
> + };
> +
> + return __tdx_hypercall(&args, 0);
> +}

Ditto on the sharing.

> +void tdx_accept_memory(phys_addr_t start, phys_addr_t end)
> +{
> + int i;
> +
> + if (_tdx_hypercall(TDVMCALL_MAP_GPA, start, end - start, 0, 0))
> + error("Cannot accept memory: MapGPA failed\n");
> +
> + /*
> + * For shared->private conversion, accept the page using TDACCEPTPAGE
> + * TDX module call.
> + */

What does the shared->private conversion have to do with this? I feel
like I'm missing something. Is unaccepted memory shared initially?

> + for (i = 0; i < (end - start) / PAGE_SIZE; i++) {
> + if (__tdx_module_call(TDACCEPTPAGE, start + i * PAGE_SIZE,
> + 0, 0, 0, NULL)) {
> + error("Cannot accept memory: page accept failed\n");
> + }
> + }
> +}
> diff --git a/arch/x86/boot/compressed/unaccepted_memory.c b/arch/x86/boot/compressed/unaccepted_memory.c
> index 3ebab63789bb..662ec32e3c42 100644
> --- a/arch/x86/boot/compressed/unaccepted_memory.c
> +++ b/arch/x86/boot/compressed/unaccepted_memory.c
> @@ -1,12 +1,33 @@
> // SPDX-License-Identifier: GPL-2.0-only
>
> +#include <asm/shared/tdx.h>
> #include "error.h"
> #include "misc.h"
> +#include "tdx.h"
> +
> +static bool is_tdx_guest(void)
> +{
> + static bool once;
> + static bool is_tdx;
> +
> + if (!once) {
> + u32 eax, sig[3];
> +
> + cpuid_count(TDX_CPUID_LEAF_ID, 0, &eax,
> + &sig[0], &sig[2], &sig[1]);
> + is_tdx = !memcmp(TDX_IDENT, sig, sizeof(sig));
> + }
> +
> + return is_tdx;
> +}

Do you need to set 'once'?

> static inline void __accept_memory(phys_addr_t start, phys_addr_t end)
> {
> /* Platform-specific memory-acceptance call goes here */
> - error("Cannot accept memory");
> + if (is_tdx_guest())
> + tdx_accept_memory(start, end);
> + else
> + error("Cannot accept memory");
> }

Should those is_tdx_guest() checks be a new CC_ attribute, say:

cc_guest_has(CC_UNACCEPTED_MEMORY)

and then have a:

cc_accept_memory(start, end);

?

Or is that overkill when we only have TDX?

> void mark_unaccepted(struct boot_params *params, u64 start, u64 end)
> diff --git a/arch/x86/coco/tdx/tdx.c b/arch/x86/coco/tdx/tdx.c
> index 03deb4d6920d..0fcb54e07797 100644
> --- a/arch/x86/coco/tdx/tdx.c
> +++ b/arch/x86/coco/tdx/tdx.c
> @@ -606,16 +606,8 @@ static bool try_accept_one(phys_addr_t *start, unsigned long len,
> return true;
> }
>
> -/*
> - * Inform the VMM of the guest's intent for this physical page: shared with
> - * the VMM or private to the guest. The VMM is expected to change its mapping
> - * of the page in response.
> - */
> -static bool tdx_enc_status_changed(unsigned long vaddr, int numpages, bool enc)
> +static bool __tdx_enc_status_changed(phys_addr_t start, phys_addr_t end, bool enc)
> {
> - phys_addr_t start = __pa(vaddr);
> - phys_addr_t end = __pa(vaddr + numpages * PAGE_SIZE);
> -
> if (!enc) {
> /* Set the shared (decrypted) bits: */
> start |= cc_mkdec(0);
> @@ -660,6 +652,25 @@ static bool tdx_enc_status_changed(unsigned long vaddr, int numpages, bool enc)
> return true;
> }
>
> +void tdx_accept_memory(phys_addr_t start, phys_addr_t end)
> +{
> + if (!__tdx_enc_status_changed(start, end, true))
> + panic("Accepting memory failed\n");
> +}

Could we make a bit better use of our naming bytes? "__" tells us
nothing. What if we had:

tdx_enc_status_changed_phys()

we could call the other one _virt() or leave it alone too.

> +/*
> + * Inform the VMM of the guest's intent for this physical page: shared with
> + * the VMM or private to the guest. The VMM is expected to change its mapping
> + * of the page in response.
> + */
> +static bool tdx_enc_status_changed(unsigned long vaddr, int numpages, bool enc)
> +{
> + phys_addr_t start = __pa(vaddr);
> + phys_addr_t end = __pa(vaddr + numpages * PAGE_SIZE);
> +
> + return __tdx_enc_status_changed(start, end, enc);
> +}
> +
> void __init tdx_early_init(void)
> {
> u64 cc_mask;
> diff --git a/arch/x86/include/asm/shared/tdx.h b/arch/x86/include/asm/shared/tdx.h
> index e53f26228fbb..4eca6492b108 100644
> --- a/arch/x86/include/asm/shared/tdx.h
> +++ b/arch/x86/include/asm/shared/tdx.h
> @@ -36,5 +36,25 @@ u64 __tdx_hypercall(struct tdx_hypercall_args *args, unsigned long flags);
> /* Called from __tdx_hypercall() for unrecoverable failure */
> void __tdx_hypercall_failed(void);
>
> +/*
> + * Used in __tdx_module_call() to gather the output registers' values of the
> + * TDCALL instruction when requesting services from the TDX module. This is a
> + * software only structure and not part of the TDX module/VMM ABI
> + */
> +struct tdx_module_output {
> + u64 rcx;
> + u64 rdx;
> + u64 r8;
> + u64 r9;
> + u64 r10;
> + u64 r11;
> +};

Is this the first module call with an output?

> +/* Used to communicate with the TDX module */
> +u64 __tdx_module_call(u64 fn, u64 rcx, u64 rdx, u64 r8, u64 r9,
> + struct tdx_module_output *out);
> +
> +void tdx_accept_memory(phys_addr_t start, phys_addr_t end);
> +
> #endif /* !__ASSEMBLY__ */
> #endif /* _ASM_X86_SHARED_TDX_H */
> diff --git a/arch/x86/include/asm/tdx.h b/arch/x86/include/asm/tdx.h
> index 020c81a7c729..d9106d3e89f8 100644
> --- a/arch/x86/include/asm/tdx.h
> +++ b/arch/x86/include/asm/tdx.h
> @@ -20,21 +20,6 @@
>
> #ifndef __ASSEMBLY__
>
> -/*
> - * Used to gather the output registers values of the TDCALL and SEAMCALL
> - * instructions when requesting services from the TDX module.
> - *
> - * This is a software only structure and not part of the TDX module/VMM ABI.
> - */
> -struct tdx_module_output {
> - u64 rcx;
> - u64 rdx;
> - u64 r8;
> - u64 r9;
> - u64 r10;
> - u64 r11;
> -};
> -
> /*
> * Used by the #VE exception handler to gather the #VE exception
> * info from the TDX module. This is a software only structure
> @@ -55,10 +40,6 @@ struct ve_info {
>
> void __init tdx_early_init(void);
>
> -/* Used to communicate with the TDX module */
> -u64 __tdx_module_call(u64 fn, u64 rcx, u64 rdx, u64 r8, u64 r9,
> - struct tdx_module_output *out);
> -
> void tdx_get_ve_info(struct ve_info *ve);

There's enough TDX helper munging going on here that I'd probably break
this out into a separate patch.

> diff --git a/arch/x86/mm/unaccepted_memory.c b/arch/x86/mm/unaccepted_memory.c
> index 3588a7cb954c..2f1c3c0375cd 100644
> --- a/arch/x86/mm/unaccepted_memory.c
> +++ b/arch/x86/mm/unaccepted_memory.c
> @@ -6,6 +6,7 @@
>
> #include <asm/io.h>
> #include <asm/setup.h>
> +#include <asm/shared/tdx.h>
> #include <asm/unaccepted_memory.h>
>
> static DEFINE_SPINLOCK(unaccepted_memory_lock);
> @@ -26,7 +27,10 @@ void accept_memory(phys_addr_t start, phys_addr_t end)
> for_each_set_bitrange_from(rs, re, unaccepted_memory,
> DIV_ROUND_UP(end, PMD_SIZE)) {
> /* Platform-specific memory-acceptance call goes here */
> - panic("Cannot accept memory");
> + if (cpu_feature_enabled(X86_FEATURE_TDX_GUEST))
> + tdx_accept_memory(rs * PMD_SIZE, re * PMD_SIZE);
> + else
> + panic("Cannot accept memory");
> bitmap_clear(unaccepted_memory, rs, re - rs);
> }
> spin_unlock_irqrestore(&unaccepted_memory_lock, flags);