Re: [RFC v2-fix-v1 2/3] x86/tdx: Handle early IO operations

From: Williams, Dan J
Date: Sat Jun 05 2021 - 00:26:43 EST


On Wed, 2021-05-26 at 21:23 -0700, Kuppuswamy Sathyanarayanan wrote:
> From: Andi Kleen <ak@xxxxxxxxxxxxxxx>
>
> Add an early #VE handler to convert early port IOs into TDCALLs.
>
> TDX cannot do port IO directly. The TDX module triggers a #VE
> exception to let the guest kernel to emulate operations like
> IO ports, by converting them into TDCALLs to call the host.

s,kernel to emulate operations like IO ports,kernel emulate port I/O,

>
> A fully featured #VE handler support for port IO will be added
> later in this patch set (in patch titled "x86/tdx: Handle port
> I/O). But it can be used only at later point in the boot
> process. So to support port IO in early boot code, add a
> minimal support in early exception handler. This is similar to
> what AMD SEV does.

Clarify "fully featured". I naively thought that the notes below about
trace and printk were implying that the full featured #VE handler will
use printk() so it can't also use #VE since printk() would recurse into
the #VE handler if the serial console is using port IO.

...but that does not seem to be the reason since:

http://lore.kernel.org/r/20210527042356.3983284-4-sathyanarayanan.kuppuswamy@xxxxxxxxxxxxxxx

...is also using #VE for port IO emulation?

> This is mainly to support early_printk's serial driver, as
> well as potentially the VGA driver (although it is expected
> not to be used).
>
> The early handler only does IO calls and nothing else, and
> anything that goes wrong results in a normal early exception
> panic.
>
> It cannot share the code paths with the normal #VE handler
> because it needs to avoid using trace calls or printk.
>
> This early handler allows us to use the normal in*/out*
> macros without patching them for every driver. We don't
> expect IO port IO to be performance critical at all, so an
> extra #VE exception is no problem.

"There is no expectation that early port IO is performance critical, so
the #VE emulation cost is worth the simplicity benefit of not patching
out port IO usage in early code."

> There are also no concerns
> with nesting, since there should be no NMIs this early.
>
> Signed-off-by: Andi Kleen <ak@xxxxxxxxxxxxxxx>

Missing a Sathya signed-off-by.

> ---
>  arch/x86/include/asm/tdx.h |  6 ++++
>  arch/x86/kernel/head64.c   |  4 +++
>  arch/x86/kernel/tdx.c      | 59 ++++++++++++++++++++++++++++++++++++++
>  3 files changed, 69 insertions(+)
>
> diff --git a/arch/x86/include/asm/tdx.h b/arch/x86/include/asm/tdx.h
> index 53f844200909..e880a9dd40d3 100644
> --- a/arch/x86/include/asm/tdx.h
> +++ b/arch/x86/include/asm/tdx.h
> @@ -72,6 +72,7 @@ u64 __tdx_hypercall(u64 fn, u64 r12, u64 r13, u64 r14, u64 r15,
>                     struct tdx_hypercall_output *out);
>  
>  bool tdx_protected_guest_has(unsigned long flag);
> +bool tdg_early_handle_ve(struct pt_regs *regs);
>  
>  #else // !CONFIG_INTEL_TDX_GUEST
>  
> @@ -87,6 +88,11 @@ static inline bool tdx_protected_guest_has(unsigned long flag)
>         return false;
>  }
>  
> +static inline bool tdg_early_handle_ve(struct pt_regs *regs)
> +{
> +       return false;
> +}
> +
>  #endif /* CONFIG_INTEL_TDX_GUEST */
>  
>  #ifdef CONFIG_INTEL_TDX_GUEST_KVM
> diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c
> index 75f2401cb5db..23d1ff4626aa 100644
> --- a/arch/x86/kernel/head64.c
> +++ b/arch/x86/kernel/head64.c
> @@ -410,6 +410,10 @@ void __init do_early_exception(struct pt_regs *regs, int trapnr)
>             trapnr == X86_TRAP_VC && handle_vc_boot_ghcb(regs))
>                 return;
>  
> +       if (IS_ENABLED(CONFIG_INTEL_TDX_GUEST) &&

This explicit IS_ENABLED() is unnecessary given tdg_early_handle_ve()
returns false in the CONFIG_INTEL_TDX_GUEST=n case as defined above.

> +           trapnr == X86_TRAP_VE && tdg_early_handle_ve(regs))
> +               return;
> +
>         early_fixup_exception(regs, trapnr);
>  }
>  
> diff --git a/arch/x86/kernel/tdx.c b/arch/x86/kernel/tdx.c
> index 858e7f3d8f36..ca3442b7accf 100644
> --- a/arch/x86/kernel/tdx.c
> +++ b/arch/x86/kernel/tdx.c
> @@ -13,6 +13,10 @@
>  #define TDINFO                         1
>  #define TDGETVEINFO                    3
>  
> +#define VE_GET_IO_TYPE(exit_qual)      (((exit_qual) & 8) ? 0 : 1)

How about VE_IS_IO_OUT()? To match its usage as a flag below...

> +#define VE_GET_IO_SIZE(exit_qual)      (((exit_qual) & 7) + 1)
> +#define VE_GET_PORT_NUM(exit_qual)     ((exit_qual) >> 16)
> +
>  static struct {
>         unsigned int gpa_width;
>         unsigned long attributes;
> @@ -256,6 +260,61 @@ int tdg_handle_virtualization_exception(struct pt_regs *regs,
>         return ret;
>  }
>  
> +/*
> + * Handle early IO, mainly for early printks serial output.
> + * This avoids anything that doesn't work early on, like tracing
> + * or printks, by calling the low level functions directly. Any
> + * problems are handled by falling back to a standard early exception.
> + *
> + * Assumes the IO instruction was using ax, which is enforced
> + * by the standard io.h macros.
> + */
> +static __init bool tdx_early_io(struct ve_info *ve, struct pt_regs *regs)
> +{
> +       struct tdx_hypercall_output outh;
> +       int out = VE_GET_IO_TYPE(ve->exit_qual);
> +       int size = VE_GET_IO_SIZE(ve->exit_qual);
> +       int port = VE_GET_PORT_NUM(ve->exit_qual);
> +       int ret;
> +

...and if @out is a flag then the below can be simplified to:

ret = __tdx_hypercall(EXIT_REASON_IO_INSTRUCTION, size, out, port,
regs->ax, &outh);
if (!out && !ret) {
u64 mask = GENMASK(8 * size, 0);

regs->ax &= ~mask;
regs->ax |= outh.r11 & mask;
}

return !ret;


With the above fixups and clarifications you can add:

Reviewed-by: Dan Williams <dan.j.williams@xxxxxxxxx>

...but the discussion here about fully featured leaves me confused
about the approach taken in the next patch.

> +       if (out) {
> +               ret = __tdx_hypercall(EXIT_REASON_IO_INSTRUCTION,
> +                                     size, 1, port,
> +                                     regs->ax,
> +                                     &outh);
> +       } else {
> +               u64 mask = GENMASK(8 * size, 0);
> +
> +               ret = __tdx_hypercall(EXIT_REASON_IO_INSTRUCTION,
> +                                     size, 0, port,
> +                                     regs->ax, &outh);
> +               if (!ret) {
> +                       regs->ax &= ~mask;
> +                       regs->ax |= outh.r11 & mask;
> +               }
> +       }
> +
> +       return !ret;
> +}
> +
> +/*
> + * Early #VE exception handler. Just used to handle port IOs
> + * for early_printk. If anything goes wrong handle it like
> + * a normal early exception.
> + */
> +__init bool tdg_early_handle_ve(struct pt_regs *regs)
> +{
> +       struct ve_info ve;
> +
> +       if (tdg_get_ve_info(&ve))
> +               return false;
> +
> +       if (ve.exit_reason == EXIT_REASON_IO_INSTRUCTION)
> +               return tdx_early_io(&ve, regs);
> +
> +       return false;
> +}
> +
>  void __init tdx_early_init(void)
>  {
>         if (!cpuid_has_tdx_guest())