Re: [RFC PATCH v2 08/69] KVM: TDX: add trace point before/after TDX SEAMCALLs

From: Sean Christopherson
Date: Tue Jul 13 2021 - 15:33:24 EST


On Fri, Jul 02, 2021, isaku.yamahata@xxxxxxxxx wrote:
> From: Isaku Yamahata <isaku.yamahata@xxxxxxxxx>
>
> Signed-off-by: Isaku Yamahata <isaku.yamahata@xxxxxxxxx>
> ---
> arch/x86/kvm/trace.h         | 80 ++++++++++++++++++++++++++++++
> arch/x86/kvm/vmx/seamcall.h  | 22 ++++++++-
> arch/x86/kvm/vmx/tdx_arch.h  | 47 ++++++++++++++++++
> arch/x86/kvm/vmx/tdx_errno.h | 96 ++++++++++++++++++++++++++++++++++++
> arch/x86/kvm/x86.c           |  2 +
> 5 files changed, 246 insertions(+), 1 deletion(-)
>
> diff --git a/arch/x86/kvm/trace.h b/arch/x86/kvm/trace.h
> index 4f839148948b..c3398d0de9a7 100644
> --- a/arch/x86/kvm/trace.h
> +++ b/arch/x86/kvm/trace.h
> @@ -8,6 +8,9 @@
> #include <asm/clocksource.h>
> #include <asm/pvclock-abi.h>
>
> +#include "vmx/tdx_arch.h"
> +#include "vmx/tdx_errno.h"
> +
> #undef TRACE_SYSTEM
> #define TRACE_SYSTEM kvm
>
> @@ -659,6 +662,83 @@ TRACE_EVENT(kvm_nested_vmexit_inject,
> 		  __entry->exit_int_info, __entry->exit_int_info_err)
> );
>
> +/*
> + * Tracepoint for the start of TDX SEAMCALLs.
> + */
> +TRACE_EVENT(kvm_tdx_seamcall_enter,

To avoid confusion, I think it makes sense to avoid "enter" and "exit". E.g.
my first reaction was that the tracepoint was specific to TDENTER. And under
the hood, SEAMCALL is technically an exit :-)

What about kvm_tdx_seamcall and kvm_tdx_seamret? If the seamret usage is too
much of a stretch, kvm_tdx_seamcall_begin/end?

> +	TP_PROTO(int cpuid, __u64 op, __u64 rcx, __u64 rdx, __u64 r8,
> +		 __u64 r9, __u64 r10),
> +	TP_ARGS(cpuid, op, rcx, rdx, r8, r9, r10),

"cpuid" is potentially confusing without looking at the caller. pcpu or pcpu_id
would be preferable.
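
Folding that and the enter/exit rename together, the "call" side could look
something like this (completely untested sketch; the fields just mirror the
patch's TP_PROTO, and kvm_tdx_seamret would be the mirror image that also
captures err and the tdx_ex_ret registers):

TRACE_EVENT(kvm_tdx_seamcall,
	TP_PROTO(int pcpu_id, __u64 op, __u64 rcx, __u64 rdx, __u64 r8,
		 __u64 r9, __u64 r10),
	TP_ARGS(pcpu_id, op, rcx, rdx, r8, r9, r10),

	TP_STRUCT__entry(
		__field(int,   pcpu_id)
		__field(__u64, op)
		__field(__u64, rcx)
		__field(__u64, rdx)
		__field(__u64, r8)
		__field(__u64, r9)
		__field(__u64, r10)
	),

	TP_fast_assign(
		__entry->pcpu_id = pcpu_id;
		__entry->op = op;
		__entry->rcx = rcx;
		__entry->rdx = rdx;
		__entry->r8 = r8;
		__entry->r9 = r9;
		__entry->r10 = r10;
	),

	TP_printk("pcpu %d op %llu rcx 0x%llx rdx 0x%llx r8 0x%llx r9 0x%llx r10 0x%llx",
		  __entry->pcpu_id, __entry->op, __entry->rcx, __entry->rdx,
		  __entry->r8, __entry->r9, __entry->r10)
);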

> diff --git a/arch/x86/kvm/vmx/seamcall.h b/arch/x86/kvm/vmx/seamcall.h
> index a318940f62ed..2c83ab46eeac 100644
> --- a/arch/x86/kvm/vmx/seamcall.h
> +++ b/arch/x86/kvm/vmx/seamcall.h
> @@ -9,12 +9,32 @@
> #else
>
> #ifndef seamcall
> +#include "trace.h"
> +
> struct tdx_ex_ret;
> asmlinkage u64 __seamcall(u64 op, u64 rcx, u64 rdx, u64 r8, u64 r9, u64 r10,
> 			  struct tdx_ex_ret *ex);
>
> +static inline u64 _seamcall(u64 op, u64 rcx, u64 rdx, u64 r8, u64 r9, u64 r10,
> +			    struct tdx_ex_ret *ex)
> +{
> +	u64 err;
> +
> +	trace_kvm_tdx_seamcall_enter(smp_processor_id(), op,
> +				     rcx, rdx, r8, r9, r10);
> +	err = __seamcall(op, rcx, rdx, r8, r9, r10, ex);

What was the motivation behind switching from the macro magic[*] to a dedicated
asm subroutine? The macros are gross, but IMO they yielded much more readable
code for the upper-level helpers, which is what people will look at the vast
majority of the time. E.g.

static inline u64 tdh_sys_lp_shutdown(void)
{
return seamcall(TDH_SYS_LP_SHUTDOWN, 0, 0, 0, 0, 0, NULL);
}

static inline u64 tdh_mem_track(hpa_t tdr)
{
return seamcall(TDH_MEM_TRACK, tdr, 0, 0, 0, 0, NULL);
}

versus

static inline u64 tdsysshutdownlp(void)
{
seamcall_0(TDSYSSHUTDOWNLP);
}

static inline u64 tdtrack(hpa_t tdr)
{
seamcall_1(TDTRACK, tdr);
}


The new approach also generates very suboptimal code due to the need to shuffle
registers everywhere, e.g. gcc doesn't inline _seamcall because it's a whopping
200+ bytes.

[*] https://patchwork.kernel.org/project/kvm/patch/25f0d2c2f73c20309a1b578cc5fc15f4fd6b9a13.1605232743.git.isaku.yamahata@xxxxxxxxx/
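
For reference, the v1 macros were along these lines (reconstructed from
memory rather than copied from the link, so treat the exact constraints as
approximate):

/* SEAMCALL opcode; simplified reconstruction of the v1 wrappers. */
#define SEAMCALL	".byte 0x66, 0x0f, 0x01, 0xcf"

#define seamcall_N(fn, inputs...)				\
do {								\
	u64 err;						\
								\
	/* Leaf number goes in RAX, status comes back in RAX. */\
	asm volatile(SEAMCALL					\
		     : "=a"(err)				\
		     : "a"(SEAMCALL_##fn), inputs		\
		     : "memory");				\
	return err;						\
} while (0)

#define seamcall_0(fn)			seamcall_N(fn, "i"(0))
#define seamcall_1(fn, rcx)		seamcall_N(fn, "c"(rcx))
#define seamcall_2(fn, rcx, rdx)	seamcall_N(fn, "c"(rcx), "d"(rdx))

Note that the "return" is baked into the macros, which is why the
tdsysshutdownlp()/tdtrack() snippets above have no explicit return statement,
and each input lands directly in the register SEAMCALL wants, i.e. no
argument shuffling.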

> +	if (ex)
> +		trace_kvm_tdx_seamcall_exit(smp_processor_id(), op, err, ex->rcx,

smp_processor_id() is not stable since this code runs with IRQs and preemption
enabled, e.g. if the task is preempted and migrated between the tracepoint and
the actual SEAMCALL then the pCPU reported by the tracepoint will be wrong (and
with CONFIG_DEBUG_PREEMPT, calling smp_processor_id() from preemptible context
will splat). There could also be weirdly "nested" tracepoints since migrating
the task will generate TDH_VP_FLUSH.
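
Spelled out, annotating the code above (the pCPU numbers are obviously just
for illustration):

	trace_kvm_tdx_seamcall_enter(smp_processor_id(), op,	  /* pCPU 0 */
				     rcx, rdx, r8, r9, r10);

	/*
	 * <-- preempted and migrated here; the migration itself does
	 *     TDH_VP_FLUSH, which fires its own "nested" tracepoints
	 */

	err = __seamcall(op, rcx, rdx, r8, r9, r10, ex);	  /* pCPU 1 */
	trace_kvm_tdx_seamcall_exit(smp_processor_id(), op, err, /* pCPU 1 */
				    ex->rcx, ex->rdx, ex->r8, ex->r9,
				    ex->r10, ex->r11);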

> +					    ex->rdx, ex->r8, ex->r9, ex->r10,
> +					    ex->r11);
> +	else
> +		trace_kvm_tdx_seamcall_exit(smp_processor_id(), op, err,
> +					    0, 0, 0, 0, 0, 0);
> +	return err;
> +}
> +
> #define seamcall(op, rcx, rdx, r8, r9, r10, ex)				\
> -	__seamcall(SEAMCALL_##op, (rcx), (rdx), (r8), (r9), (r10), (ex))
> +	_seamcall(SEAMCALL_##op, (rcx), (rdx), (r8), (r9), (r10), (ex))
> #endif