RE: [PATCH v2 6/9] x86/hyperv: Introduce a global variable hyperv_paravisor_present

From: Michael Kelley (LINUX)
Date: Mon Aug 21 2023 - 15:33:24 EST


From: Dexuan Cui <decui@xxxxxxxxxxxxx> Sent: Sunday, August 20, 2023 1:27 PM
>
> The new variable hyperv_paravisor_present is set only when the VM
> is a SNP/TDX with the paravisor running: see ms_hyperv_init_platform().
>
> In many places, hyperv_paravisor_present can replace

You said "In many places". Are there places where it can't replace
ms_hyperv.paravisor_present? It looks like all the uses are gone
after this patch.

> ms_hyperv.paravisor_present, and it's also used to replace
> hv_isolation_type_snp() in drivers/hv/hv.c.
>
> Call hv_vtom_init() when it's a VBS VM or when hyperv_paravisor_present
> is true (i.e. the VM is a SNP/TDX VM with the paravisor).
>
> Enhance hv_vtom_init() for a TDX VM with the paravisor.
>
> The biggest motive to introduce hyperv_paravisor_present is that we
> can not use ms_hyperv.paravisor_present in arch/x86/include/asm/mshyperv.h:
> that would introduce a complicated header file dependency issue.

The discussion in this commit messages about hyperv_paravisor_present
is a bit scattered and confusing. I think you are introducing the global variable
to solve the header file dependency issue. Otherwise, the ms_hyperv field
would be equivalent. Then you are using hyperv_paravisor_present for
several purposes, including to decide whether to call hv_vtom_init() and
to simplify the logic in drivers/hv/hv.c. Maybe you could reorganize
the commit message a bit to be more direct regarding the purpose.

>
> In arch/x86/include/asm/mshyperv.h, _hv_do_fast_hypercall8()
> is changed to specially handle HVCALL_SIGNAL_EVENT for a TDX VM with the
> paravisor, because such a VM must use TDX GHCI (see hv_tdx_hypercall())
> for this hypercall. See vmbus_set_event() -> hv_do_fast_hypercall8() ->
> _hv_do_fast_hypercall8() -> hv_tdx_hypercall().

Embedding the special case for HVCALL_SIGNAL_EVENT within
hv_do_fast_hypercall8() is not consistent with how this special case
is handled for SNP. For SNP, the special case is coded directly into
vmbus_set_event(). Any reason not to do the same for TDX + paravisor?

>
> In hv_common_cpu_init(), don't decrypt the hyperv_pcpu_input_arg
> for a TDX VM with the paravisor, just like we don't decrypt the page
> for a SNP VM with the paravisor.
>
> Signed-off-by: Dexuan Cui <decui@xxxxxxxxxxxxx>
> ---
>
> Changes in v2: None
>
> arch/x86/hyperv/hv_apic.c | 4 ++--
> arch/x86/hyperv/hv_init.c | 4 ++--
> arch/x86/hyperv/ivm.c | 20 ++++++++++++++++++--
> arch/x86/include/asm/mshyperv.h | 9 ++++++---
> arch/x86/kernel/cpu/mshyperv.c | 13 ++++++++++---
> drivers/hv/connection.c | 2 +-
> drivers/hv/hv.c | 12 ++++++------
> drivers/hv/hv_common.c | 6 +++++-
> include/asm-generic/mshyperv.h | 1 +
> 9 files changed, 51 insertions(+), 20 deletions(-)
>
> diff --git a/arch/x86/hyperv/hv_apic.c b/arch/x86/hyperv/hv_apic.c
> index cb7429046d18d..8958836500d01 100644
> --- a/arch/x86/hyperv/hv_apic.c
> +++ b/arch/x86/hyperv/hv_apic.c
> @@ -179,7 +179,7 @@ static bool __send_ipi_mask(const struct cpumask *mask, int
> vector,
>
> /* A fully enlightened TDX VM uses GHCI rather than hv_hypercall_pg. */
> if (!hv_hypercall_pg) {
> - if (ms_hyperv.paravisor_present || !hv_isolation_type_tdx())
> + if (hyperv_paravisor_present || !hv_isolation_type_tdx())
> return false;
> }
>
> @@ -239,7 +239,7 @@ static bool __send_ipi_one(int cpu, int vector)
>
> /* A fully enlightened TDX VM uses GHCI rather than hv_hypercall_pg. */
> if (!hv_hypercall_pg) {
> - if (ms_hyperv.paravisor_present || !hv_isolation_type_tdx())
> + if (hyperv_paravisor_present || !hv_isolation_type_tdx())
> return false;
> }
>
> diff --git a/arch/x86/hyperv/hv_init.c b/arch/x86/hyperv/hv_init.c
> index c1c1b4e1502f0..933a53ef81197 100644
> --- a/arch/x86/hyperv/hv_init.c
> +++ b/arch/x86/hyperv/hv_init.c
> @@ -658,8 +658,8 @@ bool hv_is_hyperv_initialized(void)
> if (x86_hyper_type != X86_HYPER_MS_HYPERV)
> return false;
>
> - /* A TDX guest uses the GHCI call rather than hv_hypercall_pg. */
> - if (hv_isolation_type_tdx())
> + /* A TDX VM with no paravisor uses TDX GHCI call rather than hv_hypercall_pg */
> + if (hv_isolation_type_tdx() && !hyperv_paravisor_present)
> return true;
> /*
> * Verify that earlier initialization succeeded by checking
> diff --git a/arch/x86/hyperv/ivm.c b/arch/x86/hyperv/ivm.c
> index 6c7598d9e68a3..920ecb85802b8 100644
> --- a/arch/x86/hyperv/ivm.c
> +++ b/arch/x86/hyperv/ivm.c
> @@ -497,13 +497,29 @@ int hv_snp_boot_ap(int cpu, unsigned long start_ip)
>
> void __init hv_vtom_init(void)
> {
> + enum hv_isolation_type type = hv_get_isolation_type();
> /*
> * By design, a VM using vTOM doesn't see the SEV setting,
> * so SEV initialization is bypassed and sev_status isn't set.
> * Set it here to indicate a vTOM VM.
> */

The above comment applies just to the case HV_ISOLATION_TYPE_SNP,
not to the entire switch statement, so it should be moved under the
case.

> - sev_status = MSR_AMD64_SNP_VTOM;
> - cc_vendor = CC_VENDOR_AMD;
> + switch (type) {
> + case HV_ISOLATION_TYPE_VBS:
> + fallthrough;
> +
> + case HV_ISOLATION_TYPE_SNP:
> + sev_status = MSR_AMD64_SNP_VTOM;
> + cc_vendor = CC_VENDOR_AMD;
> + break;
> +
> + case HV_ISOLATION_TYPE_TDX:
> + cc_vendor = CC_VENDOR_INTEL;
> + break;
> +
> + default:
> + panic("hv_vtom_init: unsupported isolation type %d\n", type);
> + }
> +
> cc_set_mask(ms_hyperv.shared_gpa_boundary);
> physical_mask &= ms_hyperv.shared_gpa_boundary - 1;
>
> diff --git a/arch/x86/include/asm/mshyperv.h b/arch/x86/include/asm/mshyperv.h
> index 24d7f662a8beb..2a4c7dcf64038 100644
> --- a/arch/x86/include/asm/mshyperv.h
> +++ b/arch/x86/include/asm/mshyperv.h
> @@ -43,6 +43,7 @@ static inline unsigned char hv_get_nmi_reason(void)
>
> #if IS_ENABLED(CONFIG_HYPERV)
> extern int hyperv_init_cpuhp;
> +extern bool hyperv_paravisor_present;
>
> extern void *hv_hypercall_pg;
>
> @@ -76,7 +77,7 @@ static inline u64 hv_do_hypercall(u64 control, void *input, void *output)
> u64 hv_status;
>
> #ifdef CONFIG_X86_64
> - if (hv_isolation_type_tdx())
> + if (hv_isolation_type_tdx() && !hyperv_paravisor_present)
> return hv_tdx_hypercall(control,
> cc_mkdec(input_address),
> cc_mkdec(output_address));
> @@ -134,7 +135,9 @@ static inline u64 _hv_do_fast_hypercall8(u64 control, u64 input1)
> u64 hv_status;
>
> #ifdef CONFIG_X86_64
> - if (hv_isolation_type_tdx())
> + if (hv_isolation_type_tdx() &&
> + (!hyperv_paravisor_present ||
> + control == (HVCALL_SIGNAL_EVENT | HV_HYPERCALL_FAST_BIT)))

See comment above. This would be more consistent with SNP if it were
handled directly in vmbus_set_event().

> return hv_tdx_hypercall(control, input1, 0);
>
> if (hv_isolation_type_en_snp()) {
> @@ -188,7 +191,7 @@ static inline u64 _hv_do_fast_hypercall16(u64 control, u64 input1, u64 input2)
> u64 hv_status;
>
> #ifdef CONFIG_X86_64
> - if (hv_isolation_type_tdx())
> + if (hv_isolation_type_tdx() && !hyperv_paravisor_present)
> return hv_tdx_hypercall(control, input1, input2);
>
> if (hv_isolation_type_en_snp()) {
> diff --git a/arch/x86/kernel/cpu/mshyperv.c b/arch/x86/kernel/cpu/mshyperv.c
> index 14baa288b1935..3dff2ef43bc73 100644
> --- a/arch/x86/kernel/cpu/mshyperv.c
> +++ b/arch/x86/kernel/cpu/mshyperv.c
> @@ -40,6 +40,12 @@ bool hv_root_partition;
> bool hv_nested;
> struct ms_hyperv_info ms_hyperv;
>
> +/*
> + * Used in modules via hv_do_hypercall(): see arch/x86/include/asm/mshyperv.h.
> + * Exported in drivers/hv/hv_common.c to not break the ARM64 build.
> + */
> +bool hyperv_paravisor_present __ro_after_init;
> +
> #if IS_ENABLED(CONFIG_HYPERV)
> static inline unsigned int hv_get_nested_reg(unsigned int reg)
> {
> @@ -429,6 +435,8 @@ static void __init ms_hyperv_init_platform(void)
> ms_hyperv.shared_gpa_boundary =
> BIT_ULL(ms_hyperv.shared_gpa_boundary_bits);
>
> + hyperv_paravisor_present = !!ms_hyperv.paravisor_present;
> +
> pr_info("Hyper-V: Isolation Config: Group A 0x%x, Group B 0x%x\n",
> ms_hyperv.isolation_config_a, ms_hyperv.isolation_config_b);
>
> @@ -443,7 +451,7 @@ static void __init ms_hyperv_init_platform(void)
> /* A TDX VM must use x2APIC and doesn't use lazy EOI. */
> ms_hyperv.hints &= ~HV_X64_APIC_ACCESS_RECOMMENDED;
>
> - if (!ms_hyperv.paravisor_present) {
> + if (!hyperv_paravisor_present) {
> /*
> * The ms_hyperv.shared_gpa_boundary_active in
> * a fully enlightened TDX VM is 0, but the GPAs
> @@ -534,8 +542,7 @@ static void __init ms_hyperv_init_platform(void)
>
> #if IS_ENABLED(CONFIG_HYPERV)
> if ((hv_get_isolation_type() == HV_ISOLATION_TYPE_VBS) ||
> - ((hv_get_isolation_type() == HV_ISOLATION_TYPE_SNP) &&
> - ms_hyperv.paravisor_present))
> + hyperv_paravisor_present)
> hv_vtom_init();
> /*
> * Setup the hook to get control post apic initialization.
> diff --git a/drivers/hv/connection.c b/drivers/hv/connection.c
> index 02b54f85dc607..7f64fc942323b 100644
> --- a/drivers/hv/connection.c
> +++ b/drivers/hv/connection.c
> @@ -484,7 +484,7 @@ void vmbus_set_event(struct vmbus_channel *channel)
>
> ++channel->sig_events;
>
> - if (hv_isolation_type_snp())
> + if (hv_isolation_type_snp() && hyperv_paravisor_present)

This code change seems to be more properly part of Patch 9 in the
series when hv_isolation_type_en_snp() goes away.

> hv_ghcb_hypercall(HVCALL_SIGNAL_EVENT, &channel->sig_event,
> NULL, sizeof(channel->sig_event));
> else
> diff --git a/drivers/hv/hv.c b/drivers/hv/hv.c
> index 28bbb354324d6..20bc44923e4f0 100644
> --- a/drivers/hv/hv.c
> +++ b/drivers/hv/hv.c
> @@ -109,7 +109,7 @@ int hv_synic_alloc(void)
> * Synic message and event pages are allocated by paravisor.
> * Skip these pages allocation here.
> */
> - if (!hv_isolation_type_snp() && !hv_root_partition) {
> + if (!hyperv_paravisor_present && !hv_root_partition) {
> hv_cpu->synic_message_page =
> (void *)get_zeroed_page(GFP_ATOMIC);
> if (hv_cpu->synic_message_page == NULL) {
> @@ -128,7 +128,7 @@ int hv_synic_alloc(void)
> }
> }
>
> - if (!ms_hyperv.paravisor_present &&
> + if (!hyperv_paravisor_present &&
> (hv_isolation_type_en_snp() || hv_isolation_type_tdx())) {
> ret = set_memory_decrypted((unsigned long)
> hv_cpu->synic_message_page, 1);
> @@ -226,7 +226,7 @@ void hv_synic_enable_regs(unsigned int cpu)
> simp.as_uint64 = hv_get_register(HV_REGISTER_SIMP);
> simp.simp_enabled = 1;
>
> - if (hv_isolation_type_snp() || hv_root_partition) {
> + if (hyperv_paravisor_present || hv_root_partition) {
> /* Mask out vTOM bit. ioremap_cache() maps decrypted */
> u64 base = (simp.base_simp_gpa << HV_HYP_PAGE_SHIFT) &
> ~ms_hyperv.shared_gpa_boundary;
> @@ -249,7 +249,7 @@ void hv_synic_enable_regs(unsigned int cpu)
> siefp.as_uint64 = hv_get_register(HV_REGISTER_SIEFP);
> siefp.siefp_enabled = 1;
>
> - if (hv_isolation_type_snp() || hv_root_partition) {
> + if (hyperv_paravisor_present || hv_root_partition) {
> /* Mask out vTOM bit. ioremap_cache() maps decrypted */
> u64 base = (siefp.base_siefp_gpa << HV_HYP_PAGE_SHIFT) &
> ~ms_hyperv.shared_gpa_boundary;
> @@ -336,7 +336,7 @@ void hv_synic_disable_regs(unsigned int cpu)
> * addresses.
> */
> simp.simp_enabled = 0;
> - if (hv_isolation_type_snp() || hv_root_partition) {
> + if (hyperv_paravisor_present || hv_root_partition) {
> iounmap(hv_cpu->synic_message_page);
> hv_cpu->synic_message_page = NULL;
> } else {
> @@ -348,7 +348,7 @@ void hv_synic_disable_regs(unsigned int cpu)
> siefp.as_uint64 = hv_get_register(HV_REGISTER_SIEFP);
> siefp.siefp_enabled = 0;
>
> - if (hv_isolation_type_snp() || hv_root_partition) {
> + if (hyperv_paravisor_present || hv_root_partition) {
> iounmap(hv_cpu->synic_event_page);
> hv_cpu->synic_event_page = NULL;
> } else {
> diff --git a/drivers/hv/hv_common.c b/drivers/hv/hv_common.c
> index 4c858e1636da7..c0b0ac44ffa3c 100644
> --- a/drivers/hv/hv_common.c
> +++ b/drivers/hv/hv_common.c
> @@ -40,6 +40,9 @@
> bool __weak hv_root_partition;
> EXPORT_SYMBOL_GPL(hv_root_partition);
>
> +bool __weak hyperv_paravisor_present;
> +EXPORT_SYMBOL_GPL(hyperv_paravisor_present);
> +
> bool __weak hv_nested;
> EXPORT_SYMBOL_GPL(hv_nested);
>
> @@ -382,7 +385,8 @@ int hv_common_cpu_init(unsigned int cpu)
> *outputarg = (char *)mem + HV_HYP_PAGE_SIZE;
> }
>
> - if (hv_isolation_type_en_snp() || hv_isolation_type_tdx()) {
> + if (!hyperv_paravisor_present &&
> + (hv_isolation_type_en_snp() || hv_isolation_type_tdx())) {
> ret = set_memory_decrypted((unsigned long)mem, pgcount);
> if (ret) {
> /* It may be unsafe to free 'mem' */
> diff --git a/include/asm-generic/mshyperv.h b/include/asm-generic/mshyperv.h
> index f577eff58ea0b..94f87a0344590 100644
> --- a/include/asm-generic/mshyperv.h
> +++ b/include/asm-generic/mshyperv.h
> @@ -176,6 +176,7 @@ extern int vmbus_interrupt;
> extern int vmbus_irq;
>
> extern bool hv_root_partition;
> +extern bool hyperv_paravisor_present;
>
> #if IS_ENABLED(CONFIG_HYPERV)
> /*
> --
> 2.25.1