RE: [PATCH V3 01/13] x86/HV: Initialize GHCB page in Isolation VM
From: Michael Kelley
Date: Thu Aug 12 2021 - 15:15:09 EST
From: Tianyu Lan <ltykernel@xxxxxxxxx> Sent: Monday, August 9, 2021 10:56 AM
> Subject: [PATCH V3 01/13] x86/HV: Initialize GHCB page in Isolation VM
The subject line tag on patches under arch/x86/hyperv is generally "x86/hyperv:".
There's some variation in the spelling of "hyperv", but let's go with the all
lowercase "hyperv".
>
> Hyper-V exposes GHCB page via SEV ES GHCB MSR for SNP guest
> to communicate with hypervisor. Map GHCB page for all
> cpus to read/write MSR register and submit hvcall request
> via GHCB.
>
> Signed-off-by: Tianyu Lan <Tianyu.Lan@xxxxxxxxxxxxx>
> ---
> arch/x86/hyperv/hv_init.c | 66 +++++++++++++++++++++++++++++++--
> arch/x86/include/asm/mshyperv.h | 2 +
> include/asm-generic/mshyperv.h | 2 +
> 3 files changed, 66 insertions(+), 4 deletions(-)
>
> diff --git a/arch/x86/hyperv/hv_init.c b/arch/x86/hyperv/hv_init.c
> index 708a2712a516..0bb4d9ca7a55 100644
> --- a/arch/x86/hyperv/hv_init.c
> +++ b/arch/x86/hyperv/hv_init.c
> @@ -20,6 +20,7 @@
> #include <linux/kexec.h>
> #include <linux/version.h>
> #include <linux/vmalloc.h>
> +#include <linux/io.h>
> #include <linux/mm.h>
> #include <linux/hyperv.h>
> #include <linux/slab.h>
> @@ -42,6 +43,31 @@ static void *hv_hypercall_pg_saved;
> struct hv_vp_assist_page **hv_vp_assist_page;
> EXPORT_SYMBOL_GPL(hv_vp_assist_page);
>
> +static int hyperv_init_ghcb(void)
> +{
> + u64 ghcb_gpa;
> + void *ghcb_va;
> + void **ghcb_base;
> +
> + if (!ms_hyperv.ghcb_base)
> + return -EINVAL;
> +
> + /*
> + * GHCB page is allocated by paravisor. The address
> + * returned by MSR_AMD64_SEV_ES_GHCB is above shared
> + * ghcb boundary and map it here.
> + */
> + rdmsrl(MSR_AMD64_SEV_ES_GHCB, ghcb_gpa);
> + ghcb_va = memremap(ghcb_gpa, HV_HYP_PAGE_SIZE, MEMREMAP_WB);
> + if (!ghcb_va)
> + return -ENOMEM;
> +
> + ghcb_base = (void **)this_cpu_ptr(ms_hyperv.ghcb_base);
> + *ghcb_base = ghcb_va;
> +
> + return 0;
> +}
> +
> static int hv_cpu_init(unsigned int cpu)
> {
> union hv_vp_assist_msr_contents msr = { 0 };
> @@ -85,6 +111,8 @@ static int hv_cpu_init(unsigned int cpu)
> }
> }
>
> + hyperv_init_ghcb();
> +
> return 0;
> }
>
> @@ -177,6 +205,14 @@ static int hv_cpu_die(unsigned int cpu)
> {
> struct hv_reenlightenment_control re_ctrl;
> unsigned int new_cpu;
> + void **ghcb_va = NULL;
I'm not seeing any reason why this needs to be initialized.
> +
> + if (ms_hyperv.ghcb_base) {
> + ghcb_va = (void **)this_cpu_ptr(ms_hyperv.ghcb_base);
> + if (*ghcb_va)
> + memunmap(*ghcb_va);
> + *ghcb_va = NULL;
> + }
>
> hv_common_cpu_die(cpu);
>
> @@ -383,9 +419,19 @@ void __init hyperv_init(void)
> VMALLOC_END, GFP_KERNEL, PAGE_KERNEL_ROX,
> VM_FLUSH_RESET_PERMS, NUMA_NO_NODE,
> __builtin_return_address(0));
> - if (hv_hypercall_pg == NULL) {
> - wrmsrl(HV_X64_MSR_GUEST_OS_ID, 0);
> - goto remove_cpuhp_state;
> + if (hv_hypercall_pg == NULL)
> + goto clean_guest_os_id;
> +
> + if (hv_isolation_type_snp()) {
> + ms_hyperv.ghcb_base = alloc_percpu(void *);
> + if (!ms_hyperv.ghcb_base)
> + goto clean_guest_os_id;
> +
> + if (hyperv_init_ghcb()) {
> + free_percpu(ms_hyperv.ghcb_base);
> + ms_hyperv.ghcb_base = NULL;
> + goto clean_guest_os_id;
> + }
Having the GHCB setup code here splits the hypercall page setup into
two parts, which is unexpected. First the memory is allocated
for the hypercall page, then the GHCB stuff is done, then the hypercall
MSR is set up. Is there a need to do this split? Also, if the GHCB stuff
fails and you goto clean_guest_os_id, the memory allocated for the
hypercall page is never freed.
It's also unexpected to have hyperv_init_ghcb() called here and called
in hv_cpu_init(). Wouldn't it be possible to set up ghcb_base *before*
cpuhp_setup_state() is called, so that hv_cpu_init() would take care of
calling hyperv_init_ghcb() for the boot CPU? That's the pattern used
by the VP assist page, the percpu input page, etc.
> }
>
> rdmsrl(HV_X64_MSR_HYPERCALL, hypercall_msr.as_uint64);
> @@ -456,7 +502,8 @@ void __init hyperv_init(void)
> hv_query_ext_cap(0);
> return;
>
> -remove_cpuhp_state:
> +clean_guest_os_id:
> + wrmsrl(HV_X64_MSR_GUEST_OS_ID, 0);
> cpuhp_remove_state(cpuhp);
> free_vp_assist_page:
> kfree(hv_vp_assist_page);
> @@ -484,6 +531,9 @@ void hyperv_cleanup(void)
> */
> hv_hypercall_pg = NULL;
>
> + if (ms_hyperv.ghcb_base)
> + free_percpu(ms_hyperv.ghcb_base);
> +
I don't think this cleanup is necessary. The primary purpose of
hyperv_cleanup() is to ensure that things like overlay pages are
properly reset in Hyper-V before doing a kexec(), or before
panicking and running the kdump kernel. There's no need to do
general memory freeing in Linux. Doing so just adds to the risk
that the panic path could itself fail.
> /* Reset the hypercall page */
> hypercall_msr.as_uint64 = 0;
> wrmsrl(HV_X64_MSR_HYPERCALL, hypercall_msr.as_uint64);
> @@ -559,3 +609,11 @@ bool hv_is_isolation_supported(void)
> {
> return hv_get_isolation_type() != HV_ISOLATION_TYPE_NONE;
> }
> +
> +DEFINE_STATIC_KEY_FALSE(isolation_type_snp);
> +
> +bool hv_isolation_type_snp(void)
> +{
> + return static_branch_unlikely(&isolation_type_snp);
> +}
> +EXPORT_SYMBOL_GPL(hv_isolation_type_snp);
> diff --git a/arch/x86/include/asm/mshyperv.h b/arch/x86/include/asm/mshyperv.h
> index adccbc209169..6627cfd2bfba 100644
> --- a/arch/x86/include/asm/mshyperv.h
> +++ b/arch/x86/include/asm/mshyperv.h
> @@ -11,6 +11,8 @@
> #include <asm/paravirt.h>
> #include <asm/mshyperv.h>
>
> +DECLARE_STATIC_KEY_FALSE(isolation_type_snp);
> +
> typedef int (*hyperv_fill_flush_list_func)(
> struct hv_guest_mapping_flush_list *flush,
> void *data);
> diff --git a/include/asm-generic/mshyperv.h b/include/asm-generic/mshyperv.h
> index c1ab6a6e72b5..4269f3174e58 100644
> --- a/include/asm-generic/mshyperv.h
> +++ b/include/asm-generic/mshyperv.h
> @@ -36,6 +36,7 @@ struct ms_hyperv_info {
> u32 max_lp_index;
> u32 isolation_config_a;
> u32 isolation_config_b;
> + void __percpu **ghcb_base;
This doesn't feel like the right place to put this pointer. The other
fields in the ms_hyperv_info structure are just fixed values obtained
from the CPUID instruction. The existing patterns similar to ghcb_base
are the VP assist page and the percpu input and output args. They are
all based on standalone global variables. It would be more consistent
to do the same with the ghcb_base.
> };
> extern struct ms_hyperv_info ms_hyperv;
>
> @@ -237,6 +238,7 @@ bool hv_is_hyperv_initialized(void);
> bool hv_is_hibernation_supported(void);
> enum hv_isolation_type hv_get_isolation_type(void);
> bool hv_is_isolation_supported(void);
> +bool hv_isolation_type_snp(void);
> void hyperv_cleanup(void);
> bool hv_query_ext_cap(u64 cap_query);
> #else /* CONFIG_HYPERV */
> --
> 2.25.1