RE: [PATCH v5] hyperv: root partition faults writing to VP ASSIST MSR PAGE

From: Sunil Muthuswamy
Date: Sat Jul 31 2021 - 16:36:47 EST


> For the Root partition the VP assist pages are pre-determined by the
> hypervisor. The Root kernel is not allowed to change them to
> different locations. Thus we get the stack trace below, because the
> current implementation has the Root partition attempting a write to
> that MSR.
>
> [ 2.778197] unchecked MSR access error: WRMSR to 0x40000073 (tried to
> write 0x0000000145ac5001) at rIP: 0xffffffff810c1084
> (native_write_msr+0x4/0x30)
> [ 2.784867] Call Trace:
> [ 2.791507] hv_cpu_init+0xf1/0x1c0
> [ 2.798144] ? hyperv_report_panic+0xd0/0xd0
> [ 2.804806] cpuhp_invoke_callback+0x11a/0x440
> [ 2.811465] ? hv_resume+0x90/0x90
> [ 2.818137] cpuhp_issue_call+0x126/0x130
> [ 2.824782] __cpuhp_setup_state_cpuslocked+0x102/0x2b0
> [ 2.831427] ? hyperv_report_panic+0xd0/0xd0
> [ 2.838075] ? hyperv_report_panic+0xd0/0xd0
> [ 2.844723] ? hv_resume+0x90/0x90
> [ 2.851375] __cpuhp_setup_state+0x3d/0x90
> [ 2.858030] hyperv_init+0x14e/0x410
> [ 2.864689] ? enable_IR_x2apic+0x190/0x1a0
> [ 2.871349] apic_intr_mode_init+0x8b/0x100
> [ 2.878017] x86_late_time_init+0x20/0x30
> [ 2.884675] start_kernel+0x459/0x4fb
> [ 2.891329] secondary_startup_64_no_verify+0xb0/0xbb
>
> Since the hypervisor already provides the VP assist page for the root
> partition, we need to memremap that memory from the hypervisor for
> the root kernel to use. The mapping is done in hv_cpu_init during
> bringup and is unmapped in hv_cpu_die during teardown.
>
> Signed-off-by: Praveen Kumar <kumarpraveen@xxxxxxxxxxxxxxxxxxx>
> ---
> arch/x86/hyperv/hv_init.c | 64 ++++++++++++++++++++----------
> arch/x86/include/asm/hyperv-tlfs.h | 9 +++++
> 2 files changed, 53 insertions(+), 20 deletions(-)
>
> changelog:
> v1: initial patch
> v2: commit message changes, removal of HV_MSR_APIC_ACCESS_AVAILABLE
> check and addition of null check before reading the VP assist MSR
> for root partition
> v3: added new data structure to handle VP ASSIST MSR page and done
> handling in hv_cpu_init and hv_cpu_die
> v4: better code alignment, VP ASSIST handling correction for root
> partition in hv_cpu_die and renaming of hv_vp_assist_msr_contents
> attribute
> v5: disable VP ASSIST page for root partition during hv_cpu_die
> ---
> diff --git a/arch/x86/hyperv/hv_init.c b/arch/x86/hyperv/hv_init.c
> index 6f247e7e07eb..a46bd92c532a 100644
> --- a/arch/x86/hyperv/hv_init.c
> +++ b/arch/x86/hyperv/hv_init.c
> @@ -44,6 +44,7 @@ EXPORT_SYMBOL_GPL(hv_vp_assist_page);
>
> static int hv_cpu_init(unsigned int cpu)
> {
> + union hv_vp_assist_msr_contents msr = {0};
> struct hv_vp_assist_page **hvp = &hv_vp_assist_page[smp_processor_id()];
> int ret;
>
> @@ -54,25 +55,34 @@ static int hv_cpu_init(unsigned int cpu)
> if (!hv_vp_assist_page)
> return 0;
>
> - /*
> - * The VP ASSIST PAGE is an "overlay" page (see Hyper-V TLFS's Section
> - * 5.2.1 "GPA Overlay Pages"). Here it must be zeroed out to make sure
> - * we always write the EOI MSR in hv_apic_eoi_write() *after* the
> - * EOI optimization is disabled in hv_cpu_die(), otherwise a CPU may
> - * not be stopped in the case of CPU offlining and the VM will hang.
> - */
> if (!*hvp) {
> - *hvp = __vmalloc(PAGE_SIZE, GFP_KERNEL | __GFP_ZERO);
> - }
> -
> - if (*hvp) {
> - u64 val;
> -
> - val = vmalloc_to_pfn(*hvp);
> - val = (val << HV_X64_MSR_VP_ASSIST_PAGE_ADDRESS_SHIFT) |
> - HV_X64_MSR_VP_ASSIST_PAGE_ENABLE;
> -
> - wrmsrl(HV_X64_MSR_VP_ASSIST_PAGE, val);
> + if (hv_root_partition) {
> + /*
> + * For Root partition we get the hypervisor provided VP ASSIST
> + * PAGE, instead of allocating a new page.
> + */
> + rdmsrl(HV_X64_MSR_VP_ASSIST_PAGE, msr.as_uint64);
> + *hvp = memremap(msr.pfn <<
> + HV_X64_MSR_VP_ASSIST_PAGE_ADDRESS_SHIFT,
> + PAGE_SIZE, MEMREMAP_WB);
> + } else {
> + /*
> + * The VP ASSIST PAGE is an "overlay" page (see Hyper-V TLFS's
> + * Section 5.2.1 "GPA Overlay Pages"). Here it must be zeroed
> + * out to make sure we always write the EOI MSR in
> + * hv_apic_eoi_write() *after* the EOI optimization is disabled
> + * in hv_cpu_die(), otherwise a CPU may not be stopped in the
> + * case of CPU offlining and the VM will hang.
> + */
> + *hvp = __vmalloc(PAGE_SIZE, GFP_KERNEL | __GFP_ZERO);
> + if (*hvp)
> + msr.pfn = vmalloc_to_pfn(*hvp);
> + }
> + WARN_ON(!(*hvp));
> + if (*hvp) {
> + msr.enable = 1;
> + wrmsrl(HV_X64_MSR_VP_ASSIST_PAGE, msr.as_uint64);
> + }
> }
>
> return 0;
> @@ -170,8 +180,22 @@ static int hv_cpu_die(unsigned int cpu)
>
> hv_common_cpu_die(cpu);
>
> - if (hv_vp_assist_page && hv_vp_assist_page[cpu])
> - wrmsrl(HV_X64_MSR_VP_ASSIST_PAGE, 0);
> + if (hv_vp_assist_page && hv_vp_assist_page[cpu]) {
> + union hv_vp_assist_msr_contents msr = {0};
> + if (hv_root_partition) {
> + /*
> + * For Root partition the VP ASSIST page is mapped to
> + * hypervisor provided page, and thus, we unmap the
> + * page here and nullify it, so that in future we have
> + * correct page address mapped in hv_cpu_init.
> + */
> + memunmap(hv_vp_assist_page[cpu]);
> + hv_vp_assist_page[cpu] = NULL;
> + rdmsrl(HV_X64_MSR_VP_ASSIST_PAGE, msr.as_uint64);
> + msr.enable = 0;
> + }
> + wrmsrl(HV_X64_MSR_VP_ASSIST_PAGE, msr.as_uint64);
> + }
>
> if (hv_reenlightenment_cb == NULL)
> return 0;
> diff --git a/arch/x86/include/asm/hyperv-tlfs.h b/arch/x86/include/asm/hyperv-tlfs.h
> index f1366ce609e3..2322d6bd5883 100644
> --- a/arch/x86/include/asm/hyperv-tlfs.h
> +++ b/arch/x86/include/asm/hyperv-tlfs.h
> @@ -288,6 +288,15 @@ union hv_x64_msr_hypercall_contents {
> } __packed;
> };
>
> +union hv_vp_assist_msr_contents {
> + u64 as_uint64;
> + struct {
> + u64 enable:1;
> + u64 reserved:11;
> + u64 pfn:52;
> + } __packed;
> +};
> +
> struct hv_reenlightenment_control {
> __u64 vector:8;
> __u64 reserved1:8;
> --
> 2.25.1

Reviewed-by: Sunil Muthuswamy <sunilmut@xxxxxxxxxxxxx>