Re: [PATCH 2/6] x86: hv: hv_init.c: Replace alloc_page() with kmem_cache_alloc()

From: Vitaly Kuznetsov
Date: Fri Apr 05 2019 - 07:31:09 EST


Maya Nakamura <m.maya.nakamura@xxxxxxxxx> writes:

> Switch from the function that allocates a single Linux guest page to a
> different one to use a Hyper-V page because the guest page size and
> hypervisor page size concepts are different, even though they happen to
> be the same value on x86.
>
> Signed-off-by: Maya Nakamura <m.maya.nakamura@xxxxxxxxx>
> ---
> arch/x86/hyperv/hv_init.c | 38 +++++++++++++++++++++++++++-----------
> 1 file changed, 27 insertions(+), 11 deletions(-)
>
> diff --git a/arch/x86/hyperv/hv_init.c b/arch/x86/hyperv/hv_init.c
> index e4ba467a9fc6..5f946135aa18 100644
> --- a/arch/x86/hyperv/hv_init.c
> +++ b/arch/x86/hyperv/hv_init.c
> @@ -31,6 +31,7 @@
> #include <linux/hyperv.h>
> #include <linux/slab.h>
> #include <linux/cpuhotplug.h>
> +#include <asm/set_memory.h>
>
> #ifdef CONFIG_HYPERV_TSCPAGE
>
> @@ -98,18 +99,20 @@ EXPORT_SYMBOL_GPL(hyperv_pcpu_input_arg);
> u32 hv_max_vp_index;
> EXPORT_SYMBOL_GPL(hv_max_vp_index);
>
> +struct kmem_cache *cachep;
> +EXPORT_SYMBOL_GPL(cachep);
> +
> static int hv_cpu_init(unsigned int cpu)
> {
> u64 msr_vp_index;
> struct hv_vp_assist_page **hvp = &hv_vp_assist_page[smp_processor_id()];
> void **input_arg;
> - struct page *pg;
>
> input_arg = (void **)this_cpu_ptr(hyperv_pcpu_input_arg);
> - pg = alloc_page(GFP_KERNEL);
> - if (unlikely(!pg))
> + *input_arg = kmem_cache_alloc(cachep, GFP_KERNEL);

I'm not sure use of kmem_cache is justified here: pages we allocate are
not cache-line and all these allocations are supposed to persist for the
lifetime of the guest. In case you think that even on x86 it will be
possible to see PAGE_SIZE != HV_HYP_PAGE_SIZE you can use alloc_pages()
instead.

Also, in case the idea is to generalize stuff, what will happen if
PAGE_SIZE > HV_HYP_PAGE_SIZE? Who will guarantee proper alignment?

I think we can leave hypercall arguments, vp_assist and similar pages
alone for now: the code is not going to be shared among architectures
anyways.

> +
> + if (unlikely(!*input_arg))
> return -ENOMEM;
> - *input_arg = page_address(pg);
>
> hv_get_vp_index(msr_vp_index);
>
> @@ -122,14 +125,12 @@ static int hv_cpu_init(unsigned int cpu)
> return 0;
>
> if (!*hvp)
> - *hvp = __vmalloc(PAGE_SIZE, GFP_KERNEL, PAGE_KERNEL);
> + *hvp = kmem_cache_alloc(cachep, GFP_KERNEL);
>
> if (*hvp) {
> u64 val;
>
> - val = vmalloc_to_pfn(*hvp);
> - val = (val << HV_X64_MSR_VP_ASSIST_PAGE_ADDRESS_SHIFT) |
> - HV_X64_MSR_VP_ASSIST_PAGE_ENABLE;
> + val = virt_to_phys(*hvp) | HV_X64_MSR_VP_ASSIST_PAGE_ENABLE;
>
> wrmsrl(HV_X64_MSR_VP_ASSIST_PAGE, val);
> }
> @@ -233,17 +234,22 @@ static int hv_cpu_die(unsigned int cpu)
> unsigned long flags;
> void **input_arg;
> void *input_pg = NULL;
> + struct hv_vp_assist_page **hvp = &hv_vp_assist_page[cpu];
>
> local_irq_save(flags);
> input_arg = (void **)this_cpu_ptr(hyperv_pcpu_input_arg);
> input_pg = *input_arg;
> *input_arg = NULL;
> local_irq_restore(flags);
> - free_page((unsigned long)input_pg);
> + kmem_cache_free(cachep, input_pg);
> + input_pg = NULL;
>
> if (hv_vp_assist_page && hv_vp_assist_page[cpu])
> wrmsrl(HV_X64_MSR_VP_ASSIST_PAGE, 0);
>
> + kmem_cache_free(cachep, *hvp);
> + *hvp = NULL;
> +
> if (hv_reenlightenment_cb == NULL)
> return 0;
>
> @@ -325,6 +331,11 @@ void __init hyperv_init(void)
> goto free_vp_index;
> }
>
> + cachep = kmem_cache_create("hyperv_pages", HV_HYP_PAGE_SIZE,
> + HV_HYP_PAGE_SIZE, 0, NULL);
> + if (!cachep)
> + goto free_vp_assist_page;
> +
> cpuhp = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "x86/hyperv_init:online",
> hv_cpu_init, hv_cpu_die);
> if (cpuhp < 0)
> @@ -338,7 +349,10 @@ void __init hyperv_init(void)
> guest_id = generate_guest_id(0, LINUX_VERSION_CODE, 0);
> wrmsrl(HV_X64_MSR_GUEST_OS_ID, guest_id);
>
> - hv_hypercall_pg = __vmalloc(PAGE_SIZE, GFP_KERNEL, PAGE_KERNEL_RX);
> + hv_hypercall_pg = kmem_cache_alloc(cachep, GFP_KERNEL);
> + if (hv_hypercall_pg)
> + set_memory_x((unsigned long)hv_hypercall_pg, 1);

_RX is not writeable, right?

> +
> if (hv_hypercall_pg == NULL) {
> wrmsrl(HV_X64_MSR_GUEST_OS_ID, 0);
> goto remove_cpuhp_state;
> @@ -346,7 +360,8 @@ void __init hyperv_init(void)
>
> rdmsrl(HV_X64_MSR_HYPERCALL, hypercall_msr.as_uint64);
> hypercall_msr.enable = 1;
> - hypercall_msr.guest_physical_address = vmalloc_to_pfn(hv_hypercall_pg);
> + hypercall_msr.guest_physical_address = virt_to_phys(hv_hypercall_pg) >>
> + HV_HYP_PAGE_SHIFT;
> wrmsrl(HV_X64_MSR_HYPERCALL, hypercall_msr.as_uint64);
>
> hv_apic_init();
> @@ -416,6 +431,7 @@ void hyperv_cleanup(void)
> * let hypercall operations fail safely rather than
> * panic the kernel for using invalid hypercall page
> */
> + kmem_cache_free(cachep, hv_hypercall_pg);

Please don't do that: hyperv_cleanup() is called on kexec/kdump and
we're trying to do the bare minimum to allow next kernel to boot. Doing
excessive work here will likely lead to consequent problems (we're
already crashing the case it's kdump!).

> hv_hypercall_pg = NULL;
>
> /* Reset the hypercall page */

--
Vitaly