RE: [RFC PATCH 07/18] virt/mshv: withdraw memory hypercall

From: Michael Kelley
Date: Mon Feb 08 2021 - 15:56:22 EST


From: Nuno Das Neves <nunodasneves@xxxxxxxxxxxxxxxxxxx> Sent: Friday, November 20, 2020 4:30 PM
>
> Withdraw the memory from a finalized partition and free the pages.
> The partition is now cleaned up correctly when the fd is released.
>
> Co-developed-by: Lillian Grassin-Drake <ligrassi@xxxxxxxxxxxxx>
> Signed-off-by: Lillian Grassin-Drake <ligrassi@xxxxxxxxxxxxx>
> Signed-off-by: Nuno Das Neves <nunodasneves@xxxxxxxxxxxxxxxxxxx>
> ---
> include/asm-generic/hyperv-tlfs.h | 10 ++++++
> virt/mshv/mshv_main.c | 54 ++++++++++++++++++++++++++++++-
> 2 files changed, 63 insertions(+), 1 deletion(-)
>
> diff --git a/include/asm-generic/hyperv-tlfs.h b/include/asm-generic/hyperv-tlfs.h
> index ab6ae6c164f5..2a49503b7396 100644
> --- a/include/asm-generic/hyperv-tlfs.h
> +++ b/include/asm-generic/hyperv-tlfs.h
> @@ -148,6 +148,7 @@ struct ms_hyperv_tsc_page {
> #define HVCALL_DELETE_PARTITION 0x0043
> #define HVCALL_GET_PARTITION_ID 0x0046
> #define HVCALL_DEPOSIT_MEMORY 0x0048
> +#define HVCALL_WITHDRAW_MEMORY 0x0049
> #define HVCALL_CREATE_VP 0x004e
> #define HVCALL_GET_VP_REGISTERS 0x0050
> #define HVCALL_SET_VP_REGISTERS 0x0051
> @@ -472,6 +473,15 @@ union hv_proximity_domain_info {
> u64 as_uint64;
> };
>
> +struct hv_withdraw_memory_in {
> + u64 partition_id;
> + union hv_proximity_domain_info proximity_domain_info;
> +};
> +
> +struct hv_withdraw_memory_out {
> + u64 gpa_page_list[0];

For a variable size array, the Linux kernel community has an effort
underway to replace occurrences of [0] and [1] with just []. I think
[] can be used here.

> +};
> +

Add __packed to the above two structs.

> struct hv_lp_startup_status {
> u64 hv_status;
> u64 substatus1;
> diff --git a/virt/mshv/mshv_main.c b/virt/mshv/mshv_main.c
> index c4130a6508e5..162a1bb42a4a 100644
> --- a/virt/mshv/mshv_main.c
> +++ b/virt/mshv/mshv_main.c
> @@ -14,6 +14,7 @@
> #include <linux/slab.h>
> #include <linux/file.h>
> #include <linux/anon_inodes.h>
> +#include <linux/mm.h>
> #include <linux/mshv.h>
> #include <asm/mshyperv.h>
>
> @@ -57,8 +58,58 @@ static struct miscdevice mshv_dev = {
> .mode = 600,
> };
>
> +#define HV_WITHDRAW_BATCH_SIZE (PAGE_SIZE / sizeof(u64))

Use HV_HYP_PAGE_SIZE so that we're explicit that the dependency
is on the page size used by Hyper-V, which might be different from the
guest page size (at least on architectures like ARM64).

> #define HV_INIT_PARTITION_DEPOSIT_PAGES 208
>
> +static int
> +hv_call_withdraw_memory(u64 count, int node, u64 partition_id)
> +{
> + struct hv_withdraw_memory_in *input_page;
> + struct hv_withdraw_memory_out *output_page;
> + u16 completed;
> + u64 hypercall_status;
> + unsigned long remaining = count;
> + int status;
> + int i;
> + unsigned long flags;
> +
> + while (remaining) {
> + local_irq_save(flags);
> +
> + input_page = (struct hv_withdraw_memory_in *)(*this_cpu_ptr(
> + hyperv_pcpu_input_arg));
> + output_page = (struct hv_withdraw_memory_out *)(*this_cpu_ptr(
> + hyperv_pcpu_output_arg));
> +
> + input_page->partition_id = partition_id;
> + input_page->proximity_domain_info.as_uint64 = 0;
> + hypercall_status = hv_do_rep_hypercall(
> + HVCALL_WITHDRAW_MEMORY,
> + min(remaining, HV_WITHDRAW_BATCH_SIZE), 0, input_page,
> + output_page);
> +
> + completed = (hypercall_status & HV_HYPERCALL_REP_COMP_MASK) >>
> + HV_HYPERCALL_REP_COMP_OFFSET;
> +
> + for (i = 0; i < completed; i++)
> + __free_page(pfn_to_page(output_page->gpa_page_list[i]));
> +
> + local_irq_restore(flags);

Seems like there's some risk that we have interrupts disabled for too long.
We could be calling __free_page() up to 512 times. It might be better for this
function to allocate its own page to be used as the output page, so that interrupts
can be enabled immediately after the hypercall completes. Then the __free_page()
loop can execute with interrupts enabled. We have the per-cpu input and output
pages to avoid the overhead of allocating/freeing pages for each hypercall, but in this
case a private output page might be warranted.

> +
> + status = hypercall_status & HV_HYPERCALL_RESULT_MASK;
> + if (status != HV_STATUS_SUCCESS) {
> + if (status != HV_STATUS_NO_RESOURCES)
> + pr_err("%s: %s\n", __func__,
> + hv_status_to_string(status));
> + break;
> + }
> +
> + remaining -= completed;
> + }
> +
> + return -hv_status_to_errno(status);
> +}
> +
> static int
> hv_call_create_partition(
> u64 flags,
> @@ -230,7 +281,8 @@ destroy_partition(struct mshv_partition *partition)
>
> /* Deallocates and unmaps everything including vcpus, GPA mappings etc */
> hv_call_finalize_partition(partition->id);
> - /* TODO: Withdraw and free all pages we deposited */
> + /* Withdraw and free all pages we deposited */
> + hv_call_withdraw_memory(U64_MAX, NUMA_NO_NODE, partition->id);
>
> hv_call_delete_partition(partition->id);
>
> --
> 2.25.1