RE: [RFC PATCH V2 03/18] x86/hyperv: apic change for sev-snp enlightened guest

From: Michael Kelley (LINUX)
Date: Mon Dec 12 2022 - 14:02:43 EST


From: Tianyu Lan <ltykernel@xxxxxxxxx> Sent: Friday, November 18, 2022 7:46 PM
>
> Hyperv sev-snp enlightened guest doesn't support x2apic and

How does lack of support for x2apic factor into the code below? I
didn't see anything specific to the x2apic, but maybe I'm just not
knowledgeable about the association.

> apic page read/write operation. Bypass these requests. ipi
> request maybe returned with timeout error code and add retry
> mechanism.
>
> Signed-off-by: Tianyu Lan <tiala@xxxxxxxxxxxxx>
> ---
> arch/x86/hyperv/hv_apic.c | 79 ++++++++++++++++++++++++-------
> include/asm-generic/hyperv-tlfs.h | 1 +
> 2 files changed, 63 insertions(+), 17 deletions(-)
>
> diff --git a/arch/x86/hyperv/hv_apic.c b/arch/x86/hyperv/hv_apic.c
> index fb8b2c088681..214354d20833 100644
> --- a/arch/x86/hyperv/hv_apic.c
> +++ b/arch/x86/hyperv/hv_apic.c
> @@ -66,9 +66,15 @@ static u32 hv_apic_read(u32 reg)
> rdmsr(HV_X64_MSR_TPR, reg_val, hi);
> (void)hi;
> return reg_val;
> -
> + case APIC_ID:
> + if (hv_isolation_type_en_snp())
> + return smp_processor_id();

Hmmm. The Linux processor ID is not always equal to the APIC ID.
The specific case I'm aware of is a VM with multiple NUMA nodes,
where the number of vCPUs in a NUMA node is not a power of 2.
In that case, there's a gap in the APIC IDs, but not in the Linux
processor IDs. But even outside that specific case, there's no
guarantee that the Linux processor IDs match the APIC IDs.

What specific code is trying to read the APIC ID this way? That use
case may influence choosing an alternate method of getting the
APIC ID that will be correct in all cases.

> + fallthrough;
> default:
> - return native_apic_mem_read(reg);
> + if (!hv_isolation_type_en_snp())
> + return native_apic_mem_read(reg);
> + else
> + return 0;

At first glance, just silently returning zero seems dangerous.
Is it a scenario that we never expect this to happen? Or is there a
known set of scenarios where we know it is OK to return zero? If
the former, what about using WARN_ON_ONCE(1) to catch any
unexpected uses?

In any case, a comment with an explanation would help folks
in the future who look at this code.

> }
> }
>
> @@ -82,7 +88,8 @@ static void hv_apic_write(u32 reg, u32 val)
> wrmsr(HV_X64_MSR_TPR, val, 0);
> break;
> default:
> - native_apic_mem_write(reg, val);
> + if (!hv_isolation_type_en_snp())
> + native_apic_mem_write(reg, val);

Same here. Doing nothing seems dangerous.

> }
> }
>
> @@ -106,6 +113,7 @@ static bool __send_ipi_mask_ex(const struct cpumask *mask, int vector,
> struct hv_send_ipi_ex *ipi_arg;
> unsigned long flags;
> int nr_bank = 0;
> + int retry = 5;
> u64 status = HV_STATUS_INVALID_PARAMETER;
>
> if (!(ms_hyperv.hints & HV_X64_EX_PROCESSOR_MASKS_RECOMMENDED))
> @@ -144,8 +152,10 @@ static bool __send_ipi_mask_ex(const struct cpumask *mask, int vector,
> ipi_arg->vp_set.format = HV_GENERIC_SET_ALL;
> }
>
> - status = hv_do_rep_hypercall(HVCALL_SEND_IPI_EX, 0, nr_bank,
> + do {
> + status = hv_do_rep_hypercall(HVCALL_SEND_IPI_EX, 0, nr_bank,
> ipi_arg, NULL);
> + } while (status == HV_STATUS_TIME_OUT && retry--);

Since the u64 status returned by hv_do_hypercall contains other fields besides
just the hypercall result, test "hv_result(status)" instead of just "status"

>
> ipi_mask_ex_done:
> local_irq_restore(flags);
> @@ -159,6 +169,7 @@ static bool __send_ipi_mask(const struct cpumask *mask, int vector,
> struct hv_send_ipi ipi_arg;
> u64 status;
> unsigned int weight;
> + int retry = 5;
>
> trace_hyperv_send_ipi_mask(mask, vector);
>
> @@ -212,8 +223,11 @@ static bool __send_ipi_mask(const struct cpumask *mask, int vector,
> __set_bit(vcpu, (unsigned long *)&ipi_arg.cpu_mask);
> }
>
> - status = hv_do_fast_hypercall16(HVCALL_SEND_IPI, ipi_arg.vector,
> - ipi_arg.cpu_mask);
> + do {
> + status = hv_do_fast_hypercall16(HVCALL_SEND_IPI, ipi_arg.vector,
> + ipi_arg.cpu_mask);
> + } while (status == HV_STATUS_TIME_OUT && retry--);
> +

Same here. Test "hv_result(status)".


> return hv_result_success(status);
>
> do_ex_hypercall:
> @@ -224,6 +238,7 @@ static bool __send_ipi_one(int cpu, int vector)
> {
> int vp = hv_cpu_number_to_vp_number(cpu);
> u64 status;
> + int retry = 5;
>
> trace_hyperv_send_ipi_one(cpu, vector);
>
> @@ -236,26 +251,48 @@ static bool __send_ipi_one(int cpu, int vector)
> if (vp >= 64)
> return __send_ipi_mask_ex(cpumask_of(cpu), vector, false);
>
> - status = hv_do_fast_hypercall16(HVCALL_SEND_IPI, vector, BIT_ULL(vp));
> + do {
> + status = hv_do_fast_hypercall16(HVCALL_SEND_IPI, vector, BIT_ULL(vp));
> + } while (status == HV_STATUS_TIME_OUT || retry--);
> +

Same here. And I think you want "&&" like in the previous two cases
instead of "||".

> return hv_result_success(status);
> }
>
> static void hv_send_ipi(int cpu, int vector)
> {
> - if (!__send_ipi_one(cpu, vector))
> - orig_apic.send_IPI(cpu, vector);
> + if (!__send_ipi_one(cpu, vector)) {
> + if (!hv_isolation_type_en_snp())
> + orig_apic.send_IPI(cpu, vector);
> + else
> + WARN_ON_ONCE(1);
> + }
> }
>
> static void hv_send_ipi_mask(const struct cpumask *mask, int vector)
> {
> - if (!__send_ipi_mask(mask, vector, false))
> - orig_apic.send_IPI_mask(mask, vector);
> + if (!__send_ipi_mask(mask, vector, false)) {
> + if (!hv_isolation_type_en_snp())
> + orig_apic.send_IPI_mask(mask, vector);
> + else
> + WARN_ON_ONCE(1);
> + }
> }
>
> static void hv_send_ipi_mask_allbutself(const struct cpumask *mask, int vector)
> {
> - if (!__send_ipi_mask(mask, vector, true))
> - orig_apic.send_IPI_mask_allbutself(mask, vector);
> + unsigned int this_cpu = smp_processor_id();
> + struct cpumask new_mask;
> + const struct cpumask *local_mask;
> +
> + cpumask_copy(&new_mask, mask);
> + cpumask_clear_cpu(this_cpu, &new_mask);
> + local_mask = &new_mask;
> + if (!__send_ipi_mask(local_mask, vector, true)) {
> + if (!hv_isolation_type_en_snp())
> + orig_apic.send_IPI_mask_allbutself(mask, vector);
> + else
> + WARN_ON_ONCE(1);
> + }
> }
>
> static void hv_send_ipi_allbutself(int vector)
> @@ -265,14 +302,22 @@ static void hv_send_ipi_allbutself(int vector)
>
> static void hv_send_ipi_all(int vector)
> {
> - if (!__send_ipi_mask(cpu_online_mask, vector, false))
> - orig_apic.send_IPI_all(vector);
> + if (!__send_ipi_mask(cpu_online_mask, vector, false)) {
> + if (!hv_isolation_type_en_snp())
> + orig_apic.send_IPI_all(vector);
> + else
> + WARN_ON_ONCE(1);
> + }
> }
>
> static void hv_send_ipi_self(int vector)
> {
> - if (!__send_ipi_one(smp_processor_id(), vector))
> - orig_apic.send_IPI_self(vector);
> + if (!__send_ipi_one(smp_processor_id(), vector)) {
> + if (!hv_isolation_type_en_snp())
> + orig_apic.send_IPI_self(vector);
> + else
> + WARN_ON_ONCE(1);
> + }
> }
>
> void __init hv_apic_init(void)
> diff --git a/include/asm-generic/hyperv-tlfs.h b/include/asm-generic/hyperv-tlfs.h
> index fdce7a4cfc6f..6e2a090e2649 100644
> --- a/include/asm-generic/hyperv-tlfs.h
> +++ b/include/asm-generic/hyperv-tlfs.h
> @@ -208,6 +208,7 @@ enum HV_GENERIC_SET_FORMAT {
> #define HV_STATUS_INVALID_PORT_ID 17
> #define HV_STATUS_INVALID_CONNECTION_ID 18
> #define HV_STATUS_INSUFFICIENT_BUFFERS 19
> +#define HV_STATUS_TIME_OUT 0x78
>
> /*
> * The Hyper-V TimeRefCount register and the TSC
> --
> 2.25.1