Re: [PATCH 04/18] KVM: x86: hyper-v: Introduce VTL awareness to Hyper-V's PV-IPIs

From: Sean Christopherson
Date: Fri Sep 13 2024 - 14:03:04 EST


On Sun, Jun 09, 2024, Nicolas Saenz Julienne wrote:
> HvCallSendSyntheticClusterIpi and HvCallSendSyntheticClusterIpiEx allow
> sending VTL-aware IPIs. Honour the hcall by exiting to user-space upon
> receiving a request with a valid VTL target. This behaviour is only
> available if the VSM CPUID flag is available and exposed to the guest.
> It doesn't introduce a behaviour change otherwise.
>
> User-space is responsible for correctly processing the PV-IPI
> before resuming execution.
>
> Signed-off-by: Nicolas Saenz Julienne <nsaenz@xxxxxxxxxx>
> ---
> arch/x86/kvm/hyperv.c | 19 ++++++++++++++++++-
> 1 file changed, 18 insertions(+), 1 deletion(-)
>
> diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c
> index 42f44546fe79c..d00baf3ffb165 100644
> --- a/arch/x86/kvm/hyperv.c
> +++ b/arch/x86/kvm/hyperv.c
> @@ -2217,16 +2217,20 @@ static void kvm_hv_send_ipi_to_many(struct kvm *kvm, u32 vector,
>
> static u64 kvm_hv_send_ipi(struct kvm_vcpu *vcpu, struct kvm_hv_hcall *hc)
> {
> + bool vsm_enabled = kvm_hv_cpuid_vsm_enabled(vcpu);
> struct kvm_vcpu_hv *hv_vcpu = to_hv_vcpu(vcpu);
> u64 *sparse_banks = hv_vcpu->sparse_banks;
> struct kvm *kvm = vcpu->kvm;
> struct hv_send_ipi_ex send_ipi_ex;
> struct hv_send_ipi send_ipi;
> + union hv_input_vtl *in_vtl;
> u64 valid_bank_mask;
> + int rsvd_shift;
> u32 vector;
> bool all_cpus;
>
> if (hc->code == HVCALL_SEND_IPI) {
> + in_vtl = &send_ipi.in_vtl;

I don't see any value in having a local pointer to a union. Just use send_ipi.in_vtl.

> if (!hc->fast) {
> if (unlikely(kvm_read_guest(kvm, hc->ingpa, &send_ipi,
> sizeof(send_ipi))))
> @@ -2235,16 +2239,22 @@ static u64 kvm_hv_send_ipi(struct kvm_vcpu *vcpu, struct kvm_hv_hcall *hc)
> vector = send_ipi.vector;
> } else {
> /* 'reserved' part of hv_send_ipi should be 0 */
> - if (unlikely(hc->ingpa >> 32 != 0))
> + rsvd_shift = vsm_enabled ? 40 : 32;
> + if (unlikely(hc->ingpa >> rsvd_shift != 0))
> return HV_STATUS_INVALID_HYPERCALL_INPUT;

The existing error handling doesn't make any sense to me. Why is this the _only_
path that enforces reserved bits?

Regarding the shift, I think it makes more sense to do:

/* Bits 63:40 are always reserved. */
if (unlikely(hc->ingpa >> 40 != 0))
return HV_STATUS_INVALID_HYPERCALL_INPUT;

send_ipi.in_vtl.as_uint8 = (u8)(hc->ingpa >> 32);
if (unlikely(!vsm_enabled && send_ipi.in_vtl.as_uint8))
return HV_STATUS_INVALID_HYPERCALL_INPUT;

so that it's more obvious exactly what is/isn't reserved when VSM isn't/is enabled.

> + in_vtl->as_uint8 = (u8)(hc->ingpa >> 32);
> sparse_banks[0] = hc->outgpa;
> vector = (u32)hc->ingpa;
> }
> all_cpus = false;
> valid_bank_mask = BIT_ULL(0);
>
> + if (in_vtl->use_target_vtl)

Due to the lack of error checking for the !hc->fast case, this will do the wrong
thing if vsm_enabled=false.

> + return -ENODEV;
> +
> trace_kvm_hv_send_ipi(vector, sparse_banks[0]);
> } else {
> + in_vtl = &send_ipi_ex.in_vtl;
> if (!hc->fast) {
> if (unlikely(kvm_read_guest(kvm, hc->ingpa, &send_ipi_ex,
> sizeof(send_ipi_ex))))
> @@ -2253,8 +2263,12 @@ static u64 kvm_hv_send_ipi(struct kvm_vcpu *vcpu, struct kvm_hv_hcall *hc)
> send_ipi_ex.vector = (u32)hc->ingpa;
> send_ipi_ex.vp_set.format = hc->outgpa;
> send_ipi_ex.vp_set.valid_bank_mask = sse128_lo(hc->xmm[0]);
> + in_vtl->as_uint8 = (u8)(hc->ingpa >> 32);
> }
>
> + if (vsm_enabled && in_vtl->use_target_vtl)
> + return -ENODEV;
> +
> trace_kvm_hv_send_ipi_ex(send_ipi_ex.vector,
> send_ipi_ex.vp_set.format,
> send_ipi_ex.vp_set.valid_bank_mask);
> @@ -2682,6 +2696,9 @@ int kvm_hv_hypercall(struct kvm_vcpu *vcpu)
> break;
> }
> ret = kvm_hv_send_ipi(vcpu, &hc);
> + /* VTL-enabled ipi, let user-space handle it */
> + if (ret == -ENODEV)

I generally don't love "magic" error codes, but I don't see an obvious better
solution either. The other weird thing is that "ret" is a u64, versus the more
common int or even long. I doubt it's problematic in practice, just a bit odd.

> + goto hypercall_userspace_exit;
> break;
> case HVCALL_POST_DEBUG_DATA:
> case HVCALL_RETRIEVE_DEBUG_DATA:
> --
> 2.40.1
>