Re: [PATCH v5 5/5] KVM: x86: hyperv: implement PV IPI send hypercalls

From: Roman Kagan
Date: Tue Aug 28 2018 - 13:55:45 EST


On Mon, Aug 27, 2018 at 06:48:58PM +0200, Vitaly Kuznetsov wrote:
> Using a hypercall for sending IPIs is faster because it allows specifying
> any number of vCPUs (even > 64 with a sparse CPU set), and the whole
> procedure takes only one VMEXIT.
>
> Current Hyper-V TLFS (v5.0b) claims that the HvCallSendSyntheticClusterIpi
> hypercall can't be 'fast' (passing parameters through registers), but
> apparently this is not true: Windows always uses it as 'fast', so we need
> to support that.
>
> Signed-off-by: Vitaly Kuznetsov <vkuznets@xxxxxxxxxx>
> ---
> Documentation/virtual/kvm/api.txt | 7 +++
> arch/x86/kvm/hyperv.c | 105 ++++++++++++++++++++++++++++++++++++++
> arch/x86/kvm/trace.h | 42 +++++++++++++++
> arch/x86/kvm/x86.c | 1 +
> include/uapi/linux/kvm.h | 1 +
> 5 files changed, 156 insertions(+)
>
> diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt
> index c664064f76fb..d6fb93f22c0b 100644
> --- a/Documentation/virtual/kvm/api.txt
> +++ b/Documentation/virtual/kvm/api.txt
> @@ -4762,3 +4762,10 @@ CPU when the exception is taken. If this virtual SError is taken to EL1 using
> AArch64, this value will be reported in the ISS field of ESR_ELx.
>
> See KVM_CAP_VCPU_EVENTS for more details.
> +8.20 KVM_CAP_HYPERV_SEND_IPI
> +
> +Architectures: x86
> +
> +This capability indicates that KVM supports paravirtualized Hyper-V IPI send
> +hypercalls:
> +HvCallSendSyntheticClusterIpi, HvCallSendSyntheticClusterIpiEx.
> diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c
> index d1a911132b59..dadec987a39b 100644
> --- a/arch/x86/kvm/hyperv.c
> +++ b/arch/x86/kvm/hyperv.c
> @@ -1360,6 +1360,97 @@ static u64 kvm_hv_flush_tlb(struct kvm_vcpu *current_vcpu, u64 ingpa,
> ((u64)rep_cnt << HV_HYPERCALL_REP_COMP_OFFSET);
> }
>
> +static u64 kvm_hv_send_ipi(struct kvm_vcpu *current_vcpu, u64 ingpa, u64 outgpa,
> + bool ex, bool fast)
> +{
> + struct kvm *kvm = current_vcpu->kvm;
> + struct hv_send_ipi_ex send_ipi_ex;
> + struct hv_send_ipi send_ipi;
> + struct kvm_vcpu *vcpu;
> + unsigned long valid_bank_mask;
> + u64 sparse_banks[64];
> + int sparse_banks_len, bank, i;
> + struct kvm_lapic_irq irq = {.delivery_mode = APIC_DM_FIXED};
> + bool all_cpus;
> +
> + if (!ex) {
> + if (!fast) {
> + if (unlikely(kvm_read_guest(kvm, ingpa, &send_ipi,
> + sizeof(send_ipi))))
> + return HV_STATUS_INVALID_HYPERCALL_INPUT;
> + sparse_banks[0] = send_ipi.cpu_mask;
> + irq.vector = send_ipi.vector;
> + } else {
> + /* 'reserved' part of hv_send_ipi should be 0 */
> + if (unlikely(ingpa >> 32 != 0))
> + return HV_STATUS_INVALID_HYPERCALL_INPUT;
> + sparse_banks[0] = outgpa;
> + irq.vector = (u32)ingpa;
> + }
> + all_cpus = false;
> + valid_bank_mask = BIT_ULL(0);
> +
> + trace_kvm_hv_send_ipi(irq.vector, sparse_banks[0]);
> + } else {
> + if (unlikely(kvm_read_guest(kvm, ingpa, &send_ipi_ex,
> + sizeof(send_ipi_ex))))
> + return HV_STATUS_INVALID_HYPERCALL_INPUT;
> +
> + trace_kvm_hv_send_ipi_ex(send_ipi_ex.vector,
> + send_ipi_ex.vp_set.format,
> + send_ipi_ex.vp_set.valid_bank_mask);
> +
> + irq.vector = send_ipi_ex.vector;
> + valid_bank_mask = send_ipi_ex.vp_set.valid_bank_mask;
> + sparse_banks_len = bitmap_weight(&valid_bank_mask, 64) *
> + sizeof(sparse_banks[0]);
> +
> + all_cpus = send_ipi_ex.vp_set.format == HV_GENERIC_SET_ALL;
> +
> + if (!sparse_banks_len)
> + goto ret_success;
> +
> + if (!all_cpus &&
> + kvm_read_guest(kvm,
> + ingpa + offsetof(struct hv_send_ipi_ex,
> + vp_set.bank_contents),
> + sparse_banks,
> + sparse_banks_len))
> + return HV_STATUS_INVALID_HYPERCALL_INPUT;
> + }
> +
> + if ((irq.vector < HV_IPI_LOW_VECTOR) ||
> + (irq.vector > HV_IPI_HIGH_VECTOR))
> + return HV_STATUS_INVALID_HYPERCALL_INPUT;
> +
> + if (all_cpus) {
> + kvm_for_each_vcpu(i, vcpu, kvm) {
> + /* We fail only when APIC is disabled */
> + if (!kvm_apic_set_irq(vcpu, &irq, NULL))
> + return HV_STATUS_INVALID_HYPERCALL_INPUT;
> + }
> + goto ret_success;
> + }
> +
> + for_each_set_bit(bank, (unsigned long *)&valid_bank_mask, 64) {
> + for_each_set_bit(i, (unsigned long *)&sparse_banks[bank], 64) {
> + u32 vp_index = bank * 64 + i;
> + struct kvm_vcpu *vcpu =
> + get_vcpu_by_vpidx(kvm, vp_index);
> +
> + /* Unknown vCPU specified */
> + if (!vcpu)
> + continue;
> +
> + /* We fail only when APIC is disabled */
> + kvm_apic_set_irq(vcpu, &irq, NULL);
> + }
> + }
> +
> +ret_success:
> + return HV_STATUS_SUCCESS;
> +}
> +

I still think that splitting kvm_hv_send_ipi into three functions would
make it more readable, but that's a matter of taste of course, so I'm OK
if Radim insists otherwise.

Reviewed-by: Roman Kagan <rkagan@xxxxxxxxxxxxx>