[PATCH] KVM: x86: Add lowest-priority support for vt-d posted-interrupts

From: Feng Wu
Date: Sun Nov 08 2015 - 22:04:25 EST


Use vector-hashing to handle lowest-priority interrupts for
posted-interrupts. As an example, modern Intel CPUs use this
method to handle lowest-priority interrupts.

Signed-off-by: Feng Wu <feng.wu@xxxxxxxxx>
---
arch/x86/include/asm/kvm_host.h | 2 ++
arch/x86/kvm/irq_comm.c | 52 +++++++++++++++++++++++++++++++++++++
arch/x86/kvm/lapic.c | 57 +++++++++++++++++++++++++++++++++++++++++
arch/x86/kvm/lapic.h | 2 ++
arch/x86/kvm/vmx.c | 14 ++++++++--
5 files changed, 125 insertions(+), 2 deletions(-)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 9265196..e225106 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -1258,6 +1258,8 @@ bool kvm_vcpu_is_bsp(struct kvm_vcpu *vcpu);

bool kvm_intr_is_single_vcpu(struct kvm *kvm, struct kvm_lapic_irq *irq,
struct kvm_vcpu **dest_vcpu);
+struct kvm_vcpu *kvm_intr_vector_hashing_dest(struct kvm *kvm,
+ struct kvm_lapic_irq *irq);

void kvm_set_msi_irq(struct kvm_kernel_irq_routing_entry *e,
struct kvm_lapic_irq *irq);
diff --git a/arch/x86/kvm/irq_comm.c b/arch/x86/kvm/irq_comm.c
index 84b96d3..8156e45 100644
--- a/arch/x86/kvm/irq_comm.c
+++ b/arch/x86/kvm/irq_comm.c
@@ -266,6 +266,58 @@ out:
return r;
}

+/*
+ * This routine handles lowest-priority interrupts using vector-hashing
+ * mechanism. As an example, modern Intel CPUs use this method to handle
+ * lowest-priority interrupts.
+ *
+ * Here is the details about the vector-hashing mechanism:
+ * 1. For lowest-priority interrupts, store all the possible destination
+ * vCPUs in an array.
+ * 2. Use "guest vector % max number of destination vCPUs" to find the right
+ * destination vCPU in the array for the lowest-priority interrupt.
+ */
+struct kvm_vcpu *kvm_intr_vector_hashing_dest(struct kvm *kvm,
+ struct kvm_lapic_irq *irq)
+
+{
+ unsigned long dest_vcpu_bitmap[BITS_TO_LONGS(KVM_MAX_VCPUS)];
+ unsigned int dest_vcpus = 0;
+ struct kvm_vcpu *vcpu;
+ unsigned int i, mod, idx = 0;
+
+ vcpu = kvm_intr_vector_hashing_dest_fast(kvm, irq);
+ if (vcpu)
+ return vcpu;
+
+ memset(dest_vcpu_bitmap, 0, sizeof(dest_vcpu_bitmap));
+
+ kvm_for_each_vcpu(i, vcpu, kvm) {
+ if (!kvm_apic_present(vcpu))
+ continue;
+
+ if (!kvm_apic_match_dest(vcpu, NULL, irq->shorthand,
+ irq->dest_id, irq->dest_mode))
+ continue;
+
+ __set_bit(vcpu->vcpu_id, dest_vcpu_bitmap);
+ dest_vcpus++;
+ }
+
+ if (dest_vcpus == 0)
+ return NULL;
+
+ mod = irq->vector % dest_vcpus;
+
+ for (i = 0; i <= mod; i++) {
+ idx = find_next_bit(dest_vcpu_bitmap, KVM_MAX_VCPUS, idx) + 1;
+ BUG_ON(idx >= KVM_MAX_VCPUS);
+ }
+
+ return kvm_get_vcpu(kvm, idx - 1);
+}
+EXPORT_SYMBOL_GPL(kvm_intr_vector_hashing_dest);
+
bool kvm_intr_is_single_vcpu(struct kvm *kvm, struct kvm_lapic_irq *irq,
struct kvm_vcpu **dest_vcpu)
{
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index ecd4ea1..4937aa4 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -816,6 +816,63 @@ out:
return ret;
}

+struct kvm_vcpu *kvm_intr_vector_hashing_dest_fast(struct kvm *kvm,
+ struct kvm_lapic_irq *irq)
+{
+ struct kvm_apic_map *map;
+ struct kvm_vcpu *vcpu = NULL;
+
+ if (irq->shorthand)
+ return NULL;
+
+ rcu_read_lock();
+ map = rcu_dereference(kvm->arch.apic_map);
+
+ if (!map)
+ goto out;
+
+ if ((irq->dest_mode != APIC_DEST_PHYSICAL) &&
+ kvm_lowest_prio_delivery(irq)) {
+ u16 cid;
+ int i, idx = 0;
+ unsigned long bitmap = 1;
+ unsigned int mod, dest_vcpus = 0;
+ struct kvm_lapic **dst = NULL;
+
+
+ if (!kvm_apic_logical_map_valid(map))
+ goto out;
+
+ apic_logical_id(map, irq->dest_id, &cid, (u16 *)&bitmap);
+
+ if (cid >= ARRAY_SIZE(map->logical_map))
+ goto out;
+
+ dst = map->logical_map[cid];
+
+ for_each_set_bit(i, &bitmap, 16) {
+ if (!dst[i])
+ continue;
+
+ dest_vcpus++;
+ }
+
+ mod = irq->vector % dest_vcpus;
+
+ for (i = 0; i <= mod; i++) {
+ idx = find_next_bit(&bitmap, KVM_MAX_VCPUS, idx) + 1;
+ BUG_ON(idx >= KVM_MAX_VCPUS);
+ }
+
+ if (kvm_apic_present(dst[idx-1]->vcpu))
+ vcpu = dst[idx-1]->vcpu;
+ }
+
+out:
+ rcu_read_unlock();
+ return vcpu;
+}
+
/*
* Add a pending IRQ into lapic.
* Return 1 if successfully added and 0 if discarded.
diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h
index fde8e35d..a6a775d 100644
--- a/arch/x86/kvm/lapic.h
+++ b/arch/x86/kvm/lapic.h
@@ -170,4 +170,6 @@ void wait_lapic_expire(struct kvm_vcpu *vcpu);

bool kvm_intr_is_single_vcpu_fast(struct kvm *kvm, struct kvm_lapic_irq *irq,
struct kvm_vcpu **dest_vcpu);
+struct kvm_vcpu *kvm_intr_vector_hashing_dest_fast(struct kvm *kvm,
+ struct kvm_lapic_irq *irq);
#endif
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 5eb56ed..57f71ee 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -159,6 +159,9 @@ static int ple_window_actual_max = KVM_VMX_DEFAULT_PLE_WINDOW_MAX;
static int ple_window_max = KVM_VMX_DEFAULT_PLE_WINDOW_MAX;
module_param(ple_window_max, int, S_IRUGO);

+static bool __read_mostly enable_pi_vector_hashing = 1;
+module_param(enable_pi_vector_hashing, bool, S_IRUGO);
+
extern const ulong vmx_return;

#define NR_AUTOLOAD_MSRS 8
@@ -10702,8 +10705,15 @@ static int vmx_update_pi_irte(struct kvm *kvm, unsigned int host_irq,
*/

kvm_set_msi_irq(e, &irq);
- if (!kvm_intr_is_single_vcpu(kvm, &irq, &vcpu))
- continue;
+ if (!kvm_intr_is_single_vcpu(kvm, &irq, &vcpu)) {
+ if ((!enable_pi_vector_hashing ||
+ irq.delivery_mode != APIC_DM_LOWEST))
+ continue;
+
+ vcpu = kvm_intr_vector_hashing_dest(kvm, &irq);
+ if (!vcpu)
+ continue;
+ }

vcpu_info.pi_desc_addr = __pa(vcpu_to_pi_desc(vcpu));
vcpu_info.vector = irq.vector;
--
2.1.0

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/