[RFC PATCH 24/67] KVM: x86: Add per-VM flag to disable in-kernel I/O APIC and level routes

From: isaku . yamahata
Date: Mon Nov 16 2020 - 13:32:45 EST


From: Kai Huang <kai.huang@xxxxxxxxxxxxxxx>

Add a flag to let TDX disallow the in-kernel I/O APIC, level triggered
routes for a userspace I/O APIC, and anything else that relies on being
able to intercept EOIs. TDX-SEAM does not allow intercepting EOI.

Note, technically KVM could partially emulate the I/O APIC by allowing
only edge triggered interrupts, but that adds a lot of complexity for
basically zero benefit. Ideally KVM wouldn't even allow I/O APIC route
reservation, but disabling that is a train wreck for Qemu.

Signed-off-by: Kai Huang <kai.huang@xxxxxxxxxxxxxxx>
Co-developed-by: Sean Christopherson <sean.j.christopherson@xxxxxxxxx>
Signed-off-by: Sean Christopherson <sean.j.christopherson@xxxxxxxxx>
---
arch/x86/include/asm/kvm_host.h | 1 +
arch/x86/kvm/ioapic.c | 4 ++++
arch/x86/kvm/irq_comm.c | 6 +++++-
arch/x86/kvm/lapic.c | 3 ++-
arch/x86/kvm/x86.c | 6 ++++++
5 files changed, 18 insertions(+), 2 deletions(-)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index e5b706889d09..7537ba0bada2 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -977,6 +977,7 @@ struct kvm_arch {

enum kvm_irqchip_mode irqchip_mode;
u8 nr_reserved_ioapic_pins;
+ bool eoi_intercept_unsupported;

bool disabled_lapic_found;

diff --git a/arch/x86/kvm/ioapic.c b/arch/x86/kvm/ioapic.c
index 698969e18fe3..e2de6e552d25 100644
--- a/arch/x86/kvm/ioapic.c
+++ b/arch/x86/kvm/ioapic.c
@@ -311,6 +311,10 @@ void kvm_arch_post_irq_ack_notifier_list_update(struct kvm *kvm)
{
if (!ioapic_in_kernel(kvm))
return;
+
+ if (WARN_ON_ONCE(kvm->arch.eoi_intercept_unsupported))
+ return;
+
kvm_make_scan_ioapic_request(kvm);
}

diff --git a/arch/x86/kvm/irq_comm.c b/arch/x86/kvm/irq_comm.c
index 4aa1c2e00e2a..1523e9d66867 100644
--- a/arch/x86/kvm/irq_comm.c
+++ b/arch/x86/kvm/irq_comm.c
@@ -307,6 +307,10 @@ int kvm_set_routing_entry(struct kvm *kvm,
e->msi.address_hi = ue->u.msi.address_hi;
e->msi.data = ue->u.msi.data;

+ if (kvm->arch.eoi_intercept_unsupported &&
+ e->msi.data & (1 << MSI_DATA_TRIGGER_SHIFT))
+ return -EINVAL;
+
if (kvm_msi_route_invalid(kvm, e))
return -EINVAL;
break;
@@ -390,7 +394,7 @@ int kvm_setup_empty_irq_routing(struct kvm *kvm)

void kvm_arch_post_irq_routing_update(struct kvm *kvm)
{
- if (!irqchip_split(kvm))
+ if (!irqchip_split(kvm) || kvm->arch.eoi_intercept_unsupported)
return;
kvm_make_scan_ioapic_request(kvm);
}
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index 105e7859d1f2..e6c0aaf4044e 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -278,7 +278,8 @@ void kvm_recalculate_apic_map(struct kvm *kvm)
if (old)
call_rcu(&old->rcu, kvm_apic_map_free);

- kvm_make_scan_ioapic_request(kvm);
+ if (!kvm->arch.eoi_intercept_unsupported)
+ kvm_make_scan_ioapic_request(kvm);
}

static inline void apic_set_spiv(struct kvm_lapic *apic, u32 val)
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 4060f3d91f74..8d58141256c5 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -5454,6 +5454,9 @@ long kvm_arch_vm_ioctl(struct file *filp,
goto create_irqchip_unlock;

r = -EINVAL;
+ if (kvm->arch.eoi_intercept_unsupported)
+ goto create_irqchip_unlock;
+
if (kvm->created_vcpus)
goto create_irqchip_unlock;

@@ -5484,6 +5487,9 @@ long kvm_arch_vm_ioctl(struct file *filp,
u.pit_config.flags = KVM_PIT_SPEAKER_DUMMY;
goto create_pit;
case KVM_CREATE_PIT2:
+ r = -EINVAL;
+ if (kvm->arch.eoi_intercept_unsupported)
+ goto out;
r = -EFAULT;
if (copy_from_user(&u.pit_config, argp,
sizeof(struct kvm_pit_config)))
--
2.17.1