Re: [PATCH] KVM: x86: Add a module param to control and enumerate device posted IRQs

From: Yosry Ahmed
Date: Mon Mar 17 2025 - 14:54:50 EST


On Fri, Mar 14, 2025 at 07:56:15PM -0700, Sean Christopherson wrote:
> Add a module param to allow disabling device posted interrupts without
> having to sacrifice all of APICv/AVIC, and to also effectively enumerate
> to userspace whether or not KVM may be utilizing device posted IRQs.
> Disabling device posted interrupts is very desirable for testing, and can
> even be desirable for production environments, e.g. if the host kernel
> wants to interpose on device interrupts.
>
> Signed-off-by: Sean Christopherson <seanjc@xxxxxxxxxx>
> ---
> arch/x86/include/asm/kvm_host.h | 1 +
> arch/x86/kvm/svm/avic.c | 3 +--
> arch/x86/kvm/vmx/posted_intr.c | 7 +++----
> arch/x86/kvm/x86.c | 9 ++++++++-
> 4 files changed, 13 insertions(+), 7 deletions(-)
>
> diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
> index d881e7d276b1..bf11c5ee50cb 100644
> --- a/arch/x86/include/asm/kvm_host.h
> +++ b/arch/x86/include/asm/kvm_host.h
> @@ -1922,6 +1922,7 @@ struct kvm_arch_async_pf {
> extern u32 __read_mostly kvm_nr_uret_msrs;
> extern bool __read_mostly allow_smaller_maxphyaddr;
> extern bool __read_mostly enable_apicv;
> +extern bool __read_mostly enable_device_posted_irqs;
> extern struct kvm_x86_ops kvm_x86_ops;
>
> #define kvm_x86_call(func) static_call(kvm_x86_##func)
> diff --git a/arch/x86/kvm/svm/avic.c b/arch/x86/kvm/svm/avic.c
> index 65fd245a9953..e0f519565393 100644
> --- a/arch/x86/kvm/svm/avic.c
> +++ b/arch/x86/kvm/svm/avic.c
> @@ -898,8 +898,7 @@ int avic_pi_update_irte(struct kvm *kvm, unsigned int host_irq,
> struct kvm_irq_routing_table *irq_rt;
> int idx, ret = 0;
>
> - if (!kvm_arch_has_assigned_device(kvm) ||
> - !irq_remapping_cap(IRQ_POSTING_CAP))
> + if (!kvm_arch_has_assigned_device(kvm) || !enable_device_posted_irqs)

This function will now also be skipped if enable_apicv is false, which
was not checked here before. Is that already guaranteed on this path for
some reason? Sorry if I missed something obvious.

> return 0;
>
> pr_debug("SVM: %s: host_irq=%#x, guest_irq=%#x, set=%#x\n",
> diff --git a/arch/x86/kvm/vmx/posted_intr.c b/arch/x86/kvm/vmx/posted_intr.c
> index ec08fa3caf43..a03988a138c5 100644
> --- a/arch/x86/kvm/vmx/posted_intr.c
> +++ b/arch/x86/kvm/vmx/posted_intr.c
> @@ -134,9 +134,8 @@ void vmx_vcpu_pi_load(struct kvm_vcpu *vcpu, int cpu)
>
> static bool vmx_can_use_vtd_pi(struct kvm *kvm)
> {
> - return irqchip_in_kernel(kvm) && enable_apicv &&
> - kvm_arch_has_assigned_device(kvm) &&
> - irq_remapping_cap(IRQ_POSTING_CAP);
> + return irqchip_in_kernel(kvm) && enable_device_posted_irqs &&
> + kvm_arch_has_assigned_device(kvm);
> }
>
> /*
> @@ -254,7 +253,7 @@ bool pi_has_pending_interrupt(struct kvm_vcpu *vcpu)
> */
> void vmx_pi_start_assignment(struct kvm *kvm)
> {
> - if (!irq_remapping_cap(IRQ_POSTING_CAP))
> + if (!enable_device_posted_irqs)

Ditto here.

> return;
>
> kvm_make_all_cpus_request(kvm, KVM_REQ_UNBLOCK);
> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> index 69c20a68a3f0..1b14319975b7 100644
> --- a/arch/x86/kvm/x86.c
> +++ b/arch/x86/kvm/x86.c
> @@ -227,6 +227,10 @@ EXPORT_SYMBOL_GPL(allow_smaller_maxphyaddr);
> bool __read_mostly enable_apicv = true;
> EXPORT_SYMBOL_GPL(enable_apicv);
>
> +bool __read_mostly enable_device_posted_irqs = true;
> +module_param(enable_device_posted_irqs, bool, 0444);
> +EXPORT_SYMBOL_GPL(enable_device_posted_irqs);
> +
> const struct _kvm_stats_desc kvm_vm_stats_desc[] = {
> KVM_GENERIC_VM_STATS(),
> STATS_DESC_COUNTER(VM, mmu_shadow_zapped),
> @@ -9772,6 +9776,9 @@ int kvm_x86_vendor_init(struct kvm_x86_init_ops *ops)
> if (r != 0)
> goto out_mmu_exit;
>
> + enable_device_posted_irqs = enable_device_posted_irqs && enable_apicv &&
> + irq_remapping_cap(IRQ_POSTING_CAP);

Maybe this is clearer:

enable_device_posted_irqs &= enable_apicv && irq_remapping_cap(IRQ_POSTING_CAP);

> +
> kvm_ops_update(ops);
>
> for_each_online_cpu(cpu) {
> @@ -13552,7 +13559,7 @@ EXPORT_SYMBOL_GPL(kvm_arch_has_noncoherent_dma);
>
> bool kvm_arch_has_irq_bypass(void)
> {
> - return enable_apicv && irq_remapping_cap(IRQ_POSTING_CAP);
> + return enable_device_posted_irqs;
> }
>
> int kvm_arch_irq_bypass_add_producer(struct irq_bypass_consumer *cons,
>
> base-commit: c9ea48bb6ee6b28bbc956c1e8af98044618fed5e
> --
> 2.49.0.rc1.451.g8f38331e32-goog
>
>