Re: [PART1 RFC v3 07/12] svm: Add interrupt injection via AVIC

From: Paolo Bonzini
Date: Fri Mar 18 2016 - 06:23:09 EST




On 18/03/2016 07:09, Suravee Suthikulpanit wrote:
> This patch introduces a new mechanism to inject interrupt using AVIC.
> Since VINTR is not supported when enable AVIC, we need to inject
> interrupt via APIC backing page instead.
>
> This patch also adds support for AVIC doorbell, which is used by
> KVM to signal a running vcpu to check IRR for injected interrupts.
>
> Signed-off-by: Suravee Suthikulpanit <suravee.suthikulpanit@xxxxxxx>

Looks good, but I think it breaks nested virtualization. See below.

> ---
> arch/x86/kvm/svm.c | 46 ++++++++++++++++++++++++++++++++++++++++++++--
> 1 file changed, 44 insertions(+), 2 deletions(-)
>
> diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
> index 621c948..8e31ad3 100644
> --- a/arch/x86/kvm/svm.c
> +++ b/arch/x86/kvm/svm.c
> @@ -71,6 +71,8 @@ MODULE_DEVICE_TABLE(x86cpu, svm_cpu_id);
> #define SVM_FEATURE_DECODE_ASSIST (1 << 7)
> #define SVM_FEATURE_PAUSE_FILTER (1 << 10)
>
> +#define SVM_AVIC_DOORBELL 0xc001011b
> +
> #define NESTED_EXIT_HOST 0 /* Exit handled on host level */
> #define NESTED_EXIT_DONE 1 /* Exit caused nested vmexit */
> #define NESTED_EXIT_CONTINUE 2 /* Further checks needed */
> @@ -217,6 +219,8 @@ static bool npt_enabled = true;
> static bool npt_enabled;
> #endif
>
> +static struct kvm_x86_ops svm_x86_ops;
> +
> /* allow nested paging (virtualized MMU) for all guests */
> static int npt = true;
> module_param(npt, int, S_IRUGO);
> @@ -291,6 +295,18 @@ static inline struct vcpu_svm *to_svm(struct kvm_vcpu *vcpu)
> return container_of(vcpu, struct vcpu_svm, vcpu);
> }
>
> +static inline bool avic_vcpu_is_running(struct kvm_vcpu *vcpu)
> +{
> + struct vcpu_svm *svm = to_svm(vcpu);
> + u64 *entry;
> +
> + entry = svm->avic_phy_apic_id_cache;
> + if (!entry)
> + return false;
> +
> + return (READ_ONCE(*entry) & AVIC_PHY_APIC_ID__IS_RUN_MSK);

AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK

> +}
> +
> static void recalc_intercepts(struct vcpu_svm *svm)
> {
> struct vmcb_control_area *c, *h;
> @@ -964,6 +980,8 @@ static __init int svm_hardware_setup(void)
>
> if (avic) {
> printk(KERN_INFO "kvm: AVIC enabled\n");
> + } else {
> + svm_x86_ops.deliver_posted_interrupt = NULL;
> }
>
> return 0;
> @@ -2877,8 +2895,10 @@ static int clgi_interception(struct vcpu_svm *svm)
> disable_gif(svm);
>
> /* After a CLGI no interrupts should come */
> - svm_clear_vintr(svm);
> - svm->vmcb->control.int_ctl &= ~V_IRQ_MASK;
> + if (!svm_vcpu_avic_enabled(svm)) {
> + svm_clear_vintr(svm);
> + svm->vmcb->control.int_ctl &= ~V_IRQ_MASK;
> + }

This is for nested virtualization. Unless you support nested AVIC, the
L2 guest should run without AVIC (i.e. IsRunning should be false) and
use the old VINTR mechanism.

> mark_dirty(svm->vmcb, VMCB_INTR);
>
> @@ -3453,6 +3473,9 @@ static int msr_interception(struct vcpu_svm *svm)
>
> static int interrupt_window_interception(struct vcpu_svm *svm)
> {
> + if (svm_vcpu_avic_enabled(svm))
> + BUG();

Please WARN and return, but I suspect this may also need some changes
for nested virtualization.

> kvm_make_request(KVM_REQ_EVENT, &svm->vcpu);
> svm_clear_vintr(svm);
> svm->vmcb->control.int_ctl &= ~V_IRQ_MASK;
> @@ -3767,6 +3790,7 @@ static inline void svm_inject_irq(struct vcpu_svm *svm, int irq)
> {
> struct vmcb_control_area *control;
>
> + /* The following fields are ignored when AVIC is enabled */
> control = &svm->vmcb->control;
> control->int_vector = irq;
> control->int_ctl &= ~V_INTR_PRIO_MASK;
> @@ -3844,6 +3868,20 @@ static void svm_sync_pir_to_irr(struct kvm_vcpu *vcpu)
> return;
> }
>
> +static void svm_deliver_avic_intr(struct kvm_vcpu *vcpu, int vec)
> +{
> + struct vcpu_svm *svm = to_svm(vcpu);
> +
> + kvm_lapic_set_vector(vec, svm->vcpu.arch.apic->regs + APIC_IRR);
> + smp_mb__after_atomic();
> +
> + if (avic_vcpu_is_running(vcpu))
> + wrmsrl(SVM_AVIC_DOORBELL,
> + __default_cpu_present_to_apicid(vcpu->cpu));
> + else
> + kvm_vcpu_wake_up(vcpu);
> +}
> +
> static int svm_nmi_allowed(struct kvm_vcpu *vcpu)
> {
> struct vcpu_svm *svm = to_svm(vcpu);
> @@ -3904,6 +3942,9 @@ static void enable_irq_window(struct kvm_vcpu *vcpu)
> * get that intercept, this function will be called again though and
> * we'll get the vintr intercept.
> */
> + if (svm_vcpu_avic_enabled(svm))
> + return;

Same here.

> if (gif_set(svm) && nested_svm_intr(svm)) {
> svm_set_vintr(svm);
> svm_inject_irq(svm, 0x0);
> @@ -4638,6 +4679,7 @@ static struct kvm_x86_ops svm_x86_ops = {
> .sched_in = svm_sched_in,
>
> .pmu_ops = &amd_pmu_ops,
> + .deliver_posted_interrupt = svm_deliver_avic_intr,
> };
>
> static int __init svm_init(void)
>