Re: [PATCH] KVM: VMX: Raise KVM_REQ_EVENT on TPR below threshold exit

From: Sean Christopherson

Date: Wed Jun 10 2026 - 15:12:20 EST


On Wed, Jun 10, 2026, Carlos López wrote:
> The TPR_THRESHOLD field in the VMCS is used by VMX to induce VM exits
> when the guest's virtual TPR falls under the specified threshold,
> allowing KVM to inject previously masked interrupts.
>
> KVM handles these VM exits in handle_tpr_below_threshold().
> Commit eb90f3417a0c ("KVM: vmx: speed up TPR below threshold vmexits")
> optimized this function by calling apic_update_ppr() instead of raising
> KVM_REQ_EVENT. apic_update_ppr() then raises KVM_REQ_EVENT if there is
> a pending, deliverable interrupt.
>
> However, if there are no new interrupts pending, apic_update_ppr()
> does not issue the request. This skips calling update_cr8_intercept(),
> and thus vmx_update_cr8_intercept() before VM entry, which results in
> a high, stale TPR_THRESHOLD. This is problematic due to the following
> sentence in 28.2.1.1 "VM-Execution Control Fields" in the SDM:
>
> The following check is performed if the “use TPR shadow” VM-execution
> control is 1 and the “virtualize APIC accesses” and “virtual-interrupt
> delivery” VM-execution controls are both 0: the value of bits 3:0 of
> the TPR threshold VM-execution control field should not be greater
> than the value of bits 7:4 of VTPR.
>
> This error condition is typically not observed when KVM runs on a bare
> metal system because modern processors support APICv, which enables
> virtual-interrupt delivery, and which KVM uses when possible. This
> causes the processor to no longer generate TPR-below threshold exits
> and to no longer check TPR_THRESHOLD on entry. However, when running
> on older platforms, or under nested virtualization on a hypervisor that
> does not support virtual-interrupt delivery and enforces this check
> (like Hyper-V) this can cause a VM entry failure with hardware error
> 0x7, as seen in [1].
>
> Fix this by re-introducing an unconditional KVM_REQ_EVENT when reacting
> to a TPR-below-threshold exit, ensuring that vmx_update_cr8_intercept()
> is called to re-evaluate TPR_THRESHOLD before entering the guest.
>
> Link: https://github.com/coconut-svsm/svsm/issues/1081 [1]
> Tested-by: Stefano Garzarella <sgarzare@xxxxxxxxxx>
> Cc: stable@xxxxxxxxxxxxxxx
> Fixes: eb90f3417a0c ("KVM: vmx: speed up TPR below threshold vmexits")
> Signed-off-by: Carlos López <clopez@xxxxxxx>
> ---
> arch/x86/kvm/vmx/vmx.c | 1 +
> 1 file changed, 1 insertion(+)
>
> diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
> index c548f22375ad..21a469d3ba21 100644
> --- a/arch/x86/kvm/vmx/vmx.c
> +++ b/arch/x86/kvm/vmx/vmx.c
> @@ -5824,6 +5824,7 @@ void vmx_set_dr7(struct kvm_vcpu *vcpu, unsigned long val)
> static int handle_tpr_below_threshold(struct kvm_vcpu *vcpu)
> {
> kvm_apic_update_ppr(vcpu);
> + kvm_make_request(KVM_REQ_EVENT, vcpu);
> return 1;
> }

Don't all the other flows that update PPR have the same bug, at least in theory?
Forcing KVM_REQ_EVENT is a bit of a hack, it seems like we should instead be able
to do something like this (probably not this aggressively for stable@):

---
arch/x86/kvm/lapic.c | 31 +++++++++++++++++++++++++++++++
arch/x86/kvm/x86.c | 37 ++-----------------------------------
2 files changed, 33 insertions(+), 35 deletions(-)

diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index 4078e624ca66..1b66c878bb67 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -939,6 +939,32 @@ static bool pv_eoi_test_and_clr_pending(struct kvm_vcpu *vcpu)
return val;
}

+static void update_cr8_intercept(struct kvm_vcpu *vcpu)
+{
+ int max_irr, tpr;
+
+ if (!kvm_x86_ops.update_cr8_intercept)
+ return;
+
+ if (WARN_ON_ONCE(!lapic_in_kernel(vcpu)))
+ return;
+
+ if (vcpu->arch.apic->apicv_active)
+ return;
+
+ if (!vcpu->arch.apic->vapic_addr)
+ max_irr = kvm_lapic_find_highest_irr(vcpu);
+ else
+ max_irr = -1;
+
+ if (max_irr != -1)
+ max_irr >>= 4;
+
+ tpr = kvm_lapic_get_cr8(vcpu);
+
+ kvm_x86_call(update_cr8_intercept)(vcpu, tpr, max_irr);
+}
+
static int apic_has_interrupt_for_ppr(struct kvm_lapic *apic, u32 ppr)
{
int highest_irr;
@@ -980,6 +1006,8 @@ static void apic_update_ppr(struct kvm_lapic *apic)
if (__apic_update_ppr(apic, &ppr) &&
apic_has_interrupt_for_ppr(apic, ppr) != -1)
kvm_make_request(KVM_REQ_EVENT, apic->vcpu);
+ else
+ update_cr8_intercept(apic->vcpu);
}

void kvm_apic_update_ppr(struct kvm_vcpu *vcpu)
@@ -3290,6 +3318,7 @@ int kvm_apic_set_state(struct kvm_vcpu *vcpu, struct kvm_lapic_state *s)
kvm_apic_update_apicv(vcpu);
if (apic->apicv_active)
kvm_x86_call(apicv_post_state_restore)(vcpu);
+
kvm_make_request(KVM_REQ_EVENT, vcpu);

#ifdef CONFIG_KVM_IOAPIC
@@ -3394,6 +3423,8 @@ void kvm_lapic_sync_to_vapic(struct kvm_vcpu *vcpu)
int max_irr, max_isr;
struct kvm_lapic *apic = vcpu->arch.apic;

+ update_cr8_intercept(vcpu);
+
apic_sync_pv_eoi_to_guest(vcpu, apic);

if (!test_bit(KVM_APIC_CHECK_VAPIC, &vcpu->arch.apic_attention))
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 0550359ed798..116ce6209c67 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -128,7 +128,6 @@ static u64 __read_mostly efer_reserved_bits = ~((u64)EFER_SCE);
KVM_X2APIC_ENABLE_SUPPRESS_EOI_BROADCAST | \
KVM_X2APIC_DISABLE_SUPPRESS_EOI_BROADCAST)

-static void update_cr8_intercept(struct kvm_vcpu *vcpu);
static void process_nmi(struct kvm_vcpu *vcpu);
static void __kvm_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags);
static void store_regs(struct kvm_vcpu *vcpu);
@@ -5340,7 +5339,6 @@ static int kvm_vcpu_ioctl_set_lapic(struct kvm_vcpu *vcpu,
r = kvm_apic_set_state(vcpu, s);
if (r)
return r;
- update_cr8_intercept(vcpu);

return 0;
}
@@ -10595,33 +10593,6 @@ static void post_kvm_run_save(struct kvm_vcpu *vcpu)
kvm_run->flags |= KVM_RUN_X86_GUEST_MODE;
}

-static void update_cr8_intercept(struct kvm_vcpu *vcpu)
-{
- int max_irr, tpr;
-
- if (!kvm_x86_ops.update_cr8_intercept)
- return;
-
- if (!lapic_in_kernel(vcpu))
- return;
-
- if (vcpu->arch.apic->apicv_active)
- return;
-
- if (!vcpu->arch.apic->vapic_addr)
- max_irr = kvm_lapic_find_highest_irr(vcpu);
- else
- max_irr = -1;
-
- if (max_irr != -1)
- max_irr >>= 4;
-
- tpr = kvm_lapic_get_cr8(vcpu);
-
- kvm_x86_call(update_cr8_intercept)(vcpu, tpr, max_irr);
-}
-
-
int kvm_check_nested_events(struct kvm_vcpu *vcpu)
{
if (kvm_test_request(KVM_REQ_TRIPLE_FAULT, vcpu)) {
@@ -11361,10 +11332,8 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
if (req_int_win)
kvm_x86_call(enable_irq_window)(vcpu);

- if (kvm_lapic_enabled(vcpu)) {
- update_cr8_intercept(vcpu);
+ if (kvm_lapic_enabled(vcpu))
kvm_lapic_sync_to_vapic(vcpu);
- }
}

r = kvm_mmu_reload(vcpu);
@@ -12481,8 +12450,6 @@ static int __set_sregs_common(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs,
kvm_register_mark_dirty(vcpu, VCPU_EXREG_CR3);
kvm_x86_call(post_set_cr3)(vcpu, sregs->cr3);

- kvm_set_cr8(vcpu, sregs->cr8);
-
*mmu_reset_needed |= vcpu->arch.efer != sregs->efer;
kvm_x86_call(set_efer)(vcpu, sregs->efer);

@@ -12511,7 +12478,7 @@ static int __set_sregs_common(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs,
kvm_set_segment(vcpu, &sregs->tr, VCPU_SREG_TR);
kvm_set_segment(vcpu, &sregs->ldt, VCPU_SREG_LDTR);

- update_cr8_intercept(vcpu);
+ kvm_set_cr8(vcpu, sregs->cr8);

/* Older userspace won't unhalt the vcpu on reset. */
if (kvm_vcpu_is_bsp(vcpu) && kvm_rip_read(vcpu) == 0xfff0 &&

base-commit: fd408f8da71c91d589cf05674c2e114fc2267b31
--