[PATCH 1/2] KVM: nVMX: Move vTPR vs. TPR Threshold consistency check into "normal" checks
From: Sean Christopherson
Date: Wed Jun 03 2026 - 18:37:16 EST
Move the off-by-default consistency check for vmcs12.tpr_threshold vs.
the virtual APIC vTPR into the "normal" controls checks, as waiting until
KVM has loaded some amount of state is unnecessary and actively dangerous.
Specifically, failure to unwind vmcs01.GUEST_CR3 to KVM's value when EPT
is disabled results in KVM running L1 with an L1-controlled CR3, not with
KVM's CR3!
Alternatively, KVM could simply reset the MMU to force a reload of
vmcs01.GUEST_CR3, but the _only_ reason the check was shoved into a "late"
flow was to wait until the vmcs12 pages were retrieved. Rather than build
up more crusty code, simply access vTPR using a regular guest memory access
(performance isn't a concern). To circumvent the restrictions that led to
KVM deferring nested_get_vmcs12_pages(), (a) use a VM-scoped API to read
guest memory so that it always hits non-SMM memslots (for RSM), and (b)
skip the check (since it's off-by-default anyways) when the vCPU doesn't
want to run, i.e. when userspace is restoring/stuffing state.
Fixes: 1100e4910ad2 ("KVM: nVMX: Add an off-by-default module param to WARN on missed consistency checks")
Cc: stable@xxxxxxxxxxxxxxx
Signed-off-by: Sean Christopherson <seanjc@xxxxxxxxxx>
---
arch/x86/kvm/vmx/nested.c | 65 +++++++++++++++++----------------------
1 file changed, 28 insertions(+), 37 deletions(-)
diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c
index b2c851cc7d5c..039e234e7d2b 100644
--- a/arch/x86/kvm/vmx/nested.c
+++ b/arch/x86/kvm/vmx/nested.c
@@ -582,6 +582,8 @@ static int nested_vmx_check_msr_bitmap_controls(struct kvm_vcpu *vcpu,
static int nested_vmx_check_tpr_shadow_controls(struct kvm_vcpu *vcpu,
struct vmcs12 *vmcs12)
{
+ u32 vtpr;
+
if (!nested_cpu_has(vmcs12, CPU_BASED_TPR_SHADOW))
return 0;
@@ -591,6 +593,32 @@ static int nested_vmx_check_tpr_shadow_controls(struct kvm_vcpu *vcpu,
if (CC(!nested_cpu_has_vid(vmcs12) && vmcs12->tpr_threshold >> 4))
return -EINVAL;
+ /*
+ * Do the illegal vTPR vs. TPR Threshold consistency check if and only
+ * if KVM is configured to WARN on missed consistency checks, otherwise
+ * it's a waste of time. KVM needs to rely on hardware to fully detect
+ * an illegal combination due to the vTPR being writable by L1 at all
+ * times (it's an in-memory value, not a VMCS field). I.e. even if the
+ * check passes now, it might fail at the actual VM-Enter.
+ *
+ * Keying off the module param also allows treating an invalid vAPIC
+ * page as a consistency check failure without increasing the risk of
+ * breaking a "real" VM.
+ *
+ * Note! Deliberately use the VM-scoped API when reading guest memory,
+ * to ensure the read doesn't hit SMRAM when restoring L2 state on RSM,
+ * and only perform the check when in KVM_RUN, to avoid a false failure
+ * if userspace hasn't yet configured memslots during state restore.
+ */
+ if (warn_on_missed_cc && vcpu->wants_to_run &&
+ nested_cpu_has(vmcs12, CPU_BASED_TPR_SHADOW) &&
+ !nested_cpu_has_vid(vmcs12) &&
+ !nested_cpu_has2(vmcs12, SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES) &&
+ (CC(kvm_read_guest(vcpu->kvm, vmcs12->virtual_apic_page_addr + APIC_TASKPRI,
+ &vtpr, sizeof(vtpr))) ||
+ CC((vmcs12->tpr_threshold & GENMASK(3, 0)) > ((vtpr >> 4) & GENMASK(3, 0)))))
+ return -EINVAL;
+
return 0;
}
@@ -3115,38 +3143,6 @@ static int nested_vmx_check_controls(struct kvm_vcpu *vcpu,
return 0;
}
-static int nested_vmx_check_controls_late(struct kvm_vcpu *vcpu,
- struct vmcs12 *vmcs12)
-{
- void *vapic = to_vmx(vcpu)->nested.virtual_apic_map.hva;
- u32 vtpr = vapic ? (*(u32 *)(vapic + APIC_TASKPRI)) >> 4 : 0;
-
- /*
- * Don't bother with the consistency checks if KVM isn't configured to
- * WARN on missed consistency checks, as KVM needs to rely on hardware
- * to fully detect an illegal vTPR vs. TRP Threshold combination due to
- * the vTPR being writable by L1 at all times (it's an in-memory value,
- * not a VMCS field). I.e. even if the check passes now, it might fail
- * at the actual VM-Enter.
- *
- * Keying off the module param also allows treating an invalid vAPIC
- * mapping as a consistency check failure without increasing the risk
- * of breaking a "real" VM.
- */
- if (!warn_on_missed_cc)
- return 0;
-
- if ((exec_controls_get(to_vmx(vcpu)) & CPU_BASED_TPR_SHADOW) &&
- nested_cpu_has(vmcs12, CPU_BASED_TPR_SHADOW) &&
- !nested_cpu_has_vid(vmcs12) &&
- !nested_cpu_has2(vmcs12, SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES) &&
- (CC(!vapic) ||
- CC((vmcs12->tpr_threshold & GENMASK(3, 0)) > (vtpr & GENMASK(3, 0)))))
- return -EINVAL;
-
- return 0;
-}
-
static int nested_vmx_check_address_space_size(struct kvm_vcpu *vcpu,
struct vmcs12 *vmcs12)
{
@@ -3696,11 +3692,6 @@ enum nvmx_vmentry_status nested_vmx_enter_non_root_mode(struct kvm_vcpu *vcpu,
return NVMX_VMENTRY_KVM_INTERNAL_ERROR;
}
- if (nested_vmx_check_controls_late(vcpu, vmcs12)) {
- vmx_switch_vmcs(vcpu, &vmx->vmcs01);
- return NVMX_VMENTRY_VMFAIL;
- }
-
if (nested_vmx_check_guest_state(vcpu, vmcs12,
&entry_failure_code)) {
exit_reason.basic = EXIT_REASON_INVALID_STATE;
--
2.54.0.1032.g2f8565e1d1-goog