[PATCH] KVM: x86/mmu: Don't create SPTEs for addresses that aren't mappable
From: Sean Christopherson
Date: Wed Feb 18 2026 - 19:22:51 EST
Track the mask of guest physical address bits that can actually be mapped
by a given MMU instance that utilizes TDP, and either exit to userspace
with -EFAULT or go straight to emulation without creating an SPTE (for
emulated MMIO) if KVM can't map the address. Attempting to create an SPTE
can cause KVM to drop the unmappable bits, and thus install a bad SPTE.
E.g. when starting a walk, the TDP MMU will round the GFN based on the
root level, and drop the upper bits.
Exit with -EFAULT in the unlikely scenario userspace is misbehaving and
created a memslot that can't be addressed, e.g. if userspace installed
memory above the guest.MAXPHYADDR defined in CPUID, as there's nothing KVM
can do to make forward progress, and there _is_ a memslot for the address.
For emulated MMIO, KVM can at least kick the bad address out to userspace
via a normal MMIO exit.
The flaw has existed for a very long time, and was exposed by commit
988da7820206 ("KVM: x86/tdp_mmu: WARN if PFN changes for spurious faults")
thanks to a syzkaller program that prefaults memory at GPA 0x1000000000000
and then faults in memory at GPA 0x0 (the extra-large GPA gets wrapped to
'0').
WARNING: arch/x86/kvm/mmu/tdp_mmu.c:1183 at kvm_tdp_mmu_map+0x5c3/0xa30 [kvm], CPU#125: syz.5.22/18468
CPU: 125 UID: 0 PID: 18468 Comm: syz.5.22 Tainted: G S W 6.19.0-smp--23879af241d6-next #57 NONE
Tainted: [S]=CPU_OUT_OF_SPEC, [W]=WARN
Hardware name: Google Izumi-EMR/izumi, BIOS 0.20250917.0-0 09/17/2025
RIP: 0010:kvm_tdp_mmu_map+0x5c3/0xa30 [kvm]
Call Trace:
<TASK>
kvm_tdp_page_fault+0x107/0x140 [kvm]
kvm_mmu_do_page_fault+0x121/0x200 [kvm]
kvm_arch_vcpu_pre_fault_memory+0x18c/0x230 [kvm]
kvm_vcpu_pre_fault_memory+0x116/0x1e0 [kvm]
kvm_vcpu_ioctl+0x3a5/0x6b0 [kvm]
__se_sys_ioctl+0x6d/0xb0
do_syscall_64+0x8d/0x900
entry_SYSCALL_64_after_hwframe+0x4b/0x53
</TASK>
In practice, the flaw is benign (other than the new WARN) as it only
affects guests that ignore guest.MAXPHYADDR (e.g. on CPUs with 52-bit
physical addresses but only 4-level paging) or guests being run by a
misbehaving userspace VMM (e.g. a VMM that ignored allow_smaller_maxphyaddr
or is pre-faulting bad addresses).
For non-TDP shadow paging, always clear the unmappable mask, as the flaw
only affects GPAs, i.e. TDP walks. For 32-bit paging, 64-bit virtual addresses
simply don't exist. Even when software can shove a 64-bit address
somewhere, e.g. into SYSENTER_EIP, the value is architecturally truncated
before it reaches the page table walker. And for 64-bit paging, KVM's use
of 4-level vs. 5-level paging is tied to the guest's CR4.LA57, i.e. KVM
won't observe a 57-bit virtual address with a 4-level MMU.
Cc: Rick Edgecombe <rick.p.edgecombe@xxxxxxxxx>
Cc: Yosry Ahmed <yosry.ahmed@xxxxxxxxx>
Cc: Yan Zhao <yan.y.zhao@xxxxxxxxx>
Signed-off-by: Sean Christopherson <seanjc@xxxxxxxxxx>
---
arch/x86/include/asm/kvm_host.h | 6 +++++
arch/x86/kvm/mmu/mmu.c | 42 +++++++++++++++++++++++++++++++++
2 files changed, 48 insertions(+)
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index ff07c45e3c73..43b9777b896d 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -493,6 +493,12 @@ struct kvm_mmu {
*/
u8 permissions[16];
+ /*
+ * Mask of address bits that KVM can't map with this MMU given the root
+ * level, e.g. 5-level EPT/NPT only consume bits 51:0.
+ */
+ gpa_t unmappable_mask;
+
u64 *pae_root;
u64 *pml4_root;
u64 *pml5_root;
diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index 3911ac9bddfd..2dc9a297e6ed 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -3540,6 +3540,14 @@ static int kvm_handle_noslot_fault(struct kvm_vcpu *vcpu,
if (unlikely(fault->gfn > kvm_mmu_max_gfn()))
return RET_PF_EMULATE;
+ /*
+ * Similarly, if KVM can't map the faulting address, don't attempt to
+ * install an SPTE because KVM will effectively truncate the address
+ * when walking KVM's page tables.
+ */
+ if (unlikely(fault->addr & vcpu->arch.mmu->unmappable_mask))
+ return RET_PF_EMULATE;
+
return RET_PF_CONTINUE;
}
@@ -4681,6 +4689,11 @@ static int kvm_mmu_faultin_pfn(struct kvm_vcpu *vcpu,
return RET_PF_RETRY;
}
+ if (fault->addr & vcpu->arch.mmu->unmappable_mask) {
+ kvm_mmu_prepare_memory_fault_exit(vcpu, fault);
+ return -EFAULT;
+ }
+
if (slot->id == APIC_ACCESS_PAGE_PRIVATE_MEMSLOT) {
/*
* Don't map L1's APIC access page into L2, KVM doesn't support
@@ -5772,6 +5785,29 @@ u8 kvm_mmu_get_max_tdp_level(void)
return tdp_root_level ? tdp_root_level : max_tdp_level;
}
+static void reset_tdp_unmappable_mask(struct kvm_mmu *mmu)
+{
+ int max_addr_bit;
+
+ switch (mmu->root_role.level) {
+ case PT64_ROOT_5LEVEL:
+ max_addr_bit = 52;
+ break;
+ case PT64_ROOT_4LEVEL:
+ max_addr_bit = 48;
+ break;
+ case PT32E_ROOT_LEVEL:
+ max_addr_bit = 32;
+ break;
+ default:
+ WARN_ONCE(1, "Unhandled root level %u\n", mmu->root_role.level);
+ mmu->unmappable_mask = 0;
+ return;
+ }
+
+ mmu->unmappable_mask = rsvd_bits(max_addr_bit, 63);
+}
+
static union kvm_mmu_page_role
kvm_calc_tdp_mmu_root_page_role(struct kvm_vcpu *vcpu,
union kvm_cpu_role cpu_role)
@@ -5816,6 +5852,7 @@ static void init_kvm_tdp_mmu(struct kvm_vcpu *vcpu,
else
context->gva_to_gpa = paging32_gva_to_gpa;
+ reset_tdp_unmappable_mask(context);
reset_guest_paging_metadata(vcpu, context);
reset_tdp_shadow_zero_bits_mask(context);
}
@@ -5889,6 +5926,8 @@ void kvm_init_shadow_npt_mmu(struct kvm_vcpu *vcpu, unsigned long cr0,
root_role.passthrough = 1;
shadow_mmu_init_context(vcpu, context, cpu_role, root_role);
+ reset_tdp_unmappable_mask(context);
+
kvm_mmu_new_pgd(vcpu, nested_cr3);
}
EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_init_shadow_npt_mmu);
@@ -5939,6 +5978,7 @@ void kvm_init_shadow_ept_mmu(struct kvm_vcpu *vcpu, bool execonly,
update_permission_bitmask(context, true);
context->pkru_mask = 0;
+ reset_tdp_unmappable_mask(context);
reset_rsvds_bits_mask_ept(vcpu, context, execonly, huge_page_level);
reset_ept_shadow_zero_bits_mask(context, execonly);
}
@@ -5954,6 +5994,8 @@ static void init_kvm_softmmu(struct kvm_vcpu *vcpu,
kvm_init_shadow_mmu(vcpu, cpu_role);
+ context->unmappable_mask = 0;
+
context->get_guest_pgd = get_guest_cr3;
context->get_pdptr = kvm_pdptr_read;
context->inject_page_fault = kvm_inject_page_fault;
base-commit: 183bb0ce8c77b0fd1fb25874112bc8751a461e49
--
2.53.0.345.g96ddfc5eaa-goog