[PATCH v11 10/16] KVM: x86: Virtualize LAM for supervisor pointer

From: Binbin Wu
Date: Wed Sep 13 2023 - 11:40:51 EST


From: Robert Hoo <robert.hu@xxxxxxxxxxxxxxx>

Add support to allow guests to set the new CR4 control bit for LAM and add
implementation to get untagged address for supervisor pointers.

LAM modifies the canonicality check applied to 64-bit linear addresses for
data accesses, allowing software to use of the untranslated address bits for
metadata and masks the metadata bits before using them as linear addresses
to access memory. LAM uses CR4.LAM_SUP (bit 28) to configure and enable LAM
for supervisor pointers. It also changes VMENTER to allow the bit to be set
in VMCS's HOST_CR4 and GUEST_CR4 to support virtualization. Note CR4.LAM_SUP
is allowed to be set even not in 64-bit mode, but it will not take effect
since LAM only applies to 64-bit linear addresses.

Move CR4.LAM_SUP out of CR4_RESERVED_BITS, its reservation depends on vcpu
supporting LAM or not. Leave it intercepted to prevent guest from setting
the bit if LAM is not exposed to guest as well as to avoid vmread every time
when KVM fetches its value, with the expectation that guest won't toggle the
bit frequently.

Set CR4.LAM_SUP bit in the emulated IA32_VMX_CR4_FIXED1 MSR for guests to
allow guests to enable LAM for supervisor pointers in nested VMX operation.

Hardware is not required to do TLB flush when CR4.LAM_SUP toggled, KVM doesn't
need to emulate TLB flush based on it.
There's no other features/vmx_exec_controls connection, no other code needed
in {kvm,vmx}_set_cr4().

Skip address untag for instruction fetch, branch target and operand of INVLPG,
which LAM doesn't apply to. Skip address untag for implicit system accesses
since LAM doesn't apply to the loading of base addresses of memory management
registers and segment registers, their values still need to be canonical (for
now, get_untagged_addr() interface is not called for implicit system accesses,
just for future proof).

Signed-off-by: Robert Hoo <robert.hu@xxxxxxxxxxxxxxx>
Co-developed-by: Binbin Wu <binbin.wu@xxxxxxxxxxxxxxx>
Signed-off-by: Binbin Wu <binbin.wu@xxxxxxxxxxxxxxx>
Reviewed-by: Chao Gao <chao.gao@xxxxxxxxx>
Reviewed-by: Kai Huang <kai.huang@xxxxxxxxx>
Tested-by: Xuelian Guo <xuelian.guo@xxxxxxxxx>
---
arch/x86/include/asm/kvm_host.h | 3 ++-
arch/x86/kvm/vmx/vmx.c | 40 ++++++++++++++++++++++++++++++++-
arch/x86/kvm/x86.h | 2 ++
3 files changed, 43 insertions(+), 2 deletions(-)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 08e94f30d376..d4e3657b840a 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -125,7 +125,8 @@
| X86_CR4_PGE | X86_CR4_PCE | X86_CR4_OSFXSR | X86_CR4_PCIDE \
| X86_CR4_OSXSAVE | X86_CR4_SMEP | X86_CR4_FSGSBASE \
| X86_CR4_OSXMMEXCPT | X86_CR4_LA57 | X86_CR4_VMXE \
- | X86_CR4_SMAP | X86_CR4_PKE | X86_CR4_UMIP))
+ | X86_CR4_SMAP | X86_CR4_PKE | X86_CR4_UMIP \
+ | X86_CR4_LAM_SUP))

#define CR8_RESERVED_BITS (~(unsigned long)X86_CR8_TPR)

diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index b572cfe27342..ee35a91aa584 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -7677,6 +7677,9 @@ static void nested_vmx_cr_fixed1_bits_update(struct kvm_vcpu *vcpu)
cr4_fixed1_update(X86_CR4_UMIP, ecx, feature_bit(UMIP));
cr4_fixed1_update(X86_CR4_LA57, ecx, feature_bit(LA57));

+ entry = kvm_find_cpuid_entry_index(vcpu, 0x7, 1);
+ cr4_fixed1_update(X86_CR4_LAM_SUP, eax, feature_bit(LAM));
+
#undef cr4_fixed1_update
}

@@ -8209,9 +8212,44 @@ static void vmx_vm_destroy(struct kvm *kvm)
free_pages((unsigned long)kvm_vmx->pid_table, vmx_get_pid_table_order(kvm));
}

+/*
+ * Note, the SDM states that the linear address is masked *after* the modified
+ * canonicality check, whereas KVM masks (untags) the address and then performs
+ * a "normal" canonicality check. Functionally, the two methods are identical,
+ * and when the masking occurs relative to the canonicality check isn't visible
+ * to software, i.e. KVM's behavior doesn't violate the SDM.
+ */
gva_t vmx_get_untagged_addr(struct kvm_vcpu *vcpu, gva_t gva, unsigned int flags)
{
- return gva;
+ int lam_bit;
+
+ if (flags & (X86EMUL_F_FETCH | X86EMUL_F_BRANCH | X86EMUL_F_IMPLICIT |
+ X86EMUL_F_INVLPG))
+ return gva;
+
+ if (!is_64_bit_mode(vcpu))
+ return gva;
+
+ /*
+ * Bit 63 determines if the address should be treated as user address
+ * or a supervisor address.
+ */
+ if (!(gva & BIT_ULL(63))) {
+ /* KVM doesn't yet virtualize LAM_U{48,57}. */
+ return gva;
+ } else {
+ if (!kvm_is_cr4_bit_set(vcpu, X86_CR4_LAM_SUP))
+ return gva;
+
+ lam_bit = kvm_is_cr4_bit_set(vcpu, X86_CR4_LA57) ? 56 : 47;
+ }
+
+ /*
+ * Untag the address by sign-extending the lam_bit, but NOT to bit 63.
+ * Bit 63 is retained from the raw virtual address so that untagging
+ * doesn't change a user access to a supervisor access, and vice versa.
+ */
+ return (sign_extend64(gva, lam_bit) & ~BIT_ULL(63)) | (gva & BIT_ULL(63));
}

static struct kvm_x86_ops vmx_x86_ops __initdata = {
diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h
index 1e7be1f6ab29..53e883721e71 100644
--- a/arch/x86/kvm/x86.h
+++ b/arch/x86/kvm/x86.h
@@ -529,6 +529,8 @@ bool kvm_msr_allowed(struct kvm_vcpu *vcpu, u32 index, u32 type);
__reserved_bits |= X86_CR4_VMXE; \
if (!__cpu_has(__c, X86_FEATURE_PCID)) \
__reserved_bits |= X86_CR4_PCIDE; \
+ if (!__cpu_has(__c, X86_FEATURE_LAM)) \
+ __reserved_bits |= X86_CR4_LAM_SUP; \
__reserved_bits; \
})

--
2.25.1