[RFC][PATCH 04/34] x86/mm: Introduce physical address limit helper

From: Dave Hansen
Date: Thu Feb 22 2024 - 13:43:46 EST



From: Dave Hansen <dave.hansen@xxxxxxxxxxxxxxx>

Modern processors enumerate the sizes of the physical and virtual address
spaces that they can handle. The kernel stashes this information for each
CPU in 'cpuinfo_x86'.

Like a lot of system-wide configuration, in practice the kernel only uses
this information from the boot CPU. That makes a lot of sense because
the kernel can't practically support two CPUs with such different
fundamental support as the size of the address spaces.

Having a per-cpu copy of this data is silly. It is, at best, a waste
of space to keep it around for the non-boot CPUs. At worst, it is yet
another bit of data that must be initialized in a particular order and
can be a source of bugs.

Introduce a helper to look up the number of supported physical address bits:

x86_phys_bits()

Replace most open-coded references to boot_cpu_data.x86_phys_bits.

This form is more compact and also opens up the door to adding some
centralized checking and enforcing rules around this important system-
wide value.

Signed-off-by: Dave Hansen <dave.hansen@xxxxxxxxxxxxxxx>
Cc: "Rafael J. Wysocki" <rafael@xxxxxxxxxx>
Cc: Len Brown <lenb@xxxxxxxxxx>
---

b/arch/x86/include/asm/kmsan.h | 2 +-
b/arch/x86/include/asm/mce.h | 2 +-
b/arch/x86/include/asm/processor.h | 5 +++++
b/arch/x86/kernel/cpu/mtrr/cleanup.c | 2 +-
b/arch/x86/kernel/cpu/mtrr/generic.c | 2 +-
b/arch/x86/kernel/cpu/mtrr/mtrr.c | 5 ++---
b/arch/x86/kernel/setup.c | 2 +-
b/arch/x86/kvm/cpuid.c | 2 +-
b/arch/x86/kvm/mmu.h | 10 +++++-----
b/arch/x86/kvm/mmu/spte.c | 2 +-
b/arch/x86/kvm/svm/svm.c | 2 +-
b/arch/x86/kvm/vmx/vmx.c | 10 +++++-----
b/arch/x86/kvm/vmx/vmx.h | 2 +-
b/arch/x86/mm/physaddr.h | 2 +-
b/drivers/acpi/acpi_fpdt.c | 2 +-
15 files changed, 28 insertions(+), 24 deletions(-)

diff -puN arch/x86/include/asm/kmsan.h~introduce-x86_phys_bits arch/x86/include/asm/kmsan.h
--- a/arch/x86/include/asm/kmsan.h~introduce-x86_phys_bits 2024-02-22 10:08:49.828507052 -0800
+++ b/arch/x86/include/asm/kmsan.h 2024-02-22 10:08:49.852507994 -0800
@@ -52,7 +52,7 @@ static inline void *arch_kmsan_get_meta_
static inline bool kmsan_phys_addr_valid(unsigned long addr)
{
if (IS_ENABLED(CONFIG_PHYS_ADDR_T_64BIT))
- return !(addr >> boot_cpu_data.x86_phys_bits);
+ return !(addr >> x86_phys_bits());
else
return true;
}
diff -puN arch/x86/include/asm/mce.h~introduce-x86_phys_bits arch/x86/include/asm/mce.h
--- a/arch/x86/include/asm/mce.h~introduce-x86_phys_bits 2024-02-22 10:08:49.828507052 -0800
+++ b/arch/x86/include/asm/mce.h 2024-02-22 10:08:49.852507994 -0800
@@ -89,7 +89,7 @@
#define MCI_MISC_ADDR_GENERIC 7 /* generic */

/* MCi_ADDR register defines */
-#define MCI_ADDR_PHYSADDR GENMASK_ULL(boot_cpu_data.x86_phys_bits - 1, 0)
+#define MCI_ADDR_PHYSADDR GENMASK_ULL(x86_phys_bits() - 1, 0)

/* CTL2 register defines */
#define MCI_CTL2_CMCI_EN BIT_ULL(30)
diff -puN arch/x86/include/asm/processor.h~introduce-x86_phys_bits arch/x86/include/asm/processor.h
--- a/arch/x86/include/asm/processor.h~introduce-x86_phys_bits 2024-02-22 10:08:49.828507052 -0800
+++ b/arch/x86/include/asm/processor.h 2024-02-22 10:08:49.852507994 -0800
@@ -767,4 +767,9 @@ static inline void weak_wrmsr_fence(void
alternative("mfence; lfence", "", ALT_NOT(X86_FEATURE_APIC_MSRS_FENCE));
}

+static inline u8 x86_phys_bits(void)
+{
+ return boot_cpu_data.x86_phys_bits;
+}
+
#endif /* _ASM_X86_PROCESSOR_H */
diff -puN arch/x86/kernel/cpu/mtrr/cleanup.c~introduce-x86_phys_bits arch/x86/kernel/cpu/mtrr/cleanup.c
--- a/arch/x86/kernel/cpu/mtrr/cleanup.c~introduce-x86_phys_bits 2024-02-22 10:08:49.832507209 -0800
+++ b/arch/x86/kernel/cpu/mtrr/cleanup.c 2024-02-22 10:08:49.852507994 -0800
@@ -170,7 +170,7 @@ set_var_mtrr(unsigned int reg, unsigned
return;
}

- mask = (1ULL << boot_cpu_data.x86_phys_bits) - 1;
+ mask = (1ULL << x86_phys_bits()) - 1;
mask &= ~((((u64)sizek) << 10) - 1);

base = ((u64)basek) << 10;
diff -puN arch/x86/kernel/cpu/mtrr/generic.c~introduce-x86_phys_bits arch/x86/kernel/cpu/mtrr/generic.c
--- a/arch/x86/kernel/cpu/mtrr/generic.c~introduce-x86_phys_bits 2024-02-22 10:08:49.832507209 -0800
+++ b/arch/x86/kernel/cpu/mtrr/generic.c 2024-02-22 10:08:49.852507994 -0800
@@ -660,7 +660,7 @@ static void __init print_mtrr_state(void
}
pr_info("MTRR variable ranges %sabled:\n",
mtrr_state.enabled & MTRR_STATE_MTRR_ENABLED ? "en" : "dis");
- high_width = (boot_cpu_data.x86_phys_bits - (32 - PAGE_SHIFT) + 3) / 4;
+ high_width = (x86_phys_bits() - (32 - PAGE_SHIFT) + 3) / 4;

for (i = 0; i < num_var_ranges; ++i) {
if (mtrr_state.var_ranges[i].mask_lo & MTRR_PHYSMASK_V)
diff -puN arch/x86/kernel/cpu/mtrr/mtrr.c~introduce-x86_phys_bits arch/x86/kernel/cpu/mtrr/mtrr.c
--- a/arch/x86/kernel/cpu/mtrr/mtrr.c~introduce-x86_phys_bits 2024-02-22 10:08:49.836507366 -0800
+++ b/arch/x86/kernel/cpu/mtrr/mtrr.c 2024-02-22 10:08:49.852507994 -0800
@@ -252,8 +252,7 @@ int mtrr_add_page(unsigned long base, un
return -EINVAL;
}

- if ((base | (base + size - 1)) >>
- (boot_cpu_data.x86_phys_bits - PAGE_SHIFT)) {
+ if ((base | (base + size - 1)) >> (x86_phys_bits() - PAGE_SHIFT)) {
pr_warn("base or size exceeds the MTRR width\n");
return -EINVAL;
}
@@ -556,7 +555,7 @@ void __init mtrr_bp_init(void)
const char *why = "(not available)";
unsigned long config, dummy;

- phys_hi_rsvd = GENMASK(31, boot_cpu_data.x86_phys_bits - 32);
+ phys_hi_rsvd = GENMASK(31, x86_phys_bits() - 32);

if (!generic_mtrrs && mtrr_state.enabled) {
/*
diff -puN arch/x86/kernel/setup.c~introduce-x86_phys_bits arch/x86/kernel/setup.c
--- a/arch/x86/kernel/setup.c~introduce-x86_phys_bits 2024-02-22 10:08:49.836507366 -0800
+++ b/arch/x86/kernel/setup.c 2024-02-22 10:08:49.852507994 -0800
@@ -813,7 +813,7 @@ void __init setup_arch(char **cmdline_p)
*/
early_reserve_memory();

- iomem_resource.end = (1ULL << boot_cpu_data.x86_phys_bits) - 1;
+ iomem_resource.end = (1ULL << x86_phys_bits()) - 1;
e820__memory_setup();
parse_setup_data();

diff -puN arch/x86/kvm/cpuid.c~introduce-x86_phys_bits arch/x86/kvm/cpuid.c
--- a/arch/x86/kvm/cpuid.c~introduce-x86_phys_bits 2024-02-22 10:08:49.836507366 -0800
+++ b/arch/x86/kvm/cpuid.c 2024-02-22 10:08:49.852507994 -0800
@@ -1236,7 +1236,7 @@ static inline int __do_cpuid_func(struct
* the HPAs do not affect GPAs.
*/
if (!tdp_enabled)
- g_phys_as = boot_cpu_data.x86_phys_bits;
+ g_phys_as = x86_phys_bits();
else if (!g_phys_as)
g_phys_as = phys_as;

diff -puN arch/x86/kvm/mmu.h~introduce-x86_phys_bits arch/x86/kvm/mmu.h
--- a/arch/x86/kvm/mmu.h~introduce-x86_phys_bits 2024-02-22 10:08:49.840507523 -0800
+++ b/arch/x86/kvm/mmu.h 2024-02-22 10:08:49.852507994 -0800
@@ -84,10 +84,10 @@ static inline gfn_t kvm_mmu_max_gfn(void
static inline u8 kvm_get_shadow_phys_bits(void)
{
/*
- * boot_cpu_data.x86_phys_bits is reduced when MKTME or SME are detected
- * in CPU detection code, but the processor treats those reduced bits as
- * 'keyID' thus they are not reserved bits. Therefore KVM needs to look at
- * the physical address bits reported by CPUID.
+ * x86_phys_bits() is reduced when MKTME or SME are detected in CPU
+ * detection code, but the processor treats those reduced bits as 'keyID'
+ * thus they are not reserved bits. Therefore KVM needs to look at the
+ * physical address bits reported by CPUID.
*/
if (likely(boot_cpu_data.extended_cpuid_level >= 0x80000008))
return cpuid_eax(0x80000008) & 0xff;
@@ -97,7 +97,7 @@ static inline u8 kvm_get_shadow_phys_bit
* custom CPUID. Proceed with whatever the kernel found since these features
* aren't virtualizable (SME/SEV also require CPUIDs higher than 0x80000008).
*/
- return boot_cpu_data.x86_phys_bits;
+ return x86_phys_bits();
}

void kvm_mmu_set_mmio_spte_mask(u64 mmio_value, u64 mmio_mask, u64 access_mask);
diff -puN arch/x86/kvm/mmu/spte.c~introduce-x86_phys_bits arch/x86/kvm/mmu/spte.c
--- a/arch/x86/kvm/mmu/spte.c~introduce-x86_phys_bits 2024-02-22 10:08:49.840507523 -0800
+++ b/arch/x86/kvm/mmu/spte.c 2024-02-22 10:08:49.852507994 -0800
@@ -468,7 +468,7 @@ void kvm_mmu_reset_all_pte_masks(void)
* the most significant bits of legal physical address space.
*/
shadow_nonpresent_or_rsvd_mask = 0;
- low_phys_bits = boot_cpu_data.x86_phys_bits;
+ low_phys_bits = x86_phys_bits();
if (boot_cpu_has_bug(X86_BUG_L1TF) &&
!WARN_ON_ONCE(boot_cpu_data.x86_cache_bits >=
52 - SHADOW_NONPRESENT_OR_RSVD_MASK_LEN)) {
diff -puN arch/x86/kvm/svm/svm.c~introduce-x86_phys_bits arch/x86/kvm/svm/svm.c
--- a/arch/x86/kvm/svm/svm.c~introduce-x86_phys_bits 2024-02-22 10:08:49.840507523 -0800
+++ b/arch/x86/kvm/svm/svm.c 2024-02-22 10:08:49.852507994 -0800
@@ -5054,7 +5054,7 @@ static __init void svm_adjust_mmio_mask(
return;

enc_bit = cpuid_ebx(0x8000001f) & 0x3f;
- mask_bit = boot_cpu_data.x86_phys_bits;
+ mask_bit = x86_phys_bits();

/* Increment the mask bit if it is the same as the encryption bit */
if (enc_bit == mask_bit)
diff -puN arch/x86/kvm/vmx/vmx.c~introduce-x86_phys_bits arch/x86/kvm/vmx/vmx.c
--- a/arch/x86/kvm/vmx/vmx.c~introduce-x86_phys_bits 2024-02-22 10:08:49.844507680 -0800
+++ b/arch/x86/kvm/vmx/vmx.c 2024-02-22 10:08:49.852507994 -0800
@@ -8444,14 +8444,14 @@ static void __init vmx_setup_me_spte_mas
* kvm_get_shadow_phys_bits() returns shadow_phys_bits. Use
* the former to avoid exposing shadow_phys_bits.
*
- * On pre-MKTME system, boot_cpu_data.x86_phys_bits equals to
+ * On pre-MKTME system, x86_phys_bits() equals to
* shadow_phys_bits. On MKTME and/or TDX capable systems,
- * boot_cpu_data.x86_phys_bits holds the actual physical address
- * w/o the KeyID bits, and shadow_phys_bits equals to MAXPHYADDR
+ * x86_phys_bits() holds the actual physical address w/o the
+ * KeyID bits, and shadow_phys_bits equals to MAXPHYADDR
* reported by CPUID. Those bits between are KeyID bits.
*/
- if (boot_cpu_data.x86_phys_bits != kvm_get_shadow_phys_bits())
- me_mask = rsvd_bits(boot_cpu_data.x86_phys_bits,
+ if (x86_phys_bits() != kvm_get_shadow_phys_bits())
+ me_mask = rsvd_bits(x86_phys_bits(),
kvm_get_shadow_phys_bits() - 1);
/*
* Unlike SME, host kernel doesn't support setting up any
diff -puN arch/x86/kvm/vmx/vmx.h~introduce-x86_phys_bits arch/x86/kvm/vmx/vmx.h
--- a/arch/x86/kvm/vmx/vmx.h~introduce-x86_phys_bits 2024-02-22 10:08:49.844507680 -0800
+++ b/arch/x86/kvm/vmx/vmx.h 2024-02-22 10:08:49.852507994 -0800
@@ -721,7 +721,7 @@ static inline bool vmx_need_pf_intercept
if (!enable_ept)
return true;

- return allow_smaller_maxphyaddr && cpuid_maxphyaddr(vcpu) < boot_cpu_data.x86_phys_bits;
+ return allow_smaller_maxphyaddr && cpuid_maxphyaddr(vcpu) < x86_phys_bits();
}

static inline bool is_unrestricted_guest(struct kvm_vcpu *vcpu)
diff -puN arch/x86/mm/physaddr.h~introduce-x86_phys_bits arch/x86/mm/physaddr.h
--- a/arch/x86/mm/physaddr.h~introduce-x86_phys_bits 2024-02-22 10:08:49.848507837 -0800
+++ b/arch/x86/mm/physaddr.h 2024-02-22 10:08:49.852507994 -0800
@@ -4,7 +4,7 @@
static inline int phys_addr_valid(resource_size_t addr)
{
#ifdef CONFIG_PHYS_ADDR_T_64BIT
- return !(addr >> boot_cpu_data.x86_phys_bits);
+ return !(addr >> x86_phys_bits());
#else
return 1;
#endif
diff -puN drivers/acpi/acpi_fpdt.c~introduce-x86_phys_bits drivers/acpi/acpi_fpdt.c
--- a/drivers/acpi/acpi_fpdt.c~introduce-x86_phys_bits 2024-02-22 10:08:49.848507837 -0800
+++ b/drivers/acpi/acpi_fpdt.c 2024-02-22 10:08:49.852507994 -0800
@@ -151,7 +151,7 @@ static bool fpdt_address_valid(u64 addre
* On some systems the table contains invalid addresses
* with unsuppored high address bits set, check for this.
*/
- return !(address >> boot_cpu_data.x86_phys_bits);
+ return !(address >> x86_phys_bits());
}
#else
static bool fpdt_address_valid(u64 address)
_