[RFC PATCH 57/73] x86/pvm: Make cpu entry area and vmalloc area variable

From: Lai Jiangshan
Date: Mon Feb 26 2024 - 10:17:15 EST


From: Hou Wenlong <houwenlong.hwl@xxxxxxxxxxxx>

For the PVM guest, the entire kernel layout should be within the allowed
virtual address range. Therefore, establish CPU_ENTRY_AREA_BASE and
VMEMORY_END as a variable for the PVM guest, allowing it to be
modified as necessary for the PVM guest.

Signed-off-by: Hou Wenlong <houwenlong.hwl@xxxxxxxxxxxx>
Signed-off-by: Lai Jiangshan <jiangshan.ljs@xxxxxxxxxxxx>
---
arch/x86/include/asm/page_64.h | 3 +++
arch/x86/include/asm/pgtable_64_types.h | 14 ++++++++++++--
arch/x86/kernel/head64.c | 7 +++++++
arch/x86/mm/dump_pagetables.c | 3 ++-
arch/x86/mm/kaslr.c | 4 ++--
5 files changed, 26 insertions(+), 5 deletions(-)

diff --git a/arch/x86/include/asm/page_64.h b/arch/x86/include/asm/page_64.h
index b8692e6cc939..4f64f049f3d0 100644
--- a/arch/x86/include/asm/page_64.h
+++ b/arch/x86/include/asm/page_64.h
@@ -18,6 +18,9 @@ extern unsigned long page_offset_base;
extern unsigned long vmalloc_base;
extern unsigned long vmemmap_base;

+extern unsigned long cpu_entry_area_base;
+extern unsigned long vmemory_end;
+
static __always_inline unsigned long __phys_addr_nodebug(unsigned long x)
{
unsigned long y = x - KERNEL_MAP_BASE;
diff --git a/arch/x86/include/asm/pgtable_64_types.h b/arch/x86/include/asm/pgtable_64_types.h
index 6780f2e63717..66c8e7325d27 100644
--- a/arch/x86/include/asm/pgtable_64_types.h
+++ b/arch/x86/include/asm/pgtable_64_types.h
@@ -140,6 +140,7 @@ extern unsigned int ptrs_per_p4d;
# define VMEMMAP_START __VMEMMAP_BASE_L4
#endif /* CONFIG_DYNAMIC_MEMORY_LAYOUT */

+#ifndef CONFIG_PVM_GUEST
/*
* End of the region for which vmalloc page tables are pre-allocated.
* For non-KMSAN builds, this is the same as VMALLOC_END.
@@ -147,6 +148,10 @@ extern unsigned int ptrs_per_p4d;
* VMALLOC_START..VMALLOC_END (see below).
*/
#define VMEMORY_END (VMALLOC_START + (VMALLOC_SIZE_TB << 40) - 1)
+#else
+#define RAW_VMEMORY_END (__VMALLOC_BASE_L4 + (VMALLOC_SIZE_TB_L4 << 40) - 1)
+#define VMEMORY_END vmemory_end
+#endif /* CONFIG_PVM_GUEST */

#ifndef CONFIG_KMSAN
#define VMALLOC_END VMEMORY_END
@@ -166,7 +171,7 @@ extern unsigned int ptrs_per_p4d;
* KMSAN_MODULES_ORIGIN_START to
* KMSAN_MODULES_ORIGIN_START + MODULES_LEN - origins for modules.
*/
-#define VMALLOC_QUARTER_SIZE ((VMALLOC_SIZE_TB << 40) >> 2)
+#define VMALLOC_QUARTER_SIZE ((VMEMORY_END + 1 - VMALLOC_START) >> 2)
#define VMALLOC_END (VMALLOC_START + VMALLOC_QUARTER_SIZE - 1)

/*
@@ -202,7 +207,12 @@ extern unsigned int ptrs_per_p4d;
#define ESPFIX_BASE_ADDR (ESPFIX_PGD_ENTRY << P4D_SHIFT)

#define CPU_ENTRY_AREA_PGD _AC(-4, UL)
-#define CPU_ENTRY_AREA_BASE (CPU_ENTRY_AREA_PGD << P4D_SHIFT)
+#define RAW_CPU_ENTRY_AREA_BASE (CPU_ENTRY_AREA_PGD << P4D_SHIFT)
+#ifdef CONFIG_PVM_GUEST
+#define CPU_ENTRY_AREA_BASE cpu_entry_area_base
+#else
+#define CPU_ENTRY_AREA_BASE RAW_CPU_ENTRY_AREA_BASE
+#endif

#define EFI_VA_START ( -4 * (_AC(1, UL) << 30))
#define EFI_VA_END (-68 * (_AC(1, UL) << 30))
diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c
index 0b0e460609e5..d0e8d648bd38 100644
--- a/arch/x86/kernel/head64.c
+++ b/arch/x86/kernel/head64.c
@@ -72,6 +72,13 @@ unsigned long kernel_map_base __ro_after_init = __START_KERNEL_map;
EXPORT_SYMBOL(kernel_map_base);
#endif

+#ifdef CONFIG_PVM_GUEST
+unsigned long cpu_entry_area_base __ro_after_init = RAW_CPU_ENTRY_AREA_BASE;
+EXPORT_SYMBOL(cpu_entry_area_base);
+unsigned long vmemory_end __ro_after_init = RAW_VMEMORY_END;
+EXPORT_SYMBOL(vmemory_end);
+#endif
+
/* Wipe all early page tables except for the kernel symbol map */
static void __init reset_early_page_tables(void)
{
diff --git a/arch/x86/mm/dump_pagetables.c b/arch/x86/mm/dump_pagetables.c
index d5c6f61242aa..166c7d36d8ff 100644
--- a/arch/x86/mm/dump_pagetables.c
+++ b/arch/x86/mm/dump_pagetables.c
@@ -95,7 +95,7 @@ static struct addr_marker address_markers[] = {
#ifdef CONFIG_MODIFY_LDT_SYSCALL
[LDT_NR] = { 0UL, "LDT remap" },
#endif
- [CPU_ENTRY_AREA_NR] = { CPU_ENTRY_AREA_BASE,"CPU entry Area" },
+ [CPU_ENTRY_AREA_NR] = { 0UL, "CPU entry Area" },
#ifdef CONFIG_X86_ESPFIX64
[ESPFIX_START_NR] = { ESPFIX_BASE_ADDR, "ESPfix Area", 16 },
#endif
@@ -479,6 +479,7 @@ static int __init pt_dump_init(void)
address_markers[MODULES_VADDR_NR].start_address = MODULES_VADDR;
address_markers[MODULES_END_NR].start_address = MODULES_END;
address_markers[FIXADDR_START_NR].start_address = FIXADDR_START;
+ address_markers[CPU_ENTRY_AREA_NR].start_address = CPU_ENTRY_AREA_BASE;
#endif
#ifdef CONFIG_X86_32
address_markers[VMALLOC_START_NR].start_address = VMALLOC_START;
diff --git a/arch/x86/mm/kaslr.c b/arch/x86/mm/kaslr.c
index 37db264866b6..e3825c7542a3 100644
--- a/arch/x86/mm/kaslr.c
+++ b/arch/x86/mm/kaslr.c
@@ -38,7 +38,7 @@
* highest amount of space for randomization available, but that's too hard
* to keep straight and caused issues already.
*/
-static const unsigned long vaddr_end = CPU_ENTRY_AREA_BASE;
+static const unsigned long vaddr_end = RAW_CPU_ENTRY_AREA_BASE;

/*
* Memory regions randomized by KASLR (except modules that use a separate logic
@@ -79,7 +79,7 @@ void __init kernel_randomize_memory(void)
* limited....
*/
BUILD_BUG_ON(vaddr_start >= vaddr_end);
- BUILD_BUG_ON(vaddr_end != CPU_ENTRY_AREA_BASE);
+ BUILD_BUG_ON(vaddr_end != RAW_CPU_ENTRY_AREA_BASE);
BUILD_BUG_ON(vaddr_end > __START_KERNEL_map);

if (!kaslr_memory_enabled())
--
2.19.1.6.gb485710b