[RFC PATCH 55/73] x86/pvm: Relocate kernel image to specific virtual address range

From: Lai Jiangshan
Date: Mon Feb 26 2024 - 09:57:45 EST


From: Hou Wenlong <houwenlong.hwl@xxxxxxxxxxxx>

A PVM guest is only allowed to run within the specific virtual address
range provided by the hypervisor. Therefore, the PVM guest kernel needs
to be built as a PIE kernel and relocated into that range during boot.
Additionally, for a compressed kernel image, KASLR needs to be disabled;
otherwise, the guest will fail to boot.
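For illustration only (not part of this patch), a minimal user-space
sketch of how the new detect_pvm_range() below turns the PML4 index
pair reported in MSR_PVM_LINEAR_ADDRESS_RANGE into the allowed virtual
address range, and where the kernel image is then placed. The MSR value
used here is a made-up example, and the formula assumes the 4-level
canonical address layout that the patch's computation encodes:

/* Worked example with hypothetical values -- not part of this patch. */
#include <stdio.h>

#define P4D_SIZE	(1UL << 39)	/* 512 GiB covered by one PML4 entry */

int main(void)
{
	/* Pretend the hypervisor reported PML4 indexes 0x120..0x13f. */
	unsigned long msr_val = (0x13fUL << 16) | 0x120UL;
	unsigned long pml4_index_start = msr_val & 0x1ff;
	unsigned long pml4_index_end = (msr_val >> 16) & 0x1ff;
	/*
	 * 0x1fffe00 << 39 == 0xffff000000000000, the sign-extension bits
	 * of a canonical high-half address, so OR-ing in a PML4 index and
	 * scaling by P4D_SIZE yields that PML4 slot's virtual address.
	 */
	unsigned long range_start = (0x1fffe00UL | pml4_index_start) * P4D_SIZE;
	unsigned long range_end = (0x1fffe00UL | pml4_index_end) * P4D_SIZE;

	printf("pvm_range_start: %#lx\n", range_start);
	printf("pvm_range_end:   %#lx\n", range_end);
	/* The kernel image is relocated to 2 GiB below the range end. */
	printf("kernel virtbase: %#lx\n", range_end - (2UL << 30));
	return 0;
}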

Signed-off-by: Hou Wenlong <houwenlong.hwl@xxxxxxxxxxxx>
Signed-off-by: Lai Jiangshan <jiangshan.ljs@xxxxxxxxxxxx>
---
 arch/x86/Kconfig                  |  3 ++-
 arch/x86/kernel/head64_identity.c | 27 +++++++++++++++++++++++++++
 arch/x86/kernel/head_64.S         | 13 +++++++++++++
 arch/x86/kernel/pvm.c             |  5 ++++-
 4 files changed, 46 insertions(+), 2 deletions(-)

diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 2ccc8a27e081..1b4bea3db53d 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -853,7 +853,8 @@ config KVM_GUEST
 
 config PVM_GUEST
 	bool "PVM Guest support"
-	depends on X86_64 && KVM_GUEST
+	depends on X86_64 && KVM_GUEST && X86_PIE
+	select RELOCATABLE_UNCOMPRESSED_KERNEL
 	default n
 	help
 	  This option enables the kernel to run as a PVM guest under the PVM
diff --git a/arch/x86/kernel/head64_identity.c b/arch/x86/kernel/head64_identity.c
index 4548ad615ecf..4e6a073d9e6c 100644
--- a/arch/x86/kernel/head64_identity.c
+++ b/arch/x86/kernel/head64_identity.c
@@ -20,6 +20,7 @@
 #include <asm/trapnr.h>
 #include <asm/sev.h>
 #include <asm/init.h>
+#include <asm/pvm_para.h>
 
 extern pmd_t early_dynamic_pgts[EARLY_DYNAMIC_PAGE_TABLES][PTRS_PER_PMD];
 extern unsigned int next_early_pgt;
@@ -385,3 +386,29 @@ void __head __relocate_kernel(unsigned long physbase, unsigned long virtbase)
 	}
 }
 #endif
+
+#ifdef CONFIG_PVM_GUEST
+extern unsigned long pvm_range_start;
+extern unsigned long pvm_range_end;
+
+static void __head detect_pvm_range(void)
+{
+	unsigned long msr_val;
+	unsigned long pml4_index_start, pml4_index_end;
+
+	msr_val = __rdmsr(MSR_PVM_LINEAR_ADDRESS_RANGE);
+	pml4_index_start = msr_val & 0x1ff;
+	pml4_index_end = (msr_val >> 16) & 0x1ff;
+	pvm_range_start = (0x1fffe00 | pml4_index_start) * P4D_SIZE;
+	pvm_range_end = (0x1fffe00 | pml4_index_end) * P4D_SIZE;
+}
+
+void __head pvm_relocate_kernel(unsigned long physbase)
+{
+	if (!pvm_detect())
+		return;
+
+	detect_pvm_range();
+	__relocate_kernel(physbase, pvm_range_end - (2UL << 30));
+}
+#endif
diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S
index b8278f05bbd0..1d931bab4393 100644
--- a/arch/x86/kernel/head_64.S
+++ b/arch/x86/kernel/head_64.S
@@ -91,6 +91,19 @@ SYM_CODE_START_NOALIGN(startup_64)
 	movq	%rdx, PER_CPU_VAR(this_cpu_off)
 #endif
 
+#ifdef CONFIG_PVM_GUEST
+	leaq	_text(%rip), %rdi
+	call	pvm_relocate_kernel
+#ifdef CONFIG_SMP
+	/* Fill __per_cpu_offset[0] again, because it got relocated. */
+	movabs	$__per_cpu_load, %rdx
+	movabs	$__per_cpu_start, %rax
+	subq	%rax, %rdx
+	movq	%rdx, __per_cpu_offset(%rip)
+	movq	%rdx, PER_CPU_VAR(this_cpu_off)
+#endif
+#endif
+
 	call	startup_64_setup_env
 
 	/* Now switch to __KERNEL_CS so IRET works reliably */
diff --git a/arch/x86/kernel/pvm.c b/arch/x86/kernel/pvm.c
index 2d27044eaf25..fc82c71b305b 100644
--- a/arch/x86/kernel/pvm.c
+++ b/arch/x86/kernel/pvm.c
@@ -13,9 +13,12 @@
 #include <asm/cpufeature.h>
 #include <asm/pvm_para.h>
 
+unsigned long pvm_range_start __initdata;
+unsigned long pvm_range_end __initdata;
+
 void __init pvm_early_setup(void)
 {
-	if (!pvm_detect())
+	if (!pvm_range_end)
 		return;
 
 	setup_force_cpu_cap(X86_FEATURE_KVM_PVM_GUEST);
--
2.19.1.6.gb485710b