[tip: x86/boot] x86/kexec: Add data section to relocate_kernel

From: tip-bot2 for David Woodhouse
Date: Fri Dec 06 2024 - 05:20:26 EST


The following commit has been merged into the x86/boot branch of tip:

Commit-ID: 8dbec5c77bc32f04583d3973c8178a74e72fdf18
Gitweb: https://git.kernel.org/tip/8dbec5c77bc32f04583d3973c8178a74e72fdf18
Author: David Woodhouse <dwmw@xxxxxxxxxxxx>
AuthorDate: Thu, 05 Dec 2024 15:05:15
Committer: Ingo Molnar <mingo@xxxxxxxxxx>
CommitterDate: Fri, 06 Dec 2024 10:42:00 +01:00

x86/kexec: Add data section to relocate_kernel

Now that the relocate_kernel page is handled sanely by a linker script,
we can have actual data and just use %rip-relative addressing to access
it.

Signed-off-by: David Woodhouse <dwmw@xxxxxxxxxxxx>
Signed-off-by: Ingo Molnar <mingo@xxxxxxxxxx>
Cc: Baoquan He <bhe@xxxxxxxxxx>
Cc: Vivek Goyal <vgoyal@xxxxxxxxxx>
Cc: Dave Young <dyoung@xxxxxxxxxx>
Cc: Eric Biederman <ebiederm@xxxxxxxxxxxx>
Cc: Ard Biesheuvel <ardb@xxxxxxxxxx>
Cc: "H. Peter Anvin" <hpa@xxxxxxxxx>
Link: https://lore.kernel.org/r/20241205153343.3275139-10-dwmw2@xxxxxxxxxxxxx
---
arch/x86/kernel/machine_kexec_64.c | 8 ++-
arch/x86/kernel/relocate_kernel_64.S | 62 +++++++++++++--------------
arch/x86/kernel/vmlinux.lds.S | 1 +-
3 files changed, 38 insertions(+), 33 deletions(-)

diff --git a/arch/x86/kernel/machine_kexec_64.c b/arch/x86/kernel/machine_kexec_64.c
index 23dffdc..63dca5c 100644
--- a/arch/x86/kernel/machine_kexec_64.c
+++ b/arch/x86/kernel/machine_kexec_64.c
@@ -343,6 +343,7 @@ void machine_kexec(struct kimage *image)
unsigned long start_address,
unsigned int preserve_context,
unsigned int host_mem_enc_active);
+ unsigned long reloc_start = (unsigned long)__relocate_kernel_start;
unsigned long page_list[PAGES_NR];
unsigned int host_mem_enc_active;
int save_ftrace_enabled;
@@ -389,7 +390,12 @@ void machine_kexec(struct kimage *image)
page_list[PA_SWAP_PAGE] = (page_to_pfn(image->swap_page)
<< PAGE_SHIFT);

- relocate_kernel_ptr = control_page;
+ /*
+ * Allow for the possibility that relocate_kernel might not be at
+ * the very start of the page.
+ */
+ relocate_kernel_ptr = control_page + (unsigned long)relocate_kernel -
+ reloc_start;

/*
* The segment registers are funny things, they have both a
diff --git a/arch/x86/kernel/relocate_kernel_64.S b/arch/x86/kernel/relocate_kernel_64.S
index 2670044..f13866a 100644
--- a/arch/x86/kernel/relocate_kernel_64.S
+++ b/arch/x86/kernel/relocate_kernel_64.S
@@ -23,23 +23,21 @@
#define PAGE_ATTR (_PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED | _PAGE_DIRTY)

/*
- * control_page + KEXEC_CONTROL_CODE_MAX_SIZE
- * ~ control_page + PAGE_SIZE are used as data storage and stack for
- * jumping back
+ * The .text.relocate_kernel and .data.relocate_kernel sections are copied
+ * into the control page, and the remainder of the page is used as the stack.
*/
-#define DATA(offset) (KEXEC_CONTROL_CODE_MAX_SIZE+(offset))

+ .section .data.relocate_kernel,"a";
/* Minimal CPU state */
-#define RSP DATA(0x0)
-#define CR0 DATA(0x8)
-#define CR3 DATA(0x10)
-#define CR4 DATA(0x18)
-
-/* other data */
-#define CP_PA_TABLE_PAGE DATA(0x20)
-#define CP_PA_SWAP_PAGE DATA(0x28)
-#define CP_PA_BACKUP_PAGES_MAP DATA(0x30)
-#define CP_VA_CONTROL_PAGE DATA(0x38)
+SYM_DATA_LOCAL(saved_rsp, .quad 0)
+SYM_DATA_LOCAL(saved_cr0, .quad 0)
+SYM_DATA_LOCAL(saved_cr3, .quad 0)
+SYM_DATA_LOCAL(saved_cr4, .quad 0)
+ /* other data */
+SYM_DATA_LOCAL(va_control_page, .quad 0)
+SYM_DATA_LOCAL(pa_table_page, .quad 0)
+SYM_DATA_LOCAL(pa_swap_page, .quad 0)
+SYM_DATA_LOCAL(pa_backup_pages_map, .quad 0)

.section .text.relocate_kernel,"ax";
.code64
@@ -63,14 +61,13 @@ SYM_CODE_START_NOALIGN(relocate_kernel)
pushq %r15
pushf

- movq PTR(VA_CONTROL_PAGE)(%rsi), %r11
- movq %rsp, RSP(%r11)
+ movq %rsp, saved_rsp(%rip)
movq %cr0, %rax
- movq %rax, CR0(%r11)
+ movq %rax, saved_cr0(%rip)
movq %cr3, %rax
- movq %rax, CR3(%r11)
+ movq %rax, saved_cr3(%rip)
movq %cr4, %rax
- movq %rax, CR4(%r11)
+ movq %rax, saved_cr4(%rip)

/* Save CR4. Required to enable the right paging mode later. */
movq %rax, %r13
@@ -83,10 +80,11 @@ SYM_CODE_START_NOALIGN(relocate_kernel)
movq %r8, %r12

/*
- * get physical address of control page now
+ * get physical and virtual address of control page now
* this is impossible after page table switch
*/
movq PTR(PA_CONTROL_PAGE)(%rsi), %r8
+ movq PTR(VA_CONTROL_PAGE)(%rsi), %r11

/* get physical address of page table now too */
movq PTR(PA_TABLE_PAGE)(%rsi), %r9
@@ -95,10 +93,10 @@ SYM_CODE_START_NOALIGN(relocate_kernel)
movq PTR(PA_SWAP_PAGE)(%rsi), %r10

/* save some information for jumping back */
- movq %r9, CP_PA_TABLE_PAGE(%r11)
- movq %r10, CP_PA_SWAP_PAGE(%r11)
- movq %rdi, CP_PA_BACKUP_PAGES_MAP(%r11)
- movq %r11, CP_VA_CONTROL_PAGE(%r11)
+ movq %r9, pa_table_page(%rip)
+ movq %r10, pa_swap_page(%rip)
+ movq %rdi, pa_backup_pages_map(%rip)
+ movq %r11, va_control_page(%rip)

/* Save the preserve_context to %r11 as swap_pages clobbers %rcx. */
movq %rcx, %r11
@@ -229,13 +227,13 @@ SYM_CODE_START_LOCAL_NOALIGN(identity_mapped)
/* get the re-entry point of the peer system */
movq 0(%rsp), %rbp
leaq relocate_kernel(%rip), %r8
- movq CP_PA_SWAP_PAGE(%r8), %r10
- movq CP_PA_BACKUP_PAGES_MAP(%r8), %rdi
- movq CP_PA_TABLE_PAGE(%r8), %rax
+ movq pa_swap_page(%rip), %r10
+ movq pa_backup_pages_map(%rip), %rdi
+ movq pa_table_page(%rip), %rax
movq %rax, %cr3
lea PAGE_SIZE(%r8), %rsp
call swap_pages
- movq CP_VA_CONTROL_PAGE(%r8), %rax
+ movq va_control_page(%rip), %rax
addq $(virtual_mapped - relocate_kernel), %rax
pushq %rax
ANNOTATE_UNRET_SAFE
@@ -246,11 +244,11 @@ SYM_CODE_END(identity_mapped)
SYM_CODE_START_LOCAL_NOALIGN(virtual_mapped)
UNWIND_HINT_END_OF_STACK
ANNOTATE_NOENDBR // RET target, above
- movq RSP(%r8), %rsp
- movq CR4(%r8), %rax
+ movq saved_rsp(%rip), %rsp
+ movq saved_cr4(%rip), %rax
movq %rax, %cr4
- movq CR3(%r8), %rax
- movq CR0(%r8), %r8
+ movq saved_cr3(%rip), %rax
+ movq saved_cr0(%rip), %r8
movq %rax, %cr3
movq %r8, %cr0

diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S
index 78ce1a0..0c89399 100644
--- a/arch/x86/kernel/vmlinux.lds.S
+++ b/arch/x86/kernel/vmlinux.lds.S
@@ -101,6 +101,7 @@ const_pcpu_hot = pcpu_hot;
. = ALIGN(0x100); \
__relocate_kernel_start = .; \
*(.text.relocate_kernel); \
+ *(.data.relocate_kernel); \
__relocate_kernel_end = .;

ASSERT(__relocate_kernel_end - __relocate_kernel_start <= KEXEC_CONTROL_CODE_MAX_SIZE,