[PATCH v8 09/23] x86/decompressor: Call trampoline as a normal function

From: Ard Biesheuvel
Date: Wed Aug 02 2023 - 11:50:20 EST


Move the long return to switch to 32-bit mode into the trampoline code
so it can be called as an ordinary function. This will allow it to be
called directly from C code in a subsequent patch.

While at it, reorganize the code somewhat to keep the prologue and
epilogue of the function together, making the code a bit easier to
follow. Also, given that the trampoline is now entered in 64-bit mode, a
simple RIP-relative reference can be used to take the address of the
exit point.

Acked-by: Kirill A. Shutemov <kirill.shutemov@xxxxxxxxxxxxxxx>
Signed-off-by: Ard Biesheuvel <ardb@xxxxxxxxxx>
---
arch/x86/boot/compressed/head_64.S | 79 +++++++++-----------
arch/x86/boot/compressed/pgtable.h | 2 +-
2 files changed, 36 insertions(+), 45 deletions(-)

diff --git a/arch/x86/boot/compressed/head_64.S b/arch/x86/boot/compressed/head_64.S
index 19bf810409e2aa62..91b5eee306148f9a 100644
--- a/arch/x86/boot/compressed/head_64.S
+++ b/arch/x86/boot/compressed/head_64.S
@@ -447,39 +447,8 @@ SYM_CODE_START(startup_64)
/* Save the trampoline address in RCX */
movq %rax, %rcx

- /* Set up 32-bit addressable stack */
- leaq TRAMPOLINE_32BIT_STACK_END(%rcx), %rsp
-
- /*
- * Preserve live 64-bit registers on the stack: this is necessary
- * because the architecture does not guarantee that GPRs will retain
- * their full 64-bit values across a 32-bit mode switch.
- */
- pushq %r15
- pushq %rbp
- pushq %rbx
-
- /*
- * Push the 64-bit address of trampoline_return() onto the new stack.
- * It will be used by the trampoline to return to the main code. Due to
- * the 32-bit mode switch, it cannot be kept it in a register either.
- */
- leaq trampoline_return(%rip), %rdi
- pushq %rdi
-
- /* Switch to compatibility mode (CS.L = 0 CS.D = 1) via far return */
- pushq $__KERNEL32_CS
leaq TRAMPOLINE_32BIT_CODE_OFFSET(%rax), %rax
- pushq %rax
- lretq
-trampoline_return:
- /* Restore live 64-bit registers */
- popq %rbx
- popq %rbp
- popq %r15
-
- /* Restore the stack, the 32-bit trampoline uses its own stack */
- leaq rva(boot_stack_end)(%rbx), %rsp
+ call *%rax

/*
* cleanup_trampoline() would restore trampoline memory.
@@ -561,7 +530,6 @@ SYM_FUNC_START_LOCAL_NOALIGN(.Lrelocated)
jmp *%rax
SYM_FUNC_END(.Lrelocated)

- .code32
/*
* This is the 32-bit trampoline that will be copied over to low memory.
*
@@ -570,6 +538,39 @@ SYM_FUNC_END(.Lrelocated)
* Non zero RDX means trampoline needs to enable 5-level paging.
*/
SYM_CODE_START(trampoline_32bit_src)
+ /*
+ * Preserve live 64-bit registers on the stack: this is necessary
+ * because the architecture does not guarantee that GPRs will retain
+ * their full 64-bit values across a 32-bit mode switch.
+ */
+ pushq %r15
+ pushq %rbp
+ pushq %rbx
+
+ /* Set up 32-bit addressable stack and push the old RSP value */
+ leaq (TRAMPOLINE_32BIT_STACK_END - 8)(%rcx), %rbx
+ movq %rsp, (%rbx)
+ movq %rbx, %rsp
+
+ /* Take the address of the trampoline exit code */
+ leaq .Lret(%rip), %rbx
+
+ /* Switch to compatibility mode (CS.L = 0 CS.D = 1) via far return */
+ pushq $__KERNEL32_CS
+ leaq 0f(%rip), %rax
+ pushq %rax
+ lretq
+
+.Lret:
+ /* Restore the preserved 64-bit registers */
+ movq (%rsp), %rsp
+ popq %rbx
+ popq %rbp
+ popq %r15
+ retq
+
+ .code32
+0:
/* Set up data and stack segments */
movl $__KERNEL_DS, %eax
movl %eax, %ds
@@ -633,12 +634,9 @@ SYM_CODE_START(trampoline_32bit_src)
1:
movl %eax, %cr4

- /* Calculate address of paging_enabled() once we are executing in the trampoline */
- leal .Lpaging_enabled - trampoline_32bit_src + TRAMPOLINE_32BIT_CODE_OFFSET(%ecx), %eax
-
/* Prepare the stack for far return to Long Mode */
pushl $__KERNEL_CS
- pushl %eax
+ pushl %ebx

/* Enable paging again. */
movl %cr0, %eax
@@ -648,12 +646,6 @@ SYM_CODE_START(trampoline_32bit_src)
lret
SYM_CODE_END(trampoline_32bit_src)

- .code64
-SYM_FUNC_START_LOCAL_NOALIGN(.Lpaging_enabled)
- /* Return from the trampoline */
- retq
-SYM_FUNC_END(.Lpaging_enabled)
-
/*
* The trampoline code has a size limit.
* Make sure we fail to compile if the trampoline code grows
@@ -661,7 +653,6 @@ SYM_FUNC_END(.Lpaging_enabled)
*/
.org trampoline_32bit_src + TRAMPOLINE_32BIT_CODE_SIZE

- .code32
SYM_FUNC_START_LOCAL_NOALIGN(.Lno_longmode)
/* This isn't an x86-64 CPU, so hang intentionally, we cannot continue */
1:
diff --git a/arch/x86/boot/compressed/pgtable.h b/arch/x86/boot/compressed/pgtable.h
index cc9b2529a08634b4..91dbb99203fbce2d 100644
--- a/arch/x86/boot/compressed/pgtable.h
+++ b/arch/x86/boot/compressed/pgtable.h
@@ -6,7 +6,7 @@
#define TRAMPOLINE_32BIT_PGTABLE_OFFSET 0

#define TRAMPOLINE_32BIT_CODE_OFFSET PAGE_SIZE
-#define TRAMPOLINE_32BIT_CODE_SIZE 0x80
+#define TRAMPOLINE_32BIT_CODE_SIZE 0xA0

#define TRAMPOLINE_32BIT_STACK_END TRAMPOLINE_32BIT_SIZE

--
2.39.2