Re: [PATCH v5 07/20] x86/kexec: Invoke copy of relocate_kernel() instead of the original
From: Josh Poimboeuf
Date: Wed Dec 18 2024 - 16:23:36 EST
On Wed, Dec 18, 2024 at 10:44:25AM +0100, David Woodhouse wrote:
> > At some point we had discussed placing the code in .rodata, was it the
> > alternative preventing that?
>
> No, the alternative seems to be fine, and it's all in the .data section
> now (since the kernel does write some variables there which are then
> accessed %rip-relative from the code itself).
The linker script does place it in .data, but objtool runs on the object
file before linking, where it's still in an executable section
(.text..relocate_kernel).
How about something like below?
- move text to .data..relocate_kernel
- remove objtool annotations
- replace the alternative with a runtime check
diff --git a/arch/x86/include/asm/kexec.h b/arch/x86/include/asm/kexec.h
index 5081d0b9e290..126d777a5695 100644
--- a/arch/x86/include/asm/kexec.h
+++ b/arch/x86/include/asm/kexec.h
@@ -61,6 +61,7 @@ struct kimage;
extern unsigned long kexec_va_control_page;
extern unsigned long kexec_pa_table_page;
extern unsigned long kexec_pa_swap_page;
+extern unsigned int kexec_preserve_mce;
extern gate_desc kexec_debug_idt[];
extern unsigned char kexec_debug_exc_vectors[];
extern uint16_t kexec_debug_8250_port;
diff --git a/arch/x86/kernel/machine_kexec_64.c b/arch/x86/kernel/machine_kexec_64.c
index 853f3cdc7c75..751adeea0db8 100644
--- a/arch/x86/kernel/machine_kexec_64.c
+++ b/arch/x86/kernel/machine_kexec_64.c
@@ -363,6 +363,8 @@ int machine_kexec_prepare(struct kimage *image)
if (image->type == KEXEC_TYPE_DEFAULT)
kexec_pa_swap_page = page_to_pfn(image->swap_page) << PAGE_SHIFT;
+ kexec_preserve_mce = boot_cpu_has(X86_FEATURE_TDX_GUEST);
+
prepare_debug_idt((unsigned long)__pa(control_page),
(unsigned long)kexec_debug_exc_vectors - reloc_start);
diff --git a/arch/x86/kernel/relocate_kernel_64.S b/arch/x86/kernel/relocate_kernel_64.S
index 4dca50141586..04b7a9f11e98 100644
--- a/arch/x86/kernel/relocate_kernel_64.S
+++ b/arch/x86/kernel/relocate_kernel_64.S
@@ -24,8 +24,8 @@
#define PAGE_ATTR (_PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED | _PAGE_DIRTY)
/*
- * The .text..relocate_kernel and .data..relocate_kernel sections are copied
- * into the control page, and the remainder of the page is used as the stack.
+ * The .data..relocate_kernel section is copied into the control page, and the
+ * remainder of the page is used as the stack.
*/
.section .data..relocate_kernel,"a";
@@ -41,6 +41,7 @@ SYM_DATA(kexec_pa_swap_page, .quad 0)
SYM_DATA_LOCAL(pa_backup_pages_map, .quad 0)
SYM_DATA(kexec_debug_8250_mmio32, .quad 0)
SYM_DATA(kexec_debug_8250_port, .word 0)
+SYM_DATA(kexec_preserve_mce, .word 0)
#ifdef CONFIG_KEXEC_DEBUG
.balign 16
@@ -60,12 +61,14 @@ SYM_DATA_END(kexec_debug_idt)
#endif /* CONFIG_KEXEC_DEBUG */
- .section .text..relocate_kernel,"ax";
+/*
+ * This code is copied rather than called in place. It's free to use indirect
+ * branches and bare returns, and doesn't need ORC unwinding data. Keep it in
+ * a data section so objtool doesn't see it.
+ */
+
.code64
SYM_TYPED_FUNC_START(relocate_kernel)
- UNWIND_HINT_END_OF_STACK
- UNWIND_HINT_FUNC
- ANNOTATE_NOENDBR
/*
* %rdi indirection_page
* %rsi pa_control_page
@@ -124,12 +127,10 @@ SYM_TYPED_FUNC_START(relocate_kernel)
/* jump to identity mapped page */
addq $identity_mapped, %rsi
subq $__relocate_kernel_start, %rsi
- ANNOTATE_RETPOLINE_SAFE
jmp *%rsi
SYM_CODE_END(relocate_kernel)
SYM_CODE_START_LOCAL_NOALIGN(identity_mapped)
- UNWIND_HINT_END_OF_STACK
/*
* %rdi indirection page
* %rdx start address
@@ -199,7 +200,11 @@ SYM_CODE_START_LOCAL_NOALIGN(identity_mapped)
* PAE is always set in the original CR4.
*/
andl $(X86_CR4_PAE | X86_CR4_LA57), %r13d
- ALTERNATIVE "", __stringify(orl $X86_CR4_MCE, %r13d), X86_FEATURE_TDX_GUEST
+ movw kexec_preserve_mce(%rip), %ax
+ testw %ax, %ax
+ jz .Lwrite_cr4
+ orl $X86_CR4_MCE, %r13d
+.Lwrite_cr4:
movq %r13, %cr4
/* Flush the TLB (needed?) */
@@ -250,7 +255,6 @@ SYM_CODE_START_LOCAL_NOALIGN(identity_mapped)
xorl %r14d, %r14d
xorl %r15d, %r15d
- ANNOTATE_UNRET_SAFE
ret
int3
@@ -264,7 +268,6 @@ SYM_CODE_START_LOCAL_NOALIGN(identity_mapped)
/* push the existing entry point onto the callee's stack */
pushq %rdx
- ANNOTATE_RETPOLINE_SAFE
call *%rdx
/* get the re-entry point of the peer system */
@@ -276,7 +279,6 @@ SYM_CODE_START_LOCAL_NOALIGN(identity_mapped)
/* Find start (and end) of this physical mapping of control page */
leaq (%rip), %r8
- ANNOTATE_NOENDBR
andq $PAGE_MASK, %r8
lea PAGE_SIZE(%r8), %rsp
movq $1, %r11 /* Ensure preserve_context flag is set */
@@ -285,14 +287,11 @@ SYM_CODE_START_LOCAL_NOALIGN(identity_mapped)
addq $virtual_mapped, %rax
subq $__relocate_kernel_start, %rax
pushq %rax
- ANNOTATE_UNRET_SAFE
ret
int3
SYM_CODE_END(identity_mapped)
SYM_CODE_START_LOCAL_NOALIGN(virtual_mapped)
- UNWIND_HINT_END_OF_STACK
- ANNOTATE_NOENDBR // RET target, above
movq saved_rsp(%rip), %rsp
movq saved_cr4(%rip), %rax
movq %rax, %cr4
@@ -317,14 +316,12 @@ SYM_CODE_START_LOCAL_NOALIGN(virtual_mapped)
popq %r12
popq %rbp
popq %rbx
- ANNOTATE_UNRET_SAFE
ret
int3
SYM_CODE_END(virtual_mapped)
/* Do the copies */
SYM_CODE_START_LOCAL_NOALIGN(swap_pages)
- UNWIND_HINT_END_OF_STACK
/*
* %rdi indirection page
* %r11 preserve_context
@@ -387,7 +384,6 @@ SYM_CODE_START_LOCAL_NOALIGN(swap_pages)
lea PAGE_SIZE(%rax), %rsi
jmp .Lloop
.Ldone:
- ANNOTATE_UNRET_SAFE
ret
int3
SYM_CODE_END(swap_pages)
@@ -404,8 +400,6 @@ SYM_CODE_END(swap_pages)
#define LSR 5 /* Line Status */
SYM_CODE_START_LOCAL_NOALIGN(pr_char_8250)
- UNWIND_HINT_FUNC
- ANNOTATE_NOENDBR
addw $LSR, %dx
xchg %al, %ah
.Lxmtrdy_loop:
@@ -420,15 +414,11 @@ SYM_CODE_START_LOCAL_NOALIGN(pr_char_8250)
xchg %al, %ah
outb %al, %dx
pr_char_null:
- ANNOTATE_NOENDBR
- ANNOTATE_UNRET_SAFE
ret
SYM_CODE_END(pr_char_8250)
SYM_CODE_START_LOCAL_NOALIGN(pr_char_8250_mmio32)
- UNWIND_HINT_FUNC
- ANNOTATE_NOENDBR
.Lxmtrdy_loop_mmio:
movb (LSR*4)(%rdx), %ah
testb $XMTRDY, %ah
@@ -438,7 +428,6 @@ SYM_CODE_START_LOCAL_NOALIGN(pr_char_8250_mmio32)
.Lready_mmio:
movb %al, (%rdx)
- ANNOTATE_UNRET_SAFE
ret
SYM_CODE_END(pr_char_8250_mmio32)
@@ -463,7 +452,6 @@ SYM_CODE_END(pr_char_8250_mmio32)
/* Print the nybble in %bl, clobber %rax */
SYM_CODE_START_LOCAL_NOALIGN(pr_nybble)
- UNWIND_HINT_FUNC
movb %bl, %al
nop
andb $0x0f, %al
@@ -471,33 +459,26 @@ SYM_CODE_START_LOCAL_NOALIGN(pr_nybble)
cmpb $0x3a, %al
jb 1f
addb $('a' - '0' - 10), %al
- ANNOTATE_RETPOLINE_SAFE
1: jmp *%rsi
SYM_CODE_END(pr_nybble)
SYM_CODE_START_LOCAL_NOALIGN(pr_qword)
- UNWIND_HINT_FUNC
movq $16, %rcx
1: rolq $4, %rbx
call pr_nybble
loop 1b
movb $'\n', %al
- ANNOTATE_RETPOLINE_SAFE
jmp *%rsi
SYM_CODE_END(pr_qword)
.macro print_reg a, b, c, d, r
movb $\a, %al
- ANNOTATE_RETPOLINE_SAFE
call *%rsi
movb $\b, %al
- ANNOTATE_RETPOLINE_SAFE
call *%rsi
movb $\c, %al
- ANNOTATE_RETPOLINE_SAFE
call *%rsi
movb $\d, %al
- ANNOTATE_RETPOLINE_SAFE
call *%rsi
movq \r, %rbx
call pr_qword
@@ -506,7 +487,6 @@ SYM_CODE_END(pr_qword)
SYM_CODE_START_NOALIGN(kexec_debug_exc_vectors)
/* Each of these is 6 bytes. */
.macro vec_err exc
- UNWIND_HINT_ENTRY
. = kexec_debug_exc_vectors + (\exc * KEXEC_DEBUG_EXC_HANDLER_SIZE)
nop
nop
@@ -515,14 +495,12 @@ SYM_CODE_START_NOALIGN(kexec_debug_exc_vectors)
.endm
.macro vec_noerr exc
- UNWIND_HINT_ENTRY
. = kexec_debug_exc_vectors + (\exc * KEXEC_DEBUG_EXC_HANDLER_SIZE)
pushq $0
pushq $\exc
jmp exc_handler
.endm
- ANNOTATE_NOENDBR
vec_noerr 0 // #DE
vec_noerr 1 // #DB
vec_noerr 2 // #NMI
diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S
index 63ff60a11be5..9c2abfd6ea26 100644
--- a/arch/x86/kernel/vmlinux.lds.S
+++ b/arch/x86/kernel/vmlinux.lds.S
@@ -100,7 +100,6 @@ const_pcpu_hot = pcpu_hot;
#define KEXEC_RELOCATE_KERNEL \
. = ALIGN(0x100); \
__relocate_kernel_start = .; \
- *(.text..relocate_kernel); \
*(.data..relocate_kernel); \
__relocate_kernel_end = .;