Re: [PATCH v5 07/20] x86/kexec: Invoke copy of relocate_kernel() instead of the original
From: Nathan Chancellor
Date: Mon Dec 16 2024 - 00:49:56 EST
On Sun, Dec 15, 2024 at 10:09:57AM +0000, David Woodhouse wrote:
> On Sat, 2024-12-14 at 16:08 -0700, Nathan Chancellor wrote:
> >
> > I guess this seems somewhat unavoidable because control_page is just a
> > 'void *', perhaps machine_kexec() should just be marked as __nocfi? This
> > diff resolves that issue for me.
>
> The patch below seems to work too. I already wanted to deal with the

Can confirm, thanks for the quick fix. With your fix for the first issue
I reported, the fix I sent for LTO, and this patch below, I can kexec on
a CFI- and LTO-enabled kernel without any issues.

> case where relocate_kernel isn't at the start of the page, so it forces
> me to do that.
>
> For some reason it also started complaining
> vmlinux.o: warning: objtool: relocate_kernel+0x6a: return with modified stack frame
> ... which is easy to fix just by turning it into a jmp *%rsi; I have no
> idea why it was done with a ret like that in the first place.
>
> I don't know why it puts 16 bytes of NOPs between __reloc_start and
> __cfi_relocate_kernel (in addition to the 16 before relocate_kernel
> itself), and space is *fairly* tight in the control page, but it's
> tolerable.

I think this has something to do with FineIBT, IIRC? PeterZ might have
more details.

> To make the CFI check actually give useful output if it triggers, I'm
> tempted to do the IDT/GDT invalidation in relocate_kernel itself, instead
> of before the call.
>
>
> diff --git a/arch/x86/kernel/relocate_kernel_64.S b/arch/x86/kernel/relocate_kernel_64.S
> index ccb297765e08..e6befd11fee4 100644
> --- a/arch/x86/kernel/relocate_kernel_64.S
> +++ b/arch/x86/kernel/relocate_kernel_64.S
> @@ -6,6 +6,7 @@
>
> #include <linux/linkage.h>
> #include <linux/stringify.h>
> +#include <linux/cfi_types.h>
> #include <asm/alternative.h>
> #include <asm/page_types.h>
> #include <asm/kexec.h>
> @@ -61,7 +62,10 @@ SYM_DATA_END(kexec_debug_idt)
>
> .section .text.relocate_kernel,"ax";
> .code64
> -SYM_CODE_START_NOALIGN(relocate_kernel)
> +__reloc_start:
> + ANNOTATE_NOENDBR
> +
> +SYM_TYPED_FUNC_START(relocate_kernel)
> UNWIND_HINT_END_OF_STACK
> ANNOTATE_NOENDBR
> /*
> @@ -115,10 +119,9 @@ SYM_CODE_START_NOALIGN(relocate_kernel)
> lea PAGE_SIZE(%rsi), %rsp
>
> /* jump to identity mapped page */
> - addq $(identity_mapped - relocate_kernel), %rsi
> - pushq %rsi
> - ANNOTATE_UNRET_SAFE
> - ret
> + addq $(identity_mapped - __reloc_start), %rsi
> + ANNOTATE_RETPOLINE_SAFE
> + jmp *%rsi
> int3
> SYM_CODE_END(relocate_kernel)
>
> @@ -263,7 +266,7 @@ SYM_CODE_START_LOCAL_NOALIGN(identity_mapped)
>
> /* get the re-entry point of the peer system */
> popq %rbp
> - leaq relocate_kernel(%rip), %r8
> + leaq __reloc_start(%rip), %r8
> movq kexec_pa_swap_page(%rip), %r10
> movq pa_backup_pages_map(%rip), %rdi
> movq kexec_pa_table_page(%rip), %rax
> @@ -272,7 +275,7 @@ SYM_CODE_START_LOCAL_NOALIGN(identity_mapped)
> movq $1, %r11 /* Ensure preserve_context flag is set */
> call swap_pages
> movq kexec_va_control_page(%rip), %rax
> - addq $(virtual_mapped - relocate_kernel), %rax
> + addq $(virtual_mapped - __reloc_start), %rax
> pushq %rax
> ANNOTATE_UNRET_SAFE
> ret
>