Re: [PATCH 2/5] x86/pvh: Make PVH entrypoint PIC for x86-64

From: Brian Gerst
Date: Wed Apr 10 2024 - 17:00:41 EST


On Wed, Apr 10, 2024 at 3:50 PM Jason Andryuk <jason.andryuk@xxxxxxx> wrote:
>
> The PVH entrypoint is 32bit non-PIC code running the uncompressed
> vmlinux at its load address CONFIG_PHYSICAL_START - default 0x1000000
> (16MB). The kernel is loaded at that physical address inside the VM by
> the VMM software (Xen/QEMU).
>
> When running a Xen PVH Dom0, the host reserved addresses are mapped 1-1
> into the PVH container. There exist system firmwares (Coreboot/EDK2)
> with reserved memory at 16MB. This creates a conflict where the PVH
> kernel cannot be loaded at that address.
>
> Modify the PVH entrypoint to be position-independent to allow flexibility
> in load address. Only the 64bit entry path is converted. A 32bit
> kernel is not PIC, so calling into other parts of the kernel, like
> xen_prepare_pvh() and mk_pgtable_32(), doesn't work properly when
> relocated.
>
> This makes the code PIC, but the page tables need to be updated as well
> to handle running from the kernel high map.
>
> The UNWIND_HINT_END_OF_STACK is to silence:
> vmlinux.o: warning: objtool: pvh_start_xen+0x7f: unreachable instruction
> after the lret into 64bit code.
>
> Signed-off-by: Jason Andryuk <jason.andryuk@xxxxxxx>
> ---
> ---
> arch/x86/platform/pvh/head.S | 44 ++++++++++++++++++++++++++++--------
> 1 file changed, 34 insertions(+), 10 deletions(-)
>
> diff --git a/arch/x86/platform/pvh/head.S b/arch/x86/platform/pvh/head.S
> index f7235ef87bc3..bb1e582e32b1 100644
> --- a/arch/x86/platform/pvh/head.S
> +++ b/arch/x86/platform/pvh/head.S
> @@ -7,6 +7,7 @@
> .code32
> .text
> #define _pa(x) ((x) - __START_KERNEL_map)
> +#define rva(x) ((x) - pvh_start_xen)
>
> #include <linux/elfnote.h>
> #include <linux/init.h>
> @@ -54,7 +55,25 @@ SYM_CODE_START_LOCAL(pvh_start_xen)
> UNWIND_HINT_END_OF_STACK
> cld
>
> - lgdt (_pa(gdt))
> + /*
> + * See the comment for startup_32 for more details. We need to
> + * execute a call to get the execution address to be position
> + * independent, but we don't have a stack. Save and restore the
> + * magic field of start_info in ebx, and use that as the stack.
> + */
> + mov (%ebx), %eax
> + leal 4(%ebx), %esp
> + ANNOTATE_INTRA_FUNCTION_CALL
> + call 1f
> +1: popl %ebp
> + mov %eax, (%ebx)
> + subl $rva(1b), %ebp
> + movl $0, %esp
> +
> + leal rva(gdt)(%ebp), %eax
> + leal rva(gdt_start)(%ebp), %ecx
> + movl %ecx, 2(%eax)
> + lgdt (%eax)
>
> mov $PVH_DS_SEL,%eax
> mov %eax,%ds
> @@ -62,14 +81,14 @@ SYM_CODE_START_LOCAL(pvh_start_xen)
> mov %eax,%ss
>
> /* Stash hvm_start_info. */
> - mov $_pa(pvh_start_info), %edi
> + leal rva(pvh_start_info)(%ebp), %edi
> mov %ebx, %esi
> - mov _pa(pvh_start_info_sz), %ecx
> + movl rva(pvh_start_info_sz)(%ebp), %ecx
> shr $2,%ecx
> rep
> movsl
>
> - mov $_pa(early_stack_end), %esp
> + leal rva(early_stack_end)(%ebp), %esp
>
> /* Enable PAE mode. */
> mov %cr4, %eax
> @@ -84,28 +103,33 @@ SYM_CODE_START_LOCAL(pvh_start_xen)
> wrmsr
>
> /* Enable pre-constructed page tables. */
> - mov $_pa(init_top_pgt), %eax
> + leal rva(init_top_pgt)(%ebp), %eax
> mov %eax, %cr3
> mov $(X86_CR0_PG | X86_CR0_PE), %eax
> mov %eax, %cr0
>
> /* Jump to 64-bit mode. */
> - ljmp $PVH_CS_SEL, $_pa(1f)
> + pushl $PVH_CS_SEL
> + leal rva(1f)(%ebp), %eax
> + pushl %eax
> + lretl
>
> /* 64-bit entry point. */
> .code64
> 1:
> + UNWIND_HINT_END_OF_STACK
> +
> /* Set base address in stack canary descriptor. */
> mov $MSR_GS_BASE,%ecx
> - mov $_pa(canary), %eax
> + leal rva(canary)(%ebp), %eax

Since this is in 64-bit mode, RIP-relative addressing can be used here
instead of the %ebp-based rva() form, e.g. "leal canary(%rip), %eax".

> xor %edx, %edx
> wrmsr
>
> call xen_prepare_pvh
>
> /* startup_64 expects boot_params in %rsi. */
> - mov $_pa(pvh_bootparams), %rsi
> - mov $_pa(startup_64), %rax
> + lea rva(pvh_bootparams)(%ebp), %rsi
> + lea rva(startup_64)(%ebp), %rax

RIP-relative addressing can be used for both of these lea instructions
too, e.g. "lea pvh_bootparams(%rip), %rsi".

> ANNOTATE_RETPOLINE_SAFE
> jmp *%rax
>
> @@ -143,7 +167,7 @@ SYM_CODE_END(pvh_start_xen)
> .balign 8
> SYM_DATA_START_LOCAL(gdt)
> .word gdt_end - gdt_start
> - .long _pa(gdt_start)
> + .long _pa(gdt_start) /* x86-64 will overwrite if relocated. */
> .word 0
> SYM_DATA_END(gdt)
> SYM_DATA_START_LOCAL(gdt_start)
> --
> 2.44.0
>
>

Brian Gerst