[PATCH 4/4] riscv: kexec: Switch to trampoline page table before norelocate

From: fangyu . yu

Date: Tue Mar 24 2026 - 08:19:23 EST


From: Fangyu Yu <fangyu.yu@xxxxxxxxxxxxxxxxx>

Make riscv_kexec_norelocate a two-pass trampoline so it can
drop the kernel page tables while still executing from a
mapped address.

On the first entry, t3 is 0 (initialized by machine_kexec()), so the
PC comparison does not match. The code then loads the physical address
of riscv_kexec_norelocate and the trampoline SATP value, switches to
the trampoline page table, and jumps to the trampoline VA(=PA).

On the second entry, t3 contains the physical address of
riscv_kexec_norelocate, so the PC comparison matches and
execution continues under trampoline VA(=PA).

Since the trampoline page table is already active, replace the
previous stvec-based handoff with a direct jump to the target
entry (jr a2).

Signed-off-by: Fangyu Yu <fangyu.yu@xxxxxxxxxxxxxxxxx>
---
arch/riscv/kernel/kexec_relocate.S | 32 +++++++++++++++++++++++++-----
arch/riscv/kernel/machine_kexec.c | 13 ++++++++++++
2 files changed, 40 insertions(+), 5 deletions(-)

diff --git a/arch/riscv/kernel/kexec_relocate.S b/arch/riscv/kernel/kexec_relocate.S
index af6b99f5b0fd..2b9892bf04f2 100644
--- a/arch/riscv/kernel/kexec_relocate.S
+++ b/arch/riscv/kernel/kexec_relocate.S
@@ -147,13 +147,35 @@ riscv_kexec_relocate_end:


/* Used for jumping to crashkernel */
+.extern kexec_tramp_satp
+.extern riscv_kexec_norelocate_pa
.section ".kexec.tramp.text", "ax"
SYM_CODE_START(riscv_kexec_norelocate)
+ /*
+ * Two-pass entry:
+ * - 1st entry: t3 == 0 (initialized by machine_kexec()).
+ *
+ * - 2nd entry: t3 holds the physical address of
+ * riscv_kexec_norelocate, so the auipc result matches t3 and we
+ * branch to label 1 to continue execution under trampoline VA(=PA).
+ */
+ auipc t0, 0
+ beq t0, t3, 1f
+
+ la t0, riscv_kexec_norelocate_pa
+ REG_L t3, 0(t0)
+ la t0, kexec_tramp_satp
+ REG_L t1, 0(t0)
+ csrw CSR_SATP, t1
+ sfence.vma x0, x0
+
+ jr t3
/*
* s0: (const) Phys address to jump to
* s1: (const) Phys address of the FDT image
* s2: (const) The hartid of the current hart
*/
+1:
mv s0, a1
mv s1, a2
mv s2, a3
@@ -199,13 +221,13 @@ SYM_CODE_START(riscv_kexec_norelocate)
csrw CSR_SSCRATCH, zero

/*
- * Switch to physical addressing
- * This will also trigger a jump to CSR_STVEC
- * which in this case is the address of the new
- * kernel.
+ * We are already executing from the trampoline VA with the trampoline
+ * page table installed, so there is no need to rely on the old flow
+ * of programming stvec and taking the implicit trap on SATP switch.
+ * Jump directly to the target entry instead.
*/
- csrw CSR_STVEC, a2
csrw CSR_SATP, zero
+ jr a2

SYM_CODE_END(riscv_kexec_norelocate)

diff --git a/arch/riscv/kernel/machine_kexec.c b/arch/riscv/kernel/machine_kexec.c
index 4e522a64a614..d78e7928c6cf 100644
--- a/arch/riscv/kernel/machine_kexec.c
+++ b/arch/riscv/kernel/machine_kexec.c
@@ -18,6 +18,8 @@
#include <linux/interrupt.h>
#include <linux/irq.h>

+unsigned long kexec_tramp_satp;
+unsigned long riscv_kexec_norelocate_pa;
static pgd_t kexec_tramp_pgd[PTRS_PER_PGD] __aligned(PAGE_SIZE);
static p4d_t kexec_tramp_p4d[PTRS_PER_P4D] __aligned(PAGE_SIZE);
static pud_t kexec_tramp_pud[PTRS_PER_PUD] __aligned(PAGE_SIZE);
@@ -266,6 +268,8 @@ machine_kexec(struct kimage *image)
*/
riscv_kexec_build_tramp((unsigned long)__kexec_tramp_text_start,
__pa_symbol(__kexec_tramp_text_start));
+ riscv_kexec_norelocate_pa = __pa_symbol(&riscv_kexec_norelocate);
+ kexec_tramp_satp = PFN_DOWN(__pa_symbol(kexec_tramp_pgd)) | satp_mode;
}

pr_notice("Will call new kernel at %08lx from hart id %lx\n",
@@ -277,6 +281,15 @@ machine_kexec(struct kimage *image)

/* Jump to the relocation code */
pr_notice("Bye...\n");
+ /*
+ * Initialize t3 to 0 for riscv_kexec_norelocate().
+ *
+ * The norelocate trampoline uses t3 as a scratch register to record/
+ * compare against the current PC when switching to the trampoline
+ * page table. Keep t3 untouched from here until we branch into
+ * riscv_kexec_norelocate.
+ */
+ asm volatile ("li t3, 0x0" ::: "t3");
kexec_method(first_ind_entry, jump_addr, fdt_addr,
this_hart_id, kernel_map.va_pa_offset);
unreachable();
--
2.50.1