[PATCH v3 9/9] riscv: kexec: Route normal kexec through the trampoline page table
From: fangyu . yu
Date: Thu Jun 04 2026 - 09:42:50 EST
From: Fangyu Yu <fangyu.yu@xxxxxxxxxxxxxxxxx>
riscv_kexec_relocate (copied into control_code_buffer) uses an stvec
trick to drop the MMU and land on the PA of the next loop label.
Under VS-mode, KVM cannot emulate this single-step transition, and the
VCPU dies with "kvm run failed Operation not supported".
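Route normal kexec through riscv_kexec_relocate_entry, the trampoline
wrapper added in the previous patch. It drops SATP with PC already on
a PA, then hands off to control_code_buffer where the relocate body
runs with SATP=0. Since control_code_buffer is now only executed with
translation off, the set_memory_x() call on its kernel mapping is no
longer needed and is dropped.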
Drop the stvec trick from the relocate body and pass first_ind_entry
as a physical address, since the body now starts with SATP=0. The
".align 2" plus filler "nop" existed only to keep the PA of the loop
top 4-byte aligned: the legacy stvec trick wrote that PA into stvec,
whose low two bits hold MODE rather than address bits, so the low two
bits of the target would have been discarded by the hardware. With the
stvec write gone, that alignment is no longer load-bearing and is
removed as well.
Signed-off-by: Fangyu Yu <fangyu.yu@xxxxxxxxxxxxxxxxx>
---
arch/riscv/kernel/kexec_relocate.S | 26 ++++++--------------------
arch/riscv/kernel/machine_kexec.c | 27 +++++++++++++++++++--------
2 files changed, 25 insertions(+), 28 deletions(-)
diff --git a/arch/riscv/kernel/kexec_relocate.S b/arch/riscv/kernel/kexec_relocate.S
index 6c624560c9ac..7ffb83ea45fc 100644
--- a/arch/riscv/kernel/kexec_relocate.S
+++ b/arch/riscv/kernel/kexec_relocate.S
@@ -34,27 +34,13 @@ SYM_CODE_START(riscv_kexec_relocate)
csrw CSR_SIP, zero
/*
- * When we switch SATP.MODE to "Bare" we'll only
- * play with physical addresses. However the first time
- * we try to jump somewhere, the offset on the jump
- * will be relative to pc which will still be on VA. To
- * deal with this we set stvec to the physical address at
- * the start of the loop below so that we jump there in
- * any case.
+ * The trampoline wrapper (riscv_kexec_relocate_entry) has already
+ * dropped the MMU and handed control to us at this PA copy of the
+ * relocate code. From here on the entire loop runs with SATP=0 and
+ * every address (s0, s5, source/dest pointers) is a physical one.
*/
- la s6, 1f
- sub s6, s6, s4
- csrw CSR_STVEC, s6
-
- /*
- * With C-extension, here we get 42 Bytes and the next
- * .align directive would pad zeros here up to 44 Bytes.
- * So manually put a nop here to avoid zeros padding.
- */
- nop
/* Process entries in a loop */
-.align 2
1:
REG_L t0, 0(s0) /* t0 = *image->entry */
addi s0, s0, RISCV_SZPTR /* image->entry++ */
@@ -70,8 +56,8 @@ SYM_CODE_START(riscv_kexec_relocate)
andi t1, t0, 0x2
beqz t1, 2f
andi s0, t0, ~0x2
- csrw CSR_SATP, zero
- jr s6
+ /* MMU is already off; the entry wrapper handled the transition. */
+ j 1b
2:
/* IND_DONE entry ? -> jump to done label */
diff --git a/arch/riscv/kernel/machine_kexec.c b/arch/riscv/kernel/machine_kexec.c
index 71688c63af65..82fcb84a03ec 100644
--- a/arch/riscv/kernel/machine_kexec.c
+++ b/arch/riscv/kernel/machine_kexec.c
@@ -164,9 +164,6 @@ machine_kexec_prepare(struct kimage *image)
memcpy(control_code_buffer, riscv_kexec_relocate,
riscv_kexec_relocate_size);
- /* Mark the control page executable */
- set_memory_x((unsigned long) control_code_buffer, 1);
-
WRITE_ONCE(riscv_kexec_relocate_entry_pa,
__pa_symbol(&riscv_kexec_relocate_entry));
} else {
@@ -262,11 +259,15 @@ machine_kexec(struct kimage *image)
{
struct kimage_arch *internal = &image->arch;
unsigned long jump_addr = (unsigned long) image->start;
- unsigned long first_ind_entry = (unsigned long) &image->head;
+ /*
+ * The relocate body runs entirely with the MMU off (the wrapper
+ * drops SATP before jumping into control_code_buffer), so the very
+ * first entry must be a physical address.
+ */
+ unsigned long first_ind_entry = __pa(&image->head);
unsigned long this_cpu_id = __smp_processor_id();
unsigned long this_hart_id = cpuid_to_hartid_map(this_cpu_id);
unsigned long fdt_addr = internal->fdt_addr;
- void *control_code_buffer = page_address(image->control_code_page);
riscv_kexec_method kexec_method = NULL;
#ifdef CONFIG_SMP
@@ -274,10 +275,20 @@ machine_kexec(struct kimage *image)
"Some CPUs may be stale, kdump will be unreliable.\n");
#endif
- if (image->type != KEXEC_TYPE_CRASH)
- kexec_method = control_code_buffer;
- else
+ if (image->type != KEXEC_TYPE_CRASH) {
+ kexec_method = (riscv_kexec_method) &riscv_kexec_relocate_entry;
+ /*
+ * Publish the per-image control_code_buffer PA at dispatch
+ * time rather than in machine_kexec_prepare(). machine_kexec()
+ * only runs once the image has been fully loaded and committed
+ * as kexec_image, so the global cannot be left pointing at a
+ * page freed by a failed load.
+ */
+ WRITE_ONCE(riscv_kexec_cc_buffer_pa,
+ __pa(page_address(image->control_code_page)));
+ } else {
kexec_method = (riscv_kexec_method) &riscv_kexec_norelocate;
+ }
pr_notice("Will call new kernel at %08lx from hart id %lx\n",
jump_addr, this_hart_id);
--
2.50.1