Re: [RFC PATCH 50/56] x86/alternative: Add re-patch support
From: Nikolay Borisov
Date: Fri Oct 31 2025 - 06:23:01 EST
On 10/13/25 17:34, David Kaplan wrote:
Updating alternatives is done under the biggest hammers possible. The
freezer is used to freeze all processes and kernel threads at safe
points to ensure they are not in the middle of a sequence we're about to
patch. Then stop_machine_nmi() synchronizes all CPUs and puts them into
a tight spin loop while re-patching occurs. The actual patching is done
using simple memcpy, just like during boot.
Signed-off-by: David Kaplan <david.kaplan@xxxxxxx>
---
arch/x86/include/asm/alternative.h | 6 ++
arch/x86/kernel/alternative.c | 131 +++++++++++++++++++++++++++++
2 files changed, 137 insertions(+)
diff --git a/arch/x86/include/asm/alternative.h b/arch/x86/include/asm/alternative.h
index 61ce8a4b1aa6..f0b863292c3c 100644
--- a/arch/x86/include/asm/alternative.h
+++ b/arch/x86/include/asm/alternative.h
@@ -19,6 +19,7 @@
#ifndef __ASSEMBLER__
#include <linux/stddef.h>
+#include <linux/static_call_types.h>
/*
* Alternative inline assembly for SMP.
@@ -89,6 +90,9 @@ extern s32 __cfi_sites[], __cfi_sites_end[];
extern s32 __ibt_endbr_seal[], __ibt_endbr_seal_end[];
extern s32 __smp_locks[], __smp_locks_end[];
+extern struct static_call_site __start_static_call_sites[],
+ __stop_static_call_sites[];
+
/*
* Debug flag that can be tested to see whether alternative
* instructions were patched in already:
@@ -98,6 +102,8 @@ extern int alternatives_patched;
struct module;
#ifdef CONFIG_DYNAMIC_MITIGATIONS
+extern void cpu_update_alternatives(void);
+extern void cpu_prepare_repatch_alternatives(void);
extern void reset_retpolines(s32 *start, s32 *end, struct module *mod);
extern void reset_returns(s32 *start, s32 *end, struct module *mod);
extern void reset_alternatives(struct alt_instr *start, struct alt_instr *end,
diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c
index 23bb3386ec5e..613cb645bd9f 100644
--- a/arch/x86/kernel/alternative.c
+++ b/arch/x86/kernel/alternative.c
@@ -6,12 +6,15 @@
#include <linux/vmalloc.h>
#include <linux/memory.h>
#include <linux/execmem.h>
+#include <linux/stop_machine.h>
+#include <linux/freezer.h>
#include <asm/text-patching.h>
#include <asm/insn.h>
#include <asm/ibt.h>
#include <asm/set_memory.h>
#include <asm/nmi.h>
+#include <asm/bugs.h>
int __read_mostly alternatives_patched;
@@ -3468,4 +3471,132 @@ void its_free_all(struct module *mod)
its_page = NULL;
}
#endif
+static atomic_t thread_ack;
+
+/*
+ * This function is called by ALL online CPUs but only CPU0 will do the
+ * re-patching. It is important that all other cores spin in the tight loop
+ * below (and not in multi_cpu_stop) because they cannot safely do return
+ * instructions while returns are being patched. Therefore, spin them here
+ * (with interrupts disabled) until CPU0 has finished its work.
+ */
+static int __cpu_update_alternatives(void *__unused)
+{
+ if (smp_processor_id()) {
+ atomic_dec(&thread_ack);
+ while (!READ_ONCE(alternatives_patched))
+ cpu_relax();
+
+ cpu_bugs_update_speculation_msrs();
+ } else {
+ repatch_in_progress = true;
+
+ /* Wait for all cores to enter this function. */
+ while (atomic_read(&thread_ack))
+ cpu_relax();
+
+ /* These must be un-done in the opposite order in which they were applied. */
+ reset_alternatives(__alt_instructions, __alt_instructions_end, NULL);
+ reset_builtin_callthunks();
+ reset_returns(__return_sites, __return_sites_end, NULL);
+ reset_retpolines(__retpoline_sites, __retpoline_sites_end, NULL);
+
+ apply_retpolines(__retpoline_sites, __retpoline_sites_end, NULL);
+ apply_returns(__return_sites, __return_sites_end, NULL);
This triggers the following splat:
[ 363.467469] BUG: sleeping function called from invalid context at kernel/locking/mutex.c:575
[ 363.467472] in_atomic(): 1, irqs_disabled(): 1, non_block: 0, pid: 18, name: migration/0
[ 363.467472] preempt_count: 110001, expected: 0
[ 363.467473] RCU nest depth: 0, expected: 0
[ 363.467474] no locks held by migration/0/18.
[ 363.467474] irq event stamp: 1280
[ 363.467475] hardirqs last enabled at (1279): [<ffffffff91fd1444>] _raw_spin_unlock_irq+0x24/0x50
[ 363.467479] hardirqs last disabled at (1280): [<ffffffff913c98f9>] multi_cpu_stop+0x119/0x170
[ 363.467482] softirqs last enabled at (0): [<ffffffff9129eaab>] copy_process+0x7fb/0x1990
[ 363.467484] softirqs last disabled at (0): [<0000000000000000>] 0x0
[ 363.467485] Preemption disabled at:
[ 363.467486] [<ffffffff913c8e63>] cpu_stopper_thread+0x93/0x150
[ 363.467488] CPU: 0 UID: 0 PID: 18 Comm: migration/0 Not tainted 6.18.0-rc1-default+ #9 PREEMPT(none)
[ 363.467490] Hardware name: QEMU Ubuntu 24.04 PC (i440FX + PIIX, 1996), BIOS 1.16.3-debian-1.16.3-2 04/01/2014
[ 363.467491] Stopper: multi_cpu_stop+0x0/0x170 <- __stop_cpus.constprop.0+0x77/0xb0
[ 363.467493] Call Trace:
[ 363.467494] <NMI>
[ 363.467496] dump_stack_lvl+0x62/0x90
[ 363.467498] __might_resched+0x19f/0x2b0
[ 363.467501] ? its_return_thunk+0x10/0x10
[ 363.467503] __mutex_lock+0x67/0x1060
[ 363.467504] ? look_up_lock_class+0x59/0x130
[ 363.467506] ? look_up_lock_class+0x59/0x130
[ 363.467508] ? __static_call_fixup+0x4f/0xa0
[ 363.467510] ? insn_get_prefixes+0x1a4/0x3f0
[ 363.467512] ? __SCT__tp_func_emulate_vsyscall+0x8/0x8
[ 363.467513] ? its_return_thunk+0x10/0x10
[ 363.467514] ? its_return_thunk+0x10/0x10
[ 363.467516] ? __static_call_fixup+0x4f/0xa0
[ 363.467517] __static_call_fixup+0x4f/0xa0
[ 363.467518] ? __SCT__tp_func_emulate_vsyscall+0x8/0x8
[ 363.467519] apply_returns+0x13e/0x370
[ 363.467523] ? __SCT__tp_func_emulate_vsyscall+0x8/0x8
[ 363.467524] ? __SCT__x86_pmu_disable_all+0x7/0x8
[ 363.467525] ? __SCT__x86_pmu_handle_irq+0x5/0x8
[ 363.467527] ? __copy_user_flushcache+0xf3/0x100
[ 363.467528] ? its_return_thunk+0x10/0x10
[ 363.467529] __cpu_update_alternatives+0x1e3/0x240
[ 363.467531] ? x2apic_send_IPI+0x40/0x60
[ 363.467533] stop_machine_nmi_handler+0x29/0x40
[ 363.467534] default_do_nmi+0x137/0x1a0
[ 363.467536] exc_nmi+0xef/0x120
[ 363.467538] end_repeat_nmi+0xf/0x53
[ 363.467578] ================================
[ 363.467578] WARNING: inconsistent lock state
[ 363.467578] 6.18.0-rc1-default+ #9 Tainted: G W
[ 363.467579] --------------------------------
[ 363.467579] inconsistent {INITIAL USE} -> {IN-NMI} usage.
[ 363.467580] migration/0/18 [HC1[1]:SC0[0]:HE0:SE1] takes:
[ 363.467581] ffffffff92668c28 (text_mutex){+.+.}-{4:4}, at: __static_call_fixup+0x4f/0xa0
[ 363.467583] {INITIAL USE} state was registered at:
[ 363.467584] irq event stamp: 1280
[ 363.467584] hardirqs last enabled at (1279): [<ffffffff91fd1444>] _raw_spin_unlock_irq+0x24/0x50
[ 363.467586] hardirqs last disabled at (1280): [<ffffffff913c98f9>] multi_cpu_stop+0x119/0x170
[ 363.467587] softirqs last enabled at (0): [<ffffffff9129eaab>] copy_process+0x7fb/0x1990
[ 363.467588] softirqs last disabled at (0): [<0000000000000000>] 0x0
[ 363.467589]
other info that might help us debug this:
[ 363.467590] Possible unsafe locking scenario:
[ 363.467590] CPU0
[ 363.467590] ----
[ 363.467590] lock(text_mutex);
[ 363.467591] <Interrupt>
[ 363.467591] lock(text_mutex);
[ 363.467592]
*** DEADLOCK ***
[ 363.467592] no locks held by migration/0/18.
[ 363.467592]
stack backtrace:
[ 363.467593] CPU: 0 UID: 0 PID: 18 Comm: migration/0 Tainted: G W 6.18.0-rc1-default+ #9 PREEMPT(none)
[ 363.467594] Tainted: [W]=WARN
[ 363.467595] Hardware name: QEMU Ubuntu 24.04 PC (i440FX + PIIX, 1996), BIOS 1.16.3-debian-1.16.3-2 04/01/2014
[ 363.467595] Stopper: multi_cpu_stop+0x0/0x170 <- __stop_cpus.constprop.0+0x77/0xb0
[ 363.467597] Call Trace:
[ 363.467598] <NMI>
[ 363.467598] dump_stack_lvl+0x62/0x90
[ 363.467600] print_usage_bug.part.0+0x22c/0x2c0
[ 363.467602] lock_acquire+0x208/0x2d0
[ 363.467604] ? __static_call_fixup+0x4f/0xa0
[ 363.467605] ? its_return_thunk+0x10/0x10
[ 363.467607] __mutex_lock+0xb3/0x1060
[ 363.467607] ? __static_call_fixup+0x4f/0xa0
[ 363.467608] ? look_up_lock_class+0x59/0x130
[ 363.467610] ? look_up_lock_class+0x59/0x130
[ 363.467611] ? __static_call_fixup+0x4f/0xa0
[ 363.467613] ? insn_get_prefixes+0x1a4/0x3f0
[ 363.467614] ? __SCT__tp_func_emulate_vsyscall+0x8/0x8
[ 363.467615] ? its_return_thunk+0x10/0x10
[ 363.467617] ? its_return_thunk+0x10/0x10
[ 363.467618] ? __static_call_fixup+0x4f/0xa0
[ 363.467619] __static_call_fixup+0x4f/0xa0
[ 363.467619] ? __SCT__tp_func_emulate_vsyscall+0x8/0x8
[ 363.467621] apply_returns+0x13e/0x370
[ 363.467624] ? __SCT__tp_func_emulate_vsyscall+0x8/0x8
[ 363.467625] ? __SCT__x86_pmu_disable_all+0x7/0x8
[ 363.467626] ? __SCT__x86_pmu_handle_irq+0x5/0x8
[ 363.467627] ? __copy_user_flushcache+0xf3/0x100
[ 363.467628] ? its_return_thunk+0x10/0x10
[ 363.467630] __cpu_update_alternatives+0x1e3/0x240
[ 363.467631] ? x2apic_send_IPI+0x40/0x60
[ 363.467633] stop_machine_nmi_handler+0x29/0x40
[ 363.467634] default_do_nmi+0x137/0x1a0
[ 363.467635] exc_nmi+0xef/0x120
[ 363.467637] end_repeat_nmi+0xf/0x53
The reason is that apply_returns->__static_call_fixup acquires text_mutex from NMI context — a sleeping lock that must not be taken with interrupts disabled, hence the "sleeping function called from invalid context" BUG and the lockdep {INITIAL USE} -> {IN-NMI} inconsistency above.
<snip>