[RFC PATCH 1/7] jump_label: expose queueing API for batched static key updates
From: Jim Cromie
Date: Thu Mar 05 2026 - 20:51:11 EST
Currently, `HAVE_JUMP_LABEL_BATCH` provides an architecture-level
mechanism to defer instruction synchronization (`text_poke_sync()`)
when patching a sequence of static keys. However, this deferred
batching capability is not exposed as a public kernel API. Subsystems
that need to toggle a large number of static keys (e.g.,
dynamic_debug) currently suffer from O(N) overhead due to repeated
machine-wide synchronizations (stop_machine).
This patch introduces a public queueing API to expose this deferred
synchronization mechanism to the rest of the kernel. This allows
multiple static keys to be enabled/disabled by queueing their
architecture-level updates, before applying a single machine-wide
synchronization barrier after all instructions are modified.
The new API consists of:
- static_key_enable_queued(key)
- static_key_disable_queued(key)
- static_key_apply_queued() (the global barrier/flush)
- static_branch_enable_queued(x) / static_branch_disable_queued(x) macros
NOTES:
The '_queued' API suffix was chosen to match the underlying
'arch_jump_label_transform_queue' and to avoid confusion with the
existing rate-limited 'static_key_deferred' API.
Also unify the names under the 'static_key_*' prefix, renaming
jump_label_apply_queued to static_key_apply_queued (with a
compatibility macro) for consistency.
A pr_debug() is added to show the poked addresses; this exposed the
semi-random ordering coming from dynamic_debug, despite its ordered
descriptors.
So arch/x86/kernel/alternative.c gets new code to do an insertion
sort, by memcpy & memmove after appending. This sorting yields a
dramatic IPI reduction; a following patch converts dynamic_debug to
use the API, and includes the numbers.
Cc: Jason Baron <jbaron@xxxxxxxxxx>
Cc: Peter Zijlstra <peterz@xxxxxxxxxxxxx>
Cc: Josh Poimboeuf <jpoimboe@xxxxxxxxxx>
Cc: Thomas Gleixner <tglx@xxxxxxxxxx>
Cc: Alice Ryhl <aliceryhl@xxxxxxxxxx>
Cc: Steven Rostedt <rostedt@xxxxxxxxxxx>
Cc: Ard Biesheuvel <ardb@xxxxxxxxxx>
Cc: Alexandre Chartre <alexandre.chartre@xxxxxxxxxx>
Cc: Juergen Gross <jgross@xxxxxxxx>
Cc: Andy Lutomirski <luto@xxxxxxxxxx>
Signed-off-by: Jim Cromie <jim.cromie@xxxxxxxxx>
---
arch/Kconfig | 3 +
arch/x86/Kconfig | 1 +
arch/x86/kernel/alternative.c | 50 +++++++++-----
arch/x86/kernel/jump_label.c | 13 +++-
include/linux/jump_label.h | 24 +++++++
kernel/jump_label.c | 125 +++++++++++++++++++++++++++++++---
6 files changed, 186 insertions(+), 30 deletions(-)
diff --git a/arch/Kconfig b/arch/Kconfig
index 102ddbd4298e..388a73545005 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -505,6 +505,9 @@ config HAVE_ARCH_JUMP_LABEL
config HAVE_ARCH_JUMP_LABEL_RELATIVE
bool
+config HAVE_JUMP_LABEL_BATCH
+ bool
+
config MMU_GATHER_TABLE_FREE
bool
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index e2df1b147184..4d7705890558 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -249,6 +249,7 @@ config X86
select HAVE_IOREMAP_PROT
select HAVE_IRQ_EXIT_ON_IRQ_STACK if X86_64
select HAVE_IRQ_TIME_ACCOUNTING
+ select HAVE_JUMP_LABEL_BATCH
select HAVE_JUMP_LABEL_HACK if HAVE_OBJTOOL
select HAVE_KERNEL_BZIP2
select HAVE_KERNEL_GZIP
diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c
index a888ae0f01fb..85df82c36543 100644
--- a/arch/x86/kernel/alternative.c
+++ b/arch/x86/kernel/alternative.c
@@ -3137,26 +3137,19 @@ static void __smp_text_poke_batch_add(void *addr, const void *opcode, size_t len
}
/*
- * We hard rely on the text_poke_array.vec being ordered; ensure this is so by flushing
- * early if needed.
+ * We hard rely on the text_poke_array.vec being ordered; ensure this
+ * by finding where to insert to preserve the order, and mem-moving
+ * into place after appending it.
*/
-static bool text_poke_addr_ordered(void *addr)
+static int text_poke_get_insert_idx(void *addr)
{
- WARN_ON_ONCE(!addr);
+ int i;
- if (!text_poke_array.nr_entries)
- return true;
-
- /*
- * If the last current entry's address is higher than the
- * new entry's address we'd like to add, then ordering
- * is violated and we must first flush all pending patching
- * requests:
- */
- if (text_poke_addr(text_poke_array.vec + text_poke_array.nr_entries-1) > addr)
- return false;
-
- return true;
+ for (i = 0; i < text_poke_array.nr_entries; i++) {
+ if (text_poke_addr(&text_poke_array.vec[i]) > addr)
+ return i;
+ }
+ return text_poke_array.nr_entries;
}
/**
@@ -3174,9 +3167,30 @@ static bool text_poke_addr_ordered(void *addr)
*/
void __ref smp_text_poke_batch_add(void *addr, const void *opcode, size_t len, const void *emulate)
{
- if (text_poke_array.nr_entries == TEXT_POKE_ARRAY_MAX || !text_poke_addr_ordered(addr))
+ int insert_idx;
+
+ pr_debug("incoming addr=%px, current_qlen=%d\n",
+ addr, text_poke_array.nr_entries);
+
+ if (text_poke_array.nr_entries == TEXT_POKE_ARRAY_MAX)
smp_text_poke_batch_finish();
+
+ insert_idx = text_poke_get_insert_idx(addr);
__smp_text_poke_batch_add(addr, opcode, len, emulate);
+
+ if (insert_idx < text_poke_array.nr_entries - 1) {
+ struct smp_text_poke_loc tmp;
+ int last = text_poke_array.nr_entries - 1;
+ /* Copy the newly appended item out */
+ memcpy(&tmp, &text_poke_array.vec[last], sizeof(tmp));
+
+ /* Shift everything from insert_idx over by 1 */
+ memmove(&text_poke_array.vec[insert_idx + 1],
+ &text_poke_array.vec[insert_idx],
+ (last - insert_idx) * sizeof(struct smp_text_poke_loc));
+ /* Drop the new item into its sorted home */
+ memcpy(&text_poke_array.vec[insert_idx], &tmp, sizeof(tmp));
+ }
}
/**
diff --git a/arch/x86/kernel/jump_label.c b/arch/x86/kernel/jump_label.c
index a7949a54a0ff..6b5bab5f34e8 100644
--- a/arch/x86/kernel/jump_label.c
+++ b/arch/x86/kernel/jump_label.c
@@ -120,6 +120,8 @@ void arch_jump_label_transform(struct jump_entry *entry,
jump_label_transform(entry, type, 0);
}
+static int jump_label_queue_len;
+
bool arch_jump_label_transform_queue(struct jump_entry *entry,
enum jump_label_type type)
{
@@ -135,14 +137,23 @@ bool arch_jump_label_transform_queue(struct jump_entry *entry,
mutex_lock(&text_mutex);
jlp = __jump_label_patch(entry, type);
- smp_text_poke_batch_add((void *)jump_entry_code(entry), jlp.code, jlp.size, NULL);
+ smp_text_poke_batch_add((void *)jump_entry_code(entry),
+ jlp.code, jlp.size, NULL);
+ jump_label_queue_len++;
mutex_unlock(&text_mutex);
return true;
}
void arch_jump_label_transform_apply(void)
{
+ if (!jump_label_queue_len) {
+ pr_debug("no queued jump_labels to apply\n");
+ return;
+ }
+
+ pr_debug("applying %d queued jump_labels\n", jump_label_queue_len);
mutex_lock(&text_mutex);
smp_text_poke_batch_finish();
+ jump_label_queue_len = 0;
mutex_unlock(&text_mutex);
}
diff --git a/include/linux/jump_label.h b/include/linux/jump_label.h
index fdb79dd1ebd8..17f572abe4bb 100644
--- a/include/linux/jump_label.h
+++ b/include/linux/jump_label.h
@@ -234,10 +234,20 @@ extern void static_key_slow_dec_cpuslocked(struct static_key *key);
extern int static_key_count(struct static_key *key);
extern void static_key_enable(struct static_key *key);
extern void static_key_disable(struct static_key *key);
+extern void static_key_enable_queued(struct static_key *key);
+extern void static_key_disable_queued(struct static_key *key);
+extern void static_key_apply_queued(void);
extern void static_key_enable_cpuslocked(struct static_key *key);
extern void static_key_disable_cpuslocked(struct static_key *key);
extern enum jump_label_type jump_label_init_type(struct jump_entry *entry);
+#define static_branch_enable(x) static_key_enable(&(x)->key)
+#define static_branch_disable(x) static_key_disable(&(x)->key)
+#define static_branch_enable_queued(x) static_key_enable_queued(&(x)->key)
+#define static_branch_disable_queued(x) static_key_disable_queued(&(x)->key)
+#define static_branch_enable_cpuslocked(x) static_key_enable_cpuslocked(&(x)->key)
+#define static_branch_disable_cpuslocked(x) static_key_disable_cpuslocked(&(x)->key)
+
/*
* We should be using ATOMIC_INIT() for initializing .enabled, but
* the inclusion of atomic.h is problematic for inclusion of jump_label.h
@@ -340,6 +350,18 @@ static inline void static_key_disable(struct static_key *key)
atomic_set(&key->enabled, 0);
}
+static inline void static_key_enable_queued(struct static_key *key)
+{
+ static_key_enable(key);
+}
+
+static inline void static_key_disable_queued(struct static_key *key)
+{
+ static_key_disable(key);
+}
+
+static inline void static_key_apply_queued(void) {}
+
#define static_key_enable_cpuslocked(k) static_key_enable((k))
#define static_key_disable_cpuslocked(k) static_key_disable((k))
@@ -535,6 +557,8 @@ extern bool ____wrong_branch_error(void);
#define static_branch_enable(x) static_key_enable(&(x)->key)
#define static_branch_disable(x) static_key_disable(&(x)->key)
+#define static_branch_enable_queued(x) static_key_enable_queued(&(x)->key)
+#define static_branch_disable_queued(x) static_key_disable_queued(&(x)->key)
#define static_branch_enable_cpuslocked(x) static_key_enable_cpuslocked(&(x)->key)
#define static_branch_disable_cpuslocked(x) static_key_disable_cpuslocked(&(x)->key)
diff --git a/kernel/jump_label.c b/kernel/jump_label.c
index 7cb19e601426..76a0f4e68b73 100644
--- a/kernel/jump_label.c
+++ b/kernel/jump_label.c
@@ -91,6 +91,7 @@ jump_label_sort_entries(struct jump_entry *start, struct jump_entry *stop)
}
static void jump_label_update(struct static_key *key);
+static void jump_label_update_queued(struct static_key *key);
/*
* There are similar definitions for the !CONFIG_JUMP_LABEL case in jump_label.h.
@@ -250,6 +251,41 @@ void static_key_disable(struct static_key *key)
}
EXPORT_SYMBOL_GPL(static_key_disable);
+void static_key_enable_queued(struct static_key *key)
+{
+ STATIC_KEY_CHECK_USE(key);
+
+ if (atomic_read(&key->enabled) > 0) {
+ WARN_ON_ONCE(atomic_read(&key->enabled) != 1);
+ return;
+ }
+
+ jump_label_lock();
+ if (atomic_read(&key->enabled) == 0) {
+ atomic_set(&key->enabled, -1);
+ jump_label_update_queued(key);
+ atomic_set_release(&key->enabled, 1);
+ }
+ jump_label_unlock();
+}
+EXPORT_SYMBOL_GPL(static_key_enable_queued);
+
+void static_key_disable_queued(struct static_key *key)
+{
+ STATIC_KEY_CHECK_USE(key);
+
+ if (atomic_read(&key->enabled) != 1) {
+ WARN_ON_ONCE(atomic_read(&key->enabled) != 0);
+ return;
+ }
+
+ jump_label_lock();
+ if (atomic_cmpxchg(&key->enabled, 1, 0) == 1)
+ jump_label_update_queued(key);
+ jump_label_unlock();
+}
+EXPORT_SYMBOL_GPL(static_key_disable_queued);
+
static bool static_key_dec_not_one(struct static_key *key)
{
int v;
@@ -488,39 +524,59 @@ static bool jump_label_can_update(struct jump_entry *entry, bool init)
return true;
}
-#ifndef HAVE_JUMP_LABEL_BATCH
static void __jump_label_update(struct static_key *key,
struct jump_entry *entry,
struct jump_entry *stop,
bool init)
{
+#ifndef HAVE_JUMP_LABEL_BATCH
for (; (entry < stop) && (jump_entry_key(entry) == key); entry++) {
if (jump_label_can_update(entry, init))
arch_jump_label_transform(entry, jump_label_type(entry));
}
-}
#else
-static void __jump_label_update(struct static_key *key,
- struct jump_entry *entry,
- struct jump_entry *stop,
- bool init)
-{
for (; (entry < stop) && (jump_entry_key(entry) == key); entry++) {
if (!jump_label_can_update(entry, init))
continue;
if (!arch_jump_label_transform_queue(entry, jump_label_type(entry))) {
- /*
- * Queue is full: Apply the current queue and try again.
- */
arch_jump_label_transform_apply();
- BUG_ON(!arch_jump_label_transform_queue(entry, jump_label_type(entry)));
+ WARN_ON_ONCE(!arch_jump_label_transform_queue(entry, jump_label_type(entry)));
}
}
arch_jump_label_transform_apply();
+#endif
}
+
+static void __jump_label_update_queued(struct static_key *key,
+ struct jump_entry *entry,
+ struct jump_entry *stop,
+ bool init)
+{
+#ifdef HAVE_JUMP_LABEL_BATCH
+ for (; (entry < stop) && (jump_entry_key(entry) == key); entry++) {
+
+ if (!jump_label_can_update(entry, init))
+ continue;
+
+ if (!arch_jump_label_transform_queue(entry, jump_label_type(entry))) {
+ arch_jump_label_transform_apply();
+ WARN_ON_ONCE(!arch_jump_label_transform_queue(entry, jump_label_type(entry)));
+ }
+ }
+#else
+ __jump_label_update(key, entry, stop, init);
+#endif
+}
+
+void static_key_apply_queued(void)
+{
+#ifdef HAVE_JUMP_LABEL_BATCH
+ arch_jump_label_transform_apply();
#endif
+}
+EXPORT_SYMBOL_GPL(static_key_apply_queued);
void __init jump_label_init(void)
{
@@ -696,6 +752,27 @@ static void __jump_label_mod_update(struct static_key *key)
}
}
+static void __jump_label_mod_update_queued(struct static_key *key)
+{
+ struct static_key_mod *mod;
+
+ for (mod = static_key_mod(key); mod; mod = mod->next) {
+ struct jump_entry *stop;
+ struct module *m;
+
+ if (!mod->entries)
+ continue;
+
+ m = mod->mod;
+ if (!m)
+ stop = __stop___jump_table;
+ else
+ stop = m->jump_entries + m->num_jump_entries;
+ __jump_label_update_queued(key, mod->entries, stop,
+ m && m->state == MODULE_STATE_COMING);
+ }
+}
+
static int jump_label_add_module(struct module *mod)
{
struct jump_entry *iter_start = mod->jump_entries;
@@ -919,6 +996,32 @@ static void jump_label_update(struct static_key *key)
__jump_label_update(key, entry, stop, init);
}
+static void jump_label_update_queued(struct static_key *key)
+{
+ struct jump_entry *stop = __stop___jump_table;
+ bool init = system_state < SYSTEM_RUNNING;
+ struct jump_entry *entry;
+#ifdef CONFIG_MODULES
+ struct module *mod;
+
+ if (static_key_linked(key)) {
+ __jump_label_mod_update_queued(key);
+ return;
+ }
+
+ scoped_guard(rcu) {
+ mod = __module_address((unsigned long)key);
+ if (mod) {
+ stop = mod->jump_entries + mod->num_jump_entries;
+ init = mod->state == MODULE_STATE_COMING;
+ }
+ }
+#endif
+ entry = static_key_entries(key);
+ if (entry)
+ __jump_label_update_queued(key, entry, stop, init);
+}
+
#ifdef CONFIG_STATIC_KEYS_SELFTEST
static DEFINE_STATIC_KEY_TRUE(sk_true);
static DEFINE_STATIC_KEY_FALSE(sk_false);
--
2.53.0