[PATCH 2/5] x86/alternative.c: sort text-pokes before flushing the queue
From: Jim Cromie
Date: Fri Apr 03 2026 - 10:24:20 EST
Until now, x86 can queue jump_label text-pokes as long as the
poke-addr is monotonically increasing, but flushes the queue when a
new poke-addr is less than the previous.
Dynamic-debug now uses the queued static-key API, but the advantage is
limited; we see only a ~2x reduction in IPIs. Although the pr_debug
descriptors are ordered, the patch-addresses in them are not; about
half of them violate the ordering constraint.
So this patch drops that requirement and instead sorts the text-pokes
by their address before applying them. Doing so lets us fill the queue
completely before sorting and flushing, giving a dramatic ~125x
reduction in IPIs over the traditional single IPI per pr_debug.
Other arches don't need a queue, and so have nothing to sort.
#> dd_ipis
[ 23.100381] dyndbg: query 0: "module !virtio* +p " mod:*
[ 23.103432] dyndbg: query 1: "-p" mod:*
Delta-CAL (IPI): 242
Signed-off-by: Jim Cromie <jim.cromie@xxxxxxxxx>
---
arch/x86/kernel/alternative.c | 42 ++++++++++++++++-------------------
lib/dynamic_debug.c | 1 +
2 files changed, 20 insertions(+), 23 deletions(-)
diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c
index e87da25d1236..92987954b8aa 100644
--- a/arch/x86/kernel/alternative.c
+++ b/arch/x86/kernel/alternative.c
@@ -2,6 +2,7 @@
#define pr_fmt(fmt) "SMP alternatives: " fmt
#include <linux/mmu_context.h>
+#include <linux/sort.h>
#include <linux/perf_event.h>
#include <linux/vmalloc.h>
#include <linux/memory.h>
@@ -2823,6 +2824,18 @@ static __always_inline int patch_cmp(const void *tpl_a, const void *tpl_b)
return 0;
}
+static int text_poke_loc_cmp(const void *a, const void *b)
+{
+ const struct smp_text_poke_loc *tpl_a = a;
+ const struct smp_text_poke_loc *tpl_b = b;
+
+ if (tpl_a->rel_addr < tpl_b->rel_addr)
+ return -1;
+ if (tpl_a->rel_addr > tpl_b->rel_addr)
+ return 1;
+ return 0;
+}
+
noinstr int smp_text_poke_int3_handler(struct pt_regs *regs)
{
struct smp_text_poke_loc *tpl;
@@ -2935,6 +2948,10 @@ void smp_text_poke_batch_finish(void)
if (!text_poke_array.nr_entries)
return;
+ if (text_poke_array.nr_entries > 1)
+ sort(text_poke_array.vec, text_poke_array.nr_entries,
+ sizeof(struct smp_text_poke_loc), text_poke_loc_cmp, NULL);
+
lockdep_assert_held(&text_mutex);
/*
@@ -3151,28 +3168,6 @@ static void __smp_text_poke_batch_add(void *addr, const void *opcode, size_t len
}
}
-/*
- * We hard rely on the text_poke_array.vec being ordered; ensure this is so by flushing
- * early if needed.
- */
-static bool text_poke_addr_ordered(void *addr)
-{
- WARN_ON_ONCE(!addr);
-
- if (!text_poke_array.nr_entries)
- return true;
-
- /*
- * If the last current entry's address is higher than the
- * new entry's address we'd like to add, then ordering
- * is violated and we must first flush all pending patching
- * requests:
- */
- if (text_poke_addr(text_poke_array.vec + text_poke_array.nr_entries-1) > addr)
- return false;
-
- return true;
-}
/**
* smp_text_poke_batch_add() -- update instruction on live kernel on SMP, batched
@@ -3189,8 +3184,9 @@ static bool text_poke_addr_ordered(void *addr)
*/
void __ref smp_text_poke_batch_add(void *addr, const void *opcode, size_t len, const void *emulate)
{
- if (text_poke_array.nr_entries == TEXT_POKE_ARRAY_MAX || !text_poke_addr_ordered(addr))
+ if (text_poke_array.nr_entries == TEXT_POKE_ARRAY_MAX)
smp_text_poke_batch_finish();
+
__smp_text_poke_batch_add(addr, opcode, len, emulate);
}
diff --git a/lib/dynamic_debug.c b/lib/dynamic_debug.c
index 18a71a9108d3..b5060749464e 100644
--- a/lib/dynamic_debug.c
+++ b/lib/dynamic_debug.c
@@ -264,6 +264,7 @@ static int ddebug_change(const struct ddebug_query *query,
}
}
mutex_unlock(&ddebug_lock);
+ v2pr_info("applied %d queued updates to sites in total\n", nfound);
if (!nfound && verbose)
pr_info("no matches for query\n");
--
2.53.0