[PATCH v2 06/11] perf/uprobe: SRCU-ify uprobe->consumer list

From: Peter Zijlstra
Date: Thu Jul 11 2024 - 07:08:30 EST


With handle_swbp() hitting concurrently on (all) CPUs the
uprobe->register_rwsem can get very contended. Add an SRCU instance to
cover the consumer list and consumer lifetime.

Since the consumer are externally embedded structures, unregister will
have to suffer a synchronize_srcu().

A notably complication is the UPROBE_HANDLER_REMOVE logic which can
race against uprobe_register() such that it might want to remove a
freshly installer handler that didn't get called. In order to close
this hole, a seqcount is added. With that, the removal path can tell
if anything changed and bail out of the removal.

Signed-off-by: Peter Zijlstra (Intel) <peterz@xxxxxxxxxxxxx>
---
kernel/events/uprobes.c | 60 ++++++++++++++++++++++++++++++++++++++++--------
1 file changed, 50 insertions(+), 10 deletions(-)

--- a/kernel/events/uprobes.c
+++ b/kernel/events/uprobes.c
@@ -26,6 +26,7 @@
#include <linux/task_work.h>
#include <linux/shmem_fs.h>
#include <linux/khugepaged.h>
+#include <linux/srcu.h>

#include <linux/uprobes.h>

@@ -49,6 +50,11 @@ static struct mutex uprobes_mmap_mutex[U

DEFINE_STATIC_PERCPU_RWSEM(dup_mmap_sem);

+/*
+ * Covers uprobe->consumers lifetime.
+ */
+DEFINE_STATIC_SRCU(uprobes_srcu);
+
/* Have a copy of original instruction */
#define UPROBE_COPY_INSN 0

@@ -57,6 +63,7 @@ struct uprobe {
refcount_t ref;
struct rcu_head rcu;
struct rw_semaphore register_rwsem;
+ seqcount_t register_seq;
struct rw_semaphore consumer_rwsem;
struct list_head pending_list;
struct uprobe_consumer *consumers;
@@ -760,6 +767,7 @@ static struct uprobe *alloc_uprobe(struc
uprobe->offset = offset;
uprobe->ref_ctr_offset = ref_ctr_offset;
init_rwsem(&uprobe->register_rwsem);
+ seqcount_init(&uprobe->register_seq);
init_rwsem(&uprobe->consumer_rwsem);

/* add to uprobes_tree, sorted on inode:offset */
@@ -782,8 +790,8 @@ static struct uprobe *alloc_uprobe(struc
static void consumer_add(struct uprobe *uprobe, struct uprobe_consumer *uc)
{
down_write(&uprobe->consumer_rwsem);
- uc->next = uprobe->consumers;
- uprobe->consumers = uc;
+ WRITE_ONCE(uc->next, uprobe->consumers);
+ rcu_assign_pointer(uprobe->consumers, uc);
up_write(&uprobe->consumer_rwsem);
}

@@ -800,7 +808,7 @@ static bool consumer_del(struct uprobe *
down_write(&uprobe->consumer_rwsem);
for (con = &uprobe->consumers; *con; con = &(*con)->next) {
if (*con == uc) {
- *con = uc->next;
+ WRITE_ONCE(*con, uc->next);
ret = true;
break;
}
@@ -1139,9 +1147,13 @@ void uprobe_unregister(struct inode *ino
return;

down_write(&uprobe->register_rwsem);
+ raw_write_seqcount_begin(&uprobe->register_seq);
__uprobe_unregister(uprobe, uc);
+ raw_write_seqcount_end(&uprobe->register_seq);
up_write(&uprobe->register_rwsem);
put_uprobe(uprobe);
+
+ synchronize_srcu(&uprobes_srcu);
}
EXPORT_SYMBOL_GPL(uprobe_unregister);

@@ -1204,10 +1216,12 @@ static int __uprobe_register(struct inod
down_write(&uprobe->register_rwsem);
ret = -EAGAIN;
if (likely(uprobe_is_active(uprobe))) {
+ raw_write_seqcount_begin(&uprobe->register_seq);
consumer_add(uprobe, uc);
ret = register_for_each_vma(uprobe, uc);
if (ret)
__uprobe_unregister(uprobe, uc);
+ raw_write_seqcount_end(&uprobe->register_seq);
}
up_write(&uprobe->register_rwsem);
put_uprobe(uprobe);
@@ -1250,10 +1264,12 @@ int uprobe_apply(struct inode *inode, lo
return ret;

down_write(&uprobe->register_rwsem);
+ raw_write_seqcount_begin(&uprobe->register_seq);
for (con = uprobe->consumers; con && con != uc ; con = con->next)
;
if (con)
ret = register_for_each_vma(uprobe, add ? uc : NULL);
+ raw_write_seqcount_end(&uprobe->register_seq);
up_write(&uprobe->register_rwsem);
put_uprobe(uprobe);

@@ -2096,15 +2112,23 @@ static struct uprobe *find_active_uprobe
return uprobe;
}

+#define for_each_consumer_rcu(pos, head) \
+ for (pos = rcu_dereference_raw(head); pos; \
+ pos = rcu_dereference_raw(pos->next))
+
static void handler_chain(struct uprobe *uprobe, struct pt_regs *regs)
{
struct uprobe_consumer *uc;
int remove = UPROBE_HANDLER_REMOVE;
bool need_prep = false; /* prepare return uprobe, when needed */
bool had_handler = false;
+ unsigned int seq;

- down_read(&uprobe->register_rwsem);
- for (uc = uprobe->consumers; uc; uc = uc->next) {
+ guard(srcu)(&uprobes_srcu);
+
+ seq = raw_read_seqcount_begin(&uprobe->register_seq);
+
+ for_each_consumer_rcu(uc, uprobe->consumers) {
int rc = 0;

if (uc->handler) {
@@ -2134,9 +2158,25 @@ static void handler_chain(struct uprobe
if (need_prep && !remove)
prepare_uretprobe(uprobe, regs); /* put bp at return */

- if (remove)
+ if (remove) {
+ /*
+ * Removing uprobes is a slow path, after all, the more probes
+ * you remove, the less probe hits you get.
+ *
+ * This needs to serialize against uprobe_register(), such that
+ * if the above RCU iteration missed a new handler that
+ * would've liked to keep the probe, we don't go uninstall the
+ * probe after it already ran register_for_each_vma().
+ *
+ * The rwsem ensures exclusivity against uprobe_register()
+ * while the seqcount will avoid the removal if anything has
+ * changed since we started.
+ */
+ guard(rwsem_read)(&uprobe->register_rwsem);
+ if (read_seqcount_retry(&uprobe->register_seq, seq))
+ return;
unapply_uprobe(uprobe, current->mm);
- up_read(&uprobe->register_rwsem);
+ }
}

static void
@@ -2145,12 +2185,12 @@ handle_uretprobe_chain(struct return_ins
struct uprobe *uprobe = ri->uprobe;
struct uprobe_consumer *uc;

- down_read(&uprobe->register_rwsem);
- for (uc = uprobe->consumers; uc; uc = uc->next) {
+ guard(srcu)(&uprobes_srcu);
+
+ for_each_consumer_rcu(uc, uprobe->consumers) {
if (uc->ret_handler)
uc->ret_handler(uc, ri->func, regs);
}
- up_read(&uprobe->register_rwsem);
}

static struct return_instance *find_next_ret_chain(struct return_instance *ri)