[PATCH 05/10] perf/uprobe: SRCU-ify uprobe->consumer list
From: Peter Zijlstra
Date: Mon Jul 08 2024 - 05:31:03 EST
With handle_swbp() hitting concurrently on (all) CPUs, the
uprobe->register_rwsem can get very contended. Replace it with SRCU
on the read side plus a mutex on the write side.
Signed-off-by: Peter Zijlstra (Intel) <peterz@xxxxxxxxxxxxx>
---
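[Editorial note, not part of the patch: a minimal sketch of the pattern
being applied, with made-up names (my_srcu, my_consumer, my_head). The
hot path walks the consumer list inside an SRCU read-side critical
section instead of taking a rwsem for read, so concurrent breakpoint
hits no longer serialize on a shared lock.]

#include <linux/srcu.h>
#include <linux/rcupdate.h>

DEFINE_STATIC_SRCU(my_srcu);

struct my_consumer {
	struct my_consumer *next;
	void (*func)(struct my_consumer *);
};

static struct my_consumer *my_head;

/* Hot path: replaces down_read(); readers never block on updaters. */
static void call_consumers(void)
{
	struct my_consumer *c;
	int idx;

	idx = srcu_read_lock(&my_srcu);
	for (c = rcu_dereference_raw(my_head); c;
	     c = rcu_dereference_raw(c->next))
		c->func(c);
	srcu_read_unlock(&my_srcu, idx);
}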
kernel/events/uprobes.c | 37 +++++++++++++++++++++++--------------
1 file changed, 23 insertions(+), 14 deletions(-)
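[Editorial note, continuing the sketch above: the updater side. The
mutex only serializes updaters against each other; what makes freeing
a removed consumer safe is synchronize_srcu(), which waits for every
CPU still inside the read-side section. This is why uprobe_unregister()
below gains a synchronize_srcu() call after dropping the mutex. A third
sketch after the diff shows the guard(srcu) form used in the handlers.]

#include <linux/mutex.h>

static DEFINE_MUTEX(my_mutex);

/* Cold path: unlink under the mutex, then wait out the readers. */
static void remove_consumer(struct my_consumer *c)
{
	struct my_consumer **pp;

	mutex_lock(&my_mutex);
	for (pp = &my_head; *pp; pp = &(*pp)->next) {
		if (*pp == c) {
			WRITE_ONCE(*pp, c->next);
			break;
		}
	}
	mutex_unlock(&my_mutex);

	/* No CPU can still be referencing @c once this returns. */
	synchronize_srcu(&my_srcu);
}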
--- a/kernel/events/uprobes.c
+++ b/kernel/events/uprobes.c
@@ -26,6 +26,7 @@
#include <linux/task_work.h>
#include <linux/shmem_fs.h>
#include <linux/khugepaged.h>
+#include <linux/srcu.h>
#include <linux/uprobes.h>
@@ -49,6 +50,8 @@ static struct mutex uprobes_mmap_mutex[U
DEFINE_STATIC_PERCPU_RWSEM(dup_mmap_sem);
+DEFINE_STATIC_SRCU(uprobes_srcu);
+
/* Have a copy of original instruction */
#define UPROBE_COPY_INSN 0
@@ -56,7 +59,7 @@ struct uprobe {
struct rb_node rb_node; /* node in the rb tree */
refcount_t ref;
struct rcu_head rcu;
- struct rw_semaphore register_rwsem;
+ struct mutex register_mutex;
struct rw_semaphore consumer_rwsem;
struct list_head pending_list;
struct uprobe_consumer *consumers;
@@ -750,7 +753,7 @@ static struct uprobe *alloc_uprobe(struc
uprobe->inode = inode;
uprobe->offset = offset;
uprobe->ref_ctr_offset = ref_ctr_offset;
- init_rwsem(&uprobe->register_rwsem);
+ mutex_init(&uprobe->register_mutex);
init_rwsem(&uprobe->consumer_rwsem);
/* add to uprobes_tree, sorted on inode:offset */
@@ -1129,10 +1132,12 @@ void uprobe_unregister(struct inode *ino
if (WARN_ON(!uprobe))
return;
- down_write(&uprobe->register_rwsem);
+ mutex_lock(&uprobe->register_mutex);
__uprobe_unregister(uprobe, uc);
- up_write(&uprobe->register_rwsem);
+ mutex_unlock(&uprobe->register_mutex);
put_uprobe(uprobe);
+
+ synchronize_srcu(&uprobes_srcu);
}
EXPORT_SYMBOL_GPL(uprobe_unregister);
@@ -1192,7 +1197,7 @@ static int __uprobe_register(struct inod
* We can race with uprobe_unregister()->delete_uprobe().
* Check uprobe_is_active() and retry if it is false.
*/
- down_write(&uprobe->register_rwsem);
+ mutex_lock(&uprobe->register_mutex);
ret = -EAGAIN;
if (likely(uprobe_is_active(uprobe))) {
consumer_add(uprobe, uc);
@@ -1200,7 +1205,7 @@ static int __uprobe_register(struct inod
if (ret)
__uprobe_unregister(uprobe, uc);
}
- up_write(&uprobe->register_rwsem);
+ mutex_unlock(&uprobe->register_mutex);
put_uprobe(uprobe);
if (unlikely(ret == -EAGAIN))
@@ -1240,12 +1245,12 @@ int uprobe_apply(struct inode *inode, lo
if (WARN_ON(!uprobe))
return ret;
- down_write(&uprobe->register_rwsem);
+ mutex_lock(&uprobe->register_mutex);
for (con = uprobe->consumers; con && con != uc ; con = con->next)
;
if (con)
ret = register_for_each_vma(uprobe, add ? uc : NULL);
- up_write(&uprobe->register_rwsem);
+ mutex_unlock(&uprobe->register_mutex);
put_uprobe(uprobe);
return ret;
@@ -2087,14 +2092,19 @@ static struct uprobe *find_active_uprobe
return uprobe;
}
+#define for_each_consumer_rcu(pos, head) \
+ for (pos = rcu_dereference_raw(head); pos; \
+ pos = rcu_dereference_raw(pos->next))
+
static void handler_chain(struct uprobe *uprobe, struct pt_regs *regs)
{
struct uprobe_consumer *uc;
int remove = UPROBE_HANDLER_REMOVE;
bool need_prep = false; /* prepare return uprobe, when needed */
- down_read(&uprobe->register_rwsem);
- for (uc = uprobe->consumers; uc; uc = uc->next) {
+ guard(srcu)(&uprobes_srcu);
+
+ for_each_consumer_rcu(uc, uprobe->consumers) {
int rc = 0;
if (uc->handler) {
@@ -2116,7 +2126,6 @@ static void handler_chain(struct uprobe
WARN_ON(!uprobe_is_active(uprobe));
unapply_uprobe(uprobe, current->mm);
}
- up_read(&uprobe->register_rwsem);
}
static void
@@ -2125,12 +2134,12 @@ handle_uretprobe_chain(struct return_ins
struct uprobe *uprobe = ri->uprobe;
struct uprobe_consumer *uc;
- down_read(&uprobe->register_rwsem);
- for (uc = uprobe->consumers; uc; uc = uc->next) {
+ guard(srcu)(&uprobes_srcu);
+
+ for_each_consumer_rcu(uc, uprobe->consumers) {
if (uc->ret_handler)
uc->ret_handler(uc, ri->func, regs);
}
- up_read(&uprobe->register_rwsem);
}
static struct return_instance *find_next_ret_chain(struct return_instance *ri)
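[Editorial note, not part of the patch: guard(srcu)(&uprobes_srcu) in
handler_chain() and handle_uretprobe_chain() above is the scope-based
form from the <linux/cleanup.h> guard() machinery. It takes the SRCU
read lock at the declaration and drops it when the enclosing scope
ends, which is why the trailing up_read() lines are simply deleted
rather than replaced. In the sketch's terms:]

/* Equivalent to the explicit srcu_read_lock()/unlock() pair above. */
static void call_consumers_guarded(void)
{
	struct my_consumer *c;

	guard(srcu)(&my_srcu);	/* released at end of scope */

	for (c = rcu_dereference_raw(my_head); c;
	     c = rcu_dereference_raw(c->next))
		c->func(c);
}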