[PATCH 1/3] uprobes: allow put_uprobe() from non-sleepable softirq context

From: Andrii Nakryiko
Date: Mon Sep 09 2024 - 18:49:30 EST


Currently put_uprobe() might trigger mutex_lock()/mutex_unlock(), which
makes it unsuitable to be called from more restricted context like softirq.

Let's make put_uprobe() agnostic to the context in which it is called,
and use work queue to defer the mutex-protected clean up steps.

To avoid unnecessarily increasing the size of struct uprobe, we colocate
work_struct in parallel with rb_node and rcu, both of which are unused
by the time we get to schedule clean up work.

Signed-off-by: Andrii Nakryiko <andrii@xxxxxxxxxx>
---
kernel/events/uprobes.c | 30 +++++++++++++++++++++++++++---
1 file changed, 27 insertions(+), 3 deletions(-)

diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c
index a2e6a57f79f2..377bd524bc8b 100644
--- a/kernel/events/uprobes.c
+++ b/kernel/events/uprobes.c
@@ -27,6 +27,7 @@
#include <linux/shmem_fs.h>
#include <linux/khugepaged.h>
#include <linux/rcupdate_trace.h>
+#include <linux/workqueue.h>

#include <linux/uprobes.h>

@@ -54,14 +55,20 @@ DEFINE_STATIC_PERCPU_RWSEM(dup_mmap_sem);
#define UPROBE_COPY_INSN 0

struct uprobe {
- struct rb_node rb_node; /* node in the rb tree */
+ union {
+ struct {
+ struct rb_node rb_node; /* node in the rb tree */
+ struct rcu_head rcu;
+ };
+ /* work is used only during freeing, rcu and rb_node are unused at that point */
+ struct work_struct work;
+ };
refcount_t ref;
struct rw_semaphore register_rwsem;
struct rw_semaphore consumer_rwsem;
struct list_head pending_list;
struct list_head consumers;
struct inode *inode; /* Also hold a ref to inode */
- struct rcu_head rcu;
loff_t offset;
loff_t ref_ctr_offset;
unsigned long flags;
@@ -620,11 +627,28 @@ static inline bool uprobe_is_active(struct uprobe *uprobe)
return !RB_EMPTY_NODE(&uprobe->rb_node);
}

+static void uprobe_free_deferred(struct work_struct *work)
+{
+ struct uprobe *uprobe = container_of(work, struct uprobe, work);
+
+ /*
+ * If application munmap(exec_vma) before uprobe_unregister()
+ * gets called, we don't get a chance to remove uprobe from
+ * delayed_uprobe_list from remove_breakpoint(). Do it here.
+ */
+ mutex_lock(&delayed_uprobe_lock);
+ delayed_uprobe_remove(uprobe, NULL);
+ mutex_unlock(&delayed_uprobe_lock);
+
+ kfree(uprobe);
+}
+
static void uprobe_free_rcu(struct rcu_head *rcu)
{
struct uprobe *uprobe = container_of(rcu, struct uprobe, rcu);

- kfree(uprobe);
+ INIT_WORK(&uprobe->work, uprobe_free_deferred);
+ schedule_work(&uprobe->work);
}

static void put_uprobe(struct uprobe *uprobe)
--
2.43.5