Re: [PATCH v2 tip/perf/core 1/2] uprobes: allow put_uprobe() from non-sleepable softirq context
From: Peter Zijlstra
Date: Fri Oct 18 2024 - 04:26:22 EST
On Mon, Oct 07, 2024 at 05:25:55PM -0700, Andrii Nakryiko wrote:
> Currently put_uprobe() might trigger mutex_lock()/mutex_unlock(), which
> makes it unsuitable to be called from more restricted context like softirq.
This is delayed_uprobe_lock, right?
So can't we do something like so instead?
---
kernel/events/uprobes.c | 40 +++++++++++++++++++++++-----------------
1 file changed, 23 insertions(+), 17 deletions(-)
diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c
index 2a0059464383..d17a9046de35 100644
--- a/kernel/events/uprobes.c
+++ b/kernel/events/uprobes.c
@@ -83,9 +83,11 @@ struct delayed_uprobe {
struct list_head list;
struct uprobe *uprobe;
struct mm_struct *mm;
+ struct rcu_head rcu;
};
-static DEFINE_MUTEX(delayed_uprobe_lock);
+/* XXX global state; use per mm list instead ? */
+static DEFINE_SPINLOCK(delayed_uprobe_lock);
static LIST_HEAD(delayed_uprobe_list);
/*
@@ -289,9 +291,11 @@ delayed_uprobe_check(struct uprobe *uprobe, struct mm_struct *mm)
{
struct delayed_uprobe *du;
- list_for_each_entry(du, &delayed_uprobe_list, list)
+ guard(rcu)();
+ list_for_each_entry_rcu(du, &delayed_uprobe_list, list) {
if (du->uprobe == uprobe && du->mm == mm)
return du;
+ }
return NULL;
}
@@ -308,7 +312,8 @@ static int delayed_uprobe_add(struct uprobe *uprobe, struct mm_struct *mm)
du->uprobe = uprobe;
du->mm = mm;
- list_add(&du->list, &delayed_uprobe_list);
+ scoped_guard(spinlock, &delayed_uprobe_lock)
+ list_add_rcu(&du->list, &delayed_uprobe_list);
return 0;
}
@@ -316,19 +321,21 @@ static void delayed_uprobe_delete(struct delayed_uprobe *du)
{
if (WARN_ON(!du))
return;
- list_del(&du->list);
- kfree(du);
+ scoped_guard(spinlock, &delayed_uprobe_lock)
+ list_del(&du->list);
+ kfree_rcu(du, rcu);
}
static void delayed_uprobe_remove(struct uprobe *uprobe, struct mm_struct *mm)
{
- struct list_head *pos, *q;
struct delayed_uprobe *du;
+ struct list_head *pos;
if (!uprobe && !mm)
return;
- list_for_each_safe(pos, q, &delayed_uprobe_list) {
+ guard(rcu)();
+ list_for_each_rcu(pos, &delayed_uprobe_list) {
du = list_entry(pos, struct delayed_uprobe, list);
if (uprobe && du->uprobe != uprobe)
@@ -434,12 +441,10 @@ static int update_ref_ctr(struct uprobe *uprobe, struct mm_struct *mm,
return ret;
}
- mutex_lock(&delayed_uprobe_lock);
if (d > 0)
ret = delayed_uprobe_add(uprobe, mm);
else
delayed_uprobe_remove(uprobe, mm);
- mutex_unlock(&delayed_uprobe_lock);
return ret;
}
@@ -645,9 +650,7 @@ static void put_uprobe(struct uprobe *uprobe)
* gets called, we don't get a chance to remove uprobe from
* delayed_uprobe_list from remove_breakpoint(). Do it here.
*/
- mutex_lock(&delayed_uprobe_lock);
delayed_uprobe_remove(uprobe, NULL);
- mutex_unlock(&delayed_uprobe_lock);
call_rcu_tasks_trace(&uprobe->rcu, uprobe_free_rcu);
}
@@ -1350,13 +1353,18 @@ static void build_probe_list(struct inode *inode,
/* @vma contains reference counter, not the probed instruction. */
static int delayed_ref_ctr_inc(struct vm_area_struct *vma)
{
- struct list_head *pos, *q;
struct delayed_uprobe *du;
+ struct list_head *pos;
unsigned long vaddr;
int ret = 0, err = 0;
- mutex_lock(&delayed_uprobe_lock);
- list_for_each_safe(pos, q, &delayed_uprobe_list) {
+ /*
+ * delayed_uprobe_list is added to when the ref_ctr is not mapped
+ * and is consulted (this function) when adding maps. And since
+ * mmap_lock serializes these, it is not possible miss an entry.
+ */
+ guard(rcu)();
+ list_for_each_rcu(pos, &delayed_uprobe_list) {
du = list_entry(pos, struct delayed_uprobe, list);
if (du->mm != vma->vm_mm ||
@@ -1370,9 +1378,9 @@ static int delayed_ref_ctr_inc(struct vm_area_struct *vma)
if (!err)
err = ret;
}
+
delayed_uprobe_delete(du);
}
- mutex_unlock(&delayed_uprobe_lock);
return err;
}
@@ -1596,9 +1604,7 @@ void uprobe_clear_state(struct mm_struct *mm)
{
struct xol_area *area = mm->uprobes_state.xol_area;
- mutex_lock(&delayed_uprobe_lock);
delayed_uprobe_remove(NULL, mm);
- mutex_unlock(&delayed_uprobe_lock);
if (!area)
return;