Re: [PATCH] tracing/user_events: Run BPF program if attached

From: Linus Torvalds
Date: Wed May 17 2023 - 15:28:10 EST


On Wed, May 17, 2023 at 12:08 PM Beau Belgrave
<beaub@xxxxxxxxxxxxxxxxxxx> wrote:
>
> user_event_mm_dup() puts a new mm into the global list before the
> enablers list is fully populated.

Then that simply needs to be fixed.

user_event_mm_dup() should not add the mm into the global list until
it is *done*.

Because if it makes that list visible to others in a half-way state,
then it needs to use the proper locking and use event_mutex.

You can't say "this is so critical that we can't take a lock" and then
use that as an excuse to simply do buggy code.

Either take the lock in user_event_mm_dup(), or make sure that the
data structures are all completely local so that no lock is necessary.

Here's a COMPLETELY UNTESTED patch that just separates out the notion
of "allocate" and "attach".

NOTE NOTE NOTE! I am *not* claiming this patch works. It builds for
me. It looks right. It seems like it's the right thing to do. But it
might have some issues.

With this, the newly dup'ed list is attached to the process once after
it is done, so nobody can see the list being built up.

Also note that this does NOT fix the incorrect RCU walks.

Linus
kernel/trace/trace_events_user.c | 29 ++++++++++++++++++-----------
1 file changed, 18 insertions(+), 11 deletions(-)

diff --git a/kernel/trace/trace_events_user.c b/kernel/trace/trace_events_user.c
index b1ecd7677642..b2aecbfbbd24 100644
--- a/kernel/trace/trace_events_user.c
+++ b/kernel/trace/trace_events_user.c
@@ -538,10 +538,9 @@ static struct user_event_mm *user_event_mm_get_all(struct user_event *user)
return found;
}

-static struct user_event_mm *user_event_mm_create(struct task_struct *t)
+static struct user_event_mm *user_event_mm_alloc(struct task_struct *t)
{
struct user_event_mm *user_mm;
- unsigned long flags;

user_mm = kzalloc(sizeof(*user_mm), GFP_KERNEL_ACCOUNT);

@@ -553,12 +552,6 @@ static struct user_event_mm *user_event_mm_create(struct task_struct *t)
refcount_set(&user_mm->refcnt, 1);
refcount_set(&user_mm->tasks, 1);

- spin_lock_irqsave(&user_event_mms_lock, flags);
- list_add_rcu(&user_mm->link, &user_event_mms);
- spin_unlock_irqrestore(&user_event_mms_lock, flags);
-
- t->user_event_mm = user_mm;
-
/*
* The lifetime of the memory descriptor can slightly outlast
* the task lifetime if a ref to the user_event_mm is taken
@@ -572,6 +565,17 @@ static struct user_event_mm *user_event_mm_create(struct task_struct *t)
return user_mm;
}

+static void user_event_mm_attach(struct user_event_mm *user_mm, struct task_struct *t)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&user_event_mms_lock, flags);
+ list_add_rcu(&user_mm->link, &user_event_mms);
+ spin_unlock_irqrestore(&user_event_mms_lock, flags);
+
+ t->user_event_mm = user_mm;
+}
+
static struct user_event_mm *current_user_event_mm(void)
{
struct user_event_mm *user_mm = current->user_event_mm;
@@ -579,10 +583,12 @@ static struct user_event_mm *current_user_event_mm(void)
if (user_mm)
goto inc;

- user_mm = user_event_mm_create(current);
+ user_mm = user_event_mm_alloc(current);

if (!user_mm)
goto error;
+
+ user_event_mm_attach(user_mm, current);
inc:
refcount_inc(&user_mm->refcnt);
error:
@@ -670,7 +676,7 @@ void user_event_mm_remove(struct task_struct *t)

void user_event_mm_dup(struct task_struct *t, struct user_event_mm *old_mm)
{
- struct user_event_mm *mm = user_event_mm_create(t);
+ struct user_event_mm *mm = user_event_mm_alloc(t);
struct user_event_enabler *enabler;

if (!mm)
@@ -684,10 +690,11 @@ void user_event_mm_dup(struct task_struct *t, struct user_event_mm *old_mm)

rcu_read_unlock();

+ user_event_mm_attach(mm, t);
return;
error:
rcu_read_unlock();
- user_event_mm_remove(t);
+ user_event_mm_destroy(mm);
}

static bool current_user_event_enabler_exists(unsigned long uaddr,