[PATCH v3 1/4] tracing/user_events: Split up mm alloc and attach

From: Beau Belgrave
Date: Fri May 19 2023 - 19:08:32 EST


From: Linus Torvalds <torvalds@xxxxxxxxxxxxxxxxxxxx>

When a new mm is being created in a fork() path it currently is
allocated and then attached in one go. This leaves the mm exposed out to
the tracing register callbacks while any parent enabler locations are
copied in. This should not happen.

Split up mm alloc and attach as unique operations. When duplicating
enablers, first alloc, then duplicate, and only upon success, attach.
This prevents any timing window outside of the event_reg mutex for
enablement walking. This allows for dropping RCU requirement for
enablement walking in later patches.

Link: https://lore.kernel.org/linux-trace-kernel/CAHk-=whTBvXJuoi_kACo3qi5WZUmRrhyA-_=rRFsycTytmB6qw@xxxxxxxxxxxxxx/

Signed-off-by: Linus Torvalds <torvalds@xxxxxxxxxxxxxxxxxxxx>
[ change log written by Beau Belgrave ]
Signed-off-by: Beau Belgrave <beaub@xxxxxxxxxxxxxxxxxxx>
---
kernel/trace/trace_events_user.c | 29 ++++++++++++++++++-----------
1 file changed, 18 insertions(+), 11 deletions(-)

diff --git a/kernel/trace/trace_events_user.c b/kernel/trace/trace_events_user.c
index b1ecd7677642..b2aecbfbbd24 100644
--- a/kernel/trace/trace_events_user.c
+++ b/kernel/trace/trace_events_user.c
@@ -538,10 +538,9 @@ static struct user_event_mm *user_event_mm_get_all(struct user_event *user)
return found;
}

-static struct user_event_mm *user_event_mm_create(struct task_struct *t)
+static struct user_event_mm *user_event_mm_alloc(struct task_struct *t)
{
struct user_event_mm *user_mm;
- unsigned long flags;

user_mm = kzalloc(sizeof(*user_mm), GFP_KERNEL_ACCOUNT);

@@ -553,12 +552,6 @@ static struct user_event_mm *user_event_mm_create(struct task_struct *t)
refcount_set(&user_mm->refcnt, 1);
refcount_set(&user_mm->tasks, 1);

- spin_lock_irqsave(&user_event_mms_lock, flags);
- list_add_rcu(&user_mm->link, &user_event_mms);
- spin_unlock_irqrestore(&user_event_mms_lock, flags);
-
- t->user_event_mm = user_mm;
-
/*
* The lifetime of the memory descriptor can slightly outlast
* the task lifetime if a ref to the user_event_mm is taken
@@ -572,6 +565,17 @@ static struct user_event_mm *user_event_mm_create(struct task_struct *t)
return user_mm;
}

+static void user_event_mm_attach(struct user_event_mm *user_mm, struct task_struct *t)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&user_event_mms_lock, flags);
+ list_add_rcu(&user_mm->link, &user_event_mms);
+ spin_unlock_irqrestore(&user_event_mms_lock, flags);
+
+ t->user_event_mm = user_mm;
+}
+
static struct user_event_mm *current_user_event_mm(void)
{
struct user_event_mm *user_mm = current->user_event_mm;
@@ -579,10 +583,12 @@ static struct user_event_mm *current_user_event_mm(void)
if (user_mm)
goto inc;

- user_mm = user_event_mm_create(current);
+ user_mm = user_event_mm_alloc(current);

if (!user_mm)
goto error;
+
+ user_event_mm_attach(user_mm, current);
inc:
refcount_inc(&user_mm->refcnt);
error:
@@ -670,7 +676,7 @@ void user_event_mm_remove(struct task_struct *t)

void user_event_mm_dup(struct task_struct *t, struct user_event_mm *old_mm)
{
- struct user_event_mm *mm = user_event_mm_create(t);
+ struct user_event_mm *mm = user_event_mm_alloc(t);
struct user_event_enabler *enabler;

if (!mm)
@@ -684,10 +690,11 @@ void user_event_mm_dup(struct task_struct *t, struct user_event_mm *old_mm)

rcu_read_unlock();

+ user_event_mm_attach(mm, t);
return;
error:
rcu_read_unlock();
- user_event_mm_remove(t);
+ user_event_mm_destroy(mm);
}

static bool current_user_event_enabler_exists(unsigned long uaddr,
--
2.25.1