[PATCH] exec: fix the racy usage of fs_struct->in_exec
From: Oleg Nesterov
Date: Mon Mar 24 2025 - 12:00:59 EST
check_unsafe_exec() sets fs->in_exec under cred_guard_mutex, then execve()
paths clear fs->in_exec lockless. This is fine if exec succeeds, but if it
fails we have the following race:
T1 sets fs->in_exec = 1, fails, drops cred_guard_mutex
T2 sets fs->in_exec = 1
T1 clears fs->in_exec
T2 continues with fs->in_exec == 0
Change fs/exec.c to clear fs->in_exec with cred_guard_mutex held.
Reported-by: syzbot+1c486d0b62032c82a968@xxxxxxxxxxxxxxxxxxxxxxxxx
Closes: https://lore.kernel.org/all/67dc67f0.050a0220.25ae54.001f.GAE@xxxxxxxxxx/
Cc: stable@xxxxxxxxxxxxxxx
Signed-off-by: Oleg Nesterov <oleg@xxxxxxxxxx>
---
fs/exec.c | 15 +++++++++------
1 file changed, 9 insertions(+), 6 deletions(-)
diff --git a/fs/exec.c b/fs/exec.c
index 506cd411f4ac..17047210be46 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -1229,13 +1229,12 @@ int begin_new_exec(struct linux_binprm * bprm)
*/
bprm->point_of_no_return = true;
- /*
- * Make this the only thread in the thread group.
- */
+ /* Make this the only thread in the thread group */
retval = de_thread(me);
if (retval)
goto out;
-
+ /* see the comment in check_unsafe_exec() */
+ current->fs->in_exec = 0;
/*
* Cancel any io_uring activity across execve
*/
@@ -1497,6 +1496,8 @@ static void free_bprm(struct linux_binprm *bprm)
}
free_arg_pages(bprm);
if (bprm->cred) {
+ /* in case exec fails before de_thread() succeeds */
+ current->fs->in_exec = 0;
mutex_unlock(¤t->signal->cred_guard_mutex);
abort_creds(bprm->cred);
}
@@ -1618,6 +1619,10 @@ static void check_unsafe_exec(struct linux_binprm *bprm)
* suid exec because the differently privileged task
* will be able to manipulate the current directory, etc.
* It would be nice to force an unshare instead...
+ *
+ * Otherwise we set fs->in_exec = 1 to deny clone(CLONE_FS)
+ * from another sub-thread until de_thread() succeeds, this
+ * state is protected by cred_guard_mutex we hold.
*/
n_fs = 1;
spin_lock(&p->fs->lock);
@@ -1862,7 +1867,6 @@ static int bprm_execve(struct linux_binprm *bprm)
sched_mm_cid_after_execve(current);
/* execve succeeded */
- current->fs->in_exec = 0;
current->in_execve = 0;
rseq_execve(current);
user_events_execve(current);
@@ -1881,7 +1885,6 @@ static int bprm_execve(struct linux_binprm *bprm)
force_fatal_sig(SIGSEGV);
sched_mm_cid_after_execve(current);
- current->fs->in_exec = 0;
current->in_execve = 0;
return retval;
--
2.25.1.362.g51ebf55