[PATCH 6/8] kcmp: take a lock-free exec_update_seq fast path
From: Christian Brauner
Date: Mon May 25 2026 - 03:29:17 EST
kcmp() compares two tasks' resources after running ptrace_may_access()
checks on both. Today this is done with both tasks' exec_update_locks held
(taken in pointer order to avoid ABBA deadlocks). Add a two-task seqcount
fast path: snapshot both tasks' exec_update_seq, run the checks and the
comparison, then revalidate both snapshots. If either task raced with
exec()/TSYNC, fall back to the existing ordered double-lock (kcmp_lock).
The fast path takes no lock, so it needs no ordering. The comparison logic
moves to kcmp_access() and pointer reads use READ_ONCE();
get_file_raw_ptr() takes and drops its own reference, so a retried
comparison leaks nothing.
Signed-off-by: Christian Brauner (Amutable) <brauner@xxxxxxxxxx>
---
kernel/kcmp.c | 106 +++++++++++++++++++++++++++++++-------------------
1 file changed, 66 insertions(+), 40 deletions(-)
diff --git a/kernel/kcmp.c b/kernel/kcmp.c
index 7c1a65bd5f8d..8fee7a5752d4 100644
--- a/kernel/kcmp.c
+++ b/kernel/kcmp.c
@@ -132,39 +132,15 @@ static int kcmp_epoll_target(struct task_struct *task1,
}
#endif
-SYSCALL_DEFINE5(kcmp, pid_t, pid1, pid_t, pid2, int, type,
- unsigned long, idx1, unsigned long, idx2)
+/* Compare two tasks' resources by obfuscated pointer; caller serializes. */
+static int kcmp_access(struct task_struct *task1, struct task_struct *task2,
+ int type, unsigned long idx1, unsigned long idx2)
{
- struct task_struct *task1, *task2;
int ret;
- rcu_read_lock();
-
- /*
- * Tasks are looked up in caller's PID namespace only.
- */
- task1 = find_task_by_vpid(pid1);
- task2 = find_task_by_vpid(pid2);
- if (unlikely(!task1 || !task2))
- goto err_no_task;
-
- get_task_struct(task1);
- get_task_struct(task2);
-
- rcu_read_unlock();
-
- /*
- * One should have enough rights to inspect task details.
- */
- ret = kcmp_lock(&task1->signal->exec_update_lock,
- &task2->signal->exec_update_lock);
- if (ret)
- goto err;
if (!ptrace_may_access(task1, PTRACE_MODE_READ_REALCREDS) ||
- !ptrace_may_access(task2, PTRACE_MODE_READ_REALCREDS)) {
- ret = -EPERM;
- goto err_unlock;
- }
+ !ptrace_may_access(task2, PTRACE_MODE_READ_REALCREDS))
+ return -EPERM;
switch (type) {
case KCMP_FILE: {
@@ -180,24 +156,29 @@ SYSCALL_DEFINE5(kcmp, pid_t, pid1, pid_t, pid2, int, type,
break;
}
case KCMP_VM:
- ret = kcmp_ptr(task1->mm, task2->mm, KCMP_VM);
+ ret = kcmp_ptr(READ_ONCE(task1->mm), READ_ONCE(task2->mm),
+ KCMP_VM);
break;
case KCMP_FILES:
- ret = kcmp_ptr(task1->files, task2->files, KCMP_FILES);
+ ret = kcmp_ptr(READ_ONCE(task1->files), READ_ONCE(task2->files),
+ KCMP_FILES);
break;
case KCMP_FS:
- ret = kcmp_ptr(task1->fs, task2->fs, KCMP_FS);
+ ret = kcmp_ptr(READ_ONCE(task1->fs), READ_ONCE(task2->fs),
+ KCMP_FS);
break;
case KCMP_SIGHAND:
- ret = kcmp_ptr(task1->sighand, task2->sighand, KCMP_SIGHAND);
+ ret = kcmp_ptr(READ_ONCE(task1->sighand),
+ READ_ONCE(task2->sighand), KCMP_SIGHAND);
break;
case KCMP_IO:
- ret = kcmp_ptr(task1->io_context, task2->io_context, KCMP_IO);
+ ret = kcmp_ptr(READ_ONCE(task1->io_context),
+ READ_ONCE(task2->io_context), KCMP_IO);
break;
case KCMP_SYSVSEM:
#ifdef CONFIG_SYSVIPC
- ret = kcmp_ptr(task1->sysvsem.undo_list,
- task2->sysvsem.undo_list,
+ ret = kcmp_ptr(READ_ONCE(task1->sysvsem.undo_list),
+ READ_ONCE(task2->sysvsem.undo_list),
KCMP_SYSVSEM);
#else
ret = -EOPNOTSUPP;
@@ -211,10 +192,55 @@ SYSCALL_DEFINE5(kcmp, pid_t, pid1, pid_t, pid2, int, type,
break;
}
-err_unlock:
- kcmp_unlock(&task1->signal->exec_update_lock,
- &task2->signal->exec_update_lock);
-err:
+ return ret;
+}
+
+SYSCALL_DEFINE5(kcmp, pid_t, pid1, pid_t, pid2, int, type,
+ unsigned long, idx1, unsigned long, idx2)
+{
+ struct task_struct *task1, *task2;
+ struct signal_struct *sig1, *sig2;
+ unsigned int seq1, seq2;
+ int ret;
+
+ rcu_read_lock();
+
+ /*
+ * Tasks are looked up in caller's PID namespace only.
+ */
+ task1 = find_task_by_vpid(pid1);
+ task2 = find_task_by_vpid(pid2);
+ if (unlikely(!task1 || !task2))
+ goto err_no_task;
+
+ get_task_struct(task1);
+ get_task_struct(task2);
+
+ rcu_read_unlock();
+
+ sig1 = task1->signal;
+ sig2 = task2->signal;
+
+ /*
+ * Lock-free fast path: snapshot both tasks' exec_update_seq, compare,
+ * then revalidate both. Falls back to taking both exec_update_locks in
+ * a deadlock-safe order if either task is mid-exec or revalidation fails.
+ */
+ if (exec_update_speculate_try_begin(sig1, &seq1) &&
+ exec_update_speculate_try_begin(sig2, &seq2)) {
+ ret = kcmp_access(task1, task2, type, idx1, idx2);
+ if (!exec_update_speculate_retry(sig1, seq1) &&
+ !exec_update_speculate_retry(sig2, seq2))
+ goto out;
+ }
+
+ ret = kcmp_lock(&sig1->exec_update_lock, &sig2->exec_update_lock);
+ if (ret)
+ goto out;
+ ret = kcmp_access(task1, task2, type, idx1, idx2);
+ kcmp_unlock(&sig1->exec_update_lock, &sig2->exec_update_lock);
+
+out:
put_task_struct(task1);
put_task_struct(task2);
--
2.47.3
--j3ezp33mpunnwnqz
Content-Type: text/x-diff; charset=utf-8
Content-Disposition: attachment;
filename="0007-proc-lock-free-exec_update_seq-fast-path-for-stack-s.patch"