[PATCH v3 5/6] sched,ptrace: Avoid relying on __TASK_TRACED | __TASK_STOPPED

From: Peter Zijlstra
Date: Sat Oct 09 2021 - 06:20:18 EST


Make ->ptrace/->jobctl the canonical state; this allows us to play
games with __state (such as freezing).

The wait_task_inactive() usage will be fixed up later, once we have
an additional TASK_* state.

Signed-off-by: Peter Zijlstra (Intel) <peterz@xxxxxxxxxxxxx>
---
include/linux/sched.h | 27 ++++++++++++++++++++++-----
kernel/ptrace.c | 16 +++++++++-------
2 files changed, 31 insertions(+), 12 deletions(-)

--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -118,11 +118,9 @@ struct task_group;

#define task_is_running(task) (READ_ONCE((task)->__state) == TASK_RUNNING)

-#define task_is_traced(task) ((READ_ONCE(task->__state) & __TASK_TRACED) != 0)
-
-#define task_is_stopped(task) ((READ_ONCE(task->__state) & __TASK_STOPPED) != 0)
-
-#define task_is_stopped_or_traced(task) ((READ_ONCE(task->__state) & (__TASK_STOPPED | __TASK_TRACED)) != 0)
+#define task_is_traced(task) ((task)->ptrace & PT_STOPPED)
+#define task_is_stopped(task) ((task)->jobctl & JOBCTL_STOP_PENDING)
+#define task_is_stopped_or_traced(task) (task_is_stopped(task) || task_is_traced(task))

/*
* Special states are those that do not use the normal wait-loop pattern. See
@@ -228,6 +226,25 @@ struct task_group;
} while (0)

/*
+ * task_cond_set_special_state() is a cmpxchg-like operation on task->__state:
+ * under ->pi_lock, store @cond_state if it is non-zero; true iff stored.
+ * This operation isn't safe in general and should only be used to transform
+ * one (special) blocked state into another, such as:
+ * TASK_STOPPED <-> TASK_FROZEN.
+ */
+#define task_cond_set_special_state(task, cond_state) \
+ ({ \
+ struct task_struct *__p = (task); \
+ unsigned long __flags; /* may shadow a caller's __flags */ \
+ unsigned int __state; \
+ raw_spin_lock_irqsave(&__p->pi_lock, __flags); \
+ if ((__state = (cond_state))) \
+ WRITE_ONCE(__p->__state, __state); \
+ raw_spin_unlock_irqrestore(&__p->pi_lock, __flags); \
+ !!__state; \
+ })
+
+/*
* PREEMPT_RT specific variants for "sleeping" spin/rwlocks
*
* RT's spin/rwlock substitutions are state preserving. The state of the
--- a/kernel/ptrace.c
+++ b/kernel/ptrace.c
@@ -207,7 +207,8 @@ static bool ptrace_freeze_traced(struct
!__fatal_signal_pending(task)) {
task->ptrace &= ~PT_STOPPED_MASK;
task->ptrace |= PT_STOPPED;
- WRITE_ONCE(task->__state, __TASK_TRACED);
+ /* *TASK_TRACED -> __TASK_TRACED */
+ task_cond_set_special_state(task, !!(task->__state & __TASK_TRACED) * __TASK_TRACED);
ret = true;
}
spin_unlock_irq(&task->sighand->siglock);
@@ -217,7 +218,7 @@ static bool ptrace_freeze_traced(struct

static void ptrace_unfreeze_traced(struct task_struct *task)
{
- if (READ_ONCE(task->__state) != __TASK_TRACED)
+ if (!task_is_traced(task))
return;

WARN_ON(!task->ptrace || task->parent != current);
@@ -227,13 +228,14 @@ static void ptrace_unfreeze_traced(struc
* Recheck state under the lock to close this race.
*/
spin_lock_irq(&task->sighand->siglock);
- if (READ_ONCE(task->__state) == __TASK_TRACED) {
+ if (task_is_traced(task)) {
if (__fatal_signal_pending(task)) {
task->ptrace &= ~PT_STOPPED_MASK;
wake_up_state(task, __TASK_TRACED);
} else {
task->ptrace |= PT_STOPPED_MASK;
- WRITE_ONCE(task->__state, TASK_TRACED);
+ /* *TASK_TRACED -> TASK_TRACED */
+ task_cond_set_special_state(task, !!(task->__state & __TASK_TRACED) * TASK_TRACED);
}
}
spin_unlock_irq(&task->sighand->siglock);
@@ -269,7 +271,7 @@ static int ptrace_check_attach(struct ta
*/
read_lock(&tasklist_lock);
if (child->ptrace && child->parent == current) {
- WARN_ON(READ_ONCE(child->__state) == __TASK_TRACED);
+// WARN_ON(READ_ONCE(child->__state) == __TASK_TRACED);
/*
* child->sighand can't be NULL, release_task()
* does ptrace_unlink() before __exit_signal().
@@ -280,13 +282,13 @@ static int ptrace_check_attach(struct ta
read_unlock(&tasklist_lock);

if (!ret && !ignore_state) {
- if (!wait_task_inactive(child, __TASK_TRACED)) {
+ if (!wait_task_inactive(child, __TASK_TRACED)) { // XXX mooo!!!
/*
* This can only happen if may_ptrace_stop() fails and
* ptrace_stop() changes ->state back to TASK_RUNNING,
* so we should not worry about leaking __TASK_TRACED.
*/
- WARN_ON(READ_ONCE(child->__state) == __TASK_TRACED);
+// WARN_ON(READ_ONCE(child->__state) == __TASK_TRACED);
ret = -ESRCH;
}
}