Re: [PATCH] sched: Consider task_struct::saved_state in wait_task_inactive().

From: Peter Zijlstra
Date: Fri May 26 2023 - 09:55:36 EST


On Thu, May 25, 2023 at 06:52:44PM +0200, Peter Zijlstra wrote:
> On Fri, Feb 17, 2023 at 03:53:02PM +0100, Sebastian Andrzej Siewior wrote:
>
> > +static __always_inline bool state_mismatch(struct task_struct *p, unsigned int match_state)
> > +{
> > + unsigned long flags;
> > + bool mismatch;
> > +
> > + raw_spin_lock_irqsave(&p->pi_lock, flags);
> > + if (READ_ONCE(p->__state) & match_state)
> > + mismatch = false;
> > + else if (READ_ONCE(p->saved_state) & match_state)
> > + mismatch = false;
> > + else
> > + mismatch = true;
> > +
> > + raw_spin_unlock_irqrestore(&p->pi_lock, flags);
> > + return mismatch;
> > +}
> > +static __always_inline bool state_match(struct task_struct *p, unsigned int match_state,
> > + bool *wait)
> > +{
> > + if (READ_ONCE(p->__state) & match_state)
> > + return true;
> > + if (READ_ONCE(p->saved_state) & match_state) {
> > + *wait = true;
> > + return true;
> > + }
> > + return false;
> > +}
> > +#else
> > +static __always_inline bool state_mismatch(struct task_struct *p, unsigned int match_state)
> > +{
> > + return !(READ_ONCE(p->__state) & match_state);
> > +}
> > +static __always_inline bool state_match(struct task_struct *p, unsigned int match_state,
> > + bool *wait)
> > +{
> > + return (READ_ONCE(p->__state) & match_state);
> > +}
> > +#endif
> > +
> > /*
> > * wait_task_inactive - wait for a thread to unschedule.
> > *
>
> Urgh...
>
> I've ended up with the below.. I've tried folding it with
> ttwu_state_match() but every attempt so far makes it an unholy mess.
>
> Now, if only we had proper lock guard then we could drop another few
> lines, but alas.

New day, new chances... How's this? Code-gen doesn't look totally
insane, but then, making sense of an optimizing compiler's output is
always a wee challenge.

---
kernel/sched/core.c | 55 ++++++++++++++++++++++++++++++++++++++++++-----------
1 file changed, 44 insertions(+), 11 deletions(-)

diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index a68d1276bab0..d89610fffd23 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -3341,6 +3341,35 @@ int migrate_swap(struct task_struct *cur, struct task_struct *p,
}
#endif /* CONFIG_NUMA_BALANCING */

+static __always_inline
+int __task_state_match(struct task_struct *p, unsigned int state)
+{
+ if (READ_ONCE(p->__state) & state)
+ return 1;
+
+#ifdef CONFIG_PREEMPT_RT
+ if (READ_ONCE(p->saved_state) & state)
+ return -1;
+#endif
+ return 0;
+}
+
+static __always_inline
+int task_state_match(struct task_struct *p, unsigned int state)
+{
+#ifdef CONFIG_PREEMPT_RT
+ int match;
+
+ raw_spin_lock_irq(&p->pi_lock);
+ match = __task_state_match(p, state);
+ raw_spin_unlock_irq(&p->pi_lock);
+
+ return match;
+#else
+ return __task_state_match(p, state);
+#endif
+}
+
/*
* wait_task_inactive - wait for a thread to unschedule.
*
@@ -3359,7 +3388,7 @@ int migrate_swap(struct task_struct *cur, struct task_struct *p,
*/
unsigned long wait_task_inactive(struct task_struct *p, unsigned int match_state)
{
- int running, queued;
+ int running, queued, match;
struct rq_flags rf;
unsigned long ncsw;
struct rq *rq;
@@ -3385,7 +3414,7 @@ unsigned long wait_task_inactive(struct task_struct *p, unsigned int match_state
* is actually now running somewhere else!
*/
while (task_on_cpu(rq, p)) {
- if (!(READ_ONCE(p->__state) & match_state))
+ if (!task_state_match(p, match_state))
return 0;
cpu_relax();
}
@@ -3400,8 +3429,15 @@ unsigned long wait_task_inactive(struct task_struct *p, unsigned int match_state
running = task_on_cpu(rq, p);
queued = task_on_rq_queued(p);
ncsw = 0;
- if (READ_ONCE(p->__state) & match_state)
+ if ((match = __task_state_match(p, match_state))) {
+ /*
+ * When matching on p->saved_state, consider this task
+ * still queued so it will wait.
+ */
+ if (match < 0)
+ queued = 1;
ncsw = p->nvcsw | LONG_MIN; /* sets MSB */
+ }
task_rq_unlock(rq, p, &rf);

/*
@@ -4003,15 +4039,14 @@ static void ttwu_queue(struct task_struct *p, int cpu, int wake_flags)
static __always_inline
bool ttwu_state_match(struct task_struct *p, unsigned int state, int *success)
{
+ int match;
+
if (IS_ENABLED(CONFIG_DEBUG_PREEMPT)) {
WARN_ON_ONCE((state & TASK_RTLOCK_WAIT) &&
state != TASK_RTLOCK_WAIT);
}

- if (READ_ONCE(p->__state) & state) {
- *success = 1;
- return true;
- }
+ *success = !!(match = __task_state_match(p, state));

#ifdef CONFIG_PREEMPT_RT
/*
@@ -4027,12 +4062,10 @@ bool ttwu_state_match(struct task_struct *p, unsigned int state, int *success)
* p::saved_state to TASK_RUNNING so any further tests will
* not result in false positives vs. @success
*/
- if (p->saved_state & state) {
+ if (match < 0)
p->saved_state = TASK_RUNNING;
- *success = 1;
- }
#endif
- return false;
+ return match > 0;
}

/*