Re: [PATCH 04/19] sched: Prepare for Core-wide rq->lock

From: Peter Zijlstra
Date: Wed Apr 28 2021 - 05:13:46 EST


On Tue, Apr 27, 2021 at 04:30:02PM -0700, Josh Don wrote:

> Also, did you mean to have a preempt_enable_no_resched() rather than
> preempt_enable() in raw_spin_rq_trylock?

No, trylock really needs to be preempt_enable(), because it can have
failed, at which point it will not have incremented the preemption count
and our decrement can hit 0, at which point we really should reschedule.

> I went over the rq_lockp stuff again after Don's reported lockup. Most
> uses are safe due to already holding an rq lock. However,
> double_rq_unlock() is prone to race:
>
> double_rq_unlock(rq1, rq2):
> /* Initial state: core sched enabled, and rq1 and rq2 are smt
> siblings. So, double_rq_lock(rq1, rq2) only took a single rq lock */
> raw_spin_rq_unlock(rq1);
> /* now not holding any rq lock */
> /* sched core disabled. Now __rq_lockp(rq1) != __rq_lockp(rq2), so we
> falsely unlock rq2 */
> if (__rq_lockp(rq1) != __rq_lockp(rq2))
> raw_spin_rq_unlock(rq2);
> else
> __release(rq2->lock);
>
> Instead we can cache __rq_lockp(rq1) and __rq_lockp(rq2) before
> releasing the lock, in order to prevent this. FWIW I think it is
> likely that Don is seeing a different issue.

Ah, indeed so. rq_lockp() could do with an assertion, though I'm not sure
how to sanely do that. Anyway, double_rq_unlock() is simple enough to fix:
we can simply flip the order of the unlock()s.

( I'm suffering a cold and am really quite slow atm )

How's this then?

---
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index f732642e3e09..3a534c0c1c46 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -290,6 +290,10 @@ static void sched_core_assert_empty(void)
static void __sched_core_enable(void)
{
static_branch_enable(&__sched_core_enabled);
+ /*
+ * Ensure raw_spin_rq_*lock*() have completed before flipping.
+ */
+ synchronize_sched();
__sched_core_flip(true);
sched_core_assert_empty();
}
@@ -449,16 +453,23 @@ void raw_spin_rq_lock_nested(struct rq *rq, int subclass)
{
raw_spinlock_t *lock;

+ /* Matches synchronize_sched() in __sched_core_enable() */
+ preempt_disable();
if (sched_core_disabled()) {
raw_spin_lock_nested(&rq->__lock, subclass);
+ /* preempt-count *MUST* be > 1 */
+ preempt_enable_no_resched();
return;
}

for (;;) {
lock = __rq_lockp(rq);
raw_spin_lock_nested(lock, subclass);
- if (likely(lock == __rq_lockp(rq)))
+ if (likely(lock == __rq_lockp(rq))) {
+ /* preempt-count *MUST* be > 1 */
+ preempt_enable_no_resched();
return;
+ }
raw_spin_unlock(lock);
}
}
@@ -468,14 +479,21 @@ bool raw_spin_rq_trylock(struct rq *rq)
raw_spinlock_t *lock;
bool ret;

- if (sched_core_disabled())
- return raw_spin_trylock(&rq->__lock);
+ /* Matches synchronize_sched() in __sched_core_enable() */
+ preempt_disable();
+ if (sched_core_disabled()) {
+ ret = raw_spin_trylock(&rq->__lock);
+ preempt_enable();
+ return ret;
+ }

for (;;) {
lock = __rq_lockp(rq);
ret = raw_spin_trylock(lock);
- if (!ret || (likely(lock == __rq_lockp(rq))))
+ if (!ret || (likely(lock == __rq_lockp(rq)))) {
+ preempt_enable();
return ret;
+ }
raw_spin_unlock(lock);
}
}
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 6a905fe19eef..c9a52231d58a 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -2568,11 +2568,12 @@ static inline void double_rq_unlock(struct rq *rq1, struct rq *rq2)
__releases(rq1->lock)
__releases(rq2->lock)
{
- raw_spin_rq_unlock(rq1);
if (__rq_lockp(rq1) != __rq_lockp(rq2))
raw_spin_rq_unlock(rq2);
else
__release(rq2->lock);
+
+ raw_spin_rq_unlock(rq1);
}

extern void set_rq_online (struct rq *rq);