Re: [PATCH 1/4] sched/wakeup: Strengthen current_save_and_set_rtlock_wait_state()
From: Peter Zijlstra
Date: Fri Sep 10 2021 - 10:03:56 EST
On Fri, Sep 10, 2021 at 03:17:04PM +0200, Peter Zijlstra wrote:
> On Fri, Sep 10, 2021 at 01:57:26PM +0100, Will Deacon wrote:
> > On Thu, Sep 09, 2021 at 04:27:46PM +0200, Peter Zijlstra wrote:
> > > Moo yes, so the earlier changelog I wrote was something like:
> > >
> > > current_save_and_set_rtlock_wait_state();
> > > for (;;) {
> > > if (try_lock())
> > > break;
> > >
> > > raw_spin_unlock_irq(&lock->wait_lock);
> > > if (!cond)
> > > schedule();
> > > raw_spin_lock_irq(&lock->wait_lock);
> > >
> > > set_current_state(TASK_RTLOCK_WAIT);
> > > }
> > > current_restore_rtlock_saved_state();
> > >
> > > which is more what the code looks like before these patches, and in that
> > > case the @cond load can be lifted before __state.
> >
> > Ah, so that makes more sense, thanks. I can't see how the try_lock() could
> > be reordered though, as it's going to have to do an atomic rmw.
>
> OK, lemme go update the Changelog and make it __flags for bigeasy :-)
The patch now reads:
---
Subject: sched/wakeup: Strengthen current_save_and_set_rtlock_wait_state()
From: Peter Zijlstra <peterz@xxxxxxxxxxxxx>
Date: Thu, 09 Sep 2021 12:59:16 +0200
While looking at current_save_and_set_rtlock_wait_state() I'm thinking
it really ought to use smp_store_mb(), because using it for a more
traditional wait loop like:
current_save_and_set_rtlock_wait_state();
for (;;) {
if (cond)
schedule();
set_current_state(TASK_RTLOCK_WAIT);
}
current_restore_rtlock_saved_state();
is actually broken, since the cond load could be re-ordered against
the state store, which could lead to a missed wakeup -> BAD (tm).
While there, make them consistent with the IRQ usage in
set_special_state().
Fixes: 5f220be21418 ("sched/wakeup: Prepare for RT sleeping spin/rwlocks")
Signed-off-by: Peter Zijlstra (Intel) <peterz@xxxxxxxxxxxxx>
Link: https://lkml.kernel.org/r/20210909110203.767330253@xxxxxxxxxxxxx
---
include/linux/sched.h | 21 ++++++++++++---------
1 file changed, 12 insertions(+), 9 deletions(-)
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -218,7 +218,7 @@ struct task_group;
*/
#define set_special_state(state_value) \
do { \
- unsigned long flags; /* may shadow */ \
+ unsigned long __flags; /* may shadow */ \
\
raw_spin_lock_irqsave(&current->pi_lock, flags); \
debug_special_state_change((state_value)); \
@@ -245,7 +245,8 @@ struct task_group;
* if (try_lock())
* break;
* raw_spin_unlock_irq(&lock->wait_lock);
- * schedule_rtlock();
+ * if (!cond)
+ * schedule_rtlock();
* raw_spin_lock_irq(&lock->wait_lock);
* set_current_state(TASK_RTLOCK_WAIT);
* }
@@ -253,22 +254,24 @@ struct task_group;
*/
#define current_save_and_set_rtlock_wait_state() \
do { \
- lockdep_assert_irqs_disabled(); \
- raw_spin_lock(&current->pi_lock); \
+ unsigned long __flags; /* may shadow */ \
+ \
+ raw_spin_lock_irqsave(&current->pi_lock, flags); \
current->saved_state = current->__state; \
debug_rtlock_wait_set_state(); \
- WRITE_ONCE(current->__state, TASK_RTLOCK_WAIT); \
- raw_spin_unlock(&current->pi_lock); \
+ smp_store_mb(current->__state, TASK_RTLOCK_WAIT); \
+ raw_spin_unlock_irqrestore(&current->pi_lock, flags); \
} while (0);
#define current_restore_rtlock_saved_state() \
do { \
- lockdep_assert_irqs_disabled(); \
- raw_spin_lock(&current->pi_lock); \
+ unsigned long __flags; /* may shadow */ \
+ \
+ raw_spin_lock_irqsave(&current->pi_lock, flags); \
debug_rtlock_wait_restore_state(); \
WRITE_ONCE(current->__state, current->saved_state); \
current->saved_state = TASK_RUNNING; \
- raw_spin_unlock(&current->pi_lock); \
+ raw_spin_unlock_irqrestore(&current->pi_lock, flags); \
} while (0);
#define get_current_state() READ_ONCE(current->__state)