Re: Crash with PREEMPT_RT on aarch64 machine
From: Sebastian Andrzej Siewior
Date: Mon Nov 28 2022 - 10:58:39 EST
How about this?
- The fast path is easy…
- The slow path first sets the WAITER bits (mark_rt_mutex_waiters()) so
I made that one _acquire so that it is visible by the unlocker forcing
everyone into slow path.
- If the lock is acquired, then the owner is written via
rt_mutex_set_owner(). This happens under wait_lock so it is
serialized and so a WRITE_ONCE() is used to write the final owner. I
replaced it with a cmpxchg_acquire() to have the owner there.
Not sure if I shouldn't make this as you put it:
| e.g. by making use of dependency ordering where it already exists.
The other (locking) CPU needs to see the owner not only the WAITER
bit. I'm not sure if this could be replaced with smp_store_release().
- After the whole operation completes, fixup_rt_mutex_waiters() cleans
the WAITER bit and I kept the _acquire semantic here. Now looking at
it again, I don't think that needs to be done since that shouldn't be
set here.
- There is rtmutex_spin_on_owner() which (as the name implies) spins on
the owner as long as it active. It obtains it via READ_ONCE() and I'm
not sure if any memory barrier is needed. Worst case is that it will
spin while owner isn't set if it observers a stale value.
- The unlock path first clears the waiter bit if there are no waiters
recorded (via simple assignments under the wait_lock (every locker
will fail with the cmpxchg_acquire() and go for the wait_lock)) and
then finally drop it via rt_mutex_cmpxchg_release(,, NULL).
Should there be a wait, it will just store the WAITER bit with
smp_store_release() (setting the owner is NULL but the WAITER bit
forces everyone into the slow path).
- Added rt_mutex_set_owner_pi() which does simple assignment. This is
used from the futex code and here everything happens under a lock.
- I added a smp_load_acquire() to rt_mutex_base_is_locked() since I
*think* want to observe a real waiter and not something stale.
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@xxxxxxxxxxxxx>
---
include/linux/rtmutex.h | 2 +-
kernel/locking/rtmutex.c | 26 ++++++++++++++++++--------
kernel/locking/rtmutex_api.c | 4 ++--
3 files changed, 21 insertions(+), 11 deletions(-)
diff --git a/include/linux/rtmutex.h b/include/linux/rtmutex.h
index 7d049883a08ac..4447e01f723d4 100644
--- a/include/linux/rtmutex.h
+++ b/include/linux/rtmutex.h
@@ -41,7 +41,7 @@ struct rt_mutex_base {
*/
static inline bool rt_mutex_base_is_locked(struct rt_mutex_base *lock)
{
- return READ_ONCE(lock->owner) != NULL;
+ return smp_load_acquire(&lock->owner) != NULL;
}
extern void rt_mutex_base_init(struct rt_mutex_base *rtb);
diff --git a/kernel/locking/rtmutex.c b/kernel/locking/rtmutex.c
index 7779ee8abc2a0..e3cc673e0c988 100644
--- a/kernel/locking/rtmutex.c
+++ b/kernel/locking/rtmutex.c
@@ -97,7 +97,7 @@ rt_mutex_set_owner(struct rt_mutex_base *lock, struct task_struct *owner)
if (rt_mutex_has_waiters(lock))
val |= RT_MUTEX_HAS_WAITERS;
- WRITE_ONCE(lock->owner, (struct task_struct *)val);
+ WARN_ON_ONCE(cmpxchg_acquire(&lock->owner, RT_MUTEX_HAS_WAITERS, val) != RT_MUTEX_HAS_WAITERS);
}
static __always_inline void clear_rt_mutex_waiters(struct rt_mutex_base *lock)
@@ -106,6 +106,17 @@ static __always_inline void clear_rt_mutex_waiters(struct rt_mutex_base *lock)
((unsigned long)lock->owner & ~RT_MUTEX_HAS_WAITERS);
}
+static __always_inline void
+rt_mutex_set_owner_pi(struct rt_mutex_base *lock, struct task_struct *owner)
+{
+ unsigned long val = (unsigned long)owner;
+
+ if (rt_mutex_has_waiters(lock))
+ val |= RT_MUTEX_HAS_WAITERS;
+
+ WRITE_ONCE(lock->owner, val);
+}
+
static __always_inline void fixup_rt_mutex_waiters(struct rt_mutex_base *lock)
{
unsigned long owner, *p = (unsigned long *) &lock->owner;
@@ -173,7 +184,7 @@ static __always_inline void fixup_rt_mutex_waiters(struct rt_mutex_base *lock)
*/
owner = READ_ONCE(*p);
if (owner & RT_MUTEX_HAS_WAITERS)
- WRITE_ONCE(*p, owner & ~RT_MUTEX_HAS_WAITERS);
+ cmpxchg_acquire(p, owner, owner & ~RT_MUTEX_HAS_WAITERS);
}
/*
@@ -196,17 +207,16 @@ static __always_inline bool rt_mutex_cmpxchg_release(struct rt_mutex_base *lock,
}
/*
- * Callers must hold the ->wait_lock -- which is the whole purpose as we force
- * all future threads that attempt to [Rmw] the lock to the slowpath. As such
- * relaxed semantics suffice.
+ * Callers hold the ->wait_lock. This needs to be visible to the lockowner,
+ * forcing him into the slow path for the unlock operation.
*/
static __always_inline void mark_rt_mutex_waiters(struct rt_mutex_base *lock)
{
unsigned long owner, *p = (unsigned long *) &lock->owner;
do {
- owner = *p;
- } while (cmpxchg_relaxed(p, owner,
+ owner = READ_ONCE(*p);
+ } while (cmpxchg_acquire(p, owner,
owner | RT_MUTEX_HAS_WAITERS) != owner);
}
@@ -1218,7 +1228,7 @@ static void __sched mark_wakeup_next_waiter(struct rt_wake_q_head *wqh,
* slow path making sure no task of lower priority than
* the top waiter can steal this lock.
*/
- lock->owner = (void *) RT_MUTEX_HAS_WAITERS;
+ smp_store_release(&lock->owner, (void *) RT_MUTEX_HAS_WAITERS);
/*
* We deboosted before waking the top waiter task such that we don't
diff --git a/kernel/locking/rtmutex_api.c b/kernel/locking/rtmutex_api.c
index 900220941caac..9acc176f93d38 100644
--- a/kernel/locking/rtmutex_api.c
+++ b/kernel/locking/rtmutex_api.c
@@ -249,7 +249,7 @@ void __sched rt_mutex_init_proxy_locked(struct rt_mutex_base *lock,
* recursion. Give the futex/rtmutex wait_lock a separate key.
*/
lockdep_set_class(&lock->wait_lock, &pi_futex_key);
- rt_mutex_set_owner(lock, proxy_owner);
+ rt_mutex_set_owner_pi(lock, proxy_owner);
}
/**
@@ -267,7 +267,7 @@ void __sched rt_mutex_init_proxy_locked(struct rt_mutex_base *lock,
void __sched rt_mutex_proxy_unlock(struct rt_mutex_base *lock)
{
debug_rt_mutex_proxy_unlock(lock);
- rt_mutex_set_owner(lock, NULL);
+ rt_mutex_set_owner_pi(lock, NULL);
}
/**
--
2.38.1