Re: PI futexes + lock stealing woes
From: Gratian Crisan
Date: Thu Dec 07 2017 - 09:58:47 EST
Peter Zijlstra writes:
> The below compiles and boots, but is otherwise untested. Could you give
> it a spin?
Thank you! Yes, I'll start a test now.
-Gratian
> ---
> kernel/futex.c | 83 +++++++++++++++++++++++++++++++++--------
> kernel/locking/rtmutex.c | 26 +++++++++----
> kernel/locking/rtmutex_common.h | 1 +
> 3 files changed, 87 insertions(+), 23 deletions(-)
>
> diff --git a/kernel/futex.c b/kernel/futex.c
> index 76ed5921117a..29ac5b64e7c7 100644
> --- a/kernel/futex.c
> +++ b/kernel/futex.c
> @@ -2294,21 +2294,17 @@ static void unqueue_me_pi(struct futex_q *q)
> spin_unlock(q->lock_ptr);
> }
>
> -/*
> - * Fixup the pi_state owner with the new owner.
> - *
> - * Must be called with hash bucket lock held and mm->sem held for non
> - * private futexes.
> - */
> static int fixup_pi_state_owner(u32 __user *uaddr, struct futex_q *q,
> - struct task_struct *newowner)
> + struct task_struct *argowner)
> {
> - u32 newtid = task_pid_vnr(newowner) | FUTEX_WAITERS;
> struct futex_pi_state *pi_state = q->pi_state;
> u32 uval, uninitialized_var(curval), newval;
> - struct task_struct *oldowner;
> + struct task_struct *oldowner, *newowner;
> + u32 newtid;
> int ret;
>
> + lockdep_assert_held(q->lock_ptr);
> +
> raw_spin_lock_irq(&pi_state->pi_mutex.wait_lock);
>
> oldowner = pi_state->owner;
> @@ -2317,11 +2313,17 @@ static int fixup_pi_state_owner(u32 __user *uaddr, struct futex_q *q,
> newtid |= FUTEX_OWNER_DIED;
>
> /*
> - * We are here either because we stole the rtmutex from the
> - * previous highest priority waiter or we are the highest priority
> - * waiter but have failed to get the rtmutex the first time.
> + * We are here because either:
> + *
> + * - we stole the lock and pi_state->owner needs updating to reflect
> + * that (@argowner == current),
> *
> - * We have to replace the newowner TID in the user space variable.
> + * or:
> + *
> + * - someone stole our lock and we need to fix things to point to the
> + * new owner (@argowner == NULL).
> + *
> + * Either way, we have to replace the TID in the user space variable.
> * This must be atomic as we have to preserve the owner died bit here.
> *
> * Note: We write the user space value _before_ changing the pi_state
> @@ -2334,6 +2336,42 @@ static int fixup_pi_state_owner(u32 __user *uaddr, struct futex_q *q,
> * in the PID check in lookup_pi_state.
> */
> retry:
> + if (!argowner) {
> + if (oldowner != current) {
> + /*
> + * We raced against a concurrent self; things are
> + * already fixed up. Nothing to do.
> + */
> + ret = 0;
> + goto out_unlock;
> + }
> +
> + if (__rt_mutex_futex_trylock(&pi_state->pi_mutex)) {
> + /* We got the lock after all, nothing to fix. */
> + ret = 0;
> + goto out_unlock;
> + }
> +
> + /*
> + * Since we just failed the trylock; there must be an owner.
> + */
> + newowner = rt_mutex_owner(&pi_state->pi_mutex);
> + BUG_ON(!newowner);
> + } else {
> + WARN_ON_ONCE(argowner != current);
> + if (oldowner == current) {
> + /*
> + * We raced against a concurrent self; things are
> + * already fixed up. Nothing to do.
> + */
> + ret = 0;
> + goto out_unlock;
> + }
> + newowner = argowner;
> + }
> +
> + newtid = task_pid_vnr(newowner) | FUTEX_WAITERS;
> +
> if (get_futex_value_locked(&uval, uaddr))
> goto handle_fault;
>
> @@ -2434,15 +2472,28 @@ static int fixup_owner(u32 __user *uaddr, struct futex_q *q, int locked)
> * Got the lock. We might not be the anticipated owner if we
> * did a lock-steal - fix up the PI-state in that case:
> *
> - * We can safely read pi_state->owner without holding wait_lock
> - * because we now own the rt_mutex, only the owner will attempt
> - * to change it.
> + * Speculative pi_state->owner read (we don't hold wait_lock);
> + * since we own the lock pi_state->owner == current is the
> + * stable state, anything else needs more attention.
> */
> if (q->pi_state->owner != current)
> ret = fixup_pi_state_owner(uaddr, q, current);
> goto out;
> }
>
> + /*
> + * If we didn't get the lock; check if anybody stole it from us. In
> + * that case, we need to fix up the uval to point to them instead of
> + * us, otherwise bad things happen. [10]
> + *
> + * Another speculative read; pi_state->owner == current is unstable
> + * but needs our attention.
> + */
> + if (q->pi_state->owner == current) {
> + ret = fixup_pi_state_owner(uaddr, q, NULL);
> + goto out;
> + }
> +
> /*
> * Paranoia check. If we did not take the lock, then we should not be
> * the owner of the rt_mutex.
> diff --git a/kernel/locking/rtmutex.c b/kernel/locking/rtmutex.c
> index 6f3dba6e4e9e..65cc0cb984e6 100644
> --- a/kernel/locking/rtmutex.c
> +++ b/kernel/locking/rtmutex.c
> @@ -1290,6 +1290,19 @@ rt_mutex_slowlock(struct rt_mutex *lock, int state,
> return ret;
> }
>
> +static inline int __rt_mutex_slowtrylock(struct rt_mutex *lock)
> +{
> + int ret = try_to_take_rt_mutex(lock, current, NULL);
> +
> + /*
> + * try_to_take_rt_mutex() sets the lock waiters bit
> + * unconditionally. Clean this up.
> + */
> + fixup_rt_mutex_waiters(lock);
> +
> + return ret;
> +}
> +
> /*
> * Slow path try-lock function:
> */
> @@ -1312,13 +1325,7 @@ static inline int rt_mutex_slowtrylock(struct rt_mutex *lock)
> */
> raw_spin_lock_irqsave(&lock->wait_lock, flags);
>
> - ret = try_to_take_rt_mutex(lock, current, NULL);
> -
> - /*
> - * try_to_take_rt_mutex() sets the lock waiters bit
> - * unconditionally. Clean this up.
> - */
> - fixup_rt_mutex_waiters(lock);
> + ret = __rt_mutex_slowtrylock(lock);
>
> raw_spin_unlock_irqrestore(&lock->wait_lock, flags);
>
> @@ -1505,6 +1512,11 @@ int __sched rt_mutex_futex_trylock(struct rt_mutex *lock)
> return rt_mutex_slowtrylock(lock);
> }
>
> +int __sched __rt_mutex_futex_trylock(struct rt_mutex *lock)
> +{
> + return __rt_mutex_slowtrylock(lock);
> +}
> +
> /**
> * rt_mutex_timed_lock - lock a rt_mutex interruptible
> * the timeout structure is provided
> diff --git a/kernel/locking/rtmutex_common.h b/kernel/locking/rtmutex_common.h
> index 124e98ca0b17..68686b3ec3c1 100644
> --- a/kernel/locking/rtmutex_common.h
> +++ b/kernel/locking/rtmutex_common.h
> @@ -148,6 +148,7 @@ extern bool rt_mutex_cleanup_proxy_lock(struct rt_mutex *lock,
> struct rt_mutex_waiter *waiter);
>
> extern int rt_mutex_futex_trylock(struct rt_mutex *l);
> +extern int __rt_mutex_futex_trylock(struct rt_mutex *l);
>
> extern void rt_mutex_futex_unlock(struct rt_mutex *lock);
> extern bool __rt_mutex_futex_unlock(struct rt_mutex *lock,