[PATCH v6 09/15] futex: Introduce futex_get_locked_hb().

From: Sebastian Andrzej Siewior
Date: Wed Dec 18 2024 - 06:18:20 EST


futex_lock_pi() and __fixup_pi_state_owner() acquire the
futex_q::lock_ptr without holding a reference, assuming the previously
obtained hash bucket and the assigned lock_ptr are still valid. This
is no longer the case once the private hash can be resized: the hash
bucket can become invalid after the reference has been dropped.

Introduce futex_get_locked_hb() to lock the hash bucket recorded in
futex_q::lock_ptr. The lock pointer is read in an RCU section to ensure
that it does not go away if the hash bucket has been replaced and the
old pointer has been observed. After locking, the pointer needs to be
compared again to check whether it changed. If so, the hash bucket has
been replaced, the user has been moved to the new one and lock_ptr has
been updated. The lock operation needs to be redone in this case.

Once the lock_ptr is unchanged after locking, we can return the
futex_hash_bucket it belongs to, locked, to the caller. This is important
because we don't own a reference, so the hash bucket is only valid as
long as we hold the lock.
This means if the local hash is resized then this (old) hash bucket
remains valid as long as we hold the lock because all users need to be
moved to the new hash bucket and have their lock_ptr updated. The task
performing the resize will block.

Add futex_get_locked_hb() and use it.

Signed-off-by: Sebastian Andrzej Siewior <bigeasy@xxxxxxxxxxxxx>
---
kernel/futex/core.c | 27 +++++++++++++++++++++++++++
kernel/futex/futex.h | 2 +-
kernel/futex/pi.c | 9 +++++++--
kernel/futex/requeue.c | 8 +++++---
4 files changed, 40 insertions(+), 6 deletions(-)

diff --git a/kernel/futex/core.c b/kernel/futex/core.c
index 1521fbdf22f65..e8214656a66b6 100644
--- a/kernel/futex/core.c
+++ b/kernel/futex/core.c
@@ -639,6 +639,33 @@ int futex_unqueue(struct futex_q *q)
return ret;
}

+struct futex_hash_bucket *futex_get_locked_hb(struct futex_q *q)
+{
+ struct futex_hash_bucket *hb;
+ spinlock_t *lock_ptr;
+
+ /*
+ * lock_ptr can change before the lock is taken, see futex_unqueue().
+ */
+ guard(rcu)();
+retry:
+ lock_ptr = READ_ONCE(q->lock_ptr);
+ spin_lock(lock_ptr);
+
+ if (unlikely(lock_ptr != q->lock_ptr)) {
+ spin_unlock(lock_ptr);
+ goto retry;
+ }
+
+ hb = container_of(lock_ptr, struct futex_hash_bucket, lock);
+ /*
+ * No reference is acquired on the hb: lock_ptr was re-checked with the
+ * lock held, so a resizing task has to wait for this lock before it can
+ * move q to a new hb. The hb is only valid while the lock is held.
+ */
+ return hb;
+}
+
/*
* PI futexes can not be requeued and must remove themselves from the hash
* bucket. The hash bucket lock (i.e. lock_ptr) is held.
diff --git a/kernel/futex/futex.h b/kernel/futex/futex.h
index 36627617f7ced..3c78126d4079e 100644
--- a/kernel/futex/futex.h
+++ b/kernel/futex/futex.h
@@ -196,7 +196,7 @@ enum futex_access {

extern int get_futex_key(u32 __user *uaddr, unsigned int flags, union futex_key *key,
enum futex_access rw);
-
+extern struct futex_hash_bucket *futex_get_locked_hb(struct futex_q *q);
extern struct hrtimer_sleeper *
futex_setup_timer(ktime_t *time, struct hrtimer_sleeper *timeout,
int flags, u64 range_ns);
diff --git a/kernel/futex/pi.c b/kernel/futex/pi.c
index 8561f94f21ed9..506ba1ad8ff23 100644
--- a/kernel/futex/pi.c
+++ b/kernel/futex/pi.c
@@ -806,7 +806,7 @@ static int __fixup_pi_state_owner(u32 __user *uaddr, struct futex_q *q,
break;
}

- spin_lock(q->lock_ptr);
+ futex_get_locked_hb(q);
raw_spin_lock_irq(&pi_state->pi_mutex.wait_lock);

/*
@@ -922,6 +922,7 @@ int futex_lock_pi(u32 __user *uaddr, unsigned int flags, ktime_t *time, int tryl
struct rt_mutex_waiter rt_waiter;
struct futex_hash_bucket *hb;
struct futex_q q = futex_q_init;
+ bool no_block_fp = false;
DEFINE_WAKE_Q(wake_q);
int res, ret;

@@ -988,6 +989,7 @@ int futex_lock_pi(u32 __user *uaddr, unsigned int flags, ktime_t *time, int tryl
ret = rt_mutex_futex_trylock(&q.pi_state->pi_mutex);
/* Fixup the trylock return value: */
ret = ret ? 0 : -EWOULDBLOCK;
+ no_block_fp = true;
goto no_block;
}

@@ -1024,6 +1026,7 @@ int futex_lock_pi(u32 __user *uaddr, unsigned int flags, ktime_t *time, int tryl
raw_spin_unlock_irq(&q.pi_state->pi_mutex.wait_lock);
wake_up_q(&wake_q);
preempt_enable();
+ futex_hash_put(hb);

if (ret) {
if (ret == 1)
@@ -1063,7 +1066,7 @@ int futex_lock_pi(u32 __user *uaddr, unsigned int flags, ktime_t *time, int tryl
* spinlock/rtlock (which might enqueue its own rt_waiter) and fix up
* the
*/
- spin_lock(q.lock_ptr);
+ hb = futex_get_locked_hb(&q);
/*
* Waiter is unqueued.
*/
@@ -1083,6 +1086,8 @@ int futex_lock_pi(u32 __user *uaddr, unsigned int flags, ktime_t *time, int tryl

futex_unqueue_pi(&q);
spin_unlock(q.lock_ptr);
+ if (no_block_fp)
+ futex_hash_put(hb);
goto out;

out_unlock_put_key:
diff --git a/kernel/futex/requeue.c b/kernel/futex/requeue.c
index 31ec543e7fdb3..db27fbf68521c 100644
--- a/kernel/futex/requeue.c
+++ b/kernel/futex/requeue.c
@@ -825,15 +825,17 @@ int futex_wait_requeue_pi(u32 __user *uaddr, unsigned int flags,
switch (futex_requeue_pi_wakeup_sync(&q)) {
case Q_REQUEUE_PI_IGNORE:
/* The waiter is still on uaddr1 */
- spin_lock(&hb->lock);
+ hb = futex_get_locked_hb(&q);
+
ret = handle_early_requeue_pi_wakeup(hb, &q, to);
spin_unlock(&hb->lock);
+
break;

case Q_REQUEUE_PI_LOCKED:
/* The requeue acquired the lock */
if (q.pi_state && (q.pi_state->owner != current)) {
- spin_lock(q.lock_ptr);
+ futex_get_locked_hb(&q);
ret = fixup_pi_owner(uaddr2, &q, true);
/*
* Drop the reference to the pi state which the
@@ -860,7 +862,7 @@ int futex_wait_requeue_pi(u32 __user *uaddr, unsigned int flags,
if (ret && !rt_mutex_cleanup_proxy_lock(pi_mutex, &rt_waiter))
ret = 0;

- spin_lock(q.lock_ptr);
+ futex_get_locked_hb(&q);
debug_rt_mutex_free_waiter(&rt_waiter);
/*
* Fixup the pi_state owner and possibly acquire the lock if we
--
2.45.2