[PATCH v6 07/15] futex: Prepare for reference counting of the process private hash end of operation.
From: Sebastian Andrzej Siewior
Date: Wed Dec 18 2024 - 06:18:11 EST
To support runtime resizing of the process private hash, it's
required to add a reference count to the hash structure. The
reference count ensures that the hash cannot be resized or freed
while a task is operating on it.
The reference count will be obtained within futex_hash() and dropped
once the hash bucket is unlocked and not longer required for the
particular operation (queue, unqueue, wakeup etc.).
This is achieved by:
- appending _put() to existing functions so it's clear that they
also put the hash reference and fixing up the usage sites
- providing new helpers, which combine common operations (unlock,
put), and using them at the appropriate places
- providing new helper for standalone reference counting
functionality and using them at places, where the unlock operation
needs to be separate.
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@xxxxxxxxxxxxx>
---
io_uring/futex.c | 2 +-
kernel/futex/core.c | 12 ++++++++----
kernel/futex/futex.h | 31 ++++++++++++++++++++-----------
kernel/futex/pi.c | 19 ++++++++++---------
kernel/futex/requeue.c | 12 ++++++------
kernel/futex/waitwake.c | 23 ++++++++++++-----------
6 files changed, 57 insertions(+), 42 deletions(-)
diff --git a/io_uring/futex.c b/io_uring/futex.c
index e29662f039e1a..67246438da228 100644
--- a/io_uring/futex.c
+++ b/io_uring/futex.c
@@ -349,7 +349,7 @@ int io_futex_wait(struct io_kiocb *req, unsigned int issue_flags)
hlist_add_head(&req->hash_node, &ctx->futex_list);
io_ring_submit_unlock(ctx, issue_flags);
- futex_queue(&ifd->q, hb);
+ futex_queue_put(&ifd->q, hb);
return IOU_ISSUE_SKIP_COMPLETE;
}
diff --git a/kernel/futex/core.c b/kernel/futex/core.c
index 254d0dfac71a9..1521fbdf22f65 100644
--- a/kernel/futex/core.c
+++ b/kernel/futex/core.c
@@ -152,6 +152,9 @@ struct futex_hash_bucket *futex_hash(union futex_key *key)
return &futex_queues[hash & (futex_hashsize - 1)];
}
+void futex_hash_put(struct futex_hash_bucket *hb)
+{
+}
/**
* futex_setup_timer - set up the sleeping hrtimer.
@@ -543,8 +546,8 @@ struct futex_hash_bucket *futex_q_lock(struct futex_q *q)
* Increment the counter before taking the lock so that
* a potential waker won't miss a to-be-slept task that is
* waiting for the spinlock. This is safe as all futex_q_lock()
- * users end up calling futex_queue(). Similarly, for housekeeping,
- * decrement the counter at futex_q_unlock() when some error has
+ * users end up calling futex_queue_put(). Similarly, for housekeeping,
+ * decrement the counter at futex_q_unlock_put() when some error has
* occurred and we don't end up adding the task to the list.
*/
futex_hb_waiters_inc(hb); /* implies smp_mb(); (A) */
@@ -555,11 +558,12 @@ struct futex_hash_bucket *futex_q_lock(struct futex_q *q)
return hb;
}
-void futex_q_unlock(struct futex_hash_bucket *hb)
+void futex_q_unlock_put(struct futex_hash_bucket *hb)
__releases(&hb->lock)
{
futex_hb_waiters_dec(hb);
spin_unlock(&hb->lock);
+ futex_hash_put(hb);
}
void __futex_queue(struct futex_q *q, struct futex_hash_bucket *hb)
@@ -586,7 +590,7 @@ void __futex_queue(struct futex_q *q, struct futex_hash_bucket *hb)
* @q: The futex_q to unqueue
*
* The q->lock_ptr must not be held by the caller. A call to futex_unqueue() must
- * be paired with exactly one earlier call to futex_queue().
+ * be paired with exactly one earlier call to futex_queue_put().
*
* Return:
* - 1 - if the futex_q was still queued (and we removed unqueued it);
diff --git a/kernel/futex/futex.h b/kernel/futex/futex.h
index 99b32e728c4ad..36627617f7ced 100644
--- a/kernel/futex/futex.h
+++ b/kernel/futex/futex.h
@@ -202,6 +202,7 @@ futex_setup_timer(ktime_t *time, struct hrtimer_sleeper *timeout,
int flags, u64 range_ns);
extern struct futex_hash_bucket *futex_hash(union futex_key *key);
+extern void futex_hash_put(struct futex_hash_bucket *hb);
/**
* futex_match - Check whether two futex keys are equal
@@ -288,23 +289,29 @@ extern void __futex_unqueue(struct futex_q *q);
extern void __futex_queue(struct futex_q *q, struct futex_hash_bucket *hb);
extern int futex_unqueue(struct futex_q *q);
+static inline void futex_hb_unlock_put(struct futex_hash_bucket *hb)
+{
+ spin_unlock(&hb->lock);
+ futex_hash_put(hb);
+}
+
/**
- * futex_queue() - Enqueue the futex_q on the futex_hash_bucket
+ * futex_queue_put() - Enqueue the futex_q on the futex_hash_bucket
* @q: The futex_q to enqueue
* @hb: The destination hash bucket
*
- * The hb->lock must be held by the caller, and is released here. A call to
- * futex_queue() is typically paired with exactly one call to futex_unqueue(). The
- * exceptions involve the PI related operations, which may use futex_unqueue_pi()
- * or nothing if the unqueue is done as part of the wake process and the unqueue
- * state is implicit in the state of woken task (see futex_wait_requeue_pi() for
- * an example).
+ * The hb->lock must be held by the caller, and is released here and the reference
+ * on the hb is dropped. A call to futex_queue_put() is typically paired with
+ * exactly one call to futex_unqueue(). The exceptions involve the PI related
+ * operations, which may use futex_unqueue_pi() or nothing if the unqueue is
+ * done as part of the wake process and the unqueue state is implicit in the
+ * state of woken task (see futex_wait_requeue_pi() for an example).
*/
-static inline void futex_queue(struct futex_q *q, struct futex_hash_bucket *hb)
+static inline void futex_queue_put(struct futex_q *q, struct futex_hash_bucket *hb)
__releases(&hb->lock)
{
__futex_queue(q, hb);
- spin_unlock(&hb->lock);
+ futex_hb_unlock_put(hb);
}
extern void futex_unqueue_pi(struct futex_q *q);
@@ -350,7 +357,7 @@ static inline int futex_hb_waiters_pending(struct futex_hash_bucket *hb)
}
extern struct futex_hash_bucket *futex_q_lock(struct futex_q *q);
-extern void futex_q_unlock(struct futex_hash_bucket *hb);
+extern void futex_q_unlock_put(struct futex_hash_bucket *hb);
extern int futex_lock_pi_atomic(u32 __user *uaddr, struct futex_hash_bucket *hb,
@@ -380,11 +387,13 @@ double_lock_hb(struct futex_hash_bucket *hb1, struct futex_hash_bucket *hb2)
}
static inline void
-double_unlock_hb(struct futex_hash_bucket *hb1, struct futex_hash_bucket *hb2)
+double_unlock_hb_put(struct futex_hash_bucket *hb1, struct futex_hash_bucket *hb2)
{
spin_unlock(&hb1->lock);
if (hb1 != hb2)
spin_unlock(&hb2->lock);
+ futex_hash_put(hb1);
+ futex_hash_put(hb2);
}
/* syscalls */
diff --git a/kernel/futex/pi.c b/kernel/futex/pi.c
index d62cca5ed8f4c..8561f94f21ed9 100644
--- a/kernel/futex/pi.c
+++ b/kernel/futex/pi.c
@@ -217,9 +217,9 @@ static int attach_to_pi_state(u32 __user *uaddr, u32 uval,
/*
* We get here with hb->lock held, and having found a
* futex_top_waiter(). This means that futex_lock_pi() of said futex_q
- * has dropped the hb->lock in between futex_queue() and futex_unqueue_pi(),
- * which in turn means that futex_lock_pi() still has a reference on
- * our pi_state.
+ * has dropped the hb->lock in between futex_queue_put() and
+ * futex_unqueue_pi(), which in turn means that futex_lock_pi() still
+ * has a reference on our pi_state.
*
* The waiter holding a reference on @pi_state also protects against
* the unlocked put_pi_state() in futex_unlock_pi(), futex_lock_pi()
@@ -963,7 +963,7 @@ int futex_lock_pi(u32 __user *uaddr, unsigned int flags, ktime_t *time, int tryl
* exit to complete.
* - EAGAIN: The user space value changed.
*/
- futex_q_unlock(hb);
+ futex_q_unlock_put(hb);
/*
* Handle the case where the owner is in the middle of
* exiting. Wait for the exit to complete otherwise
@@ -1086,7 +1086,7 @@ int futex_lock_pi(u32 __user *uaddr, unsigned int flags, ktime_t *time, int tryl
goto out;
out_unlock_put_key:
- futex_q_unlock(hb);
+ futex_q_unlock_put(hb);
out:
if (to) {
@@ -1096,7 +1096,7 @@ int futex_lock_pi(u32 __user *uaddr, unsigned int flags, ktime_t *time, int tryl
return ret != -EINTR ? ret : -ERESTARTNOINTR;
uaddr_faulted:
- futex_q_unlock(hb);
+ futex_q_unlock_put(hb);
ret = fault_in_user_writeable(uaddr);
if (ret)
@@ -1196,7 +1196,7 @@ int futex_unlock_pi(u32 __user *uaddr, unsigned int flags)
}
get_pi_state(pi_state);
- spin_unlock(&hb->lock);
+ futex_hb_unlock_put(hb);
/* drops pi_state->pi_mutex.wait_lock */
ret = wake_futex_pi(uaddr, uval, pi_state, rt_waiter);
@@ -1235,7 +1235,8 @@ int futex_unlock_pi(u32 __user *uaddr, unsigned int flags)
* owner.
*/
if ((ret = futex_cmpxchg_value_locked(&curval, uaddr, uval, 0))) {
- spin_unlock(&hb->lock);
+ futex_hb_unlock_put(hb);
+
switch (ret) {
case -EFAULT:
goto pi_faulted;
@@ -1255,7 +1256,7 @@ int futex_unlock_pi(u32 __user *uaddr, unsigned int flags)
ret = (curval == uval) ? 0 : -EAGAIN;
out_unlock:
- spin_unlock(&hb->lock);
+ futex_hb_unlock_put(hb);
return ret;
pi_retry:
diff --git a/kernel/futex/requeue.c b/kernel/futex/requeue.c
index fb69dcdf74da8..217cec5c8302e 100644
--- a/kernel/futex/requeue.c
+++ b/kernel/futex/requeue.c
@@ -58,7 +58,7 @@ enum {
};
const struct futex_q futex_q_init = {
- /* list gets initialized in futex_queue()*/
+ /* list gets initialized in futex_queue_put()*/
.wake = futex_wake_mark,
.key = FUTEX_KEY_INIT,
.bitset = FUTEX_BITSET_MATCH_ANY,
@@ -457,7 +457,7 @@ int futex_requeue(u32 __user *uaddr1, unsigned int flags1,
if (unlikely(ret)) {
futex_hb_waiters_dec(hb2);
- double_unlock_hb(hb1, hb2);
+ double_unlock_hb_put(hb1, hb2);
ret = get_user(curval, uaddr1);
if (ret)
@@ -543,7 +543,7 @@ int futex_requeue(u32 __user *uaddr1, unsigned int flags1,
*/
case -EFAULT:
futex_hb_waiters_dec(hb2);
- double_unlock_hb(hb1, hb2);
+ double_unlock_hb_put(hb1, hb2);
ret = fault_in_user_writeable(uaddr2);
if (!ret)
goto retry;
@@ -557,7 +557,7 @@ int futex_requeue(u32 __user *uaddr1, unsigned int flags1,
* - EAGAIN: The user space value changed.
*/
futex_hb_waiters_dec(hb2);
- double_unlock_hb(hb1, hb2);
+ double_unlock_hb_put(hb1, hb2);
/*
* Handle the case where the owner is in the middle of
* exiting. Wait for the exit to complete otherwise
@@ -675,7 +675,7 @@ int futex_requeue(u32 __user *uaddr1, unsigned int flags1,
out_unlock:
futex_hb_waiters_dec(hb2);
- double_unlock_hb(hb1, hb2);
+ double_unlock_hb_put(hb1, hb2);
wake_up_q(&wake_q);
return ret ? ret : task_count;
}
@@ -814,7 +814,7 @@ int futex_wait_requeue_pi(u32 __user *uaddr, unsigned int flags,
* shared futexes. We need to compare the keys:
*/
if (futex_match(&q.key, &key2)) {
- futex_q_unlock(hb);
+ futex_q_unlock_put(hb);
ret = -EINVAL;
goto out;
}
diff --git a/kernel/futex/waitwake.c b/kernel/futex/waitwake.c
index 3a10375d95218..01d419efbf298 100644
--- a/kernel/futex/waitwake.c
+++ b/kernel/futex/waitwake.c
@@ -195,7 +195,7 @@ int futex_wake(u32 __user *uaddr, unsigned int flags, int nr_wake, u32 bitset)
}
}
- spin_unlock(&hb->lock);
+ futex_hb_unlock_put(hb);
wake_up_q(&wake_q);
return ret;
}
@@ -274,7 +274,7 @@ int futex_wake_op(u32 __user *uaddr1, unsigned int flags, u32 __user *uaddr2,
double_lock_hb(hb1, hb2);
op_ret = futex_atomic_op_inuser(op, uaddr2);
if (unlikely(op_ret < 0)) {
- double_unlock_hb(hb1, hb2);
+ double_unlock_hb_put(hb1, hb2);
if (!IS_ENABLED(CONFIG_MMU) ||
unlikely(op_ret != -EFAULT && op_ret != -EAGAIN)) {
@@ -327,7 +327,7 @@ int futex_wake_op(u32 __user *uaddr1, unsigned int flags, u32 __user *uaddr2,
}
out_unlock:
- double_unlock_hb(hb1, hb2);
+ double_unlock_hb_put(hb1, hb2);
wake_up_q(&wake_q);
return ret;
}
@@ -335,7 +335,7 @@ int futex_wake_op(u32 __user *uaddr1, unsigned int flags, u32 __user *uaddr2,
static long futex_wait_restart(struct restart_block *restart);
/**
- * futex_wait_queue() - futex_queue() and wait for wakeup, timeout, or signal
+ * futex_wait_queue() - futex_queue_put() and wait for wakeup, timeout, or signal
* @hb: the futex hash bucket, must be locked by the caller
* @q: the futex_q to queue up on
* @timeout: the prepared hrtimer_sleeper, or null for no timeout
@@ -346,11 +346,11 @@ void futex_wait_queue(struct futex_hash_bucket *hb, struct futex_q *q,
/*
* The task state is guaranteed to be set before another task can
* wake it. set_current_state() is implemented using smp_store_mb() and
- * futex_queue() calls spin_unlock() upon completion, both serializing
+ * futex_queue_put() calls spin_unlock() upon completion, both serializing
* access to the hash list and forcing another memory barrier.
*/
set_current_state(TASK_INTERRUPTIBLE|TASK_FREEZABLE);
- futex_queue(q, hb);
+ futex_queue_put(q, hb);
/* Arm the timer */
if (timeout)
@@ -461,11 +461,12 @@ int futex_wait_multiple_setup(struct futex_vector *vs, int count, int *woken)
* next futex. Queue each futex at this moment so hb can
* be unlocked.
*/
- futex_queue(q, hb);
+ futex_queue_put(q, hb);
continue;
}
- futex_q_unlock(hb);
+ futex_q_unlock_put(hb);
+
__set_current_state(TASK_RUNNING);
/*
@@ -624,7 +625,7 @@ int futex_wait_setup(u32 __user *uaddr, u32 val, unsigned int flags,
ret = futex_get_value_locked(&uval, uaddr);
if (ret) {
- futex_q_unlock(*hb);
+ futex_q_unlock_put(*hb);
ret = get_user(uval, uaddr);
if (ret)
@@ -637,7 +638,7 @@ int futex_wait_setup(u32 __user *uaddr, u32 val, unsigned int flags,
}
if (uval != val) {
- futex_q_unlock(*hb);
+ futex_q_unlock_put(*hb);
ret = -EWOULDBLOCK;
}
@@ -665,7 +666,7 @@ int __futex_wait(u32 __user *uaddr, unsigned int flags, u32 val,
if (ret)
return ret;
- /* futex_queue and wait for wakeup, timeout, or a signal. */
+ /* futex_queue_put() and wait for wakeup, timeout, or a signal. */
futex_wait_queue(hb, &q, to);
/* If we were woken (and unqueued), we succeeded, whatever. */
--
2.45.2