Re: futex+io_uring: futex_q::task can maybe be dangling (but is not actually accessed, so it's fine)

From: Jens Axboe
Date: Wed Jan 15 2025 - 10:33:28 EST


On 1/15/25 8:23 AM, Jens Axboe wrote:
> On 1/15/25 3:20 AM, Thomas Gleixner wrote:
>> On Mon, Jan 13 2025 at 15:38, Peter Zijlstra wrote:
>>> On Fri, Jan 10, 2025 at 08:33:34PM -0700, Jens Axboe wrote:
>>>
>>>> @@ -548,7 +549,7 @@ void __futex_queue(struct futex_q *q, struct futex_hash_bucket *hb)
>>>>
>>>> plist_node_init(&q->list, prio);
>>>> plist_add(&q->list, &hb->chain);
>>>> - q->task = current;
>>>> + q->task = task;
>>>> }
>>>>
>>>> /**
>>>
>>> The alternative is, I suppose, to move the q->task assignment out to
>>> these two callsites instead. Thomas, any opinions?
>>
>> That's fine as long as hb->lock is held, but the explicit argument makes
>> all of this simpler to understand.
>>
>> Though I'm not really a fan of this part:
>>
>>> + __futex_queue(&ifd->q, hb, NULL);
>>> + spin_unlock(&hb->lock);
>>
>> Can we please add that @task argument to futex_queue() and keep the
>> internals in the futex code instead of pulling more stuff into io_uring?
>
> Sure, was trying to keep the change more minimal, but we can certainly
> add it to futex_queue() instead rather than needing to work around it on
> the io_uring side.
>
> I'll be happy to send out a patch for that.

Here's the raw patch. Should've done this initially rather than just
tackle __futex_queue(), for some reason I thought/assumed that
futex_queue() was more widely used.

What do you think?


diff --git a/io_uring/futex.c b/io_uring/futex.c
index e29662f039e1..f108da4ff863 100644
--- a/io_uring/futex.c
+++ b/io_uring/futex.c
@@ -349,7 +349,7 @@ int io_futex_wait(struct io_kiocb *req, unsigned int issue_flags)
hlist_add_head(&req->hash_node, &ctx->futex_list);
io_ring_submit_unlock(ctx, issue_flags);

- futex_queue(&ifd->q, hb);
+ futex_queue(&ifd->q, hb, NULL);
return IOU_ISSUE_SKIP_COMPLETE;
}

diff --git a/kernel/futex/core.c b/kernel/futex/core.c
index ebdd76b4ecbb..3db8567f5a44 100644
--- a/kernel/futex/core.c
+++ b/kernel/futex/core.c
@@ -532,7 +532,8 @@ void futex_q_unlock(struct futex_hash_bucket *hb)
futex_hb_waiters_dec(hb);
}

-void __futex_queue(struct futex_q *q, struct futex_hash_bucket *hb)
+void __futex_queue(struct futex_q *q, struct futex_hash_bucket *hb,
+ struct task_struct *task)
{
int prio;

@@ -548,7 +549,7 @@ void __futex_queue(struct futex_q *q, struct futex_hash_bucket *hb)

plist_node_init(&q->list, prio);
plist_add(&q->list, &hb->chain);
- q->task = current;
+ q->task = task;
}

/**
diff --git a/kernel/futex/futex.h b/kernel/futex/futex.h
index 618ce1fe870e..11de6405c4e3 100644
--- a/kernel/futex/futex.h
+++ b/kernel/futex/futex.h
@@ -285,13 +285,15 @@ static inline int futex_get_value_locked(u32 *dest, u32 __user *from)
}

extern void __futex_unqueue(struct futex_q *q);
-extern void __futex_queue(struct futex_q *q, struct futex_hash_bucket *hb);
+extern void __futex_queue(struct futex_q *q, struct futex_hash_bucket *hb,
+ struct task_struct *task);
extern int futex_unqueue(struct futex_q *q);

/**
* futex_queue() - Enqueue the futex_q on the futex_hash_bucket
* @q: The futex_q to enqueue
* @hb: The destination hash bucket
+ * @task: Task queueing this futex
*
* The hb->lock must be held by the caller, and is released here. A call to
* futex_queue() is typically paired with exactly one call to futex_unqueue(). The
@@ -299,11 +301,14 @@ extern int futex_unqueue(struct futex_q *q);
* or nothing if the unqueue is done as part of the wake process and the unqueue
* state is implicit in the state of woken task (see futex_wait_requeue_pi() for
* an example).
+ *
+ * Note that @task may be NULL, for async usage of futexes.
*/
-static inline void futex_queue(struct futex_q *q, struct futex_hash_bucket *hb)
+static inline void futex_queue(struct futex_q *q, struct futex_hash_bucket *hb,
+ struct task_struct *task)
__releases(&hb->lock)
{
- __futex_queue(q, hb);
+ __futex_queue(q, hb, task);
spin_unlock(&hb->lock);
}

diff --git a/kernel/futex/pi.c b/kernel/futex/pi.c
index d62cca5ed8f4..635c7d5d4222 100644
--- a/kernel/futex/pi.c
+++ b/kernel/futex/pi.c
@@ -982,7 +982,7 @@ int futex_lock_pi(u32 __user *uaddr, unsigned int flags, ktime_t *time, int tryl
/*
* Only actually queue now that the atomic ops are done:
*/
- __futex_queue(&q, hb);
+ __futex_queue(&q, hb, current);

if (trylock) {
ret = rt_mutex_futex_trylock(&q.pi_state->pi_mutex);
diff --git a/kernel/futex/waitwake.c b/kernel/futex/waitwake.c
index 3a10375d9521..a9056acb75ee 100644
--- a/kernel/futex/waitwake.c
+++ b/kernel/futex/waitwake.c
@@ -350,7 +350,7 @@ void futex_wait_queue(struct futex_hash_bucket *hb, struct futex_q *q,
* access to the hash list and forcing another memory barrier.
*/
set_current_state(TASK_INTERRUPTIBLE|TASK_FREEZABLE);
- futex_queue(q, hb);
+ futex_queue(q, hb, current);

/* Arm the timer */
if (timeout)
@@ -461,7 +461,7 @@ int futex_wait_multiple_setup(struct futex_vector *vs, int count, int *woken)
* next futex. Queue each futex at this moment so hb can
* be unlocked.
*/
- futex_queue(q, hb);
+ futex_queue(q, hb, current);
continue;
}


--
Jens Axboe