[PATCH 2/3] sched/swait: Switch to full exclusive mode

From: Peter Zijlstra
Date: Tue Jun 12 2018 - 04:41:22 EST


Linus noted that swait basically implements exclusive mode -- because
swake_up() only wakes a single waiter. And because of that it should
take care to properly deal with the interruptible case.

In short, the problem is that swake_up() can race with a signal. In
that case it is possible that swake_up() 'wakes' a waiter which is
already on its way out because it just got a signal, and the wakeup
gets lost.
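
A rough sketch of the lost wakeup with the old code (illustrative only,
not the exact code; the waiter got queued by an earlier loop iteration
and has just caught a signal):

    waker                           waiter, in swait_event_interruptible()

                                    prepare_to_swait_event()
                                      signal_pending_state() -> true
                                      return -ERESTARTSYS
                                      /* still on q->task_list */
                                    if (condition)      /* false */
    condition = true;
    swake_up()
      /* selects this waiter; it is
       * the only one it will wake */
                                    break out with -ERESTARTSYS
                                    finish_swait()

The single wakeup swake_up() had to give went to a task that is about
to bail out, nobody else gets woken, and the wakeup is lost.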

The normal wait code is very careful and avoids this situation; make
sure we do too.

Copy the exact exclusive semantics from wait.
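
For reference, the exclusive/interruptible dance on the wait side that
this mirrors lives in prepare_to_wait_event(); roughly (abridged sketch,
exclusive path only, see kernel/sched/wait.c for the real thing):

    long prepare_to_wait_event(struct wait_queue_head *wq_head,
                               struct wait_queue_entry *wq_entry, int state)
    {
            unsigned long flags;
            long ret = 0;

            spin_lock_irqsave(&wq_head->lock, flags);
            if (signal_pending_state(state, current)) {
                    /*
                     * Dequeue under the lock so a concurrent wakeup cannot
                     * select us; if it already did, the condition is set and
                     * the caller's condition check (done before acting on
                     * -ERESTARTSYS) consumes the wakeup.
                     */
                    list_del_init(&wq_entry->entry);
                    ret = -ERESTARTSYS;
            } else {
                    if (list_empty(&wq_entry->entry))
                            __add_wait_queue_entry_tail(wq_head, wq_entry);
                    set_current_state(state);
            }
            spin_unlock_irqrestore(&wq_head->lock, flags);

            return ret;
    }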

Suggested-by: Linus Torvalds <torvalds@xxxxxxxxxxxxxxxxxxxx>
Signed-off-by: Peter Zijlstra (Intel) <peterz@xxxxxxxxxxxxx>
---
include/linux/swait.h | 11 ++++++-----
kernel/sched/swait.c | 22 +++++++++++++++++-----
2 files changed, 23 insertions(+), 10 deletions(-)

--- a/include/linux/swait.h
+++ b/include/linux/swait.h
@@ -38,8 +38,8 @@
* all wakeups are TASK_NORMAL in order to avoid O(n) lookups for the right
* sleeper state.
*
- * - the exclusive mode; because this requires preserving the list order
- * and this is hard.
+ * - the !exclusive mode; because that leads to O(n) wakeups, everything is
+ * exclusive.
*
* - custom wake callback functions; because you cannot give any guarantees
* about random code. This also allows swait to be used in RT, such that
@@ -167,9 +167,10 @@ extern long prepare_to_swait_event(struc
extern void __finish_swait(struct swait_queue_head *q, struct swait_queue *wait);
extern void finish_swait(struct swait_queue_head *q, struct swait_queue *wait);

-/* as per ___wait_event() but for swait, therefore "exclusive == 0" */
+/* as per ___wait_event() but for swait, therefore "exclusive == 1" */
#define ___swait_event(wq, condition, state, ret, cmd) \
({ \
+ __label__ __out; \
struct swait_queue __wait; \
long __ret = ret; \
\
@@ -182,13 +183,13 @@ extern void finish_swait(struct swait_qu
\
if (___wait_is_interruptible(state) && __int) { \
__ret = __int; \
- break; \
+ goto __out; \
} \
\
cmd; \
} \
finish_swait(&wq, &__wait); \
- __ret; \
+__out: __ret; \
})

#define __swait_event(wq, condition) \
--- a/kernel/sched/swait.c
+++ b/kernel/sched/swait.c
@@ -73,7 +73,7 @@ static void __prepare_to_swait(struct sw
{
wait->task = current;
if (list_empty(&wait->task_list))
- list_add(&wait->task_list, &q->task_list);
+ list_add_tail(&wait->task_list, &q->task_list);
}

void prepare_to_swait(struct swait_queue_head *q, struct swait_queue *wait, int state)
@@ -89,12 +89,24 @@ EXPORT_SYMBOL(prepare_to_swait);

long prepare_to_swait_event(struct swait_queue_head *q, struct swait_queue *wait, int state)
{
- if (signal_pending_state(state, current))
- return -ERESTARTSYS;
+ unsigned long flags;
+ long ret = 0;

- prepare_to_swait(q, wait, state);
+ raw_spin_lock_irqsave(&q->lock, flags);
+ if (unlikely(signal_pending_state(state, current))) {
+ /*
+ * See prepare_to_wait_event(). TL;DR, subsequent swake_up()
+ * must not see us.
+ */
+ list_del_init(&wait->task_list);
+ ret = -ERESTARTSYS;
+ } else {
+ __prepare_to_swait(q, wait);
+ set_current_state(state);
+ }
+ raw_spin_unlock_irqrestore(&q->lock, flags);

- return 0;
+ return ret;
}
EXPORT_SYMBOL(prepare_to_swait_event);
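
For context, a minimal (hypothetical -- 'my_swq', 'my_cond' and the two
helpers are made up for illustration) user of this API, where each
wakeup must be consumed by exactly one waiter:

    #include <linux/swait.h>

    static DECLARE_SWAIT_QUEUE_HEAD(my_swq);
    static bool my_cond;

    /* waiter: sleep until my_cond is set or a signal arrives */
    static int wait_for_it(void)
    {
            return swait_event_interruptible(my_swq, READ_ONCE(my_cond));
    }

    /* waker: set the condition, then wake (at most) one waiter */
    static void post_it(void)
    {
            WRITE_ONCE(my_cond, true);
            swake_up(&my_swq);
    }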