Re: [RFC PATCH, -v2] sched/wait: Introduce new, more compact wait_event*() primitives

From: Peter Zijlstra
Date: Thu Mar 09 2017 - 11:25:38 EST



Here; I rewrote that so that my brain doesn't go wtf, every time I look
at it ;-)

Hope it does the same for you.


---
include/linux/wait.h | 31 +++++++++---
kernel/sched/wait.c | 140 +++++++++++++++++++++++++++++++++++++--------------
2 files changed, 127 insertions(+), 44 deletions(-)

diff --git a/include/linux/wait.h b/include/linux/wait.h
index a3151fa..aeed498 100644
--- a/include/linux/wait.h
+++ b/include/linux/wait.h
@@ -234,19 +234,38 @@ struct wait_event_state {
struct wait_queue_entry wq_entry;
};

-extern long wait_event_loop(struct wait_queue_head *wq_head, struct wait_event_state *wes, int condition);
+enum WE_STATE /* states of the wait_event_loop() state machine, kept in we_state::state */
+{
+ WE_START = 0, /* initial state; wait_event_v2() sets it before the first call */
+ WE_FIRST_COND, /* check the condition before queueing; if true, go straight to WE_DONE */
+ WE_INIT, /* init_wait_entry() on the wait-queue entry */
+ WE_PREPARE, /* prepare_to_wait_event(), then return so the caller re-evaluates the condition */
+ WE_COND, /* check the freshly evaluated condition; if true, go to WE_FINISH */
+ WE_SIGNAL, /* interruptible wait and prepare reported an interruption: return it, WE_DONE */
+ WE_CMD, /* handed back so the caller can run its command (schedule()); next call prepares again */
+ WE_FINISH, /* finish_wait(), then WE_DONE */
+ WE_DONE, /* terminal state; entering the state machine again in this state hits BUG() */
+};
+
+struct we_state {
+ int state; /* current enum WE_STATE value; callers start at WE_START and loop until WE_DONE */
+ long interrupted; /* return value of the most recent prepare_to_wait_event() call */
+ struct wait_queue_entry entry; /* entry passed to init_wait_entry(), prepare_to_wait_event() and finish_wait() */
+};
+
+
+extern long wait_event_loop(struct wait_queue_head *wq_head, struct we_state *wes, bool condition);

#define wait_event_v2(wq_head, condition) \
({ \
- struct wait_event_state __wes; \
+ struct we_state __wes; \
long __ret; \
\
- might_sleep(); \
- __wes.queued = 0; \
+ __wes.state = WE_START; \
\
do { \
- __ret = wait_event_loop(&(wq_head), &__wes, (condition) != 0); \
- } while (!__wes.done); \
+ __ret = wait_event_loop(&(wq_head), &__wes, !!(condition)); \
+ } while (__wes.state != WE_DONE); \
\
__ret; \
})
diff --git a/kernel/sched/wait.c b/kernel/sched/wait.c
index 58a8335..04d32d0 100644
--- a/kernel/sched/wait.c
+++ b/kernel/sched/wait.c
@@ -292,59 +292,123 @@ static inline bool is_kthread_should_stop(void)
return (current->flags & PF_KTHREAD) && kthread_should_stop();
}

+
/*
- * The main wait_event*() event loop iteration state machine.
*
- * Note that this function itself does not loop, it returns to
- * the caller to evaluate the call site dependent condition in
- * every iteration.
+ * #define ___wait_event(wq_head, condition, state, exclusive, ret, cmd)
+ * ({
+ * __label__ __out;
+ * struct wait_queue_entry __wq_entry;
+ * long __ret = ret; // explicit shadow
+ *
+ * init_wait_entry(&__wq_entry, exclusive ? WQ_FLAG_EXCLUSIVE : 0);
+ * for (;;) {
+ * long __int = prepare_to_wait_event(&wq_head, &__wq_entry, state);
+ *
+ * if (condition)
+ * break;
+ *
+ * if (___wait_is_interruptible(state) && __int) {
+ * __ret = __int;
+ * goto __out;
+ * }
+ *
+ * cmd;
+ * }
+ * finish_wait(&wq_head, &__wq_entry);
+ * __out: __ret;
+ * })
+ *
+ * The same steps, as a state machine that is driven by the caller like so:
+ *
+ * do {
+ * ret = __wait_event_loop(wq, &state, !!(cond));
+ * if (state == WE_CMD)
+ * cmd;
+ * } while (state != WE_DONE);
+ *
+ * return ret;
+ *
*/
-long wait_event_loop(struct wait_queue_head *wq_head, struct wait_event_state *wes, int condition)
+static __always_inline long
+__wait_event_loop(struct wait_queue_head *wq_head, struct we_state *wes,
+ bool condition, const unsigned int task_state,
+ const bool exclusive)
{
- if (!wes->queued) {
- /*
- * If we are not initialized yet and the condition is already
- * met, we can return immediately:
- */
- if (condition) {
- wes->done = 1;
- return 0;
- }
+ long ret = 0;

- /* Set up the wait-queue entry: */
- init_wait_entry(&wes->wq_entry, 0);
+ switch(wes->state) {
+ case WE_START:
+ might_sleep();
+ wes->state = WE_FIRST_COND;
+ /* Fall through */

- wes->done = 0;
- wes->queued = 1;
- wes->prepared = 0;
- wes->ret = 0;
- } else {
- /* Here is where we notice an updated wait condition: */
+ case WE_FIRST_COND:
if (condition) {
- finish_wait(wq_head, &wes->wq_entry);
- wes->done = 1;
- return 0;
+ wes->state = WE_DONE;
+ break;
}
- }
+ wes->state = WE_INIT;
+ /* Fall through */
+
+ case WE_INIT:
+ init_wait_entry(&wes->entry, exclusive ? WQ_FLAG_EXCLUSIVE : 0);
+ wes->state = WE_PREPARE;
+ /* Fall through */
+
+ case WE_PREPARE:
+prepare:
+ wes->interrupted = prepare_to_wait_event(wq_head, &wes->entry, task_state);
+ wes->state = WE_COND;
+ /* return to the caller: we need a fresh @condition evaluation */
+ break;
+
+ case WE_COND:
+ if (condition) {
+ wes->state = WE_FINISH;
+ goto finish;
+ }
+ wes->state = WE_SIGNAL;
+ /* Fall through */
+
+ case WE_SIGNAL:
+ if (___wait_is_interruptible(task_state) && wes->interrupted) {
+ wes->state = WE_DONE;
+ ret = wes->interrupted;
+ break;
+ }
+ wes->state = WE_CMD;
+ /* return to the caller so it can run its cmd, i.e. go to sleep */
+ break;
+
+ case WE_CMD:
+ goto prepare;

- if (!wes->prepared) {
-prepare_again:
- wes->ret = prepare_to_wait_event(wq_head, &wes->wq_entry, 0);
- wes->prepared = 1;
+ case WE_FINISH:
+finish:
+ finish_wait(wq_head, &wes->entry);
+ wes->state = WE_DONE;
+ break;

- return 0;
+ case WE_DONE:
+ BUG();
}

- if (___wait_is_interruptible(0) && wes->ret) {
- /* We already got dequeued, so mark it done: */
- wes->done = 1;
+ return ret;
+}

- /* But return any eventual interruption code: */
- return wes->ret;
+long wait_event_loop(struct wait_queue_head *wq_head, struct we_state *wes, bool condition)
+{
+ long ret;
+
+again:
+ ret = __wait_event_loop(wq_head, wes, condition, TASK_UNINTERRUPTIBLE, false);
+ if (wes->state == WE_CMD) {
+ schedule();
+ goto again;
}

- schedule();
- goto prepare_again;
+ return ret;
}
EXPORT_SYMBOL_GPL(wait_event_loop);