[PATCH 2/2] wait: introduce WQ_FLAG_EXCLUSIVE_HEAD

From: Oleg Nesterov
Date: Thu Mar 20 2014 - 13:53:01 EST


Normally wait_queue_t is a FIFO list for exclusive waiting tasks,
but Lustre wants LIFO to wake up the most recently active thread and
avoid unnecessary cache line pollution.

As Peter suggested, we add the new WQ_FLAG_EXCLUSIVE_HEAD flag and
teach prepare_to_wait_event() to insert the new entry right before
the first WQ_FLAG_EXCLUSIVE task (or at the tail of the queue if no
exclusive task is queued).

Note:
- this obviously assumes that the user of EXCLUSIVE_HEAD
doesn't mix exclusive and !exclusive too much, otherwise
it has to accept the cost of the additional list_for_each_entry()
walk over the !exclusive entries queued before the first exclusive one.

- WQ_FLAG_EXCLUSIVE_HEAD doesn't imply WQ_FLAG_EXCLUSIVE,
it only controls the placement in the queue. So the new flag
can be used on its own, even if this is unlikely to be useful.

Requested-by: Peng Tao <bergwolf@xxxxxxxxx>
Suggested-by: Peter Zijlstra <peterz@xxxxxxxxxxxxx>
Signed-off-by: Oleg Nesterov <oleg@xxxxxxxxxx>
---
include/linux/wait.h | 3 ++-
kernel/sched/wait.c | 30 ++++++++++++++++++++++++------
2 files changed, 26 insertions(+), 7 deletions(-)

diff --git a/include/linux/wait.h b/include/linux/wait.h
index e547c6c..afd41eb 100644
--- a/include/linux/wait.h
+++ b/include/linux/wait.h
@@ -16,7 +16,8 @@ int default_wake_function(wait_queue_t *wait, unsigned mode, int flags, void *ke
struct __wait_queue {
unsigned int flags;
#define WQ_FLAG_EXCLUSIVE 0x01
-#define WQ_FLAG_MASK WQ_FLAG_EXCLUSIVE
+#define WQ_FLAG_EXCLUSIVE_HEAD 0x02
+#define WQ_FLAG_MASK (WQ_FLAG_EXCLUSIVE | WQ_FLAG_EXCLUSIVE_HEAD)
void *private;
wait_queue_func_t func;
struct list_head task_list;
diff --git a/kernel/sched/wait.c b/kernel/sched/wait.c
index 7d50f79..894ff75 100644
--- a/kernel/sched/wait.c
+++ b/kernel/sched/wait.c
@@ -195,6 +195,28 @@ prepare_to_wait_exclusive(wait_queue_head_t *q, wait_queue_t *wait, int state)
}
EXPORT_SYMBOL(prepare_to_wait_exclusive);

+static void add_wait_queue_flag(wait_queue_head_t *q, wait_queue_t *wait)
+{
+ struct list_head *head = &q->task_list;
+
+ if (wait->flags & (WQ_FLAG_EXCLUSIVE | WQ_FLAG_EXCLUSIVE_HEAD)) {
+ if (wait->flags & WQ_FLAG_EXCLUSIVE_HEAD) {
+ wait_queue_t *curr;
+ /* find the first exclusive entry; if there is none, head stays at the list head */
+ list_for_each_entry(curr, head, task_list) {
+ if (likely(curr->flags & WQ_FLAG_EXCLUSIVE)) {
+ head = &curr->task_list;
+ break;
+ }
+ }
+ }
+ /* step back one entry: list_add() below then acts as list_add_tail() on the chosen head, i.e. inserts right before it */
+ head = head->prev;
+ }
+
+ list_add(&wait->task_list, head);
+}
+
long prepare_to_wait_event(wait_queue_head_t *q, wait_queue_t *wait, int state)
{
unsigned long flags;
@@ -206,12 +228,8 @@ long prepare_to_wait_event(wait_queue_head_t *q, wait_queue_t *wait, int state)
wait->func = autoremove_wake_function;

spin_lock_irqsave(&q->lock, flags);
- if (list_empty(&wait->task_list)) {
- if (wait->flags & WQ_FLAG_EXCLUSIVE)
- __add_wait_queue_tail(q, wait);
- else
- __add_wait_queue(q, wait);
- }
+ if (list_empty(&wait->task_list))
+ add_wait_queue_flag(q, wait);
set_current_state(state);
spin_unlock_irqrestore(&q->lock, flags);

--
1.5.5.1


--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/