[PATCH 26/40] workqueue: use shared worklist and pool all workers per cpu

From: Tejun Heo
Date: Sun Jan 17 2010 - 19:57:22 EST


Use gcwq->worklist instead of cwq->worklist and break the strict
association between a cwq and its worker. All works queued on a cpu
are queued on gcwq->worklist and processed by any available worker on
the gcwq.

As there no longer is strict association between a cwq and its worker,
whether a work is executing can now only be determined using
gcwq->busy_hash[]. [__]find_worker_executing_work() are implemented
for this and used where it's necessary to find whether a work is being
executed and if so which worker is executing it.

After this change, the only association between a cwq and its worker
is that a cwq puts a worker into shared worker pool on creation and
kills it on destruction. As all workqueues are still limited to
max_active of one, this means that there are always at least as many
workers as active works and thus there's no danger for deadlock.

The break of strong association between cwqs and workers requires
somewhat clumsy changes to current_is_keventd() and
destroy_workqueue(). Dynamic worker pool management will remove both
clumsy changes. current_is_keventd() won't be necessary at all as the
only reason it exists is to avoid queueing a work from a work which
will be allowed just fine. The clumsy part of destroy_workqueue() is
added because a worker can only be destroyed while idle and there's no
guarantee a worker is idle when its wq is going down. With dynamic
pool management, workers are not associated with workqueues at all and
only idle ones will be submitted to destroy_workqueue() so the code
won't be necessary anymore.

Signed-off-by: Tejun Heo <tj@xxxxxxxxxx>
---
kernel/workqueue.c | 192 +++++++++++++++++++++++++++++++++++++++++----------
1 files changed, 154 insertions(+), 38 deletions(-)

diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index 19cfa12..f38d263 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -72,7 +72,6 @@ enum {
*/

struct global_cwq;
-struct cpu_workqueue_struct;

struct worker {
/* on idle list while idle, on busy hash table while busy */
@@ -85,7 +84,6 @@ struct worker {
struct list_head scheduled; /* L: scheduled works */
struct task_struct *task; /* I: worker task */
struct global_cwq *gcwq; /* I: the associated gcwq */
- struct cpu_workqueue_struct *cwq; /* I: the associated cwq */
unsigned int flags; /* L: flags */
int id; /* I: worker id */
};
@@ -95,6 +93,7 @@ struct worker {
*/
struct global_cwq {
spinlock_t lock; /* the gcwq lock */
+ struct list_head worklist; /* L: list of pending works */
unsigned int cpu; /* I: the associated cpu */
unsigned int flags; /* L: GCWQ_* flags */

@@ -120,7 +119,6 @@ struct global_cwq {
*/
struct cpu_workqueue_struct {
struct global_cwq *gcwq; /* I: the associated gcwq */
- struct list_head worklist;
struct worker *worker;
struct workqueue_struct *wq; /* I: the owning workqueue */
int work_color; /* L: current color */
@@ -338,6 +336,32 @@ static inline struct cpu_workqueue_struct *get_wq_data(struct work_struct *work)
WORK_STRUCT_WQ_DATA_MASK);
}

+/* Return the first worker. Safe with preemption disabled */
+static struct worker *first_worker(struct global_cwq *gcwq)
+{
+ if (unlikely(list_empty(&gcwq->idle_list)))
+ return NULL;
+
+ return list_first_entry(&gcwq->idle_list, struct worker, entry);
+}
+
+/**
+ * wake_up_worker - wake up an idle worker
+ * @gcwq: gcwq to wake worker for
+ *
+ * Wake up the first idle worker of @gcwq.
+ *
+ * CONTEXT:
+ * spin_lock_irq(gcwq->lock).
+ */
+static void wake_up_worker(struct global_cwq *gcwq)
+{
+ struct worker *worker = first_worker(gcwq);
+
+ if (likely(worker))
+ wake_up_process(worker->task);
+}
+
/**
* busy_worker_head - return the busy hash head for a work
* @gcwq: gcwq of interest
@@ -366,13 +390,67 @@ static struct hlist_head *busy_worker_head(struct global_cwq *gcwq,
}

/**
- * insert_work - insert a work into cwq
+ * __find_worker_executing_work - find worker which is executing a work
+ * @gcwq: gcwq of interest
+ * @bwh: hash head as returned by busy_worker_head()
+ * @work: work to find worker for
+ *
+ * Find a worker which is executing @work on @gcwq. @bwh should be
+ * the hash head obtained by calling busy_worker_head() with the same
+ * work.
+ *
+ * CONTEXT:
+ * spin_lock_irq(gcwq->lock).
+ *
+ * RETURNS:
+ * Pointer to worker which is executing @work if found, NULL
+ * otherwise.
+ */
+static struct worker *__find_worker_executing_work(struct global_cwq *gcwq,
+ struct hlist_head *bwh,
+ struct work_struct *work)
+{
+ struct worker *worker;
+ struct hlist_node *tmp;
+
+ hlist_for_each_entry(worker, tmp, bwh, hentry)
+ if (worker->current_work == work)
+ return worker;
+ return NULL;
+}
+
+/**
+ * find_worker_executing_work - find worker which is executing a work
+ * @gcwq: gcwq of interest
+ * @work: work to find worker for
+ *
+ * Find a worker which is executing @work on @gcwq. This function is
+ * identical to __find_worker_executing_work() except that this
+ * function calculates @bwh itself.
+ *
+ * CONTEXT:
+ * spin_lock_irq(gcwq->lock).
+ *
+ * RETURNS:
+ * Pointer to worker which is executing @work if found, NULL
+ * otherwise.
+ */
+static struct worker *find_worker_executing_work(struct global_cwq *gcwq,
+ struct work_struct *work)
+{
+ return __find_worker_executing_work(gcwq, busy_worker_head(gcwq, work),
+ work);
+}
+
+/**
+ * insert_work - insert a work into gcwq
* @cwq: cwq @work belongs to
* @work: work to insert
* @head: insertion point
* @extra_flags: extra WORK_STRUCT_* flags to set
*
- * Insert @work into @cwq after @head.
+ * Insert @work which belongs to @cwq into @gcwq after @head.
+ * @extra_flags is or'd to work_struct flags.
*
* CONTEXT:
* spin_lock_irq(gcwq->lock).
@@ -391,7 +469,7 @@ static void insert_work(struct cpu_workqueue_struct *cwq,
smp_wmb();

list_add_tail(&work->entry, head);
- wake_up_process(cwq->worker->task);
+ wake_up_worker(cwq->gcwq);
}

/**
@@ -478,7 +556,7 @@ static void __queue_work(unsigned int req_cpu, struct workqueue_struct *wq,

if (likely(cwq->nr_active < cwq->max_active)) {
cwq->nr_active++;
- worklist = &cwq->worklist;
+ worklist = &gcwq->worklist;
} else
worklist = &cwq->delayed_works;

@@ -657,10 +735,10 @@ static struct worker *alloc_worker(void)

/**
* create_worker - create a new workqueue worker
- * @cwq: cwq the new worker will belong to
+ * @gcwq: gcwq the new worker will belong to
* @bind: whether to set affinity to @cpu or not
*
- * Create a new worker which is bound to @cwq. The returned worker
+ * Create a new worker which is bound to @gcwq. The returned worker
* can be started by calling start_worker() or destroyed using
* destroy_worker().
*
@@ -670,9 +748,8 @@ static struct worker *alloc_worker(void)
* RETURNS:
* Pointer to the newly created worker.
*/
-static struct worker *create_worker(struct cpu_workqueue_struct *cwq, bool bind)
+static struct worker *create_worker(struct global_cwq *gcwq, bool bind)
{
- struct global_cwq *gcwq = cwq->gcwq;
int id = -1;
struct worker *worker = NULL;

@@ -690,7 +767,6 @@ static struct worker *create_worker(struct cpu_workqueue_struct *cwq, bool bind)
goto fail;

worker->gcwq = gcwq;
- worker->cwq = cwq;
worker->id = id;

worker->task = kthread_create(worker_thread, worker, "kworker/%u:%d",
@@ -818,7 +894,7 @@ static void cwq_activate_first_delayed(struct cpu_workqueue_struct *cwq)
struct work_struct *work = list_first_entry(&cwq->delayed_works,
struct work_struct, entry);

- move_linked_works(work, &cwq->worklist, NULL);
+ move_linked_works(work, &cwq->gcwq->worklist, NULL);
cwq->nr_active++;
}

@@ -886,11 +962,12 @@ static void cwq_dec_nr_in_flight(struct cpu_workqueue_struct *cwq, int color)
*/
static void process_one_work(struct worker *worker, struct work_struct *work)
{
- struct cpu_workqueue_struct *cwq = worker->cwq;
+ struct cpu_workqueue_struct *cwq = get_wq_data(work);
struct global_cwq *gcwq = cwq->gcwq;
struct hlist_head *bwh = busy_worker_head(gcwq, work);
work_func_t f = work->func;
int work_color;
+ struct worker *collision;
#ifdef CONFIG_LOCKDEP
/*
* It is permissible to free the struct work_struct from
@@ -901,6 +978,18 @@ static void process_one_work(struct worker *worker, struct work_struct *work)
*/
struct lockdep_map lockdep_map = work->lockdep_map;
#endif
+ /*
+ * A single work shouldn't be executed concurrently by
+ * multiple workers on a single cpu. Check whether anyone is
+ * already processing the work. If so, defer the work to the
+ * currently executing one.
+ */
+ collision = __find_worker_executing_work(gcwq, bwh, work);
+ if (unlikely(collision)) {
+ move_linked_works(work, &collision->scheduled, NULL);
+ return;
+ }
+
/* claim and process */
debug_work_deactivate(work);
hlist_add_head(&worker->hentry, bwh);
@@ -910,7 +999,6 @@ static void process_one_work(struct worker *worker, struct work_struct *work)

spin_unlock_irq(&gcwq->lock);

- BUG_ON(get_wq_data(work) != cwq);
work_clear_pending(work);
lock_map_acquire(&cwq->wq->lockdep_map);
lock_map_acquire(&lockdep_map);
@@ -967,7 +1055,6 @@ static int worker_thread(void *__worker)
{
struct worker *worker = __worker;
struct global_cwq *gcwq = worker->gcwq;
- struct cpu_workqueue_struct *cwq = worker->cwq;

woke_up:
spin_lock_irq(&gcwq->lock);
@@ -987,9 +1074,9 @@ woke_up:
*/
BUG_ON(!list_empty(&worker->scheduled));

- while (!list_empty(&cwq->worklist)) {
+ while (!list_empty(&gcwq->worklist)) {
struct work_struct *work =
- list_first_entry(&cwq->worklist,
+ list_first_entry(&gcwq->worklist,
struct work_struct, entry);

if (likely(!(*work_data_bits(work) & WORK_STRUCT_LINKED))) {
@@ -1343,8 +1430,7 @@ int flush_work(struct work_struct *work)
if (unlikely(cwq != get_wq_data(work)))
goto already_gone;
} else {
- if (cwq->worker && cwq->worker->current_work == work)
- worker = cwq->worker;
+ worker = find_worker_executing_work(gcwq, work);
if (!worker)
goto already_gone;
}
@@ -1413,11 +1499,9 @@ static void wait_on_cpu_work(struct cpu_workqueue_struct *cwq,

spin_lock_irq(&gcwq->lock);

- worker = NULL;
- if (unlikely(cwq->worker && cwq->worker->current_work == work)) {
- worker = cwq->worker;
+ worker = find_worker_executing_work(gcwq, work);
+ if (unlikely(worker))
insert_wq_barrier(cwq, &barr, work, worker);
- }

spin_unlock_irq(&gcwq->lock);

@@ -1671,18 +1755,37 @@ int keventd_up(void)

int current_is_keventd(void)
{
- struct cpu_workqueue_struct *cwq;
- int cpu = raw_smp_processor_id(); /* preempt-safe: keventd is per-cpu */
- int ret = 0;
+ bool found = false;
+ unsigned int cpu;

- BUG_ON(!keventd_wq);
+ /*
+ * There no longer is one-to-one relation between worker and
+ * work queue and a worker task might be unbound from its cpu
+ * if the cpu was offlined. Match all busy workers. This
+ * function will go away once dynamic pool is implemented.
+ */
+ for_each_possible_cpu(cpu) {
+ struct global_cwq *gcwq = get_gcwq(cpu);
+ struct worker *worker;
+ struct hlist_node *pos;
+ unsigned long flags;
+ int i;

- cwq = get_cwq(cpu, keventd_wq);
- if (current == cwq->worker->task)
- ret = 1;
+ spin_lock_irqsave(&gcwq->lock, flags);

- return ret;
+ for_each_busy_worker(worker, i, pos, gcwq) {
+ if (worker->task == current) {
+ found = true;
+ break;
+ }
+ }
+
+ spin_unlock_irqrestore(&gcwq->lock, flags);
+ if (found)
+ break;
+ }

+ return found;
}

static struct cpu_workqueue_struct *alloc_cwqs(void)
@@ -1771,12 +1874,11 @@ struct workqueue_struct *__create_workqueue_key(const char *name,
cwq->wq = wq;
cwq->flush_color = -1;
cwq->max_active = max_active;
- INIT_LIST_HEAD(&cwq->worklist);
INIT_LIST_HEAD(&cwq->delayed_works);

if (failed)
continue;
- cwq->worker = create_worker(cwq, cpu_online(cpu));
+ cwq->worker = create_worker(gcwq, cpu_online(cpu));
if (cwq->worker)
start_worker(cwq->worker);
else
@@ -1836,13 +1938,26 @@ void destroy_workqueue(struct workqueue_struct *wq)

for_each_possible_cpu(cpu) {
struct cpu_workqueue_struct *cwq = get_cwq(cpu, wq);
+ struct global_cwq *gcwq = cwq->gcwq;
int i;

if (cwq->worker) {
- spin_lock_irq(&cwq->gcwq->lock);
+ retry:
+ spin_lock_irq(&gcwq->lock);
+ /*
+ * Worker can only be destroyed while idle.
+ * Wait till it becomes idle. This is ugly
+ * and prone to starvation. It will go away
+ * once dynamic worker pool is implemented.
+ */
+ if (!(cwq->worker->flags & WORKER_IDLE)) {
+ spin_unlock_irq(&gcwq->lock);
+ msleep(100);
+ goto retry;
+ }
destroy_worker(cwq->worker);
cwq->worker = NULL;
- spin_unlock_irq(&cwq->gcwq->lock);
+ spin_unlock_irq(&gcwq->lock);
}

for (i = 0; i < WORK_NR_COLORS; i++)
@@ -2161,7 +2276,7 @@ EXPORT_SYMBOL_GPL(work_on_cpu);
*
* Start freezing workqueues. After this function returns, all
* freezeable workqueues will queue new works to their frozen_works
- * list instead of the cwq ones.
+ * list instead of gcwq->worklist.
*
* CONTEXT:
* Grabs and releases workqueue_lock and gcwq->lock's.
@@ -2247,7 +2362,7 @@ out_unlock:
* thaw_workqueues - thaw workqueues
*
* Thaw workqueues. Normal queueing is restored and all collected
- * frozen works are transferred to their respective cwq worklists.
+ * frozen works are transferred to their respective gcwq worklists.
*
* CONTEXT:
* Grabs and releases workqueue_lock and gcwq->lock's.
@@ -2320,6 +2435,7 @@ void __init init_workqueues(void)
struct global_cwq *gcwq = get_gcwq(cpu);

spin_lock_init(&gcwq->lock);
+ INIT_LIST_HEAD(&gcwq->worklist);
gcwq->cpu = cpu;

INIT_LIST_HEAD(&gcwq->idle_list);
--
1.6.4.2

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/