[PATCH] workqueue: ensure all flush_work() have completed when being destroyed

From: Lai Jiangshan
Date: Mon Jun 01 2020 - 02:08:23 EST


In the old days, worker threads were not shared among different
workqueues, and destroy_workqueue() used kthread_stop() to destroy
all workers before tearing down the workqueue structures.
kthread_stop() ensured that the scheduled (worker->scheduled) work
items, including the linked work items queued by flush_work(), had
completed.

From a wq user's point of view, a workqueue being complete/unused
means that all queued work items have completed and all flush_work()
calls have returned; only then is it legitimate to pass the workqueue
to destroy_workqueue().

But
e22bee782b3b ("workqueue: implement concurrency managed dynamic worker pool")
made worker pools and workers shared among different workqueues, and
kthread_stop() is no longer used to synchronize the completion of
work items. destroy_workqueue() uses drain_workqueue() to drain the
user's work items, but the internal work items queued by flush_work()
are not drained because they have no color.

So a problem can occur when wq_barrier_func() does
complete(&barr->done) and the woken-up wq user code then calls
destroy_workqueue(): destroy_workqueue() can be scheduled earlier
than the process_one_work() that does the put_pwq(), so the sanity
check in destroy_workqueue() sees the not-yet-put pwq->refcnt and
complains with a false positive.

The problem could be fixed easily by removing WORK_NO_COLOR and
making the internal work item queued by flush_work() inherit the
color of the work item being flushed, but that would revert the
design and the benefits of WORK_NO_COLOR.

The color-based flush_workqueue() machinery was designed well enough
that it can easily be adapted to flush WORK_NO_COLOR as well. This is
the approach taken by this patch, which introduces flush_no_color()
to flush all WORK_NO_COLOR work items.

Signed-off-by: Lai Jiangshan <laijs@xxxxxxxxxxxxxxxxx>
---
 include/linux/workqueue.h |  5 +--
 kernel/workqueue.c        | 72 +++++++++++++++++++++++++++++++++------
 2 files changed, 64 insertions(+), 13 deletions(-)

diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h
index 8b505d22fc0e..85aea89fb6fc 100644
--- a/include/linux/workqueue.h
+++ b/include/linux/workqueue.h
@@ -55,8 +55,9 @@ enum {
	 * The last color is no color used for works which don't
	 * participate in workqueue flushing.
	 */
-	WORK_NR_COLORS		= (1 << WORK_STRUCT_COLOR_BITS) - 1,
-	WORK_NO_COLOR		= WORK_NR_COLORS,
+	WORK_NR_COLORS		= (1 << WORK_STRUCT_COLOR_BITS),
+	WORK_NR_FLUSH_COLORS	= (WORK_NR_COLORS - 1),
+	WORK_NO_COLOR		= WORK_NR_FLUSH_COLORS,
 
 	/* not bound to any CPU, prefer the local CPU */
 	WORK_CPU_UNBOUND	= NR_CPUS,
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index 7a1fc9fe6314..cf281e273b28 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -585,7 +585,7 @@ static int get_work_color(struct work_struct *work)
 
 static int work_next_color(int color)
 {
-	return (color + 1) % WORK_NR_COLORS;
+	return (color + 1) % WORK_NR_FLUSH_COLORS;
 }
 
 /*
@@ -1167,19 +1167,18 @@ static void pwq_activate_first_delayed(struct pool_workqueue *pwq)
  */
 static void pwq_dec_nr_in_flight(struct pool_workqueue *pwq, int color)
 {
-	/* uncolored work items don't participate in flushing or nr_active */
-	if (color == WORK_NO_COLOR)
-		goto out_put;
+	/* uncolored work items don't participate in nr_active */
+	if (color != WORK_NO_COLOR) {
+		pwq->nr_active--;
+		if (!list_empty(&pwq->delayed_works)) {
+			/* one down, submit a delayed one */
+			if (pwq->nr_active < pwq->max_active)
+				pwq_activate_first_delayed(pwq);
+		}
+	}
 
 	pwq->nr_in_flight[color]--;
 
-	pwq->nr_active--;
-	if (!list_empty(&pwq->delayed_works)) {
-		/* one down, submit a delayed one */
-		if (pwq->nr_active < pwq->max_active)
-			pwq_activate_first_delayed(pwq);
-	}
-
 	/* is flush in progress and are we at the flushing tip? */
 	if (likely(pwq->flush_color != color))
 		goto out_put;
@@ -2682,6 +2681,7 @@ static void insert_wq_barrier(struct pool_workqueue *pwq,
 	}
 
 	debug_work_activate(&barr->work);
+	pwq->nr_in_flight[WORK_NO_COLOR]++;
 	insert_work(pwq, &barr->work, head,
 		    work_color_to_flags(WORK_NO_COLOR) | linked);
 }
@@ -2915,6 +2915,55 @@ void flush_workqueue(struct workqueue_struct *wq)
 }
 EXPORT_SYMBOL(flush_workqueue);
 
+/*
+ * Called from destroy_workqueue() to flush all outstanding internal
+ * work items queued by flush_work().
+ */
+static void flush_no_color(struct workqueue_struct *wq)
+{
+	struct wq_flusher this_flusher = {
+		.list = LIST_HEAD_INIT(this_flusher.list),
+		.flush_color = -1,
+		.done = COMPLETION_INITIALIZER_ONSTACK_MAP(this_flusher.done, wq->lockdep_map),
+	};
+
+	lock_map_acquire(&wq->lockdep_map);
+	lock_map_release(&wq->lockdep_map);
+
+	mutex_lock(&wq->mutex);
+	if (WARN_ON_ONCE(wq->first_flusher))
+		goto out_unlock;
+
+	wq->first_flusher = &this_flusher;
+	this_flusher.flush_color = wq->work_color;
+	WARN_ON_ONCE(wq->flush_color != this_flusher.flush_color);
+	WARN_ON_ONCE(!list_empty(&wq->flusher_overflow));
+	WARN_ON_ONCE(!list_empty(&wq->flusher_queue));
+
+	if (!flush_workqueue_prep_pwqs(wq, WORK_NO_COLOR, -1)) {
+		/* nothing to flush, done */
+		WRITE_ONCE(wq->first_flusher, NULL);
+		goto out_unlock;
+	}
+
+	check_flush_dependency(wq, NULL);
+
+	mutex_unlock(&wq->mutex);
+
+	wait_for_completion(&this_flusher.done);
+
+	mutex_lock(&wq->mutex);
+	WARN_ON_ONCE(wq->first_flusher != &this_flusher);
+	WARN_ON_ONCE(!list_empty(&wq->flusher_overflow));
+	WARN_ON_ONCE(!list_empty(&wq->flusher_queue));
+	WARN_ON_ONCE(!list_empty(&this_flusher.list));
+	WARN_ON_ONCE(wq->flush_color != this_flusher.flush_color);
+	WRITE_ONCE(wq->first_flusher, NULL);
+
+out_unlock:
+	mutex_unlock(&wq->mutex);
+}
+
 /**
  * drain_workqueue - drain a workqueue
  * @wq: workqueue to drain
@@ -4353,6 +4402,7 @@ void destroy_workqueue(struct workqueue_struct *wq)
 
 	/* drain it before proceeding with destruction */
 	drain_workqueue(wq);
+	flush_no_color(wq);
 
 	/* kill rescuer, if sanity checks fail, leave it w/o rescuer */
 	if (wq->rescuer) {
--
2.20.1