Re: [PATCH] workqueue: fix memory leak of struct pool_workqueue in destroy_workqueue()

From: Lai Jiangshan
Date: Fri Aug 13 2021 - 08:03:18 EST


On Thu, Aug 12, 2021 at 4:38 PM <lizhe.67@xxxxxxxxxxxxx> wrote:
>
> From: Li Zhe <lizhe.67@xxxxxxxxxxxxx>
>
> Even after commit def98c84b6cd, we may still hit sanity check failures
> in destroy_workqueue() although flush_work() was called beforehand,
> resulting in a memory leak of struct pool_workqueue.
>
> The warning logs are listed below.
>
> WARNING: CPU: 0 PID: 19336 at kernel/workqueue.c:4430 destroy_workqueue+0x11a/0x2f0
> *****
> destroy_workqueue: test_workqueue9 has the following busy pwq
> pwq 4: cpus=2 node=0 flags=0x0 nice=0 active=0/1 refcnt=2
> in-flight: 5658:wq_barrier_func
> Showing busy workqueues and worker pools:
> *****
>
> A possible sequence of events that results in the failure is listed below.
>
> thread A:
> destroy_workqueue()
> ----raw_spin_lock_irq(&pwq->pool->lock)
> ----pwq_busy()
>
> thread B:
> ----process_one_work()
> ----raw_spin_unlock_irq(&pool->lock)
> ----worker->current_func(work)
> ----cond_resched()
> ----raw_spin_lock_irq(&pool->lock)
> ----pwq_dec_nr_in_flight(pwq, work_color)

Hello, Li

Thanks for your report.
As this sequence of events shows, the problem does exist.

But complicating process_one_work() and adding branches to its hot path
is not desirable. I'm trying to figure out another way to fix it.

Thanks
Lai

>
> Thread A may acquire pool->lock before thread B does, while pwq->refcnt
> is still 2, which results in the memory leak and the sanity check failure.
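[Editor's note: for illustration, here is a minimal userspace model of the
interleaving described above. Everything in it is a hypothetical stand-in
(pool_lock for pool->lock, refcnt for pwq->refcnt, the semaphore for the
wq_barrier completion), not kernel API.]

/* race_model.c - hypothetical userspace model of the reported race.
 * Build: gcc -pthread race_model.c -o race_model
 */
#include <pthread.h>
#include <semaphore.h>
#include <sched.h>
#include <stdio.h>

static pthread_mutex_t pool_lock = PTHREAD_MUTEX_INITIALIZER; /* pool->lock */
static sem_t barr_done;	/* models the wq_barrier completion */
static int refcnt = 2;	/* models pwq->refcnt while the barrier is in flight */

/* Thread B: models process_one_work() before the patch. */
static void *worker(void *arg)
{
	(void)arg;
	/* pool->lock is not held while the callback runs ... */
	sem_post(&barr_done);	/* wq_barrier_func() -> complete() */
	sched_yield();		/* widen the window (cond_resched()) */
	/* ... and is only taken afterwards to drop the reference. */
	pthread_mutex_lock(&pool_lock);
	refcnt--;		/* pwq_dec_nr_in_flight() */
	pthread_mutex_unlock(&pool_lock);
	return NULL;
}

int main(void)
{
	pthread_t tb;

	sem_init(&barr_done, 0, 0);
	pthread_create(&tb, NULL, worker, NULL);

	/* Thread A: models destroy_workqueue() after flush_work(). */
	sem_wait(&barr_done);		/* flush_work() returns here */
	pthread_mutex_lock(&pool_lock);	/* may win the race against B */
	if (refcnt > 1)
		printf("busy pwq: refcnt=%d, as in the warning above\n", refcnt);
	pthread_mutex_unlock(&pool_lock);
	pthread_join(tb, NULL);
	return 0;
}

[Depending on scheduling, the printf may or may not fire, which mirrors
why the kernel warning is intermittent.]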
>
> Notice that wq_barrier_func() only calls complete(), and that expanding
> struct work_struct is not suitable considering the memory cost. This
> patch therefore calls complete() after pool->lock has been reacquired in
> process_one_work(), eliminating the race by identifying a barrier work
> as one whose work->func is NULL.
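[Editor's note: under the patch's scheme, the worker in the model above
would change as sketched below (again hypothetical stand-in code).
Because the completion is signalled only once pool_lock is held again,
in the same critical section that drops the reference, the woken waiter
cannot take the lock until refcnt has already been decremented.]

/* Models the patched process_one_work(): the barrier's callback body is
 * skipped (work->func == NULL) and complete() runs under pool->lock. */
static void *worker_fixed(void *arg)
{
	(void)arg;
	pthread_mutex_lock(&pool_lock);
	sem_post(&barr_done);	/* complete() now runs under pool->lock ... */
	refcnt--;		/* ... in the same critical section as the drop */
	pthread_mutex_unlock(&pool_lock);
	return NULL;
}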
>
> Signed-off-by: Li Zhe <lizhe.67@xxxxxxxxxxxxx>
> ---
> kernel/workqueue.c | 39 ++++++++++++++++++++++++---------------
> 1 file changed, 24 insertions(+), 15 deletions(-)
>
> diff --git a/kernel/workqueue.c b/kernel/workqueue.c
> index f148eacda55a..02f77f35522c 100644
> --- a/kernel/workqueue.c
> +++ b/kernel/workqueue.c
> @@ -280,6 +280,12 @@ struct workqueue_struct {
> 	struct pool_workqueue __rcu *numa_pwq_tbl[]; /* PWR: unbound pwqs indexed by node */
> };
>
> +struct wq_barrier {
> +	struct work_struct work;
> +	struct completion done;
> +	struct task_struct	*task;	/* purely informational */
> +};
> +
> static struct kmem_cache *pwq_cache;
>
> static cpumask_var_t *wq_numa_possible_cpumask;
> @@ -2152,6 +2158,11 @@ static bool manage_workers(struct worker *worker)
> 	return true;
> }
>
> +static inline bool is_barrier_func(work_func_t func)
> +{
> +	return func == NULL;
> +}
> +
> /**
> * process_one_work - process single work
> * @worker: self
> @@ -2273,7 +2284,8 @@ __acquires(&pool->lock)
> 	 */
> 	lockdep_invariant_state(true);
> 	trace_workqueue_execute_start(work);
> -	worker->current_func(work);
> +	if (likely(!is_barrier_func(worker->current_func)))
> +		worker->current_func(work);
> 	/*
> 	 * While we must be careful to not use "work" after this, the trace
> 	 * point will only record its address.
> @@ -2303,6 +2315,11 @@ __acquires(&pool->lock)
>
> 	raw_spin_lock_irq(&pool->lock);
>
> +	if (unlikely(is_barrier_func(worker->current_func))) {
> +		struct wq_barrier *barr = container_of(work, struct wq_barrier, work);
> +		complete(&barr->done);
> +	}
> +
> 	/* clear cpu intensive status */
> 	if (unlikely(cpu_intensive))
> 		worker_clr_flags(worker, WORKER_CPU_INTENSIVE);
> @@ -2618,18 +2635,6 @@ static void check_flush_dependency(struct workqueue_struct *target_wq,
> 		  target_wq->name, target_func);
> }
>
> -struct wq_barrier {
> -	struct work_struct work;
> -	struct completion done;
> -	struct task_struct	*task;	/* purely informational */
> -};
> -
> -static void wq_barrier_func(struct work_struct *work)
> -{
> -	struct wq_barrier *barr = container_of(work, struct wq_barrier, work);
> -	complete(&barr->done);
> -}
> -
> /**
> * insert_wq_barrier - insert a barrier work
> * @pwq: pwq to insert barrier into
> @@ -2667,7 +2672,11 @@ static void insert_wq_barrier(struct pool_workqueue *pwq,
> 	 * checks and call back into the fixup functions where we
> 	 * might deadlock.
> 	 */
> -	INIT_WORK_ONSTACK(&barr->work, wq_barrier_func);
> +	/* no need to init func because complete() has been moved to
> +	 * process_one_work(), which means that we use NULL to identify
> +	 * whether this work is a barrier
> +	 */
> +	INIT_WORK_ONSTACK(&barr->work, NULL);
> 	__set_bit(WORK_STRUCT_PENDING_BIT, work_data_bits(&barr->work));
>
> 	init_completion_map(&barr->done, &target->lockdep_map);
> @@ -4682,7 +4691,7 @@ static void pr_cont_pool_info(struct worker_pool *pool)
>
> static void pr_cont_work(bool comma, struct work_struct *work)
> {
> -	if (work->func == wq_barrier_func) {
> +	if (is_barrier_func(work->func)) {
> 		struct wq_barrier *barr;
>
> 		barr = container_of(work, struct wq_barrier, work);
> --
> 2.11.0
>
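
[Editor's note: for reference, the call pattern from the report boils
down to the sketch below. This is a guess at the reproducer, since the
original test module is not shown; the names are made up, and the race
window between complete() and pwq_dec_nr_in_flight() is narrow, so this
does not trigger the warning deterministically.]

#include <linux/module.h>
#include <linux/workqueue.h>

static void repro_fn(struct work_struct *work)
{
	/* the work body itself does not matter for the race */
}
static DECLARE_WORK(repro_work, repro_fn);

static int __init repro_init(void)
{
	struct workqueue_struct *wq;

	wq = alloc_workqueue("test_workqueue9", 0, 0);
	if (!wq)
		return -ENOMEM;

	queue_work(wq, &repro_work);
	/* flush_work() waits on a wq_barrier; it can return between the
	 * barrier's complete() and pwq_dec_nr_in_flight() ... */
	flush_work(&repro_work);
	/* ... so pwq_busy() here may still see the barrier's pwq reference */
	destroy_workqueue(wq);
	return 0;
}

static void __exit repro_exit(void) { }

module_init(repro_init);
module_exit(repro_exit);
MODULE_LICENSE("GPL");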