Re: [PATCH 1/5] workqueue: wq_pool_mutex protects the attrs-installation

From: Steven Rostedt
Date: Mon Jul 11 2016 - 16:50:52 EST



[ crickets ]

-- Steve

On Thu, 10 Mar 2016 16:44:11 -0500
Steven Rostedt <rostedt@xxxxxxxxxxx> wrote:

> This patch was recently backported to 4.1.19, and when I merged it with -rt,
> the following lockdep splat triggered:
>
> ===============================
> [ INFO: suspicious RCU usage. ]
> 4.1.19-test-rt22+ #1 Not tainted
> -------------------------------
> /work/rt/stable-rt.git/kernel/workqueue.c:608 sched RCU, wq->mutex or wq_pool_mutex should be held!
>
> other info that might help us debug this:
>
>
> rcu_scheduler_active = 1, debug_locks = 0
> 2 locks held by swapper/0/1:
> #0: ((pendingb_lock).lock){+.+...}, at: [<ffffffff8105e4b7>] __local_lock_irq+0x21/0x74
> #1: (rcu_read_lock){......}, at: [<ffffffff8105fbdc>] rcu_read_lock+0x0/0x6c
>
> stack backtrace:
> CPU: 1 PID: 1 Comm: swapper/0 Not tainted 4.1.19-test-rt22+ #1
> Hardware name: MSI MS-7823/CSM-H87M-G43 (MS-7823), BIOS V1.6 02/22/2014
> 0000000000000000 ffff8802101dfd08 ffffffff816083ae ffff880210240000
> 0000000000000001 ffff8802101dfd38 ffffffff81087cf1 ffff88021588e800
> 0000000000000000 0000000000000100 ffff88021588e800 ffff8802101dfd58
> Call Trace:
> [<ffffffff816083ae>] dump_stack+0x67/0x90
> [<ffffffff81087cf1>] lockdep_rcu_suspicious+0x107/0x110
> [<ffffffff8105f9fe>] unbound_pwq_by_node+0x6c/0x93
> [<ffffffff81060e62>] __queue_work+0xc8/0x2e7
> [<ffffffff8106f0cc>] ? migrate_disable+0x28/0xe6
> [<ffffffff81061126>] queue_work_on+0x85/0xb8
> [<ffffffff81f54188>] ? acpi_battery_init+0x16/0x16
> [<ffffffff8106a382>] __async_schedule+0x18b/0x19d
> [<ffffffff81f54172>] ? acpi_memory_hotplug_init+0x12/0x12
> [<ffffffff8106a3b9>] async_schedule+0x15/0x17
> [<ffffffff81f54184>] acpi_battery_init+0x12/0x16
> [<ffffffff81000415>] do_one_initcall+0xf7/0x18a
> [<ffffffff8106692f>] ? parse_args+0x258/0x35f
> [<ffffffff81f140be>] kernel_init_freeable+0x205/0x29e
> [<ffffffff81f137d0>] ? do_early_param+0x86/0x86
> [<ffffffff8160d9bc>] ? _raw_spin_unlock_irq+0x5d/0x72
> [<ffffffff815fc28f>] ? rest_init+0x143/0x143
> [<ffffffff815fc29d>] kernel_init+0xe/0xdf
> [<ffffffff8160e712>] ret_from_fork+0x42/0x70
> [<ffffffff815fc28f>] ? rest_init+0x143/0x143
>
>
>
> On Mon, May 11, 2015 at 05:35:48PM +0800, Lai Jiangshan wrote:
> > ---
> > kernel/workqueue.c | 44 ++++++++++++++++++++++++++------------------
> > 1 file changed, 26 insertions(+), 18 deletions(-)
> >
> > diff --git a/kernel/workqueue.c b/kernel/workqueue.c
> > index a3915ab..fa8b949 100644
> > --- a/kernel/workqueue.c
> > +++ b/kernel/workqueue.c
> > @@ -127,6 +127,12 @@ enum {
> > *
> > * PR: wq_pool_mutex protected for writes. Sched-RCU protected for reads.
> > *
> > + * PW: wq_pool_mutex and wq->mutex protected for writes. Any one of them
> > + * protected for reads.
> > + *
> > + * PWR: wq_pool_mutex and wq->mutex protected for writes. Any one of them
> > + * or sched-RCU for reads.
>
> How exactly is sched-RCU protecting this here? The cause of the 4.1-rt issue
> is that we converted the local_irq_save() in queue_work_on() into a
> "local_lock_irqsave()", which, when PREEMPT_RT is enabled, becomes a mutex
> that disables migration (the task cannot migrate to another CPU). That also
> prevents the current CPU from going offline.
>
> Does this code really need to be protected from being preempted, or is
> disabling migration good enough?
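>
> For reference, the conversion in the -rt queue looks roughly like this
> (a sketch of the 4.1-rt change using -rt's locallock API, not the exact
> hunk):
>
> 	static DEFINE_LOCAL_IRQ_LOCK(pendingb_lock);
>
> 	bool queue_work_on(int cpu, struct workqueue_struct *wq,
> 			   struct work_struct *work)
> 	{
> 		bool ret = false;
> 		unsigned long flags;
>
> 		/* was: local_irq_save(flags); */
> 		local_lock_irqsave(pendingb_lock, flags);
>
> 		if (!test_and_set_bit(WORK_STRUCT_PENDING_BIT,
> 				      work_data_bits(work))) {
> 			__queue_work(cpu, wq, work);
> 			ret = true;
> 		}
>
> 		/* was: local_irq_restore(flags); */
> 		local_unlock_irqrestore(pendingb_lock, flags);
> 		return ret;
> 	}
>
> On !PREEMPT_RT the local lock falls back to plain IRQ disabling, so
> mainline behavior is unchanged; on PREEMPT_RT the section becomes
> preemptible but the task is pinned to the CPU via migrate_disable()
> (which is what shows up in the backtrace above).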
>
> Thanks!
>
> -- Steve
>
>
> > + *
> > * WQ: wq->mutex protected.
> > *
> > * WR: wq->mutex protected for writes. Sched-RCU protected for reads.
> > @@ -247,8 +253,8 @@ struct workqueue_struct {
> > int nr_drainers; /* WQ: drain in progress */
> > int saved_max_active; /* WQ: saved pwq max_active */
> >
> > - struct workqueue_attrs *unbound_attrs; /* WQ: only for unbound wqs */
> > - struct pool_workqueue *dfl_pwq; /* WQ: only for unbound wqs */
> > + struct workqueue_attrs *unbound_attrs; /* PW: only for unbound wqs */
> > + struct pool_workqueue *dfl_pwq; /* PW: only for unbound wqs */
> >
> > #ifdef CONFIG_SYSFS
> > struct wq_device *wq_dev; /* I: for sysfs interface */
> > @@ -268,7 +274,7 @@ struct workqueue_struct {
> > /* hot fields used during command issue, aligned to cacheline */
> > unsigned int flags ____cacheline_aligned; /* WQ: WQ_* flags */
> > struct pool_workqueue __percpu *cpu_pwqs; /* I: per-cpu pwqs */
> > - struct pool_workqueue __rcu *numa_pwq_tbl[]; /* FR: unbound pwqs indexed by node */
> > + struct pool_workqueue __rcu *numa_pwq_tbl[]; /* PWR: unbound pwqs indexed by node */
> > };
> >
> > static struct kmem_cache *pwq_cache;
> > @@ -349,6 +355,12 @@ static void workqueue_sysfs_unregister(struct workqueue_struct *wq);
> > lockdep_is_held(&wq->mutex), \
> > "sched RCU or wq->mutex should be held")
> >
> > +#define assert_rcu_or_wq_mutex_or_pool_mutex(wq) \
> > + rcu_lockdep_assert(rcu_read_lock_sched_held() || \
> > + lockdep_is_held(&wq->mutex) || \
> > + lockdep_is_held(&wq_pool_mutex), \
> > + "sched RCU, wq->mutex or wq_pool_mutex should be held")
> > +
> > #define for_each_cpu_worker_pool(pool, cpu) \
> > for ((pool) = &per_cpu(cpu_worker_pools, cpu)[0]; \
> > (pool) < &per_cpu(cpu_worker_pools, cpu)[NR_STD_WORKER_POOLS]; \
> > @@ -553,7 +565,7 @@ static int worker_pool_assign_id(struct worker_pool *pool)
> > * @wq: the target workqueue
> > * @node: the node ID
> > *
> > - * This must be called either with pwq_lock held or sched RCU read locked.
> > + * This must be called either with wq_pool_mutex held or sched RCU read locked.
> > * If the pwq needs to be used beyond the locking in effect, the caller is
> > * responsible for guaranteeing that the pwq stays online.
> > *
> > @@ -562,7 +574,7 @@ static int worker_pool_assign_id(struct worker_pool *pool)
> > static struct pool_workqueue *unbound_pwq_by_node(struct workqueue_struct *wq,
> > int node)
> > {
> > - assert_rcu_or_wq_mutex(wq);
> > + assert_rcu_or_wq_mutex_or_pool_mutex(wq);
> > return rcu_dereference_raw(wq->numa_pwq_tbl[node]);
> > }
> >
> > @@ -3480,6 +3492,7 @@ static struct pool_workqueue *numa_pwq_tbl_install(struct workqueue_struct *wq,
> > struct pool_workqueue *old_pwq;
> >
> > lockdep_assert_held(&wq->mutex);
> > + lockdep_assert_held(&wq_pool_mutex);
> >
> > /* link_pwq() can handle duplicate calls */
> > link_pwq(pwq);
> > @@ -3644,10 +3657,9 @@ int apply_workqueue_attrs(struct workqueue_struct *wq,
> > * pwqs accordingly.
> > */
> > get_online_cpus();
> > -
> > mutex_lock(&wq_pool_mutex);
> > +
> > ctx = apply_wqattrs_prepare(wq, attrs);
> > - mutex_unlock(&wq_pool_mutex);
> >
> > /* the ctx has been prepared successfully, let's commit it */
> > if (ctx) {
> > @@ -3655,10 +3667,11 @@ int apply_workqueue_attrs(struct workqueue_struct *wq,
> > ret = 0;
> > }
> >
> > - put_online_cpus();
> > -
> > apply_wqattrs_cleanup(ctx);
> >
> > + mutex_unlock(&wq_pool_mutex);
> > + put_online_cpus();
> > +
> > return ret;
> > }
> >
> > @@ -3695,7 +3708,8 @@ static void wq_update_unbound_numa(struct workqueue_struct *wq, int cpu,
> >
> > lockdep_assert_held(&wq_pool_mutex);
> >
> > - if (!wq_numa_enabled || !(wq->flags & WQ_UNBOUND))
> > + if (!wq_numa_enabled || !(wq->flags & WQ_UNBOUND) ||
> > + wq->unbound_attrs->no_numa)
> > return;
> >
> > /*
> > @@ -3706,10 +3720,6 @@ static void wq_update_unbound_numa(struct workqueue_struct *wq, int cpu,
> > target_attrs = wq_update_unbound_numa_attrs_buf;
> > cpumask = target_attrs->cpumask;
> >
> > - mutex_lock(&wq->mutex);
> > - if (wq->unbound_attrs->no_numa)
> > - goto out_unlock;
> > -
> > copy_workqueue_attrs(target_attrs, wq->unbound_attrs);
> > pwq = unbound_pwq_by_node(wq, node);
> >
> > @@ -3721,19 +3731,16 @@ static void wq_update_unbound_numa(struct workqueue_struct *wq, int cpu,
> > */
> > if (wq_calc_node_cpumask(wq->dfl_pwq->pool->attrs, node, cpu_off, cpumask)) {
> > if (cpumask_equal(cpumask, pwq->pool->attrs->cpumask))
> > - goto out_unlock;
> > + return;
> > } else {
> > goto use_dfl_pwq;
> > }
> >
> > - mutex_unlock(&wq->mutex);
> > -
> > /* create a new pwq */
> > pwq = alloc_unbound_pwq(wq, target_attrs);
> > if (!pwq) {
> > pr_warn("workqueue: allocation failed while updating NUMA affinity of \"%s\"\n",
> > wq->name);
> > - mutex_lock(&wq->mutex);
> > goto use_dfl_pwq;
> > }
> >
> > @@ -3748,6 +3755,7 @@ static void wq_update_unbound_numa(struct workqueue_struct *wq, int cpu,
> > goto out_unlock;
> >
> > use_dfl_pwq:
> > + mutex_lock(&wq->mutex);
> > spin_lock_irq(&wq->dfl_pwq->pool->lock);
> > get_pwq(wq->dfl_pwq);
> > spin_unlock_irq(&wq->dfl_pwq->pool->lock);
> > --
> > 2.1.0
> >
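>
> To restate the PWR rule the patch introduces: a reader of
> numa_pwq_tbl[] is legal in any one of three contexts, which is exactly
> what the new assert encodes (a sketch):
>
> 	/* 1) a sched-RCU read-side critical section ... */
> 	rcu_read_lock_sched();
> 	pwq = unbound_pwq_by_node(wq, node);
> 	rcu_read_unlock_sched();
>
> 	/* 2) ... or holding wq->mutex ... */
> 	mutex_lock(&wq->mutex);
> 	pwq = unbound_pwq_by_node(wq, node);
> 	mutex_unlock(&wq->mutex);
>
> 	/* 3) ... or holding wq_pool_mutex */
> 	mutex_lock(&wq_pool_mutex);
> 	pwq = unbound_pwq_by_node(wq, node);
> 	mutex_unlock(&wq_pool_mutex);
>
> The question for -rt remains whether migrate_disable() gives the same
> guarantee as a sched-RCU reader here.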