Re: [PATCH] random: Fix kernel panic due to system_wq use before init

From: Tejun Heo
Date: Wed Sep 14 2016 - 18:26:46 EST


Hello,

On Wed, Sep 14, 2016 at 03:55:51PM -0400, Tejun Heo wrote:
> We've used keventd_up() for this purpose and it hasn't been big enough
> an issue as workqueue usages during earlyboot are very rare (only five
> users right now). But, yeah, it's getting used a more and more and
> there's no harm in moving it to earlier. I'll see how early I can
> push it.

So, we can't really start executing work items much before the init
task is started because other kthreads can't be created or scheduled
before then; however, we can build up all of the workqueue data
structures very early once memory allocation and idr are operational
sans actual worker creation and onlining. This allows creating
workqueues and queueing work items from very early during the boot and
the queued work items will start executing as soon as init task starts
executing. Obviously, if someone tries to flush anything, it's not
gonna work but that really shouldn't happen during early boot.

The patch I'm playing with now looks like the following and it seems
to work fine. I'll verify it further and add some niceties like
WARN_ONs on premature flushes and removal of unnecessary keventd_up()
tests.

Thanks.


Index: work/include/linux/workqueue.h
===================================================================
--- work.orig/include/linux/workqueue.h
+++ work/include/linux/workqueue.h
@@ -631,4 +631,7 @@ int workqueue_online_cpu(unsigned int cp
int workqueue_offline_cpu(unsigned int cpu);
#endif

+int __init workqueue_init_early(void);
+int __init workqueue_init(void);
+
#endif
Index: work/init/main.c
===================================================================
--- work.orig/init/main.c
+++ work/init/main.c
@@ -476,6 +476,11 @@ static void __init mm_init(void)
ioremap_huge_init();
}

+static void my_test_work_fn(struct work_struct *work)
+{
+ printk("XXX my_test_work\n");
+}
+
asmlinkage __visible void __init start_kernel(void)
{
char *command_line;
@@ -551,6 +556,13 @@ asmlinkage __visible void __init start_k
"Interrupts were enabled *very* early, fixing it\n"))
local_irq_disable();
idr_init_cache();
+ workqueue_init_early();
+
+ {
+ static DECLARE_WORK(my_test_work, my_test_work_fn);
+ schedule_work(&my_test_work);
+ }
+
rcu_init();

/* trace_printk() and trace points may be used after this */
@@ -1005,6 +1017,7 @@ static noinline void __init kernel_init_

smp_prepare_cpus(setup_max_cpus);

+ workqueue_init();
do_pre_smp_initcalls();
lockup_detector_init();

Index: work/kernel/workqueue.c
===================================================================
--- work.orig/kernel/workqueue.c
+++ work/kernel/workqueue.c
@@ -290,6 +290,8 @@ module_param_named(disable_numa, wq_disa
static bool wq_power_efficient = IS_ENABLED(CONFIG_WQ_POWER_EFFICIENT_DEFAULT);
module_param_named(power_efficient, wq_power_efficient, bool, 0444);

+static bool wq_online;
+
static bool wq_numa_enabled; /* unbound NUMA affinity enabled */

/* buf for wq_update_unbound_numa_attrs(), protected by CPU hotplug exclusion */
@@ -3352,7 +3354,7 @@ static struct worker_pool *get_unbound_p
goto fail;

/* create and start the initial worker */
- if (!create_worker(pool))
+ if (wq_online && !create_worker(pool))
goto fail;

/* install */
@@ -3417,6 +3419,7 @@ static void pwq_adjust_max_active(struct
{
struct workqueue_struct *wq = pwq->wq;
bool freezable = wq->flags & WQ_FREEZABLE;
+ unsigned long flags;

/* for @wq->saved_max_active */
lockdep_assert_held(&wq->mutex);
@@ -3425,7 +3428,7 @@ static void pwq_adjust_max_active(struct
if (!freezable && pwq->max_active == wq->saved_max_active)
return;

- spin_lock_irq(&pwq->pool->lock);
+ spin_lock_irqsave(&pwq->pool->lock, flags);

/*
* During [un]freezing, the caller is responsible for ensuring that
@@ -3448,7 +3451,7 @@ static void pwq_adjust_max_active(struct
pwq->max_active = 0;
}

- spin_unlock_irq(&pwq->pool->lock);
+ spin_unlock_irqrestore(&pwq->pool->lock, flags);
}

/* initialize newly alloced @pwq which is associated with @wq and @pool */
@@ -5455,7 +5458,7 @@ static void __init wq_numa_init(void)
wq_numa_enabled = true;
}

-static int __init init_workqueues(void)
+int __init workqueue_init_early(void)
{
int std_nice[NR_STD_WORKER_POOLS] = { 0, HIGHPRI_NICE_LEVEL };
int i, cpu;
@@ -5488,16 +5491,6 @@ static int __init init_workqueues(void)
}
}

- /* create the initial worker */
- for_each_online_cpu(cpu) {
- struct worker_pool *pool;
-
- for_each_cpu_worker_pool(pool, cpu) {
- pool->flags &= ~POOL_DISASSOCIATED;
- BUG_ON(!create_worker(pool));
- }
- }
-
/* create default unbound and ordered wq attrs */
for (i = 0; i < NR_STD_WORKER_POOLS; i++) {
struct workqueue_attrs *attrs;
@@ -5538,4 +5531,23 @@ static int __init init_workqueues(void)

return 0;
}
-early_initcall(init_workqueues);
+
+int __init workqueue_init(void)
+{
+ struct worker_pool *pool;
+ int cpu, bkt;
+
+ /* create the initial workers */
+ for_each_online_cpu(cpu) {
+ for_each_cpu_worker_pool(pool, cpu) {
+ pool->flags &= ~POOL_DISASSOCIATED;
+ BUG_ON(!create_worker(pool));
+ }
+ }
+
+ hash_for_each(unbound_pool_hash, bkt, pool, hash_node)
+ BUG_ON(!create_worker(pool));
+
+ return 0;
+}
+