[PATCH v7 3/3] sched/uclamp: Fix a deadlock when enabling uclamp static key

From: Qais Yousef
Date: Thu Jul 16 2020 - 07:07:39 EST


The following splat was caught when setting the uclamp value of a task:

BUG: sleeping function called from invalid context at ./include/linux/percpu-rwsem.h:49

in_atomic(): 1, irqs_disabled(): 128, non_block: 0, pid: 731, name: l_3-1
INFO: lockdep is turned off.
irq event stamp: 150
hardirqs last  enabled at (149): el0_svc_common.constprop.4+0xe4/0x200
hardirqs last disabled at (150): _raw_spin_lock_irqsave+0x38/0xa8
softirqs last  enabled at (0): copy_process+0x620/0x18f0
softirqs last disabled at (0): 0x0
CPU: 5 PID: 731 Comm: l_3-1 Not tainted 5.8.0-rc4-00040-g6345b3305877-dirty #864
Hardware name: ARM Juno development board (r2) (DT)
Call trace:
 dump_backtrace+0x0/0x1f0
 show_stack+0x2c/0x38
 dump_stack+0xf0/0x170
 ___might_sleep+0x144/0x200
 __might_sleep+0x54/0x88
 cpus_read_lock+0x2c/0x130
 static_key_enable+0x1c/0x38
 __sched_setscheduler+0x900/0xad8
 __arm64_sys_sched_setattr+0x2e0/0x4f8
 el0_svc_common.constprop.4+0x84/0x200
 do_el0_svc+0x34/0xa0
 el0_sync_handler+0x16c/0x340
 el0_sync+0x140/0x180

======================================================
WARNING: possible circular locking dependency detected
5.8.0-rc4-00040-g6345b3305877-dirty #864 Not tainted
------------------------------------------------------
l_0-0/730 is trying to acquire lock:
ffff80001343dea0 (cpu_hotplug_lock){++++}-{0:0}, at: static_key_enable+0x1c/0x38

but task is already holding lock:
ffff00097ef4ca58 (&rq->lock){-.-.}-{2:2}, at: task_rq_lock+0x60/0xf0

which lock already depends on the new lock.

the existing dependency chain (in reverse order) is:

-> #1 (&rq->lock){-.-.}-{2:2}:
       _raw_spin_lock+0x64/0x80
       __schedule+0x108/0x910
       schedule+0x7c/0x108
       schedule_timeout+0x2b0/0x448
       wait_for_completion_killable+0xb8/0x1a8
       __kthread_create_on_node+0xe0/0x1c0
       kthread_create_on_node+0x8c/0xb8
       create_worker+0xd0/0x1b8
       workqueue_prepare_cpu+0x5c/0xa0
       cpuhp_invoke_callback+0xe8/0xe30
       _cpu_up+0xf4/0x1c0
       cpu_up+0xa0/0xc0
       bringup_nonboot_cpus+0x88/0xc0
       smp_init+0x34/0x90
       kernel_init_freeable+0x1b8/0x338
       kernel_init+0x18/0x118
       ret_from_fork+0x10/0x18

-> #0 (cpu_hotplug_lock){++++}-{0:0}:
       __lock_acquire+0x11a0/0x1550
       lock_acquire+0xf8/0x460
       cpus_read_lock+0x68/0x130
       static_key_enable+0x1c/0x38
       __sched_setscheduler+0x900/0xad8
       __arm64_sys_sched_setattr+0x2e0/0x4f8
       el0_svc_common.constprop.4+0x84/0x200
       do_el0_svc+0x34/0xa0
       el0_sync_handler+0x16c/0x340
       el0_sync+0x140/0x180

other info that might help us debug this:

 Possible unsafe locking scenario:

       CPU0                    CPU1
       ----                    ----
  lock(&rq->lock);
                               lock(cpu_hotplug_lock);
                               lock(&rq->lock);
  lock(cpu_hotplug_lock);

 *** DEADLOCK ***

3 locks held by l_0-0/730:
 #0: ffff80001345b4d0 (&cpuset_rwsem){++++}-{0:0}, at: __sched_setscheduler+0x4c0/0xad8
 #1: ffff00096e83b718 (&p->pi_lock){-.-.}-{2:2}, at: task_rq_lock+0x44/0xf0
 #2: ffff00097ef4ca58 (&rq->lock){-.-.}-{2:2}, at: task_rq_lock+0x60/0xf0

stack backtrace:
CPU: 1 PID: 730 Comm: l_0-0 Tainted: G W 5.8.0-rc4-00040-g6345b3305877-dirty #864
Hardware name: ARM Juno development board (r2) (DT)
Call trace:
 dump_backtrace+0x0/0x1f0
 show_stack+0x2c/0x38
 dump_stack+0xf0/0x170
 print_circular_bug.isra.40+0x228/0x280
 check_noncircular+0x14c/0x1b0
 __lock_acquire+0x11a0/0x1550
 lock_acquire+0xf8/0x460
 cpus_read_lock+0x68/0x130
 static_key_enable+0x1c/0x38
 __sched_setscheduler+0x900/0xad8
 __arm64_sys_sched_setattr+0x2e0/0x4f8
 el0_svc_common.constprop.4+0x84/0x200
 do_el0_svc+0x34/0xa0
 el0_sync_handler+0x16c/0x340
 el0_sync+0x140/0x180
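
The sleeping-in-atomic report fires because static_branch_enable() ends
up in percpu_down_read(), which may sleep. As a minimal sketch of the
call chain visible in the traces above (simplified from
kernel/jump_label.c, kernel/cpu.c and include/linux/percpu-rwsem.h):

	static_branch_enable(&sched_uclamp_used)
	  static_key_enable(&sched_uclamp_used.key)
	    cpus_read_lock()
	      percpu_down_read(&cpu_hotplug_lock)	/* might_sleep(), and lockdep
							   records rq->lock -> cpu_hotplug_lock */
	    static_key_enable_cpuslocked(...)
	    cpus_read_unlock()

The cpu_up() path in chain #1 above takes the two locks in the opposite
order (rq->lock under cpu_hotplug_lock), hence the circular dependency
warning on top of the might_sleep() splat.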

Fix by ensuring we enable the static key outside of the critical section
in __sched_setscheduler().
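
As a hand-simplified sketch of __sched_setscheduler() after this patch
(error paths and config guards elided; the real change is in the diff
below):

	rq = task_rq_lock(p, &rf);		/* pi_lock + rq->lock held */
	...
	/* Under the lock, only record the requested clamp values. */
	retval = __setscheduler_uclamp(p, attr);/* true if uclamp requested */
	...
	task_rq_unlock(rq, p, &rf);
	...
	preempt_enable();

	/* No locks held any more; flipping the key may now block. */
	if (retval)
		static_branch_enable(&sched_uclamp_used);

	return 0;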

Fixes: 46609ce22703 ("sched/uclamp: Protect uclamp fast path code with static key")
Signed-off-by: Qais Yousef <qais.yousef@xxxxxxx>
Cc: Juri Lelli <juri.lelli@xxxxxxxxxx>
Cc: Vincent Guittot <vincent.guittot@xxxxxxxxxx>
Cc: Dietmar Eggemann <dietmar.eggemann@xxxxxxx>
Cc: Steven Rostedt <rostedt@xxxxxxxxxxx>
Cc: Ben Segall <bsegall@xxxxxxxxxx>
Cc: Mel Gorman <mgorman@xxxxxxx>
Cc: Patrick Bellasi <patrick.bellasi@xxxxxxxxxx>
Cc: Chris Redpath <chris.redpath@xxxxxxx>
Cc: Lukasz Luba <lukasz.luba@xxxxxxx>
Cc: linux-kernel@xxxxxxxxxxxxxxx
---
kernel/sched/core.c | 30 +++++++++++++++++++++++-------
1 file changed, 23 insertions(+), 7 deletions(-)

diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index e1578c3ad40c..947a1f4fa112 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -1348,7 +1348,7 @@ static int uclamp_validate(struct task_struct *p,
return 0;
}

-static void __setscheduler_uclamp(struct task_struct *p,
+static bool __setscheduler_uclamp(struct task_struct *p,
const struct sched_attr *attr)
{
enum uclamp_id clamp_id;
@@ -1376,9 +1376,7 @@ static void __setscheduler_uclamp(struct task_struct *p,
}

if (likely(!(attr->sched_flags & SCHED_FLAG_UTIL_CLAMP)))
- return;
-
- static_branch_enable(&sched_uclamp_used);
+ return false;

if (attr->sched_flags & SCHED_FLAG_UTIL_CLAMP_MIN) {
uclamp_se_set(&p->uclamp_req[UCLAMP_MIN],
@@ -1389,6 +1387,8 @@ static void __setscheduler_uclamp(struct task_struct *p,
uclamp_se_set(&p->uclamp_req[UCLAMP_MAX],
attr->sched_util_max, true);
}
+
+ return true;
}

static void uclamp_fork(struct task_struct *p)
@@ -1465,8 +1465,11 @@ static inline int uclamp_validate(struct task_struct *p,
{
return -EOPNOTSUPP;
}
-static void __setscheduler_uclamp(struct task_struct *p,
- const struct sched_attr *attr) { }
+static bool __setscheduler_uclamp(struct task_struct *p,
+ const struct sched_attr *attr)
+{
+ return false;
+}
static inline void uclamp_fork(struct task_struct *p) { }
static inline void uclamp_post_fork(struct task_struct *p) { }
static inline void init_uclamp(void) { }
@@ -5305,7 +5308,8 @@ static int __sched_setscheduler(struct task_struct *p,
prev_class = p->sched_class;

__setscheduler(rq, p, attr, pi);
- __setscheduler_uclamp(p, attr);
+
+ retval = __setscheduler_uclamp(p, attr);

if (queued) {
/*
@@ -5335,6 +5339,18 @@ static int __sched_setscheduler(struct task_struct *p,
balance_callback(rq);
preempt_enable();

+#ifdef CONFIG_UCLAMP_TASK
+ /*
+ * Enable the uclamp static key outside of the critical section.
+ * static_branch_enable() holds cpu_hotplug_lock; if taken from the
+ * critical section above, which holds other locks (namely rq->lock),
+ * it could lead to deadlocks, since both are popular locks that can
+ * be acquired from different paths in different orders.
+ */
+ if (retval)
+ static_branch_enable(&sched_uclamp_used);
+#endif
+
return 0;

unlock:
--
2.17.1