[PATCH 2/2] sched/core: fix cpu_hotplug_lock recursion in tg_set_cfs_bandwidth()

From: Konstantin Khlebnikov
Date: Wed Jan 03 2018 - 15:37:24 EST


After commit fc8dffd379ca ("cpu/hotplug: Convert hotplug locking to
percpu rwsem"), get_online_cpus() must not be called recursively.

CPU hotplug is already locked for read in tg_set_cfs_bandwidth(), and
static_key_slow_inc() in cfs_bandwidth_usage_inc() takes the lock again.

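Switch to cpus_read_[un]lock() and to static_key_slow_inc_cpuslocked()
and static_key_slow_dec_cpuslocked(), which are meant to be called with
the CPU hotplug lock already held.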

This fixes the following lockdep warning:

============================================
WARNING: possible recursive locking detected
4.14.11-debug-test #5 Not tainted
--------------------------------------------
portod-worker27/4773 is trying to acquire lock:
(cpu_hotplug_lock.rw_sem){++++}, at: static_key_slow_inc+0xe/0x170

but task is already holding lock:
(cpu_hotplug_lock.rw_sem){++++}, at: tg_set_cfs_bandwidth+0xc6/0x890

other info that might help us debug this:
Possible unsafe locking scenario:

CPU0
----
lock(cpu_hotplug_lock.rw_sem);
lock(cpu_hotplug_lock.rw_sem);

*** DEADLOCK ***

Signed-off-by: Konstantin Khlebnikov <khlebnikov@xxxxxxxxxxxxxx>
---
kernel/sched/core.c | 4 ++--
kernel/sched/fair.c | 4 ++--
kernel/sched/sched.h | 1 +
3 files changed, 5 insertions(+), 4 deletions(-)

diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 644fa2e3d993..584832b49fdc 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -6448,7 +6448,7 @@ static int tg_set_cfs_bandwidth(struct task_group *tg, u64 period, u64 quota)
* Prevent race between setting of cfs_rq->runtime_enabled and
* unthrottle_offline_cfs_rqs().
*/
- get_online_cpus();
+ cpus_read_lock();
mutex_lock(&cfs_constraints_mutex);
ret = __cfs_schedulable(tg, period, quota);
if (ret)
@@ -6491,7 +6491,7 @@ static int tg_set_cfs_bandwidth(struct task_group *tg, u64 period, u64 quota)
cfs_bandwidth_usage_dec();
out_unlock:
mutex_unlock(&cfs_constraints_mutex);
- put_online_cpus();
+ cpus_read_unlock();

return ret;
}
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 2fe3aa853e4d..26a71ebcd3c2 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -4365,12 +4365,12 @@ static inline bool cfs_bandwidth_used(void)

void cfs_bandwidth_usage_inc(void)
{
- static_key_slow_inc(&__cfs_bandwidth_used);
+ static_key_slow_inc_cpuslocked(&__cfs_bandwidth_used);
}

void cfs_bandwidth_usage_dec(void)
{
- static_key_slow_dec(&__cfs_bandwidth_used);
+ static_key_slow_dec_cpuslocked(&__cfs_bandwidth_used);
}
#else /* HAVE_JUMP_LABEL */
static bool cfs_bandwidth_used(void)
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index b19552a212de..7dddc531ba63 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -2007,6 +2007,7 @@ extern void init_cfs_rq(struct cfs_rq *cfs_rq);
extern void init_rt_rq(struct rt_rq *rt_rq);
extern void init_dl_rq(struct dl_rq *dl_rq);

+/* Must be called under cpus_read_lock() */
extern void cfs_bandwidth_usage_inc(void);
extern void cfs_bandwidth_usage_dec(void);