Hi, Pierre,
Yes right indeed,
This happens when putting a CPU offline (as you mentioned earlier,
putting a CPU offline clears the CPU in the idle_cpus_mask).
The load balancing related variables
including?
are unused if a CPU has a NULL
rq as it cannot pull any task. Ideally we should clear them once,
when attaching a NULL sd to the CPU.
This sounds good to me. But TBH, I don't have enough confidence to do
so because I'm not crystal clear about how these variables are used.
Some questions about the code below.
if we force clearing rq->nohz_tick_stopped when detaching domain, why
The following snipped should do that and solve the issue you
mentioned:
--- snip ---
--- a/include/linux/sched/nohz.h
+++ b/include/linux/sched/nohz.h
@@ -9,8 +9,10 @@
#if defined(CONFIG_SMP) && defined(CONFIG_NO_HZ_COMMON)
extern void nohz_balance_enter_idle(int cpu);
extern int get_nohz_timer_target(void);
+extern void nohz_clean_sd_state(int cpu);
#else
static inline void nohz_balance_enter_idle(int cpu) { }
+static inline void nohz_clean_sd_state(int cpu) { }
#endif
#ifdef CONFIG_NO_HZ_COMMON
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index b3e25be58e2b..6fcabe5d08f5 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -11525,6 +11525,9 @@ void nohz_balance_exit_idle(struct rq *rq)
{
SCHED_WARN_ON(rq != this_rq());
+ if (on_null_domain(rq))
+ return;
+
if (likely(!rq->nohz_tick_stopped))
return;
bother adding the first check?
@@ -11551,6 +11554,17 @@ static void set_cpu_sd_state_idle(int cpu)
rcu_read_unlock();
}
+void nohz_clean_sd_state(int cpu) {
+ struct rq *rq = cpu_rq(cpu);
+
+ rq->nohz_tick_stopped = 0;
+ if (cpumask_test_cpu(cpu, nohz.idle_cpus_mask)) {
+ cpumask_clear_cpu(cpu, nohz.idle_cpus_mask);
+ atomic_dec(&nohz.nr_cpus);
+ }
+ set_cpu_sd_state_idle(cpu);
+}
+
detach_destroy_domains
cpu_attach_domain
update_top_cache_domain
as we clears per_cpu(sd_llc, cpu) for the isolated cpu in
cpu_attach_domain(), set_cpu_sd_state_idle() seems to be a no-op here,
no?
thanks,
rui
/*
* This routine will record that the CPU is going idle with tick
stopped.
* This info will be used in performing idle load balancing in the
future.
diff --git a/kernel/sched/topology.c b/kernel/sched/topology.c
index d3a3b2646ec4..d31137b5f0ce 100644
--- a/kernel/sched/topology.c
+++ b/kernel/sched/topology.c
@@ -2584,8 +2584,10 @@ static void detach_destroy_domains(const
struct cpumask *cpu_map)
static_branch_dec_cpuslocked(&sched_asym_cpucapacity);
rcu_read_lock();
- for_each_cpu(i, cpu_map)
+ for_each_cpu(i, cpu_map) {
cpu_attach_domain(NULL, &def_root_domain, i);
+ nohz_clean_sd_state(i);
+ }
rcu_read_unlock();
}
--- snip ---
Regards,
Pierre
+ }
+
/*
* The tick is still stopped but load could have been
added in the
* meantime. We set the nohz.has_blocked flag to trig
a
check of the
@@ -11585,10 +11609,6 @@ void nohz_balance_enter_idle(int cpu)
if (rq->nohz_tick_stopped)
goto out;
- /* If we're a completely isolated CPU, we don't play:
*/
- if (on_null_domain(rq))
- return;
-
rq->nohz_tick_stopped = 1;
cpumask_set_cpu(cpu, nohz.idle_cpus_mask);
Otherwise I could reproduce the issue and the patch was solving
it,
so:
Tested-by: Pierre Gondois <pierre.gondois@xxxxxxx>
Thanks for testing, really appreciated!
Interesting. Let me check and get back to you later on this. :)
Also, your patch doesn't aim to solve that, but I think there
is an
issue
when updating cpuset.cpus when an isolated partition was
already
created:
// Create an isolated partition containing CPU0
# mkdir cgroup
# mount -t cgroup2 none cgroup/
# mkdir cgroup/Testing
# echo "+cpuset" > cgroup/cgroup.subtree_control
# echo "+cpuset" > cgroup/Testing/cgroup.subtree_control
# echo 0 > cgroup/Testing/cpuset.cpus
# echo isolated > cgroup/Testing/cpuset.cpus.partition
// CPU0's sched domain is detached:
# ls /sys/kernel/debug/sched/domains/cpu0/
# ls /sys/kernel/debug/sched/domains/cpu1/
domain0 domain1
// Change the isolated partition to be CPU1
# echo 1 > cgroup/Testing/cpuset.cpus
// CPU[0-1] sched domains are not updated:
# ls /sys/kernel/debug/sched/domains/cpu0/
# ls /sys/kernel/debug/sched/domains/cpu1/
domain0 domain1
thanks,
rui