Re: [PATCH] sched/debug: initialize sd_sysctl_cpus if !CONFIG_CPUMASK_OFFSTACK

From: Peter Zijlstra
Date: Wed Jan 30 2019 - 15:14:06 EST


On Tue, Jan 29, 2019 at 10:12:45AM -0500, Masayoshi Mizuma wrote:
> From: Hidetoshi Seto <seto.hidetoshi@xxxxxxxxxxxxxx>
>
> register_sched_domain_sysctl() copies the cpu_possible_mask into
> sd_sysctl_cpus, but only if sd_sysctl_cpus hasn't already been
> allocated (i.e., CONFIG_CPUMASK_OFFSTACK is set). However, when
> CONFIG_CPUMASK_OFFSTACK is not set, sd_sysctl_cpus is left uninitialized
> (all zeroes) and the kernel may fail to initialize sched_domain sysctl
> entries for all possible cpus.
>
> This is visible to the user if the kernel is booted with maxcpus=n, or
> if ACPI tables have been modified to leave cpus offline, and then
> checking for missing /proc/sys/kernel/sched_domain/cpu* entries.
>
> Fix this by separating the allocation and initialization, and adding
> a flag to initialize the possible cpu entries only during system boot.
>
> Signed-off-by: Hidetoshi Seto <seto.hidetoshi@xxxxxxxxxxxxxx>
> Reviewed-by: Masayoshi Mizuma <m.mizuma@xxxxxxxxxxxxxx>
> Tested-by: Syuuichirou Ishii <ishii.shuuichir@xxxxxxxxxxxxxx>
> Tested-by: Tarumizu, Kohei <tarumizu.kohei@xxxxxxxxxxxxxx>
> ---
> kernel/sched/debug.c | 4 ++++
> 1 file changed, 4 insertions(+)
>
> diff --git a/kernel/sched/debug.c b/kernel/sched/debug.c
> index de3de997e245..9c6637f3e21d 100644
> --- a/kernel/sched/debug.c
> +++ b/kernel/sched/debug.c
> @@ -310,6 +310,7 @@ static struct ctl_table *sd_alloc_ctl_cpu_table(int cpu)
>
> static cpumask_var_t sd_sysctl_cpus;
> static struct ctl_table_header *sd_sysctl_header;
> +static int register_sched_domain_sysctl_on_boot = 1;
>
> void register_sched_domain_sysctl(void)
> {
> @@ -344,9 +345,12 @@ void register_sched_domain_sysctl(void)
> if (!cpumask_available(sd_sysctl_cpus)) {
> if (!alloc_cpumask_var(&sd_sysctl_cpus, GFP_KERNEL))
> return;
> + }
>
> + if (register_sched_domain_sysctl_on_boot) {
> /* init to possible to not have holes in @cpu_entries */
> cpumask_copy(sd_sysctl_cpus, cpu_possible_mask);
> + register_sched_domain_sysctl_on_boot = 0;
> }
>
> for_each_cpu(i, sd_sysctl_cpus) {

I changed it as shown below. By keeping the initial value 0, the flag can go
into .bss instead of .data.

--- a/kernel/sched/debug.c
+++ b/kernel/sched/debug.c
@@ -315,6 +315,7 @@ void register_sched_domain_sysctl(void)
{
static struct ctl_table *cpu_entries;
static struct ctl_table **cpu_idx;
+ static bool init_done = false;
char buf[32];
int i;

@@ -344,7 +345,10 @@ void register_sched_domain_sysctl(void)
if (!cpumask_available(sd_sysctl_cpus)) {
if (!alloc_cpumask_var(&sd_sysctl_cpus, GFP_KERNEL))
return;
+ }

+ if (!init_done) {
+ init_done = true;
/* init to possible to not have holes in @cpu_entries */
cpumask_copy(sd_sysctl_cpus, cpu_possible_mask);
}