Re: Linux 5.14

From: Heiko Carstens
Date: Tue Aug 31 2021 - 07:05:07 EST


On Mon, Aug 30, 2021 at 11:28:54PM +0200, Peter Zijlstra wrote:
> On Mon, Aug 30, 2021 at 01:15:37PM -0700, Linus Torvalds wrote:
> > On Mon, Aug 30, 2021 at 1:12 PM Guenter Roeck <linux@xxxxxxxxxxxx> wrote:
> > >
> > > So far so good, but there is a brand new runtime warning, seen when booting
> > > s390 images.
> > >
> > > [ 3.218816] ------------[ cut here ]------------
> > > [ 3.219010] WARNING: CPU: 1 PID: 0 at kernel/sched/core.c:5779 sched_core_cpu_starting+0x172/0x180
> > > [ 3.222845] Call Trace:
> > > [ 3.222992] [<0000000000186e86>] sched_core_cpu_starting+0x176/0x180
> > > [ 3.223114] ([<0000000000186dc4>] sched_core_cpu_starting+0xb4/0x180)
> > > [ 3.223182] [<00000000001963e4>] sched_cpu_starting+0x2c/0x68
> > > [ 3.223243] [<000000000014f288>] cpuhp_invoke_callback+0x318/0x970
> > > [ 3.223304] [<000000000014f970>] cpuhp_invoke_callback_range+0x90/0x108
> > > [ 3.223364] [<000000000015123c>] notify_cpu_starting+0x84/0xa8
> > > [ 3.223426] [<0000000000117bca>] smp_init_secondary+0x72/0xf0
> > > [ 3.223492] [<0000000000117846>] smp_start_secondary+0x86/0x90
> > >
> > > Commit 3c474b3239f12 ("sched: Fix Core-wide rq->lock for uninitialized
> > > CPUs") seems to be the culprit. Indeed, the warning is gone after reverting
> > > this commit.
> >
> > Ouch, not great timing.
> >
> > Adding the s390 people to the cc too, just to make sure everybody
> > involved is aware.
>
> 'Funny' thing, Sven actually tested that on s390. I had already comitted
> the patch which is why his tag isn't on the commit:
>
> https://lkml.kernel.org/r/yt9dy28o8q0o.fsf@xxxxxxxxxxxxx
>
> Anyway, looks like Thomas found something fishy in their topology code.
> Lemme go catch up.

Sven provided the patch below which should fix the topology problem.
If it fixes everything it will go upstream with a stable tag, but it
first needs to see our CI to hopefully make sure it doesn't introduce
new regressions.

From: Sven Schnelle <svens@xxxxxxxxxxxxx>
Subject: [PATCH] s390: fix topology information when calling cpu hotplug notifiers

The cpu hotplug notifiers are called without updating the core/thread
masks when a new CPU is added. This causes problems with code setting
up data structures in a cpu hotplug notifier, and relying on that later
in normal code.

This caused a crash in the new core scheduling code (SCHED_CORE),
where rq->core was set up in a notifier depending on cpu masks.

To fix this, add a cpu_setup_mask which is used in update_cpu_masks()
instead of the cpu_online_mask to determine whether the cpu masks should
be set for a certain cpu. Also move update_cpu_masks() to update the
masks before calling notify_cpu_starting() so that the notifiers are
seeing the updated masks.

Signed-off-by: Sven Schnelle <svens@xxxxxxxxxxxxx>
---
arch/s390/include/asm/smp.h | 1 +
arch/s390/kernel/smp.c | 9 +++++++--
arch/s390/kernel/topology.c | 10 +++++-----
3 files changed, 13 insertions(+), 7 deletions(-)

diff --git a/arch/s390/include/asm/smp.h b/arch/s390/include/asm/smp.h
index e317fd4866c1..f16f4d054ae2 100644
--- a/arch/s390/include/asm/smp.h
+++ b/arch/s390/include/asm/smp.h
@@ -18,6 +18,7 @@ extern struct mutex smp_cpu_state_mutex;
extern unsigned int smp_cpu_mt_shift;
extern unsigned int smp_cpu_mtid;
extern __vector128 __initdata boot_cpu_vector_save_area[__NUM_VXRS];
+extern cpumask_t cpu_setup_mask;

extern int __cpu_up(unsigned int cpu, struct task_struct *tidle);

diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c
index 2a991e43ead3..1a04e5bdf655 100644
--- a/arch/s390/kernel/smp.c
+++ b/arch/s390/kernel/smp.c
@@ -95,6 +95,7 @@ __vector128 __initdata boot_cpu_vector_save_area[__NUM_VXRS];
#endif

static unsigned int smp_max_threads __initdata = -1U;
+cpumask_t cpu_setup_mask;

static int __init early_nosmt(char *s)
{
@@ -902,13 +903,14 @@ static void smp_start_secondary(void *cpuvoid)
vtime_init();
vdso_getcpu_init();
pfault_init();
+ cpumask_set_cpu(cpu, &cpu_setup_mask);
+ update_cpu_masks();
notify_cpu_starting(cpu);
if (topology_cpu_dedicated(cpu))
set_cpu_flag(CIF_DEDICATED_CPU);
else
clear_cpu_flag(CIF_DEDICATED_CPU);
set_cpu_online(cpu, true);
- update_cpu_masks();
inc_irq_stat(CPU_RST);
local_irq_enable();
cpu_startup_entry(CPUHP_AP_ONLINE_IDLE);
@@ -950,10 +952,13 @@ early_param("possible_cpus", _setup_possible_cpus);
int __cpu_disable(void)
{
unsigned long cregs[16];
+ int cpu;

/* Handle possible pending IPIs */
smp_handle_ext_call();
- set_cpu_online(smp_processor_id(), false);
+ cpu = smp_processor_id();
+ set_cpu_online(cpu, false);
+ cpumask_clear_cpu(cpu, &cpu_setup_mask);
update_cpu_masks();
/* Disable pseudo page faults on this cpu. */
pfault_fini();
diff --git a/arch/s390/kernel/topology.c b/arch/s390/kernel/topology.c
index d2458a29618f..5cc7aeae4610 100644
--- a/arch/s390/kernel/topology.c
+++ b/arch/s390/kernel/topology.c
@@ -67,9 +67,8 @@ static void cpu_group_map(cpumask_t *dst, struct mask_info *info, unsigned int c
static cpumask_t mask;

cpumask_clear(&mask);
- if (!cpu_online(cpu))
+ if (!cpumask_test_cpu(cpu, &cpu_setup_mask))
goto out;
- cpumask_set_cpu(cpu, &mask);
switch (topology_mode) {
case TOPOLOGY_MODE_HW:
while (info) {
@@ -89,6 +88,7 @@ static void cpu_group_map(cpumask_t *dst, struct mask_info *info, unsigned int c
break;
}
cpumask_and(&mask, &mask, cpu_online_mask);
+ cpumask_set_cpu(cpu, &mask);
out:
cpumask_copy(dst, &mask);
}
@@ -99,16 +99,15 @@ static void cpu_thread_map(cpumask_t *dst, unsigned int cpu)
int i;

cpumask_clear(&mask);
- if (!cpu_online(cpu))
+ if (!cpumask_test_cpu(cpu, &cpu_setup_mask))
goto out;
cpumask_set_cpu(cpu, &mask);
if (topology_mode != TOPOLOGY_MODE_HW)
goto out;
cpu -= cpu % (smp_cpu_mtid + 1);
for (i = 0; i <= smp_cpu_mtid; i++)
- if (cpu_present(cpu + i))
+ if (cpu_online(cpu + i))
cpumask_set_cpu(cpu + i, &mask);
- cpumask_and(&mask, &mask, cpu_online_mask);
out:
cpumask_copy(dst, &mask);
}
@@ -569,6 +568,7 @@ void __init topology_init_early(void)
alloc_masks(info, &book_info, 2);
alloc_masks(info, &drawer_info, 3);
out:
+ cpumask_set_cpu(0, &cpu_setup_mask);
__arch_update_cpu_topology();
__arch_update_dedicated_flag(NULL);
}
--
2.25.1