[PATCH] MIPS: SMP: Implement parallel CPU bring up for EyeQ
From: Gregory CLEMENT
Date: Sun Apr 13 2025 - 15:12:59 EST
Added support for starting CPUs in parallel on EyeQ to speed up boot time.
On EyeQ5, booting 8 CPUs is now ~90ms faster.
On EyeQ6, booting 32 CPUs is now ~650ms faster.
Signed-off-by: Gregory CLEMENT <gregory.clement@xxxxxxxxxxx>
---
Hello,
This patch allows CPUs to start in parallel. It has been tested on
EyeQ5 and EyeQ6, which are both MIPS64 and use the I6500 design. These
systems use CPS to support SMP.
As noted in the commit log, on EyeQ6, booting 32 CPUs is now ~650ms
faster.
Currently, this support is only for EyeQ SoC. However, it should also
work for other CPUs using CPS. I am less sure about MT ASE support,
but this patch can be a good starting point. If anyone wants to add
support for other systems, I can share some ideas, especially for the
MIPS_GENERIC setup that needs to handle both types of SMP setups.
Gregory
---
arch/mips/Kconfig | 2 ++
arch/mips/include/asm/topology.h | 3 +++
arch/mips/kernel/smp-cps.c | 2 ++
arch/mips/kernel/smp.c | 18 ++++++++++++++++++
4 files changed, 25 insertions(+)
diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig
index fc0772c1bad4ab736d440a18b972faf66a610783..e0e6ce2592b4168facf337b60fd889d76e81a407 100644
--- a/arch/mips/Kconfig
+++ b/arch/mips/Kconfig
@@ -617,6 +617,7 @@ config EYEQ
select USB_UHCI_BIG_ENDIAN_DESC if CPU_BIG_ENDIAN
select USB_UHCI_BIG_ENDIAN_MMIO if CPU_BIG_ENDIAN
select USE_OF
+ select HOTPLUG_PARALLEL if SMP
help
Select this to build a kernel supporting EyeQ SoC from Mobileye.
@@ -2287,6 +2288,7 @@ config MIPS_CPS
select MIPS_CM
select MIPS_CPS_PM if HOTPLUG_CPU
select SMP
+ select HOTPLUG_SMT if HOTPLUG_PARALLEL
select HOTPLUG_CORE_SYNC_DEAD if HOTPLUG_CPU
select SYNC_R4K if (CEVT_R4K || CSRC_R4K)
select SYS_SUPPORTS_HOTPLUG_CPU
diff --git a/arch/mips/include/asm/topology.h b/arch/mips/include/asm/topology.h
index 0673d2d0f2e6dd02ed14d650e5af7b8a3c162b6f..5158c802eb6574d292f6ad2512cc7772fece4aae 100644
--- a/arch/mips/include/asm/topology.h
+++ b/arch/mips/include/asm/topology.h
@@ -16,6 +16,9 @@
#define topology_core_id(cpu) (cpu_core(&cpu_data[cpu]))
#define topology_core_cpumask(cpu) (&cpu_core_map[cpu])
#define topology_sibling_cpumask(cpu) (&cpu_sibling_map[cpu])
+
+extern struct cpumask __cpu_primary_thread_mask;
+#define cpu_primary_thread_mask ((const struct cpumask *)&__cpu_primary_thread_mask)
#endif
#endif /* __ASM_TOPOLOGY_H */
diff --git a/arch/mips/kernel/smp-cps.c b/arch/mips/kernel/smp-cps.c
index e85bd087467e8caf0640ad247ee5f8eb65107591..02bbd7ecd1b9557003186b9d3d98ae17eac5eb9f 100644
--- a/arch/mips/kernel/smp-cps.c
+++ b/arch/mips/kernel/smp-cps.c
@@ -236,6 +236,7 @@ static void __init cps_smp_setup(void)
/* Use the number of VPEs in cluster 0 core 0 for smp_num_siblings */
if (!cl && !c)
smp_num_siblings = core_vpes;
+ cpumask_set_cpu(nvpes, &__cpu_primary_thread_mask);
for (v = 0; v < min_t(int, core_vpes, NR_CPUS - nvpes); v++) {
cpu_set_cluster(&cpu_data[nvpes + v], cl);
@@ -364,6 +365,7 @@ static void __init cps_prepare_cpus(unsigned int max_cpus)
cl = cpu_cluster(¤t_cpu_data);
c = cpu_core(¤t_cpu_data);
cluster_bootcfg = &mips_cps_cluster_bootcfg[cl];
+ cpu_smt_set_num_threads(core_vpes, core_vpes);
core_bootcfg = &cluster_bootcfg->core_config[c];
bitmap_set(cluster_bootcfg->core_power, cpu_core(¤t_cpu_data), 1);
atomic_set(&core_bootcfg->vpe_mask, 1 << cpu_vpe_id(¤t_cpu_data));
diff --git a/arch/mips/kernel/smp.c b/arch/mips/kernel/smp.c
index 39e193cad2b9e4f877e920b71bbbb210e52607d0..1726744f2b2ec10a44420a7b9b9cd04f06c4d2f6 100644
--- a/arch/mips/kernel/smp.c
+++ b/arch/mips/kernel/smp.c
@@ -56,8 +56,10 @@ EXPORT_SYMBOL(cpu_sibling_map);
cpumask_t cpu_core_map[NR_CPUS] __read_mostly;
EXPORT_SYMBOL(cpu_core_map);
+#ifndef CONFIG_HOTPLUG_PARALLEL
static DECLARE_COMPLETION(cpu_starting);
static DECLARE_COMPLETION(cpu_running);
+#endif
/*
* A logical cpu mask containing only one VPE per core to
@@ -74,6 +76,8 @@ static cpumask_t cpu_core_setup_map;
cpumask_t cpu_coherent_mask;
+struct cpumask __cpu_primary_thread_mask __read_mostly;
+
unsigned int smp_max_threads __initdata = UINT_MAX;
static int __init early_nosmt(char *s)
@@ -374,10 +378,15 @@ asmlinkage void start_secondary(void)
set_cpu_core_map(cpu);
cpumask_set_cpu(cpu, &cpu_coherent_mask);
+#ifdef CONFIG_HOTPLUG_PARALLEL
+ cpuhp_ap_sync_alive();
+#endif
notify_cpu_starting(cpu);
+#ifndef CONFIG_HOTPLUG_PARALLEL
/* Notify boot CPU that we're starting & ready to sync counters */
complete(&cpu_starting);
+#endif
synchronise_count_slave(cpu);
@@ -386,11 +395,13 @@ asmlinkage void start_secondary(void)
calculate_cpu_foreign_map();
+#ifndef CONFIG_HOTPLUG_PARALLEL
/*
* Notify boot CPU that we're up & online and it can safely return
* from __cpu_up
*/
complete(&cpu_running);
+#endif
/*
* irq will be enabled in ->smp_finish(), enabling it too early
@@ -447,6 +458,12 @@ void __init smp_prepare_boot_cpu(void)
set_cpu_online(0, true);
}
+#ifdef CONFIG_HOTPLUG_PARALLEL
+int arch_cpuhp_kick_ap_alive(unsigned int cpu, struct task_struct *tidle)
+{
+ return mp_ops->boot_secondary(cpu, tidle);
+}
+#else
int __cpu_up(unsigned int cpu, struct task_struct *tidle)
{
int err;
@@ -466,6 +483,7 @@ int __cpu_up(unsigned int cpu, struct task_struct *tidle)
wait_for_completion(&cpu_running);
return 0;
}
+#endif
#ifdef CONFIG_PROFILING
/* Not really SMP stuff ... */
---
base-commit: 0af2f6be1b4281385b618cb86ad946eded089ac8
change-id: 20250411-parallel-cpu-bringup-78999a9235ea
Best regards,
--
Grégory CLEMENT, Bootlin
Embedded Linux and Kernel engineering
https://bootlin.com