[RFC PATCH v1] stop_machine: Add stop_housekeeping_cpuslocked()
From: Costa Shulyupin
Date: Wed Dec 18 2024 - 12:16:09 EST
CPU hotplug interferes with CPU isolation and introduces latency to
real-time tasks.
The test:
rtla timerlat hist -c 1 -a 500 &
echo 0 > /sys/devices/system/cpu/cpu2/online
The RTLA tool reveals the following blocking thread stack trace:
-> multi_cpu_stop
-> cpu_stopper_thread
-> smpboot_thread_fn
This happens because multi_cpu_stop() disables interrupts on EACH online
CPU: takedown_cpu() indirectly invokes take_cpu_down() through
stop_machine_cpuslocked(). I'm omitting the detailed description of the
call chain.
Proposal: Limit the stop operation to housekeeping CPUs.
take_cpu_down() invokes the following callbacks via cpuhp_invoke_callback_range_nofail():
- tick_cpu_dying()
- hrtimers_cpu_dying()
- smpcfd_dying_cpu()
- x86_pmu_dying_cpu()
- rcutree_dying_cpu()
- sched_cpu_dying()
- cache_ap_offline()
What synchronization do these functions require in place of stop_machine()?
Passed standard regression tests:
- https://gitlab.com/redhat/centos-stream/src/kernel/centos-stream-9/-/merge_requests/6042
- https://gitlab.com/redhat/centos-stream/src/kernel/centos-stream-10/-/merge_requests/89
Passes primitive CPU hotplug test without warnings.
What tests and test suites do you recommend?
Signed-off-by: Costa Shulyupin <costa.shul@xxxxxxxxxx>
---
include/linux/stop_machine.h | 12 ++++++++++++
kernel/cpu.c | 2 +-
kernel/stop_machine.c | 23 +++++++++++++++++++++++
3 files changed, 36 insertions(+), 1 deletion(-)
diff --git a/include/linux/stop_machine.h b/include/linux/stop_machine.h
index 3132262a404dc..4c9e709f174c8 100644
--- a/include/linux/stop_machine.h
+++ b/include/linux/stop_machine.h
@@ -140,6 +140,18 @@ int stop_machine_cpuslocked(cpu_stop_fn_t fn, void *data, const struct cpumask *
*/
int stop_core_cpuslocked(unsigned int cpu, cpu_stop_fn_t fn, void *data);
+/**
+ * stop_housekeeping_cpuslocked: freeze housekeeping CPUs and run specified function.
+ * Unlike stop_machine_cpuslocked(), it doesn't stop isolated CPUs unless they are in @cpus.
+ * @fn: the function to run
+ * @data: the data ptr for the @fn()
+ * @cpus: the cpus to run the @fn() on (NULL = any online cpu)
+ *
+ * Must be called from within a cpus_read_lock() protected
+ * region. Avoids nested calls to cpus_read_lock().
+ */
+int stop_housekeeping_cpuslocked(cpu_stop_fn_t fn, void *data, const struct cpumask *cpus);
+
int stop_machine_from_inactive_cpu(cpu_stop_fn_t fn, void *data,
const struct cpumask *cpus);
#else /* CONFIG_SMP || CONFIG_HOTPLUG_CPU */
diff --git a/kernel/cpu.c b/kernel/cpu.c
index 6d7c51e7c366c..052f48bebf816 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -1310,7 +1310,7 @@ static int takedown_cpu(unsigned int cpu)
/*
* So now all preempt/rcu users must observe !cpu_active().
*/
- err = stop_machine_cpuslocked(take_cpu_down, NULL, cpumask_of(cpu));
+ err = stop_housekeeping_cpuslocked(take_cpu_down, NULL, cpumask_of(cpu));
if (err) {
/* CPU refused to die */
irq_unlock_sparse();
diff --git a/kernel/stop_machine.c b/kernel/stop_machine.c
index da821ce258ea7..1f36e44d42476 100644
--- a/kernel/stop_machine.c
+++ b/kernel/stop_machine.c
@@ -22,6 +22,7 @@
#include <linux/atomic.h>
#include <linux/nmi.h>
#include <linux/sched/wake_q.h>
+#include <linux/sched/isolation.h>
/*
* Structure to determine completion condition and record errors. May
@@ -619,6 +620,28 @@ int stop_machine_cpuslocked(cpu_stop_fn_t fn, void *data,
return stop_cpus(cpu_online_mask, multi_cpu_stop, &msdata);
}
+int stop_housekeeping_cpuslocked(cpu_stop_fn_t fn, void *data, const struct cpumask *cpus)
+{
+	struct multi_stop_data msdata = { .fn = fn, .data = data, .active_cpus = cpus };
+	cpumask_var_t stop_mask;
+	int ret;
+
+	lockdep_assert_cpus_held();
+
+	if (!alloc_cpumask_var(&stop_mask, GFP_KERNEL))
+		return -ENOMEM;
+	cpumask_and(stop_mask, cpu_online_mask, housekeeping_cpumask(HK_TYPE_MANAGED_IRQ));
+	/* @cpus may be NULL (any online cpu runs @fn): stop all online CPUs in that case. */
+	cpumask_or(stop_mask, stop_mask, cpus ? cpus : cpu_online_mask);
+	msdata.num_threads = cpumask_weight(stop_mask);
+	/* Set the initial state and stop online housekeeping cpus plus the @fn targets. */
+	set_state(&msdata, MULTI_STOP_PREPARE);
+	ret = stop_cpus(stop_mask, multi_cpu_stop, &msdata);
+	free_cpumask_var(stop_mask);
+
+	return ret;
+}
+
int stop_machine(cpu_stop_fn_t fn, void *data, const struct cpumask *cpus)
{
int ret;
--
2.47.0