[PATCH v3 04/13] sched/isolation: Fix RCU protection for runtime-mutable cpumask callers
From: Jing Wu
Date: Wed Jun 17 2026 - 23:13:52 EST
housekeeping_update_types() installs new cpumasks via rcu_assign_pointer()
and frees the old ones after synchronize_rcu(). A caller that dereferences
a housekeeping cpumask pointer outside an RCU read-side critical section
can therefore access freed memory.
Fix the four call sites:
kernel/sched/core.c (get_nohz_timer_target, HK_TYPE_KERNEL_NOISE):
The guard(rcu)() was acquired after housekeeping_cpumask(). Move it
before the call and switch to housekeeping_cpumask_rcu() so hk_mask
is read inside the RCU read-side critical section. HK_TYPE_KERNEL_NOISE
is updated at runtime by housekeeping_update_types(); this fix is
required for correctness.
drivers/hv/channel_mgmt.c (init_vp_index, HK_TYPE_MANAGED_IRQ):
The function stored the raw pointer in a local variable and used it
across GFP_KERNEL allocations. Those allocations can sleep, so an RCU
read-side critical section cannot span them. Allocate both cpumask_var_t
buffers first, then snapshot the housekeeping mask under a brief
rcu_read_lock() and use the snapshot throughout. HK_TYPE_MANAGED_IRQ is
updated at runtime; this fix is required for correctness.
kernel/time/hrtimer.c (get_target_base, HK_TYPE_TIMER):
cpumask_any_and() against housekeeping_cpumask(HK_TYPE_TIMER) was
called without any lock. Wrap with rcu_read_lock()/rcu_read_unlock()
and use housekeeping_cpumask_rcu(). HK_TYPE_TIMER is not changed at
runtime in this series; this is a defensive, future-proofing fix that
also satisfies the housekeeping_dereference_check() lockdep annotation.
hrtimers_cpu_dying() is already safe: it runs under the cpu_hotplug_lock
write side, which housekeeping_dereference_check() already permits.
arch/arm64/kernel/topology.c (arch_freq_get_on_cpu, HK_TYPE_TICK):
cpumask_intersects() against housekeeping_cpumask(HK_TYPE_TICK) was
called without any lock. Evaluate under rcu_read_lock() and store
the boolean result before releasing the lock. HK_TYPE_TICK is not
changed at runtime in this series; this is a defensive fix.
Signed-off-by: Jing Wu <realwujing@xxxxxxxxx>
Signed-off-by: Qiliang Yuan <yuanql9@xxxxxxxxxxxxxxx>
---
arch/arm64/kernel/topology.c | 9 ++++++--
drivers/hv/channel_mgmt.c | 50 ++++++++++++++++++++++++++++++--------------
kernel/sched/core.c | 3 +--
kernel/time/hrtimer.c | 5 ++++-
4 files changed, 46 insertions(+), 21 deletions(-)
diff --git a/arch/arm64/kernel/topology.c b/arch/arm64/kernel/topology.c
index b32f13358fbb1..8f4329b57cea7 100644
--- a/arch/arm64/kernel/topology.c
+++ b/arch/arm64/kernel/topology.c
@@ -212,8 +212,13 @@ int arch_freq_get_on_cpu(int cpu)
if (!policy)
return -EINVAL;
- if (!cpumask_intersects(policy->related_cpus,
- housekeeping_cpumask(HK_TYPE_TICK))) {
+ bool no_hk_in_policy;
+
+ rcu_read_lock();
+ no_hk_in_policy = !cpumask_intersects(policy->related_cpus,
+ housekeeping_cpumask_rcu(HK_TYPE_TICK));
+ rcu_read_unlock();
+ if (no_hk_in_policy) {
cpufreq_cpu_put(policy);
return -EOPNOTSUPP;
}
diff --git a/drivers/hv/channel_mgmt.c b/drivers/hv/channel_mgmt.c
index 84eb0a6a0b546..fc5247e92e1b3 100644
--- a/drivers/hv/channel_mgmt.c
+++ b/drivers/hv/channel_mgmt.c
@@ -750,26 +750,43 @@ static void init_vp_index(struct vmbus_channel *channel)
{
bool perf_chn = hv_is_perf_channel(channel);
u32 i, ncpu = num_online_cpus();
- cpumask_var_t available_mask;
+ cpumask_var_t available_mask, hk_snap;
struct cpumask *allocated_mask;
- const struct cpumask *hk_mask = housekeeping_cpumask(HK_TYPE_MANAGED_IRQ);
u32 target_cpu;
int numa_node;
- if (!perf_chn ||
- !alloc_cpumask_var(&available_mask, GFP_KERNEL) ||
- cpumask_empty(hk_mask)) {
- /*
- * If the channel is not a performance critical
- * channel, bind it to VMBUS_CONNECT_CPU.
- * In case alloc_cpumask_var() fails, bind it to
- * VMBUS_CONNECT_CPU.
- * If all the cpus are isolated, bind it to
- * VMBUS_CONNECT_CPU.
- */
+ if (!perf_chn) {
+ channel->target_cpu = VMBUS_CONNECT_CPU;
+ return;
+ }
+
+ if (!alloc_cpumask_var(&available_mask, GFP_KERNEL)) {
+ channel->target_cpu = VMBUS_CONNECT_CPU;
+ hv_set_allocated_cpu(VMBUS_CONNECT_CPU);
+ return;
+ }
+
+ /*
+ * Snapshot HK_TYPE_MANAGED_IRQ cpumask under RCU read lock.
+ * housekeeping_update_types() frees the old cpumask after
+ * synchronize_rcu(), so we must not hold the pointer beyond an
+ * RCU read-side critical section.
+ */
+ if (!alloc_cpumask_var(&hk_snap, GFP_KERNEL)) {
+ free_cpumask_var(available_mask);
+ channel->target_cpu = VMBUS_CONNECT_CPU;
+ hv_set_allocated_cpu(VMBUS_CONNECT_CPU);
+ return;
+ }
+ rcu_read_lock();
+ cpumask_copy(hk_snap, housekeeping_cpumask_rcu(HK_TYPE_MANAGED_IRQ));
+ rcu_read_unlock();
+
+ if (cpumask_empty(hk_snap)) {
+ free_cpumask_var(hk_snap);
+ free_cpumask_var(available_mask);
channel->target_cpu = VMBUS_CONNECT_CPU;
- if (perf_chn)
- hv_set_allocated_cpu(VMBUS_CONNECT_CPU);
+ hv_set_allocated_cpu(VMBUS_CONNECT_CPU);
return;
}
@@ -788,7 +805,7 @@ static void init_vp_index(struct vmbus_channel *channel)
retry:
cpumask_xor(available_mask, allocated_mask, cpumask_of_node(numa_node));
- cpumask_and(available_mask, available_mask, hk_mask);
+ cpumask_and(available_mask, available_mask, hk_snap);
if (cpumask_empty(available_mask)) {
/*
@@ -809,6 +826,7 @@ static void init_vp_index(struct vmbus_channel *channel)
channel->target_cpu = target_cpu;
+ free_cpumask_var(hk_snap);
free_cpumask_var(available_mask);
}
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index b8871449d3c69..371b509d92164 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -1272,9 +1272,8 @@ int get_nohz_timer_target(void)
default_cpu = cpu;
}
- hk_mask = housekeeping_cpumask(HK_TYPE_KERNEL_NOISE);
-
guard(rcu)();
+ hk_mask = housekeeping_cpumask_rcu(HK_TYPE_KERNEL_NOISE);
for_each_domain(cpu, sd) {
for_each_cpu_and(i, sched_domain_span(sd), hk_mask) {
diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c
index 5bd6efe598f0f..18e17a9dad67b 100644
--- a/kernel/time/hrtimer.c
+++ b/kernel/time/hrtimer.c
@@ -242,8 +242,11 @@ static bool hrtimer_suitable_target(struct hrtimer *timer, struct hrtimer_clock_
static inline struct hrtimer_cpu_base *get_target_base(struct hrtimer_cpu_base *base, bool pinned)
{
if (!hrtimer_base_is_online(base)) {
- int cpu = cpumask_any_and(cpu_online_mask, housekeeping_cpumask(HK_TYPE_TIMER));
+ int cpu;
+ rcu_read_lock();
+ cpu = cpumask_any_and(cpu_online_mask, housekeeping_cpumask_rcu(HK_TYPE_TIMER));
+ rcu_read_unlock();
return &per_cpu(hrtimer_bases, cpu);
}
--
2.43.0