[PATCH 2/6] timers/migration: Abstract out hierarchy to prepare for CPU capacity awareness

From: Frederic Weisbecker

Date: Thu Apr 23 2026 - 12:55:06 EST


In order to prepare for separating CPUs of different capacities into
distinct hierarchies, introduce a hierarchy structure that holds the
per-level group lists and the root group, and make the group setup code
operate on it instead of on global variables.

Signed-off-by: Frederic Weisbecker <frederic@xxxxxxxxxx>
---
kernel/time/timer_migration.c | 100 +++++++++++++++++++++-------------
kernel/time/timer_migration.h | 10 ++++
2 files changed, 72 insertions(+), 38 deletions(-)

diff --git a/kernel/time/timer_migration.c b/kernel/time/timer_migration.c
index 1d0d3a4058d5..52e97b880b1c 100644
--- a/kernel/time/timer_migration.c
+++ b/kernel/time/timer_migration.c
@@ -102,7 +102,7 @@
* active CPU/group information atomic_try_cmpxchg() is used instead and only
* the per CPU tmigr_cpu->lock is held.
*
- * During the setup of groups tmigr_level_list is required. It is protected by
+ * During the setup of groups, hier->level_list is required. It is protected by
* @tmigr_mutex.
*
* When @timer_base->lock as well as tmigr related locks are required, the lock
@@ -416,13 +416,12 @@
*/

static DEFINE_MUTEX(tmigr_mutex);
-static struct list_head *tmigr_level_list __read_mostly;
+
+static struct tmigr_hierarchy *hierarchy;

static unsigned int tmigr_hierarchy_levels __read_mostly;
static unsigned int tmigr_crossnode_level __read_mostly;

-static struct tmigr_group *tmigr_root;
-
static DEFINE_PER_CPU(struct tmigr_cpu, tmigr_cpu);

/*
@@ -1653,14 +1652,15 @@ static void tmigr_init_group(struct tmigr_group *group, unsigned int lvl,
group->groupevt.ignore = true;
}

-static struct tmigr_group *tmigr_get_group(int node, unsigned int lvl)
+static struct tmigr_group *tmigr_get_group(struct tmigr_hierarchy *hier,
+ int node, unsigned int lvl)
{
struct tmigr_group *tmp, *group = NULL;

lockdep_assert_held(&tmigr_mutex);

/* Try to attach to an existing group first */
- list_for_each_entry(tmp, &tmigr_level_list[lvl], list) {
+ list_for_each_entry(tmp, &hier->level_list[lvl], list) {
/*
* If @lvl is below the cross NUMA node level, check whether
* this group belongs to the same NUMA node.
@@ -1694,14 +1694,15 @@ static struct tmigr_group *tmigr_get_group(int node, unsigned int lvl)
tmigr_init_group(group, lvl, node);

/* Setup successful. Add it to the hierarchy */
- list_add(&group->list, &tmigr_level_list[lvl]);
+ list_add(&group->list, &hier->level_list[lvl]);
trace_tmigr_group_set(group);
return group;
}

-static bool tmigr_init_root(struct tmigr_group *group, bool activate)
+static bool tmigr_init_root(struct tmigr_hierarchy *hier,
+ struct tmigr_group *group, bool activate)
{
- if (!group->parent && group != tmigr_root) {
+ if (!group->parent && group != hier->root) {
/*
* This is the new top-level, prepare its groupmask in advance
* to avoid accidents where yet another new top-level is
@@ -1717,11 +1718,12 @@ static bool tmigr_init_root(struct tmigr_group *group, bool activate)

}

-static void tmigr_connect_child_parent(struct tmigr_group *child,
+static void tmigr_connect_child_parent(struct tmigr_hierarchy *hier,
+ struct tmigr_group *child,
struct tmigr_group *parent,
bool activate)
{
- if (tmigr_init_root(parent, activate)) {
+ if (tmigr_init_root(hier, parent, activate)) {
/*
* The previous top level had prepared its groupmask already,
* simply account it in advance as the first child. If some groups
@@ -1757,10 +1759,10 @@ static void tmigr_connect_child_parent(struct tmigr_group *child,
trace_tmigr_connect_child_parent(child);
}

-static int tmigr_setup_groups(unsigned int cpu, unsigned int node,
- struct tmigr_group *start, bool activate)
+static int tmigr_setup_groups(struct tmigr_hierarchy *hier, unsigned int cpu,
+ unsigned int node, struct tmigr_group *start, bool activate)
{
- struct tmigr_group *group, *child, **stack;
+ struct tmigr_group *root = hier->root, *group, *child, **stack;
int i, top = 0, err = 0, start_lvl = 0;
bool root_mismatch = false;

@@ -1773,11 +1775,11 @@ static int tmigr_setup_groups(unsigned int cpu, unsigned int node,
start_lvl = start->level + 1;
}

- if (tmigr_root)
- root_mismatch = tmigr_root->numa_node != node;
+ if (root)
+ root_mismatch = root->numa_node != node;

for (i = start_lvl; i < tmigr_hierarchy_levels; i++) {
- group = tmigr_get_group(node, i);
+ group = tmigr_get_group(hier, node, i);
if (IS_ERR(group)) {
err = PTR_ERR(group);
i--;
@@ -1799,7 +1801,7 @@ static int tmigr_setup_groups(unsigned int cpu, unsigned int node,
if (group->parent)
break;
if ((!root_mismatch || i >= tmigr_crossnode_level) &&
- list_is_singular(&tmigr_level_list[i]))
+ list_is_singular(&hier->level_list[i]))
break;
}

@@ -1827,7 +1829,7 @@ static int tmigr_setup_groups(unsigned int cpu, unsigned int node,
tmc->tmgroup = group;
tmc->groupmask = BIT(group->num_children++);

- tmigr_init_root(group, activate);
+ tmigr_init_root(hier, group, activate);

trace_tmigr_connect_cpu_parent(tmc);

@@ -1835,7 +1837,7 @@ static int tmigr_setup_groups(unsigned int cpu, unsigned int node,
continue;
} else {
child = stack[i - 1];
- tmigr_connect_child_parent(child, group, activate);
+ tmigr_connect_child_parent(hier, child, group, activate);
}
}

@@ -1894,15 +1896,15 @@ static int tmigr_setup_groups(unsigned int cpu, unsigned int node,
}

/* Root update */
- if (list_is_singular(&tmigr_level_list[top])) {
- group = list_first_entry(&tmigr_level_list[top],
+ if (list_is_singular(&hier->level_list[top])) {
+ group = list_first_entry(&hier->level_list[top],
typeof(*group), list);
WARN_ON_ONCE(group->parent);
- if (tmigr_root) {
+ if (root) {
/* Old root should be the same or below */
- WARN_ON_ONCE(tmigr_root->level > top);
+ WARN_ON_ONCE(root->level > top);
}
- tmigr_root = group;
+ hier->root = group;
}
out:
kfree(stack);
@@ -1910,18 +1912,48 @@ static int tmigr_setup_groups(unsigned int cpu, unsigned int node,
return err;
}

+/*
+ * Return the timer migration hierarchy, allocating it on first use.
+ *
+ * The hierarchy and its per level group lists are set up by the first
+ * caller; later callers get the same instance. Must be called with
+ * tmigr_mutex held, which serializes the lazy allocation and the update
+ * of the global pointer.
+ *
+ * Returns the hierarchy, or ERR_PTR(-ENOMEM) if an allocation failed. On
+ * failure the global pointer is left untouched so a later call can retry.
+ */
+static struct tmigr_hierarchy *tmigr_get_hierarchy(void)
+{
+	struct tmigr_hierarchy *hier;
+
+	lockdep_assert_held(&tmigr_mutex);
+
+	if (hierarchy)
+		return hierarchy;
+
+	hier = kzalloc(sizeof(*hier), GFP_KERNEL);
+	if (!hier)
+		return ERR_PTR(-ENOMEM);
+
+	hier->level_list = kzalloc_objs(struct list_head,
+					tmigr_hierarchy_levels);
+	if (!hier->level_list) {
+		kfree(hier);
+		return ERR_PTR(-ENOMEM);
+	}
+
+	for (unsigned int i = 0; i < tmigr_hierarchy_levels; i++)
+		INIT_LIST_HEAD(&hier->level_list[i]);
+
+	/* Only expose the hierarchy once it is fully initialized */
+	hierarchy = hier;
+
+	return hier;
+}
+
static int tmigr_add_cpu(unsigned int cpu)
{
- struct tmigr_group *old_root = tmigr_root;
+ struct tmigr_hierarchy *hier;
+ struct tmigr_group *old_root;
int node = cpu_to_node(cpu);
int ret;

guard(mutex)(&tmigr_mutex);

- ret = tmigr_setup_groups(cpu, node, NULL, false);
+ hier = tmigr_get_hierarchy();
+ if (IS_ERR(hier))
+ return PTR_ERR(hier);
+
+ old_root = hier->root;
+
+ ret = tmigr_setup_groups(hier, cpu, node, NULL, false);

/* Root has changed? Connect the old one to the new */
- if (ret >= 0 && old_root && old_root != tmigr_root) {
+ if (ret >= 0 && old_root && old_root != hier->root) {
/*
* The target CPU must never do the prepare work, except
* on early boot when the boot CPU is the target. Otherwise
@@ -1935,7 +1967,7 @@ static int tmigr_add_cpu(unsigned int cpu)
* otherwise the old root may not be active as expected.
*/
WARN_ON_ONCE(!per_cpu_ptr(&tmigr_cpu, raw_smp_processor_id())->available);
- ret = tmigr_setup_groups(-1, old_root->numa_node, old_root, true);
+ ret = tmigr_setup_groups(hier, -1, old_root->numa_node, old_root, true);
}

return ret;
@@ -1970,7 +2002,7 @@ static int tmigr_cpu_prepare(unsigned int cpu)

static int __init tmigr_init(void)
{
- unsigned int cpulvl, nodelvl, cpus_per_node, i;
+ unsigned int cpulvl, nodelvl, cpus_per_node;
unsigned int nnodes = num_possible_nodes();
unsigned int ncpus = num_possible_cpus();
int ret = -ENOMEM;
@@ -2017,14 +2049,6 @@ static int __init tmigr_init(void)
*/
tmigr_crossnode_level = cpulvl;

- tmigr_level_list = kzalloc_objs(struct list_head,
- tmigr_hierarchy_levels);
- if (!tmigr_level_list)
- goto err;
-
- for (i = 0; i < tmigr_hierarchy_levels; i++)
- INIT_LIST_HEAD(&tmigr_level_list[i]);
-
pr_info("Timer migration: %d hierarchy levels; %d children per group;"
" %d crossnode level\n",
tmigr_hierarchy_levels, TMIGR_CHILDREN_PER_GROUP,
diff --git a/kernel/time/timer_migration.h b/kernel/time/timer_migration.h
index 70879cde6fdd..77df422e5f9a 100644
--- a/kernel/time/timer_migration.h
+++ b/kernel/time/timer_migration.h
@@ -5,6 +5,16 @@
/* Per group capacity. Must be a power of 2! */
#define TMIGR_CHILDREN_PER_GROUP 8

+/**
+ * struct tmigr_hierarchy - a hierarchy associated with a given CPU capacity.
+ * @level_list:	Array of list heads, one per hierarchy level, each linking
+ *		the tmigr groups that belong to that level
+ * @root:	The current root group of the hierarchy; NULL as long as no
+ *		root has been set
+ */
+struct tmigr_hierarchy {
+ struct list_head *level_list;
+ struct tmigr_group *root;
+};
+
/**
* struct tmigr_event - a timer event associated to a CPU
* @nextevt: The node to enqueue an event in the parent group queue
--
2.53.0