[RFC PATCH] x86/topo: Unify srat_detect_node among amd/intel/hygon
From: Nikola Z. Ivanov
Date: Sun Mar 29 2026 - 08:09:05 EST
This change is prompted by a warning observed after
commit 717b64d58cff ("x86/topo: Replace x86_has_numa_in_package")
when faking NUMA nodes on Intel.
For example:
qemu-system-x86_64 \
-kernel arch/x86/boot/bzImage \
-append "console=ttyS0 root=/dev/sda debug numa=fake=2" \
-hda $IMAGES/unstable.img \
-cpu qemu64,vendor=GenuineIntel \
-nographic \
-m 2G \
-smp 2
Will trigger:
[ 0.066755][ T0] ------------[ cut here ]------------
[ 0.066755][ T0] WARNING: arch/x86/kernel/smpboot.c:698 at
set_cpu_sibling_map+0xe41/0x1f90, CPU#1: swapper/1/0
[ 0.066755][ T0] Call Trace:
[ 0.066755][ T0] <TASK>
[ 0.066755][ T0] ap_starting+0x9e/0x140
[ 0.066755][ T0] ? __pfx_ap_starting+0x10/0x10
[ 0.066755][ T0] ? fpu__init_cpu_xstate+0x5c/0x320
[ 0.066755][ T0] start_secondary+0x66/0x110
[ 0.066755][ T0] common_startup_64+0x13e/0x147
[ 0.066755][ T0] </TASK>
smpboot.c suggests that the topology is invalid as
the CPUs are in the same package but different nodes.
Fix this by unifying the srat_detect_node() function
among AMD/Intel/Hygon and taking the AMD/Hygon approach
of falling back to the LLC ID when SRAT is not detected.
Place the function inside common.c and expose it in topology.h.
The Hygon code is already basically identical to the AMD code
except for the way it obtains the LLC ID.
We can reuse the Hygon way of obtaining it since we
already have the struct cpuinfo_x86 passed to us.
Signed-off-by: Nikola Z. Ivanov <zlatistiv@xxxxxxxxx>
---
This is marked RFC as I lack the context for the reason
why the Intel code looks the way it does. I can see
it went through a few changes in the 2008-2010 year range,
which makes me believe that the comment regarding
"not doing AMD heuristics for now" is long overdue.
Also, is a merge like this even desired in the first place?
Any feedback is appreciated!
arch/x86/kernel/cpu/amd.c | 74 ------------------------------------
arch/x86/kernel/cpu/common.c | 74 ++++++++++++++++++++++++++++++++++++
arch/x86/kernel/cpu/hygon.c | 73 -----------------------------------
arch/x86/kernel/cpu/intel.c | 17 ---------
include/linux/topology.h | 1 +
5 files changed, 75 insertions(+), 164 deletions(-)
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index 09de584e4c8f..7a4c804e6836 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -276,80 +276,6 @@ static void init_amd_k7(struct cpuinfo_x86 *c)
#endif
}
-#ifdef CONFIG_NUMA
-/*
- * To workaround broken NUMA config. Read the comment in
- * srat_detect_node().
- */
-static int nearby_node(int apicid)
-{
- int i, node;
-
- for (i = apicid - 1; i >= 0; i--) {
- node = __apicid_to_node[i];
- if (node != NUMA_NO_NODE && node_online(node))
- return node;
- }
- for (i = apicid + 1; i < MAX_LOCAL_APIC; i++) {
- node = __apicid_to_node[i];
- if (node != NUMA_NO_NODE && node_online(node))
- return node;
- }
- return first_node(node_online_map); /* Shouldn't happen */
-}
-#endif
-
-static void srat_detect_node(struct cpuinfo_x86 *c)
-{
-#ifdef CONFIG_NUMA
- int cpu = smp_processor_id();
- int node;
- unsigned apicid = c->topo.apicid;
-
- node = numa_cpu_node(cpu);
- if (node == NUMA_NO_NODE)
- node = per_cpu_llc_id(cpu);
-
- /*
- * On multi-fabric platform (e.g. Numascale NumaChip) a
- * platform-specific handler needs to be called to fixup some
- * IDs of the CPU.
- */
- if (x86_cpuinit.fixup_cpu_id)
- x86_cpuinit.fixup_cpu_id(c, node);
-
- if (!node_online(node)) {
- /*
- * Two possibilities here:
- *
- * - The CPU is missing memory and no node was created. In
- * that case try picking one from a nearby CPU.
- *
- * - The APIC IDs differ from the HyperTransport node IDs
- * which the K8 northbridge parsing fills in. Assume
- * they are all increased by a constant offset, but in
- * the same order as the HT nodeids. If that doesn't
- * result in a usable node fall back to the path for the
- * previous case.
- *
- * This workaround operates directly on the mapping between
- * APIC ID and NUMA node, assuming certain relationship
- * between APIC ID, HT node ID and NUMA topology. As going
- * through CPU mapping may alter the outcome, directly
- * access __apicid_to_node[].
- */
- int ht_nodeid = c->topo.initial_apicid;
-
- if (__apicid_to_node[ht_nodeid] != NUMA_NO_NODE)
- node = __apicid_to_node[ht_nodeid];
- /* Pick a nearby node */
- if (!node_online(node))
- node = nearby_node(apicid);
- }
- numa_set_node(cpu, node);
-#endif
-}
-
static void bsp_determine_snp(struct cpuinfo_x86 *c)
{
#ifdef CONFIG_ARCH_HAS_CC_PLATFORM
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index a8ff4376c286..05fcfa7a5cb5 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -2496,6 +2496,80 @@ void cpu_init(void)
load_fixmap_gdt(cpu);
}
+#ifdef CONFIG_NUMA
+/*
+ * To workaround broken NUMA config. Read the comment in
+ * srat_detect_node().
+ */
+static int nearby_node(int apicid)
+{
+ int i, node;
+
+ for (i = apicid - 1; i >= 0; i--) {
+ node = __apicid_to_node[i];
+ if (node != NUMA_NO_NODE && node_online(node))
+ return node;
+ }
+ for (i = apicid + 1; i < MAX_LOCAL_APIC; i++) {
+ node = __apicid_to_node[i];
+ if (node != NUMA_NO_NODE && node_online(node))
+ return node;
+ }
+ return first_node(node_online_map); /* Shouldn't happen */
+}
+#endif
+
+void srat_detect_node(struct cpuinfo_x86 *c)
+{
+#ifdef CONFIG_NUMA
+ int cpu = smp_processor_id();
+ int node;
+ unsigned int apicid = c->topo.apicid;
+
+ node = numa_cpu_node(cpu);
+ if (node == NUMA_NO_NODE)
+ node = c->topo.llc_id;
+
+ /*
+ * On multi-fabric platform (e.g. Numascale NumaChip) a
+ * platform-specific handler needs to be called to fixup some
+ * IDs of the CPU.
+ */
+ if (x86_cpuinit.fixup_cpu_id)
+ x86_cpuinit.fixup_cpu_id(c, node);
+
+ if (!node_online(node)) {
+ /*
+ * Two possibilities here:
+ *
+ * - The CPU is missing memory and no node was created. In
+ * that case try picking one from a nearby CPU.
+ *
+ * - The APIC IDs differ from the HyperTransport node IDs
+ * which the K8 northbridge parsing fills in. Assume
+ * they are all increased by a constant offset, but in
+ * the same order as the HT nodeids. If that doesn't
+ * result in a usable node fall back to the path for the
+ * previous case.
+ *
+ * This workaround operates directly on the mapping between
+ * APIC ID and NUMA node, assuming certain relationship
+ * between APIC ID, HT node ID and NUMA topology. As going
+ * through CPU mapping may alter the outcome, directly
+ * access __apicid_to_node[].
+ */
+ int ht_nodeid = c->topo.initial_apicid;
+
+ if (__apicid_to_node[ht_nodeid] != NUMA_NO_NODE)
+ node = __apicid_to_node[ht_nodeid];
+ /* Pick a nearby node */
+ if (!node_online(node))
+ node = nearby_node(apicid);
+ }
+ numa_set_node(cpu, node);
+#endif
+}
+
#ifdef CONFIG_MICROCODE_LATE_LOADING
/**
* store_cpu_caps() - Store a snapshot of CPU capabilities
diff --git a/arch/x86/kernel/cpu/hygon.c b/arch/x86/kernel/cpu/hygon.c
index 7f95a74e4c65..a33735094843 100644
--- a/arch/x86/kernel/cpu/hygon.c
+++ b/arch/x86/kernel/cpu/hygon.c
@@ -20,79 +20,6 @@
#include "cpu.h"
-#ifdef CONFIG_NUMA
-/*
- * To workaround broken NUMA config. Read the comment in
- * srat_detect_node().
- */
-static int nearby_node(int apicid)
-{
- int i, node;
-
- for (i = apicid - 1; i >= 0; i--) {
- node = __apicid_to_node[i];
- if (node != NUMA_NO_NODE && node_online(node))
- return node;
- }
- for (i = apicid + 1; i < MAX_LOCAL_APIC; i++) {
- node = __apicid_to_node[i];
- if (node != NUMA_NO_NODE && node_online(node))
- return node;
- }
- return first_node(node_online_map); /* Shouldn't happen */
-}
-#endif
-
-static void srat_detect_node(struct cpuinfo_x86 *c)
-{
-#ifdef CONFIG_NUMA
- int cpu = smp_processor_id();
- int node;
- unsigned int apicid = c->topo.apicid;
-
- node = numa_cpu_node(cpu);
- if (node == NUMA_NO_NODE)
- node = c->topo.llc_id;
-
- /*
- * On multi-fabric platform (e.g. Numascale NumaChip) a
- * platform-specific handler needs to be called to fixup some
- * IDs of the CPU.
- */
- if (x86_cpuinit.fixup_cpu_id)
- x86_cpuinit.fixup_cpu_id(c, node);
-
- if (!node_online(node)) {
- /*
- * Two possibilities here:
- *
- * - The CPU is missing memory and no node was created. In
- * that case try picking one from a nearby CPU.
- *
- * - The APIC IDs differ from the HyperTransport node IDs.
- * Assume they are all increased by a constant offset, but
- * in the same order as the HT nodeids. If that doesn't
- * result in a usable node fall back to the path for the
- * previous case.
- *
- * This workaround operates directly on the mapping between
- * APIC ID and NUMA node, assuming certain relationship
- * between APIC ID, HT node ID and NUMA topology. As going
- * through CPU mapping may alter the outcome, directly
- * access __apicid_to_node[].
- */
- int ht_nodeid = c->topo.initial_apicid;
-
- if (__apicid_to_node[ht_nodeid] != NUMA_NO_NODE)
- node = __apicid_to_node[ht_nodeid];
- /* Pick a nearby node */
- if (!node_online(node))
- node = nearby_node(apicid);
- }
- numa_set_node(cpu, node);
-#endif
-}
-
static void bsp_init_hygon(struct cpuinfo_x86 *c)
{
if (cpu_has(c, X86_FEATURE_CONSTANT_TSC)) {
diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c
index 646ff33c4651..12eeacb0de4b 100644
--- a/arch/x86/kernel/cpu/intel.c
+++ b/arch/x86/kernel/cpu/intel.c
@@ -467,23 +467,6 @@ static void intel_workarounds(struct cpuinfo_x86 *c)
}
#endif
-static void srat_detect_node(struct cpuinfo_x86 *c)
-{
-#ifdef CONFIG_NUMA
- unsigned node;
- int cpu = smp_processor_id();
-
- /* Don't do the funky fallback heuristics the AMD version employs
- for now. */
- node = numa_cpu_node(cpu);
- if (node == NUMA_NO_NODE || !node_online(node)) {
- /* reuse the value from init_cpu_to_node() */
- node = cpu_to_node(cpu);
- }
- numa_set_node(cpu, node);
-#endif
-}
-
static void init_cpuid_fault(struct cpuinfo_x86 *c)
{
u64 msr;
diff --git a/include/linux/topology.h b/include/linux/topology.h
index 6575af39fd10..9f71ad8a6983 100644
--- a/include/linux/topology.h
+++ b/include/linux/topology.h
@@ -41,6 +41,7 @@
#endif
int arch_update_cpu_topology(void);
+void srat_detect_node(struct cpuinfo_x86 *c);
/* Conform to ACPI 2.0 SLIT distance definitions */
#define LOCAL_DISTANCE 10
--
2.53.0