[PATCH v19 18/20] x86/resctrl: Enable RMID shared RMID mode on Sub-NUMA Cluster (SNC) systems

From: Tony Luck
Date: Tue May 28 2024 - 18:23:22 EST


There is an MSR which configures how RMIDs are distributed across SNC
nodes. When SNC is enabled bit 0 of this MSR must be cleared.

Add an architecture specific hook into domain_add_cpu_mon() to call
a function to set the MSR.

Signed-off-by: Tony Luck <tony.luck@xxxxxxxxx>
---
arch/x86/include/asm/msr-index.h | 1 +
arch/x86/kernel/cpu/resctrl/internal.h | 2 ++
arch/x86/kernel/cpu/resctrl/core.c | 2 ++
arch/x86/kernel/cpu/resctrl/monitor.c | 26 ++++++++++++++++++++++++++
4 files changed, 31 insertions(+)

diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
index e022e6eb766c..3cb8dd6311c3 100644
--- a/arch/x86/include/asm/msr-index.h
+++ b/arch/x86/include/asm/msr-index.h
@@ -1164,6 +1164,7 @@
#define MSR_IA32_QM_CTR 0xc8e
#define MSR_IA32_PQR_ASSOC 0xc8f
#define MSR_IA32_L3_CBM_BASE 0xc90
+#define MSR_RMID_SNC_CONFIG 0xca0
#define MSR_IA32_L2_CBM_BASE 0xd10
#define MSR_IA32_MBA_THRTL_BASE 0xd50

diff --git a/arch/x86/kernel/cpu/resctrl/internal.h b/arch/x86/kernel/cpu/resctrl/internal.h
index 7957fc38b71c..08520321f5d0 100644
--- a/arch/x86/kernel/cpu/resctrl/internal.h
+++ b/arch/x86/kernel/cpu/resctrl/internal.h
@@ -532,6 +532,8 @@ static inline bool resctrl_arch_get_cdp_enabled(enum resctrl_res_level l)

int resctrl_arch_set_cdp_enabled(enum resctrl_res_level l, bool enable);

+void arch_mon_domain_online(struct rdt_resource *r, struct rdt_mon_domain *d);
+
/*
* Get the cacheinfo structure of the cache associated with @cpu at level @level.
* cpuhp lock must be held.
diff --git a/arch/x86/kernel/cpu/resctrl/core.c b/arch/x86/kernel/cpu/resctrl/core.c
index 95ef8fe3cb50..1930fce9dfe9 100644
--- a/arch/x86/kernel/cpu/resctrl/core.c
+++ b/arch/x86/kernel/cpu/resctrl/core.c
@@ -615,6 +615,8 @@ static void domain_add_cpu_mon(int cpu, struct rdt_resource *r)
}
cpumask_set_cpu(cpu, &d->hdr.cpu_mask);

+ arch_mon_domain_online(r, d);
+
if (arch_domain_mbm_alloc(r->num_rmid, hw_dom)) {
mon_domain_free(hw_dom);
return;
diff --git a/arch/x86/kernel/cpu/resctrl/monitor.c b/arch/x86/kernel/cpu/resctrl/monitor.c
index e7a8e96821e5..c7559735e33a 100644
--- a/arch/x86/kernel/cpu/resctrl/monitor.c
+++ b/arch/x86/kernel/cpu/resctrl/monitor.c
@@ -1069,6 +1069,32 @@ static void l3_mon_evt_init(struct rdt_resource *r)
list_add_tail(&mbm_local_event.list, &r->evt_list);
}

+/*
+ * The power-on reset value of MSR_RMID_SNC_CONFIG is 0x1
+ * which indicates that RMIDs are configured in legacy mode.
+ * This mode is incompatible with Linux resctrl semantics
+ * as RMIDs are partitioned between SNC nodes, which requires
+ * a user to know which RMID is allocated to a task.
+ * Clearing bit 0 reconfigures the RMID counters for use
+ * in Sub-NUMA Cluster mode. This mode is better for Linux.
+ * The RMID space is divided between all SNC nodes with the
+ * RMIDs renumbered to start from zero in each node when
+ * counting operations from tasks. Code to read the counters
+ * must adjust RMID counter numbers based on SNC node. See
+ * logical_rmid_to_physical_rmid() for code that does this.
+ */
+void arch_mon_domain_online(struct rdt_resource *r, struct rdt_mon_domain *d)
+{
+ u64 val;
+
+ if (snc_nodes_per_l3_cache == 1)
+ return;
+
+ rdmsrl(MSR_RMID_SNC_CONFIG, val);
+ val &= ~BIT_ULL(0);
+ wrmsrl(MSR_RMID_SNC_CONFIG, val);
+}
+
int __init rdt_get_mon_l3_config(struct rdt_resource *r)
{
unsigned int mbm_offset = boot_cpu_data.x86_cache_mbm_width_offset;
--
2.45.0