[PATCH v7 1/4] x86/resctrl: Make input event for MBA Software Controller configurable
From: Tony Luck
Date: Thu Oct 03 2024 - 15:12:45 EST
The MBA Software Controller (mba_sc) is a feedback loop that uses
measurements of local memory bandwidth to adjust MBA throttling levels
to keep workloads in a resctrl group within a target bandwidth set in
the schemata file.
Users may want to use total memory bandwidth instead of local to handle
workloads that have poor NUMA localization.
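Update the once-per-second polling code to use the chosen event (local
or total memory bandwidth) instead of always using local bandwidth. The
event is initialized to local memory bandwidth when the software
controller is enabled, so the default input is still local bandwidth.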
Signed-off-by: Tony Luck <tony.luck@xxxxxxxxx>
---
include/linux/resctrl.h | 2 +
arch/x86/kernel/cpu/resctrl/monitor.c | 80 ++++++++++++--------------
arch/x86/kernel/cpu/resctrl/rdtgroup.c | 2 +
3 files changed, 40 insertions(+), 44 deletions(-)
diff --git a/include/linux/resctrl.h b/include/linux/resctrl.h
index d94abba1c716..ccb0f50dc18c 100644
--- a/include/linux/resctrl.h
+++ b/include/linux/resctrl.h
@@ -161,6 +161,7 @@ enum membw_throttle_mode {
* @throttle_mode: Bandwidth throttling mode when threads request
* different memory bandwidths
* @mba_sc: True if MBA software controller(mba_sc) is enabled
+ * @mba_mbps_event: Monitoring event guiding feedback loop when @mba_sc is true
* @mb_map: Mapping of memory B/W percentage to memory B/W delay
*/
struct resctrl_membw {
@@ -170,6 +171,7 @@ struct resctrl_membw {
bool arch_needs_linear;
enum membw_throttle_mode throttle_mode;
bool mba_sc;
+ enum resctrl_event_id mba_mbps_event;
u32 *mb_map;
};
diff --git a/arch/x86/kernel/cpu/resctrl/monitor.c b/arch/x86/kernel/cpu/resctrl/monitor.c
index 851b561850e0..2692ce7f708e 100644
--- a/arch/x86/kernel/cpu/resctrl/monitor.c
+++ b/arch/x86/kernel/cpu/resctrl/monitor.c
@@ -663,10 +663,11 @@ static int __mon_event_count(u32 closid, u32 rmid, struct rmid_read *rr)
*/
static void mbm_bw_count(u32 closid, u32 rmid, struct rmid_read *rr)
{
- u32 idx = resctrl_arch_rmid_idx_encode(closid, rmid);
- struct mbm_state *m = &rr->d->mbm_local[idx];
u64 cur_bw, bytes, cur_bytes;
+ struct mbm_state *m;
+ m = get_mbm_state(rr->d, closid, rmid, rr->evtid);
+ WARN_ON(!m);
cur_bytes = rr->val;
bytes = cur_bytes - m->prev_bw_bytes;
m->prev_bw_bytes = cur_bytes;
@@ -752,20 +753,22 @@ static void update_mba_bw(struct rdtgroup *rgrp, struct rdt_mon_domain *dom_mbm)
u32 closid, rmid, cur_msr_val, new_msr_val;
struct mbm_state *pmbm_data, *cmbm_data;
struct rdt_ctrl_domain *dom_mba;
+ enum resctrl_event_id evt_id;
struct rdt_resource *r_mba;
- u32 cur_bw, user_bw, idx;
struct list_head *head;
struct rdtgroup *entry;
+ u32 cur_bw, user_bw;
- if (!is_mbm_local_enabled())
+ if (!is_mbm_enabled())
return;
r_mba = &rdt_resources_all[RDT_RESOURCE_MBA].r_resctrl;
+ evt_id = r_mba->membw.mba_mbps_event;
closid = rgrp->closid;
rmid = rgrp->mon.rmid;
- idx = resctrl_arch_rmid_idx_encode(closid, rmid);
- pmbm_data = &dom_mbm->mbm_local[idx];
+ pmbm_data = get_mbm_state(dom_mbm, closid, rmid, evt_id);
+ WARN_ON(!pmbm_data);
dom_mba = get_ctrl_domain_from_cpu(smp_processor_id(), r_mba);
if (!dom_mba) {
@@ -784,7 +787,8 @@ static void update_mba_bw(struct rdtgroup *rgrp, struct rdt_mon_domain *dom_mbm)
*/
head = &rgrp->mon.crdtgrp_list;
list_for_each_entry(entry, head, mon.crdtgrp_list) {
- cmbm_data = &dom_mbm->mbm_local[entry->mon.rmid];
+ cmbm_data = get_mbm_state(dom_mbm, entry->closid, entry->mon.rmid, evt_id);
+ WARN_ON(!cmbm_data);
cur_bw += cmbm_data->prev_bw;
}
@@ -813,54 +817,42 @@ static void update_mba_bw(struct rdtgroup *rgrp, struct rdt_mon_domain *dom_mbm)
resctrl_arch_update_one(r_mba, dom_mba, closid, CDP_NONE, new_msr_val);
}
-static void mbm_update(struct rdt_resource *r, struct rdt_mon_domain *d,
- u32 closid, u32 rmid)
+static void mbm_update_one_event(struct rdt_resource *r, struct rdt_mon_domain *d,
+ u32 closid, u32 rmid, enum resctrl_event_id evtid)
{
+ struct rdt_resource *rmba = &rdt_resources_all[RDT_RESOURCE_MBA].r_resctrl;
struct rmid_read rr = {0};
rr.r = r;
rr.d = d;
+ rr.evtid = evtid;
+ rr.arch_mon_ctx = resctrl_arch_mon_ctx_alloc(rr.r, rr.evtid);
+ if (IS_ERR(rr.arch_mon_ctx)) {
+ pr_warn_ratelimited("Failed to allocate monitor context: %ld",
+ PTR_ERR(rr.arch_mon_ctx));
+ return;
+ }
+ __mon_event_count(closid, rmid, &rr);
+
+ if (is_mba_sc(NULL) && rr.evtid == rmba->membw.mba_mbps_event)
+ mbm_bw_count(closid, rmid, &rr);
+
+ resctrl_arch_mon_ctx_free(rr.r, rr.evtid, rr.arch_mon_ctx);
+}
+
+static void mbm_update(struct rdt_resource *r, struct rdt_mon_domain *d,
+ u32 closid, u32 rmid)
+{
/*
* This is protected from concurrent reads from user
* as both the user and we hold the global mutex.
*/
- if (is_mbm_total_enabled()) {
- rr.evtid = QOS_L3_MBM_TOTAL_EVENT_ID;
- rr.val = 0;
- rr.arch_mon_ctx = resctrl_arch_mon_ctx_alloc(rr.r, rr.evtid);
- if (IS_ERR(rr.arch_mon_ctx)) {
- pr_warn_ratelimited("Failed to allocate monitor context: %ld",
- PTR_ERR(rr.arch_mon_ctx));
- return;
- }
-
- __mon_event_count(closid, rmid, &rr);
-
- resctrl_arch_mon_ctx_free(rr.r, rr.evtid, rr.arch_mon_ctx);
- }
- if (is_mbm_local_enabled()) {
- rr.evtid = QOS_L3_MBM_LOCAL_EVENT_ID;
- rr.val = 0;
- rr.arch_mon_ctx = resctrl_arch_mon_ctx_alloc(rr.r, rr.evtid);
- if (IS_ERR(rr.arch_mon_ctx)) {
- pr_warn_ratelimited("Failed to allocate monitor context: %ld",
- PTR_ERR(rr.arch_mon_ctx));
- return;
- }
-
- __mon_event_count(closid, rmid, &rr);
-
- /*
- * Call the MBA software controller only for the
- * control groups and when user has enabled
- * the software controller explicitly.
- */
- if (is_mba_sc(NULL))
- mbm_bw_count(closid, rmid, &rr);
+ if (is_mbm_total_enabled())
+ mbm_update_one_event(r, d, closid, rmid, QOS_L3_MBM_TOTAL_EVENT_ID);
- resctrl_arch_mon_ctx_free(rr.r, rr.evtid, rr.arch_mon_ctx);
- }
+ if (is_mbm_local_enabled())
+ mbm_update_one_event(r, d, closid, rmid, QOS_L3_MBM_LOCAL_EVENT_ID);
}
/*
diff --git a/arch/x86/kernel/cpu/resctrl/rdtgroup.c b/arch/x86/kernel/cpu/resctrl/rdtgroup.c
index d7163b764c62..aedb30120d50 100644
--- a/arch/x86/kernel/cpu/resctrl/rdtgroup.c
+++ b/arch/x86/kernel/cpu/resctrl/rdtgroup.c
@@ -2505,6 +2505,7 @@ static void rdt_disable_ctx(void)
static int rdt_enable_ctx(struct rdt_fs_context *ctx)
{
+ struct rdt_resource *r = &rdt_resources_all[RDT_RESOURCE_MBA].r_resctrl;
int ret = 0;
if (ctx->enable_cdpl2) {
@@ -2520,6 +2521,7 @@ static int rdt_enable_ctx(struct rdt_fs_context *ctx)
}
if (ctx->enable_mba_mbps) {
+ r->membw.mba_mbps_event = QOS_L3_MBM_LOCAL_EVENT_ID;
ret = set_mba_sc(true);
if (ret)
goto out_cdpl3;
--
2.46.1