Re: [PATCH v6 1/3] x86/resctrl: Add mount option "mba_MBps_event"

From: Moger, Babu
Date: Fri Dec 08 2023 - 13:29:34 EST


Hi Tony,

On 12/7/2023 1:56 PM, Tony Luck wrote:
The MBA Software Controller(mba_sc) is a feedback loop that uses
measurements of local memory bandwidth to adjust MBA throttling levels
to keep workloads in a resctrl group within a target bandwidth set in
the schemata file.

Users may want to use total memory bandwidth instead of local to handle
workloads that have poor NUMA localization.

Add a new mount option "mba_MBps_event={event_name}" where event_name
is one of "mbm_Local_bytes" or "mbm_total_bytes" that allows a user to
specify which monitoring event to use.

Update the once-per-second polling code to use the chosen event (local
or total memory bandwidth).

Signed-off-by: Tony Luck <tony.luck@xxxxxxxxx>
---
include/linux/resctrl.h | 2 +
arch/x86/kernel/cpu/resctrl/internal.h | 3 +-
arch/x86/kernel/cpu/resctrl/monitor.c | 21 +++++----
arch/x86/kernel/cpu/resctrl/rdtgroup.c | 61 +++++++++++++++++++++-----
4 files changed, 63 insertions(+), 24 deletions(-)

diff --git a/include/linux/resctrl.h b/include/linux/resctrl.h
index 66942d7fba7f..1feb3b2e64fa 100644
--- a/include/linux/resctrl.h
+++ b/include/linux/resctrl.h
@@ -129,6 +129,7 @@ enum membw_throttle_mode {
* @throttle_mode: Bandwidth throttling mode when threads request
* different memory bandwidths
* @mba_sc: True if MBA software controller(mba_sc) is enabled
+ * @mba_mbps_event: Event (local or total) for mba_sc
* @mb_map: Mapping of memory B/W percentage to memory B/W delay
*/
struct resctrl_membw {
@@ -138,6 +139,7 @@ struct resctrl_membw {
bool arch_needs_linear;
enum membw_throttle_mode throttle_mode;
bool mba_sc;
+ enum resctrl_event_id mba_mbps_event;
u32 *mb_map;
};
diff --git a/arch/x86/kernel/cpu/resctrl/internal.h b/arch/x86/kernel/cpu/resctrl/internal.h
index a4f1aa15f0a2..8b9b8f664324 100644
--- a/arch/x86/kernel/cpu/resctrl/internal.h
+++ b/arch/x86/kernel/cpu/resctrl/internal.h
@@ -58,7 +58,8 @@ struct rdt_fs_context {
struct kernfs_fs_context kfc;
bool enable_cdpl2;
bool enable_cdpl3;
- bool enable_mba_mbps;
+ bool enable_mba_mbps_local;
+ bool enable_mba_mbps_total;
bool enable_debug;
};
diff --git a/arch/x86/kernel/cpu/resctrl/monitor.c b/arch/x86/kernel/cpu/resctrl/monitor.c
index f136ac046851..d9e590f1cbc3 100644
--- a/arch/x86/kernel/cpu/resctrl/monitor.c
+++ b/arch/x86/kernel/cpu/resctrl/monitor.c
@@ -431,9 +431,10 @@ static int __mon_event_count(u32 rmid, struct rmid_read *rr)
*/
static void mbm_bw_count(u32 rmid, struct rmid_read *rr)
{
- struct mbm_state *m = &rr->d->mbm_local[rmid];
u64 cur_bw, bytes, cur_bytes;
+ struct mbm_state *m;
+ m = get_mbm_state(rr->d, rmid, rr->evtid);
cur_bytes = rr->val;
bytes = cur_bytes - m->prev_bw_bytes;
m->prev_bw_bytes = cur_bytes;
@@ -521,19 +522,21 @@ static void update_mba_bw(struct rdtgroup *rgrp, struct rdt_domain *dom_mbm)
u32 closid, rmid, cur_msr_val, new_msr_val;
struct mbm_state *pmbm_data, *cmbm_data;
u32 cur_bw, delta_bw, user_bw;
+ enum resctrl_event_id evt_id;
struct rdt_resource *r_mba;
struct rdt_domain *dom_mba;
struct list_head *head;
struct rdtgroup *entry;
- if (!is_mbm_local_enabled())
+ if (!is_mbm_enabled())
return;
r_mba = &rdt_resources_all[RDT_RESOURCE_MBA].r_resctrl;
+ evt_id = r_mba->membw.mba_mbps_event;
closid = rgrp->closid;
rmid = rgrp->mon.rmid;
- pmbm_data = &dom_mbm->mbm_local[rmid];
+ pmbm_data = get_mbm_state(dom_mbm, rmid, evt_id);
dom_mba = get_domain_from_cpu(smp_processor_id(), r_mba);
if (!dom_mba) {
@@ -553,7 +556,7 @@ static void update_mba_bw(struct rdtgroup *rgrp, struct rdt_domain *dom_mbm)
*/
head = &rgrp->mon.crdtgrp_list;
list_for_each_entry(entry, head, mon.crdtgrp_list) {
- cmbm_data = &dom_mbm->mbm_local[entry->mon.rmid];
+ cmbm_data = get_mbm_state(dom_mbm, entry->mon.rmid, evt_id);
cur_bw += cmbm_data->prev_bw;
delta_bw += cmbm_data->delta_bw;
}
@@ -616,18 +619,14 @@ static void mbm_update(struct rdt_resource *r, struct rdt_domain *d, int rmid)
rr.evtid = QOS_L3_MBM_TOTAL_EVENT_ID;
rr.val = 0;
__mon_event_count(rmid, &rr);
+ if (is_mba_sc(NULL) && rr.evtid == r->membw.mba_mbps_event)
+ mbm_bw_count(rmid, &rr);
}
if (is_mbm_local_enabled()) {
rr.evtid = QOS_L3_MBM_LOCAL_EVENT_ID;
rr.val = 0;
__mon_event_count(rmid, &rr);
-
- /*
- * Call the MBA software controller only for the
- * control groups and when user has enabled
- * the software controller explicitly.
- */
- if (is_mba_sc(NULL))
+ if (is_mba_sc(NULL) && rr.evtid == r->membw.mba_mbps_event)
mbm_bw_count(rmid, &rr);
}
}
diff --git a/arch/x86/kernel/cpu/resctrl/rdtgroup.c b/arch/x86/kernel/cpu/resctrl/rdtgroup.c
index 69a1de92384a..5f64a0b2597c 100644
--- a/arch/x86/kernel/cpu/resctrl/rdtgroup.c
+++ b/arch/x86/kernel/cpu/resctrl/rdtgroup.c
@@ -2294,7 +2294,7 @@ static bool supports_mba_mbps(void)
{
struct rdt_resource *r = &rdt_resources_all[RDT_RESOURCE_MBA].r_resctrl;
- return (is_mbm_local_enabled() &&
+ return (is_mbm_enabled() &&
r->alloc_capable && is_mba_linear());
}
@@ -2302,7 +2302,7 @@ static bool supports_mba_mbps(void)
* Enable or disable the MBA software controller
* which helps user specify bandwidth in MBps.
*/
-static int set_mba_sc(bool mba_sc)
+static int set_mba_sc(bool mba_sc, enum resctrl_event_id mba_mbps_event)
{
struct rdt_resource *r = &rdt_resources_all[RDT_RESOURCE_MBA].r_resctrl;
u32 num_closid = resctrl_arch_get_num_closid(r);
@@ -2313,6 +2313,7 @@ static int set_mba_sc(bool mba_sc)
return -EINVAL;
r->membw.mba_sc = mba_sc;
+ r->membw.mba_mbps_event = mba_mbps_event;
list_for_each_entry(d, &r->domains, list) {
for (i = 0; i < num_closid; i++)
@@ -2445,13 +2446,14 @@ static void rdt_disable_ctx(void)
{
resctrl_arch_set_cdp_enabled(RDT_RESOURCE_L3, false);
resctrl_arch_set_cdp_enabled(RDT_RESOURCE_L2, false);
- set_mba_sc(false);
+ set_mba_sc(false, QOS_L3_MBM_LOCAL_EVENT_ID);

This is kind of miss leading. Why do you pass "QOS_L3_MBM_LOCAL_EVENT_ID" here?

If you move the following initialization to rdt_enable_ctx, then you don't need to pass the second argument.

r->membw.mba_mbps_event = mba_mbps_event;

Thanks
Babu

resctrl_debug = false;
}
static int rdt_enable_ctx(struct rdt_fs_context *ctx)
{
+ enum resctrl_event_id mba_mbps_event;
int ret = 0;
if (ctx->enable_cdpl2) {
@@ -2466,8 +2468,12 @@ static int rdt_enable_ctx(struct rdt_fs_context *ctx)
goto out_cdpl2;
}
- if (ctx->enable_mba_mbps) {
- ret = set_mba_sc(true);
+ if (ctx->enable_mba_mbps_local || ctx->enable_mba_mbps_total) {
+ if (ctx->enable_mba_mbps_total)
+ mba_mbps_event = QOS_L3_MBM_TOTAL_EVENT_ID;
+ else
+ mba_mbps_event = QOS_L3_MBM_LOCAL_EVENT_ID;
+ ret = set_mba_sc(true, mba_mbps_event);
if (ret)
goto out_cdpl3;
}
@@ -2683,15 +2689,17 @@ enum rdt_param {
Opt_cdp,
Opt_cdpl2,
Opt_mba_mbps,
+ Opt_mba_mbps_event,
Opt_debug,
nr__rdt_params
};
static const struct fs_parameter_spec rdt_fs_parameters[] = {
- fsparam_flag("cdp", Opt_cdp),
- fsparam_flag("cdpl2", Opt_cdpl2),
- fsparam_flag("mba_MBps", Opt_mba_mbps),
- fsparam_flag("debug", Opt_debug),
+ fsparam_flag("cdp", Opt_cdp),
+ fsparam_flag("cdpl2", Opt_cdpl2),
+ fsparam_flag("mba_MBps", Opt_mba_mbps),
+ fsparam_string("mba_MBps_event", Opt_mba_mbps_event),
+ fsparam_flag("debug", Opt_debug),
{}
};
@@ -2715,7 +2723,25 @@ static int rdt_parse_param(struct fs_context *fc, struct fs_parameter *param)
case Opt_mba_mbps:
if (!supports_mba_mbps())
return -EINVAL;
- ctx->enable_mba_mbps = true;
+ if (is_mbm_local_enabled())
+ ctx->enable_mba_mbps_local = true;
+ else
+ return -EINVAL;
+ return 0;
+ case Opt_mba_mbps_event:
+ if (!supports_mba_mbps())
+ return -EINVAL;
+ if (!strcmp("mbm_local_bytes", param->string)) {
+ if (!is_mbm_local_enabled())
+ return -EINVAL;
+ ctx->enable_mba_mbps_local = true;
+ } else if (!strcmp("mbm_total_bytes", param->string)) {
+ if (!is_mbm_total_enabled())
+ return -EINVAL;
+ ctx->enable_mba_mbps_total = true;
+ } else {
+ return -EINVAL;
+ }
return 0;
case Opt_debug:
ctx->enable_debug = true;
@@ -3780,16 +3806,27 @@ static int rdtgroup_rename(struct kernfs_node *kn,
return ret;
}
+static char *mba_sc_event_opt_name(struct rdt_resource *r)
+{
+ if (r->membw.mba_mbps_event == QOS_L3_MBM_LOCAL_EVENT_ID)
+ return ",mba_MBps_event=mbm_local_bytes";
+ else if (r->membw.mba_mbps_event == QOS_L3_MBM_TOTAL_EVENT_ID)
+ return ",mba_MBps_event=mbm_total_bytes";
+ return "";
+}
+
static int rdtgroup_show_options(struct seq_file *seq, struct kernfs_root *kf)
{
+ struct rdt_resource *r_mba = &rdt_resources_all[RDT_RESOURCE_MBA].r_resctrl;
+
if (resctrl_arch_get_cdp_enabled(RDT_RESOURCE_L3))
seq_puts(seq, ",cdp");
if (resctrl_arch_get_cdp_enabled(RDT_RESOURCE_L2))
seq_puts(seq, ",cdpl2");
- if (is_mba_sc(&rdt_resources_all[RDT_RESOURCE_MBA].r_resctrl))
- seq_puts(seq, ",mba_MBps");
+ if (is_mba_sc(r_mba))
+ seq_puts(seq, mba_sc_event_opt_name(r_mba));
if (resctrl_debug)
seq_puts(seq, ",debug");