Re: [PATCH v2 08/23] x86/resctrl: Create mba_sc configuration in the rdt_domain

From: Reinette Chatre
Date: Fri Oct 15 2021 - 18:26:35 EST


Hi James,

On 10/1/2021 9:02 AM, James Morse wrote:
To support resctrl's MBA software controller, the architecture must provide
a second configuration array to hold the mbps_val from user-space.

This complicates the interface between the architecture code.

This complicates the interface between the architecture code and ... ?


Make the filesystem parts of resctrl create an array for the mba_sc
values when is_mba_sc() is set to true. The software controller
can be changed to use this, allowing the architecture code to only
consider the values configured in hardware.

This changes significantly more than just where the mbps_val array is hosted. It also changes how the life cycle of this array is managed. Previously it followed the domain, whether mba_sc was enabled or not. Now that it depends on mba_sc it is managed quite differently.

Could the changelog be upfront about this change and its motivation? Stating this would make this much easier to review and also the later patches where the original mbps_val initialization code is removed without replacement.

Signed-off-by: James Morse <james.morse@xxxxxxx>
---
Changes since v1:
* Added missing error handling to mba_sc_domain_allocate() in
domain_setup_mon_state()
* Added comment about mba_sc_domain_allocate() races
* Squashed out struct resctrl_mba_sc
* Moved mount time alloc/free calls to set_mba_sc().
* Removed mount check in resctrl_offline_domain()
* Reword commit message
---
arch/x86/kernel/cpu/resctrl/internal.h | 1 -
arch/x86/kernel/cpu/resctrl/rdtgroup.c | 67 ++++++++++++++++++++++++++
include/linux/resctrl.h | 6 +++
3 files changed, 73 insertions(+), 1 deletion(-)

diff --git a/arch/x86/kernel/cpu/resctrl/internal.h b/arch/x86/kernel/cpu/resctrl/internal.h
index e12b55f815bf..a7e2cbce29d5 100644
--- a/arch/x86/kernel/cpu/resctrl/internal.h
+++ b/arch/x86/kernel/cpu/resctrl/internal.h
@@ -36,7 +36,6 @@
#define MBM_OVERFLOW_INTERVAL 1000
#define MAX_MBA_BW 100u
#define MBA_IS_LINEAR 0x4
-#define MBA_MAX_MBPS U32_MAX
#define MAX_MBA_BW_AMD 0x800
#define MBM_CNTR_WIDTH_OFFSET_AMD 20
diff --git a/arch/x86/kernel/cpu/resctrl/rdtgroup.c b/arch/x86/kernel/cpu/resctrl/rdtgroup.c
index 38670bb810cb..9d402bc8bdff 100644
--- a/arch/x86/kernel/cpu/resctrl/rdtgroup.c
+++ b/arch/x86/kernel/cpu/resctrl/rdtgroup.c
@@ -1889,6 +1889,64 @@ void rdt_domain_reconfigure_cdp(struct rdt_resource *r)
l3_qos_cfg_update(&hw_res->cdp_enabled);
}
+static int mba_sc_domain_allocate(struct rdt_resource *r, struct rdt_domain *d)
+{
+ u32 num_closid = resctrl_arch_get_num_closid(r);
+ int cpu = cpumask_any(&d->cpu_mask);
+ int i;
+
+ /*
+ * d->mbps_val is allocated by a call to this function in set_mba_sc(),
+ * and domain_setup_mon_state(). Both calls are guarded by is_mba_sc(),
+ * which can only return true while the filesystem is mounted. The
+ * two calls are prevented from racing as rdt_get_tree() takes the
+ * cpuhp read lock before calling rdt_enable_ctx(ctx), which prevents
+ * it running concurrently with resctrl_online_domain().
+ */
+ lockdep_assert_cpus_held();
+
+ d->mbps_val = kcalloc_node(num_closid, sizeof(*d->mbps_val),
+ GFP_KERNEL, cpu_to_node(cpu));
+ if (!d->mbps_val)
+ return -ENOMEM;
+
+ for (i = 0; i < num_closid; i++)
+ d->mbps_val[i] = MBA_MAX_MBPS;
+
+ return 0;
+}
+
+static int mba_sc_allocate(struct rdt_resource *r)
+{
+ struct rdt_domain *d;
+ int ret;
+

Please initialize ret.

+ list_for_each_entry(d, &r->domains, list) {
+ ret = mba_sc_domain_allocate(r, d);
+ if (ret)
+ break;
+ }
+
+ return ret;
+}
+
+static void mba_sc_domain_destroy(struct rdt_resource *r,
+ struct rdt_domain *d)
+{
+ kfree(d->mbps_val);
+ d->mbps_val = NULL;
+}
+
+static void mba_sc_destroy(struct rdt_resource *r)
+{
+ struct rdt_domain *d;
+
+ lockdep_assert_cpus_held();
+
+ list_for_each_entry(d, &r->domains, list)
+ mba_sc_domain_destroy(r, d);
+}
+
/*
* Enable or disable the MBA software controller
* which helps user specify bandwidth in MBps.
@@ -1911,6 +1969,10 @@ static int set_mba_sc(bool mba_sc)
setup_default_ctrlval(r, hw_dom->ctrl_val, hw_dom->mbps_val);
}
+ if (is_mba_sc(r))
+ return mba_sc_allocate(r);
+
+ mba_sc_destroy(r);
return 0;
}
@@ -3259,6 +3321,8 @@ void resctrl_offline_domain(struct rdt_resource *r, struct rdt_domain *d)
__check_limbo(d, true);
cancel_delayed_work(&d->cqm_limbo);
}
+ if (is_mba_sc(r))
+ mba_sc_domain_destroy(r, d);
bitmap_free(d->rmid_busy_llc);
kfree(d->mbm_total);
kfree(d->mbm_local);
@@ -3291,6 +3355,9 @@ static int domain_setup_mon_state(struct rdt_resource *r, struct rdt_domain *d)
}
}
+ if (is_mba_sc(r))
+ return mba_sc_domain_allocate(r, d);
+
return 0;
}

Could this be done symmetrically? That is, allocate in resctrl_online_domain() and free in resctrl_offline_domain().

diff --git a/include/linux/resctrl.h b/include/linux/resctrl.h
index 5d283bdd6162..355660d46612 100644
--- a/include/linux/resctrl.h
+++ b/include/linux/resctrl.h
@@ -15,6 +15,9 @@ int proc_resctrl_show(struct seq_file *m,
#endif
+/* max value for struct resctrl_mba_sc's mbps_val */
+#define MBA_MAX_MBPS U32_MAX

struct resctrl_mba_sc?

+
/**
* enum resctrl_conf_type - The type of configuration.
* @CDP_NONE: No prioritisation, both code and data are controlled or monitored.
@@ -53,6 +56,8 @@ struct resctrl_staged_config {
* @cqm_work_cpu: worker CPU for CQM h/w counters
* @plr: pseudo-locked region (if any) associated with domain
* @staged_config: parsed configuration to be applied
+ * @mbps_val: Array of user specified control values for mba_sc,
+ * indexed by closid

Could this inherit some of the useful kerneldoc associated with the mbps_val being replaced? That is, it exists when mba_sc is enabled and contains bandwidth values in MBps.

*/
struct rdt_domain {
struct list_head list;
@@ -67,6 +72,7 @@ struct rdt_domain {
int cqm_work_cpu;
struct pseudo_lock_region *plr;
struct resctrl_staged_config staged_config[CDP_NUM_TYPES];
+ u32 *mbps_val;
};
/**


Reinette