Re: [PATCH v5 2/8] x86/resctrl: Prepare for different scope for control/monitor operations

From: Reinette Chatre
Date: Mon Sep 25 2023 - 19:25:11 EST


Hi Tony,

On 8/29/2023 4:44 PM, Tony Luck wrote:
> Existing resctrl assumes that control and monitor operations on a

Please remove "Existing", it can just be "resctrl assumes ..."

> resource are performed at the same scope.
>
> Prepare for systems that use different scope (specifically L3 scope
> for cache control and NODE scope for cache occupancy and memory
> bandwidth monitoring).
>
> Create separate domain lists for control and monitor operations.
>
> No important functional change. But note that errors during
> initialization of either control or monitor functions on a domain would
> previously result in that domain being excluded from both control and
> monitor operations. Now the domains are allocated independently it is
> no longer required to disable both control and monitor operations if
> either fail.
>
> Signed-off-by: Tony Luck <tony.luck@xxxxxxxxx>
> ---
> include/linux/resctrl.h | 16 +-
> arch/x86/kernel/cpu/resctrl/internal.h | 6 +-
> arch/x86/kernel/cpu/resctrl/core.c | 227 +++++++++++++++-------
> arch/x86/kernel/cpu/resctrl/ctrlmondata.c | 2 +-
> arch/x86/kernel/cpu/resctrl/monitor.c | 2 +-
> arch/x86/kernel/cpu/resctrl/pseudo_lock.c | 2 +-
> arch/x86/kernel/cpu/resctrl/rdtgroup.c | 32 +--
> 7 files changed, 199 insertions(+), 88 deletions(-)
>
> diff --git a/include/linux/resctrl.h b/include/linux/resctrl.h
> index 2db1244ae642..33856943a787 100644
> --- a/include/linux/resctrl.h
> +++ b/include/linux/resctrl.h
> @@ -155,10 +155,12 @@ enum resctrl_scope {
> * @alloc_capable: Is allocation available on this machine
> * @mon_capable: Is monitor feature available on this machine
> * @num_rmid: Number of RMIDs available
> - * @scope: Scope of this resource
> + * @ctrl_scope: Scope of this resource for control functions
> + * @mon_scope: Scope of this resource for monitor functions
> * @cache: Cache allocation related data
> * @membw: If the component has bandwidth controls, their properties.
> - * @domains: All domains for this resource
> + * @domains: Control domains for this resource
> + * @mon_domains: Monitor domains for this resource
> * @name: Name to use in "schemata" file.
> * @data_width: Character width of data when displaying
> * @default_ctrl: Specifies default cache cbm or memory B/W percent.
> @@ -173,10 +175,12 @@ struct rdt_resource {
> bool alloc_capable;
> bool mon_capable;
> int num_rmid;
> - enum resctrl_scope scope;
> + enum resctrl_scope ctrl_scope;
> + enum resctrl_scope mon_scope;
> struct resctrl_cache cache;
> struct resctrl_membw membw;
> struct list_head domains;
> + struct list_head mondomains;

kerneldoc is "mon_domains" while member is "mondomains".
I do think to be consistent with other members that "mon_domains"
would be appropriate. I also think it will be very helpful it
domains is renamed to "ctrl_domains" to more accurately match
"ctrl_scope" and what it represents.

> char *name;
> int data_width;
> u32 default_ctrl;
> @@ -222,8 +226,10 @@ int resctrl_arch_update_one(struct rdt_resource *r, struct rdt_domain *d,
>
> u32 resctrl_arch_get_config(struct rdt_resource *r, struct rdt_domain *d,
> u32 closid, enum resctrl_conf_type type);
> -int resctrl_online_domain(struct rdt_resource *r, struct rdt_domain *d);
> -void resctrl_offline_domain(struct rdt_resource *r, struct rdt_domain *d);
> +int resctrl_online_ctrl_domain(struct rdt_resource *r, struct rdt_domain *d);
> +int resctrl_online_mon_domain(struct rdt_resource *r, struct rdt_domain *d);
> +void resctrl_offline_ctrl_domain(struct rdt_resource *r, struct rdt_domain *d);
> +void resctrl_offline_mon_domain(struct rdt_resource *r, struct rdt_domain *d);
>
> /**
> * resctrl_arch_rmid_read() - Read the eventid counter corresponding to rmid
> diff --git a/arch/x86/kernel/cpu/resctrl/internal.h b/arch/x86/kernel/cpu/resctrl/internal.h
> index 85ceaf9a31ac..31a5fc3b717f 100644
> --- a/arch/x86/kernel/cpu/resctrl/internal.h
> +++ b/arch/x86/kernel/cpu/resctrl/internal.h
> @@ -511,8 +511,10 @@ void rdtgroup_kn_unlock(struct kernfs_node *kn);
> int rdtgroup_kn_mode_restrict(struct rdtgroup *r, const char *name);
> int rdtgroup_kn_mode_restore(struct rdtgroup *r, const char *name,
> umode_t mask);
> -struct rdt_domain *rdt_find_domain(struct rdt_resource *r, int id,
> - struct list_head **pos);
> +struct rdt_domain *rdt_find_ctrldomain(struct list_head *h, int id,
> + struct list_head **pos);
> +struct rdt_domain *rdt_find_mondomain(struct list_head *h, int id,
> + struct list_head **pos);

This is not what I expected after our previous discussion. It
should not be necessary for caller to be aware of the different
lists. It looks to me like the original parameters of (struct rdt_resource *r,
int id, struct list_head **pos) can be maintained.

> ssize_t rdtgroup_schemata_write(struct kernfs_open_file *of,
> char *buf, size_t nbytes, loff_t off);
> int rdtgroup_schemata_show(struct kernfs_open_file *of,
> diff --git a/arch/x86/kernel/cpu/resctrl/core.c b/arch/x86/kernel/cpu/resctrl/core.c
> index 0d3bae523ecb..97f6f9715fdb 100644
> --- a/arch/x86/kernel/cpu/resctrl/core.c
> +++ b/arch/x86/kernel/cpu/resctrl/core.c
> @@ -57,7 +57,7 @@ static void
> mba_wrmsr_amd(struct rdt_domain *d, struct msr_param *m,
> struct rdt_resource *r);
>
> -#define domain_init(id) LIST_HEAD_INIT(rdt_resources_all[id].r_resctrl.domains)
> +#define domain_init(id, field) LIST_HEAD_INIT(rdt_resources_all[id].r_resctrl.field)
>

It may make the code easier to read if this is split into
a "mon_domain_init()" and "ctrl_domain_init()"

> struct rdt_hw_resource rdt_resources_all[] = {
> [RDT_RESOURCE_L3] =
> @@ -65,8 +65,10 @@ struct rdt_hw_resource rdt_resources_all[] = {
> .r_resctrl = {
> .rid = RDT_RESOURCE_L3,
> .name = "L3",
> - .scope = RESCTRL_L3_CACHE,
> - .domains = domain_init(RDT_RESOURCE_L3),
> + .ctrl_scope = RESCTRL_L3_CACHE,
> + .mon_scope = RESCTRL_L3_CACHE,
> + .domains = domain_init(RDT_RESOURCE_L3, domains),
> + .mondomains = domain_init(RDT_RESOURCE_L3, mondomains),
> .parse_ctrlval = parse_cbm,
> .format_str = "%d=%0*x",
> .fflags = RFTYPE_RES_CACHE,
> @@ -79,8 +81,8 @@ struct rdt_hw_resource rdt_resources_all[] = {
> .r_resctrl = {
> .rid = RDT_RESOURCE_L2,
> .name = "L2",
> - .scope = RESCTRL_L2_CACHE,
> - .domains = domain_init(RDT_RESOURCE_L2),
> + .ctrl_scope = RESCTRL_L2_CACHE,
> + .domains = domain_init(RDT_RESOURCE_L2, domains),
> .parse_ctrlval = parse_cbm,
> .format_str = "%d=%0*x",
> .fflags = RFTYPE_RES_CACHE,
> @@ -93,8 +95,8 @@ struct rdt_hw_resource rdt_resources_all[] = {
> .r_resctrl = {
> .rid = RDT_RESOURCE_MBA,
> .name = "MB",
> - .scope = RESCTRL_L3_CACHE,
> - .domains = domain_init(RDT_RESOURCE_MBA),
> + .ctrl_scope = RESCTRL_L3_CACHE,
> + .domains = domain_init(RDT_RESOURCE_MBA, domains),
> .parse_ctrlval = parse_bw,
> .format_str = "%d=%*u",
> .fflags = RFTYPE_RES_MB,
> @@ -105,8 +107,8 @@ struct rdt_hw_resource rdt_resources_all[] = {
> .r_resctrl = {
> .rid = RDT_RESOURCE_SMBA,
> .name = "SMBA",
> - .scope = RESCTRL_L3_CACHE,
> - .domains = domain_init(RDT_RESOURCE_SMBA),
> + .ctrl_scope = RESCTRL_L3_CACHE,
> + .domains = domain_init(RDT_RESOURCE_SMBA, domains),
> .parse_ctrlval = parse_bw,
> .format_str = "%d=%*u",
> .fflags = RFTYPE_RES_MB,
> @@ -384,15 +386,16 @@ void rdt_ctrl_update(void *arg)
> }
>
> /*
> - * rdt_find_domain - Find a domain in a resource that matches input resource id
> + * __rdt_find_domain - Find a domain in either the list of control or
> + * monitor domains that matches input resource id
> *
> * Search resource r's domain list to find the resource id. If the resource
> * id is found in a domain, return the domain. Otherwise, if requested by
> * caller, return the first domain whose id is bigger than the input id.
> * The domain list is sorted by id in ascending order.
> */
> -struct rdt_domain *rdt_find_domain(struct rdt_resource *r, int id,
> - struct list_head **pos)
> +static void *__rdt_find_domain(struct list_head *h, int id,
> + struct list_head **pos)
> {
> struct rdt_domain *d;
> struct list_head *l;
> @@ -400,7 +403,7 @@ struct rdt_domain *rdt_find_domain(struct rdt_resource *r, int id,
> if (id < 0)
> return ERR_PTR(-ENODEV);
>
> - list_for_each(l, &r->domains) {
> + list_for_each(l, h) {
> d = list_entry(l, struct rdt_domain, list);
> /* When id is found, return its domain. */
> if (id == d->id)
> @@ -416,6 +419,18 @@ struct rdt_domain *rdt_find_domain(struct rdt_resource *r, int id,
> return NULL;
> }
>
> +struct rdt_domain *rdt_find_ctrldomain(struct list_head *h, int id,
> + struct list_head **pos)
> +{
> + return __rdt_find_domain(h, id, pos);
> +}
> +
> +struct rdt_domain *rdt_find_mondomain(struct list_head *h, int id,
> + struct list_head **pos)
> +{
> + return __rdt_find_domain(h, id, pos);
> +}
> +
> static void setup_default_ctrlval(struct rdt_resource *r, u32 *dc)
> {
> struct rdt_hw_resource *hw_res = resctrl_to_arch_res(r);
> @@ -431,10 +446,15 @@ static void setup_default_ctrlval(struct rdt_resource *r, u32 *dc)
> }
>
> static void domain_free(struct rdt_hw_domain *hw_dom)
> +{
> + kfree(hw_dom->ctrl_val);
> + kfree(hw_dom);
> +}
> +
> +static void mondomain_free(struct rdt_hw_domain *hw_dom)
> {
> kfree(hw_dom->arch_mbm_total);
> kfree(hw_dom->arch_mbm_local);
> - kfree(hw_dom->ctrl_val);
> kfree(hw_dom);
> }
>
> @@ -502,6 +522,93 @@ static int get_domain_id_from_scope(int cpu, enum resctrl_scope scope)
> return -1;
> }
>
> +static void domain_add_cpu_ctrl(int cpu, struct rdt_resource *r)
> +{
> + int id = get_domain_id_from_scope(cpu, r->ctrl_scope);
> + struct list_head *add_pos = NULL;
> + struct rdt_hw_domain *hw_dom;
> + struct rdt_domain *d;
> + int err;
> +
> + d = rdt_find_ctrldomain(&r->domains, id, &add_pos);
> + if (IS_ERR(d)) {
> + pr_warn("Couldn't find scope id=%d for CPU %d\n", id, cpu);

I am not sure here ... this generates identical error messages
for control and monitor domain. How will the user know what failed?
Also, how does printing id help? If I understand correctly it can
only be -1 when the above prints.

> + return;
> + }
> +
> + if (d) {
> + cpumask_set_cpu(cpu, &d->cpu_mask);
> + if (r->cache.arch_has_per_cpu_cfg)
> + rdt_domain_reconfigure_cdp(r);
> + return;
> + }
> +
> + hw_dom = kzalloc_node(sizeof(*hw_dom), GFP_KERNEL, cpu_to_node(cpu));
> + if (!hw_dom)
> + return;
> +
> + d = &hw_dom->d_resctrl;
> + d->id = id;
> + cpumask_set_cpu(cpu, &d->cpu_mask);
> +
> + rdt_domain_reconfigure_cdp(r);
> +
> + if (domain_setup_ctrlval(r, d)) {
> + domain_free(hw_dom);
> + return;
> + }
> +
> + list_add_tail(&d->list, add_pos);
> +
> + err = resctrl_online_ctrl_domain(r, d);
> + if (err) {
> + list_del(&d->list);
> + domain_free(hw_dom);
> + }
> +}
> +
> +static void domain_add_cpu_mon(int cpu, struct rdt_resource *r)
> +{
> + int id = get_domain_id_from_scope(cpu, r->mon_scope);
> + struct rdt_hw_domain *hw_mondom;
> + struct list_head *add_pos = NULL;

Please maintain reverse fir ordering. Please check all code.

> + struct rdt_domain *d;
> + int err;
> +
> + d = rdt_find_mondomain(&r->mondomains, id, &add_pos);
> + if (IS_ERR(d)) {
> + pr_warn("Couldn't find scope id=%d for CPU %d\n", id, cpu);
> + return;
> + }
> +
> + if (d) {
> + cpumask_set_cpu(cpu, &d->cpu_mask);
> +

This is an unnecessary empty line that distracts from how the rest
of the function looks.

> + return;
> + }
> +
> + hw_mondom = kzalloc_node(sizeof(*hw_mondom), GFP_KERNEL, cpu_to_node(cpu));
> + if (!hw_mondom)
> + return;
> +
> + d = &hw_mondom->d_resctrl;
> + d->id = id;
> + cpumask_set_cpu(cpu, &d->cpu_mask);
> +
> + if (arch_domain_mbm_alloc(r->num_rmid, hw_mondom)) {
> + mondomain_free(hw_mondom);
> + return;
> + }
> +
> + list_add_tail(&d->list, add_pos);
> +
> + err = resctrl_online_mon_domain(r, d);
> + if (err) {
> + list_del(&d->list);
> + mondomain_free(hw_mondom);
> + }
> +}
> +
> /*
> * domain_add_cpu - Add a cpu to a resource's domain list.
> *

Note that this leaves the comments about list management here while all
the list management code is moved away.

> @@ -517,70 +624,28 @@ static int get_domain_id_from_scope(int cpu, enum resctrl_scope scope)
> */
> static void domain_add_cpu(int cpu, struct rdt_resource *r)
> {
> - int id = get_domain_id_from_scope(cpu, r->scope);
> - struct list_head *add_pos = NULL;
> - struct rdt_hw_domain *hw_dom;
> - struct rdt_domain *d;
> - int err;
> -
> - d = rdt_find_domain(r, id, &add_pos);
> - if (IS_ERR(d)) {
> - pr_warn("Couldn't find cache id for CPU %d\n", cpu);
> - return;
> - }
> -
> - if (d) {
> - cpumask_set_cpu(cpu, &d->cpu_mask);
> - if (r->cache.arch_has_per_cpu_cfg)
> - rdt_domain_reconfigure_cdp(r);
> - return;
> - }
> -
> - hw_dom = kzalloc_node(sizeof(*hw_dom), GFP_KERNEL, cpu_to_node(cpu));
> - if (!hw_dom)
> - return;
> -
> - d = &hw_dom->d_resctrl;
> - d->id = id;
> - cpumask_set_cpu(cpu, &d->cpu_mask);
> -
> - rdt_domain_reconfigure_cdp(r);
> -
> - if (r->alloc_capable && domain_setup_ctrlval(r, d)) {
> - domain_free(hw_dom);
> - return;
> - }
> -
> - if (r->mon_capable && arch_domain_mbm_alloc(r->num_rmid, hw_dom)) {
> - domain_free(hw_dom);
> - return;
> - }
> -
> - list_add_tail(&d->list, add_pos);
> -
> - err = resctrl_online_domain(r, d);
> - if (err) {
> - list_del(&d->list);
> - domain_free(hw_dom);
> - }
> + if (r->alloc_capable)
> + domain_add_cpu_ctrl(cpu, r);
> + if (r->mon_capable)
> + domain_add_cpu_mon(cpu, r);
> }
>
> -static void domain_remove_cpu(int cpu, struct rdt_resource *r)
> +static void domain_remove_cpu_ctrl(int cpu, struct rdt_resource *r)
> {
> - int id = get_domain_id_from_scope(cpu, r->scope);
> + int id = get_domain_id_from_scope(cpu, r->ctrl_scope);
> struct rdt_hw_domain *hw_dom;
> struct rdt_domain *d;
>
> - d = rdt_find_domain(r, id, NULL);
> + d = rdt_find_ctrldomain(&r->domains, id, NULL);
> if (IS_ERR_OR_NULL(d)) {
> - pr_warn("Couldn't find cache id for CPU %d\n", cpu);
> + pr_warn("Couldn't find scope id=%d for CPU %d\n", id, cpu);
> return;
> }
> hw_dom = resctrl_to_arch_dom(d);
>
> cpumask_clear_cpu(cpu, &d->cpu_mask);
> if (cpumask_empty(&d->cpu_mask)) {
> - resctrl_offline_domain(r, d);
> + resctrl_offline_ctrl_domain(r, d);
> list_del(&d->list);
>
> /*
> @@ -593,6 +658,30 @@ static void domain_remove_cpu(int cpu, struct rdt_resource *r)
>
> return;
> }
> +}
> +
> +static void domain_remove_cpu_mon(int cpu, struct rdt_resource *r)
> +{
> + int id = get_domain_id_from_scope(cpu, r->mon_scope);
> + struct rdt_hw_domain *hw_mondom;
> + struct rdt_domain *d;
> +
> + d = rdt_find_mondomain(&r->mondomains, id, NULL);
> + if (IS_ERR_OR_NULL(d)) {
> + pr_warn("Couldn't find scope id=%d for CPU %d\n", id, cpu);
> + return;
> + }
> + hw_mondom = resctrl_to_arch_dom(d);
> +
> + cpumask_clear_cpu(cpu, &d->cpu_mask);
> + if (cpumask_empty(&d->cpu_mask)) {
> + resctrl_offline_mon_domain(r, d);
> + list_del(&d->list);
> +

This is an awkward empty line.

> + mondomain_free(hw_mondom);
> +
> + return;
> + }
>
> if (r == &rdt_resources_all[RDT_RESOURCE_L3].r_resctrl) {
> if (is_mbm_enabled() && cpu == d->mbm_work_cpu) {

...

Reinette