Re: [PATCH v5 1/8] x86/resctrl: Prepare for new domain scope

From: Reinette Chatre
Date: Mon Sep 25 2023 - 19:24:05 EST


Hi Tony,

On 8/29/2023 4:44 PM, Tony Luck wrote:
> Legacy resctrl features operated on subsets of CPUs in the system with

What is a "legacy" resctrl feature? I am not aware of anything in resctrl
that distinguishes a feature as legacy. Could "resctrl resource" be more
appropriate to match the resctrl implementation?

Please use "operate on" instead of "operated on".

> the defining attribute of each subset being an instance of a particular
> level of cache. E.g. all CPUs sharing an L3 cache would be part of the
> same domain.
>
> In preparation for features that are scoped at the NUMA node level
> change the code from explicit references to "cache_level" to a more
> generic scope. At this point the only options for this scope are groups
> of CPUs that share an L2 cache or L3 cache.
>
> No functional change.
>
> Signed-off-by: Tony Luck <tony.luck@xxxxxxxxx>
> ---
> include/linux/resctrl.h | 9 ++++++--
> arch/x86/kernel/cpu/resctrl/core.c | 27 ++++++++++++++++++-----
> arch/x86/kernel/cpu/resctrl/pseudo_lock.c | 15 ++++++++++++-
> arch/x86/kernel/cpu/resctrl/rdtgroup.c | 15 ++++++++++++-
> 4 files changed, 56 insertions(+), 10 deletions(-)
>
> diff --git a/include/linux/resctrl.h b/include/linux/resctrl.h
> index 8334eeacfec5..2db1244ae642 100644
> --- a/include/linux/resctrl.h
> +++ b/include/linux/resctrl.h
> @@ -144,13 +144,18 @@ struct resctrl_membw {
> struct rdt_parse_data;
> struct resctrl_schema;
>
> +enum resctrl_scope {
> + RESCTRL_L3_CACHE,
> + RESCTRL_L2_CACHE,
> +};

I'm curious why L3 appears before L2. This is not a big deal, but
I think the additional indirection that this introduces is
not necessary. As you had in an earlier version, this could be
RESCTRL_L2_CACHE = 2, and then the value can just be used directly
(after ensuring it is used in a valid context).

> +
> /**
> * struct rdt_resource - attributes of a resctrl resource
> * @rid: The index of the resource
> * @alloc_capable: Is allocation available on this machine
> * @mon_capable: Is monitor feature available on this machine
> * @num_rmid: Number of RMIDs available
> - * @cache_level: Which cache level defines scope of this resource
> + * @scope: Scope of this resource
> * @cache: Cache allocation related data
> * @membw: If the component has bandwidth controls, their properties.
> * @domains: All domains for this resource
> @@ -168,7 +173,7 @@ struct rdt_resource {
> bool alloc_capable;
> bool mon_capable;
> int num_rmid;
> - int cache_level;
> + enum resctrl_scope scope;
> struct resctrl_cache cache;
> struct resctrl_membw membw;
> struct list_head domains;
> diff --git a/arch/x86/kernel/cpu/resctrl/core.c b/arch/x86/kernel/cpu/resctrl/core.c
> index 030d3b409768..0d3bae523ecb 100644
> --- a/arch/x86/kernel/cpu/resctrl/core.c
> +++ b/arch/x86/kernel/cpu/resctrl/core.c
> @@ -65,7 +65,7 @@ struct rdt_hw_resource rdt_resources_all[] = {
> .r_resctrl = {
> .rid = RDT_RESOURCE_L3,
> .name = "L3",
> - .cache_level = 3,
> + .scope = RESCTRL_L3_CACHE,
> .domains = domain_init(RDT_RESOURCE_L3),
> .parse_ctrlval = parse_cbm,
> .format_str = "%d=%0*x",
> @@ -79,7 +79,7 @@ struct rdt_hw_resource rdt_resources_all[] = {
> .r_resctrl = {
> .rid = RDT_RESOURCE_L2,
> .name = "L2",
> - .cache_level = 2,
> + .scope = RESCTRL_L2_CACHE,
> .domains = domain_init(RDT_RESOURCE_L2),
> .parse_ctrlval = parse_cbm,
> .format_str = "%d=%0*x",
> @@ -93,7 +93,7 @@ struct rdt_hw_resource rdt_resources_all[] = {
> .r_resctrl = {
> .rid = RDT_RESOURCE_MBA,
> .name = "MB",
> - .cache_level = 3,
> + .scope = RESCTRL_L3_CACHE,
> .domains = domain_init(RDT_RESOURCE_MBA),
> .parse_ctrlval = parse_bw,
> .format_str = "%d=%*u",
> @@ -105,7 +105,7 @@ struct rdt_hw_resource rdt_resources_all[] = {
> .r_resctrl = {
> .rid = RDT_RESOURCE_SMBA,
> .name = "SMBA",
> - .cache_level = 3,
> + .scope = RESCTRL_L3_CACHE,
> .domains = domain_init(RDT_RESOURCE_SMBA),
> .parse_ctrlval = parse_bw,
> .format_str = "%d=%*u",
> @@ -487,6 +487,21 @@ static int arch_domain_mbm_alloc(u32 num_rmid, struct rdt_hw_domain *hw_dom)
> return 0;
> }
>
> +static int get_domain_id_from_scope(int cpu, enum resctrl_scope scope)
> +{
> + switch (scope) {
> + case RESCTRL_L3_CACHE:
> + return get_cpu_cacheinfo_id(cpu, 3);
> + case RESCTRL_L2_CACHE:
> + return get_cpu_cacheinfo_id(cpu, 2);
> + default:
> + WARN_ON_ONCE(1);
> + break;
> + }
> +
> + return -1;
> +}

Looking ahead at the next patch, some members of rdt_resources_all[]
are left with a default "0" initialization of mon_scope. In this
implementation 0 is a valid scope (RESCTRL_L3_CACHE), so such a missing
initialization would not be caught by defensive code like the above.
For the above to catch a case like this I think that there should be
some default initialization - but if you do move to something like you
had in v3 then this may not be necessary. If the L2 scope is 2,
L3 scope is 3, node scope is 4, then no valid scope value will be zero
and the above default can more accurately catch failure cases.

> +
> /*
> * domain_add_cpu - Add a cpu to a resource's domain list.
> *
> @@ -502,7 +517,7 @@ static int arch_domain_mbm_alloc(u32 num_rmid, struct rdt_hw_domain *hw_dom)
> */
> static void domain_add_cpu(int cpu, struct rdt_resource *r)
> {
> - int id = get_cpu_cacheinfo_id(cpu, r->cache_level);
> + int id = get_domain_id_from_scope(cpu, r->scope);
> struct list_head *add_pos = NULL;
> struct rdt_hw_domain *hw_dom;
> struct rdt_domain *d;
> @@ -552,7 +567,7 @@ static void domain_add_cpu(int cpu, struct rdt_resource *r)
>
> static void domain_remove_cpu(int cpu, struct rdt_resource *r)
> {
> - int id = get_cpu_cacheinfo_id(cpu, r->cache_level);
> + int id = get_domain_id_from_scope(cpu, r->scope);
> struct rdt_hw_domain *hw_dom;
> struct rdt_domain *d;
>
> diff --git a/arch/x86/kernel/cpu/resctrl/pseudo_lock.c b/arch/x86/kernel/cpu/resctrl/pseudo_lock.c
> index 458cb7419502..e79324676f57 100644
> --- a/arch/x86/kernel/cpu/resctrl/pseudo_lock.c
> +++ b/arch/x86/kernel/cpu/resctrl/pseudo_lock.c
> @@ -279,6 +279,7 @@ static void pseudo_lock_region_clear(struct pseudo_lock_region *plr)
> static int pseudo_lock_region_init(struct pseudo_lock_region *plr)
> {
> struct cpu_cacheinfo *ci;
> + int cache_level;
> int ret;
> int i;
>
> @@ -296,8 +297,20 @@ static int pseudo_lock_region_init(struct pseudo_lock_region *plr)
>
> plr->size = rdtgroup_cbm_to_size(plr->s->res, plr->d, plr->cbm);
>
> + switch (plr->s->res->scope) {
> + case RESCTRL_L3_CACHE:
> + cache_level = 3;
> + break;
> + case RESCTRL_L2_CACHE:
> + cache_level = 2;
> + break;
> + default:
> + WARN_ON_ONCE(1);
> + return -ENODEV;
> + }

I think this can be simplified by dropping the indirection - a single
WARN can test plr->s->res->scope for valid values (exiting on failure),
and then the scope can be used directly in the code below, provided
the enum value corresponds to a cache level.

> +
> for (i = 0; i < ci->num_leaves; i++) {
> - if (ci->info_list[i].level == plr->s->res->cache_level) {
> + if (ci->info_list[i].level == cache_level) {
> plr->line_size = ci->info_list[i].coherency_line_size;
> return 0;
> }
> diff --git a/arch/x86/kernel/cpu/resctrl/rdtgroup.c b/arch/x86/kernel/cpu/resctrl/rdtgroup.c
> index 725344048f85..f510414bf6ce 100644
> --- a/arch/x86/kernel/cpu/resctrl/rdtgroup.c
> +++ b/arch/x86/kernel/cpu/resctrl/rdtgroup.c
> @@ -1343,12 +1343,25 @@ unsigned int rdtgroup_cbm_to_size(struct rdt_resource *r,
> {
> struct cpu_cacheinfo *ci;
> unsigned int size = 0;
> + int cache_level;
> int num_b, i;
>
> + switch (r->scope) {
> + case RESCTRL_L3_CACHE:
> + cache_level = 3;
> + break;
> + case RESCTRL_L2_CACHE:
> + cache_level = 2;
> + break;
> + default:
> + WARN_ON_ONCE(1);
> + return size;
> + }
> +

Same here.

> num_b = bitmap_weight(&cbm, r->cache.cbm_len);
> ci = get_cpu_cacheinfo(cpumask_any(&d->cpu_mask));
> for (i = 0; i < ci->num_leaves; i++) {
> - if (ci->info_list[i].level == r->cache_level) {
> + if (ci->info_list[i].level == cache_level) {
> size = ci->info_list[i].size / r->cache.cbm_len * num_b;
> break;
> }


Reinette