Re: [PATCH v18 12/17] x86/resctrl: Create Sub-NUMA (SNC) monitor files

From: Reinette Chatre
Date: Wed May 22 2024 - 17:20:02 EST


Hi Tony,

On 5/15/2024 3:23 PM, Tony Luck wrote:
> When SNC mode is enabled, create subdirectories and file to monitor

"and file" -> "and files"?

> at the SNC node granularity. Monitor files at the L3 granularity are
> tagged with a "sum" attribute to indicate that all SNC nodes sharing
> an L3 cache should be read and summed to provide the result to the
> user.

Why go through the effort to create a generic "monitor display scope" and
then just always refer to it as L3 cache scope? One consequence is that
the code and changelog seem to have a disconnect.

>
> Note that the "domid" field for files that must sum across SNC domains
> has the L3 cache instance id, while non-summing files use the domain id.
>
> Also the "sum" files do not need to make a call to mon_event_read() to
> initialize the MBM counters. This will be handled by initializing the
> individual SNC nodes that share the L3.
>
> Signed-off-by: Tony Luck <tony.luck@xxxxxxxxx>
> ---
> arch/x86/kernel/cpu/resctrl/rdtgroup.c | 53 ++++++++++++++++++--------
> 1 file changed, 38 insertions(+), 15 deletions(-)
>
> diff --git a/arch/x86/kernel/cpu/resctrl/rdtgroup.c b/arch/x86/kernel/cpu/resctrl/rdtgroup.c
> index 7a6c40aefdcc..f0f468babdea 100644
> --- a/arch/x86/kernel/cpu/resctrl/rdtgroup.c
> +++ b/arch/x86/kernel/cpu/resctrl/rdtgroup.c
> @@ -3026,7 +3026,8 @@ static void rmdir_mondata_subdir_allrdtgrp(struct rdt_resource *r,
> }
>
> static int mon_add_all_files(struct kernfs_node *kn, struct rdt_mon_domain *d,
> - struct rdt_resource *r, struct rdtgroup *prgrp)
> + struct rdt_resource *r, struct rdtgroup *prgrp,
> + bool do_sum)
> {
> union mon_data_bits priv;
> struct mon_evt *mevt;
> @@ -3037,15 +3038,18 @@ static int mon_add_all_files(struct kernfs_node *kn, struct rdt_mon_domain *d,
> return -EPERM;
>
> priv.u.rid = r->rid;
> - priv.u.domid = d->hdr.id;
> + priv.u.domid = do_sum ? d->display_id : d->hdr.id;
> + priv.u.sum = do_sum;
> list_for_each_entry(mevt, &r->evt_list, list) {
> priv.u.evtid = mevt->evtid;
> ret = mon_addfile(kn, mevt->name, priv.priv);
> if (ret)
> return ret;
>
> - if (is_mbm_event(mevt->evtid))
> + if (!do_sum && is_mbm_event(mevt->evtid)) {
> + rr.sumdomains = 0;
> mon_event_read(&rr, r, d, prgrp, mevt->evtid, true);
> + }
> }
>
> return 0;
> @@ -3055,23 +3059,42 @@ static int mkdir_mondata_subdir(struct kernfs_node *parent_kn,
> struct rdt_mon_domain *d,
> struct rdt_resource *r, struct rdtgroup *prgrp)
> {
> - struct kernfs_node *kn;
> + struct kernfs_node *kn, *ckn;
> char name[32];
> + bool do_sum;
> int ret;
>
> - sprintf(name, "mon_%s_%02d", r->name, d->hdr.id);
> - /* create the directory */
> - kn = kernfs_create_dir(parent_kn, name, parent_kn->mode, prgrp);
> - if (IS_ERR(kn))
> - return PTR_ERR(kn);
> + do_sum = r->mon_scope != r->mon_display_scope;
> + sprintf(name, "mon_%s_%02d", r->name, d->display_id);

Why not just determine "display_id" dynamically here and pass it as a parameter
to mon_add_all_files()? Previously you mentioned that error handling is a problem
but this flow can surely handle errors, no?

> + kn = kernfs_find_and_get_ns(parent_kn, name, NULL);
> + if (!kn) {
> + /* create the directory */
> + kn = kernfs_create_dir(parent_kn, name, parent_kn->mode, prgrp);
> + if (IS_ERR(kn))
> + return PTR_ERR(kn);
>
> - ret = rdtgroup_kn_set_ugid(kn);
> - if (ret)
> - goto out_destroy;
> + ret = rdtgroup_kn_set_ugid(kn);
> + if (ret)
> + goto out_destroy;
> + ret = mon_add_all_files(kn, d, r, prgrp, do_sum);
> + if (ret)
> + goto out_destroy;
> + }
>
> - ret = mon_add_all_files(kn, d, r, prgrp);
> - if (ret)
> - goto out_destroy;
> + if (do_sum) {
> + sprintf(name, "mon_sub_%s_%02d", r->name, d->hdr.id);
> + ckn = kernfs_create_dir(kn, name, parent_kn->mode, prgrp);
> + if (IS_ERR(ckn))
> + goto out_destroy;
> +
> + ret = rdtgroup_kn_set_ugid(ckn);
> + if (ret)
> + goto out_destroy;
> +
> + ret = mon_add_all_files(ckn, d, r, prgrp, false);
> + if (ret)
> + goto out_destroy;
> + }
>
> kernfs_activate(kn);
> return 0;

Reinette