Re: [PATCH v21 14/18] x86/resctrl: Fill out rmid_read structure for smp_call*() to read a counter

From: Reinette Chatre
Date: Tue Jun 25 2024 - 19:34:01 EST


Hi Tony,

On 6/21/24 3:38 PM, Tony Luck wrote:
mon_event_read() fills out most fields of the struct rmid_read that is
passed via an smp_call*() function to a CPU that is part of the correct
domain to read the monitor counters.

With Sub-NUMA Cluster (SNC) mode there are now two cases to handle:

1) Reading a file that returns a value for a single domain.
+ Choose the CPU to execute from the domain cpu_mask

2) Reading a file that must sum across domains sharing an L3 cache
instance.
+ Indicate to called code that a sum is needed by passing a NULL
rdt_mon_domain pointer.
+ Choose the CPU from the L3 shared_cpu_map.

Signed-off-by: Tony Luck <tony.luck@xxxxxxxxx>
---
arch/x86/kernel/cpu/resctrl/internal.h | 2 +-
arch/x86/kernel/cpu/resctrl/ctrlmondata.c | 40 ++++++++++++++++++-----
arch/x86/kernel/cpu/resctrl/monitor.c | 1 +
arch/x86/kernel/cpu/resctrl/rdtgroup.c | 2 +-
4 files changed, 35 insertions(+), 10 deletions(-)

diff --git a/arch/x86/kernel/cpu/resctrl/internal.h b/arch/x86/kernel/cpu/resctrl/internal.h
index f66c7f12e91e..e5013c8dac84 100644
--- a/arch/x86/kernel/cpu/resctrl/internal.h
+++ b/arch/x86/kernel/cpu/resctrl/internal.h
@@ -632,7 +632,7 @@ void mon_event_count(void *info);
int rdtgroup_mondata_show(struct seq_file *m, void *arg);
void mon_event_read(struct rmid_read *rr, struct rdt_resource *r,
struct rdt_mon_domain *d, struct rdtgroup *rdtgrp,
- int evtid, int first);
+ cpumask_t *cpumask, int evtid, int first);
void mbm_setup_overflow_handler(struct rdt_mon_domain *dom,
unsigned long delay_ms,
int exclude_cpu);
diff --git a/arch/x86/kernel/cpu/resctrl/ctrlmondata.c b/arch/x86/kernel/cpu/resctrl/ctrlmondata.c
index 3b9383612c35..7da2992e2c45 100644
--- a/arch/x86/kernel/cpu/resctrl/ctrlmondata.c
+++ b/arch/x86/kernel/cpu/resctrl/ctrlmondata.c
@@ -515,7 +515,7 @@ static int smp_mon_event_count(void *arg)
void mon_event_read(struct rmid_read *rr, struct rdt_resource *r,
struct rdt_mon_domain *d, struct rdtgroup *rdtgrp,
- int evtid, int first)
+ cpumask_t *cpumask, int evtid, int first)
{
int cpu;
@@ -537,7 +537,7 @@ void mon_event_read(struct rmid_read *rr, struct rdt_resource *r,
return;
}
- cpu = cpumask_any_housekeeping(&d->hdr.cpu_mask, RESCTRL_PICK_ANY_CPU);
+ cpu = cpumask_any_housekeeping(cpumask, RESCTRL_PICK_ANY_CPU);
/*
* cpumask_any_housekeeping() prefers housekeeping CPUs, but
@@ -546,7 +546,7 @@ void mon_event_read(struct rmid_read *rr, struct rdt_resource *r,
* counters on some platforms if its called in IRQ context.
*/
if (tick_nohz_full_cpu(cpu))
- smp_call_function_any(&d->hdr.cpu_mask, mon_event_count, rr, 1);
+ smp_call_function_any(cpumask, mon_event_count, rr, 1);
else
smp_call_on_cpu(cpu, smp_mon_event_count, rr, false);
@@ -575,16 +575,40 @@ int rdtgroup_mondata_show(struct seq_file *m, void *arg)
resid = md.u.rid;
domid = md.u.domid;
evtid = md.u.evtid;
-
r = &rdt_resources_all[resid].r_resctrl;
- hdr = rdt_find_domain(&r->mon_domains, domid, NULL);
- if (!hdr || WARN_ON_ONCE(hdr->type != RESCTRL_MON_DOMAIN)) {
+
+ if (md.u.sum) {
+ /*
+ * This file requires summing across all SNC domains that share

nit: this is resctrl fs code so "all SNC domains" -> "all domains"?

+ * the L3 cache id that was provided in the "domid" field of the
+ * mon_data_bits union. Search all domains in the resource for
+ * one that matches this cache id.
+ */
+ list_for_each_entry(d, &r->mon_domains, hdr.list) {
+ if (d->ci->id == domid) {
+ rr.ci = d->ci;
+ mon_event_read(&rr, r, NULL, rdtgrp,
+ &d->ci->shared_cpu_map, evtid, false);
+ goto checkresult;
+ }
+ }
ret = -ENOENT;
goto out;
+ } else {
+ /*
+ * This file provides data from a single domain. Search
+ * the resource to find the domain with "domid".
+ */
+ hdr = rdt_find_domain(&r->mon_domains, domid, NULL);
+ if (!hdr || WARN_ON_ONCE(hdr->type != RESCTRL_MON_DOMAIN)) {
+ ret = -ENOENT;
+ goto out;
+ }
+ d = container_of(hdr, struct rdt_mon_domain, hdr);
+ mon_event_read(&rr, r, d, rdtgrp, &d->hdr.cpu_mask, evtid, false);
}
- d = container_of(hdr, struct rdt_mon_domain, hdr);
- mon_event_read(&rr, r, d, rdtgrp, evtid, false);
+checkresult:
if (rr.err == -EIO)
seq_puts(m, "Error\n");
diff --git a/arch/x86/kernel/cpu/resctrl/monitor.c b/arch/x86/kernel/cpu/resctrl/monitor.c
index ff4e74594a19..877d898e8fd0 100644
--- a/arch/x86/kernel/cpu/resctrl/monitor.c
+++ b/arch/x86/kernel/cpu/resctrl/monitor.c
@@ -785,6 +785,7 @@ static void mbm_update(struct rdt_resource *r, struct rdt_mon_domain *d,
rr.first = false;
rr.r = r;
rr.d = d;
+ rr.ci = NULL;

This keeps using a struct rmid_read with random data from stack and initialize members based on
knowledge about how the called functions use this struct. Could you please add initialization to
all these places that use struct rmid_read with whatever is on the stack? This includes
mon_add_all_files() introduced in this series.
Something like below should do (in mon_add_all_files() - done as part of patch 10, mbm_update(),
and mon_add_all_files():
struct rmid_read rr = {0};
/*
* This is protected from concurrent reads from user
diff --git a/arch/x86/kernel/cpu/resctrl/rdtgroup.c b/arch/x86/kernel/cpu/resctrl/rdtgroup.c
index 14482d88b68c..5b980cb05c5f 100644
--- a/arch/x86/kernel/cpu/resctrl/rdtgroup.c
+++ b/arch/x86/kernel/cpu/resctrl/rdtgroup.c
@@ -3072,7 +3072,7 @@ static int mon_add_all_files(struct kernfs_node *kn, struct rdt_mon_domain *d,
return ret;
if (!do_sum && is_mbm_event(mevt->evtid))
- mon_event_read(&rr, r, d, prgrp, mevt->evtid, true);
+ mon_event_read(&rr, r, d, prgrp, &d->hdr.cpu_mask, mevt->evtid, true);
}
return 0;

Reinette