Re: [PATCH v3 08/10] x86/resctrl: Add the sysfs interface to read the event configuration

From: Reinette Chatre
Date: Wed Aug 24 2022 - 17:16:41 EST


Hi Babu,

On 8/22/2022 6:44 AM, Babu Moger wrote:
> The current event configuration can be viewed by the user by reading
> the sysfs configuration file.
>
> Following are the types of events supported.
> ====================================================================
> Bits Description
> 6 Dirty Victims from the QOS domain to all types of memory
> 5 Reads to slow memory in the non-local NUMA domain
> 4 Reads to slow memory in the local NUMA domain
> 3 Non-temporal writes to non-local NUMA domain
> 2 Non-temporal writes to local NUMA domain
> 1 Reads to memory in the non-local NUMA domain
> 0 Reads to memory in the local NUMA domain
>
> By default the mbm_total_bytes configuration is set to 0x7f to count
> all the types of events and mbm_local_bytes configuration is set to
> 0x15 to count all the local memory events.
>
> $cat /sys/fs/resctrl/mon_data/mon_L3_00/mbm_total_config
> 0x7f
>
> $cat /sys/fs/resctrl/mon_data/mon_L3_00/mbm_local_config
> 0x15
>
> Signed-off-by: Babu Moger <babu.moger@xxxxxxx>
> Reviewed-by: Ingo Molnar <mingo@xxxxxxxxxx>
> ---
> arch/x86/kernel/cpu/resctrl/internal.h | 21 ++++++++++
> arch/x86/kernel/cpu/resctrl/rdtgroup.c | 70 ++++++++++++++++++++++++++++++++
> 2 files changed, 91 insertions(+)
>
> diff --git a/arch/x86/kernel/cpu/resctrl/internal.h b/arch/x86/kernel/cpu/resctrl/internal.h
> index fc725f5e9024..457666709386 100644
> --- a/arch/x86/kernel/cpu/resctrl/internal.h
> +++ b/arch/x86/kernel/cpu/resctrl/internal.h
> @@ -15,6 +15,7 @@
> #define MSR_IA32_MBA_THRTL_BASE 0xd50
> #define MSR_IA32_MBA_BW_BASE 0xc0000200
> #define MSR_IA32_SMBA_BW_BASE 0xc0000280
> +#define MSR_IA32_EVT_CFG_BASE 0xc0000400
>
> #define MSR_IA32_QM_CTR 0x0c8e
> #define MSR_IA32_QM_EVTSEL 0x0c8d
> @@ -50,6 +51,26 @@
> */
> #define MBM_CNTR_WIDTH_OFFSET_MAX (62 - MBM_CNTR_WIDTH_BASE)
>
> +/* Reads to Local DRAM Memory */
> +#define READS_TO_LOCAL_MEM BIT(0)
> +
> +/* Reads to Remote DRAM Memory */
> +#define READS_TO_REMOTE_MEM BIT(1)
> +
> +/* Non-Temporal Writes to Local Memory */
> +#define NON_TEMP_WRITE_TO_LOCAL_MEM BIT(2)
> +
> +/* Non-Temporal Writes to Remote Memory */
> +#define NON_TEMP_WRITE_TO_REMOTE_MEM BIT(3)
> +
> +/* Reads to Local Memory the system identifies as "Slow Memory" */
Seems unexpected character slipped into "identifies".

> +#define READS_TO_LOCAL_S_MEM BIT(4)
> +
> +/* Reads to Remote Memory the system identifies as "Slow Memory" */

here also

> +#define READS_TO_REMOTE_S_MEM BIT(5)
> +
> +/* Dirty Victims to All Types of Memory */
> +#define DIRTY_VICTIS_TO_ALL_MEM BIT(6)

Is this intended to be "DIRTY_VICTIMS_TO_ALL_MEM" ?

>
> struct rdt_fs_context {
> struct kernfs_fs_context kfc;
> diff --git a/arch/x86/kernel/cpu/resctrl/rdtgroup.c b/arch/x86/kernel/cpu/resctrl/rdtgroup.c
> index 30d2182d4fda..e1847d49fa15 100644
> --- a/arch/x86/kernel/cpu/resctrl/rdtgroup.c
> +++ b/arch/x86/kernel/cpu/resctrl/rdtgroup.c
> @@ -254,8 +254,78 @@ static const struct kernfs_ops kf_mondata_ops = {
> .seq_show = rdtgroup_mondata_show,
> };
>
> +/*
> + * This is called via IPI to read the CQM/MBM counters
> + * in a domain.
> + */
> +void mon_event_config_read(void *info)
> +{
> + union mon_data_bits *md = info;
> + u32 evtid = md->u.evtid;
> + u32 h, msr_index;
> +
> + switch (evtid) {
> + case QOS_L3_MBM_TOTAL_EVENT_ID:
> + msr_index = 0;
> + break;
> + case QOS_L3_MBM_LOCAL_EVENT_ID:
> + msr_index = 1;
> + break;
> + default:
> + return; /* Not expected to come here */

Please no inline comments.

> + }
> +
> + rdmsr(MSR_IA32_EVT_CFG_BASE + msr_index, md->u.mon_config, h);
> +}
> +
> +void mondata_config_read(struct rdt_domain *d, union mon_data_bits *md)
> +{
> + smp_call_function_any(&d->cpu_mask, mon_event_config_read, md, 1);
> +}
> +
> +int rdtgroup_mondata_config_show(struct seq_file *m, void *arg)
> +{
> + struct kernfs_open_file *of = m->private;
> + struct rdt_hw_resource *hw_res;
> + u32 resid, evtid, domid;
> + struct rdtgroup *rdtgrp;
> + struct rdt_resource *r;
> + union mon_data_bits md;
> + struct rdt_domain *d;
> + int ret = 0;
> +
> + rdtgrp = rdtgroup_kn_lock_live(of->kn);
> + if (!rdtgrp) {
> + ret = -ENOENT;
> + goto out;
> + }
> +
> + md.priv = of->kn->priv;
> + resid = md.u.rid;
> + domid = md.u.domid;
> + evtid = md.u.evtid;
> +
> + hw_res = &rdt_resources_all[resid];
> + r = &hw_res->r_resctrl;
> +
> + d = rdt_find_domain(r, domid, NULL);
> + if (IS_ERR_OR_NULL(d)) {
> + ret = -ENOENT;
> + goto out;
> + }
> +
> + mondata_config_read(d, &md);
> +
> + seq_printf(m, "0x%x\n", md.u.mon_config);

Looking at this patch and the next, the sysfs files are introduced to read
from and write to the configuration register. From what I can tell the
data is never used internally (what did I miss?). Why is the value of the
configuration register stored?

> +
> +out:
> + rdtgroup_kn_unlock(of->kn);
> + return ret;
> +}
> +
> static const struct kernfs_ops kf_mondata_config_ops = {
> .atomic_write_len = PAGE_SIZE,
> + .seq_show = rdtgroup_mondata_config_show,
> };
>
> static bool is_cpu_list(struct kernfs_open_file *of)
>
>


Reinette