Re: [PATCH v4 10/13] x86/resctrl: Add the sysfs interface to read the event configuration
From: Moger, Babu
Date: Mon Sep 19 2022 - 12:07:52 EST
Hi Reinette,
On 9/16/22 10:59, Reinette Chatre wrote:
> Hi Babu,
>
> On 9/7/2022 11:01 AM, Babu Moger wrote:
>> The current event configuration can be viewed by the user by reading
>> the sysfs configuration file.
>>
>> Following are the types of events supported:
>>
>> ==== ===========================================================
>> Bits Description
>> ==== ===========================================================
>> 6 Dirty Victims from the QOS domain to all types of memory
>> 5 Reads to slow memory in the non-local NUMA domain
>> 4 Reads to slow memory in the local NUMA domain
>> 3 Non-temporal writes to non-local NUMA domain
>> 2 Non-temporal writes to local NUMA domain
>> 1 Reads to memory in the non-local NUMA domain
>> 0 Reads to memory in the local NUMA domain
>> ==== ===========================================================
>>
> ...
>
>> diff --git a/arch/x86/kernel/cpu/resctrl/internal.h b/arch/x86/kernel/cpu/resctrl/internal.h
>> index 45923eb4022f..96f439324d78 100644
>> --- a/arch/x86/kernel/cpu/resctrl/internal.h
>> +++ b/arch/x86/kernel/cpu/resctrl/internal.h
>> @@ -15,6 +15,7 @@
>> #define MSR_IA32_MBA_THRTL_BASE 0xd50
>> #define MSR_IA32_MBA_BW_BASE 0xc0000200
>> #define MSR_IA32_SMBA_BW_BASE 0xc0000280
>> +#define MSR_IA32_EVT_CFG_BASE 0xc0000400
>>
>> #define MSR_IA32_QM_CTR 0x0c8e
>> #define MSR_IA32_QM_EVTSEL 0x0c8d
>> @@ -50,6 +51,29 @@
>> */
>> #define MBM_CNTR_WIDTH_OFFSET_MAX (62 - MBM_CNTR_WIDTH_BASE)
>>
>> +/* Reads to Local DRAM Memory */
> What prompted the terminology switch between the
> changelog ("local NUMA") and the comments ("DRAM")?
Oh, OK. Will change it.
>
>> +#define READS_TO_LOCAL_MEM BIT(0)
>> +
>> +/* Reads to Remote DRAM Memory */
>> +#define READS_TO_REMOTE_MEM BIT(1)
>> +
>> +/* Non-Temporal Writes to Local Memory */
>> +#define NON_TEMP_WRITE_TO_LOCAL_MEM BIT(2)
>> +
>> +/* Non-Temporal Writes to Remote Memory */
>> +#define NON_TEMP_WRITE_TO_REMOTE_MEM BIT(3)
>> +
>> +/* Reads to Local Memory the system identifies as "Slow Memory" */
>> +#define READS_TO_LOCAL_S_MEM BIT(4)
>> +
>> +/* Reads to Remote Memory the system identifies as "Slow Memory" */
>> +#define READS_TO_REMOTE_S_MEM BIT(5)
>> +
>> +/* Dirty Victims to All Types of Memory */
>> +#define DIRTY_VICTIMS_TO_ALL_MEM BIT(6)
>> +
> Could you please fixup the comments to only capitalize
> the first word of each sentence (unless it is an acronym
> or required for some other reason)?
Sure.
>
>> +/* Max event bits supported */
>> +#define MAX_EVT_CONFIG_BITS GENMASK(6, 0)
>>
>> struct rdt_fs_context {
>> struct kernfs_fs_context kfc;
>> diff --git a/arch/x86/kernel/cpu/resctrl/rdtgroup.c b/arch/x86/kernel/cpu/resctrl/rdtgroup.c
>> index da11fdad204d..6f067c1ac7c1 100644
>> --- a/arch/x86/kernel/cpu/resctrl/rdtgroup.c
>> +++ b/arch/x86/kernel/cpu/resctrl/rdtgroup.c
>> @@ -254,8 +254,85 @@ static const struct kernfs_ops kf_mondata_ops = {
>> .seq_show = rdtgroup_mondata_show,
>> };
>>
>> +struct mon_config_info {
>> + u32 evtid;
>> + u32 mon_config;
>> +};
>> +
>> +/*
>> + * This is called via IPI to read the CQM/MBM counters
>> + * in a domain.
> This comment does not seem accurate - it is not reading the
> actual counters but the configuration of the counters?
Yes, that is correct.
>
>> + */
>> +void mon_event_config_read(void *info)
>> +{
>> + struct mon_config_info *mon_info = info;
>> + u32 h, msr_index;
>> +
>> + switch (mon_info->evtid) {
>> + case QOS_L3_MBM_TOTAL_EVENT_ID:
>> + msr_index = 0;
>> + break;
>> + case QOS_L3_MBM_LOCAL_EVENT_ID:
>> + msr_index = 1;
>> + break;
>> + default:
>> + /* Not expected to come here */
>> + return;
>> + }
>> +
>> + rdmsr(MSR_IA32_EVT_CFG_BASE + msr_index, mon_info->mon_config, h);
>> +}
>> +
>> +void mondata_config_read(struct rdt_domain *d, struct mon_config_info *mon_info)
>> +{
>> + smp_call_function_any(&d->cpu_mask, mon_event_config_read, mon_info, 1);
>> +}
>> +
>> +int rdtgroup_mondata_config_show(struct seq_file *m, void *arg)
>> +{
>> + struct kernfs_open_file *of = m->private;
>> + struct mon_config_info mon_info;
> Could you please initialize this struct? I think this is important considering
> that there is an (albeit unlikely) chance that uninitialized data can be returned
> to user space.
Sure.
Thanks
Babu
>> + struct rdt_hw_resource *hw_res;
>> + u32 resid, evtid, domid;
>> + struct rdtgroup *rdtgrp;
>> + struct rdt_resource *r;
>> + union mon_data_bits md;
>> + struct rdt_domain *d;
>> + int ret = 0;
>> +
>> + rdtgrp = rdtgroup_kn_lock_live(of->kn);
>> + if (!rdtgrp) {
>> + ret = -ENOENT;
>> + goto out;
>> + }
>> +
>> + md.priv = of->kn->priv;
>> + resid = md.u.rid;
>> + domid = md.u.domid;
>> + evtid = md.u.evtid;
>> +
>> + hw_res = &rdt_resources_all[resid];
>> + r = &hw_res->r_resctrl;
>> +
>> + d = rdt_find_domain(r, domid, NULL);
>> + if (IS_ERR_OR_NULL(d)) {
>> + ret = -ENOENT;
>> + goto out;
>> + }
>> +
>> + mon_info.evtid = evtid;
>> + mondata_config_read(d, &mon_info);
>> +
>> + seq_printf(m, "0x%x\n", mon_info.mon_config);
>> +
>> +out:
>> + rdtgroup_kn_unlock(of->kn);
>> + return ret;
>> +}
>> +
>> static const struct kernfs_ops kf_mondata_config_ops = {
>> .atomic_write_len = PAGE_SIZE,
>> + .seq_show = rdtgroup_mondata_config_show,
>> };
>
>> static bool is_cpu_list(struct kernfs_open_file *of)
>>
>>
> Reinette
--
Thanks
Babu Moger