Re: [RFC] perf/x86/rapl: Getting zero on energy-cores event
From: Jiri Olsa
Date: Mon Mar 04 2019 - 08:05:04 EST
On Fri, Mar 01, 2019 at 11:59:40AM -0500, Liang, Kan wrote:
>
>
> On 3/1/2019 6:42 AM, Jiri Olsa wrote:
> > hi,
> > I'm getting zero counts for energy-cores event on broadwell-x
> > server (model 0x4f)
> >
> > I checked intel_rapl powercap driver and it won't export the
> > counter if it rdmsr returns zero on it
> >
> > the SDM also says the rdmsr returns zero for some models
> >
> > I made changes on perf rapl pmu below to remove sysfs events
> > if their rdmsr returns zero just to ilustrate the case
> >
> > I think there's probably better fix, but I'm not sure if
> > there's a reason for zero counters to be exposed..?
>
> +Stephane, who is the author.
>
> I'm OK to hide the zero counters.
>
>
> >
> > thoughts? thanks,
> > jirka
> >
> >
> > ---
> > diff --git a/arch/x86/events/intel/rapl.c b/arch/x86/events/intel/rapl.c
> > index 94dc564146ca..effb9a9d2368 100644
> > --- a/arch/x86/events/intel/rapl.c
> > +++ b/arch/x86/events/intel/rapl.c
> > @@ -54,6 +54,7 @@
> > #include <linux/module.h>
> > #include <linux/slab.h>
> > #include <linux/perf_event.h>
> > +#include <linux/nospec.h>
> > #include <asm/cpu_device_id.h>
> > #include <asm/intel-family.h>
> > #include "../perf_event.h"
> > @@ -346,10 +347,18 @@ static void rapl_pmu_event_del(struct perf_event *event, int flags)
> > rapl_pmu_event_stop(event, PERF_EF_UPDATE);
> > }
> > +static unsigned int event_msr[NR_RAPL_DOMAINS] = {
> > + MSR_PP0_ENERGY_STATUS,
> > + MSR_PKG_ENERGY_STATUS,
> > + MSR_DRAM_ENERGY_STATUS,
> > + MSR_PP1_ENERGY_STATUS,
> > + MSR_PLATFORM_ENERGY_STATUS,
> > +};
> > +
> > static int rapl_pmu_event_init(struct perf_event *event)
> > {
> > u64 cfg = event->attr.config & RAPL_EVENT_MASK;
> > - int bit, msr, ret = 0;
> > + int bit, ret = 0;
> > struct rapl_pmu *pmu;
> > /* only look at RAPL events */
> > @@ -365,33 +374,12 @@ static int rapl_pmu_event_init(struct perf_event *event)
> > event->event_caps |= PERF_EV_CAP_READ_ACTIVE_PKG;
> > - /*
> > - * check event is known (determines counter)
> > - */
> > - switch (cfg) {
> > - case INTEL_RAPL_PP0:
> > - bit = RAPL_IDX_PP0_NRG_STAT;
> > - msr = MSR_PP0_ENERGY_STATUS;
> > - break;
> > - case INTEL_RAPL_PKG:
> > - bit = RAPL_IDX_PKG_NRG_STAT;
> > - msr = MSR_PKG_ENERGY_STATUS;
> > - break;
> > - case INTEL_RAPL_RAM:
> > - bit = RAPL_IDX_RAM_NRG_STAT;
> > - msr = MSR_DRAM_ENERGY_STATUS;
> > - break;
> > - case INTEL_RAPL_PP1:
> > - bit = RAPL_IDX_PP1_NRG_STAT;
> > - msr = MSR_PP1_ENERGY_STATUS;
> > - break;
> > - case INTEL_RAPL_PSYS:
> > - bit = RAPL_IDX_PSYS_NRG_STAT;
> > - msr = MSR_PLATFORM_ENERGY_STATUS;
> > - break;
> > - default:
> > + if (!cfg || cfg >= NR_RAPL_DOMAINS)
> > return -EINVAL;
> > - }
> > +
> > + cfg = array_index_nospec(cfg, NR_RAPL_DOMAINS);
> > + bit = cfg - 1;
> > +
> > /* check event supported */
> > if (!(rapl_cntr_mask & (1 << bit)))
> > return -EINVAL;
> > @@ -406,7 +394,7 @@ static int rapl_pmu_event_init(struct perf_event *event)
> > return -EINVAL;
> > event->cpu = pmu->cpu;
> > event->pmu_private = pmu;
> > - event->hw.event_base = msr;
> > + event->hw.event_base = event_msr[bit];
> > event->hw.config = cfg;
> > event->hw.idx = bit;
> > @@ -435,11 +423,27 @@ static struct attribute_group rapl_pmu_attr_group = {
> > .attrs = rapl_pmu_attrs,
> > };
> > -RAPL_EVENT_ATTR_STR(energy-cores, rapl_cores, "event=0x01");
> > -RAPL_EVENT_ATTR_STR(energy-pkg , rapl_pkg, "event=0x02");
> > -RAPL_EVENT_ATTR_STR(energy-ram , rapl_ram, "event=0x03");
> > -RAPL_EVENT_ATTR_STR(energy-gpu , rapl_gpu, "event=0x04");
> > -RAPL_EVENT_ATTR_STR(energy-psys, rapl_psys, "event=0x05");
> > +static ssize_t
> > +rapl_event_sysfs_show(struct device *dev, struct device_attribute *attr,
> > + char *page)
> > +{
> > + struct perf_pmu_events_attr *pmu_attr =
> > + container_of(attr, struct perf_pmu_events_attr, attr);
> > +
> > + return sprintf(page, "event=%llu\n", pmu_attr->id);
> > +}
> > +
> > +#define RAPL_EVENT_ATTR(_name, v, _id) \
> > +static struct perf_pmu_events_attr event_attr_##v = { \
> > + .attr = __ATTR(_name, 0444, rapl_event_sysfs_show, NULL), \
> > + .id = RAPL_IDX_##_id##_NRG_STAT, \
> > +};
> > +
> > +RAPL_EVENT_ATTR(energy-cores, rapl_cores, PP0);
> > +RAPL_EVENT_ATTR(energy-pkg , rapl_pkg, PKG);
> > +RAPL_EVENT_ATTR(energy-ram , rapl_ram, RAM);
> > +RAPL_EVENT_ATTR(energy-gpu , rapl_gpu, PP1);
> > +RAPL_EVENT_ATTR(energy-psys , rapl_psys, PSYS);
> > RAPL_EVENT_ATTR_STR(energy-cores.unit, rapl_cores_unit, "Joules");
> > RAPL_EVENT_ATTR_STR(energy-pkg.unit , rapl_pkg_unit, "Joules");
> > @@ -780,6 +784,59 @@ static const struct x86_cpu_id rapl_cpu_match[] __initconst = {
> > MODULE_DEVICE_TABLE(x86cpu, rapl_cpu_match);
> > +static void __init remove_name(struct attribute **attrs, const char *name)
> > +{
> > + struct device_attribute *attr;
> > + int i, j;
> > +
> > + for (i = 0; attrs[i]; i++) {
> > + attr = (struct device_attribute *) attrs[i];
> > +
> > + if (strncmp(name, attr->attr.name, strlen(name)))
> > + continue;
> > +
> > + for (j = i; attrs[j]; j++)
> > + attrs[j] = attrs[j + 1];
> > +
> > + /* Check the shifted attr. */
> > + i--;
> > + }
> > +}
> > +
> > +static void __init check_events(struct attribute **attrs)
> > +{
> > + struct perf_pmu_events_attr *pmu_attr;
> > + struct device_attribute *attr;
> > + int i, j;
> > +
> > + for (i = 0; attrs[i]; i++) {
> > + u64 val = 0;
> > + u64 bit;
> > +
> > + attr = (struct device_attribute *) attrs[i];
> > + pmu_attr = container_of(attr, struct perf_pmu_events_attr, attr);
> > +
> > + if (pmu_attr->event_str)
> > + continue;
> > +
> > + bit = pmu_attr->id;
> > +
> > + if (WARN_ON_ONCE(bit >= NR_RAPL_DOMAINS))
> > + continue;
> > +
> > + if (!rdmsrl_safe(event_msr[bit], &val) && val)
> > + continue;
> > +
> > + remove_name(&attrs[i + 1], attr->attr.name);
>
> In perf cstate, we check and insert the available events for sysfs attrs
> I think we should use the same way here. It's better to factor out a common
> function for both cstate and rapl.
right, we will need to add some support to add multiple
attributes (for .unit and .scale files) but it seems
doable.. I'll try to put something together
thanks,
jirka
>
>
> Peter,
>
> The cstate, rapl and uncore are similar. I think it should be the right
> direction to abstract several common functions for them.
> Here, the sysfs attrs is an example.
> The common topology related functions I proposed is another example.
> https://lore.kernel.org/lkml/9631a92f-5b24-26a6-e160-4e4c0b4697c1@xxxxxxxxxxxxxxx/
>
> Thanks,
> Kan